feat(tasks): replace PaperlessTask model with structured redesign

Drop the old string-based PaperlessTask table and recreate it with
Status/TaskType/TriggerSource enums, JSONField result storage, and
duration tracking fields. Update all call sites to use the new API.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
stumpylog
2026-04-15 13:59:01 -07:00
parent dc06b679d3
commit ef4e3d31ef
10 changed files with 450 additions and 165 deletions

View File

@@ -144,18 +144,19 @@ class StoragePathAdmin(GuardedModelAdmin):
class TaskAdmin(admin.ModelAdmin):
list_display = ("task_id", "task_file_name", "task_name", "date_done", "status")
list_filter = ("status", "date_done", "task_name")
search_fields = ("task_name", "task_id", "status", "task_file_name")
list_display = ("task_id", "task_type", "trigger_source", "date_done", "status")
list_filter = ("status", "date_done", "task_type")
search_fields = ("task_type", "task_id", "status", "trigger_source")
readonly_fields = (
"task_id",
"task_file_name",
"task_name",
"task_type",
"trigger_source",
"status",
"date_created",
"date_started",
"date_done",
"result",
"result_message",
"result_data",
)

View File

@@ -870,8 +870,8 @@ class PaperlessTaskFilterSet(FilterSet):
class Meta:
model = PaperlessTask
fields = {
"type": ["exact"],
"task_name": ["exact"],
"task_type": ["exact"],
"trigger_source": ["exact"],
"status": ["exact"],
}

View File

@@ -0,0 +1,213 @@
"""
Drop and recreate the PaperlessTask table with the new structured schema.
We intentionally drop all existing task data -- the old schema was
string-based and incompatible with the new JSONField result storage.
"""
import django.db.models.deletion
import django.utils.timezone
from django.conf import settings
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "0018_saved_view_simple_search_rules"),
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
]
operations = [
migrations.DeleteModel(name="PaperlessTask"),
migrations.CreateModel(
name="PaperlessTask",
fields=[
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
(
"owner",
models.ForeignKey(
blank=True,
default=None,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
to=settings.AUTH_USER_MODEL,
verbose_name="owner",
),
),
(
"task_id",
models.CharField(
help_text="Celery task ID",
max_length=255,
unique=True,
verbose_name="Task ID",
),
),
(
"task_type",
models.CharField(
choices=[
("consume_file", "Consume File"),
("train_classifier", "Train Classifier"),
("sanity_check", "Sanity Check"),
("index_optimize", "Index Optimize"),
("index_rebuild", "Index Rebuild"),
("mail_fetch", "Mail Fetch"),
("llm_index", "LLM Index"),
],
db_index=True,
help_text="The kind of work being performed",
max_length=50,
verbose_name="Task Type",
),
),
(
"trigger_source",
models.CharField(
choices=[
("scheduled", "Scheduled"),
("web_ui", "Web UI"),
("api_upload", "API Upload"),
("folder_consume", "Folder Consume"),
("email_consume", "Email Consume"),
("system", "System"),
("manual", "Manual"),
],
db_index=True,
help_text="What initiated this task",
max_length=50,
verbose_name="Trigger Source",
),
),
(
"status",
models.CharField(
choices=[
("pending", "Pending"),
("started", "Started"),
("success", "Success"),
("failure", "Failure"),
("revoked", "Revoked"),
],
db_index=True,
default="pending",
max_length=30,
verbose_name="Status",
),
),
(
"date_created",
models.DateTimeField(
db_index=True,
default=django.utils.timezone.now,
verbose_name="Created",
),
),
(
"date_started",
models.DateTimeField(
blank=True,
null=True,
verbose_name="Started",
),
),
(
"date_done",
models.DateTimeField(
blank=True,
db_index=True,
null=True,
verbose_name="Completed",
),
),
(
"duration_seconds",
models.FloatField(
blank=True,
help_text="Elapsed time from start to completion",
null=True,
verbose_name="Duration (seconds)",
),
),
(
"wait_time_seconds",
models.FloatField(
blank=True,
help_text="Time from task creation to worker pickup",
null=True,
verbose_name="Wait Time (seconds)",
),
),
(
"input_data",
models.JSONField(
blank=True,
default=dict,
help_text="Structured input parameters for the task",
verbose_name="Input Data",
),
),
(
"result_data",
models.JSONField(
blank=True,
help_text="Structured result data from task execution",
null=True,
verbose_name="Result Data",
),
),
(
"result_message",
models.TextField(
blank=True,
help_text="Human-readable result message",
null=True,
verbose_name="Result Message",
),
),
(
"acknowledged",
models.BooleanField(
db_index=True,
default=False,
verbose_name="Acknowledged",
),
),
],
options={
"verbose_name": "Task",
"verbose_name_plural": "Tasks",
"ordering": ["-date_created"],
},
),
migrations.AddIndex(
model_name="paperlesstask",
index=models.Index(
fields=["status", "date_created"],
name="documents_p_status_8aa687_idx",
),
),
migrations.AddIndex(
model_name="paperlesstask",
index=models.Index(
fields=["task_type", "status"],
name="documents_p_task_ty_e4a93f_idx",
),
),
migrations.AddIndex(
model_name="paperlesstask",
index=models.Index(
fields=["owner", "acknowledged", "date_created"],
name="documents_p_owner_i_62c545_idx",
),
),
]

View File

@@ -3,7 +3,6 @@ from pathlib import Path
from typing import Final
import pathvalidate
from celery import states
from django.conf import settings
from django.contrib.auth.models import Group
from django.contrib.auth.models import User
@@ -663,97 +662,170 @@ class UiSettings(models.Model):
class PaperlessTask(ModelWithOwner):
ALL_STATES = sorted(states.ALL_STATES)
TASK_STATE_CHOICES = sorted(zip(ALL_STATES, ALL_STATES))
"""
Tracks background task execution for user visibility and debugging.
State transitions:
PENDING -> STARTED -> SUCCESS
PENDING -> STARTED -> FAILURE
PENDING -> REVOKED (if cancelled before starting)
"""
class Status(models.TextChoices):
PENDING = "pending", _("Pending")
STARTED = "started", _("Started")
SUCCESS = "success", _("Success")
FAILURE = "failure", _("Failure")
REVOKED = "revoked", _("Revoked")
class TaskType(models.TextChoices):
AUTO = ("auto_task", _("Auto Task"))
SCHEDULED_TASK = ("scheduled_task", _("Scheduled Task"))
MANUAL_TASK = ("manual_task", _("Manual Task"))
CONSUME_FILE = "consume_file", _("Consume File")
TRAIN_CLASSIFIER = "train_classifier", _("Train Classifier")
SANITY_CHECK = "sanity_check", _("Sanity Check")
INDEX_OPTIMIZE = "index_optimize", _("Index Optimize")
INDEX_REBUILD = "index_rebuild", _("Index Rebuild")
MAIL_FETCH = "mail_fetch", _("Mail Fetch")
LLM_INDEX = "llm_index", _("LLM Index")
class TaskName(models.TextChoices):
CONSUME_FILE = ("consume_file", _("Consume File"))
TRAIN_CLASSIFIER = ("train_classifier", _("Train Classifier"))
CHECK_SANITY = ("check_sanity", _("Check Sanity"))
INDEX_OPTIMIZE = ("index_optimize", _("Index Optimize"))
LLMINDEX_UPDATE = ("llmindex_update", _("LLM Index Update"))
class TriggerSource(models.TextChoices):
SCHEDULED = "scheduled", _("Scheduled") # Celery beat
WEB_UI = "web_ui", _("Web UI") # Document uploaded via web
API_UPLOAD = "api_upload", _("API Upload") # Document uploaded via API
FOLDER_CONSUME = "folder_consume", _("Folder Consume") # Consume folder
EMAIL_CONSUME = "email_consume", _("Email Consume") # Email attachment
SYSTEM = (
"system",
_("System"),
) # Auto-triggered by system (self-heal, config side-effect)
MANUAL = "manual", _("Manual") # User explicitly ran via /api/tasks/run/
# Identification
task_id = models.CharField(
max_length=255,
unique=True,
verbose_name=_("Task ID"),
help_text=_("Celery ID for the Task that was run"),
help_text=_("Celery task ID"),
)
acknowledged = models.BooleanField(
default=False,
verbose_name=_("Acknowledged"),
help_text=_("If the task is acknowledged via the frontend or API"),
task_type = models.CharField(
max_length=50,
choices=TaskType.choices,
verbose_name=_("Task Type"),
help_text=_("The kind of work being performed"),
db_index=True,
)
task_file_name = models.CharField(
null=True,
max_length=255,
verbose_name=_("Task Filename"),
help_text=_("Name of the file which the Task was run for"),
)
task_name = models.CharField(
null=True,
max_length=255,
choices=TaskName.choices,
verbose_name=_("Task Name"),
help_text=_("Name of the task that was run"),
trigger_source = models.CharField(
max_length=50,
choices=TriggerSource.choices,
verbose_name=_("Trigger Source"),
help_text=_("What initiated this task"),
db_index=True,
)
# State tracking
status = models.CharField(
max_length=30,
default=states.PENDING,
choices=TASK_STATE_CHOICES,
verbose_name=_("Task State"),
help_text=_("Current state of the task being run"),
choices=Status.choices,
default=Status.PENDING,
verbose_name=_("Status"),
db_index=True,
)
# Timestamps
date_created = models.DateTimeField(
null=True,
default=timezone.now,
verbose_name=_("Created DateTime"),
help_text=_("Datetime field when the task result was created in UTC"),
verbose_name=_("Created"),
db_index=True,
)
date_started = models.DateTimeField(
null=True,
default=None,
verbose_name=_("Started DateTime"),
help_text=_("Datetime field when the task was started in UTC"),
blank=True,
verbose_name=_("Started"),
)
date_done = models.DateTimeField(
null=True,
default=None,
verbose_name=_("Completed DateTime"),
help_text=_("Datetime field when the task was completed in UTC"),
blank=True,
verbose_name=_("Completed"),
db_index=True,
)
result = models.TextField(
# Duration fields -- populated by task_postrun signal handler
duration_seconds = models.FloatField(
null=True,
default=None,
verbose_name=_("Result Data"),
help_text=_(
"The data returned by the task",
),
blank=True,
verbose_name=_("Duration (seconds)"),
help_text=_("Elapsed time from start to completion"),
)
type = models.CharField(
max_length=30,
choices=TaskType.choices,
default=TaskType.AUTO,
verbose_name=_("Task Type"),
help_text=_("The type of task that was run"),
wait_time_seconds = models.FloatField(
null=True,
blank=True,
verbose_name=_("Wait Time (seconds)"),
help_text=_("Time from task creation to worker pickup"),
)
# Input/Output data
input_data = models.JSONField(
default=dict,
blank=True,
verbose_name=_("Input Data"),
help_text=_("Structured input parameters for the task"),
)
result_data = models.JSONField(
null=True,
blank=True,
verbose_name=_("Result Data"),
help_text=_("Structured result data from task execution"),
)
result_message = models.TextField(
null=True,
blank=True,
verbose_name=_("Result Message"),
help_text=_("Human-readable result message"),
)
# Acknowledgment
acknowledged = models.BooleanField(
default=False,
verbose_name=_("Acknowledged"),
db_index=True,
)
class Meta:
verbose_name = _("Task")
verbose_name_plural = _("Tasks")
ordering = ["-date_created"]
indexes = [
models.Index(fields=["status", "date_created"]),
models.Index(fields=["task_type", "status"]),
models.Index(fields=["owner", "acknowledged", "date_created"]),
]
def __str__(self) -> str:
return f"Task {self.task_id}"
return f"{self.get_task_type_display()} [{self.task_id[:8]}]"
@property
def is_complete(self) -> bool:
return self.status in (
self.Status.SUCCESS,
self.Status.FAILURE,
self.Status.REVOKED,
)
@property
def related_document_ids(self) -> list[int]:
if not self.result_data:
return []
if doc_id := self.result_data.get("document_id"):
return [doc_id]
if dup_id := self.result_data.get("duplicate_of"):
return [dup_id]
return []
class Note(SoftDeleteModel):

View File

@@ -18,7 +18,6 @@ from typing import TYPE_CHECKING
from typing import Final
from typing import TypedDict
from celery import states
from django.conf import settings
from django.utils import timezone
@@ -303,13 +302,13 @@ def check_sanity(
"""
paperless_task = PaperlessTask.objects.create(
task_id=uuid.uuid4(),
type=(
PaperlessTask.TaskType.SCHEDULED_TASK
trigger_source=(
PaperlessTask.TriggerSource.SCHEDULED
if scheduled
else PaperlessTask.TaskType.MANUAL_TASK
else PaperlessTask.TriggerSource.MANUAL
),
task_name=PaperlessTask.TaskName.CHECK_SANITY,
status=states.STARTED,
task_type=PaperlessTask.TaskType.SANITY_CHECK,
status=PaperlessTask.Status.STARTED,
date_created=timezone.now(),
date_started=timezone.now(),
)
@@ -332,9 +331,13 @@ def check_sanity(
for extra_file in present_files:
messages.warning(None, f"Orphaned file in media dir: {extra_file}")
paperless_task.status = states.SUCCESS if not messages.has_error else states.FAILURE
paperless_task.status = (
PaperlessTask.Status.SUCCESS
if not messages.has_error
else PaperlessTask.Status.FAILURE
)
if messages.total_issue_count == 0:
paperless_task.result = "No issues found."
paperless_task.result_message = "No issues found."
else:
parts: list[str] = []
if messages.document_error_count:
@@ -343,11 +346,11 @@ def check_sanity(
parts.append(f"{messages.document_warning_count} document(s) with warnings")
if messages.global_warning_count:
parts.append(f"{messages.global_warning_count} global warning(s)")
paperless_task.result = ", ".join(parts) + " found."
paperless_task.result_message = ", ".join(parts) + " found."
if messages.has_error:
paperless_task.result += " Check logs for details."
paperless_task.result_message += " Check logs for details."
paperless_task.date_done = timezone.now()
paperless_task.save(update_fields=["status", "result", "date_done"])
paperless_task.save(update_fields=["status", "result_message", "date_done"])
return messages

View File

@@ -12,7 +12,6 @@ from typing import Literal
from typing import TypedDict
import magic
from celery import states
from django.conf import settings
from django.contrib.auth.models import Group
from django.contrib.auth.models import User
@@ -2437,13 +2436,13 @@ class TasksViewSerializer(OwnedObjectSerializer):
fields = (
"id",
"task_id",
"task_name",
"task_file_name",
"task_type",
"trigger_source",
"date_created",
"date_done",
"type",
"status",
"result",
"result_message",
"result_data",
"acknowledged",
"related_document",
"duplicate_documents",
@@ -2452,29 +2451,10 @@ class TasksViewSerializer(OwnedObjectSerializer):
related_document = serializers.SerializerMethodField()
duplicate_documents = serializers.SerializerMethodField()
created_doc_re = re.compile(r"New document id (\d+) created")
duplicate_doc_re = re.compile(r"It is a duplicate of .* \(#(\d+)\)")
def get_related_document(self, obj) -> str | None:
result = None
re = None
if obj.result:
match obj.status:
case states.SUCCESS:
re = self.created_doc_re
case states.FAILURE:
re = (
self.duplicate_doc_re
if "existing document is in the trash" not in obj.result
else None
)
if re is not None:
try:
result = re.search(obj.result).group(1)
except Exception:
pass
return result
def get_related_document(self, obj) -> int | None:
doc_ids = obj.related_document_ids
return doc_ids[0] if doc_ids else None
@extend_schema_field(DuplicateDocumentSummarySerializer(many=True))
def get_duplicate_documents(self, obj):
@@ -2489,9 +2469,9 @@ class TasksViewSerializer(OwnedObjectSerializer):
class RunTaskViewSerializer(serializers.Serializer[dict[str, Any]]):
task_name = serializers.ChoiceField(
choices=PaperlessTask.TaskName.choices,
label="Task Name",
task_type = serializers.ChoiceField(
choices=PaperlessTask.TaskType.choices,
label="Task Type",
write_only=True,
)

View File

@@ -1021,12 +1021,11 @@ def before_task_publish_handler(sender=None, headers=None, body=None, **kwargs)
user_id = overrides.owner_id if overrides else None
PaperlessTask.objects.create(
type=PaperlessTask.TaskType.AUTO,
trigger_source=PaperlessTask.TriggerSource.FOLDER_CONSUME,
task_id=headers["id"],
status=states.PENDING,
task_file_name=task_file_name,
task_name=PaperlessTask.TaskName.CONSUME_FILE,
result=None,
status=PaperlessTask.Status.PENDING,
input_data={"filename": task_file_name},
task_type=PaperlessTask.TaskType.CONSUME_FILE,
date_created=timezone.now(),
date_started=None,
date_done=None,
@@ -1052,7 +1051,7 @@ def task_prerun_handler(sender=None, task_id=None, task=None, **kwargs) -> None:
task_instance = PaperlessTask.objects.filter(task_id=task_id).first()
if task_instance is not None:
task_instance.status = states.STARTED
task_instance.status = PaperlessTask.Status.STARTED
task_instance.date_started = timezone.now()
task_instance.save()
except Exception: # pragma: no cover
@@ -1080,8 +1079,19 @@ def task_postrun_handler(
task_instance = PaperlessTask.objects.filter(task_id=task_id).first()
if task_instance is not None:
task_instance.status = state or states.FAILURE
task_instance.result = retval
_CELERY_STATE_MAP = {
states.SUCCESS: PaperlessTask.Status.SUCCESS,
states.FAILURE: PaperlessTask.Status.FAILURE,
states.REVOKED: PaperlessTask.Status.REVOKED,
states.STARTED: PaperlessTask.Status.STARTED,
states.PENDING: PaperlessTask.Status.PENDING,
}
task_instance.status = _CELERY_STATE_MAP.get(
state,
PaperlessTask.Status.FAILURE,
)
if isinstance(retval, str):
task_instance.result_message = retval
task_instance.date_done = timezone.now()
task_instance.save()
except Exception: # pragma: no cover
@@ -1108,9 +1118,9 @@ def task_failure_handler(
close_old_connections()
task_instance = PaperlessTask.objects.filter(task_id=task_id).first()
if task_instance is not None and task_instance.result is None:
task_instance.status = states.FAILURE
task_instance.result = traceback
if task_instance is not None and task_instance.result_message is None:
task_instance.status = PaperlessTask.Status.FAILURE
task_instance.result_message = str(traceback) if traceback else None
task_instance.date_done = timezone.now()
task_instance.save()
except Exception: # pragma: no cover

View File

@@ -10,7 +10,6 @@ from tempfile import mkstemp
from celery import Task
from celery import shared_task
from celery import states
from django.conf import settings
from django.contrib.contenttypes.models import ContentType
from django.db import models
@@ -88,12 +87,12 @@ def train_classifier(
status_callback: Callable[[str], None] | None = None,
) -> None:
task = PaperlessTask.objects.create(
type=PaperlessTask.TaskType.SCHEDULED_TASK
trigger_source=PaperlessTask.TriggerSource.SCHEDULED
if scheduled
else PaperlessTask.TaskType.MANUAL_TASK,
else PaperlessTask.TriggerSource.MANUAL,
task_id=uuid.uuid4(),
task_name=PaperlessTask.TaskName.TRAIN_CLASSIFIER,
status=states.STARTED,
task_type=PaperlessTask.TaskType.TRAIN_CLASSIFIER,
status=PaperlessTask.Status.STARTED,
date_created=timezone.now(),
date_started=timezone.now(),
)
@@ -110,8 +109,8 @@ def train_classifier(
if settings.MODEL_FILE.exists():
logger.info(f"Removing {settings.MODEL_FILE} so it won't be used")
settings.MODEL_FILE.unlink()
task.status = states.SUCCESS
task.result = result
task.status = PaperlessTask.Status.SUCCESS
task.result_message = result
task.date_done = timezone.now()
task.save()
return
@@ -127,20 +126,20 @@ def train_classifier(
f"Saving updated classifier model to {settings.MODEL_FILE}...",
)
classifier.save()
task.result = "Training completed successfully"
task.result_message = "Training completed successfully"
else:
logger.debug("Training data unchanged.")
task.result = "Training data unchanged"
task.result_message = "Training data unchanged"
task.status = states.SUCCESS
task.status = PaperlessTask.Status.SUCCESS
except Exception as e:
logger.warning("Classifier error: " + str(e))
task.status = states.FAILURE
task.result = str(e)
task.status = PaperlessTask.Status.FAILURE
task.result_message = str(e)
task.date_done = timezone.now()
task.save(update_fields=["status", "result", "date_done"])
task.save(update_fields=["status", "result_message", "date_done"])
@shared_task(bind=True)
@@ -642,14 +641,14 @@ def llmindex_index(
ai_config = AIConfig()
if ai_config.llm_index_enabled:
task = PaperlessTask.objects.create(
type=PaperlessTask.TaskType.SCHEDULED_TASK
trigger_source=PaperlessTask.TriggerSource.SCHEDULED
if scheduled
else PaperlessTask.TaskType.AUTO
else PaperlessTask.TriggerSource.SYSTEM
if auto
else PaperlessTask.TaskType.MANUAL_TASK,
else PaperlessTask.TriggerSource.MANUAL,
task_id=uuid.uuid4(),
task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,
status=states.STARTED,
task_type=PaperlessTask.TaskType.LLM_INDEX,
status=PaperlessTask.Status.STARTED,
date_created=timezone.now(),
date_started=timezone.now(),
)
@@ -660,15 +659,15 @@ def llmindex_index(
iter_wrapper=iter_wrapper,
rebuild=rebuild,
)
task.status = states.SUCCESS
task.result = result
task.status = PaperlessTask.Status.SUCCESS
task.result_message = result
except Exception as e:
logger.error("LLM index error: " + str(e))
task.status = states.FAILURE
task.result = str(e)
task.status = PaperlessTask.Status.FAILURE
task.result_message = str(e)
task.date_done = timezone.now()
task.save(update_fields=["status", "result", "date_done"])
task.save(update_fields=["status", "result_message", "date_done"])
else:
logger.info("LLM index is disabled, skipping update.")

View File

@@ -20,7 +20,6 @@ from urllib.parse import urlparse
import httpx
import magic
import pathvalidate
from celery import states
from django.conf import settings
from django.contrib.auth.models import Group
from django.contrib.auth.models import User
@@ -3777,16 +3776,16 @@ class TasksViewSet(ReadOnlyModelViewSet[PaperlessTask]):
filterset_class = PaperlessTaskFilterSet
TASK_AND_ARGS_BY_NAME = {
PaperlessTask.TaskName.INDEX_OPTIMIZE: (index_optimize, {}),
PaperlessTask.TaskName.TRAIN_CLASSIFIER: (
PaperlessTask.TaskType.INDEX_OPTIMIZE: (index_optimize, {}),
PaperlessTask.TaskType.TRAIN_CLASSIFIER: (
train_classifier,
{"scheduled": False},
),
PaperlessTask.TaskName.CHECK_SANITY: (
PaperlessTask.TaskType.SANITY_CHECK: (
sanity_check,
{"scheduled": False, "raise_on_error": False},
),
PaperlessTask.TaskName.LLMINDEX_UPDATE: (
PaperlessTask.TaskType.LLM_INDEX: (
llmindex_index,
{"scheduled": False, "rebuild": False},
),
@@ -3824,13 +3823,13 @@ class TasksViewSet(ReadOnlyModelViewSet[PaperlessTask]):
def run(self, request):
serializer = RunTaskViewSerializer(data=request.data)
serializer.is_valid(raise_exception=True)
task_name = serializer.validated_data.get("task_name")
task_type = serializer.validated_data.get("task_type")
if not request.user.is_superuser:
return HttpResponseForbidden("Insufficient permissions")
try:
task_func, task_args = self.TASK_AND_ARGS_BY_NAME[task_name]
task_func, task_args = self.TASK_AND_ARGS_BY_NAME[task_type]
result = task_func(**task_args)
return Response({"result": result})
except Exception as e:
@@ -4466,11 +4465,11 @@ class SystemStatusView(PassUserMixin):
last_trained_task = (
PaperlessTask.objects.filter(
task_name=PaperlessTask.TaskName.TRAIN_CLASSIFIER,
task_type=PaperlessTask.TaskType.TRAIN_CLASSIFIER,
status__in=[
states.SUCCESS,
states.FAILURE,
states.REVOKED,
PaperlessTask.Status.SUCCESS,
PaperlessTask.Status.FAILURE,
PaperlessTask.Status.REVOKED,
], # ignore running tasks
)
.order_by("-date_done")
@@ -4481,20 +4480,23 @@ class SystemStatusView(PassUserMixin):
if last_trained_task is None:
classifier_status = "WARNING"
classifier_error = "No classifier training tasks found"
elif last_trained_task and last_trained_task.status != states.SUCCESS:
elif (
last_trained_task
and last_trained_task.status != PaperlessTask.Status.SUCCESS
):
classifier_status = "ERROR"
classifier_error = last_trained_task.result
classifier_error = last_trained_task.result_message
classifier_last_trained = (
last_trained_task.date_done if last_trained_task else None
)
last_sanity_check = (
PaperlessTask.objects.filter(
task_name=PaperlessTask.TaskName.CHECK_SANITY,
task_type=PaperlessTask.TaskType.SANITY_CHECK,
status__in=[
states.SUCCESS,
states.FAILURE,
states.REVOKED,
PaperlessTask.Status.SUCCESS,
PaperlessTask.Status.FAILURE,
PaperlessTask.Status.REVOKED,
], # ignore running tasks
)
.order_by("-date_done")
@@ -4505,9 +4507,12 @@ class SystemStatusView(PassUserMixin):
if last_sanity_check is None:
sanity_check_status = "WARNING"
sanity_check_error = "No sanity check tasks found"
elif last_sanity_check and last_sanity_check.status != states.SUCCESS:
elif (
last_sanity_check
and last_sanity_check.status != PaperlessTask.Status.SUCCESS
):
sanity_check_status = "ERROR"
sanity_check_error = last_sanity_check.result
sanity_check_error = last_sanity_check.result_message
sanity_check_last_run = (
last_sanity_check.date_done if last_sanity_check else None
)
@@ -4520,7 +4525,7 @@ class SystemStatusView(PassUserMixin):
else:
last_llmindex_update = (
PaperlessTask.objects.filter(
task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,
task_type=PaperlessTask.TaskType.LLM_INDEX,
)
.order_by("-date_done")
.first()
@@ -4530,9 +4535,12 @@ class SystemStatusView(PassUserMixin):
if last_llmindex_update is None:
llmindex_status = "WARNING"
llmindex_error = "No LLM index update tasks found"
elif last_llmindex_update and last_llmindex_update.status == states.FAILURE:
elif (
last_llmindex_update
and last_llmindex_update.status == PaperlessTask.Status.FAILURE
):
llmindex_status = "ERROR"
llmindex_error = last_llmindex_update.result
llmindex_error = last_llmindex_update.result_message
llmindex_last_modified = (
last_llmindex_update.date_done if last_llmindex_update else None
)

View File

@@ -4,7 +4,6 @@ from datetime import timedelta
from pathlib import Path
from typing import TYPE_CHECKING
from celery import states
from django.conf import settings
from django.utils import timezone
@@ -28,11 +27,11 @@ def queue_llm_index_update_if_needed(*, rebuild: bool, reason: str) -> bool:
from documents.tasks import llmindex_index
has_running = PaperlessTask.objects.filter(
task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,
status__in=[states.PENDING, states.STARTED],
task_type=PaperlessTask.TaskType.LLM_INDEX,
status__in=[PaperlessTask.Status.PENDING, PaperlessTask.Status.STARTED],
).exists()
has_recent = PaperlessTask.objects.filter(
task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,
task_type=PaperlessTask.TaskType.LLM_INDEX,
date_created__gte=(timezone.now() - timedelta(minutes=5)),
).exists()
if has_running or has_recent: