diff --git a/src/documents/admin.py b/src/documents/admin.py index f0e5ccd25..63d10fd1d 100644 --- a/src/documents/admin.py +++ b/src/documents/admin.py @@ -144,18 +144,19 @@ class StoragePathAdmin(GuardedModelAdmin): class TaskAdmin(admin.ModelAdmin): - list_display = ("task_id", "task_file_name", "task_name", "date_done", "status") - list_filter = ("status", "date_done", "task_name") - search_fields = ("task_name", "task_id", "status", "task_file_name") + list_display = ("task_id", "task_type", "trigger_source", "date_done", "status") + list_filter = ("status", "date_done", "task_type") + search_fields = ("task_type", "task_id", "status", "trigger_source") readonly_fields = ( "task_id", - "task_file_name", - "task_name", + "task_type", + "trigger_source", "status", "date_created", "date_started", "date_done", - "result", + "result_message", + "result_data", ) diff --git a/src/documents/filters.py b/src/documents/filters.py index b2b226ee1..caf3bfe25 100644 --- a/src/documents/filters.py +++ b/src/documents/filters.py @@ -870,8 +870,8 @@ class PaperlessTaskFilterSet(FilterSet): class Meta: model = PaperlessTask fields = { - "type": ["exact"], - "task_name": ["exact"], + "task_type": ["exact"], + "trigger_source": ["exact"], "status": ["exact"], } diff --git a/src/documents/migrations/0019_task_system_redesign.py b/src/documents/migrations/0019_task_system_redesign.py new file mode 100644 index 000000000..790c26650 --- /dev/null +++ b/src/documents/migrations/0019_task_system_redesign.py @@ -0,0 +1,213 @@ +""" +Drop and recreate the PaperlessTask table with the new structured schema. + +We intentionally drop all existing task data -- the old schema was +string-based and incompatible with the new JSONField result storage. +""" + +import django.db.models.deletion +import django.utils.timezone +from django.conf import settings +from django.db import migrations +from django.db import models + + +class Migration(migrations.Migration): + dependencies = [ + ("documents", "0018_saved_view_simple_search_rules"), + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ] + + operations = [ + migrations.DeleteModel(name="PaperlessTask"), + migrations.CreateModel( + name="PaperlessTask", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "owner", + models.ForeignKey( + blank=True, + default=None, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + to=settings.AUTH_USER_MODEL, + verbose_name="owner", + ), + ), + ( + "task_id", + models.CharField( + help_text="Celery task ID", + max_length=255, + unique=True, + verbose_name="Task ID", + ), + ), + ( + "task_type", + models.CharField( + choices=[ + ("consume_file", "Consume File"), + ("train_classifier", "Train Classifier"), + ("sanity_check", "Sanity Check"), + ("index_optimize", "Index Optimize"), + ("index_rebuild", "Index Rebuild"), + ("mail_fetch", "Mail Fetch"), + ("llm_index", "LLM Index"), + ], + db_index=True, + help_text="The kind of work being performed", + max_length=50, + verbose_name="Task Type", + ), + ), + ( + "trigger_source", + models.CharField( + choices=[ + ("scheduled", "Scheduled"), + ("web_ui", "Web UI"), + ("api_upload", "API Upload"), + ("folder_consume", "Folder Consume"), + ("email_consume", "Email Consume"), + ("system", "System"), + ("manual", "Manual"), + ], + db_index=True, + help_text="What initiated this task", + max_length=50, + verbose_name="Trigger Source", + ), + ), + ( + "status", + models.CharField( + choices=[ + ("pending", "Pending"), + ("started", "Started"), + ("success", "Success"), + ("failure", "Failure"), + ("revoked", "Revoked"), + ], + db_index=True, + default="pending", + max_length=30, + verbose_name="Status", + ), + ), + ( + "date_created", + models.DateTimeField( + db_index=True, + default=django.utils.timezone.now, + verbose_name="Created", + ), + ), + ( + "date_started", + models.DateTimeField( + blank=True, + null=True, + verbose_name="Started", + ), + ), + ( + "date_done", + models.DateTimeField( + blank=True, + db_index=True, + null=True, + verbose_name="Completed", + ), + ), + ( + "duration_seconds", + models.FloatField( + blank=True, + help_text="Elapsed time from start to completion", + null=True, + verbose_name="Duration (seconds)", + ), + ), + ( + "wait_time_seconds", + models.FloatField( + blank=True, + help_text="Time from task creation to worker pickup", + null=True, + verbose_name="Wait Time (seconds)", + ), + ), + ( + "input_data", + models.JSONField( + blank=True, + default=dict, + help_text="Structured input parameters for the task", + verbose_name="Input Data", + ), + ), + ( + "result_data", + models.JSONField( + blank=True, + help_text="Structured result data from task execution", + null=True, + verbose_name="Result Data", + ), + ), + ( + "result_message", + models.TextField( + blank=True, + help_text="Human-readable result message", + null=True, + verbose_name="Result Message", + ), + ), + ( + "acknowledged", + models.BooleanField( + db_index=True, + default=False, + verbose_name="Acknowledged", + ), + ), + ], + options={ + "verbose_name": "Task", + "verbose_name_plural": "Tasks", + "ordering": ["-date_created"], + }, + ), + migrations.AddIndex( + model_name="paperlesstask", + index=models.Index( + fields=["status", "date_created"], + name="documents_p_status_8aa687_idx", + ), + ), + migrations.AddIndex( + model_name="paperlesstask", + index=models.Index( + fields=["task_type", "status"], + name="documents_p_task_ty_e4a93f_idx", + ), + ), + migrations.AddIndex( + model_name="paperlesstask", + index=models.Index( + fields=["owner", "acknowledged", "date_created"], + name="documents_p_owner_i_62c545_idx", + ), + ), + ] diff --git a/src/documents/models.py b/src/documents/models.py index e2f5bb3dc..83b310394 100644 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -3,7 +3,6 @@ from pathlib import Path from typing import Final import pathvalidate -from celery import states from django.conf import settings from django.contrib.auth.models import Group from django.contrib.auth.models import User @@ -663,97 +662,170 @@ class UiSettings(models.Model): class PaperlessTask(ModelWithOwner): - ALL_STATES = sorted(states.ALL_STATES) - TASK_STATE_CHOICES = sorted(zip(ALL_STATES, ALL_STATES)) + """ + Tracks background task execution for user visibility and debugging. + + State transitions: + PENDING -> STARTED -> SUCCESS + PENDING -> STARTED -> FAILURE + PENDING -> REVOKED (if cancelled before starting) + """ + + class Status(models.TextChoices): + PENDING = "pending", _("Pending") + STARTED = "started", _("Started") + SUCCESS = "success", _("Success") + FAILURE = "failure", _("Failure") + REVOKED = "revoked", _("Revoked") class TaskType(models.TextChoices): - AUTO = ("auto_task", _("Auto Task")) - SCHEDULED_TASK = ("scheduled_task", _("Scheduled Task")) - MANUAL_TASK = ("manual_task", _("Manual Task")) + CONSUME_FILE = "consume_file", _("Consume File") + TRAIN_CLASSIFIER = "train_classifier", _("Train Classifier") + SANITY_CHECK = "sanity_check", _("Sanity Check") + INDEX_OPTIMIZE = "index_optimize", _("Index Optimize") + INDEX_REBUILD = "index_rebuild", _("Index Rebuild") + MAIL_FETCH = "mail_fetch", _("Mail Fetch") + LLM_INDEX = "llm_index", _("LLM Index") - class TaskName(models.TextChoices): - CONSUME_FILE = ("consume_file", _("Consume File")) - TRAIN_CLASSIFIER = ("train_classifier", _("Train Classifier")) - CHECK_SANITY = ("check_sanity", _("Check Sanity")) - INDEX_OPTIMIZE = ("index_optimize", _("Index Optimize")) - LLMINDEX_UPDATE = ("llmindex_update", _("LLM Index Update")) + class TriggerSource(models.TextChoices): + SCHEDULED = "scheduled", _("Scheduled") # Celery beat + WEB_UI = "web_ui", _("Web UI") # Document uploaded via web + API_UPLOAD = "api_upload", _("API Upload") # Document uploaded via API + FOLDER_CONSUME = "folder_consume", _("Folder Consume") # Consume folder + EMAIL_CONSUME = "email_consume", _("Email Consume") # Email attachment + SYSTEM = ( + "system", + _("System"), + ) # Auto-triggered by system (self-heal, config side-effect) + MANUAL = "manual", _("Manual") # User explicitly ran via /api/tasks/run/ + # Identification task_id = models.CharField( max_length=255, unique=True, verbose_name=_("Task ID"), - help_text=_("Celery ID for the Task that was run"), + help_text=_("Celery task ID"), ) - acknowledged = models.BooleanField( - default=False, - verbose_name=_("Acknowledged"), - help_text=_("If the task is acknowledged via the frontend or API"), + task_type = models.CharField( + max_length=50, + choices=TaskType.choices, + verbose_name=_("Task Type"), + help_text=_("The kind of work being performed"), + db_index=True, ) - task_file_name = models.CharField( - null=True, - max_length=255, - verbose_name=_("Task Filename"), - help_text=_("Name of the file which the Task was run for"), - ) - - task_name = models.CharField( - null=True, - max_length=255, - choices=TaskName.choices, - verbose_name=_("Task Name"), - help_text=_("Name of the task that was run"), + trigger_source = models.CharField( + max_length=50, + choices=TriggerSource.choices, + verbose_name=_("Trigger Source"), + help_text=_("What initiated this task"), + db_index=True, ) + # State tracking status = models.CharField( max_length=30, - default=states.PENDING, - choices=TASK_STATE_CHOICES, - verbose_name=_("Task State"), - help_text=_("Current state of the task being run"), + choices=Status.choices, + default=Status.PENDING, + verbose_name=_("Status"), + db_index=True, ) + # Timestamps date_created = models.DateTimeField( - null=True, default=timezone.now, - verbose_name=_("Created DateTime"), - help_text=_("Datetime field when the task result was created in UTC"), + verbose_name=_("Created"), + db_index=True, ) date_started = models.DateTimeField( null=True, - default=None, - verbose_name=_("Started DateTime"), - help_text=_("Datetime field when the task was started in UTC"), + blank=True, + verbose_name=_("Started"), ) date_done = models.DateTimeField( null=True, - default=None, - verbose_name=_("Completed DateTime"), - help_text=_("Datetime field when the task was completed in UTC"), + blank=True, + verbose_name=_("Completed"), + db_index=True, ) - result = models.TextField( + # Duration fields -- populated by task_postrun signal handler + duration_seconds = models.FloatField( null=True, - default=None, - verbose_name=_("Result Data"), - help_text=_( - "The data returned by the task", - ), + blank=True, + verbose_name=_("Duration (seconds)"), + help_text=_("Elapsed time from start to completion"), ) - type = models.CharField( - max_length=30, - choices=TaskType.choices, - default=TaskType.AUTO, - verbose_name=_("Task Type"), - help_text=_("The type of task that was run"), + wait_time_seconds = models.FloatField( + null=True, + blank=True, + verbose_name=_("Wait Time (seconds)"), + help_text=_("Time from task creation to worker pickup"), ) + # Input/Output data + input_data = models.JSONField( + default=dict, + blank=True, + verbose_name=_("Input Data"), + help_text=_("Structured input parameters for the task"), + ) + + result_data = models.JSONField( + null=True, + blank=True, + verbose_name=_("Result Data"), + help_text=_("Structured result data from task execution"), + ) + + result_message = models.TextField( + null=True, + blank=True, + verbose_name=_("Result Message"), + help_text=_("Human-readable result message"), + ) + + # Acknowledgment + acknowledged = models.BooleanField( + default=False, + verbose_name=_("Acknowledged"), + db_index=True, + ) + + class Meta: + verbose_name = _("Task") + verbose_name_plural = _("Tasks") + ordering = ["-date_created"] + indexes = [ + models.Index(fields=["status", "date_created"]), + models.Index(fields=["task_type", "status"]), + models.Index(fields=["owner", "acknowledged", "date_created"]), + ] + def __str__(self) -> str: - return f"Task {self.task_id}" + return f"{self.get_task_type_display()} [{self.task_id[:8]}]" + + @property + def is_complete(self) -> bool: + return self.status in ( + self.Status.SUCCESS, + self.Status.FAILURE, + self.Status.REVOKED, + ) + + @property + def related_document_ids(self) -> list[int]: + if not self.result_data: + return [] + if doc_id := self.result_data.get("document_id"): + return [doc_id] + if dup_id := self.result_data.get("duplicate_of"): + return [dup_id] + return [] class Note(SoftDeleteModel): diff --git a/src/documents/sanity_checker.py b/src/documents/sanity_checker.py index a6d00cd3a..ee05fe16c 100644 --- a/src/documents/sanity_checker.py +++ b/src/documents/sanity_checker.py @@ -18,7 +18,6 @@ from typing import TYPE_CHECKING from typing import Final from typing import TypedDict -from celery import states from django.conf import settings from django.utils import timezone @@ -303,13 +302,13 @@ def check_sanity( """ paperless_task = PaperlessTask.objects.create( task_id=uuid.uuid4(), - type=( - PaperlessTask.TaskType.SCHEDULED_TASK + trigger_source=( + PaperlessTask.TriggerSource.SCHEDULED if scheduled - else PaperlessTask.TaskType.MANUAL_TASK + else PaperlessTask.TriggerSource.MANUAL ), - task_name=PaperlessTask.TaskName.CHECK_SANITY, - status=states.STARTED, + task_type=PaperlessTask.TaskType.SANITY_CHECK, + status=PaperlessTask.Status.STARTED, date_created=timezone.now(), date_started=timezone.now(), ) @@ -332,9 +331,13 @@ def check_sanity( for extra_file in present_files: messages.warning(None, f"Orphaned file in media dir: {extra_file}") - paperless_task.status = states.SUCCESS if not messages.has_error else states.FAILURE + paperless_task.status = ( + PaperlessTask.Status.SUCCESS + if not messages.has_error + else PaperlessTask.Status.FAILURE + ) if messages.total_issue_count == 0: - paperless_task.result = "No issues found." + paperless_task.result_message = "No issues found." else: parts: list[str] = [] if messages.document_error_count: @@ -343,11 +346,11 @@ def check_sanity( parts.append(f"{messages.document_warning_count} document(s) with warnings") if messages.global_warning_count: parts.append(f"{messages.global_warning_count} global warning(s)") - paperless_task.result = ", ".join(parts) + " found." + paperless_task.result_message = ", ".join(parts) + " found." if messages.has_error: - paperless_task.result += " Check logs for details." + paperless_task.result_message += " Check logs for details." paperless_task.date_done = timezone.now() - paperless_task.save(update_fields=["status", "result", "date_done"]) + paperless_task.save(update_fields=["status", "result_message", "date_done"]) return messages diff --git a/src/documents/serialisers.py b/src/documents/serialisers.py index 1efbf6b7b..2125c3c84 100644 --- a/src/documents/serialisers.py +++ b/src/documents/serialisers.py @@ -12,7 +12,6 @@ from typing import Literal from typing import TypedDict import magic -from celery import states from django.conf import settings from django.contrib.auth.models import Group from django.contrib.auth.models import User @@ -2437,13 +2436,13 @@ class TasksViewSerializer(OwnedObjectSerializer): fields = ( "id", "task_id", - "task_name", - "task_file_name", + "task_type", + "trigger_source", "date_created", "date_done", - "type", "status", - "result", + "result_message", + "result_data", "acknowledged", "related_document", "duplicate_documents", @@ -2452,29 +2451,10 @@ class TasksViewSerializer(OwnedObjectSerializer): related_document = serializers.SerializerMethodField() duplicate_documents = serializers.SerializerMethodField() - created_doc_re = re.compile(r"New document id (\d+) created") - duplicate_doc_re = re.compile(r"It is a duplicate of .* \(#(\d+)\)") - def get_related_document(self, obj) -> str | None: - result = None - re = None - if obj.result: - match obj.status: - case states.SUCCESS: - re = self.created_doc_re - case states.FAILURE: - re = ( - self.duplicate_doc_re - if "existing document is in the trash" not in obj.result - else None - ) - if re is not None: - try: - result = re.search(obj.result).group(1) - except Exception: - pass - - return result + def get_related_document(self, obj) -> int | None: + doc_ids = obj.related_document_ids + return doc_ids[0] if doc_ids else None @extend_schema_field(DuplicateDocumentSummarySerializer(many=True)) def get_duplicate_documents(self, obj): @@ -2489,9 +2469,9 @@ class TasksViewSerializer(OwnedObjectSerializer): class RunTaskViewSerializer(serializers.Serializer[dict[str, Any]]): - task_name = serializers.ChoiceField( - choices=PaperlessTask.TaskName.choices, - label="Task Name", + task_type = serializers.ChoiceField( + choices=PaperlessTask.TaskType.choices, + label="Task Type", write_only=True, ) diff --git a/src/documents/signals/handlers.py b/src/documents/signals/handlers.py index dd49a718c..7cb83ccc8 100644 --- a/src/documents/signals/handlers.py +++ b/src/documents/signals/handlers.py @@ -1021,12 +1021,11 @@ def before_task_publish_handler(sender=None, headers=None, body=None, **kwargs) user_id = overrides.owner_id if overrides else None PaperlessTask.objects.create( - type=PaperlessTask.TaskType.AUTO, + trigger_source=PaperlessTask.TriggerSource.FOLDER_CONSUME, task_id=headers["id"], - status=states.PENDING, - task_file_name=task_file_name, - task_name=PaperlessTask.TaskName.CONSUME_FILE, - result=None, + status=PaperlessTask.Status.PENDING, + input_data={"filename": task_file_name}, + task_type=PaperlessTask.TaskType.CONSUME_FILE, date_created=timezone.now(), date_started=None, date_done=None, @@ -1052,7 +1051,7 @@ def task_prerun_handler(sender=None, task_id=None, task=None, **kwargs) -> None: task_instance = PaperlessTask.objects.filter(task_id=task_id).first() if task_instance is not None: - task_instance.status = states.STARTED + task_instance.status = PaperlessTask.Status.STARTED task_instance.date_started = timezone.now() task_instance.save() except Exception: # pragma: no cover @@ -1080,8 +1079,19 @@ def task_postrun_handler( task_instance = PaperlessTask.objects.filter(task_id=task_id).first() if task_instance is not None: - task_instance.status = state or states.FAILURE - task_instance.result = retval + _CELERY_STATE_MAP = { + states.SUCCESS: PaperlessTask.Status.SUCCESS, + states.FAILURE: PaperlessTask.Status.FAILURE, + states.REVOKED: PaperlessTask.Status.REVOKED, + states.STARTED: PaperlessTask.Status.STARTED, + states.PENDING: PaperlessTask.Status.PENDING, + } + task_instance.status = _CELERY_STATE_MAP.get( + state, + PaperlessTask.Status.FAILURE, + ) + if isinstance(retval, str): + task_instance.result_message = retval task_instance.date_done = timezone.now() task_instance.save() except Exception: # pragma: no cover @@ -1108,9 +1118,9 @@ def task_failure_handler( close_old_connections() task_instance = PaperlessTask.objects.filter(task_id=task_id).first() - if task_instance is not None and task_instance.result is None: - task_instance.status = states.FAILURE - task_instance.result = traceback + if task_instance is not None and task_instance.result_message is None: + task_instance.status = PaperlessTask.Status.FAILURE + task_instance.result_message = str(traceback) if traceback else None task_instance.date_done = timezone.now() task_instance.save() except Exception: # pragma: no cover diff --git a/src/documents/tasks.py b/src/documents/tasks.py index 57c819492..f94596e17 100644 --- a/src/documents/tasks.py +++ b/src/documents/tasks.py @@ -10,7 +10,6 @@ from tempfile import mkstemp from celery import Task from celery import shared_task -from celery import states from django.conf import settings from django.contrib.contenttypes.models import ContentType from django.db import models @@ -88,12 +87,12 @@ def train_classifier( status_callback: Callable[[str], None] | None = None, ) -> None: task = PaperlessTask.objects.create( - type=PaperlessTask.TaskType.SCHEDULED_TASK + trigger_source=PaperlessTask.TriggerSource.SCHEDULED if scheduled - else PaperlessTask.TaskType.MANUAL_TASK, + else PaperlessTask.TriggerSource.MANUAL, task_id=uuid.uuid4(), - task_name=PaperlessTask.TaskName.TRAIN_CLASSIFIER, - status=states.STARTED, + task_type=PaperlessTask.TaskType.TRAIN_CLASSIFIER, + status=PaperlessTask.Status.STARTED, date_created=timezone.now(), date_started=timezone.now(), ) @@ -110,8 +109,8 @@ def train_classifier( if settings.MODEL_FILE.exists(): logger.info(f"Removing {settings.MODEL_FILE} so it won't be used") settings.MODEL_FILE.unlink() - task.status = states.SUCCESS - task.result = result + task.status = PaperlessTask.Status.SUCCESS + task.result_message = result task.date_done = timezone.now() task.save() return @@ -127,20 +126,20 @@ def train_classifier( f"Saving updated classifier model to {settings.MODEL_FILE}...", ) classifier.save() - task.result = "Training completed successfully" + task.result_message = "Training completed successfully" else: logger.debug("Training data unchanged.") - task.result = "Training data unchanged" + task.result_message = "Training data unchanged" - task.status = states.SUCCESS + task.status = PaperlessTask.Status.SUCCESS except Exception as e: logger.warning("Classifier error: " + str(e)) - task.status = states.FAILURE - task.result = str(e) + task.status = PaperlessTask.Status.FAILURE + task.result_message = str(e) task.date_done = timezone.now() - task.save(update_fields=["status", "result", "date_done"]) + task.save(update_fields=["status", "result_message", "date_done"]) @shared_task(bind=True) @@ -642,14 +641,14 @@ def llmindex_index( ai_config = AIConfig() if ai_config.llm_index_enabled: task = PaperlessTask.objects.create( - type=PaperlessTask.TaskType.SCHEDULED_TASK + trigger_source=PaperlessTask.TriggerSource.SCHEDULED if scheduled - else PaperlessTask.TaskType.AUTO + else PaperlessTask.TriggerSource.SYSTEM if auto - else PaperlessTask.TaskType.MANUAL_TASK, + else PaperlessTask.TriggerSource.MANUAL, task_id=uuid.uuid4(), - task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE, - status=states.STARTED, + task_type=PaperlessTask.TaskType.LLM_INDEX, + status=PaperlessTask.Status.STARTED, date_created=timezone.now(), date_started=timezone.now(), ) @@ -660,15 +659,15 @@ def llmindex_index( iter_wrapper=iter_wrapper, rebuild=rebuild, ) - task.status = states.SUCCESS - task.result = result + task.status = PaperlessTask.Status.SUCCESS + task.result_message = result except Exception as e: logger.error("LLM index error: " + str(e)) - task.status = states.FAILURE - task.result = str(e) + task.status = PaperlessTask.Status.FAILURE + task.result_message = str(e) task.date_done = timezone.now() - task.save(update_fields=["status", "result", "date_done"]) + task.save(update_fields=["status", "result_message", "date_done"]) else: logger.info("LLM index is disabled, skipping update.") diff --git a/src/documents/views.py b/src/documents/views.py index c57e43b35..2aea9aba4 100644 --- a/src/documents/views.py +++ b/src/documents/views.py @@ -20,7 +20,6 @@ from urllib.parse import urlparse import httpx import magic import pathvalidate -from celery import states from django.conf import settings from django.contrib.auth.models import Group from django.contrib.auth.models import User @@ -3777,16 +3776,16 @@ class TasksViewSet(ReadOnlyModelViewSet[PaperlessTask]): filterset_class = PaperlessTaskFilterSet TASK_AND_ARGS_BY_NAME = { - PaperlessTask.TaskName.INDEX_OPTIMIZE: (index_optimize, {}), - PaperlessTask.TaskName.TRAIN_CLASSIFIER: ( + PaperlessTask.TaskType.INDEX_OPTIMIZE: (index_optimize, {}), + PaperlessTask.TaskType.TRAIN_CLASSIFIER: ( train_classifier, {"scheduled": False}, ), - PaperlessTask.TaskName.CHECK_SANITY: ( + PaperlessTask.TaskType.SANITY_CHECK: ( sanity_check, {"scheduled": False, "raise_on_error": False}, ), - PaperlessTask.TaskName.LLMINDEX_UPDATE: ( + PaperlessTask.TaskType.LLM_INDEX: ( llmindex_index, {"scheduled": False, "rebuild": False}, ), @@ -3824,13 +3823,13 @@ class TasksViewSet(ReadOnlyModelViewSet[PaperlessTask]): def run(self, request): serializer = RunTaskViewSerializer(data=request.data) serializer.is_valid(raise_exception=True) - task_name = serializer.validated_data.get("task_name") + task_type = serializer.validated_data.get("task_type") if not request.user.is_superuser: return HttpResponseForbidden("Insufficient permissions") try: - task_func, task_args = self.TASK_AND_ARGS_BY_NAME[task_name] + task_func, task_args = self.TASK_AND_ARGS_BY_NAME[task_type] result = task_func(**task_args) return Response({"result": result}) except Exception as e: @@ -4466,11 +4465,11 @@ class SystemStatusView(PassUserMixin): last_trained_task = ( PaperlessTask.objects.filter( - task_name=PaperlessTask.TaskName.TRAIN_CLASSIFIER, + task_type=PaperlessTask.TaskType.TRAIN_CLASSIFIER, status__in=[ - states.SUCCESS, - states.FAILURE, - states.REVOKED, + PaperlessTask.Status.SUCCESS, + PaperlessTask.Status.FAILURE, + PaperlessTask.Status.REVOKED, ], # ignore running tasks ) .order_by("-date_done") @@ -4481,20 +4480,23 @@ class SystemStatusView(PassUserMixin): if last_trained_task is None: classifier_status = "WARNING" classifier_error = "No classifier training tasks found" - elif last_trained_task and last_trained_task.status != states.SUCCESS: + elif ( + last_trained_task + and last_trained_task.status != PaperlessTask.Status.SUCCESS + ): classifier_status = "ERROR" - classifier_error = last_trained_task.result + classifier_error = last_trained_task.result_message classifier_last_trained = ( last_trained_task.date_done if last_trained_task else None ) last_sanity_check = ( PaperlessTask.objects.filter( - task_name=PaperlessTask.TaskName.CHECK_SANITY, + task_type=PaperlessTask.TaskType.SANITY_CHECK, status__in=[ - states.SUCCESS, - states.FAILURE, - states.REVOKED, + PaperlessTask.Status.SUCCESS, + PaperlessTask.Status.FAILURE, + PaperlessTask.Status.REVOKED, ], # ignore running tasks ) .order_by("-date_done") @@ -4505,9 +4507,12 @@ class SystemStatusView(PassUserMixin): if last_sanity_check is None: sanity_check_status = "WARNING" sanity_check_error = "No sanity check tasks found" - elif last_sanity_check and last_sanity_check.status != states.SUCCESS: + elif ( + last_sanity_check + and last_sanity_check.status != PaperlessTask.Status.SUCCESS + ): sanity_check_status = "ERROR" - sanity_check_error = last_sanity_check.result + sanity_check_error = last_sanity_check.result_message sanity_check_last_run = ( last_sanity_check.date_done if last_sanity_check else None ) @@ -4520,7 +4525,7 @@ class SystemStatusView(PassUserMixin): else: last_llmindex_update = ( PaperlessTask.objects.filter( - task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE, + task_type=PaperlessTask.TaskType.LLM_INDEX, ) .order_by("-date_done") .first() @@ -4530,9 +4535,12 @@ class SystemStatusView(PassUserMixin): if last_llmindex_update is None: llmindex_status = "WARNING" llmindex_error = "No LLM index update tasks found" - elif last_llmindex_update and last_llmindex_update.status == states.FAILURE: + elif ( + last_llmindex_update + and last_llmindex_update.status == PaperlessTask.Status.FAILURE + ): llmindex_status = "ERROR" - llmindex_error = last_llmindex_update.result + llmindex_error = last_llmindex_update.result_message llmindex_last_modified = ( last_llmindex_update.date_done if last_llmindex_update else None ) diff --git a/src/paperless_ai/indexing.py b/src/paperless_ai/indexing.py index a54492f1f..d9b1a7f90 100644 --- a/src/paperless_ai/indexing.py +++ b/src/paperless_ai/indexing.py @@ -4,7 +4,6 @@ from datetime import timedelta from pathlib import Path from typing import TYPE_CHECKING -from celery import states from django.conf import settings from django.utils import timezone @@ -28,11 +27,11 @@ def queue_llm_index_update_if_needed(*, rebuild: bool, reason: str) -> bool: from documents.tasks import llmindex_index has_running = PaperlessTask.objects.filter( - task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE, - status__in=[states.PENDING, states.STARTED], + task_type=PaperlessTask.TaskType.LLM_INDEX, + status__in=[PaperlessTask.Status.PENDING, PaperlessTask.Status.STARTED], ).exists() has_recent = PaperlessTask.objects.filter( - task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE, + task_type=PaperlessTask.TaskType.LLM_INDEX, date_created__gte=(timezone.now() - timedelta(minutes=5)), ).exists() if has_running or has_recent: