feat(tasks): replace PaperlessTask model with structured redesign

Drop the old string-based PaperlessTask table and recreate it with
Status/TaskType/TriggerSource enums, JSONField result storage, and
duration tracking fields. Update all call sites to use the new API.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
stumpylog
2026-04-15 13:59:01 -07:00
parent dc06b679d3
commit ef4e3d31ef
10 changed files with 450 additions and 165 deletions
+7 -6
View File
@@ -144,18 +144,19 @@ class StoragePathAdmin(GuardedModelAdmin):
class TaskAdmin(admin.ModelAdmin): class TaskAdmin(admin.ModelAdmin):
list_display = ("task_id", "task_file_name", "task_name", "date_done", "status") list_display = ("task_id", "task_type", "trigger_source", "date_done", "status")
list_filter = ("status", "date_done", "task_name") list_filter = ("status", "date_done", "task_type")
search_fields = ("task_name", "task_id", "status", "task_file_name") search_fields = ("task_type", "task_id", "status", "trigger_source")
readonly_fields = ( readonly_fields = (
"task_id", "task_id",
"task_file_name", "task_type",
"task_name", "trigger_source",
"status", "status",
"date_created", "date_created",
"date_started", "date_started",
"date_done", "date_done",
"result", "result_message",
"result_data",
) )
+2 -2
View File
@@ -870,8 +870,8 @@ class PaperlessTaskFilterSet(FilterSet):
class Meta: class Meta:
model = PaperlessTask model = PaperlessTask
fields = { fields = {
"type": ["exact"], "task_type": ["exact"],
"task_name": ["exact"], "trigger_source": ["exact"],
"status": ["exact"], "status": ["exact"],
} }
@@ -0,0 +1,213 @@
"""
Drop and recreate the PaperlessTask table with the new structured schema.
We intentionally drop all existing task data -- the old schema was
string-based and incompatible with the new JSONField result storage.
"""
import django.db.models.deletion
import django.utils.timezone
from django.conf import settings
from django.db import migrations
from django.db import models
# Replaces the PaperlessTask model wholesale: the existing model (and with it
# every stored task row) is deleted, then a model of the same name is created
# with enum-backed type/trigger/status columns, timing fields and JSON
# input/result storage, followed by three composite indexes.
class Migration(migrations.Migration):
# Must run after the previous "documents" migration and after the (swappable)
# user model exists, because "owner" is a foreign key to it.
dependencies = [
("documents", "0018_saved_view_simple_search_rules"),
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
]
operations = [
# Step 1: remove the old model; no data is carried over to the new one.
migrations.DeleteModel(name="PaperlessTask"),
# Step 2: recreate the model under the same name with the new schema.
migrations.CreateModel(
name="PaperlessTask",
fields=[
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
# Optional owner; SET_NULL keeps the task row when the user is deleted.
(
"owner",
models.ForeignKey(
blank=True,
default=None,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
to=settings.AUTH_USER_MODEL,
verbose_name="owner",
),
),
# --- Identification ---
(
"task_id",
models.CharField(
help_text="Celery task ID",
max_length=255,
unique=True,
verbose_name="Task ID",
),
),
# What kind of work the task performs (required, no default).
(
"task_type",
models.CharField(
choices=[
("consume_file", "Consume File"),
("train_classifier", "Train Classifier"),
("sanity_check", "Sanity Check"),
("index_optimize", "Index Optimize"),
("index_rebuild", "Index Rebuild"),
("mail_fetch", "Mail Fetch"),
("llm_index", "LLM Index"),
],
db_index=True,
help_text="The kind of work being performed",
max_length=50,
verbose_name="Task Type",
),
),
# What initiated the task (required, no default).
(
"trigger_source",
models.CharField(
choices=[
("scheduled", "Scheduled"),
("web_ui", "Web UI"),
("api_upload", "API Upload"),
("folder_consume", "Folder Consume"),
("email_consume", "Email Consume"),
("system", "System"),
("manual", "Manual"),
],
db_index=True,
help_text="What initiated this task",
max_length=50,
verbose_name="Trigger Source",
),
),
# --- State tracking ---
# Lowercase status values; new rows start as "pending".
(
"status",
models.CharField(
choices=[
("pending", "Pending"),
("started", "Started"),
("success", "Success"),
("failure", "Failure"),
("revoked", "Revoked"),
],
db_index=True,
default="pending",
max_length=30,
verbose_name="Status",
),
),
# --- Timestamps ---
# date_created is non-null and defaults to the current time;
# date_started / date_done stay NULL until set by the application.
(
"date_created",
models.DateTimeField(
db_index=True,
default=django.utils.timezone.now,
verbose_name="Created",
),
),
(
"date_started",
models.DateTimeField(
blank=True,
null=True,
verbose_name="Started",
),
),
(
"date_done",
models.DateTimeField(
blank=True,
db_index=True,
null=True,
verbose_name="Completed",
),
),
# --- Durations (nullable floats, in seconds) ---
(
"duration_seconds",
models.FloatField(
blank=True,
help_text="Elapsed time from start to completion",
null=True,
verbose_name="Duration (seconds)",
),
),
(
"wait_time_seconds",
models.FloatField(
blank=True,
help_text="Time from task creation to worker pickup",
null=True,
verbose_name="Wait Time (seconds)",
),
),
# --- Input / output payloads ---
# input_data is non-null; the callable default `dict` yields a fresh
# empty dict per row rather than one shared mutable object.
(
"input_data",
models.JSONField(
blank=True,
default=dict,
help_text="Structured input parameters for the task",
verbose_name="Input Data",
),
),
# result_data / result_message are nullable: NULL means "no result".
(
"result_data",
models.JSONField(
blank=True,
help_text="Structured result data from task execution",
null=True,
verbose_name="Result Data",
),
),
(
"result_message",
models.TextField(
blank=True,
help_text="Human-readable result message",
null=True,
verbose_name="Result Message",
),
),
# --- Acknowledgment ---
(
"acknowledged",
models.BooleanField(
db_index=True,
default=False,
verbose_name="Acknowledged",
),
),
],
# Default ordering: newest tasks first.
options={
"verbose_name": "Task",
"verbose_name_plural": "Tasks",
"ordering": ["-date_created"],
},
),
# Step 3: composite indexes, added on top of the per-column db_index flags.
# NOTE(review): the explicit index names presumably match what Django
# auto-generates for the unnamed Meta.indexes on the model -- confirm with
# `makemigrations --check` so no follow-up rename migration is produced.
migrations.AddIndex(
model_name="paperlesstask",
index=models.Index(
fields=["status", "date_created"],
name="documents_p_status_8aa687_idx",
),
),
migrations.AddIndex(
model_name="paperlesstask",
index=models.Index(
fields=["task_type", "status"],
name="documents_p_task_ty_e4a93f_idx",
),
),
migrations.AddIndex(
model_name="paperlesstask",
index=models.Index(
fields=["owner", "acknowledged", "date_created"],
name="documents_p_owner_i_62c545_idx",
),
),
]
+128 -56
View File
@@ -3,7 +3,6 @@ from pathlib import Path
from typing import Final from typing import Final
import pathvalidate import pathvalidate
from celery import states
from django.conf import settings from django.conf import settings
from django.contrib.auth.models import Group from django.contrib.auth.models import Group
from django.contrib.auth.models import User from django.contrib.auth.models import User
@@ -663,97 +662,170 @@ class UiSettings(models.Model):
class PaperlessTask(ModelWithOwner): class PaperlessTask(ModelWithOwner):
ALL_STATES = sorted(states.ALL_STATES) """
TASK_STATE_CHOICES = sorted(zip(ALL_STATES, ALL_STATES)) Tracks background task execution for user visibility and debugging.
State transitions:
PENDING -> STARTED -> SUCCESS
PENDING -> STARTED -> FAILURE
PENDING -> REVOKED (if cancelled before starting)
"""
class Status(models.TextChoices):
PENDING = "pending", _("Pending")
STARTED = "started", _("Started")
SUCCESS = "success", _("Success")
FAILURE = "failure", _("Failure")
REVOKED = "revoked", _("Revoked")
class TaskType(models.TextChoices): class TaskType(models.TextChoices):
AUTO = ("auto_task", _("Auto Task")) CONSUME_FILE = "consume_file", _("Consume File")
SCHEDULED_TASK = ("scheduled_task", _("Scheduled Task")) TRAIN_CLASSIFIER = "train_classifier", _("Train Classifier")
MANUAL_TASK = ("manual_task", _("Manual Task")) SANITY_CHECK = "sanity_check", _("Sanity Check")
INDEX_OPTIMIZE = "index_optimize", _("Index Optimize")
INDEX_REBUILD = "index_rebuild", _("Index Rebuild")
MAIL_FETCH = "mail_fetch", _("Mail Fetch")
LLM_INDEX = "llm_index", _("LLM Index")
class TaskName(models.TextChoices): class TriggerSource(models.TextChoices):
CONSUME_FILE = ("consume_file", _("Consume File")) SCHEDULED = "scheduled", _("Scheduled") # Celery beat
TRAIN_CLASSIFIER = ("train_classifier", _("Train Classifier")) WEB_UI = "web_ui", _("Web UI") # Document uploaded via web
CHECK_SANITY = ("check_sanity", _("Check Sanity")) API_UPLOAD = "api_upload", _("API Upload") # Document uploaded via API
INDEX_OPTIMIZE = ("index_optimize", _("Index Optimize")) FOLDER_CONSUME = "folder_consume", _("Folder Consume") # Consume folder
LLMINDEX_UPDATE = ("llmindex_update", _("LLM Index Update")) EMAIL_CONSUME = "email_consume", _("Email Consume") # Email attachment
SYSTEM = (
"system",
_("System"),
) # Auto-triggered by system (self-heal, config side-effect)
MANUAL = "manual", _("Manual") # User explicitly ran via /api/tasks/run/
# Identification
task_id = models.CharField( task_id = models.CharField(
max_length=255, max_length=255,
unique=True, unique=True,
verbose_name=_("Task ID"), verbose_name=_("Task ID"),
help_text=_("Celery ID for the Task that was run"), help_text=_("Celery task ID"),
) )
acknowledged = models.BooleanField( task_type = models.CharField(
default=False, max_length=50,
verbose_name=_("Acknowledged"), choices=TaskType.choices,
help_text=_("If the task is acknowledged via the frontend or API"), verbose_name=_("Task Type"),
help_text=_("The kind of work being performed"),
db_index=True,
) )
task_file_name = models.CharField( trigger_source = models.CharField(
null=True, max_length=50,
max_length=255, choices=TriggerSource.choices,
verbose_name=_("Task Filename"), verbose_name=_("Trigger Source"),
help_text=_("Name of the file which the Task was run for"), help_text=_("What initiated this task"),
) db_index=True,
task_name = models.CharField(
null=True,
max_length=255,
choices=TaskName.choices,
verbose_name=_("Task Name"),
help_text=_("Name of the task that was run"),
) )
# State tracking
status = models.CharField( status = models.CharField(
max_length=30, max_length=30,
default=states.PENDING, choices=Status.choices,
choices=TASK_STATE_CHOICES, default=Status.PENDING,
verbose_name=_("Task State"), verbose_name=_("Status"),
help_text=_("Current state of the task being run"), db_index=True,
) )
# Timestamps
date_created = models.DateTimeField( date_created = models.DateTimeField(
null=True,
default=timezone.now, default=timezone.now,
verbose_name=_("Created DateTime"), verbose_name=_("Created"),
help_text=_("Datetime field when the task result was created in UTC"), db_index=True,
) )
date_started = models.DateTimeField( date_started = models.DateTimeField(
null=True, null=True,
default=None, blank=True,
verbose_name=_("Started DateTime"), verbose_name=_("Started"),
help_text=_("Datetime field when the task was started in UTC"),
) )
date_done = models.DateTimeField( date_done = models.DateTimeField(
null=True, null=True,
default=None, blank=True,
verbose_name=_("Completed DateTime"), verbose_name=_("Completed"),
help_text=_("Datetime field when the task was completed in UTC"), db_index=True,
) )
result = models.TextField( # Duration fields -- populated by task_postrun signal handler
duration_seconds = models.FloatField(
null=True, null=True,
default=None, blank=True,
verbose_name=_("Result Data"), verbose_name=_("Duration (seconds)"),
help_text=_( help_text=_("Elapsed time from start to completion"),
"The data returned by the task",
),
) )
type = models.CharField( wait_time_seconds = models.FloatField(
max_length=30, null=True,
choices=TaskType.choices, blank=True,
default=TaskType.AUTO, verbose_name=_("Wait Time (seconds)"),
verbose_name=_("Task Type"), help_text=_("Time from task creation to worker pickup"),
help_text=_("The type of task that was run"),
) )
# Input/Output data
input_data = models.JSONField(
default=dict,
blank=True,
verbose_name=_("Input Data"),
help_text=_("Structured input parameters for the task"),
)
result_data = models.JSONField(
null=True,
blank=True,
verbose_name=_("Result Data"),
help_text=_("Structured result data from task execution"),
)
result_message = models.TextField(
null=True,
blank=True,
verbose_name=_("Result Message"),
help_text=_("Human-readable result message"),
)
# Acknowledgment
acknowledged = models.BooleanField(
default=False,
verbose_name=_("Acknowledged"),
db_index=True,
)
class Meta:
verbose_name = _("Task")
verbose_name_plural = _("Tasks")
ordering = ["-date_created"]
indexes = [
models.Index(fields=["status", "date_created"]),
models.Index(fields=["task_type", "status"]),
models.Index(fields=["owner", "acknowledged", "date_created"]),
]
def __str__(self) -> str: def __str__(self) -> str:
return f"Task {self.task_id}" return f"{self.get_task_type_display()} [{self.task_id[:8]}]"
@property
def is_complete(self) -> bool:
return self.status in (
self.Status.SUCCESS,
self.Status.FAILURE,
self.Status.REVOKED,
)
@property
def related_document_ids(self) -> list[int]:
if not self.result_data:
return []
if doc_id := self.result_data.get("document_id"):
return [doc_id]
if dup_id := self.result_data.get("duplicate_of"):
return [dup_id]
return []
class Note(SoftDeleteModel): class Note(SoftDeleteModel):
+14 -11
View File
@@ -18,7 +18,6 @@ from typing import TYPE_CHECKING
from typing import Final from typing import Final
from typing import TypedDict from typing import TypedDict
from celery import states
from django.conf import settings from django.conf import settings
from django.utils import timezone from django.utils import timezone
@@ -303,13 +302,13 @@ def check_sanity(
""" """
paperless_task = PaperlessTask.objects.create( paperless_task = PaperlessTask.objects.create(
task_id=uuid.uuid4(), task_id=uuid.uuid4(),
type=( trigger_source=(
PaperlessTask.TaskType.SCHEDULED_TASK PaperlessTask.TriggerSource.SCHEDULED
if scheduled if scheduled
else PaperlessTask.TaskType.MANUAL_TASK else PaperlessTask.TriggerSource.MANUAL
), ),
task_name=PaperlessTask.TaskName.CHECK_SANITY, task_type=PaperlessTask.TaskType.SANITY_CHECK,
status=states.STARTED, status=PaperlessTask.Status.STARTED,
date_created=timezone.now(), date_created=timezone.now(),
date_started=timezone.now(), date_started=timezone.now(),
) )
@@ -332,9 +331,13 @@ def check_sanity(
for extra_file in present_files: for extra_file in present_files:
messages.warning(None, f"Orphaned file in media dir: {extra_file}") messages.warning(None, f"Orphaned file in media dir: {extra_file}")
paperless_task.status = states.SUCCESS if not messages.has_error else states.FAILURE paperless_task.status = (
PaperlessTask.Status.SUCCESS
if not messages.has_error
else PaperlessTask.Status.FAILURE
)
if messages.total_issue_count == 0: if messages.total_issue_count == 0:
paperless_task.result = "No issues found." paperless_task.result_message = "No issues found."
else: else:
parts: list[str] = [] parts: list[str] = []
if messages.document_error_count: if messages.document_error_count:
@@ -343,11 +346,11 @@ def check_sanity(
parts.append(f"{messages.document_warning_count} document(s) with warnings") parts.append(f"{messages.document_warning_count} document(s) with warnings")
if messages.global_warning_count: if messages.global_warning_count:
parts.append(f"{messages.global_warning_count} global warning(s)") parts.append(f"{messages.global_warning_count} global warning(s)")
paperless_task.result = ", ".join(parts) + " found." paperless_task.result_message = ", ".join(parts) + " found."
if messages.has_error: if messages.has_error:
paperless_task.result += " Check logs for details." paperless_task.result_message += " Check logs for details."
paperless_task.date_done = timezone.now() paperless_task.date_done = timezone.now()
paperless_task.save(update_fields=["status", "result", "date_done"]) paperless_task.save(update_fields=["status", "result_message", "date_done"])
return messages return messages
+10 -30
View File
@@ -12,7 +12,6 @@ from typing import Literal
from typing import TypedDict from typing import TypedDict
import magic import magic
from celery import states
from django.conf import settings from django.conf import settings
from django.contrib.auth.models import Group from django.contrib.auth.models import Group
from django.contrib.auth.models import User from django.contrib.auth.models import User
@@ -2437,13 +2436,13 @@ class TasksViewSerializer(OwnedObjectSerializer):
fields = ( fields = (
"id", "id",
"task_id", "task_id",
"task_name", "task_type",
"task_file_name", "trigger_source",
"date_created", "date_created",
"date_done", "date_done",
"type",
"status", "status",
"result", "result_message",
"result_data",
"acknowledged", "acknowledged",
"related_document", "related_document",
"duplicate_documents", "duplicate_documents",
@@ -2452,29 +2451,10 @@ class TasksViewSerializer(OwnedObjectSerializer):
related_document = serializers.SerializerMethodField() related_document = serializers.SerializerMethodField()
duplicate_documents = serializers.SerializerMethodField() duplicate_documents = serializers.SerializerMethodField()
created_doc_re = re.compile(r"New document id (\d+) created")
duplicate_doc_re = re.compile(r"It is a duplicate of .* \(#(\d+)\)")
def get_related_document(self, obj) -> str | None: def get_related_document(self, obj) -> int | None:
result = None doc_ids = obj.related_document_ids
re = None return doc_ids[0] if doc_ids else None
if obj.result:
match obj.status:
case states.SUCCESS:
re = self.created_doc_re
case states.FAILURE:
re = (
self.duplicate_doc_re
if "existing document is in the trash" not in obj.result
else None
)
if re is not None:
try:
result = re.search(obj.result).group(1)
except Exception:
pass
return result
@extend_schema_field(DuplicateDocumentSummarySerializer(many=True)) @extend_schema_field(DuplicateDocumentSummarySerializer(many=True))
def get_duplicate_documents(self, obj): def get_duplicate_documents(self, obj):
@@ -2489,9 +2469,9 @@ class TasksViewSerializer(OwnedObjectSerializer):
class RunTaskViewSerializer(serializers.Serializer[dict[str, Any]]): class RunTaskViewSerializer(serializers.Serializer[dict[str, Any]]):
task_name = serializers.ChoiceField( task_type = serializers.ChoiceField(
choices=PaperlessTask.TaskName.choices, choices=PaperlessTask.TaskType.choices,
label="Task Name", label="Task Type",
write_only=True, write_only=True,
) )
+21 -11
View File
@@ -1021,12 +1021,11 @@ def before_task_publish_handler(sender=None, headers=None, body=None, **kwargs)
user_id = overrides.owner_id if overrides else None user_id = overrides.owner_id if overrides else None
PaperlessTask.objects.create( PaperlessTask.objects.create(
type=PaperlessTask.TaskType.AUTO, trigger_source=PaperlessTask.TriggerSource.FOLDER_CONSUME,
task_id=headers["id"], task_id=headers["id"],
status=states.PENDING, status=PaperlessTask.Status.PENDING,
task_file_name=task_file_name, input_data={"filename": task_file_name},
task_name=PaperlessTask.TaskName.CONSUME_FILE, task_type=PaperlessTask.TaskType.CONSUME_FILE,
result=None,
date_created=timezone.now(), date_created=timezone.now(),
date_started=None, date_started=None,
date_done=None, date_done=None,
@@ -1052,7 +1051,7 @@ def task_prerun_handler(sender=None, task_id=None, task=None, **kwargs) -> None:
task_instance = PaperlessTask.objects.filter(task_id=task_id).first() task_instance = PaperlessTask.objects.filter(task_id=task_id).first()
if task_instance is not None: if task_instance is not None:
task_instance.status = states.STARTED task_instance.status = PaperlessTask.Status.STARTED
task_instance.date_started = timezone.now() task_instance.date_started = timezone.now()
task_instance.save() task_instance.save()
except Exception: # pragma: no cover except Exception: # pragma: no cover
@@ -1080,8 +1079,19 @@ def task_postrun_handler(
task_instance = PaperlessTask.objects.filter(task_id=task_id).first() task_instance = PaperlessTask.objects.filter(task_id=task_id).first()
if task_instance is not None: if task_instance is not None:
task_instance.status = state or states.FAILURE _CELERY_STATE_MAP = {
task_instance.result = retval states.SUCCESS: PaperlessTask.Status.SUCCESS,
states.FAILURE: PaperlessTask.Status.FAILURE,
states.REVOKED: PaperlessTask.Status.REVOKED,
states.STARTED: PaperlessTask.Status.STARTED,
states.PENDING: PaperlessTask.Status.PENDING,
}
task_instance.status = _CELERY_STATE_MAP.get(
state,
PaperlessTask.Status.FAILURE,
)
if isinstance(retval, str):
task_instance.result_message = retval
task_instance.date_done = timezone.now() task_instance.date_done = timezone.now()
task_instance.save() task_instance.save()
except Exception: # pragma: no cover except Exception: # pragma: no cover
@@ -1108,9 +1118,9 @@ def task_failure_handler(
close_old_connections() close_old_connections()
task_instance = PaperlessTask.objects.filter(task_id=task_id).first() task_instance = PaperlessTask.objects.filter(task_id=task_id).first()
if task_instance is not None and task_instance.result is None: if task_instance is not None and task_instance.result_message is None:
task_instance.status = states.FAILURE task_instance.status = PaperlessTask.Status.FAILURE
task_instance.result = traceback task_instance.result_message = str(traceback) if traceback else None
task_instance.date_done = timezone.now() task_instance.date_done = timezone.now()
task_instance.save() task_instance.save()
except Exception: # pragma: no cover except Exception: # pragma: no cover
+22 -23
View File
@@ -10,7 +10,6 @@ from tempfile import mkstemp
from celery import Task from celery import Task
from celery import shared_task from celery import shared_task
from celery import states
from django.conf import settings from django.conf import settings
from django.contrib.contenttypes.models import ContentType from django.contrib.contenttypes.models import ContentType
from django.db import models from django.db import models
@@ -88,12 +87,12 @@ def train_classifier(
status_callback: Callable[[str], None] | None = None, status_callback: Callable[[str], None] | None = None,
) -> None: ) -> None:
task = PaperlessTask.objects.create( task = PaperlessTask.objects.create(
type=PaperlessTask.TaskType.SCHEDULED_TASK trigger_source=PaperlessTask.TriggerSource.SCHEDULED
if scheduled if scheduled
else PaperlessTask.TaskType.MANUAL_TASK, else PaperlessTask.TriggerSource.MANUAL,
task_id=uuid.uuid4(), task_id=uuid.uuid4(),
task_name=PaperlessTask.TaskName.TRAIN_CLASSIFIER, task_type=PaperlessTask.TaskType.TRAIN_CLASSIFIER,
status=states.STARTED, status=PaperlessTask.Status.STARTED,
date_created=timezone.now(), date_created=timezone.now(),
date_started=timezone.now(), date_started=timezone.now(),
) )
@@ -110,8 +109,8 @@ def train_classifier(
if settings.MODEL_FILE.exists(): if settings.MODEL_FILE.exists():
logger.info(f"Removing {settings.MODEL_FILE} so it won't be used") logger.info(f"Removing {settings.MODEL_FILE} so it won't be used")
settings.MODEL_FILE.unlink() settings.MODEL_FILE.unlink()
task.status = states.SUCCESS task.status = PaperlessTask.Status.SUCCESS
task.result = result task.result_message = result
task.date_done = timezone.now() task.date_done = timezone.now()
task.save() task.save()
return return
@@ -127,20 +126,20 @@ def train_classifier(
f"Saving updated classifier model to {settings.MODEL_FILE}...", f"Saving updated classifier model to {settings.MODEL_FILE}...",
) )
classifier.save() classifier.save()
task.result = "Training completed successfully" task.result_message = "Training completed successfully"
else: else:
logger.debug("Training data unchanged.") logger.debug("Training data unchanged.")
task.result = "Training data unchanged" task.result_message = "Training data unchanged"
task.status = states.SUCCESS task.status = PaperlessTask.Status.SUCCESS
except Exception as e: except Exception as e:
logger.warning("Classifier error: " + str(e)) logger.warning("Classifier error: " + str(e))
task.status = states.FAILURE task.status = PaperlessTask.Status.FAILURE
task.result = str(e) task.result_message = str(e)
task.date_done = timezone.now() task.date_done = timezone.now()
task.save(update_fields=["status", "result", "date_done"]) task.save(update_fields=["status", "result_message", "date_done"])
@shared_task(bind=True) @shared_task(bind=True)
@@ -642,14 +641,14 @@ def llmindex_index(
ai_config = AIConfig() ai_config = AIConfig()
if ai_config.llm_index_enabled: if ai_config.llm_index_enabled:
task = PaperlessTask.objects.create( task = PaperlessTask.objects.create(
type=PaperlessTask.TaskType.SCHEDULED_TASK trigger_source=PaperlessTask.TriggerSource.SCHEDULED
if scheduled if scheduled
else PaperlessTask.TaskType.AUTO else PaperlessTask.TriggerSource.SYSTEM
if auto if auto
else PaperlessTask.TaskType.MANUAL_TASK, else PaperlessTask.TriggerSource.MANUAL,
task_id=uuid.uuid4(), task_id=uuid.uuid4(),
task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE, task_type=PaperlessTask.TaskType.LLM_INDEX,
status=states.STARTED, status=PaperlessTask.Status.STARTED,
date_created=timezone.now(), date_created=timezone.now(),
date_started=timezone.now(), date_started=timezone.now(),
) )
@@ -660,15 +659,15 @@ def llmindex_index(
iter_wrapper=iter_wrapper, iter_wrapper=iter_wrapper,
rebuild=rebuild, rebuild=rebuild,
) )
task.status = states.SUCCESS task.status = PaperlessTask.Status.SUCCESS
task.result = result task.result_message = result
except Exception as e: except Exception as e:
logger.error("LLM index error: " + str(e)) logger.error("LLM index error: " + str(e))
task.status = states.FAILURE task.status = PaperlessTask.Status.FAILURE
task.result = str(e) task.result_message = str(e)
task.date_done = timezone.now() task.date_done = timezone.now()
task.save(update_fields=["status", "result", "date_done"]) task.save(update_fields=["status", "result_message", "date_done"])
else: else:
logger.info("LLM index is disabled, skipping update.") logger.info("LLM index is disabled, skipping update.")
+30 -22
View File
@@ -20,7 +20,6 @@ from urllib.parse import urlparse
import httpx import httpx
import magic import magic
import pathvalidate import pathvalidate
from celery import states
from django.conf import settings from django.conf import settings
from django.contrib.auth.models import Group from django.contrib.auth.models import Group
from django.contrib.auth.models import User from django.contrib.auth.models import User
@@ -3777,16 +3776,16 @@ class TasksViewSet(ReadOnlyModelViewSet[PaperlessTask]):
filterset_class = PaperlessTaskFilterSet filterset_class = PaperlessTaskFilterSet
TASK_AND_ARGS_BY_NAME = { TASK_AND_ARGS_BY_NAME = {
PaperlessTask.TaskName.INDEX_OPTIMIZE: (index_optimize, {}), PaperlessTask.TaskType.INDEX_OPTIMIZE: (index_optimize, {}),
PaperlessTask.TaskName.TRAIN_CLASSIFIER: ( PaperlessTask.TaskType.TRAIN_CLASSIFIER: (
train_classifier, train_classifier,
{"scheduled": False}, {"scheduled": False},
), ),
PaperlessTask.TaskName.CHECK_SANITY: ( PaperlessTask.TaskType.SANITY_CHECK: (
sanity_check, sanity_check,
{"scheduled": False, "raise_on_error": False}, {"scheduled": False, "raise_on_error": False},
), ),
PaperlessTask.TaskName.LLMINDEX_UPDATE: ( PaperlessTask.TaskType.LLM_INDEX: (
llmindex_index, llmindex_index,
{"scheduled": False, "rebuild": False}, {"scheduled": False, "rebuild": False},
), ),
@@ -3824,13 +3823,13 @@ class TasksViewSet(ReadOnlyModelViewSet[PaperlessTask]):
def run(self, request): def run(self, request):
serializer = RunTaskViewSerializer(data=request.data) serializer = RunTaskViewSerializer(data=request.data)
serializer.is_valid(raise_exception=True) serializer.is_valid(raise_exception=True)
task_name = serializer.validated_data.get("task_name") task_type = serializer.validated_data.get("task_type")
if not request.user.is_superuser: if not request.user.is_superuser:
return HttpResponseForbidden("Insufficient permissions") return HttpResponseForbidden("Insufficient permissions")
try: try:
task_func, task_args = self.TASK_AND_ARGS_BY_NAME[task_name] task_func, task_args = self.TASK_AND_ARGS_BY_NAME[task_type]
result = task_func(**task_args) result = task_func(**task_args)
return Response({"result": result}) return Response({"result": result})
except Exception as e: except Exception as e:
@@ -4466,11 +4465,11 @@ class SystemStatusView(PassUserMixin):
last_trained_task = ( last_trained_task = (
PaperlessTask.objects.filter( PaperlessTask.objects.filter(
task_name=PaperlessTask.TaskName.TRAIN_CLASSIFIER, task_type=PaperlessTask.TaskType.TRAIN_CLASSIFIER,
status__in=[ status__in=[
states.SUCCESS, PaperlessTask.Status.SUCCESS,
states.FAILURE, PaperlessTask.Status.FAILURE,
states.REVOKED, PaperlessTask.Status.REVOKED,
], # ignore running tasks ], # ignore running tasks
) )
.order_by("-date_done") .order_by("-date_done")
@@ -4481,20 +4480,23 @@ class SystemStatusView(PassUserMixin):
if last_trained_task is None: if last_trained_task is None:
classifier_status = "WARNING" classifier_status = "WARNING"
classifier_error = "No classifier training tasks found" classifier_error = "No classifier training tasks found"
elif last_trained_task and last_trained_task.status != states.SUCCESS: elif (
last_trained_task
and last_trained_task.status != PaperlessTask.Status.SUCCESS
):
classifier_status = "ERROR" classifier_status = "ERROR"
classifier_error = last_trained_task.result classifier_error = last_trained_task.result_message
classifier_last_trained = ( classifier_last_trained = (
last_trained_task.date_done if last_trained_task else None last_trained_task.date_done if last_trained_task else None
) )
last_sanity_check = ( last_sanity_check = (
PaperlessTask.objects.filter( PaperlessTask.objects.filter(
task_name=PaperlessTask.TaskName.CHECK_SANITY, task_type=PaperlessTask.TaskType.SANITY_CHECK,
status__in=[ status__in=[
states.SUCCESS, PaperlessTask.Status.SUCCESS,
states.FAILURE, PaperlessTask.Status.FAILURE,
states.REVOKED, PaperlessTask.Status.REVOKED,
], # ignore running tasks ], # ignore running tasks
) )
.order_by("-date_done") .order_by("-date_done")
@@ -4505,9 +4507,12 @@ class SystemStatusView(PassUserMixin):
if last_sanity_check is None: if last_sanity_check is None:
sanity_check_status = "WARNING" sanity_check_status = "WARNING"
sanity_check_error = "No sanity check tasks found" sanity_check_error = "No sanity check tasks found"
elif last_sanity_check and last_sanity_check.status != states.SUCCESS: elif (
last_sanity_check
and last_sanity_check.status != PaperlessTask.Status.SUCCESS
):
sanity_check_status = "ERROR" sanity_check_status = "ERROR"
sanity_check_error = last_sanity_check.result sanity_check_error = last_sanity_check.result_message
sanity_check_last_run = ( sanity_check_last_run = (
last_sanity_check.date_done if last_sanity_check else None last_sanity_check.date_done if last_sanity_check else None
) )
@@ -4520,7 +4525,7 @@ class SystemStatusView(PassUserMixin):
else: else:
last_llmindex_update = ( last_llmindex_update = (
PaperlessTask.objects.filter( PaperlessTask.objects.filter(
task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE, task_type=PaperlessTask.TaskType.LLM_INDEX,
) )
.order_by("-date_done") .order_by("-date_done")
.first() .first()
@@ -4530,9 +4535,12 @@ class SystemStatusView(PassUserMixin):
if last_llmindex_update is None: if last_llmindex_update is None:
llmindex_status = "WARNING" llmindex_status = "WARNING"
llmindex_error = "No LLM index update tasks found" llmindex_error = "No LLM index update tasks found"
elif last_llmindex_update and last_llmindex_update.status == states.FAILURE: elif (
last_llmindex_update
and last_llmindex_update.status == PaperlessTask.Status.FAILURE
):
llmindex_status = "ERROR" llmindex_status = "ERROR"
llmindex_error = last_llmindex_update.result llmindex_error = last_llmindex_update.result_message
llmindex_last_modified = ( llmindex_last_modified = (
last_llmindex_update.date_done if last_llmindex_update else None last_llmindex_update.date_done if last_llmindex_update else None
) )
+3 -4
View File
@@ -4,7 +4,6 @@ from datetime import timedelta
from pathlib import Path from pathlib import Path
from typing import TYPE_CHECKING from typing import TYPE_CHECKING
from celery import states
from django.conf import settings from django.conf import settings
from django.utils import timezone from django.utils import timezone
@@ -28,11 +27,11 @@ def queue_llm_index_update_if_needed(*, rebuild: bool, reason: str) -> bool:
from documents.tasks import llmindex_index from documents.tasks import llmindex_index
has_running = PaperlessTask.objects.filter( has_running = PaperlessTask.objects.filter(
task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE, task_type=PaperlessTask.TaskType.LLM_INDEX,
status__in=[states.PENDING, states.STARTED], status__in=[PaperlessTask.Status.PENDING, PaperlessTask.Status.STARTED],
).exists() ).exists()
has_recent = PaperlessTask.objects.filter( has_recent = PaperlessTask.objects.filter(
task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE, task_type=PaperlessTask.TaskType.LLM_INDEX,
date_created__gte=(timezone.now() - timedelta(minutes=5)), date_created__gte=(timezone.now() - timedelta(minutes=5)),
).exists() ).exists()
if has_running or has_recent: if has_running or has_recent: