mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-04-16 04:58:53 +00:00
feat(tasks): replace PaperlessTask model with structured redesign
Drop the old string-based PaperlessTask table and recreate it with Status/TaskType/TriggerSource enums, JSONField result storage, and duration tracking fields. Update all call sites to use the new API. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -144,18 +144,19 @@ class StoragePathAdmin(GuardedModelAdmin):
|
||||
|
||||
|
||||
class TaskAdmin(admin.ModelAdmin):
|
||||
list_display = ("task_id", "task_file_name", "task_name", "date_done", "status")
|
||||
list_filter = ("status", "date_done", "task_name")
|
||||
search_fields = ("task_name", "task_id", "status", "task_file_name")
|
||||
list_display = ("task_id", "task_type", "trigger_source", "date_done", "status")
|
||||
list_filter = ("status", "date_done", "task_type")
|
||||
search_fields = ("task_type", "task_id", "status", "trigger_source")
|
||||
readonly_fields = (
|
||||
"task_id",
|
||||
"task_file_name",
|
||||
"task_name",
|
||||
"task_type",
|
||||
"trigger_source",
|
||||
"status",
|
||||
"date_created",
|
||||
"date_started",
|
||||
"date_done",
|
||||
"result",
|
||||
"result_message",
|
||||
"result_data",
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -870,8 +870,8 @@ class PaperlessTaskFilterSet(FilterSet):
|
||||
class Meta:
|
||||
model = PaperlessTask
|
||||
fields = {
|
||||
"type": ["exact"],
|
||||
"task_name": ["exact"],
|
||||
"task_type": ["exact"],
|
||||
"trigger_source": ["exact"],
|
||||
"status": ["exact"],
|
||||
}
|
||||
|
||||
|
||||
213
src/documents/migrations/0019_task_system_redesign.py
Normal file
213
src/documents/migrations/0019_task_system_redesign.py
Normal file
@@ -0,0 +1,213 @@
|
||||
"""
|
||||
Drop and recreate the PaperlessTask table with the new structured schema.
|
||||
|
||||
We intentionally drop all existing task data -- the old schema was
|
||||
string-based and incompatible with the new JSONField result storage.
|
||||
"""
|
||||
|
||||
import django.db.models.deletion
|
||||
import django.utils.timezone
|
||||
from django.conf import settings
|
||||
from django.db import migrations
|
||||
from django.db import models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
dependencies = [
|
||||
("documents", "0018_saved_view_simple_search_rules"),
|
||||
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.DeleteModel(name="PaperlessTask"),
|
||||
migrations.CreateModel(
|
||||
name="PaperlessTask",
|
||||
fields=[
|
||||
(
|
||||
"id",
|
||||
models.AutoField(
|
||||
auto_created=True,
|
||||
primary_key=True,
|
||||
serialize=False,
|
||||
verbose_name="ID",
|
||||
),
|
||||
),
|
||||
(
|
||||
"owner",
|
||||
models.ForeignKey(
|
||||
blank=True,
|
||||
default=None,
|
||||
null=True,
|
||||
on_delete=django.db.models.deletion.SET_NULL,
|
||||
to=settings.AUTH_USER_MODEL,
|
||||
verbose_name="owner",
|
||||
),
|
||||
),
|
||||
(
|
||||
"task_id",
|
||||
models.CharField(
|
||||
help_text="Celery task ID",
|
||||
max_length=255,
|
||||
unique=True,
|
||||
verbose_name="Task ID",
|
||||
),
|
||||
),
|
||||
(
|
||||
"task_type",
|
||||
models.CharField(
|
||||
choices=[
|
||||
("consume_file", "Consume File"),
|
||||
("train_classifier", "Train Classifier"),
|
||||
("sanity_check", "Sanity Check"),
|
||||
("index_optimize", "Index Optimize"),
|
||||
("index_rebuild", "Index Rebuild"),
|
||||
("mail_fetch", "Mail Fetch"),
|
||||
("llm_index", "LLM Index"),
|
||||
],
|
||||
db_index=True,
|
||||
help_text="The kind of work being performed",
|
||||
max_length=50,
|
||||
verbose_name="Task Type",
|
||||
),
|
||||
),
|
||||
(
|
||||
"trigger_source",
|
||||
models.CharField(
|
||||
choices=[
|
||||
("scheduled", "Scheduled"),
|
||||
("web_ui", "Web UI"),
|
||||
("api_upload", "API Upload"),
|
||||
("folder_consume", "Folder Consume"),
|
||||
("email_consume", "Email Consume"),
|
||||
("system", "System"),
|
||||
("manual", "Manual"),
|
||||
],
|
||||
db_index=True,
|
||||
help_text="What initiated this task",
|
||||
max_length=50,
|
||||
verbose_name="Trigger Source",
|
||||
),
|
||||
),
|
||||
(
|
||||
"status",
|
||||
models.CharField(
|
||||
choices=[
|
||||
("pending", "Pending"),
|
||||
("started", "Started"),
|
||||
("success", "Success"),
|
||||
("failure", "Failure"),
|
||||
("revoked", "Revoked"),
|
||||
],
|
||||
db_index=True,
|
||||
default="pending",
|
||||
max_length=30,
|
||||
verbose_name="Status",
|
||||
),
|
||||
),
|
||||
(
|
||||
"date_created",
|
||||
models.DateTimeField(
|
||||
db_index=True,
|
||||
default=django.utils.timezone.now,
|
||||
verbose_name="Created",
|
||||
),
|
||||
),
|
||||
(
|
||||
"date_started",
|
||||
models.DateTimeField(
|
||||
blank=True,
|
||||
null=True,
|
||||
verbose_name="Started",
|
||||
),
|
||||
),
|
||||
(
|
||||
"date_done",
|
||||
models.DateTimeField(
|
||||
blank=True,
|
||||
db_index=True,
|
||||
null=True,
|
||||
verbose_name="Completed",
|
||||
),
|
||||
),
|
||||
(
|
||||
"duration_seconds",
|
||||
models.FloatField(
|
||||
blank=True,
|
||||
help_text="Elapsed time from start to completion",
|
||||
null=True,
|
||||
verbose_name="Duration (seconds)",
|
||||
),
|
||||
),
|
||||
(
|
||||
"wait_time_seconds",
|
||||
models.FloatField(
|
||||
blank=True,
|
||||
help_text="Time from task creation to worker pickup",
|
||||
null=True,
|
||||
verbose_name="Wait Time (seconds)",
|
||||
),
|
||||
),
|
||||
(
|
||||
"input_data",
|
||||
models.JSONField(
|
||||
blank=True,
|
||||
default=dict,
|
||||
help_text="Structured input parameters for the task",
|
||||
verbose_name="Input Data",
|
||||
),
|
||||
),
|
||||
(
|
||||
"result_data",
|
||||
models.JSONField(
|
||||
blank=True,
|
||||
help_text="Structured result data from task execution",
|
||||
null=True,
|
||||
verbose_name="Result Data",
|
||||
),
|
||||
),
|
||||
(
|
||||
"result_message",
|
||||
models.TextField(
|
||||
blank=True,
|
||||
help_text="Human-readable result message",
|
||||
null=True,
|
||||
verbose_name="Result Message",
|
||||
),
|
||||
),
|
||||
(
|
||||
"acknowledged",
|
||||
models.BooleanField(
|
||||
db_index=True,
|
||||
default=False,
|
||||
verbose_name="Acknowledged",
|
||||
),
|
||||
),
|
||||
],
|
||||
options={
|
||||
"verbose_name": "Task",
|
||||
"verbose_name_plural": "Tasks",
|
||||
"ordering": ["-date_created"],
|
||||
},
|
||||
),
|
||||
migrations.AddIndex(
|
||||
model_name="paperlesstask",
|
||||
index=models.Index(
|
||||
fields=["status", "date_created"],
|
||||
name="documents_p_status_8aa687_idx",
|
||||
),
|
||||
),
|
||||
migrations.AddIndex(
|
||||
model_name="paperlesstask",
|
||||
index=models.Index(
|
||||
fields=["task_type", "status"],
|
||||
name="documents_p_task_ty_e4a93f_idx",
|
||||
),
|
||||
),
|
||||
migrations.AddIndex(
|
||||
model_name="paperlesstask",
|
||||
index=models.Index(
|
||||
fields=["owner", "acknowledged", "date_created"],
|
||||
name="documents_p_owner_i_62c545_idx",
|
||||
),
|
||||
),
|
||||
]
|
||||
@@ -3,7 +3,6 @@ from pathlib import Path
|
||||
from typing import Final
|
||||
|
||||
import pathvalidate
|
||||
from celery import states
|
||||
from django.conf import settings
|
||||
from django.contrib.auth.models import Group
|
||||
from django.contrib.auth.models import User
|
||||
@@ -663,97 +662,170 @@ class UiSettings(models.Model):
|
||||
|
||||
|
||||
class PaperlessTask(ModelWithOwner):
|
||||
ALL_STATES = sorted(states.ALL_STATES)
|
||||
TASK_STATE_CHOICES = sorted(zip(ALL_STATES, ALL_STATES))
|
||||
"""
|
||||
Tracks background task execution for user visibility and debugging.
|
||||
|
||||
State transitions:
|
||||
PENDING -> STARTED -> SUCCESS
|
||||
PENDING -> STARTED -> FAILURE
|
||||
PENDING -> REVOKED (if cancelled before starting)
|
||||
"""
|
||||
|
||||
class Status(models.TextChoices):
|
||||
PENDING = "pending", _("Pending")
|
||||
STARTED = "started", _("Started")
|
||||
SUCCESS = "success", _("Success")
|
||||
FAILURE = "failure", _("Failure")
|
||||
REVOKED = "revoked", _("Revoked")
|
||||
|
||||
class TaskType(models.TextChoices):
|
||||
AUTO = ("auto_task", _("Auto Task"))
|
||||
SCHEDULED_TASK = ("scheduled_task", _("Scheduled Task"))
|
||||
MANUAL_TASK = ("manual_task", _("Manual Task"))
|
||||
CONSUME_FILE = "consume_file", _("Consume File")
|
||||
TRAIN_CLASSIFIER = "train_classifier", _("Train Classifier")
|
||||
SANITY_CHECK = "sanity_check", _("Sanity Check")
|
||||
INDEX_OPTIMIZE = "index_optimize", _("Index Optimize")
|
||||
INDEX_REBUILD = "index_rebuild", _("Index Rebuild")
|
||||
MAIL_FETCH = "mail_fetch", _("Mail Fetch")
|
||||
LLM_INDEX = "llm_index", _("LLM Index")
|
||||
|
||||
class TaskName(models.TextChoices):
|
||||
CONSUME_FILE = ("consume_file", _("Consume File"))
|
||||
TRAIN_CLASSIFIER = ("train_classifier", _("Train Classifier"))
|
||||
CHECK_SANITY = ("check_sanity", _("Check Sanity"))
|
||||
INDEX_OPTIMIZE = ("index_optimize", _("Index Optimize"))
|
||||
LLMINDEX_UPDATE = ("llmindex_update", _("LLM Index Update"))
|
||||
class TriggerSource(models.TextChoices):
|
||||
SCHEDULED = "scheduled", _("Scheduled") # Celery beat
|
||||
WEB_UI = "web_ui", _("Web UI") # Document uploaded via web
|
||||
API_UPLOAD = "api_upload", _("API Upload") # Document uploaded via API
|
||||
FOLDER_CONSUME = "folder_consume", _("Folder Consume") # Consume folder
|
||||
EMAIL_CONSUME = "email_consume", _("Email Consume") # Email attachment
|
||||
SYSTEM = (
|
||||
"system",
|
||||
_("System"),
|
||||
) # Auto-triggered by system (self-heal, config side-effect)
|
||||
MANUAL = "manual", _("Manual") # User explicitly ran via /api/tasks/run/
|
||||
|
||||
# Identification
|
||||
task_id = models.CharField(
|
||||
max_length=255,
|
||||
unique=True,
|
||||
verbose_name=_("Task ID"),
|
||||
help_text=_("Celery ID for the Task that was run"),
|
||||
help_text=_("Celery task ID"),
|
||||
)
|
||||
|
||||
acknowledged = models.BooleanField(
|
||||
default=False,
|
||||
verbose_name=_("Acknowledged"),
|
||||
help_text=_("If the task is acknowledged via the frontend or API"),
|
||||
task_type = models.CharField(
|
||||
max_length=50,
|
||||
choices=TaskType.choices,
|
||||
verbose_name=_("Task Type"),
|
||||
help_text=_("The kind of work being performed"),
|
||||
db_index=True,
|
||||
)
|
||||
|
||||
task_file_name = models.CharField(
|
||||
null=True,
|
||||
max_length=255,
|
||||
verbose_name=_("Task Filename"),
|
||||
help_text=_("Name of the file which the Task was run for"),
|
||||
)
|
||||
|
||||
task_name = models.CharField(
|
||||
null=True,
|
||||
max_length=255,
|
||||
choices=TaskName.choices,
|
||||
verbose_name=_("Task Name"),
|
||||
help_text=_("Name of the task that was run"),
|
||||
trigger_source = models.CharField(
|
||||
max_length=50,
|
||||
choices=TriggerSource.choices,
|
||||
verbose_name=_("Trigger Source"),
|
||||
help_text=_("What initiated this task"),
|
||||
db_index=True,
|
||||
)
|
||||
|
||||
# State tracking
|
||||
status = models.CharField(
|
||||
max_length=30,
|
||||
default=states.PENDING,
|
||||
choices=TASK_STATE_CHOICES,
|
||||
verbose_name=_("Task State"),
|
||||
help_text=_("Current state of the task being run"),
|
||||
choices=Status.choices,
|
||||
default=Status.PENDING,
|
||||
verbose_name=_("Status"),
|
||||
db_index=True,
|
||||
)
|
||||
|
||||
# Timestamps
|
||||
date_created = models.DateTimeField(
|
||||
null=True,
|
||||
default=timezone.now,
|
||||
verbose_name=_("Created DateTime"),
|
||||
help_text=_("Datetime field when the task result was created in UTC"),
|
||||
verbose_name=_("Created"),
|
||||
db_index=True,
|
||||
)
|
||||
|
||||
date_started = models.DateTimeField(
|
||||
null=True,
|
||||
default=None,
|
||||
verbose_name=_("Started DateTime"),
|
||||
help_text=_("Datetime field when the task was started in UTC"),
|
||||
blank=True,
|
||||
verbose_name=_("Started"),
|
||||
)
|
||||
|
||||
date_done = models.DateTimeField(
|
||||
null=True,
|
||||
default=None,
|
||||
verbose_name=_("Completed DateTime"),
|
||||
help_text=_("Datetime field when the task was completed in UTC"),
|
||||
blank=True,
|
||||
verbose_name=_("Completed"),
|
||||
db_index=True,
|
||||
)
|
||||
|
||||
result = models.TextField(
|
||||
# Duration fields -- populated by task_postrun signal handler
|
||||
duration_seconds = models.FloatField(
|
||||
null=True,
|
||||
default=None,
|
||||
verbose_name=_("Result Data"),
|
||||
help_text=_(
|
||||
"The data returned by the task",
|
||||
),
|
||||
blank=True,
|
||||
verbose_name=_("Duration (seconds)"),
|
||||
help_text=_("Elapsed time from start to completion"),
|
||||
)
|
||||
|
||||
type = models.CharField(
|
||||
max_length=30,
|
||||
choices=TaskType.choices,
|
||||
default=TaskType.AUTO,
|
||||
verbose_name=_("Task Type"),
|
||||
help_text=_("The type of task that was run"),
|
||||
wait_time_seconds = models.FloatField(
|
||||
null=True,
|
||||
blank=True,
|
||||
verbose_name=_("Wait Time (seconds)"),
|
||||
help_text=_("Time from task creation to worker pickup"),
|
||||
)
|
||||
|
||||
# Input/Output data
|
||||
input_data = models.JSONField(
|
||||
default=dict,
|
||||
blank=True,
|
||||
verbose_name=_("Input Data"),
|
||||
help_text=_("Structured input parameters for the task"),
|
||||
)
|
||||
|
||||
result_data = models.JSONField(
|
||||
null=True,
|
||||
blank=True,
|
||||
verbose_name=_("Result Data"),
|
||||
help_text=_("Structured result data from task execution"),
|
||||
)
|
||||
|
||||
result_message = models.TextField(
|
||||
null=True,
|
||||
blank=True,
|
||||
verbose_name=_("Result Message"),
|
||||
help_text=_("Human-readable result message"),
|
||||
)
|
||||
|
||||
# Acknowledgment
|
||||
acknowledged = models.BooleanField(
|
||||
default=False,
|
||||
verbose_name=_("Acknowledged"),
|
||||
db_index=True,
|
||||
)
|
||||
|
||||
class Meta:
|
||||
verbose_name = _("Task")
|
||||
verbose_name_plural = _("Tasks")
|
||||
ordering = ["-date_created"]
|
||||
indexes = [
|
||||
models.Index(fields=["status", "date_created"]),
|
||||
models.Index(fields=["task_type", "status"]),
|
||||
models.Index(fields=["owner", "acknowledged", "date_created"]),
|
||||
]
|
||||
|
||||
def __str__(self) -> str:
|
||||
return f"Task {self.task_id}"
|
||||
return f"{self.get_task_type_display()} [{self.task_id[:8]}]"
|
||||
|
||||
@property
|
||||
def is_complete(self) -> bool:
|
||||
return self.status in (
|
||||
self.Status.SUCCESS,
|
||||
self.Status.FAILURE,
|
||||
self.Status.REVOKED,
|
||||
)
|
||||
|
||||
@property
|
||||
def related_document_ids(self) -> list[int]:
|
||||
if not self.result_data:
|
||||
return []
|
||||
if doc_id := self.result_data.get("document_id"):
|
||||
return [doc_id]
|
||||
if dup_id := self.result_data.get("duplicate_of"):
|
||||
return [dup_id]
|
||||
return []
|
||||
|
||||
|
||||
class Note(SoftDeleteModel):
|
||||
|
||||
@@ -18,7 +18,6 @@ from typing import TYPE_CHECKING
|
||||
from typing import Final
|
||||
from typing import TypedDict
|
||||
|
||||
from celery import states
|
||||
from django.conf import settings
|
||||
from django.utils import timezone
|
||||
|
||||
@@ -303,13 +302,13 @@ def check_sanity(
|
||||
"""
|
||||
paperless_task = PaperlessTask.objects.create(
|
||||
task_id=uuid.uuid4(),
|
||||
type=(
|
||||
PaperlessTask.TaskType.SCHEDULED_TASK
|
||||
trigger_source=(
|
||||
PaperlessTask.TriggerSource.SCHEDULED
|
||||
if scheduled
|
||||
else PaperlessTask.TaskType.MANUAL_TASK
|
||||
else PaperlessTask.TriggerSource.MANUAL
|
||||
),
|
||||
task_name=PaperlessTask.TaskName.CHECK_SANITY,
|
||||
status=states.STARTED,
|
||||
task_type=PaperlessTask.TaskType.SANITY_CHECK,
|
||||
status=PaperlessTask.Status.STARTED,
|
||||
date_created=timezone.now(),
|
||||
date_started=timezone.now(),
|
||||
)
|
||||
@@ -332,9 +331,13 @@ def check_sanity(
|
||||
for extra_file in present_files:
|
||||
messages.warning(None, f"Orphaned file in media dir: {extra_file}")
|
||||
|
||||
paperless_task.status = states.SUCCESS if not messages.has_error else states.FAILURE
|
||||
paperless_task.status = (
|
||||
PaperlessTask.Status.SUCCESS
|
||||
if not messages.has_error
|
||||
else PaperlessTask.Status.FAILURE
|
||||
)
|
||||
if messages.total_issue_count == 0:
|
||||
paperless_task.result = "No issues found."
|
||||
paperless_task.result_message = "No issues found."
|
||||
else:
|
||||
parts: list[str] = []
|
||||
if messages.document_error_count:
|
||||
@@ -343,11 +346,11 @@ def check_sanity(
|
||||
parts.append(f"{messages.document_warning_count} document(s) with warnings")
|
||||
if messages.global_warning_count:
|
||||
parts.append(f"{messages.global_warning_count} global warning(s)")
|
||||
paperless_task.result = ", ".join(parts) + " found."
|
||||
paperless_task.result_message = ", ".join(parts) + " found."
|
||||
if messages.has_error:
|
||||
paperless_task.result += " Check logs for details."
|
||||
paperless_task.result_message += " Check logs for details."
|
||||
|
||||
paperless_task.date_done = timezone.now()
|
||||
paperless_task.save(update_fields=["status", "result", "date_done"])
|
||||
paperless_task.save(update_fields=["status", "result_message", "date_done"])
|
||||
|
||||
return messages
|
||||
|
||||
@@ -12,7 +12,6 @@ from typing import Literal
|
||||
from typing import TypedDict
|
||||
|
||||
import magic
|
||||
from celery import states
|
||||
from django.conf import settings
|
||||
from django.contrib.auth.models import Group
|
||||
from django.contrib.auth.models import User
|
||||
@@ -2437,13 +2436,13 @@ class TasksViewSerializer(OwnedObjectSerializer):
|
||||
fields = (
|
||||
"id",
|
||||
"task_id",
|
||||
"task_name",
|
||||
"task_file_name",
|
||||
"task_type",
|
||||
"trigger_source",
|
||||
"date_created",
|
||||
"date_done",
|
||||
"type",
|
||||
"status",
|
||||
"result",
|
||||
"result_message",
|
||||
"result_data",
|
||||
"acknowledged",
|
||||
"related_document",
|
||||
"duplicate_documents",
|
||||
@@ -2452,29 +2451,10 @@ class TasksViewSerializer(OwnedObjectSerializer):
|
||||
|
||||
related_document = serializers.SerializerMethodField()
|
||||
duplicate_documents = serializers.SerializerMethodField()
|
||||
created_doc_re = re.compile(r"New document id (\d+) created")
|
||||
duplicate_doc_re = re.compile(r"It is a duplicate of .* \(#(\d+)\)")
|
||||
|
||||
def get_related_document(self, obj) -> str | None:
|
||||
result = None
|
||||
re = None
|
||||
if obj.result:
|
||||
match obj.status:
|
||||
case states.SUCCESS:
|
||||
re = self.created_doc_re
|
||||
case states.FAILURE:
|
||||
re = (
|
||||
self.duplicate_doc_re
|
||||
if "existing document is in the trash" not in obj.result
|
||||
else None
|
||||
)
|
||||
if re is not None:
|
||||
try:
|
||||
result = re.search(obj.result).group(1)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return result
|
||||
def get_related_document(self, obj) -> int | None:
|
||||
doc_ids = obj.related_document_ids
|
||||
return doc_ids[0] if doc_ids else None
|
||||
|
||||
@extend_schema_field(DuplicateDocumentSummarySerializer(many=True))
|
||||
def get_duplicate_documents(self, obj):
|
||||
@@ -2489,9 +2469,9 @@ class TasksViewSerializer(OwnedObjectSerializer):
|
||||
|
||||
|
||||
class RunTaskViewSerializer(serializers.Serializer[dict[str, Any]]):
|
||||
task_name = serializers.ChoiceField(
|
||||
choices=PaperlessTask.TaskName.choices,
|
||||
label="Task Name",
|
||||
task_type = serializers.ChoiceField(
|
||||
choices=PaperlessTask.TaskType.choices,
|
||||
label="Task Type",
|
||||
write_only=True,
|
||||
)
|
||||
|
||||
|
||||
@@ -1021,12 +1021,11 @@ def before_task_publish_handler(sender=None, headers=None, body=None, **kwargs)
|
||||
user_id = overrides.owner_id if overrides else None
|
||||
|
||||
PaperlessTask.objects.create(
|
||||
type=PaperlessTask.TaskType.AUTO,
|
||||
trigger_source=PaperlessTask.TriggerSource.FOLDER_CONSUME,
|
||||
task_id=headers["id"],
|
||||
status=states.PENDING,
|
||||
task_file_name=task_file_name,
|
||||
task_name=PaperlessTask.TaskName.CONSUME_FILE,
|
||||
result=None,
|
||||
status=PaperlessTask.Status.PENDING,
|
||||
input_data={"filename": task_file_name},
|
||||
task_type=PaperlessTask.TaskType.CONSUME_FILE,
|
||||
date_created=timezone.now(),
|
||||
date_started=None,
|
||||
date_done=None,
|
||||
@@ -1052,7 +1051,7 @@ def task_prerun_handler(sender=None, task_id=None, task=None, **kwargs) -> None:
|
||||
task_instance = PaperlessTask.objects.filter(task_id=task_id).first()
|
||||
|
||||
if task_instance is not None:
|
||||
task_instance.status = states.STARTED
|
||||
task_instance.status = PaperlessTask.Status.STARTED
|
||||
task_instance.date_started = timezone.now()
|
||||
task_instance.save()
|
||||
except Exception: # pragma: no cover
|
||||
@@ -1080,8 +1079,19 @@ def task_postrun_handler(
|
||||
task_instance = PaperlessTask.objects.filter(task_id=task_id).first()
|
||||
|
||||
if task_instance is not None:
|
||||
task_instance.status = state or states.FAILURE
|
||||
task_instance.result = retval
|
||||
_CELERY_STATE_MAP = {
|
||||
states.SUCCESS: PaperlessTask.Status.SUCCESS,
|
||||
states.FAILURE: PaperlessTask.Status.FAILURE,
|
||||
states.REVOKED: PaperlessTask.Status.REVOKED,
|
||||
states.STARTED: PaperlessTask.Status.STARTED,
|
||||
states.PENDING: PaperlessTask.Status.PENDING,
|
||||
}
|
||||
task_instance.status = _CELERY_STATE_MAP.get(
|
||||
state,
|
||||
PaperlessTask.Status.FAILURE,
|
||||
)
|
||||
if isinstance(retval, str):
|
||||
task_instance.result_message = retval
|
||||
task_instance.date_done = timezone.now()
|
||||
task_instance.save()
|
||||
except Exception: # pragma: no cover
|
||||
@@ -1108,9 +1118,9 @@ def task_failure_handler(
|
||||
close_old_connections()
|
||||
task_instance = PaperlessTask.objects.filter(task_id=task_id).first()
|
||||
|
||||
if task_instance is not None and task_instance.result is None:
|
||||
task_instance.status = states.FAILURE
|
||||
task_instance.result = traceback
|
||||
if task_instance is not None and task_instance.result_message is None:
|
||||
task_instance.status = PaperlessTask.Status.FAILURE
|
||||
task_instance.result_message = str(traceback) if traceback else None
|
||||
task_instance.date_done = timezone.now()
|
||||
task_instance.save()
|
||||
except Exception: # pragma: no cover
|
||||
|
||||
@@ -10,7 +10,6 @@ from tempfile import mkstemp
|
||||
|
||||
from celery import Task
|
||||
from celery import shared_task
|
||||
from celery import states
|
||||
from django.conf import settings
|
||||
from django.contrib.contenttypes.models import ContentType
|
||||
from django.db import models
|
||||
@@ -88,12 +87,12 @@ def train_classifier(
|
||||
status_callback: Callable[[str], None] | None = None,
|
||||
) -> None:
|
||||
task = PaperlessTask.objects.create(
|
||||
type=PaperlessTask.TaskType.SCHEDULED_TASK
|
||||
trigger_source=PaperlessTask.TriggerSource.SCHEDULED
|
||||
if scheduled
|
||||
else PaperlessTask.TaskType.MANUAL_TASK,
|
||||
else PaperlessTask.TriggerSource.MANUAL,
|
||||
task_id=uuid.uuid4(),
|
||||
task_name=PaperlessTask.TaskName.TRAIN_CLASSIFIER,
|
||||
status=states.STARTED,
|
||||
task_type=PaperlessTask.TaskType.TRAIN_CLASSIFIER,
|
||||
status=PaperlessTask.Status.STARTED,
|
||||
date_created=timezone.now(),
|
||||
date_started=timezone.now(),
|
||||
)
|
||||
@@ -110,8 +109,8 @@ def train_classifier(
|
||||
if settings.MODEL_FILE.exists():
|
||||
logger.info(f"Removing {settings.MODEL_FILE} so it won't be used")
|
||||
settings.MODEL_FILE.unlink()
|
||||
task.status = states.SUCCESS
|
||||
task.result = result
|
||||
task.status = PaperlessTask.Status.SUCCESS
|
||||
task.result_message = result
|
||||
task.date_done = timezone.now()
|
||||
task.save()
|
||||
return
|
||||
@@ -127,20 +126,20 @@ def train_classifier(
|
||||
f"Saving updated classifier model to {settings.MODEL_FILE}...",
|
||||
)
|
||||
classifier.save()
|
||||
task.result = "Training completed successfully"
|
||||
task.result_message = "Training completed successfully"
|
||||
else:
|
||||
logger.debug("Training data unchanged.")
|
||||
task.result = "Training data unchanged"
|
||||
task.result_message = "Training data unchanged"
|
||||
|
||||
task.status = states.SUCCESS
|
||||
task.status = PaperlessTask.Status.SUCCESS
|
||||
|
||||
except Exception as e:
|
||||
logger.warning("Classifier error: " + str(e))
|
||||
task.status = states.FAILURE
|
||||
task.result = str(e)
|
||||
task.status = PaperlessTask.Status.FAILURE
|
||||
task.result_message = str(e)
|
||||
|
||||
task.date_done = timezone.now()
|
||||
task.save(update_fields=["status", "result", "date_done"])
|
||||
task.save(update_fields=["status", "result_message", "date_done"])
|
||||
|
||||
|
||||
@shared_task(bind=True)
|
||||
@@ -642,14 +641,14 @@ def llmindex_index(
|
||||
ai_config = AIConfig()
|
||||
if ai_config.llm_index_enabled:
|
||||
task = PaperlessTask.objects.create(
|
||||
type=PaperlessTask.TaskType.SCHEDULED_TASK
|
||||
trigger_source=PaperlessTask.TriggerSource.SCHEDULED
|
||||
if scheduled
|
||||
else PaperlessTask.TaskType.AUTO
|
||||
else PaperlessTask.TriggerSource.SYSTEM
|
||||
if auto
|
||||
else PaperlessTask.TaskType.MANUAL_TASK,
|
||||
else PaperlessTask.TriggerSource.MANUAL,
|
||||
task_id=uuid.uuid4(),
|
||||
task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,
|
||||
status=states.STARTED,
|
||||
task_type=PaperlessTask.TaskType.LLM_INDEX,
|
||||
status=PaperlessTask.Status.STARTED,
|
||||
date_created=timezone.now(),
|
||||
date_started=timezone.now(),
|
||||
)
|
||||
@@ -660,15 +659,15 @@ def llmindex_index(
|
||||
iter_wrapper=iter_wrapper,
|
||||
rebuild=rebuild,
|
||||
)
|
||||
task.status = states.SUCCESS
|
||||
task.result = result
|
||||
task.status = PaperlessTask.Status.SUCCESS
|
||||
task.result_message = result
|
||||
except Exception as e:
|
||||
logger.error("LLM index error: " + str(e))
|
||||
task.status = states.FAILURE
|
||||
task.result = str(e)
|
||||
task.status = PaperlessTask.Status.FAILURE
|
||||
task.result_message = str(e)
|
||||
|
||||
task.date_done = timezone.now()
|
||||
task.save(update_fields=["status", "result", "date_done"])
|
||||
task.save(update_fields=["status", "result_message", "date_done"])
|
||||
else:
|
||||
logger.info("LLM index is disabled, skipping update.")
|
||||
|
||||
|
||||
@@ -20,7 +20,6 @@ from urllib.parse import urlparse
|
||||
import httpx
|
||||
import magic
|
||||
import pathvalidate
|
||||
from celery import states
|
||||
from django.conf import settings
|
||||
from django.contrib.auth.models import Group
|
||||
from django.contrib.auth.models import User
|
||||
@@ -3777,16 +3776,16 @@ class TasksViewSet(ReadOnlyModelViewSet[PaperlessTask]):
|
||||
filterset_class = PaperlessTaskFilterSet
|
||||
|
||||
TASK_AND_ARGS_BY_NAME = {
|
||||
PaperlessTask.TaskName.INDEX_OPTIMIZE: (index_optimize, {}),
|
||||
PaperlessTask.TaskName.TRAIN_CLASSIFIER: (
|
||||
PaperlessTask.TaskType.INDEX_OPTIMIZE: (index_optimize, {}),
|
||||
PaperlessTask.TaskType.TRAIN_CLASSIFIER: (
|
||||
train_classifier,
|
||||
{"scheduled": False},
|
||||
),
|
||||
PaperlessTask.TaskName.CHECK_SANITY: (
|
||||
PaperlessTask.TaskType.SANITY_CHECK: (
|
||||
sanity_check,
|
||||
{"scheduled": False, "raise_on_error": False},
|
||||
),
|
||||
PaperlessTask.TaskName.LLMINDEX_UPDATE: (
|
||||
PaperlessTask.TaskType.LLM_INDEX: (
|
||||
llmindex_index,
|
||||
{"scheduled": False, "rebuild": False},
|
||||
),
|
||||
@@ -3824,13 +3823,13 @@ class TasksViewSet(ReadOnlyModelViewSet[PaperlessTask]):
|
||||
def run(self, request):
|
||||
serializer = RunTaskViewSerializer(data=request.data)
|
||||
serializer.is_valid(raise_exception=True)
|
||||
task_name = serializer.validated_data.get("task_name")
|
||||
task_type = serializer.validated_data.get("task_type")
|
||||
|
||||
if not request.user.is_superuser:
|
||||
return HttpResponseForbidden("Insufficient permissions")
|
||||
|
||||
try:
|
||||
task_func, task_args = self.TASK_AND_ARGS_BY_NAME[task_name]
|
||||
task_func, task_args = self.TASK_AND_ARGS_BY_NAME[task_type]
|
||||
result = task_func(**task_args)
|
||||
return Response({"result": result})
|
||||
except Exception as e:
|
||||
@@ -4466,11 +4465,11 @@ class SystemStatusView(PassUserMixin):
|
||||
|
||||
last_trained_task = (
|
||||
PaperlessTask.objects.filter(
|
||||
task_name=PaperlessTask.TaskName.TRAIN_CLASSIFIER,
|
||||
task_type=PaperlessTask.TaskType.TRAIN_CLASSIFIER,
|
||||
status__in=[
|
||||
states.SUCCESS,
|
||||
states.FAILURE,
|
||||
states.REVOKED,
|
||||
PaperlessTask.Status.SUCCESS,
|
||||
PaperlessTask.Status.FAILURE,
|
||||
PaperlessTask.Status.REVOKED,
|
||||
], # ignore running tasks
|
||||
)
|
||||
.order_by("-date_done")
|
||||
@@ -4481,20 +4480,23 @@ class SystemStatusView(PassUserMixin):
|
||||
if last_trained_task is None:
|
||||
classifier_status = "WARNING"
|
||||
classifier_error = "No classifier training tasks found"
|
||||
elif last_trained_task and last_trained_task.status != states.SUCCESS:
|
||||
elif (
|
||||
last_trained_task
|
||||
and last_trained_task.status != PaperlessTask.Status.SUCCESS
|
||||
):
|
||||
classifier_status = "ERROR"
|
||||
classifier_error = last_trained_task.result
|
||||
classifier_error = last_trained_task.result_message
|
||||
classifier_last_trained = (
|
||||
last_trained_task.date_done if last_trained_task else None
|
||||
)
|
||||
|
||||
last_sanity_check = (
|
||||
PaperlessTask.objects.filter(
|
||||
task_name=PaperlessTask.TaskName.CHECK_SANITY,
|
||||
task_type=PaperlessTask.TaskType.SANITY_CHECK,
|
||||
status__in=[
|
||||
states.SUCCESS,
|
||||
states.FAILURE,
|
||||
states.REVOKED,
|
||||
PaperlessTask.Status.SUCCESS,
|
||||
PaperlessTask.Status.FAILURE,
|
||||
PaperlessTask.Status.REVOKED,
|
||||
], # ignore running tasks
|
||||
)
|
||||
.order_by("-date_done")
|
||||
@@ -4505,9 +4507,12 @@ class SystemStatusView(PassUserMixin):
|
||||
if last_sanity_check is None:
|
||||
sanity_check_status = "WARNING"
|
||||
sanity_check_error = "No sanity check tasks found"
|
||||
elif last_sanity_check and last_sanity_check.status != states.SUCCESS:
|
||||
elif (
|
||||
last_sanity_check
|
||||
and last_sanity_check.status != PaperlessTask.Status.SUCCESS
|
||||
):
|
||||
sanity_check_status = "ERROR"
|
||||
sanity_check_error = last_sanity_check.result
|
||||
sanity_check_error = last_sanity_check.result_message
|
||||
sanity_check_last_run = (
|
||||
last_sanity_check.date_done if last_sanity_check else None
|
||||
)
|
||||
@@ -4520,7 +4525,7 @@ class SystemStatusView(PassUserMixin):
|
||||
else:
|
||||
last_llmindex_update = (
|
||||
PaperlessTask.objects.filter(
|
||||
task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,
|
||||
task_type=PaperlessTask.TaskType.LLM_INDEX,
|
||||
)
|
||||
.order_by("-date_done")
|
||||
.first()
|
||||
@@ -4530,9 +4535,12 @@ class SystemStatusView(PassUserMixin):
|
||||
if last_llmindex_update is None:
|
||||
llmindex_status = "WARNING"
|
||||
llmindex_error = "No LLM index update tasks found"
|
||||
elif last_llmindex_update and last_llmindex_update.status == states.FAILURE:
|
||||
elif (
|
||||
last_llmindex_update
|
||||
and last_llmindex_update.status == PaperlessTask.Status.FAILURE
|
||||
):
|
||||
llmindex_status = "ERROR"
|
||||
llmindex_error = last_llmindex_update.result
|
||||
llmindex_error = last_llmindex_update.result_message
|
||||
llmindex_last_modified = (
|
||||
last_llmindex_update.date_done if last_llmindex_update else None
|
||||
)
|
||||
|
||||
@@ -4,7 +4,6 @@ from datetime import timedelta
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from celery import states
|
||||
from django.conf import settings
|
||||
from django.utils import timezone
|
||||
|
||||
@@ -28,11 +27,11 @@ def queue_llm_index_update_if_needed(*, rebuild: bool, reason: str) -> bool:
|
||||
from documents.tasks import llmindex_index
|
||||
|
||||
has_running = PaperlessTask.objects.filter(
|
||||
task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,
|
||||
status__in=[states.PENDING, states.STARTED],
|
||||
task_type=PaperlessTask.TaskType.LLM_INDEX,
|
||||
status__in=[PaperlessTask.Status.PENDING, PaperlessTask.Status.STARTED],
|
||||
).exists()
|
||||
has_recent = PaperlessTask.objects.filter(
|
||||
task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,
|
||||
task_type=PaperlessTask.TaskType.LLM_INDEX,
|
||||
date_created__gte=(timezone.now() - timedelta(minutes=5)),
|
||||
).exists()
|
||||
if has_running or has_recent:
|
||||
|
||||
Reference in New Issue
Block a user