feat(tasks): update serializer, filter, and viewset with v9 backwards compat

- Replace TasksViewSerializer/RunTaskViewSerializer with TaskSerializerV10
  (new field names), TaskSerializerV9 (v9 compat), TaskSummarySerializer,
  and RunTaskSerializer
- Add AcknowledgeTasksViewSerializer unchanged (kept existing validation)
- Expand PaperlessTaskFilterSet with MultipleChoiceFilter for task_type,
  trigger_source, status; add is_complete, date_created_after/before filters
- Replace TasksViewSet.get_serializer_class() to branch on request.version
- Add get_queryset() v9 compat for task_name/type query params
- Add acknowledge_all, summary, active actions to TasksViewSet
- Rewrite run action to use apply_async with trigger_source header
- Add timedelta import to views.py; add MultipleChoiceFilter/DateTimeFilter
  to filters.py imports

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
stumpylog
2026-04-15 13:59:01 -07:00
parent 868ddbc2fa
commit 39c7a04dd2
3 changed files with 296 additions and 58 deletions

View File

@@ -26,8 +26,10 @@ from django.db.models.functions import Cast
from django.utils.translation import gettext_lazy as _
from django_filters import DateFilter
from django_filters.rest_framework import BooleanFilter
from django_filters.rest_framework import DateTimeFilter
from django_filters.rest_framework import Filter
from django_filters.rest_framework import FilterSet
from django_filters.rest_framework import MultipleChoiceFilter
from drf_spectacular.utils import extend_schema_field
from guardian.utils import get_group_obj_perms_model
from guardian.utils import get_user_obj_perms_model
@@ -862,18 +864,56 @@ class ShareLinkBundleFilterSet(FilterSet):
class PaperlessTaskFilterSet(FilterSet):
task_type = MultipleChoiceFilter(
choices=PaperlessTask.TaskType.choices,
label="Task Type",
)
trigger_source = MultipleChoiceFilter(
choices=PaperlessTask.TriggerSource.choices,
label="Trigger Source",
)
status = MultipleChoiceFilter(
choices=PaperlessTask.Status.choices,
label="Status",
)
is_complete = BooleanFilter(
method="filter_is_complete",
label="Is Complete",
)
acknowledged = BooleanFilter(
label="Acknowledged",
field_name="acknowledged",
)
date_created_after = DateTimeFilter(
field_name="date_created",
lookup_expr="gte",
label="Created After",
)
date_created_before = DateTimeFilter(
field_name="date_created",
lookup_expr="lte",
label="Created Before",
)
class Meta:
model = PaperlessTask
fields = {
"task_type": ["exact"],
"trigger_source": ["exact"],
"status": ["exact"],
}
fields = ["task_type", "trigger_source", "status", "acknowledged", "owner"]
def filter_is_complete(self, queryset, name, value):
complete = [
PaperlessTask.Status.SUCCESS,
PaperlessTask.Status.FAILURE,
PaperlessTask.Status.REVOKED,
]
if value:
return queryset.filter(status__in=complete)
return queryset.exclude(status__in=complete)
class ObjectOwnedOrGrantedPermissionsFilter(ObjectPermissionsFilter):

View File

@@ -2430,45 +2430,138 @@ class UiSettingsViewSerializer(serializers.ModelSerializer[UiSettings]):
return ui_settings
class TasksViewSerializer(OwnedObjectSerializer):
class TaskSerializerV10(OwnedObjectSerializer):
"""Task serializer for API v10+ using new field names."""
related_document_ids = serializers.ListField(
child=serializers.IntegerField(),
read_only=True,
source="related_document_ids",
)
task_type_display = serializers.CharField(
source="get_task_type_display",
read_only=True,
)
trigger_source_display = serializers.CharField(
source="get_trigger_source_display",
read_only=True,
)
status_display = serializers.CharField(
source="get_status_display",
read_only=True,
)
class Meta:
model = PaperlessTask
fields = (
"id",
"task_id",
"task_type",
"task_type_display",
"trigger_source",
"trigger_source_display",
"status",
"status_display",
"date_created",
"date_started",
"date_done",
"duration_seconds",
"wait_time_seconds",
"input_data",
"result_data",
"result_message",
"related_document_ids",
"acknowledged",
"owner",
)
read_only_fields = fields
class TaskSerializerV9(serializers.ModelSerializer):
"""Task serializer for API v9 backwards compatibility.
Maps old field names to the new model fields so existing clients continue
to work unchanged.
"""
# v9 field: task_name -> task_type
task_name = serializers.CharField(source="task_type", read_only=True)
# v9 field: task_file_name -> input_data.filename
task_file_name = serializers.SerializerMethodField()
# v9 field: type -> trigger_source (mapped to old enum labels)
type = serializers.SerializerMethodField()
# v9 field: result -> result_message (with legacy format fallback)
result = serializers.CharField(
source="result_message",
read_only=True,
allow_null=True,
)
# v9 field: related_document -> first document ID from result_data
related_document = serializers.SerializerMethodField()
# v9 field: duplicate_documents -> list of duplicate IDs from result_data
duplicate_documents = serializers.SerializerMethodField()
class Meta:
model = PaperlessTask
fields = (
"id",
"task_id",
"task_name",
"task_file_name",
"type",
"status",
"date_created",
"date_done",
"status",
"result_message",
"result_data",
"result",
"acknowledged",
"related_document",
"duplicate_documents",
"owner",
)
related_document = serializers.SerializerMethodField()
duplicate_documents = serializers.SerializerMethodField()
def get_task_file_name(self, obj: PaperlessTask) -> str | None:
if not obj.input_data:
return None
return obj.input_data.get("filename")
def get_related_document(self, obj) -> int | None:
doc_ids = obj.related_document_ids
return doc_ids[0] if doc_ids else None
def get_type(self, obj: PaperlessTask) -> str:
# Old type values: AUTO_TASK, SCHEDULED_TASK, MANUAL_TASK
source_to_old_type = {
PaperlessTask.TriggerSource.SCHEDULED: "SCHEDULED_TASK",
PaperlessTask.TriggerSource.SYSTEM: "AUTO_TASK",
}
return source_to_old_type.get(obj.trigger_source, "MANUAL_TASK")
@extend_schema_field(DuplicateDocumentSummarySerializer(many=True))
def get_duplicate_documents(self, obj):
related_document = self.get_related_document(obj)
request = self.context.get("request")
user = request.user if request else None
document = Document.global_objects.filter(pk=related_document).first()
if not related_document or not user or not document:
def get_related_document(self, obj: PaperlessTask) -> int | None:
ids = obj.related_document_ids
return ids[0] if ids else None
def get_duplicate_documents(self, obj: PaperlessTask) -> list[int]:
if not obj.result_data:
return []
duplicates = _get_viewable_duplicates(document, user)
return list(duplicates.values("id", "title", "deleted_at"))
dup_of = obj.result_data.get("duplicate_of")
return [dup_of] if dup_of is not None else []
class RunTaskViewSerializer(serializers.Serializer[dict[str, Any]]):
class TaskSummarySerializer(serializers.Serializer):
task_type = serializers.CharField()
total_count = serializers.IntegerField()
pending_count = serializers.IntegerField()
success_count = serializers.IntegerField()
failure_count = serializers.IntegerField()
avg_duration_seconds = serializers.FloatField(allow_null=True)
avg_wait_time_seconds = serializers.FloatField(allow_null=True)
last_run = serializers.DateTimeField(allow_null=True)
last_success = serializers.DateTimeField(allow_null=True)
last_failure = serializers.DateTimeField(allow_null=True)
class RunTaskSerializer(serializers.Serializer):
task_type = serializers.ChoiceField(
choices=PaperlessTask.TaskType.choices,
label="Task Type",

View File

@@ -8,6 +8,7 @@ import zipfile
from collections import defaultdict
from collections import deque
from datetime import datetime
from datetime import timedelta
from pathlib import Path
from time import mktime
from typing import TYPE_CHECKING
@@ -192,7 +193,7 @@ from documents.serialisers import PostDocumentSerializer
from documents.serialisers import RemovePasswordDocumentsSerializer
from documents.serialisers import ReprocessDocumentsSerializer
from documents.serialisers import RotateDocumentsSerializer
from documents.serialisers import RunTaskViewSerializer
from documents.serialisers import RunTaskSerializer
from documents.serialisers import SavedViewSerializer
from documents.serialisers import SearchResultSerializer
from documents.serialisers import SerializerWithPerms
@@ -201,7 +202,9 @@ from documents.serialisers import ShareLinkSerializer
from documents.serialisers import StoragePathSerializer
from documents.serialisers import StoragePathTestSerializer
from documents.serialisers import TagSerializer
from documents.serialisers import TasksViewSerializer
from documents.serialisers import TaskSerializerV9
from documents.serialisers import TaskSerializerV10
from documents.serialisers import TaskSummarySerializer
from documents.serialisers import TrashSerializer
from documents.serialisers import UiSettingsViewSerializer
from documents.serialisers import WorkflowActionSerializer
@@ -3767,35 +3770,50 @@ class RemoteVersionView(GenericAPIView[Any]):
)
class TasksViewSet(ReadOnlyModelViewSet[PaperlessTask]):
permission_classes = (IsAuthenticated, PaperlessObjectPermissions)
serializer_class = TasksViewSerializer
filter_backends = (
DjangoFilterBackend,
OrderingFilter,
ObjectOwnedOrGrantedPermissionsFilter,
)
filterset_class = PaperlessTaskFilterSet
ordering_fields = [
"date_created",
"date_done",
"status",
"task_type",
"duration_seconds",
"wait_time_seconds",
]
ordering = ["-date_created"]
TASK_AND_ARGS_BY_NAME = {
PaperlessTask.TaskType.INDEX_OPTIMIZE: (index_optimize, {}),
PaperlessTask.TaskType.TRAIN_CLASSIFIER: (
train_classifier,
{"scheduled": False},
),
PaperlessTask.TaskType.SANITY_CHECK: (
sanity_check,
{"scheduled": False, "raise_on_error": False},
),
PaperlessTask.TaskType.LLM_INDEX: (
llmindex_index,
{"scheduled": False, "rebuild": False},
),
}
def get_serializer_class(self):
# v9: use backwards-compatible serializer with old field names
if self.request.version and int(self.request.version) < 10:
return TaskSerializerV9
return TaskSerializerV10
def get_queryset(self):
queryset = PaperlessTask.objects.all().order_by("-date_created")
queryset = PaperlessTask.objects.all()
# v9 backwards compat: map old query params to new field names
if self.request.version and int(self.request.version) < 10:
task_name = self.request.query_params.get("task_name")
if task_name is not None:
queryset = queryset.filter(task_type=task_name)
task_type_old = self.request.query_params.get("type")
if task_type_old is not None:
# Old type values: AUTO_TASK -> SYSTEM, SCHEDULED_TASK -> SCHEDULED, MANUAL_TASK -> MANUAL
old_to_new = {
"AUTO_TASK": PaperlessTask.TriggerSource.SYSTEM,
"SCHEDULED_TASK": PaperlessTask.TriggerSource.SCHEDULED,
"MANUAL_TASK": PaperlessTask.TriggerSource.MANUAL,
}
new_source = old_to_new.get(task_type_old)
if new_source:
queryset = queryset.filter(trigger_source=new_source)
# v10+: direct task_id param for backwards compat
task_id = self.request.query_params.get("task_id")
if task_id is not None:
queryset = PaperlessTask.objects.filter(task_id=task_id)
queryset = queryset.filter(task_id=task_id)
return queryset
@action(
@@ -3807,33 +3825,120 @@ class TasksViewSet(ReadOnlyModelViewSet[PaperlessTask]):
serializer = AcknowledgeTasksViewSerializer(data=request.data)
serializer.is_valid(raise_exception=True)
task_ids = serializer.validated_data.get("tasks")
tasks = self.get_queryset().filter(id__in=task_ids)
count = tasks.update(acknowledged=True)
return Response({"result": count})
try:
tasks = PaperlessTask.objects.filter(id__in=task_ids)
if request.user is not None and not request.user.is_superuser:
tasks = tasks.filter(owner=request.user) | tasks.filter(owner=None)
result = tasks.update(
acknowledged=True,
@action(
methods=["post"],
detail=False,
permission_classes=[IsAuthenticated, AcknowledgeTasksPermissions],
)
def acknowledge_all(self, request):
"""Acknowledge all completed tasks visible to the requesting user."""
count = (
self.get_queryset()
.filter(
acknowledged=False,
status__in=[
PaperlessTask.Status.SUCCESS,
PaperlessTask.Status.FAILURE,
PaperlessTask.Status.REVOKED,
],
)
return Response({"result": result})
except Exception:
return HttpResponseBadRequest()
.update(acknowledged=True)
)
return Response({"result": count})
@action(methods=["get"], detail=False)
def summary(self, request):
"""Aggregated task statistics per task_type over the last N days (default 30)."""
from django.db.models import Avg
from django.db.models import Count
from django.db.models import Max
from django.db.models import Q
days = int(request.query_params.get("days", 30))
cutoff = timezone.now() - timedelta(days=days)
queryset = self.get_queryset().filter(date_created__gte=cutoff)
data = queryset.values("task_type").annotate(
total_count=Count("id"),
pending_count=Count("id", filter=Q(status=PaperlessTask.Status.PENDING)),
success_count=Count("id", filter=Q(status=PaperlessTask.Status.SUCCESS)),
failure_count=Count("id", filter=Q(status=PaperlessTask.Status.FAILURE)),
avg_duration_seconds=Avg(
"duration_seconds",
filter=Q(duration_seconds__isnull=False),
),
avg_wait_time_seconds=Avg(
"wait_time_seconds",
filter=Q(wait_time_seconds__isnull=False),
),
last_run=Max("date_created"),
last_success=Max(
"date_done",
filter=Q(status=PaperlessTask.Status.SUCCESS),
),
last_failure=Max(
"date_done",
filter=Q(status=PaperlessTask.Status.FAILURE),
),
)
serializer = TaskSummarySerializer(data, many=True)
return Response(serializer.data)
@action(methods=["get"], detail=False)
def active(self, request):
"""Currently pending and running tasks (capped at 50)."""
queryset = (
self.get_queryset()
.filter(
status__in=[PaperlessTask.Status.PENDING, PaperlessTask.Status.STARTED],
)
.order_by("-date_created")[:50]
)
serializer = self.get_serializer(queryset, many=True)
return Response(serializer.data)
@action(methods=["post"], detail=False)
def run(self, request):
serializer = RunTaskViewSerializer(data=request.data)
"""Manually dispatch a background task. Superuser only."""
serializer = RunTaskSerializer(data=request.data)
serializer.is_valid(raise_exception=True)
task_type = serializer.validated_data.get("task_type")
if not request.user.is_superuser:
return HttpResponseForbidden("Insufficient permissions")
task_func_map = {
PaperlessTask.TaskType.INDEX_OPTIMIZE: (index_optimize, {}),
PaperlessTask.TaskType.TRAIN_CLASSIFIER: (train_classifier, {}),
PaperlessTask.TaskType.SANITY_CHECK: (
sanity_check,
{"raise_on_error": False},
),
PaperlessTask.TaskType.LLM_INDEX: (
llmindex_index,
{"rebuild": False},
),
}
if task_type not in task_func_map:
return Response(
{"error": f"Task type '{task_type}' cannot be manually triggered"},
status=status.HTTP_400_BAD_REQUEST,
)
try:
task_func, task_args = self.TASK_AND_ARGS_BY_NAME[task_type]
result = task_func(**task_args)
return Response({"result": result})
task_func, task_kwargs = task_func_map[task_type]
async_result = task_func.apply_async(
kwargs=task_kwargs,
headers={"trigger_source": "manual"},
)
return Response({"task_id": async_result.id})
except Exception as e:
logger.warning(f"An error occurred running task: {e!s}")
logger.warning(f"Error running task: {e!s}")
return HttpResponseServerError(
"Error running task, check logs for more detail.",
)