diff --git a/src/documents/filters.py b/src/documents/filters.py index caf3bfe25..751f0aa0b 100644 --- a/src/documents/filters.py +++ b/src/documents/filters.py @@ -26,8 +26,10 @@ from django.db.models.functions import Cast from django.utils.translation import gettext_lazy as _ from django_filters import DateFilter from django_filters.rest_framework import BooleanFilter +from django_filters.rest_framework import DateTimeFilter from django_filters.rest_framework import Filter from django_filters.rest_framework import FilterSet +from django_filters.rest_framework import MultipleChoiceFilter from drf_spectacular.utils import extend_schema_field from guardian.utils import get_group_obj_perms_model from guardian.utils import get_user_obj_perms_model @@ -862,18 +864,56 @@ class ShareLinkBundleFilterSet(FilterSet): class PaperlessTaskFilterSet(FilterSet): + task_type = MultipleChoiceFilter( + choices=PaperlessTask.TaskType.choices, + label="Task Type", + ) + + trigger_source = MultipleChoiceFilter( + choices=PaperlessTask.TriggerSource.choices, + label="Trigger Source", + ) + + status = MultipleChoiceFilter( + choices=PaperlessTask.Status.choices, + label="Status", + ) + + is_complete = BooleanFilter( + method="filter_is_complete", + label="Is Complete", + ) + acknowledged = BooleanFilter( label="Acknowledged", field_name="acknowledged", ) + date_created_after = DateTimeFilter( + field_name="date_created", + lookup_expr="gte", + label="Created After", + ) + + date_created_before = DateTimeFilter( + field_name="date_created", + lookup_expr="lte", + label="Created Before", + ) + class Meta: model = PaperlessTask - fields = { - "task_type": ["exact"], - "trigger_source": ["exact"], - "status": ["exact"], - } + fields = ["task_type", "trigger_source", "status", "acknowledged", "owner"] + + def filter_is_complete(self, queryset, name, value): + complete = [ + PaperlessTask.Status.SUCCESS, + PaperlessTask.Status.FAILURE, + PaperlessTask.Status.REVOKED, + ] + if value: + return queryset.filter(status__in=complete) + return queryset.exclude(status__in=complete) class ObjectOwnedOrGrantedPermissionsFilter(ObjectPermissionsFilter): diff --git a/src/documents/serialisers.py b/src/documents/serialisers.py index 2125c3c84..e18ac52ac 100644 --- a/src/documents/serialisers.py +++ b/src/documents/serialisers.py @@ -2430,45 +2430,138 @@ class UiSettingsViewSerializer(serializers.ModelSerializer[UiSettings]): return ui_settings -class TasksViewSerializer(OwnedObjectSerializer): +class TaskSerializerV10(OwnedObjectSerializer): + """Task serializer for API v10+ using new field names.""" + + related_document_ids = serializers.ListField( + child=serializers.IntegerField(), + read_only=True, + source="related_document_ids", + ) + task_type_display = serializers.CharField( + source="get_task_type_display", + read_only=True, + ) + trigger_source_display = serializers.CharField( + source="get_trigger_source_display", + read_only=True, + ) + status_display = serializers.CharField( + source="get_status_display", + read_only=True, + ) + class Meta: model = PaperlessTask fields = ( "id", "task_id", "task_type", + "task_type_display", "trigger_source", + "trigger_source_display", + "status", + "status_display", + "date_created", + "date_started", + "date_done", + "duration_seconds", + "wait_time_seconds", + "input_data", + "result_data", + "result_message", + "related_document_ids", + "acknowledged", + "owner", + ) + read_only_fields = fields + + +class TaskSerializerV9(serializers.ModelSerializer): + """Task serializer for API v9 backwards compatibility. + + Maps old field names to the new model fields so existing clients continue + to work unchanged. + """ + + # v9 field: task_name -> task_type + task_name = serializers.CharField(source="task_type", read_only=True) + + # v9 field: task_file_name -> input_data.filename + task_file_name = serializers.SerializerMethodField() + + # v9 field: type -> trigger_source (mapped to old enum labels) + type = serializers.SerializerMethodField() + + # v9 field: result -> result_message (with legacy format fallback) + result = serializers.CharField( + source="result_message", + read_only=True, + allow_null=True, + ) + + # v9 field: related_document -> first document ID from result_data + related_document = serializers.SerializerMethodField() + + # v9 field: duplicate_documents -> list of duplicate IDs from result_data + duplicate_documents = serializers.SerializerMethodField() + + class Meta: + model = PaperlessTask + fields = ( + "id", + "task_id", + "task_name", + "task_file_name", + "type", + "status", "date_created", "date_done", - "status", - "result_message", - "result_data", + "result", "acknowledged", "related_document", "duplicate_documents", "owner", ) - related_document = serializers.SerializerMethodField() - duplicate_documents = serializers.SerializerMethodField() + def get_task_file_name(self, obj: PaperlessTask) -> str | None: + if not obj.input_data: + return None + return obj.input_data.get("filename") - def get_related_document(self, obj) -> int | None: - doc_ids = obj.related_document_ids - return doc_ids[0] if doc_ids else None + def get_type(self, obj: PaperlessTask) -> str: + # Old type values: AUTO_TASK, SCHEDULED_TASK, MANUAL_TASK + source_to_old_type = { + PaperlessTask.TriggerSource.SCHEDULED: "SCHEDULED_TASK", + PaperlessTask.TriggerSource.SYSTEM: "AUTO_TASK", + } + return source_to_old_type.get(obj.trigger_source, "MANUAL_TASK") - @extend_schema_field(DuplicateDocumentSummarySerializer(many=True)) - def get_duplicate_documents(self, obj): - related_document = self.get_related_document(obj) - request = self.context.get("request") - user = request.user if request else None - document = Document.global_objects.filter(pk=related_document).first() - if not related_document or not user or not document: + def get_related_document(self, obj: PaperlessTask) -> int | None: + ids = obj.related_document_ids + return ids[0] if ids else None + + def get_duplicate_documents(self, obj: PaperlessTask) -> list[int]: + if not obj.result_data: return [] - duplicates = _get_viewable_duplicates(document, user) - return list(duplicates.values("id", "title", "deleted_at")) + dup_of = obj.result_data.get("duplicate_of") + return [dup_of] if dup_of is not None else [] -class RunTaskViewSerializer(serializers.Serializer[dict[str, Any]]): +class TaskSummarySerializer(serializers.Serializer): + task_type = serializers.CharField() + total_count = serializers.IntegerField() + pending_count = serializers.IntegerField() + success_count = serializers.IntegerField() + failure_count = serializers.IntegerField() + avg_duration_seconds = serializers.FloatField(allow_null=True) + avg_wait_time_seconds = serializers.FloatField(allow_null=True) + last_run = serializers.DateTimeField(allow_null=True) + last_success = serializers.DateTimeField(allow_null=True) + last_failure = serializers.DateTimeField(allow_null=True) + + +class RunTaskSerializer(serializers.Serializer): task_type = serializers.ChoiceField( choices=PaperlessTask.TaskType.choices, label="Task Type", diff --git a/src/documents/views.py b/src/documents/views.py index 2aea9aba4..a08ce1f5b 100644 --- a/src/documents/views.py +++ b/src/documents/views.py @@ -8,6 +8,7 @@ import zipfile from collections import defaultdict from collections import deque from datetime import datetime +from datetime import timedelta from pathlib import Path from time import mktime from typing import TYPE_CHECKING @@ -192,7 +193,7 @@ from documents.serialisers import PostDocumentSerializer from documents.serialisers import RemovePasswordDocumentsSerializer from documents.serialisers import ReprocessDocumentsSerializer from documents.serialisers import RotateDocumentsSerializer -from documents.serialisers import RunTaskViewSerializer +from documents.serialisers import RunTaskSerializer from documents.serialisers import SavedViewSerializer from documents.serialisers import SearchResultSerializer from documents.serialisers import SerializerWithPerms @@ -201,7 +202,9 @@ from documents.serialisers import ShareLinkSerializer from documents.serialisers import StoragePathSerializer from documents.serialisers import StoragePathTestSerializer from documents.serialisers import TagSerializer -from documents.serialisers import TasksViewSerializer +from documents.serialisers import TaskSerializerV9 +from documents.serialisers import TaskSerializerV10 +from documents.serialisers import TaskSummarySerializer from documents.serialisers import TrashSerializer from documents.serialisers import UiSettingsViewSerializer from documents.serialisers import WorkflowActionSerializer @@ -3767,35 +3770,50 @@ class RemoteVersionView(GenericAPIView[Any]): ) class TasksViewSet(ReadOnlyModelViewSet[PaperlessTask]): permission_classes = (IsAuthenticated, PaperlessObjectPermissions) - serializer_class = TasksViewSerializer filter_backends = ( DjangoFilterBackend, OrderingFilter, ObjectOwnedOrGrantedPermissionsFilter, ) filterset_class = PaperlessTaskFilterSet + ordering_fields = [ + "date_created", + "date_done", + "status", + "task_type", + "duration_seconds", + "wait_time_seconds", + ] + ordering = ["-date_created"] - TASK_AND_ARGS_BY_NAME = { - PaperlessTask.TaskType.INDEX_OPTIMIZE: (index_optimize, {}), - PaperlessTask.TaskType.TRAIN_CLASSIFIER: ( - train_classifier, - {"scheduled": False}, - ), - PaperlessTask.TaskType.SANITY_CHECK: ( - sanity_check, - {"scheduled": False, "raise_on_error": False}, - ), - PaperlessTask.TaskType.LLM_INDEX: ( - llmindex_index, - {"scheduled": False, "rebuild": False}, - ), - } + def get_serializer_class(self): + # v9: use backwards-compatible serializer with old field names + if self.request.version and int(self.request.version) < 10: + return TaskSerializerV9 + return TaskSerializerV10 def get_queryset(self): - queryset = PaperlessTask.objects.all().order_by("-date_created") + queryset = PaperlessTask.objects.all() + # v9 backwards compat: map old query params to new field names + if self.request.version and int(self.request.version) < 10: + task_name = self.request.query_params.get("task_name") + if task_name is not None: + queryset = queryset.filter(task_type=task_name) + task_type_old = self.request.query_params.get("type") + if task_type_old is not None: + # Old type values: AUTO_TASK -> SYSTEM, SCHEDULED_TASK -> SCHEDULED, MANUAL_TASK -> MANUAL + old_to_new = { + "AUTO_TASK": PaperlessTask.TriggerSource.SYSTEM, + "SCHEDULED_TASK": PaperlessTask.TriggerSource.SCHEDULED, + "MANUAL_TASK": PaperlessTask.TriggerSource.MANUAL, + } + new_source = old_to_new.get(task_type_old) + if new_source: + queryset = queryset.filter(trigger_source=new_source) + # v10+: direct task_id param for backwards compat task_id = self.request.query_params.get("task_id") if task_id is not None: - queryset = PaperlessTask.objects.filter(task_id=task_id) + queryset = queryset.filter(task_id=task_id) return queryset @action( @@ -3807,33 +3825,120 @@ class TasksViewSet(ReadOnlyModelViewSet[PaperlessTask]): serializer = AcknowledgeTasksViewSerializer(data=request.data) serializer.is_valid(raise_exception=True) task_ids = serializer.validated_data.get("tasks") + tasks = self.get_queryset().filter(id__in=task_ids) + count = tasks.update(acknowledged=True) + return Response({"result": count}) - try: - tasks = PaperlessTask.objects.filter(id__in=task_ids) - if request.user is not None and not request.user.is_superuser: - tasks = tasks.filter(owner=request.user) | tasks.filter(owner=None) - result = tasks.update( - acknowledged=True, + @action( + methods=["post"], + detail=False, + permission_classes=[IsAuthenticated, AcknowledgeTasksPermissions], + ) + def acknowledge_all(self, request): + """Acknowledge all completed tasks visible to the requesting user.""" + count = ( + self.get_queryset() + .filter( + acknowledged=False, + status__in=[ + PaperlessTask.Status.SUCCESS, + PaperlessTask.Status.FAILURE, + PaperlessTask.Status.REVOKED, + ], ) - return Response({"result": result}) - except Exception: - return HttpResponseBadRequest() + .update(acknowledged=True) + ) + return Response({"result": count}) + + @action(methods=["get"], detail=False) + def summary(self, request): + """Aggregated task statistics per task_type over the last N days (default 30).""" + from django.db.models import Avg + from django.db.models import Count + from django.db.models import Max + from django.db.models import Q + + days = int(request.query_params.get("days", 30)) + cutoff = timezone.now() - timedelta(days=days) + queryset = self.get_queryset().filter(date_created__gte=cutoff) + + data = queryset.values("task_type").annotate( + total_count=Count("id"), + pending_count=Count("id", filter=Q(status=PaperlessTask.Status.PENDING)), + success_count=Count("id", filter=Q(status=PaperlessTask.Status.SUCCESS)), + failure_count=Count("id", filter=Q(status=PaperlessTask.Status.FAILURE)), + avg_duration_seconds=Avg( + "duration_seconds", + filter=Q(duration_seconds__isnull=False), + ), + avg_wait_time_seconds=Avg( + "wait_time_seconds", + filter=Q(wait_time_seconds__isnull=False), + ), + last_run=Max("date_created"), + last_success=Max( + "date_done", + filter=Q(status=PaperlessTask.Status.SUCCESS), + ), + last_failure=Max( + "date_done", + filter=Q(status=PaperlessTask.Status.FAILURE), + ), + ) + serializer = TaskSummarySerializer(data, many=True) + return Response(serializer.data) + + @action(methods=["get"], detail=False) + def active(self, request): + """Currently pending and running tasks (capped at 50).""" + queryset = ( + self.get_queryset() + .filter( + status__in=[PaperlessTask.Status.PENDING, PaperlessTask.Status.STARTED], + ) + .order_by("-date_created")[:50] + ) + serializer = self.get_serializer(queryset, many=True) + return Response(serializer.data) @action(methods=["post"], detail=False) def run(self, request): - serializer = RunTaskViewSerializer(data=request.data) + """Manually dispatch a background task. Superuser only.""" + serializer = RunTaskSerializer(data=request.data) serializer.is_valid(raise_exception=True) task_type = serializer.validated_data.get("task_type") if not request.user.is_superuser: return HttpResponseForbidden("Insufficient permissions") + task_func_map = { + PaperlessTask.TaskType.INDEX_OPTIMIZE: (index_optimize, {}), + PaperlessTask.TaskType.TRAIN_CLASSIFIER: (train_classifier, {}), + PaperlessTask.TaskType.SANITY_CHECK: ( + sanity_check, + {"raise_on_error": False}, + ), + PaperlessTask.TaskType.LLM_INDEX: ( + llmindex_index, + {"rebuild": False}, + ), + } + + if task_type not in task_func_map: + return Response( + {"error": f"Task type '{task_type}' cannot be manually triggered"}, + status=status.HTTP_400_BAD_REQUEST, + ) + try: - task_func, task_args = self.TASK_AND_ARGS_BY_NAME[task_type] - result = task_func(**task_args) - return Response({"result": result}) + task_func, task_kwargs = task_func_map[task_type] + async_result = task_func.apply_async( + kwargs=task_kwargs, + headers={"trigger_source": "manual"}, + ) + return Response({"task_id": async_result.id}) except Exception as e: - logger.warning(f"An error occurred running task: {e!s}") + logger.warning(f"Error running task: {e!s}") return HttpResponseServerError( "Error running task, check logs for more detail.", )