From e0ffa745f58331c48a58304a6b475699e72f601e Mon Sep 17 00:00:00 2001 From: stumpylog <797416+stumpylog@users.noreply.github.com> Date: Thu, 16 Apr 2026 14:04:12 -0700 Subject: [PATCH] Handle JSON serialization for datetime and Path. Further restrist the v9 permissions as Copilot suggests --- src/documents/serialisers.py | 22 ++++-- src/documents/signals/handlers.py | 15 ++-- src/documents/tests/test_api_tasks.py | 95 ++++++++++++++++++++++++ src/documents/tests/test_task_signals.py | 29 ++++++++ src/documents/tests/test_tasks.py | 2 +- 5 files changed, 150 insertions(+), 13 deletions(-) diff --git a/src/documents/serialisers.py b/src/documents/serialisers.py index 3a205f746..986fdf720 100644 --- a/src/documents/serialisers.py +++ b/src/documents/serialisers.py @@ -39,6 +39,7 @@ from drf_spectacular.utils import extend_schema_field from drf_spectacular.utils import extend_schema_serializer from drf_writable_nested.serializers import NestedUpdateMixin from guardian.core import ObjectPermissionChecker +from guardian.shortcuts import get_objects_for_user from guardian.shortcuts import get_users_with_perms from guardian.utils import get_group_obj_perms_model from guardian.utils import get_user_obj_perms_model @@ -2575,13 +2576,20 @@ class TaskSerializerV9(serializers.ModelSerializer): dup_of = obj.result_data.get("duplicate_of") if dup_of is None: return [] - return list( - Document.global_objects.filter(pk=dup_of).values( - "id", - "title", - "deleted_at", - ), - ) + request = self.context.get("request") + if request is None: + return [] + user = request.user + qs = Document.global_objects.filter(pk=dup_of) + if not user.is_staff: + with_perms = get_objects_for_user( + user, + "documents.view_document", + qs, + accept_global_perms=False, + ) + qs = with_perms | qs.filter(owner=user) | qs.filter(owner__isnull=True) + return list(qs.values("id", "title", "deleted_at")) class TaskSummarySerializer(serializers.Serializer): diff --git a/src/documents/signals/handlers.py b/src/documents/signals/handlers.py index 78da89833..9bb602ad0 100644 --- a/src/documents/signals/handlers.py +++ b/src/documents/signals/handlers.py @@ -1,5 +1,6 @@ from __future__ import annotations +import datetime import hashlib import logging import re as _re @@ -1066,11 +1067,15 @@ def _extract_input_data( if input_doc.mailrule_id: data["mailrule_id"] = input_doc.mailrule_id if overrides: - override_dict = { - k: v - for k, v in vars(overrides).items() - if v is not None and not k.startswith("_") - } + override_dict = {} + for k, v in vars(overrides).items(): + if v is None or k.startswith("_"): + continue + if isinstance(v, datetime.date): + v = v.isoformat() + elif isinstance(v, Path): + v = str(v) + override_dict[k] = v if override_dict: data["overrides"] = override_dict return data diff --git a/src/documents/tests/test_api_tasks.py b/src/documents/tests/test_api_tasks.py index 9fce72cf3..06bcbbccb 100644 --- a/src/documents/tests/test_api_tasks.py +++ b/src/documents/tests/test_api_tasks.py @@ -14,6 +14,7 @@ import pytest from django.contrib.auth.models import Permission from django.contrib.auth.models import User from django.utils import timezone +from guardian.shortcuts import assign_perm from rest_framework import status from rest_framework.test import APIClient @@ -746,3 +747,97 @@ class TestRun: kwargs={"raise_on_error": False}, headers={"trigger_source": PaperlessTask.TriggerSource.MANUAL}, ) + + +@pytest.mark.django_db() +class TestDuplicateDocumentsPermissions: + """duplicate_documents in the v9 response must respect document-level permissions.""" + + @pytest.fixture() + def user_v9_client(self, regular_user: User) -> APIClient: + regular_user.user_permissions.add( + Permission.objects.get(codename="view_paperlesstask"), + ) + client = APIClient() + client.force_authenticate(user=regular_user) + client.credentials(HTTP_ACCEPT=ACCEPT_V9) + return client + + def test_owner_sees_duplicate_document( + self, + user_v9_client: APIClient, + regular_user: User, + ) -> None: + """A non-staff user sees a duplicate_of document they own.""" + doc = DocumentFactory(owner=regular_user, title="My Doc") + PaperlessTaskFactory( + owner=regular_user, + status=PaperlessTask.Status.SUCCESS, + result_data={"duplicate_of": doc.pk}, + ) + + response = user_v9_client.get(ENDPOINT) + + assert response.status_code == status.HTTP_200_OK + dupes = response.data[0]["duplicate_documents"] + assert len(dupes) == 1 + assert dupes[0]["id"] == doc.pk + + def test_unowned_duplicate_document_is_visible( + self, + user_v9_client: APIClient, + regular_user: User, + ) -> None: + """An unowned duplicate_of document is visible to any authenticated user.""" + doc = DocumentFactory(owner=None, title="Shared Doc") + PaperlessTaskFactory( + owner=regular_user, + status=PaperlessTask.Status.SUCCESS, + result_data={"duplicate_of": doc.pk}, + ) + + response = user_v9_client.get(ENDPOINT) + + assert response.status_code == status.HTTP_200_OK + assert len(response.data[0]["duplicate_documents"]) == 1 + + def test_other_users_duplicate_document_is_hidden( + self, + user_v9_client: APIClient, + regular_user: User, + admin_user: User, + ) -> None: + """A non-staff user cannot see a duplicate_of document owned by another user.""" + doc = DocumentFactory(owner=admin_user, title="Admin Doc") + PaperlessTaskFactory( + owner=regular_user, + status=PaperlessTask.Status.SUCCESS, + result_data={"duplicate_of": doc.pk}, + ) + + response = user_v9_client.get(ENDPOINT) + + assert response.status_code == status.HTTP_200_OK + assert response.data[0]["duplicate_documents"] == [] + + def test_explicit_permission_grants_visibility( + self, + user_v9_client: APIClient, + regular_user: User, + admin_user: User, + ) -> None: + """A user with explicit guardian view_document permission sees the duplicate_of document.""" + doc = DocumentFactory(owner=admin_user, title="Granted Doc") + assign_perm("view_document", regular_user, doc) + PaperlessTaskFactory( + owner=regular_user, + status=PaperlessTask.Status.SUCCESS, + result_data={"duplicate_of": doc.pk}, + ) + + response = user_v9_client.get(ENDPOINT) + + assert response.status_code == status.HTTP_200_OK + dupes = response.data[0]["duplicate_documents"] + assert len(dupes) == 1 + assert dupes[0]["id"] == doc.pk diff --git a/src/documents/tests/test_task_signals.py b/src/documents/tests/test_task_signals.py index 9af733098..80b5e5075 100644 --- a/src/documents/tests/test_task_signals.py +++ b/src/documents/tests/test_task_signals.py @@ -1,5 +1,7 @@ +import datetime import sys import uuid +from pathlib import Path from unittest import mock import pytest @@ -94,6 +96,33 @@ class TestBeforeTaskPublishHandler: task = PaperlessTask.objects.get(task_id=task_id) assert task.input_data == {} + def test_overrides_date_serialized_as_iso_string(self, consume_input_doc): + """A datetime.date in overrides is stored as an ISO string so input_data is JSON-safe.""" + overrides = DocumentMetadataOverrides(created=datetime.date(2024, 1, 15)) + + task_id = send_publish( + "documents.tasks.consume_file", + (consume_input_doc, overrides), + {}, + ) + + task = PaperlessTask.objects.get(task_id=task_id) + assert task.input_data["overrides"]["created"] == "2024-01-15" + + def test_overrides_path_serialized_as_string(self, consume_input_doc): + """A Path value in overrides is stored as a plain string so input_data is JSON-safe.""" + overrides = DocumentMetadataOverrides() + overrides.filename = Path("/uploads/invoice.pdf") # type: ignore[assignment] + + task_id = send_publish( + "documents.tasks.consume_file", + (consume_input_doc, overrides), + {}, + ) + + task = PaperlessTask.objects.get(task_id=task_id) + assert task.input_data["overrides"]["filename"] == "/uploads/invoice.pdf" + @pytest.mark.parametrize( ("header_value", "expected_trigger_source"), [ diff --git a/src/documents/tests/test_tasks.py b/src/documents/tests/test_tasks.py index 0db6a9559..fb06f7d60 100644 --- a/src/documents/tests/test_tasks.py +++ b/src/documents/tests/test_tasks.py @@ -334,7 +334,7 @@ class TestAIIndex(DirectoriesMixin, TestCase): # lazy-loaded so mock the actual function with mock.patch("paperless_ai.indexing.update_llm_index") as update_llm_index: update_llm_index.side_effect = Exception("LLM index update failed.") - with self.assertRaises(Exception, msg="LLM index update failed."): + with self.assertRaisesRegex(Exception, "LLM index update failed."): tasks.llmindex_index() update_llm_index.assert_called_once()