diff --git a/src/documents/bulk_edit.py b/src/documents/bulk_edit.py index 3f80b699d..3b5f5fb5b 100644 --- a/src/documents/bulk_edit.py +++ b/src/documents/bulk_edit.py @@ -29,8 +29,6 @@ from documents.plugins.helpers import DocumentsStatusManager from documents.tasks import bulk_update_documents from documents.tasks import consume_file from documents.tasks import update_document_content_maybe_archive_file -from documents.versioning import get_latest_version_for_root -from documents.versioning import get_root_document if TYPE_CHECKING: from django.contrib.auth.models import User @@ -81,23 +79,6 @@ def restore_archive_serial_numbers(backup: dict[int, int | None]) -> None: logger.info(f"Restored archive serial numbers for documents {list(backup.keys())}") -def _resolve_root_and_source_doc( - doc: Document, - *, - source_mode: SourceMode = SourceModeChoices.LATEST_VERSION, -) -> tuple[Document, Document]: - root_doc = get_root_document(doc) - - if source_mode == SourceModeChoices.EXPLICIT_SELECTION: - return root_doc, doc - - # Version IDs are explicit by default, only a selected root resolves to latest - if doc.root_document_id is not None: - return root_doc, doc - - return root_doc, get_latest_version_for_root(root_doc) - - def set_correspondent( doc_ids: list[int], correspondent: Correspondent, @@ -334,20 +315,10 @@ def modify_custom_fields( @shared_task def delete(doc_ids: list[int]) -> Literal["OK"]: try: - root_ids = ( - Document.objects.filter(id__in=doc_ids, root_document__isnull=True) - .values_list("id", flat=True) - .distinct() - ) - version_ids = ( - Document.objects.filter(root_document_id__in=root_ids) - .exclude(id__in=doc_ids) - .values_list("id", flat=True) - .distinct() - ) - delete_ids = list({*doc_ids, *version_ids}) + delete_ids = list(doc_ids) Document.objects.filter(id__in=delete_ids).delete() + # DocumentVersion rows are removed by CASCADE automatically. from documents.search import get_backend @@ -413,7 +384,7 @@ def rotate( ) docs_by_id = { doc.id: doc - for doc in Document.objects.select_related("root_document").filter( + for doc in Document.objects.filter( id__in=doc_ids, ) } @@ -422,11 +393,7 @@ def rotate( doc = docs_by_id.get(doc_id) if doc is None: continue - root_doc, source_doc = _resolve_root_and_source_doc( - doc, - source_mode=source_mode, - ) - docs_by_root_id.setdefault(root_doc.id, (root_doc, source_doc)) + docs_by_root_id.setdefault(doc.id, (doc, doc)) import pikepdf @@ -482,7 +449,7 @@ def merge( logger.info( f"Attempting to merge {len(doc_ids)} documents into a single document.", ) - qs = Document.objects.select_related("root_document").filter(id__in=doc_ids) + qs = Document.objects.filter(id__in=doc_ids) docs_by_id = {doc.id: doc for doc in qs} affected_docs: list[int] = [] import pikepdf @@ -495,10 +462,7 @@ def merge( doc = docs_by_id.get(doc_id) if doc is None: continue - _, source_doc = _resolve_root_and_source_doc( - doc, - source_mode=source_mode, - ) + source_doc = doc try: doc_path = ( source_doc.archive_path @@ -593,11 +557,8 @@ def split( logger.info( f"Attempting to split document {doc_ids[0]} into {len(pages)} documents", ) - doc = Document.objects.select_related("root_document").get(id=doc_ids[0]) - _, source_doc = _resolve_root_and_source_doc( - doc, - source_mode=source_mode, - ) + doc = Document.objects.get(id=doc_ids[0]) + source_doc = doc import pikepdf consume_tasks = [] @@ -673,11 +634,9 @@ def delete_pages( logger.info( f"Attempting to delete pages {pages} from {len(doc_ids)} documents", ) - doc = Document.objects.select_related("root_document").get(id=doc_ids[0]) - root_doc, source_doc = _resolve_root_and_source_doc( - doc, - source_mode=source_mode, - ) + doc = Document.objects.get(id=doc_ids[0]) + root_doc = doc + source_doc = doc pages = sorted(pages) # sort pages to avoid index issues import pikepdf @@ -736,11 +695,9 @@ def edit_pdf( logger.info( f"Editing PDF of document {doc_ids[0]} with {len(operations)} operations", ) - doc = Document.objects.select_related("root_document").get(id=doc_ids[0]) - root_doc, source_doc = _resolve_root_and_source_doc( - doc, - source_mode=source_mode, - ) + doc = Document.objects.get(id=doc_ids[0]) + root_doc = doc + source_doc = doc import pikepdf pdf_docs: list[pikepdf.Pdf] = [] @@ -860,11 +817,9 @@ def remove_password( import pikepdf for doc_id in doc_ids: - doc = Document.objects.select_related("root_document").get(id=doc_id) - root_doc, source_doc = _resolve_root_and_source_doc( - doc, - source_mode=source_mode, - ) + doc = Document.objects.get(id=doc_id) + root_doc = doc + source_doc = doc try: logger.info( f"Attempting password removal from document {doc_ids[0]}", diff --git a/src/documents/tasks.py b/src/documents/tasks.py index 57c819492..f29ae8a4e 100644 --- a/src/documents/tasks.py +++ b/src/documents/tasks.py @@ -476,19 +476,16 @@ def check_scheduled_workflows() -> None: match trigger.schedule_date_field: case WorkflowTrigger.ScheduleDateField.ADDED: documents = Document.objects.filter( - root_document__isnull=True, added__lte=threshold, ) case WorkflowTrigger.ScheduleDateField.CREATED: documents = Document.objects.filter( - root_document__isnull=True, created__lte=threshold, ) case WorkflowTrigger.ScheduleDateField.MODIFIED: documents = Document.objects.filter( - root_document__isnull=True, modified__lte=threshold, ) @@ -529,7 +526,6 @@ def check_scheduled_workflows() -> None: ] documents = Document.objects.filter( - root_document__isnull=True, id__in=matched_ids, ) diff --git a/src/documents/tests/factories.py b/src/documents/tests/factories.py index 7bdc7341c..af57dd0c7 100644 --- a/src/documents/tests/factories.py +++ b/src/documents/tests/factories.py @@ -67,6 +67,27 @@ class DocumentFactory(DjangoModelFactory): document_type = None storage_path = None + @factory.post_generation + def with_version(self, create, extracted, **kwargs): + """Create an initial DocumentVersion(version_number=1) matching the Document's fields.""" + if not create: + return + from documents.models import DocumentVersion + + DocumentVersion.objects.create( + document=self, + version_number=1, + checksum=self.checksum or "default", + archive_checksum=self.archive_checksum, + content=self.content, + page_count=self.page_count, + mime_type=self.mime_type or "application/pdf", + original_filename=self.original_filename, + filename=self.filename, + archive_filename=self.archive_filename, + added=self.added, + ) + class DocumentVersionFactory(DjangoModelFactory): class Meta: diff --git a/src/documents/tests/test_api_documents.py b/src/documents/tests/test_api_documents.py index 1361aedfc..2b07d8c42 100644 --- a/src/documents/tests/test_api_documents.py +++ b/src/documents/tests/test_api_documents.py @@ -36,6 +36,7 @@ from documents.models import CustomField from documents.models import CustomFieldInstance from documents.models import Document from documents.models import DocumentType +from documents.models import DocumentVersion from documents.models import MatchingModel from documents.models import Note from documents.models import SavedView @@ -316,10 +317,17 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase): filename=Path(filename).name, mime_type="application/pdf", ) + dv = DocumentVersion.objects.create( + document=doc, + version_number=1, + checksum=doc.checksum or "default", + mime_type=doc.mime_type, + filename=doc.filename, + ) if TYPE_CHECKING: assert isinstance(self.dirs.thumbnail_dir, Path), self.dirs.thumbnail_dir - with (self.dirs.thumbnail_dir / f"{doc.pk:07d}.webp").open("wb") as f: + with dv.thumbnail_path.open("wb") as f: f.write(content_thumbnail) response = self.client.get(f"/api/documents/{doc.pk}/download/") @@ -369,8 +377,15 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase): mime_type="application/pdf", owner=user1, ) + dv = DocumentVersion.objects.create( + document=doc, + version_number=1, + checksum=doc.checksum or "default", + mime_type=doc.mime_type, + filename=doc.filename, + ) - with (Path(self.dirs.thumbnail_dir) / f"{doc.pk:07d}.webp").open("wb") as f: + with dv.thumbnail_path.open("wb") as f: f.write(content_thumbnail) response = self.client.get(f"/api/documents/{doc.pk}/download/") @@ -404,6 +419,14 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase): archive_filename="archived.pdf", mime_type="application/pdf", ) + DocumentVersion.objects.create( + document=doc, + version_number=1, + checksum=doc.checksum or "default", + mime_type=doc.mime_type, + filename=doc.filename, + archive_filename=doc.archive_filename, + ) with Path(doc.source_path).open("wb") as f: f.write(content) @@ -446,6 +469,14 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase): archive_filename="archived.pdf", mime_type="application/pdf", ) + DocumentVersion.objects.create( + document=doc, + version_number=1, + checksum=doc.checksum or "default", + mime_type=doc.mime_type, + filename=doc.filename, + archive_filename=doc.archive_filename, + ) with Path(doc.source_path).open("wb") as f: f.write(content) @@ -585,16 +616,21 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase): mime_type="application/pdf", owner=self.user, ) - version_doc = Document.objects.create( - title="Version", + DocumentVersion.objects.create( + document=root_doc, + version_number=1, + checksum="123", + mime_type="application/pdf", + ) + v2 = DocumentVersion.objects.create( + document=root_doc, + version_number=2, checksum="456", mime_type="application/pdf", - root_document=root_doc, - owner=self.user, ) response = self.client.delete( - f"/api/documents/{root_doc.pk}/versions/{version_doc.pk}/", + f"/api/documents/{root_doc.pk}/versions/{v2.pk}/", ) self.assertEqual(response.status_code, status.HTTP_200_OK) @@ -605,7 +641,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase): self.assertEqual(response.data[0]["action"], "update") self.assertEqual( response.data[0]["changes"], - {"Version Deleted": ["None", version_doc.pk]}, + {"Version Deleted": ["None", v2.pk]}, ) @override_settings(AUDIT_LOG_ENABLED=False) @@ -1452,17 +1488,24 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase): ) def test_document_filters_use_latest_version_content(self) -> None: - root = Document.objects.create( + doc = Document.objects.create( title="versioned root", - checksum="root", + checksum="v2", mime_type="application/pdf", - content="root-content", + content="latest-version-content", ) - version = Document.objects.create( - title="versioned root", + DocumentVersion.objects.create( + document=doc, + version_number=1, checksum="v1", mime_type="application/pdf", - root_document=root, + content="old-content", + ) + DocumentVersion.objects.create( + document=doc, + version_number=2, + checksum="v2", + mime_type="application/pdf", content="latest-version-content", ) @@ -1472,8 +1515,8 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase): self.assertEqual(response.status_code, status.HTTP_200_OK) results = response.data["results"] self.assertEqual(len(results), 1) - self.assertEqual(results[0]["id"], root.id) - self.assertEqual(results[0]["content"], version.content) + self.assertEqual(results[0]["id"], doc.id) + self.assertEqual(results[0]["content"], "latest-version-content") response = self.client.get( "/api/documents/?title_content=latest-version-content", @@ -1481,7 +1524,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase): self.assertEqual(response.status_code, status.HTTP_200_OK) results = response.data["results"] self.assertEqual(len(results), 1) - self.assertEqual(results[0]["id"], root.id) + self.assertEqual(results[0]["id"], doc.id) def test_create_wrong_endpoint(self) -> None: response = self.client.post( @@ -2042,6 +2085,15 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase): archive_checksum="A", archive_filename="archive.pdf", ) + DocumentVersion.objects.create( + document=doc, + version_number=1, + checksum=doc.checksum or "default", + mime_type=doc.mime_type, + filename=doc.filename, + archive_filename=doc.archive_filename, + archive_checksum=doc.archive_checksum, + ) source_file: Path = ( Path(__file__).parent @@ -2082,6 +2134,13 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase): filename="file.pdf", mime_type="application/pdf", ) + DocumentVersion.objects.create( + document=doc, + version_number=1, + checksum=doc.checksum or "default", + mime_type=doc.mime_type, + filename=doc.filename, + ) shutil.copy(Path(__file__).parent / "samples" / "simple.pdf", doc.source_path) @@ -2105,6 +2164,15 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase): archive_checksum="B", checksum="A", ) + DocumentVersion.objects.create( + document=doc, + version_number=1, + checksum=doc.checksum, + mime_type=doc.mime_type, + filename=doc.filename, + archive_filename=doc.archive_filename, + archive_checksum=doc.archive_checksum, + ) response = self.client.get(f"/api/documents/{doc.pk}/metadata/") self.assertEqual(response.status_code, status.HTTP_200_OK) diff --git a/src/documents/tests/test_bulk_edit.py b/src/documents/tests/test_bulk_edit.py index 9b6c3c468..df7f448c1 100644 --- a/src/documents/tests/test_bulk_edit.py +++ b/src/documents/tests/test_bulk_edit.py @@ -381,52 +381,20 @@ class TestBulkEdit(DirectoriesMixin, TestCase): [self.doc3.id, self.doc4.id, self.doc5.id], ) - def test_delete_root_document_deletes_all_versions(self) -> None: - version = Document.objects.create( - checksum="A-v1", - title="A version", - root_document=self.doc1, + def test_delete_document_deletes_document_versions_via_cascade(self) -> None: + from documents.models import DocumentVersion + + v1 = DocumentVersion.objects.create( + document=self.doc1, + version_number=1, + checksum="A", + mime_type="application/pdf", ) bulk_edit.delete([self.doc1.id]) self.assertFalse(Document.objects.filter(id=self.doc1.id).exists()) - self.assertFalse(Document.objects.filter(id=version.id).exists()) - - def test_delete_version_document_keeps_root(self) -> None: - version = Document.objects.create( - checksum="A-v1", - title="A version", - root_document=self.doc1, - ) - - bulk_edit.delete([version.id]) - - self.assertTrue(Document.objects.filter(id=self.doc1.id).exists()) - self.assertFalse(Document.objects.filter(id=version.id).exists()) - - def test_resolve_root_and_source_doc_latest_version_prefers_newest_version( - self, - ) -> None: - version1 = Document.objects.create( - checksum="B-v1", - title="B version 1", - root_document=self.doc2, - ) - version2 = Document.objects.create( - checksum="B-v2", - title="B version 2", - root_document=self.doc2, - ) - - root_doc, source_doc = bulk_edit._resolve_root_and_source_doc( - self.doc2, - source_mode="latest_version", - ) - - self.assertEqual(root_doc.id, self.doc2.id) - self.assertEqual(source_doc.id, version2.id) - self.assertNotEqual(source_doc.id, version1.id) + self.assertFalse(DocumentVersion.objects.filter(id=v1.id).exists()) @mock.patch("documents.tasks.bulk_update_documents.delay") def test_set_permissions(self, m) -> None: @@ -662,20 +630,11 @@ class TestPDFActions(DirectoriesMixin, TestCase): @mock.patch("pikepdf.open") @mock.patch("documents.tasks.consume_file.s") - def test_merge_uses_latest_version_source_for_root_selection( + def test_merge_uses_document_source_path( self, mock_consume_file, mock_open_pdf, ) -> None: - version_file = self.dirs.scratch_dir / "sample2_version_merge.pdf" - shutil.copy(self.doc2.source_path, version_file) - version = Document.objects.create( - checksum="B-v1", - title="B version 1", - root_document=self.doc2, - filename=version_file, - mime_type="application/pdf", - ) fake_pdf = mock.MagicMock() fake_pdf.pdf_version = "1.7" fake_pdf.pages = [mock.Mock()] @@ -684,7 +643,7 @@ class TestPDFActions(DirectoriesMixin, TestCase): result = bulk_edit.merge([self.doc2.id]) self.assertEqual(result, "OK") - mock_open_pdf.assert_called_once_with(str(version.source_path)) + mock_open_pdf.assert_called_once_with(str(self.doc2.source_path)) mock_consume_file.assert_not_called() @mock.patch("documents.bulk_edit.delete.si") @@ -898,21 +857,12 @@ class TestPDFActions(DirectoriesMixin, TestCase): @mock.patch("documents.bulk_edit.group") @mock.patch("pikepdf.open") @mock.patch("documents.tasks.consume_file.s") - def test_split_uses_latest_version_source_for_root_selection( + def test_split_uses_document_source_path( self, mock_consume_file, mock_open_pdf, mock_group, ) -> None: - version_file = self.dirs.scratch_dir / "sample2_version_split.pdf" - shutil.copy(self.doc2.source_path, version_file) - version = Document.objects.create( - checksum="B-v1", - title="B version 1", - root_document=self.doc2, - filename=version_file, - mime_type="application/pdf", - ) fake_pdf = mock.MagicMock() fake_pdf.pages = [mock.Mock(), mock.Mock()] mock_open_pdf.return_value.__enter__.return_value = fake_pdf @@ -921,7 +871,7 @@ class TestPDFActions(DirectoriesMixin, TestCase): result = bulk_edit.split([self.doc2.id], [[1], [2]]) self.assertEqual(result, "OK") - mock_open_pdf.assert_called_once_with(version.source_path) + mock_open_pdf.assert_called_once_with(self.doc2.source_path) mock_consume_file.assert_not_called() mock_group.return_value.delay.assert_not_called() @@ -1099,17 +1049,12 @@ class TestPDFActions(DirectoriesMixin, TestCase): @mock.patch("documents.data_models.magic.from_file", return_value="application/pdf") @mock.patch("documents.tasks.consume_file.delay") @mock.patch("pikepdf.open") - def test_rotate_explicit_selection_uses_root_source_when_root_selected( + def test_rotate_uses_document_source_path( self, mock_open, mock_consume_delay, mock_magic, ): - Document.objects.create( - checksum="B-v1", - title="B version 1", - root_document=self.doc2, - ) fake_pdf = mock.MagicMock() fake_pdf.pages = [mock.Mock()] mock_open.return_value.__enter__.return_value = fake_pdf @@ -1117,7 +1062,6 @@ class TestPDFActions(DirectoriesMixin, TestCase): result = bulk_edit.rotate( [self.doc2.id], 90, - source_mode="explicit_selection", ) self.assertEqual(result, "OK") @@ -1151,17 +1095,12 @@ class TestPDFActions(DirectoriesMixin, TestCase): @mock.patch("documents.data_models.magic.from_file", return_value="application/pdf") @mock.patch("documents.tasks.consume_file.delay") @mock.patch("pikepdf.open") - def test_delete_pages_explicit_selection_uses_root_source_when_root_selected( + def test_delete_pages_uses_document_source_path( self, mock_open, mock_consume_delay, mock_magic, ): - Document.objects.create( - checksum="B-v1", - title="B version 1", - root_document=self.doc2, - ) fake_pdf = mock.MagicMock() fake_pdf.pages = [mock.Mock(), mock.Mock()] mock_open.return_value.__enter__.return_value = fake_pdf @@ -1169,7 +1108,6 @@ class TestPDFActions(DirectoriesMixin, TestCase): result = bulk_edit.delete_pages( [self.doc2.id], [1], - source_mode="explicit_selection", ) self.assertEqual(result, "OK") @@ -1328,18 +1266,13 @@ class TestPDFActions(DirectoriesMixin, TestCase): @mock.patch("documents.tasks.consume_file.delay") @mock.patch("pikepdf.new") @mock.patch("pikepdf.open") - def test_edit_pdf_explicit_selection_uses_root_source_when_root_selected( + def test_edit_pdf_uses_document_source_path( self, mock_open, mock_new, mock_consume_delay, mock_magic, ): - Document.objects.create( - checksum="B-v1", - title="B version 1", - root_document=self.doc2, - ) fake_pdf = mock.MagicMock() fake_pdf.pages = [mock.Mock()] mock_open.return_value.__enter__.return_value = fake_pdf @@ -1351,7 +1284,6 @@ class TestPDFActions(DirectoriesMixin, TestCase): [self.doc2.id], operations=[{"page": 1}], update_document=True, - source_mode="explicit_selection", ) self.assertEqual(result, "OK") @@ -1481,17 +1413,12 @@ class TestPDFActions(DirectoriesMixin, TestCase): @mock.patch("documents.data_models.magic.from_file", return_value="application/pdf") @mock.patch("documents.tasks.consume_file.delay") @mock.patch("pikepdf.open") - def test_remove_password_explicit_selection_uses_root_source_when_root_selected( + def test_remove_password_uses_document_source_path( self, mock_open, mock_consume_delay, mock_magic, ) -> None: - Document.objects.create( - checksum="A-v1", - title="A version 1", - root_document=self.doc1, - ) fake_pdf = mock.MagicMock() mock_open.return_value.__enter__.return_value = fake_pdf @@ -1499,7 +1426,6 @@ class TestPDFActions(DirectoriesMixin, TestCase): [self.doc1.id], password="secret", update_document=True, - source_mode="explicit_selection", ) self.assertEqual(result, "OK")