From da78e50f9860101bf9889404e51c5c97cd9bfb4e Mon Sep 17 00:00:00 2001 From: shamoon <4887959+shamoon@users.noreply.github.com> Date: Mon, 2 Mar 2026 10:08:04 -0800 Subject: [PATCH] Ok, now move suffix into centralized generate_filename, use root doc for filenaming --- docs/advanced_usage.md | 6 + docs/usage.md | 1 + src/documents/consumer.py | 1 - src/documents/file_handling.py | 19 ++- src/documents/signals/handlers.py | 10 ++ src/documents/tests/test_consumer.py | 64 ++++++++++ src/documents/tests/test_file_handling.py | 140 ++++++++++++++++++++++ 7 files changed, 235 insertions(+), 6 deletions(-) diff --git a/docs/advanced_usage.md b/docs/advanced_usage.md index a293eb91d..13194fe87 100644 --- a/docs/advanced_usage.md +++ b/docs/advanced_usage.md @@ -262,6 +262,10 @@ your files differently, you can do that by adjusting the or using [storage paths (see below)](#storage-paths). Paperless adds the correct file extension e.g. `.pdf`, `.jpg` automatically. +When a document has file versions, each version uses the same naming rules and +storage path resolution as any other document file, with an added version suffix +such as `_v1`, `_v2`, etc. + This variable allows you to configure the filename (folders are allowed) using placeholders. For example, configuring this to @@ -353,6 +357,8 @@ If paperless detects that two documents share the same filename, paperless will automatically append `_01`, `_02`, etc to the filename. This happens if all the placeholders in a filename evaluate to the same value. +For versioned files, this counter is appended after the version suffix +(for example `statement_v2_01.pdf`). If there are any errors in the placeholders included in `PAPERLESS_FILENAME_FORMAT`, paperless will fall back to using the default naming scheme instead. diff --git a/docs/usage.md b/docs/usage.md index 2f2c0a396..fd1d31850 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -95,6 +95,7 @@ Think of versions as **file history** for a document. - Versions track the underlying file and extracted text content (OCR/text). - Metadata such as tags, correspondent, document type, storage path and custom fields stay on the "root" document. +- Version files follow normal filename formatting (including storage paths) and add a `_vN` suffix (for example `_v1`, `_v2`). - By default, search and document content use the latest version. - In document detail, selecting a version switches the preview, file metadata and content (and download etc buttons) to that version. - Deleting a non-root version keeps metadata and falls back to the latest remaining version. diff --git a/src/documents/consumer.py b/src/documents/consumer.py index b4b33d43c..f5cfb3d17 100644 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -203,7 +203,6 @@ class ConsumerPlugin( version_doc.page_count = page_count version_doc.mime_type = mime_type version_doc.original_filename = self.filename - version_doc.storage_path = root_doc_frozen.storage_path # Clear unique file path fields so they can be generated uniquely later version_doc.filename = None version_doc.archive_filename = None diff --git a/src/documents/file_handling.py b/src/documents/file_handling.py index 39831016d..4978cd14f 100644 --- a/src/documents/file_handling.py +++ b/src/documents/file_handling.py @@ -128,11 +128,18 @@ def generate_filename( counter=0, archive_filename=False, ) -> Path: + # version docs use the root document for formatting, just with a suffix + context_doc = doc if doc.root_document_id is None else doc.root_document + version_suffix = ( + f"_v{doc.version_index}" + if doc.root_document_id is not None and doc.version_index is not None + else "" + ) base_path: Path | None = None # Determine the source of the format string - if doc.storage_path is not None: - filename_format = doc.storage_path.path + if context_doc.storage_path is not None: + filename_format = context_doc.storage_path.path elif settings.FILENAME_FORMAT is not None: # Maybe convert old to new style filename_format = convert_format_str_to_template_format( @@ -143,7 +150,7 @@ def generate_filename( # If we have one, render it if filename_format is not None: - rendered_path: str | None = format_filename(doc, filename_format) + rendered_path: str | None = format_filename(context_doc, filename_format) if rendered_path: base_path = Path(rendered_path) @@ -157,7 +164,7 @@ def generate_filename( base_filename = base_path.name # Build the final filename with counter and filetype - final_filename = f"{base_filename}{counter_str}{filetype_str}" + final_filename = f"{base_filename}{version_suffix}{counter_str}{filetype_str}" # If we have a directory component, include it if str(directory) != ".": @@ -166,7 +173,9 @@ def generate_filename( full_path = Path(final_filename) else: # No template, use document ID - final_filename = f"{doc.pk:07}{counter_str}{filetype_str}" + final_filename = ( + f"{context_doc.pk:07}{version_suffix}{counter_str}{filetype_str}" + ) full_path = Path(final_filename) return full_path diff --git a/src/documents/signals/handlers.py b/src/documents/signals/handlers.py index 5fb7ef8c5..6cd0ca5c9 100644 --- a/src/documents/signals/handlers.py +++ b/src/documents/signals/handlers.py @@ -596,6 +596,16 @@ def update_filename_and_move_files( root=settings.ARCHIVE_DIR, ) + # Keep version files in sync with root + if instance.root_document_id is None: + for version_doc in Document.objects.filter(root_document_id=instance.pk).only( + "pk", + ): + update_filename_and_move_files( + Document, + version_doc, + ) + @shared_task def process_cf_select_update(custom_field: CustomField) -> None: diff --git a/src/documents/tests/test_consumer.py b/src/documents/tests/test_consumer.py index 4007ff552..113d416bc 100644 --- a/src/documents/tests/test_consumer.py +++ b/src/documents/tests/test_consumer.py @@ -699,6 +699,13 @@ class TestConsumer( self.assertIsNotNone(root_doc) assert root_doc is not None + root_storage_path = StoragePath.objects.create( + name="version-root-path", + path="root/{{title}}", + ) + root_doc.storage_path = root_storage_path + root_doc.save() + actor = User.objects.create_user( username="actor", email="actor@example.com", @@ -810,6 +817,63 @@ class TestConsumer( self.assertEqual(version.original_filename, "valid_pdf_version-upload") self.assertTrue(bool(version.content)) + @override_settings(AUDIT_LOG_ENABLED=True) + @mock.patch("documents.consumer.load_classifier") + def test_consume_version_index_monotonic_after_version_deletion(self, m) -> None: + m.return_value = MagicMock() + + with self.get_consumer(self.get_test_file()) as consumer: + consumer.run() + + root_doc = Document.objects.first() + self.assertIsNotNone(root_doc) + assert root_doc is not None + + def consume_version(version_file: Path) -> Document: + status = DummyProgressManager(version_file.name, None) + overrides = DocumentMetadataOverrides() + doc = ConsumableDocument( + DocumentSource.ApiUpload, + original_file=version_file, + root_document_id=root_doc.pk, + ) + preflight = ConsumerPreflightPlugin( + doc, + overrides, + status, # type: ignore[arg-type] + self.dirs.scratch_dir, + "task-id", + ) + preflight.setup() + preflight.run() + + consumer = ConsumerPlugin( + doc, + overrides, + status, # type: ignore[arg-type] + self.dirs.scratch_dir, + "task-id", + ) + consumer.setup() + try: + consumer.run() + finally: + consumer.cleanup() + + version = ( + Document.objects.filter(root_document=root_doc).order_by("-id").first() + ) + assert version is not None + return version + + v1 = consume_version(self.get_test_file2()) + self.assertEqual(v1.version_index, 1) + v1.delete() + + # The next version should have version_index 2, even though version_index 1 was deleted + v2 = consume_version(self.get_test_file()) + self.assertEqual(v2.version_index, 2) + @mock.patch("documents.consumer.load_classifier") def testClassifyDocument(self, m) -> None: correspondent = Correspondent.objects.create( diff --git a/src/documents/tests/test_file_handling.py b/src/documents/tests/test_file_handling.py index 1e35b96bc..f3fbbfd76 100644 --- a/src/documents/tests/test_file_handling.py +++ b/src/documents/tests/test_file_handling.py @@ -77,6 +77,58 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase): settings.ORIGINALS_DIR / "test" / "test.pdf", ) + @override_settings(FILENAME_FORMAT=None) + def test_root_storage_path_change_updates_version_files(self) -> None: + old_storage_path = StoragePath.objects.create( + name="old-path", + path="old/{{title}}", + ) + new_storage_path = StoragePath.objects.create( + name="new-path", + path="new/{{title}}", + ) + + root_doc = Document.objects.create( + title="rootdoc", + mime_type="application/pdf", + checksum="root-checksum", + storage_path=old_storage_path, + ) + version_doc = Document.objects.create( + title="version-title", + mime_type="application/pdf", + checksum="version-checksum", + root_document=root_doc, + version_index=1, + ) + + Document.objects.filter(pk=root_doc.pk).update( + filename=generate_filename(root_doc), + ) + Document.objects.filter(pk=version_doc.pk).update( + filename=generate_filename(version_doc), + ) + root_doc.refresh_from_db() + version_doc.refresh_from_db() + + create_source_path_directory(root_doc.source_path) + Path(root_doc.source_path).touch() + create_source_path_directory(version_doc.source_path) + Path(version_doc.source_path).touch() + + root_doc.storage_path = new_storage_path + root_doc.save() + + root_doc.refresh_from_db() + version_doc.refresh_from_db() + + self.assertEqual(root_doc.filename, "new/rootdoc.pdf") + self.assertEqual(version_doc.filename, "new/rootdoc_v1.pdf") + self.assertIsFile(root_doc.source_path) + self.assertIsFile(version_doc.source_path) + self.assertIsNotFile(settings.ORIGINALS_DIR / "old" / "rootdoc.pdf") + self.assertIsNotFile(settings.ORIGINALS_DIR / "old" / "rootdoc_v1.pdf") + @override_settings(FILENAME_FORMAT="{correspondent}/{correspondent}") def test_file_renaming_missing_permissions(self) -> None: document = Document() @@ -1222,6 +1274,94 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase): Path("logs.pdf"), ) + @override_settings(FILENAME_FORMAT="{title}") + def test_version_index_suffix_for_template_filename(self) -> None: + root_doc = Document.objects.create( + title="the_doc", + mime_type="application/pdf", + checksum="root-checksum", + ) + version_doc = Document.objects.create( + title="the_doc", + mime_type="application/pdf", + checksum="version-checksum", + root_document=root_doc, + version_index=1, + ) + + self.assertEqual(generate_filename(version_doc), Path("the_doc_v1.pdf")) + self.assertEqual( + generate_filename(version_doc, counter=1), + Path("the_doc_v1_01.pdf"), + ) + + @override_settings(FILENAME_FORMAT=None) + def test_version_index_suffix_for_default_filename(self) -> None: + root_doc = Document.objects.create( + title="root", + mime_type="text/plain", + checksum="root-checksum", + ) + version_doc = Document.objects.create( + title="root", + mime_type="text/plain", + checksum="version-checksum", + root_document=root_doc, + version_index=2, + ) + + self.assertEqual( + generate_filename(version_doc), + Path(f"{root_doc.pk:07d}_v2.txt"), + ) + self.assertEqual( + generate_filename(version_doc, archive_filename=True), + Path(f"{root_doc.pk:07d}_v2.pdf"), + ) + + @override_settings(FILENAME_FORMAT="{original_name}") + def test_version_index_suffix_with_original_name_placeholder(self) -> None: + root_doc = Document.objects.create( + title="root", + mime_type="application/pdf", + checksum="root-checksum", + original_filename="root-upload.pdf", + ) + version_doc = Document.objects.create( + title="root", + mime_type="application/pdf", + checksum="version-checksum", + root_document=root_doc, + version_index=1, + original_filename="version-upload.pdf", + ) + + self.assertEqual(generate_filename(version_doc), Path("root-upload_v1.pdf")) + + def test_version_index_suffix_with_storage_path(self) -> None: + storage_path = StoragePath.objects.create( + name="vtest", + path="folder/{{title}}", + ) + root_doc = Document.objects.create( + title="storage_doc", + mime_type="application/pdf", + checksum="root-checksum", + storage_path=storage_path, + ) + version_doc = Document.objects.create( + title="version_title_should_not_be_used", + mime_type="application/pdf", + checksum="version-checksum", + root_document=root_doc, + version_index=3, + ) + + self.assertEqual( + generate_filename(version_doc), + Path("folder/storage_doc_v3.pdf"), + ) + @override_settings( FILENAME_FORMAT="XX{correspondent}/{title}", FILENAME_FORMAT_REMOVE_NONE=True,