Compare commits

...

4 Commits

9 changed files with 326 additions and 44 deletions

View File

@@ -262,6 +262,10 @@ your files differently, you can do that by adjusting the
or using [storage paths (see below)](#storage-paths). Paperless adds the
correct file extension e.g. `.pdf`, `.jpg` automatically.
When a document has file versions, each version uses the same naming rules and
storage path resolution as any other document file, with an added version suffix
such as `_v1`, `_v2`, etc.
This variable allows you to configure the filename (folders are allowed)
using placeholders. For example, configuring this to
@@ -353,6 +357,8 @@ If paperless detects that two documents share the same filename,
paperless will automatically append `_01`, `_02`, etc to the filename.
This happens if all the placeholders in a filename evaluate to the same
value.
For versioned files, this counter is appended after the version suffix
(for example `statement_v2_01.pdf`).
If there are any errors in the placeholders included in `PAPERLESS_FILENAME_FORMAT`,
paperless will fall back to using the default naming scheme instead.

View File

@@ -95,6 +95,7 @@ Think of versions as **file history** for a document.
- Versions track the underlying file and extracted text content (OCR/text).
- Metadata such as tags, correspondent, document type, storage path and custom fields stay on the "root" document.
- Version files follow normal filename formatting (including storage paths) and add a `_vN` suffix (for example `_v1`, `_v2`).
- By default, search and document content use the latest version.
- In document detail, selecting a version switches the preview, file metadata and content (and download etc buttons) to that version.
- Deleting a non-root version keeps metadata and falls back to the latest remaining version.

View File

@@ -11,6 +11,7 @@ import magic
from django.conf import settings
from django.contrib.auth.models import User
from django.db import transaction
from django.db.models import Max
from django.db.models import Q
from django.utils import timezone
from filelock import FileLock
@@ -123,22 +124,6 @@ class ConsumerPluginMixin:
self.filename = self.metadata.filename or self.input_doc.original_file.name
if input_doc.root_document_id:
self.log.debug(
f"Document root document id: {input_doc.root_document_id}",
)
root_document = Document.objects.get(pk=input_doc.root_document_id)
version_index = Document.objects.filter(root_document=root_document).count()
filename_path = Path(self.filename)
if filename_path.suffix:
self.filename = str(
filename_path.with_name(
f"{filename_path.stem}_v{version_index}{filename_path.suffix}",
),
)
else:
self.filename = f"{self.filename}_v{version_index}"
def _send_progress(
self,
current_progress: int,
@@ -184,7 +169,7 @@ class ConsumerPlugin(
):
logging_name = LOGGING_NAME
def _clone_root_into_version(
def _create_version_from_root(
self,
root_doc: Document,
*,
@@ -193,30 +178,38 @@ class ConsumerPlugin(
mime_type: str,
) -> Document:
self.log.debug("Saving record for updated version to database")
version_doc = Document.objects.get(pk=root_doc.pk)
setattr(version_doc, "pk", None)
version_doc.root_document = root_doc
root_doc_frozen = Document.objects.select_for_update().get(pk=root_doc.pk)
next_version_index = (
Document.global_objects.filter(
root_document_id=root_doc_frozen.pk,
).aggregate(
max_index=Max("version_index"),
)["max_index"]
or 0
)
file_for_checksum = (
self.unmodified_original
if self.unmodified_original is not None
else self.working_copy
)
version_doc.checksum = hashlib.md5(
file_for_checksum.read_bytes(),
).hexdigest()
version_doc.content = text or ""
version_doc.page_count = page_count
version_doc.mime_type = mime_type
version_doc.original_filename = self.filename
version_doc.storage_path = root_doc.storage_path
# Clear unique file path fields so they can be generated uniquely later
version_doc.filename = None
version_doc.archive_filename = None
version_doc.archive_checksum = None
version_doc = Document(
root_document=root_doc_frozen,
version_index=next_version_index + 1,
checksum=hashlib.md5(
file_for_checksum.read_bytes(),
).hexdigest(),
content=text or "",
page_count=page_count,
mime_type=mime_type,
original_filename=self.filename,
owner_id=root_doc_frozen.owner_id,
created=root_doc_frozen.created,
title=root_doc_frozen.title,
added=timezone.now(),
modified=timezone.now(),
)
if self.metadata.version_label is not None:
version_doc.version_label = self.metadata.version_label
version_doc.added = timezone.now()
version_doc.modified = timezone.now()
return version_doc
def run_pre_consume_script(self) -> None:
@@ -542,7 +535,7 @@ class ConsumerPlugin(
root_doc = Document.objects.get(
pk=self.input_doc.root_document_id,
)
original_document = self._clone_root_into_version(
original_document = self._create_version_from_root(
root_doc,
text=text,
page_count=page_count,

View File

@@ -128,11 +128,18 @@ def generate_filename(
counter=0,
archive_filename=False,
) -> Path:
# version docs use the root document for formatting, just with a suffix
context_doc = doc if doc.root_document_id is None else doc.root_document
version_suffix = (
f"_v{doc.version_index}"
if doc.root_document_id is not None and doc.version_index is not None
else ""
)
base_path: Path | None = None
# Determine the source of the format string
if doc.storage_path is not None:
filename_format = doc.storage_path.path
if context_doc.storage_path is not None:
filename_format = context_doc.storage_path.path
elif settings.FILENAME_FORMAT is not None:
# Maybe convert old to new style
filename_format = convert_format_str_to_template_format(
@@ -143,7 +150,7 @@ def generate_filename(
# If we have one, render it
if filename_format is not None:
rendered_path: str | None = format_filename(doc, filename_format)
rendered_path: str | None = format_filename(context_doc, filename_format)
if rendered_path:
base_path = Path(rendered_path)
@@ -157,7 +164,7 @@ def generate_filename(
base_filename = base_path.name
# Build the final filename with counter and filetype
final_filename = f"{base_filename}{counter_str}{filetype_str}"
final_filename = f"{base_filename}{version_suffix}{counter_str}{filetype_str}"
# If we have a directory component, include it
if str(directory) != ".":
@@ -166,7 +173,9 @@ def generate_filename(
full_path = Path(final_filename)
else:
# No template, use document ID
final_filename = f"{doc.pk:07}{counter_str}{filetype_str}"
final_filename = (
f"{context_doc.pk:07}{version_suffix}{counter_str}{filetype_str}"
)
full_path = Path(final_filename)
return full_path

View File

@@ -0,0 +1,37 @@
# Generated by Django 5.2.11 on 2026-03-02 17:48
from django.conf import settings
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "0013_document_root_document"),
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
]
operations = [
migrations.AddField(
model_name="document",
name="version_index",
field=models.PositiveIntegerField(
blank=True,
db_index=True,
help_text="Index of this version within the root document.",
null=True,
verbose_name="version index",
),
),
migrations.AddConstraint(
model_name="document",
constraint=models.UniqueConstraint(
condition=models.Q(
("root_document__isnull", False),
("version_index__isnull", False),
),
fields=("root_document", "version_index"),
name="documents_document_root_version_index_uniq",
),
),
]

View File

@@ -317,6 +317,14 @@ class Document(SoftDeleteModel, ModelWithOwner): # type: ignore[django-manager-
verbose_name=_("root document for this version"),
)
version_index = models.PositiveIntegerField(
_("version index"),
blank=True,
null=True,
db_index=True,
help_text=_("Index of this version within the root document."),
)
version_label = models.CharField(
_("version label"),
max_length=64,
@@ -329,6 +337,16 @@ class Document(SoftDeleteModel, ModelWithOwner): # type: ignore[django-manager-
ordering = ("-created",)
verbose_name = _("document")
verbose_name_plural = _("documents")
constraints = [
models.UniqueConstraint(
fields=["root_document", "version_index"],
condition=models.Q(
root_document__isnull=False,
version_index__isnull=False,
),
name="documents_document_root_version_index_uniq",
),
]
def __str__(self) -> str:
created = self.created.isoformat()

View File

@@ -596,6 +596,16 @@ def update_filename_and_move_files(
root=settings.ARCHIVE_DIR,
)
# Keep version files in sync with root
if instance.root_document_id is None:
for version_doc in Document.objects.filter(root_document_id=instance.pk).only(
"pk",
):
update_filename_and_move_files(
Document,
version_doc,
)
@shared_task
def process_cf_select_update(custom_field: CustomField) -> None:

View File

@@ -699,6 +699,14 @@ class TestConsumer(
self.assertIsNotNone(root_doc)
assert root_doc is not None
root_storage_path = StoragePath.objects.create(
name="version-root-path",
path="root/{{title}}",
)
root_doc.storage_path = root_storage_path
root_doc.archive_serial_number = 42
root_doc.save()
actor = User.objects.create_user(
username="actor",
email="actor@example.com",
@@ -735,7 +743,7 @@ class TestConsumer(
)
consumer.setup()
try:
self.assertTrue(consumer.filename.endswith("_v0.pdf"))
self.assertEqual(consumer.filename, version_file.name)
consumer.run()
finally:
consumer.cleanup()
@@ -745,8 +753,10 @@ class TestConsumer(
version = versions.first()
assert version is not None
assert version.original_filename is not None
self.assertEqual(version.version_index, 1)
self.assertEqual(version.version_label, "v2")
self.assertTrue(version.original_filename.endswith("_v0.pdf"))
self.assertIsNone(version.archive_serial_number)
self.assertEqual(version.original_filename, version_file.name)
self.assertTrue(bool(version.content))
@override_settings(AUDIT_LOG_ENABLED=True)
@@ -795,7 +805,7 @@ class TestConsumer(
)
consumer.setup()
try:
self.assertEqual(consumer.filename, "valid_pdf_version-upload_v0")
self.assertEqual(consumer.filename, "valid_pdf_version-upload")
consumer.run()
finally:
consumer.cleanup()
@@ -805,9 +815,67 @@ class TestConsumer(
)
self.assertIsNotNone(version)
assert version is not None
self.assertEqual(version.original_filename, "valid_pdf_version-upload_v0")
self.assertEqual(version.version_index, 1)
self.assertEqual(version.original_filename, "valid_pdf_version-upload")
self.assertTrue(bool(version.content))
@override_settings(AUDIT_LOG_ENABLED=True)
@mock.patch("documents.consumer.load_classifier")
def test_consume_version_index_monotonic_after_version_deletion(self, m) -> None:
m.return_value = MagicMock()
with self.get_consumer(self.get_test_file()) as consumer:
consumer.run()
root_doc = Document.objects.first()
self.assertIsNotNone(root_doc)
assert root_doc is not None
def consume_version(version_file: Path) -> Document:
status = DummyProgressManager(version_file.name, None)
overrides = DocumentMetadataOverrides()
doc = ConsumableDocument(
DocumentSource.ApiUpload,
original_file=version_file,
root_document_id=root_doc.pk,
)
preflight = ConsumerPreflightPlugin(
doc,
overrides,
status, # type: ignore[arg-type]
self.dirs.scratch_dir,
"task-id",
)
preflight.setup()
preflight.run()
consumer = ConsumerPlugin(
doc,
overrides,
status, # type: ignore[arg-type]
self.dirs.scratch_dir,
"task-id",
)
consumer.setup()
try:
consumer.run()
finally:
consumer.cleanup()
version = (
Document.objects.filter(root_document=root_doc).order_by("-id").first()
)
assert version is not None
return version
v1 = consume_version(self.get_test_file2())
self.assertEqual(v1.version_index, 1)
v1.delete()
# The next version should have version_index 2, even though version_index 1 was deleted
v2 = consume_version(self.get_test_file())
self.assertEqual(v2.version_index, 2)
@mock.patch("documents.consumer.load_classifier")
def testClassifyDocument(self, m) -> None:
correspondent = Correspondent.objects.create(

View File

@@ -77,6 +77,58 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
settings.ORIGINALS_DIR / "test" / "test.pdf",
)
@override_settings(FILENAME_FORMAT=None)
def test_root_storage_path_change_updates_version_files(self) -> None:
old_storage_path = StoragePath.objects.create(
name="old-path",
path="old/{{title}}",
)
new_storage_path = StoragePath.objects.create(
name="new-path",
path="new/{{title}}",
)
root_doc = Document.objects.create(
title="rootdoc",
mime_type="application/pdf",
checksum="root-checksum",
storage_path=old_storage_path,
)
version_doc = Document.objects.create(
title="version-title",
mime_type="application/pdf",
checksum="version-checksum",
root_document=root_doc,
version_index=1,
)
Document.objects.filter(pk=root_doc.pk).update(
filename=generate_filename(root_doc),
)
Document.objects.filter(pk=version_doc.pk).update(
filename=generate_filename(version_doc),
)
root_doc.refresh_from_db()
version_doc.refresh_from_db()
create_source_path_directory(root_doc.source_path)
Path(root_doc.source_path).touch()
create_source_path_directory(version_doc.source_path)
Path(version_doc.source_path).touch()
root_doc.storage_path = new_storage_path
root_doc.save()
root_doc.refresh_from_db()
version_doc.refresh_from_db()
self.assertEqual(root_doc.filename, "new/rootdoc.pdf")
self.assertEqual(version_doc.filename, "new/rootdoc_v1.pdf")
self.assertIsFile(root_doc.source_path)
self.assertIsFile(version_doc.source_path)
self.assertIsNotFile(settings.ORIGINALS_DIR / "old" / "rootdoc.pdf")
self.assertIsNotFile(settings.ORIGINALS_DIR / "old" / "rootdoc_v1.pdf")
@override_settings(FILENAME_FORMAT="{correspondent}/{correspondent}")
def test_file_renaming_missing_permissions(self) -> None:
document = Document()
@@ -1222,6 +1274,94 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
Path("logs.pdf"),
)
@override_settings(FILENAME_FORMAT="{title}")
def test_version_index_suffix_for_template_filename(self) -> None:
root_doc = Document.objects.create(
title="the_doc",
mime_type="application/pdf",
checksum="root-checksum",
)
version_doc = Document.objects.create(
title="the_doc",
mime_type="application/pdf",
checksum="version-checksum",
root_document=root_doc,
version_index=1,
)
self.assertEqual(generate_filename(version_doc), Path("the_doc_v1.pdf"))
self.assertEqual(
generate_filename(version_doc, counter=1),
Path("the_doc_v1_01.pdf"),
)
@override_settings(FILENAME_FORMAT=None)
def test_version_index_suffix_for_default_filename(self) -> None:
root_doc = Document.objects.create(
title="root",
mime_type="text/plain",
checksum="root-checksum",
)
version_doc = Document.objects.create(
title="root",
mime_type="text/plain",
checksum="version-checksum",
root_document=root_doc,
version_index=2,
)
self.assertEqual(
generate_filename(version_doc),
Path(f"{root_doc.pk:07d}_v2.txt"),
)
self.assertEqual(
generate_filename(version_doc, archive_filename=True),
Path(f"{root_doc.pk:07d}_v2.pdf"),
)
@override_settings(FILENAME_FORMAT="{original_name}")
def test_version_index_suffix_with_original_name_placeholder(self) -> None:
root_doc = Document.objects.create(
title="root",
mime_type="application/pdf",
checksum="root-checksum",
original_filename="root-upload.pdf",
)
version_doc = Document.objects.create(
title="root",
mime_type="application/pdf",
checksum="version-checksum",
root_document=root_doc,
version_index=1,
original_filename="version-upload.pdf",
)
self.assertEqual(generate_filename(version_doc), Path("root-upload_v1.pdf"))
def test_version_index_suffix_with_storage_path(self) -> None:
storage_path = StoragePath.objects.create(
name="vtest",
path="folder/{{title}}",
)
root_doc = Document.objects.create(
title="storage_doc",
mime_type="application/pdf",
checksum="root-checksum",
storage_path=storage_path,
)
version_doc = Document.objects.create(
title="version_title_should_not_be_used",
mime_type="application/pdf",
checksum="version-checksum",
root_document=root_doc,
version_index=3,
)
self.assertEqual(
generate_filename(version_doc),
Path("folder/storage_doc_v3.pdf"),
)
@override_settings(
FILENAME_FORMAT="XX{correspondent}/{title}",
FILENAME_FORMAT_REMOVE_NONE=True,