Compare commits

...

3 Commits

Author SHA1 Message Date
shamoon
559f824b85 Great, now use latest_content for reading/filtering 2026-03-31 12:17:22 -07:00
shamoon
c994841a75 Add sync_root_latest_content helper to always update the root 2026-03-31 12:09:31 -07:00
shamoon
2ea83374ed Add latest_content field 2026-03-31 11:52:13 -07:00
10 changed files with 101 additions and 29 deletions

View File

@@ -50,6 +50,7 @@ from documents.utils import compute_checksum
from documents.utils import copy_basic_file_stats
from documents.utils import copy_file_with_basic_stats
from documents.utils import run_subprocess
from documents.versioning import sync_root_latest_content
from paperless.parsers import ParserContext
from paperless.parsers import ParserProtocol
from paperless.parsers.registry import get_parser_registry
@@ -538,6 +539,8 @@ class ConsumerPlugin(
else:
original_document.save()
sync_root_latest_content(root_doc)
# Create a log entry for the version addition, if enabled
if settings.AUDIT_LOG_ENABLED:
from auditlog.models import ( # type: ignore[import-untyped]

View File

@@ -0,0 +1,23 @@
# Generated by Django 5.2.12 on 2026-03-31 18:52
from django.db import migrations
from django.db import models
def _backfill_latest_content(apps, schema_editor):
    """
    Populate ``latest_content`` for root documents that already have versions.

    Before this column existed, the effective content of a root was looked up
    from its newest version at read time. Readers now rely on the materialized
    column, so existing roots must be filled in once or they would fall back
    to their own (older) content after the upgrade.
    """
    Document = apps.get_model("documents", "Document")
    # NOTE(review): ``deleted_at`` is assumed to be the soft-delete timestamp
    # contributed by SoftDeleteModel, so trashed versions are ignored here the
    # same way the default manager ignores them at runtime - confirm the field
    # name against the model before merging.
    versions = Document.objects.filter(
        root_document__isnull=False,
        deleted_at__isnull=True,
    )
    # Clear any default ordering so DISTINCT applies to the root id alone, and
    # materialize the ids up front so no cursor is held open while updating.
    root_ids = list(
        versions.order_by()
        .values_list("root_document_id", flat=True)
        .distinct(),
    )
    for root_id in root_ids:
        latest_version_content = (
            versions.filter(root_document_id=root_id)
            .order_by("-id")
            .values_list("content", flat=True)
            .first()
        )
        # Per-row update instead of a self-referencing subquery UPDATE, which
        # not every supported database backend accepts.
        Document.objects.filter(pk=root_id).update(
            latest_content=latest_version_content,
        )


class Migration(migrations.Migration):
    """Add ``Document.latest_content`` and backfill it for existing roots."""

    dependencies = [
        ("documents", "0016_sha256_checksums"),
    ]

    operations = [
        migrations.AddField(
            model_name="document",
            name="latest_content",
            field=models.TextField(
                blank=True,
                help_text="Materialized effective content for root documents. Uses the latest version content when available.",
                null=True,
                verbose_name="latest content",
            ),
        ),
        # Reverse is a no-op: dropping the column discards the data anyway.
        migrations.RunPython(
            _backfill_latest_content,
            migrations.RunPython.noop,
        ),
    ]

View File

@@ -196,6 +196,16 @@ class Document(SoftDeleteModel, ModelWithOwner): # type: ignore[django-manager-
),
)
latest_content = models.TextField(
_("latest content"),
blank=True,
null=True,
help_text=_(
"Materialized effective content for root documents. "
"Uses the latest version content when available.",
),
)
content_length = models.GeneratedField(
expression=Length("content"),
output_field=PositiveIntegerField(default=0),
@@ -375,27 +385,7 @@ class Document(SoftDeleteModel, ModelWithOwner): # type: ignore[django-manager-
if self.root_document_id is not None or self.pk is None:
return self.content
prefetched_cache = getattr(self, "_prefetched_objects_cache", None)
prefetched_versions = (
prefetched_cache.get("versions")
if isinstance(prefetched_cache, dict)
else None
)
if prefetched_versions:
latest_prefetched = max(prefetched_versions, key=lambda doc: doc.id)
return latest_prefetched.content
latest_version_content = (
Document.objects.filter(root_document=self)
.order_by("-id")
.values_list("content", flat=True)
.first()
)
return (
latest_version_content
if latest_version_content is not None
else self.content
)
return self.latest_content if self.latest_content is not None else self.content
@property
def suggestion_content(self):

View File

@@ -136,6 +136,8 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase):
root_document=root,
content="v2-content",
)
root.latest_content = v2.content
root.save(update_fields=["latest_content"])
with (
mock.patch("documents.index.remove_document_from_index"),
@@ -148,6 +150,7 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase):
self.assertEqual(resp.data["current_version_id"], v1.id)
root.refresh_from_db()
self.assertEqual(root.content, "root-content")
self.assertEqual(root.latest_content, "v1-content")
with (
mock.patch("documents.index.remove_document_from_index"),
@@ -160,6 +163,7 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase):
self.assertEqual(resp.data["current_version_id"], root.id)
root.refresh_from_db()
self.assertEqual(root.content, "root-content")
self.assertIsNone(root.latest_content)
def test_delete_version_writes_audit_log_entry(self) -> None:
root = Document.objects.create(
@@ -695,6 +699,7 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase):
v2.refresh_from_db()
self.assertEqual(v2.content, "edited-content")
self.assertEqual(root.content, "root-content")
self.assertEqual(root.latest_content, "edited-content")
self.assertEqual(v1.content, "v1-content")
def test_patch_content_updates_selected_version_content(self) -> None:
@@ -718,6 +723,8 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase):
root_document=root,
content="v2-content",
)
root.latest_content = v2.content
root.save(update_fields=["latest_content"])
resp = self.client.patch(
f"/api/documents/{root.id}/?version={v1.id}",
@@ -733,6 +740,28 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase):
self.assertEqual(v1.content, "edited-v1")
self.assertEqual(v2.content, "v2-content")
self.assertEqual(root.content, "root-content")
self.assertEqual(root.latest_content, "v2-content")
def test_patch_root_content_without_versions_keeps_latest_content_null(
    self,
) -> None:
    """
    Patching the content of a root that has no versions edits the root's own
    content and leaves the materialized ``latest_content`` unset.
    """
    document = Document.objects.create(
        title="root",
        checksum="root",
        mime_type="application/pdf",
        content="root-content",
    )
    url = f"/api/documents/{document.id}/"
    payload = {"content": "edited-root"}

    response = self.client.patch(url, payload, format="json")

    self.assertEqual(response.status_code, status.HTTP_200_OK)
    # Reload so the assertions see what was persisted, not the stale instance.
    document.refresh_from_db()
    self.assertEqual(document.content, "edited-root")
    self.assertIsNone(document.latest_content)
def test_retrieve_returns_latest_version_content(self) -> None:
root = Document.objects.create(
@@ -748,6 +777,8 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase):
root_document=root,
content="v1-content",
)
root.latest_content = "v1-content"
root.save(update_fields=["latest_content"])
resp = self.client.get(f"/api/documents/{root.id}/")
@@ -768,6 +799,8 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase):
root_document=root,
content="v1-content",
)
root.latest_content = v1.content
root.save(update_fields=["latest_content"])
resp = self.client.get(f"/api/documents/{root.id}/?version={v1.id}")

View File

@@ -1356,6 +1356,8 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
root_document=root,
content="latest-version-content",
)
root.latest_content = version.content
root.save(update_fields=["latest_content"])
response = self.client.get(
"/api/documents/?content__icontains=latest-version-content",

View File

@@ -785,11 +785,13 @@ class TestConsumer(
version = versions.first()
assert version is not None
assert version.original_filename is not None
root_doc.refresh_from_db()
self.assertEqual(version.version_index, 1)
self.assertEqual(version.version_label, "v2")
self.assertIsNone(version.archive_serial_number)
self.assertEqual(version.original_filename, version_file.name)
self.assertTrue(bool(version.content))
self.assertEqual(root_doc.latest_content, version.content)
@override_settings(AUDIT_LOG_ENABLED=True)
@mock.patch("documents.consumer.load_classifier")

View File

@@ -172,6 +172,8 @@ class TestDocument(TestCase):
root_document=root,
content="latest version content",
)
root.latest_content = version.content
root.save(update_fields=["latest_content"])
self.assertEqual(root.suggestion_content, version.content)

View File

@@ -62,6 +62,8 @@ class TestMatching(_TestMatchingBase):
root_document=root,
content="latest version contains keyword",
)
root.latest_content = "latest version contains keyword"
root.save(update_fields=["latest_content"])
tag = Tag.objects.create(
name="tag",
match="keyword",
@@ -86,6 +88,8 @@ class TestMatching(_TestMatchingBase):
root_document=root,
content="latest version without token",
)
root.latest_content = "latest version without token"
root.save(update_fields=["latest_content"])
tag = Tag.objects.create(
name="tag",
match="keyword",

View File

@@ -55,6 +55,23 @@ def get_latest_version_for_root(
return latest or root_doc
def sync_root_latest_content(
    root_doc: Document,
    *,
    include_deleted: bool = False,
) -> None:
    """
    Refresh the materialized ``latest_content`` of a root document.

    Looks up the content of the version with the highest id whose
    ``root_document`` is ``root_doc`` and stores it on the root. When the root
    has no matching versions the stored value becomes ``None``.

    The database row is written with ``QuerySet.update()``, so ``save()`` is
    not invoked for this change. The passed-in instance is updated as well, so
    callers that keep using ``root_doc`` afterwards (for example to re-index
    it) see the synced value rather than a stale one.

    Args:
        root_doc: The root document whose ``latest_content`` is recalculated.
        include_deleted: Forwarded to ``_document_manager`` to select the
            manager used for the version lookup.
    """
    manager = _document_manager(include_deleted=include_deleted)
    latest_version_content = (
        manager.filter(root_document_id=root_doc.pk)
        .order_by("-id")
        .values_list("content", flat=True)
        .first()
    )
    # NOTE(review): the write always goes through ``Document.objects`` even
    # when ``include_deleted`` is set - confirm whether a soft-deleted root is
    # expected to be updated here too.
    Document.objects.filter(pk=root_doc.pk).update(
        latest_content=latest_version_content,
    )
    # update() bypasses the instance; mirror the value so the in-memory object
    # does not keep (or later re-save) an outdated latest_content.
    root_doc.latest_content = latest_version_content
def resolve_requested_version_for_root(
root_doc: Document,
request: Any,

View File

@@ -35,10 +35,8 @@ from django.db.models import F
from django.db.models import IntegerField
from django.db.models import Max
from django.db.models import Model
from django.db.models import OuterRef
from django.db.models import Prefetch
from django.db.models import Q
from django.db.models import Subquery
from django.db.models import Sum
from django.db.models import When
from django.db.models.functions import Coalesce
@@ -220,6 +218,7 @@ from documents.versioning import get_latest_version_for_root
from documents.versioning import get_request_version_param
from documents.versioning import get_root_document
from documents.versioning import resolve_requested_version_for_root
from documents.versioning import sync_root_latest_content
from paperless import version
from paperless.celery import app as celery_app
from paperless.config import AIConfig
@@ -894,16 +893,11 @@ class DocumentViewSet(
}
def get_queryset(self):
latest_version_content = Subquery(
Document.objects.filter(root_document=OuterRef("pk"))
.order_by("-id")
.values("content")[:1],
)
return (
Document.objects.filter(root_document__isnull=True)
.distinct()
.order_by("-created")
.annotate(effective_content=Coalesce(latest_version_content, F("content")))
.annotate(effective_content=Coalesce(F("latest_content"), F("content")))
.annotate(num_notes=Count("notes"))
.select_related("correspondent", "storage_path", "document_type", "owner")
.prefetch_related(
@@ -1022,6 +1016,7 @@ class DocumentViewSet(
str(updated_content) if updated_content is not None else ""
)
content_doc.save(update_fields=["content", "modified"])
sync_root_latest_content(root_doc)
refreshed_doc = self.get_queryset().get(pk=root_doc.pk)
response_data = self.get_serializer(refreshed_doc).data
@@ -1825,6 +1820,7 @@ class DocumentViewSet(
index.remove_document_from_index(version_doc)
version_doc_id = version_doc.id
version_doc.delete()
sync_root_latest_content(root_doc)
index.add_or_update_document(root_doc)
if settings.AUDIT_LOG_ENABLED:
actor = (