Compare commits

...

3 Commits

Author SHA1 Message Date
shamoon
559f824b85 Great, now use latest_content for reading/filtering 2026-03-31 12:17:22 -07:00
shamoon
c994841a75 Add sync_root_latest_content helper to always update the root 2026-03-31 12:09:31 -07:00
shamoon
2ea83374ed Add latest_content field 2026-03-31 11:52:13 -07:00
10 changed files with 101 additions and 29 deletions

View File

@@ -50,6 +50,7 @@ from documents.utils import compute_checksum
from documents.utils import copy_basic_file_stats from documents.utils import copy_basic_file_stats
from documents.utils import copy_file_with_basic_stats from documents.utils import copy_file_with_basic_stats
from documents.utils import run_subprocess from documents.utils import run_subprocess
from documents.versioning import sync_root_latest_content
from paperless.parsers import ParserContext from paperless.parsers import ParserContext
from paperless.parsers import ParserProtocol from paperless.parsers import ParserProtocol
from paperless.parsers.registry import get_parser_registry from paperless.parsers.registry import get_parser_registry
@@ -538,6 +539,8 @@ class ConsumerPlugin(
else: else:
original_document.save() original_document.save()
sync_root_latest_content(root_doc)
# Create a log entry for the version addition, if enabled # Create a log entry for the version addition, if enabled
if settings.AUDIT_LOG_ENABLED: if settings.AUDIT_LOG_ENABLED:
from auditlog.models import ( # type: ignore[import-untyped] from auditlog.models import ( # type: ignore[import-untyped]

View File

@@ -0,0 +1,23 @@
# Generated by Django 5.2.12 on 2026-03-31 18:52
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
    """Add the nullable, blank-able ``latest_content`` text field to ``document``."""

    # Applies on top of the "0016_sha256_checksums" migration of the documents app.
    dependencies = [("documents", "0016_sha256_checksums")]

    operations = [
        migrations.AddField(
            model_name="document",
            name="latest_content",
            field=models.TextField(
                verbose_name="latest content",
                help_text="Materialized effective content for root documents. Uses the latest version content when available.",
                null=True,
                blank=True,
            ),
        ),
    ]

View File

@@ -196,6 +196,16 @@ class Document(SoftDeleteModel, ModelWithOwner): # type: ignore[django-manager-
), ),
) )
latest_content = models.TextField(
_("latest content"),
blank=True,
null=True,
help_text=_(
"Materialized effective content for root documents. "
"Uses the latest version content when available.",
),
)
content_length = models.GeneratedField( content_length = models.GeneratedField(
expression=Length("content"), expression=Length("content"),
output_field=PositiveIntegerField(default=0), output_field=PositiveIntegerField(default=0),
@@ -375,27 +385,7 @@ class Document(SoftDeleteModel, ModelWithOwner): # type: ignore[django-manager-
if self.root_document_id is not None or self.pk is None: if self.root_document_id is not None or self.pk is None:
return self.content return self.content
prefetched_cache = getattr(self, "_prefetched_objects_cache", None) return self.latest_content if self.latest_content is not None else self.content
prefetched_versions = (
prefetched_cache.get("versions")
if isinstance(prefetched_cache, dict)
else None
)
if prefetched_versions:
latest_prefetched = max(prefetched_versions, key=lambda doc: doc.id)
return latest_prefetched.content
latest_version_content = (
Document.objects.filter(root_document=self)
.order_by("-id")
.values_list("content", flat=True)
.first()
)
return (
latest_version_content
if latest_version_content is not None
else self.content
)
@property @property
def suggestion_content(self): def suggestion_content(self):

View File

@@ -136,6 +136,8 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase):
root_document=root, root_document=root,
content="v2-content", content="v2-content",
) )
root.latest_content = v2.content
root.save(update_fields=["latest_content"])
with ( with (
mock.patch("documents.index.remove_document_from_index"), mock.patch("documents.index.remove_document_from_index"),
@@ -148,6 +150,7 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase):
self.assertEqual(resp.data["current_version_id"], v1.id) self.assertEqual(resp.data["current_version_id"], v1.id)
root.refresh_from_db() root.refresh_from_db()
self.assertEqual(root.content, "root-content") self.assertEqual(root.content, "root-content")
self.assertEqual(root.latest_content, "v1-content")
with ( with (
mock.patch("documents.index.remove_document_from_index"), mock.patch("documents.index.remove_document_from_index"),
@@ -160,6 +163,7 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase):
self.assertEqual(resp.data["current_version_id"], root.id) self.assertEqual(resp.data["current_version_id"], root.id)
root.refresh_from_db() root.refresh_from_db()
self.assertEqual(root.content, "root-content") self.assertEqual(root.content, "root-content")
self.assertIsNone(root.latest_content)
def test_delete_version_writes_audit_log_entry(self) -> None: def test_delete_version_writes_audit_log_entry(self) -> None:
root = Document.objects.create( root = Document.objects.create(
@@ -695,6 +699,7 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase):
v2.refresh_from_db() v2.refresh_from_db()
self.assertEqual(v2.content, "edited-content") self.assertEqual(v2.content, "edited-content")
self.assertEqual(root.content, "root-content") self.assertEqual(root.content, "root-content")
self.assertEqual(root.latest_content, "edited-content")
self.assertEqual(v1.content, "v1-content") self.assertEqual(v1.content, "v1-content")
def test_patch_content_updates_selected_version_content(self) -> None: def test_patch_content_updates_selected_version_content(self) -> None:
@@ -718,6 +723,8 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase):
root_document=root, root_document=root,
content="v2-content", content="v2-content",
) )
root.latest_content = v2.content
root.save(update_fields=["latest_content"])
resp = self.client.patch( resp = self.client.patch(
f"/api/documents/{root.id}/?version={v1.id}", f"/api/documents/{root.id}/?version={v1.id}",
@@ -733,6 +740,28 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase):
self.assertEqual(v1.content, "edited-v1") self.assertEqual(v1.content, "edited-v1")
self.assertEqual(v2.content, "v2-content") self.assertEqual(v2.content, "v2-content")
self.assertEqual(root.content, "root-content") self.assertEqual(root.content, "root-content")
self.assertEqual(root.latest_content, "v2-content")
def test_patch_root_content_without_versions_keeps_latest_content_null(
    self,
) -> None:
    """PATCHing the content of a root without versions leaves ``latest_content`` as None."""
    # A root document created without any version rows pointing at it.
    document = Document.objects.create(
        title="root",
        checksum="root",
        mime_type="application/pdf",
        content="root-content",
    )
    url = f"/api/documents/{document.id}/"
    payload = {"content": "edited-root"}

    response = self.client.patch(url, payload, format="json")

    self.assertEqual(response.status_code, status.HTTP_200_OK)
    # Reload so the assertions see what was actually persisted.
    document.refresh_from_db()
    self.assertEqual(document.content, "edited-root")
    self.assertIsNone(document.latest_content)
def test_retrieve_returns_latest_version_content(self) -> None: def test_retrieve_returns_latest_version_content(self) -> None:
root = Document.objects.create( root = Document.objects.create(
@@ -748,6 +777,8 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase):
root_document=root, root_document=root,
content="v1-content", content="v1-content",
) )
root.latest_content = "v1-content"
root.save(update_fields=["latest_content"])
resp = self.client.get(f"/api/documents/{root.id}/") resp = self.client.get(f"/api/documents/{root.id}/")
@@ -768,6 +799,8 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase):
root_document=root, root_document=root,
content="v1-content", content="v1-content",
) )
root.latest_content = v1.content
root.save(update_fields=["latest_content"])
resp = self.client.get(f"/api/documents/{root.id}/?version={v1.id}") resp = self.client.get(f"/api/documents/{root.id}/?version={v1.id}")

View File

@@ -1356,6 +1356,8 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
root_document=root, root_document=root,
content="latest-version-content", content="latest-version-content",
) )
root.latest_content = version.content
root.save(update_fields=["latest_content"])
response = self.client.get( response = self.client.get(
"/api/documents/?content__icontains=latest-version-content", "/api/documents/?content__icontains=latest-version-content",

View File

@@ -785,11 +785,13 @@ class TestConsumer(
version = versions.first() version = versions.first()
assert version is not None assert version is not None
assert version.original_filename is not None assert version.original_filename is not None
root_doc.refresh_from_db()
self.assertEqual(version.version_index, 1) self.assertEqual(version.version_index, 1)
self.assertEqual(version.version_label, "v2") self.assertEqual(version.version_label, "v2")
self.assertIsNone(version.archive_serial_number) self.assertIsNone(version.archive_serial_number)
self.assertEqual(version.original_filename, version_file.name) self.assertEqual(version.original_filename, version_file.name)
self.assertTrue(bool(version.content)) self.assertTrue(bool(version.content))
self.assertEqual(root_doc.latest_content, version.content)
@override_settings(AUDIT_LOG_ENABLED=True) @override_settings(AUDIT_LOG_ENABLED=True)
@mock.patch("documents.consumer.load_classifier") @mock.patch("documents.consumer.load_classifier")

View File

@@ -172,6 +172,8 @@ class TestDocument(TestCase):
root_document=root, root_document=root,
content="latest version content", content="latest version content",
) )
root.latest_content = version.content
root.save(update_fields=["latest_content"])
self.assertEqual(root.suggestion_content, version.content) self.assertEqual(root.suggestion_content, version.content)

View File

@@ -62,6 +62,8 @@ class TestMatching(_TestMatchingBase):
root_document=root, root_document=root,
content="latest version contains keyword", content="latest version contains keyword",
) )
root.latest_content = "latest version contains keyword"
root.save(update_fields=["latest_content"])
tag = Tag.objects.create( tag = Tag.objects.create(
name="tag", name="tag",
match="keyword", match="keyword",
@@ -86,6 +88,8 @@ class TestMatching(_TestMatchingBase):
root_document=root, root_document=root,
content="latest version without token", content="latest version without token",
) )
root.latest_content = "latest version without token"
root.save(update_fields=["latest_content"])
tag = Tag.objects.create( tag = Tag.objects.create(
name="tag", name="tag",
match="keyword", match="keyword",

View File

@@ -55,6 +55,23 @@ def get_latest_version_for_root(
return latest or root_doc return latest or root_doc
def sync_root_latest_content(
    root_doc: Document,
    *,
    include_deleted: bool = False,
) -> None:
    """
    Refresh the materialized ``latest_content`` of a root document.

    Looks up the ``content`` of the row with the highest id whose
    ``root_document_id`` equals ``root_doc.pk`` and stores it in the root's
    ``latest_content`` column. When no such row exists the column is set to
    ``None``.

    The column is written with a queryset ``update()``, which does not touch
    already-loaded model instances. The passed-in ``root_doc`` is therefore
    updated as well, so callers that keep using it after the sync (for
    example to re-index it or to ``save()`` it later) neither read nor write
    back a stale value.

    Args:
        root_doc: The root document whose ``latest_content`` is refreshed.
        include_deleted: Forwarded to ``_document_manager`` to select the
            manager used for the version lookup.
    """
    manager = _document_manager(include_deleted=include_deleted)
    latest_version_content = (
        manager.filter(root_document_id=root_doc.pk)
        .order_by("-id")
        .values_list("content", flat=True)
        .first()
    )
    # NOTE(review): the write always goes through Document.objects, regardless
    # of include_deleted. If that manager hides soft-deleted rows, a deleted
    # root would not be updated here - confirm this is intended.
    Document.objects.filter(pk=root_doc.pk).update(
        latest_content=latest_version_content,
    )
    # Keep the caller's instance consistent with the row just written.
    root_doc.latest_content = latest_version_content
def resolve_requested_version_for_root( def resolve_requested_version_for_root(
root_doc: Document, root_doc: Document,
request: Any, request: Any,

View File

@@ -35,10 +35,8 @@ from django.db.models import F
from django.db.models import IntegerField from django.db.models import IntegerField
from django.db.models import Max from django.db.models import Max
from django.db.models import Model from django.db.models import Model
from django.db.models import OuterRef
from django.db.models import Prefetch from django.db.models import Prefetch
from django.db.models import Q from django.db.models import Q
from django.db.models import Subquery
from django.db.models import Sum from django.db.models import Sum
from django.db.models import When from django.db.models import When
from django.db.models.functions import Coalesce from django.db.models.functions import Coalesce
@@ -220,6 +218,7 @@ from documents.versioning import get_latest_version_for_root
from documents.versioning import get_request_version_param from documents.versioning import get_request_version_param
from documents.versioning import get_root_document from documents.versioning import get_root_document
from documents.versioning import resolve_requested_version_for_root from documents.versioning import resolve_requested_version_for_root
from documents.versioning import sync_root_latest_content
from paperless import version from paperless import version
from paperless.celery import app as celery_app from paperless.celery import app as celery_app
from paperless.config import AIConfig from paperless.config import AIConfig
@@ -894,16 +893,11 @@ class DocumentViewSet(
} }
def get_queryset(self): def get_queryset(self):
latest_version_content = Subquery(
Document.objects.filter(root_document=OuterRef("pk"))
.order_by("-id")
.values("content")[:1],
)
return ( return (
Document.objects.filter(root_document__isnull=True) Document.objects.filter(root_document__isnull=True)
.distinct() .distinct()
.order_by("-created") .order_by("-created")
.annotate(effective_content=Coalesce(latest_version_content, F("content"))) .annotate(effective_content=Coalesce(F("latest_content"), F("content")))
.annotate(num_notes=Count("notes")) .annotate(num_notes=Count("notes"))
.select_related("correspondent", "storage_path", "document_type", "owner") .select_related("correspondent", "storage_path", "document_type", "owner")
.prefetch_related( .prefetch_related(
@@ -1022,6 +1016,7 @@ class DocumentViewSet(
str(updated_content) if updated_content is not None else "" str(updated_content) if updated_content is not None else ""
) )
content_doc.save(update_fields=["content", "modified"]) content_doc.save(update_fields=["content", "modified"])
sync_root_latest_content(root_doc)
refreshed_doc = self.get_queryset().get(pk=root_doc.pk) refreshed_doc = self.get_queryset().get(pk=root_doc.pk)
response_data = self.get_serializer(refreshed_doc).data response_data = self.get_serializer(refreshed_doc).data
@@ -1825,6 +1820,7 @@ class DocumentViewSet(
index.remove_document_from_index(version_doc) index.remove_document_from_index(version_doc)
version_doc_id = version_doc.id version_doc_id = version_doc.id
version_doc.delete() version_doc.delete()
sync_root_latest_content(root_doc)
index.add_or_update_document(root_doc) index.add_or_update_document(root_doc)
if settings.AUDIT_LOG_ENABLED: if settings.AUDIT_LOG_ENABLED:
actor = ( actor = (