Compare commits

..

1 Commit

Author SHA1 Message Date
dependabot[bot]
6ff3065fbe Chore(deps): Bump nltk from 3.9.3 to 3.9.4 in the data-nlp-search group
Bumps the data-nlp-search group with 1 update: [nltk](https://github.com/nltk/nltk).


Updates `nltk` from 3.9.3 to 3.9.4
- [Changelog](https://github.com/nltk/nltk/blob/develop/ChangeLog)
- [Commits](https://github.com/nltk/nltk/compare/3.9.3...3.9.4)

---
updated-dependencies:
- dependency-name: nltk
  dependency-version: 3.9.4
  dependency-type: direct:production
  update-type: version-update:semver-patch
  dependency-group: data-nlp-search
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-04-02 00:58:24 +00:00
11 changed files with 32 additions and 104 deletions

View File

@@ -50,7 +50,6 @@ from documents.utils import compute_checksum
from documents.utils import copy_basic_file_stats
from documents.utils import copy_file_with_basic_stats
from documents.utils import run_subprocess
from documents.versioning import sync_root_latest_content
from paperless.parsers import ParserContext
from paperless.parsers import ParserProtocol
from paperless.parsers.registry import get_parser_registry
@@ -539,8 +538,6 @@ class ConsumerPlugin(
else:
original_document.save()
sync_root_latest_content(root_doc)
# Create a log entry for the version addition, if enabled
if settings.AUDIT_LOG_ENABLED:
from auditlog.models import ( # type: ignore[import-untyped]

View File

@@ -1,23 +0,0 @@
# Generated by Django 5.2.12 on 2026-03-31 18:52
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
    """Add the nullable, blank-able ``latest_content`` text field to ``document``."""

    # Must be applied after the documents app's 0016_sha256_checksums migration.
    dependencies = [("documents", "0016_sha256_checksums")]

    operations = [
        migrations.AddField(
            model_name="document",
            name="latest_content",
            field=models.TextField(
                verbose_name="latest content",
                help_text="Materialized effective content for root documents. Uses the latest version content when available.",
                null=True,
                blank=True,
            ),
        ),
    ]

View File

@@ -196,16 +196,6 @@ class Document(SoftDeleteModel, ModelWithOwner): # type: ignore[django-manager-
),
)
latest_content = models.TextField(
_("latest content"),
blank=True,
null=True,
help_text=_(
"Materialized effective content for root documents. "
"Uses the latest version content when available.",
),
)
content_length = models.GeneratedField(
expression=Length("content"),
output_field=PositiveIntegerField(default=0),
@@ -385,7 +375,27 @@ class Document(SoftDeleteModel, ModelWithOwner): # type: ignore[django-manager-
if self.root_document_id is not None or self.pk is None:
return self.content
return self.latest_content if self.latest_content is not None else self.content
prefetched_cache = getattr(self, "_prefetched_objects_cache", None)
prefetched_versions = (
prefetched_cache.get("versions")
if isinstance(prefetched_cache, dict)
else None
)
if prefetched_versions:
latest_prefetched = max(prefetched_versions, key=lambda doc: doc.id)
return latest_prefetched.content
latest_version_content = (
Document.objects.filter(root_document=self)
.order_by("-id")
.values_list("content", flat=True)
.first()
)
return (
latest_version_content
if latest_version_content is not None
else self.content
)
@property
def suggestion_content(self):

View File

@@ -136,8 +136,6 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase):
root_document=root,
content="v2-content",
)
root.latest_content = v2.content
root.save(update_fields=["latest_content"])
with (
mock.patch("documents.index.remove_document_from_index"),
@@ -150,7 +148,6 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase):
self.assertEqual(resp.data["current_version_id"], v1.id)
root.refresh_from_db()
self.assertEqual(root.content, "root-content")
self.assertEqual(root.latest_content, "v1-content")
with (
mock.patch("documents.index.remove_document_from_index"),
@@ -163,7 +160,6 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase):
self.assertEqual(resp.data["current_version_id"], root.id)
root.refresh_from_db()
self.assertEqual(root.content, "root-content")
self.assertIsNone(root.latest_content)
def test_delete_version_writes_audit_log_entry(self) -> None:
root = Document.objects.create(
@@ -699,7 +695,6 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase):
v2.refresh_from_db()
self.assertEqual(v2.content, "edited-content")
self.assertEqual(root.content, "root-content")
self.assertEqual(root.latest_content, "edited-content")
self.assertEqual(v1.content, "v1-content")
def test_patch_content_updates_selected_version_content(self) -> None:
@@ -723,8 +718,6 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase):
root_document=root,
content="v2-content",
)
root.latest_content = v2.content
root.save(update_fields=["latest_content"])
resp = self.client.patch(
f"/api/documents/{root.id}/?version={v1.id}",
@@ -740,28 +733,6 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase):
self.assertEqual(v1.content, "edited-v1")
self.assertEqual(v2.content, "v2-content")
self.assertEqual(root.content, "root-content")
self.assertEqual(root.latest_content, "v2-content")
def test_patch_root_content_without_versions_keeps_latest_content_null(
    self,
) -> None:
    """PATCHing ``content`` on a root with no versions leaves ``latest_content`` as None."""
    root = Document.objects.create(
        title="root",
        checksum="root",
        mime_type="application/pdf",
        content="root-content",
    )

    # No version documents are created, so only the root row exists.
    url = f"/api/documents/{root.id}/"
    payload = {"content": "edited-root"}
    resp = self.client.patch(url, payload, format="json")

    self.assertEqual(resp.status_code, status.HTTP_200_OK)

    # Reload from the database to observe what the request actually persisted.
    root.refresh_from_db()
    self.assertEqual(root.content, "edited-root")
    self.assertIsNone(root.latest_content)
def test_retrieve_returns_latest_version_content(self) -> None:
root = Document.objects.create(
@@ -777,8 +748,6 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase):
root_document=root,
content="v1-content",
)
root.latest_content = "v1-content"
root.save(update_fields=["latest_content"])
resp = self.client.get(f"/api/documents/{root.id}/")
@@ -799,8 +768,6 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase):
root_document=root,
content="v1-content",
)
root.latest_content = v1.content
root.save(update_fields=["latest_content"])
resp = self.client.get(f"/api/documents/{root.id}/?version={v1.id}")

View File

@@ -1356,8 +1356,6 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
root_document=root,
content="latest-version-content",
)
root.latest_content = version.content
root.save(update_fields=["latest_content"])
response = self.client.get(
"/api/documents/?content__icontains=latest-version-content",

View File

@@ -785,13 +785,11 @@ class TestConsumer(
version = versions.first()
assert version is not None
assert version.original_filename is not None
root_doc.refresh_from_db()
self.assertEqual(version.version_index, 1)
self.assertEqual(version.version_label, "v2")
self.assertIsNone(version.archive_serial_number)
self.assertEqual(version.original_filename, version_file.name)
self.assertTrue(bool(version.content))
self.assertEqual(root_doc.latest_content, version.content)
@override_settings(AUDIT_LOG_ENABLED=True)
@mock.patch("documents.consumer.load_classifier")

View File

@@ -172,8 +172,6 @@ class TestDocument(TestCase):
root_document=root,
content="latest version content",
)
root.latest_content = version.content
root.save(update_fields=["latest_content"])
self.assertEqual(root.suggestion_content, version.content)

View File

@@ -62,8 +62,6 @@ class TestMatching(_TestMatchingBase):
root_document=root,
content="latest version contains keyword",
)
root.latest_content = "latest version contains keyword"
root.save(update_fields=["latest_content"])
tag = Tag.objects.create(
name="tag",
match="keyword",
@@ -88,8 +86,6 @@ class TestMatching(_TestMatchingBase):
root_document=root,
content="latest version without token",
)
root.latest_content = "latest version without token"
root.save(update_fields=["latest_content"])
tag = Tag.objects.create(
name="tag",
match="keyword",

View File

@@ -55,23 +55,6 @@ def get_latest_version_for_root(
return latest or root_doc
def sync_root_latest_content(
    root_doc: Document,
    *,
    include_deleted: bool = False,
) -> None:
    """
    Refresh the ``latest_content`` value stored for a root document.

    Reads the ``content`` of the highest-id document whose
    ``root_document_id`` equals ``root_doc.pk`` and writes it to the root's
    row. When no such document exists, ``None`` is written.

    Args:
        root_doc: Root document whose ``latest_content`` should be updated.
        include_deleted: Forwarded to ``_document_manager`` to choose the
            manager used for the version lookup.
    """
    version_rows = _document_manager(include_deleted=include_deleted).filter(
        root_document_id=root_doc.pk,
    )
    newest_content = (
        version_rows.order_by("-id").values_list("content", flat=True).first()
    )

    # Written with a queryset update: the passed-in root_doc instance is not
    # modified here, so callers must refresh it to see the new value.
    Document.objects.filter(pk=root_doc.pk).update(latest_content=newest_content)
def resolve_requested_version_for_root(
root_doc: Document,
request: Any,

View File

@@ -35,8 +35,10 @@ from django.db.models import F
from django.db.models import IntegerField
from django.db.models import Max
from django.db.models import Model
from django.db.models import OuterRef
from django.db.models import Prefetch
from django.db.models import Q
from django.db.models import Subquery
from django.db.models import Sum
from django.db.models import When
from django.db.models.functions import Coalesce
@@ -218,7 +220,6 @@ from documents.versioning import get_latest_version_for_root
from documents.versioning import get_request_version_param
from documents.versioning import get_root_document
from documents.versioning import resolve_requested_version_for_root
from documents.versioning import sync_root_latest_content
from paperless import version
from paperless.celery import app as celery_app
from paperless.config import AIConfig
@@ -893,11 +894,16 @@ class DocumentViewSet(
}
def get_queryset(self):
latest_version_content = Subquery(
Document.objects.filter(root_document=OuterRef("pk"))
.order_by("-id")
.values("content")[:1],
)
return (
Document.objects.filter(root_document__isnull=True)
.distinct()
.order_by("-created")
.annotate(effective_content=Coalesce(F("latest_content"), F("content")))
.annotate(effective_content=Coalesce(latest_version_content, F("content")))
.annotate(num_notes=Count("notes"))
.select_related("correspondent", "storage_path", "document_type", "owner")
.prefetch_related(
@@ -1016,7 +1022,6 @@ class DocumentViewSet(
str(updated_content) if updated_content is not None else ""
)
content_doc.save(update_fields=["content", "modified"])
sync_root_latest_content(root_doc)
refreshed_doc = self.get_queryset().get(pk=root_doc.pk)
response_data = self.get_serializer(refreshed_doc).data
@@ -1820,7 +1825,6 @@ class DocumentViewSet(
index.remove_document_from_index(version_doc)
version_doc_id = version_doc.id
version_doc.delete()
sync_root_latest_content(root_doc)
index.add_or_update_document(root_doc)
if settings.AUDIT_LOG_ENABLED:
actor = (

6
uv.lock generated
View File

@@ -2700,7 +2700,7 @@ wheels = [
[[package]]
name = "nltk"
version = "3.9.3"
version = "3.9.4"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "click", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -2708,9 +2708,9 @@ dependencies = [
{ name = "regex", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "tqdm", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/e1/8f/915e1c12df07c70ed779d18ab83d065718a926e70d3ea33eb0cd66ffb7c0/nltk-3.9.3.tar.gz", hash = "sha256:cb5945d6424a98d694c2b9a0264519fab4363711065a46aa0ae7a2195b92e71f", size = 2923673, upload-time = "2026-02-24T12:05:53.833Z" }
sdist = { url = "https://files.pythonhosted.org/packages/74/a1/b3b4adf15585a5bc4c357adde150c01ebeeb642173ded4d871e89468767c/nltk-3.9.4.tar.gz", hash = "sha256:ed03bc098a40481310320808b2db712d95d13ca65b27372f8a403949c8b523d0", size = 2946864, upload-time = "2026-03-24T06:13:40.641Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/c2/7e/9af5a710a1236e4772de8dfcc6af942a561327bb9f42b5b4a24d0cf100fd/nltk-3.9.3-py3-none-any.whl", hash = "sha256:60b3db6e9995b3dd976b1f0fa7dec22069b2677e759c28eb69b62ddd44870522", size = 1525385, upload-time = "2026-02-24T12:05:46.54Z" },
{ url = "https://files.pythonhosted.org/packages/9d/91/04e965f8e717ba0ab4bdca5c112deeab11c9e750d94c4d4602f050295d39/nltk-3.9.4-py3-none-any.whl", hash = "sha256:f2fa301c3a12718ce4a0e9305c5675299da5ad9e26068218b69d692fda84828f", size = 1552087, upload-time = "2026-03-24T06:13:38.47Z" },
]
[[package]]