Compare commits

...

3 Commits

Author SHA1 Message Date
shamoon
05beea9215 Fix: support string coercion in filepath jinja templates 2026-03-04 07:25:06 -08:00
shamoon
5b809122b5 Fix: apply ordering after annotating tag document count (#12238) 2026-03-04 00:33:13 -08:00
shamoon
8b8307571a Fix: enforce path limit for db filename fields (#12235) 2026-03-03 13:19:56 -08:00
9 changed files with 186 additions and 25 deletions

View File

@@ -19,6 +19,7 @@ from documents.classifier import load_classifier
from documents.data_models import ConsumableDocument
from documents.data_models import DocumentMetadataOverrides
from documents.file_handling import create_source_path_directory
from documents.file_handling import generate_filename
from documents.file_handling import generate_unique_filename
from documents.loggers import LoggingMixin
from documents.models import Correspondent
@@ -493,7 +494,19 @@ class ConsumerPlugin(
# After everything is in the database, copy the files into
# place. If this fails, we'll also rollback the transaction.
with FileLock(settings.MEDIA_LOCK):
document.filename = generate_unique_filename(document)
generated_filename = generate_unique_filename(document)
if (
len(str(generated_filename))
> Document.MAX_STORED_FILENAME_LENGTH
):
self.log.warning(
"Generated source filename exceeds db path limit, falling back to default naming",
)
generated_filename = generate_filename(
document,
use_format=False,
)
document.filename = generated_filename
create_source_path_directory(document.source_path)
self._write(
@@ -511,10 +524,23 @@ class ConsumerPlugin(
)
if archive_path and Path(archive_path).is_file():
document.archive_filename = generate_unique_filename(
generated_archive_filename = generate_unique_filename(
document,
archive_filename=True,
)
if (
len(str(generated_archive_filename))
> Document.MAX_STORED_FILENAME_LENGTH
):
self.log.warning(
"Generated archive filename exceeds db path limit, falling back to default naming",
)
generated_archive_filename = generate_filename(
document,
archive_filename=True,
use_format=False,
)
document.archive_filename = generated_archive_filename
create_source_path_directory(document.archive_path)
self._write(
document.storage_type,

View File

@@ -128,17 +128,21 @@ def generate_filename(
counter=0,
append_gpg=True,
archive_filename=False,
use_format=True,
) -> Path:
base_path: Path | None = None
# Determine the source of the format string
if doc.storage_path is not None:
filename_format = doc.storage_path.path
elif settings.FILENAME_FORMAT is not None:
# Maybe convert old to new style
filename_format = convert_format_str_to_template_format(
settings.FILENAME_FORMAT,
)
if use_format:
if doc.storage_path is not None:
filename_format = doc.storage_path.path
elif settings.FILENAME_FORMAT is not None:
# Maybe convert old to new style
filename_format = convert_format_str_to_template_format(
settings.FILENAME_FORMAT,
)
else:
filename_format = None
else:
filename_format = None

View File

@@ -160,6 +160,7 @@ class Document(SoftDeleteModel, ModelWithOwner):
(STORAGE_TYPE_UNENCRYPTED, _("Unencrypted")),
(STORAGE_TYPE_GPG, _("Encrypted with GNU Privacy Guard")),
)
MAX_STORED_FILENAME_LENGTH: Final[int] = 1024
correspondent = models.ForeignKey(
Correspondent,
@@ -267,7 +268,7 @@ class Document(SoftDeleteModel, ModelWithOwner):
filename = models.FilePathField(
_("filename"),
max_length=1024,
max_length=MAX_STORED_FILENAME_LENGTH,
editable=False,
default=None,
unique=True,
@@ -277,7 +278,7 @@ class Document(SoftDeleteModel, ModelWithOwner):
archive_filename = models.FilePathField(
_("archive filename"),
max_length=1024,
max_length=MAX_STORED_FILENAME_LENGTH,
editable=False,
default=None,
unique=True,
@@ -287,7 +288,7 @@ class Document(SoftDeleteModel, ModelWithOwner):
original_filename = models.CharField(
_("original filename"),
max_length=1024,
max_length=MAX_STORED_FILENAME_LENGTH,
editable=False,
default=None,
unique=False,

View File

@@ -460,8 +460,22 @@ def update_filename_and_move_files(
old_filename = instance.filename
old_source_path = instance.source_path
move_original = False
old_archive_filename = instance.archive_filename
old_archive_path = instance.archive_path
move_archive = False
candidate_filename = generate_filename(instance)
if len(str(candidate_filename)) > Document.MAX_STORED_FILENAME_LENGTH:
msg = (
f"Document {instance!s}: Generated filename exceeds db path "
f"limit ({len(str(candidate_filename))} > "
f"{Document.MAX_STORED_FILENAME_LENGTH}): {candidate_filename!s}"
)
logger.warning(msg)
raise CannotMoveFilesException(msg)
candidate_source_path = (
settings.ORIGINALS_DIR / candidate_filename
).resolve()
@@ -480,11 +494,16 @@ def update_filename_and_move_files(
instance.filename = str(new_filename)
move_original = old_filename != instance.filename
old_archive_filename = instance.archive_filename
old_archive_path = instance.archive_path
if instance.has_archive_version:
archive_candidate = generate_filename(instance, archive_filename=True)
if len(str(archive_candidate)) > Document.MAX_STORED_FILENAME_LENGTH:
msg = (
f"Document {instance!s}: Generated archive filename exceeds "
f"db path limit ({len(str(archive_candidate))} > "
f"{Document.MAX_STORED_FILENAME_LENGTH}): {archive_candidate!s}"
)
logger.warning(msg)
raise CannotMoveFilesException(msg)
archive_candidate_path = (
settings.ARCHIVE_DIR / archive_candidate
).resolve()

View File

@@ -79,6 +79,23 @@ class PlaceholderString(str):
NO_VALUE_PLACEHOLDER = PlaceholderString("-none-")
class MatchingModelContext:
"""
Safe template context for related objects.
Keeps legacy behavior where including the object ina template yields the related object's
name as a string, while still exposing limited attributes.
"""
def __init__(self, *, id: int, name: str, path: str | None = None):
self.id = id
self.name = name
self.path = path
def __str__(self) -> str:
return self.name
_template_environment.undefined = _LogStrictUndefined
_template_environment.filters["get_cf_value"] = get_cf_value
@@ -221,19 +238,26 @@ def get_safe_document_context(
else None,
"tags": [{"name": tag.name, "id": tag.id} for tag in tags],
"correspondent": (
{"name": document.correspondent.name, "id": document.correspondent.id}
MatchingModelContext(
name=document.correspondent.name,
id=document.correspondent.id,
)
if document.correspondent
else None
),
"document_type": (
{"name": document.document_type.name, "id": document.document_type.id}
MatchingModelContext(
name=document.document_type.name,
id=document.document_type.id,
)
if document.document_type
else None
),
"storage_path": {
"path": document.storage_path.path,
"id": document.storage_path.id,
}
"storage_path": MatchingModelContext(
name=document.storage_path.name,
path=document.storage_path.path,
id=document.storage_path.id,
)
if document.storage_path
else None,
}

View File

@@ -633,6 +633,33 @@ class TestConsumer(
self._assert_first_last_send_progress()
@mock.patch("documents.consumer.generate_unique_filename")
def testFilenameHandlingFallsBackWhenGeneratedPathExceedsDbLimit(self, m):
    """
    GIVEN:
        - A patched unique-filename generator that always returns a name
          of more than 1100 characters
    WHEN:
        - A document is consumed
    THEN:
        - The stored filename is the zero-padded primary key plus ".pdf"
        - Both stored filenames are at most 1024 characters long
        - Original and archive files exist on disk
    """

    def _overlong_name(doc, archive_filename=False):
        # "a" for the original, "b" for the archive version
        letter = "b" if archive_filename else "a"
        return Path(letter * 1100 + ".pdf")

    m.side_effect = _overlong_name

    overrides = DocumentMetadataOverrides(title="new docs")
    with self.get_consumer(self.get_test_file(), overrides) as consumer:
        consumer.run()

    document = Document.objects.first()
    self.assertIsNotNone(document)
    assert document is not None

    expected_name = f"{document.pk:07d}.pdf"
    self.assertEqual(document.filename, expected_name)

    self.assertLessEqual(len(document.filename), 1024)
    self.assertLessEqual(len(document.archive_filename), 1024)

    self.assertIsFile(document.source_path)
    self.assertIsFile(document.archive_path)

    self._assert_first_last_send_progress()
@override_settings(FILENAME_FORMAT="{correspondent}/{title}")
@mock.patch("documents.signals.handlers.generate_unique_filename")
def testFilenameHandlingUnstableFormat(self, m):

View File

@@ -1341,6 +1341,41 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
Path("somepath/asn-201-400/asn-3xx/Does Matter.pdf"),
)
def test_template_related_context_keeps_legacy_string_coercion(self):
    """
    GIVEN:
        - A storage path template that applies string filters directly to
          related objects (storage path, correspondent, document type)
    WHEN:
        - The filename for a document using that storage path is generated
    THEN:
        - Each related object is rendered as its name (legacy behavior)
        - The resulting path is built from the lower-cased names
    """
    template = (
        "{{ document.storage_path|lower }} / "
        "{{ document.correspondent|lower|replace('mi:', 'mieter/') }} / "
        "{{ document_type|lower }} / "
        "{{ title|lower }}"
    )
    sp = StoragePath.objects.create(name="PARTNER", path=template)

    created_on = datetime.date(2025, 7, 2)
    added_at = timezone.make_aware(datetime.datetime(2026, 3, 3, 11, 53, 16))
    correspondent = Correspondent.objects.create(name="mi:kochkach")
    document_type = DocumentType.objects.create(name="Mietvertrag")

    doc = Document.objects.create(
        title="scan_017562",
        created=created_on,
        added=added_at,
        mime_type="application/pdf",
        checksum="test-checksum",
        storage_path=sp,
        correspondent=correspondent,
        document_type=document_type,
    )

    expected = Path("partner/mieter/kochkach/mietvertrag/scan_017562.pdf")
    self.assertEqual(generate_filename(doc), expected)
@override_settings(
FILENAME_FORMAT="{{creation_date}}/{{ title_name_str }}",
)
@@ -1699,6 +1734,21 @@ class TestCustomFieldFilenameUpdates(
self.assertTrue(Path(self.doc.source_path).is_file())
self.assertLessEqual(m.call_count, 1)
@override_settings(FILENAME_FORMAT=None)
def test_overlong_storage_path_keeps_existing_filename(self):
    """
    GIVEN:
        - A document whose stored filename matches its generated filename
          and whose source file exists on disk
    WHEN:
        - The document is saved with a storage path of 1100 characters
    THEN:
        - The stored filename is unchanged
        - The source file still exists
    """
    initial_filename = generate_filename(self.doc)

    # Persist the filename without going through Document.save()
    Document.objects.filter(pk=self.doc.pk).update(
        filename=str(initial_filename),
    )
    self.doc.refresh_from_db()

    source = Path(self.doc.source_path)
    source.parent.mkdir(parents=True, exist_ok=True)
    source.touch()

    overlong_path = StoragePath.objects.create(path="a" * 1100)
    self.doc.storage_path = overlong_path
    self.doc.save()
    self.doc.refresh_from_db()

    self.assertEqual(Path(self.doc.filename), initial_filename)
    self.assertTrue(Path(self.doc.source_path).is_file())
class TestPathDateLocalization:
"""

View File

@@ -147,6 +147,16 @@ class TestTagHierarchy(APITestCase):
assert serializer.data # triggers serialization
assert "document_count_filter" in context
def test_tag_list_can_order_by_document_count_with_children(self) -> None:
    """
    Listing tags ordered by ``document_count`` returns HTTP 200 when a
    child tag is attached to a document.
    """
    self.document.tags.add(self.child)

    query = {"ordering": "document_count"}
    response = self.client.get("/api/tags/", query)

    assert response.status_code == 200
def test_cannot_set_parent_to_self(self):
tag = Tag.objects.create(name="Selfie")
resp = self.client.patch(

View File

@@ -487,13 +487,13 @@ class TagViewSet(PermissionsAwareDocumentCountMixin, ModelViewSet):
user = getattr(getattr(self, "request", None), "user", None)
children_source = list(
annotate_document_count_for_related_queryset(
Tag.objects.filter(pk__in=descendant_pks | {t.pk for t in all_tags})
.select_related("owner")
.order_by(*ordering),
Tag.objects.filter(
pk__in=descendant_pks | {t.pk for t in all_tags},
).select_related("owner"),
through_model=self.document_count_through,
related_object_field=self.document_count_source_field,
user=user,
),
).order_by(*ordering),
)
else:
children_source = all_tags