mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-06-18 03:14:19 +00:00
Fix: Apply unicode normalization to all paths and path components (#12993)
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import unicodedata
|
||||
from collections.abc import Iterable
|
||||
from pathlib import PurePath
|
||||
|
||||
@@ -36,10 +37,12 @@ class FilePathTemplate(Template):
|
||||
def clean_filepath(value: str) -> str:
|
||||
"""
|
||||
Clean up a filepath by:
|
||||
1. Removing newlines and carriage returns
|
||||
2. Removing extra spaces before and after forward slashes
|
||||
3. Preserving spaces in other parts of the path
|
||||
1. Normalizing Unicode to NFC form to prevent byte-level mismatches
|
||||
2. Removing newlines and carriage returns
|
||||
3. Removing extra spaces before and after forward slashes
|
||||
4. Preserving spaces in other parts of the path
|
||||
"""
|
||||
value = unicodedata.normalize("NFC", value)
|
||||
value = value.replace("\n", "").replace("\r", "")
|
||||
value = re.sub(r"\s*/\s*", "/", value)
|
||||
|
||||
@@ -181,17 +184,17 @@ def get_basic_metadata_context(
|
||||
"""
|
||||
return {
|
||||
"title": pathvalidate.sanitize_filename(
|
||||
document.title,
|
||||
unicodedata.normalize("NFC", document.title),
|
||||
replacement_text="-",
|
||||
),
|
||||
"correspondent": pathvalidate.sanitize_filename(
|
||||
document.correspondent.name,
|
||||
unicodedata.normalize("NFC", document.correspondent.name),
|
||||
replacement_text="-",
|
||||
)
|
||||
if document.correspondent
|
||||
else no_value_default,
|
||||
"document_type": pathvalidate.sanitize_filename(
|
||||
document.document_type.name,
|
||||
unicodedata.normalize("NFC", document.document_type.name),
|
||||
replacement_text="-",
|
||||
)
|
||||
if document.document_type
|
||||
@@ -202,7 +205,10 @@ def get_basic_metadata_context(
|
||||
"owner_username": document.owner.username
|
||||
if document.owner
|
||||
else no_value_default,
|
||||
"original_name": PurePath(document.original_filename).with_suffix("").name
|
||||
"original_name": unicodedata.normalize(
|
||||
"NFC",
|
||||
PurePath(document.original_filename).with_suffix("").name,
|
||||
)
|
||||
if document.original_filename
|
||||
else no_value_default,
|
||||
"doc_pk": f"{document.pk:07}",
|
||||
@@ -269,12 +275,12 @@ def get_tags_context(tags: Iterable[Tag]) -> dict[str, str | list[str]]:
|
||||
return {
|
||||
"tag_list": pathvalidate.sanitize_filename(
|
||||
",".join(
|
||||
sorted(tag.name for tag in tags),
|
||||
sorted(unicodedata.normalize("NFC", tag.name) for tag in tags),
|
||||
),
|
||||
replacement_text="-",
|
||||
),
|
||||
# Assumed to be ordered, but a template could loop through to find what they want
|
||||
"tag_name_list": [x.name for x in tags],
|
||||
"tag_name_list": [unicodedata.normalize("NFC", x.name) for x in tags],
|
||||
}
|
||||
|
||||
|
||||
@@ -301,7 +307,7 @@ def get_custom_fields_context(
|
||||
CustomField.FieldDataType.LONG_TEXT,
|
||||
}:
|
||||
value = pathvalidate.sanitize_filename(
|
||||
field_instance.value,
|
||||
unicodedata.normalize("NFC", field_instance.value),
|
||||
replacement_text="-",
|
||||
)
|
||||
elif (
|
||||
@@ -310,10 +316,13 @@ def get_custom_fields_context(
|
||||
):
|
||||
options = field_instance.field.extra_data["select_options"]
|
||||
value = pathvalidate.sanitize_filename(
|
||||
next(
|
||||
option["label"]
|
||||
for option in options
|
||||
if option["id"] == field_instance.value
|
||||
unicodedata.normalize(
|
||||
"NFC",
|
||||
next(
|
||||
option["label"]
|
||||
for option in options
|
||||
if option["id"] == field_instance.value
|
||||
),
|
||||
),
|
||||
replacement_text="-",
|
||||
)
|
||||
@@ -321,7 +330,7 @@ def get_custom_fields_context(
|
||||
value = field_instance.value
|
||||
field_data["custom_fields"][
|
||||
pathvalidate.sanitize_filename(
|
||||
field_instance.field.name,
|
||||
unicodedata.normalize("NFC", field_instance.field.name),
|
||||
replacement_text="-",
|
||||
)
|
||||
] = {
|
||||
|
||||
@@ -0,0 +1,95 @@
|
||||
import unicodedata
|
||||
from typing import TYPE_CHECKING
|
||||
from unittest import mock
|
||||
|
||||
import celery.result
|
||||
import pytest
|
||||
from django.core.files.uploadedfile import SimpleUploadedFile
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from documents.data_models import ConsumableDocument
|
||||
from documents.data_models import DocumentMetadataOverrides
|
||||
|
||||
|
||||
@pytest.fixture()
def consume_file_mock():
    """Patch ``documents.tasks.consume_file.apply_async`` for the test's duration.

    The patched callable returns a ``celery.result.AsyncResult`` whose id is
    ``"test-task-id"``. The mock itself is yielded so tests can inspect the
    arguments it was called with.
    """
    fake_result = celery.result.AsyncResult(id="test-task-id")
    patcher = mock.patch(
        "documents.tasks.consume_file.apply_async",
        return_value=fake_result,
    )
    with patcher as apply_async_mock:
        yield apply_async_mock
|
||||
|
||||
|
||||
@pytest.fixture()
def directories(tmp_path, settings, _media_settings):
    """Create ``<tmp_path>/scratch`` and register it as ``settings.SCRATCH_DIR``.

    Returns the created directory. ``_media_settings`` is requested only as a
    fixture dependency; its value is not used here.
    """
    scratch_dir = tmp_path / "scratch"
    scratch_dir.mkdir()
    settings.SCRATCH_DIR = scratch_dir
    return scratch_dir
|
||||
|
||||
|
||||
@pytest.mark.django_db
class TestPostDocumentNFCNormalization:
    """Filenames uploaded via ``post_document`` must reach the consume task as NFC."""

    @staticmethod
    def _post_pdf(client, filename):
        """Upload a minimal PDF payload named *filename* and return the response."""
        upload = SimpleUploadedFile(
            filename,
            b"%PDF-1.4 test",
            content_type="application/pdf",
        )
        return client.post(
            "/api/documents/post_document/",
            {"document": upload},
        )

    def test_nfd_filename_normalized_to_nfc(
        self,
        admin_client,
        consume_file_mock: mock.MagicMock,
        directories,
    ):
        """Uploaded file with NFD filename must have its name stored as NFC."""
        plain_name = "Rechnung März.pdf"
        nfd = unicodedata.normalize("NFD", plain_name)
        nfc = unicodedata.normalize("NFC", plain_name)

        # Sanity check: the two forms must not be the same string, otherwise
        # the assertions below would prove nothing.
        assert nfd != nfc

        response = self._post_pdf(admin_client, nfd)

        assert response.status_code == 200

        # The view hands its payload to the task through apply_async(kwargs=...).
        task_kwargs = consume_file_mock.call_args.kwargs["kwargs"]
        input_doc: ConsumableDocument = task_kwargs["input_doc"]
        overrides: DocumentMetadataOverrides = task_kwargs["overrides"]

        # The temp file on disk must have an NFC name
        stored_name = input_doc.original_file.name
        assert stored_name == nfc, (
            f"Expected NFC filename {nfc!r}, got {input_doc.original_file.name!r}"
        )
        # The override filename stored for later use must also be NFC
        override_name = overrides.filename
        assert override_name == nfc, (
            f"Expected NFC override filename {nfc!r}, got {overrides.filename!r}"
        )
        assert unicodedata.is_normalized("NFC", override_name)

    def test_already_nfc_filename_unchanged(
        self,
        admin_client,
        consume_file_mock: mock.MagicMock,
        directories,
    ):
        """Uploaded file with already-NFC filename must pass through unchanged."""
        nfc = unicodedata.normalize("NFC", "Invoice_2024.pdf")

        response = self._post_pdf(admin_client, nfc)

        assert response.status_code == 200

        task_kwargs = consume_file_mock.call_args.kwargs["kwargs"]
        overrides: DocumentMetadataOverrides = task_kwargs["overrides"]
        override_name = overrides.filename

        assert override_name == nfc
        assert unicodedata.is_normalized("NFC", override_name)
|
||||
@@ -0,0 +1,187 @@
|
||||
"""
|
||||
Tests for NFC Unicode normalization in generate_filename / FilePathTemplate.render().
|
||||
|
||||
NFC `ü` (UTF-8: c3 bc) and NFD `ü` (UTF-8: 75 cc 88) are visually identical but
|
||||
produce different byte sequences. On Linux (ext4, ZFS) these are distinct filenames.
|
||||
All paths produced by the templating system must be NFC-normalized.
|
||||
"""
|
||||
|
||||
import unicodedata
|
||||
|
||||
import pytest
|
||||
|
||||
from documents.file_handling import generate_filename
|
||||
from documents.models import CustomField
|
||||
from documents.models import CustomFieldInstance
|
||||
from documents.tests.factories import CorrespondentFactory
|
||||
from documents.tests.factories import DocumentFactory
|
||||
from documents.tests.factories import StoragePathFactory
|
||||
from documents.tests.factories import TagFactory
|
||||
|
||||
|
||||
@pytest.mark.django_db
class TestGenerateFilenameNFCNormalization:
    """``generate_filename`` must emit NFC paths whatever form its inputs arrive in."""

    @pytest.mark.parametrize(
        "raw,display",
        [
            (unicodedata.normalize("NFD", word), word)
            for word in ("Gemüse", "Café", "naïve")
        ],
    )
    def test_nfd_title_normalized_to_nfc(self, settings, raw, display):
        """NFD title must produce NFC path bytes."""
        settings.FILENAME_FORMAT = "{{ title }}"
        nfc = unicodedata.normalize("NFC", display)
        assert raw != nfc  # confirm byte-level difference

        document = DocumentFactory(title=raw, mime_type="application/pdf")
        rendered = str(generate_filename(document))
        expected = f"{nfc}.pdf"

        assert rendered == expected
        assert rendered.encode() == expected.encode()

    def test_nfd_correspondent_normalized_to_nfc(self, settings):
        """NFD correspondent name must produce NFC path component."""
        settings.FILENAME_FORMAT = "{{ correspondent }}/{{ title }}"
        nfd = unicodedata.normalize("NFD", "Müller")
        nfc = unicodedata.normalize("NFC", "Müller")

        document = DocumentFactory(
            title="invoice",
            correspondent=CorrespondentFactory(name=nfd),
            mime_type="application/pdf",
        )
        rendered = str(generate_filename(document))
        expected = f"{nfc}/invoice.pdf"

        assert rendered == expected
        assert rendered.encode() == expected.encode()

    def test_nfd_storage_path_normalized_to_nfc(self, settings):
        """NFD literal in StoragePath.path template must produce NFC path bytes."""
        settings.FILENAME_FORMAT = None
        nfd = unicodedata.normalize("NFD", "Büro")
        nfc = unicodedata.normalize("NFC", "Büro")

        # StoragePath.path is used directly as the format/template string, so
        # the decomposed characters here are literal template text rather than
        # a rendered variable.
        storage_path = StoragePathFactory(path=nfd + "/{{ title }}")
        document = DocumentFactory(
            title="doc",
            storage_path=storage_path,
            mime_type="application/pdf",
        )
        rendered = str(generate_filename(document))

        assert rendered.encode() == f"{nfc}/doc.pdf".encode()

    def test_nfd_raw_document_title_normalized_to_nfc(self, settings):
        """NFD title accessed via document.title (unsanitized context) must also be NFC."""
        settings.FILENAME_FORMAT = "{{ document.title }}"
        nfd = unicodedata.normalize("NFD", "Café")
        nfc = unicodedata.normalize("NFC", "Café")

        document = DocumentFactory(title=nfd, mime_type="application/pdf")
        rendered = str(generate_filename(document))
        expected = f"{nfc}.pdf"

        assert rendered == expected
        assert rendered.encode() == expected.encode()
|
||||
|
||||
|
||||
@pytest.mark.django_db
class TestContextBuilderNFCNormalization:
    """
    Defense-in-depth: context builder functions must NFC-normalize string
    inputs before passing them to sanitize_filename(). The final rendered
    path is also normalized by clean_filepath(), so some of these tests may
    pass on that basis alone; they exist as regression guards for the
    context-builder layer.
    """

    def test_nfd_tag_name_normalized_in_tag_list(self, settings):
        """NFD tag name must appear as NFC bytes in the {{ tag_list }} shorthand."""
        settings.FILENAME_FORMAT = "{{ tag_list }}/{{ title }}"
        nfd = unicodedata.normalize("NFD", "Büro")
        nfc = unicodedata.normalize("NFC", "Büro")
        assert nfd != nfc  # confirm they differ at byte level

        document = DocumentFactory(title="doc", mime_type="application/pdf")
        document.tags.set([TagFactory(name=nfd)])

        rendered = str(generate_filename(document))

        assert rendered.encode() == f"{nfc}/doc.pdf".encode()

    def test_nfd_original_name_normalized_to_nfc(self, settings):
        """NFD original filename must appear as NFC bytes via {{ original_name }}."""
        settings.FILENAME_FORMAT = "{{ original_name }}"
        nfd = unicodedata.normalize("NFD", "Rechnung März")
        nfc = unicodedata.normalize("NFC", "Rechnung März")

        document = DocumentFactory(
            original_filename=f"{nfd}.pdf",
            mime_type="application/pdf",
        )
        rendered = str(generate_filename(document))

        assert rendered.encode() == f"{nfc}.pdf".encode()

    def test_nfd_custom_field_string_value_normalized(self, settings):
        """NFD value in a STRING-type custom field must appear as NFC in the context."""
        settings.FILENAME_FORMAT = (
            "{{ custom_fields['Location']['value'] }}/{{ title }}"
        )
        nfd_value = unicodedata.normalize("NFD", "Düsseldorf")
        nfc_value = unicodedata.normalize("NFC", "Düsseldorf")
        assert nfd_value != nfc_value

        document = DocumentFactory(title="report", mime_type="application/pdf")
        location_field = CustomField.objects.create(
            name="Location",
            data_type=CustomField.FieldDataType.STRING,
        )
        CustomFieldInstance.objects.create(
            document=document,
            field=location_field,
            value_text=nfd_value,
        )

        rendered = str(generate_filename(document))

        assert rendered.encode() == f"{nfc_value}/report.pdf".encode()

    def test_nfd_custom_field_name_normalized_as_key(self, settings):
        """NFD characters in a custom field name must appear as NFC in the context dict key."""
        nfd_name = unicodedata.normalize("NFD", "Größe")
        nfc_name = unicodedata.normalize("NFC", "Größe")
        assert nfd_name != nfc_name

        # The template looks the field up by its NFC name while the field is
        # stored under its NFD name, so the lookup only succeeds if the
        # context key was normalized.
        field_ref = f"custom_fields['{nfc_name}']"
        settings.FILENAME_FORMAT = (
            f"{{% if {field_ref} %}}"
            f"{{{{ {field_ref}['value'] }}}}/{{{{ title }}}}"
            "{% else %}{{ title }}{% endif %}"
        )

        document = DocumentFactory(title="letter", mime_type="application/pdf")
        size_field = CustomField.objects.create(
            name=nfd_name,
            data_type=CustomField.FieldDataType.STRING,
        )
        CustomFieldInstance.objects.create(
            document=document,
            field=size_field,
            value_text="Berlin",
        )

        rendered = str(generate_filename(document))

        # If field name key is NFC-normalized, the template condition succeeds
        # and result is "Berlin/letter.pdf"; otherwise it falls back to "letter.pdf"
        assert rendered == "Berlin/letter.pdf"

    def test_nfd_tag_name_list_normalized_to_nfc(self, settings):
        """NFD tag names in tag_name_list must appear as NFC bytes when iterated."""
        settings.FILENAME_FORMAT = (
            "{% for t in tag_name_list %}{{ t }}{% endfor %}/{{ title }}"
        )
        nfd = unicodedata.normalize("NFD", "Büro")
        nfc = unicodedata.normalize("NFC", "Büro")
        assert nfd != nfc  # confirm byte-level difference

        document = DocumentFactory(title="doc", mime_type="application/pdf")
        document.tags.add(TagFactory(name=nfd))
        rendered = str(generate_filename(document))

        assert rendered.encode() == f"{nfc}/doc.pdf".encode()
|
||||
@@ -3126,6 +3126,7 @@ class PostDocumentView(GenericAPIView[Any]):
|
||||
serializer.is_valid(raise_exception=True)
|
||||
|
||||
doc_name, doc_data = serializer.validated_data.get("document")
|
||||
doc_name = normalize("NFC", doc_name)
|
||||
correspondent_id = serializer.validated_data.get("correspondent")
|
||||
document_type_id = serializer.validated_data.get("document_type")
|
||||
storage_path_id = serializer.validated_data.get("storage_path")
|
||||
|
||||
@@ -4,6 +4,7 @@ import logging
|
||||
import ssl
|
||||
import tempfile
|
||||
import traceback
|
||||
import unicodedata
|
||||
from datetime import date
|
||||
from datetime import timedelta
|
||||
from fnmatch import fnmatch
|
||||
@@ -496,10 +497,10 @@ class MailAccountHandler(LoggingMixin):
|
||||
rule: MailRule,
|
||||
) -> str | None:
|
||||
if rule.assign_title_from == MailRule.TitleSource.FROM_SUBJECT:
|
||||
return message.subject
|
||||
return unicodedata.normalize("NFC", message.subject)
|
||||
|
||||
elif rule.assign_title_from == MailRule.TitleSource.FROM_FILENAME:
|
||||
return Path(att.filename).stem
|
||||
return unicodedata.normalize("NFC", Path(att.filename).stem)
|
||||
|
||||
elif rule.assign_title_from == MailRule.TitleSource.NONE:
|
||||
return None
|
||||
@@ -866,7 +867,9 @@ class MailAccountHandler(LoggingMixin):
|
||||
),
|
||||
)
|
||||
|
||||
attachment_name = pathvalidate.sanitize_filename(att.filename)
|
||||
attachment_name = pathvalidate.sanitize_filename(
|
||||
unicodedata.normalize("NFC", att.filename),
|
||||
)
|
||||
if attachment_name:
|
||||
temp_filename = temp_dir / attachment_name
|
||||
else: # pragma: no cover
|
||||
@@ -882,7 +885,7 @@ class MailAccountHandler(LoggingMixin):
|
||||
)
|
||||
doc_overrides = DocumentMetadataOverrides(
|
||||
title=title,
|
||||
filename=pathvalidate.sanitize_filename(att.filename),
|
||||
filename=attachment_name,
|
||||
correspondent_id=correspondent.id if correspondent else None,
|
||||
document_type_id=doc_type.id if doc_type else None,
|
||||
tag_ids=tag_ids,
|
||||
@@ -988,7 +991,9 @@ class MailAccountHandler(LoggingMixin):
|
||||
)
|
||||
doc_overrides = DocumentMetadataOverrides(
|
||||
title=message.subject,
|
||||
filename=pathvalidate.sanitize_filename(f"{message.subject}.eml"),
|
||||
filename=pathvalidate.sanitize_filename(
|
||||
unicodedata.normalize("NFC", f"{message.subject}.eml"),
|
||||
),
|
||||
correspondent_id=correspondent.id if correspondent else None,
|
||||
document_type_id=doc_type.id if doc_type else None,
|
||||
tag_ids=tag_ids,
|
||||
|
||||
@@ -0,0 +1,182 @@
|
||||
"""
|
||||
Tests that mail attachment filenames and EML subject filenames are
|
||||
normalized to NFC Unicode before being stored as document overrides.
|
||||
|
||||
Filenames from MIME headers can arrive in NFD form (e.g. from macOS Mail),
|
||||
and must be normalized to NFC so filenames are consistent regardless of the
|
||||
sending client.
|
||||
"""
|
||||
|
||||
import unicodedata
|
||||
from pathlib import Path
|
||||
from unittest import mock
|
||||
|
||||
import pytest
|
||||
|
||||
from documents.tests.utils import remove_dirs
|
||||
from documents.tests.utils import setup_directories
|
||||
from paperless_mail.models import MailRule
|
||||
from paperless_mail.tests.factories import MailAccountFactory
|
||||
from paperless_mail.tests.test_mail import MessageBuilder
|
||||
from paperless_mail.tests.test_mail import _AttachmentDef
|
||||
from paperless_mail.tests.test_mail import fake_magic_from_buffer
|
||||
|
||||
|
||||
@pytest.fixture()
def directories(settings):
    """Yield the directories created by ``setup_directories()``; remove them on teardown."""
    created_dirs = setup_directories()
    yield created_dirs
    # Teardown: runs once the requesting test has finished.
    remove_dirs(created_dirs)
|
||||
|
||||
|
||||
@pytest.fixture()
def queue_consumption_tasks_mock():
    """Patch ``paperless_mail.mail.queue_consumption_tasks`` and yield the mock."""
    patcher = mock.patch("paperless_mail.mail.queue_consumption_tasks")
    with patcher as queue_mock:
        yield queue_mock
|
||||
|
||||
|
||||
@pytest.fixture()
def mail_account(db):
    """Return a mail account built by ``MailAccountFactory`` (requests the ``db`` fixture)."""
    account = MailAccountFactory()
    return account
|
||||
|
||||
|
||||
@pytest.fixture()
def attachment_rule(mail_account):
    """Return a saved ``MailRule`` that consumes attachments only.

    The rule takes its title from the attachment filename.
    """
    rule_fields = {
        "name": "attachment rule",
        "account": mail_account,
        "assign_title_from": MailRule.TitleSource.FROM_FILENAME,
        "consumption_scope": MailRule.ConsumptionScope.ATTACHMENTS_ONLY,
        "attachment_type": MailRule.AttachmentProcessing.ATTACHMENTS_ONLY,
    }
    saved_rule = MailRule(**rule_fields)
    saved_rule.save()
    return saved_rule
|
||||
|
||||
|
||||
@pytest.fixture()
def eml_rule(mail_account):
    """Return a saved ``MailRule`` with the EML-only consumption scope.

    The rule takes its title from the message subject.
    """
    rule_fields = {
        "name": "eml rule",
        "account": mail_account,
        "assign_title_from": MailRule.TitleSource.FROM_SUBJECT,
        "consumption_scope": MailRule.ConsumptionScope.EML_ONLY,
        "attachment_type": MailRule.AttachmentProcessing.ATTACHMENTS_ONLY,
    }
    saved_rule = MailRule(**rule_fields)
    saved_rule.save()
    return saved_rule
|
||||
|
||||
|
||||
@pytest.fixture()
def message_builder():
    """Return a fresh ``MessageBuilder`` for constructing test mail messages."""
    builder = MessageBuilder()
    return builder
|
||||
|
||||
|
||||
@pytest.mark.django_db
@mock.patch("paperless_mail.mail.magic.from_buffer", fake_magic_from_buffer)
class TestMailNFCNormalization:
    """Attachment filenames and EML subject filenames must be NFC-normalized."""

    def test_attachment_nfd_filename_normalized_to_nfc(
        self,
        directories,
        queue_consumption_tasks_mock,
        attachment_rule,
        mail_account_handler,
        message_builder,
    ):
        """Attachment filename arriving as NFD must be stored as NFC in both
        the overrides and the temp file written to disk.
        """
        nfd_filename = unicodedata.normalize("NFD", "Rechnung März.pdf")
        nfc_filename = unicodedata.normalize("NFC", "Rechnung März.pdf")

        # Confirm the fixture is actually NFD (not already NFC)
        assert unicodedata.is_normalized("NFD", nfd_filename)
        assert not unicodedata.is_normalized("NFC", nfd_filename)

        message = message_builder.create_message(
            subject="Test invoice",
            from_="sender@example.com",
            attachments=[
                _AttachmentDef(filename=nfd_filename, content=b"%PDF-1.4 test"),
            ],
        )

        result = mail_account_handler._handle_message(message, attachment_rule)

        assert result == 1
        queue_consumption_tasks_mock.assert_called_once()

        call_kwargs = queue_consumption_tasks_mock.call_args.kwargs
        consume_tasks = call_kwargs["consume_tasks"]
        assert len(consume_tasks) == 1

        overrides = consume_tasks[0].kwargs["overrides"]
        assert overrides.filename == nfc_filename
        assert unicodedata.is_normalized("NFC", overrides.filename)
        assert unicodedata.is_normalized("NFC", overrides.title)

        input_doc = consume_tasks[0].kwargs["input_doc"]
        original_file = Path(input_doc.original_file)
        assert original_file.exists()
        assert original_file.name == nfc_filename

    def test_eml_subject_filename_nfc(
        self,
        directories,
        queue_consumption_tasks_mock,
        eml_rule,
        mail_account_handler,
        message_builder,
    ):
        """EML filename derived from subject arriving as NFD must be stored as NFC."""
        nfd_subject = unicodedata.normalize("NFD", "Rechnung März 2024")
        nfc_expected_filename = unicodedata.normalize("NFC", "Rechnung März 2024.eml")

        # Confirm the fixture is actually NFD (not already NFC). Checking only
        # the NFD side is not enough: a pure-ASCII subject is valid NFD *and*
        # NFC, which would let this test pass without exercising normalization.
        assert unicodedata.is_normalized("NFD", nfd_subject)
        assert not unicodedata.is_normalized("NFC", nfd_subject)

        message = message_builder.create_message(
            subject=nfd_subject,
            from_="sender@example.com",
            attachments=0,
        )

        mail_account_handler._handle_message(message, eml_rule)

        queue_consumption_tasks_mock.assert_called_once()

        call_kwargs = queue_consumption_tasks_mock.call_args.kwargs
        consume_tasks = call_kwargs["consume_tasks"]
        assert len(consume_tasks) == 1

        overrides = consume_tasks[0].kwargs["overrides"]
        assert overrides.filename == nfc_expected_filename
        assert unicodedata.is_normalized("NFC", overrides.filename)

    def test_already_nfc_attachment_filename_unchanged(
        self,
        directories,
        queue_consumption_tasks_mock,
        attachment_rule,
        mail_account_handler,
        message_builder,
    ):
        """An attachment filename already in NFC must pass through unchanged."""
        nfc_filename = "Invoice_2024.pdf"
        assert unicodedata.is_normalized("NFC", nfc_filename)

        message = message_builder.create_message(
            subject="Invoice",
            from_="sender@example.com",
            attachments=[
                _AttachmentDef(filename=nfc_filename, content=b"%PDF-1.4 test"),
            ],
        )

        mail_account_handler._handle_message(message, attachment_rule)

        # Fail with a clear message if nothing was queued, rather than with an
        # AttributeError when ``call_args`` is None below.
        queue_consumption_tasks_mock.assert_called_once()

        call_kwargs = queue_consumption_tasks_mock.call_args.kwargs
        consume_tasks = call_kwargs["consume_tasks"]
        overrides = consume_tasks[0].kwargs["overrides"]
        assert overrides.filename == nfc_filename
|
||||
Reference in New Issue
Block a user