From 8ed4bf2011818bb5518ed71cc3d5d0773cce27c9 Mon Sep 17 00:00:00 2001
From: Trenton H <797416+stumpylog@users.noreply.github.com>
Date: Sat, 13 Jun 2026 05:45:54 -0700
Subject: [PATCH] Fix: Apply unicode normalization to all paths and path
 components (#12993)

---
 src/documents/templating/filepath.py          |  39 ++--
 .../tests/test_api_post_document_nfc.py       |  95 +++++++++
 src/documents/tests/test_filepath_nfc.py      | 187 ++++++++++++++++++
 src/documents/views.py                        |   1 +
 src/paperless_mail/mail.py                    |  15 +-
 src/paperless_mail/tests/test_mail_nfc.py     | 182 +++++++++++++++++
 6 files changed, 499 insertions(+), 20 deletions(-)
 create mode 100644 src/documents/tests/test_api_post_document_nfc.py
 create mode 100644 src/documents/tests/test_filepath_nfc.py
 create mode 100644 src/paperless_mail/tests/test_mail_nfc.py

diff --git a/src/documents/templating/filepath.py b/src/documents/templating/filepath.py
index 008f1d0a9..d827b7c89 100644
--- a/src/documents/templating/filepath.py
+++ b/src/documents/templating/filepath.py
@@ -1,6 +1,7 @@
 import logging
 import os
 import re
+import unicodedata
 from collections.abc import Iterable
 from pathlib import PurePath
 
@@ -36,10 +37,12 @@ class FilePathTemplate(Template):
         def clean_filepath(value: str) -> str:
             """
             Clean up a filepath by:
-            1. Removing newlines and carriage returns
-            2. Removing extra spaces before and after forward slashes
-            3. Preserving spaces in other parts of the path
+            1. Normalizing Unicode to NFC form to prevent byte-level mismatches
+            2. Removing newlines and carriage returns
+            3. Removing extra spaces before and after forward slashes
+            4. Preserving spaces in other parts of the path
             """
+            value = unicodedata.normalize("NFC", value)
             value = value.replace("\n", "").replace("\r", "")
             value = re.sub(r"\s*/\s*", "/", value)
 
@@ -181,17 +184,17 @@ def get_basic_metadata_context(
     """
     return {
         "title": pathvalidate.sanitize_filename(
-            document.title,
+            unicodedata.normalize("NFC", document.title),
             replacement_text="-",
         ),
         "correspondent": pathvalidate.sanitize_filename(
-            document.correspondent.name,
+            unicodedata.normalize("NFC", document.correspondent.name),
             replacement_text="-",
         )
         if document.correspondent
         else no_value_default,
         "document_type": pathvalidate.sanitize_filename(
-            document.document_type.name,
+            unicodedata.normalize("NFC", document.document_type.name),
             replacement_text="-",
         )
         if document.document_type
@@ -202,7 +205,10 @@ def get_basic_metadata_context(
         "owner_username": document.owner.username
         if document.owner
         else no_value_default,
-        "original_name": PurePath(document.original_filename).with_suffix("").name
+        "original_name": unicodedata.normalize(
+            "NFC",
+            PurePath(document.original_filename).with_suffix("").name,
+        )
         if document.original_filename
         else no_value_default,
         "doc_pk": f"{document.pk:07}",
@@ -269,12 +275,12 @@ def get_tags_context(tags: Iterable[Tag]) -> dict[str, str | list[str]]:
     return {
         "tag_list": pathvalidate.sanitize_filename(
             ",".join(
-                sorted(tag.name for tag in tags),
+                sorted(unicodedata.normalize("NFC", tag.name) for tag in tags),
             ),
             replacement_text="-",
         ),
         # Assumed to be ordered, but a template could loop through to find what they want
-        "tag_name_list": [x.name for x in tags],
+        "tag_name_list": [unicodedata.normalize("NFC", x.name) for x in tags],
     }
 
 
@@ -301,7 +307,7 @@ def get_custom_fields_context(
             CustomField.FieldDataType.LONG_TEXT,
         }:
             value = pathvalidate.sanitize_filename(
-                field_instance.value,
+                unicodedata.normalize("NFC", field_instance.value),
                 replacement_text="-",
             )
         elif (
@@ -310,10 +316,13 @@ def get_custom_fields_context(
         ):
             options = field_instance.field.extra_data["select_options"]
             value = pathvalidate.sanitize_filename(
-                next(
-                    option["label"]
-                    for option in options
-                    if option["id"] == field_instance.value
+                unicodedata.normalize(
+                    "NFC",
+                    next(
+                        option["label"]
+                        for option in options
+                        if option["id"] == field_instance.value
+                    ),
                 ),
                 replacement_text="-",
             )
@@ -321,7 +330,7 @@ def get_custom_fields_context(
             value = field_instance.value
         field_data["custom_fields"][
             pathvalidate.sanitize_filename(
-                field_instance.field.name,
+                unicodedata.normalize("NFC", field_instance.field.name),
                 replacement_text="-",
             )
         ] = {
diff --git a/src/documents/tests/test_api_post_document_nfc.py b/src/documents/tests/test_api_post_document_nfc.py
new file mode 100644
index 000000000..5b3088b01
--- /dev/null
+++ b/src/documents/tests/test_api_post_document_nfc.py
@@ -0,0 +1,95 @@
+import unicodedata
+from typing import TYPE_CHECKING
+from unittest import mock
+
+import celery.result
+import pytest
+from django.core.files.uploadedfile import SimpleUploadedFile
+
+if TYPE_CHECKING:
+    from documents.data_models import ConsumableDocument
+    from documents.data_models import DocumentMetadataOverrides
+
+
+@pytest.fixture()
+def consume_file_mock():
+    with mock.patch("documents.tasks.consume_file.apply_async") as m:
+        m.return_value = celery.result.AsyncResult(id="test-task-id")
+        yield m
+
+
+@pytest.fixture()
+def directories(tmp_path, settings, _media_settings):
+    scratch = tmp_path / "scratch"
+    scratch.mkdir()
+    settings.SCRATCH_DIR = scratch
+    return scratch
+
+
+@pytest.mark.django_db
+class TestPostDocumentNFCNormalization:
+    def test_nfd_filename_normalized_to_nfc(
+        self,
+        admin_client,
+        consume_file_mock: mock.MagicMock,
+        directories,
+    ):
+        """Uploaded file with NFD filename must have its name stored as NFC."""
+        nfd = unicodedata.normalize("NFD", "Rechnung März.pdf")
+        nfc = unicodedata.normalize("NFC", "Rechnung März.pdf")
+
+        # Verify our test strings actually differ at the byte level
+        assert nfd != nfc
+
+        uploaded = SimpleUploadedFile(
+            nfd,
+            b"%PDF-1.4 test",
+            content_type="application/pdf",
+        )
+        response = admin_client.post(
+            "/api/documents/post_document/",
+            {"document": uploaded},
+        )
+
+        assert response.status_code == 200
+
+        task_kwargs = consume_file_mock.call_args.kwargs["kwargs"]
+        input_doc: ConsumableDocument = task_kwargs["input_doc"]
+        overrides: DocumentMetadataOverrides = task_kwargs["overrides"]
+
+        # The temp file on disk must have an NFC name
+        assert input_doc.original_file.name == nfc, (
+            f"Expected NFC filename {nfc!r}, got {input_doc.original_file.name!r}"
+        )
+        # The override filename stored for later use must also be NFC
+        assert overrides.filename == nfc, (
+            f"Expected NFC override filename {nfc!r}, got {overrides.filename!r}"
+        )
+        assert unicodedata.is_normalized("NFC", overrides.filename)
+
+    def test_already_nfc_filename_unchanged(
+        self,
+        admin_client,
+        consume_file_mock: mock.MagicMock,
+        directories,
+    ):
+        """Uploaded file with already-NFC filename must pass through unchanged."""
+        nfc = unicodedata.normalize("NFC", "Invoice_2024.pdf")
+
+        uploaded = SimpleUploadedFile(
+            nfc,
+            b"%PDF-1.4 test",
+            content_type="application/pdf",
+        )
+        response = admin_client.post(
+            "/api/documents/post_document/",
+            {"document": uploaded},
+        )
+
+        assert response.status_code == 200
+
+        task_kwargs = consume_file_mock.call_args.kwargs["kwargs"]
+        overrides: DocumentMetadataOverrides = task_kwargs["overrides"]
+
+        assert overrides.filename == nfc
+        assert unicodedata.is_normalized("NFC", overrides.filename)
diff --git a/src/documents/tests/test_filepath_nfc.py b/src/documents/tests/test_filepath_nfc.py
new file mode 100644
index 000000000..e1d3ef9a8
--- /dev/null
+++ b/src/documents/tests/test_filepath_nfc.py
@@ -0,0 +1,187 @@
+"""
+Tests for NFC Unicode normalization in generate_filename / FilePathTemplate.render().
+
+NFC `ü` (UTF-8: c3 bc) and NFD `ü` (UTF-8: 75 cc 88) are visually identical but
+produce different byte sequences.  On Linux (ext4, ZFS) these are distinct filenames.
+All paths produced by the templating system must be NFC-normalized.
+"""
+
+import unicodedata
+
+import pytest
+
+from documents.file_handling import generate_filename
+from documents.models import CustomField
+from documents.models import CustomFieldInstance
+from documents.tests.factories import CorrespondentFactory
+from documents.tests.factories import DocumentFactory
+from documents.tests.factories import StoragePathFactory
+from documents.tests.factories import TagFactory
+
+
+@pytest.mark.django_db
+class TestGenerateFilenameNFCNormalization:
+    @pytest.mark.parametrize(
+        "raw,display",
+        [
+            (unicodedata.normalize("NFD", "Gemüse"), "Gemüse"),
+            (unicodedata.normalize("NFD", "Café"), "Café"),
+            (unicodedata.normalize("NFD", "naïve"), "naïve"),
+        ],
+    )
+    def test_nfd_title_normalized_to_nfc(self, settings, raw, display):
+        """NFD title must produce NFC path bytes."""
+        settings.FILENAME_FORMAT = "{{ title }}"
+        nfc = unicodedata.normalize("NFC", display)
+        assert raw != nfc  # confirm byte-level difference
+
+        doc = DocumentFactory(title=raw, mime_type="application/pdf")
+        result = generate_filename(doc)
+
+        assert str(result) == f"{nfc}.pdf"
+        assert str(result).encode() == f"{nfc}.pdf".encode()
+
+    def test_nfd_correspondent_normalized_to_nfc(self, settings):
+        """NFD correspondent name must produce NFC path component."""
+        settings.FILENAME_FORMAT = "{{ correspondent }}/{{ title }}"
+        nfd = unicodedata.normalize("NFD", "Müller")
+        nfc = unicodedata.normalize("NFC", "Müller")
+
+        correspondent = CorrespondentFactory(name=nfd)
+        doc = DocumentFactory(
+            title="invoice",
+            correspondent=correspondent,
+            mime_type="application/pdf",
+        )
+        result = generate_filename(doc)
+
+        assert str(result) == f"{nfc}/invoice.pdf"
+        assert str(result).encode() == f"{nfc}/invoice.pdf".encode()
+
+    def test_nfd_storage_path_normalized_to_nfc(self, settings):
+        """NFD literal in StoragePath.path template must produce NFC path bytes."""
+        settings.FILENAME_FORMAT = None
+        nfd = unicodedata.normalize("NFD", "Büro")
+        nfc = unicodedata.normalize("NFC", "Büro")
+
+        # StoragePath.path is used directly as the format/template string.
+        # Literal NFD characters in the template must be rendered as NFC.
+        sp = StoragePathFactory(path=f"{nfd}/{{{{ title }}}}")
+        doc = DocumentFactory(title="doc", storage_path=sp, mime_type="application/pdf")
+        result = generate_filename(doc)
+
+        assert str(result).encode() == f"{nfc}/doc.pdf".encode()
+
+    def test_nfd_raw_document_title_normalized_to_nfc(self, settings):
+        """NFD title accessed via document.title (unsanitized context) must also be NFC."""
+        settings.FILENAME_FORMAT = "{{ document.title }}"
+        nfd = unicodedata.normalize("NFD", "Café")
+        nfc = unicodedata.normalize("NFC", "Café")
+
+        doc = DocumentFactory(title=nfd, mime_type="application/pdf")
+        result = generate_filename(doc)
+
+        assert str(result) == f"{nfc}.pdf"
+        assert str(result).encode() == f"{nfc}.pdf".encode()
+
+
+@pytest.mark.django_db
+class TestContextBuilderNFCNormalization:
+    """
+    Defense-in-depth: context builder functions must NFC-normalize string inputs
+    before passing them to sanitize_filename().  clean_filepath() already normalizes
+    the final rendered path, so these tests may pass without per-field normalization;
+    they exist as regression guards for the context-builder layer.
+    """
+
+    def test_nfd_tag_name_normalized_in_tag_list(self, settings):
+        """NFD tag name must appear as NFC bytes in the {{ tag_list }} shorthand."""
+        settings.FILENAME_FORMAT = "{{ tag_list }}/{{ title }}"
+        nfd = unicodedata.normalize("NFD", "Büro")
+        nfc = unicodedata.normalize("NFC", "Büro")
+        assert nfd != nfc  # confirm they differ at byte level
+
+        tag = TagFactory(name=nfd)
+        doc = DocumentFactory(title="doc", mime_type="application/pdf")
+        doc.tags.set([tag])
+
+        result = generate_filename(doc)
+
+        assert str(result).encode() == f"{nfc}/doc.pdf".encode()
+
+    def test_nfd_original_name_normalized_to_nfc(self, settings):
+        settings.FILENAME_FORMAT = "{{ original_name }}"
+        nfd = unicodedata.normalize("NFD", "Rechnung März")
+        nfc = unicodedata.normalize("NFC", "Rechnung März")
+
+        doc = DocumentFactory(
+            original_filename=f"{nfd}.pdf",
+            mime_type="application/pdf",
+        )
+        result = generate_filename(doc)
+
+        assert str(result).encode() == f"{nfc}.pdf".encode()
+
+    def test_nfd_custom_field_string_value_normalized(self, settings):
+        """NFD value in a STRING-type custom field must appear as NFC in the context."""
+        settings.FILENAME_FORMAT = (
+            "{{ custom_fields['Location']['value'] }}/{{ title }}"
+        )
+        nfd_value = unicodedata.normalize("NFD", "Düsseldorf")
+        nfc_value = unicodedata.normalize("NFC", "Düsseldorf")
+        assert nfd_value != nfc_value
+
+        doc = DocumentFactory(title="report", mime_type="application/pdf")
+        cf = CustomField.objects.create(
+            name="Location",
+            data_type=CustomField.FieldDataType.STRING,
+        )
+        CustomFieldInstance.objects.create(
+            document=doc,
+            field=cf,
+            value_text=nfd_value,
+        )
+
+        result = generate_filename(doc)
+
+        assert str(result).encode() == f"{nfc_value}/report.pdf".encode()
+
+    def test_nfd_custom_field_name_normalized_as_key(self, settings):
+        """NFD characters in a custom field name must appear as NFC in the context dict key."""
+        nfd_name = unicodedata.normalize("NFD", "Größe")
+        nfc_name = unicodedata.normalize("NFC", "Größe")
+        assert nfd_name != nfc_name
+
+        settings.FILENAME_FORMAT = f"{{% if custom_fields['{nfc_name}'] %}}{{{{ custom_fields['{nfc_name}']['value'] }}}}/{{{{ title }}}}{{% else %}}{{{{ title }}}}{{% endif %}}"
+
+        doc = DocumentFactory(title="letter", mime_type="application/pdf")
+        cf = CustomField.objects.create(
+            name=nfd_name,
+            data_type=CustomField.FieldDataType.STRING,
+        )
+        CustomFieldInstance.objects.create(
+            document=doc,
+            field=cf,
+            value_text="Berlin",
+        )
+
+        result = generate_filename(doc)
+
+        # If the field name key is NFC-normalized, the template condition succeeds
+        # and the result is "Berlin/letter.pdf"; otherwise it falls back to "letter.pdf"
+        assert str(result) == "Berlin/letter.pdf"
+
+    def test_nfd_tag_name_list_normalized_to_nfc(self, settings):
+        """NFD tag names in tag_name_list must appear as NFC bytes when iterated."""
+        settings.FILENAME_FORMAT = (
+            "{% for t in tag_name_list %}{{ t }}{% endfor %}/{{ title }}"
+        )
+        nfd = unicodedata.normalize("NFD", "Büro")
+        nfc = unicodedata.normalize("NFC", "Büro")
+        assert nfd != nfc  # confirm byte-level difference
+
+        doc = DocumentFactory(title="doc", mime_type="application/pdf")
+        doc.tags.add(TagFactory(name=nfd))
+        result = generate_filename(doc)
+
+        assert str(result).encode() == f"{nfc}/doc.pdf".encode()
diff --git a/src/documents/views.py b/src/documents/views.py
index cbc4560d8..5ed6fdaf5 100644
--- a/src/documents/views.py
+++ b/src/documents/views.py
@@ -3126,6 +3126,7 @@ class PostDocumentView(GenericAPIView[Any]):
         serializer.is_valid(raise_exception=True)
 
         doc_name, doc_data = serializer.validated_data.get("document")
+        doc_name = normalize("NFC", doc_name)
         correspondent_id = serializer.validated_data.get("correspondent")
         document_type_id = serializer.validated_data.get("document_type")
         storage_path_id = serializer.validated_data.get("storage_path")
diff --git a/src/paperless_mail/mail.py b/src/paperless_mail/mail.py
index d551cc8cd..acdc72703 100644
--- a/src/paperless_mail/mail.py
+++ b/src/paperless_mail/mail.py
@@ -4,6 +4,7 @@ import logging
 import ssl
 import tempfile
 import traceback
+import unicodedata
 from datetime import date
 from datetime import timedelta
 from fnmatch import fnmatch
@@ -496,10 +497,10 @@ class MailAccountHandler(LoggingMixin):
         rule: MailRule,
     ) -> str | None:
         if rule.assign_title_from == MailRule.TitleSource.FROM_SUBJECT:
-            return message.subject
+            return unicodedata.normalize("NFC", message.subject)
 
         elif rule.assign_title_from == MailRule.TitleSource.FROM_FILENAME:
-            return Path(att.filename).stem
+            return unicodedata.normalize("NFC", Path(att.filename).stem)
 
         elif rule.assign_title_from == MailRule.TitleSource.NONE:
             return None
@@ -866,7 +867,9 @@ class MailAccountHandler(LoggingMixin):
                     ),
                 )
 
-                attachment_name = pathvalidate.sanitize_filename(att.filename)
+                attachment_name = pathvalidate.sanitize_filename(
+                    unicodedata.normalize("NFC", att.filename),
+                )
                 if attachment_name:
                     temp_filename = temp_dir / attachment_name
                 else:  # pragma: no cover
@@ -882,7 +885,7 @@ class MailAccountHandler(LoggingMixin):
                 )
                 doc_overrides = DocumentMetadataOverrides(
                     title=title,
-                    filename=pathvalidate.sanitize_filename(att.filename),
+                    filename=attachment_name,
                     correspondent_id=correspondent.id if correspondent else None,
                     document_type_id=doc_type.id if doc_type else None,
                     tag_ids=tag_ids,
@@ -988,7 +991,9 @@ class MailAccountHandler(LoggingMixin):
         )
         doc_overrides = DocumentMetadataOverrides(
             title=message.subject,
-            filename=pathvalidate.sanitize_filename(f"{message.subject}.eml"),
+            filename=pathvalidate.sanitize_filename(
+                unicodedata.normalize("NFC", f"{message.subject}.eml"),
+            ),
             correspondent_id=correspondent.id if correspondent else None,
             document_type_id=doc_type.id if doc_type else None,
             tag_ids=tag_ids,
diff --git a/src/paperless_mail/tests/test_mail_nfc.py b/src/paperless_mail/tests/test_mail_nfc.py
new file mode 100644
index 000000000..bfef06da9
--- /dev/null
+++ b/src/paperless_mail/tests/test_mail_nfc.py
@@ -0,0 +1,182 @@
+"""
+Tests that mail attachment filenames and EML subject filenames are
+normalized to NFC Unicode before being stored as document overrides.
+
+Filenames from MIME headers can arrive in NFD form (e.g. from macOS Mail),
+and must be normalized to NFC so filenames are consistent regardless of the
+sending client.
+"""
+
+import unicodedata
+from pathlib import Path
+from unittest import mock
+
+import pytest
+
+from documents.tests.utils import remove_dirs
+from documents.tests.utils import setup_directories
+from paperless_mail.models import MailRule
+from paperless_mail.tests.factories import MailAccountFactory
+from paperless_mail.tests.test_mail import MessageBuilder
+from paperless_mail.tests.test_mail import _AttachmentDef
+from paperless_mail.tests.test_mail import fake_magic_from_buffer
+
+
+@pytest.fixture()
+def directories(settings):
+    dirs = setup_directories()
+    yield dirs
+    remove_dirs(dirs)
+
+
+@pytest.fixture()
+def queue_consumption_tasks_mock():
+    with mock.patch("paperless_mail.mail.queue_consumption_tasks") as m:
+        yield m
+
+
+@pytest.fixture()
+def mail_account(db):
+    return MailAccountFactory()
+
+
+@pytest.fixture()
+def attachment_rule(mail_account):
+    rule = MailRule(
+        name="attachment rule",
+        account=mail_account,
+        assign_title_from=MailRule.TitleSource.FROM_FILENAME,
+        consumption_scope=MailRule.ConsumptionScope.ATTACHMENTS_ONLY,
+        attachment_type=MailRule.AttachmentProcessing.ATTACHMENTS_ONLY,
+    )
+    rule.save()
+    return rule
+
+
+@pytest.fixture()
+def eml_rule(mail_account):
+    rule = MailRule(
+        name="eml rule",
+        account=mail_account,
+        assign_title_from=MailRule.TitleSource.FROM_SUBJECT,
+        consumption_scope=MailRule.ConsumptionScope.EML_ONLY,
+        attachment_type=MailRule.AttachmentProcessing.ATTACHMENTS_ONLY,
+    )
+    rule.save()
+    return rule
+
+
+@pytest.fixture()
+def message_builder():
+    return MessageBuilder()
+
+
+@pytest.mark.django_db
+@mock.patch("paperless_mail.mail.magic.from_buffer", fake_magic_from_buffer)
+class TestMailNFCNormalization:
+    """Attachment filenames and EML subject filenames must be NFC-normalized."""
+
+    def test_attachment_nfd_filename_normalized_to_nfc(
+        self,
+        directories,
+        queue_consumption_tasks_mock,
+        attachment_rule,
+        mail_account_handler,
+        message_builder,
+    ):
+        """Attachment filename arriving as NFD must be stored as NFC in both
+        the overrides and the temp file written to disk.
+        """
+        nfd_filename = unicodedata.normalize("NFD", "Rechnung März.pdf")
+        nfc_filename = unicodedata.normalize("NFC", "Rechnung März.pdf")
+
+        # Confirm the fixture is actually NFD (not already NFC)
+        assert unicodedata.is_normalized("NFD", nfd_filename)
+        assert not unicodedata.is_normalized("NFC", nfd_filename)
+
+        message = message_builder.create_message(
+            subject="Test invoice",
+            from_="sender@example.com",
+            attachments=[
+                _AttachmentDef(filename=nfd_filename, content=b"%PDF-1.4 test"),
+            ],
+        )
+
+        result = mail_account_handler._handle_message(message, attachment_rule)
+
+        assert result == 1
+        queue_consumption_tasks_mock.assert_called_once()
+
+        call_kwargs = queue_consumption_tasks_mock.call_args.kwargs
+        consume_tasks = call_kwargs["consume_tasks"]
+        assert len(consume_tasks) == 1
+
+        overrides = consume_tasks[0].kwargs["overrides"]
+        assert overrides.filename == nfc_filename
+        assert unicodedata.is_normalized("NFC", overrides.filename)
+        assert unicodedata.is_normalized("NFC", overrides.title)
+
+        input_doc = consume_tasks[0].kwargs["input_doc"]
+        original_file = Path(input_doc.original_file)
+        assert original_file.exists()
+        assert original_file.name == nfc_filename
+
+    def test_eml_subject_filename_nfc(
+        self,
+        directories,
+        queue_consumption_tasks_mock,
+        eml_rule,
+        mail_account_handler,
+        message_builder,
+    ):
+        """EML filename derived from subject arriving as NFD must be stored as NFC."""
+        nfd_subject = unicodedata.normalize("NFD", "Rechnung März 2024")
+        nfc_expected_filename = unicodedata.normalize("NFC", "Rechnung März 2024.eml")
+
+        # Confirm the fixture is actually NFD
+        assert unicodedata.is_normalized("NFD", nfd_subject)
+
+        message = message_builder.create_message(
+            subject=nfd_subject,
+            from_="sender@example.com",
+            attachments=0,
+        )
+
+        mail_account_handler._handle_message(message, eml_rule)
+
+        queue_consumption_tasks_mock.assert_called_once()
+
+        call_kwargs = queue_consumption_tasks_mock.call_args.kwargs
+        consume_tasks = call_kwargs["consume_tasks"]
+        assert len(consume_tasks) == 1
+
+        overrides = consume_tasks[0].kwargs["overrides"]
+        assert overrides.filename == nfc_expected_filename
+        assert unicodedata.is_normalized("NFC", overrides.filename)
+
+    def test_already_nfc_attachment_filename_unchanged(
+        self,
+        directories,
+        queue_consumption_tasks_mock,
+        attachment_rule,
+        mail_account_handler,
+        message_builder,
+    ):
+        """An attachment filename already in NFC must pass through unchanged."""
+        nfc_filename = "Invoice_2024.pdf"
+        assert unicodedata.is_normalized("NFC", nfc_filename)
+
+        message = message_builder.create_message(
+            subject="Invoice",
+            from_="sender@example.com",
+            attachments=[
+                _AttachmentDef(filename=nfc_filename, content=b"%PDF-1.4 test"),
+            ],
+        )
+
+        mail_account_handler._handle_message(message, attachment_rule)
+
+        call_kwargs = queue_consumption_tasks_mock.call_args.kwargs
+        consume_tasks = call_kwargs["consume_tasks"]
+        overrides = consume_tasks[0].kwargs["overrides"]
+        assert overrides.filename == nfc_filename