Keep non-ASCII characters literal in the localization prompt by passing
ensure_ascii=False to json.dumps, and add a regression test covering it.
NOTE(review): line breaks and leading whitespace were reconstructed from the
hunk line counts; confirm context-line indentation against the target tree.

diff --git a/src/paperless_ai/ai_classifier.py b/src/paperless_ai/ai_classifier.py
index f9c2f1e06..41bf8e903 100644
--- a/src/paperless_ai/ai_classifier.py
+++ b/src/paperless_ai/ai_classifier.py
@@ -87,7 +87,7 @@ def build_localization_prompt(suggestions: dict, output_language: str) -> str:
     Return the same JSON schema with all fields present.
 
     Suggestions:
-    {json.dumps(suggestions)}
+    {json.dumps(suggestions, ensure_ascii=False)}
     """.strip()
 
 
diff --git a/src/paperless_ai/tests/test_ai_classifier.py b/src/paperless_ai/tests/test_ai_classifier.py
index 45822b14b..470c7fe07 100644
--- a/src/paperless_ai/tests/test_ai_classifier.py
+++ b/src/paperless_ai/tests/test_ai_classifier.py
@@ -239,6 +239,23 @@ def test_get_language_name_falls_back_to_language_code():
     assert get_language_name("zz-zz") == "zz-zz"
 
 
+def test_build_localization_prompt_preserves_unicode_characters():
+    prompt = build_localization_prompt(
+        {
+            "title": "Gebührenbescheid",
+            "tags": [],
+            "correspondents": [],
+            "document_types": [],
+            "storage_paths": [],
+            "dates": [],
+        },
+        output_language="de-de",
+    )
+
+    assert "Gebührenbescheid" in prompt
+    assert "\\u00fc" not in prompt
+
+
 @patch("paperless_ai.ai_classifier.query_similar_documents")
 def test_get_context_for_document(
     mock_query_similar_documents,