Fix (beta): preserve Unicode in localization prompt (#13055)

This commit is contained in:
shamoon
2026-06-27 06:53:36 -06:00
committed by GitHub
parent 00baacb26c
commit a8bfa25efd
2 changed files with 18 additions and 1 deletion
+1 -1
View File
@@ -87,7 +87,7 @@ def build_localization_prompt(suggestions: dict, output_language: str) -> str:
Return the same JSON schema with all fields present.
Suggestions:
{json.dumps(suggestions)}
{json.dumps(suggestions, ensure_ascii=False)}
""".strip()
@@ -239,6 +239,23 @@ def test_get_language_name_falls_back_to_language_code():
assert get_language_name("zz-zz") == "zz-zz"
def test_build_localization_prompt_preserves_unicode_characters():
    """Check that non-ASCII text in the suggestions appears verbatim in the prompt."""
    # Only the title carries a non-ASCII character; the remaining fields are
    # left empty so the assertions below can only be affected by the title.
    suggestions = {
        "title": "Gebührenbescheid",
        "tags": [],
        "correspondents": [],
        "document_types": [],
        "storage_paths": [],
        "dates": [],
    }

    rendered_prompt = build_localization_prompt(
        suggestions,
        output_language="de-de",
    )

    # The umlaut must be present as a literal character ...
    assert "Gebührenbescheid" in rendered_prompt
    # ... and must not have been rewritten as an ASCII \u00fc escape sequence.
    assert "\\u00fc" not in rendered_prompt
@patch("paperless_ai.ai_classifier.query_similar_documents")
def test_get_context_for_document(
mock_query_similar_documents,