Files
paperless-ngx/src/paperless/tests/test_utils.py
2026-03-06 13:04:23 -08:00

68 lines
2.1 KiB
Python

import logging
import pytest
from paperless import utils
from paperless.utils import ocr_to_dateparser_languages
@pytest.mark.parametrize(
("ocr_language", "expected"),
[
pytest.param("eng", ["en"], id="single-language"),
pytest.param("fra+ita+lao", ["fr", "it", "lo"], id="multiple-languages"),
pytest.param("fil", ["fil"], id="no-two-letter-equivalent"),
pytest.param(
"aze_cyrl+srp_latn",
["az-Cyrl", "sr-Latn"],
id="script-supported-by-dateparser",
),
pytest.param(
"deu_frak",
["de"],
id="script-not-supported-falls-back-to-language",
),
pytest.param(
"chi_tra+chi_sim",
["zh"],
id="chinese-variants-collapse-to-general",
),
pytest.param(
"eng+unsupported_language+por",
["en", "pt"],
id="unsupported-language-skipped",
),
pytest.param(
"unsupported1+unsupported2",
[],
id="all-unsupported-returns-empty",
),
pytest.param("eng+eng", ["en"], id="duplicates-deduplicated"),
pytest.param(
"ita_unknownscript",
["it"],
id="unknown-script-falls-back-to-language",
),
],
)
def test_ocr_to_dateparser_languages(ocr_language: str, expected: list[str]) -> None:
assert sorted(ocr_to_dateparser_languages(ocr_language)) == sorted(expected)
def test_ocr_to_dateparser_languages_exception(
monkeypatch: pytest.MonkeyPatch,
caplog: pytest.LogCaptureFixture,
) -> None:
# Patch LocaleDataLoader.get_locale_map to raise an exception
class DummyLoader:
def get_locale_map(self, locales=None):
raise RuntimeError("Simulated error")
with caplog.at_level(logging.WARNING):
monkeypatch.setattr(utils, "LocaleDataLoader", lambda: DummyLoader())
result = utils.ocr_to_dateparser_languages("eng+fra")
assert result == []
assert (
"Set PAPERLESS_DATE_PARSER_LANGUAGES parameter to avoid this" in caplog.text
)