mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-03-07 17:51:22 +00:00
68 lines
2.1 KiB
Python
68 lines
2.1 KiB
Python
import logging
|
|
|
|
import pytest
|
|
|
|
from paperless import utils
|
|
from paperless.utils import ocr_to_dateparser_languages
|
|
|
|
|
|
@pytest.mark.parametrize(
    ("ocr_language", "expected"),
    [
        # Plain language codes, alone or joined with "+".
        pytest.param("eng", ["en"], id="single-language"),
        pytest.param("fra+ita+lao", ["fr", "it", "lo"], id="multiple-languages"),
        pytest.param("fil", ["fil"], id="no-two-letter-equivalent"),
        # Codes carrying a script suffix.
        pytest.param(
            "aze_cyrl+srp_latn",
            ["az-Cyrl", "sr-Latn"],
            id="script-supported-by-dateparser",
        ),
        pytest.param(
            "deu_frak",
            ["de"],
            id="script-not-supported-falls-back-to-language",
        ),
        pytest.param(
            "ita_unknownscript",
            ["it"],
            id="unknown-script-falls-back-to-language",
        ),
        # Variants and duplicates that collapse to a single entry.
        pytest.param(
            "chi_tra+chi_sim",
            ["zh"],
            id="chinese-variants-collapse-to-general",
        ),
        pytest.param("eng+eng", ["en"], id="duplicates-deduplicated"),
        # Unsupported entries are dropped from the result.
        pytest.param(
            "eng+unsupported_language+por",
            ["en", "pt"],
            id="unsupported-language-skipped",
        ),
        pytest.param(
            "unsupported1+unsupported2",
            [],
            id="all-unsupported-returns-empty",
        ),
    ],
)
def test_ocr_to_dateparser_languages(ocr_language: str, expected: list[str]) -> None:
    """Check the OCR language string maps to the expected dateparser codes.

    Both sides are sorted before comparison, so the order in which the
    function returns the codes is deliberately not part of the assertion.
    """
    converted = ocr_to_dateparser_languages(ocr_language)

    assert sorted(converted) == sorted(expected)
|
|
|
|
|
|
def test_ocr_to_dateparser_languages_exception(
    monkeypatch: pytest.MonkeyPatch,
    caplog: pytest.LogCaptureFixture,
) -> None:
    """Check the result and log output when the locale loader raises.

    ``utils.LocaleDataLoader`` is swapped for a stub whose ``get_locale_map``
    raises ``RuntimeError``. The conversion is then expected to return an
    empty list and to log a hint mentioning PAPERLESS_DATE_PARSER_LANGUAGES.
    """

    class FailingLoader:
        """Stand-in loader whose locale map lookup always fails."""

        def get_locale_map(self, locales=None):
            raise RuntimeError("Simulated error")

    # Replace the loader factory before the call under test is made.
    monkeypatch.setattr(utils, "LocaleDataLoader", lambda: FailingLoader())

    with caplog.at_level(logging.WARNING):
        languages = utils.ocr_to_dateparser_languages("eng+fra")

    expected_hint = "Set PAPERLESS_DATE_PARSER_LANGUAGES parameter to avoid this"
    assert languages == []
    assert expected_hint in caplog.text
|