mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-03-31 13:22:43 +00:00
test: use pytest-django settings fixture and pytest.param in new tests
- TestShouldProduceArchive: replace @override_settings decorators with settings fixture; consolidate 10 individual tests into 2 parametrized tests (test_generation_setting, test_auto_pdf_archive_decision)
- TestDeprecatedV2OcrEnvVarWarnings: call check_deprecated_v2_ocr_env_vars() directly instead of django_checks.run_checks(); use mocker.patch.dict for env isolation; consolidate warn cases into one parametrized test

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -7,7 +7,6 @@ from unittest.mock import MagicMock
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
from django.test import override_settings
|
||||
|
||||
from documents.consumer import should_produce_archive
|
||||
|
||||
@@ -54,115 +53,105 @@ def patch_app_config(mocker, null_app_config):
|
||||
|
||||
|
||||
class TestShouldProduceArchive:
|
||||
@override_settings(ARCHIVE_FILE_GENERATION="never")
|
||||
def test_never_setting_returns_false(self) -> None:
|
||||
parser = _parser_instance(can_produce=True, requires_rendition=False)
|
||||
result = should_produce_archive(
|
||||
parser,
|
||||
"application/pdf",
|
||||
Path("/tmp/doc.pdf"),
|
||||
)
|
||||
assert result is False
|
||||
|
||||
@override_settings(ARCHIVE_FILE_GENERATION="always")
|
||||
def test_always_setting_returns_true(self) -> None:
|
||||
parser = _parser_instance(can_produce=True, requires_rendition=False)
|
||||
result = should_produce_archive(
|
||||
parser,
|
||||
"application/pdf",
|
||||
Path("/tmp/doc.pdf"),
|
||||
)
|
||||
assert result is True
|
||||
|
||||
@override_settings(ARCHIVE_FILE_GENERATION="never")
|
||||
def test_requires_pdf_rendition_overrides_never(self) -> None:
|
||||
"""requires_pdf_rendition=True forces archive even when setting is never."""
|
||||
parser = _parser_instance(can_produce=True, requires_rendition=True)
|
||||
result = should_produce_archive(
|
||||
parser,
|
||||
"application/pdf",
|
||||
Path("/tmp/doc.pdf"),
|
||||
)
|
||||
assert result is True
|
||||
|
||||
@override_settings(ARCHIVE_FILE_GENERATION="always")
|
||||
def test_cannot_produce_archive_overrides_always(self) -> None:
|
||||
"""can_produce_archive=False prevents archive even when setting is always."""
|
||||
parser = _parser_instance(can_produce=False, requires_rendition=False)
|
||||
result = should_produce_archive(parser, "text/plain", Path("/tmp/doc.txt"))
|
||||
assert result is False
|
||||
|
||||
@override_settings(ARCHIVE_FILE_GENERATION="auto")
|
||||
def test_auto_image_returns_true(self) -> None:
|
||||
"""auto mode: image/* MIME types always produce archive (scanned doc)."""
|
||||
parser = _parser_instance(can_produce=True, requires_rendition=False)
|
||||
result = should_produce_archive(parser, "image/tiff", Path("/tmp/scan.tiff"))
|
||||
assert result is True
|
||||
|
||||
@override_settings(ARCHIVE_FILE_GENERATION="auto")
|
||||
def test_auto_born_digital_pdf_returns_false(self) -> None:
|
||||
"""auto mode: PDF with substantial text (born-digital) skips archive."""
|
||||
parser = _parser_instance(can_produce=True, requires_rendition=False)
|
||||
long_text = "This is a born-digital PDF with lots of text content. " * 10
|
||||
with patch(
|
||||
"documents.consumer.extract_pdf_text",
|
||||
return_value=long_text,
|
||||
):
|
||||
result = should_produce_archive(
|
||||
parser,
|
||||
@pytest.mark.parametrize(
|
||||
("generation", "can_produce", "requires_rendition", "mime", "expected"),
|
||||
[
|
||||
pytest.param(
|
||||
"never",
|
||||
True,
|
||||
False,
|
||||
"application/pdf",
|
||||
Path("/tmp/doc.pdf"),
|
||||
)
|
||||
assert result is False
|
||||
|
||||
@override_settings(ARCHIVE_FILE_GENERATION="auto")
|
||||
def test_auto_scanned_pdf_no_text_returns_true(self) -> None:
|
||||
"""auto mode: PDF where pdftotext returns None (scanned) produces archive."""
|
||||
parser = _parser_instance(can_produce=True, requires_rendition=False)
|
||||
with patch(
|
||||
"documents.consumer.extract_pdf_text",
|
||||
return_value=None,
|
||||
):
|
||||
result = should_produce_archive(
|
||||
parser,
|
||||
False,
|
||||
id="never-returns-false",
|
||||
),
|
||||
pytest.param(
|
||||
"always",
|
||||
True,
|
||||
False,
|
||||
"application/pdf",
|
||||
Path("/tmp/scan.pdf"),
|
||||
)
|
||||
assert result is True
|
||||
|
||||
@override_settings(ARCHIVE_FILE_GENERATION="auto")
|
||||
def test_auto_pdf_short_text_returns_true(self) -> None:
|
||||
"""auto mode: PDF with very short text (<=50 chars) is treated as scanned."""
|
||||
parser = _parser_instance(can_produce=True, requires_rendition=False)
|
||||
with patch(
|
||||
"documents.consumer.extract_pdf_text",
|
||||
return_value="tiny",
|
||||
):
|
||||
result = should_produce_archive(
|
||||
parser,
|
||||
True,
|
||||
id="always-returns-true",
|
||||
),
|
||||
pytest.param(
|
||||
"never",
|
||||
True,
|
||||
True,
|
||||
"application/pdf",
|
||||
Path("/tmp/scan.pdf"),
|
||||
)
|
||||
assert result is True
|
||||
True,
|
||||
id="requires-rendition-overrides-never",
|
||||
),
|
||||
pytest.param(
|
||||
"always",
|
||||
False,
|
||||
False,
|
||||
"text/plain",
|
||||
False,
|
||||
id="cannot-produce-overrides-always",
|
||||
),
|
||||
pytest.param(
|
||||
"always",
|
||||
False,
|
||||
True,
|
||||
"application/pdf",
|
||||
True,
|
||||
id="requires-rendition-wins-even-if-cannot-produce",
|
||||
),
|
||||
pytest.param(
|
||||
"auto",
|
||||
True,
|
||||
False,
|
||||
"image/tiff",
|
||||
True,
|
||||
id="auto-image-returns-true",
|
||||
),
|
||||
pytest.param(
|
||||
"auto",
|
||||
True,
|
||||
False,
|
||||
"message/rfc822",
|
||||
False,
|
||||
id="auto-non-pdf-non-image-returns-false",
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_generation_setting(
|
||||
self,
|
||||
settings,
|
||||
generation: str,
|
||||
can_produce: bool, # noqa: FBT001
|
||||
requires_rendition: bool, # noqa: FBT001
|
||||
mime: str,
|
||||
expected: bool, # noqa: FBT001
|
||||
) -> None:
|
||||
settings.ARCHIVE_FILE_GENERATION = generation
|
||||
parser = _parser_instance(
|
||||
can_produce=can_produce,
|
||||
requires_rendition=requires_rendition,
|
||||
)
|
||||
assert should_produce_archive(parser, mime, Path("/tmp/doc")) is expected
|
||||
|
||||
@override_settings(ARCHIVE_FILE_GENERATION="auto")
|
||||
def test_auto_non_pdf_non_image_returns_false(self) -> None:
|
||||
"""auto mode: other MIME types (e.g. email) don't produce archive by default."""
|
||||
@pytest.mark.parametrize(
|
||||
("extracted_text", "expected"),
|
||||
[
|
||||
pytest.param(
|
||||
"This is a born-digital PDF with lots of text content. " * 10,
|
||||
False,
|
||||
id="born-digital-long-text-skips-archive",
|
||||
),
|
||||
pytest.param(None, True, id="no-text-scanned-produces-archive"),
|
||||
pytest.param("tiny", True, id="short-text-treated-as-scanned"),
|
||||
],
|
||||
)
|
||||
def test_auto_pdf_archive_decision(
|
||||
self,
|
||||
settings,
|
||||
extracted_text: str | None,
|
||||
expected: bool, # noqa: FBT001
|
||||
) -> None:
|
||||
settings.ARCHIVE_FILE_GENERATION = "auto"
|
||||
parser = _parser_instance(can_produce=True, requires_rendition=False)
|
||||
result = should_produce_archive(
|
||||
parser,
|
||||
"message/rfc822",
|
||||
Path("/tmp/email.eml"),
|
||||
)
|
||||
assert result is False
|
||||
|
||||
@override_settings(ARCHIVE_FILE_GENERATION="always")
|
||||
def test_requires_rendition_with_can_produce_false_returns_true(self) -> None:
|
||||
"""requires_pdf_rendition=True always wins, even if can_produce_archive=False."""
|
||||
parser = _parser_instance(can_produce=False, requires_rendition=True)
|
||||
result = should_produce_archive(
|
||||
parser,
|
||||
"application/pdf",
|
||||
Path("/tmp/doc.pdf"),
|
||||
)
|
||||
assert result is True
|
||||
with patch("documents.consumer.extract_pdf_text", return_value=extracted_text):
|
||||
assert (
|
||||
should_produce_archive(parser, "application/pdf", Path("/tmp/doc.pdf"))
|
||||
is expected
|
||||
)
|
||||
|
||||
@@ -2,52 +2,63 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import pytest
|
||||
from django.core import checks as django_checks
|
||||
|
||||
from paperless.checks import check_deprecated_v2_ocr_env_vars
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pytest_mock import MockerFixture
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
class TestDeprecatedV2OcrEnvVarWarnings:
|
||||
def test_old_skip_archive_file_env_warns(self, monkeypatch) -> None:
|
||||
monkeypatch.setenv("PAPERLESS_OCR_SKIP_ARCHIVE_FILE", "always")
|
||||
all_checks = django_checks.run_checks()
|
||||
warns = [
|
||||
e
|
||||
for e in all_checks
|
||||
if "PAPERLESS_OCR_SKIP_ARCHIVE_FILE" in str(getattr(e, "msg", ""))
|
||||
]
|
||||
assert warns
|
||||
def test_no_deprecated_vars_returns_empty(self, mocker: MockerFixture) -> None:
|
||||
"""No warnings when neither deprecated variable is set."""
|
||||
mocker.patch.dict(os.environ, {"PAPERLESS_OCR_MODE": "auto"}, clear=True)
|
||||
result = check_deprecated_v2_ocr_env_vars(None)
|
||||
assert result == []
|
||||
|
||||
def test_old_skip_mode_env_warns(self, monkeypatch) -> None:
|
||||
monkeypatch.setenv("PAPERLESS_OCR_MODE", "skip")
|
||||
all_checks = django_checks.run_checks()
|
||||
warns = [
|
||||
e
|
||||
for e in all_checks
|
||||
if "skip" in str(getattr(e, "msg", "")).lower()
|
||||
and "OCR_MODE" in str(getattr(e, "msg", ""))
|
||||
]
|
||||
assert warns
|
||||
@pytest.mark.parametrize(
|
||||
("env_var", "env_value", "expected_id", "expected_fragment"),
|
||||
[
|
||||
pytest.param(
|
||||
"PAPERLESS_OCR_SKIP_ARCHIVE_FILE",
|
||||
"always",
|
||||
"paperless.W002",
|
||||
"PAPERLESS_OCR_SKIP_ARCHIVE_FILE",
|
||||
id="skip-archive-file-warns",
|
||||
),
|
||||
pytest.param(
|
||||
"PAPERLESS_OCR_MODE",
|
||||
"skip",
|
||||
"paperless.W003",
|
||||
"skip",
|
||||
id="ocr-mode-skip-warns",
|
||||
),
|
||||
pytest.param(
|
||||
"PAPERLESS_OCR_MODE",
|
||||
"skip_noarchive",
|
||||
"paperless.W003",
|
||||
"skip_noarchive",
|
||||
id="ocr-mode-skip-noarchive-warns",
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_deprecated_var_produces_one_warning(
|
||||
self,
|
||||
mocker: MockerFixture,
|
||||
env_var: str,
|
||||
env_value: str,
|
||||
expected_id: str,
|
||||
expected_fragment: str,
|
||||
) -> None:
|
||||
"""Each deprecated setting in isolation produces exactly one warning."""
|
||||
mocker.patch.dict(os.environ, {env_var: env_value}, clear=True)
|
||||
result = check_deprecated_v2_ocr_env_vars(None)
|
||||
|
||||
def test_old_skip_noarchive_mode_env_warns(self, monkeypatch) -> None:
|
||||
monkeypatch.setenv("PAPERLESS_OCR_MODE", "skip_noarchive")
|
||||
all_checks = django_checks.run_checks()
|
||||
warns = [
|
||||
e for e in all_checks if "skip_noarchive" in str(getattr(e, "msg", ""))
|
||||
]
|
||||
assert warns
|
||||
|
||||
def test_no_deprecated_vars_no_warning(self, monkeypatch) -> None:
|
||||
monkeypatch.delenv("PAPERLESS_OCR_SKIP_ARCHIVE_FILE", raising=False)
|
||||
monkeypatch.setenv("PAPERLESS_OCR_MODE", "auto")
|
||||
all_checks = django_checks.run_checks()
|
||||
deprecated_warns = [
|
||||
e
|
||||
for e in all_checks
|
||||
if "PAPERLESS_OCR_SKIP_ARCHIVE_FILE" in str(getattr(e, "msg", ""))
|
||||
or (
|
||||
"skip" in str(getattr(e, "msg", "")).lower()
|
||||
and "OCR_MODE" in str(getattr(e, "msg", ""))
|
||||
)
|
||||
]
|
||||
assert not deprecated_warns
|
||||
assert len(result) == 1
|
||||
warning = result[0]
|
||||
assert warning.id == expected_id
|
||||
assert expected_fragment in warning.msg
|
||||
|
||||
Reference in New Issue
Block a user