mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-07-05 19:45:10 +00:00
test: use pytest-django settings fixture and pytest.param in new tests
- TestShouldProduceArchive: replace @override_settings decorators with the settings fixture; consolidate 10 individual tests into 2 parametrized tests (test_generation_setting, test_auto_pdf_archive_decision)
- TestDeprecatedV2OcrEnvVarWarnings: call check_deprecated_v2_ocr_env_vars() directly instead of django_checks.run_checks(); use mocker.patch.dict for env isolation; consolidate warn cases into one parametrized test

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -7,7 +7,6 @@ from unittest.mock import MagicMock
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
from django.test import override_settings
|
||||
|
||||
from documents.consumer import should_produce_archive
|
||||
|
||||
@@ -54,115 +53,105 @@ def patch_app_config(mocker, null_app_config):
|
||||
|
||||
|
||||
class TestShouldProduceArchive:
|
||||
@override_settings(ARCHIVE_FILE_GENERATION="never")
|
||||
def test_never_setting_returns_false(self) -> None:
|
||||
parser = _parser_instance(can_produce=True, requires_rendition=False)
|
||||
result = should_produce_archive(
|
||||
parser,
|
||||
"application/pdf",
|
||||
Path("/tmp/doc.pdf"),
|
||||
)
|
||||
assert result is False
|
||||
|
||||
@override_settings(ARCHIVE_FILE_GENERATION="always")
|
||||
def test_always_setting_returns_true(self) -> None:
|
||||
parser = _parser_instance(can_produce=True, requires_rendition=False)
|
||||
result = should_produce_archive(
|
||||
parser,
|
||||
"application/pdf",
|
||||
Path("/tmp/doc.pdf"),
|
||||
)
|
||||
assert result is True
|
||||
|
||||
@override_settings(ARCHIVE_FILE_GENERATION="never")
|
||||
def test_requires_pdf_rendition_overrides_never(self) -> None:
|
||||
"""requires_pdf_rendition=True forces archive even when setting is never."""
|
||||
parser = _parser_instance(can_produce=True, requires_rendition=True)
|
||||
result = should_produce_archive(
|
||||
parser,
|
||||
"application/pdf",
|
||||
Path("/tmp/doc.pdf"),
|
||||
)
|
||||
assert result is True
|
||||
|
||||
@override_settings(ARCHIVE_FILE_GENERATION="always")
|
||||
def test_cannot_produce_archive_overrides_always(self) -> None:
|
||||
"""can_produce_archive=False prevents archive even when setting is always."""
|
||||
parser = _parser_instance(can_produce=False, requires_rendition=False)
|
||||
result = should_produce_archive(parser, "text/plain", Path("/tmp/doc.txt"))
|
||||
assert result is False
|
||||
|
||||
@override_settings(ARCHIVE_FILE_GENERATION="auto")
|
||||
def test_auto_image_returns_true(self) -> None:
|
||||
"""auto mode: image/* MIME types always produce archive (scanned doc)."""
|
||||
parser = _parser_instance(can_produce=True, requires_rendition=False)
|
||||
result = should_produce_archive(parser, "image/tiff", Path("/tmp/scan.tiff"))
|
||||
assert result is True
|
||||
|
||||
@override_settings(ARCHIVE_FILE_GENERATION="auto")
|
||||
def test_auto_born_digital_pdf_returns_false(self) -> None:
|
||||
"""auto mode: PDF with substantial text (born-digital) skips archive."""
|
||||
parser = _parser_instance(can_produce=True, requires_rendition=False)
|
||||
long_text = "This is a born-digital PDF with lots of text content. " * 10
|
||||
with patch(
|
||||
"documents.consumer.extract_pdf_text",
|
||||
return_value=long_text,
|
||||
):
|
||||
result = should_produce_archive(
|
||||
parser,
|
||||
@pytest.mark.parametrize(
|
||||
("generation", "can_produce", "requires_rendition", "mime", "expected"),
|
||||
[
|
||||
pytest.param(
|
||||
"never",
|
||||
True,
|
||||
False,
|
||||
"application/pdf",
|
||||
Path("/tmp/doc.pdf"),
|
||||
)
|
||||
assert result is False
|
||||
|
||||
@override_settings(ARCHIVE_FILE_GENERATION="auto")
|
||||
def test_auto_scanned_pdf_no_text_returns_true(self) -> None:
|
||||
"""auto mode: PDF where pdftotext returns None (scanned) produces archive."""
|
||||
parser = _parser_instance(can_produce=True, requires_rendition=False)
|
||||
with patch(
|
||||
"documents.consumer.extract_pdf_text",
|
||||
return_value=None,
|
||||
):
|
||||
result = should_produce_archive(
|
||||
parser,
|
||||
False,
|
||||
id="never-returns-false",
|
||||
),
|
||||
pytest.param(
|
||||
"always",
|
||||
True,
|
||||
False,
|
||||
"application/pdf",
|
||||
Path("/tmp/scan.pdf"),
|
||||
)
|
||||
assert result is True
|
||||
|
||||
@override_settings(ARCHIVE_FILE_GENERATION="auto")
|
||||
def test_auto_pdf_short_text_returns_true(self) -> None:
|
||||
"""auto mode: PDF with very short text (<=50 chars) is treated as scanned."""
|
||||
parser = _parser_instance(can_produce=True, requires_rendition=False)
|
||||
with patch(
|
||||
"documents.consumer.extract_pdf_text",
|
||||
return_value="tiny",
|
||||
):
|
||||
result = should_produce_archive(
|
||||
parser,
|
||||
True,
|
||||
id="always-returns-true",
|
||||
),
|
||||
pytest.param(
|
||||
"never",
|
||||
True,
|
||||
True,
|
||||
"application/pdf",
|
||||
Path("/tmp/scan.pdf"),
|
||||
)
|
||||
assert result is True
|
||||
True,
|
||||
id="requires-rendition-overrides-never",
|
||||
),
|
||||
pytest.param(
|
||||
"always",
|
||||
False,
|
||||
False,
|
||||
"text/plain",
|
||||
False,
|
||||
id="cannot-produce-overrides-always",
|
||||
),
|
||||
pytest.param(
|
||||
"always",
|
||||
False,
|
||||
True,
|
||||
"application/pdf",
|
||||
True,
|
||||
id="requires-rendition-wins-even-if-cannot-produce",
|
||||
),
|
||||
pytest.param(
|
||||
"auto",
|
||||
True,
|
||||
False,
|
||||
"image/tiff",
|
||||
True,
|
||||
id="auto-image-returns-true",
|
||||
),
|
||||
pytest.param(
|
||||
"auto",
|
||||
True,
|
||||
False,
|
||||
"message/rfc822",
|
||||
False,
|
||||
id="auto-non-pdf-non-image-returns-false",
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_generation_setting(
|
||||
self,
|
||||
settings,
|
||||
generation: str,
|
||||
can_produce: bool, # noqa: FBT001
|
||||
requires_rendition: bool, # noqa: FBT001
|
||||
mime: str,
|
||||
expected: bool, # noqa: FBT001
|
||||
) -> None:
|
||||
settings.ARCHIVE_FILE_GENERATION = generation
|
||||
parser = _parser_instance(
|
||||
can_produce=can_produce,
|
||||
requires_rendition=requires_rendition,
|
||||
)
|
||||
assert should_produce_archive(parser, mime, Path("/tmp/doc")) is expected
|
||||
|
||||
@override_settings(ARCHIVE_FILE_GENERATION="auto")
|
||||
def test_auto_non_pdf_non_image_returns_false(self) -> None:
|
||||
"""auto mode: other MIME types (e.g. email) don't produce archive by default."""
|
||||
@pytest.mark.parametrize(
|
||||
("extracted_text", "expected"),
|
||||
[
|
||||
pytest.param(
|
||||
"This is a born-digital PDF with lots of text content. " * 10,
|
||||
False,
|
||||
id="born-digital-long-text-skips-archive",
|
||||
),
|
||||
pytest.param(None, True, id="no-text-scanned-produces-archive"),
|
||||
pytest.param("tiny", True, id="short-text-treated-as-scanned"),
|
||||
],
|
||||
)
|
||||
def test_auto_pdf_archive_decision(
|
||||
self,
|
||||
settings,
|
||||
extracted_text: str | None,
|
||||
expected: bool, # noqa: FBT001
|
||||
) -> None:
|
||||
settings.ARCHIVE_FILE_GENERATION = "auto"
|
||||
parser = _parser_instance(can_produce=True, requires_rendition=False)
|
||||
result = should_produce_archive(
|
||||
parser,
|
||||
"message/rfc822",
|
||||
Path("/tmp/email.eml"),
|
||||
)
|
||||
assert result is False
|
||||
|
||||
@override_settings(ARCHIVE_FILE_GENERATION="always")
|
||||
def test_requires_rendition_with_can_produce_false_returns_true(self) -> None:
|
||||
"""requires_pdf_rendition=True always wins, even if can_produce_archive=False."""
|
||||
parser = _parser_instance(can_produce=False, requires_rendition=True)
|
||||
result = should_produce_archive(
|
||||
parser,
|
||||
"application/pdf",
|
||||
Path("/tmp/doc.pdf"),
|
||||
)
|
||||
assert result is True
|
||||
with patch("documents.consumer.extract_pdf_text", return_value=extracted_text):
|
||||
assert (
|
||||
should_produce_archive(parser, "application/pdf", Path("/tmp/doc.pdf"))
|
||||
is expected
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user