test: use pytest-django settings fixture and pytest.param in new tests

- TestShouldProduceArchive: replace @override_settings decorators with
  settings fixture; consolidate 10 individual tests into 2 parametrized
  tests (test_generation_setting, test_auto_pdf_archive_decision)
- TestDeprecatedV2OcrEnvVarWarnings: call check_deprecated_v2_ocr_env_vars()
  directly instead of django_checks.run_checks(); use mocker.patch.dict for
  env isolation; consolidate warn cases into one parametrized test

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Trenton H
2026-03-26 16:27:37 -07:00
parent 2729b0d3dc
commit 68322376f2
2 changed files with 151 additions and 151 deletions
+97 -108
View File
@@ -7,7 +7,6 @@ from unittest.mock import MagicMock
from unittest.mock import patch
import pytest
from django.test import override_settings
from documents.consumer import should_produce_archive
@@ -54,115 +53,105 @@ def patch_app_config(mocker, null_app_config):
class TestShouldProduceArchive:
    """Tests for ``should_produce_archive`` under each ARCHIVE_FILE_GENERATION mode.

    Settings are overridden per test through the pytest-django ``settings``
    fixture rather than ``@override_settings`` decorators.
    """

    @pytest.mark.parametrize(
        ("generation", "can_produce", "requires_rendition", "mime", "expected"),
        [
            pytest.param(
                "never",
                True,
                False,
                "application/pdf",
                False,
                id="never-returns-false",
            ),
            pytest.param(
                "always",
                True,
                False,
                "application/pdf",
                True,
                id="always-returns-true",
            ),
            pytest.param(
                "never",
                True,
                True,
                "application/pdf",
                True,
                id="requires-rendition-overrides-never",
            ),
            pytest.param(
                "always",
                False,
                False,
                "text/plain",
                False,
                id="cannot-produce-overrides-always",
            ),
            pytest.param(
                "always",
                False,
                True,
                "application/pdf",
                True,
                id="requires-rendition-wins-even-if-cannot-produce",
            ),
            pytest.param(
                "auto",
                True,
                False,
                "image/tiff",
                True,
                id="auto-image-returns-true",
            ),
            pytest.param(
                "auto",
                True,
                False,
                "message/rfc822",
                False,
                id="auto-non-pdf-non-image-returns-false",
            ),
        ],
    )
    def test_generation_setting(
        self,
        settings,
        generation: str,
        can_produce: bool,  # noqa: FBT001
        requires_rendition: bool,  # noqa: FBT001
        mime: str,
        expected: bool,  # noqa: FBT001
    ) -> None:
        """Check the archive decision for a setting / parser-capability / MIME combination.

        Each case sets ``ARCHIVE_FILE_GENERATION`` to ``generation``, builds a
        parser with the given ``can_produce`` / ``requires_rendition`` flags and
        asserts that ``should_produce_archive`` returns exactly ``expected``.
        None of these cases patches ``extract_pdf_text``; the PDF text-based
        decisions in ``auto`` mode are covered by
        ``test_auto_pdf_archive_decision``.
        """
        settings.ARCHIVE_FILE_GENERATION = generation
        parser = _parser_instance(
            can_produce=can_produce,
            requires_rendition=requires_rendition,
        )
        assert should_produce_archive(parser, mime, Path("/tmp/doc")) is expected

    @pytest.mark.parametrize(
        ("extracted_text", "expected"),
        [
            pytest.param(
                "This is a born-digital PDF with lots of text content. " * 10,
                False,
                id="born-digital-long-text-skips-archive",
            ),
            pytest.param(None, True, id="no-text-scanned-produces-archive"),
            pytest.param("tiny", True, id="short-text-treated-as-scanned"),
        ],
    )
    def test_auto_pdf_archive_decision(
        self,
        settings,
        extracted_text: str | None,
        expected: bool,  # noqa: FBT001
    ) -> None:
        """Check the ``auto`` mode decision for PDFs based on the extracted text.

        ``documents.consumer.extract_pdf_text`` is patched to return
        ``extracted_text`` so no real PDF is read: long text is expected to skip
        the archive, while ``None`` or very short text is expected to produce one.
        """
        settings.ARCHIVE_FILE_GENERATION = "auto"
        parser = _parser_instance(can_produce=True, requires_rendition=False)
        # Patch the name as looked up inside documents.consumer so the stub is
        # what should_produce_archive actually calls.
        with patch("documents.consumer.extract_pdf_text", return_value=extracted_text):
            assert (
                should_produce_archive(parser, "application/pdf", Path("/tmp/doc.pdf"))
                is expected
            )