test: use pytest-django settings fixture and pytest.param in new tests

- TestShouldProduceArchive: replace @override_settings decorators with
  settings fixture; consolidate 10 individual tests into 2 parametrized
  tests (test_generation_setting, test_auto_pdf_archive_decision)
- TestDeprecatedV2OcrEnvVarWarnings: call check_deprecated_v2_ocr_env_vars()
  directly instead of django_checks.run_checks(); use mocker.patch.dict for
  env isolation; consolidate warn cases into one parametrized test

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Trenton H
2026-03-26 16:27:37 -07:00
parent 2729b0d3dc
commit 68322376f2
2 changed files with 151 additions and 151 deletions

View File

@@ -7,7 +7,6 @@ from unittest.mock import MagicMock
from unittest.mock import patch
import pytest
from django.test import override_settings
from documents.consumer import should_produce_archive
@@ -54,115 +53,105 @@ def patch_app_config(mocker, null_app_config):
class TestShouldProduceArchive:
@override_settings(ARCHIVE_FILE_GENERATION="never")
def test_never_setting_returns_false(self) -> None:
parser = _parser_instance(can_produce=True, requires_rendition=False)
result = should_produce_archive(
parser,
"application/pdf",
Path("/tmp/doc.pdf"),
)
assert result is False
@override_settings(ARCHIVE_FILE_GENERATION="always")
def test_always_setting_returns_true(self) -> None:
parser = _parser_instance(can_produce=True, requires_rendition=False)
result = should_produce_archive(
parser,
"application/pdf",
Path("/tmp/doc.pdf"),
)
assert result is True
@override_settings(ARCHIVE_FILE_GENERATION="never")
def test_requires_pdf_rendition_overrides_never(self) -> None:
"""requires_pdf_rendition=True forces archive even when setting is never."""
parser = _parser_instance(can_produce=True, requires_rendition=True)
result = should_produce_archive(
parser,
"application/pdf",
Path("/tmp/doc.pdf"),
)
assert result is True
@override_settings(ARCHIVE_FILE_GENERATION="always")
def test_cannot_produce_archive_overrides_always(self) -> None:
"""can_produce_archive=False prevents archive even when setting is always."""
parser = _parser_instance(can_produce=False, requires_rendition=False)
result = should_produce_archive(parser, "text/plain", Path("/tmp/doc.txt"))
assert result is False
@override_settings(ARCHIVE_FILE_GENERATION="auto")
def test_auto_image_returns_true(self) -> None:
"""auto mode: image/* MIME types always produce archive (scanned doc)."""
parser = _parser_instance(can_produce=True, requires_rendition=False)
result = should_produce_archive(parser, "image/tiff", Path("/tmp/scan.tiff"))
assert result is True
@override_settings(ARCHIVE_FILE_GENERATION="auto")
def test_auto_born_digital_pdf_returns_false(self) -> None:
"""auto mode: PDF with substantial text (born-digital) skips archive."""
parser = _parser_instance(can_produce=True, requires_rendition=False)
long_text = "This is a born-digital PDF with lots of text content. " * 10
with patch(
"documents.consumer.extract_pdf_text",
return_value=long_text,
):
result = should_produce_archive(
parser,
@pytest.mark.parametrize(
("generation", "can_produce", "requires_rendition", "mime", "expected"),
[
pytest.param(
"never",
True,
False,
"application/pdf",
Path("/tmp/doc.pdf"),
)
assert result is False
@override_settings(ARCHIVE_FILE_GENERATION="auto")
def test_auto_scanned_pdf_no_text_returns_true(self) -> None:
"""auto mode: PDF where pdftotext returns None (scanned) produces archive."""
parser = _parser_instance(can_produce=True, requires_rendition=False)
with patch(
"documents.consumer.extract_pdf_text",
return_value=None,
):
result = should_produce_archive(
parser,
False,
id="never-returns-false",
),
pytest.param(
"always",
True,
False,
"application/pdf",
Path("/tmp/scan.pdf"),
)
assert result is True
@override_settings(ARCHIVE_FILE_GENERATION="auto")
def test_auto_pdf_short_text_returns_true(self) -> None:
"""auto mode: PDF with very short text (<=50 chars) is treated as scanned."""
parser = _parser_instance(can_produce=True, requires_rendition=False)
with patch(
"documents.consumer.extract_pdf_text",
return_value="tiny",
):
result = should_produce_archive(
parser,
True,
id="always-returns-true",
),
pytest.param(
"never",
True,
True,
"application/pdf",
Path("/tmp/scan.pdf"),
)
assert result is True
True,
id="requires-rendition-overrides-never",
),
pytest.param(
"always",
False,
False,
"text/plain",
False,
id="cannot-produce-overrides-always",
),
pytest.param(
"always",
False,
True,
"application/pdf",
True,
id="requires-rendition-wins-even-if-cannot-produce",
),
pytest.param(
"auto",
True,
False,
"image/tiff",
True,
id="auto-image-returns-true",
),
pytest.param(
"auto",
True,
False,
"message/rfc822",
False,
id="auto-non-pdf-non-image-returns-false",
),
],
)
def test_generation_setting(
self,
settings,
generation: str,
can_produce: bool, # noqa: FBT001
requires_rendition: bool, # noqa: FBT001
mime: str,
expected: bool, # noqa: FBT001
) -> None:
# One parametrized case: a value for ARCHIVE_FILE_GENERATION, the two parser
# capability flags, a MIME type, and the boolean should_produce_archive()
# must return for that combination.
#
# The setting is assigned through the pytest-django `settings` fixture
# instead of an @override_settings decorator, so it can vary per case.
settings.ARCHIVE_FILE_GENERATION = generation
parser = _parser_instance(
can_produce=can_produce,
requires_rendition=requires_rendition,
)
# The same placeholder path is passed for every case regardless of `mime`;
# presumably it is never read for these combinations — confirm against
# should_produce_archive before adding an "auto" + application/pdf case here.
# `is expected` (identity) also pins the return type to a real bool.
assert should_produce_archive(parser, mime, Path("/tmp/doc")) is expected
@override_settings(ARCHIVE_FILE_GENERATION="auto")
def test_auto_non_pdf_non_image_returns_false(self) -> None:
"""auto mode: other MIME types (e.g. email) don't produce archive by default."""
@pytest.mark.parametrize(
("extracted_text", "expected"),
[
pytest.param(
"This is a born-digital PDF with lots of text content. " * 10,
False,
id="born-digital-long-text-skips-archive",
),
pytest.param(None, True, id="no-text-scanned-produces-archive"),
pytest.param("tiny", True, id="short-text-treated-as-scanned"),
],
)
def test_auto_pdf_archive_decision(
self,
settings,
extracted_text: str | None,
expected: bool, # noqa: FBT001
) -> None:
settings.ARCHIVE_FILE_GENERATION = "auto"
parser = _parser_instance(can_produce=True, requires_rendition=False)
result = should_produce_archive(
parser,
"message/rfc822",
Path("/tmp/email.eml"),
)
assert result is False
@override_settings(ARCHIVE_FILE_GENERATION="always")
def test_requires_rendition_with_can_produce_false_returns_true(self) -> None:
"""requires_pdf_rendition=True always wins, even if can_produce_archive=False."""
parser = _parser_instance(can_produce=False, requires_rendition=True)
result = should_produce_archive(
parser,
"application/pdf",
Path("/tmp/doc.pdf"),
)
assert result is True
with patch("documents.consumer.extract_pdf_text", return_value=extracted_text):
assert (
should_produce_archive(parser, "application/pdf", Path("/tmp/doc.pdf"))
is expected
)

View File

@@ -2,52 +2,63 @@
from __future__ import annotations
import os
from typing import TYPE_CHECKING
import pytest
from django.core import checks as django_checks
from paperless.checks import check_deprecated_v2_ocr_env_vars
if TYPE_CHECKING:
from pytest_mock import MockerFixture
@pytest.mark.django_db
class TestDeprecatedV2OcrEnvVarWarnings:
def test_old_skip_archive_file_env_warns(self, monkeypatch) -> None:
monkeypatch.setenv("PAPERLESS_OCR_SKIP_ARCHIVE_FILE", "always")
all_checks = django_checks.run_checks()
warns = [
e
for e in all_checks
if "PAPERLESS_OCR_SKIP_ARCHIVE_FILE" in str(getattr(e, "msg", ""))
]
assert warns
def test_no_deprecated_vars_returns_empty(self, mocker: MockerFixture) -> None:
"""No warnings when neither deprecated variable is set."""
# clear=True empties os.environ before applying the mapping, so
# PAPERLESS_OCR_MODE=auto is the only variable visible to the check and
# nothing from the developer's or CI environment can leak in.
mocker.patch.dict(os.environ, {"PAPERLESS_OCR_MODE": "auto"}, clear=True)
# The check function is invoked directly with None as its single argument
# (presumably Django's app_configs parameter — confirm in paperless.checks).
result = check_deprecated_v2_ocr_env_vars(None)
# Exact comparison with an empty list: no messages of any kind are allowed.
assert result == []
def test_old_skip_mode_env_warns(self, monkeypatch) -> None:
monkeypatch.setenv("PAPERLESS_OCR_MODE", "skip")
all_checks = django_checks.run_checks()
warns = [
e
for e in all_checks
if "skip" in str(getattr(e, "msg", "")).lower()
and "OCR_MODE" in str(getattr(e, "msg", ""))
]
assert warns
@pytest.mark.parametrize(
("env_var", "env_value", "expected_id", "expected_fragment"),
[
pytest.param(
"PAPERLESS_OCR_SKIP_ARCHIVE_FILE",
"always",
"paperless.W002",
"PAPERLESS_OCR_SKIP_ARCHIVE_FILE",
id="skip-archive-file-warns",
),
pytest.param(
"PAPERLESS_OCR_MODE",
"skip",
"paperless.W003",
"skip",
id="ocr-mode-skip-warns",
),
pytest.param(
"PAPERLESS_OCR_MODE",
"skip_noarchive",
"paperless.W003",
"skip_noarchive",
id="ocr-mode-skip-noarchive-warns",
),
],
)
def test_deprecated_var_produces_one_warning(
self,
mocker: MockerFixture,
env_var: str,
env_value: str,
expected_id: str,
expected_fragment: str,
) -> None:
"""Each deprecated setting in isolation produces exactly one warning."""
mocker.patch.dict(os.environ, {env_var: env_value}, clear=True)
result = check_deprecated_v2_ocr_env_vars(None)
def test_old_skip_noarchive_mode_env_warns(self, monkeypatch) -> None:
monkeypatch.setenv("PAPERLESS_OCR_MODE", "skip_noarchive")
all_checks = django_checks.run_checks()
warns = [
e for e in all_checks if "skip_noarchive" in str(getattr(e, "msg", ""))
]
assert warns
def test_no_deprecated_vars_no_warning(self, monkeypatch) -> None:
monkeypatch.delenv("PAPERLESS_OCR_SKIP_ARCHIVE_FILE", raising=False)
monkeypatch.setenv("PAPERLESS_OCR_MODE", "auto")
all_checks = django_checks.run_checks()
deprecated_warns = [
e
for e in all_checks
if "PAPERLESS_OCR_SKIP_ARCHIVE_FILE" in str(getattr(e, "msg", ""))
or (
"skip" in str(getattr(e, "msg", "")).lower()
and "OCR_MODE" in str(getattr(e, "msg", ""))
)
]
assert not deprecated_warns
assert len(result) == 1
warning = result[0]
assert warning.id == expected_id
assert expected_fragment in warning.msg