diff --git a/src/documents/tests/test_consumer_archive.py b/src/documents/tests/test_consumer_archive.py index 0af416322..68f95056c 100644 --- a/src/documents/tests/test_consumer_archive.py +++ b/src/documents/tests/test_consumer_archive.py @@ -7,7 +7,6 @@ from unittest.mock import MagicMock from unittest.mock import patch import pytest -from django.test import override_settings from documents.consumer import should_produce_archive @@ -54,115 +53,105 @@ def patch_app_config(mocker, null_app_config): class TestShouldProduceArchive: - @override_settings(ARCHIVE_FILE_GENERATION="never") - def test_never_setting_returns_false(self) -> None: - parser = _parser_instance(can_produce=True, requires_rendition=False) - result = should_produce_archive( - parser, - "application/pdf", - Path("/tmp/doc.pdf"), - ) - assert result is False - - @override_settings(ARCHIVE_FILE_GENERATION="always") - def test_always_setting_returns_true(self) -> None: - parser = _parser_instance(can_produce=True, requires_rendition=False) - result = should_produce_archive( - parser, - "application/pdf", - Path("/tmp/doc.pdf"), - ) - assert result is True - - @override_settings(ARCHIVE_FILE_GENERATION="never") - def test_requires_pdf_rendition_overrides_never(self) -> None: - """requires_pdf_rendition=True forces archive even when setting is never.""" - parser = _parser_instance(can_produce=True, requires_rendition=True) - result = should_produce_archive( - parser, - "application/pdf", - Path("/tmp/doc.pdf"), - ) - assert result is True - - @override_settings(ARCHIVE_FILE_GENERATION="always") - def test_cannot_produce_archive_overrides_always(self) -> None: - """can_produce_archive=False prevents archive even when setting is always.""" - parser = _parser_instance(can_produce=False, requires_rendition=False) - result = should_produce_archive(parser, "text/plain", Path("/tmp/doc.txt")) - assert result is False - - @override_settings(ARCHIVE_FILE_GENERATION="auto") - def test_auto_image_returns_true(self) -> None: - """auto mode: image/* MIME types always produce archive (scanned doc).""" - parser = _parser_instance(can_produce=True, requires_rendition=False) - result = should_produce_archive(parser, "image/tiff", Path("/tmp/scan.tiff")) - assert result is True - - @override_settings(ARCHIVE_FILE_GENERATION="auto") - def test_auto_born_digital_pdf_returns_false(self) -> None: - """auto mode: PDF with substantial text (born-digital) skips archive.""" - parser = _parser_instance(can_produce=True, requires_rendition=False) - long_text = "This is a born-digital PDF with lots of text content. " * 10 - with patch( - "documents.consumer.extract_pdf_text", - return_value=long_text, - ): - result = should_produce_archive( - parser, + @pytest.mark.parametrize( + ("generation", "can_produce", "requires_rendition", "mime", "expected"), + [ + pytest.param( + "never", + True, + False, "application/pdf", - Path("/tmp/doc.pdf"), - ) - assert result is False - - @override_settings(ARCHIVE_FILE_GENERATION="auto") - def test_auto_scanned_pdf_no_text_returns_true(self) -> None: - """auto mode: PDF where pdftotext returns None (scanned) produces archive.""" - parser = _parser_instance(can_produce=True, requires_rendition=False) - with patch( - "documents.consumer.extract_pdf_text", - return_value=None, - ): - result = should_produce_archive( - parser, + False, + id="never-returns-false", + ), + pytest.param( + "always", + True, + False, "application/pdf", - Path("/tmp/scan.pdf"), - ) - assert result is True - - @override_settings(ARCHIVE_FILE_GENERATION="auto") - def test_auto_pdf_short_text_returns_true(self) -> None: - """auto mode: PDF with very short text (<=50 chars) is treated as scanned.""" - parser = _parser_instance(can_produce=True, requires_rendition=False) - with patch( - "documents.consumer.extract_pdf_text", - return_value="tiny", - ): - result = should_produce_archive( - parser, + True, + id="always-returns-true", + ), + pytest.param( + "never", + True, + True, "application/pdf", - Path("/tmp/scan.pdf"), - ) - assert result is True + True, + id="requires-rendition-overrides-never", + ), + pytest.param( + "always", + False, + False, + "text/plain", + False, + id="cannot-produce-overrides-always", + ), + pytest.param( + "always", + False, + True, + "application/pdf", + True, + id="requires-rendition-wins-even-if-cannot-produce", + ), + pytest.param( + "auto", + True, + False, + "image/tiff", + True, + id="auto-image-returns-true", + ), + pytest.param( + "auto", + True, + False, + "message/rfc822", + False, + id="auto-non-pdf-non-image-returns-false", + ), + ], + ) + def test_generation_setting( + self, + settings, + generation: str, + can_produce: bool, # noqa: FBT001 + requires_rendition: bool, # noqa: FBT001 + mime: str, + expected: bool, # noqa: FBT001 + ) -> None: + settings.ARCHIVE_FILE_GENERATION = generation + parser = _parser_instance( + can_produce=can_produce, + requires_rendition=requires_rendition, + ) + assert should_produce_archive(parser, mime, Path("/tmp/doc")) is expected - @override_settings(ARCHIVE_FILE_GENERATION="auto") - def test_auto_non_pdf_non_image_returns_false(self) -> None: - """auto mode: other MIME types (e.g. email) don't produce archive by default.""" + @pytest.mark.parametrize( + ("extracted_text", "expected"), + [ + pytest.param( + "This is a born-digital PDF with lots of text content. " * 10, + False, + id="born-digital-long-text-skips-archive", + ), + pytest.param(None, True, id="no-text-scanned-produces-archive"), + pytest.param("tiny", True, id="short-text-treated-as-scanned"), + ], + ) + def test_auto_pdf_archive_decision( + self, + settings, + extracted_text: str | None, + expected: bool, # noqa: FBT001 + ) -> None: + settings.ARCHIVE_FILE_GENERATION = "auto" parser = _parser_instance(can_produce=True, requires_rendition=False) - result = should_produce_archive( - parser, - "message/rfc822", - Path("/tmp/email.eml"), - ) - assert result is False - - @override_settings(ARCHIVE_FILE_GENERATION="always") - def test_requires_rendition_with_can_produce_false_returns_true(self) -> None: - """requires_pdf_rendition=True always wins, even if can_produce_archive=False.""" - parser = _parser_instance(can_produce=False, requires_rendition=True) - result = should_produce_archive( - parser, - "application/pdf", - Path("/tmp/doc.pdf"), - ) - assert result is True + with patch("documents.consumer.extract_pdf_text", return_value=extracted_text): + assert ( + should_produce_archive(parser, "application/pdf", Path("/tmp/doc.pdf")) + is expected + ) diff --git a/src/paperless/tests/test_checks_v3.py b/src/paperless/tests/test_checks_v3.py index abdfc1fa4..a87a19727 100644 --- a/src/paperless/tests/test_checks_v3.py +++ b/src/paperless/tests/test_checks_v3.py @@ -2,52 +2,63 @@ from __future__ import annotations +import os +from typing import TYPE_CHECKING + import pytest -from django.core import checks as django_checks + +from paperless.checks import check_deprecated_v2_ocr_env_vars + +if TYPE_CHECKING: + from pytest_mock import MockerFixture -@pytest.mark.django_db class TestDeprecatedV2OcrEnvVarWarnings: - def test_old_skip_archive_file_env_warns(self, monkeypatch) -> None: - monkeypatch.setenv("PAPERLESS_OCR_SKIP_ARCHIVE_FILE", "always") - all_checks = django_checks.run_checks() - warns = [ - e - for e in all_checks - if "PAPERLESS_OCR_SKIP_ARCHIVE_FILE" in str(getattr(e, "msg", "")) - ] - assert warns + def test_no_deprecated_vars_returns_empty(self, mocker: MockerFixture) -> None: + """No warnings when neither deprecated variable is set.""" + mocker.patch.dict(os.environ, {"PAPERLESS_OCR_MODE": "auto"}, clear=True) + result = check_deprecated_v2_ocr_env_vars(None) + assert result == [] - def test_old_skip_mode_env_warns(self, monkeypatch) -> None: - monkeypatch.setenv("PAPERLESS_OCR_MODE", "skip") - all_checks = django_checks.run_checks() - warns = [ - e - for e in all_checks - if "skip" in str(getattr(e, "msg", "")).lower() - and "OCR_MODE" in str(getattr(e, "msg", "")) - ] - assert warns + @pytest.mark.parametrize( + ("env_var", "env_value", "expected_id", "expected_fragment"), + [ + pytest.param( + "PAPERLESS_OCR_SKIP_ARCHIVE_FILE", + "always", + "paperless.W002", + "PAPERLESS_OCR_SKIP_ARCHIVE_FILE", + id="skip-archive-file-warns", + ), + pytest.param( + "PAPERLESS_OCR_MODE", + "skip", + "paperless.W003", + "skip", + id="ocr-mode-skip-warns", + ), + pytest.param( + "PAPERLESS_OCR_MODE", + "skip_noarchive", + "paperless.W003", + "skip_noarchive", + id="ocr-mode-skip-noarchive-warns", + ), + ], + ) + def test_deprecated_var_produces_one_warning( + self, + mocker: MockerFixture, + env_var: str, + env_value: str, + expected_id: str, + expected_fragment: str, + ) -> None: + """Each deprecated setting in isolation produces exactly one warning.""" + mocker.patch.dict(os.environ, {env_var: env_value}, clear=True) + result = check_deprecated_v2_ocr_env_vars(None) - def test_old_skip_noarchive_mode_env_warns(self, monkeypatch) -> None: - monkeypatch.setenv("PAPERLESS_OCR_MODE", "skip_noarchive") - all_checks = django_checks.run_checks() - warns = [ - e for e in all_checks if "skip_noarchive" in str(getattr(e, "msg", "")) - ] - assert warns - - def test_no_deprecated_vars_no_warning(self, monkeypatch) -> None: - monkeypatch.delenv("PAPERLESS_OCR_SKIP_ARCHIVE_FILE", raising=False) - monkeypatch.setenv("PAPERLESS_OCR_MODE", "auto") - all_checks = django_checks.run_checks() - deprecated_warns = [ - e - for e in all_checks - if "PAPERLESS_OCR_SKIP_ARCHIVE_FILE" in str(getattr(e, "msg", "")) - or ( - "skip" in str(getattr(e, "msg", "")).lower() - and "OCR_MODE" in str(getattr(e, "msg", "")) - ) - ] - assert not deprecated_warns + assert len(result) == 1 + warning = result[0] + assert warning.id == expected_id + assert expected_fragment in warning.msg