mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-05-08 23:55:24 +00:00
Refactor: use fixture factory and usefixtures in remote parser tests
- `_make_azure_mock` helper promoted to `make_azure_mock` factory fixture
in conftest.py; tests call `make_azure_mock()` or
`make_azure_mock("custom text")` instead of a module-level function
- `azure_settings` and `no_engine_settings` applied via
`@pytest.mark.usefixtures` wherever their value is not referenced
inside the test body; `TestRemoteParserParseError` marked at the class
level since all three tests need the same setting
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -7,6 +7,7 @@ so it is easy to see which files belong to which test module.
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
from unittest.mock import Mock
|
||||
|
||||
import pytest
|
||||
|
||||
@@ -14,6 +15,7 @@ from paperless.parsers.remote import RemoteDocumentParser
|
||||
from paperless.parsers.text import TextDocumentParser
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Callable
|
||||
from collections.abc import Generator
|
||||
from pathlib import Path
|
||||
|
||||
@@ -163,3 +165,37 @@ def no_engine_settings(settings: SettingsWrapper) -> SettingsWrapper:
|
||||
settings.REMOTE_OCR_API_KEY = None
|
||||
settings.REMOTE_OCR_ENDPOINT = None
|
||||
return settings
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Azure mock factory
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def make_azure_mock() -> Callable[[str], Mock]:
|
||||
"""Return a factory that builds a mock Azure DocumentIntelligenceClient.
|
||||
|
||||
Usage::
|
||||
|
||||
mock_client = make_azure_mock() # default text
|
||||
mock_client = make_azure_mock("My text.") # custom extracted text
|
||||
|
||||
Returns
|
||||
-------
|
||||
Callable[[str], Mock]
|
||||
Factory function that accepts an optional ``text`` argument and
|
||||
returns a fully configured mock client.
|
||||
"""
|
||||
|
||||
def _factory(text: str = "Extracted text.") -> Mock:
|
||||
mock_client = Mock()
|
||||
mock_poller = Mock()
|
||||
mock_poller.wait.return_value = None
|
||||
mock_poller.details = {"operation_id": "fake-op-id"}
|
||||
mock_poller.result.return_value.content = text
|
||||
mock_client.begin_analyze_document.return_value = mock_poller
|
||||
mock_client.get_analyze_result_pdf.return_value = [b"%PDF-1.4 FAKE"]
|
||||
return mock_client
|
||||
|
||||
return _factory
|
||||
|
||||
@@ -2,10 +2,11 @@
|
||||
Tests for paperless.parsers.remote.RemoteDocumentParser.
|
||||
|
||||
All tests use the context-manager protocol for parser lifecycle. The Azure
|
||||
AI client is always mocked via the ``mocker`` fixture so no real network
|
||||
calls are made. Django settings are overridden via the pytest-django
|
||||
``settings`` fixture (or the ``azure_settings`` / ``no_engine_settings``
|
||||
helpers defined in conftest.py).
|
||||
AI client is mocked via the ``make_azure_mock`` fixture (a factory). Django
|
||||
settings are overridden via the pytest-django ``settings`` fixture or the
|
||||
``azure_settings`` / ``no_engine_settings`` convenience fixtures from
|
||||
conftest.py — both of which are applied with ``@pytest.mark.usefixtures``
|
||||
when their value is not needed inside the test body.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -19,29 +20,13 @@ from paperless.parsers import ParserProtocol
|
||||
from paperless.parsers.remote import RemoteDocumentParser
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Callable
|
||||
from pathlib import Path
|
||||
|
||||
from pytest_django.fixtures import SettingsWrapper
|
||||
from pytest_mock import MockerFixture
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _make_azure_mock(text: str = "Extracted text.") -> Mock:
|
||||
"""Return a configured mock Azure DocumentIntelligenceClient."""
|
||||
mock_client = Mock()
|
||||
mock_poller = Mock()
|
||||
mock_poller.wait.return_value = None
|
||||
mock_poller.details = {"operation_id": "fake-op-id"}
|
||||
mock_poller.result.return_value.content = text
|
||||
mock_client.begin_analyze_document.return_value = mock_poller
|
||||
mock_client.get_analyze_result_pdf.return_value = [b"%PDF-1.4 FAKE"]
|
||||
return mock_client
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Protocol contract
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -93,10 +78,8 @@ class TestRemoteParserSupportedMimeTypes:
|
||||
}
|
||||
assert expected == set(mime_types.keys())
|
||||
|
||||
def test_returns_full_set_when_not_configured(
|
||||
self,
|
||||
no_engine_settings: SettingsWrapper,
|
||||
) -> None:
|
||||
@pytest.mark.usefixtures("no_engine_settings")
|
||||
def test_returns_full_set_when_not_configured(self) -> None:
|
||||
"""
|
||||
GIVEN: No remote engine is configured
|
||||
WHEN: supported_mime_types() is called
|
||||
@@ -114,6 +97,7 @@ class TestRemoteParserSupportedMimeTypes:
|
||||
class TestRemoteParserScore:
|
||||
"""score() encodes the activation logic: None when unconfigured, 20 when active."""
|
||||
|
||||
@pytest.mark.usefixtures("azure_settings")
|
||||
@pytest.mark.parametrize(
|
||||
"mime_type",
|
||||
[
|
||||
@@ -126,23 +110,16 @@ class TestRemoteParserScore:
|
||||
"image/webp",
|
||||
],
|
||||
)
|
||||
def test_score_returns_20_when_configured(
|
||||
self,
|
||||
azure_settings: SettingsWrapper,
|
||||
mime_type: str,
|
||||
) -> None:
|
||||
def test_score_returns_20_when_configured(self, mime_type: str) -> None:
|
||||
result = RemoteDocumentParser.score(mime_type, "doc.pdf")
|
||||
assert result == 20
|
||||
|
||||
@pytest.mark.usefixtures("no_engine_settings")
|
||||
@pytest.mark.parametrize(
|
||||
"mime_type",
|
||||
["application/pdf", "image/png", "image/jpeg"],
|
||||
)
|
||||
def test_score_returns_none_when_no_engine(
|
||||
self,
|
||||
no_engine_settings: SettingsWrapper,
|
||||
mime_type: str,
|
||||
) -> None:
|
||||
def test_score_returns_none_when_no_engine(self, mime_type: str) -> None:
|
||||
result = RemoteDocumentParser.score(mime_type, "doc.pdf")
|
||||
assert result is None
|
||||
|
||||
@@ -166,17 +143,13 @@ class TestRemoteParserScore:
|
||||
result = RemoteDocumentParser.score("application/pdf", "doc.pdf")
|
||||
assert result is None
|
||||
|
||||
def test_score_returns_none_for_unsupported_mime_type(
|
||||
self,
|
||||
azure_settings: SettingsWrapper,
|
||||
) -> None:
|
||||
@pytest.mark.usefixtures("azure_settings")
|
||||
def test_score_returns_none_for_unsupported_mime_type(self) -> None:
|
||||
result = RemoteDocumentParser.score("text/plain", "doc.txt")
|
||||
assert result is None
|
||||
|
||||
def test_score_higher_than_tesseract_default(
|
||||
self,
|
||||
azure_settings: SettingsWrapper,
|
||||
) -> None:
|
||||
@pytest.mark.usefixtures("azure_settings")
|
||||
def test_score_higher_than_tesseract_default(self) -> None:
|
||||
"""Remote parser (20) outranks the tesseract default (10) when configured."""
|
||||
score = RemoteDocumentParser.score("application/pdf", "doc.pdf")
|
||||
assert score is not None and score > 10
|
||||
@@ -229,14 +202,15 @@ class TestRemoteParserLifecycle:
|
||||
|
||||
|
||||
class TestRemoteParserParse:
|
||||
@pytest.mark.usefixtures("azure_settings")
|
||||
def test_parse_returns_text_from_azure(
|
||||
self,
|
||||
remote_parser: RemoteDocumentParser,
|
||||
sample_pdf_file: Path,
|
||||
azure_settings: SettingsWrapper,
|
||||
make_azure_mock: Callable[[str], Mock],
|
||||
mocker: MockerFixture,
|
||||
) -> None:
|
||||
mock_client = _make_azure_mock("Hello from Azure.")
|
||||
mock_client = make_azure_mock("Hello from Azure.")
|
||||
mocker.patch(
|
||||
"azure.ai.documentintelligence.DocumentIntelligenceClient",
|
||||
return_value=mock_client,
|
||||
@@ -246,17 +220,17 @@ class TestRemoteParserParse:
|
||||
|
||||
assert remote_parser.get_text() == "Hello from Azure."
|
||||
|
||||
@pytest.mark.usefixtures("azure_settings")
|
||||
def test_parse_sets_archive_path(
|
||||
self,
|
||||
remote_parser: RemoteDocumentParser,
|
||||
sample_pdf_file: Path,
|
||||
azure_settings: SettingsWrapper,
|
||||
make_azure_mock: Callable[[str], Mock],
|
||||
mocker: MockerFixture,
|
||||
) -> None:
|
||||
mock_client = _make_azure_mock()
|
||||
mocker.patch(
|
||||
"azure.ai.documentintelligence.DocumentIntelligenceClient",
|
||||
return_value=mock_client,
|
||||
return_value=make_azure_mock(),
|
||||
)
|
||||
|
||||
remote_parser.parse(sample_pdf_file, "application/pdf")
|
||||
@@ -266,14 +240,15 @@ class TestRemoteParserParse:
|
||||
assert archive.exists()
|
||||
assert archive.suffix == ".pdf"
|
||||
|
||||
@pytest.mark.usefixtures("azure_settings")
|
||||
def test_parse_closes_client_on_success(
|
||||
self,
|
||||
remote_parser: RemoteDocumentParser,
|
||||
sample_pdf_file: Path,
|
||||
azure_settings: SettingsWrapper,
|
||||
make_azure_mock: Callable[[str], Mock],
|
||||
mocker: MockerFixture,
|
||||
) -> None:
|
||||
mock_client = _make_azure_mock()
|
||||
mock_client = make_azure_mock()
|
||||
mocker.patch(
|
||||
"azure.ai.documentintelligence.DocumentIntelligenceClient",
|
||||
return_value=mock_client,
|
||||
@@ -283,11 +258,11 @@ class TestRemoteParserParse:
|
||||
|
||||
mock_client.close.assert_called_once()
|
||||
|
||||
@pytest.mark.usefixtures("no_engine_settings")
|
||||
def test_parse_sets_empty_text_when_not_configured(
|
||||
self,
|
||||
remote_parser: RemoteDocumentParser,
|
||||
sample_pdf_file: Path,
|
||||
no_engine_settings: SettingsWrapper,
|
||||
) -> None:
|
||||
remote_parser.parse(sample_pdf_file, "application/pdf")
|
||||
|
||||
@@ -300,17 +275,17 @@ class TestRemoteParserParse:
|
||||
) -> None:
|
||||
assert remote_parser.get_text() is None
|
||||
|
||||
@pytest.mark.usefixtures("azure_settings")
|
||||
def test_get_date_always_none(
|
||||
self,
|
||||
remote_parser: RemoteDocumentParser,
|
||||
sample_pdf_file: Path,
|
||||
azure_settings: SettingsWrapper,
|
||||
make_azure_mock: Callable[[str], Mock],
|
||||
mocker: MockerFixture,
|
||||
) -> None:
|
||||
mock_client = _make_azure_mock()
|
||||
mocker.patch(
|
||||
"azure.ai.documentintelligence.DocumentIntelligenceClient",
|
||||
return_value=mock_client,
|
||||
return_value=make_azure_mock(),
|
||||
)
|
||||
|
||||
remote_parser.parse(sample_pdf_file, "application/pdf")
|
||||
@@ -323,12 +298,12 @@ class TestRemoteParserParse:
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.usefixtures("azure_settings")
|
||||
class TestRemoteParserParseError:
|
||||
def test_parse_returns_none_on_azure_error(
|
||||
self,
|
||||
remote_parser: RemoteDocumentParser,
|
||||
sample_pdf_file: Path,
|
||||
azure_settings: SettingsWrapper,
|
||||
mocker: MockerFixture,
|
||||
) -> None:
|
||||
mock_client = Mock()
|
||||
@@ -346,7 +321,6 @@ class TestRemoteParserParseError:
|
||||
self,
|
||||
remote_parser: RemoteDocumentParser,
|
||||
sample_pdf_file: Path,
|
||||
azure_settings: SettingsWrapper,
|
||||
mocker: MockerFixture,
|
||||
) -> None:
|
||||
mock_client = Mock()
|
||||
@@ -364,7 +338,6 @@ class TestRemoteParserParseError:
|
||||
self,
|
||||
remote_parser: RemoteDocumentParser,
|
||||
sample_pdf_file: Path,
|
||||
azure_settings: SettingsWrapper,
|
||||
mocker: MockerFixture,
|
||||
) -> None:
|
||||
mock_client = Mock()
|
||||
@@ -475,10 +448,8 @@ class TestRemoteParserRegistry:
|
||||
|
||||
assert RemoteDocumentParser in registry._builtins
|
||||
|
||||
def test_get_parser_returns_remote_when_configured(
|
||||
self,
|
||||
azure_settings: SettingsWrapper,
|
||||
) -> None:
|
||||
@pytest.mark.usefixtures("azure_settings")
|
||||
def test_get_parser_returns_remote_when_configured(self) -> None:
|
||||
from paperless.parsers.registry import get_parser_registry
|
||||
|
||||
registry = get_parser_registry()
|
||||
@@ -486,10 +457,8 @@ class TestRemoteParserRegistry:
|
||||
|
||||
assert parser_cls is RemoteDocumentParser
|
||||
|
||||
def test_get_parser_returns_none_for_pdf_when_not_configured(
|
||||
self,
|
||||
no_engine_settings: SettingsWrapper,
|
||||
) -> None:
|
||||
@pytest.mark.usefixtures("no_engine_settings")
|
||||
def test_get_parser_returns_none_for_pdf_when_not_configured(self) -> None:
|
||||
"""With no tesseract parser registered yet, PDF has no handler if remote is off."""
|
||||
from paperless.parsers.registry import ParserRegistry
|
||||
|
||||
|
||||
Reference in New Issue
Block a user