From cdeabaf75de1d8259c39993aee4d50b3420df9c0 Mon Sep 17 00:00:00 2001
From: Trenton H <797416+stumpylog@users.noreply.github.com>
Date: Mon, 9 Mar 2026 20:53:20 -0700
Subject: [PATCH] Chore: move paperless_text parser and tests to paperless/
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move TextDocumentParser and its test suite from paperless_text/ into the new
paperless/ package where parsers are being consolidated:

- paperless_text/parsers.py → paperless/parsers/text.py
- paperless_text/tests/test_parser.py → paperless/tests/parsers/test_text_parser.py
- paperless_text/tests/conftest.py → paperless/tests/parsers/conftest.py
- paperless_text/tests/samples/*.txt → paperless/tests/samples/text/

Also add paperless/tests/__init__.py, paperless/tests/parsers/__init__.py, and
a new top-level paperless/tests/conftest.py for shared fixtures.

The parser and test files are unchanged; subsequent commits will update them
to implement ParserProtocol.

Co-Authored-By: Claude Sonnet 4.6
---
 .../parsers.py => paperless/parsers/text.py}  |  0
 src/paperless/tests/conftest.py               | 48 +++++++++++++++++++
 src/paperless/tests/parsers/__init__.py       |  0
 .../tests/parsers}/conftest.py                |  1 -
 .../tests/parsers/test_text_parser.py}        |  0
 .../tests/samples/text}/decode_error.txt      |  0
 .../tests/samples/text}/test.txt              |  0
 7 files changed, 48 insertions(+), 1 deletion(-)
 rename src/{paperless_text/parsers.py => paperless/parsers/text.py} (100%)
 create mode 100644 src/paperless/tests/conftest.py
 create mode 100644 src/paperless/tests/parsers/__init__.py
 rename src/{paperless_text/tests => paperless/tests/parsers}/conftest.py (99%)
 rename src/{paperless_text/tests/test_parser.py => paperless/tests/parsers/test_text_parser.py} (100%)
 rename src/{paperless_text/tests/samples => paperless/tests/samples/text}/decode_error.txt (100%)
 rename src/{paperless_text/tests/samples => paperless/tests/samples/text}/test.txt (100%)

diff --git a/src/paperless_text/parsers.py b/src/paperless/parsers/text.py
similarity index 100%
rename from src/paperless_text/parsers.py
rename to src/paperless/parsers/text.py
diff --git a/src/paperless/tests/conftest.py b/src/paperless/tests/conftest.py
new file mode 100644
index 000000000..b016191c4
--- /dev/null
+++ b/src/paperless/tests/conftest.py
@@ -0,0 +1,48 @@
+"""
+Fixtures defined here are available to every test module under
+src/paperless/tests/ (including sub-packages such as parsers/).
+
+Session-scoped fixtures for the shared samples directory live here so
+sub-package conftest files can reference them without duplicating path logic.
+Parser-specific fixtures (concrete parser instances, format-specific sample
+files) live in paperless/tests/parsers/conftest.py.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+import pytest
+
+from paperless.parsers.registry import reset_parser_registry
+
+if TYPE_CHECKING:
+    from collections.abc import Generator
+
+
+@pytest.fixture(scope="session")
+def samples_dir() -> Path:
+    """Absolute path to the shared parser sample files directory.
+
+    Sub-package conftest files derive format-specific paths from this root,
+    e.g. ``samples_dir / "text" / "test.txt"``.
+
+    Returns
+    -------
+    Path
+        Directory containing all sample documents used by parser tests.
+    """
+    return (Path(__file__).parent / "samples").resolve()
+
+
+@pytest.fixture(autouse=True)
+def clean_registry() -> Generator[None, None, None]:
+    """Reset the parser registry before and after every test.
+
+    This prevents registry state from leaking between tests that call
+    get_parser_registry() or init_builtin_parsers().
+    """
+    reset_parser_registry()
+    yield
+    reset_parser_registry()
diff --git a/src/paperless/tests/parsers/__init__.py b/src/paperless/tests/parsers/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/paperless_text/tests/conftest.py b/src/paperless/tests/parsers/conftest.py
similarity index 99%
rename from src/paperless_text/tests/conftest.py
rename to src/paperless/tests/parsers/conftest.py
index 1d9e4fc2f..fa03e6f6b 100644
--- a/src/paperless_text/tests/conftest.py
+++ b/src/paperless/tests/parsers/conftest.py
@@ -2,7 +2,6 @@ from collections.abc import Generator
 from pathlib import Path
 
 import pytest
-
 from paperless_text.parsers import TextDocumentParser
 
 
diff --git a/src/paperless_text/tests/test_parser.py b/src/paperless/tests/parsers/test_text_parser.py
similarity index 100%
rename from src/paperless_text/tests/test_parser.py
rename to src/paperless/tests/parsers/test_text_parser.py
diff --git a/src/paperless_text/tests/samples/decode_error.txt b/src/paperless/tests/samples/text/decode_error.txt
similarity index 100%
rename from src/paperless_text/tests/samples/decode_error.txt
rename to src/paperless/tests/samples/text/decode_error.txt
diff --git a/src/paperless_text/tests/samples/test.txt b/src/paperless/tests/samples/text/test.txt
similarity index 100%
rename from src/paperless_text/tests/samples/test.txt
rename to src/paperless/tests/samples/text/test.txt