diff --git a/src/paperless_tika/parsers.py b/src/paperless/parsers/tika.py
similarity index 100%
rename from src/paperless_tika/parsers.py
rename to src/paperless/parsers/tika.py
diff --git a/src/paperless/tests/parsers/conftest.py b/src/paperless/tests/parsers/conftest.py
index 2d5deb684..936058733 100644
--- a/src/paperless/tests/parsers/conftest.py
+++ b/src/paperless/tests/parsers/conftest.py
@@ -11,6 +11,7 @@ from typing import TYPE_CHECKING
 import pytest
 
 from paperless.parsers.text import TextDocumentParser
+from paperless.parsers.tika import TikaDocumentParser
 
 if TYPE_CHECKING:
     from collections.abc import Generator
@@ -74,3 +75,86 @@ def text_parser() -> Generator[TextDocumentParser, None, None]:
     """
     with TextDocumentParser() as parser:
         yield parser
+
+
+# ------------------------------------------------------------------
+# Tika parser sample files
+# ------------------------------------------------------------------
+
+
+@pytest.fixture(scope="session")
+def tika_samples_dir(samples_dir: Path) -> Path:
+    """Absolute path to the Tika parser sample files directory.
+
+    Returns
+    -------
+    Path
+        The ``tika`` subdirectory of ``samples_dir``.
+    """
+    return samples_dir / "tika"
+
+
+@pytest.fixture(scope="session")
+def sample_odt_file(tika_samples_dir: Path) -> Path:
+    """Path to a sample ODT file.
+
+    Returns
+    -------
+    Path
+        Absolute path to ``tika/sample.odt``.
+    """
+    return tika_samples_dir / "sample.odt"
+
+
+@pytest.fixture(scope="session")
+def sample_docx_file(tika_samples_dir: Path) -> Path:
+    """Path to a sample DOCX file.
+
+    Returns
+    -------
+    Path
+        Absolute path to ``tika/sample.docx``.
+    """
+    return tika_samples_dir / "sample.docx"
+
+
+@pytest.fixture(scope="session")
+def sample_doc_file(tika_samples_dir: Path) -> Path:
+    """Path to a sample DOC file.
+
+    Returns
+    -------
+    Path
+        Absolute path to ``tika/sample.doc``.
+    """
+    return tika_samples_dir / "sample.doc"
+
+
+@pytest.fixture(scope="session")
+def sample_broken_odt(tika_samples_dir: Path) -> Path:
+    """Path to a broken ODT file that triggers the multi-part fallback.
+
+    Returns
+    -------
+    Path
+        Absolute path to ``tika/multi-part-broken.odt``.
+    """
+    return tika_samples_dir / "multi-part-broken.odt"
+
+
+# ------------------------------------------------------------------
+# Tika parser instance
+# ------------------------------------------------------------------
+
+
+@pytest.fixture()
+def tika_parser() -> Generator[TikaDocumentParser, None, None]:
+    """Yield a TikaDocumentParser and clean up its temporary directory afterwards.
+
+    Yields
+    ------
+    TikaDocumentParser
+        A ready-to-use parser instance.
+    """
+    with TikaDocumentParser() as parser:
+        yield parser
diff --git a/src/paperless_tika/tests/test_tika_parser.py b/src/paperless/tests/parsers/test_tika_parser.py
similarity index 100%
rename from src/paperless_tika/tests/test_tika_parser.py
rename to src/paperless/tests/parsers/test_tika_parser.py
index e1c71c131..2cf39da59 100644
--- a/src/paperless_tika/tests/test_tika_parser.py
+++ b/src/paperless/tests/parsers/test_tika_parser.py
@@ -5,11 +5,11 @@ from pathlib import Path
 
 import pytest
 from httpx import codes
 from pytest_django.fixtures import SettingsWrapper
 from pytest_httpx import HTTPXMock
 
 from documents.parsers import ParseError
-from paperless_tika.parsers import TikaDocumentParser
+from paperless.parsers.tika import TikaDocumentParser
 
 
 @pytest.mark.django_db()
diff --git a/src/paperless_tika/tests/samples/multi-part-broken.odt b/src/paperless/tests/samples/tika/multi-part-broken.odt
similarity index 100%
rename from src/paperless_tika/tests/samples/multi-part-broken.odt
rename to src/paperless/tests/samples/tika/multi-part-broken.odt
diff --git a/src/paperless_tika/tests/samples/sample.doc b/src/paperless/tests/samples/tika/sample.doc
similarity index 100%
rename from src/paperless_tika/tests/samples/sample.doc
rename to src/paperless/tests/samples/tika/sample.doc
diff --git a/src/paperless_tika/tests/samples/sample.docx b/src/paperless/tests/samples/tika/sample.docx
similarity index 100%
rename from src/paperless_tika/tests/samples/sample.docx
rename to src/paperless/tests/samples/tika/sample.docx
diff --git a/src/paperless_tika/tests/samples/sample.odt b/src/paperless/tests/samples/tika/sample.odt
similarity index 100%
rename from src/paperless_tika/tests/samples/sample.odt
rename to src/paperless/tests/samples/tika/sample.odt
diff --git a/src/paperless_tika/tests/conftest.py b/src/paperless_tika/tests/conftest.py
deleted file mode 100644
index 5a54dae95..000000000
--- a/src/paperless_tika/tests/conftest.py
+++ /dev/null
@@ -1,41 +0,0 @@
-from collections.abc import Generator
-from pathlib import Path
-
-import pytest
-
-from paperless_tika.parsers import TikaDocumentParser
-
-
-@pytest.fixture()
-def tika_parser() -> Generator[TikaDocumentParser, None, None]:
-    try:
-        parser = TikaDocumentParser(logging_group=None)
-        yield parser
-    finally:
-        # TODO(stumpylog): Cleanup once all parsers are handled
-        parser.cleanup()
-
-
-@pytest.fixture(scope="session")
-def sample_dir() -> Path:
-    return (Path(__file__).parent / Path("samples")).resolve()
-
-
-@pytest.fixture(scope="session")
-def sample_odt_file(sample_dir: Path) -> Path:
-    return sample_dir / "sample.odt"
-
-
-@pytest.fixture(scope="session")
-def sample_docx_file(sample_dir: Path) -> Path:
-    return sample_dir / "sample.docx"
-
-
-@pytest.fixture(scope="session")
-def sample_doc_file(sample_dir: Path) -> Path:
-    return sample_dir / "sample.doc"
-
-
-@pytest.fixture(scope="session")
-def sample_broken_odt(sample_dir: Path) -> Path:
-    return sample_dir / "multi-part-broken.odt"