Chore: move Tika parser and tests to paperless/

Move TikaDocumentParser and its tests to the canonical parser package
location, matching the pattern established for TextDocumentParser:

- src/paperless_tika/parsers.py → src/paperless/parsers/tika.py
- src/paperless_tika/tests/test_tika_parser.py → src/paperless/tests/parsers/test_tika_parser.py
- src/paperless_tika/tests/samples/ → src/paperless/tests/samples/tika/

Merge tika fixtures (tika_parser, sample_odt_file, sample_docx_file,
sample_doc_file, sample_broken_odt) into the shared parsers conftest.
Remove the now-empty src/paperless_tika/tests/conftest.py.

Content is unchanged — this commit is rename-only so git history is
preserved on the moved files.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Trenton H
2026-03-12 15:06:20 -07:00
parent d86cfdb088
commit 0a9c67e9b1
8 changed files with 85 additions and 42 deletions
+84
View File
@@ -11,6 +11,7 @@ from typing import TYPE_CHECKING
import pytest
from paperless.parsers.text import TextDocumentParser
from paperless.parsers.tika import TikaDocumentParser
if TYPE_CHECKING:
from collections.abc import Generator
@@ -74,3 +75,86 @@ def text_parser() -> Generator[TextDocumentParser, None, None]:
"""
with TextDocumentParser() as parser:
yield parser
# ------------------------------------------------------------------
# Tika parser sample files
# ------------------------------------------------------------------
@pytest.fixture(scope="session")
def tika_samples_dir(samples_dir: Path) -> Path:
"""Absolute path to the Tika parser sample files directory.
Returns
-------
Path
``<samples_dir>/tika/``
"""
return samples_dir / "tika"
@pytest.fixture(scope="session")
def sample_odt_file(tika_samples_dir: Path) -> Path:
"""Path to a sample ODT file.
Returns
-------
Path
Absolute path to ``tika/sample.odt``.
"""
return tika_samples_dir / "sample.odt"
@pytest.fixture(scope="session")
def sample_docx_file(tika_samples_dir: Path) -> Path:
"""Path to a sample DOCX file.
Returns
-------
Path
Absolute path to ``tika/sample.docx``.
"""
return tika_samples_dir / "sample.docx"
@pytest.fixture(scope="session")
def sample_doc_file(tika_samples_dir: Path) -> Path:
"""Path to a sample DOC file.
Returns
-------
Path
Absolute path to ``tika/sample.doc``.
"""
return tika_samples_dir / "sample.doc"
@pytest.fixture(scope="session")
def sample_broken_odt(tika_samples_dir: Path) -> Path:
"""Path to a broken ODT file that triggers the multi-part fallback.
Returns
-------
Path
Absolute path to ``tika/multi-part-broken.odt``.
"""
return tika_samples_dir / "multi-part-broken.odt"
# ------------------------------------------------------------------
# Tika parser instance
# ------------------------------------------------------------------
@pytest.fixture()
def tika_parser() -> Generator[TikaDocumentParser, None, None]:
"""Yield a TikaDocumentParser and clean up its temporary directory afterwards.
Yields
------
TikaDocumentParser
A ready-to-use parser instance.
"""
with TikaDocumentParser() as parser:
yield parser
@@ -5,11 +5,11 @@ from pathlib import Path
import pytest
from httpx import codes
from paperless_tika.parsers import TikaDocumentParser
from pytest_django.fixtures import SettingsWrapper
from pytest_httpx import HTTPXMock
from documents.parsers import ParseError
from paperless_tika.parsers import TikaDocumentParser
@pytest.mark.django_db()
-41
View File
@@ -1,41 +0,0 @@
from collections.abc import Generator
from pathlib import Path
import pytest
from paperless_tika.parsers import TikaDocumentParser
@pytest.fixture()
def tika_parser() -> Generator[TikaDocumentParser, None, None]:
try:
parser = TikaDocumentParser(logging_group=None)
yield parser
finally:
# TODO(stumpylog): Cleanup once all parsers are handled
parser.cleanup()
@pytest.fixture(scope="session")
def sample_dir() -> Path:
return (Path(__file__).parent / Path("samples")).resolve()
@pytest.fixture(scope="session")
def sample_odt_file(sample_dir: Path) -> Path:
return sample_dir / "sample.odt"
@pytest.fixture(scope="session")
def sample_docx_file(sample_dir: Path) -> Path:
return sample_dir / "sample.docx"
@pytest.fixture(scope="session")
def sample_doc_file(sample_dir: Path) -> Path:
return sample_dir / "sample.doc"
@pytest.fixture(scope="session")
def sample_broken_odt(sample_dir: Path) -> Path:
return sample_dir / "multi-part-broken.odt"