From d0b95f2cda13c7307cf6c0a185cd5fd6aa050556 Mon Sep 17 00:00:00 2001 From: Trenton H <797416+stumpylog@users.noreply.github.com> Date: Thu, 12 Mar 2026 15:38:25 -0700 Subject: [PATCH] Fix: update remaining imports and move live Tika tests after parser migration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - src/documents/tests/test_parsers.py: import TikaDocumentParser from paperless.parsers.tika (old paperless_tika.parsers no longer exists) - git mv paperless_tika/tests/test_live_tika.py → paperless/tests/parsers/test_live_tika.py to co-locate all Tika tests with the parser; update import and replace old attribute API (tika_parser.text/.archive_path) with accessor methods (get_text/get_archive_path) Co-Authored-By: Claude Sonnet 4.6 --- src/documents/tests/test_parsers.py | 2 +- .../tests/parsers}/test_live_tika.py | 38 ++++++++++--------- 2 files changed, 21 insertions(+), 19 deletions(-) rename src/{paperless_tika/tests => paperless/tests/parsers}/test_live_tika.py (77%) diff --git a/src/documents/tests/test_parsers.py b/src/documents/tests/test_parsers.py index 5383975d1..7be6ad20d 100644 --- a/src/documents/tests/test_parsers.py +++ b/src/documents/tests/test_parsers.py @@ -10,8 +10,8 @@ from documents.parsers import get_parser_class_for_mime_type from documents.parsers import get_supported_file_extensions from documents.parsers import is_file_ext_supported from paperless.parsers.text import TextDocumentParser +from paperless.parsers.tika import TikaDocumentParser from paperless_tesseract.parsers import RasterisedDocumentParser -from paperless_tika.parsers import TikaDocumentParser class TestParserDiscovery(TestCase): diff --git a/src/paperless_tika/tests/test_live_tika.py b/src/paperless/tests/parsers/test_live_tika.py similarity index 77% rename from src/paperless_tika/tests/test_live_tika.py rename to src/paperless/tests/parsers/test_live_tika.py index 8275708b4..87cdd88a5 100644 --- a/src/paperless_tika/tests/test_live_tika.py +++ b/src/paperless/tests/parsers/test_live_tika.py @@ -4,7 +4,7 @@ from pathlib import Path import pytest from documents.tests.utils import util_call_with_backoff -from paperless_tika.parsers import TikaDocumentParser +from paperless.parsers.tika import TikaDocumentParser @pytest.mark.skipif( @@ -42,14 +42,15 @@ class TestTikaParserAgainstServer: ) assert ( - tika_parser.text + tika_parser.get_text() == "This is an ODT test document, created September 14, 2022" ) - assert tika_parser.archive_path is not None - assert b"PDF-" in tika_parser.archive_path.read_bytes()[:10] + archive = tika_parser.get_archive_path() + assert archive is not None + assert b"PDF-" in archive.read_bytes()[:10] # TODO: Unsure what can set the Creation-Date field in a document, enable when possible - # self.assertEqual(tika_parser.date, datetime.datetime(2022, 9, 14)) + # self.assertEqual(tika_parser.get_date(), datetime.datetime(2022, 9, 14)) def test_basic_parse_docx( self, @@ -74,14 +75,15 @@ class TestTikaParserAgainstServer: ) assert ( - tika_parser.text + tika_parser.get_text() == "This is an DOCX test document, also made September 14, 2022" ) - assert tika_parser.archive_path is not None - with Path(tika_parser.archive_path).open("rb") as f: + archive = tika_parser.get_archive_path() + assert archive is not None + with archive.open("rb") as f: assert b"PDF-" in f.read()[:10] - # self.assertEqual(tika_parser.date, datetime.datetime(2022, 9, 14)) + # self.assertEqual(tika_parser.get_date(), datetime.datetime(2022, 9, 14)) def test_basic_parse_doc( self, @@ -102,13 +104,12 @@ class TestTikaParserAgainstServer: [sample_doc_file, "application/msword"], ) - assert tika_parser.text is not None - assert ( - "This is a test document, saved in the older .doc format" - in tika_parser.text - ) - assert tika_parser.archive_path is not None - with Path(tika_parser.archive_path).open("rb") as f: + text = tika_parser.get_text() + assert text is not None + assert "This is a test document, saved in the older .doc format" in text + archive = tika_parser.get_archive_path() + assert archive is not None + with archive.open("rb") as f: assert b"PDF-" in f.read()[:10] def test_tika_fails_multi_part( @@ -133,6 +134,7 @@ class TestTikaParserAgainstServer: [sample_broken_odt, "application/vnd.oasis.opendocument.text"], ) - assert tika_parser.archive_path is not None - with Path(tika_parser.archive_path).open("rb") as f: + archive = tika_parser.get_archive_path() + assert archive is not None + with archive.open("rb") as f: assert b"PDF-" in f.read()[:10]