From fe1e35b9acaeb66dc0c58ed28962634838704506 Mon Sep 17 00:00:00 2001 From: Trenton H <797416+stumpylog@users.noreply.github.com> Date: Thu, 19 Mar 2026 11:43:12 -0700 Subject: [PATCH] Increases test coverage --- .../tests/parsers/test_mail_parser.py | 56 +++++++++++++++++++ .../tests/parsers/test_text_parser.py | 5 ++ .../tests/parsers/test_tika_parser.py | 25 +++++++++ 3 files changed, 86 insertions(+) diff --git a/src/paperless/tests/parsers/test_mail_parser.py b/src/paperless/tests/parsers/test_mail_parser.py index cfcd341d6..83d81409c 100644 --- a/src/paperless/tests/parsers/test_mail_parser.py +++ b/src/paperless/tests/parsers/test_mail_parser.py @@ -13,9 +13,65 @@ from pytest_mock import MockerFixture from documents.parsers import ParseError from paperless.parsers import ParserContext +from paperless.parsers import ParserProtocol from paperless.parsers.mail import MailDocumentParser +class TestMailParserProtocol: + """Verify that MailDocumentParser satisfies the ParserProtocol contract.""" + + def test_isinstance_satisfies_protocol( + self, + mail_parser: MailDocumentParser, + ) -> None: + assert isinstance(mail_parser, ParserProtocol) + + def test_supported_mime_types(self) -> None: + mime_types = MailDocumentParser.supported_mime_types() + assert isinstance(mime_types, dict) + assert "message/rfc822" in mime_types + + @pytest.mark.parametrize( + ("mime_type", "expected"), + [ + ("message/rfc822", 10), + ("application/pdf", None), + ("text/plain", None), + ], + ) + def test_score(self, mime_type: str, expected: int | None) -> None: + assert MailDocumentParser.score(mime_type, "email.eml") == expected + + def test_can_produce_archive_is_false( + self, + mail_parser: MailDocumentParser, + ) -> None: + assert mail_parser.can_produce_archive is False + + def test_requires_pdf_rendition_is_true( + self, + mail_parser: MailDocumentParser, + ) -> None: + assert mail_parser.requires_pdf_rendition is True + + def test_get_page_count_returns_none_without_archive( + self, + mail_parser: MailDocumentParser, + html_email_file: Path, + ) -> None: + assert mail_parser.get_page_count(html_email_file, "message/rfc822") is None + + def test_get_page_count_returns_int_with_pdf_archive( + self, + mail_parser: MailDocumentParser, + simple_txt_email_pdf_file: Path, + ) -> None: + mail_parser._archive_path = simple_txt_email_pdf_file + count = mail_parser.get_page_count(simple_txt_email_pdf_file, "message/rfc822") + assert isinstance(count, int) + assert count > 0 + + class TestEmailFileParsing: """ Tests around reading a file and parsing it into a diff --git a/src/paperless/tests/parsers/test_text_parser.py b/src/paperless/tests/parsers/test_text_parser.py index d2f095f5c..091d8287e 100644 --- a/src/paperless/tests/parsers/test_text_parser.py +++ b/src/paperless/tests/parsers/test_text_parser.py @@ -12,6 +12,7 @@ from pathlib import Path import pytest +from paperless.parsers import ParserContext from paperless.parsers import ParserProtocol from paperless.parsers.text import TextDocumentParser @@ -93,6 +94,7 @@ class TestTextParserParse: text_parser: TextDocumentParser, sample_txt_file: Path, ) -> None: + text_parser.configure(ParserContext()) text_parser.parse(sample_txt_file, "text/plain") assert text_parser.get_text() == "This is a test file.\n" @@ -102,6 +104,7 @@ class TestTextParserParse: text_parser: TextDocumentParser, sample_txt_file: Path, ) -> None: + text_parser.configure(ParserContext()) text_parser.parse(sample_txt_file, "text/plain") assert text_parser.get_archive_path() is None @@ -111,6 +114,7 @@ class TestTextParserParse: text_parser: TextDocumentParser, sample_txt_file: Path, ) -> None: + text_parser.configure(ParserContext()) text_parser.parse(sample_txt_file, "text/plain") assert text_parser.get_date() is None @@ -129,6 +133,7 @@ class TestTextParserParse: - Parsing succeeds - Invalid bytes are replaced with the Unicode replacement character """ + text_parser.configure(ParserContext()) text_parser.parse(malformed_txt_file, "text/plain") assert text_parser.get_text() == "Pantothens\ufffdure\n" diff --git a/src/paperless/tests/parsers/test_tika_parser.py b/src/paperless/tests/parsers/test_tika_parser.py index d18d178e6..010969259 100644 --- a/src/paperless/tests/parsers/test_tika_parser.py +++ b/src/paperless/tests/parsers/test_tika_parser.py @@ -9,6 +9,7 @@ from pytest_django.fixtures import SettingsWrapper from pytest_httpx import HTTPXMock from documents.parsers import ParseError +from paperless.parsers import ParserContext from paperless.parsers import ParserProtocol from paperless.parsers.tika import TikaDocumentParser @@ -60,6 +61,29 @@ class TestTikaParserRegistryInterface: def test_requires_pdf_rendition_is_true(self) -> None: assert TikaDocumentParser().requires_pdf_rendition is True + def test_get_page_count_returns_none_without_archive( + self, + tika_parser: TikaDocumentParser, + sample_odt_file: Path, + ) -> None: + assert ( + tika_parser.get_page_count( + sample_odt_file, + "application/vnd.oasis.opendocument.text", + ) + is None + ) + + def test_get_page_count_returns_int_with_pdf_archive( + self, + tika_parser: TikaDocumentParser, + sample_pdf_file: Path, + ) -> None: + tika_parser._archive_path = sample_pdf_file + count = tika_parser.get_page_count(sample_pdf_file, "application/pdf") + assert isinstance(count, int) + assert count > 0 + @pytest.mark.django_db() class TestTikaParser: @@ -83,6 +107,7 @@ class TestTikaParser: # Pretend convert to PDF response httpx_mock.add_response(content=b"PDF document") + tika_parser.configure(ParserContext()) tika_parser.parse(sample_odt_file, "application/vnd.oasis.opendocument.text") assert tika_parser.get_text() == "the content"