Increases test coverage

This commit is contained in:
Trenton H
2026-03-19 11:43:12 -07:00
parent d01513a869
commit fe1e35b9ac
3 changed files with 86 additions and 0 deletions
@@ -13,9 +13,65 @@ from pytest_mock import MockerFixture
from documents.parsers import ParseError
from paperless.parsers import ParserContext
from paperless.parsers import ParserProtocol
from paperless.parsers.mail import MailDocumentParser
class TestMailParserProtocol:
    """Checks of MailDocumentParser against the ParserProtocol interface."""

    def test_isinstance_satisfies_protocol(
        self,
        mail_parser: MailDocumentParser,
    ) -> None:
        """The parser fixture passes an isinstance check against ParserProtocol."""
        satisfies_protocol = isinstance(mail_parser, ParserProtocol)
        assert satisfies_protocol

    def test_supported_mime_types(self) -> None:
        """supported_mime_types() yields a dict that contains message/rfc822."""
        supported = MailDocumentParser.supported_mime_types()

        assert isinstance(supported, dict)
        assert "message/rfc822" in supported

    @pytest.mark.parametrize(
        ("mime_type", "expected"),
        [
            ("message/rfc822", 10),
            ("application/pdf", None),
            ("text/plain", None),
        ],
    )
    def test_score(self, mime_type: str, expected: int | None) -> None:
        """score() returns the expected value for each parametrized MIME type."""
        actual = MailDocumentParser.score(mime_type, "email.eml")
        assert actual == expected

    def test_can_produce_archive_is_false(
        self,
        mail_parser: MailDocumentParser,
    ) -> None:
        """can_produce_archive is exactly False on the parser fixture."""
        produces_archive = mail_parser.can_produce_archive
        assert produces_archive is False

    def test_requires_pdf_rendition_is_true(
        self,
        mail_parser: MailDocumentParser,
    ) -> None:
        """requires_pdf_rendition is exactly True on the parser fixture."""
        needs_rendition = mail_parser.requires_pdf_rendition
        assert needs_rendition is True

    def test_get_page_count_returns_none_without_archive(
        self,
        mail_parser: MailDocumentParser,
        html_email_file: Path,
    ) -> None:
        """get_page_count() is None when this test assigns no archive path."""
        page_count = mail_parser.get_page_count(html_email_file, "message/rfc822")
        assert page_count is None

    def test_get_page_count_returns_int_with_pdf_archive(
        self,
        mail_parser: MailDocumentParser,
        simple_txt_email_pdf_file: Path,
    ) -> None:
        """get_page_count() is a positive int once an archive PDF is assigned."""
        # Assign the private archive path directly instead of running a parse.
        mail_parser._archive_path = simple_txt_email_pdf_file

        page_count = mail_parser.get_page_count(
            simple_txt_email_pdf_file,
            "message/rfc822",
        )

        assert isinstance(page_count, int)
        assert page_count > 0
class TestEmailFileParsing:
"""
Tests around reading a file and parsing it into a
@@ -12,6 +12,7 @@ from pathlib import Path
import pytest
from paperless.parsers import ParserContext
from paperless.parsers import ParserProtocol
from paperless.parsers.text import TextDocumentParser
@@ -93,6 +94,7 @@ class TestTextParserParse:
text_parser: TextDocumentParser,
sample_txt_file: Path,
) -> None:
text_parser.configure(ParserContext())
text_parser.parse(sample_txt_file, "text/plain")
assert text_parser.get_text() == "This is a test file.\n"
@@ -102,6 +104,7 @@ class TestTextParserParse:
text_parser: TextDocumentParser,
sample_txt_file: Path,
) -> None:
text_parser.configure(ParserContext())
text_parser.parse(sample_txt_file, "text/plain")
assert text_parser.get_archive_path() is None
@@ -111,6 +114,7 @@ class TestTextParserParse:
text_parser: TextDocumentParser,
sample_txt_file: Path,
) -> None:
text_parser.configure(ParserContext())
text_parser.parse(sample_txt_file, "text/plain")
assert text_parser.get_date() is None
@@ -129,6 +133,7 @@ class TestTextParserParse:
- Parsing succeeds
- Invalid bytes are replaced with the Unicode replacement character
"""
text_parser.configure(ParserContext())
text_parser.parse(malformed_txt_file, "text/plain")
assert text_parser.get_text() == "Pantothens\ufffdure\n"
@@ -9,6 +9,7 @@ from pytest_django.fixtures import SettingsWrapper
from pytest_httpx import HTTPXMock
from documents.parsers import ParseError
from paperless.parsers import ParserContext
from paperless.parsers import ParserProtocol
from paperless.parsers.tika import TikaDocumentParser
@@ -60,6 +61,29 @@ class TestTikaParserRegistryInterface:
def test_requires_pdf_rendition_is_true(self) -> None:
    """requires_pdf_rendition is exactly True on a fresh TikaDocumentParser."""
    parser = TikaDocumentParser()
    needs_rendition = parser.requires_pdf_rendition
    assert needs_rendition is True
def test_get_page_count_returns_none_without_archive(
    self,
    tika_parser: TikaDocumentParser,
    sample_odt_file: Path,
) -> None:
    """get_page_count() is None when this test assigns no archive path."""
    odt_mime_type = "application/vnd.oasis.opendocument.text"
    page_count = tika_parser.get_page_count(sample_odt_file, odt_mime_type)
    assert page_count is None
def test_get_page_count_returns_int_with_pdf_archive(
    self,
    tika_parser: TikaDocumentParser,
    sample_pdf_file: Path,
) -> None:
    """get_page_count() is a positive int once an archive PDF is assigned."""
    # Assign the private archive path directly instead of running a parse.
    tika_parser._archive_path = sample_pdf_file

    page_count = tika_parser.get_page_count(sample_pdf_file, "application/pdf")

    assert isinstance(page_count, int)
    assert page_count > 0
@pytest.mark.django_db()
class TestTikaParser:
@@ -83,6 +107,7 @@ class TestTikaParser:
# Pretend convert to PDF response
httpx_mock.add_response(content=b"PDF document")
tika_parser.configure(ParserContext())
tika_parser.parse(sample_odt_file, "application/vnd.oasis.opendocument.text")
assert tika_parser.get_text() == "the content"