mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-06-16 18:34:19 +00:00
Increases test coverage
This commit is contained in:
@@ -13,9 +13,65 @@ from pytest_mock import MockerFixture
|
||||
|
||||
from documents.parsers import ParseError
|
||||
from paperless.parsers import ParserContext
|
||||
from paperless.parsers import ParserProtocol
|
||||
from paperless.parsers.mail import MailDocumentParser
|
||||
|
||||
|
||||
class TestMailParserProtocol:
|
||||
"""Verify that MailDocumentParser satisfies the ParserProtocol contract."""
|
||||
|
||||
def test_isinstance_satisfies_protocol(
|
||||
self,
|
||||
mail_parser: MailDocumentParser,
|
||||
) -> None:
|
||||
assert isinstance(mail_parser, ParserProtocol)
|
||||
|
||||
def test_supported_mime_types(self) -> None:
|
||||
mime_types = MailDocumentParser.supported_mime_types()
|
||||
assert isinstance(mime_types, dict)
|
||||
assert "message/rfc822" in mime_types
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
("mime_type", "expected"),
|
||||
[
|
||||
("message/rfc822", 10),
|
||||
("application/pdf", None),
|
||||
("text/plain", None),
|
||||
],
|
||||
)
|
||||
def test_score(self, mime_type: str, expected: int | None) -> None:
|
||||
assert MailDocumentParser.score(mime_type, "email.eml") == expected
|
||||
|
||||
def test_can_produce_archive_is_false(
|
||||
self,
|
||||
mail_parser: MailDocumentParser,
|
||||
) -> None:
|
||||
assert mail_parser.can_produce_archive is False
|
||||
|
||||
def test_requires_pdf_rendition_is_true(
|
||||
self,
|
||||
mail_parser: MailDocumentParser,
|
||||
) -> None:
|
||||
assert mail_parser.requires_pdf_rendition is True
|
||||
|
||||
def test_get_page_count_returns_none_without_archive(
|
||||
self,
|
||||
mail_parser: MailDocumentParser,
|
||||
html_email_file: Path,
|
||||
) -> None:
|
||||
assert mail_parser.get_page_count(html_email_file, "message/rfc822") is None
|
||||
|
||||
def test_get_page_count_returns_int_with_pdf_archive(
|
||||
self,
|
||||
mail_parser: MailDocumentParser,
|
||||
simple_txt_email_pdf_file: Path,
|
||||
) -> None:
|
||||
mail_parser._archive_path = simple_txt_email_pdf_file
|
||||
count = mail_parser.get_page_count(simple_txt_email_pdf_file, "message/rfc822")
|
||||
assert isinstance(count, int)
|
||||
assert count > 0
|
||||
|
||||
|
||||
class TestEmailFileParsing:
|
||||
"""
|
||||
Tests around reading a file and parsing it into a
|
||||
|
||||
@@ -12,6 +12,7 @@ from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from paperless.parsers import ParserContext
|
||||
from paperless.parsers import ParserProtocol
|
||||
from paperless.parsers.text import TextDocumentParser
|
||||
|
||||
@@ -93,6 +94,7 @@ class TestTextParserParse:
|
||||
text_parser: TextDocumentParser,
|
||||
sample_txt_file: Path,
|
||||
) -> None:
|
||||
text_parser.configure(ParserContext())
|
||||
text_parser.parse(sample_txt_file, "text/plain")
|
||||
|
||||
assert text_parser.get_text() == "This is a test file.\n"
|
||||
@@ -102,6 +104,7 @@ class TestTextParserParse:
|
||||
text_parser: TextDocumentParser,
|
||||
sample_txt_file: Path,
|
||||
) -> None:
|
||||
text_parser.configure(ParserContext())
|
||||
text_parser.parse(sample_txt_file, "text/plain")
|
||||
|
||||
assert text_parser.get_archive_path() is None
|
||||
@@ -111,6 +114,7 @@ class TestTextParserParse:
|
||||
text_parser: TextDocumentParser,
|
||||
sample_txt_file: Path,
|
||||
) -> None:
|
||||
text_parser.configure(ParserContext())
|
||||
text_parser.parse(sample_txt_file, "text/plain")
|
||||
|
||||
assert text_parser.get_date() is None
|
||||
@@ -129,6 +133,7 @@ class TestTextParserParse:
|
||||
- Parsing succeeds
|
||||
- Invalid bytes are replaced with the Unicode replacement character
|
||||
"""
|
||||
text_parser.configure(ParserContext())
|
||||
text_parser.parse(malformed_txt_file, "text/plain")
|
||||
|
||||
assert text_parser.get_text() == "Pantothens\ufffdure\n"
|
||||
|
||||
@@ -9,6 +9,7 @@ from pytest_django.fixtures import SettingsWrapper
|
||||
from pytest_httpx import HTTPXMock
|
||||
|
||||
from documents.parsers import ParseError
|
||||
from paperless.parsers import ParserContext
|
||||
from paperless.parsers import ParserProtocol
|
||||
from paperless.parsers.tika import TikaDocumentParser
|
||||
|
||||
@@ -60,6 +61,29 @@ class TestTikaParserRegistryInterface:
|
||||
def test_requires_pdf_rendition_is_true(self) -> None:
|
||||
assert TikaDocumentParser().requires_pdf_rendition is True
|
||||
|
||||
def test_get_page_count_returns_none_without_archive(
|
||||
self,
|
||||
tika_parser: TikaDocumentParser,
|
||||
sample_odt_file: Path,
|
||||
) -> None:
|
||||
assert (
|
||||
tika_parser.get_page_count(
|
||||
sample_odt_file,
|
||||
"application/vnd.oasis.opendocument.text",
|
||||
)
|
||||
is None
|
||||
)
|
||||
|
||||
def test_get_page_count_returns_int_with_pdf_archive(
|
||||
self,
|
||||
tika_parser: TikaDocumentParser,
|
||||
sample_pdf_file: Path,
|
||||
) -> None:
|
||||
tika_parser._archive_path = sample_pdf_file
|
||||
count = tika_parser.get_page_count(sample_pdf_file, "application/pdf")
|
||||
assert isinstance(count, int)
|
||||
assert count > 0
|
||||
|
||||
|
||||
@pytest.mark.django_db()
|
||||
class TestTikaParser:
|
||||
@@ -83,6 +107,7 @@ class TestTikaParser:
|
||||
# Pretend convert to PDF response
|
||||
httpx_mock.add_response(content=b"PDF document")
|
||||
|
||||
tika_parser.configure(ParserContext())
|
||||
tika_parser.parse(sample_odt_file, "application/vnd.oasis.opendocument.text")
|
||||
|
||||
assert tika_parser.get_text() == "the content"
|
||||
|
||||
Reference in New Issue
Block a user