Updates these parsers so they can report a page count, provided an archive has already been produced by the time they are called

This commit is contained in:
Trenton H
2026-03-19 11:42:38 -07:00
parent 9e3c93f72d
commit d01513a869
2 changed files with 18 additions and 2 deletions

View File

@@ -358,11 +358,19 @@ class MailDocumentParser:
) -> int | None:
"""Return the number of pages in the document.
Counts pages in the archive PDF produced by a preceding parse()
call. Returns ``None`` if parse() has not been called yet or if
no archive was produced.
Returns
-------
int | None
Always None — page count is not available for email files.
Page count of the archive PDF, or ``None``.
"""
if self._archive_path is not None:
from paperless.parsers.utils import get_page_count_for_pdf
return get_page_count_for_pdf(self._archive_path, log=logger)
return None
def extract_metadata(

View File

@@ -344,11 +344,19 @@ class TikaDocumentParser:
) -> int | None:
"""Return the number of pages in the document.
Counts pages in the archive PDF produced by a preceding parse()
call. Returns ``None`` if parse() has not been called yet or if
no archive was produced.
Returns
-------
int | None
Always None — page count is not available from Tika.
Page count of the archive PDF, or ``None``.
"""
if self._archive_path is not None:
from paperless.parsers.utils import get_page_count_for_pdf
return get_page_count_for_pdf(self._archive_path, log=logger)
return None
def extract_metadata(