From d01513a869dc7daa809d8609f71ba59a1e783d1f Mon Sep 17 00:00:00 2001
From: Trenton H <797416+stumpylog@users.noreply.github.com>
Date: Thu, 19 Mar 2026 11:42:38 -0700
Subject: [PATCH] Updates so we can report a page count for these parsers, assuming we do have an archive produced when called

---
 src/paperless/parsers/mail.py | 10 +++++++++-
 src/paperless/parsers/tika.py | 10 +++++++++-
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/src/paperless/parsers/mail.py b/src/paperless/parsers/mail.py
index 947892bd4..53825d70d 100644
--- a/src/paperless/parsers/mail.py
+++ b/src/paperless/parsers/mail.py
@@ -358,11 +358,19 @@ class MailDocumentParser:
     ) -> int | None:
         """Return the number of pages in the document.
 
+        Counts pages in the archive PDF produced by a preceding parse()
+        call. Returns ``None`` if parse() has not been called yet or if
+        no archive was produced.
+
         Returns
         -------
         int | None
-            Always None — page count is not available for email files.
+            Page count of the archive PDF, or ``None``.
         """
+        if self._archive_path is not None:
+            from paperless.parsers.utils import get_page_count_for_pdf
+
+            return get_page_count_for_pdf(self._archive_path, log=logger)
         return None
 
     def extract_metadata(
diff --git a/src/paperless/parsers/tika.py b/src/paperless/parsers/tika.py
index f7e41e444..674d74fe2 100644
--- a/src/paperless/parsers/tika.py
+++ b/src/paperless/parsers/tika.py
@@ -344,11 +344,19 @@ class TikaDocumentParser:
     ) -> int | None:
         """Return the number of pages in the document.
 
+        Counts pages in the archive PDF produced by a preceding parse()
+        call. Returns ``None`` if parse() has not been called yet or if
+        no archive was produced.
+
         Returns
         -------
         int | None
-            Always None — page count is not available from Tika.
+            Page count of the archive PDF, or ``None``.
         """
+        if self._archive_path is not None:
+            from paperless.parsers.utils import get_page_count_for_pdf
+
+            return get_page_count_for_pdf(self._archive_path, log=logger)
         return None
 
     def extract_metadata(