diff --git a/pyproject.toml b/pyproject.toml
index 43ad1e1cc..d41a918c0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -256,7 +256,7 @@ lint.isort.force-single-line = true
 [tool.codespell]
 write-changes = true
 ignore-words-list = "criterias,afterall,valeu,ureue,equest,ure,assertIn,Oktober,commitish"
-skip = "src-ui/src/locale/*,src-ui/pnpm-lock.yaml,src-ui/e2e/*,src/paperless_mail/tests/samples/*,src/documents/tests/samples/*,*.po,*.json"
+skip = "src-ui/src/locale/*,src-ui/pnpm-lock.yaml,src-ui/e2e/*,src/paperless_mail/tests/samples/*,src/paperless/tests/samples/mail/*,src/documents/tests/samples/*,*.po,*.json"
 
 [tool.pytest]
 minversion = "9.0"
diff --git a/src/documents/consumer.py b/src/documents/consumer.py
index 11abf46d4..ba2bba473 100644
--- a/src/documents/consumer.py
+++ b/src/documents/consumer.py
@@ -51,10 +51,11 @@ from documents.templating.workflows import parse_w_workflow_placeholders
 from documents.utils import copy_basic_file_stats
 from documents.utils import copy_file_with_basic_stats
 from documents.utils import run_subprocess
+from paperless.parsers import ParserContext
+from paperless.parsers.mail import MailDocumentParser
 from paperless.parsers.remote import RemoteDocumentParser
 from paperless.parsers.text import TextDocumentParser
 from paperless.parsers.tika import TikaDocumentParser
-from paperless_mail.parsers import MailDocumentParser
 
 LOGGING_NAME: Final[str] = "paperless.consumer"
 
@@ -71,7 +72,12 @@ def _parser_cleanup(parser: DocumentParser) -> None:
     """
     if isinstance(
         parser,
-        (TextDocumentParser, RemoteDocumentParser, TikaDocumentParser),
+        (
+            MailDocumentParser,
+            RemoteDocumentParser,
+            TextDocumentParser,
+            TikaDocumentParser,
+        ),
     ):
         parser.__exit__(None, None, None)
     else:
@@ -453,13 +459,20 @@ class ConsumerPlugin(
             progress_callback=progress_callback,
         )
 
+        parser_is_new_style = isinstance(
+            document_parser,
+            (
+                MailDocumentParser,
+                RemoteDocumentParser,
+                TextDocumentParser,
+                TikaDocumentParser,
+            ),
+        )
+
         # New-style parsers use __enter__/__exit__ for resource management.
         # _parser_cleanup (below) handles __exit__; call __enter__ here.
         # TODO(stumpylog): Remove me in the future
-        if isinstance(
-            document_parser,
-            (TextDocumentParser, RemoteDocumentParser, TikaDocumentParser),
-        ):
+        if parser_is_new_style:
             document_parser.__enter__()
 
         self.log.debug(f"Parser: {type(document_parser).__name__}")
@@ -480,20 +493,12 @@ class ConsumerPlugin(
                 ConsumerStatusShortMessage.PARSING_DOCUMENT,
             )
             self.log.debug(f"Parsing {self.filename}...")
-            if (
-                isinstance(document_parser, MailDocumentParser)
-                and self.input_doc.mailrule_id
-            ):
-                document_parser.parse(
-                    self.working_copy,
-                    mime_type,
-                    self.filename,
-                    self.input_doc.mailrule_id,
+
+            # TODO(stumpylog): Remove me in the future when all parsers use new protocol
+            if parser_is_new_style:
+                document_parser.configure(
+                    ParserContext(mailrule_id=self.input_doc.mailrule_id),
                 )
-            elif isinstance(
-                document_parser,
-                (TextDocumentParser, RemoteDocumentParser, TikaDocumentParser),
-            ):
                 # TODO(stumpylog): Remove me in the future
                 document_parser.parse(self.working_copy, mime_type)
             else:
@@ -506,11 +511,8 @@ class ConsumerPlugin(
                 ProgressStatusOptions.WORKING,
                 ConsumerStatusShortMessage.GENERATING_THUMBNAIL,
             )
-            if isinstance(
-                document_parser,
-                (TextDocumentParser, RemoteDocumentParser, TikaDocumentParser),
-            ):
-                # TODO(stumpylog): Remove me in the future
+            # TODO(stumpylog): Remove me in the future when all parsers use new protocol
+            if parser_is_new_style:
                 thumbnail = document_parser.get_thumbnail(self.working_copy, mime_type)
             else:
                 thumbnail = document_parser.get_thumbnail(
diff --git a/src/documents/tasks.py b/src/documents/tasks.py
index 378695731..947da878f 100644
--- a/src/documents/tasks.py
+++ b/src/documents/tasks.py
@@ -65,6 +65,11 @@ from documents.signals.handlers import run_workflows
 from documents.signals.handlers import send_websocket_document_updated
 from documents.workflows.utils import get_workflows_for_trigger
 from paperless.config import AIConfig
+from paperless.parsers import ParserContext
+from paperless.parsers.mail import MailDocumentParser
+from paperless.parsers.remote import RemoteDocumentParser
+from paperless.parsers.text import TextDocumentParser
+from paperless.parsers.tika import TikaDocumentParser
 from paperless_ai.indexing import llm_index_add_or_update_document
 from paperless_ai.indexing import llm_index_remove_document
 from paperless_ai.indexing import update_llm_index
@@ -304,7 +309,9 @@ def update_document_content_maybe_archive_file(document_id) -> None:
 
     mime_type = document.mime_type
 
-    parser_class: type[DocumentParser] = get_parser_class_for_mime_type(mime_type)
+    parser_class: type[DocumentParser] | None = get_parser_class_for_mime_type(
+        mime_type,
+    )
 
     if not parser_class:
         logger.error(
@@ -315,14 +322,41 @@ def update_document_content_maybe_archive_file(document_id) -> None:
 
     parser: DocumentParser = parser_class(logging_group=uuid.uuid4())
 
-    try:
-        parser.parse(document.source_path, mime_type, document.get_public_filename())
+    parser_is_new_style = isinstance(
+        parser,
+        (
+            MailDocumentParser,
+            RemoteDocumentParser,
+            TextDocumentParser,
+            TikaDocumentParser,
+        ),
+    )
 
-        thumbnail = parser.get_thumbnail(
-            document.source_path,
-            mime_type,
-            document.get_public_filename(),
-        )
+    # TODO(stumpylog): Remove branch in the future when all parsers use new protocol
+    if parser_is_new_style:
+        parser.__enter__()
+
+    try:
+        # TODO(stumpylog): Remove branch in the future when all parsers use new protocol
+        if parser_is_new_style:
+            parser.configure(ParserContext())
+            parser.parse(document.source_path, mime_type)
+        else:
+            parser.parse(
+                document.source_path,
+                mime_type,
+                document.get_public_filename(),
+            )
+
+        # TODO(stumpylog): Remove branch in the future when all parsers use new protocol
+        if parser_is_new_style:
+            thumbnail = parser.get_thumbnail(document.source_path, mime_type)
+        else:
+            thumbnail = parser.get_thumbnail(
+                document.source_path,
+                mime_type,
+                document.get_public_filename(),
+            )
 
         with transaction.atomic():
             oldDocument = Document.objects.get(pk=document.pk)
@@ -403,8 +437,14 @@ def update_document_content_maybe_archive_file(document_id) -> None:
             f"Error while parsing document {document} (ID: {document_id})",
         )
     finally:
-        # TODO(stumpylog): Cleanup once all parsers are handled
-        parser.cleanup()
+        # TODO(stumpylog): Remove branch in the future when all parsers use new protocol
+        # Reuse the flag that guarded __enter__() above so that every parser which
+        # was entered (RemoteDocumentParser included) is also exited, instead of
+        # falling through to the legacy cleanup() path.
+        if parser_is_new_style:
+            parser.__exit__(None, None, None)
+        else:
+            parser.cleanup()
 
 
 @shared_task
diff --git a/src/documents/tests/test_consumer.py b/src/documents/tests/test_consumer.py
index a59c7d676..a3574fdce 100644
--- a/src/documents/tests/test_consumer.py
+++ b/src/documents/tests/test_consumer.py
@@ -36,7 +36,6 @@ from documents.tests.utils import DummyProgressManager
 from documents.tests.utils import FileSystemAssertsMixin
 from documents.tests.utils import GetConsumerMixin
 from paperless_mail.models import MailRule
-from paperless_mail.parsers import MailDocumentParser
 
 
 class _BaseTestParser(DocumentParser):
@@ -1091,7 +1090,7 @@ class TestConsumer(
             self.assertEqual(command[1], "--replace-input")
 
     @mock.patch("paperless_mail.models.MailRule.objects.get")
-    @mock.patch("paperless_mail.parsers.MailDocumentParser.parse")
+    @mock.patch("paperless.parsers.mail.MailDocumentParser.parse")
     @mock.patch("documents.parsers.document_consumer_declaration.send")
     def test_mail_parser_receives_mailrule(
         self,
@@ -1107,11 +1106,13 @@ class TestConsumer(
         THEN:
             - The mail parser should receive the mail rule
         """
+        from paperless_mail.signals import get_parser as mail_get_parser
+
         mock_consumer_declaration_send.return_value = [
             (
                 None,
                 {
-                    "parser": MailDocumentParser,
+                    "parser": mail_get_parser,
                     "mime_types": {"message/rfc822": ".eml"},
                     "weight": 0,
                 },
@@ -1123,9 +1124,10 @@ class TestConsumer(
         with self.get_consumer(
             filepath=(
                 Path(__file__).parent.parent.parent
-                / Path("paperless_mail")
+                / Path("paperless")
                 / Path("tests")
                 / Path("samples")
+                / Path("mail")
             ).resolve()
             / "html.eml",
             source=DocumentSource.MailFetch,
@@ -1136,12 +1138,10 @@ class TestConsumer(
                 ConsumerError,
             ):
                 consumer.run()
-                mock_mail_parser_parse.assert_called_once_with(
-                    consumer.working_copy,
-                    "message/rfc822",
-                    file_name="sample.pdf",
-                    mailrule=mock_mailrule_get.return_value,
-                )
+            mock_mail_parser_parse.assert_called_once_with(
+                consumer.working_copy,
+                "message/rfc822",
+            )
 
 
 @mock.patch("documents.consumer.magic.from_file", fake_magic_from_file)
diff --git a/src/paperless/parsers/__init__.py b/src/paperless/parsers/__init__.py
index ea67ade00..c9c1530a5 100644
--- a/src/paperless/parsers/__init__.py
+++ b/src/paperless/parsers/__init__.py
@@ -35,6 +35,7 @@ Usage example (third-party parser)::
 
 from __future__ import annotations
 
+from dataclasses import dataclass
 from typing import TYPE_CHECKING
 from typing import Protocol
 from typing import Self
@@ -48,6 +49,7 @@ if TYPE_CHECKING:
 
 __all__ = [
     "MetadataEntry",
+    "ParserContext",
     "ParserProtocol",
 ]
 
@@ -73,6 +75,44 @@ class MetadataEntry(TypedDict):
     """String representation of the field value."""
 
 
+@dataclass(frozen=True, slots=True)
+class ParserContext:
+    """Immutable context passed to a parser before parse().
+
+    The consumer assembles this from the ingestion event (Django settings
+    will follow, see Notes) and calls ``parser.configure(context)`` before
+    ``parser.parse()``.  Parsers read only the fields they need; every field
+    has a default, so ``ParserContext()`` is a valid empty context.
+
+    ``frozen=True`` prevents accidental mutation after the consumer
+    hands the context off.  ``slots=True`` keeps instances lightweight.
+
+    Fields
+    ------
+    mailrule_id : int | None
+        Primary key of the ``MailRule`` that triggered this ingestion,
+        or ``None`` when the document did not arrive via a mail rule.
+        Used by ``MailDocumentParser`` to select the PDF layout.
+
+    Notes
+    -----
+    Future fields (not yet implemented):
+
+    * ``output_type`` — PDF/A variant for archive generation
+      (replaces ``settings.OCR_OUTPUT_TYPE`` reads inside parsers).
+    * ``ocr_mode`` — skip-text, redo, force, etc.
+      (replaces ``settings.OCR_MODE`` reads inside parsers).
+    * ``ocr_language`` — Tesseract language string.
+      (replaces ``settings.OCR_LANGUAGE`` reads inside parsers).
+
+    When those fields are added the consumer will read from Django
+    settings once and populate them here, decoupling parsers from
+    ``settings.*`` entirely.
+    """
+
+    mailrule_id: int | None = None
+
+
 @runtime_checkable
 class ParserProtocol(Protocol):
     """Structural contract for all Paperless-ngx document parsers.
@@ -191,6 +231,21 @@ class ParserProtocol(Protocol):
     # Core parsing interface
     # ------------------------------------------------------------------
 
+    def configure(self, context: ParserContext) -> None:
+        """Apply source context before parse().
+
+        Called by the consumer after instantiation and before parse().
+        Parsers that need no context may implement this as a no-op; others
+        store only the fields they need.
+
+        Parameters
+        ----------
+        context:
+            Immutable context assembled by the consumer for this
+            specific ingestion event.
+        """
+        ...
+
     def parse(
         self,
         document_path: Path,
diff --git a/src/paperless/parsers/mail.py b/src/paperless/parsers/mail.py
new file mode 100644
index 000000000..9914b2ec6
--- /dev/null
+++ b/src/paperless/parsers/mail.py
@@ -0,0 +1,834 @@
+"""
+Built-in mail document parser.
+
+Handles message/rfc822 (EML) MIME type by:
+- Parsing the email using imap_tools
+- Generating a PDF via Gotenberg (for display and archive)
+- Extracting text via Tika for HTML content
+- Extracting metadata from email headers
+
+The parser always produces a PDF because EML files cannot be rendered
+natively in a browser (requires_pdf_rendition=True).
+"""
+
+from __future__ import annotations
+
+import logging
+import re
+import shutil
+import tempfile
+from html import escape
+from pathlib import Path
+from typing import TYPE_CHECKING
+from typing import Self
+
+from bleach import clean
+from bleach import linkify
+from django.conf import settings
+from django.utils import timezone
+from django.utils.timezone import is_naive
+from django.utils.timezone import make_aware
+from gotenberg_client import GotenbergClient
+from gotenberg_client.constants import A4
+from gotenberg_client.options import Measurement
+from gotenberg_client.options import MeasurementUnitType
+from gotenberg_client.options import PageMarginsType
+from gotenberg_client.options import PdfAFormat
+from humanize import naturalsize
+from imap_tools import MailAttachment
+from imap_tools import MailMessage
+from tika_client import TikaClient
+
+from documents.parsers import ParseError
+from documents.parsers import make_thumbnail_from_pdf
+from paperless.models import OutputTypeChoices
+from paperless.version import __full_version_str__
+from paperless_mail.models import MailRule
+
+if TYPE_CHECKING:
+    import datetime
+    from types import TracebackType
+
+    from paperless.parsers import MetadataEntry
+    from paperless.parsers import ParserContext
+
+logger = logging.getLogger("paperless.parsing.mail")
+
+_SUPPORTED_MIME_TYPES: dict[str, str] = {
+    "message/rfc822": ".eml",
+}
+
+
+class MailDocumentParser:
+    """Parse .eml email files for Paperless-ngx.
+
+    Uses imap_tools to parse .eml files, generates a PDF using Gotenberg,
+    and sends the HTML part to a Tika server for text extraction.  Because
+    EML files cannot be rendered natively in a browser, the parser always
+    produces a PDF rendition (requires_pdf_rendition=True).
+
+    Pass a ``ParserContext`` to ``configure()`` before ``parse()`` to
+    apply mail-rule-specific PDF layout options:
+
+        parser.configure(ParserContext(mailrule_id=rule.pk))
+        parser.parse(path, mime_type)
+
+    Class attributes
+    ----------------
+    name : str
+        Human-readable parser name.
+    version : str
+        Semantic version string, kept in sync with Paperless-ngx releases.
+    author : str
+        Maintainer name.
+    url : str
+        Issue tracker / source URL.
+    """
+
+    name: str = "Paperless-ngx Mail Parser"
+    version: str = __full_version_str__
+    author: str = "Paperless-ngx Contributors"
+    url: str = "https://github.com/paperless-ngx/paperless-ngx"
+
+    # ------------------------------------------------------------------
+    # Class methods
+    # ------------------------------------------------------------------
+
+    @classmethod
+    def supported_mime_types(cls) -> dict[str, str]:
+        """Return the MIME types this parser handles.
+
+        Returns
+        -------
+        dict[str, str]
+            MIME type to file extension (shared module dict; do not mutate).
+        """
+        return _SUPPORTED_MIME_TYPES
+
+    @classmethod
+    def score(
+        cls,
+        mime_type: str,
+        filename: str,
+        path: Path | None = None,
+    ) -> int | None:
+        """Rate how well this parser can handle the given file.
+
+        Only ``mime_type`` influences the result.
+
+        Parameters
+        ----------
+        mime_type:
+            Detected MIME type of the file.
+        filename:
+            Original filename including extension. Not inspected.
+        path:
+            Optional filesystem path. Not inspected.
+
+        Returns
+        -------
+        int | None
+            10 if the MIME type is supported, otherwise None.
+        """
+        return 10 if mime_type in _SUPPORTED_MIME_TYPES else None
+
+    # ------------------------------------------------------------------
+    # Properties
+    # ------------------------------------------------------------------
+
+    @property
+    def can_produce_archive(self) -> bool:
+        """Report whether an optional searchable archive copy can be made.
+
+        Returns
+        -------
+        bool
+            Always False — the PDF emitted here is the mandatory display
+            rendition (``requires_pdf_rendition``), not an OCR archive.
+        """
+        return False
+
+    @property
+    def requires_pdf_rendition(self) -> bool:
+        """Report whether the frontend needs a PDF rendition for display.
+
+        Returns
+        -------
+        bool
+            Always True — browsers cannot render EML files natively, so a
+            PDF conversion is required before the document can be shown.
+        """
+        return True
+
+    # ------------------------------------------------------------------
+    # Lifecycle
+    # ------------------------------------------------------------------
+
+    def __init__(self, logging_group: object = None) -> None:
+        scratch_root = settings.SCRATCH_DIR
+        scratch_root.mkdir(parents=True, exist_ok=True)
+        # Private working directory; __exit__() removes it again.
+        self._tempdir = Path(tempfile.mkdtemp(prefix="paperless-", dir=scratch_root))
+        self._mailrule_id: int | None = None
+        self._archive_path: Path | None = None
+        self._date: datetime.datetime | None = None
+        self._text: str | None = None
+
+    def __enter__(self) -> Self:
+        return self  # nothing to acquire: __init__ already created the temp dir
+
+    def __exit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc_val: BaseException | None,
+        exc_tb: TracebackType | None,
+    ) -> None:
+        logger.debug("Cleaning up temporary directory %s", self._tempdir)
+        shutil.rmtree(self._tempdir, ignore_errors=True)  # best-effort removal
+
+    # ------------------------------------------------------------------
+    # Core parsing interface
+    # ------------------------------------------------------------------
+
+    def configure(self, context: ParserContext) -> None:
+        self._mailrule_id = context.mailrule_id  # read later by parse()
+
+    def parse(
+        self,
+        document_path: Path,
+        mime_type: str,
+        *,
+        produce_archive: bool = True,
+    ) -> None:
+        """Parse the given .eml into formatted text and a PDF archive.
+
+        Call ``configure(ParserContext(mailrule_id=...))`` before this method
+        to apply mail-rule-specific PDF layout options.  ``produce_archive`` is
+        accepted for protocol compatibility only and is ignored: a PDF is
+        always produced because EML files cannot be displayed natively.
+
+        Parameters
+        ----------
+        document_path:
+            Absolute path to the .eml file.
+        mime_type:
+            Detected MIME type of the document (should be "message/rfc822").
+        produce_archive:
+            Accepted for protocol compatibility. The PDF rendition is always
+            produced since EML files cannot be displayed natively in a browser.
+
+        Raises
+        ------
+        documents.parsers.ParseError
+            If the file cannot be parsed or PDF generation fails.
+        paperless_mail.models.MailRule.DoesNotExist
+            If a mail rule id was configured but no matching rule exists.
+        """
+
+        def strip_text(text: str) -> str:
+            """Reduces the spacing of the given text string."""
+            text = re.sub(r"\s+", " ", text)
+            # NOTE(review): ``\s+`` above already turned every newline into a
+            # space, so this pattern can no longer match; kept unchanged.
+            text = re.sub(r"(\n *)+", "\n", text)
+            return text.strip()
+
+        def build_formatted_text(mail_message: MailMessage) -> str:
+            """Constructs a formatted string based on the given email."""
+            fmt_text = f"Subject: {mail_message.subject}\n\n"
+            fmt_text += f"From: {mail_message.from_values.full if mail_message.from_values else ''}\n\n"
+            to_list = [address.full for address in mail_message.to_values]
+            fmt_text += f"To: {', '.join(to_list)}\n\n"
+            if mail_message.cc_values:
+                cc_list = [address.full for address in mail_message.cc_values]
+                fmt_text += f"CC: {', '.join(cc_list)}\n\n"
+            if mail_message.bcc_values:
+                bcc_list = [address.full for address in mail_message.bcc_values]
+                fmt_text += f"BCC: {', '.join(bcc_list)}\n\n"
+            if mail_message.attachments:
+                att = []
+                for a in mail_message.attachments:
+                    attachment_size = naturalsize(a.size, binary=True, format="%.2f")
+                    att.append(f"{a.filename} ({attachment_size})")
+                fmt_text += f"Attachments: {', '.join(att)}\n\n"
+
+            if mail_message.html:
+                html_text = strip_text(self.tika_parse(mail_message.html))
+                fmt_text += "HTML content: " + html_text
+
+            fmt_text += f"\n\n{strip_text(mail_message.text)}"
+
+            return fmt_text
+
+        logger.debug("Parsing file %s into an email", document_path.name)
+        mail = self.parse_file_to_message(document_path)
+
+        logger.debug("Building formatted text from email")
+        self._text = build_formatted_text(mail)
+
+        if is_naive(mail.date):
+            self._date = make_aware(mail.date)
+        else:
+            self._date = mail.date
+
+        logger.debug("Creating a PDF from the email")
+        if self._mailrule_id:
+            rule = MailRule.objects.get(pk=self._mailrule_id)
+            self._archive_path = self.generate_pdf(
+                mail,
+                MailRule.PdfLayout(rule.pdf_layout),
+            )
+        else:
+            self._archive_path = self.generate_pdf(mail)
+
+    # ------------------------------------------------------------------
+    # Result accessors
+    # ------------------------------------------------------------------
+
+    def get_text(self) -> str | None:
+        """Return the formatted plain text built by the last ``parse()`` call.
+
+        Returns
+        -------
+        str | None
+            The extracted text, or None when ``parse()`` has not run yet.
+        """
+        return self._text
+
+    def get_date(self) -> datetime.datetime | None:
+        """Return the timezone-aware date taken from the parsed email.
+
+        Returns
+        -------
+        datetime.datetime | None
+            The email's date, or None when ``parse()`` has not run yet.
+        """
+        return self._date
+
+    def get_archive_path(self) -> Path | None:
+        """Return the location of the PDF rendition, if one exists yet.
+
+        Returns
+        -------
+        Path | None
+            Path to the generated PDF, or None when neither ``parse()`` nor
+            ``get_thumbnail()`` has produced one yet.
+        """
+        return self._archive_path
+
+    # ------------------------------------------------------------------
+    # Thumbnail and metadata
+    # ------------------------------------------------------------------
+
+    def get_thumbnail(
+        self,
+        document_path: Path,
+        mime_type: str,
+        file_name: str | None = None,
+    ) -> Path:
+        """Render a thumbnail image from the email's PDF rendition.
+
+        Reuses the PDF from a preceding ``parse()`` call.  If none exists
+        yet, the email is parsed and converted here with the default
+        layout; any configured mail rule is not consulted on this path.
+
+        Parameters
+        ----------
+        document_path:
+            Absolute path to the source document.
+        mime_type:
+            Detected MIME type of the document.  Not inspected.
+        file_name:
+            Kept for backward compatibility; not used.
+
+        Returns
+        -------
+        Path
+            Path to the thumbnail produced by ``make_thumbnail_from_pdf``,
+            which is handed this parser's temporary directory.
+        """
+        # Lazily build the PDF when parse() has not produced one.
+        if not self._archive_path:
+            message = self.parse_file_to_message(document_path)
+            self._archive_path = self.generate_pdf(message)
+
+        return make_thumbnail_from_pdf(self._archive_path, self._tempdir)
+
+    def get_page_count(
+        self,
+        document_path: Path,
+        mime_type: str,
+    ) -> int | None:
+        """Count the pages of the archive PDF, if one has been produced.
+
+        Neither argument is inspected; only the PDF generated earlier by
+        this parser instance is examined.
+
+        Returns
+        -------
+        int | None
+            Page count of the archive PDF, or ``None`` if no PDF exists yet.
+        """
+        if self._archive_path is None:
+            return None
+
+        from paperless.parsers.utils import get_page_count_for_pdf
+
+        return get_page_count_for_pdf(self._archive_path, log=logger)
+
+    def extract_metadata(
+        self,
+        document_path: Path,
+        mime_type: str,
+    ) -> list[MetadataEntry]:
+        """Collect metadata entries describing the email.
+
+        Every header becomes an entry with prefix "header"; two summary
+        entries ("attachments" and "date") with an empty prefix are added.
+        Headers whose joined value cannot be encoded as UTF-8 are skipped.
+
+        Returns
+        -------
+        list[MetadataEntry]
+            Entries sorted by (prefix, key), or ``[]`` on parse failure.
+        """
+        try:
+            mail = self.parse_file_to_message(document_path)
+        except ParseError as e:
+            logger.warning(
+                "Error while fetching document metadata for %s: %s",
+                document_path,
+                e,
+            )
+            return []
+
+        entries: list[MetadataEntry] = []
+        for header_name, header_values in mail.headers.items():
+            joined = ", ".join(header_values)
+            try:
+                joined.encode("utf-8")
+            except UnicodeEncodeError as e:  # pragma: no cover
+                logger.debug("Skipping header %s: %s", header_name, e)
+                continue
+
+            entries.append(
+                {
+                    "namespace": "",
+                    "prefix": "header",
+                    "key": header_name,
+                    "value": joined,
+                },
+            )
+
+        attachment_summary = ", ".join(
+            f"{attachment.filename}"
+            f"({naturalsize(attachment.size, binary=True, format='%.2f')})"
+            for attachment in mail.attachments
+        )
+        entries.extend(
+            [
+                {
+                    "namespace": "",
+                    "prefix": "",
+                    "key": "attachments",
+                    "value": attachment_summary,
+                },
+                {
+                    "namespace": "",
+                    "prefix": "",
+                    "key": "date",
+                    "value": mail.date.strftime("%Y-%m-%d %H:%M:%S %Z"),
+                },
+            ],
+        )
+
+        entries.sort(key=lambda item: (item["prefix"], item["key"]))
+        return entries
+
+    # ------------------------------------------------------------------
+    # Email-specific methods
+    # ------------------------------------------------------------------
+
+    def _settings_to_gotenberg_pdfa(self) -> PdfAFormat | None:
+        """Map ``settings.OCR_OUTPUT_TYPE`` to a Gotenberg PDF/A format, if any."""
+        output_type = settings.OCR_OUTPUT_TYPE
+        if output_type in {OutputTypeChoices.PDF_A, OutputTypeChoices.PDF_A2}:
+            return PdfAFormat.A2b
+        if output_type == OutputTypeChoices.PDF_A1:  # pragma: no cover
+            # Unsupported by Gotenberg: warn and fall back to PDF/A-2b.
+            logger.warning(
+                "Gotenberg does not support PDF/A-1a, choosing PDF/A-2b instead",
+            )
+            return PdfAFormat.A2b
+        if output_type == OutputTypeChoices.PDF_A3:  # pragma: no cover
+            return PdfAFormat.A3b
+        # Any other output type: request no PDF/A conversion.
+        return None
+
+    @staticmethod
+    def parse_file_to_message(filepath: Path) -> MailMessage:
+        """Parse the given .eml file into a MailMessage object.
+
+        Parameters
+        ----------
+        filepath:
+            Path to the .eml file.
+
+        Returns
+        -------
+        MailMessage
+            Parsed mail message.
+
+        Raises
+        ------
+        documents.parsers.ParseError
+            If the file cannot be parsed or is missing required fields.
+        """
+        try:
+            with filepath.open("rb") as eml:
+                parsed = MailMessage.from_bytes(eml.read())
+            missing_from = parsed.from_values is None
+        except Exception as err:
+            raise ParseError(
+                f"Could not parse {filepath}: {err}",
+            ) from err
+
+        # Raised outside the try so the handler above does not re-wrap it.
+        if missing_from:
+            raise ParseError(f"Could not parse {filepath}: Missing 'from'")
+        return parsed
+
+    def tika_parse(self, html: str) -> str:
+        """Extract plain text from an HTML string via the Tika server.
+
+        Parameters
+        ----------
+        html:
+            HTML markup to convert.
+
+        Returns
+        -------
+        str
+            The stripped text content, or "" when Tika returns no content.
+
+        Raises
+        ------
+        documents.parsers.ParseError
+            If anything goes wrong while talking to ``settings.TIKA_ENDPOINT``.
+        """
+        logger.info("Sending content to Tika server")
+
+        try:
+            with TikaClient(tika_url=settings.TIKA_ENDPOINT) as client:
+                parsed = client.tika.as_text.from_buffer(html, "text/html")
+                content = parsed.content
+                # A ``None`` content is normalised to the empty string.
+                return "" if content is None else content.strip()
+        except Exception as err:
+            message = (
+                f"Could not parse content with tika server at "
+                f"{settings.TIKA_ENDPOINT}: {err}"
+            )
+            raise ParseError(message) from err
+
+    def generate_pdf(
+        self,
+        mail_message: MailMessage,
+        pdf_layout: MailRule.PdfLayout | None = None,
+    ) -> Path:
+        """Generate a PDF from the email message.
+
+        Always renders the message PDF; if the mail has HTML content, that is
+        rendered too and merged by Gotenberg per layout (``ParseError`` on failure).
+
+        Parameters
+        ----------
+        mail_message:
+            Parsed email message.
+        pdf_layout:
+            Merge layout for mails with HTML content. Falls back to the
+            EMAIL_PARSE_DEFAULT_LAYOUT setting if not provided.
+
+        Returns
+        -------
+        Path
+            Path to ``merged.pdf`` inside the temporary directory.
+        """
+        archive_path = Path(self._tempdir) / "merged.pdf"
+
+        mail_pdf_file = self.generate_pdf_from_mail(mail_message)
+
+        if pdf_layout is None:
+            pdf_layout = MailRule.PdfLayout(settings.EMAIL_PARSE_DEFAULT_LAYOUT)
+
+        # Without HTML content the message PDF is copied as the final result.
+        # Otherwise the HTML is rendered too and Gotenberg merges per layout.
+        if not mail_message.html:
+            archive_path.write_bytes(mail_pdf_file.read_bytes())
+        else:
+            pdf_of_html_content = self.generate_pdf_from_html(
+                mail_message.html,
+                mail_message.attachments,
+            )
+
+            logger.debug("Merging email text and HTML content into single PDF")
+
+            with (
+                GotenbergClient(
+                    host=settings.TIKA_GOTENBERG_ENDPOINT,
+                    timeout=settings.CELERY_TASK_TIME_LIMIT,
+                ) as client,
+                client.merge.merge() as route,
+            ):
+                # Configure requested PDF/A formatting, if any
+                pdf_a_format = self._settings_to_gotenberg_pdfa()
+                if pdf_a_format is not None:
+                    route.pdf_format(pdf_a_format)
+
+                match pdf_layout:
+                    case MailRule.PdfLayout.HTML_TEXT:
+                        route.merge([pdf_of_html_content, mail_pdf_file])
+                    case MailRule.PdfLayout.HTML_ONLY:
+                        route.merge([pdf_of_html_content])
+                    case MailRule.PdfLayout.TEXT_ONLY:
+                        route.merge([mail_pdf_file])
+                    case MailRule.PdfLayout.TEXT_HTML | _:  # also the fallback
+                        route.merge([mail_pdf_file, pdf_of_html_content])
+
+                try:
+                    response = route.run()
+                    archive_path.write_bytes(response.content)
+                except Exception as err:
+                    raise ParseError(
+                        f"Error while merging email HTML into PDF: {err}",
+                    ) from err
+
+        return archive_path
+
+    def mail_to_html(self, mail: MailMessage) -> Path:
+        """Convert the given email into an HTML file using a template.
+
+        Parameters
+        ----------
+        mail:
+            Parsed mail message.
+
+        Returns
+        -------
+        Path
+            Path to the rendered, UTF-8 encoded HTML file in the temp directory.
+        """
+
+        def clean_html(text: str) -> str:
+            """Escape, sanitise and linkify a value; newlines become ``<br>``."""
+            if isinstance(text, list):
+                text = "\n".join([str(e) for e in text])
+            if not isinstance(text, str):
+                text = str(text)
+            text = escape(text)
+            text = clean(text)
+            text = linkify(text, parse_email=True)
+            text = text.replace("\n", "<br>")
+            return text
+
+        data = {}
+
+        data["subject"] = clean_html(mail.subject)
+        if data["subject"]:
+            data["subject_label"] = "Subject"
+        data["from"] = clean_html(mail.from_values.full if mail.from_values else "")
+        if data["from"]:
+            data["from_label"] = "From"
+        data["to"] = clean_html(", ".join(address.full for address in mail.to_values))
+        if data["to"]:
+            data["to_label"] = "To"
+        data["cc"] = clean_html(", ".join(address.full for address in mail.cc_values))
+        if data["cc"]:
+            data["cc_label"] = "CC"
+        data["bcc"] = clean_html(", ".join(address.full for address in mail.bcc_values))
+        if data["bcc"]:
+            data["bcc_label"] = "BCC"
+
+        att = [
+            f"{a.filename} ({naturalsize(a.size, binary=True, format='%.2f')})"
+            for a in mail.attachments
+        ]
+        data["attachments"] = clean_html(", ".join(att))
+        if data["attachments"]:
+            data["attachments_label"] = "Attachments"
+
+        data["date"] = clean_html(
+            timezone.localtime(mail.date).strftime("%Y-%m-%d %H:%M"),
+        )
+        data["content"] = clean_html(mail.text.strip())
+
+        from django.template.loader import render_to_string
+
+        rendered = render_to_string("email_msg_template.html", context=data)
+        html_file = Path(self._tempdir) / "email_as_html.html"
+        # Explicit UTF-8: the locale's default encoding may not cover the mail.
+        html_file.write_text(rendered, encoding="utf-8")
+        return html_file
+
+    def generate_pdf_from_mail(self, mail: MailMessage) -> Path:
+        """Render the templated email summary to a PDF with Gotenberg.
+
+        Parameters
+        ----------
+        mail:
+            Parsed mail message.
+
+        Returns
+        -------
+        Path
+            Path to ``email_as_pdf.pdf`` inside the temporary directory.
+
+        Raises
+        ------
+        documents.parsers.ParseError
+            If building or running the Gotenberg conversion fails.
+        """
+        logger.info("Converting mail to PDF")
+
+        # The stylesheet is read from the paperless_mail templates directory.
+        base_dir = Path(__file__).parent.parent.parent
+        stylesheet = base_dir / "paperless_mail" / "templates" / "output.css"
+        rendered_mail = self.mail_to_html(mail)
+
+        with (
+            GotenbergClient(
+                host=settings.TIKA_GOTENBERG_ENDPOINT,
+                timeout=settings.CELERY_TASK_TIME_LIMIT,
+            ) as client,
+            client.chromium.html_to_pdf() as route,
+        ):
+            # Configure requested PDF/A formatting, if any
+            pdfa = self._settings_to_gotenberg_pdfa()
+            if pdfa is not None:
+                route.pdf_format(pdfa)
+
+            # Every call that configures or runs the route stays inside the
+            # try so that any failure surfaces as a ParseError.
+            try:
+                route.index(rendered_mail)
+                route.resource(stylesheet)
+
+                # Page geometry: A4, 0.1 inch margins on every side, no scaling.
+                route.margins(
+                    PageMarginsType(
+                        top=Measurement(0.1, MeasurementUnitType.Inches),
+                        bottom=Measurement(0.1, MeasurementUnitType.Inches),
+                        left=Measurement(0.1, MeasurementUnitType.Inches),
+                        right=Measurement(0.1, MeasurementUnitType.Inches),
+                    ),
+                )
+                route.size(A4)
+                route.scale(1.0)
+                response = route.run()
+            except Exception as err:
+                raise ParseError(
+                    f"Error while converting email to PDF: {err}",
+                ) from err
+
+        # Store the converted document alongside the other intermediates.
+        pdf_path = Path(self._tempdir) / "email_as_pdf.pdf"
+        pdf_path.write_bytes(response.content)
+
+        return pdf_path
+
+    def generate_pdf_from_html(
+        self,
+        orig_html: str,
+        attachments: list[MailAttachment],
+    ) -> Path:
+        """Generate a PDF from the HTML content of the email.
+
+        Parameters
+        ----------
+        orig_html:
+            Raw HTML string from the email body.
+        attachments:
+            Email attachments; those with a Content-ID are sent as inline resources.
+
+        Returns
+        -------
+        Path
+            Path to ``html.pdf`` inside the temporary directory.
+
+        Raises
+        ------
+        documents.parsers.ParseError
+            If the Gotenberg conversion request fails.
+        """
+
+        def clean_html_script(text: str) -> str:
+            """Turn ``<script>`` elements into hidden ``<div>`` elements."""
+            text = re.sub("<script", "<div hidden ", text, flags=re.IGNORECASE)
+            return re.sub("</script", "</div", text, flags=re.IGNORECASE)
+
+        logger.info("Converting message html to PDF")
+
+        tempdir = Path(self._tempdir)
+        html_clean = clean_html_script(orig_html)
+
+        with (
+            GotenbergClient(
+                host=settings.TIKA_GOTENBERG_ENDPOINT,
+                timeout=settings.CELERY_TASK_TIME_LIMIT,
+            ) as client,
+            client.chromium.html_to_pdf() as route,
+        ):
+            # Configure requested PDF/A formatting, if any
+            pdf_a_format = self._settings_to_gotenberg_pdfa()
+            if pdf_a_format is not None:
+                route.pdf_format(pdf_a_format)
+
+            # Add attachments as resources, cleaning the filename and replacing
+            # it in the index file for inclusion
+            for attachment in attachments:
+                # Without a Content-ID the HTML cannot reference the part, and
+                # the search string would be a bare "cid:", whose replacement
+                # would corrupt every other cid reference in the document.
+                if not attachment.content_id:
+                    continue
+
+                # Clean the attachment name to be valid
+                name_cid = f"cid:{attachment.content_id}"
+                name_clean = "".join(e for e in name_cid if e.isalnum())
+
+                # Write attachment payload to a temp file
+                temp_file = tempdir / name_clean
+                temp_file.write_bytes(attachment.payload)
+
+                route.resource(temp_file)
+
+                # Replace as needed the name with the clean name
+                html_clean = html_clean.replace(name_cid, name_clean)
+
+            # Store the HTML once, after all cid references were rewritten.
+            # Explicit UTF-8 avoids depending on the locale's default encoding.
+            html_clean_file = tempdir / "index.html"
+            html_clean_file.write_text(html_clean, encoding="utf-8")
+            # This is our index file, the main page basically
+            route.index(html_clean_file)
+
+            # Set page size, margins
+            route.margins(
+                PageMarginsType(
+                    top=Measurement(0.1, MeasurementUnitType.Inches),
+                    bottom=Measurement(0.1, MeasurementUnitType.Inches),
+                    left=Measurement(0.1, MeasurementUnitType.Inches),
+                    right=Measurement(0.1, MeasurementUnitType.Inches),
+                ),
+            ).size(A4).scale(1.0)
+
+            try:
+                response = route.run()
+            except Exception as err:
+                raise ParseError(
+                    f"Error while converting document to PDF: {err}",
+                ) from err
+
+        html_pdf = tempdir / "html.pdf"
+        html_pdf.write_bytes(response.content)
+        return html_pdf
diff --git a/src/paperless/parsers/registry.py b/src/paperless/parsers/registry.py
index e579d24dd..dc227ce7a 100644
--- a/src/paperless/parsers/registry.py
+++ b/src/paperless/parsers/registry.py
@@ -193,6 +193,7 @@ class ParserRegistry:
         that log output is predictable; scoring determines which parser wins
         at runtime regardless of registration order.
         """
+        from paperless.parsers.mail import MailDocumentParser
         from paperless.parsers.remote import RemoteDocumentParser
         from paperless.parsers.text import TextDocumentParser
         from paperless.parsers.tika import TikaDocumentParser
@@ -200,6 +201,7 @@ class ParserRegistry:
         self.register_builtin(TextDocumentParser)
         self.register_builtin(RemoteDocumentParser)
         self.register_builtin(TikaDocumentParser)
+        self.register_builtin(MailDocumentParser)
 
     # ------------------------------------------------------------------
     # Discovery
diff --git a/src/paperless/parsers/remote.py b/src/paperless/parsers/remote.py
index 45311cdbd..10e89649e 100644
--- a/src/paperless/parsers/remote.py
+++ b/src/paperless/parsers/remote.py
@@ -28,6 +28,7 @@ if TYPE_CHECKING:
     from types import TracebackType
 
     from paperless.parsers import MetadataEntry
+    from paperless.parsers import ParserContext
 
 logger = logging.getLogger("paperless.parsing.remote")
 
@@ -204,6 +205,9 @@ class RemoteDocumentParser:
     # Core parsing interface
     # ------------------------------------------------------------------
 
+    def configure(self, context: ParserContext) -> None:
+        """Accept the parser context and ignore it; nothing is configured."""
+
     def parse(
         self,
         document_path: Path,
diff --git a/src/paperless/parsers/text.py b/src/paperless/parsers/text.py
index 99d9dab08..00d738995 100644
--- a/src/paperless/parsers/text.py
+++ b/src/paperless/parsers/text.py
@@ -27,6 +27,7 @@ if TYPE_CHECKING:
     from types import TracebackType
 
     from paperless.parsers import MetadataEntry
+    from paperless.parsers import ParserContext
 
 logger = logging.getLogger("paperless.parsing.text")
 
@@ -156,6 +157,9 @@ class TextDocumentParser:
     # Core parsing interface
     # ------------------------------------------------------------------
 
+    def configure(self, context: ParserContext) -> None:
+        """Accept the parser context and ignore it; nothing is configured."""
+
     def parse(
         self,
         document_path: Path,
diff --git a/src/paperless/parsers/tika.py b/src/paperless/parsers/tika.py
index b9307858a..674d74fe2 100644
--- a/src/paperless/parsers/tika.py
+++ b/src/paperless/parsers/tika.py
@@ -35,6 +35,7 @@ if TYPE_CHECKING:
     from types import TracebackType
 
     from paperless.parsers import MetadataEntry
+    from paperless.parsers import ParserContext
 
 logger = logging.getLogger("paperless.parsing.tika")
 
@@ -205,6 +206,9 @@ class TikaDocumentParser:
     # Core parsing interface
     # ------------------------------------------------------------------
 
+    def configure(self, context: ParserContext) -> None:
+        """Accept the parser context and ignore it; nothing is configured."""
+
     def parse(
         self,
         document_path: Path,
@@ -340,11 +344,19 @@ class TikaDocumentParser:
     ) -> int | None:
         """Return the number of pages in the document.
 
+        Counts pages in the archive PDF produced by a preceding parse()
+        call.  Returns ``None`` if parse() has not been called yet or if
+        no archive was produced.
+
         Returns
         -------
         int | None
-            Always None — page count is not available from Tika.
+            Page count of the archive PDF, or ``None``.
         """
+        if self._archive_path is not None:
+            from paperless.parsers.utils import get_page_count_for_pdf
+
+            return get_page_count_for_pdf(self._archive_path, log=logger)
         return None
 
     def extract_metadata(
diff --git a/src/paperless/tests/parsers/conftest.py b/src/paperless/tests/parsers/conftest.py
index 19603db83..5a22f24ab 100644
--- a/src/paperless/tests/parsers/conftest.py
+++ b/src/paperless/tests/parsers/conftest.py
@@ -10,6 +10,7 @@ from typing import TYPE_CHECKING
 
 import pytest
 
+from paperless.parsers.mail import MailDocumentParser
 from paperless.parsers.remote import RemoteDocumentParser
 from paperless.parsers.text import TextDocumentParser
 from paperless.parsers.tika import TikaDocumentParser
@@ -247,3 +248,166 @@ def tika_parser() -> Generator[TikaDocumentParser, None, None]:
     """
     with TikaDocumentParser() as parser:
         yield parser
+
+
+# ------------------------------------------------------------------
+# Mail parser sample files
+# ------------------------------------------------------------------
+
+
+@pytest.fixture(scope="session")
+def mail_samples_dir(samples_dir: Path) -> Path:
+    """Locate the directory that holds the mail parser sample files.
+
+    Returns
+    -------
+    Path
+        The ``mail`` subdirectory of ``samples_dir``.
+    """
+    return samples_dir.joinpath("mail")
+
+
+@pytest.fixture(scope="session")
+def broken_email_file(mail_samples_dir: Path) -> Path:
+    """Locate the broken/malformed EML sample file.
+
+    Returns
+    -------
+    Path
+        ``broken.eml`` within the mail samples directory.
+    """
+    return mail_samples_dir.joinpath("broken.eml")
+
+
+@pytest.fixture(scope="session")
+def simple_txt_email_file(mail_samples_dir: Path) -> Path:
+    """Locate the plain-text email sample file.
+
+    Returns
+    -------
+    Path
+        ``simple_text.eml`` within the mail samples directory.
+    """
+    return mail_samples_dir.joinpath("simple_text.eml")
+
+
+@pytest.fixture(scope="session")
+def simple_txt_email_pdf_file(mail_samples_dir: Path) -> Path:
+    """Locate the expected PDF rendition of the plain-text email.
+
+    Returns
+    -------
+    Path
+        ``simple_text.eml.pdf`` within the mail samples directory.
+    """
+    return mail_samples_dir.joinpath("simple_text.eml.pdf")
+
+
+@pytest.fixture(scope="session")
+def simple_txt_email_thumbnail_file(mail_samples_dir: Path) -> Path:
+    """Locate the expected thumbnail of the plain-text email.
+
+    Returns
+    -------
+    Path
+        ``simple_text.eml.pdf.webp`` within the mail samples directory.
+    """
+    return mail_samples_dir.joinpath("simple_text.eml.pdf.webp")
+
+
+@pytest.fixture(scope="session")
+def html_email_file(mail_samples_dir: Path) -> Path:
+    """Locate the HTML email sample file.
+
+    Returns
+    -------
+    Path
+        ``html.eml`` within the mail samples directory.
+    """
+    return mail_samples_dir.joinpath("html.eml")
+
+
+@pytest.fixture(scope="session")
+def html_email_pdf_file(mail_samples_dir: Path) -> Path:
+    """Locate the expected PDF rendition of the HTML email.
+
+    Returns
+    -------
+    Path
+        ``html.eml.pdf`` within the mail samples directory.
+    """
+    return mail_samples_dir.joinpath("html.eml.pdf")
+
+
+@pytest.fixture(scope="session")
+def html_email_thumbnail_file(mail_samples_dir: Path) -> Path:
+    """Locate the expected thumbnail of the HTML email.
+
+    Returns
+    -------
+    Path
+        ``html.eml.pdf.webp`` within the mail samples directory.
+    """
+    return mail_samples_dir.joinpath("html.eml.pdf.webp")
+
+
+@pytest.fixture(scope="session")
+def html_email_html_file(mail_samples_dir: Path) -> Path:
+    """Locate the HTML body belonging to the HTML email sample.
+
+    Returns
+    -------
+    Path
+        ``html.eml.html`` within the mail samples directory.
+    """
+    return mail_samples_dir.joinpath("html.eml.html")
+
+
+@pytest.fixture(scope="session")
+def merged_pdf_first(mail_samples_dir: Path) -> Path:
+    """Locate the first input PDF of the PDF-merge tests.
+
+    Returns
+    -------
+    Path
+        ``first.pdf`` within the mail samples directory.
+    """
+    return mail_samples_dir.joinpath("first.pdf")
+
+
+@pytest.fixture(scope="session")
+def merged_pdf_second(mail_samples_dir: Path) -> Path:
+    """Locate the second input PDF of the PDF-merge tests.
+
+    Returns
+    -------
+    Path
+        ``second.pdf`` within the mail samples directory.
+    """
+    return mail_samples_dir.joinpath("second.pdf")
+
+
+# ------------------------------------------------------------------
+# Mail parser instance
+# ------------------------------------------------------------------
+
+
+@pytest.fixture()
+def mail_parser() -> Generator[MailDocumentParser, None, None]:
+    """Provide a MailDocumentParser that is exited again on fixture teardown.
+
+    Yields
+    ------
+    MailDocumentParser
+        The parser, entered as a context manager for the test's duration.
+    """
+    with MailDocumentParser() as active_parser:
+        yield active_parser
+
+
+@pytest.fixture(scope="session")
+def nginx_base_url() -> Generator[str, None, None]:
+    """Yield the base URL of the nginx HTTP server that is expected to be up."""
+    base_url = "http://localhost:8080"
+
+    yield base_url
diff --git a/src/paperless_mail/tests/test_parsers.py b/src/paperless/tests/parsers/test_mail_parser.py
similarity index 89%
rename from src/paperless_mail/tests/test_parsers.py
rename to src/paperless/tests/parsers/test_mail_parser.py
index 9746731f8..83d81409c 100644
--- a/src/paperless_mail/tests/test_parsers.py
+++ b/src/paperless/tests/parsers/test_mail_parser.py
@@ -12,7 +12,64 @@ from pytest_httpx import HTTPXMock
 from pytest_mock import MockerFixture
 
 from documents.parsers import ParseError
-from paperless_mail.parsers import MailDocumentParser
+from paperless.parsers import ParserContext
+from paperless.parsers import ParserProtocol
+from paperless.parsers.mail import MailDocumentParser
+
+
+class TestMailParserProtocol:
+    """Check MailDocumentParser against the ParserProtocol interface."""
+
+    def test_isinstance_satisfies_protocol(
+        self,
+        mail_parser: MailDocumentParser,
+    ) -> None:
+        assert isinstance(mail_parser, ParserProtocol)
+
+    def test_supported_mime_types(self) -> None:
+        supported = MailDocumentParser.supported_mime_types()
+        assert isinstance(supported, dict)
+        assert "message/rfc822" in supported
+
+    @pytest.mark.parametrize(
+        ("mime_type", "expected"),
+        [
+            ("message/rfc822", 10),
+            ("application/pdf", None),
+            ("text/plain", None),
+        ],
+    )
+    def test_score(self, mime_type: str, expected: int | None) -> None:
+        assert expected == MailDocumentParser.score(mime_type, "email.eml")
+
+    def test_can_produce_archive_is_false(
+        self,
+        mail_parser: MailDocumentParser,
+    ) -> None:
+        assert mail_parser.can_produce_archive is False
+
+    def test_requires_pdf_rendition_is_true(
+        self,
+        mail_parser: MailDocumentParser,
+    ) -> None:
+        assert mail_parser.requires_pdf_rendition is True
+
+    def test_get_page_count_returns_none_without_archive(
+        self,
+        mail_parser: MailDocumentParser,
+        html_email_file: Path,
+    ) -> None:
+        assert mail_parser.get_page_count(html_email_file, "message/rfc822") is None
+
+    def test_get_page_count_returns_int_with_pdf_archive(
+        self,
+        mail_parser: MailDocumentParser,
+        simple_txt_email_pdf_file: Path,
+    ) -> None:
+        pdf = simple_txt_email_pdf_file
+        mail_parser._archive_path = pdf
+        page_count = mail_parser.get_page_count(pdf, "message/rfc822")
+        assert isinstance(page_count, int) and page_count > 0
 
 
 class TestEmailFileParsing:
@@ -24,7 +81,7 @@ class TestEmailFileParsing:
     def test_parse_error_missing_file(
         self,
         mail_parser: MailDocumentParser,
-        sample_dir: Path,
+        mail_samples_dir: Path,
     ) -> None:
         """
         GIVEN:
@@ -35,7 +92,7 @@ class TestEmailFileParsing:
             - An Exception is thrown
         """
         # Check if exception is raised when parsing fails.
-        test_file = sample_dir / "doesntexist.eml"
+        test_file = mail_samples_dir / "doesntexist.eml"
 
         assert not test_file.exists()
 
@@ -246,12 +303,12 @@ class TestEmailThumbnailGenerate:
         """
         mocked_return = "Passing the return value through.."
         mock_make_thumbnail_from_pdf = mocker.patch(
-            "paperless_mail.parsers.make_thumbnail_from_pdf",
+            "paperless.parsers.mail.make_thumbnail_from_pdf",
         )
         mock_make_thumbnail_from_pdf.return_value = mocked_return
 
         mock_generate_pdf = mocker.patch(
-            "paperless_mail.parsers.MailDocumentParser.generate_pdf",
+            "paperless.parsers.mail.MailDocumentParser.generate_pdf",
         )
         mock_generate_pdf.return_value = "Mocked return value.."
 
@@ -260,8 +317,7 @@ class TestEmailThumbnailGenerate:
         mock_generate_pdf.assert_called_once()
         mock_make_thumbnail_from_pdf.assert_called_once_with(
             "Mocked return value..",
-            mail_parser.tempdir,
-            None,
+            mail_parser._tempdir,
         )
 
         assert mocked_return == thumb
@@ -373,7 +429,7 @@ class TestParser:
         """
         # Validate parsing returns the expected results
         mock_generate_pdf = mocker.patch(
-            "paperless_mail.parsers.MailDocumentParser.generate_pdf",
+            "paperless.parsers.mail.MailDocumentParser.generate_pdf",
         )
 
         mail_parser.parse(simple_txt_email_file, "message/rfc822")
@@ -385,7 +441,7 @@ class TestParser:
             "BCC: fdf@fvf.de\n\n"
             "\n\nThis is just a simple Text Mail."
         )
-        assert text_expected == mail_parser.text
+        assert text_expected == mail_parser.get_text()
         assert (
             datetime.datetime(
                 2022,
@@ -396,7 +452,7 @@ class TestParser:
                 43,
                 tzinfo=datetime.timezone(datetime.timedelta(seconds=7200)),
             )
-            == mail_parser.date
+            == mail_parser.get_date()
         )
 
         # Just check if tried to generate archive, the unittest for generate_pdf() goes deeper.
@@ -419,7 +475,7 @@ class TestParser:
         """
 
         mock_generate_pdf = mocker.patch(
-            "paperless_mail.parsers.MailDocumentParser.generate_pdf",
+            "paperless.parsers.mail.MailDocumentParser.generate_pdf",
         )
 
         # Validate parsing returns the expected results
@@ -443,7 +499,7 @@ class TestParser:
         mail_parser.parse(html_email_file, "message/rfc822")
 
         mock_generate_pdf.assert_called_once()
-        assert text_expected == mail_parser.text
+        assert text_expected == mail_parser.get_text()
         assert (
             datetime.datetime(
                 2022,
@@ -454,7 +510,7 @@ class TestParser:
                 19,
                 tzinfo=datetime.timezone(datetime.timedelta(seconds=7200)),
             )
-            == mail_parser.date
+            == mail_parser.get_date()
         )
 
     def test_generate_pdf_parse_error(
@@ -501,7 +557,7 @@ class TestParser:
 
         mail_parser.parse(simple_txt_email_file, "message/rfc822")
 
-        assert mail_parser.archive_path is not None
+        assert mail_parser.get_archive_path() is not None
 
     @pytest.mark.httpx_mock(can_send_already_matched_responses=True)
     def test_generate_pdf_html_email(
@@ -542,7 +598,7 @@ class TestParser:
         )
         mail_parser.parse(html_email_file, "message/rfc822")
 
-        assert mail_parser.archive_path is not None
+        assert mail_parser.get_archive_path() is not None
 
     def test_generate_pdf_html_email_html_to_pdf_failure(
         self,
@@ -712,10 +768,10 @@ class TestParser:
 
         def test_layout_option(layout_option, expected_calls, expected_pdf_names):
             mock_mailrule_get.return_value = mock.Mock(pdf_layout=layout_option)
+            mail_parser.configure(ParserContext(mailrule_id=1))
             mail_parser.parse(
                 document_path=html_email_file,
                 mime_type="message/rfc822",
-                mailrule_id=1,
             )
             args, _ = mock_merge_route.call_args
             assert len(args[0]) == expected_calls
diff --git a/src/paperless_mail/tests/test_parsers_live.py b/src/paperless/tests/parsers/test_mail_parser_live.py
similarity index 97%
rename from src/paperless_mail/tests/test_parsers_live.py
rename to src/paperless/tests/parsers/test_mail_parser_live.py
index 8a9487c16..dd17af314 100644
--- a/src/paperless_mail/tests/test_parsers_live.py
+++ b/src/paperless/tests/parsers/test_mail_parser_live.py
@@ -11,7 +11,7 @@ from PIL import Image
 from pytest_mock import MockerFixture
 
 from documents.tests.utils import util_call_with_backoff
-from paperless_mail.parsers import MailDocumentParser
+from paperless.parsers.mail import MailDocumentParser
 
 
 def extract_text(pdf_path: Path) -> str:
@@ -159,7 +159,7 @@ class TestParserLive:
             - The returned thumbnail image file shall match the expected hash
         """
         mock_generate_pdf = mocker.patch(
-            "paperless_mail.parsers.MailDocumentParser.generate_pdf",
+            "paperless.parsers.mail.MailDocumentParser.generate_pdf",
         )
         mock_generate_pdf.return_value = simple_txt_email_pdf_file
 
@@ -216,10 +216,10 @@ class TestParserLive:
             - The merged PDF shall contain text from both source PDFs
         """
         mock_generate_pdf_from_html = mocker.patch(
-            "paperless_mail.parsers.MailDocumentParser.generate_pdf_from_html",
+            "paperless.parsers.mail.MailDocumentParser.generate_pdf_from_html",
         )
         mock_generate_pdf_from_mail = mocker.patch(
-            "paperless_mail.parsers.MailDocumentParser.generate_pdf_from_mail",
+            "paperless.parsers.mail.MailDocumentParser.generate_pdf_from_mail",
         )
         mock_generate_pdf_from_mail.return_value = merged_pdf_first
         mock_generate_pdf_from_html.return_value = merged_pdf_second
diff --git a/src/paperless/tests/parsers/test_remote_parser.py b/src/paperless/tests/parsers/test_remote_parser.py
index 3e58498c1..d0b9effba 100644
--- a/src/paperless/tests/parsers/test_remote_parser.py
+++ b/src/paperless/tests/parsers/test_remote_parser.py
@@ -20,6 +20,7 @@ from unittest.mock import Mock
 
 import pytest
 
+from paperless.parsers import ParserContext
 from paperless.parsers import ParserProtocol
 from paperless.parsers.remote import RemoteDocumentParser
 
@@ -302,6 +303,7 @@ class TestRemoteParserParse:
         sample_pdf_file: Path,
         azure_client: Mock,
     ) -> None:
+        remote_parser.configure(ParserContext())
         remote_parser.parse(sample_pdf_file, "application/pdf")
 
         azure_client.close.assert_called_once()
diff --git a/src/paperless/tests/parsers/test_text_parser.py b/src/paperless/tests/parsers/test_text_parser.py
index d2f095f5c..091d8287e 100644
--- a/src/paperless/tests/parsers/test_text_parser.py
+++ b/src/paperless/tests/parsers/test_text_parser.py
@@ -12,6 +12,7 @@ from pathlib import Path
 
 import pytest
 
+from paperless.parsers import ParserContext
 from paperless.parsers import ParserProtocol
 from paperless.parsers.text import TextDocumentParser
 
@@ -93,6 +94,7 @@ class TestTextParserParse:
         text_parser: TextDocumentParser,
         sample_txt_file: Path,
     ) -> None:
+        text_parser.configure(ParserContext())
         text_parser.parse(sample_txt_file, "text/plain")
 
         assert text_parser.get_text() == "This is a test file.\n"
@@ -102,6 +104,7 @@ class TestTextParserParse:
         text_parser: TextDocumentParser,
         sample_txt_file: Path,
     ) -> None:
+        text_parser.configure(ParserContext())
         text_parser.parse(sample_txt_file, "text/plain")
 
         assert text_parser.get_archive_path() is None
@@ -111,6 +114,7 @@ class TestTextParserParse:
         text_parser: TextDocumentParser,
         sample_txt_file: Path,
     ) -> None:
+        text_parser.configure(ParserContext())
         text_parser.parse(sample_txt_file, "text/plain")
 
         assert text_parser.get_date() is None
@@ -129,6 +133,7 @@ class TestTextParserParse:
             - Parsing succeeds
             - Invalid bytes are replaced with the Unicode replacement character
         """
+        text_parser.configure(ParserContext())
         text_parser.parse(malformed_txt_file, "text/plain")
 
         assert text_parser.get_text() == "Pantothens\ufffdure\n"
diff --git a/src/paperless/tests/parsers/test_tika_parser.py b/src/paperless/tests/parsers/test_tika_parser.py
index d18d178e6..010969259 100644
--- a/src/paperless/tests/parsers/test_tika_parser.py
+++ b/src/paperless/tests/parsers/test_tika_parser.py
@@ -9,6 +9,7 @@ from pytest_django.fixtures import SettingsWrapper
 from pytest_httpx import HTTPXMock
 
 from documents.parsers import ParseError
+from paperless.parsers import ParserContext
 from paperless.parsers import ParserProtocol
 from paperless.parsers.tika import TikaDocumentParser
 
@@ -60,6 +61,29 @@ class TestTikaParserRegistryInterface:
     def test_requires_pdf_rendition_is_true(self) -> None:
         assert TikaDocumentParser().requires_pdf_rendition is True
 
+    def test_get_page_count_returns_none_without_archive(
+        self,
+        tika_parser: TikaDocumentParser,
+        sample_odt_file: Path,
+    ) -> None:
+        """Without a prior parse() there is no archive, so no page count."""
+        page_count = tika_parser.get_page_count(
+            sample_odt_file,
+            "application/vnd.oasis.opendocument.text",
+        )
+
+        assert page_count is None
+
+    def test_get_page_count_returns_int_with_pdf_archive(
+        self,
+        tika_parser: TikaDocumentParser,
+        sample_pdf_file: Path,
+    ) -> None:
+        """With an archive PDF set, a positive integer page count is returned."""
+        tika_parser._archive_path = sample_pdf_file
+        page_count = tika_parser.get_page_count(sample_pdf_file, "application/pdf")
+        assert isinstance(page_count, int) and page_count > 0
+
 
 @pytest.mark.django_db()
 class TestTikaParser:
@@ -83,6 +107,7 @@ class TestTikaParser:
         # Pretend convert to PDF response
         httpx_mock.add_response(content=b"PDF document")
 
+        tika_parser.configure(ParserContext())
         tika_parser.parse(sample_odt_file, "application/vnd.oasis.opendocument.text")
 
         assert tika_parser.get_text() == "the content"
diff --git a/src/paperless_mail/tests/samples/broken.eml b/src/paperless/tests/samples/mail/broken.eml
similarity index 100%
rename from src/paperless_mail/tests/samples/broken.eml
rename to src/paperless/tests/samples/mail/broken.eml
diff --git a/src/paperless_mail/tests/samples/first.pdf b/src/paperless/tests/samples/mail/first.pdf
similarity index 100%
rename from src/paperless_mail/tests/samples/first.pdf
rename to src/paperless/tests/samples/mail/first.pdf
diff --git a/src/paperless_mail/tests/samples/html.eml b/src/paperless/tests/samples/mail/html.eml
similarity index 100%
rename from src/paperless_mail/tests/samples/html.eml
rename to src/paperless/tests/samples/mail/html.eml
diff --git a/src/paperless_mail/tests/samples/html.eml.html b/src/paperless/tests/samples/mail/html.eml.html
similarity index 100%
rename from src/paperless_mail/tests/samples/html.eml.html
rename to src/paperless/tests/samples/mail/html.eml.html
diff --git a/src/paperless_mail/tests/samples/html.eml.pdf b/src/paperless/tests/samples/mail/html.eml.pdf
similarity index 100%
rename from src/paperless_mail/tests/samples/html.eml.pdf
rename to src/paperless/tests/samples/mail/html.eml.pdf
diff --git a/src/paperless_mail/tests/samples/html.eml.pdf.webp b/src/paperless/tests/samples/mail/html.eml.pdf.webp
similarity index 100%
rename from src/paperless_mail/tests/samples/html.eml.pdf.webp
rename to src/paperless/tests/samples/mail/html.eml.pdf.webp
diff --git a/src/paperless_mail/tests/samples/sample.html b/src/paperless/tests/samples/mail/sample.html
similarity index 100%
rename from src/paperless_mail/tests/samples/sample.html
rename to src/paperless/tests/samples/mail/sample.html
diff --git a/src/paperless_mail/tests/samples/sample.html.pdf b/src/paperless/tests/samples/mail/sample.html.pdf
similarity index 100%
rename from src/paperless_mail/tests/samples/sample.html.pdf
rename to src/paperless/tests/samples/mail/sample.html.pdf
diff --git a/src/paperless_mail/tests/samples/sample.html.pdf.webp b/src/paperless/tests/samples/mail/sample.html.pdf.webp
similarity index 100%
rename from src/paperless_mail/tests/samples/sample.html.pdf.webp
rename to src/paperless/tests/samples/mail/sample.html.pdf.webp
diff --git a/src/paperless_mail/tests/samples/sample.png b/src/paperless/tests/samples/mail/sample.png
similarity index 100%
rename from src/paperless_mail/tests/samples/sample.png
rename to src/paperless/tests/samples/mail/sample.png
diff --git a/src/paperless_mail/tests/samples/second.pdf b/src/paperless/tests/samples/mail/second.pdf
similarity index 100%
rename from src/paperless_mail/tests/samples/second.pdf
rename to src/paperless/tests/samples/mail/second.pdf
diff --git a/src/paperless_mail/tests/samples/simple_text.eml b/src/paperless/tests/samples/mail/simple_text.eml
similarity index 100%
rename from src/paperless_mail/tests/samples/simple_text.eml
rename to src/paperless/tests/samples/mail/simple_text.eml
diff --git a/src/paperless_mail/tests/samples/simple_text.eml.pdf b/src/paperless/tests/samples/mail/simple_text.eml.pdf
similarity index 100%
rename from src/paperless_mail/tests/samples/simple_text.eml.pdf
rename to src/paperless/tests/samples/mail/simple_text.eml.pdf
diff --git a/src/paperless_mail/tests/samples/simple_text.eml.pdf.webp b/src/paperless/tests/samples/mail/simple_text.eml.pdf.webp
similarity index 100%
rename from src/paperless_mail/tests/samples/simple_text.eml.pdf.webp
rename to src/paperless/tests/samples/mail/simple_text.eml.pdf.webp
diff --git a/src/paperless/tests/test_registry.py b/src/paperless/tests/test_registry.py
index 80c686bc4..5c2d20d50 100644
--- a/src/paperless/tests/test_registry.py
+++ b/src/paperless/tests/test_registry.py
@@ -18,6 +18,7 @@ from unittest.mock import patch
 
 import pytest
 
+from paperless.parsers import ParserContext
 from paperless.parsers import ParserProtocol
 from paperless.parsers.registry import ParserRegistry
 from paperless.parsers.registry import get_parser_registry
@@ -103,6 +104,11 @@ def dummy_parser_cls() -> type:
         ) -> list:
             return []
 
+        def configure(self, context: ParserContext) -> None:
+            """
+            Intentional no-op; present only so the dummy exposes ``configure``.
+            """
+
         def __enter__(self) -> Self:
             return self
 
@@ -144,6 +150,7 @@ class TestParserProtocol:
     @pytest.mark.parametrize(
         "missing_method",
         [
+            pytest.param("configure", id="missing-configure"),
             pytest.param("parse", id="missing-parse"),
             pytest.param("get_text", id="missing-get_text"),
             pytest.param("get_thumbnail", id="missing-get_thumbnail"),
diff --git a/src/paperless_mail/parsers.py b/src/paperless_mail/parsers.py
deleted file mode 100644
index 86cba23ab..000000000
--- a/src/paperless_mail/parsers.py
+++ /dev/null
@@ -1,481 +0,0 @@
-import re
-from html import escape
-from pathlib import Path
-
-from bleach import clean
-from bleach import linkify
-from django.conf import settings
-from django.utils import timezone
-from django.utils.timezone import is_naive
-from django.utils.timezone import make_aware
-from gotenberg_client import GotenbergClient
-from gotenberg_client.constants import A4
-from gotenberg_client.options import Measurement
-from gotenberg_client.options import MeasurementUnitType
-from gotenberg_client.options import PageMarginsType
-from gotenberg_client.options import PdfAFormat
-from humanize import naturalsize
-from imap_tools import MailAttachment
-from imap_tools import MailMessage
-from tika_client import TikaClient
-
-from documents.parsers import DocumentParser
-from documents.parsers import ParseError
-from documents.parsers import make_thumbnail_from_pdf
-from paperless.models import OutputTypeChoices
-from paperless_mail.models import MailRule
-
-
-class MailDocumentParser(DocumentParser):
-    """
-    This parser uses imap_tools to parse .eml files, generates pdf using
-    Gotenberg and sends the html part to a Tika server for text extraction.
-    """
-
-    logging_name = "paperless.parsing.mail"
-
-    def _settings_to_gotenberg_pdfa(self) -> PdfAFormat | None:
-        """
-        Converts our requested PDF/A output into the Gotenberg API
-        format
-        """
-        if settings.OCR_OUTPUT_TYPE in {
-            OutputTypeChoices.PDF_A,
-            OutputTypeChoices.PDF_A2,
-        }:
-            return PdfAFormat.A2b
-        elif settings.OCR_OUTPUT_TYPE == OutputTypeChoices.PDF_A1:  # pragma: no cover
-            self.log.warning(
-                "Gotenberg does not support PDF/A-1a, choosing PDF/A-2b instead",
-            )
-            return PdfAFormat.A2b
-        elif settings.OCR_OUTPUT_TYPE == OutputTypeChoices.PDF_A3:  # pragma: no cover
-            return PdfAFormat.A3b
-        return None
-
-    def get_thumbnail(
-        self,
-        document_path: Path,
-        mime_type: str,
-        file_name=None,
-    ) -> Path:
-        if not self.archive_path:
-            self.archive_path = self.generate_pdf(
-                self.parse_file_to_message(document_path),
-            )
-
-        return make_thumbnail_from_pdf(
-            self.archive_path,
-            self.tempdir,
-            self.logging_group,
-        )
-
-    def extract_metadata(self, document_path: Path, mime_type: str):
-        result = []
-
-        try:
-            mail = self.parse_file_to_message(document_path)
-        except ParseError as e:
-            self.log.warning(
-                f"Error while fetching document metadata for {document_path}: {e}",
-            )
-            return result
-
-        for key, value in mail.headers.items():
-            value = ", ".join(i for i in value)
-            try:
-                value.encode("utf-8")
-            except UnicodeEncodeError as e:  # pragma: no cover
-                self.log.debug(f"Skipping header {key}: {e}")
-                continue
-
-            result.append(
-                {
-                    "namespace": "",
-                    "prefix": "header",
-                    "key": key,
-                    "value": value,
-                },
-            )
-
-        result.append(
-            {
-                "namespace": "",
-                "prefix": "",
-                "key": "attachments",
-                "value": ", ".join(
-                    f"{attachment.filename}"
-                    f"({naturalsize(attachment.size, binary=True, format='%.2f')})"
-                    for attachment in mail.attachments
-                ),
-            },
-        )
-
-        result.append(
-            {
-                "namespace": "",
-                "prefix": "",
-                "key": "date",
-                "value": mail.date.strftime("%Y-%m-%d %H:%M:%S %Z"),
-            },
-        )
-
-        result.sort(key=lambda item: (item["prefix"], item["key"]))
-        return result
-
-    def parse(
-        self,
-        document_path: Path,
-        mime_type: str,
-        file_name=None,
-        mailrule_id: int | None = None,
-    ) -> None:
-        """
-        Parses the given .eml into formatted text, based on the decoded email.
-
-        """
-
-        def strip_text(text: str):
-            """
-            Reduces the spacing of the given text string
-            """
-            text = re.sub(r"\s+", " ", text)
-            text = re.sub(r"(\n *)+", "\n", text)
-            return text.strip()
-
-        def build_formatted_text(mail_message: MailMessage) -> str:
-            """
-            Constructs a formatted string, based on the given email.  Basically tries
-            to get most of the email content, included front matter, into a nice string
-            """
-            fmt_text = f"Subject: {mail_message.subject}\n\n"
-            fmt_text += f"From: {mail_message.from_values.full}\n\n"
-            to_list = [address.full for address in mail_message.to_values]
-            fmt_text += f"To: {', '.join(to_list)}\n\n"
-            if mail_message.cc_values:
-                fmt_text += (
-                    f"CC: {', '.join(address.full for address in mail.cc_values)}\n\n"
-                )
-            if mail_message.bcc_values:
-                fmt_text += (
-                    f"BCC: {', '.join(address.full for address in mail.bcc_values)}\n\n"
-                )
-            if mail_message.attachments:
-                att = []
-                for a in mail.attachments:
-                    attachment_size = naturalsize(a.size, binary=True, format="%.2f")
-                    att.append(
-                        f"{a.filename} ({attachment_size})",
-                    )
-                fmt_text += f"Attachments: {', '.join(att)}\n\n"
-
-            if mail.html:
-                fmt_text += "HTML content: " + strip_text(self.tika_parse(mail.html))
-
-            fmt_text += f"\n\n{strip_text(mail.text)}"
-
-            return fmt_text
-
-        self.log.debug(f"Parsing file {document_path.name} into an email")
-        mail = self.parse_file_to_message(document_path)
-
-        self.log.debug("Building formatted text from email")
-        self.text = build_formatted_text(mail)
-
-        if is_naive(mail.date):
-            self.date = make_aware(mail.date)
-        else:
-            self.date = mail.date
-
-        self.log.debug("Creating a PDF from the email")
-        if mailrule_id:
-            rule = MailRule.objects.get(pk=mailrule_id)
-            self.archive_path = self.generate_pdf(mail, rule.pdf_layout)
-        else:
-            self.archive_path = self.generate_pdf(mail)
-
-    @staticmethod
-    def parse_file_to_message(filepath: Path) -> MailMessage:
-        """
-        Parses the given .eml file into a MailMessage object
-        """
-        try:
-            with filepath.open("rb") as eml:
-                parsed = MailMessage.from_bytes(eml.read())
-                if parsed.from_values is None:
-                    raise ParseError(
-                        f"Could not parse {filepath}: Missing 'from'",
-                    )
-        except Exception as err:
-            raise ParseError(
-                f"Could not parse {filepath}: {err}",
-            ) from err
-
-        return parsed
-
-    def tika_parse(self, html: str):
-        self.log.info("Sending content to Tika server")
-
-        try:
-            with TikaClient(tika_url=settings.TIKA_ENDPOINT) as client:
-                parsed = client.tika.as_text.from_buffer(html, "text/html")
-
-                if parsed.content is not None:
-                    return parsed.content.strip()
-                return ""
-        except Exception as err:
-            raise ParseError(
-                f"Could not parse content with tika server at "
-                f"{settings.TIKA_ENDPOINT}: {err}",
-            ) from err
-
-    def generate_pdf(
-        self,
-        mail_message: MailMessage,
-        pdf_layout: MailRule.PdfLayout | None = None,
-    ) -> Path:
-        archive_path = Path(self.tempdir) / "merged.pdf"
-
-        mail_pdf_file = self.generate_pdf_from_mail(mail_message)
-
-        pdf_layout = (
-            pdf_layout or settings.EMAIL_PARSE_DEFAULT_LAYOUT
-        )  # EMAIL_PARSE_DEFAULT_LAYOUT is a MailRule.PdfLayout
-
-        # If no HTML content, create the PDF from the message
-        # Otherwise, create 2 PDFs and merge them with Gotenberg
-        if not mail_message.html:
-            archive_path.write_bytes(mail_pdf_file.read_bytes())
-        else:
-            pdf_of_html_content = self.generate_pdf_from_html(
-                mail_message.html,
-                mail_message.attachments,
-            )
-
-            self.log.debug("Merging email text and HTML content into single PDF")
-
-            with (
-                GotenbergClient(
-                    host=settings.TIKA_GOTENBERG_ENDPOINT,
-                    timeout=settings.CELERY_TASK_TIME_LIMIT,
-                ) as client,
-                client.merge.merge() as route,
-            ):
-                # Configure requested PDF/A formatting, if any
-                pdf_a_format = self._settings_to_gotenberg_pdfa()
-                if pdf_a_format is not None:
-                    route.pdf_format(pdf_a_format)
-
-                match pdf_layout:
-                    case MailRule.PdfLayout.HTML_TEXT:
-                        route.merge([pdf_of_html_content, mail_pdf_file])
-                    case MailRule.PdfLayout.HTML_ONLY:
-                        route.merge([pdf_of_html_content])
-                    case MailRule.PdfLayout.TEXT_ONLY:
-                        route.merge([mail_pdf_file])
-                    case MailRule.PdfLayout.TEXT_HTML | _:
-                        route.merge([mail_pdf_file, pdf_of_html_content])
-
-                try:
-                    response = route.run()
-                    archive_path.write_bytes(response.content)
-                except Exception as err:
-                    raise ParseError(
-                        f"Error while merging email HTML into PDF: {err}",
-                    ) from err
-
-        return archive_path
-
-    def mail_to_html(self, mail: MailMessage) -> Path:
-        """
-        Converts the given email into an HTML file, formatted
-        based on the given template
-        """
-
-        def clean_html(text: str) -> str:
-            """
-            Attempts to clean, escape and linkify the given HTML string
-            """
-            if isinstance(text, list):
-                text = "\n".join([str(e) for e in text])
-            if not isinstance(text, str):
-                text = str(text)
-            text = escape(text)
-            text = clean(text)
-            text = linkify(text, parse_email=True)
-            text = text.replace("\n", "<br>")
-            return text
-
-        data = {}
-
-        data["subject"] = clean_html(mail.subject)
-        if data["subject"]:
-            data["subject_label"] = "Subject"
-        data["from"] = clean_html(mail.from_values.full)
-        if data["from"]:
-            data["from_label"] = "From"
-        data["to"] = clean_html(", ".join(address.full for address in mail.to_values))
-        if data["to"]:
-            data["to_label"] = "To"
-        data["cc"] = clean_html(", ".join(address.full for address in mail.cc_values))
-        if data["cc"]:
-            data["cc_label"] = "CC"
-        data["bcc"] = clean_html(", ".join(address.full for address in mail.bcc_values))
-        if data["bcc"]:
-            data["bcc_label"] = "BCC"
-
-        att = []
-        for a in mail.attachments:
-            att.append(
-                f"{a.filename} ({naturalsize(a.size, binary=True, format='%.2f')})",
-            )
-        data["attachments"] = clean_html(", ".join(att))
-        if data["attachments"]:
-            data["attachments_label"] = "Attachments"
-
-        data["date"] = clean_html(
-            timezone.localtime(mail.date).strftime("%Y-%m-%d %H:%M"),
-        )
-        data["content"] = clean_html(mail.text.strip())
-
-        from django.template.loader import render_to_string
-
-        html_file = Path(self.tempdir) / "email_as_html.html"
-        html_file.write_text(render_to_string("email_msg_template.html", context=data))
-
-        return html_file
-
-    def generate_pdf_from_mail(self, mail: MailMessage) -> Path:
-        """
-        Creates a PDF based on the given email, using the email's values in a
-        an HTML template
-        """
-        self.log.info("Converting mail to PDF")
-
-        css_file = Path(__file__).parent / "templates" / "output.css"
-        email_html_file = self.mail_to_html(mail)
-
-        with (
-            GotenbergClient(
-                host=settings.TIKA_GOTENBERG_ENDPOINT,
-                timeout=settings.CELERY_TASK_TIME_LIMIT,
-            ) as client,
-            client.chromium.html_to_pdf() as route,
-        ):
-            # Configure requested PDF/A formatting, if any
-            pdf_a_format = self._settings_to_gotenberg_pdfa()
-            if pdf_a_format is not None:
-                route.pdf_format(pdf_a_format)
-
-            try:
-                response = (
-                    route.index(email_html_file)
-                    .resource(css_file)
-                    .margins(
-                        PageMarginsType(
-                            top=Measurement(0.1, MeasurementUnitType.Inches),
-                            bottom=Measurement(0.1, MeasurementUnitType.Inches),
-                            left=Measurement(0.1, MeasurementUnitType.Inches),
-                            right=Measurement(0.1, MeasurementUnitType.Inches),
-                        ),
-                    )
-                    .size(A4)
-                    .scale(1.0)
-                    .run()
-                )
-            except Exception as err:
-                raise ParseError(
-                    f"Error while converting email to PDF: {err}",
-                ) from err
-
-        email_as_pdf_file = Path(self.tempdir) / "email_as_pdf.pdf"
-        email_as_pdf_file.write_bytes(response.content)
-
-        return email_as_pdf_file
-
-    def generate_pdf_from_html(
-        self,
-        orig_html: str,
-        attachments: list[MailAttachment],
-    ) -> Path:
-        """
-        Generates a PDF file based on the HTML and attachments of the email
-        """
-
-        def clean_html_script(text: str):
-            compiled_open = re.compile(re.escape("<script"), re.IGNORECASE)
-            text = compiled_open.sub("<div hidden ", text)
-
-            compiled_close = re.compile(re.escape("</script"), re.IGNORECASE)
-            text = compiled_close.sub("</div", text)
-            return text
-
-        self.log.info("Converting message html to PDF")
-
-        tempdir = Path(self.tempdir)
-
-        html_clean = clean_html_script(orig_html)
-        html_clean_file = tempdir / "index.html"
-        html_clean_file.write_text(html_clean)
-
-        with (
-            GotenbergClient(
-                host=settings.TIKA_GOTENBERG_ENDPOINT,
-                timeout=settings.CELERY_TASK_TIME_LIMIT,
-            ) as client,
-            client.chromium.html_to_pdf() as route,
-        ):
-            # Configure requested PDF/A formatting, if any
-            pdf_a_format = self._settings_to_gotenberg_pdfa()
-            if pdf_a_format is not None:
-                route.pdf_format(pdf_a_format)
-
-            # Add attachments as resources, cleaning the filename and replacing
-            # it in the index file for inclusion
-            for attachment in attachments:
-                # Clean the attachment name to be valid
-                name_cid = f"cid:{attachment.content_id}"
-                name_clean = "".join(e for e in name_cid if e.isalnum())
-
-                # Write attachment payload to a temp file
-                temp_file = tempdir / name_clean
-                temp_file.write_bytes(attachment.payload)
-
-                route.resource(temp_file)
-
-                # Replace as needed the name with the clean name
-                html_clean = html_clean.replace(name_cid, name_clean)
-
-            # Now store the cleaned up HTML version
-            html_clean_file = tempdir / "index.html"
-            html_clean_file.write_text(html_clean)
-            # This is our index file, the main page basically
-            route.index(html_clean_file)
-
-            # Set page size, margins
-            route.margins(
-                PageMarginsType(
-                    top=Measurement(0.1, MeasurementUnitType.Inches),
-                    bottom=Measurement(0.1, MeasurementUnitType.Inches),
-                    left=Measurement(0.1, MeasurementUnitType.Inches),
-                    right=Measurement(0.1, MeasurementUnitType.Inches),
-                ),
-            ).size(A4).scale(1.0)
-
-            try:
-                response = route.run()
-
-            except Exception as err:
-                raise ParseError(
-                    f"Error while converting document to PDF: {err}",
-                ) from err
-
-        html_pdf = tempdir / "html.pdf"
-        html_pdf.write_bytes(response.content)
-        return html_pdf
-
-    def get_settings(self) -> None:
-        """
-        This parser does not implement additional settings yet
-        """
-        return None
diff --git a/src/paperless_mail/signals.py b/src/paperless_mail/signals.py
index 4a1d82df6..8fe046393 100644
--- a/src/paperless_mail/signals.py
+++ b/src/paperless_mail/signals.py
@@ -1,7 +1,12 @@
 def get_parser(*args, **kwargs):
-    from paperless_mail.parsers import MailDocumentParser
+    from paperless.parsers.mail import MailDocumentParser
 
-    return MailDocumentParser(*args, **kwargs)
+    # The new-style MailDocumentParser takes no constructor arguments, so the
+    # legacy ones sent by the signal-based consumer path (e.g. logging_group,
+    # progress_callback) are deliberately discarded instead of forwarded.
+    # Phase 4 will replace this signal path with the ParserRegistry.
+    del args, kwargs
+    return MailDocumentParser()
 
 
 def mail_consumer_declaration(sender, **kwargs):
diff --git a/src/paperless_mail/tests/conftest.py b/src/paperless_mail/tests/conftest.py
index 0742edfa3..b662d46f3 100644
--- a/src/paperless_mail/tests/conftest.py
+++ b/src/paperless_mail/tests/conftest.py
@@ -1,71 +1,9 @@
 from collections.abc import Generator
-from pathlib import Path
 
 import pytest
 
 from paperless_mail.mail import MailAccountHandler
 from paperless_mail.models import MailAccount
-from paperless_mail.parsers import MailDocumentParser
-
-
-@pytest.fixture(scope="session")
-def sample_dir() -> Path:
-    return (Path(__file__).parent / Path("samples")).resolve()
-
-
-@pytest.fixture(scope="session")
-def broken_email_file(sample_dir: Path) -> Path:
-    return sample_dir / "broken.eml"
-
-
-@pytest.fixture(scope="session")
-def simple_txt_email_file(sample_dir: Path) -> Path:
-    return sample_dir / "simple_text.eml"
-
-
-@pytest.fixture(scope="session")
-def simple_txt_email_pdf_file(sample_dir: Path) -> Path:
-    return sample_dir / "simple_text.eml.pdf"
-
-
-@pytest.fixture(scope="session")
-def simple_txt_email_thumbnail_file(sample_dir: Path) -> Path:
-    return sample_dir / "simple_text.eml.pdf.webp"
-
-
-@pytest.fixture(scope="session")
-def html_email_file(sample_dir: Path) -> Path:
-    return sample_dir / "html.eml"
-
-
-@pytest.fixture(scope="session")
-def html_email_pdf_file(sample_dir: Path) -> Path:
-    return sample_dir / "html.eml.pdf"
-
-
-@pytest.fixture(scope="session")
-def html_email_thumbnail_file(sample_dir: Path) -> Path:
-    return sample_dir / "html.eml.pdf.webp"
-
-
-@pytest.fixture(scope="session")
-def html_email_html_file(sample_dir: Path) -> Path:
-    return sample_dir / "html.eml.html"
-
-
-@pytest.fixture(scope="session")
-def merged_pdf_first(sample_dir: Path) -> Path:
-    return sample_dir / "first.pdf"
-
-
-@pytest.fixture(scope="session")
-def merged_pdf_second(sample_dir: Path) -> Path:
-    return sample_dir / "second.pdf"
-
-
-@pytest.fixture()
-def mail_parser() -> MailDocumentParser:
-    return MailDocumentParser(logging_group=None)
 
 
 @pytest.fixture()
@@ -89,11 +27,3 @@ def greenmail_mail_account(db: None) -> Generator[MailAccount, None, None]:
 @pytest.fixture()
 def mail_account_handler() -> MailAccountHandler:
     return MailAccountHandler()
-
-
-@pytest.fixture(scope="session")
-def nginx_base_url() -> Generator[str, None, None]:
-    """
-    The base URL for the nginx HTTP server we expect to be alive
-    """
-    yield "http://localhost:8080"