Enhancement: auto-hide the search bar on mobile

2026-03-19 23:45:57 +00:00 · 2026-03-19 15:35:39 -07:00
39 changed files with 744 additions and 1288 deletions
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -256,7 +256,7 @@ lint.isort.force-single-line = true
 [tool.codespell]
 write-changes = true
 ignore-words-list = "criterias,afterall,valeu,ureue,equest,ure,assertIn,Oktober,commitish"
-skip = "src-ui/src/locale/*,src-ui/pnpm-lock.yaml,src-ui/e2e/*,src/paperless_mail/tests/samples/*,src/paperless/tests/samples/mail/*,src/documents/tests/samples/*,*.po,*.json"
+skip = "src-ui/src/locale/*,src-ui/pnpm-lock.yaml,src-ui/e2e/*,src/paperless_mail/tests/samples/*,src/documents/tests/samples/*,*.po,*.json"

 [tool.pytest]
 minversion = "9.0"
--- a/src-ui/src/app/components/app-frame/app-frame.component.html
+++ b/src-ui/src/app/components/app-frame/app-frame.component.html
@@ -1,7 +1,7 @@
 <nav class="navbar navbar-dark fixed-top bg-primary flex-md-nowrap p-0 shadow-sm">
  <button class="navbar-toggler d-md-none collapsed border-0" type="button" data-toggle="collapse"
    data-target="#sidebarMenu" aria-controls="sidebarMenu" aria-expanded="false" aria-label="Toggle navigation"
-    (click)="isMenuCollapsed = !isMenuCollapsed">
+    (click)="mobileSearchHidden = false; isMenuCollapsed = !isMenuCollapsed">
    <span class="navbar-toggler-icon"></span>
  </button>
  <a class="navbar-brand d-flex align-items-center me-0 px-3 py-3 order-sm-0"
@@ -24,7 +24,8 @@
      }
    </div>
  </a>
-  <div class="search-container flex-grow-1 py-2 pb-3 pb-sm-2 px-3 ps-md-4 me-sm-auto order-3 order-sm-1">
+  <div class="search-container flex-grow-1 py-2 pb-3 pb-sm-2 px-3 ps-md-4 me-sm-auto order-3 order-sm-1"
+    [class.mobile-hidden]="mobileSearchHidden">
    <div class="col-12 col-md-7">
      <pngx-global-search></pngx-global-search>
    </div>
@@ -378,7 +379,7 @@
      </div>
    </nav>

-    <main role="main" class="ms-sm-auto px-md-4"
+    <main role="main" class="ms-sm-auto px-md-4" [class.mobile-search-hidden]="mobileSearchHidden"
      [ngClass]="slimSidebarEnabled ? 'col-slim' : 'col-md-9 col-lg-10 col-xxxl-11'">
      <router-outlet></router-outlet>
    </main>
--- a/src-ui/src/app/components/app-frame/app-frame.component.scss
+++ b/src-ui/src/app/components/app-frame/app-frame.component.scss
@@ -44,6 +44,23 @@
  .sidebar {
    top: 3.5rem;
  }
+
+  .search-container {
+    max-height: 4.5rem; // definite start value for the max-height transition
+    overflow: hidden; // clip the content while max-height shrinks
+    transition: max-height .2s ease, opacity .2s ease, padding-top .2s ease, padding-bottom .2s ease;
+
+    &.mobile-hidden { // toggled by [class.mobile-hidden] in the component template
+      max-height: 0;
+      opacity: 0;
+      padding-top: 0 !important; // presumably must beat the py-2/pb-3 utilities - confirm
+      padding-bottom: 0 !important;
+    }
+  }
+
+  main.mobile-search-hidden { // class bound to mobileSearchHidden in the template
+    padding-top: 56px; // NOTE(review): presumably the navbar-only height - confirm
+  }
 }

 main {
--- a/src-ui/src/app/components/app-frame/app-frame.component.spec.ts
+++ b/src-ui/src/app/components/app-frame/app-frame.component.spec.ts
@@ -293,6 +293,58 @@ describe('AppFrameComponent', () => {
    expect(component.isMenuCollapsed).toBeTruthy()
  })

+  // Window metrics as they were before any test below overrides them.
+  const initialWindowMetrics = {
+    innerWidth: window.innerWidth,
+    scrollY: window.scrollY,
+  }
+
+  /** Overrides a window metric with a fixed value for the running test. */
+  function setWindowMetric(key: 'innerWidth' | 'scrollY', value: number): void {
+    Object.defineProperty(window, key, { configurable: true, value })
+  }
+
+  afterEach(() => {
+    // Undo the overrides so they cannot leak into later tests of this suite.
+    setWindowMetric('innerWidth', initialWindowMetrics.innerWidth)
+    setWindowMetric('scrollY', initialWindowMetrics.scrollY)
+  })
+
+  it('should hide mobile search when scrolling down and show it when scrolling up', () => {
+    setWindowMetric('innerWidth', 767)
+    component.ngOnInit()
+
+    // Scrolling down to 40px is expected to hide the search row.
+    setWindowMetric('scrollY', 40)
+    component.onWindowScroll()
+    expect(component.mobileSearchHidden).toBe(true)
+
+    // Being back at the very top always reveals the search row again.
+    setWindowMetric('scrollY', 0)
+    component.onWindowScroll()
+    expect(component.mobileSearchHidden).toBe(false)
+  })
+
+  it('should keep mobile search visible on desktop scroll', () => {
+    setWindowMetric('innerWidth', 768)
+    component.mobileSearchHidden = true
+
+    component.onWindowScroll()
+
+    expect(component.mobileSearchHidden).toBe(false)
+  })
+
+  it('should keep mobile search visible while the mobile menu is expanded', () => {
+    setWindowMetric('innerWidth', 767)
+    component.ngOnInit()
+    component.isMenuCollapsed = false
+
+    setWindowMetric('scrollY', 40)
+    component.onWindowScroll()
+
+    expect(component.mobileSearchHidden).toBe(false)
+  })
+
  it('should support close document & navigate on close current doc', () => {
    const closeSpy = jest.spyOn(openDocumentsService, 'closeDocument')
    closeSpy.mockReturnValue(of(true))
--- a/src-ui/src/app/components/app-frame/app-frame.component.ts
+++ b/src-ui/src/app/components/app-frame/app-frame.component.ts
@@ -94,6 +94,14 @@ export class AppFrameComponent

  slimSidebarAnimating: boolean = false

+  mobileSearchHidden: boolean = false // bound to CSS classes in the template
+
+  private lastScrollY: number = 0 // offset that onWindowScroll diffs against
+
+  private readonly mobileBreakpoint = 768 // px; narrower windows are mobile
+
+  private readonly mobileSearchHideThreshold = 16 // px of scroll delta to toggle
+
  constructor() {
    super()
    const permissionsService = this.permissionsService
@@ -111,6 +119,8 @@ export class AppFrameComponent
  }

  ngOnInit(): void {
+    this.lastScrollY = window.scrollY
+
    if (this.settingsService.get(SETTINGS_KEYS.UPDATE_CHECKING_ENABLED)) {
      this.checkForUpdates()
    }
@@ -263,6 +273,37 @@ export class AppFrameComponent
    return this.settingsService.get(SETTINGS_KEYS.AI_ENABLED)
  }

+  /** Re-show the search row once the window is no longer mobile-sized. */
+  @HostListener('window:resize')
+  onWindowResize(): void {
+    if (this.isMobileViewport()) return
+    this.mobileSearchHidden = false
+  }
+
+  /** Hide the mobile search row on scroll down, reveal it on scroll up. */
+  @HostListener('window:scroll')
+  onWindowScroll(): void {
+    const currentScrollY = window.scrollY
+    const delta = currentScrollY - this.lastScrollY
+    const pinned = !this.isMobileViewport() || this.isMenuCollapsed === false
+
+    if (pinned || currentScrollY <= 0) {
+      // Desktop, expanded menu or top of page: the search row stays visible.
+      this.mobileSearchHidden = false
+    } else if (Math.abs(delta) <= this.mobileSearchHideThreshold) {
+      // Small step against the current state: keep lastScrollY as the anchor
+      // so that a slow scroll adds up across events instead of being dropped.
+      const againstState = this.mobileSearchHidden ? delta < 0 : delta > 0
+      if (againstState) return
+    } else if (delta < 0) {
+      this.mobileSearchHidden = false
+    } else if (currentScrollY > this.mobileSearchHideThreshold) {
+      this.mobileSearchHidden = true
+    }
+
+    this.lastScrollY = currentScrollY
+  }
+
  closeMenu() {
    this.isMenuCollapsed = true
  }
@@ -384,4 +425,8 @@ export class AppFrameComponent
      !this.settingsService.organizingSidebarSavedViews
    )
  }
+
+  private isMobileViewport(): boolean {
+    return window.innerWidth < this.mobileBreakpoint // exclusive upper bound
+  }
 }
--- a/src-ui/src/app/components/document-list/document-list.component.scss
+++ b/src-ui/src/app/components/document-list/document-list.component.scss
@@ -56,13 +56,20 @@ $paperless-card-breakpoints: (

 .sticky-top {
  z-index: 990; // below main navbar
-  top: calc(7rem - 2px); // height of navbar (mobile)
+  top: calc(7rem - 2px); // height of navbar + search row (mobile)
+  transition: top 0.2s ease;

  @media (min-width: 580px) {
    top: 3.5rem; // height of navbar
  }
 }

+@media (max-width: 579.98px) { // complement of the min-width: 580px rule above
+  :host-context(main.mobile-search-hidden) .sticky-top {
+    top: calc(3.5rem - 2px); // navbar only: the search row is collapsed
+  }
+}
+
 .table .form-check {
  padding: 0.2rem;
  min-height: 0;
--- a/src/documents/consumer.py
+++ b/src/documents/consumer.py
@@ -51,11 +51,10 @@ from documents.templating.workflows import parse_w_workflow_placeholders
 from documents.utils import copy_basic_file_stats
 from documents.utils import copy_file_with_basic_stats
 from documents.utils import run_subprocess
-from paperless.parsers import ParserContext
-from paperless.parsers.mail import MailDocumentParser
 from paperless.parsers.remote import RemoteDocumentParser
 from paperless.parsers.text import TextDocumentParser
 from paperless.parsers.tika import TikaDocumentParser
+from paperless_mail.parsers import MailDocumentParser

 LOGGING_NAME: Final[str] = "paperless.consumer"

@@ -72,12 +71,7 @@ def _parser_cleanup(parser: DocumentParser) -> None:
    """
    if isinstance(
        parser,
-        (
-            MailDocumentParser,
-            RemoteDocumentParser,
-            TextDocumentParser,
-            TikaDocumentParser,
-        ),
+        (TextDocumentParser, RemoteDocumentParser, TikaDocumentParser),
    ):
        parser.__exit__(None, None, None)
    else:
@@ -459,20 +453,13 @@ class ConsumerPlugin(
            progress_callback=progress_callback,
        )

-        parser_is_new_style = isinstance(
-            document_parser,
-            (
-                MailDocumentParser,
-                RemoteDocumentParser,
-                TextDocumentParser,
-                TikaDocumentParser,
-            ),
-        )
-
        # New-style parsers use __enter__/__exit__ for resource management.
        # _parser_cleanup (below) handles __exit__; call __enter__ here.
        # TODO(stumpylog): Remove me in the future
-        if parser_is_new_style:
+        if isinstance(
+            document_parser,
+            (TextDocumentParser, RemoteDocumentParser, TikaDocumentParser),
+        ):
            document_parser.__enter__()

        self.log.debug(f"Parser: {type(document_parser).__name__}")
@@ -493,12 +480,20 @@ class ConsumerPlugin(
                ConsumerStatusShortMessage.PARSING_DOCUMENT,
            )
            self.log.debug(f"Parsing {self.filename}...")
-
-            # TODO(stumpylog): Remove me in the future when all parsers use new protocol
-            if parser_is_new_style:
-                document_parser.configure(
-                    ParserContext(mailrule_id=self.input_doc.mailrule_id),
+            if (
+                isinstance(document_parser, MailDocumentParser)
+                and self.input_doc.mailrule_id
+            ):
+                document_parser.parse(
+                    self.working_copy,
+                    mime_type,
+                    self.filename,
+                    self.input_doc.mailrule_id,
                )
+            elif isinstance(
+                document_parser,
+                (TextDocumentParser, RemoteDocumentParser, TikaDocumentParser),
+            ):
                # TODO(stumpylog): Remove me in the future
                document_parser.parse(self.working_copy, mime_type)
            else:
@@ -511,8 +506,11 @@ class ConsumerPlugin(
                ProgressStatusOptions.WORKING,
                ConsumerStatusShortMessage.GENERATING_THUMBNAIL,
            )
-            # TODO(stumpylog): Remove me in the future when all parsers use new protocol
-            if parser_is_new_style:
+            if isinstance(
+                document_parser,
+                (TextDocumentParser, RemoteDocumentParser, TikaDocumentParser),
+            ):
+                # TODO(stumpylog): Remove me in the future
                thumbnail = document_parser.get_thumbnail(self.working_copy, mime_type)
            else:
                thumbnail = document_parser.get_thumbnail(
--- a/src/documents/tasks.py
+++ b/src/documents/tasks.py
@@ -65,11 +65,6 @@ from documents.signals.handlers import run_workflows
 from documents.signals.handlers import send_websocket_document_updated
 from documents.workflows.utils import get_workflows_for_trigger
 from paperless.config import AIConfig
-from paperless.parsers import ParserContext
-from paperless.parsers.mail import MailDocumentParser
-from paperless.parsers.remote import RemoteDocumentParser
-from paperless.parsers.text import TextDocumentParser
-from paperless.parsers.tika import TikaDocumentParser
 from paperless_ai.indexing import llm_index_add_or_update_document
 from paperless_ai.indexing import llm_index_remove_document
 from paperless_ai.indexing import update_llm_index
@@ -309,9 +304,7 @@ def update_document_content_maybe_archive_file(document_id) -> None:

    mime_type = document.mime_type

-    parser_class: type[DocumentParser] | None = get_parser_class_for_mime_type(
-        mime_type,
-    )
+    parser_class: type[DocumentParser] = get_parser_class_for_mime_type(mime_type)

    if not parser_class:
        logger.error(
@@ -322,41 +315,14 @@ def update_document_content_maybe_archive_file(document_id) -> None:

    parser: DocumentParser = parser_class(logging_group=uuid.uuid4())

-    parser_is_new_style = isinstance(
-        parser,
-        (
-            MailDocumentParser,
-            RemoteDocumentParser,
-            TextDocumentParser,
-            TikaDocumentParser,
-        ),
-    )
-
-    # TODO(stumpylog): Remove branch in the future when all parsers use new protocol
-    if parser_is_new_style:
-        parser.__enter__()
-
    try:
-        # TODO(stumpylog): Remove branch in the future when all parsers use new protocol
-        if parser_is_new_style:
-            parser.configure(ParserContext())
-            parser.parse(document.source_path, mime_type)
-        else:
-            parser.parse(
-                document.source_path,
-                mime_type,
-                document.get_public_filename(),
-            )
+        parser.parse(document.source_path, mime_type, document.get_public_filename())

-        # TODO(stumpylog): Remove branch in the future when all parsers use new protocol
-        if parser_is_new_style:
-            thumbnail = parser.get_thumbnail(document.source_path, mime_type)
-        else:
-            thumbnail = parser.get_thumbnail(
-                document.source_path,
-                mime_type,
-                document.get_public_filename(),
-            )
+        thumbnail = parser.get_thumbnail(
+            document.source_path,
+            mime_type,
+            document.get_public_filename(),
+        )

        with transaction.atomic():
            oldDocument = Document.objects.get(pk=document.pk)
@@ -437,14 +403,8 @@ def update_document_content_maybe_archive_file(document_id) -> None:
            f"Error while parsing document {document} (ID: {document_id})",
        )
    finally:
-        # TODO(stumpylog): Remove branch in the future when all parsers use new protocol
-        if isinstance(
-            parser,
-            (MailDocumentParser, TextDocumentParser, TikaDocumentParser),
-        ):
-            parser.__exit__(None, None, None)
-        else:
-            parser.cleanup()
+        # TODO(stumpylog): Cleanup once all parsers are handled
+        parser.cleanup()


@shared_task
--- a/src/documents/tests/test_consumer.py
+++ b/src/documents/tests/test_consumer.py
@@ -36,6 +36,7 @@ from documents.tests.utils import DummyProgressManager
 from documents.tests.utils import FileSystemAssertsMixin
 from documents.tests.utils import GetConsumerMixin
 from paperless_mail.models import MailRule
+from paperless_mail.parsers import MailDocumentParser


 class _BaseTestParser(DocumentParser):
@@ -1090,7 +1091,7 @@ class TestConsumer(
            self.assertEqual(command[1], "--replace-input")

    @mock.patch("paperless_mail.models.MailRule.objects.get")
-    @mock.patch("paperless.parsers.mail.MailDocumentParser.parse")
+    @mock.patch("paperless_mail.parsers.MailDocumentParser.parse")
    @mock.patch("documents.parsers.document_consumer_declaration.send")
    def test_mail_parser_receives_mailrule(
        self,
@@ -1106,13 +1107,11 @@ class TestConsumer(
        THEN:
            - The mail parser should receive the mail rule
        """
-        from paperless_mail.signals import get_parser as mail_get_parser
-
        mock_consumer_declaration_send.return_value = [
            (
                None,
                {
-                    "parser": mail_get_parser,
+                    "parser": MailDocumentParser,
                    "mime_types": {"message/rfc822": ".eml"},
                    "weight": 0,
                },
@@ -1124,10 +1123,9 @@ class TestConsumer(
        with self.get_consumer(
            filepath=(
                Path(__file__).parent.parent.parent
-                / Path("paperless")
+                / Path("paperless_mail")
                / Path("tests")
                / Path("samples")
-                / Path("mail")
            ).resolve()
            / "html.eml",
            source=DocumentSource.MailFetch,
@@ -1138,10 +1136,12 @@ class TestConsumer(
                ConsumerError,
            ):
                consumer.run()
-            mock_mail_parser_parse.assert_called_once_with(
-                consumer.working_copy,
-                "message/rfc822",
-            )
+                mock_mail_parser_parse.assert_called_once_with(
+                    consumer.working_copy,
+                    "message/rfc822",
+                    file_name="sample.pdf",
+                    mailrule=mock_mailrule_get.return_value,
+                )  # NOTE(review): unreachable if consumer.run() above raises - confirm


@mock.patch("documents.consumer.magic.from_file", fake_magic_from_file)
--- a/src/paperless/parsers/init.py
+++ b/src/paperless/parsers/init.py
@@ -35,7 +35,6 @@ Usage example (third-party parser)::

 from __future__ import annotations

-from dataclasses import dataclass
 from typing import TYPE_CHECKING
 from typing import Protocol
 from typing import Self
@@ -49,7 +48,6 @@ if TYPE_CHECKING:

 __all__ = [
    "MetadataEntry",
-    "ParserContext",
    "ParserProtocol",
 ]

@@ -75,44 +73,6 @@ class MetadataEntry(TypedDict):
    """String representation of the field value."""


-@dataclass(frozen=True, slots=True)
-class ParserContext:
-    """Immutable context passed to a parser before parse().
-
-    The consumer assembles this from the ingestion event and Django
-    settings, then calls ``parser.configure(context)`` before
-    ``parser.parse()``.  Parsers read only the fields relevant to them;
-    unneeded fields are ignored.
-
-    ``frozen=True`` prevents accidental mutation after the consumer
-    hands the context off.  ``slots=True`` keeps instances lightweight.
-
-    Fields
-    ------
-    mailrule_id : int | None
-        Primary key of the ``MailRule`` that triggered this ingestion,
-        or ``None`` when the document did not arrive via a mail rule.
-        Used by ``MailDocumentParser`` to select the PDF layout.
-
-    Notes
-    -----
-    Future fields (not yet implemented):
-
-    * ``output_type`` — PDF/A variant for archive generation
-      (replaces ``settings.OCR_OUTPUT_TYPE`` reads inside parsers).
-    * ``ocr_mode`` — skip-text, redo, force, etc.
-      (replaces ``settings.OCR_MODE`` reads inside parsers).
-    * ``ocr_language`` — Tesseract language string.
-      (replaces ``settings.OCR_LANGUAGE`` reads inside parsers).
-
-    When those fields are added the consumer will read from Django
-    settings once and populate them here, decoupling parsers from
-    ``settings.*`` entirely.
-    """
-
-    mailrule_id: int | None = None
-
-
@runtime_checkable
 class ParserProtocol(Protocol):
    """Structural contract for all Paperless-ngx document parsers.
@@ -231,21 +191,6 @@ class ParserProtocol(Protocol):
    # Core parsing interface
    # ------------------------------------------------------------------

-    def configure(self, context: ParserContext) -> None:
-        """Apply source context before parse().
-
-        Called by the consumer after instantiation and before parse().
-        The default implementation is a no-op; parsers override only the
-        fields they need.
-
-        Parameters
-        ----------
-        context:
-            Immutable context assembled by the consumer for this
-            specific ingestion event.
-        """
-        ...
-
    def parse(
        self,
        document_path: Path,
--- a/src/paperless/parsers/mail.py
+++ b/src/paperless/parsers/mail.py
@@ -1,834 +0,0 @@
-"""
-Built-in mail document parser.
-
-Handles message/rfc822 (EML) MIME type by:
- Parsing the email using imap_tools
- Generating a PDF via Gotenberg (for display and archive)
- Extracting text via Tika for HTML content
- Extracting metadata from email headers
-
-The parser always produces a PDF because EML files cannot be rendered
-natively in a browser (requires_pdf_rendition=True).
-"""
-
-from __future__ import annotations
-
-import logging
-import re
-import shutil
-import tempfile
-from html import escape
-from pathlib import Path
-from typing import TYPE_CHECKING
-from typing import Self
-
-from bleach import clean
-from bleach import linkify
-from django.conf import settings
-from django.utils import timezone
-from django.utils.timezone import is_naive
-from django.utils.timezone import make_aware
-from gotenberg_client import GotenbergClient
-from gotenberg_client.constants import A4
-from gotenberg_client.options import Measurement
-from gotenberg_client.options import MeasurementUnitType
-from gotenberg_client.options import PageMarginsType
-from gotenberg_client.options import PdfAFormat
-from humanize import naturalsize
-from imap_tools import MailAttachment
-from imap_tools import MailMessage
-from tika_client import TikaClient
-
-from documents.parsers import ParseError
-from documents.parsers import make_thumbnail_from_pdf
-from paperless.models import OutputTypeChoices
-from paperless.version import __full_version_str__
-from paperless_mail.models import MailRule
-
-if TYPE_CHECKING:
-    import datetime
-    from types import TracebackType
-
-    from paperless.parsers import MetadataEntry
-    from paperless.parsers import ParserContext
-
-logger = logging.getLogger("paperless.parsing.mail")
-
-_SUPPORTED_MIME_TYPES: dict[str, str] = {
-    "message/rfc822": ".eml",
-}
-
-
-class MailDocumentParser:
-    """Parse .eml email files for Paperless-ngx.
-
-    Uses imap_tools to parse .eml files, generates a PDF using Gotenberg,
-    and sends the HTML part to a Tika server for text extraction.  Because
-    EML files cannot be rendered natively in a browser, the parser always
-    produces a PDF rendition (requires_pdf_rendition=True).
-
-    Pass a ``ParserContext`` to ``configure()`` before ``parse()`` to
-    apply mail-rule-specific PDF layout options:
-
-        parser.configure(ParserContext(mailrule_id=rule.pk))
-        parser.parse(path, mime_type)
-
-    Class attributes
-    ----------------
-    name : str
-        Human-readable parser name.
-    version : str
-        Semantic version string, kept in sync with Paperless-ngx releases.
-    author : str
-        Maintainer name.
-    url : str
-        Issue tracker / source URL.
-    """
-
-    name: str = "Paperless-ngx Mail Parser"
-    version: str = __full_version_str__
-    author: str = "Paperless-ngx Contributors"
-    url: str = "https://github.com/paperless-ngx/paperless-ngx"
-
-    # ------------------------------------------------------------------
-    # Class methods
-    # ------------------------------------------------------------------
-
-    @classmethod
-    def supported_mime_types(cls) -> dict[str, str]:
-        """Return the MIME types this parser handles.
-
-        Returns
-        -------
-        dict[str, str]
-            Mapping of MIME type to preferred file extension.
-        """
-        return _SUPPORTED_MIME_TYPES
-
-    @classmethod
-    def score(
-        cls,
-        mime_type: str,
-        filename: str,
-        path: Path | None = None,
-    ) -> int | None:
-        """Return the priority score for handling this file.
-
-        Parameters
-        ----------
-        mime_type:
-            Detected MIME type of the file.
-        filename:
-            Original filename including extension.
-        path:
-            Optional filesystem path. Not inspected by this parser.
-
-        Returns
-        -------
-        int | None
-            10 if the MIME type is supported, otherwise None.
-        """
-        if mime_type in _SUPPORTED_MIME_TYPES:
-            return 10
-        return None
-
-    # ------------------------------------------------------------------
-    # Properties
-    # ------------------------------------------------------------------
-
-    @property
-    def can_produce_archive(self) -> bool:
-        """Whether this parser can produce a searchable PDF archive copy.
-
-        Returns
-        -------
-        bool
-            Always False — the mail parser produces a display PDF
-            (requires_pdf_rendition=True), not an optional OCR archive.
-        """
-        return False
-
-    @property
-    def requires_pdf_rendition(self) -> bool:
-        """Whether the parser must produce a PDF for the frontend to display.
-
-        Returns
-        -------
-        bool
-            Always True — EML files cannot be rendered natively in a browser,
-            so a PDF conversion is always required for display.
-        """
-        return True
-
-    # ------------------------------------------------------------------
-    # Lifecycle
-    # ------------------------------------------------------------------
-
-    def __init__(self, logging_group: object = None) -> None:
-        settings.SCRATCH_DIR.mkdir(parents=True, exist_ok=True)
-        self._tempdir = Path(
-            tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR),
-        )
-        self._text: str | None = None
-        self._date: datetime.datetime | None = None
-        self._archive_path: Path | None = None
-        self._mailrule_id: int | None = None
-
-    def __enter__(self) -> Self:
-        return self
-
-    def __exit__(
-        self,
-        exc_type: type[BaseException] | None,
-        exc_val: BaseException | None,
-        exc_tb: TracebackType | None,
-    ) -> None:
-        logger.debug("Cleaning up temporary directory %s", self._tempdir)
-        shutil.rmtree(self._tempdir, ignore_errors=True)
-
-    # ------------------------------------------------------------------
-    # Core parsing interface
-    # ------------------------------------------------------------------
-
-    def configure(self, context: ParserContext) -> None:
-        self._mailrule_id = context.mailrule_id
-
-    def parse(
-        self,
-        document_path: Path,
-        mime_type: str,
-        *,
-        produce_archive: bool = True,
-    ) -> None:
-        """Parse the given .eml into formatted text and a PDF archive.
-
-        Call ``configure(ParserContext(mailrule_id=...))`` before this method
-        to apply mail-rule-specific PDF layout options.  The ``produce_archive``
-        flag is accepted for protocol compatibility but is always honoured —
-        the mail parser always produces a PDF since EML files cannot be
-        displayed natively.
-
-        Parameters
-        ----------
-        document_path:
-            Absolute path to the .eml file.
-        mime_type:
-            Detected MIME type of the document (should be "message/rfc822").
-        produce_archive:
-            Accepted for protocol compatibility. The PDF rendition is always
-            produced since EML files cannot be displayed natively in a browser.
-
-        Raises
-        ------
-        documents.parsers.ParseError
-            If the file cannot be parsed or PDF generation fails.
-        """
-
-        def strip_text(text: str) -> str:
-            """Reduces the spacing of the given text string."""
-            text = re.sub(r"\s+", " ", text)
-            text = re.sub(r"(\n *)+", "\n", text)
-            return text.strip()
-
-        def build_formatted_text(mail_message: MailMessage) -> str:
-            """Constructs a formatted string based on the given email."""
-            fmt_text = f"Subject: {mail_message.subject}\n\n"
-            fmt_text += f"From: {mail_message.from_values.full if mail_message.from_values else ''}\n\n"
-            to_list = [address.full for address in mail_message.to_values]
-            fmt_text += f"To: {', '.join(to_list)}\n\n"
-            if mail_message.cc_values:
-                fmt_text += (
-                    f"CC: {', '.join(address.full for address in mail.cc_values)}\n\n"
-                )
-            if mail_message.bcc_values:
-                fmt_text += (
-                    f"BCC: {', '.join(address.full for address in mail.bcc_values)}\n\n"
-                )
-            if mail_message.attachments:
-                att = []
-                for a in mail.attachments:
-                    attachment_size = naturalsize(a.size, binary=True, format="%.2f")
-                    att.append(
-                        f"{a.filename} ({attachment_size})",
-                    )
-                fmt_text += f"Attachments: {', '.join(att)}\n\n"
-
-            if mail.html:
-                fmt_text += "HTML content: " + strip_text(self.tika_parse(mail.html))
-
-            fmt_text += f"\n\n{strip_text(mail.text)}"
-
-            return fmt_text
-
-        logger.debug("Parsing file %s into an email", document_path.name)
-        mail = self.parse_file_to_message(document_path)
-
-        logger.debug("Building formatted text from email")
-        self._text = build_formatted_text(mail)
-
-        if is_naive(mail.date):
-            self._date = make_aware(mail.date)
-        else:
-            self._date = mail.date
-
-        logger.debug("Creating a PDF from the email")
-        if self._mailrule_id:
-            rule = MailRule.objects.get(pk=self._mailrule_id)
-            self._archive_path = self.generate_pdf(
-                mail,
-                MailRule.PdfLayout(rule.pdf_layout),
-            )
-        else:
-            self._archive_path = self.generate_pdf(mail)
-
-    # ------------------------------------------------------------------
-    # Result accessors
-    # ------------------------------------------------------------------
-
-    def get_text(self) -> str | None:
-        """Return the plain-text content extracted during parse.
-
-        Returns
-        -------
-        str | None
-            Extracted text, or None if parse has not been called yet.
-        """
-        return self._text
-
-    def get_date(self) -> datetime.datetime | None:
-        """Return the document date detected during parse.
-
-        Returns
-        -------
-        datetime.datetime | None
-            Date from the email headers, or None if not detected.
-        """
-        return self._date
-
-    def get_archive_path(self) -> Path | None:
-        """Return the path to the generated archive PDF, or None.
-
-        Returns
-        -------
-        Path | None
-            Path to the PDF produced by Gotenberg, or None if parse has not
-            been called yet.
-        """
-        return self._archive_path
-
-    # ------------------------------------------------------------------
-    # Thumbnail and metadata
-    # ------------------------------------------------------------------
-
-    def get_thumbnail(
-        self,
-        document_path: Path,
-        mime_type: str,
-        file_name: str | None = None,
-    ) -> Path:
-        """Generate a thumbnail from the PDF rendition of the email.
-
-        Converts the document to PDF first if not already done.
-
-        Parameters
-        ----------
-        document_path:
-            Absolute path to the source document.
-        mime_type:
-            Detected MIME type of the document.
-        file_name:
-            Kept for backward compatibility; not used.
-
-        Returns
-        -------
-        Path
-            Path to the generated WebP thumbnail inside the temporary directory.
-        """
-        if not self._archive_path:
-            self._archive_path = self.generate_pdf(
-                self.parse_file_to_message(document_path),
-            )
-
-        return make_thumbnail_from_pdf(
-            self._archive_path,
-            self._tempdir,
-        )
-
-    def get_page_count(
-        self,
-        document_path: Path,
-        mime_type: str,
-    ) -> int | None:
-        """Return the number of pages in the document.
-
-        Counts pages in the archive PDF produced by a preceding parse()
-        call.  Returns ``None`` if parse() has not been called yet or if
-        no archive was produced.
-
-        Returns
-        -------
-        int | None
-            Page count of the archive PDF, or ``None``.
-        """
-        if self._archive_path is not None:
-            from paperless.parsers.utils import get_page_count_for_pdf
-
-            return get_page_count_for_pdf(self._archive_path, log=logger)
-        return None
-
-    def extract_metadata(
-        self,
-        document_path: Path,
-        mime_type: str,
-    ) -> list[MetadataEntry]:
-        """Extract metadata from the email headers.
-
-        Returns email headers as metadata entries with prefix "header",
-        plus summary entries for attachments and date.
-
-        Returns
-        -------
-        list[MetadataEntry]
-            Sorted list of metadata entries, or ``[]`` on parse failure.
-        """
-        result: list[MetadataEntry] = []
-
-        try:
-            mail = self.parse_file_to_message(document_path)
-        except ParseError as e:
-            logger.warning(
-                "Error while fetching document metadata for %s: %s",
-                document_path,
-                e,
-            )
-            return result
-
-        for key, header_values in mail.headers.items():
-            value = ", ".join(header_values)
-            try:
-                value.encode("utf-8")
-            except UnicodeEncodeError as e:  # pragma: no cover
-                logger.debug("Skipping header %s: %s", key, e)
-                continue
-
-            result.append(
-                {
-                    "namespace": "",
-                    "prefix": "header",
-                    "key": key,
-                    "value": value,
-                },
-            )
-
-        result.append(
-            {
-                "namespace": "",
-                "prefix": "",
-                "key": "attachments",
-                "value": ", ".join(
-                    f"{attachment.filename}"
-                    f"({naturalsize(attachment.size, binary=True, format='%.2f')})"
-                    for attachment in mail.attachments
-                ),
-            },
-        )
-
-        result.append(
-            {
-                "namespace": "",
-                "prefix": "",
-                "key": "date",
-                "value": mail.date.strftime("%Y-%m-%d %H:%M:%S %Z"),
-            },
-        )
-
-        result.sort(key=lambda item: (item["prefix"], item["key"]))
-        return result
-
-    # ------------------------------------------------------------------
-    # Email-specific methods
-    # ------------------------------------------------------------------
-
-    def _settings_to_gotenberg_pdfa(self) -> PdfAFormat | None:
-        """Convert the OCR output type setting to a Gotenberg PdfAFormat."""
-        if settings.OCR_OUTPUT_TYPE in {
-            OutputTypeChoices.PDF_A,
-            OutputTypeChoices.PDF_A2,
-        }:
-            return PdfAFormat.A2b
-        elif settings.OCR_OUTPUT_TYPE == OutputTypeChoices.PDF_A1:  # pragma: no cover
-            logger.warning(
-                "Gotenberg does not support PDF/A-1a, choosing PDF/A-2b instead",
-            )
-            return PdfAFormat.A2b
-        elif settings.OCR_OUTPUT_TYPE == OutputTypeChoices.PDF_A3:  # pragma: no cover
-            return PdfAFormat.A3b
-        return None
-
-    @staticmethod
-    def parse_file_to_message(filepath: Path) -> MailMessage:
-        """Parse the given .eml file into a MailMessage object.
-
-        Parameters
-        ----------
-        filepath:
-            Path to the .eml file.
-
-        Returns
-        -------
-        MailMessage
-            Parsed mail message.
-
-        Raises
-        ------
-        documents.parsers.ParseError
-            If the file cannot be parsed or is missing required fields.
-        """
-        try:
-            with filepath.open("rb") as eml:
-                parsed = MailMessage.from_bytes(eml.read())
-                if parsed.from_values is None:
-                    raise ParseError(
-                        f"Could not parse {filepath}: Missing 'from'",
-                    )
-        except Exception as err:
-            raise ParseError(
-                f"Could not parse {filepath}: {err}",
-            ) from err
-
-        return parsed
-
-    def tika_parse(self, html: str) -> str:
-        """Send HTML content to the Tika server for text extraction.
-
-        Parameters
-        ----------
-        html:
-            HTML string to parse.
-
-        Returns
-        -------
-        str
-            Extracted plain text.
-
-        Raises
-        ------
-        documents.parsers.ParseError
-            If the Tika server cannot be reached or returns an error.
-        """
-        logger.info("Sending content to Tika server")
-
-        try:
-            with TikaClient(tika_url=settings.TIKA_ENDPOINT) as client:
-                parsed = client.tika.as_text.from_buffer(html, "text/html")
-
-                if parsed.content is not None:
-                    return parsed.content.strip()
-                return ""
-        except Exception as err:
-            raise ParseError(
-                f"Could not parse content with tika server at "
-                f"{settings.TIKA_ENDPOINT}: {err}",
-            ) from err
-
-    def generate_pdf(
-        self,
-        mail_message: MailMessage,
-        pdf_layout: MailRule.PdfLayout | None = None,
-    ) -> Path:
-        """Generate a PDF from the email message.
-
-        Creates separate PDFs for the email body and HTML content, then
-        merges them according to the requested layout.
-
-        Parameters
-        ----------
-        mail_message:
-            Parsed email message.
-        pdf_layout:
-            Layout option for the PDF. Falls back to the
-            EMAIL_PARSE_DEFAULT_LAYOUT setting if not provided.
-
-        Returns
-        -------
-        Path
-            Path to the generated PDF inside the temporary directory.
-        """
-        archive_path = Path(self._tempdir) / "merged.pdf"
-
-        mail_pdf_file = self.generate_pdf_from_mail(mail_message)
-
-        if pdf_layout is None:
-            pdf_layout = MailRule.PdfLayout(settings.EMAIL_PARSE_DEFAULT_LAYOUT)
-
-        # If no HTML content, create the PDF from the message.
-        # Otherwise, create 2 PDFs and merge them with Gotenberg.
-        if not mail_message.html:
-            archive_path.write_bytes(mail_pdf_file.read_bytes())
-        else:
-            pdf_of_html_content = self.generate_pdf_from_html(
-                mail_message.html,
-                mail_message.attachments,
-            )
-
-            logger.debug("Merging email text and HTML content into single PDF")
-
-            with (
-                GotenbergClient(
-                    host=settings.TIKA_GOTENBERG_ENDPOINT,
-                    timeout=settings.CELERY_TASK_TIME_LIMIT,
-                ) as client,
-                client.merge.merge() as route,
-            ):
-                # Configure requested PDF/A formatting, if any
-                pdf_a_format = self._settings_to_gotenberg_pdfa()
-                if pdf_a_format is not None:
-                    route.pdf_format(pdf_a_format)
-
-                match pdf_layout:
-                    case MailRule.PdfLayout.HTML_TEXT:
-                        route.merge([pdf_of_html_content, mail_pdf_file])
-                    case MailRule.PdfLayout.HTML_ONLY:
-                        route.merge([pdf_of_html_content])
-                    case MailRule.PdfLayout.TEXT_ONLY:
-                        route.merge([mail_pdf_file])
-                    case MailRule.PdfLayout.TEXT_HTML | _:
-                        route.merge([mail_pdf_file, pdf_of_html_content])
-
-                try:
-                    response = route.run()
-                    archive_path.write_bytes(response.content)
-                except Exception as err:
-                    raise ParseError(
-                        f"Error while merging email HTML into PDF: {err}",
-                    ) from err
-
-        return archive_path
-
-    def mail_to_html(self, mail: MailMessage) -> Path:
-        """Convert the given email into an HTML file using a template.
-
-        Parameters
-        ----------
-        mail:
-            Parsed mail message.
-
-        Returns
-        -------
-        Path
-            Path to the rendered HTML file inside the temporary directory.
-        """
-
-        def clean_html(text: str) -> str:
-            """Attempt to clean, escape, and linkify the given HTML string."""
-            if isinstance(text, list):
-                text = "\n".join([str(e) for e in text])
-            if not isinstance(text, str):
-                text = str(text)
-            text = escape(text)
-            text = clean(text)
-            text = linkify(text, parse_email=True)
-            text = text.replace("\n", "<br>")
-            return text
-
-        data = {}
-
-        data["subject"] = clean_html(mail.subject)
-        if data["subject"]:
-            data["subject_label"] = "Subject"
-        data["from"] = clean_html(mail.from_values.full if mail.from_values else "")
-        if data["from"]:
-            data["from_label"] = "From"
-        data["to"] = clean_html(", ".join(address.full for address in mail.to_values))
-        if data["to"]:
-            data["to_label"] = "To"
-        data["cc"] = clean_html(", ".join(address.full for address in mail.cc_values))
-        if data["cc"]:
-            data["cc_label"] = "CC"
-        data["bcc"] = clean_html(", ".join(address.full for address in mail.bcc_values))
-        if data["bcc"]:
-            data["bcc_label"] = "BCC"
-
-        att = []
-        for a in mail.attachments:
-            att.append(
-                f"{a.filename} ({naturalsize(a.size, binary=True, format='%.2f')})",
-            )
-        data["attachments"] = clean_html(", ".join(att))
-        if data["attachments"]:
-            data["attachments_label"] = "Attachments"
-
-        data["date"] = clean_html(
-            timezone.localtime(mail.date).strftime("%Y-%m-%d %H:%M"),
-        )
-        data["content"] = clean_html(mail.text.strip())
-
-        from django.template.loader import render_to_string
-
-        html_file = Path(self._tempdir) / "email_as_html.html"
-        html_file.write_text(render_to_string("email_msg_template.html", context=data))
-
-        return html_file
-
-    def generate_pdf_from_mail(self, mail: MailMessage) -> Path:
-        """Create a PDF from the email body using an HTML template and Gotenberg.
-
-        Parameters
-        ----------
-        mail:
-            Parsed mail message.
-
-        Returns
-        -------
-        Path
-            Path to the generated PDF inside the temporary directory.
-
-        Raises
-        ------
-        documents.parsers.ParseError
-            If Gotenberg returns an error.
-        """
-        logger.info("Converting mail to PDF")
-
-        css_file = (
-            Path(__file__).parent.parent.parent
-            / "paperless_mail"
-            / "templates"
-            / "output.css"
-        )
-        email_html_file = self.mail_to_html(mail)
-
-        with (
-            GotenbergClient(
-                host=settings.TIKA_GOTENBERG_ENDPOINT,
-                timeout=settings.CELERY_TASK_TIME_LIMIT,
-            ) as client,
-            client.chromium.html_to_pdf() as route,
-        ):
-            # Configure requested PDF/A formatting, if any
-            pdf_a_format = self._settings_to_gotenberg_pdfa()
-            if pdf_a_format is not None:
-                route.pdf_format(pdf_a_format)
-
-            try:
-                response = (
-                    route.index(email_html_file)
-                    .resource(css_file)
-                    .margins(
-                        PageMarginsType(
-                            top=Measurement(0.1, MeasurementUnitType.Inches),
-                            bottom=Measurement(0.1, MeasurementUnitType.Inches),
-                            left=Measurement(0.1, MeasurementUnitType.Inches),
-                            right=Measurement(0.1, MeasurementUnitType.Inches),
-                        ),
-                    )
-                    .size(A4)
-                    .scale(1.0)
-                    .run()
-                )
-            except Exception as err:
-                raise ParseError(
-                    f"Error while converting email to PDF: {err}",
-                ) from err
-
-        email_as_pdf_file = Path(self._tempdir) / "email_as_pdf.pdf"
-        email_as_pdf_file.write_bytes(response.content)
-
-        return email_as_pdf_file
-
-    def generate_pdf_from_html(
-        self,
-        orig_html: str,
-        attachments: list[MailAttachment],
-    ) -> Path:
-        """Generate a PDF from the HTML content of the email.
-
-        Parameters
-        ----------
-        orig_html:
-            Raw HTML string from the email body.
-        attachments:
-            List of email attachments (used as inline resources).
-
-        Returns
-        -------
-        Path
-            Path to the generated PDF inside the temporary directory.
-
-        Raises
-        ------
-        documents.parsers.ParseError
-            If Gotenberg returns an error.
-        """
-
-        def clean_html_script(text: str) -> str:
-            compiled_open = re.compile(re.escape("<script"), re.IGNORECASE)
-            text = compiled_open.sub("<div hidden ", text)
-
-            compiled_close = re.compile(re.escape("</script"), re.IGNORECASE)
-            text = compiled_close.sub("</div", text)
-            return text
-
-        logger.info("Converting message html to PDF")
-
-        tempdir = Path(self._tempdir)
-
-        html_clean = clean_html_script(orig_html)
-        html_clean_file = tempdir / "index.html"
-        html_clean_file.write_text(html_clean)
-
-        with (
-            GotenbergClient(
-                host=settings.TIKA_GOTENBERG_ENDPOINT,
-                timeout=settings.CELERY_TASK_TIME_LIMIT,
-            ) as client,
-            client.chromium.html_to_pdf() as route,
-        ):
-            # Configure requested PDF/A formatting, if any
-            pdf_a_format = self._settings_to_gotenberg_pdfa()
-            if pdf_a_format is not None:
-                route.pdf_format(pdf_a_format)
-
-            # Add attachments as resources, cleaning the filename and replacing
-            # it in the index file for inclusion
-            for attachment in attachments:
-                # Clean the attachment name to be valid
-                name_cid = f"cid:{attachment.content_id}"
-                name_clean = "".join(e for e in name_cid if e.isalnum())
-
-                # Write attachment payload to a temp file
-                temp_file = tempdir / name_clean
-                temp_file.write_bytes(attachment.payload)
-
-                route.resource(temp_file)
-
-                # Replace as needed the name with the clean name
-                html_clean = html_clean.replace(name_cid, name_clean)
-
-            # Now store the cleaned up HTML version
-            html_clean_file = tempdir / "index.html"
-            html_clean_file.write_text(html_clean)
-            # This is our index file, the main page basically
-            route.index(html_clean_file)
-
-            # Set page size, margins
-            route.margins(
-                PageMarginsType(
-                    top=Measurement(0.1, MeasurementUnitType.Inches),
-                    bottom=Measurement(0.1, MeasurementUnitType.Inches),
-                    left=Measurement(0.1, MeasurementUnitType.Inches),
-                    right=Measurement(0.1, MeasurementUnitType.Inches),
-                ),
-            ).size(A4).scale(1.0)
-
-            try:
-                response = route.run()
-
-            except Exception as err:
-                raise ParseError(
-                    f"Error while converting document to PDF: {err}",
-                ) from err
-
-        html_pdf = tempdir / "html.pdf"
-        html_pdf.write_bytes(response.content)
-        return html_pdf
--- a/src/paperless/parsers/registry.py
+++ b/src/paperless/parsers/registry.py
@@ -193,7 +193,6 @@ class ParserRegistry:
        that log output is predictable; scoring determines which parser wins
        at runtime regardless of registration order.
        """
-        from paperless.parsers.mail import MailDocumentParser
        from paperless.parsers.remote import RemoteDocumentParser
        from paperless.parsers.text import TextDocumentParser
        from paperless.parsers.tika import TikaDocumentParser
@@ -201,7 +200,6 @@ class ParserRegistry:
        self.register_builtin(TextDocumentParser)
        self.register_builtin(RemoteDocumentParser)
        self.register_builtin(TikaDocumentParser)
-        self.register_builtin(MailDocumentParser)

    # ------------------------------------------------------------------
    # Discovery
--- a/src/paperless/parsers/remote.py
+++ b/src/paperless/parsers/remote.py
@@ -28,7 +28,6 @@ if TYPE_CHECKING:
    from types import TracebackType

    from paperless.parsers import MetadataEntry
-    from paperless.parsers import ParserContext

 logger = logging.getLogger("paperless.parsing.remote")

@@ -205,9 +204,6 @@ class RemoteDocumentParser:
    # Core parsing interface
    # ------------------------------------------------------------------

-    def configure(self, context: ParserContext) -> None:
-        pass
-
    def parse(
        self,
        document_path: Path,
--- a/src/paperless/parsers/text.py
+++ b/src/paperless/parsers/text.py
@@ -27,7 +27,6 @@ if TYPE_CHECKING:
    from types import TracebackType

    from paperless.parsers import MetadataEntry
-    from paperless.parsers import ParserContext

 logger = logging.getLogger("paperless.parsing.text")

@@ -157,9 +156,6 @@ class TextDocumentParser:
    # Core parsing interface
    # ------------------------------------------------------------------

-    def configure(self, context: ParserContext) -> None:
-        pass
-
    def parse(
        self,
        document_path: Path,
--- a/src/paperless/parsers/tika.py
+++ b/src/paperless/parsers/tika.py
@@ -35,7 +35,6 @@ if TYPE_CHECKING:
    from types import TracebackType

    from paperless.parsers import MetadataEntry
-    from paperless.parsers import ParserContext

 logger = logging.getLogger("paperless.parsing.tika")

@@ -206,9 +205,6 @@ class TikaDocumentParser:
    # Core parsing interface
    # ------------------------------------------------------------------

-    def configure(self, context: ParserContext) -> None:
-        pass
-
    def parse(
        self,
        document_path: Path,
@@ -344,19 +340,11 @@ class TikaDocumentParser:
    ) -> int | None:
        """Return the number of pages in the document.

-        Counts pages in the archive PDF produced by a preceding parse()
-        call.  Returns ``None`` if parse() has not been called yet or if
-        no archive was produced.
-
        Returns
        -------
        int | None
-            Page count of the archive PDF, or ``None``.
+            Always None — page count is not available from Tika.
        """
-        if self._archive_path is not None:
-            from paperless.parsers.utils import get_page_count_for_pdf
-
-            return get_page_count_for_pdf(self._archive_path, log=logger)
        return None

    def extract_metadata(
--- a/src/paperless/tests/parsers/conftest.py
+++ b/src/paperless/tests/parsers/conftest.py
@@ -10,7 +10,6 @@ from typing import TYPE_CHECKING

 import pytest

-from paperless.parsers.mail import MailDocumentParser
 from paperless.parsers.remote import RemoteDocumentParser
 from paperless.parsers.text import TextDocumentParser
 from paperless.parsers.tika import TikaDocumentParser
@@ -248,166 +247,3 @@ def tika_parser() -> Generator[TikaDocumentParser, None, None]:
    """
    with TikaDocumentParser() as parser:
        yield parser
-
-
-# ------------------------------------------------------------------
-# Mail parser sample files
-# ------------------------------------------------------------------
-
-
-@pytest.fixture(scope="session")
-def mail_samples_dir(samples_dir: Path) -> Path:
-    """Absolute path to the mail parser sample files directory.
-
-    Returns
-    -------
-    Path
-        ``<samples_dir>/mail/``
-    """
-    return samples_dir / "mail"
-
-
-@pytest.fixture(scope="session")
-def broken_email_file(mail_samples_dir: Path) -> Path:
-    """Path to a broken/malformed EML sample file.
-
-    Returns
-    -------
-    Path
-        Absolute path to ``mail/broken.eml``.
-    """
-    return mail_samples_dir / "broken.eml"
-
-
-@pytest.fixture(scope="session")
-def simple_txt_email_file(mail_samples_dir: Path) -> Path:
-    """Path to a plain-text email sample file.
-
-    Returns
-    -------
-    Path
-        Absolute path to ``mail/simple_text.eml``.
-    """
-    return mail_samples_dir / "simple_text.eml"
-
-
-@pytest.fixture(scope="session")
-def simple_txt_email_pdf_file(mail_samples_dir: Path) -> Path:
-    """Path to the expected PDF rendition of the plain-text email.
-
-    Returns
-    -------
-    Path
-        Absolute path to ``mail/simple_text.eml.pdf``.
-    """
-    return mail_samples_dir / "simple_text.eml.pdf"
-
-
-@pytest.fixture(scope="session")
-def simple_txt_email_thumbnail_file(mail_samples_dir: Path) -> Path:
-    """Path to the expected thumbnail for the plain-text email.
-
-    Returns
-    -------
-    Path
-        Absolute path to ``mail/simple_text.eml.pdf.webp``.
-    """
-    return mail_samples_dir / "simple_text.eml.pdf.webp"
-
-
-@pytest.fixture(scope="session")
-def html_email_file(mail_samples_dir: Path) -> Path:
-    """Path to an HTML email sample file.
-
-    Returns
-    -------
-    Path
-        Absolute path to ``mail/html.eml``.
-    """
-    return mail_samples_dir / "html.eml"
-
-
-@pytest.fixture(scope="session")
-def html_email_pdf_file(mail_samples_dir: Path) -> Path:
-    """Path to the expected PDF rendition of the HTML email.
-
-    Returns
-    -------
-    Path
-        Absolute path to ``mail/html.eml.pdf``.
-    """
-    return mail_samples_dir / "html.eml.pdf"
-
-
-@pytest.fixture(scope="session")
-def html_email_thumbnail_file(mail_samples_dir: Path) -> Path:
-    """Path to the expected thumbnail for the HTML email.
-
-    Returns
-    -------
-    Path
-        Absolute path to ``mail/html.eml.pdf.webp``.
-    """
-    return mail_samples_dir / "html.eml.pdf.webp"
-
-
-@pytest.fixture(scope="session")
-def html_email_html_file(mail_samples_dir: Path) -> Path:
-    """Path to the HTML body of the HTML email sample.
-
-    Returns
-    -------
-    Path
-        Absolute path to ``mail/html.eml.html``.
-    """
-    return mail_samples_dir / "html.eml.html"
-
-
-@pytest.fixture(scope="session")
-def merged_pdf_first(mail_samples_dir: Path) -> Path:
-    """Path to the first PDF used in PDF-merge tests.
-
-    Returns
-    -------
-    Path
-        Absolute path to ``mail/first.pdf``.
-    """
-    return mail_samples_dir / "first.pdf"
-
-
-@pytest.fixture(scope="session")
-def merged_pdf_second(mail_samples_dir: Path) -> Path:
-    """Path to the second PDF used in PDF-merge tests.
-
-    Returns
-    -------
-    Path
-        Absolute path to ``mail/second.pdf``.
-    """
-    return mail_samples_dir / "second.pdf"
-
-
-# ------------------------------------------------------------------
-# Mail parser instance
-# ------------------------------------------------------------------
-
-
-@pytest.fixture()
-def mail_parser() -> Generator[MailDocumentParser, None, None]:
-    """Yield a MailDocumentParser and clean up its temporary directory afterwards.
-
-    Yields
-    ------
-    MailDocumentParser
-        A ready-to-use parser instance.
-    """
-    with MailDocumentParser() as parser:
-        yield parser
-
-
-@pytest.fixture(scope="session")
-def nginx_base_url() -> Generator[str, None, None]:
-    """
-    The base URL for the nginx HTTP server we expect to be alive
-    """
-    yield "http://localhost:8080"
--- a/src/paperless/tests/parsers/test_remote_parser.py
+++ b/src/paperless/tests/parsers/test_remote_parser.py
@@ -20,7 +20,6 @@ from unittest.mock import Mock

 import pytest

-from paperless.parsers import ParserContext
 from paperless.parsers import ParserProtocol
 from paperless.parsers.remote import RemoteDocumentParser

@@ -303,7 +302,6 @@ class TestRemoteParserParse:
        sample_pdf_file: Path,
        azure_client: Mock,
    ) -> None:
-        remote_parser.configure(ParserContext())
        remote_parser.parse(sample_pdf_file, "application/pdf")

        azure_client.close.assert_called_once()
--- a/src/paperless/tests/parsers/test_text_parser.py
+++ b/src/paperless/tests/parsers/test_text_parser.py
@@ -12,7 +12,6 @@ from pathlib import Path

 import pytest

-from paperless.parsers import ParserContext
 from paperless.parsers import ParserProtocol
 from paperless.parsers.text import TextDocumentParser

@@ -94,7 +93,6 @@ class TestTextParserParse:
        text_parser: TextDocumentParser,
        sample_txt_file: Path,
    ) -> None:
-        text_parser.configure(ParserContext())
        text_parser.parse(sample_txt_file, "text/plain")

        assert text_parser.get_text() == "This is a test file.\n"
@@ -104,7 +102,6 @@ class TestTextParserParse:
        text_parser: TextDocumentParser,
        sample_txt_file: Path,
    ) -> None:
-        text_parser.configure(ParserContext())
        text_parser.parse(sample_txt_file, "text/plain")

        assert text_parser.get_archive_path() is None
@@ -114,7 +111,6 @@ class TestTextParserParse:
        text_parser: TextDocumentParser,
        sample_txt_file: Path,
    ) -> None:
-        text_parser.configure(ParserContext())
        text_parser.parse(sample_txt_file, "text/plain")

        assert text_parser.get_date() is None
@@ -133,7 +129,6 @@ class TestTextParserParse:
            - Parsing succeeds
            - Invalid bytes are replaced with the Unicode replacement character
        """
-        text_parser.configure(ParserContext())
        text_parser.parse(malformed_txt_file, "text/plain")

        assert text_parser.get_text() == "Pantothens\ufffdure\n"
--- a/src/paperless/tests/parsers/test_tika_parser.py
+++ b/src/paperless/tests/parsers/test_tika_parser.py
@@ -9,7 +9,6 @@ from pytest_django.fixtures import SettingsWrapper
 from pytest_httpx import HTTPXMock

 from documents.parsers import ParseError
-from paperless.parsers import ParserContext
 from paperless.parsers import ParserProtocol
 from paperless.parsers.tika import TikaDocumentParser

@@ -61,29 +60,6 @@ class TestTikaParserRegistryInterface:
    def test_requires_pdf_rendition_is_true(self) -> None:
        assert TikaDocumentParser().requires_pdf_rendition is True

-    def test_get_page_count_returns_none_without_archive(
-        self,
-        tika_parser: TikaDocumentParser,
-        sample_odt_file: Path,
-    ) -> None:
-        assert (
-            tika_parser.get_page_count(
-                sample_odt_file,
-                "application/vnd.oasis.opendocument.text",
-            )
-            is None
-        )
-
-    def test_get_page_count_returns_int_with_pdf_archive(
-        self,
-        tika_parser: TikaDocumentParser,
-        sample_pdf_file: Path,
-    ) -> None:
-        tika_parser._archive_path = sample_pdf_file
-        count = tika_parser.get_page_count(sample_pdf_file, "application/pdf")
-        assert isinstance(count, int)
-        assert count > 0
-

@pytest.mark.django_db()
 class TestTikaParser:
@@ -107,7 +83,6 @@ class TestTikaParser:
        # Pretend convert to PDF response
        httpx_mock.add_response(content=b"PDF document")

-        tika_parser.configure(ParserContext())
        tika_parser.parse(sample_odt_file, "application/vnd.oasis.opendocument.text")

        assert tika_parser.get_text() == "the content"
--- a/src/paperless/tests/test_registry.py
+++ b/src/paperless/tests/test_registry.py
@@ -18,7 +18,6 @@ from unittest.mock import patch

 import pytest

-from paperless.parsers import ParserContext
 from paperless.parsers import ParserProtocol
 from paperless.parsers.registry import ParserRegistry
 from paperless.parsers.registry import get_parser_registry
@@ -104,11 +103,6 @@ def dummy_parser_cls() -> type:
        ) -> list:
            return []

-        def configure(self, context: ParserContext) -> None:
-            """
-            Required to exist, but doesn't need to do anything
-            """
-
        def __enter__(self) -> Self:
            return self

@@ -150,7 +144,6 @@ class TestParserProtocol:
    @pytest.mark.parametrize(
        "missing_method",
        [
-            pytest.param("configure", id="missing-configure"),
            pytest.param("parse", id="missing-parse"),
            pytest.param("get_text", id="missing-get_text"),
            pytest.param("get_thumbnail", id="missing-get_thumbnail"),
--- a/src/paperless_mail/parsers.py
+++ b/src/paperless_mail/parsers.py
@@ -0,0 +1,481 @@
+import re
+from html import escape
+from pathlib import Path
+
+from bleach import clean
+from bleach import linkify
+from django.conf import settings
+from django.utils import timezone
+from django.utils.timezone import is_naive
+from django.utils.timezone import make_aware
+from gotenberg_client import GotenbergClient
+from gotenberg_client.constants import A4
+from gotenberg_client.options import Measurement
+from gotenberg_client.options import MeasurementUnitType
+from gotenberg_client.options import PageMarginsType
+from gotenberg_client.options import PdfAFormat
+from humanize import naturalsize
+from imap_tools import MailAttachment
+from imap_tools import MailMessage
+from tika_client import TikaClient
+
+from documents.parsers import DocumentParser
+from documents.parsers import ParseError
+from documents.parsers import make_thumbnail_from_pdf
+from paperless.models import OutputTypeChoices
+from paperless_mail.models import MailRule
+
+
+class MailDocumentParser(DocumentParser):
+    """
+    This parser uses imap_tools to parse .eml files, generates pdf using
+    Gotenberg and sends the html part to a Tika server for text extraction.
+    """
+
+    logging_name = "paperless.parsing.mail"
+
+    def _settings_to_gotenberg_pdfa(self) -> PdfAFormat | None:
+        """
+        Maps the configured OCR_OUTPUT_TYPE onto the PDF/A format requested
+        from Gotenberg.  PDF/A-1 is not supported by Gotenberg, so PDF/A-2b
+        is chosen for it and a warning is logged.  Any other, non PDF/A
+        setting results in None.
+        """
+        requested = settings.OCR_OUTPUT_TYPE
+        if requested == OutputTypeChoices.PDF_A1:  # pragma: no cover
+            self.log.warning(
+                "Gotenberg does not support PDF/A-1a, choosing PDF/A-2b instead",
+            )
+            return PdfAFormat.A2b
+        if requested == OutputTypeChoices.PDF_A3:  # pragma: no cover
+            return PdfAFormat.A3b
+        if requested in (OutputTypeChoices.PDF_A, OutputTypeChoices.PDF_A2):
+            return PdfAFormat.A2b
+        return None
+
+    def get_thumbnail(
+        self,
+        document_path: Path,
+        mime_type: str,
+        file_name=None,
+    ) -> Path:
+        """
+        Builds the thumbnail from the archive PDF, rendering that PDF from
+        the .eml at document_path first when no archive exists yet.
+        """
+        if not self.archive_path:
+            message = self.parse_file_to_message(document_path)
+            self.archive_path = self.generate_pdf(message)
+
+        source_pdf = self.archive_path
+        return make_thumbnail_from_pdf(source_pdf, self.tempdir, self.logging_group)
+
+    def extract_metadata(self, document_path: Path, mime_type: str):
+        """
+        Collects metadata entries for the .eml file at document_path.
+
+        Every mail header becomes an entry with the prefix "header"; two more
+        entries without a prefix describe the attachments (name and size)
+        and the mail date.  Each entry is a dict with the keys "namespace",
+        "prefix", "key" and "value"; the list is sorted by prefix, then key.
+
+        If the file cannot be parsed, a warning is logged and an empty list
+        is returned.
+        """
+
+        def entry(prefix: str, key: str, value: str) -> dict:
+            """Builds one metadata entry; the namespace is always empty"""
+            return {
+                "namespace": "",
+                "prefix": prefix,
+                "key": key,
+                "value": value,
+            }
+
+        try:
+            mail = self.parse_file_to_message(document_path)
+        except ParseError as e:
+            self.log.warning(
+                f"Error while fetching document metadata for {document_path}: {e}",
+            )
+            return []
+
+        result = []
+        for key, values in mail.headers.items():
+            joined = ", ".join(values)
+            try:
+                # Only keep headers whose value can be encoded as UTF-8
+                joined.encode("utf-8")
+            except UnicodeEncodeError as e:  # pragma: no cover
+                self.log.debug(f"Skipping header {key}: {e}")
+                continue
+            result.append(entry("header", key, joined))
+
+        attachment_summary = ", ".join(
+            f"{attachment.filename}"
+            f"({naturalsize(attachment.size, binary=True, format='%.2f')})"
+            for attachment in mail.attachments
+        )
+        result.append(entry("", "attachments", attachment_summary))
+        result.append(
+            entry("", "date", mail.date.strftime("%Y-%m-%d %H:%M:%S %Z")),
+        )
+
+        return sorted(result, key=lambda item: (item["prefix"], item["key"]))
+
+    def parse(
+        self,
+        document_path: Path,
+        mime_type: str,
+        file_name=None,
+        mailrule_id: int | None = None,
+    ) -> None:
+        """
+        Parses the given .eml into formatted text, based on the decoded email.
+
+        On success self.text holds the formatted text, self.date the timezone
+        aware date of the mail and self.archive_path the generated PDF.  If
+        mailrule_id is given, the PDF layout of that MailRule is used.
+
+        Raises ParseError when the file cannot be parsed as an email or when
+        the Tika or Gotenberg conversion fails.
+        """
+
+        def strip_text(text: str) -> str:
+            """
+            Collapses every run of whitespace, newlines included, into a single
+            space and trims both ends
+            """
+            # \s also matches newlines, so no separate newline handling is needed
+            return re.sub(r"\s+", " ", text).strip()
+
+        def build_formatted_text(mail_message: MailMessage) -> str:
+            """
+            Constructs a formatted string, based on the given email.  Basically tries
+            to get most of the email content, included front matter, into a nice string
+            """
+            fmt_text = f"Subject: {mail_message.subject}\n\n"
+            fmt_text += f"From: {mail_message.from_values.full}\n\n"
+            to_list = [address.full for address in mail_message.to_values]
+            fmt_text += f"To: {', '.join(to_list)}\n\n"
+            if mail_message.cc_values:
+                cc_list = [address.full for address in mail_message.cc_values]
+                fmt_text += f"CC: {', '.join(cc_list)}\n\n"
+            if mail_message.bcc_values:
+                bcc_list = [address.full for address in mail_message.bcc_values]
+                fmt_text += f"BCC: {', '.join(bcc_list)}\n\n"
+            if mail_message.attachments:
+                att = []
+                for a in mail_message.attachments:
+                    attachment_size = naturalsize(a.size, binary=True, format="%.2f")
+                    att.append(f"{a.filename} ({attachment_size})")
+                fmt_text += f"Attachments: {', '.join(att)}\n\n"
+
+            if mail_message.html:
+                html_text = self.tika_parse(mail_message.html)
+                fmt_text += "HTML content: " + strip_text(html_text)
+
+            fmt_text += f"\n\n{strip_text(mail_message.text)}"
+
+            return fmt_text
+
+        self.log.debug(f"Parsing file {document_path.name} into an email")
+        mail = self.parse_file_to_message(document_path)
+
+        self.log.debug("Building formatted text from email")
+        self.text = build_formatted_text(mail)
+
+        self.date = make_aware(mail.date) if is_naive(mail.date) else mail.date
+
+        self.log.debug("Creating a PDF from the email")
+        if mailrule_id:
+            rule = MailRule.objects.get(pk=mailrule_id)
+            self.archive_path = self.generate_pdf(mail, rule.pdf_layout)
+        else:
+            self.archive_path = self.generate_pdf(mail)
+
+    @staticmethod
+    def parse_file_to_message(filepath: Path) -> MailMessage:
+        """
+        Parses the given .eml file into a MailMessage object.
+
+        Raises ParseError if the file cannot be read or decoded, or if the
+        parsed message has no "from" value.
+        """
+        try:
+            parsed = MailMessage.from_bytes(filepath.read_bytes())
+            sender = parsed.from_values
+        except Exception as err:
+            raise ParseError(f"Could not parse {filepath}: {err}") from err
+
+        # Checked outside the try block so this error is not wrapped a second time
+        if sender is None:
+            raise ParseError(f"Could not parse {filepath}: Missing 'from'")
+        return parsed
+
+    def tika_parse(self, html: str):
+        """
+        Extracts the text of the given HTML via the Tika server and returns it
+        stripped, or an empty string when Tika reports no content.  Any
+        failure is re-raised as ParseError.
+        """
+        self.log.info("Sending content to Tika server")
+        try:
+            with TikaClient(tika_url=settings.TIKA_ENDPOINT) as client:
+                content = client.tika.as_text.from_buffer(html, "text/html").content
+                return "" if content is None else content.strip()
+        except Exception as err:
+            raise ParseError(
+                f"Could not parse content with tika server at "
+                f"{settings.TIKA_ENDPOINT}: {err}",
+            ) from err
+
+    def generate_pdf(
+        self,
+        mail_message: MailMessage,
+        pdf_layout: MailRule.PdfLayout | None = None,
+    ) -> Path:
+        """
+        Renders the given email into "merged.pdf" inside the parser's tempdir
+        and returns that path.  The mail itself is always rendered; when it
+        also has an HTML part, that part is rendered separately and Gotenberg
+        merges the PDFs selected by pdf_layout (or, if that is not given, by
+        settings.EMAIL_PARSE_DEFAULT_LAYOUT).
+
+        Raises ParseError if a Gotenberg request fails.
+        """
+        archive_path = Path(self.tempdir) / "merged.pdf"
+        mail_pdf_file = self.generate_pdf_from_mail(mail_message)
+        layout = pdf_layout or settings.EMAIL_PARSE_DEFAULT_LAYOUT  # a MailRule.PdfLayout
+
+        # Without HTML content the PDF of the message is already the result
+        if not mail_message.html:
+            archive_path.write_bytes(mail_pdf_file.read_bytes())
+            return archive_path
+
+        # Otherwise, create a second PDF and merge both with Gotenberg
+        pdf_of_html_content = self.generate_pdf_from_html(
+            mail_message.html,
+            mail_message.attachments,
+        )
+        if layout == MailRule.PdfLayout.HTML_TEXT:
+            to_merge = [pdf_of_html_content, mail_pdf_file]
+        elif layout == MailRule.PdfLayout.HTML_ONLY:
+            to_merge = [pdf_of_html_content]
+        elif layout == MailRule.PdfLayout.TEXT_ONLY:
+            to_merge = [mail_pdf_file]
+        else:  # MailRule.PdfLayout.TEXT_HTML and anything unexpected
+            to_merge = [mail_pdf_file, pdf_of_html_content]
+
+        self.log.debug("Merging email text and HTML content into single PDF")
+        gotenberg = GotenbergClient(
+            host=settings.TIKA_GOTENBERG_ENDPOINT,
+            timeout=settings.CELERY_TASK_TIME_LIMIT,
+        )
+        with gotenberg as client, client.merge.merge() as route:
+            # Configure requested PDF/A formatting, if any
+            pdf_a_format = self._settings_to_gotenberg_pdfa()
+            if pdf_a_format is not None:
+                route.pdf_format(pdf_a_format)
+            route.merge(to_merge)
+            try:
+                archive_path.write_bytes(route.run().content)
+            except Exception as err:
+                raise ParseError(
+                    f"Error while merging email HTML into PDF: {err}",
+                ) from err
+
+        return archive_path
+
+    def mail_to_html(self, mail: MailMessage) -> Path:
+        """
+        Renders the given email with the "email_msg_template.html" template
+        and writes the result to "email_as_html.html" in the parser's tempdir.
+        Returns the path of the written file.
+        """
+
+        def clean_html(text: str) -> str:
+            """
+            Escapes and sanitizes the given value, linkifies it (email
+            addresses included) and turns newlines into <br> tags.  Lists are
+            joined line by line, other non-string values are stringified first.
+            """
+            if isinstance(text, list):
+                text = "\n".join(str(e) for e in text)
+            elif not isinstance(text, str):
+                text = str(text)
+            linked = linkify(clean(escape(text)), parse_email=True)
+            return linked.replace("\n", "<br>")
+
+        def joined_addresses(addresses) -> str:
+            """Comma separated full form of the given addresses"""
+            return ", ".join(address.full for address in addresses)
+
+        attachment_list = ", ".join(
+            f"{a.filename} ({naturalsize(a.size, binary=True, format='%.2f')})"
+            for a in mail.attachments
+        )
+
+        # Template key, its label, raw value taken from the mail
+        labelled_fields = [
+            ("subject", "Subject", mail.subject),
+            ("from", "From", mail.from_values.full),
+            ("to", "To", joined_addresses(mail.to_values)),
+            ("cc", "CC", joined_addresses(mail.cc_values)),
+            ("bcc", "BCC", joined_addresses(mail.bcc_values)),
+            ("attachments", "Attachments", attachment_list),
+        ]
+
+        data = {}
+        for field, label, raw_value in labelled_fields:
+            data[field] = clean_html(raw_value)
+            # The label is only provided when there is a value to show
+            if data[field]:
+                data[f"{field}_label"] = label
+
+        # Date and content are always provided and carry no label
+        local_date = timezone.localtime(mail.date)
+        data["date"] = clean_html(local_date.strftime("%Y-%m-%d %H:%M"))
+        data["content"] = clean_html(mail.text.strip())
+
+        # Imported here rather than at module level
+        from django.template.loader import render_to_string
+
+        html_file = Path(self.tempdir) / "email_as_html.html"
+        html_file.write_text(render_to_string("email_msg_template.html", context=data))
+
+        return html_file
+
+    def generate_pdf_from_mail(self, mail: MailMessage) -> Path:
+        """
+        Renders the email through the HTML template and has Gotenberg's
+        Chromium route convert it to "email_as_pdf.pdf" in the parser's
+        tempdir.  Returns the path of that PDF.
+
+        Raises ParseError if the conversion fails.
+        """
+        self.log.info("Converting mail to PDF")
+
+        # The stylesheet lives in the templates directory next to this module
+        css_file = Path(__file__).parent / "templates" / "output.css"
+        email_html_file = self.mail_to_html(mail)
+
+        gotenberg = GotenbergClient(
+            host=settings.TIKA_GOTENBERG_ENDPOINT,
+            timeout=settings.CELERY_TASK_TIME_LIMIT,
+        )
+        with gotenberg as client, client.chromium.html_to_pdf() as route:
+            # Configure requested PDF/A formatting, if any
+            pdf_a_format = self._settings_to_gotenberg_pdfa()
+            if pdf_a_format is not None:
+                route.pdf_format(pdf_a_format)
+
+            try:
+                # A4 page at scale 1.0 with a 0.1 inch margin on every side
+                page_margins = PageMarginsType(
+                    top=Measurement(0.1, MeasurementUnitType.Inches),
+                    bottom=Measurement(0.1, MeasurementUnitType.Inches),
+                    left=Measurement(0.1, MeasurementUnitType.Inches),
+                    right=Measurement(0.1, MeasurementUnitType.Inches),
+                )
+                route.index(email_html_file)
+                route.resource(css_file)
+                route.margins(page_margins)
+                route.size(A4)
+                route.scale(1.0)
+                response = route.run()
+            except Exception as err:
+                raise ParseError(
+                    f"Error while converting email to PDF: {err}",
+                ) from err
+
+        email_as_pdf_file = Path(self.tempdir) / "email_as_pdf.pdf"
+        email_as_pdf_file.write_bytes(response.content)
+
+        return email_as_pdf_file
+
+    def generate_pdf_from_html(
+        self,
+        orig_html: str,
+        attachments: list[MailAttachment],
+    ) -> Path:
+        """
+        Generates a PDF file based on the HTML and attachments of the email.
+
+        Script tags are disabled by turning them into hidden div elements.
+        Each attachment is written to the tempdir under a name derived from
+        its content id and passed to Gotenberg as a resource; "cid:"
+        references to it in the HTML are rewritten to that name.
+
+        Returns the path of the generated "html.pdf" and raises ParseError
+        if the conversion fails.
+        """
+
+        def clean_html_script(text: str):
+            compiled_open = re.compile(re.escape("<script"), re.IGNORECASE)
+            text = compiled_open.sub("<div hidden ", text)
+            compiled_close = re.compile(re.escape("</script"), re.IGNORECASE)
+            text = compiled_close.sub("</div", text)
+            return text
+
+        self.log.info("Converting message html to PDF")
+
+        tempdir = Path(self.tempdir)
+        html_clean = clean_html_script(orig_html)
+
+        with (
+            GotenbergClient(
+                host=settings.TIKA_GOTENBERG_ENDPOINT,
+                timeout=settings.CELERY_TASK_TIME_LIMIT,
+            ) as client,
+            client.chromium.html_to_pdf() as route,
+        ):
+            # Configure requested PDF/A formatting, if any
+            pdf_a_format = self._settings_to_gotenberg_pdfa()
+            if pdf_a_format is not None:
+                route.pdf_format(pdf_a_format)
+
+            # Add attachments as resources, cleaning the filename and replacing
+            # it in the index file for inclusion
+            for attachment in attachments:
+                # Clean the attachment name to be valid
+                name_cid = f"cid:{attachment.content_id}"
+                name_clean = "".join(e for e in name_cid if e.isalnum())
+                # Write attachment payload to a temp file
+                temp_file = tempdir / name_clean
+                temp_file.write_bytes(attachment.payload)
+                route.resource(temp_file)
+
+                # Replace as needed the name with the clean name
+                html_clean = html_clean.replace(name_cid, name_clean)
+
+            # Store the final HTML once, after all cid rewrites; this is our index file
+            html_clean_file = tempdir / "index.html"
+            html_clean_file.write_text(html_clean)
+            route.index(html_clean_file)
+
+            # Set page size, margins
+            route.margins(
+                PageMarginsType(
+                    top=Measurement(0.1, MeasurementUnitType.Inches),
+                    bottom=Measurement(0.1, MeasurementUnitType.Inches),
+                    left=Measurement(0.1, MeasurementUnitType.Inches),
+                    right=Measurement(0.1, MeasurementUnitType.Inches),
+                ),
+            ).size(A4).scale(1.0)
+
+            try:
+                response = route.run()
+            except Exception as err:
+                raise ParseError(
+                    f"Error while converting document to PDF: {err}",
+                ) from err
+
+        html_pdf = tempdir / "html.pdf"
+        html_pdf.write_bytes(response.content)
+        return html_pdf
+
+    def get_settings(self) -> None:
+        """
+        There are no additional settings for this parser yet, so nothing is returned
+        """
+        return
--- a/src/paperless_mail/signals.py
+++ b/src/paperless_mail/signals.py
@@ -1,12 +1,7 @@
 def get_parser(*args, **kwargs):
-    from paperless.parsers.mail import MailDocumentParser
+    from paperless_mail.parsers import MailDocumentParser

-    # MailDocumentParser accepts no constructor args in the new-style protocol.
-    # Pop legacy args that arrive from the signal-based consumer path.
-    # Phase 4 will replace this signal path with the ParserRegistry.
-    kwargs.pop("logging_group", None)
-    kwargs.pop("progress_callback", None)
-    return MailDocumentParser()
+    return MailDocumentParser(*args, **kwargs)


 def mail_consumer_declaration(sender, **kwargs):
--- a/src/paperless_mail/tests/conftest.py
+++ b/src/paperless_mail/tests/conftest.py
@@ -1,9 +1,71 @@
 from collections.abc import Generator
+from pathlib import Path

 import pytest

 from paperless_mail.mail import MailAccountHandler
 from paperless_mail.models import MailAccount
+from paperless_mail.parsers import MailDocumentParser
+
+
+@pytest.fixture(scope="session")
+def sample_dir() -> Path:
+    return Path(__file__).parent.joinpath("samples").resolve()
+
+
+@pytest.fixture(scope="session")
+def broken_email_file(sample_dir: Path) -> Path:
+    return sample_dir.joinpath("broken.eml")
+
+
+@pytest.fixture(scope="session")
+def simple_txt_email_file(sample_dir: Path) -> Path:
+    return sample_dir.joinpath("simple_text.eml")
+
+
+@pytest.fixture(scope="session")
+def simple_txt_email_pdf_file(sample_dir: Path) -> Path:
+    return sample_dir.joinpath("simple_text.eml.pdf")
+
+
+@pytest.fixture(scope="session")
+def simple_txt_email_thumbnail_file(sample_dir: Path) -> Path:
+    return sample_dir.joinpath("simple_text.eml.pdf.webp")
+
+
+@pytest.fixture(scope="session")
+def html_email_file(sample_dir: Path) -> Path:
+    return sample_dir.joinpath("html.eml")
+
+
+@pytest.fixture(scope="session")
+def html_email_pdf_file(sample_dir: Path) -> Path:
+    return sample_dir.joinpath("html.eml.pdf")
+
+
+@pytest.fixture(scope="session")
+def html_email_thumbnail_file(sample_dir: Path) -> Path:
+    return sample_dir.joinpath("html.eml.pdf.webp")
+
+
+@pytest.fixture(scope="session")
+def html_email_html_file(sample_dir: Path) -> Path:
+    return sample_dir.joinpath("html.eml.html")
+
+
+@pytest.fixture(scope="session")
+def merged_pdf_first(sample_dir: Path) -> Path:
+    return sample_dir.joinpath("first.pdf")
+
+
+@pytest.fixture(scope="session")
+def merged_pdf_second(sample_dir: Path) -> Path:
+    return sample_dir.joinpath("second.pdf")
+
+
+@pytest.fixture()
+def mail_parser() -> MailDocumentParser:
+    return MailDocumentParser(logging_group=None)  # fresh parser per test, no logging group


@pytest.fixture()
@@ -27,3 +89,11 @@ def greenmail_mail_account(db: None) -> Generator[MailAccount, None, None]:
@pytest.fixture()
 def mail_account_handler() -> MailAccountHandler:
    return MailAccountHandler()
+
+
+@pytest.fixture(scope="session")
+def nginx_base_url() -> Generator[str, None, None]:
+    """
+    Yields the base URL (localhost, port 8080) of the nginx HTTP server we expect to be alive
+    """
+    yield "http://localhost:8080"
--- a/src/paperless_mail/tests/samples/broken.eml
+++ b/src/paperless_mail/tests/samples/broken.eml
--- a/src/paperless_mail/tests/samples/first.pdf
+++ b/src/paperless_mail/tests/samples/first.pdf
--- a/src/paperless_mail/tests/samples/html.eml
+++ b/src/paperless_mail/tests/samples/html.eml
--- a/src/paperless_mail/tests/samples/html.eml.html
+++ b/src/paperless_mail/tests/samples/html.eml.html
--- a/src/paperless_mail/tests/samples/html.eml.pdf
+++ b/src/paperless_mail/tests/samples/html.eml.pdf
--- a/src/paperless_mail/tests/samples/html.eml.pdf.webp
+++ b/src/paperless_mail/tests/samples/html.eml.pdf.webp
--- a/src/paperless_mail/tests/samples/sample.html
+++ b/src/paperless_mail/tests/samples/sample.html
--- a/src/paperless_mail/tests/samples/sample.html.pdf
+++ b/src/paperless_mail/tests/samples/sample.html.pdf
--- a/src/paperless_mail/tests/samples/sample.html.pdf.webp
+++ b/src/paperless_mail/tests/samples/sample.html.pdf.webp
--- a/src/paperless_mail/tests/samples/sample.png
+++ b/src/paperless_mail/tests/samples/sample.png
--- a/src/paperless_mail/tests/samples/second.pdf
+++ b/src/paperless_mail/tests/samples/second.pdf
--- a/src/paperless_mail/tests/samples/simple_text.eml
+++ b/src/paperless_mail/tests/samples/simple_text.eml
--- a/src/paperless_mail/tests/samples/simple_text.eml.pdf
+++ b/src/paperless_mail/tests/samples/simple_text.eml.pdf
--- a/src/paperless_mail/tests/samples/simple_text.eml.pdf.webp
+++ b/src/paperless_mail/tests/samples/simple_text.eml.pdf.webp
--- a/src/paperless/tests/parsers/test_mail_parser.py
+++ b/src/paperless/tests/parsers/test_mail_parser.py
@@ -12,64 +12,7 @@ from pytest_httpx import HTTPXMock
 from pytest_mock import MockerFixture

 from documents.parsers import ParseError
-from paperless.parsers import ParserContext
-from paperless.parsers import ParserProtocol
-from paperless.parsers.mail import MailDocumentParser
-
-
-class TestMailParserProtocol:
-    """Verify that MailDocumentParser satisfies the ParserProtocol contract."""
-
-    def test_isinstance_satisfies_protocol(
-        self,
-        mail_parser: MailDocumentParser,
-    ) -> None:
-        assert isinstance(mail_parser, ParserProtocol)
-
-    def test_supported_mime_types(self) -> None:
-        mime_types = MailDocumentParser.supported_mime_types()
-        assert isinstance(mime_types, dict)
-        assert "message/rfc822" in mime_types
-
-    @pytest.mark.parametrize(
-        ("mime_type", "expected"),
-        [
-            ("message/rfc822", 10),
-            ("application/pdf", None),
-            ("text/plain", None),
-        ],
-    )
-    def test_score(self, mime_type: str, expected: int | None) -> None:
-        assert MailDocumentParser.score(mime_type, "email.eml") == expected
-
-    def test_can_produce_archive_is_false(
-        self,
-        mail_parser: MailDocumentParser,
-    ) -> None:
-        assert mail_parser.can_produce_archive is False
-
-    def test_requires_pdf_rendition_is_true(
-        self,
-        mail_parser: MailDocumentParser,
-    ) -> None:
-        assert mail_parser.requires_pdf_rendition is True
-
-    def test_get_page_count_returns_none_without_archive(
-        self,
-        mail_parser: MailDocumentParser,
-        html_email_file: Path,
-    ) -> None:
-        assert mail_parser.get_page_count(html_email_file, "message/rfc822") is None
-
-    def test_get_page_count_returns_int_with_pdf_archive(
-        self,
-        mail_parser: MailDocumentParser,
-        simple_txt_email_pdf_file: Path,
-    ) -> None:
-        mail_parser._archive_path = simple_txt_email_pdf_file
-        count = mail_parser.get_page_count(simple_txt_email_pdf_file, "message/rfc822")
-        assert isinstance(count, int)
-        assert count > 0
+from paperless_mail.parsers import MailDocumentParser


 class TestEmailFileParsing:
@@ -81,7 +24,7 @@ class TestEmailFileParsing:
    def test_parse_error_missing_file(
        self,
        mail_parser: MailDocumentParser,
-        mail_samples_dir: Path,
+        sample_dir: Path,
    ) -> None:
        """
        GIVEN:
@@ -92,7 +35,7 @@ class TestEmailFileParsing:
            - An Exception is thrown
        """
        # Check if exception is raised when parsing fails.
-        test_file = mail_samples_dir / "doesntexist.eml"
+        test_file = sample_dir / "doesntexist.eml"

        assert not test_file.exists()

@@ -303,12 +246,12 @@ class TestEmailThumbnailGenerate:
        """
        mocked_return = "Passing the return value through.."
        mock_make_thumbnail_from_pdf = mocker.patch(
-            "paperless.parsers.mail.make_thumbnail_from_pdf",
+            "paperless_mail.parsers.make_thumbnail_from_pdf",
        )
        mock_make_thumbnail_from_pdf.return_value = mocked_return

        mock_generate_pdf = mocker.patch(
-            "paperless.parsers.mail.MailDocumentParser.generate_pdf",
+            "paperless_mail.parsers.MailDocumentParser.generate_pdf",
        )
        mock_generate_pdf.return_value = "Mocked return value.."

@@ -317,7 +260,8 @@ class TestEmailThumbnailGenerate:
        mock_generate_pdf.assert_called_once()
        mock_make_thumbnail_from_pdf.assert_called_once_with(
            "Mocked return value..",
-            mail_parser._tempdir,
+            mail_parser.tempdir,
+            None,
        )

        assert mocked_return == thumb
@@ -429,7 +373,7 @@ class TestParser:
        """
        # Validate parsing returns the expected results
        mock_generate_pdf = mocker.patch(
-            "paperless.parsers.mail.MailDocumentParser.generate_pdf",
+            "paperless_mail.parsers.MailDocumentParser.generate_pdf",
        )

        mail_parser.parse(simple_txt_email_file, "message/rfc822")
@@ -441,7 +385,7 @@ class TestParser:
            "BCC: fdf@fvf.de\n\n"
            "\n\nThis is just a simple Text Mail."
        )
-        assert text_expected == mail_parser.get_text()
+        assert text_expected == mail_parser.text
        assert (
            datetime.datetime(
                2022,
@@ -452,7 +396,7 @@ class TestParser:
                43,
                tzinfo=datetime.timezone(datetime.timedelta(seconds=7200)),
            )
-            == mail_parser.get_date()
+            == mail_parser.date
        )

        # Just check if tried to generate archive, the unittest for generate_pdf() goes deeper.
@@ -475,7 +419,7 @@ class TestParser:
        """

        mock_generate_pdf = mocker.patch(
-            "paperless.parsers.mail.MailDocumentParser.generate_pdf",
+            "paperless_mail.parsers.MailDocumentParser.generate_pdf",
        )

        # Validate parsing returns the expected results
@@ -499,7 +443,7 @@ class TestParser:
        mail_parser.parse(html_email_file, "message/rfc822")

        mock_generate_pdf.assert_called_once()
-        assert text_expected == mail_parser.get_text()
+        assert text_expected == mail_parser.text
        assert (
            datetime.datetime(
                2022,
@@ -510,7 +454,7 @@ class TestParser:
                19,
                tzinfo=datetime.timezone(datetime.timedelta(seconds=7200)),
            )
-            == mail_parser.get_date()
+            == mail_parser.date
        )

    def test_generate_pdf_parse_error(
@@ -557,7 +501,7 @@ class TestParser:

        mail_parser.parse(simple_txt_email_file, "message/rfc822")

-        assert mail_parser.get_archive_path() is not None
+        assert mail_parser.archive_path is not None

    @pytest.mark.httpx_mock(can_send_already_matched_responses=True)
    def test_generate_pdf_html_email(
@@ -598,7 +542,7 @@ class TestParser:
        )
        mail_parser.parse(html_email_file, "message/rfc822")

-        assert mail_parser.get_archive_path() is not None
+        assert mail_parser.archive_path is not None

    def test_generate_pdf_html_email_html_to_pdf_failure(
        self,
@@ -768,10 +712,10 @@ class TestParser:

        def test_layout_option(layout_option, expected_calls, expected_pdf_names):
            mock_mailrule_get.return_value = mock.Mock(pdf_layout=layout_option)
-            mail_parser.configure(ParserContext(mailrule_id=1))
            mail_parser.parse(
                document_path=html_email_file,
                mime_type="message/rfc822",
+                mailrule_id=1,
            )
            args, _ = mock_merge_route.call_args
            assert len(args[0]) == expected_calls
--- a/src/paperless/tests/parsers/test_mail_parser_live.py
+++ b/src/paperless/tests/parsers/test_mail_parser_live.py
@@ -11,7 +11,7 @@ from PIL import Image
 from pytest_mock import MockerFixture

 from documents.tests.utils import util_call_with_backoff
-from paperless.parsers.mail import MailDocumentParser
+from paperless_mail.parsers import MailDocumentParser


 def extract_text(pdf_path: Path) -> str:
@@ -159,7 +159,7 @@ class TestParserLive:
            - The returned thumbnail image file shall match the expected hash
        """
        mock_generate_pdf = mocker.patch(
-            "paperless.parsers.mail.MailDocumentParser.generate_pdf",
+            "paperless_mail.parsers.MailDocumentParser.generate_pdf",
        )
        mock_generate_pdf.return_value = simple_txt_email_pdf_file

@@ -216,10 +216,10 @@ class TestParserLive:
            - The merged PDF shall contain text from both source PDFs
        """
        mock_generate_pdf_from_html = mocker.patch(
-            "paperless.parsers.mail.MailDocumentParser.generate_pdf_from_html",
+            "paperless_mail.parsers.MailDocumentParser.generate_pdf_from_html",
        )
        mock_generate_pdf_from_mail = mocker.patch(
-            "paperless.parsers.mail.MailDocumentParser.generate_pdf_from_mail",
+            "paperless_mail.parsers.MailDocumentParser.generate_pdf_from_mail",
        )
        mock_generate_pdf_from_mail.return_value = merged_pdf_first
        mock_generate_pdf_from_html.return_value = merged_pdf_second