diff --git a/src/documents/consumer.py b/src/documents/consumer.py
index b5f7ebb24..cbc2198ef 100644
--- a/src/documents/consumer.py
+++ b/src/documents/consumer.py
@@ -51,11 +51,28 @@ from documents.templating.workflows import parse_w_workflow_placeholders
 from documents.utils import copy_basic_file_stats
 from documents.utils import copy_file_with_basic_stats
 from documents.utils import run_subprocess
+from paperless.parsers.text import TextDocumentParser
 from paperless_mail.parsers import MailDocumentParser
 
 LOGGING_NAME: Final[str] = "paperless.consumer"
 
 
+def _parser_cleanup(parser: DocumentParser) -> None:
+    """
+    Release a parser's resources, whichever teardown style it implements.
+
+    New-style parsers (currently only TextDocumentParser) tear down via
+    __exit__ rather than cleanup(); it is invoked here without exception info.
+    Remove this shim once all parsers and this consumer use the new style.
+
+    TODO(stumpylog): Remove me in the future
+    """
+    if isinstance(parser, TextDocumentParser):
+        parser.__exit__(None, None, None)
+    else:
+        parser.cleanup()
+
+
 class WorkflowTriggerPlugin(
     NoCleanupPluginMixin,
     NoSetupPluginMixin,
@@ -459,6 +476,9 @@ class ConsumerPlugin(
                     self.filename,
                     self.input_doc.mailrule_id,
                 )
+            elif isinstance(document_parser, TextDocumentParser):
+                # TODO(stumpylog): Remove me in the future
+                document_parser.parse(self.working_copy, mime_type)
             else:
                 document_parser.parse(self.working_copy, mime_type, self.filename)
 
@@ -469,11 +489,15 @@ class ConsumerPlugin(
                 ProgressStatusOptions.WORKING,
                 ConsumerStatusShortMessage.GENERATING_THUMBNAIL,
             )
-            thumbnail = document_parser.get_thumbnail(
-                self.working_copy,
-                mime_type,
-                self.filename,
-            )
+            if isinstance(document_parser, TextDocumentParser):
+                # TODO(stumpylog): Remove me in the future
+                thumbnail = document_parser.get_thumbnail(self.working_copy, mime_type)
+            else:
+                thumbnail = document_parser.get_thumbnail(
+                    self.working_copy,
+                    mime_type,
+                    self.filename,
+                )
 
             text = document_parser.get_text()
             date = document_parser.get_date()
@@ -490,7 +514,7 @@ class ConsumerPlugin(
             page_count = document_parser.get_page_count(self.working_copy, mime_type)
 
         except ParseError as e:
-            document_parser.cleanup()
+            _parser_cleanup(document_parser)
             if tempdir:
                 tempdir.cleanup()
             self._fail(
@@ -500,7 +524,7 @@ class ConsumerPlugin(
                 exception=e,
             )
         except Exception as e:
-            document_parser.cleanup()
+            _parser_cleanup(document_parser)
             if tempdir:
                 tempdir.cleanup()
             self._fail(
@@ -702,7 +726,7 @@ class ConsumerPlugin(
                 exception=e,
             )
         finally:
-            document_parser.cleanup()
+            _parser_cleanup(document_parser)
             tempdir.cleanup()
 
         self.run_post_consume_script(document)
diff --git a/src/documents/management/commands/document_thumbnails.py b/src/documents/management/commands/document_thumbnails.py
index 2d8609588..e4ae88766 100644
--- a/src/documents/management/commands/document_thumbnails.py
+++ b/src/documents/management/commands/document_thumbnails.py
@@ -30,6 +30,7 @@ def _process_document(doc_id: int) -> None:
         )
         shutil.move(thumb, document.thumbnail_path)
     finally:
+        # TODO(stumpylog): Cleanup once all parsers are handled
         parser.cleanup()
 
 
diff --git a/src/documents/tasks.py b/src/documents/tasks.py
index ff25adbc7..86b6b2716 100644
--- a/src/documents/tasks.py
+++ b/src/documents/tasks.py
@@ -399,6 +399,7 @@ def update_document_content_maybe_archive_file(document_id) -> None:
             f"Error while parsing document {document} (ID: {document_id})",
         )
     finally:
+        # TODO(stumpylog): Cleanup once all parsers are handled
         parser.cleanup()
 
 
diff --git a/src/documents/tests/test_parsers.py b/src/documents/tests/test_parsers.py
index 1447d5c30..5383975d1 100644
--- a/src/documents/tests/test_parsers.py
+++ b/src/documents/tests/test_parsers.py
@@ -9,8 +9,8 @@ from documents.parsers import get_default_file_extension
 from documents.parsers import get_parser_class_for_mime_type
 from documents.parsers import get_supported_file_extensions
 from documents.parsers import is_file_ext_supported
+from paperless.parsers.text import TextDocumentParser
 from paperless_tesseract.parsers import RasterisedDocumentParser
-from paperless_text.parsers import TextDocumentParser
 from paperless_tika.parsers import TikaDocumentParser
 
 
diff --git a/src/paperless/celery.py b/src/paperless/celery.py
index a9a853521..d937b3ada 100644
--- a/src/paperless/celery.py
+++ b/src/paperless/celery.py
@@ -1,6 +1,7 @@
 import os
 
 from celery import Celery
+from celery.signals import worker_process_init
 
 # Set the default Django settings module for the 'celery' program.
 os.environ.setdefault("DJANGO_SETTINGS_MODULE", "paperless.settings")
@@ -15,3 +16,19 @@ app.config_from_object("django.conf:settings", namespace="CELERY")
 
 # Load task modules from all registered Django apps.
 app.autodiscover_tasks()
+
+
+@worker_process_init.connect
+def on_worker_process_init(**kwargs) -> None:  # pragma: no cover
+    """
+    Register built-in parsers eagerly in each Celery worker process.
+
+    Runs as a worker_process_init signal handler; the signal's keyword
+    arguments are accepted but not used.  Only built-in parsers are
+    registered (no entrypoint discovery).  Third-party discovery is deferred
+    to the first get_parser_registry() call inside a task, which keeps this
+    handler well within worker_process_init's 4-second timeout budget.
+    """
+    from paperless.parsers.registry import init_builtin_parsers
+
+    init_builtin_parsers()
diff --git a/src/paperless/parsers/__init__.py b/src/paperless/parsers/__init__.py
new file mode 100644
index 000000000..ea67ade00
--- /dev/null
+++ b/src/paperless/parsers/__init__.py
@@ -0,0 +1,379 @@
+"""
+Public interface for the Paperless-ngx parser plugin system.
+
+This module defines ParserProtocol — the structural contract that every
+document parser must satisfy, whether it is a built-in parser shipped with
+Paperless-ngx or a third-party parser installed via a Python entrypoint.
+
+Phase 1/2 scope: only the Protocol is defined here. The transitional
+DocumentParser ABC (Phase 3) and concrete built-in parsers (Phase 3+) will
+be added in later phases, so there are intentionally no imports of parser
+implementations here.
+
+Usage example (third-party parser)::
+
+    from paperless.parsers import ParserProtocol
+
+    class MyParser:
+        name = "my-parser"
+        version = "1.0.0"
+        author = "Acme Corp"
+        url = "https://example.com/my-parser"
+
+        @classmethod
+        def supported_mime_types(cls) -> dict[str, str]:
+            return {"application/x-my-format": ".myf"}
+
+        @classmethod
+        def score(cls, mime_type, filename, path=None):
+            return 10
+
+        # … implement remaining protocol methods …
+
+    assert isinstance(MyParser(), ParserProtocol)
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+from typing import Protocol
+from typing import Self
+from typing import TypedDict
+from typing import runtime_checkable
+
+if TYPE_CHECKING:
+    import datetime
+    from pathlib import Path
+    from types import TracebackType
+
+__all__ = [
+    "MetadataEntry",
+    "ParserProtocol",
+]
+
+
+class MetadataEntry(TypedDict):
+    """A single metadata field extracted from a document.
+
+    All four keys are required (the TypedDict is declared total). Every
+    value is a string: converting typed data (dates, integers, lists) to
+    its string form is the parser's job before it returns an entry.
+    """
+
+    namespace: str
+    """URI of the metadata namespace (e.g. 'http://ns.adobe.com/pdf/1.3/')."""
+
+    prefix: str
+    """Conventional namespace prefix (e.g. 'pdf', 'xmp', 'dc')."""
+
+    key: str
+    """Field name within the namespace (e.g. 'Author', 'CreateDate')."""
+
+    value: str
+    """String representation of the field value."""
+
+
+@runtime_checkable
+class ParserProtocol(Protocol):
+    """Structural contract for all Paperless-ngx document parsers.
+
+    Both built-in parsers and third-party plugins (discovered via the
+    "paperless_ngx.parsers" entrypoint group) must satisfy this Protocol.
+    Because it is decorated with runtime_checkable, isinstance(obj,
+    ParserProtocol) works at runtime, but it only checks that each member
+    exists (not its signature), so it is a coarse validation aid.
+
+    Parsers must expose four string attributes at the class level so the
+    registry can log attribution information without instantiating the parser:
+
+    name : str
+        Human-readable parser name (e.g. "Tesseract OCR").
+    version : str
+        Semantic version string (e.g. "1.2.3").
+    author : str
+        Author or organisation name.
+    url : str
+        URL for documentation, source code, or issue tracker.
+    """
+
+    # ------------------------------------------------------------------
+    # Class-level identity (checked by the registry, not Protocol methods)
+    # ------------------------------------------------------------------
+
+    name: str
+    version: str
+    author: str
+    url: str
+
+    # ------------------------------------------------------------------
+    # Class methods
+    # ------------------------------------------------------------------
+
+    @classmethod
+    def supported_mime_types(cls) -> dict[str, str]:
+        """Return a mapping of supported MIME types to preferred file extensions.
+
+        The keys are MIME type strings (e.g. "application/pdf"), and the
+        values are the preferred file extension including the leading dot
+        (e.g. ".pdf").  The registry uses this mapping both to decide whether
+        a parser is a candidate for a given file and to determine the default
+        extension when creating archive copies.
+
+        Returns
+        -------
+        dict[str, str]
+            {mime_type: extension} mapping — may be empty if the parser
+            has been temporarily disabled.
+        """
+        ...
+
+    @classmethod
+    def score(
+        cls,
+        mime_type: str,
+        filename: str,
+        path: Path | None = None,
+    ) -> int | None:
+        """Return a priority score for handling this file, or None to decline.
+
+        The registry calls this after confirming that the MIME type is in
+        supported_mime_types. Parsers may inspect filename and optionally
+        the file at path to refine their confidence level.
+
+        A higher score wins. Return None to explicitly decline handling a file
+        even though the MIME type is listed as supported (e.g. when a feature
+        flag is disabled, or a required service is not configured).
+
+        Parameters
+        ----------
+        mime_type:
+            The detected MIME type of the file to be parsed.
+        filename:
+            The original filename, including extension.
+        path:
+            Optional filesystem path to the file. Parsers that need to
+            inspect file content (e.g. magic-byte sniffing) may use this.
+            May be None when scoring happens before the file is available locally.
+
+        Returns
+        -------
+        int | None
+            Priority score (higher wins), or None to decline.
+        """
+        ...
+
+    # ------------------------------------------------------------------
+    # Properties
+    # ------------------------------------------------------------------
+
+    @property
+    def can_produce_archive(self) -> bool:
+        """Whether this parser can produce a searchable PDF archive copy.
+
+        If True, the consumption pipeline may request an archive version when
+        processing the document, subject to the ARCHIVE_FILE_GENERATION
+        setting. If False, only thumbnail and text extraction are performed.
+        """
+        ...
+
+    @property
+    def requires_pdf_rendition(self) -> bool:
+        """Whether the parser must produce a PDF for the frontend to display.
+
+        True for formats the browser cannot display natively (e.g. DOCX, ODT).
+        When True, the pipeline always stores the PDF output regardless of the
+        ARCHIVE_FILE_GENERATION setting, since the original format cannot be
+        shown to the user.
+        """
+        ...
+
+    # ------------------------------------------------------------------
+    # Core parsing interface
+    # ------------------------------------------------------------------
+
+    def parse(
+        self,
+        document_path: Path,
+        mime_type: str,
+        *,
+        produce_archive: bool = True,
+    ) -> None:
+        """Parse document_path and populate internal state.
+
+        After a successful call, callers retrieve results via get_text,
+        get_date, and get_archive_path.
+
+        Parameters
+        ----------
+        document_path:
+            Absolute path to the document file to parse.
+        mime_type:
+            Detected MIME type of the document.
+        produce_archive:
+            When True (the default) and can_produce_archive is also True,
+            the parser should produce a searchable PDF at the path returned
+            by get_archive_path. Pass False when only text extraction and
+            thumbnail generation are required and disk I/O should be minimised.
+
+        Raises
+        ------
+        documents.parsers.ParseError
+            If parsing fails for any reason.
+        """
+        ...
+
+    # ------------------------------------------------------------------
+    # Result accessors
+    # ------------------------------------------------------------------
+
+    def get_text(self) -> str | None:
+        """Return the plain-text content extracted during parse.
+
+        Returns
+        -------
+        str | None
+            Extracted text, or None if no text could be found.
+        """
+        ...
+
+    def get_date(self) -> datetime.datetime | None:
+        """Return the document date detected during parse.
+
+        Returns
+        -------
+        datetime.datetime | None
+            Detected document date, or None if no date was found.
+        """
+        ...
+
+    def get_archive_path(self) -> Path | None:
+        """Return the path to the generated archive PDF, or None.
+
+        Returns
+        -------
+        Path | None
+            Path to the searchable PDF archive, or None if no archive was
+            produced (e.g. because produce_archive=False or the parser does
+            not support archive generation).
+        """
+        ...
+
+    # ------------------------------------------------------------------
+    # Thumbnail and metadata
+    # ------------------------------------------------------------------
+
+    def get_thumbnail(self, document_path: Path, mime_type: str) -> Path:
+        """Generate and return the path to a thumbnail image for the document.
+
+        May be called independently of parse. The returned path must point to
+        an existing WebP image file inside the parser's temporary working
+        directory.
+
+        Parameters
+        ----------
+        document_path:
+            Absolute path to the source document.
+        mime_type:
+            Detected MIME type of the document.
+
+        Returns
+        -------
+        Path
+            Path to the generated thumbnail image (WebP format preferred).
+        """
+        ...
+
+    def get_page_count(
+        self,
+        document_path: Path,
+        mime_type: str,
+    ) -> int | None:
+        """Return the number of pages in the document, if determinable.
+
+        Parameters
+        ----------
+        document_path:
+            Absolute path to the source document.
+        mime_type:
+            Detected MIME type of the document.
+
+        Returns
+        -------
+        int | None
+            Page count, or None if the parser cannot determine it.
+        """
+        ...
+
+    def extract_metadata(
+        self,
+        document_path: Path,
+        mime_type: str,
+    ) -> list[MetadataEntry]:
+        """Extract format-specific metadata from the document.
+
+        Called by the API view layer on demand — not during the consumption
+        pipeline. Results are returned to the frontend for per-file display.
+
+        For documents with an archive version, this method is called twice:
+        once for the original file (with its native MIME type) and once for
+        the archive file (with ``"application/pdf"``). Parsers that produce
+        archives should handle both cases.
+
+        Implementations must not raise. A failure to read metadata is not
+        fatal — log a warning and return whatever partial results were
+        collected, or ``[]`` if none.
+
+        Parameters
+        ----------
+        document_path:
+            Absolute path to the file to extract metadata from.
+        mime_type:
+            MIME type of the file at ``document_path``. May be
+            ``"application/pdf"`` when called for the archive version.
+
+        Returns
+        -------
+        list[MetadataEntry]
+            Zero or more metadata entries. Returns ``[]`` if no metadata
+            could be extracted or the format does not support it.
+        """
+        ...
+
+    # ------------------------------------------------------------------
+    # Context manager
+    # ------------------------------------------------------------------
+
+    def __enter__(self) -> Self:
+        """Enter the parser context, returning the parser instance.
+
+        Implementations should perform any resource allocation here if not
+        done in __init__ (e.g. creating API clients or temp directories).
+
+        Returns
+        -------
+        Self
+            The parser instance itself.
+        """
+        ...
+
+    def __exit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc_val: BaseException | None,
+        exc_tb: TracebackType | None,
+    ) -> None:
+        """Exit the parser context and release all resources.
+
+        Implementations must clean up all temporary files and other resources
+        regardless of whether an exception occurred.
+
+        Parameters
+        ----------
+        exc_type:
+            The exception class, or None if no exception was raised.
+        exc_val:
+            The exception instance, or None.
+        exc_tb:
+            The traceback, or None.
+        """
+        ...
diff --git a/src/paperless/parsers/registry.py b/src/paperless/parsers/registry.py
new file mode 100644
index 000000000..8c45db628
--- /dev/null
+++ b/src/paperless/parsers/registry.py
@@ -0,0 +1,364 @@
+"""
+Singleton registry that tracks all document parsers available to
+Paperless-ngx — both built-ins shipped with the application and third-party
+plugins installed via Python entrypoints.
+
+Public surface
+--------------
+get_parser_registry
+    Lazy-initialise and return the shared ParserRegistry. This is the primary
+    entry point for production code.
+
+init_builtin_parsers
+    Register built-in parsers only, without entrypoint discovery. Safe to
+    call from Celery worker_process_init where importing all entrypoints
+    would be wasteful or cause side effects.
+
+reset_parser_registry
+    Reset module-level state. For tests only.
+
+Entrypoint group
+----------------
+Third-party parsers must advertise themselves under the
+"paperless_ngx.parsers" entrypoint group in their pyproject.toml::
+
+    [project.entry-points."paperless_ngx.parsers"]
+    my_parser = "my_package.parsers:MyParser"
+
+The loaded class must expose the following attributes at the class level
+(not just on instances) for the registry to accept it:
+name, version, author, url, supported_mime_types (callable), score (callable).
+"""
+
+from __future__ import annotations
+
+import logging
+from importlib.metadata import entry_points
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from pathlib import Path
+
+    from paperless.parsers import ParserProtocol
+
+logger = logging.getLogger("paperless.parsers.registry")
+
+# ---------------------------------------------------------------------------
+# Module-level singleton state
+# ---------------------------------------------------------------------------
+
+_registry: ParserRegistry | None = None  # built lazily by the accessors below
+_discovery_complete: bool = False  # True once get_parser_registry ran discover()
+
+# Attribute names an external parser class must expose (hasattr-checked by discover).
+_REQUIRED_ATTRS: tuple[str, ...] = (
+    "name",
+    "version",
+    "author",
+    "url",
+    "supported_mime_types",
+    "score",
+)
+
+
+# ---------------------------------------------------------------------------
+# Module-level accessor functions
+# ---------------------------------------------------------------------------
+
+
+def get_parser_registry() -> ParserRegistry:
+    """Return the shared ParserRegistry instance.
+
+    On the first call this function:
+
+    1. Creates a ParserRegistry and calls register_defaults, unless
+       init_builtin_parsers has already done both.
+    2. Calls discover to load third-party plugins from entrypoints.
+    3. Calls log_summary to emit a startup summary.
+
+    Subsequent calls return the same instance immediately.
+
+    Returns
+    -------
+    ParserRegistry
+        The shared registry singleton.
+    """
+    global _registry, _discovery_complete
+
+    if _registry is None:
+        _registry = ParserRegistry()
+        _registry.register_defaults()
+
+    if not _discovery_complete:
+        _registry.discover()
+        _registry.log_summary()
+        _discovery_complete = True
+
+    return _registry
+
+
+def init_builtin_parsers() -> None:
+    """Register built-in parsers without performing entrypoint discovery.
+
+    Intended for use in Celery worker_process_init handlers where importing
+    all installed entrypoints would be wasteful, slow, or could produce
+    undesirable side effects. _discovery_complete is left untouched, so a
+    later get_parser_registry call still runs discovery on this registry.
+
+    Safe to call multiple times — a call is a no-op once a registry exists.
+
+    Returns
+    -------
+    None
+    """
+    global _registry
+
+    if _registry is None:
+        _registry = ParserRegistry()
+        _registry.register_defaults()
+
+
+def reset_parser_registry() -> None:
+    """Reset the module-level registry state to its initial values.
+
+    Resets _registry and _discovery_complete so the next call to
+    get_parser_registry will re-initialise everything from scratch.
+
+    FOR TESTS ONLY. Do not call this in production code — after a reset the
+    next get_parser_registry call rebuilds the registry and repeats
+    entrypoint discovery, which is expensive and may have unexpected side
+    effects in multi-threaded environments.
+
+    Returns
+    -------
+    None
+    """
+    global _registry, _discovery_complete
+
+    _registry = None
+    _discovery_complete = False
+
+
+# ---------------------------------------------------------------------------
+# Registry class
+# ---------------------------------------------------------------------------
+
+
+class ParserRegistry:
+    """Registry that maps MIME types to the best available parser class.
+
+    Parsers are partitioned into two lists:
+
+    _builtins
+        Parser classes registered via register_builtin (populated by
+        register_defaults in Phase 3+).
+
+    _external
+        Parser classes loaded from installed Python entrypoints via discover.
+
+    When resolving a parser for a file, external parsers are evaluated
+    alongside built-in parsers using a uniform scoring mechanism. Both lists
+    are iterated together; the class with the highest score wins. If an
+    external parser wins, its attribution details are logged so users can
+    identify which third-party package handled their document.
+    """
+
+    def __init__(self) -> None:
+        self._external: list[type[ParserProtocol]] = []
+        self._builtins: list[type[ParserProtocol]] = []
+
+    # ------------------------------------------------------------------
+    # Registration
+    # ------------------------------------------------------------------
+
+    def register_builtin(self, parser_class: type[ParserProtocol]) -> None:
+        """Register a built-in parser class.
+
+        Built-in parsers are shipped with Paperless-ngx and are appended to
+        the _builtins list. They are never overridden by external parsers;
+        instead, scoring determines which parser wins for any given file.
+
+        Parameters
+        ----------
+        parser_class:
+            The parser class to register. Must satisfy ParserProtocol.
+        """
+        self._builtins.append(parser_class)
+
+    def register_defaults(self) -> None:
+        """Register the built-in parsers that ship with Paperless-ngx.
+
+        Each parser that has been migrated to the new ParserProtocol interface
+        is registered here (currently only TextDocumentParser).  Parsers are
+        added in ascending weight order so that log output is predictable;
+        scoring, not order, picks the winner (order only breaks score ties).
+        """
+        from paperless.parsers.text import TextDocumentParser
+
+        self.register_builtin(TextDocumentParser)
+
+    # ------------------------------------------------------------------
+    # Discovery
+    # ------------------------------------------------------------------
+
+    def discover(self) -> None:
+        """Load third-party parsers from the "paperless_ngx.parsers" entrypoint group.
+
+        For each advertised entrypoint the method:
+
+        1. Calls ep.load() to import the class.
+        2. Checks with hasattr that the class exposes every name in _REQUIRED_ATTRS.
+        3. On success, appends the class to _external and logs an info message.
+        4. On failure (import error or missing attributes), logs an appropriate
+           warning/error and continues to the next entrypoint.
+
+        Errors during discovery of a single parser do not prevent other parsers
+        from being loaded.
+
+        Returns
+        -------
+        None
+        """
+        eps = entry_points(group="paperless_ngx.parsers")
+
+        for ep in eps:
+            try:
+                parser_class = ep.load()
+            except Exception:
+                logger.exception(
+                    "Failed to load parser entrypoint '%s' — skipping.",
+                    ep.name,
+                )
+                continue
+
+            missing = [
+                attr for attr in _REQUIRED_ATTRS if not hasattr(parser_class, attr)
+            ]
+            if missing:
+                logger.warning(
+                    "Parser loaded from entrypoint '%s' is missing required "
+                    "attributes %r — skipping.",
+                    ep.name,
+                    missing,
+                )
+                continue
+
+            self._external.append(parser_class)
+            logger.info(
+                "Loaded third-party parser '%s' v%s by %s (entrypoint: '%s').",
+                parser_class.name,
+                parser_class.version,
+                parser_class.author,
+                ep.name,
+            )
+
+    # ------------------------------------------------------------------
+    # Summary logging
+    # ------------------------------------------------------------------
+
+    def log_summary(self) -> None:
+        """Log a startup summary of all registered parsers.
+
+        Built-in parsers are listed first, followed by any external parsers
+        discovered from entrypoints.  If no external parsers were found a
+        short informational message is logged instead of an empty list.
+
+        Returns
+        -------
+        None
+        """
+        logger.info(
+            "Built-in parsers (%d):",
+            len(self._builtins),
+        )
+        for cls in self._builtins:
+            logger.info(
+                "  [built-in] %s v%s — %s",
+                getattr(cls, "name", repr(cls)),
+                getattr(cls, "version", "unknown"),
+                getattr(cls, "url", "built-in"),
+            )
+
+        if not self._external:
+            logger.info("No third-party parsers discovered.")
+            return
+
+        logger.info(
+            "Third-party parsers (%d):",
+            len(self._external),
+        )
+        for cls in self._external:
+            logger.info(
+                "  [external] %s v%s by %s — report issues at %s",
+                getattr(cls, "name", repr(cls)),
+                getattr(cls, "version", "unknown"),
+                getattr(cls, "author", "unknown"),
+                getattr(cls, "url", "unknown"),
+            )
+
+    # ------------------------------------------------------------------
+    # Parser resolution
+    # ------------------------------------------------------------------
+
+    def get_parser_for_file(
+        self,
+        mime_type: str,
+        filename: str,
+        path: Path | None = None,
+    ) -> type[ParserProtocol] | None:
+        """Return the best parser class for the given file, or None.
+
+        All registered parsers (external first, then built-ins) are evaluated
+        against the file. A parser is eligible if mime_type appears in the dict
+        returned by its supported_mime_types classmethod, and its score
+        classmethod returns a non-None integer.
+
+        The parser with the highest score wins. When two parsers return the
+        same score, the one that appears earlier in the evaluation order wins
+        (external parsers are evaluated before built-ins, giving third-party
+        packages a chance to override defaults at equal priority).
+
+        When an external parser is selected, its identity is logged at INFO
+        level so operators can trace which package handled a document.
+
+        Parameters
+        ----------
+        mime_type:
+            The detected MIME type of the file.
+        filename:
+            The original filename, including extension.
+        path:
+            Optional filesystem path to the file. Forwarded to each
+            parser's score method.
+
+        Returns
+        -------
+        type[ParserProtocol] | None
+            The winning parser class, or None if no parser can handle the file.
+        """
+        best_score: int | None = None
+        best_parser: type[ParserProtocol] | None = None
+
+        # External parsers come first: the strict ">" below keeps the first
+        # parser seen at a given score, so externals win ties over built-ins.
+        for parser_class in (*self._external, *self._builtins):
+            if mime_type not in parser_class.supported_mime_types():
+                continue
+
+            score = parser_class.score(mime_type, filename, path)
+            if score is None:
+                continue
+
+            if best_score is None or score > best_score:
+                best_score = score
+                best_parser = parser_class
+
+        if best_parser is not None and best_parser in self._external:
+            logger.info(
+                "Document handled by third-party parser '%s' v%s — %s",
+                getattr(best_parser, "name", repr(best_parser)),
+                getattr(best_parser, "version", "unknown"),
+                getattr(best_parser, "url", "unknown"),
+            )
+
+        return best_parser
diff --git a/src/paperless/parsers/text.py b/src/paperless/parsers/text.py
new file mode 100644
index 000000000..99d9dab08
--- /dev/null
+++ b/src/paperless/parsers/text.py
@@ -0,0 +1,320 @@
+"""
+Built-in plain-text document parser.
+
+Handles text/plain, text/csv, and application/csv MIME types by reading the
+file content directly.  Thumbnails are generated by rendering a page-sized
+WebP image from the first 100,000 characters using Pillow.
+"""
+
+from __future__ import annotations
+
+import logging
+import shutil
+import tempfile
+from pathlib import Path
+from typing import TYPE_CHECKING
+from typing import Self
+
+from django.conf import settings
+from PIL import Image
+from PIL import ImageDraw
+from PIL import ImageFont
+
+from paperless.version import __full_version_str__
+
+if TYPE_CHECKING:
+    import datetime
+    from types import TracebackType
+
+    from paperless.parsers import MetadataEntry
+
+logger = logging.getLogger("paperless.parsing.text")
+
+_SUPPORTED_MIME_TYPES: dict[str, str] = {
+    "text/plain": ".txt",
+    "text/csv": ".csv",
+    "application/csv": ".csv",
+}
+
+
+class TextDocumentParser:
+    """Parse plain-text documents (txt, csv) for Paperless-ngx.
+
+    This parser reads the file content directly as UTF-8 text and renders a
+    simple thumbnail using Pillow.  It does not perform OCR and does not
+    produce a searchable PDF archive copy.
+
+    Class attributes
+    ----------------
+    name : str
+        Human-readable parser name.
+    version : str
+        Semantic version string, kept in sync with Paperless-ngx releases.
+    author : str
+        Maintainer name.
+    url : str
+        Issue tracker / source URL.
+    """
+
+    name: str = "Paperless-ngx Text Parser"
+    version: str = __full_version_str__
+    author: str = "Paperless-ngx Contributors"
+    url: str = "https://github.com/paperless-ngx/paperless-ngx"
+
+    # ------------------------------------------------------------------
+    # Class methods
+    # ------------------------------------------------------------------
+
+    @classmethod
+    def supported_mime_types(cls) -> dict[str, str]:
+        """Return the MIME types this parser handles.
+
+        Returns
+        -------
+        dict[str, str]
+            Mapping of MIME type to preferred file extension.
+        """
+        return _SUPPORTED_MIME_TYPES
+
+    @classmethod
+    def score(
+        cls,
+        mime_type: str,
+        filename: str,
+        path: Path | None = None,
+    ) -> int | None:
+        """Return the priority score for handling this file.
+
+        Parameters
+        ----------
+        mime_type:
+            Detected MIME type of the file.
+        filename:
+            Original filename including extension.
+        path:
+            Optional filesystem path. Not inspected by this parser.
+
+        Returns
+        -------
+        int | None
+            10 if the MIME type is supported, otherwise None.
+        """
+        if mime_type in _SUPPORTED_MIME_TYPES:
+            return 10
+        return None
+
+    # ------------------------------------------------------------------
+    # Properties
+    # ------------------------------------------------------------------
+
+    @property
+    def can_produce_archive(self) -> bool:
+        """Whether this parser can produce a searchable PDF archive copy.
+
+        Returns
+        -------
+        bool
+            Always False — the text parser does not produce a PDF archive.
+        """
+        return False
+
+    @property
+    def requires_pdf_rendition(self) -> bool:
+        """Whether the parser must produce a PDF for the frontend to display.
+
+        Returns
+        -------
+        bool
+            Always False — plain text files are displayable as-is.
+        """
+        return False
+
+    # ------------------------------------------------------------------
+    # Lifecycle
+    # ------------------------------------------------------------------
+
+    def __init__(self, logging_group: object = None) -> None:
+        settings.SCRATCH_DIR.mkdir(parents=True, exist_ok=True)
+        self._tempdir = Path(
+            tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR),
+        )
+        self._text: str | None = None
+
+    def __enter__(self) -> Self:
+        return self
+
+    def __exit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc_val: BaseException | None,
+        exc_tb: TracebackType | None,
+    ) -> None:
+        logger.debug("Cleaning up temporary directory %s", self._tempdir)
+        shutil.rmtree(self._tempdir, ignore_errors=True)
+
+    # ------------------------------------------------------------------
+    # Core parsing interface
+    # ------------------------------------------------------------------
+
+    def parse(
+        self,
+        document_path: Path,
+        mime_type: str,
+        *,
+        produce_archive: bool = True,
+    ) -> None:
+        """Read the document and store its text content.
+
+        Parameters
+        ----------
+        document_path:
+            Absolute path to the text file.
+        mime_type:
+            Detected MIME type of the document.
+        produce_archive:
+            Ignored — this parser never produces a PDF archive.
+
+        Raises
+        ------
+        documents.parsers.ParseError
+            If the file cannot be read.
+        """
+        self._text = self._read_text(document_path)
+
+    # ------------------------------------------------------------------
+    # Result accessors
+    # ------------------------------------------------------------------
+
+    def get_text(self) -> str | None:
+        """Return the plain-text content extracted during parse.
+
+        Returns
+        -------
+        str | None
+            Extracted text, or None if parse has not been called yet.
+        """
+        return self._text
+
+    def get_date(self) -> datetime.datetime | None:
+        """Return the document date detected during parse.
+
+        Returns
+        -------
+        datetime.datetime | None
+            Always None — the text parser does not detect dates.
+        """
+        return None
+
+    def get_archive_path(self) -> Path | None:
+        """Return the path to a generated archive PDF, or None.
+
+        Returns
+        -------
+        Path | None
+            Always None — the text parser does not produce a PDF archive.
+        """
+        return None
+
+    # ------------------------------------------------------------------
+    # Thumbnail and metadata
+    # ------------------------------------------------------------------
+
+    def get_thumbnail(self, document_path: Path, mime_type: str) -> Path:
+        """Render the first portion of the document as a WebP thumbnail.
+
+        Parameters
+        ----------
+        document_path:
+            Absolute path to the source document.
+        mime_type:
+            Detected MIME type of the document.
+
+        Returns
+        -------
+        Path
+            Path to the generated WebP thumbnail inside the temporary directory.
+        """
+        max_chars = 100_000
+        file_size_limit = 50 * 1024 * 1024
+
+        if document_path.stat().st_size > file_size_limit:
+            text = "[File too large to preview]"
+        else:
+            with Path(document_path).open("r", encoding="utf-8", errors="replace") as f:
+                text = f.read(max_chars)
+
+        img = Image.new("RGB", (500, 700), color="white")
+        draw = ImageDraw.Draw(img)
+        font = ImageFont.truetype(
+            font=settings.THUMBNAIL_FONT_NAME,
+            size=20,
+            layout_engine=ImageFont.Layout.BASIC,
+        )
+        draw.multiline_text((5, 5), text, font=font, fill="black", spacing=4)
+
+        out_path = self._tempdir / "thumb.webp"
+        img.save(out_path, format="WEBP")
+
+        return out_path
+
+    def get_page_count(
+        self,
+        document_path: Path,
+        mime_type: str,
+    ) -> int | None:
+        """Return the number of pages in the document.
+
+        Parameters
+        ----------
+        document_path:
+            Absolute path to the source document.
+        mime_type:
+            Detected MIME type of the document.
+
+        Returns
+        -------
+        int | None
+            Always None — page count is not meaningful for plain text.
+        """
+        return None
+
+    def extract_metadata(
+        self,
+        document_path: Path,
+        mime_type: str,
+    ) -> list[MetadataEntry]:
+        """Extract format-specific metadata from the document.
+
+        Returns
+        -------
+        list[MetadataEntry]
+            Always ``[]`` — plain text files carry no structured metadata.
+        """
+        return []
+
+    # ------------------------------------------------------------------
+    # Private helpers
+    # ------------------------------------------------------------------
+
+    def _read_text(self, filepath: Path) -> str:
+        """Read file content, replacing invalid UTF-8 bytes rather than failing.
+
+        Parameters
+        ----------
+        filepath:
+            Path to the file to read.
+
+        Returns
+        -------
+        str
+            File content as a string.
+        """
+        try:
+            return filepath.read_text(encoding="utf-8")
+        except UnicodeDecodeError as exc:
+            logger.warning(
+                "Unicode error reading %s, replacing bad bytes: %s",
+                filepath,
+                exc,
+            )
+            return filepath.read_bytes().decode("utf-8", errors="replace")
diff --git a/src/paperless/tests/conftest.py b/src/paperless/tests/conftest.py
new file mode 100644
index 000000000..b016191c4
--- /dev/null
+++ b/src/paperless/tests/conftest.py
@@ -0,0 +1,48 @@
+"""
+Fixtures defined here are available to every test module under
+src/paperless/tests/ (including sub-packages such as parsers/).
+
+Session-scoped fixtures for the shared samples directory live here so
+sub-package conftest files can reference them without duplicating path logic.
+Parser-specific fixtures (concrete parser instances, format-specific sample
+files) live in paperless/tests/parsers/conftest.py.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+import pytest
+
+from paperless.parsers.registry import reset_parser_registry
+
+if TYPE_CHECKING:
+    from collections.abc import Generator
+
+
+@pytest.fixture(scope="session")
+def samples_dir() -> Path:
+    """Absolute path to the shared parser sample files directory.
+
+    Sub-package conftest files derive format-specific paths from this root,
+    e.g. ``samples_dir / "text" / "test.txt"``.
+
+    Returns
+    -------
+    Path
+        Directory containing all sample documents used by parser tests.
+    """
+    return (Path(__file__).parent / "samples").resolve()
+
+
+@pytest.fixture(autouse=True)
+def clean_registry() -> Generator[None, None, None]:
+    """Reset the parser registry before and after every test.
+
+    This prevents registry state from leaking between tests that call
+    get_parser_registry() or init_builtin_parsers().
+    """
+    reset_parser_registry()
+    yield
+    reset_parser_registry()
diff --git a/src/paperless/tests/parsers/__init__.py b/src/paperless/tests/parsers/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/paperless/tests/parsers/conftest.py b/src/paperless/tests/parsers/conftest.py
new file mode 100644
index 000000000..2d5deb684
--- /dev/null
+++ b/src/paperless/tests/parsers/conftest.py
@@ -0,0 +1,76 @@
+"""
+Parser fixtures that are used across multiple test modules in this package
+are defined here.  Format-specific sample-file fixtures are grouped by parser
+so it is easy to see which files belong to which test module.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import pytest
+
+from paperless.parsers.text import TextDocumentParser
+
+if TYPE_CHECKING:
+    from collections.abc import Generator
+    from pathlib import Path
+
+
+# ------------------------------------------------------------------
+# Text parser sample files
+# ------------------------------------------------------------------
+
+
+@pytest.fixture(scope="session")
+def text_samples_dir(samples_dir: Path) -> Path:
+    """Absolute path to the text parser sample files directory.
+
+    Returns
+    -------
+    Path
+        ``<samples_dir>/text/``
+    """
+    return samples_dir / "text"
+
+
+@pytest.fixture(scope="session")
+def sample_txt_file(text_samples_dir: Path) -> Path:
+    """Path to a valid UTF-8 plain-text sample file.
+
+    Returns
+    -------
+    Path
+        Absolute path to ``text/test.txt``.
+    """
+    return text_samples_dir / "test.txt"
+
+
+@pytest.fixture(scope="session")
+def malformed_txt_file(text_samples_dir: Path) -> Path:
+    """Path to a text file containing invalid UTF-8 bytes.
+
+    Returns
+    -------
+    Path
+        Absolute path to ``text/decode_error.txt``.
+    """
+    return text_samples_dir / "decode_error.txt"
+
+
+# ------------------------------------------------------------------
+# Text parser instance
+# ------------------------------------------------------------------
+
+
+@pytest.fixture()
+def text_parser() -> Generator[TextDocumentParser, None, None]:
+    """Yield a TextDocumentParser and clean up its temporary directory afterwards.
+
+    Yields
+    ------
+    TextDocumentParser
+        A ready-to-use parser instance.
+    """
+    with TextDocumentParser() as parser:
+        yield parser
diff --git a/src/paperless/tests/parsers/test_text_parser.py b/src/paperless/tests/parsers/test_text_parser.py
new file mode 100644
index 000000000..d2f095f5c
--- /dev/null
+++ b/src/paperless/tests/parsers/test_text_parser.py
@@ -0,0 +1,256 @@
+"""
+Tests for paperless.parsers.text.TextDocumentParser.
+
+All tests use the context-manager protocol for parser lifecycle.  Sample
+files are provided by session-scoped fixtures defined in conftest.py.
+"""
+
+from __future__ import annotations
+
+import tempfile
+from pathlib import Path
+
+import pytest
+
+from paperless.parsers import ParserProtocol
+from paperless.parsers.text import TextDocumentParser
+
+
+class TestTextParserProtocol:
+    """Verify that TextDocumentParser satisfies the ParserProtocol contract."""
+
+    def test_isinstance_satisfies_protocol(
+        self,
+        text_parser: TextDocumentParser,
+    ) -> None:
+        assert isinstance(text_parser, ParserProtocol)
+
+    def test_class_attributes_present(self) -> None:
+        assert isinstance(TextDocumentParser.name, str) and TextDocumentParser.name
+        assert (
+            isinstance(TextDocumentParser.version, str) and TextDocumentParser.version
+        )
+        assert isinstance(TextDocumentParser.author, str) and TextDocumentParser.author
+        assert isinstance(TextDocumentParser.url, str) and TextDocumentParser.url
+
+    def test_supported_mime_types_returns_dict(self) -> None:
+        mime_types = TextDocumentParser.supported_mime_types()
+        assert isinstance(mime_types, dict)
+        assert "text/plain" in mime_types
+        assert "text/csv" in mime_types
+        assert "application/csv" in mime_types
+
+    @pytest.mark.parametrize(
+        ("mime_type", "expected"),
+        [
+            ("text/plain", 10),
+            ("text/csv", 10),
+            ("application/csv", 10),
+            ("application/pdf", None),
+            ("image/png", None),
+        ],
+    )
+    def test_score(self, mime_type: str, expected: int | None) -> None:
+        assert TextDocumentParser.score(mime_type, "file.txt") == expected
+
+    def test_can_produce_archive_is_false(
+        self,
+        text_parser: TextDocumentParser,
+    ) -> None:
+        assert text_parser.can_produce_archive is False
+
+    def test_requires_pdf_rendition_is_false(
+        self,
+        text_parser: TextDocumentParser,
+    ) -> None:
+        assert text_parser.requires_pdf_rendition is False
+
+
+class TestTextParserLifecycle:
+    """Verify context-manager behaviour and temporary directory cleanup."""
+
+    def test_context_manager_cleans_up_tempdir(self) -> None:
+        with TextDocumentParser() as parser:
+            tempdir = parser._tempdir
+            assert tempdir.exists()
+        assert not tempdir.exists()
+
+    def test_context_manager_cleans_up_after_exception(self) -> None:
+        tempdir: Path | None = None
+        with pytest.raises(RuntimeError):
+            with TextDocumentParser() as parser:
+                tempdir = parser._tempdir
+                raise RuntimeError("boom")
+        assert tempdir is not None
+        assert not tempdir.exists()
+
+
+class TestTextParserParse:
+    """Verify parse() and the result accessors."""
+
+    def test_parse_valid_utf8(
+        self,
+        text_parser: TextDocumentParser,
+        sample_txt_file: Path,
+    ) -> None:
+        text_parser.parse(sample_txt_file, "text/plain")
+
+        assert text_parser.get_text() == "This is a test file.\n"
+
+    def test_parse_returns_none_for_archive_path(
+        self,
+        text_parser: TextDocumentParser,
+        sample_txt_file: Path,
+    ) -> None:
+        text_parser.parse(sample_txt_file, "text/plain")
+
+        assert text_parser.get_archive_path() is None
+
+    def test_parse_returns_none_for_date(
+        self,
+        text_parser: TextDocumentParser,
+        sample_txt_file: Path,
+    ) -> None:
+        text_parser.parse(sample_txt_file, "text/plain")
+
+        assert text_parser.get_date() is None
+
+    def test_parse_invalid_utf8_bytes_replaced(
+        self,
+        text_parser: TextDocumentParser,
+        malformed_txt_file: Path,
+    ) -> None:
+        """
+        GIVEN:
+            - A text file containing invalid UTF-8 byte sequences
+        WHEN:
+            - The file is parsed
+        THEN:
+            - Parsing succeeds
+            - Invalid bytes are replaced with the Unicode replacement character
+        """
+        text_parser.parse(malformed_txt_file, "text/plain")
+
+        assert text_parser.get_text() == "Pantothens\ufffdure\n"
+
+    def test_get_text_none_before_parse(
+        self,
+        text_parser: TextDocumentParser,
+    ) -> None:
+        assert text_parser.get_text() is None
+
+
+class TestTextParserThumbnail:
+    """Verify thumbnail generation."""
+
+    def test_thumbnail_exists_and_is_file(
+        self,
+        text_parser: TextDocumentParser,
+        sample_txt_file: Path,
+    ) -> None:
+        thumb = text_parser.get_thumbnail(sample_txt_file, "text/plain")
+
+        assert thumb.exists()
+        assert thumb.is_file()
+
+    def test_thumbnail_large_file_does_not_read_all(
+        self,
+        text_parser: TextDocumentParser,
+    ) -> None:
+        """
+        GIVEN:
+            - A text file larger than 50 MB
+        WHEN:
+            - A thumbnail is requested
+        THEN:
+            - The thumbnail is generated without loading the full file
+        """
+        with tempfile.NamedTemporaryFile(
+            delete=False,
+            mode="w",
+            encoding="utf-8",
+            suffix=".txt",
+        ) as tmp:
+            tmp.write("A" * (51 * 1024 * 1024))
+            large_file = Path(tmp.name)
+
+        try:
+            thumb = text_parser.get_thumbnail(large_file, "text/plain")
+            assert thumb.exists()
+            assert thumb.is_file()
+        finally:
+            large_file.unlink(missing_ok=True)
+
+    def test_get_page_count_returns_none(
+        self,
+        text_parser: TextDocumentParser,
+        sample_txt_file: Path,
+    ) -> None:
+        assert text_parser.get_page_count(sample_txt_file, "text/plain") is None
+
+
+class TestTextParserMetadata:
+    """Verify extract_metadata behaviour."""
+
+    def test_extract_metadata_returns_empty_list(
+        self,
+        text_parser: TextDocumentParser,
+        sample_txt_file: Path,
+    ) -> None:
+        result = text_parser.extract_metadata(sample_txt_file, "text/plain")
+
+        assert result == []
+
+    def test_extract_metadata_returns_list_type(
+        self,
+        text_parser: TextDocumentParser,
+        sample_txt_file: Path,
+    ) -> None:
+        result = text_parser.extract_metadata(sample_txt_file, "text/plain")
+
+        assert isinstance(result, list)
+
+    def test_extract_metadata_ignores_mime_type(
+        self,
+        text_parser: TextDocumentParser,
+        sample_txt_file: Path,
+    ) -> None:
+        """extract_metadata returns [] regardless of the mime_type argument."""
+        assert text_parser.extract_metadata(sample_txt_file, "application/pdf") == []
+        assert text_parser.extract_metadata(sample_txt_file, "text/csv") == []
+
+
+class TestTextParserRegistry:
+    """Verify that TextDocumentParser is registered by default."""
+
+    def test_registered_in_defaults(self) -> None:
+        from paperless.parsers.registry import ParserRegistry
+
+        registry = ParserRegistry()
+        registry.register_defaults()
+
+        assert TextDocumentParser in registry._builtins
+
+    def test_get_parser_for_text_plain(self) -> None:
+        from paperless.parsers.registry import get_parser_registry
+
+        registry = get_parser_registry()
+        parser_cls = registry.get_parser_for_file("text/plain", "doc.txt")
+
+        assert parser_cls is TextDocumentParser
+
+    def test_get_parser_for_text_csv(self) -> None:
+        from paperless.parsers.registry import get_parser_registry
+
+        registry = get_parser_registry()
+        parser_cls = registry.get_parser_for_file("text/csv", "data.csv")
+
+        assert parser_cls is TextDocumentParser
+
+    def test_get_parser_for_unknown_type_returns_none(self) -> None:
+        from paperless.parsers.registry import get_parser_registry
+
+        registry = get_parser_registry()
+        parser_cls = registry.get_parser_for_file("application/pdf", "doc.pdf")
+
+        assert parser_cls is None
diff --git a/src/paperless_text/tests/samples/decode_error.txt b/src/paperless/tests/samples/text/decode_error.txt
similarity index 100%
rename from src/paperless_text/tests/samples/decode_error.txt
rename to src/paperless/tests/samples/text/decode_error.txt
diff --git a/src/paperless_text/tests/samples/test.txt b/src/paperless/tests/samples/text/test.txt
similarity index 100%
rename from src/paperless_text/tests/samples/test.txt
rename to src/paperless/tests/samples/text/test.txt
diff --git a/src/paperless/tests/test_registry.py b/src/paperless/tests/test_registry.py
new file mode 100644
index 000000000..80c686bc4
--- /dev/null
+++ b/src/paperless/tests/test_registry.py
@@ -0,0 +1,714 @@
+"""
+Tests for :mod:`paperless.parsers` (ParserProtocol) and
+:mod:`paperless.parsers.registry` (ParserRegistry + module-level helpers).
+
+All tests use pytest-style functions/classes — no unittest.TestCase.
+The ``clean_registry`` fixture ensures complete isolation between tests by
+resetting the module-level singleton before and after every test.
+"""
+
+from __future__ import annotations
+
+import logging
+from importlib.metadata import EntryPoint
+from pathlib import Path
+from typing import Self
+from unittest.mock import MagicMock
+from unittest.mock import patch
+
+import pytest
+
+from paperless.parsers import ParserProtocol
+from paperless.parsers.registry import ParserRegistry
+from paperless.parsers.registry import get_parser_registry
+from paperless.parsers.registry import init_builtin_parsers
+from paperless.parsers.registry import reset_parser_registry
+
+
+@pytest.fixture()
+def dummy_parser_cls() -> type:
+    """Return a class that fully satisfies :class:`ParserProtocol`.
+
+    GIVEN: A need to exercise registry and Protocol logic with a minimal
+           but complete parser.
+    WHEN:  A test requests this fixture.
+    THEN:  A class with all required attributes and methods is returned.
+    """
+
+    class DummyParser:
+        name = "dummy-parser"
+        version = "0.1.0"
+        author = "Test Author"
+        url = "https://example.com/dummy-parser"
+
+        @classmethod
+        def supported_mime_types(cls) -> dict[str, str]:
+            return {"text/plain": ".txt"}
+
+        @classmethod
+        def score(
+            cls,
+            mime_type: str,
+            filename: str,
+            path: Path | None = None,
+        ) -> int | None:
+            return 10
+
+        @property
+        def can_produce_archive(self) -> bool:
+            return False
+
+        @property
+        def requires_pdf_rendition(self) -> bool:
+            return False
+
+        def parse(
+            self,
+            document_path: Path,
+            mime_type: str,
+            *,
+            produce_archive: bool = True,
+        ) -> None:
+            """
+            Required to exist, but doesn't need to do anything
+            """
+
+        def get_text(self) -> str | None:
+            return None
+
+        def get_date(self) -> None:
+            return None
+
+        def get_archive_path(self) -> Path | None:
+            return None
+
+        def get_thumbnail(
+            self,
+            document_path: Path,
+            mime_type: str,
+        ) -> Path:
+            return Path("/tmp/thumbnail.webp")
+
+        def get_page_count(
+            self,
+            document_path: Path,
+            mime_type: str,
+        ) -> int | None:
+            return None
+
+        def extract_metadata(
+            self,
+            document_path: Path,
+            mime_type: str,
+        ) -> list:
+            return []
+
+        def __enter__(self) -> Self:
+            return self
+
+        def __exit__(self, exc_type, exc_val, exc_tb) -> None:
+            """
+            Required to exist, but doesn't need to do anything
+            """
+
+    return DummyParser
+
+
+class TestParserProtocol:
+    """Verify runtime isinstance() checks against ParserProtocol."""
+
+    def test_compliant_class_instance_passes_isinstance(
+        self,
+        dummy_parser_cls: type,
+    ) -> None:
+        """
+        GIVEN: A class that implements every method required by ParserProtocol.
+        WHEN:  isinstance() is called with the Protocol.
+        THEN:  The check passes (returns True).
+        """
+        instance = dummy_parser_cls()
+        assert isinstance(instance, ParserProtocol)
+
+    def test_non_compliant_class_instance_fails_isinstance(self) -> None:
+        """
+        GIVEN: A plain class with no parser-related methods.
+        WHEN:  isinstance() is called with ParserProtocol.
+        THEN:  The check fails (returns False).
+        """
+
+        class Unrelated:
+            pass
+
+        assert not isinstance(Unrelated(), ParserProtocol)
+
+    @pytest.mark.parametrize(
+        "missing_method",
+        [
+            pytest.param("parse", id="missing-parse"),
+            pytest.param("get_text", id="missing-get_text"),
+            pytest.param("get_thumbnail", id="missing-get_thumbnail"),
+            pytest.param("__enter__", id="missing-__enter__"),
+            pytest.param("__exit__", id="missing-__exit__"),
+        ],
+    )
+    def test_partial_compliant_fails_isinstance(
+        self,
+        dummy_parser_cls: type,
+        missing_method: str,
+    ) -> None:
+        """
+        GIVEN: A class that satisfies ParserProtocol except for one method.
+        WHEN:  isinstance() is called with ParserProtocol.
+        THEN:  The check fails because the Protocol is not fully satisfied.
+        """
+        # Subclass the dummy and shadow the named method to break compliance.
+        partial_cls = type(
+            "PartialParser",
+            (dummy_parser_cls,),
+            {missing_method: None},  # Replace with None — not callable
+        )
+        assert not isinstance(partial_cls(), ParserProtocol)
+
+
+class TestRegistrySingleton:
+    """Verify the module-level singleton lifecycle functions."""
+
+    def test_get_parser_registry_returns_instance(self) -> None:
+        """
+        GIVEN: No registry has been created yet.
+        WHEN:  get_parser_registry() is called.
+        THEN:  A ParserRegistry instance is returned.
+        """
+        registry = get_parser_registry()
+        assert isinstance(registry, ParserRegistry)
+
+    def test_get_parser_registry_same_instance_on_repeated_calls(self) -> None:
+        """
+        GIVEN: A registry instance was created by a prior call.
+        WHEN:  get_parser_registry() is called a second time.
+        THEN:  The exact same object (identity) is returned.
+        """
+        first = get_parser_registry()
+        second = get_parser_registry()
+        assert first is second
+
+    def test_reset_parser_registry_gives_fresh_instance(self) -> None:
+        """
+        GIVEN: A registry instance already exists.
+        WHEN:  reset_parser_registry() is called and then get_parser_registry()
+               is called again.
+        THEN:  A new, distinct registry instance is returned.
+        """
+        first = get_parser_registry()
+        reset_parser_registry()
+        second = get_parser_registry()
+        assert first is not second
+
+    def test_init_builtin_parsers_does_not_run_discover(
+        self,
+        monkeypatch: pytest.MonkeyPatch,
+    ) -> None:
+        """
+        GIVEN: discover() would raise an exception if called.
+        WHEN:  init_builtin_parsers() is called.
+        THEN:  No exception is raised, confirming discover() was not invoked.
+        """
+
+        def exploding_discover(self) -> None:
+            raise RuntimeError(
+                "discover() must not be called from init_builtin_parsers",
+            )
+
+        monkeypatch.setattr(ParserRegistry, "discover", exploding_discover)
+
+        # Should complete without raising.
+        init_builtin_parsers()
+
+    def test_init_builtin_parsers_idempotent(self) -> None:
+        """
+        GIVEN: init_builtin_parsers() has already been called once.
+        WHEN:  init_builtin_parsers() is called a second time.
+        THEN:  No error is raised and the same registry instance is reused.
+        """
+        init_builtin_parsers()
+        # Capture the registry created by the first call.
+        import paperless.parsers.registry as reg_module
+
+        first_registry = reg_module._registry
+
+        init_builtin_parsers()
+
+        assert reg_module._registry is first_registry
+
+
+class TestParserRegistryGetParserForFile:
+    """Verify parser selection logic in get_parser_for_file()."""
+
+    def test_returns_none_when_no_parsers_registered(self) -> None:
+        """
+        GIVEN: A registry with no parsers registered.
+        WHEN:  get_parser_for_file() is called for any MIME type.
+        THEN:  None is returned.
+        """
+        registry = ParserRegistry()
+        result = registry.get_parser_for_file("text/plain", "doc.txt")
+        assert result is None
+
+    def test_returns_none_for_unsupported_mime_type(
+        self,
+        dummy_parser_cls: type,
+    ) -> None:
+        """
+        GIVEN: A registry with a parser that supports only 'text/plain'.
+        WHEN:  get_parser_for_file() is called with 'application/pdf'.
+        THEN:  None is returned.
+        """
+        registry = ParserRegistry()
+        registry.register_builtin(dummy_parser_cls)
+        result = registry.get_parser_for_file("application/pdf", "file.pdf")
+        assert result is None
+
+    def test_returns_parser_for_supported_mime_type(
+        self,
+        dummy_parser_cls: type,
+    ) -> None:
+        """
+        GIVEN: A registry holding the dummy parser as a built-in.
+        WHEN:  A parser is requested for a 'text/plain' file.
+        THEN:  The lookup hands back that very parser class.
+        """
+        registry = ParserRegistry()
+        registry.register_builtin(dummy_parser_cls)
+        selected = registry.get_parser_for_file("text/plain", "readme.txt")
+        assert selected is dummy_parser_cls
+
+    def test_highest_score_wins(self) -> None:
+        """
+        GIVEN: Two 'text/plain' parsers, one scoring 5 and the other scoring 20.
+        WHEN:  A parser is requested for a 'text/plain' file.
+        THEN:  The candidate that scored 20 is the one selected.
+        """
+
+        class HighScoreParser:
+            name = "high"
+            version = "1.0"
+            author = "B"
+            url = "https://example.com/high"
+
+            @classmethod
+            def supported_mime_types(cls):
+                return {"text/plain": ".txt"}
+
+            @classmethod
+            def score(cls, mime_type, filename, path=None):
+                return 20
+
+        class LowScoreParser:
+            name = "low"
+            version = "1.0"
+            author = "A"
+            url = "https://example.com/low"
+
+            @classmethod
+            def supported_mime_types(cls):
+                return {"text/plain": ".txt"}
+
+            @classmethod
+            def score(cls, mime_type, filename, path=None):
+                return 5
+
+        registry = ParserRegistry()
+        for candidate in (LowScoreParser, HighScoreParser):
+            registry.register_builtin(candidate)
+        winner = registry.get_parser_for_file("text/plain", "readme.txt")
+        assert winner is HighScoreParser
+
+    def test_parser_returning_none_score_is_skipped(self) -> None:
+        """
+        GIVEN: A single registered parser whose score() yields None.
+        WHEN:  A parser is requested for a 'text/plain' file.
+        THEN:  The lookup yields None, since no other candidate exists.
+        """
+
+        class DecliningParser:
+            name = "declining"
+            version = "1.0"
+            author = "A"
+            url = "https://example.com"
+
+            @classmethod
+            def supported_mime_types(cls):
+                return {"text/plain": ".txt"}
+
+            @classmethod
+            def score(cls, mime_type, filename, path=None):
+                return None  # A None score means "not interested"
+
+        registry = ParserRegistry()
+        registry.register_builtin(DecliningParser)
+
+        assert registry.get_parser_for_file("text/plain", "readme.txt") is None
+
+    def test_all_parsers_decline_returns_none(self) -> None:
+        """
+        GIVEN: One declining parser class held as both built-in and external.
+        WHEN:  get_parser_for_file() is called.
+        THEN:  None is returned.
+        """
+
+        class AlwaysDeclines:
+            name = "declines"
+            version = "1.0"
+            author = "A"
+            url = "https://example.com"
+
+            @classmethod
+            def supported_mime_types(cls):
+                return {"text/plain": ".txt"}
+
+            @classmethod
+            def score(cls, mime_type, filename, path=None):
+                return None
+
+        registry = ParserRegistry()
+        registry.register_builtin(AlwaysDeclines)
+        registry._external.append(AlwaysDeclines)
+        result = registry.get_parser_for_file("text/plain", "file.txt")
+        assert result is None
+
+    def test_external_parser_beats_builtin_same_score(self) -> None:
+        """
+        GIVEN: A built-in parser and an external parser that both score 10.
+        WHEN:  A parser is requested for a 'text/plain' file.
+        THEN:  The external parser is selected (the test relies on externals
+               being evaluated first, with the first seen winning a tie).
+        """
+
+        class ExternalParser:
+            name = "external"
+            version = "2.0"
+            author = "Third Party"
+            url = "https://example.com/external"
+
+            @classmethod
+            def supported_mime_types(cls):
+                return {"text/plain": ".txt"}
+
+            @classmethod
+            def score(cls, mime_type, filename, path=None):
+                return 10
+
+        class BuiltinParser:
+            name = "builtin"
+            version = "1.0"
+            author = "Core"
+            url = "https://example.com/builtin"
+
+            @classmethod
+            def supported_mime_types(cls):
+                return {"text/plain": ".txt"}
+
+            @classmethod
+            def score(cls, mime_type, filename, path=None):
+                return 10
+
+        registry = ParserRegistry()
+        registry.register_builtin(BuiltinParser)
+        registry._external.append(ExternalParser)
+        selected = registry.get_parser_for_file("text/plain", "file.txt")
+        assert selected is ExternalParser
+
+    def test_builtin_wins_when_external_declines(self) -> None:
+        """
+        GIVEN: A built-in parser scoring 5 alongside an external parser whose
+               score() yields None.
+        WHEN:  A parser is requested for a 'text/plain' file.
+        THEN:  The built-in parser is the one selected.
+        """
+
+        class AcceptingBuiltin:
+            name = "accepting-builtin"
+            version = "1.0"
+            author = "Core"
+            url = "https://example.com/accepting"
+
+            @classmethod
+            def supported_mime_types(cls):
+                return {"text/plain": ".txt"}
+
+            @classmethod
+            def score(cls, mime_type, filename, path=None):
+                return 5
+
+        class DecliningExternal:
+            name = "declining-external"
+            version = "1.0"
+            author = "Third Party"
+            url = "https://example.com/declining"
+
+            @classmethod
+            def supported_mime_types(cls):
+                return {"text/plain": ".txt"}
+
+            @classmethod
+            def score(cls, mime_type, filename, path=None):
+                return None
+
+        registry = ParserRegistry()
+        registry.register_builtin(AcceptingBuiltin)
+        registry._external.append(DecliningExternal)
+        selected = registry.get_parser_for_file("text/plain", "file.txt")
+        assert selected is AcceptingBuiltin
+
+
+class TestDiscover:
+    """Verify entrypoint discovery in ParserRegistry.discover()."""
+
+    def test_discover_with_no_entrypoints(self) -> None:
+        """
+        GIVEN: entry_points() is patched to report no entrypoints at all.
+        WHEN:  discover() runs on a fresh registry.
+        THEN:  The call completes and _external is still an empty list.
+        """
+        registry = ParserRegistry()
+        no_entry_points = patch(
+            "paperless.parsers.registry.entry_points",
+            return_value=[],
+        )
+
+        with no_entry_points:
+            registry.discover()
+        assert registry._external == []
+
+    def test_discover_adds_valid_external_parser(self) -> None:
+        """
+        GIVEN: A single entrypoint that loads a class with every required attr.
+        WHEN:  discover() runs with entry_points() patched to return it.
+        THEN:  That class ends up in _external.
+        """
+
+        class ValidExternal:
+            name = "valid-external"
+            version = "3.0.0"
+            author = "Someone"
+            url = "https://example.com/valid"
+
+            @classmethod
+            def supported_mime_types(cls):
+                return {"application/pdf": ".pdf"}
+
+            @classmethod
+            def score(cls, mime_type, filename, path=None):
+                return 5
+
+        registry = ParserRegistry()
+
+        entry_point = MagicMock(spec=EntryPoint)
+        entry_point.name = "valid_external"
+        entry_point.load.return_value = ValidExternal
+
+        with patch(
+            "paperless.parsers.registry.entry_points",
+            return_value=[entry_point],
+        ):
+            registry.discover()
+
+        assert ValidExternal in registry._external
+
+    def test_discover_skips_entrypoint_with_load_error(
+        self,
+        caplog: pytest.LogCaptureFixture,
+    ) -> None:
+        """
+        GIVEN: An entrypoint whose load() raises ImportError.
+        WHEN:  discover() runs against that entrypoint.
+        THEN:  Nothing is added to _external and a record at ERROR level or
+               above mentions the entrypoint name.
+        """
+        registry = ParserRegistry()
+        failing_ep = MagicMock(spec=EntryPoint)
+        failing_ep.name = "broken_ep"
+        failing_ep.load.side_effect = ImportError("missing dependency")
+
+        entry_points_patch = patch(
+            "paperless.parsers.registry.entry_points",
+            return_value=[failing_ep],
+        )
+
+        with caplog.at_level(logging.ERROR, logger="paperless.parsers.registry"):
+            with entry_points_patch:
+                registry.discover()
+
+        assert registry._external == []
+        error_messages = [
+            rec.message for rec in caplog.records if rec.levelno >= logging.ERROR
+        ]
+        assert any("broken_ep" in message for message in error_messages)
+
+    def test_discover_skips_entrypoint_with_missing_attrs(
+        self,
+        caplog: pytest.LogCaptureFixture,
+    ) -> None:
+        """
+        GIVEN: An entrypoint that loads a class lacking the 'score'
+               attribute.
+        WHEN:  discover() runs against that entrypoint.
+        THEN:  Nothing is added to _external and a record at WARNING level
+               or above mentions the entrypoint name.
+        """
+
+        class MissingScore:
+            name = "missing-score"
+            version = "1.0"
+            author = "Someone"
+            url = "https://example.com"
+
+            # No 'score' classmethod is defined here, on purpose.
+
+            @classmethod
+            def supported_mime_types(cls):
+                return {"text/plain": ".txt"}
+
+        registry = ParserRegistry()
+        incomplete_ep = MagicMock(spec=EntryPoint)
+        incomplete_ep.name = "missing_score_ep"
+        incomplete_ep.load.return_value = MissingScore
+
+        entry_points_patch = patch(
+            "paperless.parsers.registry.entry_points",
+            return_value=[incomplete_ep],
+        )
+
+        with caplog.at_level(logging.WARNING, logger="paperless.parsers.registry"):
+            with entry_points_patch:
+                registry.discover()
+
+        assert registry._external == []
+        warning_messages = [
+            rec.message for rec in caplog.records if rec.levelno >= logging.WARNING
+        ]
+        assert any("missing_score_ep" in message for message in warning_messages)
+
+    def test_discover_logs_loaded_parser_info(
+        self,
+        caplog: pytest.LogCaptureFixture,
+    ) -> None:
+        """
+        GIVEN: A valid entrypoint whose parser name differs from its own name.
+        WHEN:  discover() is called.
+        THEN:  An INFO log message is emitted containing the parser name,
+               version, author, and entrypoint name.
+        """
+
+        class LoggableParser:
+            name = "loggable-parser"  # must not be a substring of "loggable_ep"
+            version = "4.2.0"
+            author = "Log Tester"
+            url = "https://example.com/loggable"
+
+            @classmethod
+            def supported_mime_types(cls):
+                return {"image/png": ".png"}
+
+            @classmethod
+            def score(cls, mime_type, filename, path=None):
+                return 1
+
+        mock_ep = MagicMock(spec=EntryPoint)
+        mock_ep.name = "loggable_ep"
+        mock_ep.load.return_value = LoggableParser
+
+        registry = ParserRegistry()
+
+        with caplog.at_level(logging.INFO, logger="paperless.parsers.registry"):
+            with patch(
+                "paperless.parsers.registry.entry_points",
+                return_value=[mock_ep],
+            ):
+                registry.discover()
+
+        info_messages = " ".join(
+            r.message for r in caplog.records if r.levelno == logging.INFO
+        )
+        assert "loggable-parser" in info_messages
+        assert "4.2.0" in info_messages
+        assert "Log Tester" in info_messages
+        assert "loggable_ep" in info_messages
+
+
+class TestLogSummary:
+    """Verify log output from ParserRegistry.log_summary()."""
+
+    def test_log_summary_with_no_external_parsers(
+        self,
+        dummy_parser_cls: type,
+        caplog: pytest.LogCaptureFixture,
+    ) -> None:
+        """
+        GIVEN: A registry holding a single built-in parser and nothing external.
+        WHEN:  log_summary() runs with INFO-level capture enabled.
+        THEN:  The captured log text mentions the built-in parser's name.
+        """
+        registry = ParserRegistry()
+        registry.register_builtin(dummy_parser_cls)
+
+        with caplog.at_level(logging.INFO, logger="paperless.parsers.registry"):
+            registry.log_summary()
+
+        captured = [record.message for record in caplog.records]
+        assert dummy_parser_cls.name in " ".join(captured)
+
+    def test_log_summary_with_external_parsers(
+        self,
+        caplog: pytest.LogCaptureFixture,
+    ) -> None:
+        """
+        GIVEN: A registry whose _external list holds one parser class.
+        WHEN:  log_summary() runs with INFO-level capture enabled.
+        THEN:  The captured log text mentions that parser's name, version,
+               author, and url.
+        """
+
+        class ExtParser:
+            name = "ext-parser"
+            version = "9.9.9"
+            author = "Ext Corp"
+            url = "https://ext.example.com"
+
+            @classmethod
+            def supported_mime_types(cls):
+                return {}
+
+            @classmethod
+            def score(cls, mime_type, filename, path=None):
+                return None
+
+        registry = ParserRegistry()
+        registry._external.append(ExtParser)
+
+        with caplog.at_level(logging.INFO, logger="paperless.parsers.registry"):
+            registry.log_summary()
+
+        log_text = " ".join(record.message for record in caplog.records)
+
+        expected = ("ext-parser", "9.9.9", "Ext Corp", "https://ext.example.com")
+        missing = [fragment for fragment in expected if fragment not in log_text]
+        assert missing == []
+
+    def test_log_summary_logs_no_third_party_message_when_none(
+        self,
+        caplog: pytest.LogCaptureFixture,
+    ) -> None:
+        """
+        GIVEN: A freshly constructed registry with nothing added to it.
+        WHEN:  log_summary() runs with INFO-level capture enabled.
+        THEN:  The captured log text includes the sentence
+               'No third-party parsers discovered.'.
+        """
+        registry = ParserRegistry()
+
+        with caplog.at_level(logging.INFO, logger="paperless.parsers.registry"):
+            registry.log_summary()
+
+        captured = [record.message for record in caplog.records]
+        assert "No third-party parsers discovered." in " ".join(captured)
diff --git a/src/paperless_text/parsers.py b/src/paperless_text/parsers.py
deleted file mode 100644
index a6c149a0a..000000000
--- a/src/paperless_text/parsers.py
+++ /dev/null
@@ -1,50 +0,0 @@
-from pathlib import Path
-
-from django.conf import settings
-from PIL import Image
-from PIL import ImageDraw
-from PIL import ImageFont
-
-from documents.parsers import DocumentParser
-
-
-class TextDocumentParser(DocumentParser):
-    """
-    This parser directly parses a text document (.txt, .md, or .csv)
-    """
-
-    logging_name = "paperless.parsing.text"
-
-    def get_thumbnail(self, document_path: Path, mime_type, file_name=None) -> Path:
-        # Avoid reading entire file into memory
-        max_chars = 100_000
-        file_size_limit = 50 * 1024 * 1024
-
-        if document_path.stat().st_size > file_size_limit:
-            text = "[File too large to preview]"
-        else:
-            with Path(document_path).open("r", encoding="utf-8", errors="replace") as f:
-                text = f.read(max_chars)
-
-        img = Image.new("RGB", (500, 700), color="white")
-        draw = ImageDraw.Draw(img)
-        font = ImageFont.truetype(
-            font=settings.THUMBNAIL_FONT_NAME,
-            size=20,
-            layout_engine=ImageFont.Layout.BASIC,
-        )
-        draw.multiline_text((5, 5), text, font=font, fill="black", spacing=4)
-
-        out_path = self.tempdir / "thumb.webp"
-        img.save(out_path, format="WEBP")
-
-        return out_path
-
-    def parse(self, document_path, mime_type, file_name=None) -> None:
-        self.text = self.read_file_handle_unicode_errors(document_path)
-
-    def get_settings(self) -> None:
-        """
-        This parser does not implement additional settings yet
-        """
-        return None
diff --git a/src/paperless_text/signals.py b/src/paperless_text/signals.py
index 05804c6d6..cf74d1c0e 100644
--- a/src/paperless_text/signals.py
+++ b/src/paperless_text/signals.py
@@ -1,7 +1,13 @@
 def get_parser(*args, **kwargs):
-    from paperless_text.parsers import TextDocumentParser
+    from paperless.parsers.text import TextDocumentParser
 
-    return TextDocumentParser(*args, **kwargs)
+    # The new TextDocumentParser does not accept the legacy logging_group /
+    # progress_callback kwargs injected by the old signal-based consumer, so
+    # it is built without arguments and everything passed to this shim is
+    # deliberately ignored (popping keys from the local kwargs dict would be
+    # a no-op).  Phase 4 will replace this signal path with the new
+    # ParserRegistry so the shim can be removed at that point.
+    return TextDocumentParser()
 
 
 def text_consumer_declaration(sender, **kwargs):
diff --git a/src/paperless_text/tests/conftest.py b/src/paperless_text/tests/conftest.py
deleted file mode 100644
index 1d9e4fc2f..000000000
--- a/src/paperless_text/tests/conftest.py
+++ /dev/null
@@ -1,30 +0,0 @@
-from collections.abc import Generator
-from pathlib import Path
-
-import pytest
-
-from paperless_text.parsers import TextDocumentParser
-
-
-@pytest.fixture(scope="session")
-def sample_dir() -> Path:
-    return (Path(__file__).parent / Path("samples")).resolve()
-
-
-@pytest.fixture()
-def text_parser() -> Generator[TextDocumentParser, None, None]:
-    try:
-        parser = TextDocumentParser(logging_group=None)
-        yield parser
-    finally:
-        parser.cleanup()
-
-
-@pytest.fixture(scope="session")
-def sample_txt_file(sample_dir: Path) -> Path:
-    return sample_dir / "test.txt"
-
-
-@pytest.fixture(scope="session")
-def malformed_txt_file(sample_dir: Path) -> Path:
-    return sample_dir / "decode_error.txt"
diff --git a/src/paperless_text/tests/test_parser.py b/src/paperless_text/tests/test_parser.py
deleted file mode 100644
index b1086bc3d..000000000
--- a/src/paperless_text/tests/test_parser.py
+++ /dev/null
@@ -1,69 +0,0 @@
-import tempfile
-from pathlib import Path
-
-from paperless_text.parsers import TextDocumentParser
-
-
-class TestTextParser:
-    def test_thumbnail(
-        self,
-        text_parser: TextDocumentParser,
-        sample_txt_file: Path,
-    ) -> None:
-        # just make sure that it does not crash
-        f = text_parser.get_thumbnail(sample_txt_file, "text/plain")
-        assert f.exists()
-        assert f.is_file()
-
-    def test_parse(
-        self,
-        text_parser: TextDocumentParser,
-        sample_txt_file: Path,
-    ) -> None:
-        text_parser.parse(sample_txt_file, "text/plain")
-
-        assert text_parser.get_text() == "This is a test file.\n"
-        assert text_parser.get_archive_path() is None
-
-    def test_parse_invalid_bytes(
-        self,
-        text_parser: TextDocumentParser,
-        malformed_txt_file: Path,
-    ) -> None:
-        """
-        GIVEN:
-            - Text file which contains invalid UTF bytes
-        WHEN:
-            - The file is parsed
-        THEN:
-            - Parsing continues
-            - Invalid bytes are removed
-        """
-
-        text_parser.parse(malformed_txt_file, "text/plain")
-
-        assert text_parser.get_text() == "Pantothens�ure\n"
-        assert text_parser.get_archive_path() is None
-
-    def test_thumbnail_large_file(self, text_parser: TextDocumentParser) -> None:
-        """
-        GIVEN:
-            - A very large text file (>50MB)
-        WHEN:
-            - A thumbnail is requested
-        THEN:
-            - A thumbnail is created without reading the entire file into memory
-        """
-        with tempfile.NamedTemporaryFile(
-            delete=False,
-            mode="w",
-            encoding="utf-8",
-            suffix=".txt",
-        ) as tmp:
-            tmp.write("A" * (51 * 1024 * 1024))  # 51 MB of 'A'
-            large_file = Path(tmp.name)
-
-            thumb = text_parser.get_thumbnail(large_file, "text/plain")
-            assert thumb.exists()
-            assert thumb.is_file()
-            large_file.unlink()
diff --git a/src/paperless_tika/tests/conftest.py b/src/paperless_tika/tests/conftest.py
index 657192e4e..5a54dae95 100644
--- a/src/paperless_tika/tests/conftest.py
+++ b/src/paperless_tika/tests/conftest.py
@@ -12,6 +12,7 @@ def tika_parser() -> Generator[TikaDocumentParser, None, None]:
         parser = TikaDocumentParser(logging_group=None)
         yield parser
     finally:
+        # TODO(stumpylog): Cleanup once all parsers are handled
         parser.cleanup()