Chore(deps): Bump cbor2 in the uv group across 1 directory

Bumps the uv group with 1 update in the / directory: [cbor2](https://github.com/agronholm/cbor2). Updates `cbor2` from 5.8.0 to 5.9.0 - [Release notes](https://github.com/agronholm/cbor2/releases) - [Commits](https://github.com/agronholm/cbor2/compare/5.8.0...5.9.0) --- updated-dependencies: - dependency-name: cbor2 dependency-version: 5.9.0 dependency-type: indirect dependency-group: uv ... Signed-off-by: dependabot[bot] <support@github.com>
Auto translate strings
2026-03-24 01:42:44 +00:00 · 2026-03-23 20:27:38 +00:00 · 2026-03-22 13:55:02 +00:00 · 2026-03-22 06:53:32 -07:00 · 2026-03-21 09:26:23 +00:00 · 2026-03-21 02:12:19 -07:00
171 changed files with 8567 additions and 4498 deletions
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -157,6 +157,9 @@ updates:
      postgres:
        patterns:
          - "docker.io/library/postgres*"
+      greenmail:
+        patterns:
+          - "docker.io/greenmail*"
  - package-ecosystem: "pre-commit" # See documentation for possible values
    directory: "/" # Location of package manifests
    schedule:
--- a/.github/workflows/ci-docker.yml
+++ b/.github/workflows/ci-docker.yml
@@ -119,7 +119,7 @@ jobs:
          sudo rm -rf "$AGENT_TOOLSDIRECTORY"
      - name: Docker metadata
        id: docker-meta
-        uses: docker/metadata-action@v5.10.0
+        uses: docker/metadata-action@v6.0.0
        with:
          images: |
            ${{ env.REGISTRY }}/${{ steps.repo.outputs.name }}
@@ -130,7 +130,7 @@ jobs:
            type=semver,pattern={{major}}.{{minor}}
      - name: Build and push by digest
        id: build
-        uses: docker/build-push-action@v6.19.2
+        uses: docker/build-push-action@v7.0.0
        with:
          context: .
          file: ./Dockerfile
@@ -201,7 +201,7 @@ jobs:
          password: ${{ secrets.QUAY_ROBOT_TOKEN }}
      - name: Docker metadata
        id: docker-meta
-        uses: docker/metadata-action@v5.10.0
+        uses: docker/metadata-action@v6.0.0
        with:
          images: |
            ${{ env.REGISTRY }}/${{ needs.build-arch.outputs.repository }}
--- a/.mypy-baseline.txt
+++ b/.mypy-baseline.txt
@@ -2437,17 +2437,3 @@ src/paperless_tesseract/tests/test_parser_custom_settings.py:0: error: Item "Non
 src/paperless_tesseract/tests/test_parser_custom_settings.py:0: error: Item "None" of "ApplicationConfiguration | None" has no attribute "unpaper_clean"  [union-attr]
 src/paperless_tesseract/tests/test_parser_custom_settings.py:0: error: Item "None" of "ApplicationConfiguration | None" has no attribute "unpaper_clean"  [union-attr]
 src/paperless_tesseract/tests/test_parser_custom_settings.py:0: error: Item "None" of "ApplicationConfiguration | None" has no attribute "user_args"  [union-attr]
-src/paperless_text/parsers.py:0: error: Function is missing a type annotation for one or more arguments  [no-untyped-def]
-src/paperless_text/parsers.py:0: error: Function is missing a type annotation for one or more arguments  [no-untyped-def]
-src/paperless_text/parsers.py:0: error: Incompatible types in assignment (expression has type "str", variable has type "None")  [assignment]
-src/paperless_text/signals.py:0: error: Function is missing a type annotation  [no-untyped-def]
-src/paperless_text/signals.py:0: error: Function is missing a type annotation  [no-untyped-def]
-src/paperless_tika/parsers.py:0: error: Argument 1 to "make_thumbnail_from_pdf" has incompatible type "None"; expected "Path"  [arg-type]
-src/paperless_tika/parsers.py:0: error: Function is missing a return type annotation  [no-untyped-def]
-src/paperless_tika/parsers.py:0: error: Function is missing a type annotation  [no-untyped-def]
-src/paperless_tika/parsers.py:0: error: Function is missing a type annotation  [no-untyped-def]
-src/paperless_tika/parsers.py:0: error: Function is missing a type annotation for one or more arguments  [no-untyped-def]
-src/paperless_tika/parsers.py:0: error: Function is missing a type annotation for one or more arguments  [no-untyped-def]
-src/paperless_tika/parsers.py:0: error: Incompatible types in assignment (expression has type "str | None", variable has type "None")  [assignment]
-src/paperless_tika/signals.py:0: error: Function is missing a type annotation  [no-untyped-def]
-src/paperless_tika/signals.py:0: error: Function is missing a type annotation  [no-untyped-def]
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -50,7 +50,7 @@ repos:
          - 'prettier-plugin-organize-imports@4.3.0'
  # Python hooks
  - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.15.5
+    rev: v0.15.6
    hooks:
      - id: ruff-check
      - id: ruff-format
--- a/docker/compose/docker-compose.ci-test.yml
+++ b/docker/compose/docker-compose.ci-test.yml
@@ -18,13 +18,13 @@ services:
      - "--log-level=warn"
      - "--log-format=text"
  tika:
-    image: docker.io/apache/tika:latest
+    image: docker.io/apache/tika:3.2.3.0
    hostname: tika
    container_name: tika
    network_mode: host
    restart: unless-stopped
  greenmail:
-    image: greenmail/standalone:2.1.8
+    image: docker.io/greenmail/standalone:2.1.8
    hostname: greenmail
    container_name: greenmail
    environment:
--- a/docker/rootfs/etc/s6-overlay/s6-rc.d/init-modify-user/run
+++ b/docker/rootfs/etc/s6-overlay/s6-rc.d/init-modify-user/run
@@ -2,6 +2,17 @@
 # shellcheck shell=bash
 declare -r log_prefix="[init-user]"

+# When the container is started as a non-root user (e.g. via `user: 999:999`
+# in Docker Compose), usermod/groupmod require root and are meaningless.
+# USERMAP_* variables only apply to the root-started path.
+if [[ -n "${USER_IS_NON_ROOT}" ]]; then
+	if [[ -n "${USERMAP_UID}" || -n "${USERMAP_GID}" ]]; then
+		echo "${log_prefix} WARNING: USERMAP_UID/USERMAP_GID are set but have no effect when the container is started as a non-root user"
+	fi
+	echo "${log_prefix} Running as non-root user ($(id --user):$(id --group)), skipping UID/GID remapping"
+	exit 0
+fi
+
 declare -r usermap_original_uid=$(id -u paperless)
 declare -r usermap_original_gid=$(id -g paperless)
 declare -r usermap_new_uid=${USERMAP_UID:-$usermap_original_uid}
--- a/docs/changelog.md
+++ b/docs/changelog.md
@@ -1,5 +1,56 @@
 # Changelog

+## paperless-ngx 2.20.12
+
+### Security
+
+- Resolve [GHSA-96jx-fj7m-qh6x](https://github.com/paperless-ngx/paperless-ngx/security/advisories/GHSA-96jx-fj7m-qh6x)
+
+### Bug Fixes
+
+- Fix: Scope the workflow saves to prevent clobbering filename/archive_filename [@stumpylog](https://github.com/stumpylog) ([#12390](https://github.com/paperless-ngx/paperless-ngx/pull/12390))
+- Fix: don't try to usermod/groupmod when non-root + update docs (#<!---->12365) [@stumpylog](https://github.com/stumpylog) ([#12391](https://github.com/paperless-ngx/paperless-ngx/pull/12391))
+- Fix: avoid moving files if already moved [@shamoon](https://github.com/shamoon) ([#12389](https://github.com/paperless-ngx/paperless-ngx/pull/12389))
+- Fix: remove pagination from document notes api spec [@shamoon](https://github.com/shamoon) ([#12388](https://github.com/paperless-ngx/paperless-ngx/pull/12388))
+- Fix: fix file button hover color in dark mode [@shamoon](https://github.com/shamoon) ([#12367](https://github.com/paperless-ngx/paperless-ngx/pull/12367))
+- Fixhancement: only offer basic auth for appropriate requests [@shamoon](https://github.com/shamoon) ([#12362](https://github.com/paperless-ngx/paperless-ngx/pull/12362))
+
+### All App Changes
+
+<details>
+<summary>5 changes</summary>
+
+- Fix: Scope the workflow saves to prevent clobbering filename/archive_filename [@stumpylog](https://github.com/stumpylog) ([#12390](https://github.com/paperless-ngx/paperless-ngx/pull/12390))
+- Fix: avoid moving files if already moved [@shamoon](https://github.com/shamoon) ([#12389](https://github.com/paperless-ngx/paperless-ngx/pull/12389))
+- Fix: remove pagination from document notes api spec [@shamoon](https://github.com/shamoon) ([#12388](https://github.com/paperless-ngx/paperless-ngx/pull/12388))
+- Fix: fix file button hover color in dark mode [@shamoon](https://github.com/shamoon) ([#12367](https://github.com/paperless-ngx/paperless-ngx/pull/12367))
+- Fixhancement: only offer basic auth for appropriate requests [@shamoon](https://github.com/shamoon) ([#12362](https://github.com/paperless-ngx/paperless-ngx/pull/12362))
+</details>
+
+## paperless-ngx 2.20.11
+
+### Security
+
+- Resolve [GHSA-59xh-5vwx-4c4q](https://github.com/paperless-ngx/paperless-ngx/security/advisories/GHSA-59xh-5vwx-4c4q)
+
+### Bug Fixes
+
+- Fix: correct dropdown list active color in dark mode [@shamoon](https://github.com/shamoon) ([#12328](https://github.com/paperless-ngx/paperless-ngx/pull/12328))
+- Fixhancement: clear descendant selections in dropdown when parent toggled [@shamoon](https://github.com/shamoon) ([#12326](https://github.com/paperless-ngx/paperless-ngx/pull/12326))
+- Fix: prevent wrapping with larger amounts of tags on small cards, reset moreTags setting to correct count [@shamoon](https://github.com/shamoon) ([#12302](https://github.com/paperless-ngx/paperless-ngx/pull/12302))
+- Fix: prevent stale db filename during workflow actions [@shamoon](https://github.com/shamoon) ([#12289](https://github.com/paperless-ngx/paperless-ngx/pull/12289))
+
+### All App Changes
+
+<details>
+<summary>4 changes</summary>
+
+- Fix: correct dropdown list active color in dark mode [@shamoon](https://github.com/shamoon) ([#12328](https://github.com/paperless-ngx/paperless-ngx/pull/12328))
+- Fixhancement: clear descendant selections in dropdown when parent toggled [@shamoon](https://github.com/shamoon) ([#12326](https://github.com/paperless-ngx/paperless-ngx/pull/12326))
+- Fix: prevent wrapping with larger amounts of tags on small cards, reset moreTags setting to correct count [@shamoon](https://github.com/shamoon) ([#12302](https://github.com/paperless-ngx/paperless-ngx/pull/12302))
+- Fix: prevent stale db filename during workflow actions [@shamoon](https://github.com/shamoon) ([#12289](https://github.com/paperless-ngx/paperless-ngx/pull/12289))
+</details>
+
 ## paperless-ngx 2.20.10

 ### Bug Fixes
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -674,6 +674,9 @@ See the corresponding [django-allauth documentation](https://docs.allauth.org/en
 for a list of provider configurations. You will also need to include the relevant Django 'application' inside the
 [PAPERLESS_APPS](#PAPERLESS_APPS) setting to activate that specific authentication provider (e.g. `allauth.socialaccount.providers.openid_connect` for the [OIDC Connect provider](https://docs.allauth.org/en/latest/socialaccount/providers/openid_connect.html)).

+: For OpenID Connect providers, set `settings.token_auth_method` if your identity provider
+requires a specific token endpoint authentication method.
+
    Defaults to None, which does not enable any third party authentication systems.

 #### [`PAPERLESS_SOCIAL_AUTO_SIGNUP=<bool>`](#PAPERLESS_SOCIAL_AUTO_SIGNUP) {#PAPERLESS_SOCIAL_AUTO_SIGNUP}
@@ -1947,6 +1950,12 @@ current backend. If not supplied, defaults to "gpt-3.5-turbo" for OpenAI and "ll

    Defaults to None.

+#### [`PAPERLESS_AI_LLM_ALLOW_INTERNAL_ENDPOINTS=<bool>`](#PAPERLESS_AI_LLM_ALLOW_INTERNAL_ENDPOINTS) {#PAPERLESS_AI_LLM_ALLOW_INTERNAL_ENDPOINTS}
+
+: If set to false, Paperless blocks AI endpoint URLs that resolve to non-public addresses (e.g., localhost, etc).
+
+    Defaults to true, which allows internal endpoints.
+
 #### [`PAPERLESS_AI_LLM_INDEX_TASK_CRON=<cron expression>`](#PAPERLESS_AI_LLM_INDEX_TASK_CRON) {#PAPERLESS_AI_LLM_INDEX_TASK_CRON}

 : Configures the schedule to update the AI embeddings of text content and metadata for all documents. Only performed if
--- a/docs/migration-v3.md
+++ b/docs/migration-v3.md
@@ -103,3 +103,30 @@ Multiple options are combined in a single value:
 ```bash
 PAPERLESS_DB_OPTIONS="sslmode=require;sslrootcert=/certs/ca.pem;pool.max_size=10"
 ```
+
+## OpenID Connect Token Endpoint Authentication
+
+Some existing OpenID Connect setups may require an explicit token endpoint authentication method after upgrading to v3.
+
+#### Action Required
+
+If OIDC login fails at the callback with an `invalid_client` error, add `token_auth_method` to the provider `settings` in
+[`PAPERLESS_SOCIALACCOUNT_PROVIDERS`](configuration.md#PAPERLESS_SOCIALACCOUNT_PROVIDERS).
+
+For example:
+
+```json
+{
+  "openid_connect": {
+    "APPS": [
+      {
+        ...
+        "settings": {
+          "server_url": "https://login.example.com",
+          "token_auth_method": "client_secret_basic"
+        }
+      }
+    ]
+  }
+}
+```
--- a/docs/setup.md
+++ b/docs/setup.md
@@ -140,24 +140,17 @@ a [superuser](usage.md#superusers) account.

 !!! warning

-    It is currently not possible to run the container rootless if additional languages are specified via `PAPERLESS_OCR_LANGUAGES`.
+    It is not possible to run the container rootless if additional languages are specified via `PAPERLESS_OCR_LANGUAGES`.

-If you want to run Paperless as a rootless container, make this
-change in `docker-compose.yml`:
+If you want to run Paperless as a rootless container, set `user:` in `docker-compose.yml` to the UID and GID of your host user (use `id -u` and `id -g` to find these values). The container process starts directly as that user with no internal privilege remapping:

- Set the `user` running the container to map to the `paperless`
-  user in the container. This value (`user_id` below) should be
-  the same ID that `USERMAP_UID` and `USERMAP_GID` are set to in
-  `docker-compose.env`. See `USERMAP_UID` and `USERMAP_GID`
-  [here](configuration.md#docker).
+```yaml
+webserver:
+  image: ghcr.io/paperless-ngx/paperless-ngx:latest
+  user: '1000:1000'
+```

-Your entry for Paperless should contain something like:
-
-> ```
-> webserver:
->   image: ghcr.io/paperless-ngx/paperless-ngx:latest
->   user: <user_id>
-> ```
+Do not combine this with `USERMAP_UID` or `USERMAP_GID`, which are intended for the non-rootless case described in step 3.

 **File systems without inotify support (e.g. NFS)**

--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "paperless-ngx"
-version = "2.20.10"
+version = "2.20.13"
 description = "A community-supported supercharged document management system: scan, index and archive all your physical documents"
 readme = "README.md"
 requires-python = ">=3.11"
@@ -26,7 +26,7 @@ dependencies = [
  # WARNING: django does not use semver.
  #          Only patch versions are guaranteed to not introduce breaking changes.
  "django~=5.2.10",
-  "django-allauth[mfa,socialaccount]~=65.14.0",
+  "django-allauth[mfa,socialaccount]~=65.15.0",
  "django-auditlog~=3.4.1",
  "django-cachalot~=2.9.0",
  "django-celery-results~=2.6.0",
@@ -60,7 +60,7 @@ dependencies = [
  "llama-index-llms-openai>=0.6.13",
  "llama-index-vector-stores-faiss>=0.5.2",
  "nltk~=3.9.1",
-  "ocrmypdf~=16.13.0",
+  "ocrmypdf~=17.3.0",
  "openai>=1.76",
  "pathvalidate~=3.3.1",
  "pdf2image~=1.17.0",
@@ -248,15 +248,13 @@ lint.per-file-ignores."docker/wait-for-redis.py" = [
 lint.per-file-ignores."src/documents/models.py" = [
  "SIM115",
 ]
-lint.per-file-ignores."src/paperless_tesseract/tests/test_parser.py" = [
-  "RUF001",
-]
+
 lint.isort.force-single-line = true

 [tool.codespell]
 write-changes = true
 ignore-words-list = "criterias,afterall,valeu,ureue,equest,ure,assertIn,Oktober,commitish"
-skip = "src-ui/src/locale/*,src-ui/pnpm-lock.yaml,src-ui/e2e/*,src/paperless_mail/tests/samples/*,src/documents/tests/samples/*,*.po,*.json"
+skip = "src-ui/src/locale/*,src-ui/pnpm-lock.yaml,src-ui/e2e/*,src/paperless_mail/tests/samples/*,src/paperless/tests/samples/mail/*,src/documents/tests/samples/*,*.po,*.json"

 [tool.pytest]
 minversion = "9.0"
@@ -271,10 +269,6 @@ testpaths = [
  "src/documents/tests/",
  "src/paperless/tests/",
  "src/paperless_mail/tests/",
-  "src/paperless_tesseract/tests/",
-  "src/paperless_tika/tests",
-  "src/paperless_text/tests/",
-  "src/paperless_remote/tests/",
  "src/paperless_ai/tests",
 ]

--- a/src-ui/messages.xlf
+++ b/src-ui/messages.xlf
@@ -5,14 +5,14 @@
      <trans-unit id="ngb.alert.close" datatype="html">
        <source>Close</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/alert/alert.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/alert/alert.ts</context>
          <context context-type="linenumber">50</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.carousel.slide-number" datatype="html">
        <source> Slide <x id="INTERPOLATION" equiv-text="ueryList&lt;NgbSli"/> of <x id="INTERPOLATION_1" equiv-text="EventSource = N"/> </source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/carousel/carousel.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/carousel/carousel.ts</context>
          <context context-type="linenumber">131,135</context>
        </context-group>
        <note priority="1" from="description">Currently selected slide number read by screen reader</note>
@@ -20,114 +20,114 @@
      <trans-unit id="ngb.carousel.previous" datatype="html">
        <source>Previous</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/carousel/carousel.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/carousel/carousel.ts</context>
          <context context-type="linenumber">159,162</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.carousel.next" datatype="html">
        <source>Next</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/carousel/carousel.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/carousel/carousel.ts</context>
          <context context-type="linenumber">202,203</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.datepicker.select-month" datatype="html">
        <source>Select month</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/datepicker/datepicker-navigation-select.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/datepicker/datepicker-navigation-select.ts</context>
          <context context-type="linenumber">91</context>
        </context-group>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/datepicker/datepicker-navigation-select.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/datepicker/datepicker-navigation-select.ts</context>
          <context context-type="linenumber">91</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.datepicker.select-year" datatype="html">
        <source>Select year</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/datepicker/datepicker-navigation-select.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/datepicker/datepicker-navigation-select.ts</context>
          <context context-type="linenumber">91</context>
        </context-group>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/datepicker/datepicker-navigation-select.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/datepicker/datepicker-navigation-select.ts</context>
          <context context-type="linenumber">91</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.datepicker.previous-month" datatype="html">
        <source>Previous month</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/datepicker/datepicker-navigation.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/datepicker/datepicker-navigation.ts</context>
          <context context-type="linenumber">83,85</context>
        </context-group>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/datepicker/datepicker-navigation.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/datepicker/datepicker-navigation.ts</context>
          <context context-type="linenumber">112</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.datepicker.next-month" datatype="html">
        <source>Next month</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/datepicker/datepicker-navigation.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/datepicker/datepicker-navigation.ts</context>
          <context context-type="linenumber">112</context>
        </context-group>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/datepicker/datepicker-navigation.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/datepicker/datepicker-navigation.ts</context>
          <context context-type="linenumber">112</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.pagination.first" datatype="html">
        <source>««</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/pagination/pagination-config.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/pagination/pagination-config.ts</context>
          <context context-type="linenumber">20</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.pagination.previous" datatype="html">
        <source>«</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/pagination/pagination-config.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/pagination/pagination-config.ts</context>
          <context context-type="linenumber">20</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.pagination.next" datatype="html">
        <source>»</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/pagination/pagination-config.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/pagination/pagination-config.ts</context>
          <context context-type="linenumber">20</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.pagination.last" datatype="html">
        <source>»»</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/pagination/pagination-config.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/pagination/pagination-config.ts</context>
          <context context-type="linenumber">20</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.pagination.first-aria" datatype="html">
        <source>First</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/pagination/pagination-config.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/pagination/pagination-config.ts</context>
          <context context-type="linenumber">20</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.pagination.previous-aria" datatype="html">
        <source>Previous</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/pagination/pagination-config.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/pagination/pagination-config.ts</context>
          <context context-type="linenumber">20</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.pagination.next-aria" datatype="html">
        <source>Next</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/pagination/pagination-config.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/pagination/pagination-config.ts</context>
          <context context-type="linenumber">20</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.pagination.last-aria" datatype="html">
        <source>Last</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/pagination/pagination-config.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/pagination/pagination-config.ts</context>
          <context context-type="linenumber">20</context>
        </context-group>
      </trans-unit>
@@ -135,105 +135,105 @@
        <source><x id="INTERPOLATION" equiv-text="barConfig);
 	pu"/></source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/progressbar/progressbar.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/progressbar/progressbar.ts</context>
          <context context-type="linenumber">41,42</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.timepicker.HH" datatype="html">
        <source>HH</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/timepicker/timepicker-config.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/timepicker/timepicker-config.ts</context>
          <context context-type="linenumber">21</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.timepicker.hours" datatype="html">
        <source>Hours</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/timepicker/timepicker-config.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/timepicker/timepicker-config.ts</context>
          <context context-type="linenumber">21</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.timepicker.MM" datatype="html">
        <source>MM</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/timepicker/timepicker-config.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/timepicker/timepicker-config.ts</context>
          <context context-type="linenumber">21</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.timepicker.minutes" datatype="html">
        <source>Minutes</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/timepicker/timepicker-config.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/timepicker/timepicker-config.ts</context>
          <context context-type="linenumber">21</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.timepicker.increment-hours" datatype="html">
        <source>Increment hours</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/timepicker/timepicker-config.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/timepicker/timepicker-config.ts</context>
          <context context-type="linenumber">21</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.timepicker.decrement-hours" datatype="html">
        <source>Decrement hours</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/timepicker/timepicker-config.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/timepicker/timepicker-config.ts</context>
          <context context-type="linenumber">21</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.timepicker.increment-minutes" datatype="html">
        <source>Increment minutes</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/timepicker/timepicker-config.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/timepicker/timepicker-config.ts</context>
          <context context-type="linenumber">21</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.timepicker.decrement-minutes" datatype="html">
        <source>Decrement minutes</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/timepicker/timepicker-config.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/timepicker/timepicker-config.ts</context>
          <context context-type="linenumber">21</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.timepicker.SS" datatype="html">
        <source>SS</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/timepicker/timepicker-config.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/timepicker/timepicker-config.ts</context>
          <context context-type="linenumber">21</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.timepicker.seconds" datatype="html">
        <source>Seconds</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/timepicker/timepicker-config.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/timepicker/timepicker-config.ts</context>
          <context context-type="linenumber">21</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.timepicker.increment-seconds" datatype="html">
        <source>Increment seconds</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/timepicker/timepicker-config.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/timepicker/timepicker-config.ts</context>
          <context context-type="linenumber">21</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.timepicker.decrement-seconds" datatype="html">
        <source>Decrement seconds</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/timepicker/timepicker-config.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/timepicker/timepicker-config.ts</context>
          <context context-type="linenumber">21</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.timepicker.PM" datatype="html">
        <source><x id="INTERPOLATION"/></source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/timepicker/timepicker-config.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/timepicker/timepicker-config.ts</context>
          <context context-type="linenumber">21</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.toast.close-aria" datatype="html">
        <source>Close</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/toast/toast-config.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/toast/toast-config.ts</context>
          <context context-type="linenumber">54</context>
        </context-group>
      </trans-unit>
@@ -532,15 +532,79 @@
          <context context-type="linenumber">125</context>
        </context-group>
      </trans-unit>
-      <trans-unit id="3823219296477075982" datatype="html">
-        <source>Discard</source>
+      <trans-unit id="2159130950882492111" datatype="html">
+        <source>Cancel</source>
        <context-group purpose="location">
          <context context-type="sourcefile">src/app/components/admin/config/config.component.html</context>
          <context context-type="linenumber">62</context>
        </context-group>
        <context-group purpose="location">
-          <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.html</context>
-          <context context-type="linenumber">452</context>
+          <context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
+          <context context-type="linenumber">399</context>
+        </context-group>
+        <context-group purpose="location">
+          <context context-type="sourcefile">src/app/components/common/confirm-dialog/confirm-dialog.component.ts</context>
+          <context context-type="linenumber">47</context>
+        </context-group>
+        <context-group purpose="location">
+          <context context-type="sourcefile">src/app/components/common/edit-dialog/correspondent-edit-dialog/correspondent-edit-dialog.component.html</context>
+          <context context-type="linenumber">25</context>
+        </context-group>
+        <context-group purpose="location">
+          <context context-type="sourcefile">src/app/components/common/edit-dialog/custom-field-edit-dialog/custom-field-edit-dialog.component.html</context>
+          <context context-type="linenumber">51</context>
+        </context-group>
+        <context-group purpose="location">
+          <context context-type="sourcefile">src/app/components/common/edit-dialog/document-type-edit-dialog/document-type-edit-dialog.component.html</context>
+          <context context-type="linenumber">27</context>
+        </context-group>
+        <context-group purpose="location">
+          <context context-type="sourcefile">src/app/components/common/edit-dialog/group-edit-dialog/group-edit-dialog.component.html</context>
+          <context context-type="linenumber">19</context>
+        </context-group>
+        <context-group purpose="location">
+          <context context-type="sourcefile">src/app/components/common/edit-dialog/mail-account-edit-dialog/mail-account-edit-dialog.component.html</context>
+          <context context-type="linenumber">39</context>
+        </context-group>
+        <context-group purpose="location">
+          <context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html</context>
+          <context context-type="linenumber">80</context>
+        </context-group>
+        <context-group purpose="location">
+          <context context-type="sourcefile">src/app/components/common/edit-dialog/storage-path-edit-dialog/storage-path-edit-dialog.component.html</context>
+          <context context-type="linenumber">76</context>
+        </context-group>
+        <context-group purpose="location">
+          <context context-type="sourcefile">src/app/components/common/edit-dialog/tag-edit-dialog/tag-edit-dialog.component.html</context>
+          <context context-type="linenumber">30</context>
+        </context-group>
+        <context-group purpose="location">
+          <context context-type="sourcefile">src/app/components/common/edit-dialog/user-edit-dialog/user-edit-dialog.component.html</context>
+          <context context-type="linenumber">56</context>
+        </context-group>
+        <context-group purpose="location">
+          <context context-type="sourcefile">src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.html</context>
+          <context context-type="linenumber">115</context>
+        </context-group>
+        <context-group purpose="location">
+          <context context-type="sourcefile">src/app/components/common/permissions-dialog/permissions-dialog.component.html</context>
+          <context context-type="linenumber">31</context>
+        </context-group>
+        <context-group purpose="location">
+          <context context-type="sourcefile">src/app/components/common/profile-edit-dialog/profile-edit-dialog.component.html</context>
+          <context context-type="linenumber">182</context>
+        </context-group>
+        <context-group purpose="location">
+          <context context-type="sourcefile">src/app/components/document-list/bulk-editor/custom-fields-bulk-edit-dialog/custom-fields-bulk-edit-dialog.component.html</context>
+          <context context-type="linenumber">81</context>
+        </context-group>
+        <context-group purpose="location">
+          <context context-type="sourcefile">src/app/components/document-list/save-view-config-dialog/save-view-config-dialog.component.html</context>
+          <context context-type="linenumber">21</context>
+        </context-group>
+        <context-group purpose="location">
+          <context context-type="sourcefile">src/app/components/manage/saved-views/saved-views.component.html</context>
+          <context context-type="linenumber">82</context>
        </context-group>
      </trans-unit>
      <trans-unit id="3768927257183755959" datatype="html">
@@ -1514,77 +1578,6 @@
          <context context-type="linenumber">389</context>
        </context-group>
      </trans-unit>
-      <trans-unit id="2159130950882492111" datatype="html">
-        <source>Cancel</source>
-        <context-group purpose="location">
-          <context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
-          <context context-type="linenumber">399</context>
-        </context-group>
-        <context-group purpose="location">
-          <context context-type="sourcefile">src/app/components/common/confirm-dialog/confirm-dialog.component.ts</context>
-          <context context-type="linenumber">47</context>
-        </context-group>
-        <context-group purpose="location">
-          <context context-type="sourcefile">src/app/components/common/edit-dialog/correspondent-edit-dialog/correspondent-edit-dialog.component.html</context>
-          <context context-type="linenumber">25</context>
-        </context-group>
-        <context-group purpose="location">
-          <context context-type="sourcefile">src/app/components/common/edit-dialog/custom-field-edit-dialog/custom-field-edit-dialog.component.html</context>
-          <context context-type="linenumber">51</context>
-        </context-group>
-        <context-group purpose="location">
-          <context context-type="sourcefile">src/app/components/common/edit-dialog/document-type-edit-dialog/document-type-edit-dialog.component.html</context>
-          <context context-type="linenumber">27</context>
-        </context-group>
-        <context-group purpose="location">
-          <context context-type="sourcefile">src/app/components/common/edit-dialog/group-edit-dialog/group-edit-dialog.component.html</context>
-          <context context-type="linenumber">19</context>
-        </context-group>
-        <context-group purpose="location">
-          <context context-type="sourcefile">src/app/components/common/edit-dialog/mail-account-edit-dialog/mail-account-edit-dialog.component.html</context>
-          <context context-type="linenumber">39</context>
-        </context-group>
-        <context-group purpose="location">
-          <context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html</context>
-          <context context-type="linenumber">80</context>
-        </context-group>
-        <context-group purpose="location">
-          <context context-type="sourcefile">src/app/components/common/edit-dialog/storage-path-edit-dialog/storage-path-edit-dialog.component.html</context>
-          <context context-type="linenumber">76</context>
-        </context-group>
-        <context-group purpose="location">
-          <context context-type="sourcefile">src/app/components/common/edit-dialog/tag-edit-dialog/tag-edit-dialog.component.html</context>
-          <context context-type="linenumber">30</context>
-        </context-group>
-        <context-group purpose="location">
-          <context context-type="sourcefile">src/app/components/common/edit-dialog/user-edit-dialog/user-edit-dialog.component.html</context>
-          <context context-type="linenumber">56</context>
-        </context-group>
-        <context-group purpose="location">
-          <context context-type="sourcefile">src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.html</context>
-          <context context-type="linenumber">115</context>
-        </context-group>
-        <context-group purpose="location">
-          <context context-type="sourcefile">src/app/components/common/permissions-dialog/permissions-dialog.component.html</context>
-          <context context-type="linenumber">31</context>
-        </context-group>
-        <context-group purpose="location">
-          <context context-type="sourcefile">src/app/components/common/profile-edit-dialog/profile-edit-dialog.component.html</context>
-          <context context-type="linenumber">182</context>
-        </context-group>
-        <context-group purpose="location">
-          <context context-type="sourcefile">src/app/components/document-list/bulk-editor/custom-fields-bulk-edit-dialog/custom-fields-bulk-edit-dialog.component.html</context>
-          <context context-type="linenumber">81</context>
-        </context-group>
-        <context-group purpose="location">
-          <context context-type="sourcefile">src/app/components/document-list/save-view-config-dialog/save-view-config-dialog.component.html</context>
-          <context context-type="linenumber">21</context>
-        </context-group>
-        <context-group purpose="location">
-          <context context-type="sourcefile">src/app/components/manage/saved-views/saved-views.component.html</context>
-          <context context-type="linenumber">82</context>
-        </context-group>
-      </trans-unit>
      <trans-unit id="6839066544204061364" datatype="html">
        <source>Use system language</source>
        <context-group purpose="location">
@@ -5736,7 +5729,7 @@
        <source>Open <x id="PH" equiv-text="this.title"/> filter</source>
        <context-group purpose="location">
          <context context-type="sourcefile">src/app/components/common/filterable-dropdown/filterable-dropdown.component.ts</context>
-          <context context-type="linenumber">788</context>
+          <context context-type="linenumber">823</context>
        </context-group>
      </trans-unit>
      <trans-unit id="7005745151564974365" datatype="html">
@@ -7489,7 +7482,7 @@
        </context-group>
        <context-group purpose="location">
          <context context-type="sourcefile">src/main.ts</context>
-          <context context-type="linenumber">411</context>
+          <context context-type="linenumber">416</context>
        </context-group>
      </trans-unit>
      <trans-unit id="5028777105388019087" datatype="html">
@@ -7684,6 +7677,13 @@
          <context context-type="linenumber">450</context>
        </context-group>
      </trans-unit>
+      <trans-unit id="3823219296477075982" datatype="html">
+        <source>Discard</source>
+        <context-group purpose="location">
+          <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.html</context>
+          <context context-type="linenumber">452</context>
+        </context-group>
+      </trans-unit>
      <trans-unit id="1309556917227148591" datatype="html">
        <source>Document loading...</source>
        <context-group purpose="location">
@@ -11352,14 +11352,14 @@
        <source>Prev</source>
        <context-group purpose="location">
          <context context-type="sourcefile">src/main.ts</context>
-          <context context-type="linenumber">410</context>
+          <context context-type="linenumber">415</context>
        </context-group>
      </trans-unit>
      <trans-unit id="1241348629231510663" datatype="html">
        <source>End</source>
        <context-group purpose="location">
          <context context-type="sourcefile">src/main.ts</context>
-          <context context-type="linenumber">412</context>
+          <context context-type="linenumber">417</context>
        </context-group>
      </trans-unit>
    </body>
--- a/src-ui/package.json
+++ b/src-ui/package.json
@@ -1,6 +1,6 @@
 {
  "name": "paperless-ngx-ui",
-  "version": "2.20.10",
+  "version": "2.20.13",
  "scripts": {
    "preinstall": "npx only-allow pnpm",
    "ng": "ng",
@@ -11,17 +11,17 @@
  },
  "private": true,
  "dependencies": {
-    "@angular/cdk": "^21.2.0",
-    "@angular/common": "~21.2.0",
-    "@angular/compiler": "~21.2.0",
-    "@angular/core": "~21.2.0",
-    "@angular/forms": "~21.2.0",
-    "@angular/localize": "~21.2.0",
-    "@angular/platform-browser": "~21.2.0",
-    "@angular/platform-browser-dynamic": "~21.2.0",
-    "@angular/router": "~21.2.0",
+    "@angular/cdk": "^21.2.2",
+    "@angular/common": "~21.2.4",
+    "@angular/compiler": "~21.2.4",
+    "@angular/core": "~21.2.4",
+    "@angular/forms": "~21.2.4",
+    "@angular/localize": "~21.2.4",
+    "@angular/platform-browser": "~21.2.4",
+    "@angular/platform-browser-dynamic": "~21.2.4",
+    "@angular/router": "~21.2.4",
    "@ng-bootstrap/ng-bootstrap": "^20.0.0",
-    "@ng-select/ng-select": "^21.4.1",
+    "@ng-select/ng-select": "^21.5.2",
    "@ngneat/dirty-check-forms": "^3.0.3",
    "@popperjs/core": "^2.11.8",
    "bootstrap": "^5.3.8",
@@ -42,26 +42,26 @@
  "devDependencies": {
    "@angular-builders/custom-webpack": "^21.0.3",
    "@angular-builders/jest": "^21.0.3",
-    "@angular-devkit/core": "^21.2.0",
-    "@angular-devkit/schematics": "^21.2.0",
+    "@angular-devkit/core": "^21.2.2",
+    "@angular-devkit/schematics": "^21.2.2",
    "@angular-eslint/builder": "21.3.0",
    "@angular-eslint/eslint-plugin": "21.3.0",
    "@angular-eslint/eslint-plugin-template": "21.3.0",
    "@angular-eslint/schematics": "21.3.0",
    "@angular-eslint/template-parser": "21.3.0",
-    "@angular/build": "^21.2.0",
-    "@angular/cli": "~21.2.0",
-    "@angular/compiler-cli": "~21.2.0",
+    "@angular/build": "^21.2.2",
+    "@angular/cli": "~21.2.2",
+    "@angular/compiler-cli": "~21.2.4",
    "@codecov/webpack-plugin": "^1.9.1",
    "@playwright/test": "^1.58.2",
    "@types/jest": "^30.0.0",
-    "@types/node": "^25.3.3",
-    "@typescript-eslint/eslint-plugin": "^8.54.0",
-    "@typescript-eslint/parser": "^8.54.0",
-    "@typescript-eslint/utils": "^8.54.0",
-    "eslint": "^10.0.2",
-    "jest": "30.2.0",
-    "jest-environment-jsdom": "^30.2.0",
+    "@types/node": "^25.4.0",
+    "@typescript-eslint/eslint-plugin": "^8.57.0",
+    "@typescript-eslint/parser": "^8.57.0",
+    "@typescript-eslint/utils": "^8.57.0",
+    "eslint": "^10.0.3",
+    "jest": "30.3.0",
+    "jest-environment-jsdom": "^30.3.0",
    "jest-junit": "^16.0.0",
    "jest-preset-angular": "^16.1.1",
    "jest-websocket-mock": "^2.5.0",
--- a/src-ui/pnpm-lock.yaml
+++ b/src-ui/pnpm-lock.yaml
--- a/src-ui/src/app/components/admin/config/config.component.html
+++ b/src-ui/src/app/components/admin/config/config.component.html
@@ -59,7 +59,7 @@
    <div [ngbNavOutlet]="nav" class="border-start border-end border-bottom p-3 mb-3 shadow-sm"></div>
    <div class="btn-toolbar" role="toolbar">
        <div class="btn-group me-2">
-            <button type="button" (click)="discardChanges()" class="btn btn-outline-secondary" [disabled]="loading || (isDirty$ | async) === false" i18n>Discard</button>
+            <button type="button" (click)="discardChanges()" class="btn btn-outline-secondary" [disabled]="loading || (isDirty$ | async) === false" i18n>Cancel</button>
        </div>
        <div class="btn-group">
            <button type="submit" class="btn btn-primary" [disabled]="loading || !configForm.valid || (isDirty$ | async) === false" i18n>Save</button>
--- a/src-ui/src/app/components/common/filterable-dropdown/filterable-dropdown.component.spec.ts
+++ b/src-ui/src/app/components/common/filterable-dropdown/filterable-dropdown.component.spec.ts
@@ -631,6 +631,59 @@ describe('FilterableDropdownComponent & FilterableDropdownSelectionModel', () =>
    ])
  })

+  it('deselecting a parent clears selected descendants', () => {
+    const root: Tag = { id: 100, name: 'Root Tag' }
+    const child: Tag = { id: 101, name: 'Child Tag', parent: root.id }
+    const grandchild: Tag = {
+      id: 102,
+      name: 'Grandchild Tag',
+      parent: child.id,
+    }
+    const other: Tag = { id: 103, name: 'Other Tag' }
+
+    selectionModel.items = [root, child, grandchild, other]
+    selectionModel.set(root.id, ToggleableItemState.Selected, false)
+    selectionModel.set(child.id, ToggleableItemState.Selected, false)
+    selectionModel.set(grandchild.id, ToggleableItemState.Selected, false)
+    selectionModel.set(other.id, ToggleableItemState.Selected, false)
+
+    selectionModel.toggle(root.id, false)
+
+    expect(selectionModel.getSelectedItems()).toEqual([other])
+  })
+
+  it('un-excluding a parent clears excluded descendants', () => {
+    const root: Tag = { id: 110, name: 'Root Tag' }
+    const child: Tag = { id: 111, name: 'Child Tag', parent: root.id }
+    const other: Tag = { id: 112, name: 'Other Tag' }
+
+    selectionModel.items = [root, child, other]
+    selectionModel.set(root.id, ToggleableItemState.Excluded, false)
+    selectionModel.set(child.id, ToggleableItemState.Excluded, false)
+    selectionModel.set(other.id, ToggleableItemState.Excluded, false)
+
+    selectionModel.exclude(root.id, false)
+
+    expect(selectionModel.getExcludedItems()).toEqual([other])
+  })
+
+  it('excluding a selected parent clears selected descendants', () => {
+    const root: Tag = { id: 120, name: 'Root Tag' }
+    const child: Tag = { id: 121, name: 'Child Tag', parent: root.id }
+    const other: Tag = { id: 122, name: 'Other Tag' }
+
+    selectionModel.manyToOne = true
+    selectionModel.items = [root, child, other]
+    selectionModel.set(root.id, ToggleableItemState.Selected, false)
+    selectionModel.set(child.id, ToggleableItemState.Selected, false)
+    selectionModel.set(other.id, ToggleableItemState.Selected, false)
+
+    selectionModel.exclude(root.id, false)
+
+    expect(selectionModel.getExcludedItems()).toEqual([root])
+    expect(selectionModel.getSelectedItems()).toEqual([other])
+  })
+
  it('resorts items immediately when document count sorting enabled', () => {
    const apple: Tag = { id: 55, name: 'Apple' }
    const zebra: Tag = { id: 56, name: 'Zebra' }
--- a/src-ui/src/app/components/common/filterable-dropdown/filterable-dropdown.component.ts
+++ b/src-ui/src/app/components/common/filterable-dropdown/filterable-dropdown.component.ts
@@ -235,6 +235,7 @@ export class FilterableDropdownSelectionModel {
      state == ToggleableItemState.Excluded
    ) {
      this.temporarySelectionStates.delete(id)
+      this.clearDescendantSelections(id)
    }

    if (!id) {
@@ -261,6 +262,7 @@ export class FilterableDropdownSelectionModel {

      if (this.manyToOne || this.singleSelect) {
        this.temporarySelectionStates.set(id, ToggleableItemState.Excluded)
+        this.clearDescendantSelections(id)

        if (this.singleSelect) {
          for (let key of this.temporarySelectionStates.keys()) {
@@ -281,9 +283,15 @@ export class FilterableDropdownSelectionModel {
          newState = ToggleableItemState.NotSelected
        }
        this.temporarySelectionStates.set(id, newState)
+        if (newState == ToggleableItemState.Excluded) {
+          this.clearDescendantSelections(id)
+        }
      }
    } else if (!id || state == ToggleableItemState.Excluded) {
      this.temporarySelectionStates.delete(id)
+      if (id) {
+        this.clearDescendantSelections(id)
+      }
    }

    if (fireEvent) {
@@ -295,6 +303,33 @@ export class FilterableDropdownSelectionModel {
    return this.selectionStates.get(id) || ToggleableItemState.NotSelected
  }

+  private clearDescendantSelections(id: number) {
+    for (const descendantID of this.getDescendantIDs(id)) {
+      this.temporarySelectionStates.delete(descendantID)
+    }
+  }
+
+  private getDescendantIDs(id: number): number[] {
+    const descendants: number[] = []
+    const queue: number[] = [id]
+
+    while (queue.length) {
+      const parentID = queue.shift()
+      for (const item of this._items) {
+        if (
+          typeof item?.id === 'number' &&
+          typeof (item as any)['parent'] === 'number' &&
+          (item as any)['parent'] === parentID
+        ) {
+          descendants.push(item.id)
+          queue.push(item.id)
+        }
+      }
+    }
+
+    return descendants
+  }
+
  get logicalOperator(): LogicalOperator {
    return this.temporaryLogicalOperator
  }
--- a/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.html
+++ b/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.html
@@ -15,7 +15,7 @@
      }

      @if (document && displayFields?.includes(DisplayField.TAGS)) {
-        <div class="tags d-flex flex-column text-end position-absolute me-1 fs-6">
+        <div class="tags d-flex flex-column text-end position-absolute me-1 fs-6" [class.tags-no-wrap]="document.tags.length > 3">
          @for (tagID of tagIDs; track tagID) {
            <pngx-tag [tagID]="tagID" (click)="clickTag.emit(tagID);$event.stopPropagation()" [clickable]="true" linkTitle="Toggle tag filter" i18n-linkTitle></pngx-tag>
          }
--- a/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.scss
+++ b/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.scss
@@ -72,4 +72,14 @@ a {
  max-width: 80%;
  row-gap: .2rem;
  line-height: 1;
+
+  &.tags-no-wrap {
+    ::ng-deep .badge {
+      display: inline-block;
+      max-width: 100%;
+      white-space: nowrap;
+      overflow: hidden;
+      text-overflow: ellipsis;
+    }
+  }
 }
--- a/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.spec.ts
+++ b/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.spec.ts
@@ -82,6 +82,16 @@ describe('DocumentCardSmallComponent', () => {
    ).toHaveLength(6)
  })

+  it('should clear hidden tag counter when tag count falls below the limit', () => {
+    expect(component.moreTags).toEqual(3)
+
+    component.document.tags = [1, 2, 3, 4, 5, 6]
+    fixture.detectChanges()
+
+    expect(component.moreTags).toBeNull()
+    expect(fixture.nativeElement.textContent).not.toContain('+ 3')
+  })
+
  it('should try to close the preview on mouse leave', () => {
    component.popupPreview = {
      close: jest.fn(),
--- a/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.ts
+++ b/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.ts
@@ -126,6 +126,7 @@ export class DocumentCardSmallComponent
      this.moreTags = this.document.tags.length - (limit - 1)
      return this.document.tags.slice(0, limit - 1)
    } else {
+      this.moreTags = null
      return this.document.tags
    }
  }
--- a/src-ui/src/app/interceptors/auth-expiry.interceptor.spec.ts
+++ b/src-ui/src/app/interceptors/auth-expiry.interceptor.spec.ts
@@ -0,0 +1,122 @@
+import {
+  HttpErrorResponse,
+  HttpHandlerFn,
+  HttpRequest,
+} from '@angular/common/http'
+import { throwError } from 'rxjs'
+import * as navUtils from '../utils/navigation'
+import { createAuthExpiryInterceptor } from './auth-expiry.interceptor'
+
+describe('withAuthExpiryInterceptor', () => {
+  let interceptor: ReturnType<typeof createAuthExpiryInterceptor>
+  let dateNowSpy: jest.SpiedFunction<typeof Date.now>
+
+  beforeEach(() => {
+    interceptor = createAuthExpiryInterceptor()
+    dateNowSpy = jest.spyOn(Date, 'now').mockReturnValue(1000)
+  })
+
+  afterEach(() => {
+    jest.restoreAllMocks()
+  })
+
+  it('reloads when an API request returns 401', () => {
+    const reloadSpy = jest
+      .spyOn(navUtils, 'locationReload')
+      .mockImplementation(() => {})
+
+    interceptor(
+      new HttpRequest('GET', '/api/documents/'),
+      failingHandler('/api/documents/', 401)
+    ).subscribe({
+      error: () => undefined,
+    })
+
+    expect(reloadSpy).toHaveBeenCalledTimes(1)
+  })
+
+  it('does not reload for non-401 errors', () => {
+    const reloadSpy = jest
+      .spyOn(navUtils, 'locationReload')
+      .mockImplementation(() => {})
+
+    interceptor(
+      new HttpRequest('GET', '/api/documents/'),
+      failingHandler('/api/documents/', 500)
+    ).subscribe({
+      error: () => undefined,
+    })
+
+    expect(reloadSpy).not.toHaveBeenCalled()
+  })
+
+  it('does not reload for non-api 401 responses', () => {
+    const reloadSpy = jest
+      .spyOn(navUtils, 'locationReload')
+      .mockImplementation(() => {})
+
+    interceptor(
+      new HttpRequest('GET', '/accounts/profile/'),
+      failingHandler('/accounts/profile/', 401)
+    ).subscribe({
+      error: () => undefined,
+    })
+
+    expect(reloadSpy).not.toHaveBeenCalled()
+  })
+
+  it('reloads only once even with multiple API 401 responses', () => {
+    const reloadSpy = jest
+      .spyOn(navUtils, 'locationReload')
+      .mockImplementation(() => {})
+
+    const request = new HttpRequest('GET', '/api/documents/')
+    const handler = failingHandler('/api/documents/', 401)
+
+    interceptor(request, handler).subscribe({
+      error: () => undefined,
+    })
+    interceptor(request, handler).subscribe({
+      error: () => undefined,
+    })
+
+    expect(reloadSpy).toHaveBeenCalledTimes(1)
+  })
+
+  it('retries reload after cooldown for repeated API 401 responses', () => {
+    const reloadSpy = jest
+      .spyOn(navUtils, 'locationReload')
+      .mockImplementation(() => {})
+
+    dateNowSpy
+      .mockReturnValueOnce(1000)
+      .mockReturnValueOnce(2500)
+      .mockReturnValueOnce(3501)
+
+    const request = new HttpRequest('GET', '/api/documents/')
+    const handler = failingHandler('/api/documents/', 401)
+
+    interceptor(request, handler).subscribe({
+      error: () => undefined,
+    })
+    interceptor(request, handler).subscribe({
+      error: () => undefined,
+    })
+    interceptor(request, handler).subscribe({
+      error: () => undefined,
+    })
+
+    expect(reloadSpy).toHaveBeenCalledTimes(2)
+  })
+})
+
+function failingHandler(url: string, status: number): HttpHandlerFn {
+  return (_request) =>
+    throwError(
+      () =>
+        new HttpErrorResponse({
+          status,
+          url,
+        })
+    )
+}
--- a/src-ui/src/app/interceptors/auth-expiry.interceptor.ts
+++ b/src-ui/src/app/interceptors/auth-expiry.interceptor.ts
@@ -0,0 +1,37 @@
+import {
+  HttpErrorResponse,
+  HttpEvent,
+  HttpHandlerFn,
+  HttpInterceptorFn,
+  HttpRequest,
+} from '@angular/common/http'
+import { catchError, Observable, throwError } from 'rxjs'
+import { locationReload } from '../utils/navigation'
+
+export const createAuthExpiryInterceptor = (): HttpInterceptorFn => {
+  let lastReloadAttempt = Number.NEGATIVE_INFINITY
+
+  return (
+    request: HttpRequest<unknown>,
+    next: HttpHandlerFn
+  ): Observable<HttpEvent<unknown>> =>
+    next(request).pipe(
+      catchError((error: unknown) => {
+        if (
+          error instanceof HttpErrorResponse &&
+          error.status === 401 &&
+          request.url.includes('/api/')
+        ) {
+          const now = Date.now()
+          if (now - lastReloadAttempt >= 2000) {
+            lastReloadAttempt = now
+            locationReload()
+          }
+        }
+
+        return throwError(() => error)
+      })
+    )
+}
+
+export const withAuthExpiryInterceptor = createAuthExpiryInterceptor()
--- a/src-ui/src/environments/environment.prod.ts
+++ b/src-ui/src/environments/environment.prod.ts
@@ -6,7 +6,7 @@ export const environment = {
  apiVersion: '10', // match src/paperless/settings.py
  appTitle: 'Paperless-ngx',
  tag: 'prod',
-  version: '2.20.10',
+  version: '2.20.13',
  webSocketHost: window.location.host,
  webSocketProtocol: window.location.protocol == 'https:' ? 'wss:' : 'ws:',
  webSocketBaseUrl: base_url.pathname + 'ws/',
--- a/src-ui/src/main.ts
+++ b/src-ui/src/main.ts
@@ -154,6 +154,7 @@ import { DirtyDocGuard } from './app/guards/dirty-doc.guard'
 import { DirtySavedViewGuard } from './app/guards/dirty-saved-view.guard'
 import { PermissionsGuard } from './app/guards/permissions.guard'
 import { withApiVersionInterceptor } from './app/interceptors/api-version.interceptor'
+import { withAuthExpiryInterceptor } from './app/interceptors/auth-expiry.interceptor'
 import { withCsrfInterceptor } from './app/interceptors/csrf.interceptor'
 import { DocumentTitlePipe } from './app/pipes/document-title.pipe'
 import { FilterPipe } from './app/pipes/filter.pipe'
@@ -399,7 +400,11 @@ bootstrapApplication(AppComponent, {
    StoragePathNamePipe,
    provideHttpClient(
      withInterceptorsFromDi(),
-      withInterceptors([withCsrfInterceptor, withApiVersionInterceptor]),
+      withInterceptors([
+        withCsrfInterceptor,
+        withApiVersionInterceptor,
+        withAuthExpiryInterceptor,
+      ]),
      withFetch()
    ),
    provideUiTour({
--- a/src-ui/src/theme.scss
+++ b/src-ui/src/theme.scss
@@ -150,6 +150,15 @@ $form-check-radio-checked-bg-image-dark: url("data:image/svg+xml,<svg xmlns='htt
    background-color: var(--pngx-body-color-accent);
  }

+  .list-group-item-action:not(.active):active {
+    --bs-list-group-action-active-color: var(--bs-body-color);
+    --bs-list-group-action-active-bg: var(--pngx-bg-darker);
+  }
+
+  .form-control:hover::file-selector-button {
+    background-color:var(--pngx-bg-dark) !important
+  }
+
  .search-container {
    input, input:focus, i-bs[name="search"] , ::placeholder {
      color: var(--pngx-primary-text-contrast) !important;
--- a/src/documents/bulk_edit.py
+++ b/src/documents/bulk_edit.py
@@ -576,8 +576,8 @@ def merge(
        except Exception:
            restore_archive_serial_numbers(backup)
            raise
-        else:
-            consume_task.delay()
+    else:
+        consume_task.delay()

    return "OK"

--- a/src/documents/checks.py
+++ b/src/documents/checks.py
@@ -3,25 +3,20 @@ from django.core.checks import Error
 from django.core.checks import Warning
 from django.core.checks import register

-from documents.signals import document_consumer_declaration
 from documents.templating.utils import convert_format_str_to_template_format
+from paperless.parsers.registry import get_parser_registry


@register()
 def parser_check(app_configs, **kwargs):
-    parsers = []
-    for response in document_consumer_declaration.send(None):
-        parsers.append(response[1])
-
-    if len(parsers) == 0:
+    if not get_parser_registry().all_parsers():
        return [
            Error(
                "No parsers found. This is a bug. The consumer won't be "
                "able to consume any documents without parsers.",
            ),
        ]
-    else:
-        return []
+    return []


@register()
--- a/src/documents/classifier.py
+++ b/src/documents/classifier.py
@@ -9,6 +9,7 @@ from pathlib import Path
 from typing import TYPE_CHECKING

 if TYPE_CHECKING:
+    from collections.abc import Callable
    from collections.abc import Iterator
    from datetime import datetime

@@ -191,7 +192,12 @@ class DocumentClassifier:

        target_file_temp.rename(target_file)

-    def train(self) -> bool:
+    def train(
+        self,
+        status_callback: Callable[[str], None] | None = None,
+    ) -> bool:
+        notify = status_callback if status_callback is not None else lambda _: None
+
        # Get non-inbox documents
        docs_queryset = (
            Document.objects.exclude(
@@ -213,6 +219,7 @@ class DocumentClassifier:

        # Step 1: Extract and preprocess training data from the database.
        logger.debug("Gathering data from database...")
+        notify(f"Gathering data from {docs_queryset.count()} document(s)...")
        hasher = sha256()
        for doc in docs_queryset:
            y = -1
@@ -290,6 +297,7 @@ class DocumentClassifier:

        # Step 2: vectorize data
        logger.debug("Vectorizing data...")
+        notify("Vectorizing document content...")

        def content_generator() -> Iterator[str]:
            """
@@ -316,6 +324,7 @@ class DocumentClassifier:
        # Step 3: train the classifiers
        if num_tags > 0:
            logger.debug("Training tags classifier...")
+            notify(f"Training tags classifier ({num_tags} tag(s))...")

            if num_tags == 1:
                # Special case where only one tag has auto:
@@ -339,6 +348,9 @@ class DocumentClassifier:

        if num_correspondents > 0:
            logger.debug("Training correspondent classifier...")
+            notify(
+                f"Training correspondent classifier ({num_correspondents} correspondent(s))...",
+            )
            self.correspondent_classifier = MLPClassifier(tol=0.01)
            self.correspondent_classifier.fit(data_vectorized, labels_correspondent)
        else:
@@ -349,6 +361,9 @@ class DocumentClassifier:

        if num_document_types > 0:
            logger.debug("Training document type classifier...")
+            notify(
+                f"Training document type classifier ({num_document_types} type(s))...",
+            )
            self.document_type_classifier = MLPClassifier(tol=0.01)
            self.document_type_classifier.fit(data_vectorized, labels_document_type)
        else:
@@ -361,6 +376,7 @@ class DocumentClassifier:
            logger.debug(
                "Training storage paths classifier...",
            )
+            notify(f"Training storage path classifier ({num_storage_paths} path(s))...")
            self.storage_path_classifier = MLPClassifier(tol=0.01)
            self.storage_path_classifier.fit(
                data_vectorized,
--- a/src/documents/consumer.py
+++ b/src/documents/consumer.py
@@ -32,9 +32,7 @@ from documents.models import DocumentType
 from documents.models import StoragePath
 from documents.models import Tag
 from documents.models import WorkflowTrigger
-from documents.parsers import DocumentParser
 from documents.parsers import ParseError
-from documents.parsers import get_parser_class_for_mime_type
 from documents.permissions import set_permissions_for_object
 from documents.plugins.base import AlwaysRunPluginMixin
 from documents.plugins.base import ConsumeTaskPlugin
@@ -51,28 +49,13 @@ from documents.templating.workflows import parse_w_workflow_placeholders
 from documents.utils import copy_basic_file_stats
 from documents.utils import copy_file_with_basic_stats
 from documents.utils import run_subprocess
-from paperless.parsers.text import TextDocumentParser
-from paperless_mail.parsers import MailDocumentParser
+from paperless.parsers import ParserContext
+from paperless.parsers import ParserProtocol
+from paperless.parsers.registry import get_parser_registry

 LOGGING_NAME: Final[str] = "paperless.consumer"


-def _parser_cleanup(parser: DocumentParser) -> None:
-    """
-    Call cleanup on a parser, handling the new-style context-manager parsers.
-
-    New-style parsers (e.g. TextDocumentParser) use __exit__ for teardown
-    instead of a cleanup() method.  This shim will be removed once all existing parsers
-    have switched to the new style and this consumer is updated to use it
-
-    TODO(stumpylog): Remove me in the future
-    """
-    if isinstance(parser, TextDocumentParser):
-        parser.__exit__(None, None, None)
-    else:
-        parser.cleanup()
-
-
 class WorkflowTriggerPlugin(
    NoCleanupPluginMixin,
    NoSetupPluginMixin,
@@ -409,8 +392,12 @@ class ConsumerPlugin(
                    self.log.error(f"Error attempting to clean PDF: {e}")

            # Based on the mime type, get the parser for that type
-            parser_class: type[DocumentParser] | None = get_parser_class_for_mime_type(
-                mime_type,
+            parser_class: type[ParserProtocol] | None = (
+                get_parser_registry().get_parser_for_file(
+                    mime_type,
+                    self.filename,
+                    self.working_copy,
+                )
            )
            if not parser_class:
                tempdir.cleanup()
@@ -433,301 +420,275 @@ class ConsumerPlugin(
                tempdir.cleanup()
            raise

-        def progress_callback(
-            current_progress,
-            max_progress,
-        ) -> None:  # pragma: no cover
-            # recalculate progress to be within 20 and 80
-            p = int((current_progress / max_progress) * 50 + 20)
-            self._send_progress(p, 100, ProgressStatusOptions.WORKING)
-
        # This doesn't parse the document yet, but gives us a parser.
-
-        document_parser: DocumentParser = parser_class(
-            self.logging_group,
-            progress_callback=progress_callback,
-        )
-
-        self.log.debug(f"Parser: {type(document_parser).__name__}")
-
-        # Parse the document. This may take some time.
-
-        text = None
-        date = None
-        thumbnail = None
-        archive_path = None
-        page_count = None
-
-        try:
-            self._send_progress(
-                20,
-                100,
-                ProgressStatusOptions.WORKING,
-                ConsumerStatusShortMessage.PARSING_DOCUMENT,
+        with parser_class() as document_parser:
+            document_parser.configure(
+                ParserContext(mailrule_id=self.input_doc.mailrule_id),
            )
-            self.log.debug(f"Parsing {self.filename}...")
-            if (
-                isinstance(document_parser, MailDocumentParser)
-                and self.input_doc.mailrule_id
-            ):
-                document_parser.parse(
-                    self.working_copy,
-                    mime_type,
-                    self.filename,
-                    self.input_doc.mailrule_id,
-                )
-            elif isinstance(document_parser, TextDocumentParser):
-                # TODO(stumpylog): Remove me in the future
-                document_parser.parse(self.working_copy, mime_type)
-            else:
-                document_parser.parse(self.working_copy, mime_type, self.filename)

-            self.log.debug(f"Generating thumbnail for {self.filename}...")
-            self._send_progress(
-                70,
-                100,
-                ProgressStatusOptions.WORKING,
-                ConsumerStatusShortMessage.GENERATING_THUMBNAIL,
-            )
-            if isinstance(document_parser, TextDocumentParser):
-                # TODO(stumpylog): Remove me in the future
-                thumbnail = document_parser.get_thumbnail(self.working_copy, mime_type)
-            else:
-                thumbnail = document_parser.get_thumbnail(
-                    self.working_copy,
-                    mime_type,
-                    self.filename,
-                )
+            self.log.debug(f"Parser: {document_parser.name} v{document_parser.version}")

-            text = document_parser.get_text()
-            date = document_parser.get_date()
-            if date is None:
+            # Parse the document. This may take some time.
+
+            text = None
+            date = None
+            thumbnail = None
+            archive_path = None
+            page_count = None
+
+            try:
                self._send_progress(
-                    90,
+                    20,
                    100,
                    ProgressStatusOptions.WORKING,
-                    ConsumerStatusShortMessage.PARSE_DATE,
+                    ConsumerStatusShortMessage.PARSING_DOCUMENT,
                )
-                with get_date_parser() as date_parser:
-                    date = next(date_parser.parse(self.filename, text), None)
-            archive_path = document_parser.get_archive_path()
-            page_count = document_parser.get_page_count(self.working_copy, mime_type)
+                self.log.debug(f"Parsing {self.filename}...")

-        except ParseError as e:
-            _parser_cleanup(document_parser)
-            if tempdir:
-                tempdir.cleanup()
-            self._fail(
-                str(e),
-                f"Error occurred while consuming document {self.filename}: {e}",
-                exc_info=True,
-                exception=e,
-            )
-        except Exception as e:
-            _parser_cleanup(document_parser)
-            if tempdir:
-                tempdir.cleanup()
-            self._fail(
-                str(e),
-                f"Unexpected error while consuming document {self.filename}: {e}",
-                exc_info=True,
-                exception=e,
-            )
+                document_parser.parse(self.working_copy, mime_type)

-        # Prepare the document classifier.
+                self.log.debug(f"Generating thumbnail for {self.filename}...")
+                self._send_progress(
+                    70,
+                    100,
+                    ProgressStatusOptions.WORKING,
+                    ConsumerStatusShortMessage.GENERATING_THUMBNAIL,
+                )
+                thumbnail = document_parser.get_thumbnail(self.working_copy, mime_type)

-        # TODO: I don't really like to do this here, but this way we avoid
-        #   reloading the classifier multiple times, since there are multiple
-        #   post-consume hooks that all require the classifier.
-
-        classifier = load_classifier()
-
-        self._send_progress(
-            95,
-            100,
-            ProgressStatusOptions.WORKING,
-            ConsumerStatusShortMessage.SAVE_DOCUMENT,
-        )
-        # now that everything is done, we can start to store the document
-        # in the system. This will be a transaction and reasonably fast.
-        try:
-            with transaction.atomic():
-                # store the document.
-                if self.input_doc.root_document_id:
-                    # If this is a new version of an existing document, we need
-                    # to make sure we're not creating a new document, but updating
-                    # the existing one.
-                    root_doc = Document.objects.get(
-                        pk=self.input_doc.root_document_id,
+                text = document_parser.get_text()
+                date = document_parser.get_date()
+                if date is None:
+                    self._send_progress(
+                        90,
+                        100,
+                        ProgressStatusOptions.WORKING,
+                        ConsumerStatusShortMessage.PARSE_DATE,
                    )
-                    original_document = self._create_version_from_root(
-                        root_doc,
-                        text=text,
-                        page_count=page_count,
-                        mime_type=mime_type,
-                    )
-                    actor = None
+                    with get_date_parser() as date_parser:
+                        date = next(date_parser.parse(self.filename, text), None)
+                archive_path = document_parser.get_archive_path()
+                page_count = document_parser.get_page_count(
+                    self.working_copy,
+                    mime_type,
+                )

-                    # Save the new version, potentially creating an audit log entry for the version addition if enabled.
-                    if (
-                        settings.AUDIT_LOG_ENABLED
-                        and self.metadata.actor_id is not None
-                    ):
-                        actor = User.objects.filter(pk=self.metadata.actor_id).first()
-                        if actor is not None:
-                            from auditlog.context import (  # type: ignore[import-untyped]
-                                set_actor,
-                            )
+            except ParseError as e:
+                if tempdir:
+                    tempdir.cleanup()
+                self._fail(
+                    str(e),
+                    f"Error occurred while consuming document {self.filename}: {e}",
+                    exc_info=True,
+                    exception=e,
+                )
+            except Exception as e:
+                if tempdir:
+                    tempdir.cleanup()
+                self._fail(
+                    str(e),
+                    f"Unexpected error while consuming document {self.filename}: {e}",
+                    exc_info=True,
+                    exception=e,
+                )

-                            with set_actor(actor):
+            # Prepare the document classifier.
+
+            # TODO: I don't really like to do this here, but this way we avoid
+            #   reloading the classifier multiple times, since there are multiple
+            #   post-consume hooks that all require the classifier.
+
+            classifier = load_classifier()
+
+            self._send_progress(
+                95,
+                100,
+                ProgressStatusOptions.WORKING,
+                ConsumerStatusShortMessage.SAVE_DOCUMENT,
+            )
+            # now that everything is done, we can start to store the document
+            # in the system. This will be a transaction and reasonably fast.
+            try:
+                with transaction.atomic():
+                    # store the document.
+                    if self.input_doc.root_document_id:
+                        # If this is a new version of an existing document, we need
+                        # to make sure we're not creating a new document, but updating
+                        # the existing one.
+                        root_doc = Document.objects.get(
+                            pk=self.input_doc.root_document_id,
+                        )
+                        original_document = self._create_version_from_root(
+                            root_doc,
+                            text=text,
+                            page_count=page_count,
+                            mime_type=mime_type,
+                        )
+                        actor = None
+
+                        # Save the new version, potentially creating an audit log entry for the version addition if enabled.
+                        if (
+                            settings.AUDIT_LOG_ENABLED
+                            and self.metadata.actor_id is not None
+                        ):
+                            actor = User.objects.filter(
+                                pk=self.metadata.actor_id,
+                            ).first()
+                            if actor is not None:
+                                from auditlog.context import (  # type: ignore[import-untyped]
+                                    set_actor,
+                                )
+
+                                with set_actor(actor):
+                                    original_document.save()
+                            else:
                                original_document.save()
                        else:
                            original_document.save()
+
+                        # Create a log entry for the version addition, if enabled
+                        if settings.AUDIT_LOG_ENABLED:
+                            from auditlog.models import (  # type: ignore[import-untyped]
+                                LogEntry,
+                            )
+
+                            LogEntry.objects.log_create(
+                                instance=root_doc,
+                                changes={
+                                    "Version Added": ["None", original_document.id],
+                                },
+                                action=LogEntry.Action.UPDATE,
+                                actor=actor,
+                                additional_data={
+                                    "reason": "Version added",
+                                    "version_id": original_document.id,
+                                },
+                            )
+                        document = original_document
                    else:
-                        original_document.save()
-
-                    # Create a log entry for the version addition, if enabled
-                    if settings.AUDIT_LOG_ENABLED:
-                        from auditlog.models import (  # type: ignore[import-untyped]
-                            LogEntry,
+                        document = self._store(
+                            text=text,
+                            date=date,
+                            page_count=page_count,
+                            mime_type=mime_type,
                        )

-                        LogEntry.objects.log_create(
-                            instance=root_doc,
-                            changes={
-                                "Version Added": ["None", original_document.id],
-                            },
-                            action=LogEntry.Action.UPDATE,
-                            actor=actor,
-                            additional_data={
-                                "reason": "Version added",
-                                "version_id": original_document.id,
-                            },
-                        )
-                    document = original_document
-                else:
-                    document = self._store(
-                        text=text,
-                        date=date,
-                        page_count=page_count,
-                        mime_type=mime_type,
-                    )
+                    # If we get here, it was successful. Proceed with post-consume
+                    # hooks. If they fail, nothing will get changed.

-                # If we get here, it was successful. Proceed with post-consume
-                # hooks. If they fail, nothing will get changed.
-
-                document_consumption_finished.send(
-                    sender=self.__class__,
-                    document=document,
-                    logging_group=self.logging_group,
-                    classifier=classifier,
-                    original_file=self.unmodified_original
-                    if self.unmodified_original
-                    else self.working_copy,
-                )
-
-                # After everything is in the database, copy the files into
-                # place. If this fails, we'll also rollback the transaction.
-                with FileLock(settings.MEDIA_LOCK):
-                    generated_filename = generate_unique_filename(document)
-                    if (
-                        len(str(generated_filename))
-                        > Document.MAX_STORED_FILENAME_LENGTH
-                    ):
-                        self.log.warning(
-                            "Generated source filename exceeds db path limit, falling back to default naming",
-                        )
-                        generated_filename = generate_filename(
-                            document,
-                            use_format=False,
-                        )
-                    document.filename = generated_filename
-                    create_source_path_directory(document.source_path)
-
-                    self._write(
-                        self.unmodified_original
-                        if self.unmodified_original is not None
+                    document_consumption_finished.send(
+                        sender=self.__class__,
+                        document=document,
+                        logging_group=self.logging_group,
+                        classifier=classifier,
+                        original_file=self.unmodified_original
+                        if self.unmodified_original
                        else self.working_copy,
-                        document.source_path,
                    )

-                    self._write(
-                        thumbnail,
-                        document.thumbnail_path,
-                    )
-
-                    if archive_path and Path(archive_path).is_file():
-                        generated_archive_filename = generate_unique_filename(
-                            document,
-                            archive_filename=True,
-                        )
+                    # After everything is in the database, copy the files into
+                    # place. If this fails, we'll also rollback the transaction.
+                    with FileLock(settings.MEDIA_LOCK):
+                        generated_filename = generate_unique_filename(document)
                        if (
-                            len(str(generated_archive_filename))
+                            len(str(generated_filename))
                            > Document.MAX_STORED_FILENAME_LENGTH
                        ):
                            self.log.warning(
-                                "Generated archive filename exceeds db path limit, falling back to default naming",
+                                "Generated source filename exceeds db path limit, falling back to default naming",
                            )
-                            generated_archive_filename = generate_filename(
+                            generated_filename = generate_filename(
                                document,
-                                archive_filename=True,
                                use_format=False,
                            )
-                        document.archive_filename = generated_archive_filename
-                        create_source_path_directory(document.archive_path)
+                        document.filename = generated_filename
+                        create_source_path_directory(document.source_path)
+
                        self._write(
-                            archive_path,
-                            document.archive_path,
+                            self.unmodified_original
+                            if self.unmodified_original is not None
+                            else self.working_copy,
+                            document.source_path,
                        )

-                        with Path(archive_path).open("rb") as f:
-                            document.archive_checksum = hashlib.md5(
-                                f.read(),
-                            ).hexdigest()
+                        self._write(
+                            thumbnail,
+                            document.thumbnail_path,
+                        )

-                # Don't save with the lock active. Saving will cause the file
-                # renaming logic to acquire the lock as well.
-                # This triggers things like file renaming
-                document.save()
+                        if archive_path and Path(archive_path).is_file():
+                            generated_archive_filename = generate_unique_filename(
+                                document,
+                                archive_filename=True,
+                            )
+                            if (
+                                len(str(generated_archive_filename))
+                                > Document.MAX_STORED_FILENAME_LENGTH
+                            ):
+                                self.log.warning(
+                                    "Generated archive filename exceeds db path limit, falling back to default naming",
+                                )
+                                generated_archive_filename = generate_filename(
+                                    document,
+                                    archive_filename=True,
+                                    use_format=False,
+                                )
+                            document.archive_filename = generated_archive_filename
+                            create_source_path_directory(document.archive_path)
+                            self._write(
+                                archive_path,
+                                document.archive_path,
+                            )

-                if document.root_document_id:
-                    document_updated.send(
-                        sender=self.__class__,
-                        document=document.root_document,
-                    )
+                            with Path(archive_path).open("rb") as f:
+                                document.archive_checksum = hashlib.md5(
+                                    f.read(),
+                                ).hexdigest()

-                # Delete the file only if it was successfully consumed
-                self.log.debug(f"Deleting original file {self.input_doc.original_file}")
-                self.input_doc.original_file.unlink()
-                self.log.debug(f"Deleting working copy {self.working_copy}")
-                self.working_copy.unlink()
-                if self.unmodified_original is not None:  # pragma: no cover
+                    # Don't save with the lock active. Saving will cause the file
+                    # renaming logic to acquire the lock as well.
+                    # This triggers things like file renaming
+                    document.save()
+
+                    if document.root_document_id:
+                        document_updated.send(
+                            sender=self.__class__,
+                            document=document.root_document,
+                        )
+
+                    # Delete the file only if it was successfully consumed
                    self.log.debug(
-                        f"Deleting unmodified original file {self.unmodified_original}",
+                        f"Deleting original file {self.input_doc.original_file}",
                    )
-                    self.unmodified_original.unlink()
+                    self.input_doc.original_file.unlink()
+                    self.log.debug(f"Deleting working copy {self.working_copy}")
+                    self.working_copy.unlink()
+                    if self.unmodified_original is not None:  # pragma: no cover
+                        self.log.debug(
+                            f"Deleting unmodified original file {self.unmodified_original}",
+                        )
+                        self.unmodified_original.unlink()

-                # https://github.com/jonaswinkler/paperless-ng/discussions/1037
-                shadow_file = (
-                    Path(self.input_doc.original_file).parent
-                    / f"._{Path(self.input_doc.original_file).name}"
+                    # https://github.com/jonaswinkler/paperless-ng/discussions/1037
+                    shadow_file = (
+                        Path(self.input_doc.original_file).parent
+                        / f"._{Path(self.input_doc.original_file).name}"
+                    )
+
+                    if Path(shadow_file).is_file():
+                        self.log.debug(f"Deleting shadow file {shadow_file}")
+                        Path(shadow_file).unlink()
+
+            except Exception as e:
+                self._fail(
+                    str(e),
+                    f"The following error occurred while storing document "
+                    f"{self.filename} after parsing: {e}",
+                    exc_info=True,
+                    exception=e,
                )
-
-                if Path(shadow_file).is_file():
-                    self.log.debug(f"Deleting shadow file {shadow_file}")
-                    Path(shadow_file).unlink()
-
-        except Exception as e:
-            self._fail(
-                str(e),
-                f"The following error occurred while storing document "
-                f"{self.filename} after parsing: {e}",
-                exc_info=True,
-                exception=e,
-            )
-        finally:
-            _parser_cleanup(document_parser)
-            tempdir.cleanup()
+            finally:
+                tempdir.cleanup()

        self.run_post_consume_script(document)

--- a/src/documents/index.py
+++ b/src/documents/index.py
@@ -477,7 +477,14 @@ class DelayedFullTextQuery(DelayedQuery):
        try:
            corrected = self.searcher.correct_query(q, q_str)
            if corrected.string != q_str:
-                suggested_correction = corrected.string
+                corrected_results = self.searcher.search(
+                    corrected.query,
+                    limit=1,
+                    filter=MappedDocIdSet(self.filter_queryset, self.searcher.ixreader),
+                    scored=False,
+                )
+                if len(corrected_results) > 0:
+                    suggested_correction = corrected.string
        except Exception as e:
            logger.info(
                "Error while correcting query %s: %s",
--- a/src/documents/management/commands/document_create_classifier.py
+++ b/src/documents/management/commands/document_create_classifier.py
@@ -1,13 +1,32 @@
-from django.core.management.base import BaseCommand
+from __future__ import annotations

+import time
+
+from documents.management.commands.base import PaperlessCommand
 from documents.tasks import train_classifier


-class Command(BaseCommand):
+class Command(PaperlessCommand):
    help = (
        "Trains the classifier on your data and saves the resulting models to a "
        "file. The document consumer will then automatically use this new model."
    )
+    supports_progress_bar = False
+    supports_multiprocessing = False

-    def handle(self, *args, **options):
-        train_classifier(scheduled=False)
+    def handle(self, *args, **options) -> None:
+        start = time.monotonic()
+
+        with (
+            self.buffered_logging("paperless.tasks"),
+            self.buffered_logging("paperless.classifier"),
+        ):
+            train_classifier(
+                scheduled=False,
+                status_callback=lambda msg: self.console.print(f"  {msg}"),
+            )
+
+        elapsed = time.monotonic() - start
+        self.console.print(
+            f"[green]✓[/green] Classifier training complete ({elapsed:.1f}s)",
+        )
--- a/src/documents/management/commands/document_importer.py
+++ b/src/documents/management/commands/document_importer.py
@@ -205,7 +205,7 @@ class Command(CryptMixin, PaperlessCommand):
                ContentType.objects.all().delete()
                Permission.objects.all().delete()
                for manifest_path in self.manifest_paths:
-                    call_command("loaddata", manifest_path)
+                    call_command("loaddata", manifest_path, skip_checks=True)
        except (FieldDoesNotExist, DeserializationError, IntegrityError) as e:
            self.stdout.write(self.style.ERROR("Database import failed"))
            if (
--- a/src/documents/management/commands/document_thumbnails.py
+++ b/src/documents/management/commands/document_thumbnails.py
@@ -3,14 +3,18 @@ import shutil

 from documents.management.commands.base import PaperlessCommand
 from documents.models import Document
-from documents.parsers import get_parser_class_for_mime_type
+from paperless.parsers.registry import get_parser_registry

 logger = logging.getLogger("paperless.management.thumbnails")


 def _process_document(doc_id: int) -> None:
    document: Document = Document.objects.get(id=doc_id)
-    parser_class = get_parser_class_for_mime_type(document.mime_type)
+    parser_class = get_parser_registry().get_parser_for_file(
+        document.mime_type,
+        document.original_filename or "",
+        document.source_path,
+    )

    if parser_class is None:
        logger.warning(
@@ -20,18 +24,9 @@ def _process_document(doc_id: int) -> None:
        )
        return

-    parser = parser_class(logging_group=None)
-
-    try:
-        thumb = parser.get_thumbnail(
-            document.source_path,
-            document.mime_type,
-            document.get_public_filename(),
-        )
+    with parser_class() as parser:
+        thumb = parser.get_thumbnail(document.source_path, document.mime_type)
        shutil.move(thumb, document.thumbnail_path)
-    finally:
-        # TODO(stumpylog): Cleanup once all parsers are handled
-        parser.cleanup()


 class Command(PaperlessCommand):
--- a/src/documents/parsers.py
+++ b/src/documents/parsers.py
@@ -3,84 +3,47 @@ from __future__ import annotations
 import logging
 import mimetypes
 import os
-import re
 import shutil
 import subprocess
 import tempfile
-from functools import lru_cache
 from pathlib import Path
 from typing import TYPE_CHECKING

 from django.conf import settings

 from documents.loggers import LoggingMixin
-from documents.signals import document_consumer_declaration
 from documents.utils import copy_file_with_basic_stats
 from documents.utils import run_subprocess
+from paperless.parsers.registry import get_parser_registry

 if TYPE_CHECKING:
    import datetime

-# This regular expression will try to find dates in the document at
-# hand and will match the following formats:
-# - XX.YY.ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
-# - XX/YY/ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
-# - XX-YY-ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
-# - ZZZZ.XX.YY with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
-# - ZZZZ/XX/YY with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
-# - ZZZZ-XX-YY with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
-# - XX. MONTH ZZZZ with XX being 1 or 2 and ZZZZ being 2 or 4 digits
-# - MONTH ZZZZ, with ZZZZ being 4 digits
-# - MONTH XX, ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits
-# - XX MON ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits. MONTH is 3 letters
-# - XXPP MONTH ZZZZ with XX being 1 or 2 and PP being 2 letters and ZZZZ being 4 digits
-
-# TODO: isn't there a date parsing library for this?
-
-DATE_REGEX = re.compile(
-    r"(\b|(?!=([_-])))(\d{1,2})[\.\/-](\d{1,2})[\.\/-](\d{4}|\d{2})(\b|(?=([_-])))|"
-    r"(\b|(?!=([_-])))(\d{4}|\d{2})[\.\/-](\d{1,2})[\.\/-](\d{1,2})(\b|(?=([_-])))|"
-    r"(\b|(?!=([_-])))(\d{1,2}[\. ]+[a-zéûäëčžúřěáíóńźçŞğü]{3,9} \d{4}|[a-zéûäëčžúřěáíóńźçŞğü]{3,9} \d{1,2}, \d{4})(\b|(?=([_-])))|"
-    r"(\b|(?!=([_-])))([^\W\d_]{3,9} \d{1,2}, (\d{4}))(\b|(?=([_-])))|"
-    r"(\b|(?!=([_-])))([^\W\d_]{3,9} \d{4})(\b|(?=([_-])))|"
-    r"(\b|(?!=([_-])))(\d{1,2}[^ 0-9]{2}[\. ]+[^ ]{3,9}[ \.\/-]\d{4})(\b|(?=([_-])))|"
-    r"(\b|(?!=([_-])))(\b\d{1,2}[ \.\/-][a-zéûäëčžúřěáíóńźçŞğü]{3}[ \.\/-]\d{4})(\b|(?=([_-])))",
-    re.IGNORECASE,
-)
-
-
 logger = logging.getLogger("paperless.parsing")


-@lru_cache(maxsize=8)
 def is_mime_type_supported(mime_type: str) -> bool:
    """
    Returns True if the mime type is supported, False otherwise
    """
-    return get_parser_class_for_mime_type(mime_type) is not None
+    return get_parser_registry().get_parser_for_file(mime_type, "") is not None


-@lru_cache(maxsize=8)
 def get_default_file_extension(mime_type: str) -> str:
    """
    Returns the default file extension for a mimetype, or
    an empty string if it could not be determined
    """
-    for response in document_consumer_declaration.send(None):
-        parser_declaration = response[1]
-        supported_mime_types = parser_declaration["mime_types"]
-
-        if mime_type in supported_mime_types:
-            return supported_mime_types[mime_type]
+    parser_class = get_parser_registry().get_parser_for_file(mime_type, "")
+    if parser_class is not None:
+        supported = parser_class.supported_mime_types()
+        if mime_type in supported:
+            return supported[mime_type]

    ext = mimetypes.guess_extension(mime_type)
-    if ext:
-        return ext
-    else:
-        return ""
+    return ext if ext else ""


-@lru_cache(maxsize=8)
 def is_file_ext_supported(ext: str) -> bool:
    """
    Returns True if the file extension is supported, False otherwise
@@ -94,44 +57,17 @@ def is_file_ext_supported(ext: str) -> bool:

 def get_supported_file_extensions() -> set[str]:
    extensions = set()
-    for response in document_consumer_declaration.send(None):
-        parser_declaration = response[1]
-        supported_mime_types = parser_declaration["mime_types"]
-
-        for mime_type in supported_mime_types:
+    for parser_class in get_parser_registry().all_parsers():
+        for mime_type, ext in parser_class.supported_mime_types().items():
            extensions.update(mimetypes.guess_all_extensions(mime_type))
            # Python's stdlib might be behind, so also add what the parser
            # says is the default extension
            # This makes image/webp supported on Python < 3.11
-            extensions.add(supported_mime_types[mime_type])
+            extensions.add(ext)

    return extensions


-def get_parser_class_for_mime_type(mime_type: str) -> type[DocumentParser] | None:
-    """
-    Returns the best parser (by weight) for the given mimetype or
-    None if no parser exists
-    """
-
-    options = []
-
-    for response in document_consumer_declaration.send(None):
-        parser_declaration = response[1]
-        supported_mime_types = parser_declaration["mime_types"]
-
-        if mime_type in supported_mime_types:
-            options.append(parser_declaration)
-
-    if not options:
-        return None
-
-    best_parser = sorted(options, key=lambda _: _["weight"], reverse=True)[0]
-
-    # Return the parser with the highest weight.
-    return best_parser["parser"]
-
-
 def run_convert(
    input_file,
    output_file,
--- a/src/documents/serialisers.py
+++ b/src/documents/serialisers.py
@@ -797,6 +797,25 @@ class ReadWriteSerializerMethodField(serializers.SerializerMethodField):
        return {self.field_name: data}


+def validate_documentlink_targets(user, doc_ids):
+    if Document.objects.filter(id__in=doc_ids).count() != len(doc_ids):
+        raise serializers.ValidationError(
+            "Some documents in value don't exist or were specified twice.",
+        )
+
+    if user is None:
+        return
+
+    target_documents = Document.objects.filter(id__in=doc_ids).select_related("owner")
+    if not all(
+        has_perms_owner_aware(user, "change_document", document)
+        for document in target_documents
+    ):
+        raise PermissionDenied(
+            _("Insufficient permissions."),
+        )
+
+
 class CustomFieldInstanceSerializer(serializers.ModelSerializer):
    field = serializers.PrimaryKeyRelatedField(queryset=CustomField.objects.all())
    value = ReadWriteSerializerMethodField(allow_null=True)
@@ -887,12 +906,11 @@ class CustomFieldInstanceSerializer(serializers.ModelSerializer):
                        "Value must be a list",
                    )
                doc_ids = data["value"]
-                if Document.objects.filter(id__in=doc_ids).count() != len(
-                    data["value"],
-                ):
-                    raise serializers.ValidationError(
-                        "Some documents in value don't exist or were specified twice.",
-                    )
+                request = self.context.get("request")
+                validate_documentlink_targets(
+                    getattr(request, "user", None) if request is not None else None,
+                    doc_ids,
+                )

        return data

@@ -1713,6 +1731,19 @@ class BulkEditSerializer(
                f"Some custom fields in {name} don't exist or were specified twice.",
            )

+        if isinstance(custom_fields, dict):
+            custom_field_map = CustomField.objects.in_bulk(ids)
+            for raw_field_id, value in custom_fields.items():
+                field = custom_field_map.get(int(raw_field_id))
+                if (
+                    field is not None
+                    and field.data_type == CustomField.FieldDataType.DOCUMENTLINK
+                    and value is not None
+                ):
+                    if not isinstance(value, list):
+                        raise serializers.ValidationError("Value must be a list")
+                    validate_documentlink_targets(self.user, value)
+
    def validate_method(self, method):
        if method == "set_correspondent":
            return bulk_edit.set_correspondent
--- a/src/documents/signals/init.py
+++ b/src/documents/signals/init.py
@@ -2,5 +2,4 @@ from django.dispatch import Signal

 document_consumption_started = Signal()
 document_consumption_finished = Signal()
-document_consumer_declaration = Signal()
 document_updated = Signal()
--- a/src/documents/signals/handlers.py
+++ b/src/documents/signals/handlers.py
@@ -1,5 +1,6 @@
 from __future__ import annotations

+import hashlib
 import logging
 import shutil
 from pathlib import Path
@@ -403,6 +404,14 @@ class CannotMoveFilesException(Exception):
    pass


+def _path_matches_checksum(path: Path, checksum: str | None) -> bool:
+    if checksum is None or not path.is_file():
+        return False
+
+    with path.open("rb") as f:
+        return hashlib.md5(f.read()).hexdigest() == checksum
+
+
 def _filename_template_uses_custom_fields(doc: Document) -> bool:
    template = None
    if doc.storage_path is not None:
@@ -473,10 +482,12 @@ def update_filename_and_move_files(
            old_filename = instance.filename
            old_source_path = instance.source_path
            move_original = False
+            original_already_moved = False

            old_archive_filename = instance.archive_filename
            old_archive_path = instance.archive_path
            move_archive = False
+            archive_already_moved = False

            candidate_filename = generate_filename(instance)
            if len(str(candidate_filename)) > Document.MAX_STORED_FILENAME_LENGTH:
@@ -497,14 +508,23 @@ def update_filename_and_move_files(
                candidate_source_path.exists()
                and candidate_source_path != old_source_path
            ):
-                # Only fall back to unique search when there is an actual conflict
-                new_filename = generate_unique_filename(instance)
+                if not old_source_path.is_file() and _path_matches_checksum(
+                    candidate_source_path,
+                    instance.checksum,
+                ):
+                    new_filename = candidate_filename
+                    original_already_moved = True
+                else:
+                    # Only fall back to unique search when there is an actual conflict
+                    new_filename = generate_unique_filename(instance)
            else:
                new_filename = candidate_filename

            # Need to convert to string to be able to save it to the db
            instance.filename = str(new_filename)
-            move_original = old_filename != instance.filename
+            move_original = (
+                old_filename != instance.filename and not original_already_moved
+            )

            if instance.has_archive_version:
                archive_candidate = generate_filename(instance, archive_filename=True)
@@ -525,24 +545,38 @@ def update_filename_and_move_files(
                    archive_candidate_path.exists()
                    and archive_candidate_path != old_archive_path
                ):
-                    new_archive_filename = generate_unique_filename(
-                        instance,
-                        archive_filename=True,
-                    )
+                    if not old_archive_path.is_file() and _path_matches_checksum(
+                        archive_candidate_path,
+                        instance.archive_checksum,
+                    ):
+                        new_archive_filename = archive_candidate
+                        archive_already_moved = True
+                    else:
+                        new_archive_filename = generate_unique_filename(
+                            instance,
+                            archive_filename=True,
+                        )
                else:
                    new_archive_filename = archive_candidate

                instance.archive_filename = str(new_archive_filename)

-                move_archive = old_archive_filename != instance.archive_filename
+                move_archive = (
+                    old_archive_filename != instance.archive_filename
+                    and not archive_already_moved
+                )
            else:
                move_archive = False

            if not move_original and not move_archive:
-                # Just update modified. Also, don't save() here to prevent infinite recursion.
-                Document.objects.filter(pk=instance.pk).update(
-                    modified=timezone.now(),
-                )
+                updates = {"modified": timezone.now()}
+                if old_filename != instance.filename:
+                    updates["filename"] = instance.filename
+                if old_archive_filename != instance.archive_filename:
+                    updates["archive_filename"] = instance.archive_filename
+
+                # Don't save() here to prevent infinite recursion.
+                Document.objects.filter(pk=instance.pk).update(**updates)
                return

            if move_original:
@@ -932,8 +966,25 @@ def run_workflows(
            if not use_overrides:
                # limit title to 128 characters
                document.title = document.title[:128]
-                # save first before setting tags
-                document.save()
+                # Save only the fields that workflow actions can set directly.
+                # Deliberately excludes filename and archive_filename — those are
+                # managed exclusively by update_filename_and_move_files via the
+                # post_save signal. Writing stale in-memory values here would revert
+                # a concurrent update_filename_and_move_files DB write, leaving the
+                # DB pointing at the old path while the file is already at the new
+                # one (see: https://github.com/paperless-ngx/paperless-ngx/issues/12386).
+                # modified has auto_now=True but is not auto-added when update_fields
+                # is specified, so it must be listed explicitly.
+                document.save(
+                    update_fields=[
+                        "title",
+                        "correspondent",
+                        "document_type",
+                        "storage_path",
+                        "owner",
+                        "modified",
+                    ],
+                )
                document.tags.set(doc_tag_ids)

            WorkflowRun.objects.create(
--- a/src/documents/tasks.py
+++ b/src/documents/tasks.py
@@ -52,8 +52,6 @@ from documents.models import StoragePath
 from documents.models import Tag
 from documents.models import WorkflowRun
 from documents.models import WorkflowTrigger
-from documents.parsers import DocumentParser
-from documents.parsers import get_parser_class_for_mime_type
 from documents.plugins.base import ConsumeTaskPlugin
 from documents.plugins.base import ProgressManager
 from documents.plugins.base import StopConsumeTaskError
@@ -65,6 +63,8 @@ from documents.signals.handlers import run_workflows
 from documents.signals.handlers import send_websocket_document_updated
 from documents.workflows.utils import get_workflows_for_trigger
 from paperless.config import AIConfig
+from paperless.parsers import ParserContext
+from paperless.parsers.registry import get_parser_registry
 from paperless_ai.indexing import llm_index_add_or_update_document
 from paperless_ai.indexing import llm_index_remove_document
 from paperless_ai.indexing import update_llm_index
@@ -100,7 +100,11 @@ def index_reindex(*, iter_wrapper: IterWrapper[Document] = _identity) -> None:


@shared_task
-def train_classifier(*, scheduled=True) -> None:
+def train_classifier(
+    *,
+    scheduled=True,
+    status_callback: Callable[[str], None] | None = None,
+) -> None:
    task = PaperlessTask.objects.create(
        type=PaperlessTask.TaskType.SCHEDULED_TASK
        if scheduled
@@ -136,7 +140,7 @@ def train_classifier(*, scheduled=True) -> None:
        classifier = DocumentClassifier()

    try:
-        if classifier.train():
+        if classifier.train(status_callback=status_callback):
            logger.info(
                f"Saving updated classifier model to {settings.MODEL_FILE}...",
            )
@@ -300,7 +304,11 @@ def update_document_content_maybe_archive_file(document_id) -> None:

    mime_type = document.mime_type

-    parser_class: type[DocumentParser] = get_parser_class_for_mime_type(mime_type)
+    parser_class = get_parser_registry().get_parser_for_file(
+        mime_type,
+        document.original_filename or "",
+        document.source_path,
+    )

    if not parser_class:
        logger.error(
@@ -309,98 +317,92 @@ def update_document_content_maybe_archive_file(document_id) -> None:
        )
        return

-    parser: DocumentParser = parser_class(logging_group=uuid.uuid4())
+    with parser_class() as parser:
+        parser.configure(ParserContext())

-    try:
-        parser.parse(document.source_path, mime_type, document.get_public_filename())
+        try:
+            parser.parse(document.source_path, mime_type)

-        thumbnail = parser.get_thumbnail(
-            document.source_path,
-            mime_type,
-            document.get_public_filename(),
-        )
+            thumbnail = parser.get_thumbnail(document.source_path, mime_type)

-        with transaction.atomic():
-            oldDocument = Document.objects.get(pk=document.pk)
-            if parser.get_archive_path():
-                with Path(parser.get_archive_path()).open("rb") as f:
-                    checksum = hashlib.md5(f.read()).hexdigest()
-                # I'm going to save first so that in case the file move
-                # fails, the database is rolled back.
-                # We also don't use save() since that triggers the filehandling
-                # logic, and we don't want that yet (file not yet in place)
-                document.archive_filename = generate_unique_filename(
-                    document,
-                    archive_filename=True,
-                )
-                Document.objects.filter(pk=document.pk).update(
-                    archive_checksum=checksum,
-                    content=parser.get_text(),
-                    archive_filename=document.archive_filename,
-                )
-                newDocument = Document.objects.get(pk=document.pk)
-                if settings.AUDIT_LOG_ENABLED:
-                    LogEntry.objects.log_create(
-                        instance=oldDocument,
-                        changes={
-                            "content": [oldDocument.content, newDocument.content],
-                            "archive_checksum": [
-                                oldDocument.archive_checksum,
-                                newDocument.archive_checksum,
-                            ],
-                            "archive_filename": [
-                                oldDocument.archive_filename,
-                                newDocument.archive_filename,
-                            ],
-                        },
-                        additional_data={
-                            "reason": "Update document content",
-                        },
-                        action=LogEntry.Action.UPDATE,
-                    )
-            else:
-                Document.objects.filter(pk=document.pk).update(
-                    content=parser.get_text(),
-                )
-
-                if settings.AUDIT_LOG_ENABLED:
-                    LogEntry.objects.log_create(
-                        instance=oldDocument,
-                        changes={
-                            "content": [oldDocument.content, parser.get_text()],
-                        },
-                        additional_data={
-                            "reason": "Update document content",
-                        },
-                        action=LogEntry.Action.UPDATE,
-                    )
-
-            with FileLock(settings.MEDIA_LOCK):
+            with transaction.atomic():
+                oldDocument = Document.objects.get(pk=document.pk)
                if parser.get_archive_path():
-                    create_source_path_directory(document.archive_path)
-                    shutil.move(parser.get_archive_path(), document.archive_path)
-                shutil.move(thumbnail, document.thumbnail_path)
+                    with Path(parser.get_archive_path()).open("rb") as f:
+                        checksum = hashlib.md5(f.read()).hexdigest()
+                    # I'm going to save first so that in case the file move
+                    # fails, the database is rolled back.
+                    # We also don't use save() since that triggers the filehandling
+                    # logic, and we don't want that yet (file not yet in place)
+                    document.archive_filename = generate_unique_filename(
+                        document,
+                        archive_filename=True,
+                    )
+                    Document.objects.filter(pk=document.pk).update(
+                        archive_checksum=checksum,
+                        content=parser.get_text(),
+                        archive_filename=document.archive_filename,
+                    )
+                    newDocument = Document.objects.get(pk=document.pk)
+                    if settings.AUDIT_LOG_ENABLED:
+                        LogEntry.objects.log_create(
+                            instance=oldDocument,
+                            changes={
+                                "content": [oldDocument.content, newDocument.content],
+                                "archive_checksum": [
+                                    oldDocument.archive_checksum,
+                                    newDocument.archive_checksum,
+                                ],
+                                "archive_filename": [
+                                    oldDocument.archive_filename,
+                                    newDocument.archive_filename,
+                                ],
+                            },
+                            additional_data={
+                                "reason": "Update document content",
+                            },
+                            action=LogEntry.Action.UPDATE,
+                        )
+                else:
+                    Document.objects.filter(pk=document.pk).update(
+                        content=parser.get_text(),
+                    )

-        document.refresh_from_db()
-        logger.info(
-            f"Updating index for document {document_id} ({document.archive_checksum})",
-        )
-        with index.open_index_writer() as writer:
-            index.update_document(writer, document)
+                    if settings.AUDIT_LOG_ENABLED:
+                        LogEntry.objects.log_create(
+                            instance=oldDocument,
+                            changes={
+                                "content": [oldDocument.content, parser.get_text()],
+                            },
+                            additional_data={
+                                "reason": "Update document content",
+                            },
+                            action=LogEntry.Action.UPDATE,
+                        )

-        ai_config = AIConfig()
-        if ai_config.llm_index_enabled:
-            llm_index_add_or_update_document(document)
+                with FileLock(settings.MEDIA_LOCK):
+                    if parser.get_archive_path():
+                        create_source_path_directory(document.archive_path)
+                        shutil.move(parser.get_archive_path(), document.archive_path)
+                    shutil.move(thumbnail, document.thumbnail_path)

-        clear_document_caches(document.pk)
+            document.refresh_from_db()
+            logger.info(
+                f"Updating index for document {document_id} ({document.archive_checksum})",
+            )
+            with index.open_index_writer() as writer:
+                index.update_document(writer, document)

-    except Exception:
-        logger.exception(
-            f"Error while parsing document {document} (ID: {document_id})",
-        )
-    finally:
-        # TODO(stumpylog): Cleanup once all parsers are handled
-        parser.cleanup()
+            ai_config = AIConfig()
+            if ai_config.llm_index_enabled:
+                llm_index_add_or_update_document(document)
+
+            clear_document_caches(document.pk)
+
+        except Exception:
+            logger.exception(
+                f"Error while parsing document {document} (ID: {document_id})",
+            )


@shared_task
--- a/src/documents/tests/management/test_management_sanity_checker.py
+++ b/src/documents/tests/management/test_management_sanity_checker.py
@@ -163,13 +163,23 @@ class TestRenderResultsSummary:
 class TestDocumentSanityCheckerCommand:
    def test_no_issues(self, sample_doc: Document) -> None:
        out = StringIO()
-        call_command("document_sanity_checker", "--no-progress-bar", stdout=out)
+        call_command(
+            "document_sanity_checker",
+            "--no-progress-bar",
+            stdout=out,
+            skip_checks=True,
+        )
        assert "No issues detected" in out.getvalue()

    def test_missing_original(self, sample_doc: Document) -> None:
        Path(sample_doc.source_path).unlink()
        out = StringIO()
-        call_command("document_sanity_checker", "--no-progress-bar", stdout=out)
+        call_command(
+            "document_sanity_checker",
+            "--no-progress-bar",
+            stdout=out,
+            skip_checks=True,
+        )
        output = out.getvalue()
        assert "ERROR" in output
        assert "Original of document does not exist" in output
@@ -187,7 +197,12 @@ class TestDocumentSanityCheckerCommand:
        Path(doc.thumbnail_path).touch()

        out = StringIO()
-        call_command("document_sanity_checker", "--no-progress-bar", stdout=out)
+        call_command(
+            "document_sanity_checker",
+            "--no-progress-bar",
+            stdout=out,
+            skip_checks=True,
+        )
        output = out.getvalue()
        assert "ERROR" in output
        assert "Checksum mismatch. Stored: abc, actual:" in output
--- a/src/documents/tests/test_api_app_config.py
+++ b/src/documents/tests/test_api_app_config.py
@@ -5,6 +5,7 @@ from unittest.mock import patch

 from django.contrib.auth.models import User
 from django.core.files.uploadedfile import SimpleUploadedFile
+from django.test import override_settings
 from rest_framework import status
 from rest_framework.test import APITestCase

@@ -693,3 +694,17 @@ class TestApiAppConfig(DirectoriesMixin, APITestCase):
                content_type="application/json",
            )
            mock_update.assert_called_once()
+
+    @override_settings(LLM_ALLOW_INTERNAL_ENDPOINTS=False)
+    def test_update_llm_endpoint_blocks_internal_endpoint_when_disallowed(self) -> None:
+        response = self.client.patch(
+            f"{self.ENDPOINT}1/",
+            json.dumps(
+                {
+                    "llm_endpoint": "http://127.0.0.1:11434",
+                },
+            ),
+            content_type="application/json",
+        )
+        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
+        self.assertIn("non-public address", str(response.data).lower())
--- a/src/documents/tests/test_api_bulk_edit.py
+++ b/src/documents/tests/test_api_bulk_edit.py
@@ -262,6 +262,50 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
        self.assertEqual(kwargs["add_custom_fields"], [self.cf1.id])
        self.assertEqual(kwargs["remove_custom_fields"], [self.cf2.id])

+    @mock.patch("documents.serialisers.bulk_edit.modify_custom_fields")
+    def test_api_modify_custom_fields_documentlink_forbidden_for_unpermitted_target(
+        self,
+        m,
+    ) -> None:
+        self.setup_mock(m, "modify_custom_fields")
+        user = User.objects.create_user(username="doc-owner")
+        user.user_permissions.add(Permission.objects.get(codename="change_document"))
+        other_user = User.objects.create_user(username="other-user")
+        source_doc = Document.objects.create(
+            checksum="source",
+            title="Source",
+            owner=user,
+        )
+        target_doc = Document.objects.create(
+            checksum="target",
+            title="Target",
+            owner=other_user,
+        )
+        doclink_field = CustomField.objects.create(
+            name="doclink",
+            data_type=CustomField.FieldDataType.DOCUMENTLINK,
+        )
+
+        self.client.force_authenticate(user=user)
+
+        response = self.client.post(
+            "/api/documents/bulk_edit/",
+            json.dumps(
+                {
+                    "documents": [source_doc.id],
+                    "method": "modify_custom_fields",
+                    "parameters": {
+                        "add_custom_fields": {doclink_field.id: [target_doc.id]},
+                        "remove_custom_fields": [],
+                    },
+                },
+            ),
+            content_type="application/json",
+        )
+
+        self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
+        m.assert_not_called()
+
    @mock.patch("documents.serialisers.bulk_edit.modify_custom_fields")
    def test_api_modify_custom_fields_with_values(self, m) -> None:
        self.setup_mock(m, "modify_custom_fields")
--- a/src/documents/tests/test_api_custom_fields.py
+++ b/src/documents/tests/test_api_custom_fields.py
@@ -6,6 +6,7 @@ from unittest.mock import ANY
 from django.contrib.auth.models import Permission
 from django.contrib.auth.models import User
 from django.test import override_settings
+from guardian.shortcuts import assign_perm
 from rest_framework import status
 from rest_framework.test import APITestCase

@@ -1140,6 +1141,102 @@ class TestCustomFieldsAPI(DirectoriesMixin, APITestCase):
        self.assertEqual(resp.status_code, status.HTTP_200_OK)
        self.assertEqual(doc5.custom_fields.first().value, [1])

+    def test_documentlink_patch_requires_change_permission_on_target_documents(
+        self,
+    ) -> None:
+        source_owner = User.objects.create_user(username="source-owner")
+        source_owner.user_permissions.add(
+            Permission.objects.get(codename="change_document"),
+        )
+        other_user = User.objects.create_user(username="other-user")
+
+        source_doc = Document.objects.create(
+            title="Source",
+            checksum="source",
+            mime_type="application/pdf",
+            owner=source_owner,
+        )
+        target_doc = Document.objects.create(
+            title="Target",
+            checksum="target",
+            mime_type="application/pdf",
+            owner=other_user,
+        )
+        custom_field_doclink = CustomField.objects.create(
+            name="Test Custom Field Doc Link",
+            data_type=CustomField.FieldDataType.DOCUMENTLINK,
+        )
+
+        self.client.force_authenticate(user=source_owner)
+
+        resp = self.client.patch(
+            f"/api/documents/{source_doc.id}/",
+            data={
+                "custom_fields": [
+                    {
+                        "field": custom_field_doclink.id,
+                        "value": [target_doc.id],
+                    },
+                ],
+            },
+            format="json",
+        )
+
+        self.assertEqual(resp.status_code, status.HTTP_403_FORBIDDEN)
+        self.assertEqual(
+            CustomFieldInstance.objects.filter(field=custom_field_doclink).count(),
+            0,
+        )
+
+    def test_documentlink_patch_allowed_with_change_permission_on_target_documents(
+        self,
+    ) -> None:
+        source_owner = User.objects.create_user(username="source-owner")
+        source_owner.user_permissions.add(
+            Permission.objects.get(codename="change_document"),
+        )
+        other_user = User.objects.create_user(username="other-user")
+
+        source_doc = Document.objects.create(
+            title="Source",
+            checksum="source",
+            mime_type="application/pdf",
+            owner=source_owner,
+        )
+        target_doc = Document.objects.create(
+            title="Target",
+            checksum="target",
+            mime_type="application/pdf",
+            owner=other_user,
+        )
+        custom_field_doclink = CustomField.objects.create(
+            name="Test Custom Field Doc Link",
+            data_type=CustomField.FieldDataType.DOCUMENTLINK,
+        )
+
+        assign_perm("change_document", source_owner, target_doc)
+        self.client.force_authenticate(user=source_owner)
+
+        resp = self.client.patch(
+            f"/api/documents/{source_doc.id}/",
+            data={
+                "custom_fields": [
+                    {
+                        "field": custom_field_doclink.id,
+                        "value": [target_doc.id],
+                    },
+                ],
+            },
+            format="json",
+        )
+
+        self.assertEqual(resp.status_code, status.HTTP_200_OK)
+        target_doc.refresh_from_db()
+        self.assertEqual(
+            target_doc.custom_fields.get(field=custom_field_doclink).value,
+            [source_doc.id],
+        )
+
    def test_custom_field_filters(self) -> None:
        custom_field_string = CustomField.objects.create(
            name="Test Custom Field String",
--- a/src/documents/tests/test_api_permissions.py
+++ b/src/documents/tests/test_api_permissions.py
@@ -888,6 +888,19 @@ class TestApiUser(DirectoriesMixin, APITestCase):

        self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)

+        response = self.client.post(
+            f"{self.ENDPOINT}",
+            json.dumps(
+                {
+                    "username": "user4",
+                    "is_superuser": "true",
+                },
+            ),
+            content_type="application/json",
+        )
+
+        self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
+
        self.client.force_authenticate(user2)

        response = self.client.patch(
@@ -920,6 +933,65 @@ class TestApiUser(DirectoriesMixin, APITestCase):
        returned_user1 = User.objects.get(pk=user1.pk)
        self.assertEqual(returned_user1.is_superuser, False)

+    def test_only_superusers_can_create_or_alter_staff_status(self):
+        """
+        GIVEN:
+            - Existing user account
+        WHEN:
+            - API request is made to add a user account with staff status
+            - API request is made to change staff status
+        THEN:
+            - Only superusers can change staff status
+        """
+
+        user1 = User.objects.create_user(username="user1")
+        user1.user_permissions.add(*Permission.objects.all())
+        user2 = User.objects.create_superuser(username="user2")
+
+        self.client.force_authenticate(user1)
+
+        response = self.client.patch(
+            f"{self.ENDPOINT}{user1.pk}/",
+            json.dumps(
+                {
+                    "is_staff": "true",
+                },
+            ),
+            content_type="application/json",
+        )
+
+        self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
+
+        response = self.client.post(
+            f"{self.ENDPOINT}",
+            json.dumps(
+                {
+                    "username": "user3",
+                    "is_staff": 1,
+                },
+            ),
+            content_type="application/json",
+        )
+
+        self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
+
+        self.client.force_authenticate(user2)
+
+        response = self.client.patch(
+            f"{self.ENDPOINT}{user1.pk}/",
+            json.dumps(
+                {
+                    "is_staff": True,
+                },
+            ),
+            content_type="application/json",
+        )
+
+        self.assertEqual(response.status_code, status.HTTP_200_OK)
+
+        returned_user1 = User.objects.get(pk=user1.pk)
+        self.assertEqual(returned_user1.is_staff, True)
+

 class TestApiGroup(DirectoriesMixin, APITestCase):
    ENDPOINT = "/api/groups/"
--- a/src/documents/tests/test_api_schema.py
+++ b/src/documents/tests/test_api_schema.py
@@ -12,7 +12,12 @@ class TestApiSchema(APITestCase):
        Test that the schema is valid
        """
        try:
-            call_command("spectacular", "--validate", "--fail-on-warn")
+            call_command(
+                "spectacular",
+                "--validate",
+                "--fail-on-warn",
+                skip_checks=True,
+            )
        except CommandError as e:
            self.fail(f"Schema validation failed: {e}")

--- a/src/documents/tests/test_api_search.py
+++ b/src/documents/tests/test_api_search.py
@@ -702,6 +702,40 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):

        self.assertEqual(correction, None)

+    def test_search_spelling_suggestion_suppressed_for_private_terms(self):
+        owner = User.objects.create_user("owner")
+        attacker = User.objects.create_user("attacker")
+        attacker.user_permissions.add(
+            Permission.objects.get(codename="view_document"),
+        )
+
+        with AsyncWriter(index.open_index()) as writer:
+            for i in range(55):
+                private_doc = Document.objects.create(
+                    checksum=f"p{i}",
+                    pk=100 + i,
+                    title=f"Private Document {i + 1}",
+                    content=f"treasury document {i + 1}",
+                    owner=owner,
+                )
+                visible_doc = Document.objects.create(
+                    checksum=f"v{i}",
+                    pk=200 + i,
+                    title=f"Visible Document {i + 1}",
+                    content=f"public ledger {i + 1}",
+                    owner=attacker,
+                )
+                index.update_document(writer, private_doc)
+                index.update_document(writer, visible_doc)
+
+        self.client.force_authenticate(user=attacker)
+
+        response = self.client.get("/api/documents/?query=treasurx")
+
+        self.assertEqual(response.status_code, status.HTTP_200_OK)
+        self.assertEqual(response.data["count"], 0)
+        self.assertIsNone(response.data["corrected_query"])
+
    @mock.patch(
        "whoosh.searching.Searcher.correct_query",
        side_effect=Exception("Test error"),
@@ -772,6 +806,60 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
        self.assertEqual(results[0]["id"], d3.id)
        self.assertEqual(results[1]["id"], d1.id)

+    def test_search_more_like_requires_view_permission_on_seed_document(
+        self,
+    ) -> None:
+        """
+        GIVEN:
+            - A user can search documents they own
+            - Another user's private document exists with similar content
+        WHEN:
+            - The user requests more-like-this for the private seed document
+        THEN:
+            - The request is rejected
+        """
+        owner = User.objects.create_user("owner")
+        attacker = User.objects.create_user("attacker")
+        attacker.user_permissions.add(
+            Permission.objects.get(codename="view_document"),
+        )
+
+        private_seed = Document.objects.create(
+            title="private bank statement",
+            content="quarterly treasury bank statement wire transfer",
+            checksum="seed",
+            owner=owner,
+            pk=10,
+        )
+        visible_doc = Document.objects.create(
+            title="attacker-visible match",
+            content="quarterly treasury bank statement wire transfer summary",
+            checksum="visible",
+            owner=attacker,
+            pk=11,
+        )
+        other_doc = Document.objects.create(
+            title="unrelated",
+            content="completely different topic",
+            checksum="other",
+            owner=attacker,
+            pk=12,
+        )
+
+        with AsyncWriter(index.open_index()) as writer:
+            index.update_document(writer, private_seed)
+            index.update_document(writer, visible_doc)
+            index.update_document(writer, other_doc)
+
+        self.client.force_authenticate(user=attacker)
+
+        response = self.client.get(
+            f"/api/documents/?more_like_id={private_seed.id}",
+        )
+
+        self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
+        self.assertEqual(response.content, b"Insufficient permissions.")
+
    def test_search_filtering(self) -> None:
        t = Tag.objects.create(name="tag")
        t2 = Tag.objects.create(name="tag2")
@@ -1356,6 +1444,83 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
        self.assertEqual(results["custom_fields"][0]["id"], custom_field1.id)
        self.assertEqual(results["workflows"][0]["id"], workflow1.id)

+    def test_global_search_filters_owned_mail_objects(self) -> None:
+        user1 = User.objects.create_user("mail-search-user")
+        user2 = User.objects.create_user("other-mail-search-user")
+        user1.user_permissions.add(
+            Permission.objects.get(codename="view_mailaccount"),
+            Permission.objects.get(codename="view_mailrule"),
+        )
+
+        own_account = MailAccount.objects.create(
+            name="bank owned account",
+            username="owner@example.com",
+            password="secret",
+            imap_server="imap.owner.example.com",
+            imap_port=993,
+            imap_security=MailAccount.ImapSecurity.SSL,
+            character_set="UTF-8",
+            owner=user1,
+        )
+        other_account = MailAccount.objects.create(
+            name="bank other account",
+            username="other@example.com",
+            password="secret",
+            imap_server="imap.other.example.com",
+            imap_port=993,
+            imap_security=MailAccount.ImapSecurity.SSL,
+            character_set="UTF-8",
+            owner=user2,
+        )
+        unowned_account = MailAccount.objects.create(
+            name="bank shared account",
+            username="shared@example.com",
+            password="secret",
+            imap_server="imap.shared.example.com",
+            imap_port=993,
+            imap_security=MailAccount.ImapSecurity.SSL,
+            character_set="UTF-8",
+        )
+        own_rule = MailRule.objects.create(
+            name="bank owned rule",
+            account=own_account,
+            action=MailRule.MailAction.MOVE,
+            owner=user1,
+        )
+        other_rule = MailRule.objects.create(
+            name="bank other rule",
+            account=other_account,
+            action=MailRule.MailAction.MOVE,
+            owner=user2,
+        )
+        unowned_rule = MailRule.objects.create(
+            name="bank shared rule",
+            account=unowned_account,
+            action=MailRule.MailAction.MOVE,
+        )
+
+        self.client.force_authenticate(user1)
+
+        response = self.client.get("/api/search/?query=bank")
+
+        self.assertEqual(response.status_code, status.HTTP_200_OK)
+        self.assertCountEqual(
+            [account["id"] for account in response.data["mail_accounts"]],
+            [own_account.id, unowned_account.id],
+        )
+        self.assertCountEqual(
+            [rule["id"] for rule in response.data["mail_rules"]],
+            [own_rule.id, unowned_rule.id],
+        )
+        self.assertNotIn(
+            other_account.id,
+            [account["id"] for account in response.data["mail_accounts"]],
+        )
+        self.assertNotIn(
+            other_rule.id,
+            [rule["id"] for rule in response.data["mail_rules"]],
+        )
+
    def test_global_search_bad_request(self) -> None:
        """
        WHEN:
--- a/src/documents/tests/test_api_status.py
+++ b/src/documents/tests/test_api_status.py
@@ -26,6 +26,23 @@ class TestSystemStatus(APITestCase):
        self.override = override_settings(MEDIA_ROOT=self.tmp_dir)
        self.override.enable()

+        # Mock slow network calls so tests don't block on real Redis/Celery timeouts.
+        # Individual tests that care about specific behaviour override these with
+        # their own @mock.patch decorators (which take precedence).
+        redis_patcher = mock.patch(
+            "redis.Redis.execute_command",
+            side_effect=Exception("Redis not available"),
+        )
+        self.mock_redis = redis_patcher.start()
+        self.addCleanup(redis_patcher.stop)
+
+        celery_patcher = mock.patch(
+            "celery.app.control.Inspect.ping",
+            side_effect=Exception("Celery not available"),
+        )
+        self.mock_celery_ping = celery_patcher.start()
+        self.addCleanup(celery_patcher.stop)
+
    def tearDown(self) -> None:
        super().tearDown()

@@ -69,11 +86,18 @@ class TestSystemStatus(APITestCase):
        """
        response = self.client.get(self.ENDPOINT)
        self.assertEqual(response.status_code, status.HTTP_401_UNAUTHORIZED)
+        self.assertEqual(response["WWW-Authenticate"], "Token")
        normal_user = User.objects.create_user(username="normal_user")
        self.client.force_login(normal_user)
        response = self.client.get(self.ENDPOINT)
        self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)

+    def test_system_status_with_bad_basic_auth_challenges(self) -> None:
+        self.client.credentials(HTTP_AUTHORIZATION="Basic invalid")
+        response = self.client.get(self.ENDPOINT)
+        self.assertEqual(response.status_code, status.HTTP_401_UNAUTHORIZED)
+        self.assertEqual(response["WWW-Authenticate"], 'Basic realm="api"')
+
    def test_system_status_container_detection(self) -> None:
        """
        GIVEN:
@@ -84,13 +108,17 @@ class TestSystemStatus(APITestCase):
            - The response contains the correct install type
        """
        self.client.force_login(self.user)
-        os.environ["PNGX_CONTAINERIZED"] = "1"
-        response = self.client.get(self.ENDPOINT)
-        self.assertEqual(response.status_code, status.HTTP_200_OK)
-        self.assertEqual(response.data["install_type"], "docker")
-        os.environ["KUBERNETES_SERVICE_HOST"] = "http://localhost"
-        response = self.client.get(self.ENDPOINT)
-        self.assertEqual(response.data["install_type"], "kubernetes")
+        with mock.patch.dict(os.environ, {"PNGX_CONTAINERIZED": "1"}, clear=False):
+            response = self.client.get(self.ENDPOINT)
+            self.assertEqual(response.status_code, status.HTTP_200_OK)
+            self.assertEqual(response.data["install_type"], "docker")
+        with mock.patch.dict(
+            os.environ,
+            {"PNGX_CONTAINERIZED": "1", "KUBERNETES_SERVICE_HOST": "http://localhost"},
+            clear=False,
+        ):
+            response = self.client.get(self.ENDPOINT)
+            self.assertEqual(response.data["install_type"], "kubernetes")

    @mock.patch("redis.Redis.execute_command")
    def test_system_status_redis_ping(self, mock_ping) -> None:
--- a/src/documents/tests/test_checks.py
+++ b/src/documents/tests/test_checks.py
@@ -13,8 +13,10 @@ class TestDocumentChecks(TestCase):
    def test_parser_check(self) -> None:
        self.assertEqual(parser_check(None), [])

-        with mock.patch("documents.checks.document_consumer_declaration.send") as m:
-            m.return_value = []
+        with mock.patch("documents.checks.get_parser_registry") as mock_registry_fn:
+            mock_registry = mock.MagicMock()
+            mock_registry.all_parsers.return_value = []
+            mock_registry_fn.return_value = mock_registry

            self.assertEqual(
                parser_check(None),
--- a/src/documents/tests/test_consumer.py
+++ b/src/documents/tests/test_consumer.py
@@ -27,7 +27,6 @@ from documents.models import Document
 from documents.models import DocumentType
 from documents.models import StoragePath
 from documents.models import Tag
-from documents.parsers import DocumentParser
 from documents.parsers import ParseError
 from documents.plugins.helpers import ProgressStatusOptions
 from documents.tasks import sanity_check
@@ -36,65 +35,108 @@ from documents.tests.utils import DummyProgressManager
 from documents.tests.utils import FileSystemAssertsMixin
 from documents.tests.utils import GetConsumerMixin
 from paperless_mail.models import MailRule
-from paperless_mail.parsers import MailDocumentParser


-class _BaseTestParser(DocumentParser):
-    def get_settings(self) -> None:
+class _BaseNewStyleParser:
+    """Minimal ParserProtocol implementation for use in consumer tests."""
+
+    name: str = "test-parser"
+    version: str = "0.1"
+    author: str = "test"
+    url: str = "test"
+
+    @classmethod
+    def supported_mime_types(cls) -> dict:
+        return {
+            "application/pdf": ".pdf",
+            "image/png": ".png",
+            "message/rfc822": ".eml",
+        }
+
+    @classmethod
+    def score(cls, mime_type: str, filename: str, path=None):
+        return 0 if mime_type in cls.supported_mime_types() else None
+
+    @property
+    def can_produce_archive(self) -> bool:
+        return True
+
+    @property
+    def requires_pdf_rendition(self) -> bool:
+        return False
+
+    def __init__(self) -> None:
+        self._tmpdir: Path | None = None
+        self._text: str | None = None
+        self._archive: Path | None = None
+        self._thumb: Path | None = None
+
+    def __enter__(self):
+        self._tmpdir = Path(
+            tempfile.mkdtemp(prefix="paperless-test-", dir=settings.SCRATCH_DIR),
+        )
+        _, thumb = tempfile.mkstemp(suffix=".webp", dir=self._tmpdir)
+        self._thumb = Path(thumb)
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
+        if self._tmpdir and self._tmpdir.exists():
+            shutil.rmtree(self._tmpdir, ignore_errors=True)
+
+    def configure(self, context) -> None:
        """
-        This parser does not implement additional settings yet
+        Test parser doesn't do anything with context
        """
+
+    def parse(self, document_path, mime_type, *, produce_archive: bool = True) -> None:
+        raise NotImplementedError
+
+    def get_text(self) -> str | None:
+        return self._text
+
+    def get_date(self):
        return None

+    def get_archive_path(self):
+        return self._archive

-class DummyParser(_BaseTestParser):
-    def __init__(self, logging_group, scratch_dir, archive_path) -> None:
-        super().__init__(logging_group, None)
-        _, self.fake_thumb = tempfile.mkstemp(suffix=".webp", dir=scratch_dir)
-        self.archive_path = archive_path
+    def get_thumbnail(self, document_path, mime_type) -> Path:
+        return self._thumb

-    def get_thumbnail(self, document_path, mime_type, file_name=None):
-        return self.fake_thumb
+    def get_page_count(self, document_path, mime_type):
+        return None

-    def parse(self, document_path, mime_type, file_name=None) -> None:
-        self.text = "The Text"
+    def extract_metadata(self, document_path, mime_type) -> list:
+        return []


-class CopyParser(_BaseTestParser):
-    def get_thumbnail(self, document_path, mime_type, file_name=None):
-        return self.fake_thumb
+class DummyParser(_BaseNewStyleParser):
+    _ARCHIVE_SRC = (
+        Path(__file__).parent / "samples" / "documents" / "archive" / "0000001.pdf"
+    )

-    def __init__(self, logging_group, progress_callback=None) -> None:
-        super().__init__(logging_group, progress_callback)
-        _, self.fake_thumb = tempfile.mkstemp(suffix=".webp", dir=self.tempdir)
-
-    def parse(self, document_path, mime_type, file_name=None) -> None:
-        self.text = "The text"
-        self.archive_path = Path(self.tempdir / "archive.pdf")
-        shutil.copy(document_path, self.archive_path)
+    def parse(self, document_path, mime_type, *, produce_archive: bool = True) -> None:
+        self._text = "The Text"
+        if produce_archive and self._tmpdir:
+            self._archive = self._tmpdir / "archive.pdf"
+            shutil.copy(self._ARCHIVE_SRC, self._archive)


-class FaultyParser(_BaseTestParser):
-    def __init__(self, logging_group, scratch_dir) -> None:
-        super().__init__(logging_group)
-        _, self.fake_thumb = tempfile.mkstemp(suffix=".webp", dir=scratch_dir)
+class CopyParser(_BaseNewStyleParser):
+    def parse(self, document_path, mime_type, *, produce_archive: bool = True) -> None:
+        self._text = "The text"
+        if produce_archive and self._tmpdir:
+            self._archive = self._tmpdir / "archive.pdf"
+            shutil.copy(document_path, self._archive)

-    def get_thumbnail(self, document_path, mime_type, file_name=None):
-        return self.fake_thumb

-    def parse(self, document_path, mime_type, file_name=None):
+class FaultyParser(_BaseNewStyleParser):
+    def parse(self, document_path, mime_type, *, produce_archive: bool = True) -> None:
        raise ParseError("Does not compute.")


-class FaultyGenericExceptionParser(_BaseTestParser):
-    def __init__(self, logging_group, scratch_dir) -> None:
-        super().__init__(logging_group)
-        _, self.fake_thumb = tempfile.mkstemp(suffix=".webp", dir=scratch_dir)
-
-    def get_thumbnail(self, document_path, mime_type, file_name=None):
-        return self.fake_thumb
-
-    def parse(self, document_path, mime_type, file_name=None):
+class FaultyGenericExceptionParser(_BaseNewStyleParser):
+    def parse(self, document_path, mime_type, *, produce_archive: bool = True) -> None:
        raise Exception("Generic exception.")


@@ -148,38 +190,12 @@ class TestConsumer(
        self.assertEqual(payload["data"]["max_progress"], last_progress_max)
        self.assertEqual(payload["data"]["status"], last_status)

-    def make_dummy_parser(self, logging_group, progress_callback=None):
-        return DummyParser(
-            logging_group,
-            self.dirs.scratch_dir,
-            self.get_test_archive_file(),
-        )
-
-    def make_faulty_parser(self, logging_group, progress_callback=None):
-        return FaultyParser(logging_group, self.dirs.scratch_dir)
-
-    def make_faulty_generic_exception_parser(
-        self,
-        logging_group,
-        progress_callback=None,
-    ):
-        return FaultyGenericExceptionParser(logging_group, self.dirs.scratch_dir)
-
    def setUp(self) -> None:
        super().setUp()

-        patcher = mock.patch("documents.parsers.document_consumer_declaration.send")
-        m = patcher.start()
-        m.return_value = [
-            (
-                None,
-                {
-                    "parser": self.make_dummy_parser,
-                    "mime_types": {"application/pdf": ".pdf"},
-                    "weight": 0,
-                },
-            ),
-        ]
+        patcher = mock.patch("documents.consumer.get_parser_registry")
+        mock_registry = patcher.start()
+        mock_registry.return_value.get_parser_for_file.return_value = DummyParser
        self.addCleanup(patcher.stop)

    def get_test_file(self):
@@ -548,9 +564,9 @@ class TestConsumer(
            ) as consumer:
                consumer.run()

-    @mock.patch("documents.parsers.document_consumer_declaration.send")
+    @mock.patch("documents.consumer.get_parser_registry")
    def testNoParsers(self, m) -> None:
-        m.return_value = []
+        m.return_value.get_parser_for_file.return_value = None

        with self.assertRaisesMessage(
            ConsumerError,
@@ -561,18 +577,9 @@ class TestConsumer(

        self._assert_first_last_send_progress(last_status="FAILED")

-    @mock.patch("documents.parsers.document_consumer_declaration.send")
+    @mock.patch("documents.consumer.get_parser_registry")
    def testFaultyParser(self, m) -> None:
-        m.return_value = [
-            (
-                None,
-                {
-                    "parser": self.make_faulty_parser,
-                    "mime_types": {"application/pdf": ".pdf"},
-                    "weight": 0,
-                },
-            ),
-        ]
+        m.return_value.get_parser_for_file.return_value = FaultyParser

        with self.get_consumer(self.get_test_file()) as consumer:
            with self.assertRaisesMessage(
@@ -583,18 +590,9 @@ class TestConsumer(

        self._assert_first_last_send_progress(last_status="FAILED")

-    @mock.patch("documents.parsers.document_consumer_declaration.send")
+    @mock.patch("documents.consumer.get_parser_registry")
    def testGenericParserException(self, m) -> None:
-        m.return_value = [
-            (
-                None,
-                {
-                    "parser": self.make_faulty_generic_exception_parser,
-                    "mime_types": {"application/pdf": ".pdf"},
-                    "weight": 0,
-                },
-            ),
-        ]
+        m.return_value.get_parser_for_file.return_value = FaultyGenericExceptionParser

        with self.get_consumer(self.get_test_file()) as consumer:
            with self.assertRaisesMessage(
@@ -642,6 +640,7 @@ class TestConsumer(
        self._assert_first_last_send_progress()

    @mock.patch("documents.consumer.generate_unique_filename")
+    @override_settings(FILENAME_FORMAT="{pk}")
    def testFilenameHandlingFallsBackWhenGeneratedPathExceedsDbLimit(self, m):
        m.side_effect = lambda doc, archive_filename=False: Path(
            ("a" * 1100 + ".pdf") if not archive_filename else ("b" * 1100 + ".pdf"),
@@ -1017,7 +1016,7 @@ class TestConsumer(
        self._assert_first_last_send_progress()

    @override_settings(FILENAME_FORMAT="{title}")
-    @mock.patch("documents.parsers.document_consumer_declaration.send")
+    @mock.patch("documents.consumer.get_parser_registry")
    def test_similar_filenames(self, m) -> None:
        shutil.copy(
            Path(__file__).parent / "samples" / "simple.pdf",
@@ -1031,16 +1030,7 @@ class TestConsumer(
            Path(__file__).parent / "samples" / "simple-noalpha.png",
            settings.CONSUMPTION_DIR / "simple.png.pdf",
        )
-        m.return_value = [
-            (
-                None,
-                {
-                    "parser": CopyParser,
-                    "mime_types": {"application/pdf": ".pdf", "image/png": ".png"},
-                    "weight": 0,
-                },
-            ),
-        ]
+        m.return_value.get_parser_for_file.return_value = CopyParser

        with self.get_consumer(settings.CONSUMPTION_DIR / "simple.png") as consumer:
            consumer.run()
@@ -1068,8 +1058,10 @@ class TestConsumer(

        sanity_check()

+    @mock.patch("documents.consumer.get_parser_registry")
    @mock.patch("documents.consumer.run_subprocess")
-    def test_try_to_clean_invalid_pdf(self, m) -> None:
+    def test_try_to_clean_invalid_pdf(self, m, mock_registry) -> None:
+        mock_registry.return_value.get_parser_for_file.return_value = None
        shutil.copy(
            Path(__file__).parent / "samples" / "invalid_pdf.pdf",
            settings.CONSUMPTION_DIR / "invalid_pdf.pdf",
@@ -1090,11 +1082,11 @@ class TestConsumer(
            self.assertEqual(command[1], "--replace-input")

    @mock.patch("paperless_mail.models.MailRule.objects.get")
-    @mock.patch("paperless_mail.parsers.MailDocumentParser.parse")
-    @mock.patch("documents.parsers.document_consumer_declaration.send")
+    @mock.patch("paperless.parsers.mail.MailDocumentParser.parse")
+    @mock.patch("documents.consumer.get_parser_registry")
    def test_mail_parser_receives_mailrule(
        self,
-        mock_consumer_declaration_send: mock.Mock,
+        mock_get_parser_registry: mock.Mock,
        mock_mail_parser_parse: mock.Mock,
        mock_mailrule_get: mock.Mock,
    ) -> None:
@@ -1106,25 +1098,21 @@ class TestConsumer(
        THEN:
            - The mail parser should receive the mail rule
        """
-        mock_consumer_declaration_send.return_value = [
-            (
-                None,
-                {
-                    "parser": MailDocumentParser,
-                    "mime_types": {"message/rfc822": ".eml"},
-                    "weight": 0,
-                },
-            ),
-        ]
+        from paperless.parsers.mail import MailDocumentParser
+
+        mock_get_parser_registry.return_value.get_parser_for_file.return_value = (
+            MailDocumentParser
+        )
        mock_mailrule_get.return_value = mock.Mock(
            pdf_layout=MailRule.PdfLayout.HTML_ONLY,
        )
        with self.get_consumer(
            filepath=(
                Path(__file__).parent.parent.parent
-                / Path("paperless_mail")
+                / Path("paperless")
                / Path("tests")
                / Path("samples")
+                / Path("mail")
            ).resolve()
            / "html.eml",
            source=DocumentSource.MailFetch,
@@ -1135,12 +1123,10 @@ class TestConsumer(
                ConsumerError,
            ):
                consumer.run()
-                mock_mail_parser_parse.assert_called_once_with(
-                    consumer.working_copy,
-                    "message/rfc822",
-                    file_name="sample.pdf",
-                    mailrule=mock_mailrule_get.return_value,
-                )
+            mock_mail_parser_parse.assert_called_once_with(
+                consumer.working_copy,
+                "message/rfc822",
+            )


@mock.patch("documents.consumer.magic.from_file", fake_magic_from_file)
--- a/src/documents/tests/test_file_handling.py
+++ b/src/documents/tests/test_file_handling.py
@@ -1,4 +1,5 @@
 import datetime
+import hashlib
 import logging
 import tempfile
 from pathlib import Path
@@ -204,6 +205,52 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
            )
            self.assertEqual(document.filename, "none/none.pdf")

+    @override_settings(FILENAME_FORMAT=None)
+    def test_stale_save_recovers_already_moved_files(self) -> None:
+        old_storage_path = StoragePath.objects.create(
+            name="old-path",
+            path="old/{{title}}",
+        )
+        new_storage_path = StoragePath.objects.create(
+            name="new-path",
+            path="new/{{title}}",
+        )
+        original_bytes = b"original"
+        archive_bytes = b"archive"
+
+        doc = Document.objects.create(
+            title="document",
+            mime_type="application/pdf",
+            checksum=hashlib.md5(original_bytes).hexdigest(),
+            archive_checksum=hashlib.md5(archive_bytes).hexdigest(),
+            filename="old/document.pdf",
+            archive_filename="old/document.pdf",
+            storage_path=old_storage_path,
+        )
+        create_source_path_directory(doc.source_path)
+        doc.source_path.write_bytes(original_bytes)
+        create_source_path_directory(doc.archive_path)
+        doc.archive_path.write_bytes(archive_bytes)
+
+        stale_doc = Document.objects.get(pk=doc.pk)
+        fresh_doc = Document.objects.get(pk=doc.pk)
+        fresh_doc.storage_path = new_storage_path
+        fresh_doc.save()
+        doc.refresh_from_db()
+        self.assertEqual(doc.filename, "new/document.pdf")
+        self.assertEqual(doc.archive_filename, "new/document.pdf")
+
+        stale_doc.storage_path = new_storage_path
+        stale_doc.save()
+
+        doc.refresh_from_db()
+        self.assertEqual(doc.filename, "new/document.pdf")
+        self.assertEqual(doc.archive_filename, "new/document.pdf")
+        self.assertIsFile(doc.source_path)
+        self.assertIsFile(doc.archive_path)
+        self.assertIsNotFile(settings.ORIGINALS_DIR / "old" / "document.pdf")
+        self.assertIsNotFile(settings.ARCHIVE_DIR / "old" / "document.pdf")
+
    @override_settings(FILENAME_FORMAT="{correspondent}/{correspondent}")
    def test_document_delete(self) -> None:
        document = Document()
--- a/src/documents/tests/test_management.py
+++ b/src/documents/tests/test_management.py
@@ -1,7 +1,10 @@
+from __future__ import annotations
+
 import filecmp
 import shutil
 from io import StringIO
 from pathlib import Path
+from typing import TYPE_CHECKING
 from unittest import mock

 import pytest
@@ -11,6 +14,9 @@ from django.core.management import call_command
 from django.test import TestCase
 from django.test import override_settings

+if TYPE_CHECKING:
+    from pytest_mock import MockerFixture
+
 from documents.file_handling import generate_filename
 from documents.models import Document
 from documents.tasks import update_document_content_maybe_archive_file
@@ -35,7 +41,7 @@ class TestArchiver(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
        doc = self.make_models()
        shutil.copy(sample_file, Path(self.dirs.originals_dir) / f"{doc.id:07}.pdf")

-        call_command("document_archiver", "--processes", "1")
+        call_command("document_archiver", "--processes", "1", skip_checks=True)

    def test_handle_document(self) -> None:
        doc = self.make_models()
@@ -100,12 +106,12 @@ class TestArchiver(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
 class TestMakeIndex(TestCase):
    @mock.patch("documents.management.commands.document_index.index_reindex")
    def test_reindex(self, m) -> None:
-        call_command("document_index", "reindex")
+        call_command("document_index", "reindex", skip_checks=True)
        m.assert_called_once()

    @mock.patch("documents.management.commands.document_index.index_optimize")
    def test_optimize(self, m) -> None:
-        call_command("document_index", "optimize")
+        call_command("document_index", "optimize", skip_checks=True)
        m.assert_called_once()


@@ -122,7 +128,7 @@ class TestRenamer(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
        Path(doc.archive_path).touch()

        with override_settings(FILENAME_FORMAT="{correspondent}/{title}"):
-            call_command("document_renamer")
+            call_command("document_renamer", skip_checks=True)

        doc2 = Document.objects.get(id=doc.id)

@@ -135,14 +141,32 @@ class TestRenamer(DirectoriesMixin, FileSystemAssertsMixin, TestCase):


@pytest.mark.management
-class TestCreateClassifier(TestCase):
-    @mock.patch(
-        "documents.management.commands.document_create_classifier.train_classifier",
-    )
-    def test_create_classifier(self, m) -> None:
-        call_command("document_create_classifier")
+class TestCreateClassifier:
+    def test_create_classifier(self, mocker: MockerFixture) -> None:
+        m = mocker.patch(
+            "documents.management.commands.document_create_classifier.train_classifier",
+        )

-        m.assert_called_once()
+        call_command("document_create_classifier", skip_checks=True)
+
+        m.assert_called_once_with(scheduled=False, status_callback=mocker.ANY)
+        assert callable(m.call_args.kwargs["status_callback"])
+
+    def test_create_classifier_callback_output(self, mocker: MockerFixture) -> None:
+        """Callback passed to train_classifier writes each phase message to the console."""
+        m = mocker.patch(
+            "documents.management.commands.document_create_classifier.train_classifier",
+        )
+
+        def invoke_callback(**kwargs):
+            kwargs["status_callback"]("Vectorizing document content...")
+
+        m.side_effect = invoke_callback
+
+        stdout = StringIO()
+        call_command("document_create_classifier", skip_checks=True, stdout=stdout)
+
+        assert "Vectorizing document content..." in stdout.getvalue()


@pytest.mark.management
@@ -152,7 +176,7 @@ class TestConvertMariaDBUUID(TestCase):
        m.alter_field.return_value = None

        stdout = StringIO()
-        call_command("convert_mariadb_uuid", stdout=stdout)
+        call_command("convert_mariadb_uuid", stdout=stdout, skip_checks=True)

        m.assert_called_once()

@@ -167,6 +191,6 @@ class TestPruneAuditLogs(TestCase):
            object_id=1,
            action=LogEntry.Action.CREATE,
        )
-        call_command("prune_audit_logs")
+        call_command("prune_audit_logs", skip_checks=True)

        self.assertEqual(LogEntry.objects.count(), 0)
--- a/src/documents/tests/test_management_exporter.py
+++ b/src/documents/tests/test_management_exporter.py
@@ -180,7 +180,7 @@ class TestExportImport(
        if data_only:
            args += ["--data-only"]

-        call_command(*args)
+        call_command(*args, skip_checks=True)

        with (self.target / "manifest.json").open() as f:
            manifest = json.load(f)
@@ -272,7 +272,12 @@ class TestExportImport(
            GroupObjectPermission.objects.all().delete()
            self.assertEqual(Document.objects.count(), 0)

-            call_command("document_importer", "--no-progress-bar", self.target)
+            call_command(
+                "document_importer",
+                "--no-progress-bar",
+                self.target,
+                skip_checks=True,
+            )
            self.assertEqual(Document.objects.count(), 4)
            self.assertEqual(Tag.objects.count(), 1)
            self.assertEqual(Correspondent.objects.count(), 1)
@@ -438,7 +443,8 @@ class TestExportImport(
            filename="0000010.pdf",
            mime_type="application/pdf",
        )
-        self.assertRaises(FileNotFoundError, call_command, "document_exporter", target)
+        with self.assertRaises(FileNotFoundError):
+            call_command("document_exporter", target, skip_checks=True)

    def test_export_zipped(self) -> None:
        """
@@ -458,7 +464,7 @@ class TestExportImport(

        args = ["document_exporter", self.target, "--zip"]

-        call_command(*args)
+        call_command(*args, skip_checks=True)

        expected_file = str(
            self.target / f"export-{timezone.localdate().isoformat()}.zip",
@@ -493,7 +499,7 @@ class TestExportImport(
        with override_settings(
            FILENAME_FORMAT="{created_year}/{correspondent}/{title}",
        ):
-            call_command(*args)
+            call_command(*args, skip_checks=True)

        expected_file = str(
            self.target / f"export-{timezone.localdate().isoformat()}.zip",
@@ -538,7 +544,7 @@ class TestExportImport(

        args = ["document_exporter", self.target, "--zip", "--delete"]

-        call_command(*args)
+        call_command(*args, skip_checks=True)

        expected_file = str(
            self.target / f"export-{timezone.localdate().isoformat()}.zip",
@@ -565,7 +571,7 @@ class TestExportImport(
        args = ["document_exporter", "/tmp/foo/bar"]

        with self.assertRaises(CommandError) as e:
-            call_command(*args)
+            call_command(*args, skip_checks=True)

        self.assertEqual("That path doesn't exist", str(e.exception))

@@ -583,7 +589,7 @@ class TestExportImport(
            args = ["document_exporter", tmp_file.name]

            with self.assertRaises(CommandError) as e:
-                call_command(*args)
+                call_command(*args, skip_checks=True)

            self.assertEqual("That path isn't a directory", str(e.exception))

@@ -602,7 +608,7 @@ class TestExportImport(
            args = ["document_exporter", tmp_dir]

            with self.assertRaises(CommandError) as e:
-                call_command(*args)
+                call_command(*args, skip_checks=True)

            self.assertEqual(
                "That path doesn't appear to be writable",
@@ -647,7 +653,12 @@ class TestExportImport(
            self.assertEqual(Document.objects.count(), 4)
            Document.objects.all().delete()
            self.assertEqual(Document.objects.count(), 0)
-            call_command("document_importer", "--no-progress-bar", self.target)
+            call_command(
+                "document_importer",
+                "--no-progress-bar",
+                self.target,
+                skip_checks=True,
+            )
            self.assertEqual(Document.objects.count(), 4)

    def test_no_thumbnail(self) -> None:
@@ -690,7 +701,12 @@ class TestExportImport(
            self.assertEqual(Document.objects.count(), 4)
            Document.objects.all().delete()
            self.assertEqual(Document.objects.count(), 0)
-            call_command("document_importer", "--no-progress-bar", self.target)
+            call_command(
+                "document_importer",
+                "--no-progress-bar",
+                self.target,
+                skip_checks=True,
+            )
            self.assertEqual(Document.objects.count(), 4)

    def test_split_manifest(self) -> None:
@@ -721,7 +737,12 @@ class TestExportImport(
            Document.objects.all().delete()
            CustomFieldInstance.objects.all().delete()
            self.assertEqual(Document.objects.count(), 0)
-            call_command("document_importer", "--no-progress-bar", self.target)
+            call_command(
+                "document_importer",
+                "--no-progress-bar",
+                self.target,
+                skip_checks=True,
+            )
            self.assertEqual(Document.objects.count(), 4)
            self.assertEqual(CustomFieldInstance.objects.count(), 1)

@@ -746,7 +767,12 @@ class TestExportImport(
            self.assertEqual(Document.objects.count(), 4)
            Document.objects.all().delete()
            self.assertEqual(Document.objects.count(), 0)
-            call_command("document_importer", "--no-progress-bar", self.target)
+            call_command(
+                "document_importer",
+                "--no-progress-bar",
+                self.target,
+                skip_checks=True,
+            )
            self.assertEqual(Document.objects.count(), 4)

    def test_folder_prefix_with_split(self) -> None:
@@ -771,7 +797,12 @@ class TestExportImport(
            self.assertEqual(Document.objects.count(), 4)
            Document.objects.all().delete()
            self.assertEqual(Document.objects.count(), 0)
-            call_command("document_importer", "--no-progress-bar", self.target)
+            call_command(
+                "document_importer",
+                "--no-progress-bar",
+                self.target,
+                skip_checks=True,
+            )
            self.assertEqual(Document.objects.count(), 4)

    def test_import_db_transaction_failed(self) -> None:
@@ -813,7 +844,12 @@ class TestExportImport(
            self.user = User.objects.create(username="temp_admin")

            with self.assertRaises(IntegrityError):
-                call_command("document_importer", "--no-progress-bar", self.target)
+                call_command(
+                    "document_importer",
+                    "--no-progress-bar",
+                    self.target,
+                    skip_checks=True,
+                )

            self.assertEqual(ContentType.objects.count(), num_content_type_objects)
            self.assertEqual(Permission.objects.count(), num_permission_objects + 1)
@@ -864,6 +900,7 @@ class TestExportImport(
            "--no-progress-bar",
            "--data-only",
            self.target,
+            skip_checks=True,
        )

        self.assertEqual(Document.objects.all().count(), 4)
@@ -923,6 +960,7 @@ class TestCryptExportImport(
            "--passphrase",
            "securepassword",
            self.target,
+            skip_checks=True,
        )

        self.assertIsFile(self.target / "metadata.json")
@@ -948,6 +986,7 @@ class TestCryptExportImport(
            "--passphrase",
            "securepassword",
            self.target,
+            skip_checks=True,
        )

        account = MailAccount.objects.first()
@@ -976,6 +1015,7 @@ class TestCryptExportImport(
            "--passphrase",
            "securepassword",
            self.target,
+            skip_checks=True,
        )

        with self.assertRaises(CommandError) as err:
@@ -983,6 +1023,7 @@ class TestCryptExportImport(
                "document_importer",
                "--no-progress-bar",
                self.target,
+                skip_checks=True,
            )
            self.assertEqual(
                err.msg,
@@ -1014,6 +1055,7 @@ class TestCryptExportImport(
            "--no-progress-bar",
            str(self.target),
            stdout=stdout,
+            skip_checks=True,
        )
        stdout.seek(0)
        self.assertIn(
--- a/src/documents/tests/test_management_fuzzy.py
+++ b/src/documents/tests/test_management_fuzzy.py
@@ -21,6 +21,7 @@ class TestFuzzyMatchCommand(TestCase):
            *args,
            stdout=stdout,
            stderr=stderr,
+            skip_checks=True,
            **kwargs,
        )
        return stdout.getvalue(), stderr.getvalue()
--- a/src/documents/tests/test_management_importer.py
+++ b/src/documents/tests/test_management_importer.py
@@ -41,6 +41,7 @@ class TestCommandImport(
                "document_importer",
                "--no-progress-bar",
                str(self.dirs.scratch_dir),
+                skip_checks=True,
            )
        self.assertIn(
            "That directory doesn't appear to contain a manifest.json file.",
@@ -67,6 +68,7 @@ class TestCommandImport(
                "document_importer",
                "--no-progress-bar",
                str(self.dirs.scratch_dir),
+                skip_checks=True,
            )
        self.assertIn(
            "The manifest file contains a record which does not refer to an actual document file.",
@@ -96,6 +98,7 @@ class TestCommandImport(
                "document_importer",
                "--no-progress-bar",
                str(self.dirs.scratch_dir),
+                skip_checks=True,
            )
        self.assertIn('The manifest file refers to "noexist.pdf"', str(e.exception))

@@ -157,7 +160,7 @@ class TestCommandImport(
            - CommandError is raised indicating the issue
        """
        with self.assertRaises(CommandError) as cm:
-            call_command("document_importer", Path("/tmp/notapath"))
+            call_command("document_importer", Path("/tmp/notapath"), skip_checks=True)
        self.assertIn("That path doesn't exist", str(cm.exception))

    def test_import_source_not_readable(self) -> None:
@@ -173,7 +176,7 @@ class TestCommandImport(
            path = Path(temp_dir)
            path.chmod(0o222)
            with self.assertRaises(CommandError) as cm:
-                call_command("document_importer", path)
+                call_command("document_importer", path, skip_checks=True)
            self.assertIn(
                "That path doesn't appear to be readable",
                str(cm.exception),
@@ -193,7 +196,12 @@ class TestCommandImport(
        self.assertIsNotFile(path)

        with self.assertRaises(CommandError) as e:
-            call_command("document_importer", "--no-progress-bar", str(path))
+            call_command(
+                "document_importer",
+                "--no-progress-bar",
+                str(path),
+                skip_checks=True,
+            )
        self.assertIn("That path doesn't exist", str(e.exception))

    def test_import_files_exist(self) -> None:
@@ -218,6 +226,7 @@ class TestCommandImport(
                "--no-progress-bar",
                str(self.dirs.scratch_dir),
                stdout=stdout,
+                skip_checks=True,
            )
        stdout.seek(0)
        self.assertIn(
@@ -246,6 +255,7 @@ class TestCommandImport(
                "--no-progress-bar",
                str(self.dirs.scratch_dir),
                stdout=stdout,
+                skip_checks=True,
            )
        stdout.seek(0)
        self.assertIn(
@@ -282,6 +292,7 @@ class TestCommandImport(
                "--no-progress-bar",
                str(self.dirs.scratch_dir),
                stdout=stdout,
+                skip_checks=True,
            )
        stdout.seek(0)
        self.assertIn(
@@ -309,6 +320,7 @@ class TestCommandImport(
                "--no-progress-bar",
                str(self.dirs.scratch_dir),
                stdout=stdout,
+                skip_checks=True,
            )
        stdout.seek(0)
        stdout_str = str(stdout.read())
@@ -338,6 +350,7 @@ class TestCommandImport(
                "--no-progress-bar",
                str(self.dirs.scratch_dir),
                stdout=stdout,
+                skip_checks=True,
            )
        stdout.seek(0)
        stdout_str = str(stdout.read())
@@ -377,6 +390,7 @@ class TestCommandImport(
                "--no-progress-bar",
                str(zip_path),
                stdout=stdout,
+                skip_checks=True,
            )
        stdout.seek(0)
        stdout_str = str(stdout.read())
--- a/src/documents/tests/test_management_retagger.py
+++ b/src/documents/tests/test_management_retagger.py
@@ -139,7 +139,7 @@ class TestRetaggerTags(DirectoriesMixin):
    @pytest.mark.usefixtures("documents")
    def test_add_tags(self, tags: TagTuple) -> None:
        tag_first, tag_second, *_ = tags
-        call_command("document_retagger", "--tags")
+        call_command("document_retagger", "--tags", skip_checks=True)
        d_first, d_second, d_unrelated, d_auto = _get_docs()

        assert d_first.tags.count() == 1
@@ -158,7 +158,7 @@ class TestRetaggerTags(DirectoriesMixin):
        tag_first, tag_second, tag_inbox, tag_no_match, _ = tags
        d1.tags.add(tag_second)

-        call_command("document_retagger", "--tags", "--overwrite")
+        call_command("document_retagger", "--tags", "--overwrite", skip_checks=True)

        d_first, d_second, d_unrelated, d_auto = _get_docs()

@@ -180,7 +180,13 @@ class TestRetaggerTags(DirectoriesMixin):
        ],
    )
    def test_suggest_does_not_apply_tags(self, extra_args: list[str]) -> None:
-        call_command("document_retagger", "--tags", "--suggest", *extra_args)
+        call_command(
+            "document_retagger",
+            "--tags",
+            "--suggest",
+            *extra_args,
+            skip_checks=True,
+        )
        d_first, d_second, _, d_auto = _get_docs()

        assert d_first.tags.count() == 0
@@ -199,7 +205,7 @@ class TestRetaggerDocumentType(DirectoriesMixin):
    @pytest.mark.usefixtures("documents")
    def test_add_type(self, document_types: DocumentTypeTuple) -> None:
        dt_first, dt_second = document_types
-        call_command("document_retagger", "--document_type")
+        call_command("document_retagger", "--document_type", skip_checks=True)
        d_first, d_second, _, _ = _get_docs()

        assert d_first.document_type == dt_first
@@ -214,7 +220,13 @@ class TestRetaggerDocumentType(DirectoriesMixin):
        ],
    )
    def test_suggest_does_not_apply_document_type(self, extra_args: list[str]) -> None:
-        call_command("document_retagger", "--document_type", "--suggest", *extra_args)
+        call_command(
+            "document_retagger",
+            "--document_type",
+            "--suggest",
+            *extra_args,
+            skip_checks=True,
+        )
        d_first, d_second, _, _ = _get_docs()

        assert d_first.document_type is None
@@ -243,7 +255,12 @@ class TestRetaggerDocumentType(DirectoriesMixin):
        )
        doc = DocumentFactory(content="ambiguous content")

-        call_command("document_retagger", "--document_type", *use_first_flag)
+        call_command(
+            "document_retagger",
+            "--document_type",
+            *use_first_flag,
+            skip_checks=True,
+        )

        doc.refresh_from_db()
        assert (doc.document_type is not None) is expects_assignment
@@ -260,7 +277,7 @@ class TestRetaggerCorrespondent(DirectoriesMixin):
    @pytest.mark.usefixtures("documents")
    def test_add_correspondent(self, correspondents: CorrespondentTuple) -> None:
        c_first, c_second = correspondents
-        call_command("document_retagger", "--correspondent")
+        call_command("document_retagger", "--correspondent", skip_checks=True)
        d_first, d_second, _, _ = _get_docs()

        assert d_first.correspondent == c_first
@@ -275,7 +292,13 @@ class TestRetaggerCorrespondent(DirectoriesMixin):
        ],
    )
    def test_suggest_does_not_apply_correspondent(self, extra_args: list[str]) -> None:
-        call_command("document_retagger", "--correspondent", "--suggest", *extra_args)
+        call_command(
+            "document_retagger",
+            "--correspondent",
+            "--suggest",
+            *extra_args,
+            skip_checks=True,
+        )
        d_first, d_second, _, _ = _get_docs()

        assert d_first.correspondent is None
@@ -304,7 +327,12 @@ class TestRetaggerCorrespondent(DirectoriesMixin):
        )
        doc = DocumentFactory(content="ambiguous content")

-        call_command("document_retagger", "--correspondent", *use_first_flag)
+        call_command(
+            "document_retagger",
+            "--correspondent",
+            *use_first_flag,
+            skip_checks=True,
+        )

        doc.refresh_from_db()
        assert (doc.correspondent is not None) is expects_assignment
@@ -326,7 +354,7 @@ class TestRetaggerStoragePath(DirectoriesMixin):
        THEN matching documents get the correct path; existing path is unchanged
        """
        sp1, sp2, sp3 = storage_paths
-        call_command("document_retagger", "--storage_path")
+        call_command("document_retagger", "--storage_path", skip_checks=True)
        d_first, d_second, d_unrelated, d_auto = _get_docs()

        assert d_first.storage_path == sp2
@@ -342,7 +370,12 @@ class TestRetaggerStoragePath(DirectoriesMixin):
        THEN the existing path is replaced by the newly matched path
        """
        sp1, sp2, _ = storage_paths
-        call_command("document_retagger", "--storage_path", "--overwrite")
+        call_command(
+            "document_retagger",
+            "--storage_path",
+            "--overwrite",
+            skip_checks=True,
+        )
        d_first, d_second, d_unrelated, d_auto = _get_docs()

        assert d_first.storage_path == sp2
@@ -373,7 +406,12 @@ class TestRetaggerStoragePath(DirectoriesMixin):
        )
        doc = DocumentFactory(content="ambiguous content")

-        call_command("document_retagger", "--storage_path", *use_first_flag)
+        call_command(
+            "document_retagger",
+            "--storage_path",
+            *use_first_flag,
+            skip_checks=True,
+        )

        doc.refresh_from_db()
        assert (doc.storage_path is not None) is expects_assignment
@@ -402,7 +440,13 @@ class TestRetaggerIdRange(DirectoriesMixin):
        expected_count: int,
    ) -> None:
        DocumentFactory(content="NOT the first document")
-        call_command("document_retagger", "--tags", "--id-range", *id_range_args)
+        call_command(
+            "document_retagger",
+            "--tags",
+            "--id-range",
+            *id_range_args,
+            skip_checks=True,
+        )
        tag_first, *_ = tags
        assert Document.objects.filter(tags__id=tag_first.id).count() == expected_count

@@ -416,7 +460,7 @@ class TestRetaggerIdRange(DirectoriesMixin):
    )
    def test_id_range_invalid_arguments_raise(self, args: list[str]) -> None:
        with pytest.raises((CommandError, SystemExit)):
-            call_command("document_retagger", *args)
+            call_command("document_retagger", *args, skip_checks=True)


 # ---------------------------------------------------------------------------
@@ -430,12 +474,12 @@ class TestRetaggerEdgeCases(DirectoriesMixin):
    @pytest.mark.usefixtures("documents")
    def test_no_targets_exits_cleanly(self) -> None:
        """Calling the retagger with no classifier targets should not raise."""
-        call_command("document_retagger")
+        call_command("document_retagger", skip_checks=True)

    @pytest.mark.usefixtures("documents")
    def test_inbox_only_skips_non_inbox_documents(self) -> None:
        """--inbox-only must restrict processing to documents with an inbox tag."""
-        call_command("document_retagger", "--tags", "--inbox-only")
+        call_command("document_retagger", "--tags", "--inbox-only", skip_checks=True)
        d_first, _, d_unrelated, _ = _get_docs()

        assert d_first.tags.count() == 0
--- a/src/documents/tests/test_management_superuser.py
+++ b/src/documents/tests/test_management_superuser.py
@@ -20,6 +20,7 @@ class TestManageSuperUser(DirectoriesMixin, TestCase):
                "--no-color",
                stdout=out,
                stderr=StringIO(),
+                skip_checks=True,
            )
        return out.getvalue()

--- a/src/documents/tests/test_management_thumbnails.py
+++ b/src/documents/tests/test_management_thumbnails.py
@@ -85,13 +85,20 @@ class TestMakeThumbnails(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
    def test_command(self) -> None:
        self.assertIsNotFile(self.d1.thumbnail_path)
        self.assertIsNotFile(self.d2.thumbnail_path)
-        call_command("document_thumbnails", "--processes", "1")
+        call_command("document_thumbnails", "--processes", "1", skip_checks=True)
        self.assertIsFile(self.d1.thumbnail_path)
        self.assertIsFile(self.d2.thumbnail_path)

    def test_command_documentid(self) -> None:
        self.assertIsNotFile(self.d1.thumbnail_path)
        self.assertIsNotFile(self.d2.thumbnail_path)
-        call_command("document_thumbnails", "--processes", "1", "-d", f"{self.d1.id}")
+        call_command(
+            "document_thumbnails",
+            "--processes",
+            "1",
+            "-d",
+            f"{self.d1.id}",
+            skip_checks=True,
+        )
        self.assertIsFile(self.d1.thumbnail_path)
        self.assertIsNotFile(self.d2.thumbnail_path)
--- a/src/documents/tests/test_parsers.py
+++ b/src/documents/tests/test_parsers.py
@@ -1,130 +1,14 @@
-from tempfile import TemporaryDirectory
-from unittest import mock
-
-from django.apps import apps
 from django.test import TestCase
 from django.test import override_settings

 from documents.parsers import get_default_file_extension
-from documents.parsers import get_parser_class_for_mime_type
 from documents.parsers import get_supported_file_extensions
 from documents.parsers import is_file_ext_supported
+from paperless.parsers.registry import get_parser_registry
+from paperless.parsers.registry import reset_parser_registry
+from paperless.parsers.tesseract import RasterisedDocumentParser
 from paperless.parsers.text import TextDocumentParser
-from paperless_tesseract.parsers import RasterisedDocumentParser
-from paperless_tika.parsers import TikaDocumentParser
-
-
-class TestParserDiscovery(TestCase):
-    @mock.patch("documents.parsers.document_consumer_declaration.send")
-    def test_get_parser_class_1_parser(self, m, *args) -> None:
-        """
-        GIVEN:
-            - Parser declared for a given mimetype
-        WHEN:
-            - Attempt to get parser for the mimetype
-        THEN:
-            - Declared parser class is returned
-        """
-
-        class DummyParser:
-            pass
-
-        m.return_value = (
-            (
-                None,
-                {
-                    "weight": 0,
-                    "parser": DummyParser,
-                    "mime_types": {"application/pdf": ".pdf"},
-                },
-            ),
-        )
-
-        self.assertEqual(get_parser_class_for_mime_type("application/pdf"), DummyParser)
-
-    @mock.patch("documents.parsers.document_consumer_declaration.send")
-    def test_get_parser_class_n_parsers(self, m, *args) -> None:
-        """
-        GIVEN:
-            - Two parsers declared for a given mimetype
-            - Second parser has a higher weight
-        WHEN:
-            - Attempt to get parser for the mimetype
-        THEN:
-            - Second parser class is returned
-        """
-
-        class DummyParser1:
-            pass
-
-        class DummyParser2:
-            pass
-
-        m.return_value = (
-            (
-                None,
-                {
-                    "weight": 0,
-                    "parser": DummyParser1,
-                    "mime_types": {"application/pdf": ".pdf"},
-                },
-            ),
-            (
-                None,
-                {
-                    "weight": 1,
-                    "parser": DummyParser2,
-                    "mime_types": {"application/pdf": ".pdf"},
-                },
-            ),
-        )
-
-        self.assertEqual(
-            get_parser_class_for_mime_type("application/pdf"),
-            DummyParser2,
-        )
-
-    @mock.patch("documents.parsers.document_consumer_declaration.send")
-    def test_get_parser_class_0_parsers(self, m, *args) -> None:
-        """
-        GIVEN:
-            - No parsers are declared
-        WHEN:
-            - Attempt to get parser for the mimetype
-        THEN:
-            - No parser class is returned
-        """
-        m.return_value = []
-        with TemporaryDirectory():
-            self.assertIsNone(get_parser_class_for_mime_type("application/pdf"))
-
-    @mock.patch("documents.parsers.document_consumer_declaration.send")
-    def test_get_parser_class_no_valid_parser(self, m, *args) -> None:
-        """
-        GIVEN:
-            - No parser declared for a given mimetype
-            - Parser declared for a different mimetype
-        WHEN:
-            - Attempt to get parser for the given mimetype
-        THEN:
-            - No parser class is returned
-        """
-
-        class DummyParser:
-            pass
-
-        m.return_value = (
-            (
-                None,
-                {
-                    "weight": 0,
-                    "parser": DummyParser,
-                    "mime_types": {"application/pdf": ".pdf"},
-                },
-            ),
-        )
-
-        self.assertIsNone(get_parser_class_for_mime_type("image/tiff"))
+from paperless.parsers.tika import TikaDocumentParser


 class TestParserAvailability(TestCase):
@@ -151,7 +35,7 @@ class TestParserAvailability(TestCase):
            self.assertIn(ext, supported_exts)
            self.assertEqual(get_default_file_extension(mime_type), ext)
            self.assertIsInstance(
-                get_parser_class_for_mime_type(mime_type)(logging_group=None),
+                get_parser_registry().get_parser_for_file(mime_type, "")(),
                RasterisedDocumentParser,
            )

@@ -175,7 +59,7 @@ class TestParserAvailability(TestCase):
            self.assertIn(ext, supported_exts)
            self.assertEqual(get_default_file_extension(mime_type), ext)
            self.assertIsInstance(
-                get_parser_class_for_mime_type(mime_type)(logging_group=None),
+                get_parser_registry().get_parser_for_file(mime_type, "")(),
                TextDocumentParser,
            )

@@ -198,22 +82,23 @@ class TestParserAvailability(TestCase):
            ),
        ]

-        # Force the app ready to notice the settings override
-        with override_settings(TIKA_ENABLED=True, INSTALLED_APPS=["paperless_tika"]):
-            app = apps.get_app_config("paperless_tika")
-            app.ready()
+        self.addCleanup(reset_parser_registry)
+
+        # Reset and rebuild the registry with Tika enabled.
+        with override_settings(TIKA_ENABLED=True):
+            reset_parser_registry()
            supported_exts = get_supported_file_extensions()

-        for mime_type, ext in supported_mimes_and_exts:
-            self.assertIn(ext, supported_exts)
-            self.assertEqual(get_default_file_extension(mime_type), ext)
-            self.assertIsInstance(
-                get_parser_class_for_mime_type(mime_type)(logging_group=None),
-                TikaDocumentParser,
-            )
+            for mime_type, ext in supported_mimes_and_exts:
+                self.assertIn(ext, supported_exts)
+                self.assertEqual(get_default_file_extension(mime_type), ext)
+                self.assertIsInstance(
+                    get_parser_registry().get_parser_for_file(mime_type, "")(),
+                    TikaDocumentParser,
+                )

    def test_no_parser_for_mime(self) -> None:
-        self.assertIsNone(get_parser_class_for_mime_type("text/sdgsdf"))
+        self.assertIsNone(get_parser_registry().get_parser_for_file("text/sdgsdf", ""))

    def test_default_extension(self) -> None:
        # Test no parser declared still returns a an extension
--- a/src/documents/tests/test_workflows.py
+++ b/src/documents/tests/test_workflows.py
@@ -28,6 +28,7 @@ from rest_framework.test import APIClient
 from rest_framework.test import APITestCase

 from documents.file_handling import create_source_path_directory
+from documents.file_handling import generate_filename
 from documents.file_handling import generate_unique_filename
 from documents.signals.handlers import run_workflows
 from documents.workflows.webhooks import send_webhook
@@ -905,6 +906,121 @@ class TestWorkflows(
        expected_str = f"Document matched {trigger} from {w}"
        self.assertIn(expected_str, cm.output[0])

+    def test_workflow_assign_custom_field_keeps_storage_filename_in_sync(self) -> None:
+        """
+        GIVEN:
+            - Existing document with a storage path template that depends on a custom field
+            - Existing workflow triggered on document update assigning that custom field
+        WHEN:
+            - Workflow runs for the document
+        THEN:
+            - The database filename remains aligned with the moved file on disk
+        """
+        storage_path = StoragePath.objects.create(
+            name="workflow-custom-field-path",
+            path="{{ custom_fields|get_cf_value('Custom Field 1', 'none') }}/{{ title }}",
+        )
+        doc = Document.objects.create(
+            title="workflow custom field sync",
+            mime_type="application/pdf",
+            checksum="workflow-custom-field-sync",
+            storage_path=storage_path,
+            original_filename="workflow-custom-field-sync.pdf",
+        )
+        CustomFieldInstance.objects.create(
+            document=doc,
+            field=self.cf1,
+            value_text="initial",
+        )
+
+        generated = generate_unique_filename(doc)
+        destination = (settings.ORIGINALS_DIR / generated).resolve()
+        create_source_path_directory(destination)
+        shutil.copy(self.SAMPLE_DIR / "simple.pdf", destination)
+        Document.objects.filter(pk=doc.pk).update(filename=generated.as_posix())
+        doc.refresh_from_db()
+
+        trigger = WorkflowTrigger.objects.create(
+            type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED,
+        )
+        action = WorkflowAction.objects.create(
+            type=WorkflowAction.WorkflowActionType.ASSIGNMENT,
+            assign_custom_fields_values={self.cf1.pk: "cars"},
+        )
+        action.assign_custom_fields.add(self.cf1.pk)
+        workflow = Workflow.objects.create(
+            name="Workflow custom field filename sync",
+            order=0,
+        )
+        workflow.triggers.add(trigger)
+        workflow.actions.add(action)
+        workflow.save()
+
+        run_workflows(WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED, doc)
+
+        doc.refresh_from_db()
+        expected_filename = generate_filename(doc)
+        self.assertEqual(Path(doc.filename), expected_filename)
+        self.assertTrue(doc.source_path.is_file())
+
+    def test_workflow_document_updated_does_not_overwrite_filename(self) -> None:
+        """
+        GIVEN:
+            - A document whose filename has been updated in the DB by a concurrent
+              bulk_update_documents task (simulating update_filename_and_move_files
+              completing and writing the new filename to the DB)
+            - A stale in-memory document instance still holding the old filename
+            - An active DOCUMENT_UPDATED workflow
+        WHEN:
+            - run_workflows is called with the stale in-memory instance
+              (as would happen in the second concurrent bulk_update_documents task)
+        THEN:
+            - The DB filename is NOT overwritten with the stale in-memory value
+              (regression test for GH #12386 — the race window between
+              refresh_from_db and document.save in run_workflows)
+        """
+        trigger = WorkflowTrigger.objects.create(
+            type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED,
+        )
+        action = WorkflowAction.objects.create(
+            type=WorkflowAction.WorkflowActionType.ASSIGNMENT,
+            assign_title="Updated by workflow",
+        )
+        workflow = Workflow.objects.create(name="Race condition test workflow", order=0)
+        workflow.triggers.add(trigger)
+        workflow.actions.add(action)
+        workflow.save()
+
+        doc = Document.objects.create(
+            title="race condition test",
+            mime_type="application/pdf",
+            checksum="racecondition123",
+            original_filename="old.pdf",
+            filename="old/path/old.pdf",
+        )
+
+        # Simulate BUD-1 completing update_filename_and_move_files:
+        # the DB now holds the new filename while BUD-2's in-memory instance is stale.
+        new_filename = "new/path/new.pdf"
+        Document.global_objects.filter(pk=doc.pk).update(filename=new_filename)
+
+        # The stale instance still has filename="old/path/old.pdf" in memory.
+        # Mock refresh_from_db so the stale value persists through run_workflows,
+        # replicating the race window between refresh and save.
+        # Mock update_filename_and_move_files to prevent file-not-found errors
+        # since we are only testing DB state here.
+        with (
+            mock.patch(
+                "documents.signals.handlers.update_filename_and_move_files",
+            ),
+            mock.patch.object(Document, "refresh_from_db"),
+        ):
+            run_workflows(WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED, doc)
+
+        # The DB filename must not have been reverted to the stale old value.
+        doc.refresh_from_db()
+        self.assertEqual(doc.filename, new_filename)
+
    def test_document_added_workflow(self) -> None:
        trigger = WorkflowTrigger.objects.create(
            type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
--- a/src/documents/views.py
+++ b/src/documents/views.py
@@ -82,6 +82,7 @@ from rest_framework import serializers
 from rest_framework import status
 from rest_framework.decorators import action
 from rest_framework.exceptions import NotFound
+from rest_framework.exceptions import PermissionDenied
 from rest_framework.exceptions import ValidationError
 from rest_framework.filters import OrderingFilter
 from rest_framework.filters import SearchFilter
@@ -157,7 +158,6 @@ from documents.models import UiSettings
 from documents.models import Workflow
 from documents.models import WorkflowAction
 from documents.models import WorkflowTrigger
-from documents.parsers import get_parser_class_for_mime_type
 from documents.permissions import AcknowledgeTasksPermissions
 from documents.permissions import PaperlessAdminPermissions
 from documents.permissions import PaperlessNotePermissions
@@ -225,6 +225,7 @@ from paperless.celery import app as celery_app
 from paperless.config import AIConfig
 from paperless.config import GeneralConfig
 from paperless.models import ApplicationConfiguration
+from paperless.parsers.registry import get_parser_registry
 from paperless.serialisers import GroupSerializer
 from paperless.serialisers import UserSerializer
 from paperless.views import StandardPagination
@@ -1081,15 +1082,17 @@ class DocumentViewSet(
        if not Path(file).is_file():
            return None

-        parser_class = get_parser_class_for_mime_type(mime_type)
+        parser_class = get_parser_registry().get_parser_for_file(
+            mime_type,
+            Path(file).name,
+            Path(file),
+        )
        if parser_class:
-            parser = parser_class(progress_callback=None, logging_group=None)
-
            try:
-                return parser.extract_metadata(file, mime_type)
+                with parser_class() as parser:
+                    return parser.extract_metadata(file, mime_type)
            except Exception:  # pragma: no cover
                logger.exception(f"Issue getting metadata for {file}")
-                # TODO: cover GPG errors, remove later.
                return []
        else:  # pragma: no cover
            logger.warning(f"No parser for {mime_type}")
@@ -1328,6 +1331,7 @@ class DocumentViewSet(
        methods=["get", "post", "delete"],
        detail=True,
        permission_classes=[PaperlessNotePermissions],
+        pagination_class=None,
        filter_backends=[],
    )
    def notes(self, request, pk=None):
@@ -1965,11 +1969,28 @@ class UnifiedSearchViewSet(DocumentViewSet):
        filtered_queryset = super().filter_queryset(queryset)

        if self._is_search_request():
-            from documents import index
-
            if "query" in self.request.query_params:
+                from documents import index
+
                query_class = index.DelayedFullTextQuery
            elif "more_like_id" in self.request.query_params:
+                try:
+                    more_like_doc_id = int(self.request.query_params["more_like_id"])
+                    more_like_doc = Document.objects.select_related("owner").get(
+                        pk=more_like_doc_id,
+                    )
+                except (TypeError, ValueError, Document.DoesNotExist):
+                    raise PermissionDenied(_("Invalid more_like_id"))
+
+                if not has_perms_owner_aware(
+                    self.request.user,
+                    "view_document",
+                    more_like_doc,
+                ):
+                    raise PermissionDenied(_("Insufficient permissions."))
+
+                from documents import index
+
                query_class = index.DelayedMoreLikeThisQuery
            else:
                raise ValueError
@@ -2005,6 +2026,8 @@ class UnifiedSearchViewSet(DocumentViewSet):
                    return response
            except NotFound:
                raise
+            except PermissionDenied as e:
+                return HttpResponseForbidden(str(e.detail))
            except Exception as e:
                logger.warning(f"An error occurred listing search results: {e!s}")
                return HttpResponseBadRequest(
@@ -2943,13 +2966,21 @@ class GlobalSearchView(PassUserMixin):
        )
        groups = groups[:OBJECT_LIMIT]
        mail_rules = (
-            MailRule.objects.filter(name__icontains=query)
+            get_objects_for_user_owner_aware(
+                request.user,
+                "view_mailrule",
+                MailRule,
+            ).filter(name__icontains=query)
            if request.user.has_perm("paperless_mail.view_mailrule")
            else []
        )
        mail_rules = mail_rules[:OBJECT_LIMIT]
        mail_accounts = (
-            MailAccount.objects.filter(name__icontains=query)
+            get_objects_for_user_owner_aware(
+                request.user,
+                "view_mailaccount",
+                MailAccount,
+            ).filter(name__icontains=query)
            if request.user.has_perm("paperless_mail.view_mailaccount")
            else []
        )
@@ -3923,7 +3954,7 @@ class CustomFieldViewSet(PermissionsAwareDocumentCountMixin, ModelViewSet):
    document_count_through = CustomFieldInstance
    document_count_source_field = "field_id"

-    queryset = CustomField.objects.all().order_by("-created")
+    queryset = CustomField.objects.all().order_by("name")


@extend_schema_view(
--- a/src/documents/workflows/webhooks.py
+++ b/src/documents/workflows/webhooks.py
@@ -1,12 +1,14 @@
-import ipaddress
 import logging
-import socket
-from urllib.parse import urlparse

 import httpx
 from celery import shared_task
 from django.conf import settings

+from paperless.network import format_host_for_url
+from paperless.network import is_public_ip
+from paperless.network import resolve_hostname_ips
+from paperless.network import validate_outbound_http_url
+
 logger = logging.getLogger("paperless.workflows.webhooks")


@@ -34,23 +36,19 @@ class WebhookTransport(httpx.HTTPTransport):
            raise httpx.ConnectError("No hostname in request URL")

        try:
-            addr_info = socket.getaddrinfo(hostname, None)
-        except socket.gaierror as e:
-            raise httpx.ConnectError(f"Could not resolve hostname: {hostname}") from e
-
-        ips = [info[4][0] for info in addr_info if info and info[4]]
-        if not ips:
-            raise httpx.ConnectError(f"Could not resolve hostname: {hostname}")
+            ips = resolve_hostname_ips(hostname)
+        except ValueError as e:
+            raise httpx.ConnectError(str(e)) from e

        if not self.allow_internal:
            for ip_str in ips:
-                if not WebhookTransport.is_public_ip(ip_str):
+                if not is_public_ip(ip_str):
                    raise httpx.ConnectError(
                        f"Connection blocked: {hostname} resolves to a non-public address",
                    )

        ip_str = ips[0]
-        formatted_ip = self._format_ip_for_url(ip_str)
+        formatted_ip = format_host_for_url(ip_str)

        new_headers = httpx.Headers(request.headers)
        if "host" in new_headers:
@@ -69,40 +67,6 @@ class WebhookTransport(httpx.HTTPTransport):

        return super().handle_request(request)

-    def _format_ip_for_url(self, ip: str) -> str:
-        """
-        Format IP address for use in URL (wrap IPv6 in brackets)
-        """
-        try:
-            ip_obj = ipaddress.ip_address(ip)
-            if ip_obj.version == 6:
-                return f"[{ip}]"
-            return ip
-        except ValueError:
-            return ip
-
-    @staticmethod
-    def is_public_ip(ip: str | int) -> bool:
-        try:
-            obj = ipaddress.ip_address(ip)
-            return not (
-                obj.is_private
-                or obj.is_loopback
-                or obj.is_link_local
-                or obj.is_multicast
-                or obj.is_unspecified
-            )
-        except ValueError:  # pragma: no cover
-            return False
-
-    @staticmethod
-    def resolve_first_ip(host: str) -> str | None:
-        try:
-            info = socket.getaddrinfo(host, None)
-            return info[0][4][0] if info else None
-        except Exception:  # pragma: no cover
-            return None
-

@shared_task(
    retry_backoff=True,
@@ -118,21 +82,24 @@ def send_webhook(
    *,
    as_json: bool = False,
 ):
-    p = urlparse(url)
-    if p.scheme.lower() not in settings.WEBHOOKS_ALLOWED_SCHEMES or not p.hostname:
-        logger.warning("Webhook blocked: invalid scheme/hostname")
+    try:
+        parsed = validate_outbound_http_url(
+            url,
+            allowed_schemes=settings.WEBHOOKS_ALLOWED_SCHEMES,
+            allowed_ports=settings.WEBHOOKS_ALLOWED_PORTS,
+            # Internal-address checks happen in transport to preserve ConnectError behavior.
+            allow_internal=True,
+        )
+    except ValueError as e:
+        logger.warning("Webhook blocked: %s", e)
+        raise
+
+    hostname = parsed.hostname
+    if hostname is None:  # pragma: no cover
        raise ValueError("Invalid URL scheme or hostname.")

-    port = p.port or (443 if p.scheme == "https" else 80)
-    if (
-        len(settings.WEBHOOKS_ALLOWED_PORTS) > 0
-        and port not in settings.WEBHOOKS_ALLOWED_PORTS
-    ):
-        logger.warning("Webhook blocked: port not permitted")
-        raise ValueError("Destination port not permitted.")
-
    transport = WebhookTransport(
-        hostname=p.hostname,
+        hostname=hostname,
        allow_internal=settings.WEBHOOKS_ALLOW_INTERNAL_REQUESTS,
    )

--- a/src/locale/en_US/LC_MESSAGES/django.po
+++ b/src/locale/en_US/LC_MESSAGES/django.po
@@ -2,7 +2,7 @@ msgid ""
 msgstr ""
 "Project-Id-Version: paperless-ngx\n"
 "Report-Msgid-Bugs-To: \n"
-"POT-Creation-Date: 2026-03-12 15:43+0000\n"
+"POT-Creation-Date: 2026-03-22 13:54+0000\n"
 "PO-Revision-Date: 2022-02-17 04:17\n"
 "Last-Translator: \n"
 "Language-Team: English\n"
@@ -1299,7 +1299,9 @@ msgstr ""
 msgid "workflow runs"
 msgstr ""

-#: documents/serialisers.py:463 documents/serialisers.py:2470
+#: documents/serialisers.py:463 documents/serialisers.py:815
+#: documents/serialisers.py:2501 documents/views.py:1990
+#: paperless_mail/serialisers.py:143
 msgid "Insufficient permissions."
 msgstr ""

@@ -1307,39 +1309,39 @@ msgstr ""
 msgid "Invalid color."
 msgstr ""

-#: documents/serialisers.py:2093
+#: documents/serialisers.py:2124
 #, python-format
 msgid "File type %(type)s not supported"
 msgstr ""

-#: documents/serialisers.py:2137
+#: documents/serialisers.py:2168
 #, python-format
 msgid "Custom field id must be an integer: %(id)s"
 msgstr ""

-#: documents/serialisers.py:2144
+#: documents/serialisers.py:2175
 #, python-format
 msgid "Custom field with id %(id)s does not exist"
 msgstr ""

-#: documents/serialisers.py:2161 documents/serialisers.py:2171
+#: documents/serialisers.py:2192 documents/serialisers.py:2202
 msgid ""
 "Custom fields must be a list of integers or an object mapping ids to values."
 msgstr ""

-#: documents/serialisers.py:2166
+#: documents/serialisers.py:2197
 msgid "Some custom fields don't exist or were specified twice."
 msgstr ""

-#: documents/serialisers.py:2313
+#: documents/serialisers.py:2344
 msgid "Invalid variable detected."
 msgstr ""

-#: documents/serialisers.py:2526
+#: documents/serialisers.py:2557
 msgid "Duplicate document identifiers are not allowed."
 msgstr ""

-#: documents/serialisers.py:2556 documents/views.py:3565
+#: documents/serialisers.py:2587 documents/views.py:3596
 #, python-format
 msgid "Documents not found: %(ids)s"
 msgstr ""
@@ -1603,20 +1605,24 @@ msgstr ""
 msgid "Unable to parse URI {value}"
 msgstr ""

-#: documents/views.py:3577
+#: documents/views.py:1983
+msgid "Invalid more_like_id"
+msgstr ""
+
+#: documents/views.py:3608
 #, python-format
 msgid "Insufficient permissions to share document %(id)s."
 msgstr ""

-#: documents/views.py:3620
+#: documents/views.py:3651
 msgid "Bundle is already being processed."
 msgstr ""

-#: documents/views.py:3677
+#: documents/views.py:3708
 msgid "The share link bundle is still being prepared. Please try again later."
 msgstr ""

-#: documents/views.py:3687
+#: documents/views.py:3718
 msgid "The share link bundle is unavailable."
 msgstr ""

@@ -1856,151 +1862,151 @@ msgstr ""
 msgid "paperless application settings"
 msgstr ""

-#: paperless/settings/__init__.py:521
+#: paperless/settings/__init__.py:518
 msgid "English (US)"
 msgstr ""

-#: paperless/settings/__init__.py:522
+#: paperless/settings/__init__.py:519
 msgid "Arabic"
 msgstr ""

-#: paperless/settings/__init__.py:523
+#: paperless/settings/__init__.py:520
 msgid "Afrikaans"
 msgstr ""

-#: paperless/settings/__init__.py:524
+#: paperless/settings/__init__.py:521
 msgid "Belarusian"
 msgstr ""

-#: paperless/settings/__init__.py:525
+#: paperless/settings/__init__.py:522
 msgid "Bulgarian"
 msgstr ""

-#: paperless/settings/__init__.py:526
+#: paperless/settings/__init__.py:523
 msgid "Catalan"
 msgstr ""

-#: paperless/settings/__init__.py:527
+#: paperless/settings/__init__.py:524
 msgid "Czech"
 msgstr ""

-#: paperless/settings/__init__.py:528
+#: paperless/settings/__init__.py:525
 msgid "Danish"
 msgstr ""

-#: paperless/settings/__init__.py:529
+#: paperless/settings/__init__.py:526
 msgid "German"
 msgstr ""

-#: paperless/settings/__init__.py:530
+#: paperless/settings/__init__.py:527
 msgid "Greek"
 msgstr ""

-#: paperless/settings/__init__.py:531
+#: paperless/settings/__init__.py:528
 msgid "English (GB)"
 msgstr ""

-#: paperless/settings/__init__.py:532
+#: paperless/settings/__init__.py:529
 msgid "Spanish"
 msgstr ""

-#: paperless/settings/__init__.py:533
+#: paperless/settings/__init__.py:530
 msgid "Persian"
 msgstr ""

-#: paperless/settings/__init__.py:534
+#: paperless/settings/__init__.py:531
 msgid "Finnish"
 msgstr ""

-#: paperless/settings/__init__.py:535
+#: paperless/settings/__init__.py:532
 msgid "French"
 msgstr ""

-#: paperless/settings/__init__.py:536
+#: paperless/settings/__init__.py:533
 msgid "Hungarian"
 msgstr ""

-#: paperless/settings/__init__.py:537
+#: paperless/settings/__init__.py:534
 msgid "Indonesian"
 msgstr ""

-#: paperless/settings/__init__.py:538
+#: paperless/settings/__init__.py:535
 msgid "Italian"
 msgstr ""

-#: paperless/settings/__init__.py:539
+#: paperless/settings/__init__.py:536
 msgid "Japanese"
 msgstr ""

-#: paperless/settings/__init__.py:540
+#: paperless/settings/__init__.py:537
 msgid "Korean"
 msgstr ""

-#: paperless/settings/__init__.py:541
+#: paperless/settings/__init__.py:538
 msgid "Luxembourgish"
 msgstr ""

-#: paperless/settings/__init__.py:542
+#: paperless/settings/__init__.py:539
 msgid "Norwegian"
 msgstr ""

-#: paperless/settings/__init__.py:543
+#: paperless/settings/__init__.py:540
 msgid "Dutch"
 msgstr ""

-#: paperless/settings/__init__.py:544
+#: paperless/settings/__init__.py:541
 msgid "Polish"
 msgstr ""

-#: paperless/settings/__init__.py:545
+#: paperless/settings/__init__.py:542
 msgid "Portuguese (Brazil)"
 msgstr ""

-#: paperless/settings/__init__.py:546
+#: paperless/settings/__init__.py:543
 msgid "Portuguese"
 msgstr ""

-#: paperless/settings/__init__.py:547
+#: paperless/settings/__init__.py:544
 msgid "Romanian"
 msgstr ""

-#: paperless/settings/__init__.py:548
+#: paperless/settings/__init__.py:545
 msgid "Russian"
 msgstr ""

-#: paperless/settings/__init__.py:549
+#: paperless/settings/__init__.py:546
 msgid "Slovak"
 msgstr ""

-#: paperless/settings/__init__.py:550
+#: paperless/settings/__init__.py:547
 msgid "Slovenian"
 msgstr ""

-#: paperless/settings/__init__.py:551
+#: paperless/settings/__init__.py:548
 msgid "Serbian"
 msgstr ""

-#: paperless/settings/__init__.py:552
+#: paperless/settings/__init__.py:549
 msgid "Swedish"
 msgstr ""

-#: paperless/settings/__init__.py:553
+#: paperless/settings/__init__.py:550
 msgid "Turkish"
 msgstr ""

-#: paperless/settings/__init__.py:554
+#: paperless/settings/__init__.py:551
 msgid "Ukrainian"
 msgstr ""

-#: paperless/settings/__init__.py:555
+#: paperless/settings/__init__.py:552
 msgid "Vietnamese"
 msgstr ""

-#: paperless/settings/__init__.py:556
+#: paperless/settings/__init__.py:553
 msgid "Chinese Simplified"
 msgstr ""

-#: paperless/settings/__init__.py:557
+#: paperless/settings/__init__.py:554
 msgid "Chinese Traditional"
 msgstr ""

@@ -2046,7 +2052,7 @@ msgid ""
 "process all matching rules that you have defined."
 msgstr ""

-#: paperless_mail/apps.py:11
+#: paperless_mail/apps.py:8
 msgid "Paperless mail"
 msgstr ""

--- a/src/paperless/auth.py
+++ b/src/paperless/auth.py
@@ -83,3 +83,11 @@ class PaperlessBasicAuthentication(authentication.BasicAuthentication):
            raise exceptions.AuthenticationFailed("MFA required")

        return user_tuple
+
+    def authenticate_header(self, request):
+        auth_header = request.META.get("HTTP_AUTHORIZATION", "")
+        if auth_header.lower().startswith("basic "):
+            return super().authenticate_header(request)
+
+        # Still 401 for anonymous API access
+        return authentication.TokenAuthentication.keyword
--- a/src/paperless/checks.py
+++ b/src/paperless/checks.py
@@ -3,6 +3,7 @@ import os
 import pwd
 import shutil
 import stat
+import subprocess
 from pathlib import Path

 from django.conf import settings
@@ -299,3 +300,62 @@ def check_deprecated_db_settings(
        )

    return warnings
+
+
+@register()
+def check_remote_parser_configured(app_configs, **kwargs) -> list[Error]:
+    if settings.REMOTE_OCR_ENGINE == "azureai" and not (
+        settings.REMOTE_OCR_ENDPOINT and settings.REMOTE_OCR_API_KEY
+    ):
+        return [
+            Error(
+                "Azure AI remote parser requires endpoint and API key to be configured.",
+            ),
+        ]
+
+    return []
+
+
+def get_tesseract_langs():
+    proc = subprocess.run(
+        [shutil.which("tesseract"), "--list-langs"],
+        capture_output=True,
+    )
+
+    # Decode bytes to string, split on newlines, trim out the header
+    proc_lines = proc.stdout.decode("utf8", errors="ignore").strip().split("\n")[1:]
+
+    return [x.strip() for x in proc_lines]
+
+
+@register()
+def check_default_language_available(app_configs, **kwargs):
+    errs = []
+
+    if not settings.OCR_LANGUAGE:
+        errs.append(
+            Warning(
+                "No OCR language has been specified with PAPERLESS_OCR_LANGUAGE. "
+                "This means that tesseract will fallback to english.",
+            ),
+        )
+        return errs
+
+    # binaries_check in paperless will check and report if this doesn't exist
+    # So skip trying to do anything here and let that handle missing binaries
+    if shutil.which("tesseract") is not None:
+        installed_langs = get_tesseract_langs()
+
+        specified_langs = [x.strip() for x in settings.OCR_LANGUAGE.split("+")]
+
+        for lang in specified_langs:
+            if lang not in installed_langs:
+                errs.append(
+                    Error(
+                        f"The selected ocr language {lang} is "
+                        f"not installed. Paperless cannot OCR your documents "
+                        f"without it. Please fix PAPERLESS_OCR_LANGUAGE.",
+                    ),
+                )
+
+    return errs
--- a/src/paperless/config.py
+++ b/src/paperless/config.py
@@ -188,6 +188,7 @@ class AIConfig(BaseConfig):
    llm_model: str = dataclasses.field(init=False)
    llm_api_key: str = dataclasses.field(init=False)
    llm_endpoint: str = dataclasses.field(init=False)
+    llm_allow_internal_endpoints: bool = dataclasses.field(init=False)

    def __post_init__(self) -> None:
        app_config = self._get_config_instance()
@@ -203,6 +204,7 @@ class AIConfig(BaseConfig):
        self.llm_model = app_config.llm_model or settings.LLM_MODEL
        self.llm_api_key = app_config.llm_api_key or settings.LLM_API_KEY
        self.llm_endpoint = app_config.llm_endpoint or settings.LLM_ENDPOINT
+        self.llm_allow_internal_endpoints = settings.LLM_ALLOW_INTERNAL_ENDPOINTS

    @property
    def llm_index_enabled(self) -> bool:
--- a/src/paperless/network.py
+++ b/src/paperless/network.py
@@ -0,0 +1,76 @@
+import ipaddress
+import socket
+from collections.abc import Collection
+from urllib.parse import ParseResult
+from urllib.parse import urlparse
+
+
+def is_public_ip(ip: str | int) -> bool:
+    try:
+        obj = ipaddress.ip_address(ip)
+        return not (
+            obj.is_private
+            or obj.is_loopback
+            or obj.is_link_local
+            or obj.is_multicast
+            or obj.is_unspecified
+        )
+    except ValueError:  # pragma: no cover
+        return False
+
+
+def resolve_hostname_ips(hostname: str) -> list[str]:
+    try:
+        addr_info = socket.getaddrinfo(hostname, None)
+    except socket.gaierror as e:
+        raise ValueError(f"Could not resolve hostname: {hostname}") from e
+
+    ips = [info[4][0] for info in addr_info if info and info[4]]
+    if not ips:
+        raise ValueError(f"Could not resolve hostname: {hostname}")
+    return ips
+
+
+def format_host_for_url(host: str) -> str:
+    """
+    Format IP address for URL use (wrap IPv6 in brackets).
+    """
+    try:
+        ip_obj = ipaddress.ip_address(host)
+        if ip_obj.version == 6:
+            return f"[{host}]"
+        return host
+    except ValueError:
+        return host
+
+
+def validate_outbound_http_url(
+    url: str,
+    *,
+    allowed_schemes: Collection[str] = ("http", "https"),
+    allowed_ports: Collection[int] | None = None,
+    allow_internal: bool = False,
+) -> ParseResult:
+    parsed = urlparse(url)
+    scheme = parsed.scheme.lower()
+
+    if scheme not in allowed_schemes or not parsed.hostname:
+        raise ValueError("Invalid URL scheme or hostname.")
+
+    default_port = 443 if scheme == "https" else 80
+    try:
+        port = parsed.port or default_port
+    except ValueError as e:
+        raise ValueError("Invalid URL scheme or hostname.") from e
+
+    if allowed_ports and port not in allowed_ports:
+        raise ValueError("Destination port not permitted.")
+
+    if not allow_internal:
+        for ip_str in resolve_hostname_ips(parsed.hostname):
+            if not is_public_ip(ip_str):
+                raise ValueError(
+                    f"Connection blocked: {parsed.hostname} resolves to a non-public address",
+                )
+
+    return parsed
--- a/src/paperless/parsers/init.py
+++ b/src/paperless/parsers/init.py
@@ -35,6 +35,7 @@ Usage example (third-party parser)::

 from __future__ import annotations

+from dataclasses import dataclass
 from typing import TYPE_CHECKING
 from typing import Protocol
 from typing import Self
@@ -48,6 +49,7 @@ if TYPE_CHECKING:

 __all__ = [
    "MetadataEntry",
+    "ParserContext",
    "ParserProtocol",
 ]

@@ -73,6 +75,44 @@ class MetadataEntry(TypedDict):
    """String representation of the field value."""


+@dataclass(frozen=True, slots=True)
+class ParserContext:
+    """Immutable context passed to a parser before parse().
+
+    The consumer assembles this from the ingestion event and Django
+    settings, then calls ``parser.configure(context)`` before
+    ``parser.parse()``.  Parsers read only the fields relevant to them;
+    unneeded fields are ignored.
+
+    ``frozen=True`` prevents accidental mutation after the consumer
+    hands the context off.  ``slots=True`` keeps instances lightweight.
+
+    Fields
+    ------
+    mailrule_id : int | None
+        Primary key of the ``MailRule`` that triggered this ingestion,
+        or ``None`` when the document did not arrive via a mail rule.
+        Used by ``MailDocumentParser`` to select the PDF layout.
+
+    Notes
+    -----
+    Future fields (not yet implemented):
+
+    * ``output_type`` — PDF/A variant for archive generation
+      (replaces ``settings.OCR_OUTPUT_TYPE`` reads inside parsers).
+    * ``ocr_mode`` — skip-text, redo, force, etc.
+      (replaces ``settings.OCR_MODE`` reads inside parsers).
+    * ``ocr_language`` — Tesseract language string.
+      (replaces ``settings.OCR_LANGUAGE`` reads inside parsers).
+
+    When those fields are added the consumer will read from Django
+    settings once and populate them here, decoupling parsers from
+    ``settings.*`` entirely.
+    """
+
+    mailrule_id: int | None = None
+
+
@runtime_checkable
 class ParserProtocol(Protocol):
    """Structural contract for all Paperless-ngx document parsers.
@@ -191,6 +231,21 @@ class ParserProtocol(Protocol):
    # Core parsing interface
    # ------------------------------------------------------------------

+    def configure(self, context: ParserContext) -> None:
+        """Apply source context before parse().
+
+        Called by the consumer after instantiation and before parse().
+        The default implementation is a no-op; parsers override only the
+        fields they need.
+
+        Parameters
+        ----------
+        context:
+            Immutable context assembled by the consumer for this
+            specific ingestion event.
+        """
+        ...
+
    def parse(
        self,
        document_path: Path,
--- a/src/paperless/parsers/mail.py
+++ b/src/paperless/parsers/mail.py
@@ -0,0 +1,834 @@
+"""
+Built-in mail document parser.
+
+Handles message/rfc822 (EML) MIME type by:
+- Parsing the email using imap_tools
+- Generating a PDF via Gotenberg (for display and archive)
+- Extracting text via Tika for HTML content
+- Extracting metadata from email headers
+
+The parser always produces a PDF because EML files cannot be rendered
+natively in a browser (requires_pdf_rendition=True).
+"""
+
+from __future__ import annotations
+
+import logging
+import re
+import shutil
+import tempfile
+from html import escape
+from pathlib import Path
+from typing import TYPE_CHECKING
+from typing import Self
+
+from bleach import clean
+from bleach import linkify
+from django.conf import settings
+from django.utils import timezone
+from django.utils.timezone import is_naive
+from django.utils.timezone import make_aware
+from gotenberg_client import GotenbergClient
+from gotenberg_client.constants import A4
+from gotenberg_client.options import Measurement
+from gotenberg_client.options import MeasurementUnitType
+from gotenberg_client.options import PageMarginsType
+from gotenberg_client.options import PdfAFormat
+from humanize import naturalsize
+from imap_tools import MailAttachment
+from imap_tools import MailMessage
+from tika_client import TikaClient
+
+from documents.parsers import ParseError
+from documents.parsers import make_thumbnail_from_pdf
+from paperless.models import OutputTypeChoices
+from paperless.version import __full_version_str__
+from paperless_mail.models import MailRule
+
+if TYPE_CHECKING:
+    import datetime
+    from types import TracebackType
+
+    from paperless.parsers import MetadataEntry
+    from paperless.parsers import ParserContext
+
+logger = logging.getLogger("paperless.parsing.mail")
+
+_SUPPORTED_MIME_TYPES: dict[str, str] = {
+    "message/rfc822": ".eml",
+}
+
+
+class MailDocumentParser:
+    """Parse .eml email files for Paperless-ngx.
+
+    Uses imap_tools to parse .eml files, generates a PDF using Gotenberg,
+    and sends the HTML part to a Tika server for text extraction.  Because
+    EML files cannot be rendered natively in a browser, the parser always
+    produces a PDF rendition (requires_pdf_rendition=True).
+
+    Pass a ``ParserContext`` to ``configure()`` before ``parse()`` to
+    apply mail-rule-specific PDF layout options:
+
+        parser.configure(ParserContext(mailrule_id=rule.pk))
+        parser.parse(path, mime_type)
+
+    Class attributes
+    ----------------
+    name : str
+        Human-readable parser name.
+    version : str
+        Semantic version string, kept in sync with Paperless-ngx releases.
+    author : str
+        Maintainer name.
+    url : str
+        Issue tracker / source URL.
+    """
+
+    name: str = "Paperless-ngx Mail Parser"
+    version: str = __full_version_str__
+    author: str = "Paperless-ngx Contributors"
+    url: str = "https://github.com/paperless-ngx/paperless-ngx"
+
+    # ------------------------------------------------------------------
+    # Class methods
+    # ------------------------------------------------------------------
+
+    @classmethod
+    def supported_mime_types(cls) -> dict[str, str]:
+        """Return the MIME types this parser handles.
+
+        Returns
+        -------
+        dict[str, str]
+            Mapping of MIME type to preferred file extension.
+        """
+        return _SUPPORTED_MIME_TYPES
+
+    @classmethod
+    def score(
+        cls,
+        mime_type: str,
+        filename: str,
+        path: Path | None = None,
+    ) -> int | None:
+        """Return the priority score for handling this file.
+
+        Parameters
+        ----------
+        mime_type:
+            Detected MIME type of the file.
+        filename:
+            Original filename including extension.
+        path:
+            Optional filesystem path. Not inspected by this parser.
+
+        Returns
+        -------
+        int | None
+            10 if the MIME type is supported, otherwise None.
+        """
+        if mime_type in _SUPPORTED_MIME_TYPES:
+            return 10
+        return None
+
+    # ------------------------------------------------------------------
+    # Properties
+    # ------------------------------------------------------------------
+
+    @property
+    def can_produce_archive(self) -> bool:
+        """Whether this parser can produce a searchable PDF archive copy.
+
+        Returns
+        -------
+        bool
+            Always False — the mail parser produces a display PDF
+            (requires_pdf_rendition=True), not an optional OCR archive.
+        """
+        return False
+
+    @property
+    def requires_pdf_rendition(self) -> bool:
+        """Whether the parser must produce a PDF for the frontend to display.
+
+        Returns
+        -------
+        bool
+            Always True — EML files cannot be rendered natively in a browser,
+            so a PDF conversion is always required for display.
+        """
+        return True
+
+    # ------------------------------------------------------------------
+    # Lifecycle
+    # ------------------------------------------------------------------
+
+    def __init__(self, logging_group: object = None) -> None:
+        settings.SCRATCH_DIR.mkdir(parents=True, exist_ok=True)
+        self._tempdir = Path(
+            tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR),
+        )
+        self._text: str | None = None
+        self._date: datetime.datetime | None = None
+        self._archive_path: Path | None = None
+        self._mailrule_id: int | None = None
+
+    def __enter__(self) -> Self:
+        return self
+
+    def __exit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc_val: BaseException | None,
+        exc_tb: TracebackType | None,
+    ) -> None:
+        logger.debug("Cleaning up temporary directory %s", self._tempdir)
+        shutil.rmtree(self._tempdir, ignore_errors=True)
+
+    # ------------------------------------------------------------------
+    # Core parsing interface
+    # ------------------------------------------------------------------
+
+    def configure(self, context: ParserContext) -> None:
+        self._mailrule_id = context.mailrule_id
+
+    def parse(
+        self,
+        document_path: Path,
+        mime_type: str,
+        *,
+        produce_archive: bool = True,
+    ) -> None:
+        """Parse the given .eml into formatted text and a PDF archive.
+
+        Call ``configure(ParserContext(mailrule_id=...))`` before this method
+        to apply mail-rule-specific PDF layout options.  The ``produce_archive``
+        flag is accepted for protocol compatibility but is always honoured —
+        the mail parser always produces a PDF since EML files cannot be
+        displayed natively.
+
+        Parameters
+        ----------
+        document_path:
+            Absolute path to the .eml file.
+        mime_type:
+            Detected MIME type of the document (should be "message/rfc822").
+        produce_archive:
+            Accepted for protocol compatibility. The PDF rendition is always
+            produced since EML files cannot be displayed natively in a browser.
+
+        Raises
+        ------
+        documents.parsers.ParseError
+            If the file cannot be parsed or PDF generation fails.
+        """
+
+        def strip_text(text: str) -> str:
+            """Reduces the spacing of the given text string."""
+            text = re.sub(r"\s+", " ", text)
+            text = re.sub(r"(\n *)+", "\n", text)
+            return text.strip()
+
+        def build_formatted_text(mail_message: MailMessage) -> str:
+            """Constructs a formatted string based on the given email."""
+            fmt_text = f"Subject: {mail_message.subject}\n\n"
+            fmt_text += f"From: {mail_message.from_values.full if mail_message.from_values else ''}\n\n"
+            to_list = [address.full for address in mail_message.to_values]
+            fmt_text += f"To: {', '.join(to_list)}\n\n"
+            if mail_message.cc_values:
+                fmt_text += (
+                    f"CC: {', '.join(address.full for address in mail.cc_values)}\n\n"
+                )
+            if mail_message.bcc_values:
+                fmt_text += (
+                    f"BCC: {', '.join(address.full for address in mail.bcc_values)}\n\n"
+                )
+            if mail_message.attachments:
+                att = []
+                for a in mail.attachments:
+                    attachment_size = naturalsize(a.size, binary=True, format="%.2f")
+                    att.append(
+                        f"{a.filename} ({attachment_size})",
+                    )
+                fmt_text += f"Attachments: {', '.join(att)}\n\n"
+
+            if mail.html:
+                fmt_text += "HTML content: " + strip_text(self.tika_parse(mail.html))
+
+            fmt_text += f"\n\n{strip_text(mail.text)}"
+
+            return fmt_text
+
+        logger.debug("Parsing file %s into an email", document_path.name)
+        mail = self.parse_file_to_message(document_path)
+
+        logger.debug("Building formatted text from email")
+        self._text = build_formatted_text(mail)
+
+        if is_naive(mail.date):
+            self._date = make_aware(mail.date)
+        else:
+            self._date = mail.date
+
+        logger.debug("Creating a PDF from the email")
+        if self._mailrule_id:
+            rule = MailRule.objects.get(pk=self._mailrule_id)
+            self._archive_path = self.generate_pdf(
+                mail,
+                MailRule.PdfLayout(rule.pdf_layout),
+            )
+        else:
+            self._archive_path = self.generate_pdf(mail)
+
+    # ------------------------------------------------------------------
+    # Result accessors
+    # ------------------------------------------------------------------
+
+    def get_text(self) -> str | None:
+        """Return the plain-text content extracted during parse.
+
+        Returns
+        -------
+        str | None
+            Extracted text, or None if parse has not been called yet.
+        """
+        return self._text
+
+    def get_date(self) -> datetime.datetime | None:
+        """Return the document date detected during parse.
+
+        Returns
+        -------
+        datetime.datetime | None
+            Date from the email headers, or None if not detected.
+        """
+        return self._date
+
+    def get_archive_path(self) -> Path | None:
+        """Return the path to the generated archive PDF, or None.
+
+        Returns
+        -------
+        Path | None
+            Path to the PDF produced by Gotenberg, or None if parse has not
+            been called yet.
+        """
+        return self._archive_path
+
+    # ------------------------------------------------------------------
+    # Thumbnail and metadata
+    # ------------------------------------------------------------------
+
+    def get_thumbnail(
+        self,
+        document_path: Path,
+        mime_type: str,
+        file_name: str | None = None,
+    ) -> Path:
+        """Generate a thumbnail from the PDF rendition of the email.
+
+        Converts the document to PDF first if not already done.
+
+        Parameters
+        ----------
+        document_path:
+            Absolute path to the source document.
+        mime_type:
+            Detected MIME type of the document.
+        file_name:
+            Kept for backward compatibility; not used.
+
+        Returns
+        -------
+        Path
+            Path to the generated WebP thumbnail inside the temporary directory.
+        """
+        if not self._archive_path:
+            self._archive_path = self.generate_pdf(
+                self.parse_file_to_message(document_path),
+            )
+
+        return make_thumbnail_from_pdf(
+            self._archive_path,
+            self._tempdir,
+        )
+
+    def get_page_count(
+        self,
+        document_path: Path,
+        mime_type: str,
+    ) -> int | None:
+        """Return the number of pages in the document.
+
+        Counts pages in the archive PDF produced by a preceding parse()
+        call.  Returns ``None`` if parse() has not been called yet or if
+        no archive was produced.
+
+        Returns
+        -------
+        int | None
+            Page count of the archive PDF, or ``None``.
+        """
+        if self._archive_path is not None:
+            from paperless.parsers.utils import get_page_count_for_pdf
+
+            return get_page_count_for_pdf(self._archive_path, log=logger)
+        return None
+
+    def extract_metadata(
+        self,
+        document_path: Path,
+        mime_type: str,
+    ) -> list[MetadataEntry]:
+        """Extract metadata from the email headers.
+
+        Returns email headers as metadata entries with prefix "header",
+        plus summary entries for attachments and date.
+
+        Returns
+        -------
+        list[MetadataEntry]
+            Sorted list of metadata entries, or ``[]`` on parse failure.
+        """
+        result: list[MetadataEntry] = []
+
+        try:
+            mail = self.parse_file_to_message(document_path)
+        except ParseError as e:
+            logger.warning(
+                "Error while fetching document metadata for %s: %s",
+                document_path,
+                e,
+            )
+            return result
+
+        for key, header_values in mail.headers.items():
+            value = ", ".join(header_values)
+            try:
+                value.encode("utf-8")
+            except UnicodeEncodeError as e:  # pragma: no cover
+                logger.debug("Skipping header %s: %s", key, e)
+                continue
+
+            result.append(
+                {
+                    "namespace": "",
+                    "prefix": "header",
+                    "key": key,
+                    "value": value,
+                },
+            )
+
+        result.append(
+            {
+                "namespace": "",
+                "prefix": "",
+                "key": "attachments",
+                "value": ", ".join(
+                    f"{attachment.filename}"
+                    f"({naturalsize(attachment.size, binary=True, format='%.2f')})"
+                    for attachment in mail.attachments
+                ),
+            },
+        )
+
+        result.append(
+            {
+                "namespace": "",
+                "prefix": "",
+                "key": "date",
+                "value": mail.date.strftime("%Y-%m-%d %H:%M:%S %Z"),
+            },
+        )
+
+        result.sort(key=lambda item: (item["prefix"], item["key"]))
+        return result
+
+    # ------------------------------------------------------------------
+    # Email-specific methods
+    # ------------------------------------------------------------------
+
+    def _settings_to_gotenberg_pdfa(self) -> PdfAFormat | None:
+        """Convert the OCR output type setting to a Gotenberg PdfAFormat."""
+        if settings.OCR_OUTPUT_TYPE in {
+            OutputTypeChoices.PDF_A,
+            OutputTypeChoices.PDF_A2,
+        }:
+            return PdfAFormat.A2b
+        elif settings.OCR_OUTPUT_TYPE == OutputTypeChoices.PDF_A1:  # pragma: no cover
+            logger.warning(
+                "Gotenberg does not support PDF/A-1a, choosing PDF/A-2b instead",
+            )
+            return PdfAFormat.A2b
+        elif settings.OCR_OUTPUT_TYPE == OutputTypeChoices.PDF_A3:  # pragma: no cover
+            return PdfAFormat.A3b
+        return None
+
+    @staticmethod
+    def parse_file_to_message(filepath: Path) -> MailMessage:
+        """Parse the given .eml file into a MailMessage object.
+
+        Parameters
+        ----------
+        filepath:
+            Path to the .eml file.
+
+        Returns
+        -------
+        MailMessage
+            Parsed mail message.
+
+        Raises
+        ------
+        documents.parsers.ParseError
+            If the file cannot be parsed or is missing required fields.
+        """
+        try:
+            with filepath.open("rb") as eml:
+                parsed = MailMessage.from_bytes(eml.read())
+                if parsed.from_values is None:
+                    raise ParseError(
+                        f"Could not parse {filepath}: Missing 'from'",
+                    )
+        except Exception as err:
+            raise ParseError(
+                f"Could not parse {filepath}: {err}",
+            ) from err
+
+        return parsed
+
+    def tika_parse(self, html: str) -> str:
+        """Send HTML content to the Tika server for text extraction.
+
+        Parameters
+        ----------
+        html:
+            HTML string to parse.
+
+        Returns
+        -------
+        str
+            Extracted plain text.
+
+        Raises
+        ------
+        documents.parsers.ParseError
+            If the Tika server cannot be reached or returns an error.
+        """
+        logger.info("Sending content to Tika server")
+
+        try:
+            with TikaClient(tika_url=settings.TIKA_ENDPOINT) as client:
+                parsed = client.tika.as_text.from_buffer(html, "text/html")
+
+                if parsed.content is not None:
+                    return parsed.content.strip()
+                return ""
+        except Exception as err:
+            raise ParseError(
+                f"Could not parse content with tika server at "
+                f"{settings.TIKA_ENDPOINT}: {err}",
+            ) from err
+
+    def generate_pdf(
+        self,
+        mail_message: MailMessage,
+        pdf_layout: MailRule.PdfLayout | None = None,
+    ) -> Path:
+        """Generate a PDF from the email message.
+
+        Creates separate PDFs for the email body and HTML content, then
+        merges them according to the requested layout.
+
+        Parameters
+        ----------
+        mail_message:
+            Parsed email message.
+        pdf_layout:
+            Layout option for the PDF. Falls back to the
+            EMAIL_PARSE_DEFAULT_LAYOUT setting if not provided.
+
+        Returns
+        -------
+        Path
+            Path to the generated PDF inside the temporary directory.
+        """
+        archive_path = Path(self._tempdir) / "merged.pdf"
+
+        mail_pdf_file = self.generate_pdf_from_mail(mail_message)
+
+        if pdf_layout is None:
+            pdf_layout = MailRule.PdfLayout(settings.EMAIL_PARSE_DEFAULT_LAYOUT)
+
+        # If no HTML content, create the PDF from the message.
+        # Otherwise, create 2 PDFs and merge them with Gotenberg.
+        if not mail_message.html:
+            archive_path.write_bytes(mail_pdf_file.read_bytes())
+        else:
+            pdf_of_html_content = self.generate_pdf_from_html(
+                mail_message.html,
+                mail_message.attachments,
+            )
+
+            logger.debug("Merging email text and HTML content into single PDF")
+
+            with (
+                GotenbergClient(
+                    host=settings.TIKA_GOTENBERG_ENDPOINT,
+                    timeout=settings.CELERY_TASK_TIME_LIMIT,
+                ) as client,
+                client.merge.merge() as route,
+            ):
+                # Configure requested PDF/A formatting, if any
+                pdf_a_format = self._settings_to_gotenberg_pdfa()
+                if pdf_a_format is not None:
+                    route.pdf_format(pdf_a_format)
+
+                match pdf_layout:
+                    case MailRule.PdfLayout.HTML_TEXT:
+                        route.merge([pdf_of_html_content, mail_pdf_file])
+                    case MailRule.PdfLayout.HTML_ONLY:
+                        route.merge([pdf_of_html_content])
+                    case MailRule.PdfLayout.TEXT_ONLY:
+                        route.merge([mail_pdf_file])
+                    case MailRule.PdfLayout.TEXT_HTML | _:
+                        route.merge([mail_pdf_file, pdf_of_html_content])
+
+                try:
+                    response = route.run()
+                    archive_path.write_bytes(response.content)
+                except Exception as err:
+                    raise ParseError(
+                        f"Error while merging email HTML into PDF: {err}",
+                    ) from err
+
+        return archive_path
+
+    def mail_to_html(self, mail: MailMessage) -> Path:
+        """Convert the given email into an HTML file using a template.
+
+        Parameters
+        ----------
+        mail:
+            Parsed mail message.
+
+        Returns
+        -------
+        Path
+            Path to the rendered HTML file inside the temporary directory.
+        """
+
+        def clean_html(text: str) -> str:
+            """Attempt to clean, escape, and linkify the given HTML string."""
+            if isinstance(text, list):
+                text = "\n".join([str(e) for e in text])
+            if not isinstance(text, str):
+                text = str(text)
+            text = escape(text)
+            text = clean(text)
+            text = linkify(text, parse_email=True)
+            text = text.replace("\n", "<br>")
+            return text
+
+        data = {}
+
+        data["subject"] = clean_html(mail.subject)
+        if data["subject"]:
+            data["subject_label"] = "Subject"
+        data["from"] = clean_html(mail.from_values.full if mail.from_values else "")
+        if data["from"]:
+            data["from_label"] = "From"
+        data["to"] = clean_html(", ".join(address.full for address in mail.to_values))
+        if data["to"]:
+            data["to_label"] = "To"
+        data["cc"] = clean_html(", ".join(address.full for address in mail.cc_values))
+        if data["cc"]:
+            data["cc_label"] = "CC"
+        data["bcc"] = clean_html(", ".join(address.full for address in mail.bcc_values))
+        if data["bcc"]:
+            data["bcc_label"] = "BCC"
+
+        att = []
+        for a in mail.attachments:
+            att.append(
+                f"{a.filename} ({naturalsize(a.size, binary=True, format='%.2f')})",
+            )
+        data["attachments"] = clean_html(", ".join(att))
+        if data["attachments"]:
+            data["attachments_label"] = "Attachments"
+
+        data["date"] = clean_html(
+            timezone.localtime(mail.date).strftime("%Y-%m-%d %H:%M"),
+        )
+        data["content"] = clean_html(mail.text.strip())
+
+        from django.template.loader import render_to_string
+
+        html_file = Path(self._tempdir) / "email_as_html.html"
+        html_file.write_text(render_to_string("email_msg_template.html", context=data))
+
+        return html_file
+
+    def generate_pdf_from_mail(self, mail: MailMessage) -> Path:
+        """Create a PDF from the email body using an HTML template and Gotenberg.
+
+        Parameters
+        ----------
+        mail:
+            Parsed mail message.
+
+        Returns
+        -------
+        Path
+            Path to the generated PDF inside the temporary directory.
+
+        Raises
+        ------
+        documents.parsers.ParseError
+            If Gotenberg returns an error.
+        """
+        logger.info("Converting mail to PDF")
+
+        css_file = (
+            Path(__file__).parent.parent.parent
+            / "paperless_mail"
+            / "templates"
+            / "output.css"
+        )
+        email_html_file = self.mail_to_html(mail)
+
+        with (
+            GotenbergClient(
+                host=settings.TIKA_GOTENBERG_ENDPOINT,
+                timeout=settings.CELERY_TASK_TIME_LIMIT,
+            ) as client,
+            client.chromium.html_to_pdf() as route,
+        ):
+            # Configure requested PDF/A formatting, if any
+            pdf_a_format = self._settings_to_gotenberg_pdfa()
+            if pdf_a_format is not None:
+                route.pdf_format(pdf_a_format)
+
+            try:
+                response = (
+                    route.index(email_html_file)
+                    .resource(css_file)
+                    .margins(
+                        PageMarginsType(
+                            top=Measurement(0.1, MeasurementUnitType.Inches),
+                            bottom=Measurement(0.1, MeasurementUnitType.Inches),
+                            left=Measurement(0.1, MeasurementUnitType.Inches),
+                            right=Measurement(0.1, MeasurementUnitType.Inches),
+                        ),
+                    )
+                    .size(A4)
+                    .scale(1.0)
+                    .run()
+                )
+            except Exception as err:
+                raise ParseError(
+                    f"Error while converting email to PDF: {err}",
+                ) from err
+
+        email_as_pdf_file = Path(self._tempdir) / "email_as_pdf.pdf"
+        email_as_pdf_file.write_bytes(response.content)
+
+        return email_as_pdf_file
+
+    def generate_pdf_from_html(
+        self,
+        orig_html: str,
+        attachments: list[MailAttachment],
+    ) -> Path:
+        """Generate a PDF from the HTML content of the email.
+
+        Parameters
+        ----------
+        orig_html:
+            Raw HTML string from the email body.
+        attachments:
+            List of email attachments (used as inline resources).
+
+        Returns
+        -------
+        Path
+            Path to the generated PDF inside the temporary directory.
+
+        Raises
+        ------
+        documents.parsers.ParseError
+            If Gotenberg returns an error.
+        """
+
+        def clean_html_script(text: str) -> str:
+            compiled_open = re.compile(re.escape("<script"), re.IGNORECASE)
+            text = compiled_open.sub("<div hidden ", text)
+
+            compiled_close = re.compile(re.escape("</script"), re.IGNORECASE)
+            text = compiled_close.sub("</div", text)
+            return text
+
+        logger.info("Converting message html to PDF")
+
+        tempdir = Path(self._tempdir)
+
+        html_clean = clean_html_script(orig_html)
+        html_clean_file = tempdir / "index.html"
+        html_clean_file.write_text(html_clean)
+
+        with (
+            GotenbergClient(
+                host=settings.TIKA_GOTENBERG_ENDPOINT,
+                timeout=settings.CELERY_TASK_TIME_LIMIT,
+            ) as client,
+            client.chromium.html_to_pdf() as route,
+        ):
+            # Configure requested PDF/A formatting, if any
+            pdf_a_format = self._settings_to_gotenberg_pdfa()
+            if pdf_a_format is not None:
+                route.pdf_format(pdf_a_format)
+
+            # Add attachments as resources, cleaning the filename and replacing
+            # it in the index file for inclusion
+            for attachment in attachments:
+                # Clean the attachment name to be valid
+                name_cid = f"cid:{attachment.content_id}"
+                name_clean = "".join(e for e in name_cid if e.isalnum())
+
+                # Write attachment payload to a temp file
+                temp_file = tempdir / name_clean
+                temp_file.write_bytes(attachment.payload)
+
+                route.resource(temp_file)
+
+                # Replace as needed the name with the clean name
+                html_clean = html_clean.replace(name_cid, name_clean)
+
+            # Now store the cleaned up HTML version
+            html_clean_file = tempdir / "index.html"
+            html_clean_file.write_text(html_clean)
+            # This is our index file, the main page basically
+            route.index(html_clean_file)
+
+            # Set page size, margins
+            route.margins(
+                PageMarginsType(
+                    top=Measurement(0.1, MeasurementUnitType.Inches),
+                    bottom=Measurement(0.1, MeasurementUnitType.Inches),
+                    left=Measurement(0.1, MeasurementUnitType.Inches),
+                    right=Measurement(0.1, MeasurementUnitType.Inches),
+                ),
+            ).size(A4).scale(1.0)
+
+            try:
+                response = route.run()
+
+            except Exception as err:
+                raise ParseError(
+                    f"Error while converting document to PDF: {err}",
+                ) from err
+
+        html_pdf = tempdir / "html.pdf"
+        html_pdf.write_bytes(response.content)
+        return html_pdf
--- a/src/paperless/parsers/registry.py
+++ b/src/paperless/parsers/registry.py
@@ -33,6 +33,7 @@ name, version, author, url, supported_mime_types (callable), score (callable).
 from __future__ import annotations

 import logging
+import threading
 from importlib.metadata import entry_points
 from typing import TYPE_CHECKING

@@ -49,6 +50,7 @@ logger = logging.getLogger("paperless.parsers.registry")

 _registry: ParserRegistry | None = None
 _discovery_complete: bool = False
+_lock = threading.Lock()

 # Attribute names that every registered external parser class must expose.
 _REQUIRED_ATTRS: tuple[str, ...] = (
@@ -74,7 +76,6 @@ def get_parser_registry() -> ParserRegistry:
    1. Creates a new ParserRegistry.
    2. Calls register_defaults to install built-in parsers.
    3. Calls discover to load third-party plugins via importlib.metadata entrypoints.
-    4. Calls log_summary to emit a startup summary.

    Subsequent calls return the same instance immediately.

@@ -85,14 +86,15 @@ def get_parser_registry() -> ParserRegistry:
    """
    global _registry, _discovery_complete

-    if _registry is None:
-        _registry = ParserRegistry()
-        _registry.register_defaults()
+    with _lock:
+        if _registry is None:
+            r = ParserRegistry()
+            r.register_defaults()
+            _registry = r

-    if not _discovery_complete:
-        _registry.discover()
-        _registry.log_summary()
-        _discovery_complete = True
+        if not _discovery_complete:
+            _registry.discover()
+            _discovery_complete = True

    return _registry

@@ -113,9 +115,11 @@ def init_builtin_parsers() -> None:
    """
    global _registry

-    if _registry is None:
-        _registry = ParserRegistry()
-        _registry.register_defaults()
+    with _lock:
+        if _registry is None:
+            r = ParserRegistry()
+            r.register_defaults()
+            _registry = r


 def reset_parser_registry() -> None:
@@ -193,9 +197,17 @@ class ParserRegistry:
        that log output is predictable; scoring determines which parser wins
        at runtime regardless of registration order.
        """
+        from paperless.parsers.mail import MailDocumentParser
+        from paperless.parsers.remote import RemoteDocumentParser
+        from paperless.parsers.tesseract import RasterisedDocumentParser
        from paperless.parsers.text import TextDocumentParser
+        from paperless.parsers.tika import TikaDocumentParser

        self.register_builtin(TextDocumentParser)
+        self.register_builtin(RemoteDocumentParser)
+        self.register_builtin(TikaDocumentParser)
+        self.register_builtin(MailDocumentParser)
+        self.register_builtin(RasterisedDocumentParser)

    # ------------------------------------------------------------------
    # Discovery
@@ -296,6 +308,23 @@ class ParserRegistry:
                getattr(cls, "url", "unknown"),
            )

+    # ------------------------------------------------------------------
+    # Inspection helpers
+    # ------------------------------------------------------------------
+
+    def all_parsers(self) -> list[type[ParserProtocol]]:
+        """Return all registered parser classes (external first, then builtins).
+
+        Used by compatibility wrappers that need to iterate every parser to
+        compute the full set of supported MIME types and file extensions.
+
+        Returns
+        -------
+        list[type[ParserProtocol]]
+            External parsers followed by built-in parsers.
+        """
+        return [*self._external, *self._builtins]
+
    # ------------------------------------------------------------------
    # Parser resolution
    # ------------------------------------------------------------------
@@ -326,7 +355,7 @@ class ParserRegistry:
        mime_type:
            The detected MIME type of the file.
        filename:
-            The original filename, including extension.
+            The original filename, including extension.  May be empty in some cases
        path:
            Optional filesystem path to the file. Forwarded to each
            parser's score method.
--- a/src/paperless/parsers/remote.py
+++ b/src/paperless/parsers/remote.py
@@ -0,0 +1,433 @@
+"""
+Built-in remote-OCR document parser.
+
+Handles documents by sending them to a configured remote OCR engine
+(currently Azure AI Vision / Document Intelligence) and retrieving both
+the extracted text and a searchable PDF with an embedded text layer.
+
+When no engine is configured, ``score()`` returns ``None`` so the parser
+is effectively invisible to the registry — the tesseract parser handles
+these MIME types instead.
+"""
+
+from __future__ import annotations
+
+import logging
+import shutil
+import tempfile
+from pathlib import Path
+from typing import TYPE_CHECKING
+from typing import Self
+
+from django.conf import settings
+
+from paperless.version import __full_version_str__
+
+if TYPE_CHECKING:
+    import datetime
+    from types import TracebackType
+
+    from paperless.parsers import MetadataEntry
+    from paperless.parsers import ParserContext
+
+logger = logging.getLogger("paperless.parsing.remote")
+
+_SUPPORTED_MIME_TYPES: dict[str, str] = {
+    "application/pdf": ".pdf",
+    "image/png": ".png",
+    "image/jpeg": ".jpg",
+    "image/tiff": ".tiff",
+    "image/bmp": ".bmp",
+    "image/gif": ".gif",
+    "image/webp": ".webp",
+}
+
+
+class RemoteEngineConfig:
+    """Holds and validates the remote OCR engine configuration."""
+
+    def __init__(
+        self,
+        engine: str | None,
+        api_key: str | None = None,
+        endpoint: str | None = None,
+    ) -> None:
+        self.engine = engine
+        self.api_key = api_key
+        self.endpoint = endpoint
+
+    def engine_is_valid(self) -> bool:
+        """Return True when the engine is known and fully configured."""
+        return (
+            self.engine in ("azureai",)
+            and self.api_key is not None
+            and not (self.engine == "azureai" and self.endpoint is None)
+        )
+
+
+class RemoteDocumentParser:
+    """Parse documents via a remote OCR API (currently Azure AI Vision).
+
+    This parser sends documents to a remote engine that returns both
+    extracted text and a searchable PDF with an embedded text layer.
+    It does not depend on Tesseract or ocrmypdf.
+
+    Class attributes
+    ----------------
+    name : str
+        Human-readable parser name.
+    version : str
+        Semantic version string, kept in sync with Paperless-ngx releases.
+    author : str
+        Maintainer name.
+    url : str
+        Issue tracker / source URL.
+    """
+
+    name: str = "Paperless-ngx Remote OCR Parser"
+    version: str = __full_version_str__
+    author: str = "Paperless-ngx Contributors"
+    url: str = "https://github.com/paperless-ngx/paperless-ngx"
+
+    # ------------------------------------------------------------------
+    # Class methods
+    # ------------------------------------------------------------------
+
+    @classmethod
+    def supported_mime_types(cls) -> dict[str, str]:
+        """Return the MIME types this parser can handle.
+
+        The full set is always returned regardless of whether a remote
+        engine is configured.  The ``score()`` method handles the
+        "am I active?" logic by returning ``None`` when not configured.
+
+        Returns
+        -------
+        dict[str, str]
+            Mapping of MIME type to preferred file extension.
+        """
+        return _SUPPORTED_MIME_TYPES
+
+    @classmethod
+    def score(
+        cls,
+        mime_type: str,
+        filename: str,
+        path: Path | None = None,
+    ) -> int | None:
+        """Return the priority score for handling this file, or None.
+
+        Returns ``None`` when no valid remote engine is configured,
+        making the parser invisible to the registry for this file.
+        When configured, returns 20 — higher than the Tesseract parser's
+        default of 10 — so the remote engine takes priority.
+
+        Parameters
+        ----------
+        mime_type:
+            Detected MIME type of the file.
+        filename:
+            Original filename including extension.
+        path:
+            Optional filesystem path. Not inspected by this parser.
+
+        Returns
+        -------
+        int | None
+            20 when the remote engine is configured and the MIME type is
+            supported, otherwise None.
+        """
+        config = RemoteEngineConfig(
+            engine=settings.REMOTE_OCR_ENGINE,
+            api_key=settings.REMOTE_OCR_API_KEY,
+            endpoint=settings.REMOTE_OCR_ENDPOINT,
+        )
+        if not config.engine_is_valid():
+            return None
+        if mime_type not in _SUPPORTED_MIME_TYPES:
+            return None
+        return 20
+
+    # ------------------------------------------------------------------
+    # Properties
+    # ------------------------------------------------------------------
+
+    @property
+    def can_produce_archive(self) -> bool:
+        """Whether this parser can produce a searchable PDF archive copy.
+
+        Returns
+        -------
+        bool
+            Always True — the remote engine always returns a PDF with an
+            embedded text layer that serves as the archive copy.
+        """
+        return True
+
+    @property
+    def requires_pdf_rendition(self) -> bool:
+        """Whether the parser must produce a PDF for the frontend to display.
+
+        Returns
+        -------
+        bool
+            Always False — all supported originals are displayable by
+            the browser (PDF) or handled via the archive copy (images).
+        """
+        return False
+
+    # ------------------------------------------------------------------
+    # Lifecycle
+    # ------------------------------------------------------------------
+
+    def __init__(self, logging_group: object = None) -> None:
+        settings.SCRATCH_DIR.mkdir(parents=True, exist_ok=True)
+        self._tempdir = Path(
+            tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR),
+        )
+        self._logging_group = logging_group
+        self._text: str | None = None
+        self._archive_path: Path | None = None
+
+    def __enter__(self) -> Self:
+        return self
+
+    def __exit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc_val: BaseException | None,
+        exc_tb: TracebackType | None,
+    ) -> None:
+        logger.debug("Cleaning up temporary directory %s", self._tempdir)
+        shutil.rmtree(self._tempdir, ignore_errors=True)
+
+    # ------------------------------------------------------------------
+    # Core parsing interface
+    # ------------------------------------------------------------------
+
+    def configure(self, context: ParserContext) -> None:
+        pass
+
+    def parse(
+        self,
+        document_path: Path,
+        mime_type: str,
+        *,
+        produce_archive: bool = True,
+    ) -> None:
+        """Send the document to the remote engine and store results.
+
+        Parameters
+        ----------
+        document_path:
+            Absolute path to the document file to parse.
+        mime_type:
+            Detected MIME type of the document.
+        produce_archive:
+            Ignored — the remote engine always returns a searchable PDF,
+            which is stored as the archive copy regardless of this flag.
+        """
+        config = RemoteEngineConfig(
+            engine=settings.REMOTE_OCR_ENGINE,
+            api_key=settings.REMOTE_OCR_API_KEY,
+            endpoint=settings.REMOTE_OCR_ENDPOINT,
+        )
+
+        if not config.engine_is_valid():
+            logger.warning(
+                "No valid remote parser engine is configured, content will be empty.",
+            )
+            self._text = ""
+            return
+
+        if config.engine == "azureai":
+            self._text = self._azure_ai_vision_parse(document_path, config)
+
+    # ------------------------------------------------------------------
+    # Result accessors
+    # ------------------------------------------------------------------
+
+    def get_text(self) -> str | None:
+        """Return the plain-text content extracted during parse."""
+        return self._text
+
+    def get_date(self) -> datetime.datetime | None:
+        """Return the document date detected during parse.
+
+        Returns
+        -------
+        datetime.datetime | None
+            Always None — the remote parser does not detect dates.
+        """
+        return None
+
+    def get_archive_path(self) -> Path | None:
+        """Return the path to the generated archive PDF, or None."""
+        return self._archive_path
+
+    # ------------------------------------------------------------------
+    # Thumbnail and metadata
+    # ------------------------------------------------------------------
+
+    def get_thumbnail(self, document_path: Path, mime_type: str) -> Path:
+        """Generate a thumbnail image for the document.
+
+        Uses the archive PDF produced by the remote engine when available,
+        otherwise falls back to the original document path (PDF inputs).
+
+        Parameters
+        ----------
+        document_path:
+            Absolute path to the source document.
+        mime_type:
+            Detected MIME type of the document.
+
+        Returns
+        -------
+        Path
+            Path to the generated WebP thumbnail inside the temp directory.
+        """
+        # make_thumbnail_from_pdf lives in documents.parsers for now;
+        # it will move to paperless.parsers.utils when the tesseract
+        # parser is migrated in a later phase.
+        from documents.parsers import make_thumbnail_from_pdf
+
+        return make_thumbnail_from_pdf(
+            self._archive_path or document_path,
+            self._tempdir,
+            self._logging_group,
+        )
+
+    def get_page_count(
+        self,
+        document_path: Path,
+        mime_type: str,
+    ) -> int | None:
+        """Return the number of pages in a PDF document.
+
+        Parameters
+        ----------
+        document_path:
+            Absolute path to the source document.
+        mime_type:
+            Detected MIME type of the document.
+
+        Returns
+        -------
+        int | None
+            Page count for PDF inputs, or ``None`` for other MIME types.
+        """
+        if mime_type != "application/pdf":
+            return None
+
+        from paperless.parsers.utils import get_page_count_for_pdf
+
+        return get_page_count_for_pdf(document_path, log=logger)
+
+    def extract_metadata(
+        self,
+        document_path: Path,
+        mime_type: str,
+    ) -> list[MetadataEntry]:
+        """Extract format-specific metadata from the document.
+
+        Delegates to the shared pikepdf-based extractor for PDF files.
+        Returns ``[]`` for all other MIME types.
+
+        Parameters
+        ----------
+        document_path:
+            Absolute path to the file to extract metadata from.
+        mime_type:
+            MIME type of the file.  May be ``"application/pdf"`` when
+            called for the archive version of an image original.
+
+        Returns
+        -------
+        list[MetadataEntry]
+            Zero or more metadata entries.
+        """
+        if mime_type != "application/pdf":
+            return []
+
+        from paperless.parsers.utils import extract_pdf_metadata
+
+        return extract_pdf_metadata(document_path, log=logger)
+
+    # ------------------------------------------------------------------
+    # Private helpers
+    # ------------------------------------------------------------------
+
+    def _azure_ai_vision_parse(
+        self,
+        file: Path,
+        config: RemoteEngineConfig,
+    ) -> str | None:
+        """Send ``file`` to Azure AI Document Intelligence and return text.
+
+        Downloads the searchable PDF output from Azure and stores it at
+        ``self._archive_path``.  Returns the extracted text content, or
+        ``None`` on failure (the error is logged).
+
+        Parameters
+        ----------
+        file:
+            Absolute path to the document to analyse.
+        config:
+            Validated remote engine configuration.
+
+        Returns
+        -------
+        str | None
+            Extracted text, or None if the Azure call failed.
+        """
+        if TYPE_CHECKING:
+            # Callers must have already validated config via engine_is_valid():
+            # engine_is_valid() asserts api_key is not None and (for azureai)
+            # endpoint is not None, so these casts are provably safe.
+            assert config.endpoint is not None
+            assert config.api_key is not None
+
+        from azure.ai.documentintelligence import DocumentIntelligenceClient
+        from azure.ai.documentintelligence.models import AnalyzeDocumentRequest
+        from azure.ai.documentintelligence.models import AnalyzeOutputOption
+        from azure.ai.documentintelligence.models import DocumentContentFormat
+        from azure.core.credentials import AzureKeyCredential
+
+        client = DocumentIntelligenceClient(
+            endpoint=config.endpoint,
+            credential=AzureKeyCredential(config.api_key),
+        )
+
+        try:
+            with file.open("rb") as f:
+                analyze_request = AnalyzeDocumentRequest(bytes_source=f.read())
+                poller = client.begin_analyze_document(
+                    model_id="prebuilt-read",
+                    body=analyze_request,
+                    output_content_format=DocumentContentFormat.TEXT,
+                    output=[AnalyzeOutputOption.PDF],
+                    content_type="application/json",
+                )
+
+            poller.wait()
+            result_id = poller.details["operation_id"]
+            result = poller.result()
+
+            self._archive_path = self._tempdir / "archive.pdf"
+            with self._archive_path.open("wb") as f:
+                for chunk in client.get_analyze_result_pdf(
+                    model_id="prebuilt-read",
+                    result_id=result_id,
+                ):
+                    f.write(chunk)
+
+            return result.content
+
+        except Exception as e:
+            logger.error("Azure AI Vision parsing failed: %s", e)
+
+        finally:
+            client.close()
+
+        return None
--- a/src/paperless/parsers/tesseract.py
+++ b/src/paperless/parsers/tesseract.py
@@ -1,13 +1,18 @@
+from __future__ import annotations
+
+import logging
 import os
 import re
+import shutil
 import tempfile
 from pathlib import Path
 from typing import TYPE_CHECKING
+from typing import Any
+from typing import Self

 from django.conf import settings
 from PIL import Image

-from documents.parsers import DocumentParser
 from documents.parsers import ParseError
 from documents.parsers import make_thumbnail_from_pdf
 from documents.utils import maybe_override_pixel_limit
@@ -16,6 +21,28 @@ from paperless.config import OcrConfig
 from paperless.models import ArchiveFileChoices
 from paperless.models import CleanChoices
 from paperless.models import ModeChoices
+from paperless.parsers.utils import read_file_handle_unicode_errors
+from paperless.version import __full_version_str__
+
+if TYPE_CHECKING:
+    import datetime
+    from types import TracebackType
+
+    from paperless.parsers import MetadataEntry
+    from paperless.parsers import ParserContext
+
+logger = logging.getLogger("paperless.parsing.tesseract")
+
+_SUPPORTED_MIME_TYPES: dict[str, str] = {
+    "application/pdf": ".pdf",
+    "image/jpeg": ".jpg",
+    "image/png": ".png",
+    "image/tiff": ".tif",
+    "image/gif": ".gif",
+    "image/bmp": ".bmp",
+    "image/webp": ".webp",
+    "image/heic": ".heic",
+}


 class NoTextFoundException(Exception):
@@ -26,81 +53,125 @@ class RtlLanguageException(Exception):
    pass


-class RasterisedDocumentParser(DocumentParser):
+class RasterisedDocumentParser:
    """
    This parser uses Tesseract to try and get some text out of a rasterised
    image, whether it's a PDF, or other graphical format (JPEG, TIFF, etc.)
    """

-    logging_name = "paperless.parsing.tesseract"
+    name: str = "Paperless-ngx Tesseract OCR Parser"
+    version: str = __full_version_str__
+    author: str = "Paperless-ngx Contributors"
+    url: str = "https://github.com/paperless-ngx/paperless-ngx"

-    def get_settings(self) -> OcrConfig:
-        """
-        This parser uses the OCR configuration settings to parse documents
-        """
-        return OcrConfig()
+    # ------------------------------------------------------------------
+    # Class methods
+    # ------------------------------------------------------------------

-    def get_page_count(self, document_path, mime_type):
-        page_count = None
-        if mime_type == "application/pdf":
-            try:
-                import pikepdf
+    @classmethod
+    def supported_mime_types(cls) -> dict[str, str]:
+        return _SUPPORTED_MIME_TYPES

-                with pikepdf.Pdf.open(document_path) as pdf:
-                    page_count = len(pdf.pages)
-            except Exception as e:
-                self.log.warning(
-                    f"Unable to determine PDF page count {document_path}: {e}",
-                )
-        return page_count
+    @classmethod
+    def score(
+        cls,
+        mime_type: str,
+        filename: str,
+        path: Path | None = None,
+    ) -> int | None:
+        if mime_type in _SUPPORTED_MIME_TYPES:
+            return 10
+        return None

-    def extract_metadata(self, document_path, mime_type):
-        result = []
-        if mime_type == "application/pdf":
-            import pikepdf
+    # ------------------------------------------------------------------
+    # Properties
+    # ------------------------------------------------------------------

-            namespace_pattern = re.compile(r"\{(.*)\}(.*)")
+    @property
+    def can_produce_archive(self) -> bool:
+        return True

-            pdf = pikepdf.open(document_path)
-            meta = pdf.open_metadata()
-            for key, value in meta.items():
-                if isinstance(value, list):
-                    value = " ".join([str(e) for e in value])
-                value = str(value)
-                try:
-                    m = namespace_pattern.match(key)
-                    if m is None:  # pragma: no cover
-                        continue
-                    namespace = m.group(1)
-                    key_value = m.group(2)
-                    try:
-                        namespace.encode("utf-8")
-                        key_value.encode("utf-8")
-                    except UnicodeEncodeError as e:  # pragma: no cover
-                        self.log.debug(f"Skipping metadata key {key}: {e}")
-                        continue
-                    result.append(
-                        {
-                            "namespace": namespace,
-                            "prefix": meta.REVERSE_NS[namespace],
-                            "key": key_value,
-                            "value": value,
-                        },
-                    )
-                except Exception as e:
-                    self.log.warning(
-                        f"Error while reading metadata {key}: {value}. Error: {e}",
-                    )
-        return result
+    @property
+    def requires_pdf_rendition(self) -> bool:
+        return False

-    def get_thumbnail(self, document_path, mime_type, file_name=None):
+    # ------------------------------------------------------------------
+    # Lifecycle
+    # ------------------------------------------------------------------
+
+    def __init__(self, logging_group: object = None) -> None:
+        settings.SCRATCH_DIR.mkdir(parents=True, exist_ok=True)
+        self.tempdir = Path(
+            tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR),
+        )
+        self.settings = OcrConfig()
+        self.archive_path: Path | None = None
+        self.text: str | None = None
+        self.date: datetime.datetime | None = None
+        self.log = logger
+
+    def __enter__(self) -> Self:
+        return self
+
+    def __exit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc_val: BaseException | None,
+        exc_tb: TracebackType | None,
+    ) -> None:
+        logger.debug("Cleaning up temporary directory %s", self.tempdir)
+        shutil.rmtree(self.tempdir, ignore_errors=True)
+
+    # ------------------------------------------------------------------
+    # Core parsing interface
+    # ------------------------------------------------------------------
+
+    def configure(self, context: ParserContext) -> None:
+        pass
+
+    # ------------------------------------------------------------------
+    # Result accessors
+    # ------------------------------------------------------------------
+
+    def get_text(self) -> str | None:
+        return self.text
+
+    def get_date(self) -> datetime.datetime | None:
+        return self.date
+
+    def get_archive_path(self) -> Path | None:
+        return self.archive_path
+
+    # ------------------------------------------------------------------
+    # Thumbnail, page count, and metadata
+    # ------------------------------------------------------------------
+
+    def get_thumbnail(self, document_path: Path, mime_type: str) -> Path:
        return make_thumbnail_from_pdf(
-            self.archive_path or document_path,
+            self.archive_path or Path(document_path),
            self.tempdir,
-            self.logging_group,
        )

-    def is_image(self, mime_type) -> bool:
+    def get_page_count(self, document_path: Path, mime_type: str) -> int | None:
+        if mime_type == "application/pdf":
+            from paperless.parsers.utils import get_page_count_for_pdf
+
+            return get_page_count_for_pdf(Path(document_path), log=self.log)
+        return None
+
+    def extract_metadata(
+        self,
+        document_path: Path,
+        mime_type: str,
+    ) -> list[MetadataEntry]:
+        if mime_type != "application/pdf":
+            return []
+
+        from paperless.parsers.utils import extract_pdf_metadata
+
+        return extract_pdf_metadata(Path(document_path), log=self.log)
+
+    def is_image(self, mime_type: str) -> bool:
        return mime_type in [
            "image/png",
            "image/jpeg",
@@ -111,25 +182,25 @@ class RasterisedDocumentParser(DocumentParser):
            "image/heic",
        ]

-    def has_alpha(self, image) -> bool:
+    def has_alpha(self, image: Path) -> bool:
        with Image.open(image) as im:
            return im.mode in ("RGBA", "LA")

-    def remove_alpha(self, image_path: str) -> Path:
+    def remove_alpha(self, image_path: Path) -> Path:
        no_alpha_image = Path(self.tempdir) / "image-no-alpha"
        run_subprocess(
            [
                settings.CONVERT_BINARY,
                "-alpha",
                "off",
-                image_path,
-                no_alpha_image,
+                str(image_path),
+                str(no_alpha_image),
            ],
            logger=self.log,
        )
        return no_alpha_image

-    def get_dpi(self, image) -> int | None:
+    def get_dpi(self, image: Path) -> int | None:
        try:
            with Image.open(image) as im:
                x, _ = im.info["dpi"]
@@ -138,7 +209,7 @@ class RasterisedDocumentParser(DocumentParser):
            self.log.warning(f"Error while getting DPI from image {image}: {e}")
            return None

-    def calculate_a4_dpi(self, image) -> int | None:
+    def calculate_a4_dpi(self, image: Path) -> int | None:
        try:
            with Image.open(image) as im:
                width, _ = im.size
@@ -156,6 +227,7 @@ class RasterisedDocumentParser(DocumentParser):
        sidecar_file: Path | None,
        pdf_file: Path,
    ) -> str | None:
+        text: str | None = None
        # When re-doing OCR, the sidecar contains ONLY the new text, not
        # the whole text, so do not utilize it in that case
        if (
@@ -163,7 +235,7 @@ class RasterisedDocumentParser(DocumentParser):
            and sidecar_file.is_file()
            and self.settings.mode != "redo"
        ):
-            text = self.read_file_handle_unicode_errors(sidecar_file)
+            text = read_file_handle_unicode_errors(sidecar_file)

            if "[OCR skipped on page" not in text:
                # This happens when there's already text in the input file.
@@ -191,12 +263,12 @@ class RasterisedDocumentParser(DocumentParser):
                        "-layout",
                        "-enc",
                        "UTF-8",
-                        pdf_file,
+                        str(pdf_file),
                        tmp.name,
                    ],
                    logger=self.log,
                )
-                text = self.read_file_handle_unicode_errors(Path(tmp.name))
+                text = read_file_handle_unicode_errors(Path(tmp.name))

            return post_process_text(text)

@@ -211,17 +283,15 @@ class RasterisedDocumentParser(DocumentParser):

    def construct_ocrmypdf_parameters(
        self,
-        input_file,
-        mime_type,
-        output_file,
-        sidecar_file,
+        input_file: Path,
+        mime_type: str,
+        output_file: Path,
+        sidecar_file: Path,
        *,
-        safe_fallback=False,
-    ):
-        if TYPE_CHECKING:
-            assert isinstance(self.settings, OcrConfig)
-        ocrmypdf_args = {
-            "input_file": input_file,
+        safe_fallback: bool = False,
+    ) -> dict[str, Any]:
+        ocrmypdf_args: dict[str, Any] = {
+            "input_file_or_options": input_file,
            "output_file": output_file,
            # need to use threads, since this will be run in daemonized
            # processes via the task library.
@@ -285,7 +355,7 @@ class RasterisedDocumentParser(DocumentParser):
                    "for compatibility with img2pdf",
                )
                # Replace the input file with the non-alpha
-                ocrmypdf_args["input_file"] = self.remove_alpha(input_file)
+                ocrmypdf_args["input_file_or_options"] = self.remove_alpha(input_file)

            if dpi:
                self.log.debug(f"Detected DPI for image {input_file}: {dpi}")
@@ -330,7 +400,13 @@ class RasterisedDocumentParser(DocumentParser):

        return ocrmypdf_args

-    def parse(self, document_path: Path, mime_type, file_name=None) -> None:
+    def parse(
+        self,
+        document_path: Path,
+        mime_type: str,
+        *,
+        produce_archive: bool = True,
+    ) -> None:
        # This forces tesseract to use one core per page.
        os.environ["OMP_THREAD_LIMIT"] = "1"
        VALID_TEXT_LENGTH = 50
@@ -458,7 +534,7 @@ class RasterisedDocumentParser(DocumentParser):
                self.text = ""


-def post_process_text(text):
+def post_process_text(text: str | None) -> str | None:
    if not text:
        return None

--- a/src/paperless/parsers/text.py
+++ b/src/paperless/parsers/text.py
@@ -27,6 +27,7 @@ if TYPE_CHECKING:
    from types import TracebackType

    from paperless.parsers import MetadataEntry
+    from paperless.parsers import ParserContext

 logger = logging.getLogger("paperless.parsing.text")

@@ -156,6 +157,9 @@ class TextDocumentParser:
    # Core parsing interface
    # ------------------------------------------------------------------

+    def configure(self, context: ParserContext) -> None:
+        pass
+
    def parse(
        self,
        document_path: Path,
--- a/src/paperless/parsers/tika.py
+++ b/src/paperless/parsers/tika.py
@@ -0,0 +1,452 @@
+"""
+Built-in Tika document parser.
+
+Handles Office documents (DOCX, ODT, XLS, XLSX, PPT, PPTX, RTF, etc.) by
+sending them to an Apache Tika server for text extraction and a Gotenberg
+server for PDF conversion.  Because the source formats cannot be rendered by
+a browser natively, the parser always produces a PDF rendition for display.
+"""
+
+from __future__ import annotations
+
+import logging
+import shutil
+import tempfile
+from contextlib import ExitStack
+from pathlib import Path
+from typing import TYPE_CHECKING
+from typing import Self
+
+import httpx
+from django.conf import settings
+from django.utils import timezone
+from gotenberg_client import GotenbergClient
+from gotenberg_client.options import PdfAFormat
+from tika_client import TikaClient
+
+from documents.parsers import ParseError
+from documents.parsers import make_thumbnail_from_pdf
+from paperless.config import OutputTypeConfig
+from paperless.models import OutputTypeChoices
+from paperless.version import __full_version_str__
+
+if TYPE_CHECKING:
+    import datetime
+    from types import TracebackType
+
+    from paperless.parsers import MetadataEntry
+    from paperless.parsers import ParserContext
+
+logger = logging.getLogger("paperless.parsing.tika")
+
+_SUPPORTED_MIME_TYPES: dict[str, str] = {
+    "application/msword": ".doc",
+    "application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",
+    "application/vnd.ms-excel": ".xls",
+    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
+    "application/vnd.ms-powerpoint": ".ppt",
+    "application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx",
+    "application/vnd.openxmlformats-officedocument.presentationml.slideshow": ".ppsx",
+    "application/vnd.oasis.opendocument.presentation": ".odp",
+    "application/vnd.oasis.opendocument.spreadsheet": ".ods",
+    "application/vnd.oasis.opendocument.text": ".odt",
+    "application/vnd.oasis.opendocument.graphics": ".odg",
+    "text/rtf": ".rtf",
+}
+
+
+class TikaDocumentParser:
+    """Parse Office documents via Apache Tika and Gotenberg for Paperless-ngx.
+
+    Text extraction is handled by the Tika server.  PDF conversion for display
+    is handled by Gotenberg (LibreOffice route).  Because the source formats
+    cannot be rendered by a browser natively, ``requires_pdf_rendition`` is
+    True and the PDF is always produced regardless of the ``produce_archive``
+    flag passed to ``parse``.
+
+    Both ``TikaClient`` and ``GotenbergClient`` are opened once in
+    ``__enter__`` via an ``ExitStack`` and shared across ``parse``,
+    ``extract_metadata``, and ``_convert_to_pdf`` calls, then closed via
+    ``ExitStack.close()`` in ``__exit__``.  The parser must always be used
+    as a context manager.
+
+    Class attributes
+    ----------------
+    name : str
+        Human-readable parser name.
+    version : str
+        Semantic version string, kept in sync with Paperless-ngx releases.
+    author : str
+        Maintainer name.
+    url : str
+        Issue tracker / source URL.
+    """
+
+    name: str = "Paperless-ngx Tika Parser"
+    version: str = __full_version_str__
+    author: str = "Paperless-ngx Contributors"
+    url: str = "https://github.com/paperless-ngx/paperless-ngx"
+
+    # ------------------------------------------------------------------
+    # Class methods
+    # ------------------------------------------------------------------
+
+    @classmethod
+    def supported_mime_types(cls) -> dict[str, str]:
+        """Return the MIME types this parser handles.
+
+        Returns
+        -------
+        dict[str, str]
+            Mapping of MIME type to preferred file extension.
+        """
+        return _SUPPORTED_MIME_TYPES
+
+    @classmethod
+    def score(
+        cls,
+        mime_type: str,
+        filename: str,
+        path: Path | None = None,
+    ) -> int | None:
+        """Return the priority score for handling this file.
+
+        Returns ``None`` when Tika integration is disabled so the registry
+        skips this parser entirely.
+
+        Parameters
+        ----------
+        mime_type:
+            Detected MIME type of the file.
+        filename:
+            Original filename including extension.
+        path:
+            Optional filesystem path. Not inspected by this parser.
+
+        Returns
+        -------
+        int | None
+            10 if TIKA_ENABLED and the MIME type is supported, otherwise None.
+        """
+        if not settings.TIKA_ENABLED:
+            return None
+        if mime_type in _SUPPORTED_MIME_TYPES:
+            return 10
+        return None
+
+    # ------------------------------------------------------------------
+    # Properties
+    # ------------------------------------------------------------------
+
+    @property
+    def can_produce_archive(self) -> bool:
+        """Whether this parser can produce a searchable PDF archive copy.
+
+        Returns
+        -------
+        bool
+            Always False — Tika produces a display PDF, not an OCR archive.
+        """
+        return False
+
+    @property
+    def requires_pdf_rendition(self) -> bool:
+        """Whether the parser must produce a PDF for the frontend to display.
+
+        Returns
+        -------
+        bool
+            Always True — Office formats cannot be rendered natively in a
+            browser, so a PDF conversion is always required for display.
+        """
+        return True
+
+    # ------------------------------------------------------------------
+    # Lifecycle
+    # ------------------------------------------------------------------
+
+    def __init__(self, logging_group: object = None) -> None:
+        settings.SCRATCH_DIR.mkdir(parents=True, exist_ok=True)
+        self._tempdir = Path(
+            tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR),
+        )
+        self._text: str | None = None
+        self._date: datetime.datetime | None = None
+        self._archive_path: Path | None = None
+        self._exit_stack = ExitStack()
+        self._tika_client: TikaClient | None = None
+        self._gotenberg_client: GotenbergClient | None = None
+
+    def __enter__(self) -> Self:
+        self._tika_client = self._exit_stack.enter_context(
+            TikaClient(
+                tika_url=settings.TIKA_ENDPOINT,
+                timeout=settings.CELERY_TASK_TIME_LIMIT,
+            ),
+        )
+        self._gotenberg_client = self._exit_stack.enter_context(
+            GotenbergClient(
+                host=settings.TIKA_GOTENBERG_ENDPOINT,
+                timeout=settings.CELERY_TASK_TIME_LIMIT,
+            ),
+        )
+        return self
+
+    def __exit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc_val: BaseException | None,
+        exc_tb: TracebackType | None,
+    ) -> None:
+        self._exit_stack.close()
+        logger.debug("Cleaning up temporary directory %s", self._tempdir)
+        shutil.rmtree(self._tempdir, ignore_errors=True)
+
+    # ------------------------------------------------------------------
+    # Core parsing interface
+    # ------------------------------------------------------------------
+
+    def configure(self, context: ParserContext) -> None:
+        pass
+
+    def parse(
+        self,
+        document_path: Path,
+        mime_type: str,
+        *,
+        produce_archive: bool = True,
+    ) -> None:
+        """Send the document to Tika for text extraction and Gotenberg for PDF.
+
+        Because ``requires_pdf_rendition`` is True the PDF conversion is
+        always performed — the ``produce_archive`` flag is intentionally
+        ignored.
+
+        Parameters
+        ----------
+        document_path:
+            Absolute path to the document file to parse.
+        mime_type:
+            Detected MIME type of the document.
+        produce_archive:
+            Accepted for protocol compatibility but ignored; the PDF rendition
+            is always produced since the source format cannot be displayed
+            natively in the browser.
+
+        Raises
+        ------
+        documents.parsers.ParseError
+            If Tika or Gotenberg returns an error.
+        """
+        if TYPE_CHECKING:
+            assert self._tika_client is not None
+
+        logger.info("Sending %s to Tika server", document_path)
+
+        try:
+            try:
+                parsed = self._tika_client.tika.as_text.from_file(
+                    document_path,
+                    mime_type,
+                )
+            except httpx.HTTPStatusError as err:
+                # Workaround https://issues.apache.org/jira/browse/TIKA-4110
+                # Tika fails with some files as multi-part form data
+                if err.response.status_code == httpx.codes.INTERNAL_SERVER_ERROR:
+                    parsed = self._tika_client.tika.as_text.from_buffer(
+                        document_path.read_bytes(),
+                        mime_type,
+                    )
+                else:  # pragma: no cover
+                    raise
+        except Exception as err:
+            raise ParseError(
+                f"Could not parse {document_path} with tika server at "
+                f"{settings.TIKA_ENDPOINT}: {err}",
+            ) from err
+
+        self._text = parsed.content
+        if self._text is not None:
+            self._text = self._text.strip()
+
+        self._date = parsed.created
+        if self._date is not None and timezone.is_naive(self._date):
+            self._date = timezone.make_aware(self._date)
+
+        # Always convert — requires_pdf_rendition=True means the browser
+        # cannot display the source format natively.
+        self._archive_path = self._convert_to_pdf(document_path)
+
+    # ------------------------------------------------------------------
+    # Result accessors
+    # ------------------------------------------------------------------
+
+    def get_text(self) -> str | None:
+        """Return the plain-text content extracted during parse.
+
+        Returns
+        -------
+        str | None
+            Extracted text, or None if parse has not been called yet.
+        """
+        return self._text
+
+    def get_date(self) -> datetime.datetime | None:
+        """Return the document date detected during parse.
+
+        Returns
+        -------
+        datetime.datetime | None
+            Creation date from Tika metadata, or None if not detected.
+        """
+        return self._date
+
+    def get_archive_path(self) -> Path | None:
+        """Return the path to the generated PDF rendition, or None.
+
+        Returns
+        -------
+        Path | None
+            Path to the PDF produced by Gotenberg, or None if parse has not
+            been called yet.
+        """
+        return self._archive_path
+
+    # ------------------------------------------------------------------
+    # Thumbnail and metadata
+    # ------------------------------------------------------------------
+
+    def get_thumbnail(self, document_path: Path, mime_type: str) -> Path:
+        """Generate a thumbnail from the PDF rendition of the document.
+
+        Converts the document to PDF first if not already done.
+
+        Parameters
+        ----------
+        document_path:
+            Absolute path to the source document.
+        mime_type:
+            Detected MIME type of the document.
+
+        Returns
+        -------
+        Path
+            Path to the generated WebP thumbnail inside the temporary directory.
+        """
+        if self._archive_path is None:
+            self._archive_path = self._convert_to_pdf(document_path)
+        return make_thumbnail_from_pdf(self._archive_path, self._tempdir)
+
+    def get_page_count(
+        self,
+        document_path: Path,
+        mime_type: str,
+    ) -> int | None:
+        """Return the number of pages in the document.
+
+        Counts pages in the archive PDF produced by a preceding parse()
+        call.  Returns ``None`` if parse() has not been called yet or if
+        no archive was produced.
+
+        Returns
+        -------
+        int | None
+            Page count of the archive PDF, or ``None``.
+        """
+        if self._archive_path is not None:
+            from paperless.parsers.utils import get_page_count_for_pdf
+
+            return get_page_count_for_pdf(self._archive_path, log=logger)
+        return None
+
+    def extract_metadata(
+        self,
+        document_path: Path,
+        mime_type: str,
+    ) -> list[MetadataEntry]:
+        """Extract format-specific metadata via the Tika metadata endpoint.
+
+        Returns
+        -------
+        list[MetadataEntry]
+            All key/value pairs returned by Tika, or ``[]`` on error.
+        """
+        if TYPE_CHECKING:
+            assert self._tika_client is not None
+
+        try:
+            parsed = self._tika_client.metadata.from_file(document_path, mime_type)
+            return [
+                {
+                    "namespace": "",
+                    "prefix": "",
+                    "key": key,
+                    "value": parsed.data[key],
+                }
+                for key in parsed.data
+            ]
+        except Exception as e:
+            logger.warning(
+                "Error while fetching document metadata for %s: %s",
+                document_path,
+                e,
+            )
+            return []
+
+    # ------------------------------------------------------------------
+    # Private helpers
+    # ------------------------------------------------------------------
+
+    def _convert_to_pdf(self, document_path: Path) -> Path:
+        """Convert the document to PDF using Gotenberg's LibreOffice route.
+
+        Parameters
+        ----------
+        document_path:
+            Absolute path to the source document.
+
+        Returns
+        -------
+        Path
+            Path to the generated PDF inside the temporary directory.
+
+        Raises
+        ------
+        documents.parsers.ParseError
+            If Gotenberg returns an error.
+        """
+        if TYPE_CHECKING:
+            assert self._gotenberg_client is not None
+
+        pdf_path = self._tempdir / "convert.pdf"
+
+        logger.info("Converting %s to PDF as %s", document_path, pdf_path)
+
+        with self._gotenberg_client.libre_office.to_pdf() as route:
+            # Set the output format of the resulting PDF.
+            # OutputTypeConfig reads the database-stored ApplicationConfiguration
+            # first, then falls back to the PAPERLESS_OCR_OUTPUT_TYPE env var.
+            output_type = OutputTypeConfig().output_type
+            if output_type in {
+                OutputTypeChoices.PDF_A,
+                OutputTypeChoices.PDF_A2,
+            }:
+                route.pdf_format(PdfAFormat.A2b)
+            elif output_type == OutputTypeChoices.PDF_A1:
+                logger.warning(
+                    "Gotenberg does not support PDF/A-1a, choosing PDF/A-2b instead",
+                )
+                route.pdf_format(PdfAFormat.A2b)
+            elif output_type == OutputTypeChoices.PDF_A3:
+                route.pdf_format(PdfAFormat.A3b)
+
+            route.convert(document_path)
+
+            try:
+                response = route.run()
+                pdf_path.write_bytes(response.content)
+                return pdf_path
+            except Exception as err:
+                raise ParseError(
+                    f"Error while converting document to PDF: {err}",
+                ) from err
--- a/src/paperless/parsers/utils.py
+++ b/src/paperless/parsers/utils.py
@@ -0,0 +1,158 @@
+"""
+Shared utilities for Paperless-ngx document parsers.
+
+Functions here are format-neutral helpers that multiple parsers need.
+Keeping them here avoids parsers inheriting from each other just to
+share implementation.
+"""
+
+from __future__ import annotations
+
+import logging
+import re
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from pathlib import Path
+
+    from paperless.parsers import MetadataEntry
+
+logger = logging.getLogger("paperless.parsers.utils")
+
+
+def read_file_handle_unicode_errors(
+    filepath: Path,
+    log: logging.Logger | None = None,
+) -> str:
+    """Read a file as UTF-8 text, replacing invalid bytes rather than raising.
+
+    Parameters
+    ----------
+    filepath:
+        Absolute path to the file to read.
+    log:
+        Logger to use for warnings.  Falls back to the module-level logger
+        when omitted.
+
+    Returns
+    -------
+    str
+        File content as a string, with any invalid UTF-8 sequences replaced
+        by the Unicode replacement character.
+    """
+    _log = log or logger
+    try:
+        return filepath.read_text(encoding="utf-8")
+    except UnicodeDecodeError as e:
+        _log.warning("Unicode error during text reading, continuing: %s", e)
+        return filepath.read_bytes().decode("utf-8", errors="replace")
+
+
+def get_page_count_for_pdf(
+    document_path: Path,
+    log: logging.Logger | None = None,
+) -> int | None:
+    """Return the number of pages in a PDF file using pikepdf.
+
+    Parameters
+    ----------
+    document_path:
+        Absolute path to the PDF file.
+    log:
+        Logger to use for warnings.  Falls back to the module-level logger
+        when omitted.
+
+    Returns
+    -------
+    int | None
+        Page count, or ``None`` if the file cannot be opened or is not a
+        valid PDF.
+    """
+    import pikepdf
+
+    _log = log or logger
+
+    try:
+        with pikepdf.Pdf.open(document_path) as pdf:
+            return len(pdf.pages)
+    except Exception as e:
+        _log.warning("Unable to determine PDF page count for %s: %s", document_path, e)
+        return None
+
+
+def extract_pdf_metadata(
+    document_path: Path,
+    log: logging.Logger | None = None,
+) -> list[MetadataEntry]:
+    """Extract XMP/PDF metadata from a PDF file using pikepdf.
+
+    Reads all XMP metadata entries from the document and returns them as a
+    list of ``MetadataEntry`` dicts.  The method never raises — any failure
+    to open the file or read a specific key is logged and skipped.
+
+    Parameters
+    ----------
+    document_path:
+        Absolute path to the PDF file.
+    log:
+        Logger to use for warnings and debug messages.  Falls back to the
+        module-level logger when omitted.
+
+    Returns
+    -------
+    list[MetadataEntry]
+        Zero or more metadata entries.  Returns ``[]`` if the file cannot
+        be opened or contains no readable XMP metadata.
+    """
+    import pikepdf
+
+    from paperless.parsers import MetadataEntry
+
+    _log = log or logger
+    result: list[MetadataEntry] = []
+    namespace_pattern = re.compile(r"\{(.*)\}(.*)")
+
+    try:
+        pdf = pikepdf.open(document_path)
+        meta = pdf.open_metadata()
+    except Exception as e:
+        _log.warning("Could not open PDF metadata for %s: %s", document_path, e)
+        return []
+
+    for key, value in meta.items():
+        if isinstance(value, list):
+            value = " ".join(str(e) for e in value)
+        value = str(value)
+
+        try:
+            m = namespace_pattern.match(key)
+            if m is None:
+                continue
+
+            namespace = m.group(1)
+            key_value = m.group(2)
+
+            try:
+                namespace.encode("utf-8")
+                key_value.encode("utf-8")
+            except UnicodeEncodeError as enc_err:  # pragma: no cover
+                _log.debug("Skipping metadata key %s: %s", key, enc_err)
+                continue
+
+            result.append(
+                MetadataEntry(
+                    namespace=namespace,
+                    prefix=meta.REVERSE_NS[namespace],
+                    key=key_value,
+                    value=value,
+                ),
+            )
+        except Exception as e:
+            _log.warning(
+                "Error reading metadata key %s value %s: %s",
+                key,
+                value,
+                e,
+            )
+
+    return result
--- a/src/paperless/serialisers.py
+++ b/src/paperless/serialisers.py
@@ -6,6 +6,7 @@ from allauth.mfa.models import Authenticator
 from allauth.mfa.totp.internal.auth import TOTP
 from allauth.socialaccount.models import SocialAccount
 from allauth.socialaccount.models import SocialApp
+from django.conf import settings
 from django.contrib.auth.models import Group
 from django.contrib.auth.models import Permission
 from django.contrib.auth.models import User
@@ -15,6 +16,7 @@ from rest_framework import serializers
 from rest_framework.authtoken.serializers import AuthTokenSerializer

 from paperless.models import ApplicationConfiguration
+from paperless.network import validate_outbound_http_url
 from paperless.validators import reject_dangerous_svg
 from paperless_mail.serialisers import ObfuscatedPasswordField

@@ -236,6 +238,22 @@ class ApplicationConfigurationSerializer(serializers.ModelSerializer):
            reject_dangerous_svg(file)
        return file

+    def validate_llm_endpoint(self, value: str | None) -> str | None:
+        if not value:
+            return value
+
+        try:
+            validate_outbound_http_url(
+                value,
+                allow_internal=settings.LLM_ALLOW_INTERNAL_ENDPOINTS,
+            )
+        except ValueError as e:
+            raise serializers.ValidationError(
+                f"Invalid LLM endpoint: {e.args[0]}, see logs for details",
+            ) from e
+
+        return value
+
    class Meta:
        model = ApplicationConfiguration
        fields = "__all__"
--- a/src/paperless/settings/init.py
+++ b/src/paperless/settings/init.py
@@ -121,10 +121,7 @@ INSTALLED_APPS = [
    "django_extensions",
    "paperless",
    "documents.apps.DocumentsConfig",
-    "paperless_tesseract.apps.PaperlessTesseractConfig",
-    "paperless_text.apps.PaperlessTextConfig",
    "paperless_mail.apps.PaperlessMailConfig",
-    "paperless_remote.apps.PaperlessRemoteParserConfig",
    "django.contrib.admin",
    "rest_framework",
    "rest_framework.authtoken",
@@ -974,8 +971,8 @@ TIKA_GOTENBERG_ENDPOINT = os.getenv(
    "http://localhost:3000",
 )

-if TIKA_ENABLED:
-    INSTALLED_APPS.append("paperless_tika.apps.PaperlessTikaConfig")
+# Tika parser is now integrated into the main parser registry
+# No separate Django app needed

 AUDIT_LOG_ENABLED = get_bool_from_env("PAPERLESS_AUDIT_LOG_ENABLED", "true")
 if AUDIT_LOG_ENABLED:
@@ -1112,3 +1109,7 @@ LLM_BACKEND = os.getenv("PAPERLESS_AI_LLM_BACKEND")  # "ollama" or "openai"
 LLM_MODEL = os.getenv("PAPERLESS_AI_LLM_MODEL")
 LLM_API_KEY = os.getenv("PAPERLESS_AI_LLM_API_KEY")
 LLM_ENDPOINT = os.getenv("PAPERLESS_AI_LLM_ENDPOINT")
+LLM_ALLOW_INTERNAL_ENDPOINTS = get_bool_from_env(
+    "PAPERLESS_AI_LLM_ALLOW_INTERNAL_ENDPOINTS",
+    "true",
+)
--- a/src/paperless/tests/parsers/conftest.py
+++ b/src/paperless/tests/parsers/conftest.py
@@ -6,15 +6,29 @@ so it is easy to see which files belong to which test module.

 from __future__ import annotations

+from contextlib import contextmanager
 from typing import TYPE_CHECKING

 import pytest
+from django.test import override_settings

+from paperless.parsers.mail import MailDocumentParser
+from paperless.parsers.remote import RemoteDocumentParser
+from paperless.parsers.tesseract import RasterisedDocumentParser
 from paperless.parsers.text import TextDocumentParser
+from paperless.parsers.tika import TikaDocumentParser

 if TYPE_CHECKING:
+    from collections.abc import Callable
    from collections.abc import Generator
    from pathlib import Path
+    from unittest.mock import MagicMock
+
+    from pytest_django.fixtures import SettingsWrapper
+    from pytest_mock import MockerFixture
+
+    #: Type for the ``make_tesseract_parser`` fixture factory.
+    MakeTesseractParser = Callable[..., Generator[RasterisedDocumentParser, None, None]]


 # ------------------------------------------------------------------
@@ -74,3 +88,684 @@ def text_parser() -> Generator[TextDocumentParser, None, None]:
    """
    with TextDocumentParser() as parser:
        yield parser
+
+
+# ------------------------------------------------------------------
+# Remote parser instance
+# ------------------------------------------------------------------
+
+
+@pytest.fixture()
+def remote_parser() -> Generator[RemoteDocumentParser, None, None]:
+    """Yield a RemoteDocumentParser and clean up its temporary directory afterwards.
+
+    Yields
+    ------
+    RemoteDocumentParser
+        A ready-to-use parser instance.
+    """
+    with RemoteDocumentParser() as parser:
+        yield parser
+
+
+# ------------------------------------------------------------------
+# Remote parser settings helpers
+# ------------------------------------------------------------------
+
+
+@pytest.fixture()
+def azure_settings(settings: SettingsWrapper) -> SettingsWrapper:
+    """Configure Django settings for a valid Azure AI OCR engine.
+
+    Sets ``REMOTE_OCR_ENGINE``, ``REMOTE_OCR_API_KEY``, and
+    ``REMOTE_OCR_ENDPOINT`` to test values.  Settings are restored
+    automatically after the test by pytest-django.
+
+    Returns
+    -------
+    SettingsWrapper
+        The modified settings object (for chaining further overrides).
+    """
+    settings.REMOTE_OCR_ENGINE = "azureai"
+    settings.REMOTE_OCR_API_KEY = "test-api-key"
+    settings.REMOTE_OCR_ENDPOINT = "https://test.cognitiveservices.azure.com"
+    return settings
+
+
+@pytest.fixture()
+def no_engine_settings(settings: SettingsWrapper) -> SettingsWrapper:
+    """Configure Django settings with no remote engine configured.
+
+    Returns
+    -------
+    SettingsWrapper
+        The modified settings object.
+    """
+    settings.REMOTE_OCR_ENGINE = None
+    settings.REMOTE_OCR_API_KEY = None
+    settings.REMOTE_OCR_ENDPOINT = None
+    return settings
+
+
+# ------------------------------------------------------------------
+# Tika parser sample files
+# ------------------------------------------------------------------
+
+
+@pytest.fixture(scope="session")
+def tika_samples_dir(samples_dir: Path) -> Path:
+    """Absolute path to the Tika parser sample files directory.
+
+    Returns
+    -------
+    Path
+        ``<samples_dir>/tika/``
+    """
+    return samples_dir / "tika"
+
+
+@pytest.fixture(scope="session")
+def sample_odt_file(tika_samples_dir: Path) -> Path:
+    """Path to a sample ODT file.
+
+    Returns
+    -------
+    Path
+        Absolute path to ``tika/sample.odt``.
+    """
+    return tika_samples_dir / "sample.odt"
+
+
+@pytest.fixture(scope="session")
+def sample_docx_file(tika_samples_dir: Path) -> Path:
+    """Path to a sample DOCX file.
+
+    Returns
+    -------
+    Path
+        Absolute path to ``tika/sample.docx``.
+    """
+    return tika_samples_dir / "sample.docx"
+
+
+@pytest.fixture(scope="session")
+def sample_doc_file(tika_samples_dir: Path) -> Path:
+    """Path to a sample DOC file.
+
+    Returns
+    -------
+    Path
+        Absolute path to ``tika/sample.doc``.
+    """
+    return tika_samples_dir / "sample.doc"
+
+
+@pytest.fixture(scope="session")
+def sample_broken_odt(tika_samples_dir: Path) -> Path:
+    """Path to a broken ODT file that triggers the multi-part fallback.
+
+    Returns
+    -------
+    Path
+        Absolute path to ``tika/multi-part-broken.odt``.
+    """
+    return tika_samples_dir / "multi-part-broken.odt"
+
+
+# ------------------------------------------------------------------
+# Tika parser instance
+# ------------------------------------------------------------------
+
+
+@pytest.fixture()
+def tika_parser() -> Generator[TikaDocumentParser, None, None]:
+    """Yield a TikaDocumentParser and clean up its temporary directory afterwards.
+
+    Yields
+    ------
+    TikaDocumentParser
+        A ready-to-use parser instance.
+    """
+    with TikaDocumentParser() as parser:
+        yield parser
+
+
+# ------------------------------------------------------------------
+# Mail parser sample files
+# ------------------------------------------------------------------
+
+
+@pytest.fixture(scope="session")
+def mail_samples_dir(samples_dir: Path) -> Path:
+    """Absolute path to the mail parser sample files directory.
+
+    Returns
+    -------
+    Path
+        ``<samples_dir>/mail/``
+    """
+    return samples_dir / "mail"
+
+
+@pytest.fixture(scope="session")
+def broken_email_file(mail_samples_dir: Path) -> Path:
+    """Path to a broken/malformed EML sample file.
+
+    Returns
+    -------
+    Path
+        Absolute path to ``mail/broken.eml``.
+    """
+    return mail_samples_dir / "broken.eml"
+
+
+@pytest.fixture(scope="session")
+def simple_txt_email_file(mail_samples_dir: Path) -> Path:
+    """Path to a plain-text email sample file.
+
+    Returns
+    -------
+    Path
+        Absolute path to ``mail/simple_text.eml``.
+    """
+    return mail_samples_dir / "simple_text.eml"
+
+
+@pytest.fixture(scope="session")
+def simple_txt_email_pdf_file(mail_samples_dir: Path) -> Path:
+    """Path to the expected PDF rendition of the plain-text email.
+
+    Returns
+    -------
+    Path
+        Absolute path to ``mail/simple_text.eml.pdf``.
+    """
+    return mail_samples_dir / "simple_text.eml.pdf"
+
+
+@pytest.fixture(scope="session")
+def simple_txt_email_thumbnail_file(mail_samples_dir: Path) -> Path:
+    """Path to the expected thumbnail for the plain-text email.
+
+    Returns
+    -------
+    Path
+        Absolute path to ``mail/simple_text.eml.pdf.webp``.
+    """
+    return mail_samples_dir / "simple_text.eml.pdf.webp"
+
+
+@pytest.fixture(scope="session")
+def html_email_file(mail_samples_dir: Path) -> Path:
+    """Path to an HTML email sample file.
+
+    Returns
+    -------
+    Path
+        Absolute path to ``mail/html.eml``.
+    """
+    return mail_samples_dir / "html.eml"
+
+
+@pytest.fixture(scope="session")
+def html_email_pdf_file(mail_samples_dir: Path) -> Path:
+    """Path to the expected PDF rendition of the HTML email.
+
+    Returns
+    -------
+    Path
+        Absolute path to ``mail/html.eml.pdf``.
+    """
+    return mail_samples_dir / "html.eml.pdf"
+
+
+@pytest.fixture(scope="session")
+def html_email_thumbnail_file(mail_samples_dir: Path) -> Path:
+    """Path to the expected thumbnail for the HTML email.
+
+    Returns
+    -------
+    Path
+        Absolute path to ``mail/html.eml.pdf.webp``.
+    """
+    return mail_samples_dir / "html.eml.pdf.webp"
+
+
+@pytest.fixture(scope="session")
+def html_email_html_file(mail_samples_dir: Path) -> Path:
+    """Path to the HTML body of the HTML email sample.
+
+    Returns
+    -------
+    Path
+        Absolute path to ``mail/html.eml.html``.
+    """
+    return mail_samples_dir / "html.eml.html"
+
+
+@pytest.fixture(scope="session")
+def merged_pdf_first(mail_samples_dir: Path) -> Path:
+    """Path to the first PDF used in PDF-merge tests.
+
+    Returns
+    -------
+    Path
+        Absolute path to ``mail/first.pdf``.
+    """
+    return mail_samples_dir / "first.pdf"
+
+
+@pytest.fixture(scope="session")
+def merged_pdf_second(mail_samples_dir: Path) -> Path:
+    """Path to the second PDF used in PDF-merge tests.
+
+    Returns
+    -------
+    Path
+        Absolute path to ``mail/second.pdf``.
+    """
+    return mail_samples_dir / "second.pdf"
+
+
+# ------------------------------------------------------------------
+# Mail parser instance
+# ------------------------------------------------------------------
+
+
+@pytest.fixture()
+def mail_parser() -> Generator[MailDocumentParser, None, None]:
+    """Yield a MailDocumentParser and clean up its temporary directory afterwards.
+
+    Yields
+    ------
+    MailDocumentParser
+        A ready-to-use parser instance.
+    """
+    with MailDocumentParser() as parser:
+        yield parser
+
+
+@pytest.fixture(scope="session")
+def nginx_base_url() -> Generator[str, None, None]:
+    """
+    The base URL for the nginx HTTP server we expect to be alive
+    """
+    yield "http://localhost:8080"
+
+
+# ------------------------------------------------------------------
+# Tesseract parser sample files
+# ------------------------------------------------------------------
+
+
+@pytest.fixture(scope="session")
+def tesseract_samples_dir(samples_dir: Path) -> Path:
+    """Absolute path to the tesseract parser sample files directory.
+
+    Returns
+    -------
+    Path
+        ``<samples_dir>/tesseract/``
+    """
+    return samples_dir / "tesseract"
+
+
+@pytest.fixture(scope="session")
+def document_webp_file(tesseract_samples_dir: Path) -> Path:
+    """Path to a WebP document sample file.
+
+    Returns
+    -------
+    Path
+        Absolute path to ``tesseract/document.webp``.
+    """
+    return tesseract_samples_dir / "document.webp"
+
+
+@pytest.fixture(scope="session")
+def encrypted_pdf_file(tesseract_samples_dir: Path) -> Path:
+    """Path to an encrypted PDF sample file.
+
+    Returns
+    -------
+    Path
+        Absolute path to ``tesseract/encrypted.pdf``.
+    """
+    return tesseract_samples_dir / "encrypted.pdf"
+
+
+@pytest.fixture(scope="session")
+def multi_page_digital_pdf_file(tesseract_samples_dir: Path) -> Path:
+    """Path to a multi-page digital PDF sample file.
+
+    Returns
+    -------
+    Path
+        Absolute path to ``tesseract/multi-page-digital.pdf``.
+    """
+    return tesseract_samples_dir / "multi-page-digital.pdf"
+
+
+@pytest.fixture(scope="session")
+def multi_page_images_alpha_rgb_tiff_file(tesseract_samples_dir: Path) -> Path:
+    """Path to a multi-page TIFF with alpha channel in RGB.
+
+    Returns
+    -------
+    Path
+        Absolute path to ``tesseract/multi-page-images-alpha-rgb.tiff``.
+    """
+    return tesseract_samples_dir / "multi-page-images-alpha-rgb.tiff"
+
+
+@pytest.fixture(scope="session")
+def multi_page_images_alpha_tiff_file(tesseract_samples_dir: Path) -> Path:
+    """Path to a multi-page TIFF with alpha channel.
+
+    Returns
+    -------
+    Path
+        Absolute path to ``tesseract/multi-page-images-alpha.tiff``.
+    """
+    return tesseract_samples_dir / "multi-page-images-alpha.tiff"
+
+
+@pytest.fixture(scope="session")
+def multi_page_images_pdf_file(tesseract_samples_dir: Path) -> Path:
+    """Path to a multi-page PDF with images.
+
+    Returns
+    -------
+    Path
+        Absolute path to ``tesseract/multi-page-images.pdf``.
+    """
+    return tesseract_samples_dir / "multi-page-images.pdf"
+
+
+@pytest.fixture(scope="session")
+def multi_page_images_tiff_file(tesseract_samples_dir: Path) -> Path:
+    """Path to a multi-page TIFF sample file.
+
+    Returns
+    -------
+    Path
+        Absolute path to ``tesseract/multi-page-images.tiff``.
+    """
+    return tesseract_samples_dir / "multi-page-images.tiff"
+
+
+@pytest.fixture(scope="session")
+def multi_page_mixed_pdf_file(tesseract_samples_dir: Path) -> Path:
+    """Path to a multi-page mixed PDF sample file.
+
+    Returns
+    -------
+    Path
+        Absolute path to ``tesseract/multi-page-mixed.pdf``.
+    """
+    return tesseract_samples_dir / "multi-page-mixed.pdf"
+
+
+@pytest.fixture(scope="session")
+def no_text_alpha_png_file(tesseract_samples_dir: Path) -> Path:
+    """Path to a PNG with alpha channel and no text.
+
+    Returns
+    -------
+    Path
+        Absolute path to ``tesseract/no-text-alpha.png``.
+    """
+    return tesseract_samples_dir / "no-text-alpha.png"
+
+
+@pytest.fixture(scope="session")
+def rotated_pdf_file(tesseract_samples_dir: Path) -> Path:
+    """Path to a rotated PDF sample file.
+
+    Returns
+    -------
+    Path
+        Absolute path to ``tesseract/rotated.pdf``.
+    """
+    return tesseract_samples_dir / "rotated.pdf"
+
+
+@pytest.fixture(scope="session")
+def rtl_test_pdf_file(tesseract_samples_dir: Path) -> Path:
+    """Path to an RTL test PDF sample file.
+
+    Returns
+    -------
+    Path
+        Absolute path to ``tesseract/rtl-test.pdf``.
+    """
+    return tesseract_samples_dir / "rtl-test.pdf"
+
+
+@pytest.fixture(scope="session")
+def signed_pdf_file(tesseract_samples_dir: Path) -> Path:
+    """Path to a signed PDF sample file.
+
+    Returns
+    -------
+    Path
+        Absolute path to ``tesseract/signed.pdf``.
+    """
+    return tesseract_samples_dir / "signed.pdf"
+
+
+@pytest.fixture(scope="session")
+def simple_alpha_png_file(tesseract_samples_dir: Path) -> Path:
+    """Path to a simple PNG with alpha channel.
+
+    Returns
+    -------
+    Path
+        Absolute path to ``tesseract/simple-alpha.png``.
+    """
+    return tesseract_samples_dir / "simple-alpha.png"
+
+
+@pytest.fixture(scope="session")
+def simple_digital_pdf_file(tesseract_samples_dir: Path) -> Path:
+    """Path to a simple digital PDF sample file.
+
+    Returns
+    -------
+    Path
+        Absolute path to ``tesseract/simple-digital.pdf``.
+    """
+    return tesseract_samples_dir / "simple-digital.pdf"
+
+
+@pytest.fixture(scope="session")
+def simple_no_dpi_png_file(tesseract_samples_dir: Path) -> Path:
+    """Path to a simple PNG without DPI information.
+
+    Returns
+    -------
+    Path
+        Absolute path to ``tesseract/simple-no-dpi.png``.
+    """
+    return tesseract_samples_dir / "simple-no-dpi.png"
+
+
+@pytest.fixture(scope="session")
+def simple_bmp_file(tesseract_samples_dir: Path) -> Path:
+    """Path to a simple BMP sample file.
+
+    Returns
+    -------
+    Path
+        Absolute path to ``tesseract/simple.bmp``.
+    """
+    return tesseract_samples_dir / "simple.bmp"
+
+
+@pytest.fixture(scope="session")
+def simple_gif_file(tesseract_samples_dir: Path) -> Path:
+    """Path to a simple GIF sample file.
+
+    Returns
+    -------
+    Path
+        Absolute path to ``tesseract/simple.gif``.
+    """
+    return tesseract_samples_dir / "simple.gif"
+
+
+@pytest.fixture(scope="session")
+def simple_heic_file(tesseract_samples_dir: Path) -> Path:
+    """Path to a simple HEIC sample file.
+
+    Returns
+    -------
+    Path
+        Absolute path to ``tesseract/simple.heic``.
+    """
+    return tesseract_samples_dir / "simple.heic"
+
+
+@pytest.fixture(scope="session")
+def simple_jpg_file(tesseract_samples_dir: Path) -> Path:
+    """Path to a simple JPG sample file.
+
+    Returns
+    -------
+    Path
+        Absolute path to ``tesseract/simple.jpg``.
+    """
+    return tesseract_samples_dir / "simple.jpg"
+
+
+@pytest.fixture(scope="session")
+def simple_png_file(tesseract_samples_dir: Path) -> Path:
+    """Path to a simple PNG sample file.
+
+    Returns
+    -------
+    Path
+        Absolute path to ``tesseract/simple.png``.
+    """
+    return tesseract_samples_dir / "simple.png"
+
+
+@pytest.fixture(scope="session")
+def simple_tif_file(tesseract_samples_dir: Path) -> Path:
+    """Path to a simple TIF sample file.
+
+    Returns
+    -------
+    Path
+        Absolute path to ``tesseract/simple.tif``.
+    """
+    return tesseract_samples_dir / "simple.tif"
+
+
+@pytest.fixture(scope="session")
+def single_page_mixed_pdf_file(tesseract_samples_dir: Path) -> Path:
+    """Path to a single-page mixed PDF sample file.
+
+    Returns
+    -------
+    Path
+        Absolute path to ``tesseract/single-page-mixed.pdf``.
+    """
+    return tesseract_samples_dir / "single-page-mixed.pdf"
+
+
+@pytest.fixture(scope="session")
+def with_form_pdf_file(tesseract_samples_dir: Path) -> Path:
+    """Path to a PDF with form sample file.
+
+    Returns
+    -------
+    Path
+        Absolute path to ``tesseract/with-form.pdf``.
+    """
+    return tesseract_samples_dir / "with-form.pdf"
+
+
+# ------------------------------------------------------------------
+# Tesseract parser instance and settings helpers
+# ------------------------------------------------------------------
+
+
+@pytest.fixture()
+def null_app_config(mocker: MockerFixture) -> MagicMock:
+    """Return a MagicMock with all OcrConfig fields set to None.
+
+    This allows the parser to fall back to Django settings instead of
+    hitting the database.
+
+    Returns
+    -------
+    MagicMock
+        Mock config with all fields as None
+    """
+    return mocker.MagicMock(
+        output_type=None,
+        pages=None,
+        language=None,
+        mode=None,
+        skip_archive_file=None,
+        image_dpi=None,
+        unpaper_clean=None,
+        deskew=None,
+        rotate_pages=None,
+        rotate_pages_threshold=None,
+        max_image_pixels=None,
+        color_conversion_strategy=None,
+        user_args=None,
+    )
+
+
+@pytest.fixture()
+def tesseract_parser(
+    mocker: MockerFixture,
+    null_app_config: MagicMock,
+) -> Generator[RasterisedDocumentParser, None, None]:
+    """Yield a RasterisedDocumentParser and clean up its temporary directory afterwards.
+
+    Patches the config system to avoid database access.
+
+    Yields
+    ------
+    RasterisedDocumentParser
+        A ready-to-use parser instance.
+    """
+    mocker.patch(
+        "paperless.config.BaseConfig._get_config_instance",
+        return_value=null_app_config,
+    )
+    with RasterisedDocumentParser() as parser:
+        yield parser
+
+
+@pytest.fixture()
+def make_tesseract_parser(
+    mocker: MockerFixture,
+    null_app_config: MagicMock,
+) -> MakeTesseractParser:
+    """Return a factory for creating RasterisedDocumentParser with Django settings overrides.
+
+    This fixture is useful for tests that need to create parsers with different
+    settings configurations.
+
+    Returns
+    -------
+    Callable[..., contextmanager[RasterisedDocumentParser]]
+        A context manager factory that accepts Django settings overrides
+    """
+    mocker.patch(
+        "paperless.config.BaseConfig._get_config_instance",
+        return_value=null_app_config,
+    )
+
+    @contextmanager
+    def _make_parser(**django_settings_overrides):
+        with override_settings(**django_settings_overrides):
+            with RasterisedDocumentParser() as parser:
+                yield parser
+
+    return _make_parser
--- a/src/paperless/tests/parsers/test_mail_parser.py
+++ b/src/paperless/tests/parsers/test_mail_parser.py
@@ -12,7 +12,64 @@ from pytest_httpx import HTTPXMock
 from pytest_mock import MockerFixture

 from documents.parsers import ParseError
-from paperless_mail.parsers import MailDocumentParser
+from paperless.parsers import ParserContext
+from paperless.parsers import ParserProtocol
+from paperless.parsers.mail import MailDocumentParser
+
+
+class TestMailParserProtocol:
+    """Verify that MailDocumentParser satisfies the ParserProtocol contract."""
+
+    def test_isinstance_satisfies_protocol(
+        self,
+        mail_parser: MailDocumentParser,
+    ) -> None:
+        assert isinstance(mail_parser, ParserProtocol)
+
+    def test_supported_mime_types(self) -> None:
+        mime_types = MailDocumentParser.supported_mime_types()
+        assert isinstance(mime_types, dict)
+        assert "message/rfc822" in mime_types
+
+    @pytest.mark.parametrize(
+        ("mime_type", "expected"),
+        [
+            ("message/rfc822", 10),
+            ("application/pdf", None),
+            ("text/plain", None),
+        ],
+    )
+    def test_score(self, mime_type: str, expected: int | None) -> None:
+        assert MailDocumentParser.score(mime_type, "email.eml") == expected
+
+    def test_can_produce_archive_is_false(
+        self,
+        mail_parser: MailDocumentParser,
+    ) -> None:
+        assert mail_parser.can_produce_archive is False
+
+    def test_requires_pdf_rendition_is_true(
+        self,
+        mail_parser: MailDocumentParser,
+    ) -> None:
+        assert mail_parser.requires_pdf_rendition is True
+
+    def test_get_page_count_returns_none_without_archive(
+        self,
+        mail_parser: MailDocumentParser,
+        html_email_file: Path,
+    ) -> None:
+        assert mail_parser.get_page_count(html_email_file, "message/rfc822") is None
+
+    def test_get_page_count_returns_int_with_pdf_archive(
+        self,
+        mail_parser: MailDocumentParser,
+        simple_txt_email_pdf_file: Path,
+    ) -> None:
+        mail_parser._archive_path = simple_txt_email_pdf_file
+        count = mail_parser.get_page_count(simple_txt_email_pdf_file, "message/rfc822")
+        assert isinstance(count, int)
+        assert count > 0


 class TestEmailFileParsing:
@@ -24,7 +81,7 @@ class TestEmailFileParsing:
    def test_parse_error_missing_file(
        self,
        mail_parser: MailDocumentParser,
-        sample_dir: Path,
+        mail_samples_dir: Path,
    ) -> None:
        """
        GIVEN:
@@ -35,7 +92,7 @@ class TestEmailFileParsing:
            - An Exception is thrown
        """
        # Check if exception is raised when parsing fails.
-        test_file = sample_dir / "doesntexist.eml"
+        test_file = mail_samples_dir / "doesntexist.eml"

        assert not test_file.exists()

@@ -246,12 +303,12 @@ class TestEmailThumbnailGenerate:
        """
        mocked_return = "Passing the return value through.."
        mock_make_thumbnail_from_pdf = mocker.patch(
-            "paperless_mail.parsers.make_thumbnail_from_pdf",
+            "paperless.parsers.mail.make_thumbnail_from_pdf",
        )
        mock_make_thumbnail_from_pdf.return_value = mocked_return

        mock_generate_pdf = mocker.patch(
-            "paperless_mail.parsers.MailDocumentParser.generate_pdf",
+            "paperless.parsers.mail.MailDocumentParser.generate_pdf",
        )
        mock_generate_pdf.return_value = "Mocked return value.."

@@ -260,8 +317,7 @@ class TestEmailThumbnailGenerate:
        mock_generate_pdf.assert_called_once()
        mock_make_thumbnail_from_pdf.assert_called_once_with(
            "Mocked return value..",
-            mail_parser.tempdir,
-            None,
+            mail_parser._tempdir,
        )

        assert mocked_return == thumb
@@ -373,7 +429,7 @@ class TestParser:
        """
        # Validate parsing returns the expected results
        mock_generate_pdf = mocker.patch(
-            "paperless_mail.parsers.MailDocumentParser.generate_pdf",
+            "paperless.parsers.mail.MailDocumentParser.generate_pdf",
        )

        mail_parser.parse(simple_txt_email_file, "message/rfc822")
@@ -385,7 +441,7 @@ class TestParser:
            "BCC: fdf@fvf.de\n\n"
            "\n\nThis is just a simple Text Mail."
        )
-        assert text_expected == mail_parser.text
+        assert text_expected == mail_parser.get_text()
        assert (
            datetime.datetime(
                2022,
@@ -396,7 +452,7 @@ class TestParser:
                43,
                tzinfo=datetime.timezone(datetime.timedelta(seconds=7200)),
            )
-            == mail_parser.date
+            == mail_parser.get_date()
        )

        # Just check if tried to generate archive, the unittest for generate_pdf() goes deeper.
@@ -419,7 +475,7 @@ class TestParser:
        """

        mock_generate_pdf = mocker.patch(
-            "paperless_mail.parsers.MailDocumentParser.generate_pdf",
+            "paperless.parsers.mail.MailDocumentParser.generate_pdf",
        )

        # Validate parsing returns the expected results
@@ -443,7 +499,7 @@ class TestParser:
        mail_parser.parse(html_email_file, "message/rfc822")

        mock_generate_pdf.assert_called_once()
-        assert text_expected == mail_parser.text
+        assert text_expected == mail_parser.get_text()
        assert (
            datetime.datetime(
                2022,
@@ -454,7 +510,7 @@ class TestParser:
                19,
                tzinfo=datetime.timezone(datetime.timedelta(seconds=7200)),
            )
-            == mail_parser.date
+            == mail_parser.get_date()
        )

    def test_generate_pdf_parse_error(
@@ -501,7 +557,7 @@ class TestParser:

        mail_parser.parse(simple_txt_email_file, "message/rfc822")

-        assert mail_parser.archive_path is not None
+        assert mail_parser.get_archive_path() is not None

    @pytest.mark.httpx_mock(can_send_already_matched_responses=True)
    def test_generate_pdf_html_email(
@@ -542,7 +598,7 @@ class TestParser:
        )
        mail_parser.parse(html_email_file, "message/rfc822")

-        assert mail_parser.archive_path is not None
+        assert mail_parser.get_archive_path() is not None

    def test_generate_pdf_html_email_html_to_pdf_failure(
        self,
@@ -712,10 +768,10 @@ class TestParser:

        def test_layout_option(layout_option, expected_calls, expected_pdf_names):
            mock_mailrule_get.return_value = mock.Mock(pdf_layout=layout_option)
+            mail_parser.configure(ParserContext(mailrule_id=1))
            mail_parser.parse(
                document_path=html_email_file,
                mime_type="message/rfc822",
-                mailrule_id=1,
            )
            args, _ = mock_merge_route.call_args
            assert len(args[0]) == expected_calls
--- a/src/paperless/tests/parsers/test_mail_parser_live.py
+++ b/src/paperless/tests/parsers/test_mail_parser_live.py
@@ -11,7 +11,7 @@ from PIL import Image
 from pytest_mock import MockerFixture

 from documents.tests.utils import util_call_with_backoff
-from paperless_mail.parsers import MailDocumentParser
+from paperless.parsers.mail import MailDocumentParser


 def extract_text(pdf_path: Path) -> str:
@@ -159,7 +159,7 @@ class TestParserLive:
            - The returned thumbnail image file shall match the expected hash
        """
        mock_generate_pdf = mocker.patch(
-            "paperless_mail.parsers.MailDocumentParser.generate_pdf",
+            "paperless.parsers.mail.MailDocumentParser.generate_pdf",
        )
        mock_generate_pdf.return_value = simple_txt_email_pdf_file

@@ -216,10 +216,10 @@ class TestParserLive:
            - The merged PDF shall contain text from both source PDFs
        """
        mock_generate_pdf_from_html = mocker.patch(
-            "paperless_mail.parsers.MailDocumentParser.generate_pdf_from_html",
+            "paperless.parsers.mail.MailDocumentParser.generate_pdf_from_html",
        )
        mock_generate_pdf_from_mail = mocker.patch(
-            "paperless_mail.parsers.MailDocumentParser.generate_pdf_from_mail",
+            "paperless.parsers.mail.MailDocumentParser.generate_pdf_from_mail",
        )
        mock_generate_pdf_from_mail.return_value = merged_pdf_first
        mock_generate_pdf_from_html.return_value = merged_pdf_second
--- a/src/paperless/tests/parsers/test_remote_parser.py
+++ b/src/paperless/tests/parsers/test_remote_parser.py
@@ -0,0 +1,503 @@
+"""
+Tests for paperless.parsers.remote.RemoteDocumentParser.
+
+All tests use the context-manager protocol for parser lifecycle.
+
+Fixture layout
+--------------
+make_azure_mock  — factory (defined here; specific to this module)
+azure_client     — composes azure_settings + make_azure_mock + patch;
+                   use when a test needs the client to succeed
+failing_azure_client
+                 — composes azure_settings + patch with RuntimeError;
+                   use when a test needs the client to fail
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+from unittest.mock import Mock
+
+import pytest
+
+from paperless.parsers import ParserContext
+from paperless.parsers import ParserProtocol
+from paperless.parsers.remote import RemoteDocumentParser
+
+if TYPE_CHECKING:
+    from collections.abc import Callable
+    from pathlib import Path
+
+    from pytest_django.fixtures import SettingsWrapper
+    from pytest_mock import MockerFixture
+
+
+# ---------------------------------------------------------------------------
+# Module-local fixtures
+# ---------------------------------------------------------------------------
+
+_AZURE_CLIENT_TARGET = "azure.ai.documentintelligence.DocumentIntelligenceClient"
+_DEFAULT_TEXT = "Extracted text."
+
+
+@pytest.fixture()
+def make_azure_mock() -> Callable[[str], Mock]:
+    """Return a factory that builds a mock Azure DocumentIntelligenceClient.
+
+    Usage::
+
+        mock_client = make_azure_mock()            # default extracted text
+        mock_client = make_azure_mock("My text.")  # custom extracted text
+    """
+
+    def _factory(text: str = _DEFAULT_TEXT) -> Mock:
+        mock_client = Mock()
+        mock_poller = Mock()
+        mock_poller.wait.return_value = None
+        mock_poller.details = {"operation_id": "fake-op-id"}
+        mock_poller.result.return_value.content = text
+        mock_client.begin_analyze_document.return_value = mock_poller
+        mock_client.get_analyze_result_pdf.return_value = [b"%PDF-1.4 FAKE"]
+        return mock_client
+
+    return _factory
+
+
+@pytest.fixture()
+def azure_client(
+    azure_settings: SettingsWrapper,
+    make_azure_mock: Callable[[str], Mock],
+    mocker: MockerFixture,
+) -> Mock:
+    """Patch the Azure DI client with a succeeding mock and return the instance.
+
+    Implicitly applies ``azure_settings`` so tests using this fixture do not
+    also need ``@pytest.mark.usefixtures("azure_settings")``.
+    """
+    mock_client = make_azure_mock()
+    mocker.patch(_AZURE_CLIENT_TARGET, return_value=mock_client)
+    return mock_client
+
+
+@pytest.fixture()
+def failing_azure_client(
+    azure_settings: SettingsWrapper,
+    mocker: MockerFixture,
+) -> Mock:
+    """Patch the Azure DI client to raise RuntimeError on every call.
+
+    Implicitly applies ``azure_settings``.  Returns the mock instance so
+    tests can assert on calls such as ``close()``.
+    """
+    mock_client = Mock()
+    mock_client.begin_analyze_document.side_effect = RuntimeError("network failure")
+    mocker.patch(_AZURE_CLIENT_TARGET, return_value=mock_client)
+    return mock_client
+
+
+# ---------------------------------------------------------------------------
+# Protocol contract
+# ---------------------------------------------------------------------------
+
+
+class TestRemoteParserProtocol:
+    """Verify that RemoteDocumentParser satisfies the ParserProtocol contract."""
+
+    def test_isinstance_satisfies_protocol(
+        self,
+        remote_parser: RemoteDocumentParser,
+    ) -> None:
+        assert isinstance(remote_parser, ParserProtocol)
+
+    def test_class_attributes_present(self) -> None:
+        assert isinstance(RemoteDocumentParser.name, str) and RemoteDocumentParser.name
+        assert (
+            isinstance(RemoteDocumentParser.version, str)
+            and RemoteDocumentParser.version
+        )
+        assert (
+            isinstance(RemoteDocumentParser.author, str) and RemoteDocumentParser.author
+        )
+        assert isinstance(RemoteDocumentParser.url, str) and RemoteDocumentParser.url
+
+
+# ---------------------------------------------------------------------------
+# supported_mime_types
+# ---------------------------------------------------------------------------
+
+
+class TestRemoteParserSupportedMimeTypes:
+    """supported_mime_types() always returns the full set regardless of config."""
+
+    def test_returns_dict(self) -> None:
+        mime_types = RemoteDocumentParser.supported_mime_types()
+        assert isinstance(mime_types, dict)
+
+    def test_includes_all_expected_types(self) -> None:
+        mime_types = RemoteDocumentParser.supported_mime_types()
+        expected = {
+            "application/pdf",
+            "image/png",
+            "image/jpeg",
+            "image/tiff",
+            "image/bmp",
+            "image/gif",
+            "image/webp",
+        }
+        assert expected == set(mime_types.keys())
+
+    @pytest.mark.usefixtures("no_engine_settings")
+    def test_returns_full_set_when_not_configured(self) -> None:
+        """
+        GIVEN: No remote engine is configured
+        WHEN:  supported_mime_types() is called
+        THEN:  The full MIME type dict is still returned (score() handles activation)
+        """
+        mime_types = RemoteDocumentParser.supported_mime_types()
+        assert len(mime_types) == 7
+
+
+# ---------------------------------------------------------------------------
+# score()
+# ---------------------------------------------------------------------------
+
+
+class TestRemoteParserScore:
+    """score() encodes the activation logic: None when unconfigured, 20 when active."""
+
+    @pytest.mark.usefixtures("azure_settings")
+    @pytest.mark.parametrize(
+        "mime_type",
+        [
+            pytest.param("application/pdf", id="pdf"),
+            pytest.param("image/png", id="png"),
+            pytest.param("image/jpeg", id="jpeg"),
+            pytest.param("image/tiff", id="tiff"),
+            pytest.param("image/bmp", id="bmp"),
+            pytest.param("image/gif", id="gif"),
+            pytest.param("image/webp", id="webp"),
+        ],
+    )
+    def test_score_returns_20_when_configured(self, mime_type: str) -> None:
+        result = RemoteDocumentParser.score(mime_type, "doc.pdf")
+        assert result == 20
+
+    @pytest.mark.usefixtures("no_engine_settings")
+    @pytest.mark.parametrize(
+        "mime_type",
+        [
+            pytest.param("application/pdf", id="pdf"),
+            pytest.param("image/png", id="png"),
+            pytest.param("image/jpeg", id="jpeg"),
+        ],
+    )
+    def test_score_returns_none_when_no_engine(self, mime_type: str) -> None:
+        result = RemoteDocumentParser.score(mime_type, "doc.pdf")
+        assert result is None
+
+    def test_score_returns_none_when_api_key_missing(
+        self,
+        settings: SettingsWrapper,
+    ) -> None:
+        settings.REMOTE_OCR_ENGINE = "azureai"
+        settings.REMOTE_OCR_API_KEY = None
+        settings.REMOTE_OCR_ENDPOINT = "https://test.cognitiveservices.azure.com"
+        result = RemoteDocumentParser.score("application/pdf", "doc.pdf")
+        assert result is None
+
+    def test_score_returns_none_when_endpoint_missing(
+        self,
+        settings: SettingsWrapper,
+    ) -> None:
+        settings.REMOTE_OCR_ENGINE = "azureai"
+        settings.REMOTE_OCR_API_KEY = "key"
+        settings.REMOTE_OCR_ENDPOINT = None
+        result = RemoteDocumentParser.score("application/pdf", "doc.pdf")
+        assert result is None
+
+    @pytest.mark.usefixtures("azure_settings")
+    def test_score_returns_none_for_unsupported_mime_type(self) -> None:
+        result = RemoteDocumentParser.score("text/plain", "doc.txt")
+        assert result is None
+
+    @pytest.mark.usefixtures("azure_settings")
+    def test_score_higher_than_tesseract_default(self) -> None:
+        """Remote parser (20) outranks the tesseract default (10) when configured."""
+        score = RemoteDocumentParser.score("application/pdf", "doc.pdf")
+        assert score is not None and score > 10
+
+
+# ---------------------------------------------------------------------------
+# Properties
+# ---------------------------------------------------------------------------
+
+
+class TestRemoteParserProperties:
+    def test_can_produce_archive_is_true(
+        self,
+        remote_parser: RemoteDocumentParser,
+    ) -> None:
+        assert remote_parser.can_produce_archive is True
+
+    def test_requires_pdf_rendition_is_false(
+        self,
+        remote_parser: RemoteDocumentParser,
+    ) -> None:
+        assert remote_parser.requires_pdf_rendition is False
+
+
+# ---------------------------------------------------------------------------
+# Lifecycle
+# ---------------------------------------------------------------------------
+
+
+class TestRemoteParserLifecycle:
+    def test_context_manager_cleans_up_tempdir(self) -> None:
+        with RemoteDocumentParser() as parser:
+            tempdir = parser._tempdir
+            assert tempdir.exists()
+        assert not tempdir.exists()
+
+    def test_context_manager_cleans_up_after_exception(self) -> None:
+        tempdir: Path | None = None
+        with pytest.raises(RuntimeError):
+            with RemoteDocumentParser() as parser:
+                tempdir = parser._tempdir
+                raise RuntimeError("boom")
+        assert tempdir is not None
+        assert not tempdir.exists()
+
+
+# ---------------------------------------------------------------------------
+# parse() — happy path
+# ---------------------------------------------------------------------------
+
+
+class TestRemoteParserParse:
+    def test_parse_returns_text_from_azure(
+        self,
+        remote_parser: RemoteDocumentParser,
+        simple_digital_pdf_file: Path,
+        azure_client: Mock,
+    ) -> None:
+        remote_parser.parse(simple_digital_pdf_file, "application/pdf")
+
+        assert remote_parser.get_text() == _DEFAULT_TEXT
+
+    def test_parse_sets_archive_path(
+        self,
+        remote_parser: RemoteDocumentParser,
+        simple_digital_pdf_file: Path,
+        azure_client: Mock,
+    ) -> None:
+        remote_parser.parse(simple_digital_pdf_file, "application/pdf")
+
+        archive = remote_parser.get_archive_path()
+        assert archive is not None
+        assert archive.exists()
+        assert archive.suffix == ".pdf"
+
+    def test_parse_closes_client_on_success(
+        self,
+        remote_parser: RemoteDocumentParser,
+        simple_digital_pdf_file: Path,
+        azure_client: Mock,
+    ) -> None:
+        remote_parser.configure(ParserContext())
+        remote_parser.parse(simple_digital_pdf_file, "application/pdf")
+
+        azure_client.close.assert_called_once()
+
+    @pytest.mark.usefixtures("no_engine_settings")
+    def test_parse_sets_empty_text_when_not_configured(
+        self,
+        remote_parser: RemoteDocumentParser,
+        simple_digital_pdf_file: Path,
+    ) -> None:
+        remote_parser.parse(simple_digital_pdf_file, "application/pdf")
+
+        assert remote_parser.get_text() == ""
+        assert remote_parser.get_archive_path() is None
+
+    def test_get_text_none_before_parse(
+        self,
+        remote_parser: RemoteDocumentParser,
+    ) -> None:
+        assert remote_parser.get_text() is None
+
+    def test_get_date_always_none(
+        self,
+        remote_parser: RemoteDocumentParser,
+        simple_digital_pdf_file: Path,
+        azure_client: Mock,
+    ) -> None:
+        remote_parser.parse(simple_digital_pdf_file, "application/pdf")
+
+        assert remote_parser.get_date() is None
+
+
+# ---------------------------------------------------------------------------
+# parse() — Azure failure path
+# ---------------------------------------------------------------------------
+
+
+class TestRemoteParserParseError:
+    def test_parse_returns_none_on_azure_error(
+        self,
+        remote_parser: RemoteDocumentParser,
+        simple_digital_pdf_file: Path,
+        failing_azure_client: Mock,
+    ) -> None:
+        remote_parser.parse(simple_digital_pdf_file, "application/pdf")
+
+        assert remote_parser.get_text() is None
+
+    def test_parse_closes_client_on_error(
+        self,
+        remote_parser: RemoteDocumentParser,
+        simple_digital_pdf_file: Path,
+        failing_azure_client: Mock,
+    ) -> None:
+        remote_parser.parse(simple_digital_pdf_file, "application/pdf")
+
+        failing_azure_client.close.assert_called_once()
+
+    def test_parse_logs_error_on_azure_failure(
+        self,
+        remote_parser: RemoteDocumentParser,
+        simple_digital_pdf_file: Path,
+        failing_azure_client: Mock,
+        mocker: MockerFixture,
+    ) -> None:
+        mock_log = mocker.patch("paperless.parsers.remote.logger")
+
+        remote_parser.parse(simple_digital_pdf_file, "application/pdf")
+
+        mock_log.error.assert_called_once()
+        assert "Azure AI Vision parsing failed" in mock_log.error.call_args[0][0]
+
+
+# ---------------------------------------------------------------------------
+# get_page_count()
+# ---------------------------------------------------------------------------
+
+
+class TestRemoteParserPageCount:
+    def test_page_count_for_pdf(
+        self,
+        remote_parser: RemoteDocumentParser,
+        simple_digital_pdf_file: Path,
+    ) -> None:
+        count = remote_parser.get_page_count(simple_digital_pdf_file, "application/pdf")
+        assert isinstance(count, int)
+        assert count >= 1
+
+    def test_page_count_returns_none_for_image_mime(
+        self,
+        remote_parser: RemoteDocumentParser,
+        simple_digital_pdf_file: Path,
+    ) -> None:
+        count = remote_parser.get_page_count(simple_digital_pdf_file, "image/png")
+        assert count is None
+
+    def test_page_count_returns_none_for_invalid_pdf(
+        self,
+        remote_parser: RemoteDocumentParser,
+        tmp_path: Path,
+    ) -> None:
+        bad_pdf = tmp_path / "bad.pdf"
+        bad_pdf.write_bytes(b"not a pdf at all")
+        count = remote_parser.get_page_count(bad_pdf, "application/pdf")
+        assert count is None
+
+
+# ---------------------------------------------------------------------------
+# extract_metadata()
+# ---------------------------------------------------------------------------
+
+
+class TestRemoteParserMetadata:
+    def test_extract_metadata_non_pdf_returns_empty(
+        self,
+        remote_parser: RemoteDocumentParser,
+        simple_digital_pdf_file: Path,
+    ) -> None:
+        result = remote_parser.extract_metadata(simple_digital_pdf_file, "image/png")
+        assert result == []
+
+    def test_extract_metadata_pdf_returns_list(
+        self,
+        remote_parser: RemoteDocumentParser,
+        simple_digital_pdf_file: Path,
+    ) -> None:
+        result = remote_parser.extract_metadata(
+            simple_digital_pdf_file,
+            "application/pdf",
+        )
+        assert isinstance(result, list)
+
+    def test_extract_metadata_pdf_entries_have_required_keys(
+        self,
+        remote_parser: RemoteDocumentParser,
+        simple_digital_pdf_file: Path,
+    ) -> None:
+        result = remote_parser.extract_metadata(
+            simple_digital_pdf_file,
+            "application/pdf",
+        )
+        for entry in result:
+            assert "namespace" in entry
+            assert "prefix" in entry
+            assert "key" in entry
+            assert "value" in entry
+            assert isinstance(entry["value"], str)
+
+    def test_extract_metadata_does_not_raise_on_invalid_pdf(
+        self,
+        remote_parser: RemoteDocumentParser,
+        tmp_path: Path,
+    ) -> None:
+        bad_pdf = tmp_path / "bad.pdf"
+        bad_pdf.write_bytes(b"not a pdf at all")
+        result = remote_parser.extract_metadata(bad_pdf, "application/pdf")
+        assert result == []
+
+
+# ---------------------------------------------------------------------------
+# Registry integration
+# ---------------------------------------------------------------------------
+
+
+class TestRemoteParserRegistry:
+    def test_registered_in_defaults(self) -> None:
+        from paperless.parsers.registry import ParserRegistry
+
+        registry = ParserRegistry()
+        registry.register_defaults()
+
+        assert RemoteDocumentParser in registry._builtins
+
+    @pytest.mark.usefixtures("azure_settings")
+    def test_get_parser_returns_remote_when_configured(self) -> None:
+        from paperless.parsers.registry import get_parser_registry
+
+        registry = get_parser_registry()
+        parser_cls = registry.get_parser_for_file("application/pdf", "doc.pdf")
+
+        assert parser_cls is RemoteDocumentParser
+
+    @pytest.mark.usefixtures("no_engine_settings")
+    def test_get_parser_returns_none_for_unsupported_type_when_not_configured(
+        self,
+    ) -> None:
+        """With remote off and a truly unsupported MIME type, registry returns None."""
+        from paperless.parsers.registry import ParserRegistry
+
+        registry = ParserRegistry()
+        registry.register_defaults()
+        parser_cls = registry.get_parser_for_file(
+            "application/x-unknown-format",
+            "doc.xyz",
+        )
+
+        assert parser_cls is None
--- a/src/paperless/tests/parsers/test_tesseract_custom_settings.py
+++ b/src/paperless/tests/parsers/test_tesseract_custom_settings.py
@@ -10,7 +10,7 @@ from paperless.models import CleanChoices
 from paperless.models import ColorConvertChoices
 from paperless.models import ModeChoices
 from paperless.models import OutputTypeChoices
-from paperless_tesseract.parsers import RasterisedDocumentParser
+from paperless.parsers.tesseract import RasterisedDocumentParser


 class TestParserSettingsFromDb(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
--- a/src/paperless/tests/parsers/test_tesseract_parser.py
+++ b/src/paperless/tests/parsers/test_tesseract_parser.py
--- a/src/paperless/tests/parsers/test_text_parser.py
+++ b/src/paperless/tests/parsers/test_text_parser.py
@@ -12,6 +12,7 @@ from pathlib import Path

 import pytest

+from paperless.parsers import ParserContext
 from paperless.parsers import ParserProtocol
 from paperless.parsers.text import TextDocumentParser

@@ -93,6 +94,7 @@ class TestTextParserParse:
        text_parser: TextDocumentParser,
        sample_txt_file: Path,
    ) -> None:
+        text_parser.configure(ParserContext())
        text_parser.parse(sample_txt_file, "text/plain")

        assert text_parser.get_text() == "This is a test file.\n"
@@ -102,6 +104,7 @@ class TestTextParserParse:
        text_parser: TextDocumentParser,
        sample_txt_file: Path,
    ) -> None:
+        text_parser.configure(ParserContext())
        text_parser.parse(sample_txt_file, "text/plain")

        assert text_parser.get_archive_path() is None
@@ -111,6 +114,7 @@ class TestTextParserParse:
        text_parser: TextDocumentParser,
        sample_txt_file: Path,
    ) -> None:
+        text_parser.configure(ParserContext())
        text_parser.parse(sample_txt_file, "text/plain")

        assert text_parser.get_date() is None
@@ -129,6 +133,7 @@ class TestTextParserParse:
            - Parsing succeeds
            - Invalid bytes are replaced with the Unicode replacement character
        """
+        text_parser.configure(ParserContext())
        text_parser.parse(malformed_txt_file, "text/plain")

        assert text_parser.get_text() == "Pantothens\ufffdure\n"
@@ -251,6 +256,9 @@ class TestTextParserRegistry:
        from paperless.parsers.registry import get_parser_registry

        registry = get_parser_registry()
-        parser_cls = registry.get_parser_for_file("application/pdf", "doc.pdf")
+        parser_cls = registry.get_parser_for_file(
+            "application/x-unknown-format",
+            "doc.xyz",
+        )

        assert parser_cls is None
--- a/src/paperless/tests/parsers/test_tika_liva.py
+++ b/src/paperless/tests/parsers/test_tika_liva.py
@@ -4,7 +4,7 @@ from pathlib import Path
 import pytest

 from documents.tests.utils import util_call_with_backoff
-from paperless_tika.parsers import TikaDocumentParser
+from paperless.parsers.tika import TikaDocumentParser


@pytest.mark.skipif(
@@ -42,14 +42,15 @@ class TestTikaParserAgainstServer:
        )

        assert (
-            tika_parser.text
+            tika_parser.get_text()
            == "This is an ODT test document, created September 14, 2022"
        )
-        assert tika_parser.archive_path is not None
-        assert b"PDF-" in tika_parser.archive_path.read_bytes()[:10]
+        archive = tika_parser.get_archive_path()
+        assert archive is not None
+        assert b"PDF-" in archive.read_bytes()[:10]

        # TODO: Unsure what can set the Creation-Date field in a document, enable when possible
-        # self.assertEqual(tika_parser.date, datetime.datetime(2022, 9, 14))
+        # self.assertEqual(tika_parser.get_date(), datetime.datetime(2022, 9, 14))

    def test_basic_parse_docx(
        self,
@@ -74,14 +75,15 @@ class TestTikaParserAgainstServer:
        )

        assert (
-            tika_parser.text
+            tika_parser.get_text()
            == "This is an DOCX test document, also made September 14, 2022"
        )
-        assert tika_parser.archive_path is not None
-        with Path(tika_parser.archive_path).open("rb") as f:
+        archive = tika_parser.get_archive_path()
+        assert archive is not None
+        with archive.open("rb") as f:
            assert b"PDF-" in f.read()[:10]

-        # self.assertEqual(tika_parser.date, datetime.datetime(2022, 9, 14))
+        # self.assertEqual(tika_parser.get_date(), datetime.datetime(2022, 9, 14))

    def test_basic_parse_doc(
        self,
@@ -102,13 +104,12 @@ class TestTikaParserAgainstServer:
            [sample_doc_file, "application/msword"],
        )

-        assert tika_parser.text is not None
-        assert (
-            "This is a test document, saved in the older .doc format"
-            in tika_parser.text
-        )
-        assert tika_parser.archive_path is not None
-        with Path(tika_parser.archive_path).open("rb") as f:
+        text = tika_parser.get_text()
+        assert text is not None
+        assert "This is a test document, saved in the older .doc format" in text
+        archive = tika_parser.get_archive_path()
+        assert archive is not None
+        with archive.open("rb") as f:
            assert b"PDF-" in f.read()[:10]

    def test_tika_fails_multi_part(
@@ -133,6 +134,7 @@ class TestTikaParserAgainstServer:
            [sample_broken_odt, "application/vnd.oasis.opendocument.text"],
        )

-        assert tika_parser.archive_path is not None
-        with Path(tika_parser.archive_path).open("rb") as f:
+        archive = tika_parser.get_archive_path()
+        assert archive is not None
+        with archive.open("rb") as f:
            assert b"PDF-" in f.read()[:10]
--- a/src/paperless/tests/parsers/test_tika_parser.py
+++ b/src/paperless/tests/parsers/test_tika_parser.py
@@ -9,7 +9,80 @@ from pytest_django.fixtures import SettingsWrapper
 from pytest_httpx import HTTPXMock

 from documents.parsers import ParseError
-from paperless_tika.parsers import TikaDocumentParser
+from paperless.parsers import ParserContext
+from paperless.parsers import ParserProtocol
+from paperless.parsers.tika import TikaDocumentParser
+
+
+class TestTikaParserRegistryInterface:
+    """Verify that TikaDocumentParser satisfies the ParserProtocol contract."""
+
+    def test_satisfies_parser_protocol(self) -> None:
+        assert isinstance(TikaDocumentParser(), ParserProtocol)
+
+    def test_supported_mime_types_is_classmethod(self) -> None:
+        mime_types = TikaDocumentParser.supported_mime_types()
+        assert isinstance(mime_types, dict)
+        assert len(mime_types) > 0
+
+    def test_score_returns_none_when_tika_disabled(
+        self,
+        settings: SettingsWrapper,
+    ) -> None:
+        settings.TIKA_ENABLED = False
+        result = TikaDocumentParser.score(
+            "application/vnd.oasis.opendocument.text",
+            "sample.odt",
+        )
+        assert result is None
+
+    def test_score_returns_int_when_tika_enabled(
+        self,
+        settings: SettingsWrapper,
+    ) -> None:
+        settings.TIKA_ENABLED = True
+        result = TikaDocumentParser.score(
+            "application/vnd.oasis.opendocument.text",
+            "sample.odt",
+        )
+        assert isinstance(result, int)
+
+    def test_score_returns_none_for_unsupported_mime(
+        self,
+        settings: SettingsWrapper,
+    ) -> None:
+        settings.TIKA_ENABLED = True
+        result = TikaDocumentParser.score("application/pdf", "doc.pdf")
+        assert result is None
+
+    def test_can_produce_archive_is_false(self) -> None:
+        assert TikaDocumentParser().can_produce_archive is False
+
+    def test_requires_pdf_rendition_is_true(self) -> None:
+        assert TikaDocumentParser().requires_pdf_rendition is True
+
+    def test_get_page_count_returns_none_without_archive(
+        self,
+        tika_parser: TikaDocumentParser,
+        sample_odt_file: Path,
+    ) -> None:
+        assert (
+            tika_parser.get_page_count(
+                sample_odt_file,
+                "application/vnd.oasis.opendocument.text",
+            )
+            is None
+        )
+
+    def test_get_page_count_returns_int_with_pdf_archive(
+        self,
+        tika_parser: TikaDocumentParser,
+        simple_digital_pdf_file: Path,
+    ) -> None:
+        tika_parser._archive_path = simple_digital_pdf_file
+        count = tika_parser.get_page_count(simple_digital_pdf_file, "application/pdf")
+        assert isinstance(count, int)
+        assert count > 0


@pytest.mark.django_db()
@@ -34,14 +107,15 @@ class TestTikaParser:
        # Pretend convert to PDF response
        httpx_mock.add_response(content=b"PDF document")

+        tika_parser.configure(ParserContext())
        tika_parser.parse(sample_odt_file, "application/vnd.oasis.opendocument.text")

-        assert tika_parser.text == "the content"
-        assert tika_parser.archive_path is not None
-        with Path(tika_parser.archive_path).open("rb") as f:
+        assert tika_parser.get_text() == "the content"
+        assert tika_parser.get_archive_path() is not None
+        with Path(tika_parser.get_archive_path()).open("rb") as f:
            assert f.read() == b"PDF document"

-        assert tika_parser.date == datetime.datetime(
+        assert tika_parser.get_date() == datetime.datetime(
            2020,
            11,
            21,
@@ -89,7 +163,7 @@ class TestTikaParser:
        httpx_mock.add_response(status_code=HTTPStatus.INTERNAL_SERVER_ERROR)

        with pytest.raises(ParseError):
-            tika_parser.convert_to_pdf(sample_odt_file, None)
+            tika_parser._convert_to_pdf(sample_odt_file)

    @pytest.mark.parametrize(
        ("setting_value", "expected_form_value"),
@@ -106,7 +180,6 @@ class TestTikaParser:
        expected_form_value: str,
        httpx_mock: HTTPXMock,
        settings: SettingsWrapper,
-        tika_parser: TikaDocumentParser,
        sample_odt_file: Path,
    ) -> None:
        """
@@ -117,6 +190,8 @@ class TestTikaParser:
        THEN:
            - Request to Gotenberg contains the expected PDF/A format string
        """
+        # Parser must be created after the setting is changed so that
+        # OutputTypeConfig reads the correct value at __init__ time.
        settings.OCR_OUTPUT_TYPE = setting_value
        httpx_mock.add_response(
            status_code=codes.OK,
@@ -124,7 +199,8 @@ class TestTikaParser:
            method="POST",
        )

-        tika_parser.convert_to_pdf(sample_odt_file, None)
+        with TikaDocumentParser() as parser:
+            parser._convert_to_pdf(sample_odt_file)

        request = httpx_mock.get_request()

--- a/src/paperless/tests/samples/mail/broken.eml
+++ b/src/paperless/tests/samples/mail/broken.eml
--- a/src/paperless/tests/samples/mail/first.pdf
+++ b/src/paperless/tests/samples/mail/first.pdf
--- a/src/paperless/tests/samples/mail/html.eml
+++ b/src/paperless/tests/samples/mail/html.eml
--- a/src/paperless/tests/samples/mail/html.eml.html
+++ b/src/paperless/tests/samples/mail/html.eml.html
--- a/src/paperless/tests/samples/mail/html.eml.pdf
+++ b/src/paperless/tests/samples/mail/html.eml.pdf
--- a/src/paperless/tests/samples/mail/html.eml.pdf.webp
+++ b/src/paperless/tests/samples/mail/html.eml.pdf.webp
--- a/src/paperless/tests/samples/mail/sample.html
+++ b/src/paperless/tests/samples/mail/sample.html
--- a/src/paperless/tests/samples/mail/sample.html.pdf
+++ b/src/paperless/tests/samples/mail/sample.html.pdf
--- a/src/paperless/tests/samples/mail/sample.html.pdf.webp
+++ b/src/paperless/tests/samples/mail/sample.html.pdf.webp
--- a/src/paperless/tests/samples/mail/sample.png
+++ b/src/paperless/tests/samples/mail/sample.png
--- a/src/paperless/tests/samples/mail/second.pdf
+++ b/src/paperless/tests/samples/mail/second.pdf
--- a/src/paperless/tests/samples/mail/simple_text.eml
+++ b/src/paperless/tests/samples/mail/simple_text.eml
--- a/src/paperless/tests/samples/mail/simple_text.eml.pdf
+++ b/src/paperless/tests/samples/mail/simple_text.eml.pdf
--- a/src/paperless/tests/samples/mail/simple_text.eml.pdf.webp
+++ b/src/paperless/tests/samples/mail/simple_text.eml.pdf.webp
--- a/src/paperless/tests/samples/tesseract/document.webp
+++ b/src/paperless/tests/samples/tesseract/document.webp
--- a/Show More
+++ b/Show More