Compare commits
68 Commits
fix-bulk-e
...
dev
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b049ad9626 | ||
|
|
79def8a200 | ||
|
|
701735f6e5 | ||
|
|
07f54bfdab | ||
|
|
0f84af27d0 | ||
|
|
9646b8c67d | ||
|
|
e590d7df69 | ||
|
|
cc71aad058 | ||
|
|
3cbdf5d0b7 | ||
|
|
f84e0097e5 | ||
|
|
7dbf8bdd4a | ||
|
|
d2a752a196 | ||
|
|
2cb155e717 | ||
|
|
9e9fc6213c | ||
|
|
a9756f9462 | ||
|
|
c2b8b22fb4 | ||
|
|
d671e34559 | ||
|
|
f7c12d550a | ||
|
|
68fc898042 | ||
|
|
2cbe6ae892 | ||
|
|
b0bb31654f | ||
|
|
0f7c02de5e | ||
|
|
95dea787f2 | ||
|
|
b6501b0c47 | ||
|
|
d162c83eb7 | ||
|
|
d3ac75741f | ||
|
|
87ebd13abc | ||
|
|
3abff21d1f | ||
|
|
0a08499fc7 | ||
|
|
330ee696a8 | ||
|
|
b98697ab8b | ||
|
|
7e94dd8208 | ||
|
|
79da72f69c | ||
|
|
261ae9d8ce | ||
|
|
0e2c191524 | ||
|
|
ab4656692d | ||
|
|
03e2c352c2 | ||
|
|
2d46ed9692 | ||
|
|
8d23d17ae8 | ||
|
|
aea2927a02 | ||
|
|
a86c9d32fe | ||
|
|
d53dcad4f6 | ||
|
|
736b08ad09 | ||
|
|
ca5879a54e | ||
|
|
4d4f30b5f8 | ||
|
|
85fecac401 | ||
|
|
7942edfdf4 | ||
|
|
470018c011 | ||
|
|
54679a093a | ||
|
|
58ebcc21be | ||
|
|
1caa3eb8aa | ||
|
|
866c9fd858 | ||
|
|
2bb4af2be6 | ||
|
|
6b8ff9763d | ||
|
|
6034f17c87 | ||
|
|
48cd1cce6a | ||
|
|
1e00ad5f30 | ||
|
|
5f26c01c6f | ||
|
|
92e133eeb0 | ||
|
|
06b2d5102c | ||
|
|
9d69705e26 | ||
|
|
01abacab52 | ||
|
|
88b8f9b326 | ||
|
|
365ff99934 | ||
|
|
40255cfdbb | ||
|
|
d919c341b1 | ||
|
|
ba0a80a8ad | ||
|
|
60319c6d37 |
3
.github/dependabot.yml
vendored
@@ -157,6 +157,9 @@ updates:
|
|||||||
postgres:
|
postgres:
|
||||||
patterns:
|
patterns:
|
||||||
- "docker.io/library/postgres*"
|
- "docker.io/library/postgres*"
|
||||||
|
greenmail:
|
||||||
|
patterns:
|
||||||
|
- "docker.io/greenmail*"
|
||||||
- package-ecosystem: "pre-commit" # See documentation for possible values
|
- package-ecosystem: "pre-commit" # See documentation for possible values
|
||||||
directory: "/" # Location of package manifests
|
directory: "/" # Location of package manifests
|
||||||
schedule:
|
schedule:
|
||||||
|
|||||||
6
.github/workflows/ci-docker.yml
vendored
@@ -119,7 +119,7 @@ jobs:
|
|||||||
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
|
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
|
||||||
- name: Docker metadata
|
- name: Docker metadata
|
||||||
id: docker-meta
|
id: docker-meta
|
||||||
uses: docker/metadata-action@v5.10.0
|
uses: docker/metadata-action@v6.0.0
|
||||||
with:
|
with:
|
||||||
images: |
|
images: |
|
||||||
${{ env.REGISTRY }}/${{ steps.repo.outputs.name }}
|
${{ env.REGISTRY }}/${{ steps.repo.outputs.name }}
|
||||||
@@ -130,7 +130,7 @@ jobs:
|
|||||||
type=semver,pattern={{major}}.{{minor}}
|
type=semver,pattern={{major}}.{{minor}}
|
||||||
- name: Build and push by digest
|
- name: Build and push by digest
|
||||||
id: build
|
id: build
|
||||||
uses: docker/build-push-action@v6.19.2
|
uses: docker/build-push-action@v7.0.0
|
||||||
with:
|
with:
|
||||||
context: .
|
context: .
|
||||||
file: ./Dockerfile
|
file: ./Dockerfile
|
||||||
@@ -201,7 +201,7 @@ jobs:
|
|||||||
password: ${{ secrets.QUAY_ROBOT_TOKEN }}
|
password: ${{ secrets.QUAY_ROBOT_TOKEN }}
|
||||||
- name: Docker metadata
|
- name: Docker metadata
|
||||||
id: docker-meta
|
id: docker-meta
|
||||||
uses: docker/metadata-action@v5.10.0
|
uses: docker/metadata-action@v6.0.0
|
||||||
with:
|
with:
|
||||||
images: |
|
images: |
|
||||||
${{ env.REGISTRY }}/${{ needs.build-arch.outputs.repository }}
|
${{ env.REGISTRY }}/${{ needs.build-arch.outputs.repository }}
|
||||||
|
|||||||
@@ -2437,17 +2437,3 @@ src/paperless_tesseract/tests/test_parser_custom_settings.py:0: error: Item "Non
|
|||||||
src/paperless_tesseract/tests/test_parser_custom_settings.py:0: error: Item "None" of "ApplicationConfiguration | None" has no attribute "unpaper_clean" [union-attr]
|
src/paperless_tesseract/tests/test_parser_custom_settings.py:0: error: Item "None" of "ApplicationConfiguration | None" has no attribute "unpaper_clean" [union-attr]
|
||||||
src/paperless_tesseract/tests/test_parser_custom_settings.py:0: error: Item "None" of "ApplicationConfiguration | None" has no attribute "unpaper_clean" [union-attr]
|
src/paperless_tesseract/tests/test_parser_custom_settings.py:0: error: Item "None" of "ApplicationConfiguration | None" has no attribute "unpaper_clean" [union-attr]
|
||||||
src/paperless_tesseract/tests/test_parser_custom_settings.py:0: error: Item "None" of "ApplicationConfiguration | None" has no attribute "user_args" [union-attr]
|
src/paperless_tesseract/tests/test_parser_custom_settings.py:0: error: Item "None" of "ApplicationConfiguration | None" has no attribute "user_args" [union-attr]
|
||||||
src/paperless_text/parsers.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def]
|
|
||||||
src/paperless_text/parsers.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def]
|
|
||||||
src/paperless_text/parsers.py:0: error: Incompatible types in assignment (expression has type "str", variable has type "None") [assignment]
|
|
||||||
src/paperless_text/signals.py:0: error: Function is missing a type annotation [no-untyped-def]
|
|
||||||
src/paperless_text/signals.py:0: error: Function is missing a type annotation [no-untyped-def]
|
|
||||||
src/paperless_tika/parsers.py:0: error: Argument 1 to "make_thumbnail_from_pdf" has incompatible type "None"; expected "Path" [arg-type]
|
|
||||||
src/paperless_tika/parsers.py:0: error: Function is missing a return type annotation [no-untyped-def]
|
|
||||||
src/paperless_tika/parsers.py:0: error: Function is missing a type annotation [no-untyped-def]
|
|
||||||
src/paperless_tika/parsers.py:0: error: Function is missing a type annotation [no-untyped-def]
|
|
||||||
src/paperless_tika/parsers.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def]
|
|
||||||
src/paperless_tika/parsers.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def]
|
|
||||||
src/paperless_tika/parsers.py:0: error: Incompatible types in assignment (expression has type "str | None", variable has type "None") [assignment]
|
|
||||||
src/paperless_tika/signals.py:0: error: Function is missing a type annotation [no-untyped-def]
|
|
||||||
src/paperless_tika/signals.py:0: error: Function is missing a type annotation [no-untyped-def]
|
|
||||||
|
|||||||
@@ -50,7 +50,7 @@ repos:
|
|||||||
- 'prettier-plugin-organize-imports@4.3.0'
|
- 'prettier-plugin-organize-imports@4.3.0'
|
||||||
# Python hooks
|
# Python hooks
|
||||||
- repo: https://github.com/astral-sh/ruff-pre-commit
|
- repo: https://github.com/astral-sh/ruff-pre-commit
|
||||||
rev: v0.15.5
|
rev: v0.15.6
|
||||||
hooks:
|
hooks:
|
||||||
- id: ruff-check
|
- id: ruff-check
|
||||||
- id: ruff-format
|
- id: ruff-format
|
||||||
|
|||||||
@@ -18,13 +18,13 @@ services:
|
|||||||
- "--log-level=warn"
|
- "--log-level=warn"
|
||||||
- "--log-format=text"
|
- "--log-format=text"
|
||||||
tika:
|
tika:
|
||||||
image: docker.io/apache/tika:latest
|
image: docker.io/apache/tika:3.2.3.0
|
||||||
hostname: tika
|
hostname: tika
|
||||||
container_name: tika
|
container_name: tika
|
||||||
network_mode: host
|
network_mode: host
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
greenmail:
|
greenmail:
|
||||||
image: greenmail/standalone:2.1.8
|
image: docker.io/greenmail/standalone:2.1.8
|
||||||
hostname: greenmail
|
hostname: greenmail
|
||||||
container_name: greenmail
|
container_name: greenmail
|
||||||
environment:
|
environment:
|
||||||
|
|||||||
@@ -2,6 +2,17 @@
|
|||||||
# shellcheck shell=bash
|
# shellcheck shell=bash
|
||||||
declare -r log_prefix="[init-user]"
|
declare -r log_prefix="[init-user]"
|
||||||
|
|
||||||
|
# When the container is started as a non-root user (e.g. via `user: 999:999`
|
||||||
|
# in Docker Compose), usermod/groupmod require root and are meaningless.
|
||||||
|
# USERMAP_* variables only apply to the root-started path.
|
||||||
|
if [[ -n "${USER_IS_NON_ROOT}" ]]; then
|
||||||
|
if [[ -n "${USERMAP_UID}" || -n "${USERMAP_GID}" ]]; then
|
||||||
|
echo "${log_prefix} WARNING: USERMAP_UID/USERMAP_GID are set but have no effect when the container is started as a non-root user"
|
||||||
|
fi
|
||||||
|
echo "${log_prefix} Running as non-root user ($(id --user):$(id --group)), skipping UID/GID remapping"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
declare -r usermap_original_uid=$(id -u paperless)
|
declare -r usermap_original_uid=$(id -u paperless)
|
||||||
declare -r usermap_original_gid=$(id -g paperless)
|
declare -r usermap_original_gid=$(id -g paperless)
|
||||||
declare -r usermap_new_uid=${USERMAP_UID:-$usermap_original_uid}
|
declare -r usermap_new_uid=${USERMAP_UID:-$usermap_original_uid}
|
||||||
|
|||||||
@@ -1,5 +1,56 @@
|
|||||||
# Changelog
|
# Changelog
|
||||||
|
|
||||||
|
## paperless-ngx 2.20.12
|
||||||
|
|
||||||
|
### Security
|
||||||
|
|
||||||
|
- Resolve [GHSA-96jx-fj7m-qh6x](https://github.com/paperless-ngx/paperless-ngx/security/advisories/GHSA-96jx-fj7m-qh6x)
|
||||||
|
|
||||||
|
### Bug Fixes
|
||||||
|
|
||||||
|
- Fix: Scope the workflow saves to prevent clobbering filename/archive_filename [@stumpylog](https://github.com/stumpylog) ([#12390](https://github.com/paperless-ngx/paperless-ngx/pull/12390))
|
||||||
|
- Fix: don't try to usermod/groupmod when non-root + update docs (#<!---->12365) [@stumpylog](https://github.com/stumpylog) ([#12391](https://github.com/paperless-ngx/paperless-ngx/pull/12391))
|
||||||
|
- Fix: avoid moving files if already moved [@shamoon](https://github.com/shamoon) ([#12389](https://github.com/paperless-ngx/paperless-ngx/pull/12389))
|
||||||
|
- Fix: remove pagination from document notes api spec [@shamoon](https://github.com/shamoon) ([#12388](https://github.com/paperless-ngx/paperless-ngx/pull/12388))
|
||||||
|
- Fix: fix file button hover color in dark mode [@shamoon](https://github.com/shamoon) ([#12367](https://github.com/paperless-ngx/paperless-ngx/pull/12367))
|
||||||
|
- Fixhancement: only offer basic auth for appropriate requests [@shamoon](https://github.com/shamoon) ([#12362](https://github.com/paperless-ngx/paperless-ngx/pull/12362))
|
||||||
|
|
||||||
|
### All App Changes
|
||||||
|
|
||||||
|
<details>
|
||||||
|
<summary>5 changes</summary>
|
||||||
|
|
||||||
|
- Fix: Scope the workflow saves to prevent clobbering filename/archive_filename [@stumpylog](https://github.com/stumpylog) ([#12390](https://github.com/paperless-ngx/paperless-ngx/pull/12390))
|
||||||
|
- Fix: avoid moving files if already moved [@shamoon](https://github.com/shamoon) ([#12389](https://github.com/paperless-ngx/paperless-ngx/pull/12389))
|
||||||
|
- Fix: remove pagination from document notes api spec [@shamoon](https://github.com/shamoon) ([#12388](https://github.com/paperless-ngx/paperless-ngx/pull/12388))
|
||||||
|
- Fix: fix file button hover color in dark mode [@shamoon](https://github.com/shamoon) ([#12367](https://github.com/paperless-ngx/paperless-ngx/pull/12367))
|
||||||
|
- Fixhancement: only offer basic auth for appropriate requests [@shamoon](https://github.com/shamoon) ([#12362](https://github.com/paperless-ngx/paperless-ngx/pull/12362))
|
||||||
|
</details>
|
||||||
|
|
||||||
|
## paperless-ngx 2.20.11
|
||||||
|
|
||||||
|
### Security
|
||||||
|
|
||||||
|
- Resolve [GHSA-59xh-5vwx-4c4q](https://github.com/paperless-ngx/paperless-ngx/security/advisories/GHSA-59xh-5vwx-4c4q)
|
||||||
|
|
||||||
|
### Bug Fixes
|
||||||
|
|
||||||
|
- Fix: correct dropdown list active color in dark mode [@shamoon](https://github.com/shamoon) ([#12328](https://github.com/paperless-ngx/paperless-ngx/pull/12328))
|
||||||
|
- Fixhancement: clear descendant selections in dropdown when parent toggled [@shamoon](https://github.com/shamoon) ([#12326](https://github.com/paperless-ngx/paperless-ngx/pull/12326))
|
||||||
|
- Fix: prevent wrapping with larger amounts of tags on small cards, reset moreTags setting to correct count [@shamoon](https://github.com/shamoon) ([#12302](https://github.com/paperless-ngx/paperless-ngx/pull/12302))
|
||||||
|
- Fix: prevent stale db filename during workflow actions [@shamoon](https://github.com/shamoon) ([#12289](https://github.com/paperless-ngx/paperless-ngx/pull/12289))
|
||||||
|
|
||||||
|
### All App Changes
|
||||||
|
|
||||||
|
<details>
|
||||||
|
<summary>4 changes</summary>
|
||||||
|
|
||||||
|
- Fix: correct dropdown list active color in dark mode [@shamoon](https://github.com/shamoon) ([#12328](https://github.com/paperless-ngx/paperless-ngx/pull/12328))
|
||||||
|
- Fixhancement: clear descendant selections in dropdown when parent toggled [@shamoon](https://github.com/shamoon) ([#12326](https://github.com/paperless-ngx/paperless-ngx/pull/12326))
|
||||||
|
- Fix: prevent wrapping with larger amounts of tags on small cards, reset moreTags setting to correct count [@shamoon](https://github.com/shamoon) ([#12302](https://github.com/paperless-ngx/paperless-ngx/pull/12302))
|
||||||
|
- Fix: prevent stale db filename during workflow actions [@shamoon](https://github.com/shamoon) ([#12289](https://github.com/paperless-ngx/paperless-ngx/pull/12289))
|
||||||
|
</details>
|
||||||
|
|
||||||
## paperless-ngx 2.20.10
|
## paperless-ngx 2.20.10
|
||||||
|
|
||||||
### Bug Fixes
|
### Bug Fixes
|
||||||
|
|||||||
@@ -674,6 +674,9 @@ See the corresponding [django-allauth documentation](https://docs.allauth.org/en
|
|||||||
for a list of provider configurations. You will also need to include the relevant Django 'application' inside the
|
for a list of provider configurations. You will also need to include the relevant Django 'application' inside the
|
||||||
[PAPERLESS_APPS](#PAPERLESS_APPS) setting to activate that specific authentication provider (e.g. `allauth.socialaccount.providers.openid_connect` for the [OIDC Connect provider](https://docs.allauth.org/en/latest/socialaccount/providers/openid_connect.html)).
|
[PAPERLESS_APPS](#PAPERLESS_APPS) setting to activate that specific authentication provider (e.g. `allauth.socialaccount.providers.openid_connect` for the [OIDC Connect provider](https://docs.allauth.org/en/latest/socialaccount/providers/openid_connect.html)).
|
||||||
|
|
||||||
|
: For OpenID Connect providers, set `settings.token_auth_method` if your identity provider
|
||||||
|
requires a specific token endpoint authentication method.
|
||||||
|
|
||||||
Defaults to None, which does not enable any third party authentication systems.
|
Defaults to None, which does not enable any third party authentication systems.
|
||||||
|
|
||||||
#### [`PAPERLESS_SOCIAL_AUTO_SIGNUP=<bool>`](#PAPERLESS_SOCIAL_AUTO_SIGNUP) {#PAPERLESS_SOCIAL_AUTO_SIGNUP}
|
#### [`PAPERLESS_SOCIAL_AUTO_SIGNUP=<bool>`](#PAPERLESS_SOCIAL_AUTO_SIGNUP) {#PAPERLESS_SOCIAL_AUTO_SIGNUP}
|
||||||
@@ -1947,6 +1950,12 @@ current backend. If not supplied, defaults to "gpt-3.5-turbo" for OpenAI and "ll
|
|||||||
|
|
||||||
Defaults to None.
|
Defaults to None.
|
||||||
|
|
||||||
|
#### [`PAPERLESS_AI_LLM_ALLOW_INTERNAL_ENDPOINTS=<bool>`](#PAPERLESS_AI_LLM_ALLOW_INTERNAL_ENDPOINTS) {#PAPERLESS_AI_LLM_ALLOW_INTERNAL_ENDPOINTS}
|
||||||
|
|
||||||
|
: If set to false, Paperless blocks AI endpoint URLs that resolve to non-public addresses (e.g., localhost, etc).
|
||||||
|
|
||||||
|
Defaults to true, which allows internal endpoints.
|
||||||
|
|
||||||
#### [`PAPERLESS_AI_LLM_INDEX_TASK_CRON=<cron expression>`](#PAPERLESS_AI_LLM_INDEX_TASK_CRON) {#PAPERLESS_AI_LLM_INDEX_TASK_CRON}
|
#### [`PAPERLESS_AI_LLM_INDEX_TASK_CRON=<cron expression>`](#PAPERLESS_AI_LLM_INDEX_TASK_CRON) {#PAPERLESS_AI_LLM_INDEX_TASK_CRON}
|
||||||
|
|
||||||
: Configures the schedule to update the AI embeddings of text content and metadata for all documents. Only performed if
|
: Configures the schedule to update the AI embeddings of text content and metadata for all documents. Only performed if
|
||||||
|
|||||||
@@ -103,3 +103,30 @@ Multiple options are combined in a single value:
|
|||||||
```bash
|
```bash
|
||||||
PAPERLESS_DB_OPTIONS="sslmode=require;sslrootcert=/certs/ca.pem;pool.max_size=10"
|
PAPERLESS_DB_OPTIONS="sslmode=require;sslrootcert=/certs/ca.pem;pool.max_size=10"
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## OpenID Connect Token Endpoint Authentication
|
||||||
|
|
||||||
|
Some existing OpenID Connect setups may require an explicit token endpoint authentication method after upgrading to v3.
|
||||||
|
|
||||||
|
#### Action Required
|
||||||
|
|
||||||
|
If OIDC login fails at the callback with an `invalid_client` error, add `token_auth_method` to the provider `settings` in
|
||||||
|
[`PAPERLESS_SOCIALACCOUNT_PROVIDERS`](configuration.md#PAPERLESS_SOCIALACCOUNT_PROVIDERS).
|
||||||
|
|
||||||
|
For example:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"openid_connect": {
|
||||||
|
"APPS": [
|
||||||
|
{
|
||||||
|
...
|
||||||
|
"settings": {
|
||||||
|
"server_url": "https://login.example.com",
|
||||||
|
"token_auth_method": "client_secret_basic"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|||||||
@@ -140,24 +140,17 @@ a [superuser](usage.md#superusers) account.
|
|||||||
|
|
||||||
!!! warning
|
!!! warning
|
||||||
|
|
||||||
It is currently not possible to run the container rootless if additional languages are specified via `PAPERLESS_OCR_LANGUAGES`.
|
It is not possible to run the container rootless if additional languages are specified via `PAPERLESS_OCR_LANGUAGES`.
|
||||||
|
|
||||||
If you want to run Paperless as a rootless container, make this
|
If you want to run Paperless as a rootless container, set `user:` in `docker-compose.yml` to the UID and GID of your host user (use `id -u` and `id -g` to find these values). The container process starts directly as that user with no internal privilege remapping:
|
||||||
change in `docker-compose.yml`:
|
|
||||||
|
|
||||||
- Set the `user` running the container to map to the `paperless`
|
```yaml
|
||||||
user in the container. This value (`user_id` below) should be
|
webserver:
|
||||||
the same ID that `USERMAP_UID` and `USERMAP_GID` are set to in
|
image: ghcr.io/paperless-ngx/paperless-ngx:latest
|
||||||
`docker-compose.env`. See `USERMAP_UID` and `USERMAP_GID`
|
user: '1000:1000'
|
||||||
[here](configuration.md#docker).
|
```
|
||||||
|
|
||||||
Your entry for Paperless should contain something like:
|
Do not combine this with `USERMAP_UID` or `USERMAP_GID`, which are intended for the non-rootless case described in step 3.
|
||||||
|
|
||||||
> ```
|
|
||||||
> webserver:
|
|
||||||
> image: ghcr.io/paperless-ngx/paperless-ngx:latest
|
|
||||||
> user: <user_id>
|
|
||||||
> ```
|
|
||||||
|
|
||||||
**File systems without inotify support (e.g. NFS)**
|
**File systems without inotify support (e.g. NFS)**
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[project]
|
[project]
|
||||||
name = "paperless-ngx"
|
name = "paperless-ngx"
|
||||||
version = "2.20.10"
|
version = "2.20.13"
|
||||||
description = "A community-supported supercharged document management system: scan, index and archive all your physical documents"
|
description = "A community-supported supercharged document management system: scan, index and archive all your physical documents"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.11"
|
requires-python = ">=3.11"
|
||||||
@@ -26,7 +26,7 @@ dependencies = [
|
|||||||
# WARNING: django does not use semver.
|
# WARNING: django does not use semver.
|
||||||
# Only patch versions are guaranteed to not introduce breaking changes.
|
# Only patch versions are guaranteed to not introduce breaking changes.
|
||||||
"django~=5.2.10",
|
"django~=5.2.10",
|
||||||
"django-allauth[mfa,socialaccount]~=65.14.0",
|
"django-allauth[mfa,socialaccount]~=65.15.0",
|
||||||
"django-auditlog~=3.4.1",
|
"django-auditlog~=3.4.1",
|
||||||
"django-cachalot~=2.9.0",
|
"django-cachalot~=2.9.0",
|
||||||
"django-celery-results~=2.6.0",
|
"django-celery-results~=2.6.0",
|
||||||
@@ -60,7 +60,7 @@ dependencies = [
|
|||||||
"llama-index-llms-openai>=0.6.13",
|
"llama-index-llms-openai>=0.6.13",
|
||||||
"llama-index-vector-stores-faiss>=0.5.2",
|
"llama-index-vector-stores-faiss>=0.5.2",
|
||||||
"nltk~=3.9.1",
|
"nltk~=3.9.1",
|
||||||
"ocrmypdf~=16.13.0",
|
"ocrmypdf~=17.3.0",
|
||||||
"openai>=1.76",
|
"openai>=1.76",
|
||||||
"pathvalidate~=3.3.1",
|
"pathvalidate~=3.3.1",
|
||||||
"pdf2image~=1.17.0",
|
"pdf2image~=1.17.0",
|
||||||
@@ -248,15 +248,13 @@ lint.per-file-ignores."docker/wait-for-redis.py" = [
|
|||||||
lint.per-file-ignores."src/documents/models.py" = [
|
lint.per-file-ignores."src/documents/models.py" = [
|
||||||
"SIM115",
|
"SIM115",
|
||||||
]
|
]
|
||||||
lint.per-file-ignores."src/paperless_tesseract/tests/test_parser.py" = [
|
|
||||||
"RUF001",
|
|
||||||
]
|
|
||||||
lint.isort.force-single-line = true
|
lint.isort.force-single-line = true
|
||||||
|
|
||||||
[tool.codespell]
|
[tool.codespell]
|
||||||
write-changes = true
|
write-changes = true
|
||||||
ignore-words-list = "criterias,afterall,valeu,ureue,equest,ure,assertIn,Oktober,commitish"
|
ignore-words-list = "criterias,afterall,valeu,ureue,equest,ure,assertIn,Oktober,commitish"
|
||||||
skip = "src-ui/src/locale/*,src-ui/pnpm-lock.yaml,src-ui/e2e/*,src/paperless_mail/tests/samples/*,src/documents/tests/samples/*,*.po,*.json"
|
skip = "src-ui/src/locale/*,src-ui/pnpm-lock.yaml,src-ui/e2e/*,src/paperless_mail/tests/samples/*,src/paperless/tests/samples/mail/*,src/documents/tests/samples/*,*.po,*.json"
|
||||||
|
|
||||||
[tool.pytest]
|
[tool.pytest]
|
||||||
minversion = "9.0"
|
minversion = "9.0"
|
||||||
@@ -271,10 +269,6 @@ testpaths = [
|
|||||||
"src/documents/tests/",
|
"src/documents/tests/",
|
||||||
"src/paperless/tests/",
|
"src/paperless/tests/",
|
||||||
"src/paperless_mail/tests/",
|
"src/paperless_mail/tests/",
|
||||||
"src/paperless_tesseract/tests/",
|
|
||||||
"src/paperless_tika/tests",
|
|
||||||
"src/paperless_text/tests/",
|
|
||||||
"src/paperless_remote/tests/",
|
|
||||||
"src/paperless_ai/tests",
|
"src/paperless_ai/tests",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|||||||
@@ -5,14 +5,14 @@
|
|||||||
<trans-unit id="ngb.alert.close" datatype="html">
|
<trans-unit id="ngb.alert.close" datatype="html">
|
||||||
<source>Close</source>
|
<source>Close</source>
|
||||||
<context-group purpose="location">
|
<context-group purpose="location">
|
||||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/alert/alert.ts</context>
|
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/alert/alert.ts</context>
|
||||||
<context context-type="linenumber">50</context>
|
<context context-type="linenumber">50</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
<trans-unit id="ngb.carousel.slide-number" datatype="html">
|
<trans-unit id="ngb.carousel.slide-number" datatype="html">
|
||||||
<source> Slide <x id="INTERPOLATION" equiv-text="ueryList<NgbSli"/> of <x id="INTERPOLATION_1" equiv-text="EventSource = N"/> </source>
|
<source> Slide <x id="INTERPOLATION" equiv-text="ueryList<NgbSli"/> of <x id="INTERPOLATION_1" equiv-text="EventSource = N"/> </source>
|
||||||
<context-group purpose="location">
|
<context-group purpose="location">
|
||||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/carousel/carousel.ts</context>
|
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/carousel/carousel.ts</context>
|
||||||
<context context-type="linenumber">131,135</context>
|
<context context-type="linenumber">131,135</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
<note priority="1" from="description">Currently selected slide number read by screen reader</note>
|
<note priority="1" from="description">Currently selected slide number read by screen reader</note>
|
||||||
@@ -20,114 +20,114 @@
|
|||||||
<trans-unit id="ngb.carousel.previous" datatype="html">
|
<trans-unit id="ngb.carousel.previous" datatype="html">
|
||||||
<source>Previous</source>
|
<source>Previous</source>
|
||||||
<context-group purpose="location">
|
<context-group purpose="location">
|
||||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/carousel/carousel.ts</context>
|
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/carousel/carousel.ts</context>
|
||||||
<context context-type="linenumber">159,162</context>
|
<context context-type="linenumber">159,162</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
<trans-unit id="ngb.carousel.next" datatype="html">
|
<trans-unit id="ngb.carousel.next" datatype="html">
|
||||||
<source>Next</source>
|
<source>Next</source>
|
||||||
<context-group purpose="location">
|
<context-group purpose="location">
|
||||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/carousel/carousel.ts</context>
|
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/carousel/carousel.ts</context>
|
||||||
<context context-type="linenumber">202,203</context>
|
<context context-type="linenumber">202,203</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
<trans-unit id="ngb.datepicker.select-month" datatype="html">
|
<trans-unit id="ngb.datepicker.select-month" datatype="html">
|
||||||
<source>Select month</source>
|
<source>Select month</source>
|
||||||
<context-group purpose="location">
|
<context-group purpose="location">
|
||||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/datepicker/datepicker-navigation-select.ts</context>
|
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/datepicker/datepicker-navigation-select.ts</context>
|
||||||
<context context-type="linenumber">91</context>
|
<context context-type="linenumber">91</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
<context-group purpose="location">
|
<context-group purpose="location">
|
||||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/datepicker/datepicker-navigation-select.ts</context>
|
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/datepicker/datepicker-navigation-select.ts</context>
|
||||||
<context context-type="linenumber">91</context>
|
<context context-type="linenumber">91</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
<trans-unit id="ngb.datepicker.select-year" datatype="html">
|
<trans-unit id="ngb.datepicker.select-year" datatype="html">
|
||||||
<source>Select year</source>
|
<source>Select year</source>
|
||||||
<context-group purpose="location">
|
<context-group purpose="location">
|
||||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/datepicker/datepicker-navigation-select.ts</context>
|
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/datepicker/datepicker-navigation-select.ts</context>
|
||||||
<context context-type="linenumber">91</context>
|
<context context-type="linenumber">91</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
<context-group purpose="location">
|
<context-group purpose="location">
|
||||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/datepicker/datepicker-navigation-select.ts</context>
|
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/datepicker/datepicker-navigation-select.ts</context>
|
||||||
<context context-type="linenumber">91</context>
|
<context context-type="linenumber">91</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
<trans-unit id="ngb.datepicker.previous-month" datatype="html">
|
<trans-unit id="ngb.datepicker.previous-month" datatype="html">
|
||||||
<source>Previous month</source>
|
<source>Previous month</source>
|
||||||
<context-group purpose="location">
|
<context-group purpose="location">
|
||||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/datepicker/datepicker-navigation.ts</context>
|
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/datepicker/datepicker-navigation.ts</context>
|
||||||
<context context-type="linenumber">83,85</context>
|
<context context-type="linenumber">83,85</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
<context-group purpose="location">
|
<context-group purpose="location">
|
||||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/datepicker/datepicker-navigation.ts</context>
|
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/datepicker/datepicker-navigation.ts</context>
|
||||||
<context context-type="linenumber">112</context>
|
<context context-type="linenumber">112</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
<trans-unit id="ngb.datepicker.next-month" datatype="html">
|
<trans-unit id="ngb.datepicker.next-month" datatype="html">
|
||||||
<source>Next month</source>
|
<source>Next month</source>
|
||||||
<context-group purpose="location">
|
<context-group purpose="location">
|
||||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/datepicker/datepicker-navigation.ts</context>
|
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/datepicker/datepicker-navigation.ts</context>
|
||||||
<context context-type="linenumber">112</context>
|
<context context-type="linenumber">112</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
<context-group purpose="location">
|
<context-group purpose="location">
|
||||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/datepicker/datepicker-navigation.ts</context>
|
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/datepicker/datepicker-navigation.ts</context>
|
||||||
<context context-type="linenumber">112</context>
|
<context context-type="linenumber">112</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
<trans-unit id="ngb.pagination.first" datatype="html">
|
<trans-unit id="ngb.pagination.first" datatype="html">
|
||||||
<source>««</source>
|
<source>««</source>
|
||||||
<context-group purpose="location">
|
<context-group purpose="location">
|
||||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/pagination/pagination-config.ts</context>
|
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/pagination/pagination-config.ts</context>
|
||||||
<context context-type="linenumber">20</context>
|
<context context-type="linenumber">20</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
<trans-unit id="ngb.pagination.previous" datatype="html">
|
<trans-unit id="ngb.pagination.previous" datatype="html">
|
||||||
<source>«</source>
|
<source>«</source>
|
||||||
<context-group purpose="location">
|
<context-group purpose="location">
|
||||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/pagination/pagination-config.ts</context>
|
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/pagination/pagination-config.ts</context>
|
||||||
<context context-type="linenumber">20</context>
|
<context context-type="linenumber">20</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
<trans-unit id="ngb.pagination.next" datatype="html">
|
<trans-unit id="ngb.pagination.next" datatype="html">
|
||||||
<source>»</source>
|
<source>»</source>
|
||||||
<context-group purpose="location">
|
<context-group purpose="location">
|
||||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/pagination/pagination-config.ts</context>
|
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/pagination/pagination-config.ts</context>
|
||||||
<context context-type="linenumber">20</context>
|
<context context-type="linenumber">20</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
<trans-unit id="ngb.pagination.last" datatype="html">
|
<trans-unit id="ngb.pagination.last" datatype="html">
|
||||||
<source>»»</source>
|
<source>»»</source>
|
||||||
<context-group purpose="location">
|
<context-group purpose="location">
|
||||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/pagination/pagination-config.ts</context>
|
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/pagination/pagination-config.ts</context>
|
||||||
<context context-type="linenumber">20</context>
|
<context context-type="linenumber">20</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
<trans-unit id="ngb.pagination.first-aria" datatype="html">
|
<trans-unit id="ngb.pagination.first-aria" datatype="html">
|
||||||
<source>First</source>
|
<source>First</source>
|
||||||
<context-group purpose="location">
|
<context-group purpose="location">
|
||||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/pagination/pagination-config.ts</context>
|
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/pagination/pagination-config.ts</context>
|
||||||
<context context-type="linenumber">20</context>
|
<context context-type="linenumber">20</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
<trans-unit id="ngb.pagination.previous-aria" datatype="html">
|
<trans-unit id="ngb.pagination.previous-aria" datatype="html">
|
||||||
<source>Previous</source>
|
<source>Previous</source>
|
||||||
<context-group purpose="location">
|
<context-group purpose="location">
|
||||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/pagination/pagination-config.ts</context>
|
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/pagination/pagination-config.ts</context>
|
||||||
<context context-type="linenumber">20</context>
|
<context context-type="linenumber">20</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
<trans-unit id="ngb.pagination.next-aria" datatype="html">
|
<trans-unit id="ngb.pagination.next-aria" datatype="html">
|
||||||
<source>Next</source>
|
<source>Next</source>
|
||||||
<context-group purpose="location">
|
<context-group purpose="location">
|
||||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/pagination/pagination-config.ts</context>
|
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/pagination/pagination-config.ts</context>
|
||||||
<context context-type="linenumber">20</context>
|
<context context-type="linenumber">20</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
<trans-unit id="ngb.pagination.last-aria" datatype="html">
|
<trans-unit id="ngb.pagination.last-aria" datatype="html">
|
||||||
<source>Last</source>
|
<source>Last</source>
|
||||||
<context-group purpose="location">
|
<context-group purpose="location">
|
||||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/pagination/pagination-config.ts</context>
|
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/pagination/pagination-config.ts</context>
|
||||||
<context context-type="linenumber">20</context>
|
<context context-type="linenumber">20</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
@@ -135,105 +135,105 @@
|
|||||||
<source><x id="INTERPOLATION" equiv-text="barConfig);
|
<source><x id="INTERPOLATION" equiv-text="barConfig);
|
||||||
pu"/></source>
|
pu"/></source>
|
||||||
<context-group purpose="location">
|
<context-group purpose="location">
|
||||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/progressbar/progressbar.ts</context>
|
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/progressbar/progressbar.ts</context>
|
||||||
<context context-type="linenumber">41,42</context>
|
<context context-type="linenumber">41,42</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
<trans-unit id="ngb.timepicker.HH" datatype="html">
|
<trans-unit id="ngb.timepicker.HH" datatype="html">
|
||||||
<source>HH</source>
|
<source>HH</source>
|
||||||
<context-group purpose="location">
|
<context-group purpose="location">
|
||||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/timepicker/timepicker-config.ts</context>
|
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/timepicker/timepicker-config.ts</context>
|
||||||
<context context-type="linenumber">21</context>
|
<context context-type="linenumber">21</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
<trans-unit id="ngb.timepicker.hours" datatype="html">
|
<trans-unit id="ngb.timepicker.hours" datatype="html">
|
||||||
<source>Hours</source>
|
<source>Hours</source>
|
||||||
<context-group purpose="location">
|
<context-group purpose="location">
|
||||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/timepicker/timepicker-config.ts</context>
|
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/timepicker/timepicker-config.ts</context>
|
||||||
<context context-type="linenumber">21</context>
|
<context context-type="linenumber">21</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
<trans-unit id="ngb.timepicker.MM" datatype="html">
|
<trans-unit id="ngb.timepicker.MM" datatype="html">
|
||||||
<source>MM</source>
|
<source>MM</source>
|
||||||
<context-group purpose="location">
|
<context-group purpose="location">
|
||||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/timepicker/timepicker-config.ts</context>
|
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/timepicker/timepicker-config.ts</context>
|
||||||
<context context-type="linenumber">21</context>
|
<context context-type="linenumber">21</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
<trans-unit id="ngb.timepicker.minutes" datatype="html">
|
<trans-unit id="ngb.timepicker.minutes" datatype="html">
|
||||||
<source>Minutes</source>
|
<source>Minutes</source>
|
||||||
<context-group purpose="location">
|
<context-group purpose="location">
|
||||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/timepicker/timepicker-config.ts</context>
|
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/timepicker/timepicker-config.ts</context>
|
||||||
<context context-type="linenumber">21</context>
|
<context context-type="linenumber">21</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
<trans-unit id="ngb.timepicker.increment-hours" datatype="html">
|
<trans-unit id="ngb.timepicker.increment-hours" datatype="html">
|
||||||
<source>Increment hours</source>
|
<source>Increment hours</source>
|
||||||
<context-group purpose="location">
|
<context-group purpose="location">
|
||||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/timepicker/timepicker-config.ts</context>
|
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/timepicker/timepicker-config.ts</context>
|
||||||
<context context-type="linenumber">21</context>
|
<context context-type="linenumber">21</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
<trans-unit id="ngb.timepicker.decrement-hours" datatype="html">
|
<trans-unit id="ngb.timepicker.decrement-hours" datatype="html">
|
||||||
<source>Decrement hours</source>
|
<source>Decrement hours</source>
|
||||||
<context-group purpose="location">
|
<context-group purpose="location">
|
||||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/timepicker/timepicker-config.ts</context>
|
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/timepicker/timepicker-config.ts</context>
|
||||||
<context context-type="linenumber">21</context>
|
<context context-type="linenumber">21</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
<trans-unit id="ngb.timepicker.increment-minutes" datatype="html">
|
<trans-unit id="ngb.timepicker.increment-minutes" datatype="html">
|
||||||
<source>Increment minutes</source>
|
<source>Increment minutes</source>
|
||||||
<context-group purpose="location">
|
<context-group purpose="location">
|
||||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/timepicker/timepicker-config.ts</context>
|
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/timepicker/timepicker-config.ts</context>
|
||||||
<context context-type="linenumber">21</context>
|
<context context-type="linenumber">21</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
<trans-unit id="ngb.timepicker.decrement-minutes" datatype="html">
|
<trans-unit id="ngb.timepicker.decrement-minutes" datatype="html">
|
||||||
<source>Decrement minutes</source>
|
<source>Decrement minutes</source>
|
||||||
<context-group purpose="location">
|
<context-group purpose="location">
|
||||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/timepicker/timepicker-config.ts</context>
|
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/timepicker/timepicker-config.ts</context>
|
||||||
<context context-type="linenumber">21</context>
|
<context context-type="linenumber">21</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
<trans-unit id="ngb.timepicker.SS" datatype="html">
|
<trans-unit id="ngb.timepicker.SS" datatype="html">
|
||||||
<source>SS</source>
|
<source>SS</source>
|
||||||
<context-group purpose="location">
|
<context-group purpose="location">
|
||||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/timepicker/timepicker-config.ts</context>
|
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/timepicker/timepicker-config.ts</context>
|
||||||
<context context-type="linenumber">21</context>
|
<context context-type="linenumber">21</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
<trans-unit id="ngb.timepicker.seconds" datatype="html">
|
<trans-unit id="ngb.timepicker.seconds" datatype="html">
|
||||||
<source>Seconds</source>
|
<source>Seconds</source>
|
||||||
<context-group purpose="location">
|
<context-group purpose="location">
|
||||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/timepicker/timepicker-config.ts</context>
|
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/timepicker/timepicker-config.ts</context>
|
||||||
<context context-type="linenumber">21</context>
|
<context context-type="linenumber">21</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
<trans-unit id="ngb.timepicker.increment-seconds" datatype="html">
|
<trans-unit id="ngb.timepicker.increment-seconds" datatype="html">
|
||||||
<source>Increment seconds</source>
|
<source>Increment seconds</source>
|
||||||
<context-group purpose="location">
|
<context-group purpose="location">
|
||||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/timepicker/timepicker-config.ts</context>
|
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/timepicker/timepicker-config.ts</context>
|
||||||
<context context-type="linenumber">21</context>
|
<context context-type="linenumber">21</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
<trans-unit id="ngb.timepicker.decrement-seconds" datatype="html">
|
<trans-unit id="ngb.timepicker.decrement-seconds" datatype="html">
|
||||||
<source>Decrement seconds</source>
|
<source>Decrement seconds</source>
|
||||||
<context-group purpose="location">
|
<context-group purpose="location">
|
||||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/timepicker/timepicker-config.ts</context>
|
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/timepicker/timepicker-config.ts</context>
|
||||||
<context context-type="linenumber">21</context>
|
<context context-type="linenumber">21</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
<trans-unit id="ngb.timepicker.PM" datatype="html">
|
<trans-unit id="ngb.timepicker.PM" datatype="html">
|
||||||
<source><x id="INTERPOLATION"/></source>
|
<source><x id="INTERPOLATION"/></source>
|
||||||
<context-group purpose="location">
|
<context-group purpose="location">
|
||||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/timepicker/timepicker-config.ts</context>
|
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/timepicker/timepicker-config.ts</context>
|
||||||
<context context-type="linenumber">21</context>
|
<context context-type="linenumber">21</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
<trans-unit id="ngb.toast.close-aria" datatype="html">
|
<trans-unit id="ngb.toast.close-aria" datatype="html">
|
||||||
<source>Close</source>
|
<source>Close</source>
|
||||||
<context-group purpose="location">
|
<context-group purpose="location">
|
||||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/toast/toast-config.ts</context>
|
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/toast/toast-config.ts</context>
|
||||||
<context context-type="linenumber">54</context>
|
<context context-type="linenumber">54</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
@@ -532,15 +532,79 @@
|
|||||||
<context context-type="linenumber">125</context>
|
<context context-type="linenumber">125</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
<trans-unit id="3823219296477075982" datatype="html">
|
<trans-unit id="2159130950882492111" datatype="html">
|
||||||
<source>Discard</source>
|
<source>Cancel</source>
|
||||||
<context-group purpose="location">
|
<context-group purpose="location">
|
||||||
<context context-type="sourcefile">src/app/components/admin/config/config.component.html</context>
|
<context context-type="sourcefile">src/app/components/admin/config/config.component.html</context>
|
||||||
<context context-type="linenumber">62</context>
|
<context context-type="linenumber">62</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
<context-group purpose="location">
|
<context-group purpose="location">
|
||||||
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.html</context>
|
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||||
<context context-type="linenumber">452</context>
|
<context context-type="linenumber">399</context>
|
||||||
|
</context-group>
|
||||||
|
<context-group purpose="location">
|
||||||
|
<context context-type="sourcefile">src/app/components/common/confirm-dialog/confirm-dialog.component.ts</context>
|
||||||
|
<context context-type="linenumber">47</context>
|
||||||
|
</context-group>
|
||||||
|
<context-group purpose="location">
|
||||||
|
<context context-type="sourcefile">src/app/components/common/edit-dialog/correspondent-edit-dialog/correspondent-edit-dialog.component.html</context>
|
||||||
|
<context context-type="linenumber">25</context>
|
||||||
|
</context-group>
|
||||||
|
<context-group purpose="location">
|
||||||
|
<context context-type="sourcefile">src/app/components/common/edit-dialog/custom-field-edit-dialog/custom-field-edit-dialog.component.html</context>
|
||||||
|
<context context-type="linenumber">51</context>
|
||||||
|
</context-group>
|
||||||
|
<context-group purpose="location">
|
||||||
|
<context context-type="sourcefile">src/app/components/common/edit-dialog/document-type-edit-dialog/document-type-edit-dialog.component.html</context>
|
||||||
|
<context context-type="linenumber">27</context>
|
||||||
|
</context-group>
|
||||||
|
<context-group purpose="location">
|
||||||
|
<context context-type="sourcefile">src/app/components/common/edit-dialog/group-edit-dialog/group-edit-dialog.component.html</context>
|
||||||
|
<context context-type="linenumber">19</context>
|
||||||
|
</context-group>
|
||||||
|
<context-group purpose="location">
|
||||||
|
<context context-type="sourcefile">src/app/components/common/edit-dialog/mail-account-edit-dialog/mail-account-edit-dialog.component.html</context>
|
||||||
|
<context context-type="linenumber">39</context>
|
||||||
|
</context-group>
|
||||||
|
<context-group purpose="location">
|
||||||
|
<context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html</context>
|
||||||
|
<context context-type="linenumber">80</context>
|
||||||
|
</context-group>
|
||||||
|
<context-group purpose="location">
|
||||||
|
<context context-type="sourcefile">src/app/components/common/edit-dialog/storage-path-edit-dialog/storage-path-edit-dialog.component.html</context>
|
||||||
|
<context context-type="linenumber">76</context>
|
||||||
|
</context-group>
|
||||||
|
<context-group purpose="location">
|
||||||
|
<context context-type="sourcefile">src/app/components/common/edit-dialog/tag-edit-dialog/tag-edit-dialog.component.html</context>
|
||||||
|
<context context-type="linenumber">30</context>
|
||||||
|
</context-group>
|
||||||
|
<context-group purpose="location">
|
||||||
|
<context context-type="sourcefile">src/app/components/common/edit-dialog/user-edit-dialog/user-edit-dialog.component.html</context>
|
||||||
|
<context context-type="linenumber">56</context>
|
||||||
|
</context-group>
|
||||||
|
<context-group purpose="location">
|
||||||
|
<context context-type="sourcefile">src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.html</context>
|
||||||
|
<context context-type="linenumber">115</context>
|
||||||
|
</context-group>
|
||||||
|
<context-group purpose="location">
|
||||||
|
<context context-type="sourcefile">src/app/components/common/permissions-dialog/permissions-dialog.component.html</context>
|
||||||
|
<context context-type="linenumber">31</context>
|
||||||
|
</context-group>
|
||||||
|
<context-group purpose="location">
|
||||||
|
<context context-type="sourcefile">src/app/components/common/profile-edit-dialog/profile-edit-dialog.component.html</context>
|
||||||
|
<context context-type="linenumber">182</context>
|
||||||
|
</context-group>
|
||||||
|
<context-group purpose="location">
|
||||||
|
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/custom-fields-bulk-edit-dialog/custom-fields-bulk-edit-dialog.component.html</context>
|
||||||
|
<context context-type="linenumber">81</context>
|
||||||
|
</context-group>
|
||||||
|
<context-group purpose="location">
|
||||||
|
<context context-type="sourcefile">src/app/components/document-list/save-view-config-dialog/save-view-config-dialog.component.html</context>
|
||||||
|
<context context-type="linenumber">21</context>
|
||||||
|
</context-group>
|
||||||
|
<context-group purpose="location">
|
||||||
|
<context context-type="sourcefile">src/app/components/manage/saved-views/saved-views.component.html</context>
|
||||||
|
<context context-type="linenumber">82</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
<trans-unit id="3768927257183755959" datatype="html">
|
<trans-unit id="3768927257183755959" datatype="html">
|
||||||
@@ -1514,77 +1578,6 @@
|
|||||||
<context context-type="linenumber">389</context>
|
<context context-type="linenumber">389</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
<trans-unit id="2159130950882492111" datatype="html">
|
|
||||||
<source>Cancel</source>
|
|
||||||
<context-group purpose="location">
|
|
||||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
|
||||||
<context context-type="linenumber">399</context>
|
|
||||||
</context-group>
|
|
||||||
<context-group purpose="location">
|
|
||||||
<context context-type="sourcefile">src/app/components/common/confirm-dialog/confirm-dialog.component.ts</context>
|
|
||||||
<context context-type="linenumber">47</context>
|
|
||||||
</context-group>
|
|
||||||
<context-group purpose="location">
|
|
||||||
<context context-type="sourcefile">src/app/components/common/edit-dialog/correspondent-edit-dialog/correspondent-edit-dialog.component.html</context>
|
|
||||||
<context context-type="linenumber">25</context>
|
|
||||||
</context-group>
|
|
||||||
<context-group purpose="location">
|
|
||||||
<context context-type="sourcefile">src/app/components/common/edit-dialog/custom-field-edit-dialog/custom-field-edit-dialog.component.html</context>
|
|
||||||
<context context-type="linenumber">51</context>
|
|
||||||
</context-group>
|
|
||||||
<context-group purpose="location">
|
|
||||||
<context context-type="sourcefile">src/app/components/common/edit-dialog/document-type-edit-dialog/document-type-edit-dialog.component.html</context>
|
|
||||||
<context context-type="linenumber">27</context>
|
|
||||||
</context-group>
|
|
||||||
<context-group purpose="location">
|
|
||||||
<context context-type="sourcefile">src/app/components/common/edit-dialog/group-edit-dialog/group-edit-dialog.component.html</context>
|
|
||||||
<context context-type="linenumber">19</context>
|
|
||||||
</context-group>
|
|
||||||
<context-group purpose="location">
|
|
||||||
<context context-type="sourcefile">src/app/components/common/edit-dialog/mail-account-edit-dialog/mail-account-edit-dialog.component.html</context>
|
|
||||||
<context context-type="linenumber">39</context>
|
|
||||||
</context-group>
|
|
||||||
<context-group purpose="location">
|
|
||||||
<context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html</context>
|
|
||||||
<context context-type="linenumber">80</context>
|
|
||||||
</context-group>
|
|
||||||
<context-group purpose="location">
|
|
||||||
<context context-type="sourcefile">src/app/components/common/edit-dialog/storage-path-edit-dialog/storage-path-edit-dialog.component.html</context>
|
|
||||||
<context context-type="linenumber">76</context>
|
|
||||||
</context-group>
|
|
||||||
<context-group purpose="location">
|
|
||||||
<context context-type="sourcefile">src/app/components/common/edit-dialog/tag-edit-dialog/tag-edit-dialog.component.html</context>
|
|
||||||
<context context-type="linenumber">30</context>
|
|
||||||
</context-group>
|
|
||||||
<context-group purpose="location">
|
|
||||||
<context context-type="sourcefile">src/app/components/common/edit-dialog/user-edit-dialog/user-edit-dialog.component.html</context>
|
|
||||||
<context context-type="linenumber">56</context>
|
|
||||||
</context-group>
|
|
||||||
<context-group purpose="location">
|
|
||||||
<context context-type="sourcefile">src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.html</context>
|
|
||||||
<context context-type="linenumber">115</context>
|
|
||||||
</context-group>
|
|
||||||
<context-group purpose="location">
|
|
||||||
<context context-type="sourcefile">src/app/components/common/permissions-dialog/permissions-dialog.component.html</context>
|
|
||||||
<context context-type="linenumber">31</context>
|
|
||||||
</context-group>
|
|
||||||
<context-group purpose="location">
|
|
||||||
<context context-type="sourcefile">src/app/components/common/profile-edit-dialog/profile-edit-dialog.component.html</context>
|
|
||||||
<context context-type="linenumber">182</context>
|
|
||||||
</context-group>
|
|
||||||
<context-group purpose="location">
|
|
||||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/custom-fields-bulk-edit-dialog/custom-fields-bulk-edit-dialog.component.html</context>
|
|
||||||
<context context-type="linenumber">81</context>
|
|
||||||
</context-group>
|
|
||||||
<context-group purpose="location">
|
|
||||||
<context context-type="sourcefile">src/app/components/document-list/save-view-config-dialog/save-view-config-dialog.component.html</context>
|
|
||||||
<context context-type="linenumber">21</context>
|
|
||||||
</context-group>
|
|
||||||
<context-group purpose="location">
|
|
||||||
<context context-type="sourcefile">src/app/components/manage/saved-views/saved-views.component.html</context>
|
|
||||||
<context context-type="linenumber">82</context>
|
|
||||||
</context-group>
|
|
||||||
</trans-unit>
|
|
||||||
<trans-unit id="6839066544204061364" datatype="html">
|
<trans-unit id="6839066544204061364" datatype="html">
|
||||||
<source>Use system language</source>
|
<source>Use system language</source>
|
||||||
<context-group purpose="location">
|
<context-group purpose="location">
|
||||||
@@ -5736,7 +5729,7 @@
|
|||||||
<source>Open <x id="PH" equiv-text="this.title"/> filter</source>
|
<source>Open <x id="PH" equiv-text="this.title"/> filter</source>
|
||||||
<context-group purpose="location">
|
<context-group purpose="location">
|
||||||
<context context-type="sourcefile">src/app/components/common/filterable-dropdown/filterable-dropdown.component.ts</context>
|
<context context-type="sourcefile">src/app/components/common/filterable-dropdown/filterable-dropdown.component.ts</context>
|
||||||
<context context-type="linenumber">788</context>
|
<context context-type="linenumber">823</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
<trans-unit id="7005745151564974365" datatype="html">
|
<trans-unit id="7005745151564974365" datatype="html">
|
||||||
@@ -7489,7 +7482,7 @@
|
|||||||
</context-group>
|
</context-group>
|
||||||
<context-group purpose="location">
|
<context-group purpose="location">
|
||||||
<context context-type="sourcefile">src/main.ts</context>
|
<context context-type="sourcefile">src/main.ts</context>
|
||||||
<context context-type="linenumber">411</context>
|
<context context-type="linenumber">416</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
<trans-unit id="5028777105388019087" datatype="html">
|
<trans-unit id="5028777105388019087" datatype="html">
|
||||||
@@ -7684,6 +7677,13 @@
|
|||||||
<context context-type="linenumber">450</context>
|
<context context-type="linenumber">450</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
|
<trans-unit id="3823219296477075982" datatype="html">
|
||||||
|
<source>Discard</source>
|
||||||
|
<context-group purpose="location">
|
||||||
|
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.html</context>
|
||||||
|
<context context-type="linenumber">452</context>
|
||||||
|
</context-group>
|
||||||
|
</trans-unit>
|
||||||
<trans-unit id="1309556917227148591" datatype="html">
|
<trans-unit id="1309556917227148591" datatype="html">
|
||||||
<source>Document loading...</source>
|
<source>Document loading...</source>
|
||||||
<context-group purpose="location">
|
<context-group purpose="location">
|
||||||
@@ -11352,14 +11352,14 @@
|
|||||||
<source>Prev</source>
|
<source>Prev</source>
|
||||||
<context-group purpose="location">
|
<context-group purpose="location">
|
||||||
<context context-type="sourcefile">src/main.ts</context>
|
<context context-type="sourcefile">src/main.ts</context>
|
||||||
<context context-type="linenumber">410</context>
|
<context context-type="linenumber">415</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
<trans-unit id="1241348629231510663" datatype="html">
|
<trans-unit id="1241348629231510663" datatype="html">
|
||||||
<source>End</source>
|
<source>End</source>
|
||||||
<context-group purpose="location">
|
<context-group purpose="location">
|
||||||
<context context-type="sourcefile">src/main.ts</context>
|
<context context-type="sourcefile">src/main.ts</context>
|
||||||
<context context-type="linenumber">412</context>
|
<context context-type="linenumber">417</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
</body>
|
</body>
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "paperless-ngx-ui",
|
"name": "paperless-ngx-ui",
|
||||||
"version": "2.20.10",
|
"version": "2.20.13",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"preinstall": "npx only-allow pnpm",
|
"preinstall": "npx only-allow pnpm",
|
||||||
"ng": "ng",
|
"ng": "ng",
|
||||||
@@ -11,17 +11,17 @@
|
|||||||
},
|
},
|
||||||
"private": true,
|
"private": true,
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@angular/cdk": "^21.2.0",
|
"@angular/cdk": "^21.2.2",
|
||||||
"@angular/common": "~21.2.0",
|
"@angular/common": "~21.2.4",
|
||||||
"@angular/compiler": "~21.2.0",
|
"@angular/compiler": "~21.2.4",
|
||||||
"@angular/core": "~21.2.0",
|
"@angular/core": "~21.2.4",
|
||||||
"@angular/forms": "~21.2.0",
|
"@angular/forms": "~21.2.4",
|
||||||
"@angular/localize": "~21.2.0",
|
"@angular/localize": "~21.2.4",
|
||||||
"@angular/platform-browser": "~21.2.0",
|
"@angular/platform-browser": "~21.2.4",
|
||||||
"@angular/platform-browser-dynamic": "~21.2.0",
|
"@angular/platform-browser-dynamic": "~21.2.4",
|
||||||
"@angular/router": "~21.2.0",
|
"@angular/router": "~21.2.4",
|
||||||
"@ng-bootstrap/ng-bootstrap": "^20.0.0",
|
"@ng-bootstrap/ng-bootstrap": "^20.0.0",
|
||||||
"@ng-select/ng-select": "^21.4.1",
|
"@ng-select/ng-select": "^21.5.2",
|
||||||
"@ngneat/dirty-check-forms": "^3.0.3",
|
"@ngneat/dirty-check-forms": "^3.0.3",
|
||||||
"@popperjs/core": "^2.11.8",
|
"@popperjs/core": "^2.11.8",
|
||||||
"bootstrap": "^5.3.8",
|
"bootstrap": "^5.3.8",
|
||||||
@@ -42,26 +42,26 @@
|
|||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@angular-builders/custom-webpack": "^21.0.3",
|
"@angular-builders/custom-webpack": "^21.0.3",
|
||||||
"@angular-builders/jest": "^21.0.3",
|
"@angular-builders/jest": "^21.0.3",
|
||||||
"@angular-devkit/core": "^21.2.0",
|
"@angular-devkit/core": "^21.2.2",
|
||||||
"@angular-devkit/schematics": "^21.2.0",
|
"@angular-devkit/schematics": "^21.2.2",
|
||||||
"@angular-eslint/builder": "21.3.0",
|
"@angular-eslint/builder": "21.3.0",
|
||||||
"@angular-eslint/eslint-plugin": "21.3.0",
|
"@angular-eslint/eslint-plugin": "21.3.0",
|
||||||
"@angular-eslint/eslint-plugin-template": "21.3.0",
|
"@angular-eslint/eslint-plugin-template": "21.3.0",
|
||||||
"@angular-eslint/schematics": "21.3.0",
|
"@angular-eslint/schematics": "21.3.0",
|
||||||
"@angular-eslint/template-parser": "21.3.0",
|
"@angular-eslint/template-parser": "21.3.0",
|
||||||
"@angular/build": "^21.2.0",
|
"@angular/build": "^21.2.2",
|
||||||
"@angular/cli": "~21.2.0",
|
"@angular/cli": "~21.2.2",
|
||||||
"@angular/compiler-cli": "~21.2.0",
|
"@angular/compiler-cli": "~21.2.4",
|
||||||
"@codecov/webpack-plugin": "^1.9.1",
|
"@codecov/webpack-plugin": "^1.9.1",
|
||||||
"@playwright/test": "^1.58.2",
|
"@playwright/test": "^1.58.2",
|
||||||
"@types/jest": "^30.0.0",
|
"@types/jest": "^30.0.0",
|
||||||
"@types/node": "^25.3.3",
|
"@types/node": "^25.4.0",
|
||||||
"@typescript-eslint/eslint-plugin": "^8.54.0",
|
"@typescript-eslint/eslint-plugin": "^8.57.0",
|
||||||
"@typescript-eslint/parser": "^8.54.0",
|
"@typescript-eslint/parser": "^8.57.0",
|
||||||
"@typescript-eslint/utils": "^8.54.0",
|
"@typescript-eslint/utils": "^8.57.0",
|
||||||
"eslint": "^10.0.2",
|
"eslint": "^10.0.3",
|
||||||
"jest": "30.2.0",
|
"jest": "30.3.0",
|
||||||
"jest-environment-jsdom": "^30.2.0",
|
"jest-environment-jsdom": "^30.3.0",
|
||||||
"jest-junit": "^16.0.0",
|
"jest-junit": "^16.0.0",
|
||||||
"jest-preset-angular": "^16.1.1",
|
"jest-preset-angular": "^16.1.1",
|
||||||
"jest-websocket-mock": "^2.5.0",
|
"jest-websocket-mock": "^2.5.0",
|
||||||
|
|||||||
1858
src-ui/pnpm-lock.yaml
generated
@@ -59,7 +59,7 @@
|
|||||||
<div [ngbNavOutlet]="nav" class="border-start border-end border-bottom p-3 mb-3 shadow-sm"></div>
|
<div [ngbNavOutlet]="nav" class="border-start border-end border-bottom p-3 mb-3 shadow-sm"></div>
|
||||||
<div class="btn-toolbar" role="toolbar">
|
<div class="btn-toolbar" role="toolbar">
|
||||||
<div class="btn-group me-2">
|
<div class="btn-group me-2">
|
||||||
<button type="button" (click)="discardChanges()" class="btn btn-outline-secondary" [disabled]="loading || (isDirty$ | async) === false" i18n>Discard</button>
|
<button type="button" (click)="discardChanges()" class="btn btn-outline-secondary" [disabled]="loading || (isDirty$ | async) === false" i18n>Cancel</button>
|
||||||
</div>
|
</div>
|
||||||
<div class="btn-group">
|
<div class="btn-group">
|
||||||
<button type="submit" class="btn btn-primary" [disabled]="loading || !configForm.valid || (isDirty$ | async) === false" i18n>Save</button>
|
<button type="submit" class="btn btn-primary" [disabled]="loading || !configForm.valid || (isDirty$ | async) === false" i18n>Save</button>
|
||||||
|
|||||||
@@ -631,6 +631,59 @@ describe('FilterableDropdownComponent & FilterableDropdownSelectionModel', () =>
|
|||||||
])
|
])
|
||||||
})
|
})
|
||||||
|
|
||||||
|
it('deselecting a parent clears selected descendants', () => {
|
||||||
|
const root: Tag = { id: 100, name: 'Root Tag' }
|
||||||
|
const child: Tag = { id: 101, name: 'Child Tag', parent: root.id }
|
||||||
|
const grandchild: Tag = {
|
||||||
|
id: 102,
|
||||||
|
name: 'Grandchild Tag',
|
||||||
|
parent: child.id,
|
||||||
|
}
|
||||||
|
const other: Tag = { id: 103, name: 'Other Tag' }
|
||||||
|
|
||||||
|
selectionModel.items = [root, child, grandchild, other]
|
||||||
|
selectionModel.set(root.id, ToggleableItemState.Selected, false)
|
||||||
|
selectionModel.set(child.id, ToggleableItemState.Selected, false)
|
||||||
|
selectionModel.set(grandchild.id, ToggleableItemState.Selected, false)
|
||||||
|
selectionModel.set(other.id, ToggleableItemState.Selected, false)
|
||||||
|
|
||||||
|
selectionModel.toggle(root.id, false)
|
||||||
|
|
||||||
|
expect(selectionModel.getSelectedItems()).toEqual([other])
|
||||||
|
})
|
||||||
|
|
||||||
|
it('un-excluding a parent clears excluded descendants', () => {
|
||||||
|
const root: Tag = { id: 110, name: 'Root Tag' }
|
||||||
|
const child: Tag = { id: 111, name: 'Child Tag', parent: root.id }
|
||||||
|
const other: Tag = { id: 112, name: 'Other Tag' }
|
||||||
|
|
||||||
|
selectionModel.items = [root, child, other]
|
||||||
|
selectionModel.set(root.id, ToggleableItemState.Excluded, false)
|
||||||
|
selectionModel.set(child.id, ToggleableItemState.Excluded, false)
|
||||||
|
selectionModel.set(other.id, ToggleableItemState.Excluded, false)
|
||||||
|
|
||||||
|
selectionModel.exclude(root.id, false)
|
||||||
|
|
||||||
|
expect(selectionModel.getExcludedItems()).toEqual([other])
|
||||||
|
})
|
||||||
|
|
||||||
|
it('excluding a selected parent clears selected descendants', () => {
|
||||||
|
const root: Tag = { id: 120, name: 'Root Tag' }
|
||||||
|
const child: Tag = { id: 121, name: 'Child Tag', parent: root.id }
|
||||||
|
const other: Tag = { id: 122, name: 'Other Tag' }
|
||||||
|
|
||||||
|
selectionModel.manyToOne = true
|
||||||
|
selectionModel.items = [root, child, other]
|
||||||
|
selectionModel.set(root.id, ToggleableItemState.Selected, false)
|
||||||
|
selectionModel.set(child.id, ToggleableItemState.Selected, false)
|
||||||
|
selectionModel.set(other.id, ToggleableItemState.Selected, false)
|
||||||
|
|
||||||
|
selectionModel.exclude(root.id, false)
|
||||||
|
|
||||||
|
expect(selectionModel.getExcludedItems()).toEqual([root])
|
||||||
|
expect(selectionModel.getSelectedItems()).toEqual([other])
|
||||||
|
})
|
||||||
|
|
||||||
it('resorts items immediately when document count sorting enabled', () => {
|
it('resorts items immediately when document count sorting enabled', () => {
|
||||||
const apple: Tag = { id: 55, name: 'Apple' }
|
const apple: Tag = { id: 55, name: 'Apple' }
|
||||||
const zebra: Tag = { id: 56, name: 'Zebra' }
|
const zebra: Tag = { id: 56, name: 'Zebra' }
|
||||||
|
|||||||
@@ -235,6 +235,7 @@ export class FilterableDropdownSelectionModel {
|
|||||||
state == ToggleableItemState.Excluded
|
state == ToggleableItemState.Excluded
|
||||||
) {
|
) {
|
||||||
this.temporarySelectionStates.delete(id)
|
this.temporarySelectionStates.delete(id)
|
||||||
|
this.clearDescendantSelections(id)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!id) {
|
if (!id) {
|
||||||
@@ -261,6 +262,7 @@ export class FilterableDropdownSelectionModel {
|
|||||||
|
|
||||||
if (this.manyToOne || this.singleSelect) {
|
if (this.manyToOne || this.singleSelect) {
|
||||||
this.temporarySelectionStates.set(id, ToggleableItemState.Excluded)
|
this.temporarySelectionStates.set(id, ToggleableItemState.Excluded)
|
||||||
|
this.clearDescendantSelections(id)
|
||||||
|
|
||||||
if (this.singleSelect) {
|
if (this.singleSelect) {
|
||||||
for (let key of this.temporarySelectionStates.keys()) {
|
for (let key of this.temporarySelectionStates.keys()) {
|
||||||
@@ -281,9 +283,15 @@ export class FilterableDropdownSelectionModel {
|
|||||||
newState = ToggleableItemState.NotSelected
|
newState = ToggleableItemState.NotSelected
|
||||||
}
|
}
|
||||||
this.temporarySelectionStates.set(id, newState)
|
this.temporarySelectionStates.set(id, newState)
|
||||||
|
if (newState == ToggleableItemState.Excluded) {
|
||||||
|
this.clearDescendantSelections(id)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else if (!id || state == ToggleableItemState.Excluded) {
|
} else if (!id || state == ToggleableItemState.Excluded) {
|
||||||
this.temporarySelectionStates.delete(id)
|
this.temporarySelectionStates.delete(id)
|
||||||
|
if (id) {
|
||||||
|
this.clearDescendantSelections(id)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (fireEvent) {
|
if (fireEvent) {
|
||||||
@@ -295,6 +303,33 @@ export class FilterableDropdownSelectionModel {
|
|||||||
return this.selectionStates.get(id) || ToggleableItemState.NotSelected
|
return this.selectionStates.get(id) || ToggleableItemState.NotSelected
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private clearDescendantSelections(id: number) {
|
||||||
|
for (const descendantID of this.getDescendantIDs(id)) {
|
||||||
|
this.temporarySelectionStates.delete(descendantID)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private getDescendantIDs(id: number): number[] {
|
||||||
|
const descendants: number[] = []
|
||||||
|
const queue: number[] = [id]
|
||||||
|
|
||||||
|
while (queue.length) {
|
||||||
|
const parentID = queue.shift()
|
||||||
|
for (const item of this._items) {
|
||||||
|
if (
|
||||||
|
typeof item?.id === 'number' &&
|
||||||
|
typeof (item as any)['parent'] === 'number' &&
|
||||||
|
(item as any)['parent'] === parentID
|
||||||
|
) {
|
||||||
|
descendants.push(item.id)
|
||||||
|
queue.push(item.id)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return descendants
|
||||||
|
}
|
||||||
|
|
||||||
get logicalOperator(): LogicalOperator {
|
get logicalOperator(): LogicalOperator {
|
||||||
return this.temporaryLogicalOperator
|
return this.temporaryLogicalOperator
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -15,7 +15,7 @@
|
|||||||
}
|
}
|
||||||
|
|
||||||
@if (document && displayFields?.includes(DisplayField.TAGS)) {
|
@if (document && displayFields?.includes(DisplayField.TAGS)) {
|
||||||
<div class="tags d-flex flex-column text-end position-absolute me-1 fs-6">
|
<div class="tags d-flex flex-column text-end position-absolute me-1 fs-6" [class.tags-no-wrap]="document.tags.length > 3">
|
||||||
@for (tagID of tagIDs; track tagID) {
|
@for (tagID of tagIDs; track tagID) {
|
||||||
<pngx-tag [tagID]="tagID" (click)="clickTag.emit(tagID);$event.stopPropagation()" [clickable]="true" linkTitle="Toggle tag filter" i18n-linkTitle></pngx-tag>
|
<pngx-tag [tagID]="tagID" (click)="clickTag.emit(tagID);$event.stopPropagation()" [clickable]="true" linkTitle="Toggle tag filter" i18n-linkTitle></pngx-tag>
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -72,4 +72,14 @@ a {
|
|||||||
max-width: 80%;
|
max-width: 80%;
|
||||||
row-gap: .2rem;
|
row-gap: .2rem;
|
||||||
line-height: 1;
|
line-height: 1;
|
||||||
|
|
||||||
|
&.tags-no-wrap {
|
||||||
|
::ng-deep .badge {
|
||||||
|
display: inline-block;
|
||||||
|
max-width: 100%;
|
||||||
|
white-space: nowrap;
|
||||||
|
overflow: hidden;
|
||||||
|
text-overflow: ellipsis;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -82,6 +82,16 @@ describe('DocumentCardSmallComponent', () => {
|
|||||||
).toHaveLength(6)
|
).toHaveLength(6)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
it('should clear hidden tag counter when tag count falls below the limit', () => {
|
||||||
|
expect(component.moreTags).toEqual(3)
|
||||||
|
|
||||||
|
component.document.tags = [1, 2, 3, 4, 5, 6]
|
||||||
|
fixture.detectChanges()
|
||||||
|
|
||||||
|
expect(component.moreTags).toBeNull()
|
||||||
|
expect(fixture.nativeElement.textContent).not.toContain('+ 3')
|
||||||
|
})
|
||||||
|
|
||||||
it('should try to close the preview on mouse leave', () => {
|
it('should try to close the preview on mouse leave', () => {
|
||||||
component.popupPreview = {
|
component.popupPreview = {
|
||||||
close: jest.fn(),
|
close: jest.fn(),
|
||||||
|
|||||||
@@ -126,6 +126,7 @@ export class DocumentCardSmallComponent
|
|||||||
this.moreTags = this.document.tags.length - (limit - 1)
|
this.moreTags = this.document.tags.length - (limit - 1)
|
||||||
return this.document.tags.slice(0, limit - 1)
|
return this.document.tags.slice(0, limit - 1)
|
||||||
} else {
|
} else {
|
||||||
|
this.moreTags = null
|
||||||
return this.document.tags
|
return this.document.tags
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
122
src-ui/src/app/interceptors/auth-expiry.interceptor.spec.ts
Normal file
@@ -0,0 +1,122 @@
|
|||||||
|
import {
|
||||||
|
HttpErrorResponse,
|
||||||
|
HttpHandlerFn,
|
||||||
|
HttpRequest,
|
||||||
|
} from '@angular/common/http'
|
||||||
|
import { throwError } from 'rxjs'
|
||||||
|
import * as navUtils from '../utils/navigation'
|
||||||
|
import { createAuthExpiryInterceptor } from './auth-expiry.interceptor'
|
||||||
|
|
||||||
|
describe('withAuthExpiryInterceptor', () => {
|
||||||
|
let interceptor: ReturnType<typeof createAuthExpiryInterceptor>
|
||||||
|
let dateNowSpy: jest.SpiedFunction<typeof Date.now>
|
||||||
|
|
||||||
|
beforeEach(() => {
|
||||||
|
interceptor = createAuthExpiryInterceptor()
|
||||||
|
dateNowSpy = jest.spyOn(Date, 'now').mockReturnValue(1000)
|
||||||
|
})
|
||||||
|
|
||||||
|
afterEach(() => {
|
||||||
|
jest.restoreAllMocks()
|
||||||
|
})
|
||||||
|
|
||||||
|
it('reloads when an API request returns 401', () => {
|
||||||
|
const reloadSpy = jest
|
||||||
|
.spyOn(navUtils, 'locationReload')
|
||||||
|
.mockImplementation(() => {})
|
||||||
|
|
||||||
|
interceptor(
|
||||||
|
new HttpRequest('GET', '/api/documents/'),
|
||||||
|
failingHandler('/api/documents/', 401)
|
||||||
|
).subscribe({
|
||||||
|
error: () => undefined,
|
||||||
|
})
|
||||||
|
|
||||||
|
expect(reloadSpy).toHaveBeenCalledTimes(1)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('does not reload for non-401 errors', () => {
|
||||||
|
const reloadSpy = jest
|
||||||
|
.spyOn(navUtils, 'locationReload')
|
||||||
|
.mockImplementation(() => {})
|
||||||
|
|
||||||
|
interceptor(
|
||||||
|
new HttpRequest('GET', '/api/documents/'),
|
||||||
|
failingHandler('/api/documents/', 500)
|
||||||
|
).subscribe({
|
||||||
|
error: () => undefined,
|
||||||
|
})
|
||||||
|
|
||||||
|
expect(reloadSpy).not.toHaveBeenCalled()
|
||||||
|
})
|
||||||
|
|
||||||
|
it('does not reload for non-api 401 responses', () => {
|
||||||
|
const reloadSpy = jest
|
||||||
|
.spyOn(navUtils, 'locationReload')
|
||||||
|
.mockImplementation(() => {})
|
||||||
|
|
||||||
|
interceptor(
|
||||||
|
new HttpRequest('GET', '/accounts/profile/'),
|
||||||
|
failingHandler('/accounts/profile/', 401)
|
||||||
|
).subscribe({
|
||||||
|
error: () => undefined,
|
||||||
|
})
|
||||||
|
|
||||||
|
expect(reloadSpy).not.toHaveBeenCalled()
|
||||||
|
})
|
||||||
|
|
||||||
|
it('reloads only once even with multiple API 401 responses', () => {
|
||||||
|
const reloadSpy = jest
|
||||||
|
.spyOn(navUtils, 'locationReload')
|
||||||
|
.mockImplementation(() => {})
|
||||||
|
|
||||||
|
const request = new HttpRequest('GET', '/api/documents/')
|
||||||
|
const handler = failingHandler('/api/documents/', 401)
|
||||||
|
|
||||||
|
interceptor(request, handler).subscribe({
|
||||||
|
error: () => undefined,
|
||||||
|
})
|
||||||
|
interceptor(request, handler).subscribe({
|
||||||
|
error: () => undefined,
|
||||||
|
})
|
||||||
|
|
||||||
|
expect(reloadSpy).toHaveBeenCalledTimes(1)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('retries reload after cooldown for repeated API 401 responses', () => {
|
||||||
|
const reloadSpy = jest
|
||||||
|
.spyOn(navUtils, 'locationReload')
|
||||||
|
.mockImplementation(() => {})
|
||||||
|
|
||||||
|
dateNowSpy
|
||||||
|
.mockReturnValueOnce(1000)
|
||||||
|
.mockReturnValueOnce(2500)
|
||||||
|
.mockReturnValueOnce(3501)
|
||||||
|
|
||||||
|
const request = new HttpRequest('GET', '/api/documents/')
|
||||||
|
const handler = failingHandler('/api/documents/', 401)
|
||||||
|
|
||||||
|
interceptor(request, handler).subscribe({
|
||||||
|
error: () => undefined,
|
||||||
|
})
|
||||||
|
interceptor(request, handler).subscribe({
|
||||||
|
error: () => undefined,
|
||||||
|
})
|
||||||
|
interceptor(request, handler).subscribe({
|
||||||
|
error: () => undefined,
|
||||||
|
})
|
||||||
|
|
||||||
|
expect(reloadSpy).toHaveBeenCalledTimes(2)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
function failingHandler(url: string, status: number): HttpHandlerFn {
|
||||||
|
return (_request) =>
|
||||||
|
throwError(
|
||||||
|
() =>
|
||||||
|
new HttpErrorResponse({
|
||||||
|
status,
|
||||||
|
url,
|
||||||
|
})
|
||||||
|
)
|
||||||
|
}
|
||||||
37
src-ui/src/app/interceptors/auth-expiry.interceptor.ts
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
import {
|
||||||
|
HttpErrorResponse,
|
||||||
|
HttpEvent,
|
||||||
|
HttpHandlerFn,
|
||||||
|
HttpInterceptorFn,
|
||||||
|
HttpRequest,
|
||||||
|
} from '@angular/common/http'
|
||||||
|
import { catchError, Observable, throwError } from 'rxjs'
|
||||||
|
import { locationReload } from '../utils/navigation'
|
||||||
|
|
||||||
|
export const createAuthExpiryInterceptor = (): HttpInterceptorFn => {
|
||||||
|
let lastReloadAttempt = Number.NEGATIVE_INFINITY
|
||||||
|
|
||||||
|
return (
|
||||||
|
request: HttpRequest<unknown>,
|
||||||
|
next: HttpHandlerFn
|
||||||
|
): Observable<HttpEvent<unknown>> =>
|
||||||
|
next(request).pipe(
|
||||||
|
catchError((error: unknown) => {
|
||||||
|
if (
|
||||||
|
error instanceof HttpErrorResponse &&
|
||||||
|
error.status === 401 &&
|
||||||
|
request.url.includes('/api/')
|
||||||
|
) {
|
||||||
|
const now = Date.now()
|
||||||
|
if (now - lastReloadAttempt >= 2000) {
|
||||||
|
lastReloadAttempt = now
|
||||||
|
locationReload()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return throwError(() => error)
|
||||||
|
})
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
export const withAuthExpiryInterceptor = createAuthExpiryInterceptor()
|
||||||
@@ -6,7 +6,7 @@ export const environment = {
|
|||||||
apiVersion: '10', // match src/paperless/settings.py
|
apiVersion: '10', // match src/paperless/settings.py
|
||||||
appTitle: 'Paperless-ngx',
|
appTitle: 'Paperless-ngx',
|
||||||
tag: 'prod',
|
tag: 'prod',
|
||||||
version: '2.20.10',
|
version: '2.20.13',
|
||||||
webSocketHost: window.location.host,
|
webSocketHost: window.location.host,
|
||||||
webSocketProtocol: window.location.protocol == 'https:' ? 'wss:' : 'ws:',
|
webSocketProtocol: window.location.protocol == 'https:' ? 'wss:' : 'ws:',
|
||||||
webSocketBaseUrl: base_url.pathname + 'ws/',
|
webSocketBaseUrl: base_url.pathname + 'ws/',
|
||||||
|
|||||||
@@ -154,6 +154,7 @@ import { DirtyDocGuard } from './app/guards/dirty-doc.guard'
|
|||||||
import { DirtySavedViewGuard } from './app/guards/dirty-saved-view.guard'
|
import { DirtySavedViewGuard } from './app/guards/dirty-saved-view.guard'
|
||||||
import { PermissionsGuard } from './app/guards/permissions.guard'
|
import { PermissionsGuard } from './app/guards/permissions.guard'
|
||||||
import { withApiVersionInterceptor } from './app/interceptors/api-version.interceptor'
|
import { withApiVersionInterceptor } from './app/interceptors/api-version.interceptor'
|
||||||
|
import { withAuthExpiryInterceptor } from './app/interceptors/auth-expiry.interceptor'
|
||||||
import { withCsrfInterceptor } from './app/interceptors/csrf.interceptor'
|
import { withCsrfInterceptor } from './app/interceptors/csrf.interceptor'
|
||||||
import { DocumentTitlePipe } from './app/pipes/document-title.pipe'
|
import { DocumentTitlePipe } from './app/pipes/document-title.pipe'
|
||||||
import { FilterPipe } from './app/pipes/filter.pipe'
|
import { FilterPipe } from './app/pipes/filter.pipe'
|
||||||
@@ -399,7 +400,11 @@ bootstrapApplication(AppComponent, {
|
|||||||
StoragePathNamePipe,
|
StoragePathNamePipe,
|
||||||
provideHttpClient(
|
provideHttpClient(
|
||||||
withInterceptorsFromDi(),
|
withInterceptorsFromDi(),
|
||||||
withInterceptors([withCsrfInterceptor, withApiVersionInterceptor]),
|
withInterceptors([
|
||||||
|
withCsrfInterceptor,
|
||||||
|
withApiVersionInterceptor,
|
||||||
|
withAuthExpiryInterceptor,
|
||||||
|
]),
|
||||||
withFetch()
|
withFetch()
|
||||||
),
|
),
|
||||||
provideUiTour({
|
provideUiTour({
|
||||||
|
|||||||
@@ -150,6 +150,15 @@ $form-check-radio-checked-bg-image-dark: url("data:image/svg+xml,<svg xmlns='htt
|
|||||||
background-color: var(--pngx-body-color-accent);
|
background-color: var(--pngx-body-color-accent);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.list-group-item-action:not(.active):active {
|
||||||
|
--bs-list-group-action-active-color: var(--bs-body-color);
|
||||||
|
--bs-list-group-action-active-bg: var(--pngx-bg-darker);
|
||||||
|
}
|
||||||
|
|
||||||
|
.form-control:hover::file-selector-button {
|
||||||
|
background-color:var(--pngx-bg-dark) !important
|
||||||
|
}
|
||||||
|
|
||||||
.search-container {
|
.search-container {
|
||||||
input, input:focus, i-bs[name="search"] , ::placeholder {
|
input, input:focus, i-bs[name="search"] , ::placeholder {
|
||||||
color: var(--pngx-primary-text-contrast) !important;
|
color: var(--pngx-primary-text-contrast) !important;
|
||||||
|
|||||||
@@ -3,24 +3,19 @@ from django.core.checks import Error
|
|||||||
from django.core.checks import Warning
|
from django.core.checks import Warning
|
||||||
from django.core.checks import register
|
from django.core.checks import register
|
||||||
|
|
||||||
from documents.signals import document_consumer_declaration
|
|
||||||
from documents.templating.utils import convert_format_str_to_template_format
|
from documents.templating.utils import convert_format_str_to_template_format
|
||||||
|
from paperless.parsers.registry import get_parser_registry
|
||||||
|
|
||||||
|
|
||||||
@register()
|
@register()
|
||||||
def parser_check(app_configs, **kwargs):
|
def parser_check(app_configs, **kwargs):
|
||||||
parsers = []
|
if not get_parser_registry().all_parsers():
|
||||||
for response in document_consumer_declaration.send(None):
|
|
||||||
parsers.append(response[1])
|
|
||||||
|
|
||||||
if len(parsers) == 0:
|
|
||||||
return [
|
return [
|
||||||
Error(
|
Error(
|
||||||
"No parsers found. This is a bug. The consumer won't be "
|
"No parsers found. This is a bug. The consumer won't be "
|
||||||
"able to consume any documents without parsers.",
|
"able to consume any documents without parsers.",
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
else:
|
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ from pathlib import Path
|
|||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
|
from collections.abc import Callable
|
||||||
from collections.abc import Iterator
|
from collections.abc import Iterator
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
@@ -191,7 +192,12 @@ class DocumentClassifier:
|
|||||||
|
|
||||||
target_file_temp.rename(target_file)
|
target_file_temp.rename(target_file)
|
||||||
|
|
||||||
def train(self) -> bool:
|
def train(
|
||||||
|
self,
|
||||||
|
status_callback: Callable[[str], None] | None = None,
|
||||||
|
) -> bool:
|
||||||
|
notify = status_callback if status_callback is not None else lambda _: None
|
||||||
|
|
||||||
# Get non-inbox documents
|
# Get non-inbox documents
|
||||||
docs_queryset = (
|
docs_queryset = (
|
||||||
Document.objects.exclude(
|
Document.objects.exclude(
|
||||||
@@ -213,6 +219,7 @@ class DocumentClassifier:
|
|||||||
|
|
||||||
# Step 1: Extract and preprocess training data from the database.
|
# Step 1: Extract and preprocess training data from the database.
|
||||||
logger.debug("Gathering data from database...")
|
logger.debug("Gathering data from database...")
|
||||||
|
notify(f"Gathering data from {docs_queryset.count()} document(s)...")
|
||||||
hasher = sha256()
|
hasher = sha256()
|
||||||
for doc in docs_queryset:
|
for doc in docs_queryset:
|
||||||
y = -1
|
y = -1
|
||||||
@@ -290,6 +297,7 @@ class DocumentClassifier:
|
|||||||
|
|
||||||
# Step 2: vectorize data
|
# Step 2: vectorize data
|
||||||
logger.debug("Vectorizing data...")
|
logger.debug("Vectorizing data...")
|
||||||
|
notify("Vectorizing document content...")
|
||||||
|
|
||||||
def content_generator() -> Iterator[str]:
|
def content_generator() -> Iterator[str]:
|
||||||
"""
|
"""
|
||||||
@@ -316,6 +324,7 @@ class DocumentClassifier:
|
|||||||
# Step 3: train the classifiers
|
# Step 3: train the classifiers
|
||||||
if num_tags > 0:
|
if num_tags > 0:
|
||||||
logger.debug("Training tags classifier...")
|
logger.debug("Training tags classifier...")
|
||||||
|
notify(f"Training tags classifier ({num_tags} tag(s))...")
|
||||||
|
|
||||||
if num_tags == 1:
|
if num_tags == 1:
|
||||||
# Special case where only one tag has auto:
|
# Special case where only one tag has auto:
|
||||||
@@ -339,6 +348,9 @@ class DocumentClassifier:
|
|||||||
|
|
||||||
if num_correspondents > 0:
|
if num_correspondents > 0:
|
||||||
logger.debug("Training correspondent classifier...")
|
logger.debug("Training correspondent classifier...")
|
||||||
|
notify(
|
||||||
|
f"Training correspondent classifier ({num_correspondents} correspondent(s))...",
|
||||||
|
)
|
||||||
self.correspondent_classifier = MLPClassifier(tol=0.01)
|
self.correspondent_classifier = MLPClassifier(tol=0.01)
|
||||||
self.correspondent_classifier.fit(data_vectorized, labels_correspondent)
|
self.correspondent_classifier.fit(data_vectorized, labels_correspondent)
|
||||||
else:
|
else:
|
||||||
@@ -349,6 +361,9 @@ class DocumentClassifier:
|
|||||||
|
|
||||||
if num_document_types > 0:
|
if num_document_types > 0:
|
||||||
logger.debug("Training document type classifier...")
|
logger.debug("Training document type classifier...")
|
||||||
|
notify(
|
||||||
|
f"Training document type classifier ({num_document_types} type(s))...",
|
||||||
|
)
|
||||||
self.document_type_classifier = MLPClassifier(tol=0.01)
|
self.document_type_classifier = MLPClassifier(tol=0.01)
|
||||||
self.document_type_classifier.fit(data_vectorized, labels_document_type)
|
self.document_type_classifier.fit(data_vectorized, labels_document_type)
|
||||||
else:
|
else:
|
||||||
@@ -361,6 +376,7 @@ class DocumentClassifier:
|
|||||||
logger.debug(
|
logger.debug(
|
||||||
"Training storage paths classifier...",
|
"Training storage paths classifier...",
|
||||||
)
|
)
|
||||||
|
notify(f"Training storage path classifier ({num_storage_paths} path(s))...")
|
||||||
self.storage_path_classifier = MLPClassifier(tol=0.01)
|
self.storage_path_classifier = MLPClassifier(tol=0.01)
|
||||||
self.storage_path_classifier.fit(
|
self.storage_path_classifier.fit(
|
||||||
data_vectorized,
|
data_vectorized,
|
||||||
|
|||||||
@@ -32,9 +32,7 @@ from documents.models import DocumentType
|
|||||||
from documents.models import StoragePath
|
from documents.models import StoragePath
|
||||||
from documents.models import Tag
|
from documents.models import Tag
|
||||||
from documents.models import WorkflowTrigger
|
from documents.models import WorkflowTrigger
|
||||||
from documents.parsers import DocumentParser
|
|
||||||
from documents.parsers import ParseError
|
from documents.parsers import ParseError
|
||||||
from documents.parsers import get_parser_class_for_mime_type
|
|
||||||
from documents.permissions import set_permissions_for_object
|
from documents.permissions import set_permissions_for_object
|
||||||
from documents.plugins.base import AlwaysRunPluginMixin
|
from documents.plugins.base import AlwaysRunPluginMixin
|
||||||
from documents.plugins.base import ConsumeTaskPlugin
|
from documents.plugins.base import ConsumeTaskPlugin
|
||||||
@@ -51,28 +49,13 @@ from documents.templating.workflows import parse_w_workflow_placeholders
|
|||||||
from documents.utils import copy_basic_file_stats
|
from documents.utils import copy_basic_file_stats
|
||||||
from documents.utils import copy_file_with_basic_stats
|
from documents.utils import copy_file_with_basic_stats
|
||||||
from documents.utils import run_subprocess
|
from documents.utils import run_subprocess
|
||||||
from paperless.parsers.text import TextDocumentParser
|
from paperless.parsers import ParserContext
|
||||||
from paperless_mail.parsers import MailDocumentParser
|
from paperless.parsers import ParserProtocol
|
||||||
|
from paperless.parsers.registry import get_parser_registry
|
||||||
|
|
||||||
LOGGING_NAME: Final[str] = "paperless.consumer"
|
LOGGING_NAME: Final[str] = "paperless.consumer"
|
||||||
|
|
||||||
|
|
||||||
def _parser_cleanup(parser: DocumentParser) -> None:
|
|
||||||
"""
|
|
||||||
Call cleanup on a parser, handling the new-style context-manager parsers.
|
|
||||||
|
|
||||||
New-style parsers (e.g. TextDocumentParser) use __exit__ for teardown
|
|
||||||
instead of a cleanup() method. This shim will be removed once all existing parsers
|
|
||||||
have switched to the new style and this consumer is updated to use it
|
|
||||||
|
|
||||||
TODO(stumpylog): Remove me in the future
|
|
||||||
"""
|
|
||||||
if isinstance(parser, TextDocumentParser):
|
|
||||||
parser.__exit__(None, None, None)
|
|
||||||
else:
|
|
||||||
parser.cleanup()
|
|
||||||
|
|
||||||
|
|
||||||
class WorkflowTriggerPlugin(
|
class WorkflowTriggerPlugin(
|
||||||
NoCleanupPluginMixin,
|
NoCleanupPluginMixin,
|
||||||
NoSetupPluginMixin,
|
NoSetupPluginMixin,
|
||||||
@@ -409,8 +392,12 @@ class ConsumerPlugin(
|
|||||||
self.log.error(f"Error attempting to clean PDF: {e}")
|
self.log.error(f"Error attempting to clean PDF: {e}")
|
||||||
|
|
||||||
# Based on the mime type, get the parser for that type
|
# Based on the mime type, get the parser for that type
|
||||||
parser_class: type[DocumentParser] | None = get_parser_class_for_mime_type(
|
parser_class: type[ParserProtocol] | None = (
|
||||||
|
get_parser_registry().get_parser_for_file(
|
||||||
mime_type,
|
mime_type,
|
||||||
|
self.filename,
|
||||||
|
self.working_copy,
|
||||||
|
)
|
||||||
)
|
)
|
||||||
if not parser_class:
|
if not parser_class:
|
||||||
tempdir.cleanup()
|
tempdir.cleanup()
|
||||||
@@ -433,22 +420,13 @@ class ConsumerPlugin(
|
|||||||
tempdir.cleanup()
|
tempdir.cleanup()
|
||||||
raise
|
raise
|
||||||
|
|
||||||
def progress_callback(
|
|
||||||
current_progress,
|
|
||||||
max_progress,
|
|
||||||
) -> None: # pragma: no cover
|
|
||||||
# recalculate progress to be within 20 and 80
|
|
||||||
p = int((current_progress / max_progress) * 50 + 20)
|
|
||||||
self._send_progress(p, 100, ProgressStatusOptions.WORKING)
|
|
||||||
|
|
||||||
# This doesn't parse the document yet, but gives us a parser.
|
# This doesn't parse the document yet, but gives us a parser.
|
||||||
|
with parser_class() as document_parser:
|
||||||
document_parser: DocumentParser = parser_class(
|
document_parser.configure(
|
||||||
self.logging_group,
|
ParserContext(mailrule_id=self.input_doc.mailrule_id),
|
||||||
progress_callback=progress_callback,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
self.log.debug(f"Parser: {type(document_parser).__name__}")
|
self.log.debug(f"Parser: {document_parser.name} v{document_parser.version}")
|
||||||
|
|
||||||
# Parse the document. This may take some time.
|
# Parse the document. This may take some time.
|
||||||
|
|
||||||
@@ -466,21 +444,8 @@ class ConsumerPlugin(
|
|||||||
ConsumerStatusShortMessage.PARSING_DOCUMENT,
|
ConsumerStatusShortMessage.PARSING_DOCUMENT,
|
||||||
)
|
)
|
||||||
self.log.debug(f"Parsing {self.filename}...")
|
self.log.debug(f"Parsing {self.filename}...")
|
||||||
if (
|
|
||||||
isinstance(document_parser, MailDocumentParser)
|
|
||||||
and self.input_doc.mailrule_id
|
|
||||||
):
|
|
||||||
document_parser.parse(
|
|
||||||
self.working_copy,
|
|
||||||
mime_type,
|
|
||||||
self.filename,
|
|
||||||
self.input_doc.mailrule_id,
|
|
||||||
)
|
|
||||||
elif isinstance(document_parser, TextDocumentParser):
|
|
||||||
# TODO(stumpylog): Remove me in the future
|
|
||||||
document_parser.parse(self.working_copy, mime_type)
|
document_parser.parse(self.working_copy, mime_type)
|
||||||
else:
|
|
||||||
document_parser.parse(self.working_copy, mime_type, self.filename)
|
|
||||||
|
|
||||||
self.log.debug(f"Generating thumbnail for {self.filename}...")
|
self.log.debug(f"Generating thumbnail for {self.filename}...")
|
||||||
self._send_progress(
|
self._send_progress(
|
||||||
@@ -489,15 +454,7 @@ class ConsumerPlugin(
|
|||||||
ProgressStatusOptions.WORKING,
|
ProgressStatusOptions.WORKING,
|
||||||
ConsumerStatusShortMessage.GENERATING_THUMBNAIL,
|
ConsumerStatusShortMessage.GENERATING_THUMBNAIL,
|
||||||
)
|
)
|
||||||
if isinstance(document_parser, TextDocumentParser):
|
|
||||||
# TODO(stumpylog): Remove me in the future
|
|
||||||
thumbnail = document_parser.get_thumbnail(self.working_copy, mime_type)
|
thumbnail = document_parser.get_thumbnail(self.working_copy, mime_type)
|
||||||
else:
|
|
||||||
thumbnail = document_parser.get_thumbnail(
|
|
||||||
self.working_copy,
|
|
||||||
mime_type,
|
|
||||||
self.filename,
|
|
||||||
)
|
|
||||||
|
|
||||||
text = document_parser.get_text()
|
text = document_parser.get_text()
|
||||||
date = document_parser.get_date()
|
date = document_parser.get_date()
|
||||||
@@ -511,10 +468,12 @@ class ConsumerPlugin(
|
|||||||
with get_date_parser() as date_parser:
|
with get_date_parser() as date_parser:
|
||||||
date = next(date_parser.parse(self.filename, text), None)
|
date = next(date_parser.parse(self.filename, text), None)
|
||||||
archive_path = document_parser.get_archive_path()
|
archive_path = document_parser.get_archive_path()
|
||||||
page_count = document_parser.get_page_count(self.working_copy, mime_type)
|
page_count = document_parser.get_page_count(
|
||||||
|
self.working_copy,
|
||||||
|
mime_type,
|
||||||
|
)
|
||||||
|
|
||||||
except ParseError as e:
|
except ParseError as e:
|
||||||
_parser_cleanup(document_parser)
|
|
||||||
if tempdir:
|
if tempdir:
|
||||||
tempdir.cleanup()
|
tempdir.cleanup()
|
||||||
self._fail(
|
self._fail(
|
||||||
@@ -524,7 +483,6 @@ class ConsumerPlugin(
|
|||||||
exception=e,
|
exception=e,
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
_parser_cleanup(document_parser)
|
|
||||||
if tempdir:
|
if tempdir:
|
||||||
tempdir.cleanup()
|
tempdir.cleanup()
|
||||||
self._fail(
|
self._fail(
|
||||||
@@ -573,7 +531,9 @@ class ConsumerPlugin(
|
|||||||
settings.AUDIT_LOG_ENABLED
|
settings.AUDIT_LOG_ENABLED
|
||||||
and self.metadata.actor_id is not None
|
and self.metadata.actor_id is not None
|
||||||
):
|
):
|
||||||
actor = User.objects.filter(pk=self.metadata.actor_id).first()
|
actor = User.objects.filter(
|
||||||
|
pk=self.metadata.actor_id,
|
||||||
|
).first()
|
||||||
if actor is not None:
|
if actor is not None:
|
||||||
from auditlog.context import ( # type: ignore[import-untyped]
|
from auditlog.context import ( # type: ignore[import-untyped]
|
||||||
set_actor,
|
set_actor,
|
||||||
@@ -697,7 +657,9 @@ class ConsumerPlugin(
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Delete the file only if it was successfully consumed
|
# Delete the file only if it was successfully consumed
|
||||||
self.log.debug(f"Deleting original file {self.input_doc.original_file}")
|
self.log.debug(
|
||||||
|
f"Deleting original file {self.input_doc.original_file}",
|
||||||
|
)
|
||||||
self.input_doc.original_file.unlink()
|
self.input_doc.original_file.unlink()
|
||||||
self.log.debug(f"Deleting working copy {self.working_copy}")
|
self.log.debug(f"Deleting working copy {self.working_copy}")
|
||||||
self.working_copy.unlink()
|
self.working_copy.unlink()
|
||||||
@@ -726,7 +688,6 @@ class ConsumerPlugin(
|
|||||||
exception=e,
|
exception=e,
|
||||||
)
|
)
|
||||||
finally:
|
finally:
|
||||||
_parser_cleanup(document_parser)
|
|
||||||
tempdir.cleanup()
|
tempdir.cleanup()
|
||||||
|
|
||||||
self.run_post_consume_script(document)
|
self.run_post_consume_script(document)
|
||||||
|
|||||||
@@ -477,6 +477,13 @@ class DelayedFullTextQuery(DelayedQuery):
|
|||||||
try:
|
try:
|
||||||
corrected = self.searcher.correct_query(q, q_str)
|
corrected = self.searcher.correct_query(q, q_str)
|
||||||
if corrected.string != q_str:
|
if corrected.string != q_str:
|
||||||
|
corrected_results = self.searcher.search(
|
||||||
|
corrected.query,
|
||||||
|
limit=1,
|
||||||
|
filter=MappedDocIdSet(self.filter_queryset, self.searcher.ixreader),
|
||||||
|
scored=False,
|
||||||
|
)
|
||||||
|
if len(corrected_results) > 0:
|
||||||
suggested_correction = corrected.string
|
suggested_correction = corrected.string
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.info(
|
logger.info(
|
||||||
|
|||||||
@@ -1,13 +1,32 @@
|
|||||||
from django.core.management.base import BaseCommand
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import time
|
||||||
|
|
||||||
|
from documents.management.commands.base import PaperlessCommand
|
||||||
from documents.tasks import train_classifier
|
from documents.tasks import train_classifier
|
||||||
|
|
||||||
|
|
||||||
class Command(BaseCommand):
|
class Command(PaperlessCommand):
|
||||||
help = (
|
help = (
|
||||||
"Trains the classifier on your data and saves the resulting models to a "
|
"Trains the classifier on your data and saves the resulting models to a "
|
||||||
"file. The document consumer will then automatically use this new model."
|
"file. The document consumer will then automatically use this new model."
|
||||||
)
|
)
|
||||||
|
supports_progress_bar = False
|
||||||
|
supports_multiprocessing = False
|
||||||
|
|
||||||
def handle(self, *args, **options):
|
def handle(self, *args, **options) -> None:
|
||||||
train_classifier(scheduled=False)
|
start = time.monotonic()
|
||||||
|
|
||||||
|
with (
|
||||||
|
self.buffered_logging("paperless.tasks"),
|
||||||
|
self.buffered_logging("paperless.classifier"),
|
||||||
|
):
|
||||||
|
train_classifier(
|
||||||
|
scheduled=False,
|
||||||
|
status_callback=lambda msg: self.console.print(f" {msg}"),
|
||||||
|
)
|
||||||
|
|
||||||
|
elapsed = time.monotonic() - start
|
||||||
|
self.console.print(
|
||||||
|
f"[green]✓[/green] Classifier training complete ({elapsed:.1f}s)",
|
||||||
|
)
|
||||||
|
|||||||
@@ -205,7 +205,7 @@ class Command(CryptMixin, PaperlessCommand):
|
|||||||
ContentType.objects.all().delete()
|
ContentType.objects.all().delete()
|
||||||
Permission.objects.all().delete()
|
Permission.objects.all().delete()
|
||||||
for manifest_path in self.manifest_paths:
|
for manifest_path in self.manifest_paths:
|
||||||
call_command("loaddata", manifest_path)
|
call_command("loaddata", manifest_path, skip_checks=True)
|
||||||
except (FieldDoesNotExist, DeserializationError, IntegrityError) as e:
|
except (FieldDoesNotExist, DeserializationError, IntegrityError) as e:
|
||||||
self.stdout.write(self.style.ERROR("Database import failed"))
|
self.stdout.write(self.style.ERROR("Database import failed"))
|
||||||
if (
|
if (
|
||||||
|
|||||||
@@ -3,14 +3,18 @@ import shutil
|
|||||||
|
|
||||||
from documents.management.commands.base import PaperlessCommand
|
from documents.management.commands.base import PaperlessCommand
|
||||||
from documents.models import Document
|
from documents.models import Document
|
||||||
from documents.parsers import get_parser_class_for_mime_type
|
from paperless.parsers.registry import get_parser_registry
|
||||||
|
|
||||||
logger = logging.getLogger("paperless.management.thumbnails")
|
logger = logging.getLogger("paperless.management.thumbnails")
|
||||||
|
|
||||||
|
|
||||||
def _process_document(doc_id: int) -> None:
|
def _process_document(doc_id: int) -> None:
|
||||||
document: Document = Document.objects.get(id=doc_id)
|
document: Document = Document.objects.get(id=doc_id)
|
||||||
parser_class = get_parser_class_for_mime_type(document.mime_type)
|
parser_class = get_parser_registry().get_parser_for_file(
|
||||||
|
document.mime_type,
|
||||||
|
document.original_filename or "",
|
||||||
|
document.source_path,
|
||||||
|
)
|
||||||
|
|
||||||
if parser_class is None:
|
if parser_class is None:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
@@ -20,18 +24,9 @@ def _process_document(doc_id: int) -> None:
|
|||||||
)
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
parser = parser_class(logging_group=None)
|
with parser_class() as parser:
|
||||||
|
thumb = parser.get_thumbnail(document.source_path, document.mime_type)
|
||||||
try:
|
|
||||||
thumb = parser.get_thumbnail(
|
|
||||||
document.source_path,
|
|
||||||
document.mime_type,
|
|
||||||
document.get_public_filename(),
|
|
||||||
)
|
|
||||||
shutil.move(thumb, document.thumbnail_path)
|
shutil.move(thumb, document.thumbnail_path)
|
||||||
finally:
|
|
||||||
# TODO(stumpylog): Cleanup once all parsers are handled
|
|
||||||
parser.cleanup()
|
|
||||||
|
|
||||||
|
|
||||||
class Command(PaperlessCommand):
|
class Command(PaperlessCommand):
|
||||||
|
|||||||
@@ -3,84 +3,47 @@ from __future__ import annotations
|
|||||||
import logging
|
import logging
|
||||||
import mimetypes
|
import mimetypes
|
||||||
import os
|
import os
|
||||||
import re
|
|
||||||
import shutil
|
import shutil
|
||||||
import subprocess
|
import subprocess
|
||||||
import tempfile
|
import tempfile
|
||||||
from functools import lru_cache
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
|
|
||||||
from documents.loggers import LoggingMixin
|
from documents.loggers import LoggingMixin
|
||||||
from documents.signals import document_consumer_declaration
|
|
||||||
from documents.utils import copy_file_with_basic_stats
|
from documents.utils import copy_file_with_basic_stats
|
||||||
from documents.utils import run_subprocess
|
from documents.utils import run_subprocess
|
||||||
|
from paperless.parsers.registry import get_parser_registry
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
import datetime
|
import datetime
|
||||||
|
|
||||||
# This regular expression will try to find dates in the document at
|
|
||||||
# hand and will match the following formats:
|
|
||||||
# - XX.YY.ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
|
|
||||||
# - XX/YY/ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
|
|
||||||
# - XX-YY-ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
|
|
||||||
# - ZZZZ.XX.YY with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
|
|
||||||
# - ZZZZ/XX/YY with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
|
|
||||||
# - ZZZZ-XX-YY with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
|
|
||||||
# - XX. MONTH ZZZZ with XX being 1 or 2 and ZZZZ being 2 or 4 digits
|
|
||||||
# - MONTH ZZZZ, with ZZZZ being 4 digits
|
|
||||||
# - MONTH XX, ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits
|
|
||||||
# - XX MON ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits. MONTH is 3 letters
|
|
||||||
# - XXPP MONTH ZZZZ with XX being 1 or 2 and PP being 2 letters and ZZZZ being 4 digits
|
|
||||||
|
|
||||||
# TODO: isn't there a date parsing library for this?
|
|
||||||
|
|
||||||
DATE_REGEX = re.compile(
|
|
||||||
r"(\b|(?!=([_-])))(\d{1,2})[\.\/-](\d{1,2})[\.\/-](\d{4}|\d{2})(\b|(?=([_-])))|"
|
|
||||||
r"(\b|(?!=([_-])))(\d{4}|\d{2})[\.\/-](\d{1,2})[\.\/-](\d{1,2})(\b|(?=([_-])))|"
|
|
||||||
r"(\b|(?!=([_-])))(\d{1,2}[\. ]+[a-zéûäëčžúřěáíóńźçŞğü]{3,9} \d{4}|[a-zéûäëčžúřěáíóńźçŞğü]{3,9} \d{1,2}, \d{4})(\b|(?=([_-])))|"
|
|
||||||
r"(\b|(?!=([_-])))([^\W\d_]{3,9} \d{1,2}, (\d{4}))(\b|(?=([_-])))|"
|
|
||||||
r"(\b|(?!=([_-])))([^\W\d_]{3,9} \d{4})(\b|(?=([_-])))|"
|
|
||||||
r"(\b|(?!=([_-])))(\d{1,2}[^ 0-9]{2}[\. ]+[^ ]{3,9}[ \.\/-]\d{4})(\b|(?=([_-])))|"
|
|
||||||
r"(\b|(?!=([_-])))(\b\d{1,2}[ \.\/-][a-zéûäëčžúřěáíóńźçŞğü]{3}[ \.\/-]\d{4})(\b|(?=([_-])))",
|
|
||||||
re.IGNORECASE,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger("paperless.parsing")
|
logger = logging.getLogger("paperless.parsing")
|
||||||
|
|
||||||
|
|
||||||
@lru_cache(maxsize=8)
|
|
||||||
def is_mime_type_supported(mime_type: str) -> bool:
|
def is_mime_type_supported(mime_type: str) -> bool:
|
||||||
"""
|
"""
|
||||||
Returns True if the mime type is supported, False otherwise
|
Returns True if the mime type is supported, False otherwise
|
||||||
"""
|
"""
|
||||||
return get_parser_class_for_mime_type(mime_type) is not None
|
return get_parser_registry().get_parser_for_file(mime_type, "") is not None
|
||||||
|
|
||||||
|
|
||||||
@lru_cache(maxsize=8)
|
|
||||||
def get_default_file_extension(mime_type: str) -> str:
|
def get_default_file_extension(mime_type: str) -> str:
|
||||||
"""
|
"""
|
||||||
Returns the default file extension for a mimetype, or
|
Returns the default file extension for a mimetype, or
|
||||||
an empty string if it could not be determined
|
an empty string if it could not be determined
|
||||||
"""
|
"""
|
||||||
for response in document_consumer_declaration.send(None):
|
parser_class = get_parser_registry().get_parser_for_file(mime_type, "")
|
||||||
parser_declaration = response[1]
|
if parser_class is not None:
|
||||||
supported_mime_types = parser_declaration["mime_types"]
|
supported = parser_class.supported_mime_types()
|
||||||
|
if mime_type in supported:
|
||||||
if mime_type in supported_mime_types:
|
return supported[mime_type]
|
||||||
return supported_mime_types[mime_type]
|
|
||||||
|
|
||||||
ext = mimetypes.guess_extension(mime_type)
|
ext = mimetypes.guess_extension(mime_type)
|
||||||
if ext:
|
return ext if ext else ""
|
||||||
return ext
|
|
||||||
else:
|
|
||||||
return ""
|
|
||||||
|
|
||||||
|
|
||||||
@lru_cache(maxsize=8)
|
|
||||||
def is_file_ext_supported(ext: str) -> bool:
|
def is_file_ext_supported(ext: str) -> bool:
|
||||||
"""
|
"""
|
||||||
Returns True if the file extension is supported, False otherwise
|
Returns True if the file extension is supported, False otherwise
|
||||||
@@ -94,44 +57,17 @@ def is_file_ext_supported(ext: str) -> bool:
|
|||||||
|
|
||||||
def get_supported_file_extensions() -> set[str]:
|
def get_supported_file_extensions() -> set[str]:
|
||||||
extensions = set()
|
extensions = set()
|
||||||
for response in document_consumer_declaration.send(None):
|
for parser_class in get_parser_registry().all_parsers():
|
||||||
parser_declaration = response[1]
|
for mime_type, ext in parser_class.supported_mime_types().items():
|
||||||
supported_mime_types = parser_declaration["mime_types"]
|
|
||||||
|
|
||||||
for mime_type in supported_mime_types:
|
|
||||||
extensions.update(mimetypes.guess_all_extensions(mime_type))
|
extensions.update(mimetypes.guess_all_extensions(mime_type))
|
||||||
# Python's stdlib might be behind, so also add what the parser
|
# Python's stdlib might be behind, so also add what the parser
|
||||||
# says is the default extension
|
# says is the default extension
|
||||||
# This makes image/webp supported on Python < 3.11
|
# This makes image/webp supported on Python < 3.11
|
||||||
extensions.add(supported_mime_types[mime_type])
|
extensions.add(ext)
|
||||||
|
|
||||||
return extensions
|
return extensions
|
||||||
|
|
||||||
|
|
||||||
def get_parser_class_for_mime_type(mime_type: str) -> type[DocumentParser] | None:
|
|
||||||
"""
|
|
||||||
Returns the best parser (by weight) for the given mimetype or
|
|
||||||
None if no parser exists
|
|
||||||
"""
|
|
||||||
|
|
||||||
options = []
|
|
||||||
|
|
||||||
for response in document_consumer_declaration.send(None):
|
|
||||||
parser_declaration = response[1]
|
|
||||||
supported_mime_types = parser_declaration["mime_types"]
|
|
||||||
|
|
||||||
if mime_type in supported_mime_types:
|
|
||||||
options.append(parser_declaration)
|
|
||||||
|
|
||||||
if not options:
|
|
||||||
return None
|
|
||||||
|
|
||||||
best_parser = sorted(options, key=lambda _: _["weight"], reverse=True)[0]
|
|
||||||
|
|
||||||
# Return the parser with the highest weight.
|
|
||||||
return best_parser["parser"]
|
|
||||||
|
|
||||||
|
|
||||||
def run_convert(
|
def run_convert(
|
||||||
input_file,
|
input_file,
|
||||||
output_file,
|
output_file,
|
||||||
|
|||||||
@@ -797,6 +797,25 @@ class ReadWriteSerializerMethodField(serializers.SerializerMethodField):
|
|||||||
return {self.field_name: data}
|
return {self.field_name: data}
|
||||||
|
|
||||||
|
|
||||||
|
def validate_documentlink_targets(user, doc_ids):
|
||||||
|
if Document.objects.filter(id__in=doc_ids).count() != len(doc_ids):
|
||||||
|
raise serializers.ValidationError(
|
||||||
|
"Some documents in value don't exist or were specified twice.",
|
||||||
|
)
|
||||||
|
|
||||||
|
if user is None:
|
||||||
|
return
|
||||||
|
|
||||||
|
target_documents = Document.objects.filter(id__in=doc_ids).select_related("owner")
|
||||||
|
if not all(
|
||||||
|
has_perms_owner_aware(user, "change_document", document)
|
||||||
|
for document in target_documents
|
||||||
|
):
|
||||||
|
raise PermissionDenied(
|
||||||
|
_("Insufficient permissions."),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class CustomFieldInstanceSerializer(serializers.ModelSerializer):
|
class CustomFieldInstanceSerializer(serializers.ModelSerializer):
|
||||||
field = serializers.PrimaryKeyRelatedField(queryset=CustomField.objects.all())
|
field = serializers.PrimaryKeyRelatedField(queryset=CustomField.objects.all())
|
||||||
value = ReadWriteSerializerMethodField(allow_null=True)
|
value = ReadWriteSerializerMethodField(allow_null=True)
|
||||||
@@ -887,11 +906,10 @@ class CustomFieldInstanceSerializer(serializers.ModelSerializer):
|
|||||||
"Value must be a list",
|
"Value must be a list",
|
||||||
)
|
)
|
||||||
doc_ids = data["value"]
|
doc_ids = data["value"]
|
||||||
if Document.objects.filter(id__in=doc_ids).count() != len(
|
request = self.context.get("request")
|
||||||
data["value"],
|
validate_documentlink_targets(
|
||||||
):
|
getattr(request, "user", None) if request is not None else None,
|
||||||
raise serializers.ValidationError(
|
doc_ids,
|
||||||
"Some documents in value don't exist or were specified twice.",
|
|
||||||
)
|
)
|
||||||
|
|
||||||
return data
|
return data
|
||||||
@@ -1713,6 +1731,19 @@ class BulkEditSerializer(
|
|||||||
f"Some custom fields in {name} don't exist or were specified twice.",
|
f"Some custom fields in {name} don't exist or were specified twice.",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if isinstance(custom_fields, dict):
|
||||||
|
custom_field_map = CustomField.objects.in_bulk(ids)
|
||||||
|
for raw_field_id, value in custom_fields.items():
|
||||||
|
field = custom_field_map.get(int(raw_field_id))
|
||||||
|
if (
|
||||||
|
field is not None
|
||||||
|
and field.data_type == CustomField.FieldDataType.DOCUMENTLINK
|
||||||
|
and value is not None
|
||||||
|
):
|
||||||
|
if not isinstance(value, list):
|
||||||
|
raise serializers.ValidationError("Value must be a list")
|
||||||
|
validate_documentlink_targets(self.user, value)
|
||||||
|
|
||||||
def validate_method(self, method):
|
def validate_method(self, method):
|
||||||
if method == "set_correspondent":
|
if method == "set_correspondent":
|
||||||
return bulk_edit.set_correspondent
|
return bulk_edit.set_correspondent
|
||||||
|
|||||||
@@ -2,5 +2,4 @@ from django.dispatch import Signal
|
|||||||
|
|
||||||
document_consumption_started = Signal()
|
document_consumption_started = Signal()
|
||||||
document_consumption_finished = Signal()
|
document_consumption_finished = Signal()
|
||||||
document_consumer_declaration = Signal()
|
|
||||||
document_updated = Signal()
|
document_updated = Signal()
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import hashlib
|
||||||
import logging
|
import logging
|
||||||
import shutil
|
import shutil
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
@@ -403,6 +404,14 @@ class CannotMoveFilesException(Exception):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def _path_matches_checksum(path: Path, checksum: str | None) -> bool:
|
||||||
|
if checksum is None or not path.is_file():
|
||||||
|
return False
|
||||||
|
|
||||||
|
with path.open("rb") as f:
|
||||||
|
return hashlib.md5(f.read()).hexdigest() == checksum
|
||||||
|
|
||||||
|
|
||||||
def _filename_template_uses_custom_fields(doc: Document) -> bool:
|
def _filename_template_uses_custom_fields(doc: Document) -> bool:
|
||||||
template = None
|
template = None
|
||||||
if doc.storage_path is not None:
|
if doc.storage_path is not None:
|
||||||
@@ -473,10 +482,12 @@ def update_filename_and_move_files(
|
|||||||
old_filename = instance.filename
|
old_filename = instance.filename
|
||||||
old_source_path = instance.source_path
|
old_source_path = instance.source_path
|
||||||
move_original = False
|
move_original = False
|
||||||
|
original_already_moved = False
|
||||||
|
|
||||||
old_archive_filename = instance.archive_filename
|
old_archive_filename = instance.archive_filename
|
||||||
old_archive_path = instance.archive_path
|
old_archive_path = instance.archive_path
|
||||||
move_archive = False
|
move_archive = False
|
||||||
|
archive_already_moved = False
|
||||||
|
|
||||||
candidate_filename = generate_filename(instance)
|
candidate_filename = generate_filename(instance)
|
||||||
if len(str(candidate_filename)) > Document.MAX_STORED_FILENAME_LENGTH:
|
if len(str(candidate_filename)) > Document.MAX_STORED_FILENAME_LENGTH:
|
||||||
@@ -497,6 +508,13 @@ def update_filename_and_move_files(
|
|||||||
candidate_source_path.exists()
|
candidate_source_path.exists()
|
||||||
and candidate_source_path != old_source_path
|
and candidate_source_path != old_source_path
|
||||||
):
|
):
|
||||||
|
if not old_source_path.is_file() and _path_matches_checksum(
|
||||||
|
candidate_source_path,
|
||||||
|
instance.checksum,
|
||||||
|
):
|
||||||
|
new_filename = candidate_filename
|
||||||
|
original_already_moved = True
|
||||||
|
else:
|
||||||
# Only fall back to unique search when there is an actual conflict
|
# Only fall back to unique search when there is an actual conflict
|
||||||
new_filename = generate_unique_filename(instance)
|
new_filename = generate_unique_filename(instance)
|
||||||
else:
|
else:
|
||||||
@@ -504,7 +522,9 @@ def update_filename_and_move_files(
|
|||||||
|
|
||||||
# Need to convert to string to be able to save it to the db
|
# Need to convert to string to be able to save it to the db
|
||||||
instance.filename = str(new_filename)
|
instance.filename = str(new_filename)
|
||||||
move_original = old_filename != instance.filename
|
move_original = (
|
||||||
|
old_filename != instance.filename and not original_already_moved
|
||||||
|
)
|
||||||
|
|
||||||
if instance.has_archive_version:
|
if instance.has_archive_version:
|
||||||
archive_candidate = generate_filename(instance, archive_filename=True)
|
archive_candidate = generate_filename(instance, archive_filename=True)
|
||||||
@@ -525,6 +545,13 @@ def update_filename_and_move_files(
|
|||||||
archive_candidate_path.exists()
|
archive_candidate_path.exists()
|
||||||
and archive_candidate_path != old_archive_path
|
and archive_candidate_path != old_archive_path
|
||||||
):
|
):
|
||||||
|
if not old_archive_path.is_file() and _path_matches_checksum(
|
||||||
|
archive_candidate_path,
|
||||||
|
instance.archive_checksum,
|
||||||
|
):
|
||||||
|
new_archive_filename = archive_candidate
|
||||||
|
archive_already_moved = True
|
||||||
|
else:
|
||||||
new_archive_filename = generate_unique_filename(
|
new_archive_filename = generate_unique_filename(
|
||||||
instance,
|
instance,
|
||||||
archive_filename=True,
|
archive_filename=True,
|
||||||
@@ -534,15 +561,22 @@ def update_filename_and_move_files(
|
|||||||
|
|
||||||
instance.archive_filename = str(new_archive_filename)
|
instance.archive_filename = str(new_archive_filename)
|
||||||
|
|
||||||
move_archive = old_archive_filename != instance.archive_filename
|
move_archive = (
|
||||||
|
old_archive_filename != instance.archive_filename
|
||||||
|
and not archive_already_moved
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
move_archive = False
|
move_archive = False
|
||||||
|
|
||||||
if not move_original and not move_archive:
|
if not move_original and not move_archive:
|
||||||
# Just update modified. Also, don't save() here to prevent infinite recursion.
|
updates = {"modified": timezone.now()}
|
||||||
Document.objects.filter(pk=instance.pk).update(
|
if old_filename != instance.filename:
|
||||||
modified=timezone.now(),
|
updates["filename"] = instance.filename
|
||||||
)
|
if old_archive_filename != instance.archive_filename:
|
||||||
|
updates["archive_filename"] = instance.archive_filename
|
||||||
|
|
||||||
|
# Don't save() here to prevent infinite recursion.
|
||||||
|
Document.objects.filter(pk=instance.pk).update(**updates)
|
||||||
return
|
return
|
||||||
|
|
||||||
if move_original:
|
if move_original:
|
||||||
@@ -932,8 +966,25 @@ def run_workflows(
|
|||||||
if not use_overrides:
|
if not use_overrides:
|
||||||
# limit title to 128 characters
|
# limit title to 128 characters
|
||||||
document.title = document.title[:128]
|
document.title = document.title[:128]
|
||||||
# save first before setting tags
|
# Save only the fields that workflow actions can set directly.
|
||||||
document.save()
|
# Deliberately excludes filename and archive_filename — those are
|
||||||
|
# managed exclusively by update_filename_and_move_files via the
|
||||||
|
# post_save signal. Writing stale in-memory values here would revert
|
||||||
|
# a concurrent update_filename_and_move_files DB write, leaving the
|
||||||
|
# DB pointing at the old path while the file is already at the new
|
||||||
|
# one (see: https://github.com/paperless-ngx/paperless-ngx/issues/12386).
|
||||||
|
# modified has auto_now=True but is not auto-added when update_fields
|
||||||
|
# is specified, so it must be listed explicitly.
|
||||||
|
document.save(
|
||||||
|
update_fields=[
|
||||||
|
"title",
|
||||||
|
"correspondent",
|
||||||
|
"document_type",
|
||||||
|
"storage_path",
|
||||||
|
"owner",
|
||||||
|
"modified",
|
||||||
|
],
|
||||||
|
)
|
||||||
document.tags.set(doc_tag_ids)
|
document.tags.set(doc_tag_ids)
|
||||||
|
|
||||||
WorkflowRun.objects.create(
|
WorkflowRun.objects.create(
|
||||||
|
|||||||
@@ -52,8 +52,6 @@ from documents.models import StoragePath
|
|||||||
from documents.models import Tag
|
from documents.models import Tag
|
||||||
from documents.models import WorkflowRun
|
from documents.models import WorkflowRun
|
||||||
from documents.models import WorkflowTrigger
|
from documents.models import WorkflowTrigger
|
||||||
from documents.parsers import DocumentParser
|
|
||||||
from documents.parsers import get_parser_class_for_mime_type
|
|
||||||
from documents.plugins.base import ConsumeTaskPlugin
|
from documents.plugins.base import ConsumeTaskPlugin
|
||||||
from documents.plugins.base import ProgressManager
|
from documents.plugins.base import ProgressManager
|
||||||
from documents.plugins.base import StopConsumeTaskError
|
from documents.plugins.base import StopConsumeTaskError
|
||||||
@@ -65,6 +63,8 @@ from documents.signals.handlers import run_workflows
|
|||||||
from documents.signals.handlers import send_websocket_document_updated
|
from documents.signals.handlers import send_websocket_document_updated
|
||||||
from documents.workflows.utils import get_workflows_for_trigger
|
from documents.workflows.utils import get_workflows_for_trigger
|
||||||
from paperless.config import AIConfig
|
from paperless.config import AIConfig
|
||||||
|
from paperless.parsers import ParserContext
|
||||||
|
from paperless.parsers.registry import get_parser_registry
|
||||||
from paperless_ai.indexing import llm_index_add_or_update_document
|
from paperless_ai.indexing import llm_index_add_or_update_document
|
||||||
from paperless_ai.indexing import llm_index_remove_document
|
from paperless_ai.indexing import llm_index_remove_document
|
||||||
from paperless_ai.indexing import update_llm_index
|
from paperless_ai.indexing import update_llm_index
|
||||||
@@ -100,7 +100,11 @@ def index_reindex(*, iter_wrapper: IterWrapper[Document] = _identity) -> None:
|
|||||||
|
|
||||||
|
|
||||||
@shared_task
|
@shared_task
|
||||||
def train_classifier(*, scheduled=True) -> None:
|
def train_classifier(
|
||||||
|
*,
|
||||||
|
scheduled=True,
|
||||||
|
status_callback: Callable[[str], None] | None = None,
|
||||||
|
) -> None:
|
||||||
task = PaperlessTask.objects.create(
|
task = PaperlessTask.objects.create(
|
||||||
type=PaperlessTask.TaskType.SCHEDULED_TASK
|
type=PaperlessTask.TaskType.SCHEDULED_TASK
|
||||||
if scheduled
|
if scheduled
|
||||||
@@ -136,7 +140,7 @@ def train_classifier(*, scheduled=True) -> None:
|
|||||||
classifier = DocumentClassifier()
|
classifier = DocumentClassifier()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if classifier.train():
|
if classifier.train(status_callback=status_callback):
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Saving updated classifier model to {settings.MODEL_FILE}...",
|
f"Saving updated classifier model to {settings.MODEL_FILE}...",
|
||||||
)
|
)
|
||||||
@@ -300,7 +304,11 @@ def update_document_content_maybe_archive_file(document_id) -> None:
|
|||||||
|
|
||||||
mime_type = document.mime_type
|
mime_type = document.mime_type
|
||||||
|
|
||||||
parser_class: type[DocumentParser] = get_parser_class_for_mime_type(mime_type)
|
parser_class = get_parser_registry().get_parser_for_file(
|
||||||
|
mime_type,
|
||||||
|
document.original_filename or "",
|
||||||
|
document.source_path,
|
||||||
|
)
|
||||||
|
|
||||||
if not parser_class:
|
if not parser_class:
|
||||||
logger.error(
|
logger.error(
|
||||||
@@ -309,16 +317,13 @@ def update_document_content_maybe_archive_file(document_id) -> None:
|
|||||||
)
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
parser: DocumentParser = parser_class(logging_group=uuid.uuid4())
|
with parser_class() as parser:
|
||||||
|
parser.configure(ParserContext())
|
||||||
|
|
||||||
try:
|
try:
|
||||||
parser.parse(document.source_path, mime_type, document.get_public_filename())
|
parser.parse(document.source_path, mime_type)
|
||||||
|
|
||||||
thumbnail = parser.get_thumbnail(
|
thumbnail = parser.get_thumbnail(document.source_path, mime_type)
|
||||||
document.source_path,
|
|
||||||
mime_type,
|
|
||||||
document.get_public_filename(),
|
|
||||||
)
|
|
||||||
|
|
||||||
with transaction.atomic():
|
with transaction.atomic():
|
||||||
oldDocument = Document.objects.get(pk=document.pk)
|
oldDocument = Document.objects.get(pk=document.pk)
|
||||||
@@ -398,9 +403,6 @@ def update_document_content_maybe_archive_file(document_id) -> None:
|
|||||||
logger.exception(
|
logger.exception(
|
||||||
f"Error while parsing document {document} (ID: {document_id})",
|
f"Error while parsing document {document} (ID: {document_id})",
|
||||||
)
|
)
|
||||||
finally:
|
|
||||||
# TODO(stumpylog): Cleanup once all parsers are handled
|
|
||||||
parser.cleanup()
|
|
||||||
|
|
||||||
|
|
||||||
@shared_task
|
@shared_task
|
||||||
|
|||||||
@@ -163,13 +163,23 @@ class TestRenderResultsSummary:
|
|||||||
class TestDocumentSanityCheckerCommand:
|
class TestDocumentSanityCheckerCommand:
|
||||||
def test_no_issues(self, sample_doc: Document) -> None:
|
def test_no_issues(self, sample_doc: Document) -> None:
|
||||||
out = StringIO()
|
out = StringIO()
|
||||||
call_command("document_sanity_checker", "--no-progress-bar", stdout=out)
|
call_command(
|
||||||
|
"document_sanity_checker",
|
||||||
|
"--no-progress-bar",
|
||||||
|
stdout=out,
|
||||||
|
skip_checks=True,
|
||||||
|
)
|
||||||
assert "No issues detected" in out.getvalue()
|
assert "No issues detected" in out.getvalue()
|
||||||
|
|
||||||
def test_missing_original(self, sample_doc: Document) -> None:
|
def test_missing_original(self, sample_doc: Document) -> None:
|
||||||
Path(sample_doc.source_path).unlink()
|
Path(sample_doc.source_path).unlink()
|
||||||
out = StringIO()
|
out = StringIO()
|
||||||
call_command("document_sanity_checker", "--no-progress-bar", stdout=out)
|
call_command(
|
||||||
|
"document_sanity_checker",
|
||||||
|
"--no-progress-bar",
|
||||||
|
stdout=out,
|
||||||
|
skip_checks=True,
|
||||||
|
)
|
||||||
output = out.getvalue()
|
output = out.getvalue()
|
||||||
assert "ERROR" in output
|
assert "ERROR" in output
|
||||||
assert "Original of document does not exist" in output
|
assert "Original of document does not exist" in output
|
||||||
@@ -187,7 +197,12 @@ class TestDocumentSanityCheckerCommand:
|
|||||||
Path(doc.thumbnail_path).touch()
|
Path(doc.thumbnail_path).touch()
|
||||||
|
|
||||||
out = StringIO()
|
out = StringIO()
|
||||||
call_command("document_sanity_checker", "--no-progress-bar", stdout=out)
|
call_command(
|
||||||
|
"document_sanity_checker",
|
||||||
|
"--no-progress-bar",
|
||||||
|
stdout=out,
|
||||||
|
skip_checks=True,
|
||||||
|
)
|
||||||
output = out.getvalue()
|
output = out.getvalue()
|
||||||
assert "ERROR" in output
|
assert "ERROR" in output
|
||||||
assert "Checksum mismatch. Stored: abc, actual:" in output
|
assert "Checksum mismatch. Stored: abc, actual:" in output
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ from unittest.mock import patch
|
|||||||
|
|
||||||
from django.contrib.auth.models import User
|
from django.contrib.auth.models import User
|
||||||
from django.core.files.uploadedfile import SimpleUploadedFile
|
from django.core.files.uploadedfile import SimpleUploadedFile
|
||||||
|
from django.test import override_settings
|
||||||
from rest_framework import status
|
from rest_framework import status
|
||||||
from rest_framework.test import APITestCase
|
from rest_framework.test import APITestCase
|
||||||
|
|
||||||
@@ -693,3 +694,17 @@ class TestApiAppConfig(DirectoriesMixin, APITestCase):
|
|||||||
content_type="application/json",
|
content_type="application/json",
|
||||||
)
|
)
|
||||||
mock_update.assert_called_once()
|
mock_update.assert_called_once()
|
||||||
|
|
||||||
|
@override_settings(LLM_ALLOW_INTERNAL_ENDPOINTS=False)
|
||||||
|
def test_update_llm_endpoint_blocks_internal_endpoint_when_disallowed(self) -> None:
|
||||||
|
response = self.client.patch(
|
||||||
|
f"{self.ENDPOINT}1/",
|
||||||
|
json.dumps(
|
||||||
|
{
|
||||||
|
"llm_endpoint": "http://127.0.0.1:11434",
|
||||||
|
},
|
||||||
|
),
|
||||||
|
content_type="application/json",
|
||||||
|
)
|
||||||
|
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||||
|
self.assertIn("non-public address", str(response.data).lower())
|
||||||
|
|||||||
@@ -262,6 +262,50 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
|
|||||||
self.assertEqual(kwargs["add_custom_fields"], [self.cf1.id])
|
self.assertEqual(kwargs["add_custom_fields"], [self.cf1.id])
|
||||||
self.assertEqual(kwargs["remove_custom_fields"], [self.cf2.id])
|
self.assertEqual(kwargs["remove_custom_fields"], [self.cf2.id])
|
||||||
|
|
||||||
|
@mock.patch("documents.serialisers.bulk_edit.modify_custom_fields")
|
||||||
|
def test_api_modify_custom_fields_documentlink_forbidden_for_unpermitted_target(
|
||||||
|
self,
|
||||||
|
m,
|
||||||
|
) -> None:
|
||||||
|
self.setup_mock(m, "modify_custom_fields")
|
||||||
|
user = User.objects.create_user(username="doc-owner")
|
||||||
|
user.user_permissions.add(Permission.objects.get(codename="change_document"))
|
||||||
|
other_user = User.objects.create_user(username="other-user")
|
||||||
|
source_doc = Document.objects.create(
|
||||||
|
checksum="source",
|
||||||
|
title="Source",
|
||||||
|
owner=user,
|
||||||
|
)
|
||||||
|
target_doc = Document.objects.create(
|
||||||
|
checksum="target",
|
||||||
|
title="Target",
|
||||||
|
owner=other_user,
|
||||||
|
)
|
||||||
|
doclink_field = CustomField.objects.create(
|
||||||
|
name="doclink",
|
||||||
|
data_type=CustomField.FieldDataType.DOCUMENTLINK,
|
||||||
|
)
|
||||||
|
|
||||||
|
self.client.force_authenticate(user=user)
|
||||||
|
|
||||||
|
response = self.client.post(
|
||||||
|
"/api/documents/bulk_edit/",
|
||||||
|
json.dumps(
|
||||||
|
{
|
||||||
|
"documents": [source_doc.id],
|
||||||
|
"method": "modify_custom_fields",
|
||||||
|
"parameters": {
|
||||||
|
"add_custom_fields": {doclink_field.id: [target_doc.id]},
|
||||||
|
"remove_custom_fields": [],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
),
|
||||||
|
content_type="application/json",
|
||||||
|
)
|
||||||
|
|
||||||
|
self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
|
||||||
|
m.assert_not_called()
|
||||||
|
|
||||||
@mock.patch("documents.serialisers.bulk_edit.modify_custom_fields")
|
@mock.patch("documents.serialisers.bulk_edit.modify_custom_fields")
|
||||||
def test_api_modify_custom_fields_with_values(self, m) -> None:
|
def test_api_modify_custom_fields_with_values(self, m) -> None:
|
||||||
self.setup_mock(m, "modify_custom_fields")
|
self.setup_mock(m, "modify_custom_fields")
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ from unittest.mock import ANY
|
|||||||
from django.contrib.auth.models import Permission
|
from django.contrib.auth.models import Permission
|
||||||
from django.contrib.auth.models import User
|
from django.contrib.auth.models import User
|
||||||
from django.test import override_settings
|
from django.test import override_settings
|
||||||
|
from guardian.shortcuts import assign_perm
|
||||||
from rest_framework import status
|
from rest_framework import status
|
||||||
from rest_framework.test import APITestCase
|
from rest_framework.test import APITestCase
|
||||||
|
|
||||||
@@ -1140,6 +1141,102 @@ class TestCustomFieldsAPI(DirectoriesMixin, APITestCase):
|
|||||||
self.assertEqual(resp.status_code, status.HTTP_200_OK)
|
self.assertEqual(resp.status_code, status.HTTP_200_OK)
|
||||||
self.assertEqual(doc5.custom_fields.first().value, [1])
|
self.assertEqual(doc5.custom_fields.first().value, [1])
|
||||||
|
|
||||||
|
def test_documentlink_patch_requires_change_permission_on_target_documents(
|
||||||
|
self,
|
||||||
|
) -> None:
|
||||||
|
source_owner = User.objects.create_user(username="source-owner")
|
||||||
|
source_owner.user_permissions.add(
|
||||||
|
Permission.objects.get(codename="change_document"),
|
||||||
|
)
|
||||||
|
other_user = User.objects.create_user(username="other-user")
|
||||||
|
|
||||||
|
source_doc = Document.objects.create(
|
||||||
|
title="Source",
|
||||||
|
checksum="source",
|
||||||
|
mime_type="application/pdf",
|
||||||
|
owner=source_owner,
|
||||||
|
)
|
||||||
|
target_doc = Document.objects.create(
|
||||||
|
title="Target",
|
||||||
|
checksum="target",
|
||||||
|
mime_type="application/pdf",
|
||||||
|
owner=other_user,
|
||||||
|
)
|
||||||
|
custom_field_doclink = CustomField.objects.create(
|
||||||
|
name="Test Custom Field Doc Link",
|
||||||
|
data_type=CustomField.FieldDataType.DOCUMENTLINK,
|
||||||
|
)
|
||||||
|
|
||||||
|
self.client.force_authenticate(user=source_owner)
|
||||||
|
|
||||||
|
resp = self.client.patch(
|
||||||
|
f"/api/documents/{source_doc.id}/",
|
||||||
|
data={
|
||||||
|
"custom_fields": [
|
||||||
|
{
|
||||||
|
"field": custom_field_doclink.id,
|
||||||
|
"value": [target_doc.id],
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
format="json",
|
||||||
|
)
|
||||||
|
|
||||||
|
self.assertEqual(resp.status_code, status.HTTP_403_FORBIDDEN)
|
||||||
|
self.assertEqual(
|
||||||
|
CustomFieldInstance.objects.filter(field=custom_field_doclink).count(),
|
||||||
|
0,
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_documentlink_patch_allowed_with_change_permission_on_target_documents(
|
||||||
|
self,
|
||||||
|
) -> None:
|
||||||
|
source_owner = User.objects.create_user(username="source-owner")
|
||||||
|
source_owner.user_permissions.add(
|
||||||
|
Permission.objects.get(codename="change_document"),
|
||||||
|
)
|
||||||
|
other_user = User.objects.create_user(username="other-user")
|
||||||
|
|
||||||
|
source_doc = Document.objects.create(
|
||||||
|
title="Source",
|
||||||
|
checksum="source",
|
||||||
|
mime_type="application/pdf",
|
||||||
|
owner=source_owner,
|
||||||
|
)
|
||||||
|
target_doc = Document.objects.create(
|
||||||
|
title="Target",
|
||||||
|
checksum="target",
|
||||||
|
mime_type="application/pdf",
|
||||||
|
owner=other_user,
|
||||||
|
)
|
||||||
|
custom_field_doclink = CustomField.objects.create(
|
||||||
|
name="Test Custom Field Doc Link",
|
||||||
|
data_type=CustomField.FieldDataType.DOCUMENTLINK,
|
||||||
|
)
|
||||||
|
|
||||||
|
assign_perm("change_document", source_owner, target_doc)
|
||||||
|
self.client.force_authenticate(user=source_owner)
|
||||||
|
|
||||||
|
resp = self.client.patch(
|
||||||
|
f"/api/documents/{source_doc.id}/",
|
||||||
|
data={
|
||||||
|
"custom_fields": [
|
||||||
|
{
|
||||||
|
"field": custom_field_doclink.id,
|
||||||
|
"value": [target_doc.id],
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
format="json",
|
||||||
|
)
|
||||||
|
|
||||||
|
self.assertEqual(resp.status_code, status.HTTP_200_OK)
|
||||||
|
target_doc.refresh_from_db()
|
||||||
|
self.assertEqual(
|
||||||
|
target_doc.custom_fields.get(field=custom_field_doclink).value,
|
||||||
|
[source_doc.id],
|
||||||
|
)
|
||||||
|
|
||||||
def test_custom_field_filters(self) -> None:
|
def test_custom_field_filters(self) -> None:
|
||||||
custom_field_string = CustomField.objects.create(
|
custom_field_string = CustomField.objects.create(
|
||||||
name="Test Custom Field String",
|
name="Test Custom Field String",
|
||||||
|
|||||||
@@ -888,6 +888,19 @@ class TestApiUser(DirectoriesMixin, APITestCase):
|
|||||||
|
|
||||||
self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
|
self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
|
||||||
|
|
||||||
|
response = self.client.post(
|
||||||
|
f"{self.ENDPOINT}",
|
||||||
|
json.dumps(
|
||||||
|
{
|
||||||
|
"username": "user4",
|
||||||
|
"is_superuser": "true",
|
||||||
|
},
|
||||||
|
),
|
||||||
|
content_type="application/json",
|
||||||
|
)
|
||||||
|
|
||||||
|
self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
|
||||||
|
|
||||||
self.client.force_authenticate(user2)
|
self.client.force_authenticate(user2)
|
||||||
|
|
||||||
response = self.client.patch(
|
response = self.client.patch(
|
||||||
@@ -920,6 +933,65 @@ class TestApiUser(DirectoriesMixin, APITestCase):
|
|||||||
returned_user1 = User.objects.get(pk=user1.pk)
|
returned_user1 = User.objects.get(pk=user1.pk)
|
||||||
self.assertEqual(returned_user1.is_superuser, False)
|
self.assertEqual(returned_user1.is_superuser, False)
|
||||||
|
|
||||||
|
def test_only_superusers_can_create_or_alter_staff_status(self):
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- Existing user account
|
||||||
|
WHEN:
|
||||||
|
- API request is made to add a user account with staff status
|
||||||
|
- API request is made to change staff status
|
||||||
|
THEN:
|
||||||
|
- Only superusers can change staff status
|
||||||
|
"""
|
||||||
|
|
||||||
|
user1 = User.objects.create_user(username="user1")
|
||||||
|
user1.user_permissions.add(*Permission.objects.all())
|
||||||
|
user2 = User.objects.create_superuser(username="user2")
|
||||||
|
|
||||||
|
self.client.force_authenticate(user1)
|
||||||
|
|
||||||
|
response = self.client.patch(
|
||||||
|
f"{self.ENDPOINT}{user1.pk}/",
|
||||||
|
json.dumps(
|
||||||
|
{
|
||||||
|
"is_staff": "true",
|
||||||
|
},
|
||||||
|
),
|
||||||
|
content_type="application/json",
|
||||||
|
)
|
||||||
|
|
||||||
|
self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
|
||||||
|
|
||||||
|
response = self.client.post(
|
||||||
|
f"{self.ENDPOINT}",
|
||||||
|
json.dumps(
|
||||||
|
{
|
||||||
|
"username": "user3",
|
||||||
|
"is_staff": 1,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
content_type="application/json",
|
||||||
|
)
|
||||||
|
|
||||||
|
self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
|
||||||
|
|
||||||
|
self.client.force_authenticate(user2)
|
||||||
|
|
||||||
|
response = self.client.patch(
|
||||||
|
f"{self.ENDPOINT}{user1.pk}/",
|
||||||
|
json.dumps(
|
||||||
|
{
|
||||||
|
"is_staff": True,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
content_type="application/json",
|
||||||
|
)
|
||||||
|
|
||||||
|
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||||
|
|
||||||
|
returned_user1 = User.objects.get(pk=user1.pk)
|
||||||
|
self.assertEqual(returned_user1.is_staff, True)
|
||||||
|
|
||||||
|
|
||||||
class TestApiGroup(DirectoriesMixin, APITestCase):
|
class TestApiGroup(DirectoriesMixin, APITestCase):
|
||||||
ENDPOINT = "/api/groups/"
|
ENDPOINT = "/api/groups/"
|
||||||
|
|||||||
@@ -12,7 +12,12 @@ class TestApiSchema(APITestCase):
|
|||||||
Test that the schema is valid
|
Test that the schema is valid
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
call_command("spectacular", "--validate", "--fail-on-warn")
|
call_command(
|
||||||
|
"spectacular",
|
||||||
|
"--validate",
|
||||||
|
"--fail-on-warn",
|
||||||
|
skip_checks=True,
|
||||||
|
)
|
||||||
except CommandError as e:
|
except CommandError as e:
|
||||||
self.fail(f"Schema validation failed: {e}")
|
self.fail(f"Schema validation failed: {e}")
|
||||||
|
|
||||||
|
|||||||
@@ -702,6 +702,40 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
|||||||
|
|
||||||
self.assertEqual(correction, None)
|
self.assertEqual(correction, None)
|
||||||
|
|
||||||
|
def test_search_spelling_suggestion_suppressed_for_private_terms(self):
|
||||||
|
owner = User.objects.create_user("owner")
|
||||||
|
attacker = User.objects.create_user("attacker")
|
||||||
|
attacker.user_permissions.add(
|
||||||
|
Permission.objects.get(codename="view_document"),
|
||||||
|
)
|
||||||
|
|
||||||
|
with AsyncWriter(index.open_index()) as writer:
|
||||||
|
for i in range(55):
|
||||||
|
private_doc = Document.objects.create(
|
||||||
|
checksum=f"p{i}",
|
||||||
|
pk=100 + i,
|
||||||
|
title=f"Private Document {i + 1}",
|
||||||
|
content=f"treasury document {i + 1}",
|
||||||
|
owner=owner,
|
||||||
|
)
|
||||||
|
visible_doc = Document.objects.create(
|
||||||
|
checksum=f"v{i}",
|
||||||
|
pk=200 + i,
|
||||||
|
title=f"Visible Document {i + 1}",
|
||||||
|
content=f"public ledger {i + 1}",
|
||||||
|
owner=attacker,
|
||||||
|
)
|
||||||
|
index.update_document(writer, private_doc)
|
||||||
|
index.update_document(writer, visible_doc)
|
||||||
|
|
||||||
|
self.client.force_authenticate(user=attacker)
|
||||||
|
|
||||||
|
response = self.client.get("/api/documents/?query=treasurx")
|
||||||
|
|
||||||
|
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||||
|
self.assertEqual(response.data["count"], 0)
|
||||||
|
self.assertIsNone(response.data["corrected_query"])
|
||||||
|
|
||||||
@mock.patch(
|
@mock.patch(
|
||||||
"whoosh.searching.Searcher.correct_query",
|
"whoosh.searching.Searcher.correct_query",
|
||||||
side_effect=Exception("Test error"),
|
side_effect=Exception("Test error"),
|
||||||
@@ -772,6 +806,60 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
|||||||
self.assertEqual(results[0]["id"], d3.id)
|
self.assertEqual(results[0]["id"], d3.id)
|
||||||
self.assertEqual(results[1]["id"], d1.id)
|
self.assertEqual(results[1]["id"], d1.id)
|
||||||
|
|
||||||
|
def test_search_more_like_requires_view_permission_on_seed_document(
|
||||||
|
self,
|
||||||
|
) -> None:
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- A user can search documents they own
|
||||||
|
- Another user's private document exists with similar content
|
||||||
|
WHEN:
|
||||||
|
- The user requests more-like-this for the private seed document
|
||||||
|
THEN:
|
||||||
|
- The request is rejected
|
||||||
|
"""
|
||||||
|
owner = User.objects.create_user("owner")
|
||||||
|
attacker = User.objects.create_user("attacker")
|
||||||
|
attacker.user_permissions.add(
|
||||||
|
Permission.objects.get(codename="view_document"),
|
||||||
|
)
|
||||||
|
|
||||||
|
private_seed = Document.objects.create(
|
||||||
|
title="private bank statement",
|
||||||
|
content="quarterly treasury bank statement wire transfer",
|
||||||
|
checksum="seed",
|
||||||
|
owner=owner,
|
||||||
|
pk=10,
|
||||||
|
)
|
||||||
|
visible_doc = Document.objects.create(
|
||||||
|
title="attacker-visible match",
|
||||||
|
content="quarterly treasury bank statement wire transfer summary",
|
||||||
|
checksum="visible",
|
||||||
|
owner=attacker,
|
||||||
|
pk=11,
|
||||||
|
)
|
||||||
|
other_doc = Document.objects.create(
|
||||||
|
title="unrelated",
|
||||||
|
content="completely different topic",
|
||||||
|
checksum="other",
|
||||||
|
owner=attacker,
|
||||||
|
pk=12,
|
||||||
|
)
|
||||||
|
|
||||||
|
with AsyncWriter(index.open_index()) as writer:
|
||||||
|
index.update_document(writer, private_seed)
|
||||||
|
index.update_document(writer, visible_doc)
|
||||||
|
index.update_document(writer, other_doc)
|
||||||
|
|
||||||
|
self.client.force_authenticate(user=attacker)
|
||||||
|
|
||||||
|
response = self.client.get(
|
||||||
|
f"/api/documents/?more_like_id={private_seed.id}",
|
||||||
|
)
|
||||||
|
|
||||||
|
self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
|
||||||
|
self.assertEqual(response.content, b"Insufficient permissions.")
|
||||||
|
|
||||||
def test_search_filtering(self) -> None:
|
def test_search_filtering(self) -> None:
|
||||||
t = Tag.objects.create(name="tag")
|
t = Tag.objects.create(name="tag")
|
||||||
t2 = Tag.objects.create(name="tag2")
|
t2 = Tag.objects.create(name="tag2")
|
||||||
@@ -1356,6 +1444,83 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
|||||||
self.assertEqual(results["custom_fields"][0]["id"], custom_field1.id)
|
self.assertEqual(results["custom_fields"][0]["id"], custom_field1.id)
|
||||||
self.assertEqual(results["workflows"][0]["id"], workflow1.id)
|
self.assertEqual(results["workflows"][0]["id"], workflow1.id)
|
||||||
|
|
||||||
|
def test_global_search_filters_owned_mail_objects(self) -> None:
|
||||||
|
user1 = User.objects.create_user("mail-search-user")
|
||||||
|
user2 = User.objects.create_user("other-mail-search-user")
|
||||||
|
user1.user_permissions.add(
|
||||||
|
Permission.objects.get(codename="view_mailaccount"),
|
||||||
|
Permission.objects.get(codename="view_mailrule"),
|
||||||
|
)
|
||||||
|
|
||||||
|
own_account = MailAccount.objects.create(
|
||||||
|
name="bank owned account",
|
||||||
|
username="owner@example.com",
|
||||||
|
password="secret",
|
||||||
|
imap_server="imap.owner.example.com",
|
||||||
|
imap_port=993,
|
||||||
|
imap_security=MailAccount.ImapSecurity.SSL,
|
||||||
|
character_set="UTF-8",
|
||||||
|
owner=user1,
|
||||||
|
)
|
||||||
|
other_account = MailAccount.objects.create(
|
||||||
|
name="bank other account",
|
||||||
|
username="other@example.com",
|
||||||
|
password="secret",
|
||||||
|
imap_server="imap.other.example.com",
|
||||||
|
imap_port=993,
|
||||||
|
imap_security=MailAccount.ImapSecurity.SSL,
|
||||||
|
character_set="UTF-8",
|
||||||
|
owner=user2,
|
||||||
|
)
|
||||||
|
unowned_account = MailAccount.objects.create(
|
||||||
|
name="bank shared account",
|
||||||
|
username="shared@example.com",
|
||||||
|
password="secret",
|
||||||
|
imap_server="imap.shared.example.com",
|
||||||
|
imap_port=993,
|
||||||
|
imap_security=MailAccount.ImapSecurity.SSL,
|
||||||
|
character_set="UTF-8",
|
||||||
|
)
|
||||||
|
own_rule = MailRule.objects.create(
|
||||||
|
name="bank owned rule",
|
||||||
|
account=own_account,
|
||||||
|
action=MailRule.MailAction.MOVE,
|
||||||
|
owner=user1,
|
||||||
|
)
|
||||||
|
other_rule = MailRule.objects.create(
|
||||||
|
name="bank other rule",
|
||||||
|
account=other_account,
|
||||||
|
action=MailRule.MailAction.MOVE,
|
||||||
|
owner=user2,
|
||||||
|
)
|
||||||
|
unowned_rule = MailRule.objects.create(
|
||||||
|
name="bank shared rule",
|
||||||
|
account=unowned_account,
|
||||||
|
action=MailRule.MailAction.MOVE,
|
||||||
|
)
|
||||||
|
|
||||||
|
self.client.force_authenticate(user1)
|
||||||
|
|
||||||
|
response = self.client.get("/api/search/?query=bank")
|
||||||
|
|
||||||
|
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||||
|
self.assertCountEqual(
|
||||||
|
[account["id"] for account in response.data["mail_accounts"]],
|
||||||
|
[own_account.id, unowned_account.id],
|
||||||
|
)
|
||||||
|
self.assertCountEqual(
|
||||||
|
[rule["id"] for rule in response.data["mail_rules"]],
|
||||||
|
[own_rule.id, unowned_rule.id],
|
||||||
|
)
|
||||||
|
self.assertNotIn(
|
||||||
|
other_account.id,
|
||||||
|
[account["id"] for account in response.data["mail_accounts"]],
|
||||||
|
)
|
||||||
|
self.assertNotIn(
|
||||||
|
other_rule.id,
|
||||||
|
[rule["id"] for rule in response.data["mail_rules"]],
|
||||||
|
)
|
||||||
|
|
||||||
def test_global_search_bad_request(self) -> None:
|
def test_global_search_bad_request(self) -> None:
|
||||||
"""
|
"""
|
||||||
WHEN:
|
WHEN:
|
||||||
|
|||||||
@@ -26,6 +26,23 @@ class TestSystemStatus(APITestCase):
|
|||||||
self.override = override_settings(MEDIA_ROOT=self.tmp_dir)
|
self.override = override_settings(MEDIA_ROOT=self.tmp_dir)
|
||||||
self.override.enable()
|
self.override.enable()
|
||||||
|
|
||||||
|
# Mock slow network calls so tests don't block on real Redis/Celery timeouts.
|
||||||
|
# Individual tests that care about specific behaviour override these with
|
||||||
|
# their own @mock.patch decorators (which take precedence).
|
||||||
|
redis_patcher = mock.patch(
|
||||||
|
"redis.Redis.execute_command",
|
||||||
|
side_effect=Exception("Redis not available"),
|
||||||
|
)
|
||||||
|
self.mock_redis = redis_patcher.start()
|
||||||
|
self.addCleanup(redis_patcher.stop)
|
||||||
|
|
||||||
|
celery_patcher = mock.patch(
|
||||||
|
"celery.app.control.Inspect.ping",
|
||||||
|
side_effect=Exception("Celery not available"),
|
||||||
|
)
|
||||||
|
self.mock_celery_ping = celery_patcher.start()
|
||||||
|
self.addCleanup(celery_patcher.stop)
|
||||||
|
|
||||||
def tearDown(self) -> None:
|
def tearDown(self) -> None:
|
||||||
super().tearDown()
|
super().tearDown()
|
||||||
|
|
||||||
@@ -69,11 +86,18 @@ class TestSystemStatus(APITestCase):
|
|||||||
"""
|
"""
|
||||||
response = self.client.get(self.ENDPOINT)
|
response = self.client.get(self.ENDPOINT)
|
||||||
self.assertEqual(response.status_code, status.HTTP_401_UNAUTHORIZED)
|
self.assertEqual(response.status_code, status.HTTP_401_UNAUTHORIZED)
|
||||||
|
self.assertEqual(response["WWW-Authenticate"], "Token")
|
||||||
normal_user = User.objects.create_user(username="normal_user")
|
normal_user = User.objects.create_user(username="normal_user")
|
||||||
self.client.force_login(normal_user)
|
self.client.force_login(normal_user)
|
||||||
response = self.client.get(self.ENDPOINT)
|
response = self.client.get(self.ENDPOINT)
|
||||||
self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
|
self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
|
||||||
|
|
||||||
|
def test_system_status_with_bad_basic_auth_challenges(self) -> None:
|
||||||
|
self.client.credentials(HTTP_AUTHORIZATION="Basic invalid")
|
||||||
|
response = self.client.get(self.ENDPOINT)
|
||||||
|
self.assertEqual(response.status_code, status.HTTP_401_UNAUTHORIZED)
|
||||||
|
self.assertEqual(response["WWW-Authenticate"], 'Basic realm="api"')
|
||||||
|
|
||||||
def test_system_status_container_detection(self) -> None:
|
def test_system_status_container_detection(self) -> None:
|
||||||
"""
|
"""
|
||||||
GIVEN:
|
GIVEN:
|
||||||
@@ -84,11 +108,15 @@ class TestSystemStatus(APITestCase):
|
|||||||
- The response contains the correct install type
|
- The response contains the correct install type
|
||||||
"""
|
"""
|
||||||
self.client.force_login(self.user)
|
self.client.force_login(self.user)
|
||||||
os.environ["PNGX_CONTAINERIZED"] = "1"
|
with mock.patch.dict(os.environ, {"PNGX_CONTAINERIZED": "1"}, clear=False):
|
||||||
response = self.client.get(self.ENDPOINT)
|
response = self.client.get(self.ENDPOINT)
|
||||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||||
self.assertEqual(response.data["install_type"], "docker")
|
self.assertEqual(response.data["install_type"], "docker")
|
||||||
os.environ["KUBERNETES_SERVICE_HOST"] = "http://localhost"
|
with mock.patch.dict(
|
||||||
|
os.environ,
|
||||||
|
{"PNGX_CONTAINERIZED": "1", "KUBERNETES_SERVICE_HOST": "http://localhost"},
|
||||||
|
clear=False,
|
||||||
|
):
|
||||||
response = self.client.get(self.ENDPOINT)
|
response = self.client.get(self.ENDPOINT)
|
||||||
self.assertEqual(response.data["install_type"], "kubernetes")
|
self.assertEqual(response.data["install_type"], "kubernetes")
|
||||||
|
|
||||||
|
|||||||
@@ -13,8 +13,10 @@ class TestDocumentChecks(TestCase):
|
|||||||
def test_parser_check(self) -> None:
|
def test_parser_check(self) -> None:
|
||||||
self.assertEqual(parser_check(None), [])
|
self.assertEqual(parser_check(None), [])
|
||||||
|
|
||||||
with mock.patch("documents.checks.document_consumer_declaration.send") as m:
|
with mock.patch("documents.checks.get_parser_registry") as mock_registry_fn:
|
||||||
m.return_value = []
|
mock_registry = mock.MagicMock()
|
||||||
|
mock_registry.all_parsers.return_value = []
|
||||||
|
mock_registry_fn.return_value = mock_registry
|
||||||
|
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
parser_check(None),
|
parser_check(None),
|
||||||
|
|||||||
@@ -27,7 +27,6 @@ from documents.models import Document
|
|||||||
from documents.models import DocumentType
|
from documents.models import DocumentType
|
||||||
from documents.models import StoragePath
|
from documents.models import StoragePath
|
||||||
from documents.models import Tag
|
from documents.models import Tag
|
||||||
from documents.parsers import DocumentParser
|
|
||||||
from documents.parsers import ParseError
|
from documents.parsers import ParseError
|
||||||
from documents.plugins.helpers import ProgressStatusOptions
|
from documents.plugins.helpers import ProgressStatusOptions
|
||||||
from documents.tasks import sanity_check
|
from documents.tasks import sanity_check
|
||||||
@@ -36,65 +35,108 @@ from documents.tests.utils import DummyProgressManager
|
|||||||
from documents.tests.utils import FileSystemAssertsMixin
|
from documents.tests.utils import FileSystemAssertsMixin
|
||||||
from documents.tests.utils import GetConsumerMixin
|
from documents.tests.utils import GetConsumerMixin
|
||||||
from paperless_mail.models import MailRule
|
from paperless_mail.models import MailRule
|
||||||
from paperless_mail.parsers import MailDocumentParser
|
|
||||||
|
|
||||||
|
|
||||||
class _BaseTestParser(DocumentParser):
|
class _BaseNewStyleParser:
|
||||||
def get_settings(self) -> None:
|
"""Minimal ParserProtocol implementation for use in consumer tests."""
|
||||||
|
|
||||||
|
name: str = "test-parser"
|
||||||
|
version: str = "0.1"
|
||||||
|
author: str = "test"
|
||||||
|
url: str = "test"
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def supported_mime_types(cls) -> dict:
|
||||||
|
return {
|
||||||
|
"application/pdf": ".pdf",
|
||||||
|
"image/png": ".png",
|
||||||
|
"message/rfc822": ".eml",
|
||||||
|
}
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def score(cls, mime_type: str, filename: str, path=None):
|
||||||
|
return 0 if mime_type in cls.supported_mime_types() else None
|
||||||
|
|
||||||
|
@property
|
||||||
|
def can_produce_archive(self) -> bool:
|
||||||
|
return True
|
||||||
|
|
||||||
|
@property
|
||||||
|
def requires_pdf_rendition(self) -> bool:
|
||||||
|
return False
|
||||||
|
|
||||||
|
def __init__(self) -> None:
|
||||||
|
self._tmpdir: Path | None = None
|
||||||
|
self._text: str | None = None
|
||||||
|
self._archive: Path | None = None
|
||||||
|
self._thumb: Path | None = None
|
||||||
|
|
||||||
|
def __enter__(self):
|
||||||
|
self._tmpdir = Path(
|
||||||
|
tempfile.mkdtemp(prefix="paperless-test-", dir=settings.SCRATCH_DIR),
|
||||||
|
)
|
||||||
|
_, thumb = tempfile.mkstemp(suffix=".webp", dir=self._tmpdir)
|
||||||
|
self._thumb = Path(thumb)
|
||||||
|
return self
|
||||||
|
|
||||||
|
def __exit__(self, exc_type, exc_val, exc_tb) -> None:
|
||||||
|
if self._tmpdir and self._tmpdir.exists():
|
||||||
|
shutil.rmtree(self._tmpdir, ignore_errors=True)
|
||||||
|
|
||||||
|
def configure(self, context) -> None:
|
||||||
"""
|
"""
|
||||||
This parser does not implement additional settings yet
|
Test parser doesn't do anything with context
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
def parse(self, document_path, mime_type, *, produce_archive: bool = True) -> None:
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
def get_text(self) -> str | None:
|
||||||
|
return self._text
|
||||||
|
|
||||||
|
def get_date(self):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def get_archive_path(self):
|
||||||
|
return self._archive
|
||||||
|
|
||||||
class DummyParser(_BaseTestParser):
|
def get_thumbnail(self, document_path, mime_type) -> Path:
|
||||||
def __init__(self, logging_group, scratch_dir, archive_path) -> None:
|
return self._thumb
|
||||||
super().__init__(logging_group, None)
|
|
||||||
_, self.fake_thumb = tempfile.mkstemp(suffix=".webp", dir=scratch_dir)
|
|
||||||
self.archive_path = archive_path
|
|
||||||
|
|
||||||
def get_thumbnail(self, document_path, mime_type, file_name=None):
|
def get_page_count(self, document_path, mime_type):
|
||||||
return self.fake_thumb
|
return None
|
||||||
|
|
||||||
def parse(self, document_path, mime_type, file_name=None) -> None:
|
def extract_metadata(self, document_path, mime_type) -> list:
|
||||||
self.text = "The Text"
|
return []
|
||||||
|
|
||||||
|
|
||||||
class CopyParser(_BaseTestParser):
|
class DummyParser(_BaseNewStyleParser):
|
||||||
def get_thumbnail(self, document_path, mime_type, file_name=None):
|
_ARCHIVE_SRC = (
|
||||||
return self.fake_thumb
|
Path(__file__).parent / "samples" / "documents" / "archive" / "0000001.pdf"
|
||||||
|
)
|
||||||
|
|
||||||
def __init__(self, logging_group, progress_callback=None) -> None:
|
def parse(self, document_path, mime_type, *, produce_archive: bool = True) -> None:
|
||||||
super().__init__(logging_group, progress_callback)
|
self._text = "The Text"
|
||||||
_, self.fake_thumb = tempfile.mkstemp(suffix=".webp", dir=self.tempdir)
|
if produce_archive and self._tmpdir:
|
||||||
|
self._archive = self._tmpdir / "archive.pdf"
|
||||||
def parse(self, document_path, mime_type, file_name=None) -> None:
|
shutil.copy(self._ARCHIVE_SRC, self._archive)
|
||||||
self.text = "The text"
|
|
||||||
self.archive_path = Path(self.tempdir / "archive.pdf")
|
|
||||||
shutil.copy(document_path, self.archive_path)
|
|
||||||
|
|
||||||
|
|
||||||
class FaultyParser(_BaseTestParser):
|
class CopyParser(_BaseNewStyleParser):
|
||||||
def __init__(self, logging_group, scratch_dir) -> None:
|
def parse(self, document_path, mime_type, *, produce_archive: bool = True) -> None:
|
||||||
super().__init__(logging_group)
|
self._text = "The text"
|
||||||
_, self.fake_thumb = tempfile.mkstemp(suffix=".webp", dir=scratch_dir)
|
if produce_archive and self._tmpdir:
|
||||||
|
self._archive = self._tmpdir / "archive.pdf"
|
||||||
|
shutil.copy(document_path, self._archive)
|
||||||
|
|
||||||
def get_thumbnail(self, document_path, mime_type, file_name=None):
|
|
||||||
return self.fake_thumb
|
|
||||||
|
|
||||||
def parse(self, document_path, mime_type, file_name=None):
|
class FaultyParser(_BaseNewStyleParser):
|
||||||
|
def parse(self, document_path, mime_type, *, produce_archive: bool = True) -> None:
|
||||||
raise ParseError("Does not compute.")
|
raise ParseError("Does not compute.")
|
||||||
|
|
||||||
|
|
||||||
class FaultyGenericExceptionParser(_BaseTestParser):
|
class FaultyGenericExceptionParser(_BaseNewStyleParser):
|
||||||
def __init__(self, logging_group, scratch_dir) -> None:
|
def parse(self, document_path, mime_type, *, produce_archive: bool = True) -> None:
|
||||||
super().__init__(logging_group)
|
|
||||||
_, self.fake_thumb = tempfile.mkstemp(suffix=".webp", dir=scratch_dir)
|
|
||||||
|
|
||||||
def get_thumbnail(self, document_path, mime_type, file_name=None):
|
|
||||||
return self.fake_thumb
|
|
||||||
|
|
||||||
def parse(self, document_path, mime_type, file_name=None):
|
|
||||||
raise Exception("Generic exception.")
|
raise Exception("Generic exception.")
|
||||||
|
|
||||||
|
|
||||||
@@ -148,38 +190,12 @@ class TestConsumer(
|
|||||||
self.assertEqual(payload["data"]["max_progress"], last_progress_max)
|
self.assertEqual(payload["data"]["max_progress"], last_progress_max)
|
||||||
self.assertEqual(payload["data"]["status"], last_status)
|
self.assertEqual(payload["data"]["status"], last_status)
|
||||||
|
|
||||||
def make_dummy_parser(self, logging_group, progress_callback=None):
|
|
||||||
return DummyParser(
|
|
||||||
logging_group,
|
|
||||||
self.dirs.scratch_dir,
|
|
||||||
self.get_test_archive_file(),
|
|
||||||
)
|
|
||||||
|
|
||||||
def make_faulty_parser(self, logging_group, progress_callback=None):
|
|
||||||
return FaultyParser(logging_group, self.dirs.scratch_dir)
|
|
||||||
|
|
||||||
def make_faulty_generic_exception_parser(
|
|
||||||
self,
|
|
||||||
logging_group,
|
|
||||||
progress_callback=None,
|
|
||||||
):
|
|
||||||
return FaultyGenericExceptionParser(logging_group, self.dirs.scratch_dir)
|
|
||||||
|
|
||||||
def setUp(self) -> None:
|
def setUp(self) -> None:
|
||||||
super().setUp()
|
super().setUp()
|
||||||
|
|
||||||
patcher = mock.patch("documents.parsers.document_consumer_declaration.send")
|
patcher = mock.patch("documents.consumer.get_parser_registry")
|
||||||
m = patcher.start()
|
mock_registry = patcher.start()
|
||||||
m.return_value = [
|
mock_registry.return_value.get_parser_for_file.return_value = DummyParser
|
||||||
(
|
|
||||||
None,
|
|
||||||
{
|
|
||||||
"parser": self.make_dummy_parser,
|
|
||||||
"mime_types": {"application/pdf": ".pdf"},
|
|
||||||
"weight": 0,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
]
|
|
||||||
self.addCleanup(patcher.stop)
|
self.addCleanup(patcher.stop)
|
||||||
|
|
||||||
def get_test_file(self):
|
def get_test_file(self):
|
||||||
@@ -548,9 +564,9 @@ class TestConsumer(
|
|||||||
) as consumer:
|
) as consumer:
|
||||||
consumer.run()
|
consumer.run()
|
||||||
|
|
||||||
@mock.patch("documents.parsers.document_consumer_declaration.send")
|
@mock.patch("documents.consumer.get_parser_registry")
|
||||||
def testNoParsers(self, m) -> None:
|
def testNoParsers(self, m) -> None:
|
||||||
m.return_value = []
|
m.return_value.get_parser_for_file.return_value = None
|
||||||
|
|
||||||
with self.assertRaisesMessage(
|
with self.assertRaisesMessage(
|
||||||
ConsumerError,
|
ConsumerError,
|
||||||
@@ -561,18 +577,9 @@ class TestConsumer(
|
|||||||
|
|
||||||
self._assert_first_last_send_progress(last_status="FAILED")
|
self._assert_first_last_send_progress(last_status="FAILED")
|
||||||
|
|
||||||
@mock.patch("documents.parsers.document_consumer_declaration.send")
|
@mock.patch("documents.consumer.get_parser_registry")
|
||||||
def testFaultyParser(self, m) -> None:
|
def testFaultyParser(self, m) -> None:
|
||||||
m.return_value = [
|
m.return_value.get_parser_for_file.return_value = FaultyParser
|
||||||
(
|
|
||||||
None,
|
|
||||||
{
|
|
||||||
"parser": self.make_faulty_parser,
|
|
||||||
"mime_types": {"application/pdf": ".pdf"},
|
|
||||||
"weight": 0,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
]
|
|
||||||
|
|
||||||
with self.get_consumer(self.get_test_file()) as consumer:
|
with self.get_consumer(self.get_test_file()) as consumer:
|
||||||
with self.assertRaisesMessage(
|
with self.assertRaisesMessage(
|
||||||
@@ -583,18 +590,9 @@ class TestConsumer(
|
|||||||
|
|
||||||
self._assert_first_last_send_progress(last_status="FAILED")
|
self._assert_first_last_send_progress(last_status="FAILED")
|
||||||
|
|
||||||
@mock.patch("documents.parsers.document_consumer_declaration.send")
|
@mock.patch("documents.consumer.get_parser_registry")
|
||||||
def testGenericParserException(self, m) -> None:
|
def testGenericParserException(self, m) -> None:
|
||||||
m.return_value = [
|
m.return_value.get_parser_for_file.return_value = FaultyGenericExceptionParser
|
||||||
(
|
|
||||||
None,
|
|
||||||
{
|
|
||||||
"parser": self.make_faulty_generic_exception_parser,
|
|
||||||
"mime_types": {"application/pdf": ".pdf"},
|
|
||||||
"weight": 0,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
]
|
|
||||||
|
|
||||||
with self.get_consumer(self.get_test_file()) as consumer:
|
with self.get_consumer(self.get_test_file()) as consumer:
|
||||||
with self.assertRaisesMessage(
|
with self.assertRaisesMessage(
|
||||||
@@ -642,6 +640,7 @@ class TestConsumer(
|
|||||||
self._assert_first_last_send_progress()
|
self._assert_first_last_send_progress()
|
||||||
|
|
||||||
@mock.patch("documents.consumer.generate_unique_filename")
|
@mock.patch("documents.consumer.generate_unique_filename")
|
||||||
|
@override_settings(FILENAME_FORMAT="{pk}")
|
||||||
def testFilenameHandlingFallsBackWhenGeneratedPathExceedsDbLimit(self, m):
|
def testFilenameHandlingFallsBackWhenGeneratedPathExceedsDbLimit(self, m):
|
||||||
m.side_effect = lambda doc, archive_filename=False: Path(
|
m.side_effect = lambda doc, archive_filename=False: Path(
|
||||||
("a" * 1100 + ".pdf") if not archive_filename else ("b" * 1100 + ".pdf"),
|
("a" * 1100 + ".pdf") if not archive_filename else ("b" * 1100 + ".pdf"),
|
||||||
@@ -1017,7 +1016,7 @@ class TestConsumer(
|
|||||||
self._assert_first_last_send_progress()
|
self._assert_first_last_send_progress()
|
||||||
|
|
||||||
@override_settings(FILENAME_FORMAT="{title}")
|
@override_settings(FILENAME_FORMAT="{title}")
|
||||||
@mock.patch("documents.parsers.document_consumer_declaration.send")
|
@mock.patch("documents.consumer.get_parser_registry")
|
||||||
def test_similar_filenames(self, m) -> None:
|
def test_similar_filenames(self, m) -> None:
|
||||||
shutil.copy(
|
shutil.copy(
|
||||||
Path(__file__).parent / "samples" / "simple.pdf",
|
Path(__file__).parent / "samples" / "simple.pdf",
|
||||||
@@ -1031,16 +1030,7 @@ class TestConsumer(
|
|||||||
Path(__file__).parent / "samples" / "simple-noalpha.png",
|
Path(__file__).parent / "samples" / "simple-noalpha.png",
|
||||||
settings.CONSUMPTION_DIR / "simple.png.pdf",
|
settings.CONSUMPTION_DIR / "simple.png.pdf",
|
||||||
)
|
)
|
||||||
m.return_value = [
|
m.return_value.get_parser_for_file.return_value = CopyParser
|
||||||
(
|
|
||||||
None,
|
|
||||||
{
|
|
||||||
"parser": CopyParser,
|
|
||||||
"mime_types": {"application/pdf": ".pdf", "image/png": ".png"},
|
|
||||||
"weight": 0,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
]
|
|
||||||
|
|
||||||
with self.get_consumer(settings.CONSUMPTION_DIR / "simple.png") as consumer:
|
with self.get_consumer(settings.CONSUMPTION_DIR / "simple.png") as consumer:
|
||||||
consumer.run()
|
consumer.run()
|
||||||
@@ -1068,8 +1058,10 @@ class TestConsumer(
|
|||||||
|
|
||||||
sanity_check()
|
sanity_check()
|
||||||
|
|
||||||
|
@mock.patch("documents.consumer.get_parser_registry")
|
||||||
@mock.patch("documents.consumer.run_subprocess")
|
@mock.patch("documents.consumer.run_subprocess")
|
||||||
def test_try_to_clean_invalid_pdf(self, m) -> None:
|
def test_try_to_clean_invalid_pdf(self, m, mock_registry) -> None:
|
||||||
|
mock_registry.return_value.get_parser_for_file.return_value = None
|
||||||
shutil.copy(
|
shutil.copy(
|
||||||
Path(__file__).parent / "samples" / "invalid_pdf.pdf",
|
Path(__file__).parent / "samples" / "invalid_pdf.pdf",
|
||||||
settings.CONSUMPTION_DIR / "invalid_pdf.pdf",
|
settings.CONSUMPTION_DIR / "invalid_pdf.pdf",
|
||||||
@@ -1090,11 +1082,11 @@ class TestConsumer(
|
|||||||
self.assertEqual(command[1], "--replace-input")
|
self.assertEqual(command[1], "--replace-input")
|
||||||
|
|
||||||
@mock.patch("paperless_mail.models.MailRule.objects.get")
|
@mock.patch("paperless_mail.models.MailRule.objects.get")
|
||||||
@mock.patch("paperless_mail.parsers.MailDocumentParser.parse")
|
@mock.patch("paperless.parsers.mail.MailDocumentParser.parse")
|
||||||
@mock.patch("documents.parsers.document_consumer_declaration.send")
|
@mock.patch("documents.consumer.get_parser_registry")
|
||||||
def test_mail_parser_receives_mailrule(
|
def test_mail_parser_receives_mailrule(
|
||||||
self,
|
self,
|
||||||
mock_consumer_declaration_send: mock.Mock,
|
mock_get_parser_registry: mock.Mock,
|
||||||
mock_mail_parser_parse: mock.Mock,
|
mock_mail_parser_parse: mock.Mock,
|
||||||
mock_mailrule_get: mock.Mock,
|
mock_mailrule_get: mock.Mock,
|
||||||
) -> None:
|
) -> None:
|
||||||
@@ -1106,25 +1098,21 @@ class TestConsumer(
|
|||||||
THEN:
|
THEN:
|
||||||
- The mail parser should receive the mail rule
|
- The mail parser should receive the mail rule
|
||||||
"""
|
"""
|
||||||
mock_consumer_declaration_send.return_value = [
|
from paperless.parsers.mail import MailDocumentParser
|
||||||
(
|
|
||||||
None,
|
mock_get_parser_registry.return_value.get_parser_for_file.return_value = (
|
||||||
{
|
MailDocumentParser
|
||||||
"parser": MailDocumentParser,
|
)
|
||||||
"mime_types": {"message/rfc822": ".eml"},
|
|
||||||
"weight": 0,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
]
|
|
||||||
mock_mailrule_get.return_value = mock.Mock(
|
mock_mailrule_get.return_value = mock.Mock(
|
||||||
pdf_layout=MailRule.PdfLayout.HTML_ONLY,
|
pdf_layout=MailRule.PdfLayout.HTML_ONLY,
|
||||||
)
|
)
|
||||||
with self.get_consumer(
|
with self.get_consumer(
|
||||||
filepath=(
|
filepath=(
|
||||||
Path(__file__).parent.parent.parent
|
Path(__file__).parent.parent.parent
|
||||||
/ Path("paperless_mail")
|
/ Path("paperless")
|
||||||
/ Path("tests")
|
/ Path("tests")
|
||||||
/ Path("samples")
|
/ Path("samples")
|
||||||
|
/ Path("mail")
|
||||||
).resolve()
|
).resolve()
|
||||||
/ "html.eml",
|
/ "html.eml",
|
||||||
source=DocumentSource.MailFetch,
|
source=DocumentSource.MailFetch,
|
||||||
@@ -1138,8 +1126,6 @@ class TestConsumer(
|
|||||||
mock_mail_parser_parse.assert_called_once_with(
|
mock_mail_parser_parse.assert_called_once_with(
|
||||||
consumer.working_copy,
|
consumer.working_copy,
|
||||||
"message/rfc822",
|
"message/rfc822",
|
||||||
file_name="sample.pdf",
|
|
||||||
mailrule=mock_mailrule_get.return_value,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
import datetime
|
import datetime
|
||||||
|
import hashlib
|
||||||
import logging
|
import logging
|
||||||
import tempfile
|
import tempfile
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
@@ -204,6 +205,52 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
)
|
)
|
||||||
self.assertEqual(document.filename, "none/none.pdf")
|
self.assertEqual(document.filename, "none/none.pdf")
|
||||||
|
|
||||||
|
@override_settings(FILENAME_FORMAT=None)
|
||||||
|
def test_stale_save_recovers_already_moved_files(self) -> None:
|
||||||
|
old_storage_path = StoragePath.objects.create(
|
||||||
|
name="old-path",
|
||||||
|
path="old/{{title}}",
|
||||||
|
)
|
||||||
|
new_storage_path = StoragePath.objects.create(
|
||||||
|
name="new-path",
|
||||||
|
path="new/{{title}}",
|
||||||
|
)
|
||||||
|
original_bytes = b"original"
|
||||||
|
archive_bytes = b"archive"
|
||||||
|
|
||||||
|
doc = Document.objects.create(
|
||||||
|
title="document",
|
||||||
|
mime_type="application/pdf",
|
||||||
|
checksum=hashlib.md5(original_bytes).hexdigest(),
|
||||||
|
archive_checksum=hashlib.md5(archive_bytes).hexdigest(),
|
||||||
|
filename="old/document.pdf",
|
||||||
|
archive_filename="old/document.pdf",
|
||||||
|
storage_path=old_storage_path,
|
||||||
|
)
|
||||||
|
create_source_path_directory(doc.source_path)
|
||||||
|
doc.source_path.write_bytes(original_bytes)
|
||||||
|
create_source_path_directory(doc.archive_path)
|
||||||
|
doc.archive_path.write_bytes(archive_bytes)
|
||||||
|
|
||||||
|
stale_doc = Document.objects.get(pk=doc.pk)
|
||||||
|
fresh_doc = Document.objects.get(pk=doc.pk)
|
||||||
|
fresh_doc.storage_path = new_storage_path
|
||||||
|
fresh_doc.save()
|
||||||
|
doc.refresh_from_db()
|
||||||
|
self.assertEqual(doc.filename, "new/document.pdf")
|
||||||
|
self.assertEqual(doc.archive_filename, "new/document.pdf")
|
||||||
|
|
||||||
|
stale_doc.storage_path = new_storage_path
|
||||||
|
stale_doc.save()
|
||||||
|
|
||||||
|
doc.refresh_from_db()
|
||||||
|
self.assertEqual(doc.filename, "new/document.pdf")
|
||||||
|
self.assertEqual(doc.archive_filename, "new/document.pdf")
|
||||||
|
self.assertIsFile(doc.source_path)
|
||||||
|
self.assertIsFile(doc.archive_path)
|
||||||
|
self.assertIsNotFile(settings.ORIGINALS_DIR / "old" / "document.pdf")
|
||||||
|
self.assertIsNotFile(settings.ARCHIVE_DIR / "old" / "document.pdf")
|
||||||
|
|
||||||
@override_settings(FILENAME_FORMAT="{correspondent}/{correspondent}")
|
@override_settings(FILENAME_FORMAT="{correspondent}/{correspondent}")
|
||||||
def test_document_delete(self) -> None:
|
def test_document_delete(self) -> None:
|
||||||
document = Document()
|
document = Document()
|
||||||
|
|||||||
@@ -1,7 +1,10 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
import filecmp
|
import filecmp
|
||||||
import shutil
|
import shutil
|
||||||
from io import StringIO
|
from io import StringIO
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
from unittest import mock
|
from unittest import mock
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
@@ -11,6 +14,9 @@ from django.core.management import call_command
|
|||||||
from django.test import TestCase
|
from django.test import TestCase
|
||||||
from django.test import override_settings
|
from django.test import override_settings
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from pytest_mock import MockerFixture
|
||||||
|
|
||||||
from documents.file_handling import generate_filename
|
from documents.file_handling import generate_filename
|
||||||
from documents.models import Document
|
from documents.models import Document
|
||||||
from documents.tasks import update_document_content_maybe_archive_file
|
from documents.tasks import update_document_content_maybe_archive_file
|
||||||
@@ -35,7 +41,7 @@ class TestArchiver(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
doc = self.make_models()
|
doc = self.make_models()
|
||||||
shutil.copy(sample_file, Path(self.dirs.originals_dir) / f"{doc.id:07}.pdf")
|
shutil.copy(sample_file, Path(self.dirs.originals_dir) / f"{doc.id:07}.pdf")
|
||||||
|
|
||||||
call_command("document_archiver", "--processes", "1")
|
call_command("document_archiver", "--processes", "1", skip_checks=True)
|
||||||
|
|
||||||
def test_handle_document(self) -> None:
|
def test_handle_document(self) -> None:
|
||||||
doc = self.make_models()
|
doc = self.make_models()
|
||||||
@@ -100,12 +106,12 @@ class TestArchiver(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
class TestMakeIndex(TestCase):
|
class TestMakeIndex(TestCase):
|
||||||
@mock.patch("documents.management.commands.document_index.index_reindex")
|
@mock.patch("documents.management.commands.document_index.index_reindex")
|
||||||
def test_reindex(self, m) -> None:
|
def test_reindex(self, m) -> None:
|
||||||
call_command("document_index", "reindex")
|
call_command("document_index", "reindex", skip_checks=True)
|
||||||
m.assert_called_once()
|
m.assert_called_once()
|
||||||
|
|
||||||
@mock.patch("documents.management.commands.document_index.index_optimize")
|
@mock.patch("documents.management.commands.document_index.index_optimize")
|
||||||
def test_optimize(self, m) -> None:
|
def test_optimize(self, m) -> None:
|
||||||
call_command("document_index", "optimize")
|
call_command("document_index", "optimize", skip_checks=True)
|
||||||
m.assert_called_once()
|
m.assert_called_once()
|
||||||
|
|
||||||
|
|
||||||
@@ -122,7 +128,7 @@ class TestRenamer(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
Path(doc.archive_path).touch()
|
Path(doc.archive_path).touch()
|
||||||
|
|
||||||
with override_settings(FILENAME_FORMAT="{correspondent}/{title}"):
|
with override_settings(FILENAME_FORMAT="{correspondent}/{title}"):
|
||||||
call_command("document_renamer")
|
call_command("document_renamer", skip_checks=True)
|
||||||
|
|
||||||
doc2 = Document.objects.get(id=doc.id)
|
doc2 = Document.objects.get(id=doc.id)
|
||||||
|
|
||||||
@@ -135,14 +141,32 @@ class TestRenamer(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
|
|
||||||
|
|
||||||
@pytest.mark.management
|
@pytest.mark.management
|
||||||
class TestCreateClassifier(TestCase):
|
class TestCreateClassifier:
|
||||||
@mock.patch(
|
def test_create_classifier(self, mocker: MockerFixture) -> None:
|
||||||
|
m = mocker.patch(
|
||||||
"documents.management.commands.document_create_classifier.train_classifier",
|
"documents.management.commands.document_create_classifier.train_classifier",
|
||||||
)
|
)
|
||||||
def test_create_classifier(self, m) -> None:
|
|
||||||
call_command("document_create_classifier")
|
|
||||||
|
|
||||||
m.assert_called_once()
|
call_command("document_create_classifier", skip_checks=True)
|
||||||
|
|
||||||
|
m.assert_called_once_with(scheduled=False, status_callback=mocker.ANY)
|
||||||
|
assert callable(m.call_args.kwargs["status_callback"])
|
||||||
|
|
||||||
|
def test_create_classifier_callback_output(self, mocker: MockerFixture) -> None:
|
||||||
|
"""Callback passed to train_classifier writes each phase message to the console."""
|
||||||
|
m = mocker.patch(
|
||||||
|
"documents.management.commands.document_create_classifier.train_classifier",
|
||||||
|
)
|
||||||
|
|
||||||
|
def invoke_callback(**kwargs):
|
||||||
|
kwargs["status_callback"]("Vectorizing document content...")
|
||||||
|
|
||||||
|
m.side_effect = invoke_callback
|
||||||
|
|
||||||
|
stdout = StringIO()
|
||||||
|
call_command("document_create_classifier", skip_checks=True, stdout=stdout)
|
||||||
|
|
||||||
|
assert "Vectorizing document content..." in stdout.getvalue()
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.management
|
@pytest.mark.management
|
||||||
@@ -152,7 +176,7 @@ class TestConvertMariaDBUUID(TestCase):
|
|||||||
m.alter_field.return_value = None
|
m.alter_field.return_value = None
|
||||||
|
|
||||||
stdout = StringIO()
|
stdout = StringIO()
|
||||||
call_command("convert_mariadb_uuid", stdout=stdout)
|
call_command("convert_mariadb_uuid", stdout=stdout, skip_checks=True)
|
||||||
|
|
||||||
m.assert_called_once()
|
m.assert_called_once()
|
||||||
|
|
||||||
@@ -167,6 +191,6 @@ class TestPruneAuditLogs(TestCase):
|
|||||||
object_id=1,
|
object_id=1,
|
||||||
action=LogEntry.Action.CREATE,
|
action=LogEntry.Action.CREATE,
|
||||||
)
|
)
|
||||||
call_command("prune_audit_logs")
|
call_command("prune_audit_logs", skip_checks=True)
|
||||||
|
|
||||||
self.assertEqual(LogEntry.objects.count(), 0)
|
self.assertEqual(LogEntry.objects.count(), 0)
|
||||||
|
|||||||
@@ -180,7 +180,7 @@ class TestExportImport(
|
|||||||
if data_only:
|
if data_only:
|
||||||
args += ["--data-only"]
|
args += ["--data-only"]
|
||||||
|
|
||||||
call_command(*args)
|
call_command(*args, skip_checks=True)
|
||||||
|
|
||||||
with (self.target / "manifest.json").open() as f:
|
with (self.target / "manifest.json").open() as f:
|
||||||
manifest = json.load(f)
|
manifest = json.load(f)
|
||||||
@@ -272,7 +272,12 @@ class TestExportImport(
|
|||||||
GroupObjectPermission.objects.all().delete()
|
GroupObjectPermission.objects.all().delete()
|
||||||
self.assertEqual(Document.objects.count(), 0)
|
self.assertEqual(Document.objects.count(), 0)
|
||||||
|
|
||||||
call_command("document_importer", "--no-progress-bar", self.target)
|
call_command(
|
||||||
|
"document_importer",
|
||||||
|
"--no-progress-bar",
|
||||||
|
self.target,
|
||||||
|
skip_checks=True,
|
||||||
|
)
|
||||||
self.assertEqual(Document.objects.count(), 4)
|
self.assertEqual(Document.objects.count(), 4)
|
||||||
self.assertEqual(Tag.objects.count(), 1)
|
self.assertEqual(Tag.objects.count(), 1)
|
||||||
self.assertEqual(Correspondent.objects.count(), 1)
|
self.assertEqual(Correspondent.objects.count(), 1)
|
||||||
@@ -438,7 +443,8 @@ class TestExportImport(
|
|||||||
filename="0000010.pdf",
|
filename="0000010.pdf",
|
||||||
mime_type="application/pdf",
|
mime_type="application/pdf",
|
||||||
)
|
)
|
||||||
self.assertRaises(FileNotFoundError, call_command, "document_exporter", target)
|
with self.assertRaises(FileNotFoundError):
|
||||||
|
call_command("document_exporter", target, skip_checks=True)
|
||||||
|
|
||||||
def test_export_zipped(self) -> None:
|
def test_export_zipped(self) -> None:
|
||||||
"""
|
"""
|
||||||
@@ -458,7 +464,7 @@ class TestExportImport(
|
|||||||
|
|
||||||
args = ["document_exporter", self.target, "--zip"]
|
args = ["document_exporter", self.target, "--zip"]
|
||||||
|
|
||||||
call_command(*args)
|
call_command(*args, skip_checks=True)
|
||||||
|
|
||||||
expected_file = str(
|
expected_file = str(
|
||||||
self.target / f"export-{timezone.localdate().isoformat()}.zip",
|
self.target / f"export-{timezone.localdate().isoformat()}.zip",
|
||||||
@@ -493,7 +499,7 @@ class TestExportImport(
|
|||||||
with override_settings(
|
with override_settings(
|
||||||
FILENAME_FORMAT="{created_year}/{correspondent}/{title}",
|
FILENAME_FORMAT="{created_year}/{correspondent}/{title}",
|
||||||
):
|
):
|
||||||
call_command(*args)
|
call_command(*args, skip_checks=True)
|
||||||
|
|
||||||
expected_file = str(
|
expected_file = str(
|
||||||
self.target / f"export-{timezone.localdate().isoformat()}.zip",
|
self.target / f"export-{timezone.localdate().isoformat()}.zip",
|
||||||
@@ -538,7 +544,7 @@ class TestExportImport(
|
|||||||
|
|
||||||
args = ["document_exporter", self.target, "--zip", "--delete"]
|
args = ["document_exporter", self.target, "--zip", "--delete"]
|
||||||
|
|
||||||
call_command(*args)
|
call_command(*args, skip_checks=True)
|
||||||
|
|
||||||
expected_file = str(
|
expected_file = str(
|
||||||
self.target / f"export-{timezone.localdate().isoformat()}.zip",
|
self.target / f"export-{timezone.localdate().isoformat()}.zip",
|
||||||
@@ -565,7 +571,7 @@ class TestExportImport(
|
|||||||
args = ["document_exporter", "/tmp/foo/bar"]
|
args = ["document_exporter", "/tmp/foo/bar"]
|
||||||
|
|
||||||
with self.assertRaises(CommandError) as e:
|
with self.assertRaises(CommandError) as e:
|
||||||
call_command(*args)
|
call_command(*args, skip_checks=True)
|
||||||
|
|
||||||
self.assertEqual("That path doesn't exist", str(e.exception))
|
self.assertEqual("That path doesn't exist", str(e.exception))
|
||||||
|
|
||||||
@@ -583,7 +589,7 @@ class TestExportImport(
|
|||||||
args = ["document_exporter", tmp_file.name]
|
args = ["document_exporter", tmp_file.name]
|
||||||
|
|
||||||
with self.assertRaises(CommandError) as e:
|
with self.assertRaises(CommandError) as e:
|
||||||
call_command(*args)
|
call_command(*args, skip_checks=True)
|
||||||
|
|
||||||
self.assertEqual("That path isn't a directory", str(e.exception))
|
self.assertEqual("That path isn't a directory", str(e.exception))
|
||||||
|
|
||||||
@@ -602,7 +608,7 @@ class TestExportImport(
|
|||||||
args = ["document_exporter", tmp_dir]
|
args = ["document_exporter", tmp_dir]
|
||||||
|
|
||||||
with self.assertRaises(CommandError) as e:
|
with self.assertRaises(CommandError) as e:
|
||||||
call_command(*args)
|
call_command(*args, skip_checks=True)
|
||||||
|
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
"That path doesn't appear to be writable",
|
"That path doesn't appear to be writable",
|
||||||
@@ -647,7 +653,12 @@ class TestExportImport(
|
|||||||
self.assertEqual(Document.objects.count(), 4)
|
self.assertEqual(Document.objects.count(), 4)
|
||||||
Document.objects.all().delete()
|
Document.objects.all().delete()
|
||||||
self.assertEqual(Document.objects.count(), 0)
|
self.assertEqual(Document.objects.count(), 0)
|
||||||
call_command("document_importer", "--no-progress-bar", self.target)
|
call_command(
|
||||||
|
"document_importer",
|
||||||
|
"--no-progress-bar",
|
||||||
|
self.target,
|
||||||
|
skip_checks=True,
|
||||||
|
)
|
||||||
self.assertEqual(Document.objects.count(), 4)
|
self.assertEqual(Document.objects.count(), 4)
|
||||||
|
|
||||||
def test_no_thumbnail(self) -> None:
|
def test_no_thumbnail(self) -> None:
|
||||||
@@ -690,7 +701,12 @@ class TestExportImport(
|
|||||||
self.assertEqual(Document.objects.count(), 4)
|
self.assertEqual(Document.objects.count(), 4)
|
||||||
Document.objects.all().delete()
|
Document.objects.all().delete()
|
||||||
self.assertEqual(Document.objects.count(), 0)
|
self.assertEqual(Document.objects.count(), 0)
|
||||||
call_command("document_importer", "--no-progress-bar", self.target)
|
call_command(
|
||||||
|
"document_importer",
|
||||||
|
"--no-progress-bar",
|
||||||
|
self.target,
|
||||||
|
skip_checks=True,
|
||||||
|
)
|
||||||
self.assertEqual(Document.objects.count(), 4)
|
self.assertEqual(Document.objects.count(), 4)
|
||||||
|
|
||||||
def test_split_manifest(self) -> None:
|
def test_split_manifest(self) -> None:
|
||||||
@@ -721,7 +737,12 @@ class TestExportImport(
|
|||||||
Document.objects.all().delete()
|
Document.objects.all().delete()
|
||||||
CustomFieldInstance.objects.all().delete()
|
CustomFieldInstance.objects.all().delete()
|
||||||
self.assertEqual(Document.objects.count(), 0)
|
self.assertEqual(Document.objects.count(), 0)
|
||||||
call_command("document_importer", "--no-progress-bar", self.target)
|
call_command(
|
||||||
|
"document_importer",
|
||||||
|
"--no-progress-bar",
|
||||||
|
self.target,
|
||||||
|
skip_checks=True,
|
||||||
|
)
|
||||||
self.assertEqual(Document.objects.count(), 4)
|
self.assertEqual(Document.objects.count(), 4)
|
||||||
self.assertEqual(CustomFieldInstance.objects.count(), 1)
|
self.assertEqual(CustomFieldInstance.objects.count(), 1)
|
||||||
|
|
||||||
@@ -746,7 +767,12 @@ class TestExportImport(
|
|||||||
self.assertEqual(Document.objects.count(), 4)
|
self.assertEqual(Document.objects.count(), 4)
|
||||||
Document.objects.all().delete()
|
Document.objects.all().delete()
|
||||||
self.assertEqual(Document.objects.count(), 0)
|
self.assertEqual(Document.objects.count(), 0)
|
||||||
call_command("document_importer", "--no-progress-bar", self.target)
|
call_command(
|
||||||
|
"document_importer",
|
||||||
|
"--no-progress-bar",
|
||||||
|
self.target,
|
||||||
|
skip_checks=True,
|
||||||
|
)
|
||||||
self.assertEqual(Document.objects.count(), 4)
|
self.assertEqual(Document.objects.count(), 4)
|
||||||
|
|
||||||
def test_folder_prefix_with_split(self) -> None:
|
def test_folder_prefix_with_split(self) -> None:
|
||||||
@@ -771,7 +797,12 @@ class TestExportImport(
|
|||||||
self.assertEqual(Document.objects.count(), 4)
|
self.assertEqual(Document.objects.count(), 4)
|
||||||
Document.objects.all().delete()
|
Document.objects.all().delete()
|
||||||
self.assertEqual(Document.objects.count(), 0)
|
self.assertEqual(Document.objects.count(), 0)
|
||||||
call_command("document_importer", "--no-progress-bar", self.target)
|
call_command(
|
||||||
|
"document_importer",
|
||||||
|
"--no-progress-bar",
|
||||||
|
self.target,
|
||||||
|
skip_checks=True,
|
||||||
|
)
|
||||||
self.assertEqual(Document.objects.count(), 4)
|
self.assertEqual(Document.objects.count(), 4)
|
||||||
|
|
||||||
def test_import_db_transaction_failed(self) -> None:
|
def test_import_db_transaction_failed(self) -> None:
|
||||||
@@ -813,7 +844,12 @@ class TestExportImport(
|
|||||||
self.user = User.objects.create(username="temp_admin")
|
self.user = User.objects.create(username="temp_admin")
|
||||||
|
|
||||||
with self.assertRaises(IntegrityError):
|
with self.assertRaises(IntegrityError):
|
||||||
call_command("document_importer", "--no-progress-bar", self.target)
|
call_command(
|
||||||
|
"document_importer",
|
||||||
|
"--no-progress-bar",
|
||||||
|
self.target,
|
||||||
|
skip_checks=True,
|
||||||
|
)
|
||||||
|
|
||||||
self.assertEqual(ContentType.objects.count(), num_content_type_objects)
|
self.assertEqual(ContentType.objects.count(), num_content_type_objects)
|
||||||
self.assertEqual(Permission.objects.count(), num_permission_objects + 1)
|
self.assertEqual(Permission.objects.count(), num_permission_objects + 1)
|
||||||
@@ -864,6 +900,7 @@ class TestExportImport(
|
|||||||
"--no-progress-bar",
|
"--no-progress-bar",
|
||||||
"--data-only",
|
"--data-only",
|
||||||
self.target,
|
self.target,
|
||||||
|
skip_checks=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
self.assertEqual(Document.objects.all().count(), 4)
|
self.assertEqual(Document.objects.all().count(), 4)
|
||||||
@@ -923,6 +960,7 @@ class TestCryptExportImport(
|
|||||||
"--passphrase",
|
"--passphrase",
|
||||||
"securepassword",
|
"securepassword",
|
||||||
self.target,
|
self.target,
|
||||||
|
skip_checks=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
self.assertIsFile(self.target / "metadata.json")
|
self.assertIsFile(self.target / "metadata.json")
|
||||||
@@ -948,6 +986,7 @@ class TestCryptExportImport(
|
|||||||
"--passphrase",
|
"--passphrase",
|
||||||
"securepassword",
|
"securepassword",
|
||||||
self.target,
|
self.target,
|
||||||
|
skip_checks=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
account = MailAccount.objects.first()
|
account = MailAccount.objects.first()
|
||||||
@@ -976,6 +1015,7 @@ class TestCryptExportImport(
|
|||||||
"--passphrase",
|
"--passphrase",
|
||||||
"securepassword",
|
"securepassword",
|
||||||
self.target,
|
self.target,
|
||||||
|
skip_checks=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
with self.assertRaises(CommandError) as err:
|
with self.assertRaises(CommandError) as err:
|
||||||
@@ -983,6 +1023,7 @@ class TestCryptExportImport(
|
|||||||
"document_importer",
|
"document_importer",
|
||||||
"--no-progress-bar",
|
"--no-progress-bar",
|
||||||
self.target,
|
self.target,
|
||||||
|
skip_checks=True,
|
||||||
)
|
)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
err.msg,
|
err.msg,
|
||||||
@@ -1014,6 +1055,7 @@ class TestCryptExportImport(
|
|||||||
"--no-progress-bar",
|
"--no-progress-bar",
|
||||||
str(self.target),
|
str(self.target),
|
||||||
stdout=stdout,
|
stdout=stdout,
|
||||||
|
skip_checks=True,
|
||||||
)
|
)
|
||||||
stdout.seek(0)
|
stdout.seek(0)
|
||||||
self.assertIn(
|
self.assertIn(
|
||||||
|
|||||||
@@ -21,6 +21,7 @@ class TestFuzzyMatchCommand(TestCase):
|
|||||||
*args,
|
*args,
|
||||||
stdout=stdout,
|
stdout=stdout,
|
||||||
stderr=stderr,
|
stderr=stderr,
|
||||||
|
skip_checks=True,
|
||||||
**kwargs,
|
**kwargs,
|
||||||
)
|
)
|
||||||
return stdout.getvalue(), stderr.getvalue()
|
return stdout.getvalue(), stderr.getvalue()
|
||||||
|
|||||||
@@ -41,6 +41,7 @@ class TestCommandImport(
|
|||||||
"document_importer",
|
"document_importer",
|
||||||
"--no-progress-bar",
|
"--no-progress-bar",
|
||||||
str(self.dirs.scratch_dir),
|
str(self.dirs.scratch_dir),
|
||||||
|
skip_checks=True,
|
||||||
)
|
)
|
||||||
self.assertIn(
|
self.assertIn(
|
||||||
"That directory doesn't appear to contain a manifest.json file.",
|
"That directory doesn't appear to contain a manifest.json file.",
|
||||||
@@ -67,6 +68,7 @@ class TestCommandImport(
|
|||||||
"document_importer",
|
"document_importer",
|
||||||
"--no-progress-bar",
|
"--no-progress-bar",
|
||||||
str(self.dirs.scratch_dir),
|
str(self.dirs.scratch_dir),
|
||||||
|
skip_checks=True,
|
||||||
)
|
)
|
||||||
self.assertIn(
|
self.assertIn(
|
||||||
"The manifest file contains a record which does not refer to an actual document file.",
|
"The manifest file contains a record which does not refer to an actual document file.",
|
||||||
@@ -96,6 +98,7 @@ class TestCommandImport(
|
|||||||
"document_importer",
|
"document_importer",
|
||||||
"--no-progress-bar",
|
"--no-progress-bar",
|
||||||
str(self.dirs.scratch_dir),
|
str(self.dirs.scratch_dir),
|
||||||
|
skip_checks=True,
|
||||||
)
|
)
|
||||||
self.assertIn('The manifest file refers to "noexist.pdf"', str(e.exception))
|
self.assertIn('The manifest file refers to "noexist.pdf"', str(e.exception))
|
||||||
|
|
||||||
@@ -157,7 +160,7 @@ class TestCommandImport(
|
|||||||
- CommandError is raised indicating the issue
|
- CommandError is raised indicating the issue
|
||||||
"""
|
"""
|
||||||
with self.assertRaises(CommandError) as cm:
|
with self.assertRaises(CommandError) as cm:
|
||||||
call_command("document_importer", Path("/tmp/notapath"))
|
call_command("document_importer", Path("/tmp/notapath"), skip_checks=True)
|
||||||
self.assertIn("That path doesn't exist", str(cm.exception))
|
self.assertIn("That path doesn't exist", str(cm.exception))
|
||||||
|
|
||||||
def test_import_source_not_readable(self) -> None:
|
def test_import_source_not_readable(self) -> None:
|
||||||
@@ -173,7 +176,7 @@ class TestCommandImport(
|
|||||||
path = Path(temp_dir)
|
path = Path(temp_dir)
|
||||||
path.chmod(0o222)
|
path.chmod(0o222)
|
||||||
with self.assertRaises(CommandError) as cm:
|
with self.assertRaises(CommandError) as cm:
|
||||||
call_command("document_importer", path)
|
call_command("document_importer", path, skip_checks=True)
|
||||||
self.assertIn(
|
self.assertIn(
|
||||||
"That path doesn't appear to be readable",
|
"That path doesn't appear to be readable",
|
||||||
str(cm.exception),
|
str(cm.exception),
|
||||||
@@ -193,7 +196,12 @@ class TestCommandImport(
|
|||||||
self.assertIsNotFile(path)
|
self.assertIsNotFile(path)
|
||||||
|
|
||||||
with self.assertRaises(CommandError) as e:
|
with self.assertRaises(CommandError) as e:
|
||||||
call_command("document_importer", "--no-progress-bar", str(path))
|
call_command(
|
||||||
|
"document_importer",
|
||||||
|
"--no-progress-bar",
|
||||||
|
str(path),
|
||||||
|
skip_checks=True,
|
||||||
|
)
|
||||||
self.assertIn("That path doesn't exist", str(e.exception))
|
self.assertIn("That path doesn't exist", str(e.exception))
|
||||||
|
|
||||||
def test_import_files_exist(self) -> None:
|
def test_import_files_exist(self) -> None:
|
||||||
@@ -218,6 +226,7 @@ class TestCommandImport(
|
|||||||
"--no-progress-bar",
|
"--no-progress-bar",
|
||||||
str(self.dirs.scratch_dir),
|
str(self.dirs.scratch_dir),
|
||||||
stdout=stdout,
|
stdout=stdout,
|
||||||
|
skip_checks=True,
|
||||||
)
|
)
|
||||||
stdout.seek(0)
|
stdout.seek(0)
|
||||||
self.assertIn(
|
self.assertIn(
|
||||||
@@ -246,6 +255,7 @@ class TestCommandImport(
|
|||||||
"--no-progress-bar",
|
"--no-progress-bar",
|
||||||
str(self.dirs.scratch_dir),
|
str(self.dirs.scratch_dir),
|
||||||
stdout=stdout,
|
stdout=stdout,
|
||||||
|
skip_checks=True,
|
||||||
)
|
)
|
||||||
stdout.seek(0)
|
stdout.seek(0)
|
||||||
self.assertIn(
|
self.assertIn(
|
||||||
@@ -282,6 +292,7 @@ class TestCommandImport(
|
|||||||
"--no-progress-bar",
|
"--no-progress-bar",
|
||||||
str(self.dirs.scratch_dir),
|
str(self.dirs.scratch_dir),
|
||||||
stdout=stdout,
|
stdout=stdout,
|
||||||
|
skip_checks=True,
|
||||||
)
|
)
|
||||||
stdout.seek(0)
|
stdout.seek(0)
|
||||||
self.assertIn(
|
self.assertIn(
|
||||||
@@ -309,6 +320,7 @@ class TestCommandImport(
|
|||||||
"--no-progress-bar",
|
"--no-progress-bar",
|
||||||
str(self.dirs.scratch_dir),
|
str(self.dirs.scratch_dir),
|
||||||
stdout=stdout,
|
stdout=stdout,
|
||||||
|
skip_checks=True,
|
||||||
)
|
)
|
||||||
stdout.seek(0)
|
stdout.seek(0)
|
||||||
stdout_str = str(stdout.read())
|
stdout_str = str(stdout.read())
|
||||||
@@ -338,6 +350,7 @@ class TestCommandImport(
|
|||||||
"--no-progress-bar",
|
"--no-progress-bar",
|
||||||
str(self.dirs.scratch_dir),
|
str(self.dirs.scratch_dir),
|
||||||
stdout=stdout,
|
stdout=stdout,
|
||||||
|
skip_checks=True,
|
||||||
)
|
)
|
||||||
stdout.seek(0)
|
stdout.seek(0)
|
||||||
stdout_str = str(stdout.read())
|
stdout_str = str(stdout.read())
|
||||||
@@ -377,6 +390,7 @@ class TestCommandImport(
|
|||||||
"--no-progress-bar",
|
"--no-progress-bar",
|
||||||
str(zip_path),
|
str(zip_path),
|
||||||
stdout=stdout,
|
stdout=stdout,
|
||||||
|
skip_checks=True,
|
||||||
)
|
)
|
||||||
stdout.seek(0)
|
stdout.seek(0)
|
||||||
stdout_str = str(stdout.read())
|
stdout_str = str(stdout.read())
|
||||||
|
|||||||
@@ -139,7 +139,7 @@ class TestRetaggerTags(DirectoriesMixin):
|
|||||||
@pytest.mark.usefixtures("documents")
|
@pytest.mark.usefixtures("documents")
|
||||||
def test_add_tags(self, tags: TagTuple) -> None:
|
def test_add_tags(self, tags: TagTuple) -> None:
|
||||||
tag_first, tag_second, *_ = tags
|
tag_first, tag_second, *_ = tags
|
||||||
call_command("document_retagger", "--tags")
|
call_command("document_retagger", "--tags", skip_checks=True)
|
||||||
d_first, d_second, d_unrelated, d_auto = _get_docs()
|
d_first, d_second, d_unrelated, d_auto = _get_docs()
|
||||||
|
|
||||||
assert d_first.tags.count() == 1
|
assert d_first.tags.count() == 1
|
||||||
@@ -158,7 +158,7 @@ class TestRetaggerTags(DirectoriesMixin):
|
|||||||
tag_first, tag_second, tag_inbox, tag_no_match, _ = tags
|
tag_first, tag_second, tag_inbox, tag_no_match, _ = tags
|
||||||
d1.tags.add(tag_second)
|
d1.tags.add(tag_second)
|
||||||
|
|
||||||
call_command("document_retagger", "--tags", "--overwrite")
|
call_command("document_retagger", "--tags", "--overwrite", skip_checks=True)
|
||||||
|
|
||||||
d_first, d_second, d_unrelated, d_auto = _get_docs()
|
d_first, d_second, d_unrelated, d_auto = _get_docs()
|
||||||
|
|
||||||
@@ -180,7 +180,13 @@ class TestRetaggerTags(DirectoriesMixin):
|
|||||||
],
|
],
|
||||||
)
|
)
|
||||||
def test_suggest_does_not_apply_tags(self, extra_args: list[str]) -> None:
|
def test_suggest_does_not_apply_tags(self, extra_args: list[str]) -> None:
|
||||||
call_command("document_retagger", "--tags", "--suggest", *extra_args)
|
call_command(
|
||||||
|
"document_retagger",
|
||||||
|
"--tags",
|
||||||
|
"--suggest",
|
||||||
|
*extra_args,
|
||||||
|
skip_checks=True,
|
||||||
|
)
|
||||||
d_first, d_second, _, d_auto = _get_docs()
|
d_first, d_second, _, d_auto = _get_docs()
|
||||||
|
|
||||||
assert d_first.tags.count() == 0
|
assert d_first.tags.count() == 0
|
||||||
@@ -199,7 +205,7 @@ class TestRetaggerDocumentType(DirectoriesMixin):
|
|||||||
@pytest.mark.usefixtures("documents")
|
@pytest.mark.usefixtures("documents")
|
||||||
def test_add_type(self, document_types: DocumentTypeTuple) -> None:
|
def test_add_type(self, document_types: DocumentTypeTuple) -> None:
|
||||||
dt_first, dt_second = document_types
|
dt_first, dt_second = document_types
|
||||||
call_command("document_retagger", "--document_type")
|
call_command("document_retagger", "--document_type", skip_checks=True)
|
||||||
d_first, d_second, _, _ = _get_docs()
|
d_first, d_second, _, _ = _get_docs()
|
||||||
|
|
||||||
assert d_first.document_type == dt_first
|
assert d_first.document_type == dt_first
|
||||||
@@ -214,7 +220,13 @@ class TestRetaggerDocumentType(DirectoriesMixin):
|
|||||||
],
|
],
|
||||||
)
|
)
|
||||||
def test_suggest_does_not_apply_document_type(self, extra_args: list[str]) -> None:
|
def test_suggest_does_not_apply_document_type(self, extra_args: list[str]) -> None:
|
||||||
call_command("document_retagger", "--document_type", "--suggest", *extra_args)
|
call_command(
|
||||||
|
"document_retagger",
|
||||||
|
"--document_type",
|
||||||
|
"--suggest",
|
||||||
|
*extra_args,
|
||||||
|
skip_checks=True,
|
||||||
|
)
|
||||||
d_first, d_second, _, _ = _get_docs()
|
d_first, d_second, _, _ = _get_docs()
|
||||||
|
|
||||||
assert d_first.document_type is None
|
assert d_first.document_type is None
|
||||||
@@ -243,7 +255,12 @@ class TestRetaggerDocumentType(DirectoriesMixin):
|
|||||||
)
|
)
|
||||||
doc = DocumentFactory(content="ambiguous content")
|
doc = DocumentFactory(content="ambiguous content")
|
||||||
|
|
||||||
call_command("document_retagger", "--document_type", *use_first_flag)
|
call_command(
|
||||||
|
"document_retagger",
|
||||||
|
"--document_type",
|
||||||
|
*use_first_flag,
|
||||||
|
skip_checks=True,
|
||||||
|
)
|
||||||
|
|
||||||
doc.refresh_from_db()
|
doc.refresh_from_db()
|
||||||
assert (doc.document_type is not None) is expects_assignment
|
assert (doc.document_type is not None) is expects_assignment
|
||||||
@@ -260,7 +277,7 @@ class TestRetaggerCorrespondent(DirectoriesMixin):
|
|||||||
@pytest.mark.usefixtures("documents")
|
@pytest.mark.usefixtures("documents")
|
||||||
def test_add_correspondent(self, correspondents: CorrespondentTuple) -> None:
|
def test_add_correspondent(self, correspondents: CorrespondentTuple) -> None:
|
||||||
c_first, c_second = correspondents
|
c_first, c_second = correspondents
|
||||||
call_command("document_retagger", "--correspondent")
|
call_command("document_retagger", "--correspondent", skip_checks=True)
|
||||||
d_first, d_second, _, _ = _get_docs()
|
d_first, d_second, _, _ = _get_docs()
|
||||||
|
|
||||||
assert d_first.correspondent == c_first
|
assert d_first.correspondent == c_first
|
||||||
@@ -275,7 +292,13 @@ class TestRetaggerCorrespondent(DirectoriesMixin):
|
|||||||
],
|
],
|
||||||
)
|
)
|
||||||
def test_suggest_does_not_apply_correspondent(self, extra_args: list[str]) -> None:
|
def test_suggest_does_not_apply_correspondent(self, extra_args: list[str]) -> None:
|
||||||
call_command("document_retagger", "--correspondent", "--suggest", *extra_args)
|
call_command(
|
||||||
|
"document_retagger",
|
||||||
|
"--correspondent",
|
||||||
|
"--suggest",
|
||||||
|
*extra_args,
|
||||||
|
skip_checks=True,
|
||||||
|
)
|
||||||
d_first, d_second, _, _ = _get_docs()
|
d_first, d_second, _, _ = _get_docs()
|
||||||
|
|
||||||
assert d_first.correspondent is None
|
assert d_first.correspondent is None
|
||||||
@@ -304,7 +327,12 @@ class TestRetaggerCorrespondent(DirectoriesMixin):
|
|||||||
)
|
)
|
||||||
doc = DocumentFactory(content="ambiguous content")
|
doc = DocumentFactory(content="ambiguous content")
|
||||||
|
|
||||||
call_command("document_retagger", "--correspondent", *use_first_flag)
|
call_command(
|
||||||
|
"document_retagger",
|
||||||
|
"--correspondent",
|
||||||
|
*use_first_flag,
|
||||||
|
skip_checks=True,
|
||||||
|
)
|
||||||
|
|
||||||
doc.refresh_from_db()
|
doc.refresh_from_db()
|
||||||
assert (doc.correspondent is not None) is expects_assignment
|
assert (doc.correspondent is not None) is expects_assignment
|
||||||
@@ -326,7 +354,7 @@ class TestRetaggerStoragePath(DirectoriesMixin):
|
|||||||
THEN matching documents get the correct path; existing path is unchanged
|
THEN matching documents get the correct path; existing path is unchanged
|
||||||
"""
|
"""
|
||||||
sp1, sp2, sp3 = storage_paths
|
sp1, sp2, sp3 = storage_paths
|
||||||
call_command("document_retagger", "--storage_path")
|
call_command("document_retagger", "--storage_path", skip_checks=True)
|
||||||
d_first, d_second, d_unrelated, d_auto = _get_docs()
|
d_first, d_second, d_unrelated, d_auto = _get_docs()
|
||||||
|
|
||||||
assert d_first.storage_path == sp2
|
assert d_first.storage_path == sp2
|
||||||
@@ -342,7 +370,12 @@ class TestRetaggerStoragePath(DirectoriesMixin):
|
|||||||
THEN the existing path is replaced by the newly matched path
|
THEN the existing path is replaced by the newly matched path
|
||||||
"""
|
"""
|
||||||
sp1, sp2, _ = storage_paths
|
sp1, sp2, _ = storage_paths
|
||||||
call_command("document_retagger", "--storage_path", "--overwrite")
|
call_command(
|
||||||
|
"document_retagger",
|
||||||
|
"--storage_path",
|
||||||
|
"--overwrite",
|
||||||
|
skip_checks=True,
|
||||||
|
)
|
||||||
d_first, d_second, d_unrelated, d_auto = _get_docs()
|
d_first, d_second, d_unrelated, d_auto = _get_docs()
|
||||||
|
|
||||||
assert d_first.storage_path == sp2
|
assert d_first.storage_path == sp2
|
||||||
@@ -373,7 +406,12 @@ class TestRetaggerStoragePath(DirectoriesMixin):
|
|||||||
)
|
)
|
||||||
doc = DocumentFactory(content="ambiguous content")
|
doc = DocumentFactory(content="ambiguous content")
|
||||||
|
|
||||||
call_command("document_retagger", "--storage_path", *use_first_flag)
|
call_command(
|
||||||
|
"document_retagger",
|
||||||
|
"--storage_path",
|
||||||
|
*use_first_flag,
|
||||||
|
skip_checks=True,
|
||||||
|
)
|
||||||
|
|
||||||
doc.refresh_from_db()
|
doc.refresh_from_db()
|
||||||
assert (doc.storage_path is not None) is expects_assignment
|
assert (doc.storage_path is not None) is expects_assignment
|
||||||
@@ -402,7 +440,13 @@ class TestRetaggerIdRange(DirectoriesMixin):
|
|||||||
expected_count: int,
|
expected_count: int,
|
||||||
) -> None:
|
) -> None:
|
||||||
DocumentFactory(content="NOT the first document")
|
DocumentFactory(content="NOT the first document")
|
||||||
call_command("document_retagger", "--tags", "--id-range", *id_range_args)
|
call_command(
|
||||||
|
"document_retagger",
|
||||||
|
"--tags",
|
||||||
|
"--id-range",
|
||||||
|
*id_range_args,
|
||||||
|
skip_checks=True,
|
||||||
|
)
|
||||||
tag_first, *_ = tags
|
tag_first, *_ = tags
|
||||||
assert Document.objects.filter(tags__id=tag_first.id).count() == expected_count
|
assert Document.objects.filter(tags__id=tag_first.id).count() == expected_count
|
||||||
|
|
||||||
@@ -416,7 +460,7 @@ class TestRetaggerIdRange(DirectoriesMixin):
|
|||||||
)
|
)
|
||||||
def test_id_range_invalid_arguments_raise(self, args: list[str]) -> None:
|
def test_id_range_invalid_arguments_raise(self, args: list[str]) -> None:
|
||||||
with pytest.raises((CommandError, SystemExit)):
|
with pytest.raises((CommandError, SystemExit)):
|
||||||
call_command("document_retagger", *args)
|
call_command("document_retagger", *args, skip_checks=True)
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@@ -430,12 +474,12 @@ class TestRetaggerEdgeCases(DirectoriesMixin):
|
|||||||
@pytest.mark.usefixtures("documents")
|
@pytest.mark.usefixtures("documents")
|
||||||
def test_no_targets_exits_cleanly(self) -> None:
|
def test_no_targets_exits_cleanly(self) -> None:
|
||||||
"""Calling the retagger with no classifier targets should not raise."""
|
"""Calling the retagger with no classifier targets should not raise."""
|
||||||
call_command("document_retagger")
|
call_command("document_retagger", skip_checks=True)
|
||||||
|
|
||||||
@pytest.mark.usefixtures("documents")
|
@pytest.mark.usefixtures("documents")
|
||||||
def test_inbox_only_skips_non_inbox_documents(self) -> None:
|
def test_inbox_only_skips_non_inbox_documents(self) -> None:
|
||||||
"""--inbox-only must restrict processing to documents with an inbox tag."""
|
"""--inbox-only must restrict processing to documents with an inbox tag."""
|
||||||
call_command("document_retagger", "--tags", "--inbox-only")
|
call_command("document_retagger", "--tags", "--inbox-only", skip_checks=True)
|
||||||
d_first, _, d_unrelated, _ = _get_docs()
|
d_first, _, d_unrelated, _ = _get_docs()
|
||||||
|
|
||||||
assert d_first.tags.count() == 0
|
assert d_first.tags.count() == 0
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ class TestManageSuperUser(DirectoriesMixin, TestCase):
|
|||||||
"--no-color",
|
"--no-color",
|
||||||
stdout=out,
|
stdout=out,
|
||||||
stderr=StringIO(),
|
stderr=StringIO(),
|
||||||
|
skip_checks=True,
|
||||||
)
|
)
|
||||||
return out.getvalue()
|
return out.getvalue()
|
||||||
|
|
||||||
|
|||||||
@@ -85,13 +85,20 @@ class TestMakeThumbnails(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
def test_command(self) -> None:
|
def test_command(self) -> None:
|
||||||
self.assertIsNotFile(self.d1.thumbnail_path)
|
self.assertIsNotFile(self.d1.thumbnail_path)
|
||||||
self.assertIsNotFile(self.d2.thumbnail_path)
|
self.assertIsNotFile(self.d2.thumbnail_path)
|
||||||
call_command("document_thumbnails", "--processes", "1")
|
call_command("document_thumbnails", "--processes", "1", skip_checks=True)
|
||||||
self.assertIsFile(self.d1.thumbnail_path)
|
self.assertIsFile(self.d1.thumbnail_path)
|
||||||
self.assertIsFile(self.d2.thumbnail_path)
|
self.assertIsFile(self.d2.thumbnail_path)
|
||||||
|
|
||||||
def test_command_documentid(self) -> None:
|
def test_command_documentid(self) -> None:
|
||||||
self.assertIsNotFile(self.d1.thumbnail_path)
|
self.assertIsNotFile(self.d1.thumbnail_path)
|
||||||
self.assertIsNotFile(self.d2.thumbnail_path)
|
self.assertIsNotFile(self.d2.thumbnail_path)
|
||||||
call_command("document_thumbnails", "--processes", "1", "-d", f"{self.d1.id}")
|
call_command(
|
||||||
|
"document_thumbnails",
|
||||||
|
"--processes",
|
||||||
|
"1",
|
||||||
|
"-d",
|
||||||
|
f"{self.d1.id}",
|
||||||
|
skip_checks=True,
|
||||||
|
)
|
||||||
self.assertIsFile(self.d1.thumbnail_path)
|
self.assertIsFile(self.d1.thumbnail_path)
|
||||||
self.assertIsNotFile(self.d2.thumbnail_path)
|
self.assertIsNotFile(self.d2.thumbnail_path)
|
||||||
|
|||||||
@@ -1,130 +1,14 @@
|
|||||||
from tempfile import TemporaryDirectory
|
|
||||||
from unittest import mock
|
|
||||||
|
|
||||||
from django.apps import apps
|
|
||||||
from django.test import TestCase
|
from django.test import TestCase
|
||||||
from django.test import override_settings
|
from django.test import override_settings
|
||||||
|
|
||||||
from documents.parsers import get_default_file_extension
|
from documents.parsers import get_default_file_extension
|
||||||
from documents.parsers import get_parser_class_for_mime_type
|
|
||||||
from documents.parsers import get_supported_file_extensions
|
from documents.parsers import get_supported_file_extensions
|
||||||
from documents.parsers import is_file_ext_supported
|
from documents.parsers import is_file_ext_supported
|
||||||
|
from paperless.parsers.registry import get_parser_registry
|
||||||
|
from paperless.parsers.registry import reset_parser_registry
|
||||||
|
from paperless.parsers.tesseract import RasterisedDocumentParser
|
||||||
from paperless.parsers.text import TextDocumentParser
|
from paperless.parsers.text import TextDocumentParser
|
||||||
from paperless_tesseract.parsers import RasterisedDocumentParser
|
from paperless.parsers.tika import TikaDocumentParser
|
||||||
from paperless_tika.parsers import TikaDocumentParser
|
|
||||||
|
|
||||||
|
|
||||||
class TestParserDiscovery(TestCase):
|
|
||||||
@mock.patch("documents.parsers.document_consumer_declaration.send")
|
|
||||||
def test_get_parser_class_1_parser(self, m, *args) -> None:
|
|
||||||
"""
|
|
||||||
GIVEN:
|
|
||||||
- Parser declared for a given mimetype
|
|
||||||
WHEN:
|
|
||||||
- Attempt to get parser for the mimetype
|
|
||||||
THEN:
|
|
||||||
- Declared parser class is returned
|
|
||||||
"""
|
|
||||||
|
|
||||||
class DummyParser:
|
|
||||||
pass
|
|
||||||
|
|
||||||
m.return_value = (
|
|
||||||
(
|
|
||||||
None,
|
|
||||||
{
|
|
||||||
"weight": 0,
|
|
||||||
"parser": DummyParser,
|
|
||||||
"mime_types": {"application/pdf": ".pdf"},
|
|
||||||
},
|
|
||||||
),
|
|
||||||
)
|
|
||||||
|
|
||||||
self.assertEqual(get_parser_class_for_mime_type("application/pdf"), DummyParser)
|
|
||||||
|
|
||||||
@mock.patch("documents.parsers.document_consumer_declaration.send")
|
|
||||||
def test_get_parser_class_n_parsers(self, m, *args) -> None:
|
|
||||||
"""
|
|
||||||
GIVEN:
|
|
||||||
- Two parsers declared for a given mimetype
|
|
||||||
- Second parser has a higher weight
|
|
||||||
WHEN:
|
|
||||||
- Attempt to get parser for the mimetype
|
|
||||||
THEN:
|
|
||||||
- Second parser class is returned
|
|
||||||
"""
|
|
||||||
|
|
||||||
class DummyParser1:
|
|
||||||
pass
|
|
||||||
|
|
||||||
class DummyParser2:
|
|
||||||
pass
|
|
||||||
|
|
||||||
m.return_value = (
|
|
||||||
(
|
|
||||||
None,
|
|
||||||
{
|
|
||||||
"weight": 0,
|
|
||||||
"parser": DummyParser1,
|
|
||||||
"mime_types": {"application/pdf": ".pdf"},
|
|
||||||
},
|
|
||||||
),
|
|
||||||
(
|
|
||||||
None,
|
|
||||||
{
|
|
||||||
"weight": 1,
|
|
||||||
"parser": DummyParser2,
|
|
||||||
"mime_types": {"application/pdf": ".pdf"},
|
|
||||||
},
|
|
||||||
),
|
|
||||||
)
|
|
||||||
|
|
||||||
self.assertEqual(
|
|
||||||
get_parser_class_for_mime_type("application/pdf"),
|
|
||||||
DummyParser2,
|
|
||||||
)
|
|
||||||
|
|
||||||
@mock.patch("documents.parsers.document_consumer_declaration.send")
|
|
||||||
def test_get_parser_class_0_parsers(self, m, *args) -> None:
|
|
||||||
"""
|
|
||||||
GIVEN:
|
|
||||||
- No parsers are declared
|
|
||||||
WHEN:
|
|
||||||
- Attempt to get parser for the mimetype
|
|
||||||
THEN:
|
|
||||||
- No parser class is returned
|
|
||||||
"""
|
|
||||||
m.return_value = []
|
|
||||||
with TemporaryDirectory():
|
|
||||||
self.assertIsNone(get_parser_class_for_mime_type("application/pdf"))
|
|
||||||
|
|
||||||
@mock.patch("documents.parsers.document_consumer_declaration.send")
|
|
||||||
def test_get_parser_class_no_valid_parser(self, m, *args) -> None:
|
|
||||||
"""
|
|
||||||
GIVEN:
|
|
||||||
- No parser declared for a given mimetype
|
|
||||||
- Parser declared for a different mimetype
|
|
||||||
WHEN:
|
|
||||||
- Attempt to get parser for the given mimetype
|
|
||||||
THEN:
|
|
||||||
- No parser class is returned
|
|
||||||
"""
|
|
||||||
|
|
||||||
class DummyParser:
|
|
||||||
pass
|
|
||||||
|
|
||||||
m.return_value = (
|
|
||||||
(
|
|
||||||
None,
|
|
||||||
{
|
|
||||||
"weight": 0,
|
|
||||||
"parser": DummyParser,
|
|
||||||
"mime_types": {"application/pdf": ".pdf"},
|
|
||||||
},
|
|
||||||
),
|
|
||||||
)
|
|
||||||
|
|
||||||
self.assertIsNone(get_parser_class_for_mime_type("image/tiff"))
|
|
||||||
|
|
||||||
|
|
||||||
class TestParserAvailability(TestCase):
|
class TestParserAvailability(TestCase):
|
||||||
@@ -151,7 +35,7 @@ class TestParserAvailability(TestCase):
|
|||||||
self.assertIn(ext, supported_exts)
|
self.assertIn(ext, supported_exts)
|
||||||
self.assertEqual(get_default_file_extension(mime_type), ext)
|
self.assertEqual(get_default_file_extension(mime_type), ext)
|
||||||
self.assertIsInstance(
|
self.assertIsInstance(
|
||||||
get_parser_class_for_mime_type(mime_type)(logging_group=None),
|
get_parser_registry().get_parser_for_file(mime_type, "")(),
|
||||||
RasterisedDocumentParser,
|
RasterisedDocumentParser,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -175,7 +59,7 @@ class TestParserAvailability(TestCase):
|
|||||||
self.assertIn(ext, supported_exts)
|
self.assertIn(ext, supported_exts)
|
||||||
self.assertEqual(get_default_file_extension(mime_type), ext)
|
self.assertEqual(get_default_file_extension(mime_type), ext)
|
||||||
self.assertIsInstance(
|
self.assertIsInstance(
|
||||||
get_parser_class_for_mime_type(mime_type)(logging_group=None),
|
get_parser_registry().get_parser_for_file(mime_type, "")(),
|
||||||
TextDocumentParser,
|
TextDocumentParser,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -198,22 +82,23 @@ class TestParserAvailability(TestCase):
|
|||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
# Force the app ready to notice the settings override
|
self.addCleanup(reset_parser_registry)
|
||||||
with override_settings(TIKA_ENABLED=True, INSTALLED_APPS=["paperless_tika"]):
|
|
||||||
app = apps.get_app_config("paperless_tika")
|
# Reset and rebuild the registry with Tika enabled.
|
||||||
app.ready()
|
with override_settings(TIKA_ENABLED=True):
|
||||||
|
reset_parser_registry()
|
||||||
supported_exts = get_supported_file_extensions()
|
supported_exts = get_supported_file_extensions()
|
||||||
|
|
||||||
for mime_type, ext in supported_mimes_and_exts:
|
for mime_type, ext in supported_mimes_and_exts:
|
||||||
self.assertIn(ext, supported_exts)
|
self.assertIn(ext, supported_exts)
|
||||||
self.assertEqual(get_default_file_extension(mime_type), ext)
|
self.assertEqual(get_default_file_extension(mime_type), ext)
|
||||||
self.assertIsInstance(
|
self.assertIsInstance(
|
||||||
get_parser_class_for_mime_type(mime_type)(logging_group=None),
|
get_parser_registry().get_parser_for_file(mime_type, "")(),
|
||||||
TikaDocumentParser,
|
TikaDocumentParser,
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_no_parser_for_mime(self) -> None:
|
def test_no_parser_for_mime(self) -> None:
|
||||||
self.assertIsNone(get_parser_class_for_mime_type("text/sdgsdf"))
|
self.assertIsNone(get_parser_registry().get_parser_for_file("text/sdgsdf", ""))
|
||||||
|
|
||||||
def test_default_extension(self) -> None:
|
def test_default_extension(self) -> None:
|
||||||
# Test no parser declared still returns a an extension
|
# Test no parser declared still returns a an extension
|
||||||
|
|||||||
@@ -28,6 +28,7 @@ from rest_framework.test import APIClient
|
|||||||
from rest_framework.test import APITestCase
|
from rest_framework.test import APITestCase
|
||||||
|
|
||||||
from documents.file_handling import create_source_path_directory
|
from documents.file_handling import create_source_path_directory
|
||||||
|
from documents.file_handling import generate_filename
|
||||||
from documents.file_handling import generate_unique_filename
|
from documents.file_handling import generate_unique_filename
|
||||||
from documents.signals.handlers import run_workflows
|
from documents.signals.handlers import run_workflows
|
||||||
from documents.workflows.webhooks import send_webhook
|
from documents.workflows.webhooks import send_webhook
|
||||||
@@ -905,6 +906,121 @@ class TestWorkflows(
|
|||||||
expected_str = f"Document matched {trigger} from {w}"
|
expected_str = f"Document matched {trigger} from {w}"
|
||||||
self.assertIn(expected_str, cm.output[0])
|
self.assertIn(expected_str, cm.output[0])
|
||||||
|
|
||||||
|
def test_workflow_assign_custom_field_keeps_storage_filename_in_sync(self) -> None:
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- Existing document with a storage path template that depends on a custom field
|
||||||
|
- Existing workflow triggered on document update assigning that custom field
|
||||||
|
WHEN:
|
||||||
|
- Workflow runs for the document
|
||||||
|
THEN:
|
||||||
|
- The database filename remains aligned with the moved file on disk
|
||||||
|
"""
|
||||||
|
storage_path = StoragePath.objects.create(
|
||||||
|
name="workflow-custom-field-path",
|
||||||
|
path="{{ custom_fields|get_cf_value('Custom Field 1', 'none') }}/{{ title }}",
|
||||||
|
)
|
||||||
|
doc = Document.objects.create(
|
||||||
|
title="workflow custom field sync",
|
||||||
|
mime_type="application/pdf",
|
||||||
|
checksum="workflow-custom-field-sync",
|
||||||
|
storage_path=storage_path,
|
||||||
|
original_filename="workflow-custom-field-sync.pdf",
|
||||||
|
)
|
||||||
|
CustomFieldInstance.objects.create(
|
||||||
|
document=doc,
|
||||||
|
field=self.cf1,
|
||||||
|
value_text="initial",
|
||||||
|
)
|
||||||
|
|
||||||
|
generated = generate_unique_filename(doc)
|
||||||
|
destination = (settings.ORIGINALS_DIR / generated).resolve()
|
||||||
|
create_source_path_directory(destination)
|
||||||
|
shutil.copy(self.SAMPLE_DIR / "simple.pdf", destination)
|
||||||
|
Document.objects.filter(pk=doc.pk).update(filename=generated.as_posix())
|
||||||
|
doc.refresh_from_db()
|
||||||
|
|
||||||
|
trigger = WorkflowTrigger.objects.create(
|
||||||
|
type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED,
|
||||||
|
)
|
||||||
|
action = WorkflowAction.objects.create(
|
||||||
|
type=WorkflowAction.WorkflowActionType.ASSIGNMENT,
|
||||||
|
assign_custom_fields_values={self.cf1.pk: "cars"},
|
||||||
|
)
|
||||||
|
action.assign_custom_fields.add(self.cf1.pk)
|
||||||
|
workflow = Workflow.objects.create(
|
||||||
|
name="Workflow custom field filename sync",
|
||||||
|
order=0,
|
||||||
|
)
|
||||||
|
workflow.triggers.add(trigger)
|
||||||
|
workflow.actions.add(action)
|
||||||
|
workflow.save()
|
||||||
|
|
||||||
|
run_workflows(WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED, doc)
|
||||||
|
|
||||||
|
doc.refresh_from_db()
|
||||||
|
expected_filename = generate_filename(doc)
|
||||||
|
self.assertEqual(Path(doc.filename), expected_filename)
|
||||||
|
self.assertTrue(doc.source_path.is_file())
|
||||||
|
|
||||||
|
def test_workflow_document_updated_does_not_overwrite_filename(self) -> None:
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- A document whose filename has been updated in the DB by a concurrent
|
||||||
|
bulk_update_documents task (simulating update_filename_and_move_files
|
||||||
|
completing and writing the new filename to the DB)
|
||||||
|
- A stale in-memory document instance still holding the old filename
|
||||||
|
- An active DOCUMENT_UPDATED workflow
|
||||||
|
WHEN:
|
||||||
|
- run_workflows is called with the stale in-memory instance
|
||||||
|
(as would happen in the second concurrent bulk_update_documents task)
|
||||||
|
THEN:
|
||||||
|
- The DB filename is NOT overwritten with the stale in-memory value
|
||||||
|
(regression test for GH #12386 — the race window between
|
||||||
|
refresh_from_db and document.save in run_workflows)
|
||||||
|
"""
|
||||||
|
trigger = WorkflowTrigger.objects.create(
|
||||||
|
type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED,
|
||||||
|
)
|
||||||
|
action = WorkflowAction.objects.create(
|
||||||
|
type=WorkflowAction.WorkflowActionType.ASSIGNMENT,
|
||||||
|
assign_title="Updated by workflow",
|
||||||
|
)
|
||||||
|
workflow = Workflow.objects.create(name="Race condition test workflow", order=0)
|
||||||
|
workflow.triggers.add(trigger)
|
||||||
|
workflow.actions.add(action)
|
||||||
|
workflow.save()
|
||||||
|
|
||||||
|
doc = Document.objects.create(
|
||||||
|
title="race condition test",
|
||||||
|
mime_type="application/pdf",
|
||||||
|
checksum="racecondition123",
|
||||||
|
original_filename="old.pdf",
|
||||||
|
filename="old/path/old.pdf",
|
||||||
|
)
|
||||||
|
|
||||||
|
# Simulate BUD-1 completing update_filename_and_move_files:
|
||||||
|
# the DB now holds the new filename while BUD-2's in-memory instance is stale.
|
||||||
|
new_filename = "new/path/new.pdf"
|
||||||
|
Document.global_objects.filter(pk=doc.pk).update(filename=new_filename)
|
||||||
|
|
||||||
|
# The stale instance still has filename="old/path/old.pdf" in memory.
|
||||||
|
# Mock refresh_from_db so the stale value persists through run_workflows,
|
||||||
|
# replicating the race window between refresh and save.
|
||||||
|
# Mock update_filename_and_move_files to prevent file-not-found errors
|
||||||
|
# since we are only testing DB state here.
|
||||||
|
with (
|
||||||
|
mock.patch(
|
||||||
|
"documents.signals.handlers.update_filename_and_move_files",
|
||||||
|
),
|
||||||
|
mock.patch.object(Document, "refresh_from_db"),
|
||||||
|
):
|
||||||
|
run_workflows(WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED, doc)
|
||||||
|
|
||||||
|
# The DB filename must not have been reverted to the stale old value.
|
||||||
|
doc.refresh_from_db()
|
||||||
|
self.assertEqual(doc.filename, new_filename)
|
||||||
|
|
||||||
def test_document_added_workflow(self) -> None:
|
def test_document_added_workflow(self) -> None:
|
||||||
trigger = WorkflowTrigger.objects.create(
|
trigger = WorkflowTrigger.objects.create(
|
||||||
type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
|
type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
|
||||||
|
|||||||
@@ -82,6 +82,7 @@ from rest_framework import serializers
|
|||||||
from rest_framework import status
|
from rest_framework import status
|
||||||
from rest_framework.decorators import action
|
from rest_framework.decorators import action
|
||||||
from rest_framework.exceptions import NotFound
|
from rest_framework.exceptions import NotFound
|
||||||
|
from rest_framework.exceptions import PermissionDenied
|
||||||
from rest_framework.exceptions import ValidationError
|
from rest_framework.exceptions import ValidationError
|
||||||
from rest_framework.filters import OrderingFilter
|
from rest_framework.filters import OrderingFilter
|
||||||
from rest_framework.filters import SearchFilter
|
from rest_framework.filters import SearchFilter
|
||||||
@@ -157,7 +158,6 @@ from documents.models import UiSettings
|
|||||||
from documents.models import Workflow
|
from documents.models import Workflow
|
||||||
from documents.models import WorkflowAction
|
from documents.models import WorkflowAction
|
||||||
from documents.models import WorkflowTrigger
|
from documents.models import WorkflowTrigger
|
||||||
from documents.parsers import get_parser_class_for_mime_type
|
|
||||||
from documents.permissions import AcknowledgeTasksPermissions
|
from documents.permissions import AcknowledgeTasksPermissions
|
||||||
from documents.permissions import PaperlessAdminPermissions
|
from documents.permissions import PaperlessAdminPermissions
|
||||||
from documents.permissions import PaperlessNotePermissions
|
from documents.permissions import PaperlessNotePermissions
|
||||||
@@ -225,6 +225,7 @@ from paperless.celery import app as celery_app
|
|||||||
from paperless.config import AIConfig
|
from paperless.config import AIConfig
|
||||||
from paperless.config import GeneralConfig
|
from paperless.config import GeneralConfig
|
||||||
from paperless.models import ApplicationConfiguration
|
from paperless.models import ApplicationConfiguration
|
||||||
|
from paperless.parsers.registry import get_parser_registry
|
||||||
from paperless.serialisers import GroupSerializer
|
from paperless.serialisers import GroupSerializer
|
||||||
from paperless.serialisers import UserSerializer
|
from paperless.serialisers import UserSerializer
|
||||||
from paperless.views import StandardPagination
|
from paperless.views import StandardPagination
|
||||||
@@ -1081,15 +1082,17 @@ class DocumentViewSet(
|
|||||||
if not Path(file).is_file():
|
if not Path(file).is_file():
|
||||||
return None
|
return None
|
||||||
|
|
||||||
parser_class = get_parser_class_for_mime_type(mime_type)
|
parser_class = get_parser_registry().get_parser_for_file(
|
||||||
|
mime_type,
|
||||||
|
Path(file).name,
|
||||||
|
Path(file),
|
||||||
|
)
|
||||||
if parser_class:
|
if parser_class:
|
||||||
parser = parser_class(progress_callback=None, logging_group=None)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
with parser_class() as parser:
|
||||||
return parser.extract_metadata(file, mime_type)
|
return parser.extract_metadata(file, mime_type)
|
||||||
except Exception: # pragma: no cover
|
except Exception: # pragma: no cover
|
||||||
logger.exception(f"Issue getting metadata for {file}")
|
logger.exception(f"Issue getting metadata for {file}")
|
||||||
# TODO: cover GPG errors, remove later.
|
|
||||||
return []
|
return []
|
||||||
else: # pragma: no cover
|
else: # pragma: no cover
|
||||||
logger.warning(f"No parser for {mime_type}")
|
logger.warning(f"No parser for {mime_type}")
|
||||||
@@ -1328,6 +1331,7 @@ class DocumentViewSet(
|
|||||||
methods=["get", "post", "delete"],
|
methods=["get", "post", "delete"],
|
||||||
detail=True,
|
detail=True,
|
||||||
permission_classes=[PaperlessNotePermissions],
|
permission_classes=[PaperlessNotePermissions],
|
||||||
|
pagination_class=None,
|
||||||
filter_backends=[],
|
filter_backends=[],
|
||||||
)
|
)
|
||||||
def notes(self, request, pk=None):
|
def notes(self, request, pk=None):
|
||||||
@@ -1965,11 +1969,28 @@ class UnifiedSearchViewSet(DocumentViewSet):
|
|||||||
filtered_queryset = super().filter_queryset(queryset)
|
filtered_queryset = super().filter_queryset(queryset)
|
||||||
|
|
||||||
if self._is_search_request():
|
if self._is_search_request():
|
||||||
|
if "query" in self.request.query_params:
|
||||||
from documents import index
|
from documents import index
|
||||||
|
|
||||||
if "query" in self.request.query_params:
|
|
||||||
query_class = index.DelayedFullTextQuery
|
query_class = index.DelayedFullTextQuery
|
||||||
elif "more_like_id" in self.request.query_params:
|
elif "more_like_id" in self.request.query_params:
|
||||||
|
try:
|
||||||
|
more_like_doc_id = int(self.request.query_params["more_like_id"])
|
||||||
|
more_like_doc = Document.objects.select_related("owner").get(
|
||||||
|
pk=more_like_doc_id,
|
||||||
|
)
|
||||||
|
except (TypeError, ValueError, Document.DoesNotExist):
|
||||||
|
raise PermissionDenied(_("Invalid more_like_id"))
|
||||||
|
|
||||||
|
if not has_perms_owner_aware(
|
||||||
|
self.request.user,
|
||||||
|
"view_document",
|
||||||
|
more_like_doc,
|
||||||
|
):
|
||||||
|
raise PermissionDenied(_("Insufficient permissions."))
|
||||||
|
|
||||||
|
from documents import index
|
||||||
|
|
||||||
query_class = index.DelayedMoreLikeThisQuery
|
query_class = index.DelayedMoreLikeThisQuery
|
||||||
else:
|
else:
|
||||||
raise ValueError
|
raise ValueError
|
||||||
@@ -2005,6 +2026,8 @@ class UnifiedSearchViewSet(DocumentViewSet):
|
|||||||
return response
|
return response
|
||||||
except NotFound:
|
except NotFound:
|
||||||
raise
|
raise
|
||||||
|
except PermissionDenied as e:
|
||||||
|
return HttpResponseForbidden(str(e.detail))
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"An error occurred listing search results: {e!s}")
|
logger.warning(f"An error occurred listing search results: {e!s}")
|
||||||
return HttpResponseBadRequest(
|
return HttpResponseBadRequest(
|
||||||
@@ -2943,13 +2966,21 @@ class GlobalSearchView(PassUserMixin):
|
|||||||
)
|
)
|
||||||
groups = groups[:OBJECT_LIMIT]
|
groups = groups[:OBJECT_LIMIT]
|
||||||
mail_rules = (
|
mail_rules = (
|
||||||
MailRule.objects.filter(name__icontains=query)
|
get_objects_for_user_owner_aware(
|
||||||
|
request.user,
|
||||||
|
"view_mailrule",
|
||||||
|
MailRule,
|
||||||
|
).filter(name__icontains=query)
|
||||||
if request.user.has_perm("paperless_mail.view_mailrule")
|
if request.user.has_perm("paperless_mail.view_mailrule")
|
||||||
else []
|
else []
|
||||||
)
|
)
|
||||||
mail_rules = mail_rules[:OBJECT_LIMIT]
|
mail_rules = mail_rules[:OBJECT_LIMIT]
|
||||||
mail_accounts = (
|
mail_accounts = (
|
||||||
MailAccount.objects.filter(name__icontains=query)
|
get_objects_for_user_owner_aware(
|
||||||
|
request.user,
|
||||||
|
"view_mailaccount",
|
||||||
|
MailAccount,
|
||||||
|
).filter(name__icontains=query)
|
||||||
if request.user.has_perm("paperless_mail.view_mailaccount")
|
if request.user.has_perm("paperless_mail.view_mailaccount")
|
||||||
else []
|
else []
|
||||||
)
|
)
|
||||||
@@ -3923,7 +3954,7 @@ class CustomFieldViewSet(PermissionsAwareDocumentCountMixin, ModelViewSet):
|
|||||||
document_count_through = CustomFieldInstance
|
document_count_through = CustomFieldInstance
|
||||||
document_count_source_field = "field_id"
|
document_count_source_field = "field_id"
|
||||||
|
|
||||||
queryset = CustomField.objects.all().order_by("-created")
|
queryset = CustomField.objects.all().order_by("name")
|
||||||
|
|
||||||
|
|
||||||
@extend_schema_view(
|
@extend_schema_view(
|
||||||
|
|||||||
@@ -1,12 +1,14 @@
|
|||||||
import ipaddress
|
|
||||||
import logging
|
import logging
|
||||||
import socket
|
|
||||||
from urllib.parse import urlparse
|
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
from celery import shared_task
|
from celery import shared_task
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
|
|
||||||
|
from paperless.network import format_host_for_url
|
||||||
|
from paperless.network import is_public_ip
|
||||||
|
from paperless.network import resolve_hostname_ips
|
||||||
|
from paperless.network import validate_outbound_http_url
|
||||||
|
|
||||||
logger = logging.getLogger("paperless.workflows.webhooks")
|
logger = logging.getLogger("paperless.workflows.webhooks")
|
||||||
|
|
||||||
|
|
||||||
@@ -34,23 +36,19 @@ class WebhookTransport(httpx.HTTPTransport):
|
|||||||
raise httpx.ConnectError("No hostname in request URL")
|
raise httpx.ConnectError("No hostname in request URL")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
addr_info = socket.getaddrinfo(hostname, None)
|
ips = resolve_hostname_ips(hostname)
|
||||||
except socket.gaierror as e:
|
except ValueError as e:
|
||||||
raise httpx.ConnectError(f"Could not resolve hostname: {hostname}") from e
|
raise httpx.ConnectError(str(e)) from e
|
||||||
|
|
||||||
ips = [info[4][0] for info in addr_info if info and info[4]]
|
|
||||||
if not ips:
|
|
||||||
raise httpx.ConnectError(f"Could not resolve hostname: {hostname}")
|
|
||||||
|
|
||||||
if not self.allow_internal:
|
if not self.allow_internal:
|
||||||
for ip_str in ips:
|
for ip_str in ips:
|
||||||
if not WebhookTransport.is_public_ip(ip_str):
|
if not is_public_ip(ip_str):
|
||||||
raise httpx.ConnectError(
|
raise httpx.ConnectError(
|
||||||
f"Connection blocked: {hostname} resolves to a non-public address",
|
f"Connection blocked: {hostname} resolves to a non-public address",
|
||||||
)
|
)
|
||||||
|
|
||||||
ip_str = ips[0]
|
ip_str = ips[0]
|
||||||
formatted_ip = self._format_ip_for_url(ip_str)
|
formatted_ip = format_host_for_url(ip_str)
|
||||||
|
|
||||||
new_headers = httpx.Headers(request.headers)
|
new_headers = httpx.Headers(request.headers)
|
||||||
if "host" in new_headers:
|
if "host" in new_headers:
|
||||||
@@ -69,40 +67,6 @@ class WebhookTransport(httpx.HTTPTransport):
|
|||||||
|
|
||||||
return super().handle_request(request)
|
return super().handle_request(request)
|
||||||
|
|
||||||
def _format_ip_for_url(self, ip: str) -> str:
|
|
||||||
"""
|
|
||||||
Format IP address for use in URL (wrap IPv6 in brackets)
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
ip_obj = ipaddress.ip_address(ip)
|
|
||||||
if ip_obj.version == 6:
|
|
||||||
return f"[{ip}]"
|
|
||||||
return ip
|
|
||||||
except ValueError:
|
|
||||||
return ip
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def is_public_ip(ip: str | int) -> bool:
|
|
||||||
try:
|
|
||||||
obj = ipaddress.ip_address(ip)
|
|
||||||
return not (
|
|
||||||
obj.is_private
|
|
||||||
or obj.is_loopback
|
|
||||||
or obj.is_link_local
|
|
||||||
or obj.is_multicast
|
|
||||||
or obj.is_unspecified
|
|
||||||
)
|
|
||||||
except ValueError: # pragma: no cover
|
|
||||||
return False
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def resolve_first_ip(host: str) -> str | None:
|
|
||||||
try:
|
|
||||||
info = socket.getaddrinfo(host, None)
|
|
||||||
return info[0][4][0] if info else None
|
|
||||||
except Exception: # pragma: no cover
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
@shared_task(
|
@shared_task(
|
||||||
retry_backoff=True,
|
retry_backoff=True,
|
||||||
@@ -118,21 +82,24 @@ def send_webhook(
|
|||||||
*,
|
*,
|
||||||
as_json: bool = False,
|
as_json: bool = False,
|
||||||
):
|
):
|
||||||
p = urlparse(url)
|
try:
|
||||||
if p.scheme.lower() not in settings.WEBHOOKS_ALLOWED_SCHEMES or not p.hostname:
|
parsed = validate_outbound_http_url(
|
||||||
logger.warning("Webhook blocked: invalid scheme/hostname")
|
url,
|
||||||
|
allowed_schemes=settings.WEBHOOKS_ALLOWED_SCHEMES,
|
||||||
|
allowed_ports=settings.WEBHOOKS_ALLOWED_PORTS,
|
||||||
|
# Internal-address checks happen in transport to preserve ConnectError behavior.
|
||||||
|
allow_internal=True,
|
||||||
|
)
|
||||||
|
except ValueError as e:
|
||||||
|
logger.warning("Webhook blocked: %s", e)
|
||||||
|
raise
|
||||||
|
|
||||||
|
hostname = parsed.hostname
|
||||||
|
if hostname is None: # pragma: no cover
|
||||||
raise ValueError("Invalid URL scheme or hostname.")
|
raise ValueError("Invalid URL scheme or hostname.")
|
||||||
|
|
||||||
port = p.port or (443 if p.scheme == "https" else 80)
|
|
||||||
if (
|
|
||||||
len(settings.WEBHOOKS_ALLOWED_PORTS) > 0
|
|
||||||
and port not in settings.WEBHOOKS_ALLOWED_PORTS
|
|
||||||
):
|
|
||||||
logger.warning("Webhook blocked: port not permitted")
|
|
||||||
raise ValueError("Destination port not permitted.")
|
|
||||||
|
|
||||||
transport = WebhookTransport(
|
transport = WebhookTransport(
|
||||||
hostname=p.hostname,
|
hostname=hostname,
|
||||||
allow_internal=settings.WEBHOOKS_ALLOW_INTERNAL_REQUESTS,
|
allow_internal=settings.WEBHOOKS_ALLOW_INTERNAL_REQUESTS,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ msgid ""
|
|||||||
msgstr ""
|
msgstr ""
|
||||||
"Project-Id-Version: paperless-ngx\n"
|
"Project-Id-Version: paperless-ngx\n"
|
||||||
"Report-Msgid-Bugs-To: \n"
|
"Report-Msgid-Bugs-To: \n"
|
||||||
"POT-Creation-Date: 2026-03-12 15:43+0000\n"
|
"POT-Creation-Date: 2026-03-22 13:54+0000\n"
|
||||||
"PO-Revision-Date: 2022-02-17 04:17\n"
|
"PO-Revision-Date: 2022-02-17 04:17\n"
|
||||||
"Last-Translator: \n"
|
"Last-Translator: \n"
|
||||||
"Language-Team: English\n"
|
"Language-Team: English\n"
|
||||||
@@ -1299,7 +1299,9 @@ msgstr ""
|
|||||||
msgid "workflow runs"
|
msgid "workflow runs"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: documents/serialisers.py:463 documents/serialisers.py:2470
|
#: documents/serialisers.py:463 documents/serialisers.py:815
|
||||||
|
#: documents/serialisers.py:2501 documents/views.py:1990
|
||||||
|
#: paperless_mail/serialisers.py:143
|
||||||
msgid "Insufficient permissions."
|
msgid "Insufficient permissions."
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
@@ -1307,39 +1309,39 @@ msgstr ""
|
|||||||
msgid "Invalid color."
|
msgid "Invalid color."
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: documents/serialisers.py:2093
|
#: documents/serialisers.py:2124
|
||||||
#, python-format
|
#, python-format
|
||||||
msgid "File type %(type)s not supported"
|
msgid "File type %(type)s not supported"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: documents/serialisers.py:2137
|
#: documents/serialisers.py:2168
|
||||||
#, python-format
|
#, python-format
|
||||||
msgid "Custom field id must be an integer: %(id)s"
|
msgid "Custom field id must be an integer: %(id)s"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: documents/serialisers.py:2144
|
#: documents/serialisers.py:2175
|
||||||
#, python-format
|
#, python-format
|
||||||
msgid "Custom field with id %(id)s does not exist"
|
msgid "Custom field with id %(id)s does not exist"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: documents/serialisers.py:2161 documents/serialisers.py:2171
|
#: documents/serialisers.py:2192 documents/serialisers.py:2202
|
||||||
msgid ""
|
msgid ""
|
||||||
"Custom fields must be a list of integers or an object mapping ids to values."
|
"Custom fields must be a list of integers or an object mapping ids to values."
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: documents/serialisers.py:2166
|
#: documents/serialisers.py:2197
|
||||||
msgid "Some custom fields don't exist or were specified twice."
|
msgid "Some custom fields don't exist or were specified twice."
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: documents/serialisers.py:2313
|
#: documents/serialisers.py:2344
|
||||||
msgid "Invalid variable detected."
|
msgid "Invalid variable detected."
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: documents/serialisers.py:2526
|
#: documents/serialisers.py:2557
|
||||||
msgid "Duplicate document identifiers are not allowed."
|
msgid "Duplicate document identifiers are not allowed."
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: documents/serialisers.py:2556 documents/views.py:3565
|
#: documents/serialisers.py:2587 documents/views.py:3596
|
||||||
#, python-format
|
#, python-format
|
||||||
msgid "Documents not found: %(ids)s"
|
msgid "Documents not found: %(ids)s"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
@@ -1603,20 +1605,24 @@ msgstr ""
|
|||||||
msgid "Unable to parse URI {value}"
|
msgid "Unable to parse URI {value}"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: documents/views.py:3577
|
#: documents/views.py:1983
|
||||||
|
msgid "Invalid more_like_id"
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: documents/views.py:3608
|
||||||
#, python-format
|
#, python-format
|
||||||
msgid "Insufficient permissions to share document %(id)s."
|
msgid "Insufficient permissions to share document %(id)s."
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: documents/views.py:3620
|
#: documents/views.py:3651
|
||||||
msgid "Bundle is already being processed."
|
msgid "Bundle is already being processed."
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: documents/views.py:3677
|
#: documents/views.py:3708
|
||||||
msgid "The share link bundle is still being prepared. Please try again later."
|
msgid "The share link bundle is still being prepared. Please try again later."
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: documents/views.py:3687
|
#: documents/views.py:3718
|
||||||
msgid "The share link bundle is unavailable."
|
msgid "The share link bundle is unavailable."
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
@@ -1856,151 +1862,151 @@ msgstr ""
|
|||||||
msgid "paperless application settings"
|
msgid "paperless application settings"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:521
|
#: paperless/settings/__init__.py:518
|
||||||
msgid "English (US)"
|
msgid "English (US)"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:522
|
#: paperless/settings/__init__.py:519
|
||||||
msgid "Arabic"
|
msgid "Arabic"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:523
|
#: paperless/settings/__init__.py:520
|
||||||
msgid "Afrikaans"
|
msgid "Afrikaans"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:524
|
#: paperless/settings/__init__.py:521
|
||||||
msgid "Belarusian"
|
msgid "Belarusian"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:525
|
#: paperless/settings/__init__.py:522
|
||||||
msgid "Bulgarian"
|
msgid "Bulgarian"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:526
|
#: paperless/settings/__init__.py:523
|
||||||
msgid "Catalan"
|
msgid "Catalan"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:527
|
#: paperless/settings/__init__.py:524
|
||||||
msgid "Czech"
|
msgid "Czech"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:528
|
#: paperless/settings/__init__.py:525
|
||||||
msgid "Danish"
|
msgid "Danish"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:529
|
#: paperless/settings/__init__.py:526
|
||||||
msgid "German"
|
msgid "German"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:530
|
#: paperless/settings/__init__.py:527
|
||||||
msgid "Greek"
|
msgid "Greek"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:531
|
#: paperless/settings/__init__.py:528
|
||||||
msgid "English (GB)"
|
msgid "English (GB)"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:532
|
#: paperless/settings/__init__.py:529
|
||||||
msgid "Spanish"
|
msgid "Spanish"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:533
|
#: paperless/settings/__init__.py:530
|
||||||
msgid "Persian"
|
msgid "Persian"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:534
|
#: paperless/settings/__init__.py:531
|
||||||
msgid "Finnish"
|
msgid "Finnish"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:535
|
#: paperless/settings/__init__.py:532
|
||||||
msgid "French"
|
msgid "French"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:536
|
#: paperless/settings/__init__.py:533
|
||||||
msgid "Hungarian"
|
msgid "Hungarian"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:537
|
#: paperless/settings/__init__.py:534
|
||||||
msgid "Indonesian"
|
msgid "Indonesian"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:538
|
#: paperless/settings/__init__.py:535
|
||||||
msgid "Italian"
|
msgid "Italian"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:539
|
#: paperless/settings/__init__.py:536
|
||||||
msgid "Japanese"
|
msgid "Japanese"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:540
|
#: paperless/settings/__init__.py:537
|
||||||
msgid "Korean"
|
msgid "Korean"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:541
|
#: paperless/settings/__init__.py:538
|
||||||
msgid "Luxembourgish"
|
msgid "Luxembourgish"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:542
|
#: paperless/settings/__init__.py:539
|
||||||
msgid "Norwegian"
|
msgid "Norwegian"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:543
|
#: paperless/settings/__init__.py:540
|
||||||
msgid "Dutch"
|
msgid "Dutch"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:544
|
#: paperless/settings/__init__.py:541
|
||||||
msgid "Polish"
|
msgid "Polish"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:545
|
#: paperless/settings/__init__.py:542
|
||||||
msgid "Portuguese (Brazil)"
|
msgid "Portuguese (Brazil)"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:546
|
#: paperless/settings/__init__.py:543
|
||||||
msgid "Portuguese"
|
msgid "Portuguese"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:547
|
#: paperless/settings/__init__.py:544
|
||||||
msgid "Romanian"
|
msgid "Romanian"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:548
|
#: paperless/settings/__init__.py:545
|
||||||
msgid "Russian"
|
msgid "Russian"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:549
|
#: paperless/settings/__init__.py:546
|
||||||
msgid "Slovak"
|
msgid "Slovak"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:550
|
#: paperless/settings/__init__.py:547
|
||||||
msgid "Slovenian"
|
msgid "Slovenian"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:551
|
#: paperless/settings/__init__.py:548
|
||||||
msgid "Serbian"
|
msgid "Serbian"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:552
|
#: paperless/settings/__init__.py:549
|
||||||
msgid "Swedish"
|
msgid "Swedish"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:553
|
#: paperless/settings/__init__.py:550
|
||||||
msgid "Turkish"
|
msgid "Turkish"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:554
|
#: paperless/settings/__init__.py:551
|
||||||
msgid "Ukrainian"
|
msgid "Ukrainian"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:555
|
#: paperless/settings/__init__.py:552
|
||||||
msgid "Vietnamese"
|
msgid "Vietnamese"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:556
|
#: paperless/settings/__init__.py:553
|
||||||
msgid "Chinese Simplified"
|
msgid "Chinese Simplified"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:557
|
#: paperless/settings/__init__.py:554
|
||||||
msgid "Chinese Traditional"
|
msgid "Chinese Traditional"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
@@ -2046,7 +2052,7 @@ msgid ""
|
|||||||
"process all matching rules that you have defined."
|
"process all matching rules that you have defined."
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless_mail/apps.py:11
|
#: paperless_mail/apps.py:8
|
||||||
msgid "Paperless mail"
|
msgid "Paperless mail"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
|
|||||||
@@ -83,3 +83,11 @@ class PaperlessBasicAuthentication(authentication.BasicAuthentication):
|
|||||||
raise exceptions.AuthenticationFailed("MFA required")
|
raise exceptions.AuthenticationFailed("MFA required")
|
||||||
|
|
||||||
return user_tuple
|
return user_tuple
|
||||||
|
|
||||||
|
def authenticate_header(self, request):
|
||||||
|
auth_header = request.META.get("HTTP_AUTHORIZATION", "")
|
||||||
|
if auth_header.lower().startswith("basic "):
|
||||||
|
return super().authenticate_header(request)
|
||||||
|
|
||||||
|
# Still 401 for anonymous API access
|
||||||
|
return authentication.TokenAuthentication.keyword
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ import os
|
|||||||
import pwd
|
import pwd
|
||||||
import shutil
|
import shutil
|
||||||
import stat
|
import stat
|
||||||
|
import subprocess
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
@@ -299,3 +300,62 @@ def check_deprecated_db_settings(
|
|||||||
)
|
)
|
||||||
|
|
||||||
return warnings
|
return warnings
|
||||||
|
|
||||||
|
|
||||||
|
@register()
def check_remote_parser_configured(app_configs, **kwargs) -> list[Error]:
    """System check for the Azure AI remote OCR engine.

    When ``PAPERLESS_REMOTE_OCR_ENGINE`` is set to ``azureai``, both an
    endpoint and an API key must be configured; otherwise an Error is
    reported so startup fails loudly instead of at first parse.
    """
    if settings.REMOTE_OCR_ENGINE != "azureai":
        return []
    if settings.REMOTE_OCR_ENDPOINT and settings.REMOTE_OCR_API_KEY:
        return []
    return [
        Error(
            "Azure AI remote parser requires endpoint and API key to be configured.",
        ),
    ]
|
||||||
|
|
||||||
|
|
||||||
|
def get_tesseract_langs():
    """Return the language codes the local tesseract binary reports.

    Runs ``tesseract --list-langs``, decodes its output and returns the
    trimmed language codes, skipping the one-line header tesseract
    prints before the list.
    """
    completed = subprocess.run(
        [shutil.which("tesseract"), "--list-langs"],
        capture_output=True,
    )

    # Decode, drop the "List of available languages" header line.
    raw_lines = completed.stdout.decode("utf8", errors="ignore").strip().split("\n")

    return [line.strip() for line in raw_lines[1:]]
|
||||||
|
|
||||||
|
|
||||||
|
@register()
def check_default_language_available(app_configs, **kwargs):
    """System check for PAPERLESS_OCR_LANGUAGE.

    Warns when no OCR language is configured (tesseract then falls back
    to English) and errors when a configured language is not installed.
    """
    errs = []

    if not settings.OCR_LANGUAGE:
        errs.append(
            Warning(
                "No OCR language has been specified with PAPERLESS_OCR_LANGUAGE. "
                "This means that tesseract will fallback to english.",
            ),
        )
        # Nothing further to validate when no language is configured.
        return errs

    # binaries_check in paperless will check and report if this doesn't exist
    # So skip trying to do anything here and let that handle missing binaries
    if shutil.which("tesseract") is not None:
        installed_langs = get_tesseract_langs()

        # Languages may be combined with "+" (e.g. "eng+deu"); check each part.
        specified_langs = [x.strip() for x in settings.OCR_LANGUAGE.split("+")]

        for lang in specified_langs:
            if lang not in installed_langs:
                errs.append(
                    Error(
                        f"The selected ocr language {lang} is "
                        f"not installed. Paperless cannot OCR your documents "
                        f"without it. Please fix PAPERLESS_OCR_LANGUAGE.",
                    ),
                )

    return errs
|
||||||
|
|||||||
@@ -188,6 +188,7 @@ class AIConfig(BaseConfig):
|
|||||||
llm_model: str = dataclasses.field(init=False)
|
llm_model: str = dataclasses.field(init=False)
|
||||||
llm_api_key: str = dataclasses.field(init=False)
|
llm_api_key: str = dataclasses.field(init=False)
|
||||||
llm_endpoint: str = dataclasses.field(init=False)
|
llm_endpoint: str = dataclasses.field(init=False)
|
||||||
|
llm_allow_internal_endpoints: bool = dataclasses.field(init=False)
|
||||||
|
|
||||||
def __post_init__(self) -> None:
|
def __post_init__(self) -> None:
|
||||||
app_config = self._get_config_instance()
|
app_config = self._get_config_instance()
|
||||||
@@ -203,6 +204,7 @@ class AIConfig(BaseConfig):
|
|||||||
self.llm_model = app_config.llm_model or settings.LLM_MODEL
|
self.llm_model = app_config.llm_model or settings.LLM_MODEL
|
||||||
self.llm_api_key = app_config.llm_api_key or settings.LLM_API_KEY
|
self.llm_api_key = app_config.llm_api_key or settings.LLM_API_KEY
|
||||||
self.llm_endpoint = app_config.llm_endpoint or settings.LLM_ENDPOINT
|
self.llm_endpoint = app_config.llm_endpoint or settings.LLM_ENDPOINT
|
||||||
|
self.llm_allow_internal_endpoints = settings.LLM_ALLOW_INTERNAL_ENDPOINTS
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def llm_index_enabled(self) -> bool:
|
def llm_index_enabled(self) -> bool:
|
||||||
|
|||||||
76
src/paperless/network.py
Normal file
@@ -0,0 +1,76 @@
|
|||||||
|
import ipaddress
|
||||||
|
import socket
|
||||||
|
from collections.abc import Collection
|
||||||
|
from urllib.parse import ParseResult
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
|
||||||
|
def is_public_ip(ip: str | int) -> bool:
|
||||||
|
try:
|
||||||
|
obj = ipaddress.ip_address(ip)
|
||||||
|
return not (
|
||||||
|
obj.is_private
|
||||||
|
or obj.is_loopback
|
||||||
|
or obj.is_link_local
|
||||||
|
or obj.is_multicast
|
||||||
|
or obj.is_unspecified
|
||||||
|
)
|
||||||
|
except ValueError: # pragma: no cover
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_hostname_ips(hostname: str) -> list[str]:
    """Resolve *hostname* to every IP address it maps to.

    Raises
    ------
    ValueError
        When DNS resolution fails or produces no usable addresses.
    """
    try:
        records = socket.getaddrinfo(hostname, None)
    except socket.gaierror as exc:
        raise ValueError(f"Could not resolve hostname: {hostname}") from exc

    resolved = [record[4][0] for record in records if record and record[4]]
    if not resolved:
        raise ValueError(f"Could not resolve hostname: {hostname}")
    return resolved
|
||||||
|
|
||||||
|
|
||||||
|
def format_host_for_url(host: str) -> str:
    """Return *host* ready for embedding in a URL.

    IPv6 literals get wrapped in square brackets; IPv4 literals,
    hostnames and unparseable strings come back unchanged.
    """
    try:
        version = ipaddress.ip_address(host).version
    except ValueError:
        # Not an IP literal (likely a hostname) — no bracketing needed.
        return host
    return f"[{host}]" if version == 6 else host
|
||||||
|
|
||||||
|
|
||||||
|
def validate_outbound_http_url(
|
||||||
|
url: str,
|
||||||
|
*,
|
||||||
|
allowed_schemes: Collection[str] = ("http", "https"),
|
||||||
|
allowed_ports: Collection[int] | None = None,
|
||||||
|
allow_internal: bool = False,
|
||||||
|
) -> ParseResult:
|
||||||
|
parsed = urlparse(url)
|
||||||
|
scheme = parsed.scheme.lower()
|
||||||
|
|
||||||
|
if scheme not in allowed_schemes or not parsed.hostname:
|
||||||
|
raise ValueError("Invalid URL scheme or hostname.")
|
||||||
|
|
||||||
|
default_port = 443 if scheme == "https" else 80
|
||||||
|
try:
|
||||||
|
port = parsed.port or default_port
|
||||||
|
except ValueError as e:
|
||||||
|
raise ValueError("Invalid URL scheme or hostname.") from e
|
||||||
|
|
||||||
|
if allowed_ports and port not in allowed_ports:
|
||||||
|
raise ValueError("Destination port not permitted.")
|
||||||
|
|
||||||
|
if not allow_internal:
|
||||||
|
for ip_str in resolve_hostname_ips(parsed.hostname):
|
||||||
|
if not is_public_ip(ip_str):
|
||||||
|
raise ValueError(
|
||||||
|
f"Connection blocked: {parsed.hostname} resolves to a non-public address",
|
||||||
|
)
|
||||||
|
|
||||||
|
return parsed
|
||||||
@@ -35,6 +35,7 @@ Usage example (third-party parser)::
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import dataclass
|
||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
from typing import Protocol
|
from typing import Protocol
|
||||||
from typing import Self
|
from typing import Self
|
||||||
@@ -48,6 +49,7 @@ if TYPE_CHECKING:
|
|||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"MetadataEntry",
|
"MetadataEntry",
|
||||||
|
"ParserContext",
|
||||||
"ParserProtocol",
|
"ParserProtocol",
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -73,6 +75,44 @@ class MetadataEntry(TypedDict):
|
|||||||
"""String representation of the field value."""
|
"""String representation of the field value."""
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True, slots=True)
|
||||||
|
class ParserContext:
|
||||||
|
"""Immutable context passed to a parser before parse().
|
||||||
|
|
||||||
|
The consumer assembles this from the ingestion event and Django
|
||||||
|
settings, then calls ``parser.configure(context)`` before
|
||||||
|
``parser.parse()``. Parsers read only the fields relevant to them;
|
||||||
|
unneeded fields are ignored.
|
||||||
|
|
||||||
|
``frozen=True`` prevents accidental mutation after the consumer
|
||||||
|
hands the context off. ``slots=True`` keeps instances lightweight.
|
||||||
|
|
||||||
|
Fields
|
||||||
|
------
|
||||||
|
mailrule_id : int | None
|
||||||
|
Primary key of the ``MailRule`` that triggered this ingestion,
|
||||||
|
or ``None`` when the document did not arrive via a mail rule.
|
||||||
|
Used by ``MailDocumentParser`` to select the PDF layout.
|
||||||
|
|
||||||
|
Notes
|
||||||
|
-----
|
||||||
|
Future fields (not yet implemented):
|
||||||
|
|
||||||
|
* ``output_type`` — PDF/A variant for archive generation
|
||||||
|
(replaces ``settings.OCR_OUTPUT_TYPE`` reads inside parsers).
|
||||||
|
* ``ocr_mode`` — skip-text, redo, force, etc.
|
||||||
|
(replaces ``settings.OCR_MODE`` reads inside parsers).
|
||||||
|
* ``ocr_language`` — Tesseract language string.
|
||||||
|
(replaces ``settings.OCR_LANGUAGE`` reads inside parsers).
|
||||||
|
|
||||||
|
When those fields are added the consumer will read from Django
|
||||||
|
settings once and populate them here, decoupling parsers from
|
||||||
|
``settings.*`` entirely.
|
||||||
|
"""
|
||||||
|
|
||||||
|
mailrule_id: int | None = None
|
||||||
|
|
||||||
|
|
||||||
@runtime_checkable
|
@runtime_checkable
|
||||||
class ParserProtocol(Protocol):
|
class ParserProtocol(Protocol):
|
||||||
"""Structural contract for all Paperless-ngx document parsers.
|
"""Structural contract for all Paperless-ngx document parsers.
|
||||||
@@ -191,6 +231,21 @@ class ParserProtocol(Protocol):
|
|||||||
# Core parsing interface
|
# Core parsing interface
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def configure(self, context: ParserContext) -> None:
    """Apply source context before parse().

    Called by the consumer after instantiation and before parse().
    The default implementation is a no-op; parsers override only the
    fields they need. Implementations must not mutate *context* —
    ``ParserContext`` is a frozen dataclass.

    Parameters
    ----------
    context:
        Immutable context assembled by the consumer for this
        specific ingestion event.
    """
    ...
|
||||||
|
|
||||||
def parse(
|
def parse(
|
||||||
self,
|
self,
|
||||||
document_path: Path,
|
document_path: Path,
|
||||||
|
|||||||
834
src/paperless/parsers/mail.py
Normal file
@@ -0,0 +1,834 @@
|
|||||||
|
"""
|
||||||
|
Built-in mail document parser.
|
||||||
|
|
||||||
|
Handles message/rfc822 (EML) MIME type by:
|
||||||
|
- Parsing the email using imap_tools
|
||||||
|
- Generating a PDF via Gotenberg (for display and archive)
|
||||||
|
- Extracting text via Tika for HTML content
|
||||||
|
- Extracting metadata from email headers
|
||||||
|
|
||||||
|
The parser always produces a PDF because EML files cannot be rendered
|
||||||
|
natively in a browser (requires_pdf_rendition=True).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
import shutil
|
||||||
|
import tempfile
|
||||||
|
from html import escape
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
from typing import Self
|
||||||
|
|
||||||
|
from bleach import clean
|
||||||
|
from bleach import linkify
|
||||||
|
from django.conf import settings
|
||||||
|
from django.utils import timezone
|
||||||
|
from django.utils.timezone import is_naive
|
||||||
|
from django.utils.timezone import make_aware
|
||||||
|
from gotenberg_client import GotenbergClient
|
||||||
|
from gotenberg_client.constants import A4
|
||||||
|
from gotenberg_client.options import Measurement
|
||||||
|
from gotenberg_client.options import MeasurementUnitType
|
||||||
|
from gotenberg_client.options import PageMarginsType
|
||||||
|
from gotenberg_client.options import PdfAFormat
|
||||||
|
from humanize import naturalsize
|
||||||
|
from imap_tools import MailAttachment
|
||||||
|
from imap_tools import MailMessage
|
||||||
|
from tika_client import TikaClient
|
||||||
|
|
||||||
|
from documents.parsers import ParseError
|
||||||
|
from documents.parsers import make_thumbnail_from_pdf
|
||||||
|
from paperless.models import OutputTypeChoices
|
||||||
|
from paperless.version import __full_version_str__
|
||||||
|
from paperless_mail.models import MailRule
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
import datetime
|
||||||
|
from types import TracebackType
|
||||||
|
|
||||||
|
from paperless.parsers import MetadataEntry
|
||||||
|
from paperless.parsers import ParserContext
|
||||||
|
|
||||||
|
logger = logging.getLogger("paperless.parsing.mail")
|
||||||
|
|
||||||
|
# MIME types handled by MailDocumentParser, mapped to the preferred
# file extension for each.
_SUPPORTED_MIME_TYPES: dict[str, str] = {
    "message/rfc822": ".eml",
}
|
||||||
|
|
||||||
|
|
||||||
|
class MailDocumentParser:
|
||||||
|
"""Parse .eml email files for Paperless-ngx.
|
||||||
|
|
||||||
|
Uses imap_tools to parse .eml files, generates a PDF using Gotenberg,
|
||||||
|
and sends the HTML part to a Tika server for text extraction. Because
|
||||||
|
EML files cannot be rendered natively in a browser, the parser always
|
||||||
|
produces a PDF rendition (requires_pdf_rendition=True).
|
||||||
|
|
||||||
|
Pass a ``ParserContext`` to ``configure()`` before ``parse()`` to
|
||||||
|
apply mail-rule-specific PDF layout options:
|
||||||
|
|
||||||
|
parser.configure(ParserContext(mailrule_id=rule.pk))
|
||||||
|
parser.parse(path, mime_type)
|
||||||
|
|
||||||
|
Class attributes
|
||||||
|
----------------
|
||||||
|
name : str
|
||||||
|
Human-readable parser name.
|
||||||
|
version : str
|
||||||
|
Semantic version string, kept in sync with Paperless-ngx releases.
|
||||||
|
author : str
|
||||||
|
Maintainer name.
|
||||||
|
url : str
|
||||||
|
Issue tracker / source URL.
|
||||||
|
"""
|
||||||
|
|
||||||
|
name: str = "Paperless-ngx Mail Parser"
|
||||||
|
version: str = __full_version_str__
|
||||||
|
author: str = "Paperless-ngx Contributors"
|
||||||
|
url: str = "https://github.com/paperless-ngx/paperless-ngx"
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Class methods
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
@classmethod
def supported_mime_types(cls) -> dict[str, str]:
    """Map each MIME type this parser accepts to its preferred extension.

    Returns
    -------
    dict[str, str]
        Mapping of MIME type to preferred file extension.
    """
    return _SUPPORTED_MIME_TYPES
|
||||||
|
|
||||||
|
@classmethod
def score(
    cls,
    mime_type: str,
    filename: str,
    path: Path | None = None,
) -> int | None:
    """Return the priority score for handling this file.

    *filename* and *path* are part of the protocol signature but are
    not inspected by this parser — only the MIME type matters.

    Parameters
    ----------
    mime_type:
        Detected MIME type of the file.
    filename:
        Original filename including extension.
    path:
        Optional filesystem path. Not inspected by this parser.

    Returns
    -------
    int | None
        10 if the MIME type is supported, otherwise None.
    """
    return 10 if mime_type in _SUPPORTED_MIME_TYPES else None
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Properties
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
@property
def can_produce_archive(self) -> bool:
    """Whether this parser can produce a searchable PDF archive copy.

    Always False: the PDF produced here is the display rendition
    (see ``requires_pdf_rendition``), not an optional OCR archive.
    """
    return False
|
||||||
|
|
||||||
|
@property
def requires_pdf_rendition(self) -> bool:
    """Whether the parser must produce a PDF for the frontend to display.

    Always True: EML files cannot be rendered natively in a browser,
    so a PDF conversion is always required for display.
    """
    return True
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Lifecycle
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def __init__(self, logging_group: object = None) -> None:
    """Create the scratch workspace and reset all parse results."""
    settings.SCRATCH_DIR.mkdir(parents=True, exist_ok=True)
    scratch = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR)
    self._tempdir = Path(scratch)
    # Populated by parse(); None until then.
    self._text: str | None = None
    self._date: datetime.datetime | None = None
    self._archive_path: Path | None = None
    # Set via configure(); selects the mail rule's PDF layout.
    self._mailrule_id: int | None = None
|
||||||
|
def __enter__(self) -> Self:
    """Enter context management; the parser itself is the resource."""
    return self
|
||||||
|
|
||||||
|
def __exit__(
    self,
    exc_type: type[BaseException] | None,
    exc_val: BaseException | None,
    exc_tb: TracebackType | None,
) -> None:
    """Remove the scratch directory; cleanup errors are ignored."""
    logger.debug("Cleaning up temporary directory %s", self._tempdir)
    shutil.rmtree(self._tempdir, ignore_errors=True)
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Core parsing interface
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def configure(self, context: ParserContext) -> None:
    """Record the mail rule id so parse() can pick the PDF layout."""
    self._mailrule_id = context.mailrule_id
|
||||||
|
|
||||||
|
def parse(
    self,
    document_path: Path,
    mime_type: str,
    *,
    produce_archive: bool = True,
) -> None:
    """Parse the given .eml into formatted text and a PDF archive.

    Call ``configure(ParserContext(mailrule_id=...))`` before this method
    to apply mail-rule-specific PDF layout options. The ``produce_archive``
    flag is accepted for protocol compatibility but is always honoured —
    the mail parser always produces a PDF since EML files cannot be
    displayed natively.

    Parameters
    ----------
    document_path:
        Absolute path to the .eml file.
    mime_type:
        Detected MIME type of the document (should be "message/rfc822").
    produce_archive:
        Accepted for protocol compatibility. The PDF rendition is always
        produced since EML files cannot be displayed natively in a browser.

    Raises
    ------
    documents.parsers.ParseError
        If the file cannot be parsed or PDF generation fails.
    """

    def strip_text(text: str) -> str:
        """Reduces the spacing of the given text string."""
        text = re.sub(r"\s+", " ", text)
        text = re.sub(r"(\n *)+", "\n", text)
        return text.strip()

    def build_formatted_text(mail_message: MailMessage) -> str:
        """Constructs a formatted string based on the given email."""
        # FIX: previously this closure mixed its parameter with the
        # outer ``mail`` variable (mail.cc_values, mail.attachments,
        # mail.html, ...), only working because it was called after
        # ``mail`` was bound. It now reads the parameter exclusively.
        fmt_text = f"Subject: {mail_message.subject}\n\n"
        fmt_text += f"From: {mail_message.from_values.full if mail_message.from_values else ''}\n\n"
        to_list = [address.full for address in mail_message.to_values]
        fmt_text += f"To: {', '.join(to_list)}\n\n"
        if mail_message.cc_values:
            fmt_text += (
                f"CC: {', '.join(address.full for address in mail_message.cc_values)}\n\n"
            )
        if mail_message.bcc_values:
            fmt_text += (
                f"BCC: {', '.join(address.full for address in mail_message.bcc_values)}\n\n"
            )
        if mail_message.attachments:
            att = []
            for a in mail_message.attachments:
                attachment_size = naturalsize(a.size, binary=True, format="%.2f")
                att.append(
                    f"{a.filename} ({attachment_size})",
                )
            fmt_text += f"Attachments: {', '.join(att)}\n\n"

        if mail_message.html:
            fmt_text += "HTML content: " + strip_text(
                self.tika_parse(mail_message.html),
            )

        fmt_text += f"\n\n{strip_text(mail_message.text)}"

        return fmt_text

    logger.debug("Parsing file %s into an email", document_path.name)
    mail = self.parse_file_to_message(document_path)

    logger.debug("Building formatted text from email")
    self._text = build_formatted_text(mail)

    # Normalise the header date to a timezone-aware datetime.
    if is_naive(mail.date):
        self._date = make_aware(mail.date)
    else:
        self._date = mail.date

    logger.debug("Creating a PDF from the email")
    if self._mailrule_id:
        # A mail rule supplies its own PDF layout preference.
        rule = MailRule.objects.get(pk=self._mailrule_id)
        self._archive_path = self.generate_pdf(
            mail,
            MailRule.PdfLayout(rule.pdf_layout),
        )
    else:
        self._archive_path = self.generate_pdf(mail)
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Result accessors
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def get_text(self) -> str | None:
    """Return the plain-text content extracted by parse().

    Returns
    -------
    str | None
        Extracted text, or None when parse() has not run yet.
    """
    return self._text
|
||||||
|
|
||||||
|
def get_date(self) -> datetime.datetime | None:
    """Return the document date taken from the email headers.

    Returns
    -------
    datetime.datetime | None
        Header date, or None when parse() has not run yet.
    """
    return self._date
|
||||||
|
|
||||||
|
def get_archive_path(self) -> Path | None:
    """Return the path to the Gotenberg-generated archive PDF.

    Returns
    -------
    Path | None
        Path to the PDF, or None when parse() has not run yet.
    """
    return self._archive_path
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Thumbnail and metadata
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def get_thumbnail(
    self,
    document_path: Path,
    mime_type: str,
    file_name: str | None = None,
) -> Path:
    """Render a thumbnail for the email via its PDF rendition.

    The email is first converted to PDF (if parse() has not already done
    so), then the thumbnail is produced from that PDF.

    Parameters
    ----------
    document_path:
        Absolute path to the source .eml file.
    mime_type:
        Detected MIME type of the document.
    file_name:
        Unused; retained for backward compatibility with older callers.

    Returns
    -------
    Path
        Path to the generated WebP thumbnail inside the temporary directory.
    """
    # Lazily build the PDF rendition when parse() was never called.
    if not self._archive_path:
        message = self.parse_file_to_message(document_path)
        self._archive_path = self.generate_pdf(message)

    return make_thumbnail_from_pdf(self._archive_path, self._tempdir)
|
||||||
|
|
||||||
|
def get_page_count(
    self,
    document_path: Path,
    mime_type: str,
) -> int | None:
    """Count the pages of the archive PDF produced by parse().

    Returns
    -------
    int | None
        Page count of the archive PDF, or ``None`` when parse() has not
        run or produced no archive.
    """
    # Nothing to count before parse() has produced an archive PDF.
    if self._archive_path is None:
        return None

    from paperless.parsers.utils import get_page_count_for_pdf

    return get_page_count_for_pdf(self._archive_path, log=logger)
|
||||||
|
|
||||||
|
def extract_metadata(
    self,
    document_path: Path,
    mime_type: str,
) -> list[MetadataEntry]:
    """Extract metadata from the email headers.

    Emits one entry per email header (prefix "header"), plus summary
    entries describing the attachments and the message date.

    Returns
    -------
    list[MetadataEntry]
        Sorted list of metadata entries, or ``[]`` on parse failure.
    """
    entries: list[MetadataEntry] = []

    try:
        message = self.parse_file_to_message(document_path)
    except ParseError as exc:
        logger.warning(
            "Error while fetching document metadata for %s: %s",
            document_path,
            exc,
        )
        return entries

    for header_name, header_values in message.headers.items():
        joined = ", ".join(header_values)
        # Headers that cannot be encoded as UTF-8 are skipped rather
        # than crashing metadata extraction.
        try:
            joined.encode("utf-8")
        except UnicodeEncodeError as exc:  # pragma: no cover
            logger.debug("Skipping header %s: %s", header_name, exc)
            continue

        entries.append(
            {
                "namespace": "",
                "prefix": "header",
                "key": header_name,
                "value": joined,
            },
        )

    attachment_summary = ", ".join(
        f"{attachment.filename}"
        f"({naturalsize(attachment.size, binary=True, format='%.2f')})"
        for attachment in message.attachments
    )
    entries.append(
        {
            "namespace": "",
            "prefix": "",
            "key": "attachments",
            "value": attachment_summary,
        },
    )

    entries.append(
        {
            "namespace": "",
            "prefix": "",
            "key": "date",
            "value": message.date.strftime("%Y-%m-%d %H:%M:%S %Z"),
        },
    )

    entries.sort(key=lambda entry: (entry["prefix"], entry["key"]))
    return entries
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Email-specific methods
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _settings_to_gotenberg_pdfa(self) -> PdfAFormat | None:
    """Map the configured OCR output type onto a Gotenberg PdfAFormat.

    Returns ``None`` when the configured output type needs no PDF/A
    post-processing.
    """
    output_type = settings.OCR_OUTPUT_TYPE

    if output_type in {OutputTypeChoices.PDF_A, OutputTypeChoices.PDF_A2}:
        return PdfAFormat.A2b
    if output_type == OutputTypeChoices.PDF_A1:  # pragma: no cover
        # Gotenberg has no PDF/A-1 support, so fall back to A-2b.
        logger.warning(
            "Gotenberg does not support PDF/A-1a, choosing PDF/A-2b instead",
        )
        return PdfAFormat.A2b
    if output_type == OutputTypeChoices.PDF_A3:  # pragma: no cover
        return PdfAFormat.A3b
    return None
|
||||||
|
|
||||||
|
@staticmethod
def parse_file_to_message(filepath: Path) -> MailMessage:
    """Parse the given .eml file into a MailMessage object.

    Parameters
    ----------
    filepath:
        Path to the .eml file.

    Returns
    -------
    MailMessage
        Parsed mail message.

    Raises
    ------
    documents.parsers.ParseError
        If the file cannot be parsed or is missing required fields.
    """
    # Keep only the code that can raise unexpected errors inside the
    # try block. Previously the "Missing 'from'" ParseError was raised
    # inside it and then caught by the blanket `except Exception`,
    # which re-wrapped it into a doubled
    # "Could not parse X: Could not parse X: Missing 'from'" message.
    try:
        with filepath.open("rb") as eml:
            parsed = MailMessage.from_bytes(eml.read())
    except Exception as err:
        raise ParseError(
            f"Could not parse {filepath}: {err}",
        ) from err

    # Validate outside the try so the error message is emitted once.
    if parsed.from_values is None:
        raise ParseError(
            f"Could not parse {filepath}: Missing 'from'",
        )

    return parsed
|
||||||
|
|
||||||
|
def tika_parse(self, html: str) -> str:
    """Extract plain text from HTML via the configured Tika server.

    Parameters
    ----------
    html:
        HTML string to parse.

    Returns
    -------
    str
        Extracted plain text ("" when Tika returns no content).

    Raises
    ------
    documents.parsers.ParseError
        If the Tika server cannot be reached or returns an error.
    """
    logger.info("Sending content to Tika server")

    try:
        with TikaClient(tika_url=settings.TIKA_ENDPOINT) as client:
            parsed = client.tika.as_text.from_buffer(html, "text/html")
            content = parsed.content
            return content.strip() if content is not None else ""
    except Exception as err:
        # Any failure (connection, HTTP, parsing) is surfaced uniformly.
        raise ParseError(
            f"Could not parse content with tika server at "
            f"{settings.TIKA_ENDPOINT}: {err}",
        ) from err
|
||||||
|
|
||||||
|
def generate_pdf(
    self,
    mail_message: MailMessage,
    pdf_layout: MailRule.PdfLayout | None = None,
) -> Path:
    """Generate a PDF from the email message.

    Creates separate PDFs for the email body and HTML content, then
    merges them according to the requested layout.

    Parameters
    ----------
    mail_message:
        Parsed email message.
    pdf_layout:
        Layout option for the PDF. Falls back to the
        EMAIL_PARSE_DEFAULT_LAYOUT setting if not provided.

    Returns
    -------
    Path
        Path to the generated PDF inside the temporary directory.

    Raises
    ------
    documents.parsers.ParseError
        If Gotenberg fails while merging the PDFs.
    """
    archive_path = Path(self._tempdir) / "merged.pdf"

    # The body-only PDF is always needed, whatever the layout.
    mail_pdf_file = self.generate_pdf_from_mail(mail_message)

    if pdf_layout is None:
        pdf_layout = MailRule.PdfLayout(settings.EMAIL_PARSE_DEFAULT_LAYOUT)

    # If no HTML content, create the PDF from the message.
    # Otherwise, create 2 PDFs and merge them with Gotenberg.
    if not mail_message.html:
        archive_path.write_bytes(mail_pdf_file.read_bytes())
    else:
        pdf_of_html_content = self.generate_pdf_from_html(
            mail_message.html,
            mail_message.attachments,
        )

        logger.debug("Merging email text and HTML content into single PDF")

        with (
            GotenbergClient(
                host=settings.TIKA_GOTENBERG_ENDPOINT,
                timeout=settings.CELERY_TASK_TIME_LIMIT,
            ) as client,
            client.merge.merge() as route,
        ):
            # Configure requested PDF/A formatting, if any
            pdf_a_format = self._settings_to_gotenberg_pdfa()
            if pdf_a_format is not None:
                route.pdf_format(pdf_a_format)

            # The order of the list passed to merge() determines page
            # order in the output; unknown layouts fall through to
            # text-then-HTML.
            match pdf_layout:
                case MailRule.PdfLayout.HTML_TEXT:
                    route.merge([pdf_of_html_content, mail_pdf_file])
                case MailRule.PdfLayout.HTML_ONLY:
                    route.merge([pdf_of_html_content])
                case MailRule.PdfLayout.TEXT_ONLY:
                    route.merge([mail_pdf_file])
                case MailRule.PdfLayout.TEXT_HTML | _:
                    route.merge([mail_pdf_file, pdf_of_html_content])

            try:
                response = route.run()
                archive_path.write_bytes(response.content)
            except Exception as err:
                raise ParseError(
                    f"Error while merging email HTML into PDF: {err}",
                ) from err

    return archive_path
|
||||||
|
|
||||||
|
def mail_to_html(self, mail: MailMessage) -> Path:
    """Convert the given email into an HTML file using a template.

    Header fields (subject, from, to, cc, bcc, attachments) are sanitized
    and rendered through the ``email_msg_template.html`` template; a
    ``*_label`` key is only added when the corresponding field is
    non-empty, so the template can omit empty rows.

    Parameters
    ----------
    mail:
        Parsed mail message.

    Returns
    -------
    Path
        Path to the rendered HTML file inside the temporary directory.
    """

    def clean_html(text: str) -> str:
        """Attempt to clean, escape, and linkify the given HTML string."""
        # Some header accessors can yield lists; flatten them first.
        if isinstance(text, list):
            text = "\n".join([str(e) for e in text])
        if not isinstance(text, str):
            text = str(text)
        text = escape(text)
        text = clean(text)
        text = linkify(text, parse_email=True)
        text = text.replace("\n", "<br>")
        return text

    data = {}

    data["subject"] = clean_html(mail.subject)
    if data["subject"]:
        data["subject_label"] = "Subject"
    data["from"] = clean_html(mail.from_values.full if mail.from_values else "")
    if data["from"]:
        data["from_label"] = "From"
    data["to"] = clean_html(", ".join(address.full for address in mail.to_values))
    if data["to"]:
        data["to_label"] = "To"
    data["cc"] = clean_html(", ".join(address.full for address in mail.cc_values))
    if data["cc"]:
        data["cc_label"] = "CC"
    data["bcc"] = clean_html(", ".join(address.full for address in mail.bcc_values))
    if data["bcc"]:
        data["bcc_label"] = "BCC"

    # Human-readable attachment list, e.g. "report.pdf (1.23 KiB)".
    att = []
    for a in mail.attachments:
        att.append(
            f"{a.filename} ({naturalsize(a.size, binary=True, format='%.2f')})",
        )
    data["attachments"] = clean_html(", ".join(att))
    if data["attachments"]:
        data["attachments_label"] = "Attachments"

    data["date"] = clean_html(
        timezone.localtime(mail.date).strftime("%Y-%m-%d %H:%M"),
    )
    data["content"] = clean_html(mail.text.strip())

    # NOTE(review): imported locally, presumably to defer Django template
    # machinery until it is actually needed — confirm before hoisting.
    from django.template.loader import render_to_string

    html_file = Path(self._tempdir) / "email_as_html.html"
    html_file.write_text(render_to_string("email_msg_template.html", context=data))

    return html_file
|
||||||
|
|
||||||
|
def generate_pdf_from_mail(self, mail: MailMessage) -> Path:
    """Create a PDF from the email body using an HTML template and Gotenberg.

    Parameters
    ----------
    mail:
        Parsed mail message.

    Returns
    -------
    Path
        Path to the generated PDF inside the temporary directory.

    Raises
    ------
    documents.parsers.ParseError
        If Gotenberg returns an error.
    """
    logger.info("Converting mail to PDF")

    # Stylesheet shipped with the paperless_mail app, resolved relative
    # to this module's location.
    css_file = (
        Path(__file__).parent.parent.parent
        / "paperless_mail"
        / "templates"
        / "output.css"
    )
    email_html_file = self.mail_to_html(mail)

    with (
        GotenbergClient(
            host=settings.TIKA_GOTENBERG_ENDPOINT,
            timeout=settings.CELERY_TASK_TIME_LIMIT,
        ) as client,
        client.chromium.html_to_pdf() as route,
    ):
        # Configure requested PDF/A formatting, if any
        pdf_a_format = self._settings_to_gotenberg_pdfa()
        if pdf_a_format is not None:
            route.pdf_format(pdf_a_format)

        try:
            # A4 page with uniform 0.1 inch margins at 1:1 scale.
            response = (
                route.index(email_html_file)
                .resource(css_file)
                .margins(
                    PageMarginsType(
                        top=Measurement(0.1, MeasurementUnitType.Inches),
                        bottom=Measurement(0.1, MeasurementUnitType.Inches),
                        left=Measurement(0.1, MeasurementUnitType.Inches),
                        right=Measurement(0.1, MeasurementUnitType.Inches),
                    ),
                )
                .size(A4)
                .scale(1.0)
                .run()
            )
        except Exception as err:
            raise ParseError(
                f"Error while converting email to PDF: {err}",
            ) from err

    email_as_pdf_file = Path(self._tempdir) / "email_as_pdf.pdf"
    email_as_pdf_file.write_bytes(response.content)

    return email_as_pdf_file
|
||||||
|
|
||||||
|
def generate_pdf_from_html(
    self,
    orig_html: str,
    attachments: list[MailAttachment],
) -> Path:
    """Generate a PDF from the HTML content of the email.

    Script tags are neutralized, inline (cid:) attachments are written to
    disk and registered as Gotenberg resources, and the rewritten HTML is
    converted to an A4 PDF.

    Parameters
    ----------
    orig_html:
        Raw HTML string from the email body.
    attachments:
        List of email attachments (used as inline resources).

    Returns
    -------
    Path
        Path to the generated PDF inside the temporary directory.

    Raises
    ------
    documents.parsers.ParseError
        If Gotenberg returns an error.
    """

    def clean_html_script(text: str) -> str:
        """Disable <script> blocks by rewriting them into hidden divs."""
        compiled_open = re.compile(re.escape("<script"), re.IGNORECASE)
        text = compiled_open.sub("<div hidden ", text)

        compiled_close = re.compile(re.escape("</script"), re.IGNORECASE)
        text = compiled_close.sub("</div", text)
        return text

    logger.info("Converting message html to PDF")

    tempdir = Path(self._tempdir)

    html_clean = clean_html_script(orig_html)

    with (
        GotenbergClient(
            host=settings.TIKA_GOTENBERG_ENDPOINT,
            timeout=settings.CELERY_TASK_TIME_LIMIT,
        ) as client,
        client.chromium.html_to_pdf() as route,
    ):
        # Configure requested PDF/A formatting, if any
        pdf_a_format = self._settings_to_gotenberg_pdfa()
        if pdf_a_format is not None:
            route.pdf_format(pdf_a_format)

        # Add attachments as resources, cleaning the filename and replacing
        # it in the index file for inclusion
        for attachment in attachments:
            # Clean the attachment name to be valid
            name_cid = f"cid:{attachment.content_id}"
            name_clean = "".join(e for e in name_cid if e.isalnum())

            # Write attachment payload to a temp file
            temp_file = tempdir / name_clean
            temp_file.write_bytes(attachment.payload)

            route.resource(temp_file)

            # Replace as needed the name with the clean name
            html_clean = html_clean.replace(name_cid, name_clean)

        # Write the index file only once, after all cid references have
        # been rewritten. (Previously the pre-substitution HTML was also
        # written to the same path before the loop — a dead write that
        # was unconditionally overwritten here.)
        html_clean_file = tempdir / "index.html"
        html_clean_file.write_text(html_clean)
        # This is our index file, the main page basically
        route.index(html_clean_file)

        # Set page size, margins
        route.margins(
            PageMarginsType(
                top=Measurement(0.1, MeasurementUnitType.Inches),
                bottom=Measurement(0.1, MeasurementUnitType.Inches),
                left=Measurement(0.1, MeasurementUnitType.Inches),
                right=Measurement(0.1, MeasurementUnitType.Inches),
            ),
        ).size(A4).scale(1.0)

        try:
            response = route.run()

        except Exception as err:
            raise ParseError(
                f"Error while converting document to PDF: {err}",
            ) from err

    html_pdf = tempdir / "html.pdf"
    html_pdf.write_bytes(response.content)
    return html_pdf
|
||||||
@@ -33,6 +33,7 @@ name, version, author, url, supported_mime_types (callable), score (callable).
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
import threading
|
||||||
from importlib.metadata import entry_points
|
from importlib.metadata import entry_points
|
||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
@@ -49,6 +50,7 @@ logger = logging.getLogger("paperless.parsers.registry")
|
|||||||
|
|
||||||
_registry: ParserRegistry | None = None
|
_registry: ParserRegistry | None = None
|
||||||
_discovery_complete: bool = False
|
_discovery_complete: bool = False
|
||||||
|
_lock = threading.Lock()
|
||||||
|
|
||||||
# Attribute names that every registered external parser class must expose.
|
# Attribute names that every registered external parser class must expose.
|
||||||
_REQUIRED_ATTRS: tuple[str, ...] = (
|
_REQUIRED_ATTRS: tuple[str, ...] = (
|
||||||
@@ -74,7 +76,6 @@ def get_parser_registry() -> ParserRegistry:
|
|||||||
1. Creates a new ParserRegistry.
|
1. Creates a new ParserRegistry.
|
||||||
2. Calls register_defaults to install built-in parsers.
|
2. Calls register_defaults to install built-in parsers.
|
||||||
3. Calls discover to load third-party plugins via importlib.metadata entrypoints.
|
3. Calls discover to load third-party plugins via importlib.metadata entrypoints.
|
||||||
4. Calls log_summary to emit a startup summary.
|
|
||||||
|
|
||||||
Subsequent calls return the same instance immediately.
|
Subsequent calls return the same instance immediately.
|
||||||
|
|
||||||
@@ -85,13 +86,14 @@ def get_parser_registry() -> ParserRegistry:
|
|||||||
"""
|
"""
|
||||||
global _registry, _discovery_complete
|
global _registry, _discovery_complete
|
||||||
|
|
||||||
|
with _lock:
|
||||||
if _registry is None:
|
if _registry is None:
|
||||||
_registry = ParserRegistry()
|
r = ParserRegistry()
|
||||||
_registry.register_defaults()
|
r.register_defaults()
|
||||||
|
_registry = r
|
||||||
|
|
||||||
if not _discovery_complete:
|
if not _discovery_complete:
|
||||||
_registry.discover()
|
_registry.discover()
|
||||||
_registry.log_summary()
|
|
||||||
_discovery_complete = True
|
_discovery_complete = True
|
||||||
|
|
||||||
return _registry
|
return _registry
|
||||||
@@ -113,9 +115,11 @@ def init_builtin_parsers() -> None:
|
|||||||
"""
|
"""
|
||||||
global _registry
|
global _registry
|
||||||
|
|
||||||
|
with _lock:
|
||||||
if _registry is None:
|
if _registry is None:
|
||||||
_registry = ParserRegistry()
|
r = ParserRegistry()
|
||||||
_registry.register_defaults()
|
r.register_defaults()
|
||||||
|
_registry = r
|
||||||
|
|
||||||
|
|
||||||
def reset_parser_registry() -> None:
|
def reset_parser_registry() -> None:
|
||||||
@@ -193,9 +197,17 @@ class ParserRegistry:
|
|||||||
that log output is predictable; scoring determines which parser wins
|
that log output is predictable; scoring determines which parser wins
|
||||||
at runtime regardless of registration order.
|
at runtime regardless of registration order.
|
||||||
"""
|
"""
|
||||||
|
from paperless.parsers.mail import MailDocumentParser
|
||||||
|
from paperless.parsers.remote import RemoteDocumentParser
|
||||||
|
from paperless.parsers.tesseract import RasterisedDocumentParser
|
||||||
from paperless.parsers.text import TextDocumentParser
|
from paperless.parsers.text import TextDocumentParser
|
||||||
|
from paperless.parsers.tika import TikaDocumentParser
|
||||||
|
|
||||||
self.register_builtin(TextDocumentParser)
|
self.register_builtin(TextDocumentParser)
|
||||||
|
self.register_builtin(RemoteDocumentParser)
|
||||||
|
self.register_builtin(TikaDocumentParser)
|
||||||
|
self.register_builtin(MailDocumentParser)
|
||||||
|
self.register_builtin(RasterisedDocumentParser)
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
# Discovery
|
# Discovery
|
||||||
@@ -296,6 +308,23 @@ class ParserRegistry:
|
|||||||
getattr(cls, "url", "unknown"),
|
getattr(cls, "url", "unknown"),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Inspection helpers
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def all_parsers(self) -> list[type[ParserProtocol]]:
|
||||||
|
"""Return all registered parser classes (external first, then builtins).
|
||||||
|
|
||||||
|
Used by compatibility wrappers that need to iterate every parser to
|
||||||
|
compute the full set of supported MIME types and file extensions.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
list[type[ParserProtocol]]
|
||||||
|
External parsers followed by built-in parsers.
|
||||||
|
"""
|
||||||
|
return [*self._external, *self._builtins]
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
# Parser resolution
|
# Parser resolution
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
@@ -326,7 +355,7 @@ class ParserRegistry:
|
|||||||
mime_type:
|
mime_type:
|
||||||
The detected MIME type of the file.
|
The detected MIME type of the file.
|
||||||
filename:
|
filename:
|
||||||
The original filename, including extension.
|
The original filename, including extension. May be empty in some cases
|
||||||
path:
|
path:
|
||||||
Optional filesystem path to the file. Forwarded to each
|
Optional filesystem path to the file. Forwarded to each
|
||||||
parser's score method.
|
parser's score method.
|
||||||
|
|||||||
433
src/paperless/parsers/remote.py
Normal file
@@ -0,0 +1,433 @@
|
|||||||
|
"""
|
||||||
|
Built-in remote-OCR document parser.
|
||||||
|
|
||||||
|
Handles documents by sending them to a configured remote OCR engine
|
||||||
|
(currently Azure AI Vision / Document Intelligence) and retrieving both
|
||||||
|
the extracted text and a searchable PDF with an embedded text layer.
|
||||||
|
|
||||||
|
When no engine is configured, ``score()`` returns ``None`` so the parser
|
||||||
|
is effectively invisible to the registry — the tesseract parser handles
|
||||||
|
these MIME types instead.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import shutil
|
||||||
|
import tempfile
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
from typing import Self
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
from paperless.version import __full_version_str__
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
import datetime
|
||||||
|
from types import TracebackType
|
||||||
|
|
||||||
|
from paperless.parsers import MetadataEntry
|
||||||
|
from paperless.parsers import ParserContext
|
||||||
|
|
||||||
|
logger = logging.getLogger("paperless.parsing.remote")
|
||||||
|
|
||||||
|
# MIME types this parser accepts, mapped to the preferred file extension
# for each type. score() additionally gates on engine configuration.
_SUPPORTED_MIME_TYPES: dict[str, str] = {
    "application/pdf": ".pdf",
    "image/png": ".png",
    "image/jpeg": ".jpg",
    "image/tiff": ".tiff",
    "image/bmp": ".bmp",
    "image/gif": ".gif",
    "image/webp": ".webp",
}
|
||||||
|
|
||||||
|
|
||||||
|
class RemoteEngineConfig:
    """Holds and validates the remote OCR engine configuration."""

    def __init__(
        self,
        engine: str | None,
        api_key: str | None = None,
        endpoint: str | None = None,
    ) -> None:
        self.engine = engine
        self.api_key = api_key
        self.endpoint = endpoint

    def engine_is_valid(self) -> bool:
        """Return True when the engine is known and fully configured."""
        if self.engine not in ("azureai",):
            return False
        if self.api_key is None:
            return False
        # Azure AI additionally requires an endpoint URL.
        return not (self.engine == "azureai" and self.endpoint is None)
|
||||||
|
|
||||||
|
|
||||||
|
class RemoteDocumentParser:
|
||||||
|
"""Parse documents via a remote OCR API (currently Azure AI Vision).
|
||||||
|
|
||||||
|
This parser sends documents to a remote engine that returns both
|
||||||
|
extracted text and a searchable PDF with an embedded text layer.
|
||||||
|
It does not depend on Tesseract or ocrmypdf.
|
||||||
|
|
||||||
|
Class attributes
|
||||||
|
----------------
|
||||||
|
name : str
|
||||||
|
Human-readable parser name.
|
||||||
|
version : str
|
||||||
|
Semantic version string, kept in sync with Paperless-ngx releases.
|
||||||
|
author : str
|
||||||
|
Maintainer name.
|
||||||
|
url : str
|
||||||
|
Issue tracker / source URL.
|
||||||
|
"""
|
||||||
|
|
||||||
|
name: str = "Paperless-ngx Remote OCR Parser"
|
||||||
|
version: str = __full_version_str__
|
||||||
|
author: str = "Paperless-ngx Contributors"
|
||||||
|
url: str = "https://github.com/paperless-ngx/paperless-ngx"
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Class methods
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
@classmethod
def supported_mime_types(cls) -> dict[str, str]:
    """Return the MIME types this parser can handle.

    The mapping is returned unconditionally — even with no remote engine
    configured — because ``score()`` is the method responsible for
    deciding whether this parser is active (it returns ``None`` when
    unconfigured).

    Returns
    -------
    dict[str, str]
        Mapping of MIME type to preferred file extension.
    """
    return _SUPPORTED_MIME_TYPES
|
||||||
|
|
||||||
|
@classmethod
def score(
    cls,
    mime_type: str,
    filename: str,
    path: Path | None = None,
) -> int | None:
    """Return the priority score for handling this file, or None.

    When no valid remote engine is configured the parser returns
    ``None`` and is invisible to the registry for this file. When
    configured, it returns 20 — higher than the Tesseract parser's
    default of 10 — so the remote engine takes priority.

    Parameters
    ----------
    mime_type:
        Detected MIME type of the file.
    filename:
        Original filename including extension.
    path:
        Optional filesystem path. Not inspected by this parser.

    Returns
    -------
    int | None
        20 when the remote engine is configured and the MIME type is
        supported, otherwise None.
    """
    engine_config = RemoteEngineConfig(
        engine=settings.REMOTE_OCR_ENGINE,
        api_key=settings.REMOTE_OCR_API_KEY,
        endpoint=settings.REMOTE_OCR_ENDPOINT,
    )
    if engine_config.engine_is_valid() and mime_type in _SUPPORTED_MIME_TYPES:
        return 20
    return None
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Properties
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
@property
def can_produce_archive(self) -> bool:
    """Whether this parser can produce a searchable PDF archive copy.

    Returns
    -------
    bool
        Always True: the remote engine returns a PDF with an embedded
        text layer, which serves as the archive copy.
    """
    return True
|
||||||
|
|
||||||
|
@property
def requires_pdf_rendition(self) -> bool:
    """Whether the parser must produce a PDF for the frontend to display.

    Returns
    -------
    bool
        Always False: supported originals are either directly
        displayable by the browser (PDF) or covered by the archive
        copy (images).
    """
    return False
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Lifecycle
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def __init__(self, logging_group: object = None) -> None:
    """Create per-parse scratch space under SCRATCH_DIR and init state."""
    settings.SCRATCH_DIR.mkdir(parents=True, exist_ok=True)
    scratch = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR)
    self._tempdir = Path(scratch)
    self._logging_group = logging_group
    # Results of parse(); remain None until parse() has run.
    self._text: str | None = None
    self._archive_path: Path | None = None
|
||||||
|
|
||||||
|
def __enter__(self) -> Self:
    """Enter the context manager; returns the parser itself."""
    return self
|
||||||
|
|
||||||
|
def __exit__(
    self,
    exc_type: type[BaseException] | None,
    exc_val: BaseException | None,
    exc_tb: TracebackType | None,
) -> None:
    """Remove the scratch directory on context exit.

    Removal errors are ignored; any in-flight exception is never
    suppressed (this method returns None).
    """
    logger.debug("Cleaning up temporary directory %s", self._tempdir)
    shutil.rmtree(self._tempdir, ignore_errors=True)
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Core parsing interface
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def configure(self, context: ParserContext) -> None:
    """No-op: this parser requires no per-document configuration."""
    pass
|
||||||
|
|
||||||
|
def parse(
    self,
    document_path: Path,
    mime_type: str,
    *,
    produce_archive: bool = True,
) -> None:
    """Send the document to the remote engine and store results.

    Parameters
    ----------
    document_path:
        Absolute path to the document file to parse.
    mime_type:
        Detected MIME type of the document.
    produce_archive:
        Ignored — the remote engine always returns a searchable PDF,
        which is stored as the archive copy regardless of this flag.
    """
    engine_config = RemoteEngineConfig(
        engine=settings.REMOTE_OCR_ENGINE,
        api_key=settings.REMOTE_OCR_API_KEY,
        endpoint=settings.REMOTE_OCR_ENDPOINT,
    )

    # Without a usable engine, degrade gracefully to empty content.
    if not engine_config.engine_is_valid():
        logger.warning(
            "No valid remote parser engine is configured, content will be empty.",
        )
        self._text = ""
        return

    if engine_config.engine == "azureai":
        self._text = self._azure_ai_vision_parse(document_path, engine_config)
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Result accessors
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def get_text(self) -> str | None:
|
||||||
|
"""Return the plain-text content extracted during parse."""
|
||||||
|
return self._text
|
||||||
|
|
||||||
|
def get_date(self) -> datetime.datetime | None:
|
||||||
|
"""Return the document date detected during parse.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
datetime.datetime | None
|
||||||
|
Always None — the remote parser does not detect dates.
|
||||||
|
"""
|
||||||
|
return None
|
||||||
|
|
||||||
|
def get_archive_path(self) -> Path | None:
|
||||||
|
"""Return the path to the generated archive PDF, or None."""
|
||||||
|
return self._archive_path
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Thumbnail and metadata
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def get_thumbnail(self, document_path: Path, mime_type: str) -> Path:
|
||||||
|
"""Generate a thumbnail image for the document.
|
||||||
|
|
||||||
|
Uses the archive PDF produced by the remote engine when available,
|
||||||
|
otherwise falls back to the original document path (PDF inputs).
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
document_path:
|
||||||
|
Absolute path to the source document.
|
||||||
|
mime_type:
|
||||||
|
Detected MIME type of the document.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
Path
|
||||||
|
Path to the generated WebP thumbnail inside the temp directory.
|
||||||
|
"""
|
||||||
|
# make_thumbnail_from_pdf lives in documents.parsers for now;
|
||||||
|
# it will move to paperless.parsers.utils when the tesseract
|
||||||
|
# parser is migrated in a later phase.
|
||||||
|
from documents.parsers import make_thumbnail_from_pdf
|
||||||
|
|
||||||
|
return make_thumbnail_from_pdf(
|
||||||
|
self._archive_path or document_path,
|
||||||
|
self._tempdir,
|
||||||
|
self._logging_group,
|
||||||
|
)
|
||||||
|
|
||||||
|
def get_page_count(
|
||||||
|
self,
|
||||||
|
document_path: Path,
|
||||||
|
mime_type: str,
|
||||||
|
) -> int | None:
|
||||||
|
"""Return the number of pages in a PDF document.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
document_path:
|
||||||
|
Absolute path to the source document.
|
||||||
|
mime_type:
|
||||||
|
Detected MIME type of the document.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
int | None
|
||||||
|
Page count for PDF inputs, or ``None`` for other MIME types.
|
||||||
|
"""
|
||||||
|
if mime_type != "application/pdf":
|
||||||
|
return None
|
||||||
|
|
||||||
|
from paperless.parsers.utils import get_page_count_for_pdf
|
||||||
|
|
||||||
|
return get_page_count_for_pdf(document_path, log=logger)
|
||||||
|
|
||||||
|
def extract_metadata(
|
||||||
|
self,
|
||||||
|
document_path: Path,
|
||||||
|
mime_type: str,
|
||||||
|
) -> list[MetadataEntry]:
|
||||||
|
"""Extract format-specific metadata from the document.
|
||||||
|
|
||||||
|
Delegates to the shared pikepdf-based extractor for PDF files.
|
||||||
|
Returns ``[]`` for all other MIME types.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
document_path:
|
||||||
|
Absolute path to the file to extract metadata from.
|
||||||
|
mime_type:
|
||||||
|
MIME type of the file. May be ``"application/pdf"`` when
|
||||||
|
called for the archive version of an image original.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
list[MetadataEntry]
|
||||||
|
Zero or more metadata entries.
|
||||||
|
"""
|
||||||
|
if mime_type != "application/pdf":
|
||||||
|
return []
|
||||||
|
|
||||||
|
from paperless.parsers.utils import extract_pdf_metadata
|
||||||
|
|
||||||
|
return extract_pdf_metadata(document_path, log=logger)
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Private helpers
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _azure_ai_vision_parse(
|
||||||
|
self,
|
||||||
|
file: Path,
|
||||||
|
config: RemoteEngineConfig,
|
||||||
|
) -> str | None:
|
||||||
|
"""Send ``file`` to Azure AI Document Intelligence and return text.
|
||||||
|
|
||||||
|
Downloads the searchable PDF output from Azure and stores it at
|
||||||
|
``self._archive_path``. Returns the extracted text content, or
|
||||||
|
``None`` on failure (the error is logged).
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
file:
|
||||||
|
Absolute path to the document to analyse.
|
||||||
|
config:
|
||||||
|
Validated remote engine configuration.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
str | None
|
||||||
|
Extracted text, or None if the Azure call failed.
|
||||||
|
"""
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
# Callers must have already validated config via engine_is_valid():
|
||||||
|
# engine_is_valid() asserts api_key is not None and (for azureai)
|
||||||
|
# endpoint is not None, so these casts are provably safe.
|
||||||
|
assert config.endpoint is not None
|
||||||
|
assert config.api_key is not None
|
||||||
|
|
||||||
|
from azure.ai.documentintelligence import DocumentIntelligenceClient
|
||||||
|
from azure.ai.documentintelligence.models import AnalyzeDocumentRequest
|
||||||
|
from azure.ai.documentintelligence.models import AnalyzeOutputOption
|
||||||
|
from azure.ai.documentintelligence.models import DocumentContentFormat
|
||||||
|
from azure.core.credentials import AzureKeyCredential
|
||||||
|
|
||||||
|
client = DocumentIntelligenceClient(
|
||||||
|
endpoint=config.endpoint,
|
||||||
|
credential=AzureKeyCredential(config.api_key),
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
with file.open("rb") as f:
|
||||||
|
analyze_request = AnalyzeDocumentRequest(bytes_source=f.read())
|
||||||
|
poller = client.begin_analyze_document(
|
||||||
|
model_id="prebuilt-read",
|
||||||
|
body=analyze_request,
|
||||||
|
output_content_format=DocumentContentFormat.TEXT,
|
||||||
|
output=[AnalyzeOutputOption.PDF],
|
||||||
|
content_type="application/json",
|
||||||
|
)
|
||||||
|
|
||||||
|
poller.wait()
|
||||||
|
result_id = poller.details["operation_id"]
|
||||||
|
result = poller.result()
|
||||||
|
|
||||||
|
self._archive_path = self._tempdir / "archive.pdf"
|
||||||
|
with self._archive_path.open("wb") as f:
|
||||||
|
for chunk in client.get_analyze_result_pdf(
|
||||||
|
model_id="prebuilt-read",
|
||||||
|
result_id=result_id,
|
||||||
|
):
|
||||||
|
f.write(chunk)
|
||||||
|
|
||||||
|
return result.content
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("Azure AI Vision parsing failed: %s", e)
|
||||||
|
|
||||||
|
finally:
|
||||||
|
client.close()
|
||||||
|
|
||||||
|
return None
|
||||||
@@ -1,13 +1,18 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
import shutil
|
||||||
import tempfile
|
import tempfile
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
|
from typing import Any
|
||||||
|
from typing import Self
|
||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
|
|
||||||
from documents.parsers import DocumentParser
|
|
||||||
from documents.parsers import ParseError
|
from documents.parsers import ParseError
|
||||||
from documents.parsers import make_thumbnail_from_pdf
|
from documents.parsers import make_thumbnail_from_pdf
|
||||||
from documents.utils import maybe_override_pixel_limit
|
from documents.utils import maybe_override_pixel_limit
|
||||||
@@ -16,6 +21,28 @@ from paperless.config import OcrConfig
|
|||||||
from paperless.models import ArchiveFileChoices
|
from paperless.models import ArchiveFileChoices
|
||||||
from paperless.models import CleanChoices
|
from paperless.models import CleanChoices
|
||||||
from paperless.models import ModeChoices
|
from paperless.models import ModeChoices
|
||||||
|
from paperless.parsers.utils import read_file_handle_unicode_errors
|
||||||
|
from paperless.version import __full_version_str__
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
import datetime
|
||||||
|
from types import TracebackType
|
||||||
|
|
||||||
|
from paperless.parsers import MetadataEntry
|
||||||
|
from paperless.parsers import ParserContext
|
||||||
|
|
||||||
|
logger = logging.getLogger("paperless.parsing.tesseract")
|
||||||
|
|
||||||
|
_SUPPORTED_MIME_TYPES: dict[str, str] = {
|
||||||
|
"application/pdf": ".pdf",
|
||||||
|
"image/jpeg": ".jpg",
|
||||||
|
"image/png": ".png",
|
||||||
|
"image/tiff": ".tif",
|
||||||
|
"image/gif": ".gif",
|
||||||
|
"image/bmp": ".bmp",
|
||||||
|
"image/webp": ".webp",
|
||||||
|
"image/heic": ".heic",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
class NoTextFoundException(Exception):
|
class NoTextFoundException(Exception):
|
||||||
@@ -26,81 +53,125 @@ class RtlLanguageException(Exception):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class RasterisedDocumentParser(DocumentParser):
|
class RasterisedDocumentParser:
|
||||||
"""
|
"""
|
||||||
This parser uses Tesseract to try and get some text out of a rasterised
|
This parser uses Tesseract to try and get some text out of a rasterised
|
||||||
image, whether it's a PDF, or other graphical format (JPEG, TIFF, etc.)
|
image, whether it's a PDF, or other graphical format (JPEG, TIFF, etc.)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
logging_name = "paperless.parsing.tesseract"
|
name: str = "Paperless-ngx Tesseract OCR Parser"
|
||||||
|
version: str = __full_version_str__
|
||||||
|
author: str = "Paperless-ngx Contributors"
|
||||||
|
url: str = "https://github.com/paperless-ngx/paperless-ngx"
|
||||||
|
|
||||||
def get_settings(self) -> OcrConfig:
|
# ------------------------------------------------------------------
|
||||||
"""
|
# Class methods
|
||||||
This parser uses the OCR configuration settings to parse documents
|
# ------------------------------------------------------------------
|
||||||
"""
|
|
||||||
return OcrConfig()
|
|
||||||
|
|
||||||
def get_page_count(self, document_path, mime_type):
|
@classmethod
|
||||||
page_count = None
|
def supported_mime_types(cls) -> dict[str, str]:
|
||||||
if mime_type == "application/pdf":
|
return _SUPPORTED_MIME_TYPES
|
||||||
try:
|
|
||||||
import pikepdf
|
|
||||||
|
|
||||||
with pikepdf.Pdf.open(document_path) as pdf:
|
@classmethod
|
||||||
page_count = len(pdf.pages)
|
def score(
|
||||||
except Exception as e:
|
cls,
|
||||||
self.log.warning(
|
mime_type: str,
|
||||||
f"Unable to determine PDF page count {document_path}: {e}",
|
filename: str,
|
||||||
|
path: Path | None = None,
|
||||||
|
) -> int | None:
|
||||||
|
if mime_type in _SUPPORTED_MIME_TYPES:
|
||||||
|
return 10
|
||||||
|
return None
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Properties
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
@property
|
||||||
|
def can_produce_archive(self) -> bool:
|
||||||
|
return True
|
||||||
|
|
||||||
|
@property
|
||||||
|
def requires_pdf_rendition(self) -> bool:
|
||||||
|
return False
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Lifecycle
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def __init__(self, logging_group: object = None) -> None:
|
||||||
|
settings.SCRATCH_DIR.mkdir(parents=True, exist_ok=True)
|
||||||
|
self.tempdir = Path(
|
||||||
|
tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR),
|
||||||
)
|
)
|
||||||
return page_count
|
self.settings = OcrConfig()
|
||||||
|
self.archive_path: Path | None = None
|
||||||
|
self.text: str | None = None
|
||||||
|
self.date: datetime.datetime | None = None
|
||||||
|
self.log = logger
|
||||||
|
|
||||||
def extract_metadata(self, document_path, mime_type):
|
def __enter__(self) -> Self:
|
||||||
result = []
|
return self
|
||||||
if mime_type == "application/pdf":
|
|
||||||
import pikepdf
|
|
||||||
|
|
||||||
namespace_pattern = re.compile(r"\{(.*)\}(.*)")
|
def __exit__(
|
||||||
|
self,
|
||||||
|
exc_type: type[BaseException] | None,
|
||||||
|
exc_val: BaseException | None,
|
||||||
|
exc_tb: TracebackType | None,
|
||||||
|
) -> None:
|
||||||
|
logger.debug("Cleaning up temporary directory %s", self.tempdir)
|
||||||
|
shutil.rmtree(self.tempdir, ignore_errors=True)
|
||||||
|
|
||||||
pdf = pikepdf.open(document_path)
|
# ------------------------------------------------------------------
|
||||||
meta = pdf.open_metadata()
|
# Core parsing interface
|
||||||
for key, value in meta.items():
|
# ------------------------------------------------------------------
|
||||||
if isinstance(value, list):
|
|
||||||
value = " ".join([str(e) for e in value])
|
|
||||||
value = str(value)
|
|
||||||
try:
|
|
||||||
m = namespace_pattern.match(key)
|
|
||||||
if m is None: # pragma: no cover
|
|
||||||
continue
|
|
||||||
namespace = m.group(1)
|
|
||||||
key_value = m.group(2)
|
|
||||||
try:
|
|
||||||
namespace.encode("utf-8")
|
|
||||||
key_value.encode("utf-8")
|
|
||||||
except UnicodeEncodeError as e: # pragma: no cover
|
|
||||||
self.log.debug(f"Skipping metadata key {key}: {e}")
|
|
||||||
continue
|
|
||||||
result.append(
|
|
||||||
{
|
|
||||||
"namespace": namespace,
|
|
||||||
"prefix": meta.REVERSE_NS[namespace],
|
|
||||||
"key": key_value,
|
|
||||||
"value": value,
|
|
||||||
},
|
|
||||||
)
|
|
||||||
except Exception as e:
|
|
||||||
self.log.warning(
|
|
||||||
f"Error while reading metadata {key}: {value}. Error: {e}",
|
|
||||||
)
|
|
||||||
return result
|
|
||||||
|
|
||||||
def get_thumbnail(self, document_path, mime_type, file_name=None):
|
def configure(self, context: ParserContext) -> None:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Result accessors
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def get_text(self) -> str | None:
|
||||||
|
return self.text
|
||||||
|
|
||||||
|
def get_date(self) -> datetime.datetime | None:
|
||||||
|
return self.date
|
||||||
|
|
||||||
|
def get_archive_path(self) -> Path | None:
|
||||||
|
return self.archive_path
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Thumbnail, page count, and metadata
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def get_thumbnail(self, document_path: Path, mime_type: str) -> Path:
|
||||||
return make_thumbnail_from_pdf(
|
return make_thumbnail_from_pdf(
|
||||||
self.archive_path or document_path,
|
self.archive_path or Path(document_path),
|
||||||
self.tempdir,
|
self.tempdir,
|
||||||
self.logging_group,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
def is_image(self, mime_type) -> bool:
|
def get_page_count(self, document_path: Path, mime_type: str) -> int | None:
|
||||||
|
if mime_type == "application/pdf":
|
||||||
|
from paperless.parsers.utils import get_page_count_for_pdf
|
||||||
|
|
||||||
|
return get_page_count_for_pdf(Path(document_path), log=self.log)
|
||||||
|
return None
|
||||||
|
|
||||||
|
def extract_metadata(
|
||||||
|
self,
|
||||||
|
document_path: Path,
|
||||||
|
mime_type: str,
|
||||||
|
) -> list[MetadataEntry]:
|
||||||
|
if mime_type != "application/pdf":
|
||||||
|
return []
|
||||||
|
|
||||||
|
from paperless.parsers.utils import extract_pdf_metadata
|
||||||
|
|
||||||
|
return extract_pdf_metadata(Path(document_path), log=self.log)
|
||||||
|
|
||||||
|
def is_image(self, mime_type: str) -> bool:
|
||||||
return mime_type in [
|
return mime_type in [
|
||||||
"image/png",
|
"image/png",
|
||||||
"image/jpeg",
|
"image/jpeg",
|
||||||
@@ -111,25 +182,25 @@ class RasterisedDocumentParser(DocumentParser):
|
|||||||
"image/heic",
|
"image/heic",
|
||||||
]
|
]
|
||||||
|
|
||||||
def has_alpha(self, image) -> bool:
|
def has_alpha(self, image: Path) -> bool:
|
||||||
with Image.open(image) as im:
|
with Image.open(image) as im:
|
||||||
return im.mode in ("RGBA", "LA")
|
return im.mode in ("RGBA", "LA")
|
||||||
|
|
||||||
def remove_alpha(self, image_path: str) -> Path:
|
def remove_alpha(self, image_path: Path) -> Path:
|
||||||
no_alpha_image = Path(self.tempdir) / "image-no-alpha"
|
no_alpha_image = Path(self.tempdir) / "image-no-alpha"
|
||||||
run_subprocess(
|
run_subprocess(
|
||||||
[
|
[
|
||||||
settings.CONVERT_BINARY,
|
settings.CONVERT_BINARY,
|
||||||
"-alpha",
|
"-alpha",
|
||||||
"off",
|
"off",
|
||||||
image_path,
|
str(image_path),
|
||||||
no_alpha_image,
|
str(no_alpha_image),
|
||||||
],
|
],
|
||||||
logger=self.log,
|
logger=self.log,
|
||||||
)
|
)
|
||||||
return no_alpha_image
|
return no_alpha_image
|
||||||
|
|
||||||
def get_dpi(self, image) -> int | None:
|
def get_dpi(self, image: Path) -> int | None:
|
||||||
try:
|
try:
|
||||||
with Image.open(image) as im:
|
with Image.open(image) as im:
|
||||||
x, _ = im.info["dpi"]
|
x, _ = im.info["dpi"]
|
||||||
@@ -138,7 +209,7 @@ class RasterisedDocumentParser(DocumentParser):
|
|||||||
self.log.warning(f"Error while getting DPI from image {image}: {e}")
|
self.log.warning(f"Error while getting DPI from image {image}: {e}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def calculate_a4_dpi(self, image) -> int | None:
|
def calculate_a4_dpi(self, image: Path) -> int | None:
|
||||||
try:
|
try:
|
||||||
with Image.open(image) as im:
|
with Image.open(image) as im:
|
||||||
width, _ = im.size
|
width, _ = im.size
|
||||||
@@ -156,6 +227,7 @@ class RasterisedDocumentParser(DocumentParser):
|
|||||||
sidecar_file: Path | None,
|
sidecar_file: Path | None,
|
||||||
pdf_file: Path,
|
pdf_file: Path,
|
||||||
) -> str | None:
|
) -> str | None:
|
||||||
|
text: str | None = None
|
||||||
# When re-doing OCR, the sidecar contains ONLY the new text, not
|
# When re-doing OCR, the sidecar contains ONLY the new text, not
|
||||||
# the whole text, so do not utilize it in that case
|
# the whole text, so do not utilize it in that case
|
||||||
if (
|
if (
|
||||||
@@ -163,7 +235,7 @@ class RasterisedDocumentParser(DocumentParser):
|
|||||||
and sidecar_file.is_file()
|
and sidecar_file.is_file()
|
||||||
and self.settings.mode != "redo"
|
and self.settings.mode != "redo"
|
||||||
):
|
):
|
||||||
text = self.read_file_handle_unicode_errors(sidecar_file)
|
text = read_file_handle_unicode_errors(sidecar_file)
|
||||||
|
|
||||||
if "[OCR skipped on page" not in text:
|
if "[OCR skipped on page" not in text:
|
||||||
# This happens when there's already text in the input file.
|
# This happens when there's already text in the input file.
|
||||||
@@ -191,12 +263,12 @@ class RasterisedDocumentParser(DocumentParser):
|
|||||||
"-layout",
|
"-layout",
|
||||||
"-enc",
|
"-enc",
|
||||||
"UTF-8",
|
"UTF-8",
|
||||||
pdf_file,
|
str(pdf_file),
|
||||||
tmp.name,
|
tmp.name,
|
||||||
],
|
],
|
||||||
logger=self.log,
|
logger=self.log,
|
||||||
)
|
)
|
||||||
text = self.read_file_handle_unicode_errors(Path(tmp.name))
|
text = read_file_handle_unicode_errors(Path(tmp.name))
|
||||||
|
|
||||||
return post_process_text(text)
|
return post_process_text(text)
|
||||||
|
|
||||||
@@ -211,17 +283,15 @@ class RasterisedDocumentParser(DocumentParser):
|
|||||||
|
|
||||||
def construct_ocrmypdf_parameters(
|
def construct_ocrmypdf_parameters(
|
||||||
self,
|
self,
|
||||||
input_file,
|
input_file: Path,
|
||||||
mime_type,
|
mime_type: str,
|
||||||
output_file,
|
output_file: Path,
|
||||||
sidecar_file,
|
sidecar_file: Path,
|
||||||
*,
|
*,
|
||||||
safe_fallback=False,
|
safe_fallback: bool = False,
|
||||||
):
|
) -> dict[str, Any]:
|
||||||
if TYPE_CHECKING:
|
ocrmypdf_args: dict[str, Any] = {
|
||||||
assert isinstance(self.settings, OcrConfig)
|
"input_file_or_options": input_file,
|
||||||
ocrmypdf_args = {
|
|
||||||
"input_file": input_file,
|
|
||||||
"output_file": output_file,
|
"output_file": output_file,
|
||||||
# need to use threads, since this will be run in daemonized
|
# need to use threads, since this will be run in daemonized
|
||||||
# processes via the task library.
|
# processes via the task library.
|
||||||
@@ -285,7 +355,7 @@ class RasterisedDocumentParser(DocumentParser):
|
|||||||
"for compatibility with img2pdf",
|
"for compatibility with img2pdf",
|
||||||
)
|
)
|
||||||
# Replace the input file with the non-alpha
|
# Replace the input file with the non-alpha
|
||||||
ocrmypdf_args["input_file"] = self.remove_alpha(input_file)
|
ocrmypdf_args["input_file_or_options"] = self.remove_alpha(input_file)
|
||||||
|
|
||||||
if dpi:
|
if dpi:
|
||||||
self.log.debug(f"Detected DPI for image {input_file}: {dpi}")
|
self.log.debug(f"Detected DPI for image {input_file}: {dpi}")
|
||||||
@@ -330,7 +400,13 @@ class RasterisedDocumentParser(DocumentParser):
|
|||||||
|
|
||||||
return ocrmypdf_args
|
return ocrmypdf_args
|
||||||
|
|
||||||
def parse(self, document_path: Path, mime_type, file_name=None) -> None:
|
def parse(
|
||||||
|
self,
|
||||||
|
document_path: Path,
|
||||||
|
mime_type: str,
|
||||||
|
*,
|
||||||
|
produce_archive: bool = True,
|
||||||
|
) -> None:
|
||||||
# This forces tesseract to use one core per page.
|
# This forces tesseract to use one core per page.
|
||||||
os.environ["OMP_THREAD_LIMIT"] = "1"
|
os.environ["OMP_THREAD_LIMIT"] = "1"
|
||||||
VALID_TEXT_LENGTH = 50
|
VALID_TEXT_LENGTH = 50
|
||||||
@@ -458,7 +534,7 @@ class RasterisedDocumentParser(DocumentParser):
|
|||||||
self.text = ""
|
self.text = ""
|
||||||
|
|
||||||
|
|
||||||
def post_process_text(text):
|
def post_process_text(text: str | None) -> str | None:
|
||||||
if not text:
|
if not text:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@@ -27,6 +27,7 @@ if TYPE_CHECKING:
|
|||||||
from types import TracebackType
|
from types import TracebackType
|
||||||
|
|
||||||
from paperless.parsers import MetadataEntry
|
from paperless.parsers import MetadataEntry
|
||||||
|
from paperless.parsers import ParserContext
|
||||||
|
|
||||||
logger = logging.getLogger("paperless.parsing.text")
|
logger = logging.getLogger("paperless.parsing.text")
|
||||||
|
|
||||||
@@ -156,6 +157,9 @@ class TextDocumentParser:
|
|||||||
# Core parsing interface
|
# Core parsing interface
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def configure(self, context: ParserContext) -> None:
|
||||||
|
pass
|
||||||
|
|
||||||
def parse(
|
def parse(
|
||||||
self,
|
self,
|
||||||
document_path: Path,
|
document_path: Path,
|
||||||
|
|||||||
452
src/paperless/parsers/tika.py
Normal file
@@ -0,0 +1,452 @@
|
|||||||
|
"""
|
||||||
|
Built-in Tika document parser.
|
||||||
|
|
||||||
|
Handles Office documents (DOCX, ODT, XLS, XLSX, PPT, PPTX, RTF, etc.) by
|
||||||
|
sending them to an Apache Tika server for text extraction and a Gotenberg
|
||||||
|
server for PDF conversion. Because the source formats cannot be rendered by
|
||||||
|
a browser natively, the parser always produces a PDF rendition for display.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import shutil
|
||||||
|
import tempfile
|
||||||
|
from contextlib import ExitStack
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
from typing import Self
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
from django.conf import settings
|
||||||
|
from django.utils import timezone
|
||||||
|
from gotenberg_client import GotenbergClient
|
||||||
|
from gotenberg_client.options import PdfAFormat
|
||||||
|
from tika_client import TikaClient
|
||||||
|
|
||||||
|
from documents.parsers import ParseError
|
||||||
|
from documents.parsers import make_thumbnail_from_pdf
|
||||||
|
from paperless.config import OutputTypeConfig
|
||||||
|
from paperless.models import OutputTypeChoices
|
||||||
|
from paperless.version import __full_version_str__
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
import datetime
|
||||||
|
from types import TracebackType
|
||||||
|
|
||||||
|
from paperless.parsers import MetadataEntry
|
||||||
|
from paperless.parsers import ParserContext
|
||||||
|
|
||||||
|
logger = logging.getLogger("paperless.parsing.tika")
|
||||||
|
|
||||||
|
_SUPPORTED_MIME_TYPES: dict[str, str] = {
|
||||||
|
"application/msword": ".doc",
|
||||||
|
"application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",
|
||||||
|
"application/vnd.ms-excel": ".xls",
|
||||||
|
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
|
||||||
|
"application/vnd.ms-powerpoint": ".ppt",
|
||||||
|
"application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx",
|
||||||
|
"application/vnd.openxmlformats-officedocument.presentationml.slideshow": ".ppsx",
|
||||||
|
"application/vnd.oasis.opendocument.presentation": ".odp",
|
||||||
|
"application/vnd.oasis.opendocument.spreadsheet": ".ods",
|
||||||
|
"application/vnd.oasis.opendocument.text": ".odt",
|
||||||
|
"application/vnd.oasis.opendocument.graphics": ".odg",
|
||||||
|
"text/rtf": ".rtf",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class TikaDocumentParser:
|
||||||
|
"""Parse Office documents via Apache Tika and Gotenberg for Paperless-ngx.
|
||||||
|
|
||||||
|
Text extraction is handled by the Tika server. PDF conversion for display
|
||||||
|
is handled by Gotenberg (LibreOffice route). Because the source formats
|
||||||
|
cannot be rendered by a browser natively, ``requires_pdf_rendition`` is
|
||||||
|
True and the PDF is always produced regardless of the ``produce_archive``
|
||||||
|
flag passed to ``parse``.
|
||||||
|
|
||||||
|
Both ``TikaClient`` and ``GotenbergClient`` are opened once in
|
||||||
|
``__enter__`` via an ``ExitStack`` and shared across ``parse``,
|
||||||
|
``extract_metadata``, and ``_convert_to_pdf`` calls, then closed via
|
||||||
|
``ExitStack.close()`` in ``__exit__``. The parser must always be used
|
||||||
|
as a context manager.
|
||||||
|
|
||||||
|
Class attributes
|
||||||
|
----------------
|
||||||
|
name : str
|
||||||
|
Human-readable parser name.
|
||||||
|
version : str
|
||||||
|
Semantic version string, kept in sync with Paperless-ngx releases.
|
||||||
|
author : str
|
||||||
|
Maintainer name.
|
||||||
|
url : str
|
||||||
|
Issue tracker / source URL.
|
||||||
|
"""
|
||||||
|
|
||||||
|
name: str = "Paperless-ngx Tika Parser"
|
||||||
|
version: str = __full_version_str__
|
||||||
|
author: str = "Paperless-ngx Contributors"
|
||||||
|
url: str = "https://github.com/paperless-ngx/paperless-ngx"
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Class methods
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def supported_mime_types(cls) -> dict[str, str]:
|
||||||
|
"""Return the MIME types this parser handles.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
dict[str, str]
|
||||||
|
Mapping of MIME type to preferred file extension.
|
||||||
|
"""
|
||||||
|
return _SUPPORTED_MIME_TYPES
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def score(
|
||||||
|
cls,
|
||||||
|
mime_type: str,
|
||||||
|
filename: str,
|
||||||
|
path: Path | None = None,
|
||||||
|
) -> int | None:
|
||||||
|
"""Return the priority score for handling this file.
|
||||||
|
|
||||||
|
Returns ``None`` when Tika integration is disabled so the registry
|
||||||
|
skips this parser entirely.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
mime_type:
|
||||||
|
Detected MIME type of the file.
|
||||||
|
filename:
|
||||||
|
Original filename including extension.
|
||||||
|
path:
|
||||||
|
Optional filesystem path. Not inspected by this parser.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
int | None
|
||||||
|
10 if TIKA_ENABLED and the MIME type is supported, otherwise None.
|
||||||
|
"""
|
||||||
|
if not settings.TIKA_ENABLED:
|
||||||
|
return None
|
||||||
|
if mime_type in _SUPPORTED_MIME_TYPES:
|
||||||
|
return 10
|
||||||
|
return None
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Properties
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
@property
|
||||||
|
def can_produce_archive(self) -> bool:
|
||||||
|
"""Whether this parser can produce a searchable PDF archive copy.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
bool
|
||||||
|
Always False — Tika produces a display PDF, not an OCR archive.
|
||||||
|
"""
|
||||||
|
return False
|
||||||
|
|
||||||
|
@property
|
||||||
|
def requires_pdf_rendition(self) -> bool:
|
||||||
|
"""Whether the parser must produce a PDF for the frontend to display.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
bool
|
||||||
|
Always True — Office formats cannot be rendered natively in a
|
||||||
|
browser, so a PDF conversion is always required for display.
|
||||||
|
"""
|
||||||
|
return True
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Lifecycle
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def __init__(self, logging_group: object = None) -> None:
|
||||||
|
settings.SCRATCH_DIR.mkdir(parents=True, exist_ok=True)
|
||||||
|
self._tempdir = Path(
|
||||||
|
tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR),
|
||||||
|
)
|
||||||
|
self._text: str | None = None
|
||||||
|
self._date: datetime.datetime | None = None
|
||||||
|
self._archive_path: Path | None = None
|
||||||
|
self._exit_stack = ExitStack()
|
||||||
|
self._tika_client: TikaClient | None = None
|
||||||
|
self._gotenberg_client: GotenbergClient | None = None
|
||||||
|
|
||||||
|
def __enter__(self) -> Self:
|
||||||
|
self._tika_client = self._exit_stack.enter_context(
|
||||||
|
TikaClient(
|
||||||
|
tika_url=settings.TIKA_ENDPOINT,
|
||||||
|
timeout=settings.CELERY_TASK_TIME_LIMIT,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
self._gotenberg_client = self._exit_stack.enter_context(
|
||||||
|
GotenbergClient(
|
||||||
|
host=settings.TIKA_GOTENBERG_ENDPOINT,
|
||||||
|
timeout=settings.CELERY_TASK_TIME_LIMIT,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
return self
|
||||||
|
|
||||||
|
def __exit__(
|
||||||
|
self,
|
||||||
|
exc_type: type[BaseException] | None,
|
||||||
|
exc_val: BaseException | None,
|
||||||
|
exc_tb: TracebackType | None,
|
||||||
|
) -> None:
|
||||||
|
self._exit_stack.close()
|
||||||
|
logger.debug("Cleaning up temporary directory %s", self._tempdir)
|
||||||
|
shutil.rmtree(self._tempdir, ignore_errors=True)
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Core parsing interface
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def configure(self, context: ParserContext) -> None:
|
||||||
|
pass
|
||||||
|
|
||||||
|
def parse(
|
||||||
|
self,
|
||||||
|
document_path: Path,
|
||||||
|
mime_type: str,
|
||||||
|
*,
|
||||||
|
produce_archive: bool = True,
|
||||||
|
) -> None:
|
||||||
|
"""Send the document to Tika for text extraction and Gotenberg for PDF.
|
||||||
|
|
||||||
|
Because ``requires_pdf_rendition`` is True the PDF conversion is
|
||||||
|
always performed — the ``produce_archive`` flag is intentionally
|
||||||
|
ignored.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
document_path:
|
||||||
|
Absolute path to the document file to parse.
|
||||||
|
mime_type:
|
||||||
|
Detected MIME type of the document.
|
||||||
|
produce_archive:
|
||||||
|
Accepted for protocol compatibility but ignored; the PDF rendition
|
||||||
|
is always produced since the source format cannot be displayed
|
||||||
|
natively in the browser.
|
||||||
|
|
||||||
|
Raises
|
||||||
|
------
|
||||||
|
documents.parsers.ParseError
|
||||||
|
If Tika or Gotenberg returns an error.
|
||||||
|
"""
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
assert self._tika_client is not None
|
||||||
|
|
||||||
|
logger.info("Sending %s to Tika server", document_path)
|
||||||
|
|
||||||
|
try:
|
||||||
|
try:
|
||||||
|
parsed = self._tika_client.tika.as_text.from_file(
|
||||||
|
document_path,
|
||||||
|
mime_type,
|
||||||
|
)
|
||||||
|
except httpx.HTTPStatusError as err:
|
||||||
|
# Workaround https://issues.apache.org/jira/browse/TIKA-4110
|
||||||
|
# Tika fails with some files as multi-part form data
|
||||||
|
if err.response.status_code == httpx.codes.INTERNAL_SERVER_ERROR:
|
||||||
|
parsed = self._tika_client.tika.as_text.from_buffer(
|
||||||
|
document_path.read_bytes(),
|
||||||
|
mime_type,
|
||||||
|
)
|
||||||
|
else: # pragma: no cover
|
||||||
|
raise
|
||||||
|
except Exception as err:
|
||||||
|
raise ParseError(
|
||||||
|
f"Could not parse {document_path} with tika server at "
|
||||||
|
f"{settings.TIKA_ENDPOINT}: {err}",
|
||||||
|
) from err
|
||||||
|
|
||||||
|
self._text = parsed.content
|
||||||
|
if self._text is not None:
|
||||||
|
self._text = self._text.strip()
|
||||||
|
|
||||||
|
self._date = parsed.created
|
||||||
|
if self._date is not None and timezone.is_naive(self._date):
|
||||||
|
self._date = timezone.make_aware(self._date)
|
||||||
|
|
||||||
|
# Always convert — requires_pdf_rendition=True means the browser
|
||||||
|
# cannot display the source format natively.
|
||||||
|
self._archive_path = self._convert_to_pdf(document_path)
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Result accessors
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def get_text(self) -> str | None:
|
||||||
|
"""Return the plain-text content extracted during parse.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
str | None
|
||||||
|
Extracted text, or None if parse has not been called yet.
|
||||||
|
"""
|
||||||
|
return self._text
|
||||||
|
|
||||||
|
def get_date(self) -> datetime.datetime | None:
|
||||||
|
"""Return the document date detected during parse.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
datetime.datetime | None
|
||||||
|
Creation date from Tika metadata, or None if not detected.
|
||||||
|
"""
|
||||||
|
return self._date
|
||||||
|
|
||||||
|
def get_archive_path(self) -> Path | None:
|
||||||
|
"""Return the path to the generated PDF rendition, or None.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
Path | None
|
||||||
|
Path to the PDF produced by Gotenberg, or None if parse has not
|
||||||
|
been called yet.
|
||||||
|
"""
|
||||||
|
return self._archive_path
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Thumbnail and metadata
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def get_thumbnail(self, document_path: Path, mime_type: str) -> Path:
|
||||||
|
"""Generate a thumbnail from the PDF rendition of the document.
|
||||||
|
|
||||||
|
Converts the document to PDF first if not already done.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
document_path:
|
||||||
|
Absolute path to the source document.
|
||||||
|
mime_type:
|
||||||
|
Detected MIME type of the document.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
Path
|
||||||
|
Path to the generated WebP thumbnail inside the temporary directory.
|
||||||
|
"""
|
||||||
|
if self._archive_path is None:
|
||||||
|
self._archive_path = self._convert_to_pdf(document_path)
|
||||||
|
return make_thumbnail_from_pdf(self._archive_path, self._tempdir)
|
||||||
|
|
||||||
|
def get_page_count(
|
||||||
|
self,
|
||||||
|
document_path: Path,
|
||||||
|
mime_type: str,
|
||||||
|
) -> int | None:
|
||||||
|
"""Return the number of pages in the document.
|
||||||
|
|
||||||
|
Counts pages in the archive PDF produced by a preceding parse()
|
||||||
|
call. Returns ``None`` if parse() has not been called yet or if
|
||||||
|
no archive was produced.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
int | None
|
||||||
|
Page count of the archive PDF, or ``None``.
|
||||||
|
"""
|
||||||
|
if self._archive_path is not None:
|
||||||
|
from paperless.parsers.utils import get_page_count_for_pdf
|
||||||
|
|
||||||
|
return get_page_count_for_pdf(self._archive_path, log=logger)
|
||||||
|
return None
|
||||||
|
|
||||||
|
def extract_metadata(
|
||||||
|
self,
|
||||||
|
document_path: Path,
|
||||||
|
mime_type: str,
|
||||||
|
) -> list[MetadataEntry]:
|
||||||
|
"""Extract format-specific metadata via the Tika metadata endpoint.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
list[MetadataEntry]
|
||||||
|
All key/value pairs returned by Tika, or ``[]`` on error.
|
||||||
|
"""
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
assert self._tika_client is not None
|
||||||
|
|
||||||
|
try:
|
||||||
|
parsed = self._tika_client.metadata.from_file(document_path, mime_type)
|
||||||
|
return [
|
||||||
|
{
|
||||||
|
"namespace": "",
|
||||||
|
"prefix": "",
|
||||||
|
"key": key,
|
||||||
|
"value": parsed.data[key],
|
||||||
|
}
|
||||||
|
for key in parsed.data
|
||||||
|
]
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(
|
||||||
|
"Error while fetching document metadata for %s: %s",
|
||||||
|
document_path,
|
||||||
|
e,
|
||||||
|
)
|
||||||
|
return []
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Private helpers
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _convert_to_pdf(self, document_path: Path) -> Path:
|
||||||
|
"""Convert the document to PDF using Gotenberg's LibreOffice route.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
document_path:
|
||||||
|
Absolute path to the source document.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
Path
|
||||||
|
Path to the generated PDF inside the temporary directory.
|
||||||
|
|
||||||
|
Raises
|
||||||
|
------
|
||||||
|
documents.parsers.ParseError
|
||||||
|
If Gotenberg returns an error.
|
||||||
|
"""
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
assert self._gotenberg_client is not None
|
||||||
|
|
||||||
|
pdf_path = self._tempdir / "convert.pdf"
|
||||||
|
|
||||||
|
logger.info("Converting %s to PDF as %s", document_path, pdf_path)
|
||||||
|
|
||||||
|
with self._gotenberg_client.libre_office.to_pdf() as route:
|
||||||
|
# Set the output format of the resulting PDF.
|
||||||
|
# OutputTypeConfig reads the database-stored ApplicationConfiguration
|
||||||
|
# first, then falls back to the PAPERLESS_OCR_OUTPUT_TYPE env var.
|
||||||
|
output_type = OutputTypeConfig().output_type
|
||||||
|
if output_type in {
|
||||||
|
OutputTypeChoices.PDF_A,
|
||||||
|
OutputTypeChoices.PDF_A2,
|
||||||
|
}:
|
||||||
|
route.pdf_format(PdfAFormat.A2b)
|
||||||
|
elif output_type == OutputTypeChoices.PDF_A1:
|
||||||
|
logger.warning(
|
||||||
|
"Gotenberg does not support PDF/A-1a, choosing PDF/A-2b instead",
|
||||||
|
)
|
||||||
|
route.pdf_format(PdfAFormat.A2b)
|
||||||
|
elif output_type == OutputTypeChoices.PDF_A3:
|
||||||
|
route.pdf_format(PdfAFormat.A3b)
|
||||||
|
|
||||||
|
route.convert(document_path)
|
||||||
|
|
||||||
|
try:
|
||||||
|
response = route.run()
|
||||||
|
pdf_path.write_bytes(response.content)
|
||||||
|
return pdf_path
|
||||||
|
except Exception as err:
|
||||||
|
raise ParseError(
|
||||||
|
f"Error while converting document to PDF: {err}",
|
||||||
|
) from err
|
||||||
158
src/paperless/parsers/utils.py
Normal file
@@ -0,0 +1,158 @@
|
|||||||
|
"""
|
||||||
|
Shared utilities for Paperless-ngx document parsers.
|
||||||
|
|
||||||
|
Functions here are format-neutral helpers that multiple parsers need.
|
||||||
|
Keeping them here avoids parsers inheriting from each other just to
|
||||||
|
share implementation.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from paperless.parsers import MetadataEntry
|
||||||
|
|
||||||
|
logger = logging.getLogger("paperless.parsers.utils")
|
||||||
|
|
||||||
|
|
||||||
|
def read_file_handle_unicode_errors(
|
||||||
|
filepath: Path,
|
||||||
|
log: logging.Logger | None = None,
|
||||||
|
) -> str:
|
||||||
|
"""Read a file as UTF-8 text, replacing invalid bytes rather than raising.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
filepath:
|
||||||
|
Absolute path to the file to read.
|
||||||
|
log:
|
||||||
|
Logger to use for warnings. Falls back to the module-level logger
|
||||||
|
when omitted.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
str
|
||||||
|
File content as a string, with any invalid UTF-8 sequences replaced
|
||||||
|
by the Unicode replacement character.
|
||||||
|
"""
|
||||||
|
_log = log or logger
|
||||||
|
try:
|
||||||
|
return filepath.read_text(encoding="utf-8")
|
||||||
|
except UnicodeDecodeError as e:
|
||||||
|
_log.warning("Unicode error during text reading, continuing: %s", e)
|
||||||
|
return filepath.read_bytes().decode("utf-8", errors="replace")
|
||||||
|
|
||||||
|
|
||||||
|
def get_page_count_for_pdf(
    document_path: Path,
    log: logging.Logger | None = None,
) -> int | None:
    """Count the pages of a PDF with pikepdf, tolerating broken files.

    Parameters
    ----------
    document_path:
        Absolute path to the PDF file.
    log:
        Logger used for the warning on failure; defaults to this module's
        logger.

    Returns
    -------
    int | None
        Number of pages, or ``None`` when the file cannot be opened or is
        not a valid PDF.
    """
    import pikepdf

    active_log = log or logger

    try:
        with pikepdf.Pdf.open(document_path) as document:
            return len(document.pages)
    except Exception as exc:
        active_log.warning(
            "Unable to determine PDF page count for %s: %s",
            document_path,
            exc,
        )
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def extract_pdf_metadata(
    document_path: Path,
    log: logging.Logger | None = None,
) -> list[MetadataEntry]:
    """Extract XMP/PDF metadata from a PDF file using pikepdf.

    Reads all XMP metadata entries from the document and returns them as a
    list of ``MetadataEntry`` dicts. The function never raises — any failure
    to open the file or read a specific key is logged and skipped.

    Parameters
    ----------
    document_path:
        Absolute path to the PDF file.
    log:
        Logger to use for warnings and debug messages. Falls back to the
        module-level logger when omitted.

    Returns
    -------
    list[MetadataEntry]
        Zero or more metadata entries. Returns ``[]`` if the file cannot
        be opened or contains no readable XMP metadata.
    """
    import pikepdf

    from paperless.parsers import MetadataEntry

    _log = log or logger
    result: list[MetadataEntry] = []
    # XMP keys look like "{namespace-uri}local-name".
    namespace_pattern = re.compile(r"\{(.*)\}(.*)")

    try:
        pdf = pikepdf.open(document_path)
    except Exception as e:
        _log.warning("Could not open PDF metadata for %s: %s", document_path, e)
        return []

    # Fix: the Pdf object was previously never closed, leaking the file
    # handle. The context manager guarantees it is closed on every path.
    with pdf:
        try:
            meta = pdf.open_metadata()
        except Exception as e:
            _log.warning("Could not open PDF metadata for %s: %s", document_path, e)
            return []

        for key, value in meta.items():
            if isinstance(value, list):
                value = " ".join(str(item) for item in value)
            value = str(value)

            try:
                m = namespace_pattern.match(key)
                if m is None:
                    # Key without a namespace wrapper — nothing to report.
                    continue

                namespace = m.group(1)
                key_value = m.group(2)

                try:
                    # Guard against surrogate/unencodable names leaking into
                    # the API response.
                    namespace.encode("utf-8")
                    key_value.encode("utf-8")
                except UnicodeEncodeError as enc_err:  # pragma: no cover
                    _log.debug("Skipping metadata key %s: %s", key, enc_err)
                    continue

                result.append(
                    MetadataEntry(
                        namespace=namespace,
                        prefix=meta.REVERSE_NS[namespace],
                        key=key_value,
                        value=value,
                    ),
                )
            except Exception as e:
                # Best effort per key: a single bad entry must not discard
                # the rest of the metadata.
                _log.warning(
                    "Error reading metadata key %s value %s: %s",
                    key,
                    value,
                    e,
                )

    return result
|
||||||
@@ -6,6 +6,7 @@ from allauth.mfa.models import Authenticator
|
|||||||
from allauth.mfa.totp.internal.auth import TOTP
|
from allauth.mfa.totp.internal.auth import TOTP
|
||||||
from allauth.socialaccount.models import SocialAccount
|
from allauth.socialaccount.models import SocialAccount
|
||||||
from allauth.socialaccount.models import SocialApp
|
from allauth.socialaccount.models import SocialApp
|
||||||
|
from django.conf import settings
|
||||||
from django.contrib.auth.models import Group
|
from django.contrib.auth.models import Group
|
||||||
from django.contrib.auth.models import Permission
|
from django.contrib.auth.models import Permission
|
||||||
from django.contrib.auth.models import User
|
from django.contrib.auth.models import User
|
||||||
@@ -15,6 +16,7 @@ from rest_framework import serializers
|
|||||||
from rest_framework.authtoken.serializers import AuthTokenSerializer
|
from rest_framework.authtoken.serializers import AuthTokenSerializer
|
||||||
|
|
||||||
from paperless.models import ApplicationConfiguration
|
from paperless.models import ApplicationConfiguration
|
||||||
|
from paperless.network import validate_outbound_http_url
|
||||||
from paperless.validators import reject_dangerous_svg
|
from paperless.validators import reject_dangerous_svg
|
||||||
from paperless_mail.serialisers import ObfuscatedPasswordField
|
from paperless_mail.serialisers import ObfuscatedPasswordField
|
||||||
|
|
||||||
@@ -236,6 +238,22 @@ class ApplicationConfigurationSerializer(serializers.ModelSerializer):
|
|||||||
reject_dangerous_svg(file)
|
reject_dangerous_svg(file)
|
||||||
return file
|
return file
|
||||||
|
|
||||||
|
def validate_llm_endpoint(self, value: str | None) -> str | None:
|
||||||
|
if not value:
|
||||||
|
return value
|
||||||
|
|
||||||
|
try:
|
||||||
|
validate_outbound_http_url(
|
||||||
|
value,
|
||||||
|
allow_internal=settings.LLM_ALLOW_INTERNAL_ENDPOINTS,
|
||||||
|
)
|
||||||
|
except ValueError as e:
|
||||||
|
raise serializers.ValidationError(
|
||||||
|
f"Invalid LLM endpoint: {e.args[0]}, see logs for details",
|
||||||
|
) from e
|
||||||
|
|
||||||
|
return value
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
model = ApplicationConfiguration
|
model = ApplicationConfiguration
|
||||||
fields = "__all__"
|
fields = "__all__"
|
||||||
|
|||||||
@@ -121,10 +121,7 @@ INSTALLED_APPS = [
|
|||||||
"django_extensions",
|
"django_extensions",
|
||||||
"paperless",
|
"paperless",
|
||||||
"documents.apps.DocumentsConfig",
|
"documents.apps.DocumentsConfig",
|
||||||
"paperless_tesseract.apps.PaperlessTesseractConfig",
|
|
||||||
"paperless_text.apps.PaperlessTextConfig",
|
|
||||||
"paperless_mail.apps.PaperlessMailConfig",
|
"paperless_mail.apps.PaperlessMailConfig",
|
||||||
"paperless_remote.apps.PaperlessRemoteParserConfig",
|
|
||||||
"django.contrib.admin",
|
"django.contrib.admin",
|
||||||
"rest_framework",
|
"rest_framework",
|
||||||
"rest_framework.authtoken",
|
"rest_framework.authtoken",
|
||||||
@@ -974,8 +971,8 @@ TIKA_GOTENBERG_ENDPOINT = os.getenv(
|
|||||||
"http://localhost:3000",
|
"http://localhost:3000",
|
||||||
)
|
)
|
||||||
|
|
||||||
if TIKA_ENABLED:
|
# Tika parser is now integrated into the main parser registry
|
||||||
INSTALLED_APPS.append("paperless_tika.apps.PaperlessTikaConfig")
|
# No separate Django app needed
|
||||||
|
|
||||||
AUDIT_LOG_ENABLED = get_bool_from_env("PAPERLESS_AUDIT_LOG_ENABLED", "true")
|
AUDIT_LOG_ENABLED = get_bool_from_env("PAPERLESS_AUDIT_LOG_ENABLED", "true")
|
||||||
if AUDIT_LOG_ENABLED:
|
if AUDIT_LOG_ENABLED:
|
||||||
@@ -1112,3 +1109,7 @@ LLM_BACKEND = os.getenv("PAPERLESS_AI_LLM_BACKEND") # "ollama" or "openai"
|
|||||||
LLM_MODEL = os.getenv("PAPERLESS_AI_LLM_MODEL")
|
LLM_MODEL = os.getenv("PAPERLESS_AI_LLM_MODEL")
|
||||||
LLM_API_KEY = os.getenv("PAPERLESS_AI_LLM_API_KEY")
|
LLM_API_KEY = os.getenv("PAPERLESS_AI_LLM_API_KEY")
|
||||||
LLM_ENDPOINT = os.getenv("PAPERLESS_AI_LLM_ENDPOINT")
|
LLM_ENDPOINT = os.getenv("PAPERLESS_AI_LLM_ENDPOINT")
|
||||||
|
LLM_ALLOW_INTERNAL_ENDPOINTS = get_bool_from_env(
|
||||||
|
"PAPERLESS_AI_LLM_ALLOW_INTERNAL_ENDPOINTS",
|
||||||
|
"true",
|
||||||
|
)
|
||||||
|
|||||||
@@ -6,15 +6,29 @@ so it is easy to see which files belong to which test module.
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from contextlib import contextmanager
|
||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
from django.test import override_settings
|
||||||
|
|
||||||
|
from paperless.parsers.mail import MailDocumentParser
|
||||||
|
from paperless.parsers.remote import RemoteDocumentParser
|
||||||
|
from paperless.parsers.tesseract import RasterisedDocumentParser
|
||||||
from paperless.parsers.text import TextDocumentParser
|
from paperless.parsers.text import TextDocumentParser
|
||||||
|
from paperless.parsers.tika import TikaDocumentParser
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
|
from collections.abc import Callable
|
||||||
from collections.abc import Generator
|
from collections.abc import Generator
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from unittest.mock import MagicMock
|
||||||
|
|
||||||
|
from pytest_django.fixtures import SettingsWrapper
|
||||||
|
from pytest_mock import MockerFixture
|
||||||
|
|
||||||
|
#: Type for the ``make_tesseract_parser`` fixture factory.
|
||||||
|
MakeTesseractParser = Callable[..., Generator[RasterisedDocumentParser, None, None]]
|
||||||
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
@@ -74,3 +88,684 @@ def text_parser() -> Generator[TextDocumentParser, None, None]:
|
|||||||
"""
|
"""
|
||||||
with TextDocumentParser() as parser:
|
with TextDocumentParser() as parser:
|
||||||
yield parser
|
yield parser
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Remote parser instance
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
def remote_parser() -> Generator[RemoteDocumentParser, None, None]:
    """Provide a RemoteDocumentParser; its temp directory is removed on exit.

    Yields
    ------
    RemoteDocumentParser
        A ready-to-use parser instance.
    """
    parser = RemoteDocumentParser()
    with parser:
        yield parser
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Remote parser settings helpers
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
def azure_settings(settings: SettingsWrapper) -> SettingsWrapper:
    """Point Django settings at a fake but valid Azure AI OCR engine.

    Overrides ``REMOTE_OCR_ENGINE``, ``REMOTE_OCR_API_KEY`` and
    ``REMOTE_OCR_ENDPOINT`` with test values; pytest-django restores the
    originals after the test.

    Returns
    -------
    SettingsWrapper
        The modified settings object, so further overrides can be chained.
    """
    settings.REMOTE_OCR_ENDPOINT = "https://test.cognitiveservices.azure.com"
    settings.REMOTE_OCR_API_KEY = "test-api-key"
    settings.REMOTE_OCR_ENGINE = "azureai"
    return settings


@pytest.fixture()
def no_engine_settings(settings: SettingsWrapper) -> SettingsWrapper:
    """Clear every remote-OCR setting so no engine is configured.

    Returns
    -------
    SettingsWrapper
        The modified settings object.
    """
    for name in ("REMOTE_OCR_ENGINE", "REMOTE_OCR_API_KEY", "REMOTE_OCR_ENDPOINT"):
        setattr(settings, name, None)
    return settings
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Tika parser sample files
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
def tika_samples_dir(samples_dir: Path) -> Path:
    """Directory with the Tika parser sample files (``<samples_dir>/tika``)."""
    return samples_dir.joinpath("tika")


@pytest.fixture(scope="session")
def sample_odt_file(tika_samples_dir: Path) -> Path:
    """Absolute path to the ``tika/sample.odt`` test document."""
    return tika_samples_dir.joinpath("sample.odt")


@pytest.fixture(scope="session")
def sample_docx_file(tika_samples_dir: Path) -> Path:
    """Absolute path to the ``tika/sample.docx`` test document."""
    return tika_samples_dir.joinpath("sample.docx")


@pytest.fixture(scope="session")
def sample_doc_file(tika_samples_dir: Path) -> Path:
    """Absolute path to the ``tika/sample.doc`` test document."""
    return tika_samples_dir.joinpath("sample.doc")


@pytest.fixture(scope="session")
def sample_broken_odt(tika_samples_dir: Path) -> Path:
    """Absolute path to ``tika/multi-part-broken.odt``.

    This file triggers Tika's multi-part form-data fallback path.
    """
    return tika_samples_dir.joinpath("multi-part-broken.odt")
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Tika parser instance
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
def tika_parser() -> Generator[TikaDocumentParser, None, None]:
    """Provide a TikaDocumentParser; its temp directory is removed on exit.

    Yields
    ------
    TikaDocumentParser
        A ready-to-use parser instance.
    """
    parser = TikaDocumentParser()
    with parser:
        yield parser
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Mail parser sample files
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
def mail_samples_dir(samples_dir: Path) -> Path:
    """Directory with the mail parser sample files (``<samples_dir>/mail``)."""
    return samples_dir.joinpath("mail")


@pytest.fixture(scope="session")
def broken_email_file(mail_samples_dir: Path) -> Path:
    """Absolute path to the malformed ``mail/broken.eml`` sample."""
    return mail_samples_dir.joinpath("broken.eml")


@pytest.fixture(scope="session")
def simple_txt_email_file(mail_samples_dir: Path) -> Path:
    """Absolute path to the plain-text ``mail/simple_text.eml`` sample."""
    return mail_samples_dir.joinpath("simple_text.eml")


@pytest.fixture(scope="session")
def simple_txt_email_pdf_file(mail_samples_dir: Path) -> Path:
    """Expected PDF rendition of the plain-text email sample."""
    return mail_samples_dir.joinpath("simple_text.eml.pdf")


@pytest.fixture(scope="session")
def simple_txt_email_thumbnail_file(mail_samples_dir: Path) -> Path:
    """Expected thumbnail (``simple_text.eml.pdf.webp``) of the text email."""
    return mail_samples_dir.joinpath("simple_text.eml.pdf.webp")


@pytest.fixture(scope="session")
def html_email_file(mail_samples_dir: Path) -> Path:
    """Absolute path to the HTML ``mail/html.eml`` sample."""
    return mail_samples_dir.joinpath("html.eml")


@pytest.fixture(scope="session")
def html_email_pdf_file(mail_samples_dir: Path) -> Path:
    """Expected PDF rendition of the HTML email sample."""
    return mail_samples_dir.joinpath("html.eml.pdf")


@pytest.fixture(scope="session")
def html_email_thumbnail_file(mail_samples_dir: Path) -> Path:
    """Expected thumbnail (``html.eml.pdf.webp``) of the HTML email."""
    return mail_samples_dir.joinpath("html.eml.pdf.webp")


@pytest.fixture(scope="session")
def html_email_html_file(mail_samples_dir: Path) -> Path:
    """HTML body (``html.eml.html``) of the HTML email sample."""
    return mail_samples_dir.joinpath("html.eml.html")


@pytest.fixture(scope="session")
def merged_pdf_first(mail_samples_dir: Path) -> Path:
    """First PDF (``mail/first.pdf``) used by the PDF-merge tests."""
    return mail_samples_dir.joinpath("first.pdf")


@pytest.fixture(scope="session")
def merged_pdf_second(mail_samples_dir: Path) -> Path:
    """Second PDF (``mail/second.pdf``) used by the PDF-merge tests."""
    return mail_samples_dir.joinpath("second.pdf")
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Mail parser instance
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
def mail_parser() -> Generator[MailDocumentParser, None, None]:
    """Provide a MailDocumentParser; its temp directory is removed on exit.

    Yields
    ------
    MailDocumentParser
        A ready-to-use parser instance.
    """
    parser = MailDocumentParser()
    with parser:
        yield parser


@pytest.fixture(scope="session")
def nginx_base_url() -> Generator[str, None, None]:
    """Base URL of the nginx HTTP server these tests expect to be running."""
    yield "http://localhost:8080"
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Tesseract parser sample files
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
def tesseract_samples_dir(samples_dir: Path) -> Path:
    """Directory with the tesseract parser samples (``<samples_dir>/tesseract``)."""
    return samples_dir.joinpath("tesseract")


@pytest.fixture(scope="session")
def document_webp_file(tesseract_samples_dir: Path) -> Path:
    """Absolute path to the ``tesseract/document.webp`` sample."""
    return tesseract_samples_dir.joinpath("document.webp")


@pytest.fixture(scope="session")
def encrypted_pdf_file(tesseract_samples_dir: Path) -> Path:
    """Absolute path to the ``tesseract/encrypted.pdf`` sample."""
    return tesseract_samples_dir.joinpath("encrypted.pdf")


@pytest.fixture(scope="session")
def multi_page_digital_pdf_file(tesseract_samples_dir: Path) -> Path:
    """Absolute path to the ``tesseract/multi-page-digital.pdf`` sample."""
    return tesseract_samples_dir.joinpath("multi-page-digital.pdf")


@pytest.fixture(scope="session")
def multi_page_images_alpha_rgb_tiff_file(tesseract_samples_dir: Path) -> Path:
    """Multi-page TIFF with an RGB alpha channel (``multi-page-images-alpha-rgb.tiff``)."""
    return tesseract_samples_dir.joinpath("multi-page-images-alpha-rgb.tiff")


@pytest.fixture(scope="session")
def multi_page_images_alpha_tiff_file(tesseract_samples_dir: Path) -> Path:
    """Multi-page TIFF with an alpha channel (``multi-page-images-alpha.tiff``)."""
    return tesseract_samples_dir.joinpath("multi-page-images-alpha.tiff")


@pytest.fixture(scope="session")
def multi_page_images_pdf_file(tesseract_samples_dir: Path) -> Path:
    """Multi-page image-based PDF (``tesseract/multi-page-images.pdf``)."""
    return tesseract_samples_dir.joinpath("multi-page-images.pdf")


@pytest.fixture(scope="session")
def multi_page_images_tiff_file(tesseract_samples_dir: Path) -> Path:
    """Multi-page TIFF sample (``tesseract/multi-page-images.tiff``)."""
    return tesseract_samples_dir.joinpath("multi-page-images.tiff")


@pytest.fixture(scope="session")
def multi_page_mixed_pdf_file(tesseract_samples_dir: Path) -> Path:
    """Multi-page mixed-content PDF (``tesseract/multi-page-mixed.pdf``)."""
    return tesseract_samples_dir.joinpath("multi-page-mixed.pdf")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
|
||||||
|
def no_text_alpha_png_file(tesseract_samples_dir: Path) -> Path:
|
||||||
|
"""Path to a PNG with alpha channel and no text.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
Path
|
||||||
|
Absolute path to ``tesseract/no-text-alpha.png``.
|
||||||
|
"""
|
||||||
|
return tesseract_samples_dir / "no-text-alpha.png"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
|
||||||
|
def rotated_pdf_file(tesseract_samples_dir: Path) -> Path:
|
||||||
|
"""Path to a rotated PDF sample file.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
Path
|
||||||
|
Absolute path to ``tesseract/rotated.pdf``.
|
||||||
|
"""
|
||||||
|
return tesseract_samples_dir / "rotated.pdf"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
|
||||||
|
def rtl_test_pdf_file(tesseract_samples_dir: Path) -> Path:
|
||||||
|
"""Path to an RTL test PDF sample file.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
Path
|
||||||
|
Absolute path to ``tesseract/rtl-test.pdf``.
|
||||||
|
"""
|
||||||
|
return tesseract_samples_dir / "rtl-test.pdf"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
|
||||||
|
def signed_pdf_file(tesseract_samples_dir: Path) -> Path:
|
||||||
|
"""Path to a signed PDF sample file.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
Path
|
||||||
|
Absolute path to ``tesseract/signed.pdf``.
|
||||||
|
"""
|
||||||
|
return tesseract_samples_dir / "signed.pdf"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
|
||||||
|
def simple_alpha_png_file(tesseract_samples_dir: Path) -> Path:
|
||||||
|
"""Path to a simple PNG with alpha channel.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
Path
|
||||||
|
Absolute path to ``tesseract/simple-alpha.png``.
|
||||||
|
"""
|
||||||
|
return tesseract_samples_dir / "simple-alpha.png"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
|
||||||
|
def simple_digital_pdf_file(tesseract_samples_dir: Path) -> Path:
|
||||||
|
"""Path to a simple digital PDF sample file.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
Path
|
||||||
|
Absolute path to ``tesseract/simple-digital.pdf``.
|
||||||
|
"""
|
||||||
|
return tesseract_samples_dir / "simple-digital.pdf"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
|
||||||
|
def simple_no_dpi_png_file(tesseract_samples_dir: Path) -> Path:
|
||||||
|
"""Path to a simple PNG without DPI information.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
Path
|
||||||
|
Absolute path to ``tesseract/simple-no-dpi.png``.
|
||||||
|
"""
|
||||||
|
return tesseract_samples_dir / "simple-no-dpi.png"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
|
||||||
|
def simple_bmp_file(tesseract_samples_dir: Path) -> Path:
|
||||||
|
"""Path to a simple BMP sample file.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
Path
|
||||||
|
Absolute path to ``tesseract/simple.bmp``.
|
||||||
|
"""
|
||||||
|
return tesseract_samples_dir / "simple.bmp"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
|
||||||
|
def simple_gif_file(tesseract_samples_dir: Path) -> Path:
|
||||||
|
"""Path to a simple GIF sample file.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
Path
|
||||||
|
Absolute path to ``tesseract/simple.gif``.
|
||||||
|
"""
|
||||||
|
return tesseract_samples_dir / "simple.gif"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
|
||||||
|
def simple_heic_file(tesseract_samples_dir: Path) -> Path:
|
||||||
|
"""Path to a simple HEIC sample file.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
Path
|
||||||
|
Absolute path to ``tesseract/simple.heic``.
|
||||||
|
"""
|
||||||
|
return tesseract_samples_dir / "simple.heic"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
|
||||||
|
def simple_jpg_file(tesseract_samples_dir: Path) -> Path:
|
||||||
|
"""Path to a simple JPG sample file.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
Path
|
||||||
|
Absolute path to ``tesseract/simple.jpg``.
|
||||||
|
"""
|
||||||
|
return tesseract_samples_dir / "simple.jpg"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
|
||||||
|
def simple_png_file(tesseract_samples_dir: Path) -> Path:
|
||||||
|
"""Path to a simple PNG sample file.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
Path
|
||||||
|
Absolute path to ``tesseract/simple.png``.
|
||||||
|
"""
|
||||||
|
return tesseract_samples_dir / "simple.png"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
|
||||||
|
def simple_tif_file(tesseract_samples_dir: Path) -> Path:
|
||||||
|
"""Path to a simple TIF sample file.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
Path
|
||||||
|
Absolute path to ``tesseract/simple.tif``.
|
||||||
|
"""
|
||||||
|
return tesseract_samples_dir / "simple.tif"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
|
||||||
|
def single_page_mixed_pdf_file(tesseract_samples_dir: Path) -> Path:
|
||||||
|
"""Path to a single-page mixed PDF sample file.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
Path
|
||||||
|
Absolute path to ``tesseract/single-page-mixed.pdf``.
|
||||||
|
"""
|
||||||
|
return tesseract_samples_dir / "single-page-mixed.pdf"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
|
||||||
|
def with_form_pdf_file(tesseract_samples_dir: Path) -> Path:
|
||||||
|
"""Path to a PDF with form sample file.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
Path
|
||||||
|
Absolute path to ``tesseract/with-form.pdf``.
|
||||||
|
"""
|
||||||
|
return tesseract_samples_dir / "with-form.pdf"
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Tesseract parser instance and settings helpers
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
|
||||||
|
def null_app_config(mocker: MockerFixture) -> MagicMock:
|
||||||
|
"""Return a MagicMock with all OcrConfig fields set to None.
|
||||||
|
|
||||||
|
This allows the parser to fall back to Django settings instead of
|
||||||
|
hitting the database.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
MagicMock
|
||||||
|
Mock config with all fields as None
|
||||||
|
"""
|
||||||
|
return mocker.MagicMock(
|
||||||
|
output_type=None,
|
||||||
|
pages=None,
|
||||||
|
language=None,
|
||||||
|
mode=None,
|
||||||
|
skip_archive_file=None,
|
||||||
|
image_dpi=None,
|
||||||
|
unpaper_clean=None,
|
||||||
|
deskew=None,
|
||||||
|
rotate_pages=None,
|
||||||
|
rotate_pages_threshold=None,
|
||||||
|
max_image_pixels=None,
|
||||||
|
color_conversion_strategy=None,
|
||||||
|
user_args=None,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
|
||||||
|
def tesseract_parser(
|
||||||
|
mocker: MockerFixture,
|
||||||
|
null_app_config: MagicMock,
|
||||||
|
) -> Generator[RasterisedDocumentParser, None, None]:
|
||||||
|
"""Yield a RasterisedDocumentParser and clean up its temporary directory afterwards.
|
||||||
|
|
||||||
|
Patches the config system to avoid database access.
|
||||||
|
|
||||||
|
Yields
|
||||||
|
------
|
||||||
|
RasterisedDocumentParser
|
||||||
|
A ready-to-use parser instance.
|
||||||
|
"""
|
||||||
|
mocker.patch(
|
||||||
|
"paperless.config.BaseConfig._get_config_instance",
|
||||||
|
return_value=null_app_config,
|
||||||
|
)
|
||||||
|
with RasterisedDocumentParser() as parser:
|
||||||
|
yield parser
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
|
||||||
|
def make_tesseract_parser(
|
||||||
|
mocker: MockerFixture,
|
||||||
|
null_app_config: MagicMock,
|
||||||
|
) -> MakeTesseractParser:
|
||||||
|
"""Return a factory for creating RasterisedDocumentParser with Django settings overrides.
|
||||||
|
|
||||||
|
This fixture is useful for tests that need to create parsers with different
|
||||||
|
settings configurations.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
Callable[..., contextmanager[RasterisedDocumentParser]]
|
||||||
|
A context manager factory that accepts Django settings overrides
|
||||||
|
"""
|
||||||
|
mocker.patch(
|
||||||
|
"paperless.config.BaseConfig._get_config_instance",
|
||||||
|
return_value=null_app_config,
|
||||||
|
)
|
||||||
|
|
||||||
|
@contextmanager
|
||||||
|
def _make_parser(**django_settings_overrides):
|
||||||
|
with override_settings(**django_settings_overrides):
|
||||||
|
with RasterisedDocumentParser() as parser:
|
||||||
|
yield parser
|
||||||
|
|
||||||
|
return _make_parser
|
||||||
|
|||||||
@@ -12,7 +12,64 @@ from pytest_httpx import HTTPXMock
|
|||||||
from pytest_mock import MockerFixture
|
from pytest_mock import MockerFixture
|
||||||
|
|
||||||
from documents.parsers import ParseError
|
from documents.parsers import ParseError
|
||||||
from paperless_mail.parsers import MailDocumentParser
|
from paperless.parsers import ParserContext
|
||||||
|
from paperless.parsers import ParserProtocol
|
||||||
|
from paperless.parsers.mail import MailDocumentParser
|
||||||
|
|
||||||
|
|
||||||
|
class TestMailParserProtocol:
|
||||||
|
"""Verify that MailDocumentParser satisfies the ParserProtocol contract."""
|
||||||
|
|
||||||
|
def test_isinstance_satisfies_protocol(
|
||||||
|
self,
|
||||||
|
mail_parser: MailDocumentParser,
|
||||||
|
) -> None:
|
||||||
|
assert isinstance(mail_parser, ParserProtocol)
|
||||||
|
|
||||||
|
def test_supported_mime_types(self) -> None:
|
||||||
|
mime_types = MailDocumentParser.supported_mime_types()
|
||||||
|
assert isinstance(mime_types, dict)
|
||||||
|
assert "message/rfc822" in mime_types
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
("mime_type", "expected"),
|
||||||
|
[
|
||||||
|
("message/rfc822", 10),
|
||||||
|
("application/pdf", None),
|
||||||
|
("text/plain", None),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_score(self, mime_type: str, expected: int | None) -> None:
|
||||||
|
assert MailDocumentParser.score(mime_type, "email.eml") == expected
|
||||||
|
|
||||||
|
def test_can_produce_archive_is_false(
|
||||||
|
self,
|
||||||
|
mail_parser: MailDocumentParser,
|
||||||
|
) -> None:
|
||||||
|
assert mail_parser.can_produce_archive is False
|
||||||
|
|
||||||
|
def test_requires_pdf_rendition_is_true(
|
||||||
|
self,
|
||||||
|
mail_parser: MailDocumentParser,
|
||||||
|
) -> None:
|
||||||
|
assert mail_parser.requires_pdf_rendition is True
|
||||||
|
|
||||||
|
def test_get_page_count_returns_none_without_archive(
|
||||||
|
self,
|
||||||
|
mail_parser: MailDocumentParser,
|
||||||
|
html_email_file: Path,
|
||||||
|
) -> None:
|
||||||
|
assert mail_parser.get_page_count(html_email_file, "message/rfc822") is None
|
||||||
|
|
||||||
|
def test_get_page_count_returns_int_with_pdf_archive(
|
||||||
|
self,
|
||||||
|
mail_parser: MailDocumentParser,
|
||||||
|
simple_txt_email_pdf_file: Path,
|
||||||
|
) -> None:
|
||||||
|
mail_parser._archive_path = simple_txt_email_pdf_file
|
||||||
|
count = mail_parser.get_page_count(simple_txt_email_pdf_file, "message/rfc822")
|
||||||
|
assert isinstance(count, int)
|
||||||
|
assert count > 0
|
||||||
|
|
||||||
|
|
||||||
class TestEmailFileParsing:
|
class TestEmailFileParsing:
|
||||||
@@ -24,7 +81,7 @@ class TestEmailFileParsing:
|
|||||||
def test_parse_error_missing_file(
|
def test_parse_error_missing_file(
|
||||||
self,
|
self,
|
||||||
mail_parser: MailDocumentParser,
|
mail_parser: MailDocumentParser,
|
||||||
sample_dir: Path,
|
mail_samples_dir: Path,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""
|
"""
|
||||||
GIVEN:
|
GIVEN:
|
||||||
@@ -35,7 +92,7 @@ class TestEmailFileParsing:
|
|||||||
- An Exception is thrown
|
- An Exception is thrown
|
||||||
"""
|
"""
|
||||||
# Check if exception is raised when parsing fails.
|
# Check if exception is raised when parsing fails.
|
||||||
test_file = sample_dir / "doesntexist.eml"
|
test_file = mail_samples_dir / "doesntexist.eml"
|
||||||
|
|
||||||
assert not test_file.exists()
|
assert not test_file.exists()
|
||||||
|
|
||||||
@@ -246,12 +303,12 @@ class TestEmailThumbnailGenerate:
|
|||||||
"""
|
"""
|
||||||
mocked_return = "Passing the return value through.."
|
mocked_return = "Passing the return value through.."
|
||||||
mock_make_thumbnail_from_pdf = mocker.patch(
|
mock_make_thumbnail_from_pdf = mocker.patch(
|
||||||
"paperless_mail.parsers.make_thumbnail_from_pdf",
|
"paperless.parsers.mail.make_thumbnail_from_pdf",
|
||||||
)
|
)
|
||||||
mock_make_thumbnail_from_pdf.return_value = mocked_return
|
mock_make_thumbnail_from_pdf.return_value = mocked_return
|
||||||
|
|
||||||
mock_generate_pdf = mocker.patch(
|
mock_generate_pdf = mocker.patch(
|
||||||
"paperless_mail.parsers.MailDocumentParser.generate_pdf",
|
"paperless.parsers.mail.MailDocumentParser.generate_pdf",
|
||||||
)
|
)
|
||||||
mock_generate_pdf.return_value = "Mocked return value.."
|
mock_generate_pdf.return_value = "Mocked return value.."
|
||||||
|
|
||||||
@@ -260,8 +317,7 @@ class TestEmailThumbnailGenerate:
|
|||||||
mock_generate_pdf.assert_called_once()
|
mock_generate_pdf.assert_called_once()
|
||||||
mock_make_thumbnail_from_pdf.assert_called_once_with(
|
mock_make_thumbnail_from_pdf.assert_called_once_with(
|
||||||
"Mocked return value..",
|
"Mocked return value..",
|
||||||
mail_parser.tempdir,
|
mail_parser._tempdir,
|
||||||
None,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
assert mocked_return == thumb
|
assert mocked_return == thumb
|
||||||
@@ -373,7 +429,7 @@ class TestParser:
|
|||||||
"""
|
"""
|
||||||
# Validate parsing returns the expected results
|
# Validate parsing returns the expected results
|
||||||
mock_generate_pdf = mocker.patch(
|
mock_generate_pdf = mocker.patch(
|
||||||
"paperless_mail.parsers.MailDocumentParser.generate_pdf",
|
"paperless.parsers.mail.MailDocumentParser.generate_pdf",
|
||||||
)
|
)
|
||||||
|
|
||||||
mail_parser.parse(simple_txt_email_file, "message/rfc822")
|
mail_parser.parse(simple_txt_email_file, "message/rfc822")
|
||||||
@@ -385,7 +441,7 @@ class TestParser:
|
|||||||
"BCC: fdf@fvf.de\n\n"
|
"BCC: fdf@fvf.de\n\n"
|
||||||
"\n\nThis is just a simple Text Mail."
|
"\n\nThis is just a simple Text Mail."
|
||||||
)
|
)
|
||||||
assert text_expected == mail_parser.text
|
assert text_expected == mail_parser.get_text()
|
||||||
assert (
|
assert (
|
||||||
datetime.datetime(
|
datetime.datetime(
|
||||||
2022,
|
2022,
|
||||||
@@ -396,7 +452,7 @@ class TestParser:
|
|||||||
43,
|
43,
|
||||||
tzinfo=datetime.timezone(datetime.timedelta(seconds=7200)),
|
tzinfo=datetime.timezone(datetime.timedelta(seconds=7200)),
|
||||||
)
|
)
|
||||||
== mail_parser.date
|
== mail_parser.get_date()
|
||||||
)
|
)
|
||||||
|
|
||||||
# Just check if tried to generate archive, the unittest for generate_pdf() goes deeper.
|
# Just check if tried to generate archive, the unittest for generate_pdf() goes deeper.
|
||||||
@@ -419,7 +475,7 @@ class TestParser:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
mock_generate_pdf = mocker.patch(
|
mock_generate_pdf = mocker.patch(
|
||||||
"paperless_mail.parsers.MailDocumentParser.generate_pdf",
|
"paperless.parsers.mail.MailDocumentParser.generate_pdf",
|
||||||
)
|
)
|
||||||
|
|
||||||
# Validate parsing returns the expected results
|
# Validate parsing returns the expected results
|
||||||
@@ -443,7 +499,7 @@ class TestParser:
|
|||||||
mail_parser.parse(html_email_file, "message/rfc822")
|
mail_parser.parse(html_email_file, "message/rfc822")
|
||||||
|
|
||||||
mock_generate_pdf.assert_called_once()
|
mock_generate_pdf.assert_called_once()
|
||||||
assert text_expected == mail_parser.text
|
assert text_expected == mail_parser.get_text()
|
||||||
assert (
|
assert (
|
||||||
datetime.datetime(
|
datetime.datetime(
|
||||||
2022,
|
2022,
|
||||||
@@ -454,7 +510,7 @@ class TestParser:
|
|||||||
19,
|
19,
|
||||||
tzinfo=datetime.timezone(datetime.timedelta(seconds=7200)),
|
tzinfo=datetime.timezone(datetime.timedelta(seconds=7200)),
|
||||||
)
|
)
|
||||||
== mail_parser.date
|
== mail_parser.get_date()
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_generate_pdf_parse_error(
|
def test_generate_pdf_parse_error(
|
||||||
@@ -501,7 +557,7 @@ class TestParser:
|
|||||||
|
|
||||||
mail_parser.parse(simple_txt_email_file, "message/rfc822")
|
mail_parser.parse(simple_txt_email_file, "message/rfc822")
|
||||||
|
|
||||||
assert mail_parser.archive_path is not None
|
assert mail_parser.get_archive_path() is not None
|
||||||
|
|
||||||
@pytest.mark.httpx_mock(can_send_already_matched_responses=True)
|
@pytest.mark.httpx_mock(can_send_already_matched_responses=True)
|
||||||
def test_generate_pdf_html_email(
|
def test_generate_pdf_html_email(
|
||||||
@@ -542,7 +598,7 @@ class TestParser:
|
|||||||
)
|
)
|
||||||
mail_parser.parse(html_email_file, "message/rfc822")
|
mail_parser.parse(html_email_file, "message/rfc822")
|
||||||
|
|
||||||
assert mail_parser.archive_path is not None
|
assert mail_parser.get_archive_path() is not None
|
||||||
|
|
||||||
def test_generate_pdf_html_email_html_to_pdf_failure(
|
def test_generate_pdf_html_email_html_to_pdf_failure(
|
||||||
self,
|
self,
|
||||||
@@ -712,10 +768,10 @@ class TestParser:
|
|||||||
|
|
||||||
def test_layout_option(layout_option, expected_calls, expected_pdf_names):
|
def test_layout_option(layout_option, expected_calls, expected_pdf_names):
|
||||||
mock_mailrule_get.return_value = mock.Mock(pdf_layout=layout_option)
|
mock_mailrule_get.return_value = mock.Mock(pdf_layout=layout_option)
|
||||||
|
mail_parser.configure(ParserContext(mailrule_id=1))
|
||||||
mail_parser.parse(
|
mail_parser.parse(
|
||||||
document_path=html_email_file,
|
document_path=html_email_file,
|
||||||
mime_type="message/rfc822",
|
mime_type="message/rfc822",
|
||||||
mailrule_id=1,
|
|
||||||
)
|
)
|
||||||
args, _ = mock_merge_route.call_args
|
args, _ = mock_merge_route.call_args
|
||||||
assert len(args[0]) == expected_calls
|
assert len(args[0]) == expected_calls
|
||||||
@@ -11,7 +11,7 @@ from PIL import Image
|
|||||||
from pytest_mock import MockerFixture
|
from pytest_mock import MockerFixture
|
||||||
|
|
||||||
from documents.tests.utils import util_call_with_backoff
|
from documents.tests.utils import util_call_with_backoff
|
||||||
from paperless_mail.parsers import MailDocumentParser
|
from paperless.parsers.mail import MailDocumentParser
|
||||||
|
|
||||||
|
|
||||||
def extract_text(pdf_path: Path) -> str:
|
def extract_text(pdf_path: Path) -> str:
|
||||||
@@ -159,7 +159,7 @@ class TestParserLive:
|
|||||||
- The returned thumbnail image file shall match the expected hash
|
- The returned thumbnail image file shall match the expected hash
|
||||||
"""
|
"""
|
||||||
mock_generate_pdf = mocker.patch(
|
mock_generate_pdf = mocker.patch(
|
||||||
"paperless_mail.parsers.MailDocumentParser.generate_pdf",
|
"paperless.parsers.mail.MailDocumentParser.generate_pdf",
|
||||||
)
|
)
|
||||||
mock_generate_pdf.return_value = simple_txt_email_pdf_file
|
mock_generate_pdf.return_value = simple_txt_email_pdf_file
|
||||||
|
|
||||||
@@ -216,10 +216,10 @@ class TestParserLive:
|
|||||||
- The merged PDF shall contain text from both source PDFs
|
- The merged PDF shall contain text from both source PDFs
|
||||||
"""
|
"""
|
||||||
mock_generate_pdf_from_html = mocker.patch(
|
mock_generate_pdf_from_html = mocker.patch(
|
||||||
"paperless_mail.parsers.MailDocumentParser.generate_pdf_from_html",
|
"paperless.parsers.mail.MailDocumentParser.generate_pdf_from_html",
|
||||||
)
|
)
|
||||||
mock_generate_pdf_from_mail = mocker.patch(
|
mock_generate_pdf_from_mail = mocker.patch(
|
||||||
"paperless_mail.parsers.MailDocumentParser.generate_pdf_from_mail",
|
"paperless.parsers.mail.MailDocumentParser.generate_pdf_from_mail",
|
||||||
)
|
)
|
||||||
mock_generate_pdf_from_mail.return_value = merged_pdf_first
|
mock_generate_pdf_from_mail.return_value = merged_pdf_first
|
||||||
mock_generate_pdf_from_html.return_value = merged_pdf_second
|
mock_generate_pdf_from_html.return_value = merged_pdf_second
|
||||||
503
src/paperless/tests/parsers/test_remote_parser.py
Normal file
@@ -0,0 +1,503 @@
|
|||||||
|
"""
|
||||||
|
Tests for paperless.parsers.remote.RemoteDocumentParser.
|
||||||
|
|
||||||
|
All tests use the context-manager protocol for parser lifecycle.
|
||||||
|
|
||||||
|
Fixture layout
|
||||||
|
--------------
|
||||||
|
make_azure_mock — factory (defined here; specific to this module)
|
||||||
|
azure_client — composes azure_settings + make_azure_mock + patch;
|
||||||
|
use when a test needs the client to succeed
|
||||||
|
failing_azure_client
|
||||||
|
— composes azure_settings + patch with RuntimeError;
|
||||||
|
use when a test needs the client to fail
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
from unittest.mock import Mock
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from paperless.parsers import ParserContext
|
||||||
|
from paperless.parsers import ParserProtocol
|
||||||
|
from paperless.parsers.remote import RemoteDocumentParser
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from collections.abc import Callable
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from pytest_django.fixtures import SettingsWrapper
|
||||||
|
from pytest_mock import MockerFixture
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Module-local fixtures
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
_AZURE_CLIENT_TARGET = "azure.ai.documentintelligence.DocumentIntelligenceClient"
|
||||||
|
_DEFAULT_TEXT = "Extracted text."
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
|
||||||
|
def make_azure_mock() -> Callable[[str], Mock]:
|
||||||
|
"""Return a factory that builds a mock Azure DocumentIntelligenceClient.
|
||||||
|
|
||||||
|
Usage::
|
||||||
|
|
||||||
|
mock_client = make_azure_mock() # default extracted text
|
||||||
|
mock_client = make_azure_mock("My text.") # custom extracted text
|
||||||
|
"""
|
||||||
|
|
||||||
|
def _factory(text: str = _DEFAULT_TEXT) -> Mock:
|
||||||
|
mock_client = Mock()
|
||||||
|
mock_poller = Mock()
|
||||||
|
mock_poller.wait.return_value = None
|
||||||
|
mock_poller.details = {"operation_id": "fake-op-id"}
|
||||||
|
mock_poller.result.return_value.content = text
|
||||||
|
mock_client.begin_analyze_document.return_value = mock_poller
|
||||||
|
mock_client.get_analyze_result_pdf.return_value = [b"%PDF-1.4 FAKE"]
|
||||||
|
return mock_client
|
||||||
|
|
||||||
|
return _factory
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
|
||||||
|
def azure_client(
|
||||||
|
azure_settings: SettingsWrapper,
|
||||||
|
make_azure_mock: Callable[[str], Mock],
|
||||||
|
mocker: MockerFixture,
|
||||||
|
) -> Mock:
|
||||||
|
"""Patch the Azure DI client with a succeeding mock and return the instance.
|
||||||
|
|
||||||
|
Implicitly applies ``azure_settings`` so tests using this fixture do not
|
||||||
|
also need ``@pytest.mark.usefixtures("azure_settings")``.
|
||||||
|
"""
|
||||||
|
mock_client = make_azure_mock()
|
||||||
|
mocker.patch(_AZURE_CLIENT_TARGET, return_value=mock_client)
|
||||||
|
return mock_client
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
|
||||||
|
def failing_azure_client(
|
||||||
|
azure_settings: SettingsWrapper,
|
||||||
|
mocker: MockerFixture,
|
||||||
|
) -> Mock:
|
||||||
|
"""Patch the Azure DI client to raise RuntimeError on every call.
|
||||||
|
|
||||||
|
Implicitly applies ``azure_settings``. Returns the mock instance so
|
||||||
|
tests can assert on calls such as ``close()``.
|
||||||
|
"""
|
||||||
|
mock_client = Mock()
|
||||||
|
mock_client.begin_analyze_document.side_effect = RuntimeError("network failure")
|
||||||
|
mocker.patch(_AZURE_CLIENT_TARGET, return_value=mock_client)
|
||||||
|
return mock_client
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Protocol contract
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestRemoteParserProtocol:
|
||||||
|
"""Verify that RemoteDocumentParser satisfies the ParserProtocol contract."""
|
||||||
|
|
||||||
|
def test_isinstance_satisfies_protocol(
|
||||||
|
self,
|
||||||
|
remote_parser: RemoteDocumentParser,
|
||||||
|
) -> None:
|
||||||
|
assert isinstance(remote_parser, ParserProtocol)
|
||||||
|
|
||||||
|
def test_class_attributes_present(self) -> None:
|
||||||
|
assert isinstance(RemoteDocumentParser.name, str) and RemoteDocumentParser.name
|
||||||
|
assert (
|
||||||
|
isinstance(RemoteDocumentParser.version, str)
|
||||||
|
and RemoteDocumentParser.version
|
||||||
|
)
|
||||||
|
assert (
|
||||||
|
isinstance(RemoteDocumentParser.author, str) and RemoteDocumentParser.author
|
||||||
|
)
|
||||||
|
assert isinstance(RemoteDocumentParser.url, str) and RemoteDocumentParser.url
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# supported_mime_types
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestRemoteParserSupportedMimeTypes:
|
||||||
|
"""supported_mime_types() always returns the full set regardless of config."""
|
||||||
|
|
||||||
|
def test_returns_dict(self) -> None:
|
||||||
|
mime_types = RemoteDocumentParser.supported_mime_types()
|
||||||
|
assert isinstance(mime_types, dict)
|
||||||
|
|
||||||
|
def test_includes_all_expected_types(self) -> None:
|
||||||
|
mime_types = RemoteDocumentParser.supported_mime_types()
|
||||||
|
expected = {
|
||||||
|
"application/pdf",
|
||||||
|
"image/png",
|
||||||
|
"image/jpeg",
|
||||||
|
"image/tiff",
|
||||||
|
"image/bmp",
|
||||||
|
"image/gif",
|
||||||
|
"image/webp",
|
||||||
|
}
|
||||||
|
assert expected == set(mime_types.keys())
|
||||||
|
|
||||||
|
@pytest.mark.usefixtures("no_engine_settings")
|
||||||
|
def test_returns_full_set_when_not_configured(self) -> None:
|
||||||
|
"""
|
||||||
|
GIVEN: No remote engine is configured
|
||||||
|
WHEN: supported_mime_types() is called
|
||||||
|
THEN: The full MIME type dict is still returned (score() handles activation)
|
||||||
|
"""
|
||||||
|
mime_types = RemoteDocumentParser.supported_mime_types()
|
||||||
|
assert len(mime_types) == 7
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# score()
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestRemoteParserScore:
|
||||||
|
"""score() encodes the activation logic: None when unconfigured, 20 when active."""
|
||||||
|
|
||||||
|
@pytest.mark.usefixtures("azure_settings")
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"mime_type",
|
||||||
|
[
|
||||||
|
pytest.param("application/pdf", id="pdf"),
|
||||||
|
pytest.param("image/png", id="png"),
|
||||||
|
pytest.param("image/jpeg", id="jpeg"),
|
||||||
|
pytest.param("image/tiff", id="tiff"),
|
||||||
|
pytest.param("image/bmp", id="bmp"),
|
||||||
|
pytest.param("image/gif", id="gif"),
|
||||||
|
pytest.param("image/webp", id="webp"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_score_returns_20_when_configured(self, mime_type: str) -> None:
|
||||||
|
result = RemoteDocumentParser.score(mime_type, "doc.pdf")
|
||||||
|
assert result == 20
|
||||||
|
|
||||||
|
@pytest.mark.usefixtures("no_engine_settings")
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"mime_type",
|
||||||
|
[
|
||||||
|
pytest.param("application/pdf", id="pdf"),
|
||||||
|
pytest.param("image/png", id="png"),
|
||||||
|
pytest.param("image/jpeg", id="jpeg"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_score_returns_none_when_no_engine(self, mime_type: str) -> None:
|
||||||
|
result = RemoteDocumentParser.score(mime_type, "doc.pdf")
|
||||||
|
assert result is None
|
||||||
|
|
||||||
|
def test_score_returns_none_when_api_key_missing(
|
||||||
|
self,
|
||||||
|
settings: SettingsWrapper,
|
||||||
|
) -> None:
|
||||||
|
settings.REMOTE_OCR_ENGINE = "azureai"
|
||||||
|
settings.REMOTE_OCR_API_KEY = None
|
||||||
|
settings.REMOTE_OCR_ENDPOINT = "https://test.cognitiveservices.azure.com"
|
||||||
|
result = RemoteDocumentParser.score("application/pdf", "doc.pdf")
|
||||||
|
assert result is None
|
||||||
|
|
||||||
|
def test_score_returns_none_when_endpoint_missing(
|
||||||
|
self,
|
||||||
|
settings: SettingsWrapper,
|
||||||
|
) -> None:
|
||||||
|
settings.REMOTE_OCR_ENGINE = "azureai"
|
||||||
|
settings.REMOTE_OCR_API_KEY = "key"
|
||||||
|
settings.REMOTE_OCR_ENDPOINT = None
|
||||||
|
result = RemoteDocumentParser.score("application/pdf", "doc.pdf")
|
||||||
|
assert result is None
|
||||||
|
|
||||||
|
@pytest.mark.usefixtures("azure_settings")
|
||||||
|
def test_score_returns_none_for_unsupported_mime_type(self) -> None:
|
||||||
|
result = RemoteDocumentParser.score("text/plain", "doc.txt")
|
||||||
|
assert result is None
|
||||||
|
|
||||||
|
@pytest.mark.usefixtures("azure_settings")
|
||||||
|
def test_score_higher_than_tesseract_default(self) -> None:
|
||||||
|
"""Remote parser (20) outranks the tesseract default (10) when configured."""
|
||||||
|
score = RemoteDocumentParser.score("application/pdf", "doc.pdf")
|
||||||
|
assert score is not None and score > 10
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Properties
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestRemoteParserProperties:
|
||||||
|
def test_can_produce_archive_is_true(
|
||||||
|
self,
|
||||||
|
remote_parser: RemoteDocumentParser,
|
||||||
|
) -> None:
|
||||||
|
assert remote_parser.can_produce_archive is True
|
||||||
|
|
||||||
|
def test_requires_pdf_rendition_is_false(
|
||||||
|
self,
|
||||||
|
remote_parser: RemoteDocumentParser,
|
||||||
|
) -> None:
|
||||||
|
assert remote_parser.requires_pdf_rendition is False
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Lifecycle
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestRemoteParserLifecycle:
|
||||||
|
def test_context_manager_cleans_up_tempdir(self) -> None:
|
||||||
|
with RemoteDocumentParser() as parser:
|
||||||
|
tempdir = parser._tempdir
|
||||||
|
assert tempdir.exists()
|
||||||
|
assert not tempdir.exists()
|
||||||
|
|
||||||
|
def test_context_manager_cleans_up_after_exception(self) -> None:
|
||||||
|
tempdir: Path | None = None
|
||||||
|
with pytest.raises(RuntimeError):
|
||||||
|
with RemoteDocumentParser() as parser:
|
||||||
|
tempdir = parser._tempdir
|
||||||
|
raise RuntimeError("boom")
|
||||||
|
assert tempdir is not None
|
||||||
|
assert not tempdir.exists()
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# parse() — happy path
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestRemoteParserParse:
|
||||||
|
def test_parse_returns_text_from_azure(
|
||||||
|
self,
|
||||||
|
remote_parser: RemoteDocumentParser,
|
||||||
|
simple_digital_pdf_file: Path,
|
||||||
|
azure_client: Mock,
|
||||||
|
) -> None:
|
||||||
|
remote_parser.parse(simple_digital_pdf_file, "application/pdf")
|
||||||
|
|
||||||
|
assert remote_parser.get_text() == _DEFAULT_TEXT
|
||||||
|
|
||||||
|
def test_parse_sets_archive_path(
|
||||||
|
self,
|
||||||
|
remote_parser: RemoteDocumentParser,
|
||||||
|
simple_digital_pdf_file: Path,
|
||||||
|
azure_client: Mock,
|
||||||
|
) -> None:
|
||||||
|
remote_parser.parse(simple_digital_pdf_file, "application/pdf")
|
||||||
|
|
||||||
|
archive = remote_parser.get_archive_path()
|
||||||
|
assert archive is not None
|
||||||
|
assert archive.exists()
|
||||||
|
assert archive.suffix == ".pdf"
|
||||||
|
|
||||||
|
def test_parse_closes_client_on_success(
|
||||||
|
self,
|
||||||
|
remote_parser: RemoteDocumentParser,
|
||||||
|
simple_digital_pdf_file: Path,
|
||||||
|
azure_client: Mock,
|
||||||
|
) -> None:
|
||||||
|
remote_parser.configure(ParserContext())
|
||||||
|
remote_parser.parse(simple_digital_pdf_file, "application/pdf")
|
||||||
|
|
||||||
|
azure_client.close.assert_called_once()
|
||||||
|
|
||||||
|
@pytest.mark.usefixtures("no_engine_settings")
|
||||||
|
def test_parse_sets_empty_text_when_not_configured(
|
||||||
|
self,
|
||||||
|
remote_parser: RemoteDocumentParser,
|
||||||
|
simple_digital_pdf_file: Path,
|
||||||
|
) -> None:
|
||||||
|
remote_parser.parse(simple_digital_pdf_file, "application/pdf")
|
||||||
|
|
||||||
|
assert remote_parser.get_text() == ""
|
||||||
|
assert remote_parser.get_archive_path() is None
|
||||||
|
|
||||||
|
def test_get_text_none_before_parse(
|
||||||
|
self,
|
||||||
|
remote_parser: RemoteDocumentParser,
|
||||||
|
) -> None:
|
||||||
|
assert remote_parser.get_text() is None
|
||||||
|
|
||||||
|
def test_get_date_always_none(
|
||||||
|
self,
|
||||||
|
remote_parser: RemoteDocumentParser,
|
||||||
|
simple_digital_pdf_file: Path,
|
||||||
|
azure_client: Mock,
|
||||||
|
) -> None:
|
||||||
|
remote_parser.parse(simple_digital_pdf_file, "application/pdf")
|
||||||
|
|
||||||
|
assert remote_parser.get_date() is None
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# parse() — Azure failure path
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestRemoteParserParseError:
|
||||||
|
def test_parse_returns_none_on_azure_error(
|
||||||
|
self,
|
||||||
|
remote_parser: RemoteDocumentParser,
|
||||||
|
simple_digital_pdf_file: Path,
|
||||||
|
failing_azure_client: Mock,
|
||||||
|
) -> None:
|
||||||
|
remote_parser.parse(simple_digital_pdf_file, "application/pdf")
|
||||||
|
|
||||||
|
assert remote_parser.get_text() is None
|
||||||
|
|
||||||
|
def test_parse_closes_client_on_error(
|
||||||
|
self,
|
||||||
|
remote_parser: RemoteDocumentParser,
|
||||||
|
simple_digital_pdf_file: Path,
|
||||||
|
failing_azure_client: Mock,
|
||||||
|
) -> None:
|
||||||
|
remote_parser.parse(simple_digital_pdf_file, "application/pdf")
|
||||||
|
|
||||||
|
failing_azure_client.close.assert_called_once()
|
||||||
|
|
||||||
|
def test_parse_logs_error_on_azure_failure(
|
||||||
|
self,
|
||||||
|
remote_parser: RemoteDocumentParser,
|
||||||
|
simple_digital_pdf_file: Path,
|
||||||
|
failing_azure_client: Mock,
|
||||||
|
mocker: MockerFixture,
|
||||||
|
) -> None:
|
||||||
|
mock_log = mocker.patch("paperless.parsers.remote.logger")
|
||||||
|
|
||||||
|
remote_parser.parse(simple_digital_pdf_file, "application/pdf")
|
||||||
|
|
||||||
|
mock_log.error.assert_called_once()
|
||||||
|
assert "Azure AI Vision parsing failed" in mock_log.error.call_args[0][0]
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# get_page_count()
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestRemoteParserPageCount:
|
||||||
|
def test_page_count_for_pdf(
|
||||||
|
self,
|
||||||
|
remote_parser: RemoteDocumentParser,
|
||||||
|
simple_digital_pdf_file: Path,
|
||||||
|
) -> None:
|
||||||
|
count = remote_parser.get_page_count(simple_digital_pdf_file, "application/pdf")
|
||||||
|
assert isinstance(count, int)
|
||||||
|
assert count >= 1
|
||||||
|
|
||||||
|
def test_page_count_returns_none_for_image_mime(
|
||||||
|
self,
|
||||||
|
remote_parser: RemoteDocumentParser,
|
||||||
|
simple_digital_pdf_file: Path,
|
||||||
|
) -> None:
|
||||||
|
count = remote_parser.get_page_count(simple_digital_pdf_file, "image/png")
|
||||||
|
assert count is None
|
||||||
|
|
||||||
|
def test_page_count_returns_none_for_invalid_pdf(
|
||||||
|
self,
|
||||||
|
remote_parser: RemoteDocumentParser,
|
||||||
|
tmp_path: Path,
|
||||||
|
) -> None:
|
||||||
|
bad_pdf = tmp_path / "bad.pdf"
|
||||||
|
bad_pdf.write_bytes(b"not a pdf at all")
|
||||||
|
count = remote_parser.get_page_count(bad_pdf, "application/pdf")
|
||||||
|
assert count is None
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# extract_metadata()
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestRemoteParserMetadata:
|
||||||
|
def test_extract_metadata_non_pdf_returns_empty(
|
||||||
|
self,
|
||||||
|
remote_parser: RemoteDocumentParser,
|
||||||
|
simple_digital_pdf_file: Path,
|
||||||
|
) -> None:
|
||||||
|
result = remote_parser.extract_metadata(simple_digital_pdf_file, "image/png")
|
||||||
|
assert result == []
|
||||||
|
|
||||||
|
def test_extract_metadata_pdf_returns_list(
|
||||||
|
self,
|
||||||
|
remote_parser: RemoteDocumentParser,
|
||||||
|
simple_digital_pdf_file: Path,
|
||||||
|
) -> None:
|
||||||
|
result = remote_parser.extract_metadata(
|
||||||
|
simple_digital_pdf_file,
|
||||||
|
"application/pdf",
|
||||||
|
)
|
||||||
|
assert isinstance(result, list)
|
||||||
|
|
||||||
|
def test_extract_metadata_pdf_entries_have_required_keys(
|
||||||
|
self,
|
||||||
|
remote_parser: RemoteDocumentParser,
|
||||||
|
simple_digital_pdf_file: Path,
|
||||||
|
) -> None:
|
||||||
|
result = remote_parser.extract_metadata(
|
||||||
|
simple_digital_pdf_file,
|
||||||
|
"application/pdf",
|
||||||
|
)
|
||||||
|
for entry in result:
|
||||||
|
assert "namespace" in entry
|
||||||
|
assert "prefix" in entry
|
||||||
|
assert "key" in entry
|
||||||
|
assert "value" in entry
|
||||||
|
assert isinstance(entry["value"], str)
|
||||||
|
|
||||||
|
def test_extract_metadata_does_not_raise_on_invalid_pdf(
|
||||||
|
self,
|
||||||
|
remote_parser: RemoteDocumentParser,
|
||||||
|
tmp_path: Path,
|
||||||
|
) -> None:
|
||||||
|
bad_pdf = tmp_path / "bad.pdf"
|
||||||
|
bad_pdf.write_bytes(b"not a pdf at all")
|
||||||
|
result = remote_parser.extract_metadata(bad_pdf, "application/pdf")
|
||||||
|
assert result == []
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Registry integration
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestRemoteParserRegistry:
|
||||||
|
def test_registered_in_defaults(self) -> None:
|
||||||
|
from paperless.parsers.registry import ParserRegistry
|
||||||
|
|
||||||
|
registry = ParserRegistry()
|
||||||
|
registry.register_defaults()
|
||||||
|
|
||||||
|
assert RemoteDocumentParser in registry._builtins
|
||||||
|
|
||||||
|
@pytest.mark.usefixtures("azure_settings")
|
||||||
|
def test_get_parser_returns_remote_when_configured(self) -> None:
|
||||||
|
from paperless.parsers.registry import get_parser_registry
|
||||||
|
|
||||||
|
registry = get_parser_registry()
|
||||||
|
parser_cls = registry.get_parser_for_file("application/pdf", "doc.pdf")
|
||||||
|
|
||||||
|
assert parser_cls is RemoteDocumentParser
|
||||||
|
|
||||||
|
@pytest.mark.usefixtures("no_engine_settings")
|
||||||
|
def test_get_parser_returns_none_for_unsupported_type_when_not_configured(
|
||||||
|
self,
|
||||||
|
) -> None:
|
||||||
|
"""With remote off and a truly unsupported MIME type, registry returns None."""
|
||||||
|
from paperless.parsers.registry import ParserRegistry
|
||||||
|
|
||||||
|
registry = ParserRegistry()
|
||||||
|
registry.register_defaults()
|
||||||
|
parser_cls = registry.get_parser_for_file(
|
||||||
|
"application/x-unknown-format",
|
||||||
|
"doc.xyz",
|
||||||
|
)
|
||||||
|
|
||||||
|
assert parser_cls is None
|
||||||
@@ -10,7 +10,7 @@ from paperless.models import CleanChoices
|
|||||||
from paperless.models import ColorConvertChoices
|
from paperless.models import ColorConvertChoices
|
||||||
from paperless.models import ModeChoices
|
from paperless.models import ModeChoices
|
||||||
from paperless.models import OutputTypeChoices
|
from paperless.models import OutputTypeChoices
|
||||||
from paperless_tesseract.parsers import RasterisedDocumentParser
|
from paperless.parsers.tesseract import RasterisedDocumentParser
|
||||||
|
|
||||||
|
|
||||||
class TestParserSettingsFromDb(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
class TestParserSettingsFromDb(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||||
1174
src/paperless/tests/parsers/test_tesseract_parser.py
Normal file
@@ -12,6 +12,7 @@ from pathlib import Path
|
|||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
from paperless.parsers import ParserContext
|
||||||
from paperless.parsers import ParserProtocol
|
from paperless.parsers import ParserProtocol
|
||||||
from paperless.parsers.text import TextDocumentParser
|
from paperless.parsers.text import TextDocumentParser
|
||||||
|
|
||||||
@@ -93,6 +94,7 @@ class TestTextParserParse:
|
|||||||
text_parser: TextDocumentParser,
|
text_parser: TextDocumentParser,
|
||||||
sample_txt_file: Path,
|
sample_txt_file: Path,
|
||||||
) -> None:
|
) -> None:
|
||||||
|
text_parser.configure(ParserContext())
|
||||||
text_parser.parse(sample_txt_file, "text/plain")
|
text_parser.parse(sample_txt_file, "text/plain")
|
||||||
|
|
||||||
assert text_parser.get_text() == "This is a test file.\n"
|
assert text_parser.get_text() == "This is a test file.\n"
|
||||||
@@ -102,6 +104,7 @@ class TestTextParserParse:
|
|||||||
text_parser: TextDocumentParser,
|
text_parser: TextDocumentParser,
|
||||||
sample_txt_file: Path,
|
sample_txt_file: Path,
|
||||||
) -> None:
|
) -> None:
|
||||||
|
text_parser.configure(ParserContext())
|
||||||
text_parser.parse(sample_txt_file, "text/plain")
|
text_parser.parse(sample_txt_file, "text/plain")
|
||||||
|
|
||||||
assert text_parser.get_archive_path() is None
|
assert text_parser.get_archive_path() is None
|
||||||
@@ -111,6 +114,7 @@ class TestTextParserParse:
|
|||||||
text_parser: TextDocumentParser,
|
text_parser: TextDocumentParser,
|
||||||
sample_txt_file: Path,
|
sample_txt_file: Path,
|
||||||
) -> None:
|
) -> None:
|
||||||
|
text_parser.configure(ParserContext())
|
||||||
text_parser.parse(sample_txt_file, "text/plain")
|
text_parser.parse(sample_txt_file, "text/plain")
|
||||||
|
|
||||||
assert text_parser.get_date() is None
|
assert text_parser.get_date() is None
|
||||||
@@ -129,6 +133,7 @@ class TestTextParserParse:
|
|||||||
- Parsing succeeds
|
- Parsing succeeds
|
||||||
- Invalid bytes are replaced with the Unicode replacement character
|
- Invalid bytes are replaced with the Unicode replacement character
|
||||||
"""
|
"""
|
||||||
|
text_parser.configure(ParserContext())
|
||||||
text_parser.parse(malformed_txt_file, "text/plain")
|
text_parser.parse(malformed_txt_file, "text/plain")
|
||||||
|
|
||||||
assert text_parser.get_text() == "Pantothens\ufffdure\n"
|
assert text_parser.get_text() == "Pantothens\ufffdure\n"
|
||||||
@@ -251,6 +256,9 @@ class TestTextParserRegistry:
|
|||||||
from paperless.parsers.registry import get_parser_registry
|
from paperless.parsers.registry import get_parser_registry
|
||||||
|
|
||||||
registry = get_parser_registry()
|
registry = get_parser_registry()
|
||||||
parser_cls = registry.get_parser_for_file("application/pdf", "doc.pdf")
|
parser_cls = registry.get_parser_for_file(
|
||||||
|
"application/x-unknown-format",
|
||||||
|
"doc.xyz",
|
||||||
|
)
|
||||||
|
|
||||||
assert parser_cls is None
|
assert parser_cls is None
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ from pathlib import Path
|
|||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from documents.tests.utils import util_call_with_backoff
|
from documents.tests.utils import util_call_with_backoff
|
||||||
from paperless_tika.parsers import TikaDocumentParser
|
from paperless.parsers.tika import TikaDocumentParser
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.skipif(
|
@pytest.mark.skipif(
|
||||||
@@ -42,14 +42,15 @@ class TestTikaParserAgainstServer:
|
|||||||
)
|
)
|
||||||
|
|
||||||
assert (
|
assert (
|
||||||
tika_parser.text
|
tika_parser.get_text()
|
||||||
== "This is an ODT test document, created September 14, 2022"
|
== "This is an ODT test document, created September 14, 2022"
|
||||||
)
|
)
|
||||||
assert tika_parser.archive_path is not None
|
archive = tika_parser.get_archive_path()
|
||||||
assert b"PDF-" in tika_parser.archive_path.read_bytes()[:10]
|
assert archive is not None
|
||||||
|
assert b"PDF-" in archive.read_bytes()[:10]
|
||||||
|
|
||||||
# TODO: Unsure what can set the Creation-Date field in a document, enable when possible
|
# TODO: Unsure what can set the Creation-Date field in a document, enable when possible
|
||||||
# self.assertEqual(tika_parser.date, datetime.datetime(2022, 9, 14))
|
# self.assertEqual(tika_parser.get_date(), datetime.datetime(2022, 9, 14))
|
||||||
|
|
||||||
def test_basic_parse_docx(
|
def test_basic_parse_docx(
|
||||||
self,
|
self,
|
||||||
@@ -74,14 +75,15 @@ class TestTikaParserAgainstServer:
|
|||||||
)
|
)
|
||||||
|
|
||||||
assert (
|
assert (
|
||||||
tika_parser.text
|
tika_parser.get_text()
|
||||||
== "This is an DOCX test document, also made September 14, 2022"
|
== "This is an DOCX test document, also made September 14, 2022"
|
||||||
)
|
)
|
||||||
assert tika_parser.archive_path is not None
|
archive = tika_parser.get_archive_path()
|
||||||
with Path(tika_parser.archive_path).open("rb") as f:
|
assert archive is not None
|
||||||
|
with archive.open("rb") as f:
|
||||||
assert b"PDF-" in f.read()[:10]
|
assert b"PDF-" in f.read()[:10]
|
||||||
|
|
||||||
# self.assertEqual(tika_parser.date, datetime.datetime(2022, 9, 14))
|
# self.assertEqual(tika_parser.get_date(), datetime.datetime(2022, 9, 14))
|
||||||
|
|
||||||
def test_basic_parse_doc(
|
def test_basic_parse_doc(
|
||||||
self,
|
self,
|
||||||
@@ -102,13 +104,12 @@ class TestTikaParserAgainstServer:
|
|||||||
[sample_doc_file, "application/msword"],
|
[sample_doc_file, "application/msword"],
|
||||||
)
|
)
|
||||||
|
|
||||||
assert tika_parser.text is not None
|
text = tika_parser.get_text()
|
||||||
assert (
|
assert text is not None
|
||||||
"This is a test document, saved in the older .doc format"
|
assert "This is a test document, saved in the older .doc format" in text
|
||||||
in tika_parser.text
|
archive = tika_parser.get_archive_path()
|
||||||
)
|
assert archive is not None
|
||||||
assert tika_parser.archive_path is not None
|
with archive.open("rb") as f:
|
||||||
with Path(tika_parser.archive_path).open("rb") as f:
|
|
||||||
assert b"PDF-" in f.read()[:10]
|
assert b"PDF-" in f.read()[:10]
|
||||||
|
|
||||||
def test_tika_fails_multi_part(
|
def test_tika_fails_multi_part(
|
||||||
@@ -133,6 +134,7 @@ class TestTikaParserAgainstServer:
|
|||||||
[sample_broken_odt, "application/vnd.oasis.opendocument.text"],
|
[sample_broken_odt, "application/vnd.oasis.opendocument.text"],
|
||||||
)
|
)
|
||||||
|
|
||||||
assert tika_parser.archive_path is not None
|
archive = tika_parser.get_archive_path()
|
||||||
with Path(tika_parser.archive_path).open("rb") as f:
|
assert archive is not None
|
||||||
|
with archive.open("rb") as f:
|
||||||
assert b"PDF-" in f.read()[:10]
|
assert b"PDF-" in f.read()[:10]
|
||||||
@@ -9,7 +9,80 @@ from pytest_django.fixtures import SettingsWrapper
|
|||||||
from pytest_httpx import HTTPXMock
|
from pytest_httpx import HTTPXMock
|
||||||
|
|
||||||
from documents.parsers import ParseError
|
from documents.parsers import ParseError
|
||||||
from paperless_tika.parsers import TikaDocumentParser
|
from paperless.parsers import ParserContext
|
||||||
|
from paperless.parsers import ParserProtocol
|
||||||
|
from paperless.parsers.tika import TikaDocumentParser
|
||||||
|
|
||||||
|
|
||||||
|
class TestTikaParserRegistryInterface:
|
||||||
|
"""Verify that TikaDocumentParser satisfies the ParserProtocol contract."""
|
||||||
|
|
||||||
|
def test_satisfies_parser_protocol(self) -> None:
|
||||||
|
assert isinstance(TikaDocumentParser(), ParserProtocol)
|
||||||
|
|
||||||
|
def test_supported_mime_types_is_classmethod(self) -> None:
|
||||||
|
mime_types = TikaDocumentParser.supported_mime_types()
|
||||||
|
assert isinstance(mime_types, dict)
|
||||||
|
assert len(mime_types) > 0
|
||||||
|
|
||||||
|
def test_score_returns_none_when_tika_disabled(
|
||||||
|
self,
|
||||||
|
settings: SettingsWrapper,
|
||||||
|
) -> None:
|
||||||
|
settings.TIKA_ENABLED = False
|
||||||
|
result = TikaDocumentParser.score(
|
||||||
|
"application/vnd.oasis.opendocument.text",
|
||||||
|
"sample.odt",
|
||||||
|
)
|
||||||
|
assert result is None
|
||||||
|
|
||||||
|
def test_score_returns_int_when_tika_enabled(
|
||||||
|
self,
|
||||||
|
settings: SettingsWrapper,
|
||||||
|
) -> None:
|
||||||
|
settings.TIKA_ENABLED = True
|
||||||
|
result = TikaDocumentParser.score(
|
||||||
|
"application/vnd.oasis.opendocument.text",
|
||||||
|
"sample.odt",
|
||||||
|
)
|
||||||
|
assert isinstance(result, int)
|
||||||
|
|
||||||
|
def test_score_returns_none_for_unsupported_mime(
|
||||||
|
self,
|
||||||
|
settings: SettingsWrapper,
|
||||||
|
) -> None:
|
||||||
|
settings.TIKA_ENABLED = True
|
||||||
|
result = TikaDocumentParser.score("application/pdf", "doc.pdf")
|
||||||
|
assert result is None
|
||||||
|
|
||||||
|
def test_can_produce_archive_is_false(self) -> None:
|
||||||
|
assert TikaDocumentParser().can_produce_archive is False
|
||||||
|
|
||||||
|
def test_requires_pdf_rendition_is_true(self) -> None:
|
||||||
|
assert TikaDocumentParser().requires_pdf_rendition is True
|
||||||
|
|
||||||
|
def test_get_page_count_returns_none_without_archive(
|
||||||
|
self,
|
||||||
|
tika_parser: TikaDocumentParser,
|
||||||
|
sample_odt_file: Path,
|
||||||
|
) -> None:
|
||||||
|
assert (
|
||||||
|
tika_parser.get_page_count(
|
||||||
|
sample_odt_file,
|
||||||
|
"application/vnd.oasis.opendocument.text",
|
||||||
|
)
|
||||||
|
is None
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_get_page_count_returns_int_with_pdf_archive(
|
||||||
|
self,
|
||||||
|
tika_parser: TikaDocumentParser,
|
||||||
|
simple_digital_pdf_file: Path,
|
||||||
|
) -> None:
|
||||||
|
tika_parser._archive_path = simple_digital_pdf_file
|
||||||
|
count = tika_parser.get_page_count(simple_digital_pdf_file, "application/pdf")
|
||||||
|
assert isinstance(count, int)
|
||||||
|
assert count > 0
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.django_db()
|
@pytest.mark.django_db()
|
||||||
@@ -34,14 +107,15 @@ class TestTikaParser:
|
|||||||
# Pretend convert to PDF response
|
# Pretend convert to PDF response
|
||||||
httpx_mock.add_response(content=b"PDF document")
|
httpx_mock.add_response(content=b"PDF document")
|
||||||
|
|
||||||
|
tika_parser.configure(ParserContext())
|
||||||
tika_parser.parse(sample_odt_file, "application/vnd.oasis.opendocument.text")
|
tika_parser.parse(sample_odt_file, "application/vnd.oasis.opendocument.text")
|
||||||
|
|
||||||
assert tika_parser.text == "the content"
|
assert tika_parser.get_text() == "the content"
|
||||||
assert tika_parser.archive_path is not None
|
assert tika_parser.get_archive_path() is not None
|
||||||
with Path(tika_parser.archive_path).open("rb") as f:
|
with Path(tika_parser.get_archive_path()).open("rb") as f:
|
||||||
assert f.read() == b"PDF document"
|
assert f.read() == b"PDF document"
|
||||||
|
|
||||||
assert tika_parser.date == datetime.datetime(
|
assert tika_parser.get_date() == datetime.datetime(
|
||||||
2020,
|
2020,
|
||||||
11,
|
11,
|
||||||
21,
|
21,
|
||||||
@@ -89,7 +163,7 @@ class TestTikaParser:
|
|||||||
httpx_mock.add_response(status_code=HTTPStatus.INTERNAL_SERVER_ERROR)
|
httpx_mock.add_response(status_code=HTTPStatus.INTERNAL_SERVER_ERROR)
|
||||||
|
|
||||||
with pytest.raises(ParseError):
|
with pytest.raises(ParseError):
|
||||||
tika_parser.convert_to_pdf(sample_odt_file, None)
|
tika_parser._convert_to_pdf(sample_odt_file)
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
("setting_value", "expected_form_value"),
|
("setting_value", "expected_form_value"),
|
||||||
@@ -106,7 +180,6 @@ class TestTikaParser:
|
|||||||
expected_form_value: str,
|
expected_form_value: str,
|
||||||
httpx_mock: HTTPXMock,
|
httpx_mock: HTTPXMock,
|
||||||
settings: SettingsWrapper,
|
settings: SettingsWrapper,
|
||||||
tika_parser: TikaDocumentParser,
|
|
||||||
sample_odt_file: Path,
|
sample_odt_file: Path,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""
|
"""
|
||||||
@@ -117,6 +190,8 @@ class TestTikaParser:
|
|||||||
THEN:
|
THEN:
|
||||||
- Request to Gotenberg contains the expected PDF/A format string
|
- Request to Gotenberg contains the expected PDF/A format string
|
||||||
"""
|
"""
|
||||||
|
# Parser must be created after the setting is changed so that
|
||||||
|
# OutputTypeConfig reads the correct value at __init__ time.
|
||||||
settings.OCR_OUTPUT_TYPE = setting_value
|
settings.OCR_OUTPUT_TYPE = setting_value
|
||||||
httpx_mock.add_response(
|
httpx_mock.add_response(
|
||||||
status_code=codes.OK,
|
status_code=codes.OK,
|
||||||
@@ -124,7 +199,8 @@ class TestTikaParser:
|
|||||||
method="POST",
|
method="POST",
|
||||||
)
|
)
|
||||||
|
|
||||||
tika_parser.convert_to_pdf(sample_odt_file, None)
|
with TikaDocumentParser() as parser:
|
||||||
|
parser._convert_to_pdf(sample_odt_file)
|
||||||
|
|
||||||
request = httpx_mock.get_request()
|
request = httpx_mock.get_request()
|
||||||
|
|
||||||
|
Before Width: | Height: | Size: 6.0 KiB After Width: | Height: | Size: 6.0 KiB |
|
Before Width: | Height: | Size: 2.8 KiB After Width: | Height: | Size: 2.8 KiB |
|
Before Width: | Height: | Size: 6.9 KiB After Width: | Height: | Size: 6.9 KiB |
|
Before Width: | Height: | Size: 5.2 KiB After Width: | Height: | Size: 5.2 KiB |
|
Before Width: | Height: | Size: 5.7 KiB After Width: | Height: | Size: 5.7 KiB |