Auto translate strings

Merge branch 'main' into dev
# Conflicts: # docs/setup.md # src-ui/src/main.ts # src/documents/tests/test_api_bulk_edit.py # src/documents/tests/test_api_custom_fields.py # src/documents/tests/test_api_search.py # src/documents/tests/test_api_status.py # src/documents/tests/test_workflows.py # src/paperless_mail/tests/test_api.py
2026-03-21 16:32:45 +00:00 · 2026-03-21 09:26:23 +00:00 · 2026-03-21 02:12:19 -07:00 · 2026-03-21 01:50:04 -07:00 · 2026-03-21 01:49:32 -07:00 · 2026-03-21 01:24:23 -07:00
173 changed files with 16396 additions and 9730 deletions
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -12,6 +12,8 @@ updates:
    open-pull-requests-limit: 10
    schedule:
      interval: "monthly"
    cooldown:
      default-days: 7
    labels:
      - "frontend"
      - "dependencies"
@@ -36,7 +38,9 @@ updates:
    directory: "/"
    # Check for updates once a month
    schedule:
-      interval: "weekly"
+      interval: "monthly"
    cooldown:
      default-days: 7
    labels:
      - "backend"
      - "dependencies"
@@ -97,6 +101,8 @@ updates:
    schedule:
      # Check for updates to GitHub Actions every month
      interval: "monthly"
    cooldown:
      default-days: 7
    labels:
      - "ci-cd"
      - "dependencies"
@@ -112,7 +118,9 @@ updates:
      - "/"
      - "/.devcontainer/"
    schedule:
-      interval: "weekly"
+      interval: "monthly"
    cooldown:
      default-days: 7
    open-pull-requests-limit: 5
    labels:
      - "dependencies"
@@ -123,7 +131,9 @@ updates:
  - package-ecosystem: "docker-compose"
    directory: "/docker/compose/"
    schedule:
-      interval: "weekly"
+      interval: "monthly"
    cooldown:
      default-days: 7
    open-pull-requests-limit: 5
    labels:
      - "dependencies"
@@ -147,3 +157,14 @@ updates:
      postgres:
        patterns:
          - "docker.io/library/postgres*"
      greenmail:
        patterns:
          - "docker.io/greenmail*"
  - package-ecosystem: "pre-commit" # See documentation for possible values
    directory: "/" # Location of package manifests
    schedule:
      interval: "monthly"
    groups:
      pre-commit-dependencies:
        patterns:
          - "*"
--- a/.github/workflows/ci-docker.yml
+++ b/.github/workflows/ci-docker.yml
@@ -104,9 +104,9 @@ jobs:
          echo "repository=${repo_name}"
          echo "name=${repo_name}" >> $GITHUB_OUTPUT
      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3.12.0
+        uses: docker/setup-buildx-action@v4.0.0
      - name: Login to GitHub Container Registry
-        uses: docker/login-action@v3.7.0
+        uses: docker/login-action@v4.0.0
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
@@ -119,7 +119,7 @@ jobs:
          sudo rm -rf "$AGENT_TOOLSDIRECTORY"
      - name: Docker metadata
        id: docker-meta
-        uses: docker/metadata-action@v5.10.0
+        uses: docker/metadata-action@v6.0.0
        with:
          images: |
            ${{ env.REGISTRY }}/${{ steps.repo.outputs.name }}
@@ -130,7 +130,7 @@ jobs:
            type=semver,pattern={{major}}.{{minor}}
      - name: Build and push by digest
        id: build
-        uses: docker/build-push-action@v6.19.2
+        uses: docker/build-push-action@v7.0.0
        with:
          context: .
          file: ./Dockerfile
@@ -179,29 +179,29 @@ jobs:
          echo "Downloaded digests:"
          ls -la /tmp/digests/
      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3.12.0
+        uses: docker/setup-buildx-action@v4.0.0
      - name: Login to GitHub Container Registry
-        uses: docker/login-action@v3.7.0
+        uses: docker/login-action@v4.0.0
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Login to Docker Hub
        if: needs.build-arch.outputs.push-external == 'true'
-        uses: docker/login-action@v3.7.0
+        uses: docker/login-action@v4.0.0
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
      - name: Login to Quay.io
        if: needs.build-arch.outputs.push-external == 'true'
-        uses: docker/login-action@v3.7.0
+        uses: docker/login-action@v4.0.0
        with:
          registry: quay.io
          username: ${{ secrets.QUAY_USERNAME }}
          password: ${{ secrets.QUAY_ROBOT_TOKEN }}
      - name: Docker metadata
        id: docker-meta
-        uses: docker/metadata-action@v5.10.0
+        uses: docker/metadata-action@v6.0.0
        with:
          images: |
            ${{ env.REGISTRY }}/${{ needs.build-arch.outputs.repository }}
--- a/.github/workflows/ci-frontend.yml
+++ b/.github/workflows/ci-frontend.yml
@@ -67,7 +67,7 @@ jobs:
        with:
          version: 10
      - name: Use Node.js 24
-        uses: actions/setup-node@v6.2.0
+        uses: actions/setup-node@v6.3.0
        with:
          node-version: 24.x
          cache: 'pnpm'
@@ -95,7 +95,7 @@ jobs:
        with:
          version: 10
      - name: Use Node.js 24
-        uses: actions/setup-node@v6.2.0
+        uses: actions/setup-node@v6.3.0
        with:
          node-version: 24.x
          cache: 'pnpm'
@@ -130,7 +130,7 @@ jobs:
        with:
          version: 10
      - name: Use Node.js 24
-        uses: actions/setup-node@v6.2.0
+        uses: actions/setup-node@v6.3.0
        with:
          node-version: 24.x
          cache: 'pnpm'
@@ -181,7 +181,7 @@ jobs:
        with:
          version: 10
      - name: Use Node.js 24
-        uses: actions/setup-node@v6.2.0
+        uses: actions/setup-node@v6.3.0
        with:
          node-version: 24.x
          cache: 'pnpm'
@@ -214,7 +214,7 @@ jobs:
        with:
          version: 10
      - name: Use Node.js 24
-        uses: actions/setup-node@v6.2.0
+        uses: actions/setup-node@v6.3.0
        with:
          node-version: 24.x
          cache: 'pnpm'
--- a/.github/workflows/ci-release.yml
+++ b/.github/workflows/ci-release.yml
@@ -35,7 +35,7 @@ jobs:
        with:
          version: 10
      - name: Use Node.js 24
-        uses: actions/setup-node@v6.2.0
+        uses: actions/setup-node@v6.3.0
        with:
          node-version: 24.x
          cache: 'pnpm'
--- a/.github/workflows/translate-strings.yml
+++ b/.github/workflows/translate-strings.yml
@@ -40,7 +40,7 @@ jobs:
        with:
          version: 10
      - name: Use Node.js 24
-        uses: actions/setup-node@v6.2.0
+        uses: actions/setup-node@v6.3.0
        with:
          node-version: 24.x
          cache: 'pnpm'
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -29,7 +29,7 @@ repos:
      - id: check-case-conflict
      - id: detect-private-key
  - repo: https://github.com/codespell-project/codespell
-    rev: v2.4.1
+    rev: v2.4.2
    hooks:
      - id: codespell
        additional_dependencies: [tomli]
@@ -46,11 +46,11 @@ repos:
          - ts
          - markdown
        additional_dependencies:
-          - prettier@3.3.3
+          - prettier@3.8.1
-          - 'prettier-plugin-organize-imports@4.1.0'
+          - 'prettier-plugin-organize-imports@4.3.0'
  # Python hooks
  - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.15.5
+    rev: v0.15.6
    hooks:
      - id: ruff-check
      - id: ruff-format
@@ -65,7 +65,7 @@ repos:
      - id: hadolint
  # Shell script hooks
  - repo: https://github.com/lovesegfault/beautysh
-    rev: v6.4.2
+    rev: v6.4.3
    hooks:
      - id: beautysh
        types: [file]
--- a/.prettierrc.js
+++ b/.prettierrc.js
@@ -5,14 +5,6 @@ const config = {
 	singleQuote: true,
 	// https://prettier.io/docs/en/options.html#trailing-commas
 	trailingComma: 'es5',
 	overrides: [
 		{
 			files: ['docs/*.md'],
 			options: {
 				tabWidth: 4,
 			},
 		},
 	],
 	plugins: [require('prettier-plugin-organize-imports')],
 }
--- a/docker/compose/docker-compose.ci-test.yml
+++ b/docker/compose/docker-compose.ci-test.yml
@@ -18,13 +18,13 @@ services:
      - "--log-level=warn"
      - "--log-format=text"
  tika:
-    image: docker.io/apache/tika:latest
+    image: docker.io/apache/tika:3.2.3.0
    hostname: tika
    container_name: tika
    network_mode: host
    restart: unless-stopped
  greenmail:
-    image: greenmail/standalone:2.1.8
+    image: docker.io/greenmail/standalone:2.1.8
    hostname: greenmail
    container_name: greenmail
    environment:
--- a/docker/rootfs/etc/s6-overlay/s6-rc.d/init-migrations/run
+++ b/docker/rootfs/etc/s6-overlay/s6-rc.d/init-migrations/run
@@ -10,12 +10,10 @@ cd "${PAPERLESS_SRC_DIR}"
 # The whole migrate, with flock, needs to run as the right user
 if [[ -n "${USER_IS_NON_ROOT}" ]]; then
-	exec s6-setlock -n "${data_dir}/migration_lock" python3 manage.py check --tag compatibility paperless
+	python3 manage.py check --tag compatibility paperless || exit 1
 	exec s6-setlock -n "${data_dir}/migration_lock" python3 manage.py migrate --skip-checks --no-input
 else
-	exec s6-setuidgid paperless \
+	s6-setuidgid paperless python3 manage.py check --tag compatibility paperless  || exit 1
 		s6-setlock -n "${data_dir}/migration_lock" \
 		python3 manage.py check --tag compatibility paperless
 	exec s6-setuidgid paperless \
 		s6-setlock -n "${data_dir}/migration_lock" \
 		python3 manage.py migrate --skip-checks --no-input
--- a/docker/rootfs/etc/s6-overlay/s6-rc.d/init-modify-user/run
+++ b/docker/rootfs/etc/s6-overlay/s6-rc.d/init-modify-user/run
@@ -2,6 +2,17 @@
 # shellcheck shell=bash
 declare -r log_prefix="[init-user]"
 # When the container is started as a non-root user (e.g. via `user: 999:999`
 # in Docker Compose), usermod/groupmod require root and are meaningless.
 # USERMAP_* variables only apply to the root-started path.
 if [[ -n "${USER_IS_NON_ROOT}" ]]; then
 	if [[ -n "${USERMAP_UID}" || -n "${USERMAP_GID}" ]]; then
 		echo "${log_prefix} WARNING: USERMAP_UID/USERMAP_GID are set but have no effect when the container is started as a non-root user"
 	fi
 	echo "${log_prefix} Running as non-root user ($(id --user):$(id --group)), skipping UID/GID remapping"
 	exit 0
 fi
 declare -r usermap_original_uid=$(id -u paperless)
 declare -r usermap_original_gid=$(id -g paperless)
 declare -r usermap_new_uid=${USERMAP_UID:-$usermap_original_uid}
--- a/docs/administration.md
+++ b/docs/administration.md
@@ -10,16 +10,16 @@ consuming documents at that time.
 Options available to any installation of paperless:
-   Use the [document exporter](#exporter). The document exporter exports all your documents,
+- Use the [document exporter](#exporter). The document exporter exports all your documents,
-    thumbnails, metadata, and database contents to a specific folder. You may import your
+  thumbnails, metadata, and database contents to a specific folder. You may import your
-    documents and settings into a fresh instance of paperless again or store your
+  documents and settings into a fresh instance of paperless again or store your
-    documents in another DMS with this export.
+  documents in another DMS with this export.
-    The document exporter is also able to update an already existing
+  The document exporter is also able to update an already existing
-    export. Therefore, incremental backups with `rsync` are entirely
+  export. Therefore, incremental backups with `rsync` are entirely
-    possible.
+  possible.
-    The exporter does not include API tokens and they will need to be re-generated after importing.
+  The exporter does not include API tokens and they will need to be re-generated after importing.
 !!! caution
@@ -29,28 +29,27 @@ Options available to any installation of paperless:
 Options available to docker installations:
-   Backup the docker volumes. These usually reside within
+- Backup the docker volumes. These usually reside within
-    `/var/lib/docker/volumes` on the host and you need to be root in
+  `/var/lib/docker/volumes` on the host and you need to be root in
-    order to access them.
+  order to access them.
-    Paperless uses 4 volumes:
+  Paperless uses 4 volumes:
-
+  - `paperless_media`: This is where your documents are stored.
-    -   `paperless_media`: This is where your documents are stored.
+  - `paperless_data`: This is where auxiliary data is stored. This
-    -   `paperless_data`: This is where auxiliary data is stored. This
+    folder also contains the SQLite database, if you use it.
-        folder also contains the SQLite database, if you use it.
+  - `paperless_pgdata`: Exists only if you use PostgreSQL and
-    -   `paperless_pgdata`: Exists only if you use PostgreSQL and
+    contains the database.
-        contains the database.
+  - `paperless_dbdata`: Exists only if you use MariaDB and contains
-    -   `paperless_dbdata`: Exists only if you use MariaDB and contains
+    the database.
        the database.
 Options available to bare-metal and non-docker installations:
-   Backup the entire paperless folder. This ensures that if your
+- Backup the entire paperless folder. This ensures that if your
-    paperless instance crashes at some point or your disk fails, you can
+  paperless instance crashes at some point or your disk fails, you can
-    simply copy the folder back into place and it works.
+  simply copy the folder back into place and it works.
-    When using PostgreSQL or MariaDB, you'll also have to backup the
+  When using PostgreSQL or MariaDB, you'll also have to backup the
-    database.
+  database.
 ### Restoring {#migrating-restoring}
@@ -509,19 +508,19 @@ collection for issues.
 The issues detected by the sanity checker are as follows:
-   Missing original files.
+- Missing original files.
-   Missing archive files.
+- Missing archive files.
-   Inaccessible original files due to improper permissions.
+- Inaccessible original files due to improper permissions.
-   Inaccessible archive files due to improper permissions.
+- Inaccessible archive files due to improper permissions.
-   Corrupted original documents by comparing their checksum against
+- Corrupted original documents by comparing their checksum against
-    what is stored in the database.
+  what is stored in the database.
-   Corrupted archive documents by comparing their checksum against what
+- Corrupted archive documents by comparing their checksum against what
-    is stored in the database.
+  is stored in the database.
-   Missing thumbnails.
+- Missing thumbnails.
-   Inaccessible thumbnails due to improper permissions.
+- Inaccessible thumbnails due to improper permissions.
-   Documents without any content (warning).
+- Documents without any content (warning).
-   Orphaned files in the media directory (warning). These are files
+- Orphaned files in the media directory (warning). These are files
-    that are not referenced by any document in paperless.
+  that are not referenced by any document in paperless.
 ```
 document_sanity_checker
--- a/docs/advanced_usage.md
+++ b/docs/advanced_usage.md
@@ -25,20 +25,20 @@ documents.
 The following algorithms are available:
-   **None:** No matching will be performed.
+- **None:** No matching will be performed.
-   **Any:** Looks for any occurrence of any word provided in match in
+- **Any:** Looks for any occurrence of any word provided in match in
-    the PDF. If you define the match as `Bank1 Bank2`, it will match
+  the PDF. If you define the match as `Bank1 Bank2`, it will match
-    documents containing either of these terms.
+  documents containing either of these terms.
-   **All:** Requires that every word provided appears in the PDF,
+- **All:** Requires that every word provided appears in the PDF,
-    albeit not in the order provided.
+  albeit not in the order provided.
-   **Exact:** Matches only if the match appears exactly as provided
+- **Exact:** Matches only if the match appears exactly as provided
-    (i.e. preserve ordering) in the PDF.
+  (i.e. preserve ordering) in the PDF.
-   **Regular expression:** Parses the match as a regular expression and
+- **Regular expression:** Parses the match as a regular expression and
-    tries to find a match within the document.
+  tries to find a match within the document.
-   **Fuzzy match:** Uses a partial matching based on locating the tag text
+- **Fuzzy match:** Uses a partial matching based on locating the tag text
-    inside the document, using a [partial ratio](https://rapidfuzz.github.io/RapidFuzz/Usage/fuzz.html#partial-ratio)
+  inside the document, using a [partial ratio](https://rapidfuzz.github.io/RapidFuzz/Usage/fuzz.html#partial-ratio)
-   **Auto:** Tries to automatically match new documents. This does not
+- **Auto:** Tries to automatically match new documents. This does not
-    require you to set a match. See the [notes below](#automatic-matching).
+  require you to set a match. See the [notes below](#automatic-matching).
 When using the _any_ or _all_ matching algorithms, you can search for
 terms that consist of multiple words by enclosing them in double quotes.
@@ -69,33 +69,33 @@ Paperless tries to hide much of the involved complexity with this
 approach. However, there are a couple caveats you need to keep in mind
 when using this feature:
-   Changes to your documents are not immediately reflected by the
+- Changes to your documents are not immediately reflected by the
-    matching algorithm. The neural network needs to be _trained_ on your
+  matching algorithm. The neural network needs to be _trained_ on your
-    documents after changes. Paperless periodically (default: once each
+  documents after changes. Paperless periodically (default: once each
-    hour) checks for changes and does this automatically for you.
+  hour) checks for changes and does this automatically for you.
-   The Auto matching algorithm only takes documents into account which
+- The Auto matching algorithm only takes documents into account which
-    are NOT placed in your inbox (i.e. have any inbox tags assigned to
+  are NOT placed in your inbox (i.e. do not have any inbox tags assigned to
-    them). This ensures that the neural network only learns from
+  them). This ensures that the neural network only learns from
-    documents which you have correctly tagged before.
+  documents which you have correctly tagged before.
-   The matching algorithm can only work if there is a correlation
+- The matching algorithm can only work if there is a correlation
-    between the tag, correspondent, document type, or storage path and
+  between the tag, correspondent, document type, or storage path and
-    the document itself. Your bank statements usually contain your bank
+  the document itself. Your bank statements usually contain your bank
-    account number and the name of the bank, so this works reasonably
+  account number and the name of the bank, so this works reasonably
-    well, However, tags such as "TODO" cannot be automatically
+  well. However, tags such as "TODO" cannot be automatically
-    assigned.
+  assigned.
-   The matching algorithm needs a reasonable number of documents to
+- The matching algorithm needs a reasonable number of documents to
-    identify when to assign tags, correspondents, storage paths, and
+  identify when to assign tags, correspondents, storage paths, and
-    types. If one out of a thousand documents has the correspondent
+  types. If one out of a thousand documents has the correspondent
-    "Very obscure web shop I bought something five years ago", it will
+  "Very obscure web shop I bought something five years ago", it will
-    probably not assign this correspondent automatically if you buy
+  probably not assign this correspondent automatically if you buy
-    something from them again. The more documents, the better.
+  something from them again. The more documents, the better.
-   Paperless also needs a reasonable amount of negative examples to
+- Paperless also needs a reasonable amount of negative examples to
-    decide when not to assign a certain tag, correspondent, document
+  decide when not to assign a certain tag, correspondent, document
-    type, or storage path. This will usually be the case as you start
+  type, or storage path. This will usually be the case as you start
-    filling up paperless with documents. Example: If all your documents
+  filling up paperless with documents. Example: If all your documents
-    are either from "Webshop" or "Bank", paperless will assign one
+  are either from "Webshop" or "Bank", paperless will assign one
-    of these correspondents to ANY new document, if both are set to
+  of these correspondents to ANY new document, if both are set to
-    automatic matching.
+  automatic matching.
 ## Hooking into the consumption process {#consume-hooks}
@@ -243,12 +243,12 @@ webserver:
 Troubleshooting:
-   Monitor the Docker Compose log
+- Monitor the Docker Compose log
-    `cd ~/paperless-ngx; docker compose logs -f`
+  `cd ~/paperless-ngx; docker compose logs -f`
-   Check your script's permission e.g. in case of permission error
+- Check your script's permission e.g. in case of permission error
-    `sudo chmod 755 post-consumption-example.sh`
+  `sudo chmod 755 post-consumption-example.sh`
-   Pipe your scripts's output to a log file e.g.
+- Pipe your script's output to a log file, e.g.
-    `echo "${DOCUMENT_ID}" | tee --append /usr/src/paperless/scripts/post-consumption-example.log`
+  `echo "${DOCUMENT_ID}" | tee --append /usr/src/paperless/scripts/post-consumption-example.log`
 ## File name handling {#file-name-handling}
@@ -307,35 +307,35 @@ will create a directory structure as follows:
 Paperless provides the following variables for use within filenames:
-   `{{ asn }}`: The archive serial number of the document, or "none".
+- `{{ asn }}`: The archive serial number of the document, or "none".
-   `{{ correspondent }}`: The name of the correspondent, or "none".
+- `{{ correspondent }}`: The name of the correspondent, or "none".
-   `{{ document_type }}`: The name of the document type, or "none".
+- `{{ document_type }}`: The name of the document type, or "none".
-   `{{ tag_list }}`: A comma separated list of all tags assigned to the
+- `{{ tag_list }}`: A comma separated list of all tags assigned to the
-    document.
+  document.
-   `{{ title }}`: The title of the document.
+- `{{ title }}`: The title of the document.
-   `{{ created }}`: The full date (ISO 8601 format, e.g. `2024-03-14`) the document was created.
+- `{{ created }}`: The full date (ISO 8601 format, e.g. `2024-03-14`) the document was created.
-   `{{ created_year }}`: Year created only, formatted as the year with
+- `{{ created_year }}`: Year created only, formatted as the year with
-    century.
+  century.
-   `{{ created_year_short }}`: Year created only, formatted as the year
+- `{{ created_year_short }}`: Year created only, formatted as the year
-    without century, zero padded.
+  without century, zero padded.
-   `{{ created_month }}`: Month created only (number 01-12).
+- `{{ created_month }}`: Month created only (number 01-12).
-   `{{ created_month_name }}`: Month created name, as per locale
+- `{{ created_month_name }}`: Month created name, as per locale
-   `{{ created_month_name_short }}`: Month created abbreviated name, as per
+- `{{ created_month_name_short }}`: Month created abbreviated name, as per
-    locale
+  locale
-   `{{ created_day }}`: Day created only (number 01-31).
+- `{{ created_day }}`: Day created only (number 01-31).
-   `{{ added }}`: The full date (ISO format) the document was added to
+- `{{ added }}`: The full date (ISO format) the document was added to
-    paperless.
+  paperless.
-   `{{ added_year }}`: Year added only.
+- `{{ added_year }}`: Year added only.
-   `{{ added_year_short }}`: Year added only, formatted as the year without
+- `{{ added_year_short }}`: Year added only, formatted as the year without
-    century, zero padded.
+  century, zero padded.
-   `{{ added_month }}`: Month added only (number 01-12).
+- `{{ added_month }}`: Month added only (number 01-12).
-   `{{ added_month_name }}`: Month added name, as per locale
+- `{{ added_month_name }}`: Month added name, as per locale
-   `{{ added_month_name_short }}`: Month added abbreviated name, as per
+- `{{ added_month_name_short }}`: Month added abbreviated name, as per
-    locale
+  locale
-   `{{ added_day }}`: Day added only (number 01-31).
+- `{{ added_day }}`: Day added only (number 01-31).
-   `{{ owner_username }}`: Username of document owner, if any, or "none"
+- `{{ owner_username }}`: Username of document owner, if any, or "none"
-   `{{ original_name }}`: Document original filename, minus the extension, if any, or "none"
+- `{{ original_name }}`: Document original filename, minus the extension, if any, or "none"
-   `{{ doc_pk }}`: The paperless identifier (primary key) for the document.
+- `{{ doc_pk }}`: The paperless identifier (primary key) for the document.
 !!! warning
@@ -388,10 +388,10 @@ before empty placeholders are removed as well, empty directories are omitted.
 When a single storage layout is not sufficient for your use case, storage paths allow for more complex
 structure to set precisely where each document is stored in the file system.
-   Each storage path is a [`PAPERLESS_FILENAME_FORMAT`](configuration.md#PAPERLESS_FILENAME_FORMAT) and
+- Each storage path is a [`PAPERLESS_FILENAME_FORMAT`](configuration.md#PAPERLESS_FILENAME_FORMAT) and
-    follows the rules described above
+  follows the rules described above
-   Each document is assigned a storage path using the matching algorithms described above, but can be
+- Each document is assigned a storage path using the matching algorithms described above, but can be
-    overwritten at any time
+  overwritten at any time
 For example, you could define the following two storage paths:
@@ -457,13 +457,13 @@ The `get_cf_value` filter retrieves a value from custom field data with optional
 ###### Parameters
-   `custom_fields`: This _must_ be the provided custom field data
+- `custom_fields`: This _must_ be the provided custom field data
-   `name` (str): Name of the custom field to retrieve
+- `name` (str): Name of the custom field to retrieve
-   `default` (str, optional): Default value to return if field is not found or has no value
+- `default` (str, optional): Default value to return if field is not found or has no value
 ###### Returns
-   `str | None`: The field value, default value, or `None` if neither exists
+- `str | None`: The field value, default value, or `None` if neither exists
 ###### Examples
@@ -487,12 +487,12 @@ The `datetime` filter formats a datetime string or datetime object using Python'
 ###### Parameters
-   `value` (str | datetime): Date/time value to format (strings will be parsed automatically)
+- `value` (str | datetime): Date/time value to format (strings will be parsed automatically)
-   `format` (str): Python strftime format string
+- `format` (str): Python strftime format string
 ###### Returns
-   `str`: Formatted datetime string
+- `str`: Formatted datetime string
 ###### Examples
@@ -525,13 +525,13 @@ An ISO string can also be provided to control the output format.
 ###### Parameters
-   `value` (date | datetime | str): Date, datetime object or ISO string to format (datetime should be timezone-aware)
+- `value` (date | datetime | str): Date, datetime object or ISO string to format (datetime should be timezone-aware)
-   `format` (str): Format type - either a Babel preset ('short', 'medium', 'long', 'full') or custom pattern
+- `format` (str): Format type - either a Babel preset ('short', 'medium', 'long', 'full') or custom pattern
-   `locale` (str): Locale code for localization (e.g., 'en_US', 'fr_FR', 'de_DE')
+- `locale` (str): Locale code for localization (e.g., 'en_US', 'fr_FR', 'de_DE')
 ###### Returns
-   `str`: Localized, formatted date string
+- `str`: Localized, formatted date string
 ###### Examples
@@ -565,15 +565,15 @@ See the [supported format codes](https://unicode.org/reports/tr35/tr35-dates.htm
 ### Format Presets
-   **short**: Abbreviated format (e.g., "1/15/24")
+- **short**: Abbreviated format (e.g., "1/15/24")
-   **medium**: Medium-length format (e.g., "Jan 15, 2024")
+- **medium**: Medium-length format (e.g., "Jan 15, 2024")
-   **long**: Long format with full month name (e.g., "January 15, 2024")
+- **long**: Long format with full month name (e.g., "January 15, 2024")
-   **full**: Full format including day of week (e.g., "Monday, January 15, 2024")
+- **full**: Full format including day of week (e.g., "Monday, January 15, 2024")
 #### Additional Variables
-   `{{ tag_name_list }}`: A list of tag names applied to the document, ordered by the tag name. Note this is a list, not a single string
+- `{{ tag_name_list }}`: A list of tag names applied to the document, ordered by the tag name. Note this is a list, not a single string
-   `{{ custom_fields }}`: A mapping of custom field names to their type and value. A user can access the mapping by field name or check if a field is applied by checking its existence in the variable.
+- `{{ custom_fields }}`: A mapping of custom field names to their type and value. A user can access the mapping by field name or check if a field is applied by checking its existence in the variable.
 !!! tip
@@ -675,15 +675,15 @@ installation, you can use volumes to accomplish this:
 ```yaml
 services:
  # ...
  webserver:
    environment:
      - PAPERLESS_ENABLE_FLOWER
    ports:
      - 5555:5555 # (2)!
    # ...
-    webserver:
+    volumes:
-        environment:
+      - /path/to/my/flowerconfig.py:/usr/src/paperless/src/paperless/flowerconfig.py:ro # (1)!
            - PAPERLESS_ENABLE_FLOWER
        ports:
            - 5555:5555 # (2)!
        # ...
        volumes:
            - /path/to/my/flowerconfig.py:/usr/src/paperless/src/paperless/flowerconfig.py:ro # (1)!
 ```
 1. Note the `:ro` tag means the file will be mounted as read only.
@@ -714,11 +714,11 @@ For example, using Docker Compose:
 ```yaml
 services:
  # ...
  webserver:
    # ...
-    webserver:
+    volumes:
-        # ...
+      - /path/to/my/scripts:/custom-cont-init.d:ro # (1)!
        volumes:
            - /path/to/my/scripts:/custom-cont-init.d:ro # (1)!
 ```
 1. Note the `:ro` tag means the folder will be mounted as read only. This is for extra security against changes
@@ -771,16 +771,16 @@ Paperless is able to utilize barcodes for automatically performing some tasks.
 At this time, the library utilized for detection of barcodes supports the following types:
-   AN-13/UPC-A
+- EAN-13/UPC-A
-   UPC-E
+- UPC-E
-   EAN-8
+- EAN-8
-   Code 128
+- Code 128
-   Code 93
+- Code 93
-   Code 39
+- Code 39
-   Codabar
+- Codabar
-   Interleaved 2 of 5
+- Interleaved 2 of 5
-   QR Code
+- QR Code
-   SQ Code
+- SQ Code
 For usage in Paperless, the type of barcode does not matter, only the contents of it.
@@ -793,8 +793,8 @@ below.
 If document splitting is enabled, Paperless splits _after_ a separator barcode by default.
 This means:
-   any page containing the configured separator barcode starts a new document, starting with the **next** page
+- any page containing the configured separator barcode starts a new document, starting with the **next** page
-   pages containing the separator barcode are discarded
+- pages containing the separator barcode are discarded
 This is intended for dedicated separator sheets such as PATCH-T pages.
@@ -831,10 +831,10 @@ to `true`.
 When enabled, documents will be split at pages containing tag barcodes, similar to how
 ASN barcodes work. Key features:
-   The page with the tag barcode is **retained** in the resulting document
+- The page with the tag barcode is **retained** in the resulting document
-   **Each split document extracts its own tags** - only tags on pages within that document are assigned
+- **Each split document extracts its own tags** - only tags on pages within that document are assigned
-   Multiple tag barcodes can trigger multiple splits in the same document
+- Multiple tag barcodes can trigger multiple splits in the same document
-   Works seamlessly with ASN barcodes - each split document gets its own ASN and tags
+- Works seamlessly with ASN barcodes - each split document gets its own ASN and tags
 This is useful for batch scanning where you place tag barcode pages between different
 documents to both separate and categorize them in a single operation.
@@ -996,9 +996,9 @@ If using docker, you'll need to add the following volume mounts to your `docker-
 ```yaml
 webserver:
-    volumes:
+  volumes:
-        - /home/user/.gnupg/pubring.gpg:/usr/src/paperless/.gnupg/pubring.gpg
+    - /home/user/.gnupg/pubring.gpg:/usr/src/paperless/.gnupg/pubring.gpg
-        - <path to gpg-agent socket>:/usr/src/paperless/.gnupg/S.gpg-agent
+    - <path to gpg-agent socket>:/usr/src/paperless/.gnupg/S.gpg-agent
 ```
 For a 'bare-metal' installation no further configuration is necessary. If you
@@ -1006,9 +1006,9 @@ want to use a separate `GNUPG_HOME`, you can do so by configuring the [PAPERLESS
 ### Troubleshooting
-   Make sure, that `gpg-agent` is running on your host machine
+- Make sure that `gpg-agent` is running on your host machine
-   Make sure, that encryption and decryption works from inside the container using the `gpg` commands from above.
+- Make sure that encryption and decryption work from inside the container using the `gpg` commands from above.
-   Check that all files in `/usr/src/paperless/.gnupg` have correct permissions
+- Check that all files in `/usr/src/paperless/.gnupg` have correct permissions
 ```shell
 paperless@9da1865df327:~/.gnupg$ ls -al
--- a/docs/api.md
+++ b/docs/api.md
@@ -66,10 +66,10 @@ Full text searching is available on the `/api/documents/` endpoint. Two
 specific query parameters cause the API to return full text search
 results:
-   `/api/documents/?query=your%20search%20query`: Search for a document
+- `/api/documents/?query=your%20search%20query`: Search for a document
-    using a full text query. For details on the syntax, see [Basic Usage - Searching](usage.md#basic-usage_searching).
+  using a full text query. For details on the syntax, see [Basic Usage - Searching](usage.md#basic-usage_searching).
-   `/api/documents/?more_like_id=1234`: Search for documents similar to
+- `/api/documents/?more_like_id=1234`: Search for documents similar to
-    the document with id 1234.
+  the document with id 1234.
 Pagination works exactly the same as it does for normal requests on this
 endpoint.
@@ -106,12 +106,12 @@ attribute with various information about the search results:
 }
 ```
-   `score` is an indication how well this document matches the query
+- `score` is an indication of how well this document matches the query
-    relative to the other search results.
+  relative to the other search results.
-   `highlights` is an excerpt from the document content and highlights
+- `highlights` is an excerpt from the document content and highlights
-    the search terms with `<span>` tags as shown above.
+  the search terms with `<span>` tags as shown above.
-   `rank` is the index of the search results. The first result will
+- `rank` is the index of the search results. The first result will
-    have rank 0.
+  have rank 0.
 ### Filtering by custom fields
@@ -122,33 +122,33 @@ use cases:
 1. Documents with a custom field "due" (date) between Aug 1, 2024 and
   Sept 1, 2024 (inclusive):
-    `?custom_field_query=["due", "range", ["2024-08-01", "2024-09-01"]]`
+   `?custom_field_query=["due", "range", ["2024-08-01", "2024-09-01"]]`
 2. Documents with a custom field "customer" (text) that equals "bob"
   (case sensitive):
-    `?custom_field_query=["customer", "exact", "bob"]`
+   `?custom_field_query=["customer", "exact", "bob"]`
 3. Documents with a custom field "answered" (boolean) set to `true`:
-    `?custom_field_query=["answered", "exact", true]`
+   `?custom_field_query=["answered", "exact", true]`
 4. Documents with a custom field "favorite animal" (select) set to either
   "cat" or "dog":
-    `?custom_field_query=["favorite animal", "in", ["cat", "dog"]]`
+   `?custom_field_query=["favorite animal", "in", ["cat", "dog"]]`
 5. Documents with a custom field "address" (text) that is empty:
-    `?custom_field_query=["OR", [["address", "isnull", true], ["address", "exact", ""]]]`
+   `?custom_field_query=["OR", [["address", "isnull", true], ["address", "exact", ""]]]`
 6. Documents that don't have a field called "foo":
-    `?custom_field_query=["foo", "exists", false]`
+   `?custom_field_query=["foo", "exists", false]`
 7. Documents that have document links "references" to both document 3 and 7:
-    `?custom_field_query=["references", "contains", [3, 7]]`
+   `?custom_field_query=["references", "contains", [3, 7]]`
 All field types support basic operations including `exact`, `in`, `isnull`,
 and `exists`. String, URL, and monetary fields support case-insensitive
@@ -164,8 +164,8 @@ Get auto completions for a partial search term.
 Query parameters:
-   `term`: The incomplete term.
+- `term`: The incomplete term.
-   `limit`: Amount of results. Defaults to 10.
+- `limit`: Amount of results. Defaults to 10.
 Results returned by the endpoint are ordered by importance of the term
 in the document index. The first result is the term that has the highest
@@ -189,19 +189,19 @@ from there.
 The endpoint supports the following optional form fields:
-   `title`: Specify a title that the consumer should use for the
+- `title`: Specify a title that the consumer should use for the
-    document.
+  document.
-   `created`: Specify a DateTime where the document was created (e.g.
+- `created`: Specify a DateTime when the document was created (e.g.
-    "2016-04-19" or "2016-04-19 06:15:00+02:00").
+  "2016-04-19" or "2016-04-19 06:15:00+02:00").
-   `correspondent`: Specify the ID of a correspondent that the consumer
+- `correspondent`: Specify the ID of a correspondent that the consumer
-    should use for the document.
+  should use for the document.
-   `document_type`: Similar to correspondent.
+- `document_type`: Similar to correspondent.
-   `storage_path`: Similar to correspondent.
+- `storage_path`: Similar to correspondent.
-   `tags`: Similar to correspondent. Specify this multiple times to
+- `tags`: Similar to correspondent. Specify this multiple times to
-    have multiple tags added to the document.
+  have multiple tags added to the document.
-   `archive_serial_number`: An optional archive serial number to set.
+- `archive_serial_number`: An optional archive serial number to set.
-   `custom_fields`: Either an array of custom field ids to assign (with an empty
+- `custom_fields`: Either an array of custom field ids to assign (with an empty
-    value) to the document or an object mapping field id -> value.
+  value) to the document or an object mapping field id -> value.
 The endpoint will immediately return HTTP 200 if the document consumption
 process was started successfully, with the UUID of the consumption task
@@ -215,16 +215,16 @@ consumption including the ID of a created document if consumption succeeded.
 Document versions are file-level versions linked to one root document.
-   Root document metadata (title, tags, correspondent, document type, storage path, custom fields, permissions) remains shared.
+- Root document metadata (title, tags, correspondent, document type, storage path, custom fields, permissions) remains shared.
-   Version-specific file data (file, mime type, checksums, archive info, extracted text content) belongs to the selected/latest version.
+- Version-specific file data (file, mime type, checksums, archive info, extracted text content) belongs to the selected/latest version.
 Version-aware endpoints:
-   `GET /api/documents/{id}/`: returns root document data; `content` resolves to latest version content by default. Use `?version={version_id}` to resolve content for a specific version.
+- `GET /api/documents/{id}/`: returns root document data; `content` resolves to latest version content by default. Use `?version={version_id}` to resolve content for a specific version.
-   `PATCH /api/documents/{id}/`: content updates target the selected version (`?version={version_id}`) or latest version by default; non-content metadata updates target the root document.
+- `PATCH /api/documents/{id}/`: content updates target the selected version (`?version={version_id}`) or latest version by default; non-content metadata updates target the root document.
-   `GET /api/documents/{id}/download/`, `GET /api/documents/{id}/preview/`, `GET /api/documents/{id}/thumb/`, `GET /api/documents/{id}/metadata/`: accept `?version={version_id}`.
+- `GET /api/documents/{id}/download/`, `GET /api/documents/{id}/preview/`, `GET /api/documents/{id}/thumb/`, `GET /api/documents/{id}/metadata/`: accept `?version={version_id}`.
-   `POST /api/documents/{id}/update_version/`: uploads a new version using multipart form field `document` and optional `version_label`.
+- `POST /api/documents/{id}/update_version/`: uploads a new version using multipart form field `document` and optional `version_label`.
-   `DELETE /api/documents/{root_id}/versions/{version_id}/`: deletes a non-root version.
+- `DELETE /api/documents/{root_id}/versions/{version_id}/`: deletes a non-root version.
 ## Permissions
@@ -282,34 +282,34 @@ a json payload of the format:
 The following methods are supported:
-   `set_correspondent`
+- `set_correspondent`
-    -   Requires `parameters`: `{ "correspondent": CORRESPONDENT_ID }`
+  - Requires `parameters`: `{ "correspondent": CORRESPONDENT_ID }`
-   `set_document_type`
+- `set_document_type`
-    -   Requires `parameters`: `{ "document_type": DOCUMENT_TYPE_ID }`
+  - Requires `parameters`: `{ "document_type": DOCUMENT_TYPE_ID }`
-   `set_storage_path`
+- `set_storage_path`
-    -   Requires `parameters`: `{ "storage_path": STORAGE_PATH_ID }`
+  - Requires `parameters`: `{ "storage_path": STORAGE_PATH_ID }`
-   `add_tag`
+- `add_tag`
-    -   Requires `parameters`: `{ "tag": TAG_ID }`
+  - Requires `parameters`: `{ "tag": TAG_ID }`
-   `remove_tag`
+- `remove_tag`
-    -   Requires `parameters`: `{ "tag": TAG_ID }`
+  - Requires `parameters`: `{ "tag": TAG_ID }`
-   `modify_tags`
+- `modify_tags`
-    -   Requires `parameters`: `{ "add_tags": [LIST_OF_TAG_IDS] }` and `{ "remove_tags": [LIST_OF_TAG_IDS] }`
+  - Requires `parameters`: `{ "add_tags": [LIST_OF_TAG_IDS] }` and `{ "remove_tags": [LIST_OF_TAG_IDS] }`
-   `delete`
+- `delete`
-    -   No `parameters` required
+  - No `parameters` required
-   `reprocess`
+- `reprocess`
-    -   No `parameters` required
+  - No `parameters` required
-   `set_permissions`
+- `set_permissions`
-    -   Requires `parameters`:
+  - Requires `parameters`:
-        -   `"set_permissions": PERMISSIONS_OBJ` (see format [above](#permissions)) and / or
+    - `"set_permissions": PERMISSIONS_OBJ` (see format [above](#permissions)) and / or
-        -   `"owner": OWNER_ID or null`
+    - `"owner": OWNER_ID or null`
-        -   `"merge": true or false` (defaults to false)
+    - `"merge": true or false` (defaults to false)
-    -   The `merge` flag determines if the supplied permissions will overwrite all existing permissions (including
+  - The `merge` flag determines if the supplied permissions will overwrite all existing permissions (including
-        removing them) or be merged with existing permissions.
+    removing them) or be merged with existing permissions.
-   `modify_custom_fields`
+- `modify_custom_fields`
-    -   Requires `parameters`:
+  - Requires `parameters`:
-        -   `"add_custom_fields": { CUSTOM_FIELD_ID: VALUE }`: JSON object consisting of custom field id:value pairs to add to the document, can also be a list of custom field IDs
+    - `"add_custom_fields": { CUSTOM_FIELD_ID: VALUE }`: JSON object consisting of custom field id:value pairs to add to the document, can also be a list of custom field IDs
-            to add with empty values.
+      to add with empty values.
-        -   `"remove_custom_fields": [CUSTOM_FIELD_ID]`: custom field ids to remove from the document.
+    - `"remove_custom_fields": [CUSTOM_FIELD_ID]`: custom field ids to remove from the document.
 #### Document-editing operations
@@ -335,16 +335,16 @@ operations, using the endpoint: `/api/bulk_edit_objects/`, which requires a json
 The REST API is versioned.
-   Versioning ensures that changes to the API don't break older
+- Versioning ensures that changes to the API don't break older
-    clients.
+  clients.
-   Clients specify the specific version of the API they wish to use
+- Clients specify the specific version of the API they wish to use
-    with every request and Paperless will handle the request using the
+  with every request and Paperless will handle the request using the
-    specified API version.
+  specified API version.
-   Even if the underlying data model changes, supported older API
+- Even if the underlying data model changes, supported older API
-    versions continue to serve compatible data.
+  versions continue to serve compatible data.
-   If no version is specified, Paperless serves the configured default
+- If no version is specified, Paperless serves the configured default
-    API version (currently `10`).
+  API version (currently `10`).
-   Supported API versions are currently `9` and `10`.
+- Supported API versions are currently `9` and `10`.
 API versions are specified by submitting an additional HTTP `Accept`
 header with every request:
@@ -384,56 +384,56 @@ Initial API version.
 #### Version 2
-   Added field `Tag.color`. This read/write string field contains a hex
+- Added field `Tag.color`. This read/write string field contains a hex
-    color such as `#a6cee3`.
+  color such as `#a6cee3`.
-   Added read-only field `Tag.text_color`. This field contains the text
+- Added read-only field `Tag.text_color`. This field contains the text
-    color to use for a specific tag, which is either black or white
+  color to use for a specific tag, which is either black or white
-    depending on the brightness of `Tag.color`.
+  depending on the brightness of `Tag.color`.
-   Removed field `Tag.colour`.
+- Removed field `Tag.colour`.
 #### Version 3
-   Permissions endpoints have been added.
+- Permissions endpoints have been added.
-   The format of the `/api/ui_settings/` has changed.
+- The format of the `/api/ui_settings/` has changed.
 #### Version 4
-   Consumption templates were refactored to workflows and API endpoints
+- Consumption templates were refactored to workflows and API endpoints
-    changed as such.
+  changed as such.
 #### Version 5
-   Added bulk deletion methods for documents and objects.
+- Added bulk deletion methods for documents and objects.
 #### Version 6
-   Moved acknowledge tasks endpoint to be under `/api/tasks/acknowledge/`.
+- Moved acknowledge tasks endpoint to be under `/api/tasks/acknowledge/`.
 #### Version 7
-   The format of select type custom fields has changed to return the options
+- The format of select type custom fields has changed to return the options
-    as an array of objects with `id` and `label` fields as opposed to a simple
+  as an array of objects with `id` and `label` fields as opposed to a simple
-    list of strings. When creating or updating a custom field value of a
+  list of strings. When creating or updating a custom field value of a
-    document for a select type custom field, the value should be the `id` of
+  document for a select type custom field, the value should be the `id` of
-    the option whereas previously was the index of the option.
+  the option, whereas previously it was the index of the option.
 #### Version 8
-   The user field of document notes now returns a simplified user object
+- The user field of document notes now returns a simplified user object
-    rather than just the user ID.
+  rather than just the user ID.
 #### Version 9
-   The document `created` field is now a date, not a datetime. The
+- The document `created` field is now a date, not a datetime. The
-    `created_date` field is considered deprecated and will be removed in a
+  `created_date` field is considered deprecated and will be removed in a
-    future version.
+  future version.
 #### Version 10
-   The `show_on_dashboard` and `show_in_sidebar` fields of saved views have been
+- The `show_on_dashboard` and `show_in_sidebar` fields of saved views have been
-    removed. Relevant settings are now stored in the UISettings model. Compatibility is maintained
+  removed. Relevant settings are now stored in the UISettings model. Compatibility is maintained
-    for versions < 10 until support for API v9 is dropped.
+  for versions < 10 until support for API v9 is dropped.
-   Document-editing operations such as `merge`, `rotate`, and `edit_pdf` have been
+- Document-editing operations such as `merge`, `rotate`, and `edit_pdf` have been
-    moved from the bulk edit endpoint to their own individual endpoints. Using these methods via
+  moved from the bulk edit endpoint to their own individual endpoints. Using these methods via
-    the bulk edit endpoint is still supported for compatibility with versions < 10 until support
+  the bulk edit endpoint is still supported for compatibility with versions < 10 until support
-    for API v9 is dropped.
+  for API v9 is dropped.
--- a/docs/changelog.md
+++ b/docs/changelog.md
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -8,17 +8,17 @@ common [OCR](#ocr) related settings and some frontend settings. If set, these wi
 preference over the settings via environment variables. If not set, the environment setting
 or applicable default will be utilized instead.
-   If you run paperless on docker, `paperless.conf` is not used.
+- If you run paperless on docker, `paperless.conf` is not used.
-    Rather, configure paperless by copying necessary options to
+  Rather, configure paperless by copying necessary options to
-    `docker-compose.env`.
+  `docker-compose.env`.
-   If you are running paperless on anything else, paperless will search
+- If you are running paperless on anything else, paperless will search
-    for the configuration file in these locations and use the first one
+  for the configuration file in these locations and use the first one
-    it finds:
+  it finds:
-    -   The environment variable `PAPERLESS_CONFIGURATION_PATH`
+  - The environment variable `PAPERLESS_CONFIGURATION_PATH`
-    -   `/path/to/paperless/paperless.conf`
+  - `/path/to/paperless/paperless.conf`
-    -   `/etc/paperless.conf`
+  - `/etc/paperless.conf`
-    -   `/usr/local/etc/paperless.conf`
+  - `/usr/local/etc/paperless.conf`
 ## Required services
@@ -674,6 +674,9 @@ See the corresponding [django-allauth documentation](https://docs.allauth.org/en
 for a list of provider configurations. You will also need to include the relevant Django 'application' inside the
 [PAPERLESS_APPS](#PAPERLESS_APPS) setting to activate that specific authentication provider (e.g. `allauth.socialaccount.providers.openid_connect` for the [OIDC Connect provider](https://docs.allauth.org/en/latest/socialaccount/providers/openid_connect.html)).
 : For OpenID Connect providers, set `settings.token_auth_method` if your identity provider
 requires a specific token endpoint authentication method.
    Defaults to None, which does not enable any third party authentication systems.
 #### [`PAPERLESS_SOCIAL_AUTO_SIGNUP=<bool>`](#PAPERLESS_SOCIAL_AUTO_SIGNUP) {#PAPERLESS_SOCIAL_AUTO_SIGNUP}
@@ -1947,6 +1950,12 @@ current backend. If not supplied, defaults to "gpt-3.5-turbo" for OpenAI and "ll
    Defaults to None.
 #### [`PAPERLESS_AI_LLM_ALLOW_INTERNAL_ENDPOINTS=<bool>`](#PAPERLESS_AI_LLM_ALLOW_INTERNAL_ENDPOINTS) {#PAPERLESS_AI_LLM_ALLOW_INTERNAL_ENDPOINTS}
 : If set to false, Paperless blocks AI endpoint URLs that resolve to non-public addresses (e.g., localhost).
    Defaults to true, which allows internal endpoints.
 #### [`PAPERLESS_AI_LLM_INDEX_TASK_CRON=<cron expression>`](#PAPERLESS_AI_LLM_INDEX_TASK_CRON) {#PAPERLESS_AI_LLM_INDEX_TASK_CRON}
 : Configures the schedule to update the AI embeddings of text content and metadata for all documents. Only performed if
--- a/docs/development.md
+++ b/docs/development.md
@@ -6,23 +6,23 @@ on Paperless-ngx.
 Check out the source from GitHub. The repository is organized in the
 following way:
-   `main` always represents the latest release and will only see
+- `main` always represents the latest release and will only see
-    changes when a new release is made.
+  changes when a new release is made.
-   `dev` contains the code that will be in the next release.
+- `dev` contains the code that will be in the next release.
-   `feature-X` contains bigger changes that will be in some release, but
+- `feature-X` contains bigger changes that will be in some release, but
-    not necessarily the next one.
+  not necessarily the next one.
 When making functional changes to Paperless-ngx, _always_ make your changes
 on the `dev` branch.
 Apart from that, the folder structure is as follows:
-   `docs/` - Documentation.
+- `docs/` - Documentation.
-   `src-ui/` - Code of the front end.
+- `src-ui/` - Code of the front end.
-   `src/` - Code of the back end.
+- `src/` - Code of the back end.
-   `scripts/` - Various scripts that help with different parts of
+- `scripts/` - Various scripts that help with different parts of
-    development.
+  development.
-   `docker/` - Files required to build the docker image.
+- `docker/` - Files required to build the docker image.
 ## Contributing to Paperless-ngx
@@ -94,18 +94,17 @@ first-time setup.
    ```
 7.  You can now either ...
    - install Redis or
-    -   install Redis or
+    - use the included `scripts/start_services.sh` to use Docker to fire
      up a Redis instance (and some other services such as Tika,
      Gotenberg and a database server) or
-    -   use the included `scripts/start_services.sh` to use Docker to fire
+    - spin up a bare Redis container
        up a Redis instance (and some other services such as Tika,
        Gotenberg and a database server) or
-    -   spin up a bare Redis container
+      ```bash
-
+      docker run -d -p 6379:6379 --restart unless-stopped redis:latest
-        ```bash
+      ```
        docker run -d -p 6379:6379 --restart unless-stopped redis:latest
        ```
 8.  Continue with either back-end or front-end development – or both :-).
@@ -118,9 +117,9 @@ work well for development, but you can use whatever you want.
 Configure the IDE to use the `src/`-folder as the base source folder.
 Configure the following launch configurations in your IDE:
-   `uv run manage.py runserver`
+- `uv run manage.py runserver`
-   `uv run manage.py document_consumer`
+- `uv run manage.py document_consumer`
-   `uv run celery --app paperless worker -l DEBUG` (or any other log level)
+- `uv run celery --app paperless worker -l DEBUG` (or any other log level)
 To start them all:
@@ -146,11 +145,11 @@ pnpm ng build --configuration production
 ### Testing
-   Run `pytest` in the `src/` directory to execute all tests. This also
+- Run `pytest` in the `src/` directory to execute all tests. This also
-    generates a HTML coverage report. When running tests, `paperless.conf`
+  generates an HTML coverage report. When running tests, `paperless.conf`
-    is loaded as well. However, the tests rely on the default
+  is loaded as well. However, the tests rely on the default
-    configuration. This is not ideal. But for now, make sure no settings
+  configuration. This is not ideal. But for now, make sure no settings
-    except for DEBUG are overridden when testing.
+  except for DEBUG are overridden when testing.
 !!! note
@@ -254,14 +253,14 @@ these parts have to be translated separately.
 ### Front end localization
-   The AngularJS front end does localization according to the [Angular
+- The Angular front end does localization according to the [Angular
-    documentation](https://angular.io/guide/i18n).
+  documentation](https://angular.io/guide/i18n).
-   The source language of the project is "en_US".
+- The source language of the project is "en_US".
-   The source strings end up in the file `src-ui/messages.xlf`.
+- The source strings end up in the file `src-ui/messages.xlf`.
-   The translated strings need to be placed in the
+- The translated strings need to be placed in the
-    `src-ui/src/locale/` folder.
+  `src-ui/src/locale/` folder.
-   In order to extract added or changed strings from the source files,
+- In order to extract added or changed strings from the source files,
-    call `ng extract-i18n`.
+  call `ng extract-i18n`.
 Adding new languages requires adding the translated files in the
 `src-ui/src/locale/` folder and adjusting a couple files.
@@ -307,18 +306,18 @@ A majority of the strings that appear in the back end appear only when
 the admin is used. However, some of these are still shown on the front
 end (such as error messages).
-   The django application does localization according to the [Django
+- The django application does localization according to the [Django
-    documentation](https://docs.djangoproject.com/en/3.1/topics/i18n/translation/).
+  documentation](https://docs.djangoproject.com/en/3.1/topics/i18n/translation/).
-   The source language of the project is "en_US".
+- The source language of the project is "en_US".
-   Localization files end up in the folder `src/locale/`.
+- Localization files end up in the folder `src/locale/`.
-   In order to extract strings from the application, call
+- In order to extract strings from the application, call
-    `uv run manage.py makemessages -l en_US`. This is important after
+  `uv run manage.py makemessages -l en_US`. This is important after
-    making changes to translatable strings.
+  making changes to translatable strings.
-   The message files need to be compiled for them to show up in the
+- The message files need to be compiled for them to show up in the
-    application. Call `uv run manage.py compilemessages` to do this.
+  application. Call `uv run manage.py compilemessages` to do this.
-    The generated files don't get committed into git, since these are
+  The generated files don't get committed into git, since these are
-    derived artifacts. The build pipeline takes care of executing this
+  derived artifacts. The build pipeline takes care of executing this
-    command.
+  command.
 Adding new languages requires adding the translated files in the
 `src/locale/`-folder and adjusting the file
@@ -381,10 +380,10 @@ base code.
 Paperless-ngx uses parsers to add documents. A parser is
 responsible for:
-   Retrieving the content from the original
+- Retrieving the content from the original
-   Creating a thumbnail
+- Creating a thumbnail
-   _optional:_ Retrieving a created date from the original
+- _optional:_ Retrieving a created date from the original
-   _optional:_ Creating an archived document from the original
+- _optional:_ Creating an archived document from the original
 Custom parsers can be added to Paperless-ngx to support more file types. In
 order to do that, you need to write the parser itself and announce its
@@ -442,17 +441,17 @@ def myparser_consumer_declaration(sender, **kwargs):
    }
 ```
-   `parser` is a reference to a class that extends `DocumentParser`.
+- `parser` is a reference to a class that extends `DocumentParser`.
-   `weight` is used whenever two or more parsers are able to parse a
+- `weight` is used whenever two or more parsers are able to parse a
-    file: The parser with the higher weight wins. This can be used to
+  file: The parser with the higher weight wins. This can be used to
-    override the parsers provided by Paperless-ngx.
+  override the parsers provided by Paperless-ngx.
-   `mime_types` is a dictionary. The keys are the mime types your
+- `mime_types` is a dictionary. The keys are the mime types your
-    parser supports and the value is the default file extension that
+  parser supports and the value is the default file extension that
-    Paperless-ngx should use when storing files and serving them for
+  Paperless-ngx should use when storing files and serving them for
-    download. We could guess that from the file extensions, but some
+  download. We could guess that from the file extensions, but some
-    mime types have many extensions associated with them and the Python
+  mime types have many extensions associated with them and the Python
-    methods responsible for guessing the extension do not always return
+  methods responsible for guessing the extension do not always return
-    the same value.
+  the same value.
 ## Using Visual Studio Code devcontainer
@@ -471,9 +470,8 @@ To get started:
 2. VS Code will prompt you with "Reopen in container". Do so and wait for the environment to start.
 3. In case your host operating system is Windows:
-
+   - The Source Control view in Visual Studio Code might show: "The detected Git repository is potentially unsafe as the folder is owned by someone other than the current user." Use "Manage Unsafe Repositories" to fix this.
-    - The Source Control view in Visual Studio Code might show: "The detected Git repository is potentially unsafe as the folder is owned by someone other than the current user." Use "Manage Unsafe Repositories" to fix this.
+   - Git might have detected modifications for all files, because Windows is using CRLF line endings. Run `git checkout .` in the container's terminal to fix this issue.
    - Git might have detected modifications for all files, because Windows is using CRLF line endings. Run `git checkout .` in the container's terminal to fix this issue.
 4. Initialize the project by running the task **Project Setup: Run all Init Tasks**. This
   will initialize the database tables and create a superuser. Then you can compile the front end
@@ -538,12 +536,12 @@ class MyDateParserPlugin(DateParserPluginBase):
 Your parser instance is initialized with a `DateParserConfig` object accessible via `self.config`. This provides:
-   `languages: list[str]` - List of language codes for date parsing
+- `languages: list[str]` - List of language codes for date parsing
-   `timezone_str: str` - Timezone string for date localization
+- `timezone_str: str` - Timezone string for date localization
-   `ignore_dates: set[datetime.date]` - Dates that should be filtered out
+- `ignore_dates: set[datetime.date]` - Dates that should be filtered out
-   `reference_time: datetime.datetime` - Current time for filtering future dates
+- `reference_time: datetime.datetime` - Current time for filtering future dates
-   `filename_date_order: str | None` - Date order preference for filenames (e.g., "DMY", "MDY")
+- `filename_date_order: str | None` - Date order preference for filenames (e.g., "DMY", "MDY")
-   `content_date_order: str` - Date order preference for content
+- `content_date_order: str` - Date order preference for content
 The base class provides two helper methods you can use:
--- a/docs/faq.md
+++ b/docs/faq.md
@@ -44,28 +44,28 @@ system. On Linux, chances are high that this location is
 You can always drag those files out of that folder to use them
 elsewhere. Here are a couple notes about that.
-   Paperless-ngx never modifies your original documents. It keeps
+- Paperless-ngx never modifies your original documents. It keeps
-    checksums of all documents and uses a scheduled sanity checker to
+  checksums of all documents and uses a scheduled sanity checker to
-    check that they remain the same.
+  check that they remain the same.
-   By default, paperless uses the internal ID of each document as its
+- By default, paperless uses the internal ID of each document as its
-    filename. This might not be very convenient for export. However, you
+  filename. This might not be very convenient for export. However, you
-    can adjust the way files are stored in paperless by
+  can adjust the way files are stored in paperless by
-    [configuring the filename format](advanced_usage.md#file-name-handling).
+  [configuring the filename format](advanced_usage.md#file-name-handling).
-   [The exporter](administration.md#exporter) is
+- [The exporter](administration.md#exporter) is
-    another easy way to get your files out of paperless with reasonable
+  another easy way to get your files out of paperless with reasonable
-    file names.
+  file names.
 ## _What file types does paperless-ngx support?_
 **A:** Currently, the following files are supported:
-   PDF documents, PNG images, JPEG images, TIFF images, GIF images and
+- PDF documents, PNG images, JPEG images, TIFF images, GIF images and
-    WebP images are processed with OCR and converted into PDF documents.
+  WebP images are processed with OCR and converted into PDF documents.
-   Plain text documents are supported as well and are added verbatim to
+- Plain text documents are supported as well and are added verbatim to
-    paperless.
+  paperless.
-   With the optional Tika integration enabled (see [Tika configuration](https://docs.paperless-ngx.com/configuration#tika)),
+- With the optional Tika integration enabled (see [Tika configuration](https://docs.paperless-ngx.com/configuration#tika)),
-    Paperless also supports various Office documents (.docx, .doc, odt,
+  Paperless also supports various Office documents (.docx, .doc, .odt,
-    .ppt, .pptx, .odp, .xls, .xlsx, .ods).
+  .ppt, .pptx, .odp, .xls, .xlsx, .ods).
 Paperless-ngx determines the type of a file by inspecting its content
 rather than its file extensions. However, files processed via the
--- a/docs/index.md
+++ b/docs/index.md
@@ -28,36 +28,36 @@ physical documents into a searchable online archive so you can keep, well, _less
 ## Features
-   **Organize and index** your scanned documents with tags, correspondents, types, and more.
+- **Organize and index** your scanned documents with tags, correspondents, types, and more.
-   _Your_ data is stored locally on _your_ server and is never transmitted or shared in any way, unless you explicitly choose to do so.
+- _Your_ data is stored locally on _your_ server and is never transmitted or shared in any way, unless you explicitly choose to do so.
-   Performs **OCR** on your documents, adding searchable and selectable text, even to documents scanned with only images.
+- Performs **OCR** on your documents, adding searchable and selectable text, even to documents scanned with only images.
-    -   Utilizes the open-source Tesseract engine to recognize more than 100 languages.
+  - Utilizes the open-source Tesseract engine to recognize more than 100 languages.
-    -   _New!_ Supports remote OCR with Azure AI (opt-in).
+  - _New!_ Supports remote OCR with Azure AI (opt-in).
-   Documents are saved as PDF/A format which is designed for long term storage, alongside the unaltered originals.
+- Documents are saved as PDF/A format which is designed for long-term storage, alongside the unaltered originals.
-   Uses machine-learning to automatically add tags, correspondents and document types to your documents.
+- Uses machine-learning to automatically add tags, correspondents and document types to your documents.
-   **New**: Paperless-ngx can now leverage AI (Large Language Models or LLMs) for document suggestions. This is an optional feature that can be enabled (and is disabled by default).
+- **New**: Paperless-ngx can now leverage AI (Large Language Models or LLMs) for document suggestions. This is an optional feature that can be enabled (and is disabled by default).
-   Supports PDF documents, images, plain text files, Office documents (Word, Excel, PowerPoint, and LibreOffice equivalents)[^1] and more.
+- Supports PDF documents, images, plain text files, Office documents (Word, Excel, PowerPoint, and LibreOffice equivalents)[^1] and more.
-   Paperless stores your documents plain on disk. Filenames and folders are managed by paperless and their format can be configured freely with different configurations assigned to different documents.
+- Paperless stores your documents plain on disk. Filenames and folders are managed by paperless and their format can be configured freely with different configurations assigned to different documents.
-   **Beautiful, modern web application** that features:
+- **Beautiful, modern web application** that features:
-    -   Customizable dashboard with statistics.
+  - Customizable dashboard with statistics.
-    -   Filtering by tags, correspondents, types, and more.
+  - Filtering by tags, correspondents, types, and more.
-    -   Bulk editing of tags, correspondents, types and more.
+  - Bulk editing of tags, correspondents, types and more.
-    -   Drag-and-drop uploading of documents throughout the app.
+  - Drag-and-drop uploading of documents throughout the app.
-    -   Customizable views can be saved and displayed on the dashboard and / or sidebar.
+  - Customizable views can be saved and displayed on the dashboard and / or sidebar.
-    -   Support for custom fields of various data types.
+  - Support for custom fields of various data types.
-    -   Shareable public links with optional expiration.
+  - Shareable public links with optional expiration.
-   **Full text search** helps you find what you need:
+- **Full text search** helps you find what you need:
-    -   Auto completion suggests relevant words from your documents.
+  - Auto completion suggests relevant words from your documents.
-    -   Results are sorted by relevance to your search query.
+  - Results are sorted by relevance to your search query.
-    -   Highlighting shows you which parts of the document matched the query.
+  - Highlighting shows you which parts of the document matched the query.
-    -   Searching for similar documents ("More like this")
+  - Searching for similar documents ("More like this")
-   **Email processing**[^1]: import documents from your email accounts:
+- **Email processing**[^1]: import documents from your email accounts:
-    -   Configure multiple accounts and rules for each account.
+  - Configure multiple accounts and rules for each account.
-    -   After processing, paperless can perform actions on the messages such as marking as read, deleting and more.
+  - After processing, paperless can perform actions on the messages such as marking as read, deleting and more.
-   A built-in robust **multi-user permissions** system that supports 'global' permissions as well as per document or object.
+- A built-in robust **multi-user permissions** system that supports 'global' permissions as well as per document or object.
-   A powerful workflow system that gives you even more control.
+- A powerful workflow system that gives you even more control.
-   **Optimized** for multi core systems: Paperless-ngx consumes multiple documents in parallel.
+- **Optimized** for multi-core systems: Paperless-ngx consumes multiple documents in parallel.
-   The integrated sanity checker makes sure that your document archive is in good health.
+- The integrated sanity checker makes sure that your document archive is in good health.
 [^1]: Office document and email consumption support is optional and provided by Apache Tika (see [configuration](https://docs.paperless-ngx.com/configuration/#tika))
--- a/docs/migration-v3.md
+++ b/docs/migration-v3.md
@@ -42,12 +42,12 @@ The `CONSUMER_BARCODE_SCANNER` setting has been removed. zxing-cpp is now the on
 ### Action Required
-   If you were already using `CONSUMER_BARCODE_SCANNER=ZXING`, simply remove the setting.
+- If you were already using `CONSUMER_BARCODE_SCANNER=ZXING`, simply remove the setting.
-   If you had `CONSUMER_BARCODE_SCANNER=PYZBAR` or were using the default, no functional changes are needed beyond
+- If you had `CONSUMER_BARCODE_SCANNER=PYZBAR` or were using the default, no functional changes are needed beyond
-    removing the setting. zxing-cpp supports all the same barcode formats and you should see improved detection
+  removing the setting. zxing-cpp supports all the same barcode formats and you should see improved detection
-    reliability.
+  reliability.
-   The `libzbar0` / `libzbar-dev` system packages are no longer required and can be removed from any custom Docker
+- The `libzbar0` / `libzbar-dev` system packages are no longer required and can be removed from any custom Docker
-    images or host installations.
+  images or host installations.
 ## Database Engine
@@ -103,3 +103,30 @@ Multiple options are combined in a single value:
 ```bash
 PAPERLESS_DB_OPTIONS="sslmode=require;sslrootcert=/certs/ca.pem;pool.max_size=10"
 ```
 ## OpenID Connect Token Endpoint Authentication
 Some existing OpenID Connect setups may require an explicit token endpoint authentication method after upgrading to v3.
 ### Action Required
 If OIDC login fails at the callback with an `invalid_client` error, add `token_auth_method` to the provider `settings` in
 [`PAPERLESS_SOCIALACCOUNT_PROVIDERS`](configuration.md#PAPERLESS_SOCIALACCOUNT_PROVIDERS).
 For example:
 ```json
 {
  "openid_connect": {
    "APPS": [
      {
        ...
        "settings": {
          "server_url": "https://login.example.com",
          "token_auth_method": "client_secret_basic"
        }
      }
    ]
  }
 }
 ```
--- a/docs/setup.md
+++ b/docs/setup.md
@@ -44,8 +44,8 @@ account. In short, it automates the [Docker Compose setup](#docker) described be
 #### Prerequisites
-   Docker and Docker Compose must be [installed](https://docs.docker.com/engine/install/){:target="\_blank"}.
+- Docker and Docker Compose must be [installed](https://docs.docker.com/engine/install/){:target="\_blank"}.
-   macOS users will need [GNU sed](https://formulae.brew.sh/formula/gnu-sed) with support for running as `sed` as well as [wget](https://formulae.brew.sh/formula/wget).
+- macOS users will need [GNU sed](https://formulae.brew.sh/formula/gnu-sed) with support for running as `sed` as well as [wget](https://formulae.brew.sh/formula/wget).
 #### Run the installation script
@@ -63,7 +63,7 @@ credentials you provided during the installation script.
 #### Prerequisites
-   Docker and Docker Compose must be [installed](https://docs.docker.com/engine/install/){:target="\_blank"}.
+- Docker and Docker Compose must be [installed](https://docs.docker.com/engine/install/){:target="\_blank"}.
 #### Installation
@@ -101,7 +101,7 @@ credentials you provided during the installation script.
    ```yaml
    ports:
-        - 8010:8000
+      - 8010:8000
    ```
 3.  Modify `docker-compose.env` with any configuration options you need.
@@ -140,24 +140,17 @@ a [superuser](usage.md#superusers) account.
 !!! warning
-    It is currently not possible to run the container rootless if additional languages are specified via `PAPERLESS_OCR_LANGUAGES`.
+    It is not possible to run the container rootless if additional languages are specified via `PAPERLESS_OCR_LANGUAGES`.
-If you want to run Paperless as a rootless container, make this
+If you want to run Paperless as a rootless container, set `user:` in `docker-compose.yml` to the UID and GID of your host user (use `id -u` and `id -g` to find these values). The container process starts directly as that user with no internal privilege remapping:
 change in `docker-compose.yml`:
-   Set the `user` running the container to map to the `paperless`
+```yaml
-    user in the container. This value (`user_id` below) should be
+webserver:
-    the same ID that `USERMAP_UID` and `USERMAP_GID` are set to in
+  image: ghcr.io/paperless-ngx/paperless-ngx:latest
-    `docker-compose.env`. See `USERMAP_UID` and `USERMAP_GID`
+  user: '1000:1000'
-    [here](configuration.md#docker).
+```
-Your entry for Paperless should contain something like:
+Do not combine this with `USERMAP_UID` or `USERMAP_GID`, which are intended for the non-rootless case described in step 3.
 > ```
 > webserver:
 >   image: ghcr.io/paperless-ngx/paperless-ngx:latest
 >   user: <user_id>
 > ```
 **File systems without inotify support (e.g. NFS)**
@@ -171,26 +164,25 @@ to enable polling and disable inotify. See [here](configuration.md#polling).
 #### Prerequisites
-   Paperless runs on Linux only, Windows is not supported.
+- Paperless runs on Linux only; Windows is not supported.
-   Python 3.11, 3.12, 3.13, or 3.14 is required. As a policy, Paperless-ngx aims to support at least the three most recent Python versions and drops support for versions as they reach end-of-life. Newer versions may work, but some dependencies may not be fully compatible.
+- Python 3.11, 3.12, 3.13, or 3.14 is required. As a policy, Paperless-ngx aims to support at least the three most recent Python versions and drops support for versions as they reach end-of-life. Newer versions may work, but some dependencies may not be fully compatible.
 #### Installation
 1.  Install dependencies. Paperless requires the following packages:
-
+    - `python3`
-    -   `python3`
+    - `python3-pip`
-    -   `python3-pip`
+    - `python3-dev`
-    -   `python3-dev`
+    - `default-libmysqlclient-dev` for MariaDB
-    -   `default-libmysqlclient-dev` for MariaDB
+    - `pkg-config` for mysqlclient (python dependency)
-    -   `pkg-config` for mysqlclient (python dependency)
+    - `fonts-liberation` for generating thumbnails for plain text
-    -   `fonts-liberation` for generating thumbnails for plain text
+      files
-        files
+    - `imagemagick` >= 6 for PDF conversion
-    -   `imagemagick` >= 6 for PDF conversion
+    - `gnupg` for handling encrypted documents
-    -   `gnupg` for handling encrypted documents
+    - `libpq-dev` for PostgreSQL
-    -   `libpq-dev` for PostgreSQL
+    - `libmagic-dev` for mime type detection
-    -   `libmagic-dev` for mime type detection
+    - `mariadb-client` for MariaDB compile time
-    -   `mariadb-client` for MariaDB compile time
+    - `poppler-utils` for barcode detection
    -   `poppler-utils` for barcode detection
    Use this list for your preferred package management:
@@ -200,18 +192,17 @@ to enable polling and disable inotify. See [here](configuration.md#polling).
    These dependencies are required for OCRmyPDF, which is used for text
    recognition.
-
+    - `unpaper`
-    -   `unpaper`
+    - `ghostscript`
-    -   `ghostscript`
+    - `icc-profiles-free`
-    -   `icc-profiles-free`
+    - `qpdf`
-    -   `qpdf`
+    - `liblept5`
-    -   `liblept5`
+    - `libxml2`
-    -   `libxml2`
+    - `pngquant` (suggested for certain PDF image optimizations)
-    -   `pngquant` (suggested for certain PDF image optimizations)
+    - `zlib1g`
-    -   `zlib1g`
+    - `tesseract-ocr` >= 4.0.0 for OCR
-    -   `tesseract-ocr` >= 4.0.0 for OCR
+    - `tesseract-ocr` language packs (`tesseract-ocr-eng`,
-    -   `tesseract-ocr` language packs (`tesseract-ocr-eng`,
+      `tesseract-ocr-deu`, etc)
        `tesseract-ocr-deu`, etc)
    Use this list for your preferred package management:
@@ -220,16 +211,14 @@ to enable polling and disable inotify. See [here](configuration.md#polling).
    ```
    On Raspberry Pi, these libraries are required as well:
-
+    - `libatlas-base-dev`
-    -   `libatlas-base-dev`
+    - `libxslt1-dev`
-    -   `libxslt1-dev`
+    - `mime-support`
    -   `mime-support`
    You will also need these for installing some of the python dependencies:
-
+    - `build-essential`
-    -   `build-essential`
+    - `python3-setuptools`
-    -   `python3-setuptools`
+    - `python3-wheel`
    -   `python3-wheel`
    Use this list for your preferred package management:
@@ -279,44 +268,41 @@ to enable polling and disable inotify. See [here](configuration.md#polling).
 6.  Configure Paperless-ngx. See [configuration](configuration.md) for details.
    Edit the included `paperless.conf` and adjust the settings to your
    needs. Required settings for getting Paperless-ngx running are:
-
+    - [`PAPERLESS_REDIS`](configuration.md#PAPERLESS_REDIS) should point to your Redis server, such as
-    -   [`PAPERLESS_REDIS`](configuration.md#PAPERLESS_REDIS) should point to your Redis server, such as
+      `redis://localhost:6379`.
-        `redis://localhost:6379`.
+    - [`PAPERLESS_DBENGINE`](configuration.md#PAPERLESS_DBENGINE) is optional, and should be one of `postgres`,
-    -   [`PAPERLESS_DBENGINE`](configuration.md#PAPERLESS_DBENGINE) is optional, and should be one of `postgres`,
+      `mariadb`, or `sqlite`
-        `mariadb`, or `sqlite`
+    - [`PAPERLESS_DBHOST`](configuration.md#PAPERLESS_DBHOST) should be the hostname on which your
-    -   [`PAPERLESS_DBHOST`](configuration.md#PAPERLESS_DBHOST) should be the hostname on which your
+      PostgreSQL server is running. Do not configure this to use
-        PostgreSQL server is running. Do not configure this to use
+      SQLite instead. Also configure port, database name, user and
-        SQLite instead. Also configure port, database name, user and
+      password as necessary.
-        password as necessary.
+    - [`PAPERLESS_CONSUMPTION_DIR`](configuration.md#PAPERLESS_CONSUMPTION_DIR) should point to the folder
-    -   [`PAPERLESS_CONSUMPTION_DIR`](configuration.md#PAPERLESS_CONSUMPTION_DIR) should point to the folder
+      that Paperless-ngx should watch for incoming documents.
-        that Paperless-ngx should watch for incoming documents.
+      Likewise, [`PAPERLESS_DATA_DIR`](configuration.md#PAPERLESS_DATA_DIR) and
-        Likewise, [`PAPERLESS_DATA_DIR`](configuration.md#PAPERLESS_DATA_DIR) and
+      [`PAPERLESS_MEDIA_ROOT`](configuration.md#PAPERLESS_MEDIA_ROOT) define where Paperless-ngx stores its data.
-        [`PAPERLESS_MEDIA_ROOT`](configuration.md#PAPERLESS_MEDIA_ROOT) define where Paperless-ngx stores its data.
+      If needed, these can point to the same directory.
-        If needed, these can point to the same directory.
+    - [`PAPERLESS_SECRET_KEY`](configuration.md#PAPERLESS_SECRET_KEY) should be a random sequence of
-    -   [`PAPERLESS_SECRET_KEY`](configuration.md#PAPERLESS_SECRET_KEY) should be a random sequence of
+      characters. It's used for authentication. Failing to set it
-        characters. It's used for authentication. Failure to do so
+      allows third parties to forge authentication credentials.
-        allows third parties to forge authentication credentials.
+    - Set [`PAPERLESS_URL`](configuration.md#PAPERLESS_URL) if you are behind a reverse proxy. This should
-    -   Set [`PAPERLESS_URL`](configuration.md#PAPERLESS_URL) if you are behind a reverse proxy. This should
+      point to your domain. Please see
-        point to your domain. Please see
+      [configuration](configuration.md) for more
-        [configuration](configuration.md) for more
+      information.
        information.
    You can make many more adjustments, especially for OCR.
    The following options are recommended for most users:
-
+    - Set [`PAPERLESS_OCR_LANGUAGE`](configuration.md#PAPERLESS_OCR_LANGUAGE) to the language most of your
-    -   Set [`PAPERLESS_OCR_LANGUAGE`](configuration.md#PAPERLESS_OCR_LANGUAGE) to the language most of your
+      documents are written in.
-        documents are written in.
+    - Set [`PAPERLESS_TIME_ZONE`](configuration.md#PAPERLESS_TIME_ZONE) to your local time zone.
    -   Set [`PAPERLESS_TIME_ZONE`](configuration.md#PAPERLESS_TIME_ZONE) to your local time zone.
    !!! warning
        Ensure your Redis instance [is secured](https://redis.io/docs/latest/operate/oss_and_stack/management/security/).
 7.  Create the following directories if they do not already exist:
-
+    - `/opt/paperless/media`
-    -   `/opt/paperless/media`
+    - `/opt/paperless/data`
-    -   `/opt/paperless/data`
+    - `/opt/paperless/consume`
    -   `/opt/paperless/consume`
    Adjust these paths if you configured different folders.
    Then verify that the `paperless` user has write permissions:
@@ -391,11 +377,10 @@ to enable polling and disable inotify. See [here](configuration.md#polling).
    starting point.
    Paperless needs:
-
+    - The `webserver` script to run the webserver.
-    -   The `webserver` script to run the webserver.
+    - The `consumer` script to watch the input folder.
-    -   The `consumer` script to watch the input folder.
+    - The `taskqueue` script for background workers (document consumption, etc.).
-    -   The `taskqueue` script for background workers (document consumption, etc.).
+    - The `scheduler` script for periodic tasks such as email checking.
    -   The `scheduler` script for periodic tasks such as email checking.
    !!! note
@@ -501,19 +486,19 @@ your setup depending on how you installed Paperless.
 This section describes how to update an existing Paperless Docker
 installation. Keep these points in mind:
-   Read the [changelog](changelog.md) and
+- Read the [changelog](changelog.md) and
-    take note of breaking changes.
+  take note of breaking changes.
-   Decide whether to stay on SQLite or migrate to PostgreSQL.
+- Decide whether to stay on SQLite or migrate to PostgreSQL.
-    Both work fine with Paperless-ngx.
+  Both work fine with Paperless-ngx.
-    However, if you already have a database server running
+  However, if you already have a database server running
-    for other services, you might as well use it for Paperless as well.
+  for other services, you might as well use it for Paperless as well.
-   The task scheduler of Paperless, which is used to execute periodic
+- The task scheduler of Paperless, which is used to execute periodic
-    tasks such as email checking and maintenance, requires a
+  tasks such as email checking and maintenance, requires a
-    [Redis](https://redis.io/) message broker instance. The
+  [Redis](https://redis.io/) message broker instance. The
-    Docker Compose route takes care of that.
+  Docker Compose route takes care of that.
-   The layout of the folder structure for your documents and data
+- The layout of the folder structure for your documents and data
-    remains the same, so you can plug your old Docker volumes into
+  remains the same, so you can plug your old Docker volumes into
-    paperless-ngx and expect it to find everything where it should be.
+  paperless-ngx and expect it to find everything where it should be.
 Migration to Paperless-ngx is then performed in a few simple steps:
@@ -598,7 +583,6 @@ commands as well.
 1.  Stop and remove the Paperless container.
 2.  If using an external database, stop that container.
 3.  Update Redis configuration.
    1. If `REDIS_URL` is already set, change it to [`PAPERLESS_REDIS`](configuration.md#PAPERLESS_REDIS)
       and continue to step 4.
@@ -610,22 +594,18 @@ commands as well.
       the new Redis container.
 4.  Update user mapping.
    1. If set, change the environment variable `PUID` to `USERMAP_UID`.
    1. If set, change the environment variable `PGID` to `USERMAP_GID`.
 5.  Update configuration paths.
    1. Set the environment variable [`PAPERLESS_DATA_DIR`](configuration.md#PAPERLESS_DATA_DIR) to `/config`.
 6.  Update media paths.
    1. Set the environment variable [`PAPERLESS_MEDIA_ROOT`](configuration.md#PAPERLESS_MEDIA_ROOT) to
       `/data/media`.
 7.  Update timezone.
    1. Set the environment variable [`PAPERLESS_TIME_ZONE`](configuration.md#PAPERLESS_TIME_ZONE) to the same
       value as `TZ`.
@@ -639,33 +619,33 @@ commands as well.
 Paperless runs on Raspberry Pi. Some tasks can be slow on lower-powered
 hardware, but a few settings can improve performance:
-   Stick with SQLite to save some resources. See [troubleshooting](troubleshooting.md#log-reports-creating-paperlesstask-failed)
+- Stick with SQLite to save some resources. See [troubleshooting](troubleshooting.md#log-reports-creating-paperlesstask-failed)
-    if you encounter issues with SQLite locking.
+  if you encounter issues with SQLite locking.
-   If you do not need the filesystem-based consumer, consider disabling it
+- If you do not need the filesystem-based consumer, consider disabling it
-    entirely by setting [`PAPERLESS_CONSUMER_DISABLE`](configuration.md#PAPERLESS_CONSUMER_DISABLE) to `true`.
+  entirely by setting [`PAPERLESS_CONSUMER_DISABLE`](configuration.md#PAPERLESS_CONSUMER_DISABLE) to `true`.
-   Consider setting [`PAPERLESS_OCR_PAGES`](configuration.md#PAPERLESS_OCR_PAGES) to 1, so that Paperless
+- Consider setting [`PAPERLESS_OCR_PAGES`](configuration.md#PAPERLESS_OCR_PAGES) to 1, so that Paperless
-    OCRs only the first page of your documents. In most cases, this page
+  OCRs only the first page of your documents. In most cases, this page
-    contains enough information to be able to find it.
+  contains enough information to be able to find it.
-   [`PAPERLESS_TASK_WORKERS`](configuration.md#PAPERLESS_TASK_WORKERS) and [`PAPERLESS_THREADS_PER_WORKER`](configuration.md#PAPERLESS_THREADS_PER_WORKER) are
+- [`PAPERLESS_TASK_WORKERS`](configuration.md#PAPERLESS_TASK_WORKERS) and [`PAPERLESS_THREADS_PER_WORKER`](configuration.md#PAPERLESS_THREADS_PER_WORKER) are
-    configured to use all cores. The Raspberry Pi models 3 and up have 4
+  configured to use all cores. The Raspberry Pi models 3 and up have 4
-    cores, meaning that Paperless will use 2 workers and 2 threads per
+  cores, meaning that Paperless will use 2 workers and 2 threads per
-    worker. This may result in sluggish response times during
+  worker. This may result in sluggish response times during
-    consumption, so you might want to lower these settings (example: 2
+  consumption, so you might want to lower these settings (example: 2
-    workers and 1 thread to always have some computing power left for
+  workers and 1 thread to always have some computing power left for
-    other tasks).
+  other tasks).
-   Keep [`PAPERLESS_OCR_MODE`](configuration.md#PAPERLESS_OCR_MODE) at its default value `skip` and consider
+- Keep [`PAPERLESS_OCR_MODE`](configuration.md#PAPERLESS_OCR_MODE) at its default value `skip` and consider
-    OCRing your documents before feeding them into Paperless. Some
+  OCRing your documents before feeding them into Paperless. Some
-    scanners are able to do this!
+  scanners are able to do this!
-   Set [`PAPERLESS_OCR_SKIP_ARCHIVE_FILE`](configuration.md#PAPERLESS_OCR_SKIP_ARCHIVE_FILE) to `with_text` to skip archive
+- Set [`PAPERLESS_OCR_SKIP_ARCHIVE_FILE`](configuration.md#PAPERLESS_OCR_SKIP_ARCHIVE_FILE) to `with_text` to skip archive
-    file generation for already OCRed documents, or `always` to skip it
+  file generation for already OCRed documents, or `always` to skip it
-    for all documents.
+  for all documents.
-   If you want to perform OCR on the device, consider using
+- If you want to perform OCR on the device, consider using
-    `PAPERLESS_OCR_CLEAN=none`. This will speed up OCR times and use
+  `PAPERLESS_OCR_CLEAN=none`. This will speed up OCR times and use
-    less memory at the expense of slightly worse OCR results.
+  less memory at the expense of slightly worse OCR results.
-   If using Docker, consider setting [`PAPERLESS_WEBSERVER_WORKERS`](configuration.md#PAPERLESS_WEBSERVER_WORKERS) to 1. This will save some memory.
+- If using Docker, consider setting [`PAPERLESS_WEBSERVER_WORKERS`](configuration.md#PAPERLESS_WEBSERVER_WORKERS) to 1. This will save some memory.
-   Consider setting [`PAPERLESS_ENABLE_NLTK`](configuration.md#PAPERLESS_ENABLE_NLTK) to false, to disable the
+- Consider setting [`PAPERLESS_ENABLE_NLTK`](configuration.md#PAPERLESS_ENABLE_NLTK) to false, to disable the
-    more advanced language processing, which can take more memory and
+  more advanced language processing, which can take more memory and
-    processing time.
+  processing time.
 For details, refer to [configuration](configuration.md).
--- a/docs/troubleshooting.md
+++ b/docs/troubleshooting.md
@@ -4,27 +4,27 @@
 Check for the following issues:
-   Ensure that the directory you're putting your documents in is the
+- Ensure that the directory you're putting your documents in is the
-    folder paperless is watching. With docker, this setting is performed
+  folder paperless is watching. With Docker, this setting is performed
-    in the `docker-compose.yml` file. Without Docker, look at the
+  in the `docker-compose.yml` file. Without Docker, look at the
-    `CONSUMPTION_DIR` setting. Don't adjust this setting if you're
+  `CONSUMPTION_DIR` setting. Don't adjust this setting if you're
-    using docker.
+  using Docker.
-   Ensure that redis is up and running. Paperless does its task
+- Ensure that Redis is up and running. Paperless does its task
-    processing asynchronously, and for documents to arrive at the task
+  processing asynchronously, and for documents to arrive at the task
-    processor, it needs redis to run.
+  processor, it needs Redis to run.
-   Ensure that the task processor is running. Docker does this
+- Ensure that the task processor is running. Docker does this
-    automatically. Manually invoke the task processor by executing
+  automatically. Manually invoke the task processor by executing
-    ```shell-session
+  ```shell-session
-    celery --app paperless worker
+  celery --app paperless worker
-    ```
+  ```
-   Look at the output of paperless and inspect it for any errors.
+- Look at the output of paperless and inspect it for any errors.
-   Go to the admin interface, and check if there are failed tasks. If
+- Go to the admin interface, and check if there are failed tasks. If
-    so, the tasks will contain an error message.
+  so, the tasks will contain an error message.
 ## Consumer warns `OCR for XX failed`
@@ -78,12 +78,12 @@ Ensure that `chown` is possible on these directories.
 This indicates that the Auto matching algorithm found no documents to
 learn from. This may have two reasons:
-   You don't use the Auto matching algorithm: The error can be safely
+- You don't use the Auto matching algorithm: The error can be safely
-    ignored in this case.
+  ignored in this case.
-   You are using the Auto matching algorithm: The classifier explicitly
+- You are using the Auto matching algorithm: The classifier explicitly
-    excludes documents with Inbox tags. Verify that there are documents
+  excludes documents with Inbox tags. Verify that there are documents
-    in your archive without inbox tags. The algorithm will only learn
+  in your archive without inbox tags. The algorithm will only learn
-    from documents not in your inbox.
+  from documents not in your inbox.
 ## UserWarning in sklearn on every single document
@@ -127,10 +127,10 @@ change in the `docker-compose.yml` file:
 # The gotenberg chromium route is used to convert .eml files. We do not
 # want to allow external content like tracking pixels or even javascript.
 command:
-    - 'gotenberg'
+  - 'gotenberg'
-    - '--chromium-disable-javascript=true'
+  - '--chromium-disable-javascript=true'
-    - '--chromium-allow-list=file:///tmp/.*'
+  - '--chromium-allow-list=file:///tmp/.*'
-    - '--api-timeout=60s'
+  - '--api-timeout=60s'
 ```
 ## Permission denied errors in the consumption directory
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -14,42 +14,42 @@ for finding and managing your documents.
 Paperless essentially consists of two different parts for managing your
 documents:
-   The _consumer_ watches a specified folder and adds all documents in
+- The _consumer_ watches a specified folder and adds all documents in
-    that folder to paperless.
+  that folder to paperless.
-   The _web server_ (web UI) provides a UI that you use to manage and
+- The _web server_ (web UI) provides a UI that you use to manage and
-    search documents.
+  search documents.
 Each document has data fields that you can assign to them:
-   A _Document_ is a piece of paper that sometimes contains valuable
+- A _Document_ is a piece of paper that sometimes contains valuable
-    information.
+  information.
-   The _correspondent_ of a document is the person, institution or
+- The _correspondent_ of a document is the person, institution or
-    company that a document either originates from, or is sent to.
+  company that a document either originates from, or is sent to.
-   A _tag_ is a label that you can assign to documents. Think of labels
+- A _tag_ is a label that you can assign to documents. Think of labels
-    as more powerful folders: Multiple documents can be grouped together
+  as more powerful folders: Multiple documents can be grouped together
-    with a single tag, however, a single document can also have multiple
+  with a single tag, however, a single document can also have multiple
-    tags. This is not possible with folders. The reason folders are not
+  tags. This is not possible with folders. The reason folders are not
-    implemented in paperless is simply that tags are much more versatile
+  implemented in paperless is simply that tags are much more versatile
-    than folders.
+  than folders.
-   A _document type_ is used to demarcate the type of a document such
+- A _document type_ is used to demarcate the type of a document such
-    as letter, bank statement, invoice, contract, etc. It is used to
+  as letter, bank statement, invoice, contract, etc. It is used to
-    identify what a document is about.
+  identify what a document is about.
-   The document _storage path_ is the location where the document files
+- The document _storage path_ is the location where the document files
-    are stored. See [Storage Paths](advanced_usage.md#storage-paths) for
+  are stored. See [Storage Paths](advanced_usage.md#storage-paths) for
-    more information.
+  more information.
-   The _date added_ of a document is the date the document was scanned
+- The _date added_ of a document is the date the document was scanned
-    into paperless. You cannot and should not change this date.
+  into paperless. You cannot and should not change this date.
-   The _date created_ of a document is the date the document was
+- The _date created_ of a document is the date the document was
-    initially issued. This can be the date you bought a product, the
+  initially issued. This can be the date you bought a product, the
-    date you signed a contract, or the date a letter was sent to you.
+  date you signed a contract, or the date a letter was sent to you.
-   The _archive serial number_ (short: ASN) of a document is the
+- The _archive serial number_ (short: ASN) of a document is the
-    identifier of the document in your physical document binders. See
+  identifier of the document in your physical document binders. See
-    [recommended workflow](#usage-recommended-workflow) below.
+  [recommended workflow](#usage-recommended-workflow) below.
-   The _content_ of a document is the text that was OCR'ed from the
+- The _content_ of a document is the text that was OCR'ed from the
-    document. This text is fed into the search engine and is used for
+  document. This text is fed into the search engine and is used for
-    matching tags, correspondents and document types.
+  matching tags, correspondents and document types.
-   Paperless-ngx also supports _custom fields_ which can be used to
+- Paperless-ngx also supports _custom fields_ which can be used to
-    store additional metadata about a document.
+  store additional metadata about a document.
 ## The Web UI
@@ -93,12 +93,12 @@ download the document or share it via a share link.
 Think of versions as **file history** for a document.
-   Versions track the underlying file and extracted text content (OCR/text).
+- Versions track the underlying file and extracted text content (OCR/text).
-   Metadata such as tags, correspondent, document type, storage path and custom fields stay on the "root" document.
+- Metadata such as tags, correspondent, document type, storage path and custom fields stay on the "root" document.
-   Version files follow normal filename formatting (including storage paths) and add a `_vN` suffix (for example `_v1`, `_v2`).
+- Version files follow normal filename formatting (including storage paths) and add a `_vN` suffix (for example `_v1`, `_v2`).
-   By default, search and document content use the latest version.
+- By default, search and document content use the latest version.
-   In document detail, selecting a version switches the preview, file metadata and content (and download etc buttons) to that version.
+- In document detail, selecting a version switches the preview, file metadata and content (and download etc buttons) to that version.
-   Deleting a non-root version keeps metadata and falls back to the latest remaining version.
+- Deleting a non-root version keeps metadata and falls back to the latest remaining version.
 ### Management Lists
@@ -218,21 +218,20 @@ patterns can include wildcards and multiple patterns separated by a comma.
 The actions all ensure that the same mail is not consumed twice by
 different means. These are as follows:
-   **Delete:** Immediately deletes mail that paperless has consumed
+- **Delete:** Immediately deletes mail that paperless has consumed
-    documents from. Use with caution.
+  documents from. Use with caution.
-   **Mark as read:** Mark consumed mail as read. Paperless will not
+- **Mark as read:** Mark consumed mail as read. Paperless will not
-    consume documents from already read mails. If you read a mail before
+  consume documents from already read mails. If you read a mail before
-    paperless sees it, it will be ignored.
+  paperless sees it, it will be ignored.
-   **Flag:** Sets the 'important' flag on mails with consumed
+- **Flag:** Sets the 'important' flag on mails with consumed
-    documents. Paperless will not consume flagged mails.
+  documents. Paperless will not consume flagged mails.
-   **Move to folder:** Moves consumed mails out of the way so that
+- **Move to folder:** Moves consumed mails out of the way so that
-    paperless won't consume them again.
+  paperless won't consume them again.
-   **Add custom Tag:** Adds a custom tag to mails with consumed
+- **Add custom Tag:** Adds a custom tag to mails with consumed
-    documents (the IMAP standard calls these "keywords"). Paperless
+  documents (the IMAP standard calls these "keywords"). Paperless
-    will not consume mails already tagged. Not all mail servers support
+  will not consume mails already tagged. Not all mail servers support
-    this feature!
+  this feature!
-
+  - **Apple Mail support:** Apple Mail clients allow differently colored tags. For this to work use `apple:<color>` (e.g. _apple:green_) as a custom tag. Available colors are _red_, _orange_, _yellow_, _blue_, _green_, _violet_ and _grey_.
    -   **Apple Mail support:** Apple Mail clients allow differently colored tags. For this to work use `apple:<color>` (e.g. _apple:green_) as a custom tag. Available colors are _red_, _orange_, _yellow_, _blue_, _green_, _violet_ and _grey_.
 !!! warning
@@ -325,12 +324,12 @@ or using [email](#workflow-action-email) or [webhook](#workflow-action-webhook)
 "Share links" are public links to files (or an archive of files) and can be created and managed under the 'Send' button on the document detail screen or from the bulk editor.
-   Share links do not require a user to login and thus link directly to a file or bundled download.
+- Share links do not require a user to login and thus link directly to a file or bundled download.
-   Links are unique and are of the form `{paperless-url}/share/{randomly-generated-slug}`.
+- Links are unique and are of the form `{paperless-url}/share/{randomly-generated-slug}`.
-   Links can optionally have an expiration time set.
+- Links can optionally have an expiration time set.
-   After a link expires or is deleted users will be redirected to the regular paperless-ngx login.
+- After a link expires or is deleted users will be redirected to the regular paperless-ngx login.
-   From the document detail screen you can create a share link for that single document.
+- From the document detail screen you can create a share link for that single document.
-   From the bulk editor you can create a **share link bundle** for any selection. Paperless-ngx prepares a ZIP archive in the background and exposes a single share link. You can revisit the "Manage share link bundles" dialog to monitor progress, retry failed bundles, or delete links.
+- From the bulk editor you can create a **share link bundle** for any selection. Paperless-ngx prepares a ZIP archive in the background and exposes a single share link. You can revisit the "Manage share link bundles" dialog to monitor progress, retry failed bundles, or delete links.
 !!! tip
@@ -514,25 +513,25 @@ flowchart TD
 Workflows allow you to filter by:
-   Source, e.g. documents uploaded via consume folder, API (& the web UI) and mail fetch
+- Source, e.g. documents uploaded via consume folder, API (& the web UI) and mail fetch
-   File name, including wildcards e.g. \*.pdf will apply to all pdfs.
+- File name, including wildcards e.g. \*.pdf will apply to all PDFs.
-   File path, including wildcards. Note that enabling `PAPERLESS_CONSUMER_RECURSIVE` would allow, for
+- File path, including wildcards. Note that enabling `PAPERLESS_CONSUMER_RECURSIVE` would allow, for
-    example, automatically assigning documents to different owners based on the upload directory.
+  example, automatically assigning documents to different owners based on the upload directory.
-   Mail rule. Choosing this option will force 'mail fetch' to be the workflow source.
+- Mail rule. Choosing this option will force 'mail fetch' to be the workflow source.
-   Content matching (`Added`, `Updated` and `Scheduled` triggers only). Filter document content using the matching settings.
+- Content matching (`Added`, `Updated` and `Scheduled` triggers only). Filter document content using the matching settings.
 There are also 'advanced' filters available for `Added`, `Updated` and `Scheduled` triggers:
-   Any Tags: Filter for documents with any of the specified tags.
+- Any Tags: Filter for documents with any of the specified tags.
-   All Tags: Filter for documents with all of the specified tags.
+- All Tags: Filter for documents with all of the specified tags.
-   No Tags: Filter for documents with none of the specified tags.
+- No Tags: Filter for documents with none of the specified tags.
-   Document type: Filter documents with this document type.
+- Document type: Filter documents with this document type.
-   Not Document types: Filter documents without any of these document types.
+- Not Document types: Filter documents without any of these document types.
-   Correspondent: Filter documents with this correspondent.
+- Correspondent: Filter documents with this correspondent.
-   Not Correspondents: Filter documents without any of these correspondents.
+- Not Correspondents: Filter documents without any of these correspondents.
-   Storage path: Filter documents with this storage path.
+- Storage path: Filter documents with this storage path.
-   Not Storage paths: Filter documents without any of these storage paths.
+- Not Storage paths: Filter documents without any of these storage paths.
-   Custom field query: Filter documents with a custom field query (the same as used for the document list filters).
+- Custom field query: Filter documents with a custom field query (the same as used for the document list filters).
 ### Workflow Actions
@@ -544,37 +543,37 @@ The following workflow action types are available:
 "Assignment" actions can assign:
-   Title, see [workflow placeholders](usage.md#workflow-placeholders) below
+- Title, see [workflow placeholders](usage.md#workflow-placeholders) below
-   Tags, correspondent, document type and storage path
+- Tags, correspondent, document type and storage path
-   Document owner
+- Document owner
-   View and / or edit permissions to users or groups
+- View and / or edit permissions to users or groups
-   Custom fields. Note that no value for the field will be set
+- Custom fields. Note that no value for the field will be set
 ##### Removal {#workflow-action-removal}
 "Removal" actions can remove either all of or specific sets of the following:
-   Tags, correspondents, document types or storage paths
+- Tags, correspondents, document types or storage paths
-   Document owner
+- Document owner
-   View and / or edit permissions
+- View and / or edit permissions
-   Custom fields
+- Custom fields
 ##### Email {#workflow-action-email}
 "Email" actions can send documents via email. This action requires a mail server to be [configured](configuration.md#email-sending). You can specify:
-   The recipient email address(es) separated by commas
+- The recipient email address(es) separated by commas
-   The subject and body of the email, which can include placeholders, see [placeholders](usage.md#workflow-placeholders) below
+- The subject and body of the email, which can include placeholders, see [placeholders](usage.md#workflow-placeholders) below
-   Whether to include the document as an attachment
+- Whether to include the document as an attachment
 ##### Webhook {#workflow-action-webhook}
 "Webhook" actions send a POST request to a specified URL. You can specify:
-   The URL to send the request to
+- The URL to send the request to
-   The request body as text or as key-value pairs, which can include placeholders, see [placeholders](usage.md#workflow-placeholders) below.
+- The request body as text or as key-value pairs, which can include placeholders, see [placeholders](usage.md#workflow-placeholders) below.
-   Encoding for the request body, either JSON or form data
+- Encoding for the request body, either JSON or form data
-   The request headers as key-value pairs
+- The request headers as key-value pairs
 For security reasons, webhooks can be limited to specific ports and disallowed from connecting to local URLs. See the relevant
 [configuration settings](configuration.md#workflow-webhooks) to change this behavior. If you are allowing non-admins to create workflows,
@@ -605,33 +604,33 @@ The available inputs differ depending on the type of workflow trigger.
 This is because at the time of consumption (when the text is to be set), no automatic tags etc. have been
 applied. You can use the following placeholders in the template with any trigger type:
-   `{{correspondent}}`: assigned correspondent name
+- `{{correspondent}}`: assigned correspondent name
-   `{{document_type}}`: assigned document type name
+- `{{document_type}}`: assigned document type name
-   `{{owner_username}}`: assigned owner username
+- `{{owner_username}}`: assigned owner username
-   `{{added}}`: added datetime
+- `{{added}}`: added datetime
-   `{{added_year}}`: added year
+- `{{added_year}}`: added year
-   `{{added_year_short}}`: added year
+- `{{added_year_short}}`: added year (two-digit)
-   `{{added_month}}`: added month
+- `{{added_month}}`: added month
-   `{{added_month_name}}`: added month name
+- `{{added_month_name}}`: added month name
-   `{{added_month_name_short}}`: added month short name
+- `{{added_month_name_short}}`: added month short name
-   `{{added_day}}`: added day
+- `{{added_day}}`: added day
-   `{{added_time}}`: added time in HH:MM format
+- `{{added_time}}`: added time in HH:MM format
-   `{{original_filename}}`: original file name without extension
+- `{{original_filename}}`: original file name without extension
-   `{{filename}}`: current file name without extension (for "added" workflows this may not be final yet, you can use `{{original_filename}}`)
+- `{{filename}}`: current file name without extension (for "added" workflows this may not be final yet, you can use `{{original_filename}}`)
-   `{{doc_title}}`: current document title (cannot be used in title assignment)
+- `{{doc_title}}`: current document title (cannot be used in title assignment)
 The following placeholders are only available for "added" or "updated" triggers
-   `{{created}}`: created datetime
+- `{{created}}`: created datetime
-   `{{created_year}}`: created year
+- `{{created_year}}`: created year
-   `{{created_year_short}}`: created year
+- `{{created_year_short}}`: created year (two-digit)
-   `{{created_month}}`: created month
+- `{{created_month}}`: created month
-   `{{created_month_name}}`: created month name
+- `{{created_month_name}}`: created month name
-   `{{created_month_name_short}}`: created month short name
+- `{{created_month_name_short}}`: created month short name
-   `{{created_day}}`: created day
+- `{{created_day}}`: created day
-   `{{created_time}}`: created time in HH:MM format
+- `{{created_time}}`: created time in HH:MM format
-   `{{doc_url}}`: URL to the document in the web UI. Requires the `PAPERLESS_URL` setting to be set.
+- `{{doc_url}}`: URL to the document in the web UI. Requires the `PAPERLESS_URL` setting to be set.
-   `{{doc_id}}`: Document ID
+- `{{doc_id}}`: Document ID
 ##### Examples
@@ -676,26 +675,26 @@ Multiple fields may be attached to a document but the same field name cannot be
 The following custom field types are supported:
-   `Text`: any text
+- `Text`: any text
-   `Boolean`: true / false (check / unchecked) field
+- `Boolean`: true / false (checked / unchecked) field
-   `Date`: date
+- `Date`: date
-   `URL`: a valid url
+- `URL`: a valid url
-   `Integer`: integer number e.g. 12
+- `Integer`: integer number e.g. 12
-   `Number`: float number e.g. 12.3456
+- `Number`: float number e.g. 12.3456
-   `Monetary`: [ISO 4217 currency code](https://en.wikipedia.org/wiki/ISO_4217#List_of_ISO_4217_currency_codes) and a number with exactly two decimals, e.g. USD12.30
+- `Monetary`: [ISO 4217 currency code](https://en.wikipedia.org/wiki/ISO_4217#List_of_ISO_4217_currency_codes) and a number with exactly two decimals, e.g. USD12.30
-   `Document Link`: reference(s) to other document(s) displayed as links, automatically creates a symmetrical link in reverse
+- `Document Link`: reference(s) to other document(s) displayed as links, automatically creates a symmetrical link in reverse
-   `Select`: a pre-defined list of strings from which the user can choose
+- `Select`: a pre-defined list of strings from which the user can choose
 ## PDF Actions
 Paperless-ngx supports basic editing operations for PDFs (these operations currently cannot be performed on non-PDF files). When viewing an individual document you can
 open the 'PDF Editor' to use a simple UI for re-arranging, rotating, deleting pages and splitting documents.
-   Merging documents: available when selecting multiple documents for 'bulk editing'.
+- Merging documents: available when selecting multiple documents for 'bulk editing'.
-   Rotating documents: available when selecting multiple documents for 'bulk editing' and via the pdf editor on an individual document's details page.
+- Rotating documents: available when selecting multiple documents for 'bulk editing' and via the pdf editor on an individual document's details page.
-   Splitting documents: via the pdf editor on an individual document's details page.
+- Splitting documents: via the pdf editor on an individual document's details page.
-   Deleting pages: via the pdf editor on an individual document's details page.
+- Deleting pages: via the pdf editor on an individual document's details page.
-   Re-arranging pages: via the pdf editor on an individual document's details page.
+- Re-arranging pages: via the pdf editor on an individual document's details page.
 !!! important
@@ -773,18 +772,18 @@ the system.
 Here are a couple examples of tags and types that you could use in your
 collection.
-   An `inbox` tag for newly added documents that you haven't manually
+- An `inbox` tag for newly added documents that you haven't manually
-    edited yet.
+  edited yet.
-   A tag `car` for everything car related (repairs, registration,
+- A tag `car` for everything car related (repairs, registration,
-    insurance, etc)
+  insurance, etc)
-   A tag `todo` for documents that you still need to do something with,
+- A tag `todo` for documents that you still need to do something with,
-    such as reply, or perform some task online.
+  such as reply, or perform some task online.
-   A tag `bank account x` for all bank statement related to that
+- A tag `bank account x` for all bank statements related to that
-    account.
+  account.
-   A tag `mail` for anything that you added to paperless via its mail
+- A tag `mail` for anything that you added to paperless via its mail
-    processing capabilities.
+  processing capabilities.
-   A tag `missing_metadata` when you still need to add some metadata to
+- A tag `missing_metadata` when you still need to add some metadata to
-    a document, but can't or don't want to do this right now.
+  a document, but can't or don't want to do this right now.
 ## Searching {#basic-usage_searching}
@@ -873,8 +872,8 @@ The following diagram shows how easy it is to manage your documents.
 ### Preparations in paperless
-   Create an inbox tag that gets assigned to all new documents.
+- Create an inbox tag that gets assigned to all new documents.
-   Create a TODO tag.
+- Create a TODO tag.
 ### Processing of the physical documents
@@ -948,15 +947,15 @@ Some documents require attention and require you to act on the document.
 You may take two different approaches to handle these documents based on
 how regularly you intend to scan documents and use paperless.
-   If you scan and process your documents in paperless regularly,
+- If you scan and process your documents in paperless regularly,
-    assign a TODO tag to all scanned documents that you need to process.
+  assign a TODO tag to all scanned documents that you need to process.
-    Create a saved view on the dashboard that shows all documents with
+  Create a saved view on the dashboard that shows all documents with
-    this tag.
+  this tag.
-   If you do not scan documents regularly and use paperless solely for
+- If you do not scan documents regularly and use paperless solely for
-    archiving, create a physical todo box next to your physical inbox
+  archiving, create a physical todo box next to your physical inbox
-    and put documents you need to process in the TODO box. When you
+  and put documents you need to process in the TODO box. When you
-    performed the task associated with the document, move it to the
+  performed the task associated with the document, move it to the
-    inbox.
+  inbox.
 ## Remote OCR
@@ -977,64 +976,63 @@ or page limitations (e.g. with a free tier).
 Paperless-ngx consists of the following components:
-   **The webserver:** This serves the administration pages, the API,
+- **The webserver:** This serves the administration pages, the API,
-    and the new frontend. This is the main tool you'll be using to interact
+  and the new frontend. This is the main tool you'll be using to interact
-    with paperless. You may start the webserver directly with
+  with paperless. You may start the webserver directly with
-    ```shell-session
+  ```shell-session
-    cd /path/to/paperless/src/
+  cd /path/to/paperless/src/
-    granian --interface asginl --ws "paperless.asgi:application"
+  granian --interface asginl --ws "paperless.asgi:application"
-    ```
+  ```
-    or by any other means such as Apache `mod_wsgi`.
+  or by any other means such as Apache `mod_wsgi`.
-   **The consumer:** This is what watches your consumption folder for
+- **The consumer:** This is what watches your consumption folder for
-    documents. However, the consumer itself does not really consume your
+  documents. However, the consumer itself does not really consume your
-    documents. Now it notifies a task processor that a new file is ready
+  documents. Now it notifies a task processor that a new file is ready
-    for consumption. I suppose it should be named differently. This was
+  for consumption. I suppose it should be named differently. This was
-    also used to check your emails, but that's now done elsewhere as
+  also used to check your emails, but that's now done elsewhere as
-    well.
+  well.
-    Start the consumer with the management command `document_consumer`:
+  Start the consumer with the management command `document_consumer`:
-    ```shell-session
+  ```shell-session
-    cd /path/to/paperless/src/
+  cd /path/to/paperless/src/
-    python3 manage.py document_consumer
+  python3 manage.py document_consumer
-    ```
+  ```
-   **The task processor:** Paperless relies on [Celery - Distributed
+- **The task processor:** Paperless relies on [Celery - Distributed
-    Task Queue](https://docs.celeryq.dev/en/stable/index.html) for doing
+  Task Queue](https://docs.celeryq.dev/en/stable/index.html) for doing
-    most of the heavy lifting. This is a task queue that accepts tasks
+  most of the heavy lifting. This is a task queue that accepts tasks
-    from multiple sources and processes these in parallel. It also comes
+  from multiple sources and processes these in parallel. It also comes
-    with a scheduler that executes certain commands periodically.
+  with a scheduler that executes certain commands periodically.
-    This task processor is responsible for:
+  This task processor is responsible for:
  - Consuming documents. When the consumer finds new documents, it
    notifies the task processor to start a consumption task.
  - The task processor also performs the consumption of any
    documents you upload through the web interface.
  - Consuming emails. It periodically checks your configured
    accounts for new emails and notifies the task processor to
    consume the attachment of an email.
  - Maintaining the search index and the automatic matching
    algorithm. These are things that paperless needs to do from time
    to time in order to operate properly.
-    -   Consuming documents. When the consumer finds new documents, it
+  This allows paperless to process multiple documents from your
-        notifies the task processor to start a consumption task.
+  consumption folder in parallel! On a modern multi core system, this
-    -   The task processor also performs the consumption of any
+  makes the consumption process with full OCR blazingly fast.
        documents you upload through the web interface.
    -   Consuming emails. It periodically checks your configured
        accounts for new emails and notifies the task processor to
        consume the attachment of an email.
    -   Maintaining the search index and the automatic matching
        algorithm. These are things that paperless needs to do from time
        to time in order to operate properly.
-    This allows paperless to process multiple documents from your
+  The task processor comes with a built-in admin interface that you
-    consumption folder in parallel! On a modern multi core system, this
+  can use to check whenever any of the tasks fail and inspect the
-    makes the consumption process with full OCR blazingly fast.
+  errors (i.e., wrong email credentials, errors during consuming a
  specific file, etc).
-    The task processor comes with a built-in admin interface that you
+- A [redis](https://redis.io/) message broker: This is a really
-    can use to check whenever any of the tasks fail and inspect the
+  lightweight service that is responsible for getting the tasks from
-    errors (i.e., wrong email credentials, errors during consuming a
+  the webserver and the consumer to the task scheduler. These run in a
-    specific file, etc).
+  different process (maybe even on different machines!), and
  therefore, this is necessary.
-   A [redis](https://redis.io/) message broker: This is a really
+- Optional: A database server. Paperless supports PostgreSQL, MariaDB
-    lightweight service that is responsible for getting the tasks from
+  and SQLite for storing its data.
    the webserver and the consumer to the task scheduler. These run in a
    different process (maybe even on different machines!), and
    therefore, this is necessary.
 -   Optional: A database server. Paperless supports PostgreSQL, MariaDB
    and SQLite for storing its data.
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "paperless-ngx"
-version = "2.20.10"
+version = "2.20.13"
 description = "A community-supported supercharged document management system: scan, index and archive all your physical documents"
 readme = "README.md"
 requires-python = ">=3.11"
@@ -26,7 +26,7 @@ dependencies = [
  # WARNING: django does not use semver.
  #          Only patch versions are guaranteed to not introduce breaking changes.
  "django~=5.2.10",
-  "django-allauth[mfa,socialaccount]~=65.14.0",
+  "django-allauth[mfa,socialaccount]~=65.15.0",
  "django-auditlog~=3.4.1",
  "django-cachalot~=2.9.0",
  "django-celery-results~=2.6.0",
@@ -42,10 +42,10 @@ dependencies = [
  "djangorestframework~=3.16",
  "djangorestframework-guardian~=0.4.0",
  "drf-spectacular~=0.28",
-  "drf-spectacular-sidecar~=2026.1.1",
+  "drf-spectacular-sidecar~=2026.3.1",
  "drf-writable-nested~=0.7.1",
  "faiss-cpu>=1.10",
-  "filelock~=3.20.3",
+  "filelock~=3.25.2",
  "flower~=2.0.1",
  "gotenberg-client~=0.13.1",
  "httpx-oauth~=0.16",
@@ -60,7 +60,7 @@ dependencies = [
  "llama-index-llms-openai>=0.6.13",
  "llama-index-vector-stores-faiss>=0.5.2",
  "nltk~=3.9.1",
-  "ocrmypdf~=16.13.0",
+  "ocrmypdf~=17.3.0",
  "openai>=1.76",
  "pathvalidate~=3.3.1",
  "pdf2image~=1.17.0",
@@ -72,7 +72,7 @@ dependencies = [
  "rapidfuzz~=3.14.0",
  "redis[hiredis]~=5.2.1",
  "regex>=2025.9.18",
-  "scikit-learn~=1.7.0",
+  "scikit-learn~=1.8.0",
  "sentence-transformers>=4.1",
  "setproctitle~=1.3.4",
  "tika-client~=0.10.0",
@@ -111,7 +111,7 @@ docs = [
 testing = [
  "daphne",
  "factory-boy~=3.3.1",
-  "faker~=40.5.1",
+  "faker~=40.8.0",
  "imagehash",
  "pytest~=9.0.0",
  "pytest-cov~=7.0.0",
@@ -248,15 +248,13 @@ lint.per-file-ignores."docker/wait-for-redis.py" = [
 lint.per-file-ignores."src/documents/models.py" = [
  "SIM115",
 ]
-lint.per-file-ignores."src/paperless_tesseract/tests/test_parser.py" = [
+
  "RUF001",
 ]
 lint.isort.force-single-line = true
 [tool.codespell]
 write-changes = true
 ignore-words-list = "criterias,afterall,valeu,ureue,equest,ure,assertIn,Oktober,commitish"
-skip = "src-ui/src/locale/*,src-ui/pnpm-lock.yaml,src-ui/e2e/*,src/paperless_mail/tests/samples/*,src/documents/tests/samples/*,*.po,*.json"
+skip = "src-ui/src/locale/*,src-ui/pnpm-lock.yaml,src-ui/e2e/*,src/paperless_mail/tests/samples/*,src/paperless/tests/samples/mail/*,src/documents/tests/samples/*,*.po,*.json"
 [tool.pytest]
 minversion = "9.0"
--- a/src-ui/messages.xlf
+++ b/src-ui/messages.xlf
@@ -5,14 +5,14 @@
      <trans-unit id="ngb.alert.close" datatype="html">
        <source>Close</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/alert/alert.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/alert/alert.ts</context>
          <context context-type="linenumber">50</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.carousel.slide-number" datatype="html">
        <source> Slide <x id="INTERPOLATION" equiv-text="ueryList&lt;NgbSli"/> of <x id="INTERPOLATION_1" equiv-text="EventSource = N"/> </source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/carousel/carousel.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/carousel/carousel.ts</context>
          <context context-type="linenumber">131,135</context>
        </context-group>
        <note priority="1" from="description">Currently selected slide number read by screen reader</note>
@@ -20,114 +20,114 @@
      <trans-unit id="ngb.carousel.previous" datatype="html">
        <source>Previous</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/carousel/carousel.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/carousel/carousel.ts</context>
          <context context-type="linenumber">159,162</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.carousel.next" datatype="html">
        <source>Next</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/carousel/carousel.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/carousel/carousel.ts</context>
          <context context-type="linenumber">202,203</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.datepicker.select-month" datatype="html">
        <source>Select month</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/datepicker/datepicker-navigation-select.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/datepicker/datepicker-navigation-select.ts</context>
          <context context-type="linenumber">91</context>
        </context-group>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/datepicker/datepicker-navigation-select.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/datepicker/datepicker-navigation-select.ts</context>
          <context context-type="linenumber">91</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.datepicker.select-year" datatype="html">
        <source>Select year</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/datepicker/datepicker-navigation-select.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/datepicker/datepicker-navigation-select.ts</context>
          <context context-type="linenumber">91</context>
        </context-group>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/datepicker/datepicker-navigation-select.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/datepicker/datepicker-navigation-select.ts</context>
          <context context-type="linenumber">91</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.datepicker.previous-month" datatype="html">
        <source>Previous month</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/datepicker/datepicker-navigation.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/datepicker/datepicker-navigation.ts</context>
          <context context-type="linenumber">83,85</context>
        </context-group>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/datepicker/datepicker-navigation.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/datepicker/datepicker-navigation.ts</context>
          <context context-type="linenumber">112</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.datepicker.next-month" datatype="html">
        <source>Next month</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/datepicker/datepicker-navigation.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/datepicker/datepicker-navigation.ts</context>
          <context context-type="linenumber">112</context>
        </context-group>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/datepicker/datepicker-navigation.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/datepicker/datepicker-navigation.ts</context>
          <context context-type="linenumber">112</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.pagination.first" datatype="html">
        <source>««</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/pagination/pagination-config.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/pagination/pagination-config.ts</context>
          <context context-type="linenumber">20</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.pagination.previous" datatype="html">
        <source>«</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/pagination/pagination-config.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/pagination/pagination-config.ts</context>
          <context context-type="linenumber">20</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.pagination.next" datatype="html">
        <source>»</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/pagination/pagination-config.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/pagination/pagination-config.ts</context>
          <context context-type="linenumber">20</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.pagination.last" datatype="html">
        <source>»»</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/pagination/pagination-config.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/pagination/pagination-config.ts</context>
          <context context-type="linenumber">20</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.pagination.first-aria" datatype="html">
        <source>First</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/pagination/pagination-config.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/pagination/pagination-config.ts</context>
          <context context-type="linenumber">20</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.pagination.previous-aria" datatype="html">
        <source>Previous</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/pagination/pagination-config.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/pagination/pagination-config.ts</context>
          <context context-type="linenumber">20</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.pagination.next-aria" datatype="html">
        <source>Next</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/pagination/pagination-config.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/pagination/pagination-config.ts</context>
          <context context-type="linenumber">20</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.pagination.last-aria" datatype="html">
        <source>Last</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/pagination/pagination-config.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/pagination/pagination-config.ts</context>
          <context context-type="linenumber">20</context>
        </context-group>
      </trans-unit>
@@ -135,105 +135,105 @@
        <source><x id="INTERPOLATION" equiv-text="barConfig);
 	pu"/></source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/progressbar/progressbar.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/progressbar/progressbar.ts</context>
          <context context-type="linenumber">41,42</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.timepicker.HH" datatype="html">
        <source>HH</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/timepicker/timepicker-config.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/timepicker/timepicker-config.ts</context>
          <context context-type="linenumber">21</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.timepicker.hours" datatype="html">
        <source>Hours</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/timepicker/timepicker-config.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/timepicker/timepicker-config.ts</context>
          <context context-type="linenumber">21</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.timepicker.MM" datatype="html">
        <source>MM</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/timepicker/timepicker-config.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/timepicker/timepicker-config.ts</context>
          <context context-type="linenumber">21</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.timepicker.minutes" datatype="html">
        <source>Minutes</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/timepicker/timepicker-config.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/timepicker/timepicker-config.ts</context>
          <context context-type="linenumber">21</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.timepicker.increment-hours" datatype="html">
        <source>Increment hours</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/timepicker/timepicker-config.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/timepicker/timepicker-config.ts</context>
          <context context-type="linenumber">21</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.timepicker.decrement-hours" datatype="html">
        <source>Decrement hours</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/timepicker/timepicker-config.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/timepicker/timepicker-config.ts</context>
          <context context-type="linenumber">21</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.timepicker.increment-minutes" datatype="html">
        <source>Increment minutes</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/timepicker/timepicker-config.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/timepicker/timepicker-config.ts</context>
          <context context-type="linenumber">21</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.timepicker.decrement-minutes" datatype="html">
        <source>Decrement minutes</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/timepicker/timepicker-config.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/timepicker/timepicker-config.ts</context>
          <context context-type="linenumber">21</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.timepicker.SS" datatype="html">
        <source>SS</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/timepicker/timepicker-config.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/timepicker/timepicker-config.ts</context>
          <context context-type="linenumber">21</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.timepicker.seconds" datatype="html">
        <source>Seconds</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/timepicker/timepicker-config.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/timepicker/timepicker-config.ts</context>
          <context context-type="linenumber">21</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.timepicker.increment-seconds" datatype="html">
        <source>Increment seconds</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/timepicker/timepicker-config.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/timepicker/timepicker-config.ts</context>
          <context context-type="linenumber">21</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.timepicker.decrement-seconds" datatype="html">
        <source>Decrement seconds</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/timepicker/timepicker-config.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/timepicker/timepicker-config.ts</context>
          <context context-type="linenumber">21</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.timepicker.PM" datatype="html">
        <source><x id="INTERPOLATION"/></source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/timepicker/timepicker-config.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/timepicker/timepicker-config.ts</context>
          <context context-type="linenumber">21</context>
        </context-group>
      </trans-unit>
      <trans-unit id="ngb.toast.close-aria" datatype="html">
        <source>Close</source>
        <context-group purpose="location">
-          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.0_@angular+core@21.2.0_@angular+_fdecb2f5429dfeda6301fd300107de5b/node_modules/src/toast/toast-config.ts</context>
+          <context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/toast/toast-config.ts</context>
          <context context-type="linenumber">54</context>
        </context-group>
      </trans-unit>
@@ -532,15 +532,79 @@
          <context context-type="linenumber">125</context>
        </context-group>
      </trans-unit>
-      <trans-unit id="3823219296477075982" datatype="html">
+      <trans-unit id="2159130950882492111" datatype="html">
-        <source>Discard</source>
+        <source>Cancel</source>
        <context-group purpose="location">
          <context context-type="sourcefile">src/app/components/admin/config/config.component.html</context>
          <context context-type="linenumber">62</context>
        </context-group>
        <context-group purpose="location">
-          <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.html</context>
+          <context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
-          <context context-type="linenumber">452</context>
+          <context context-type="linenumber">399</context>
        </context-group>
        <context-group purpose="location">
          <context context-type="sourcefile">src/app/components/common/confirm-dialog/confirm-dialog.component.ts</context>
          <context context-type="linenumber">47</context>
        </context-group>
        <context-group purpose="location">
          <context context-type="sourcefile">src/app/components/common/edit-dialog/correspondent-edit-dialog/correspondent-edit-dialog.component.html</context>
          <context context-type="linenumber">25</context>
        </context-group>
        <context-group purpose="location">
          <context context-type="sourcefile">src/app/components/common/edit-dialog/custom-field-edit-dialog/custom-field-edit-dialog.component.html</context>
          <context context-type="linenumber">51</context>
        </context-group>
        <context-group purpose="location">
          <context context-type="sourcefile">src/app/components/common/edit-dialog/document-type-edit-dialog/document-type-edit-dialog.component.html</context>
          <context context-type="linenumber">27</context>
        </context-group>
        <context-group purpose="location">
          <context context-type="sourcefile">src/app/components/common/edit-dialog/group-edit-dialog/group-edit-dialog.component.html</context>
          <context context-type="linenumber">19</context>
        </context-group>
        <context-group purpose="location">
          <context context-type="sourcefile">src/app/components/common/edit-dialog/mail-account-edit-dialog/mail-account-edit-dialog.component.html</context>
          <context context-type="linenumber">39</context>
        </context-group>
        <context-group purpose="location">
          <context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html</context>
          <context context-type="linenumber">80</context>
        </context-group>
        <context-group purpose="location">
          <context context-type="sourcefile">src/app/components/common/edit-dialog/storage-path-edit-dialog/storage-path-edit-dialog.component.html</context>
          <context context-type="linenumber">76</context>
        </context-group>
        <context-group purpose="location">
          <context context-type="sourcefile">src/app/components/common/edit-dialog/tag-edit-dialog/tag-edit-dialog.component.html</context>
          <context context-type="linenumber">30</context>
        </context-group>
        <context-group purpose="location">
          <context context-type="sourcefile">src/app/components/common/edit-dialog/user-edit-dialog/user-edit-dialog.component.html</context>
          <context context-type="linenumber">56</context>
        </context-group>
        <context-group purpose="location">
          <context context-type="sourcefile">src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.html</context>
          <context context-type="linenumber">115</context>
        </context-group>
        <context-group purpose="location">
          <context context-type="sourcefile">src/app/components/common/permissions-dialog/permissions-dialog.component.html</context>
          <context context-type="linenumber">31</context>
        </context-group>
        <context-group purpose="location">
          <context context-type="sourcefile">src/app/components/common/profile-edit-dialog/profile-edit-dialog.component.html</context>
          <context context-type="linenumber">182</context>
        </context-group>
        <context-group purpose="location">
          <context context-type="sourcefile">src/app/components/document-list/bulk-editor/custom-fields-bulk-edit-dialog/custom-fields-bulk-edit-dialog.component.html</context>
          <context context-type="linenumber">81</context>
        </context-group>
        <context-group purpose="location">
          <context context-type="sourcefile">src/app/components/document-list/save-view-config-dialog/save-view-config-dialog.component.html</context>
          <context context-type="linenumber">21</context>
        </context-group>
        <context-group purpose="location">
          <context context-type="sourcefile">src/app/components/manage/saved-views/saved-views.component.html</context>
          <context context-type="linenumber">82</context>
        </context-group>
      </trans-unit>
      <trans-unit id="3768927257183755959" datatype="html">
@@ -1514,77 +1578,6 @@
          <context context-type="linenumber">389</context>
        </context-group>
      </trans-unit>
      <trans-unit id="2159130950882492111" datatype="html">
        <source>Cancel</source>
        <context-group purpose="location">
          <context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
          <context context-type="linenumber">399</context>
        </context-group>
        <context-group purpose="location">
          <context context-type="sourcefile">src/app/components/common/confirm-dialog/confirm-dialog.component.ts</context>
          <context context-type="linenumber">47</context>
        </context-group>
        <context-group purpose="location">
          <context context-type="sourcefile">src/app/components/common/edit-dialog/correspondent-edit-dialog/correspondent-edit-dialog.component.html</context>
          <context context-type="linenumber">25</context>
        </context-group>
        <context-group purpose="location">
          <context context-type="sourcefile">src/app/components/common/edit-dialog/custom-field-edit-dialog/custom-field-edit-dialog.component.html</context>
          <context context-type="linenumber">51</context>
        </context-group>
        <context-group purpose="location">
          <context context-type="sourcefile">src/app/components/common/edit-dialog/document-type-edit-dialog/document-type-edit-dialog.component.html</context>
          <context context-type="linenumber">27</context>
        </context-group>
        <context-group purpose="location">
          <context context-type="sourcefile">src/app/components/common/edit-dialog/group-edit-dialog/group-edit-dialog.component.html</context>
          <context context-type="linenumber">19</context>
        </context-group>
        <context-group purpose="location">
          <context context-type="sourcefile">src/app/components/common/edit-dialog/mail-account-edit-dialog/mail-account-edit-dialog.component.html</context>
          <context context-type="linenumber">39</context>
        </context-group>
        <context-group purpose="location">
          <context context-type="sourcefile">src/app/components/common/edit-dialog/mail-rule-edit-dialog/mail-rule-edit-dialog.component.html</context>
          <context context-type="linenumber">80</context>
        </context-group>
        <context-group purpose="location">
          <context context-type="sourcefile">src/app/components/common/edit-dialog/storage-path-edit-dialog/storage-path-edit-dialog.component.html</context>
          <context context-type="linenumber">76</context>
        </context-group>
        <context-group purpose="location">
          <context context-type="sourcefile">src/app/components/common/edit-dialog/tag-edit-dialog/tag-edit-dialog.component.html</context>
          <context context-type="linenumber">30</context>
        </context-group>
        <context-group purpose="location">
          <context context-type="sourcefile">src/app/components/common/edit-dialog/user-edit-dialog/user-edit-dialog.component.html</context>
          <context context-type="linenumber">56</context>
        </context-group>
        <context-group purpose="location">
          <context context-type="sourcefile">src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.html</context>
          <context context-type="linenumber">115</context>
        </context-group>
        <context-group purpose="location">
          <context context-type="sourcefile">src/app/components/common/permissions-dialog/permissions-dialog.component.html</context>
          <context context-type="linenumber">31</context>
        </context-group>
        <context-group purpose="location">
          <context context-type="sourcefile">src/app/components/common/profile-edit-dialog/profile-edit-dialog.component.html</context>
          <context context-type="linenumber">182</context>
        </context-group>
        <context-group purpose="location">
          <context context-type="sourcefile">src/app/components/document-list/bulk-editor/custom-fields-bulk-edit-dialog/custom-fields-bulk-edit-dialog.component.html</context>
          <context context-type="linenumber">81</context>
        </context-group>
        <context-group purpose="location">
          <context context-type="sourcefile">src/app/components/document-list/save-view-config-dialog/save-view-config-dialog.component.html</context>
          <context context-type="linenumber">21</context>
        </context-group>
        <context-group purpose="location">
          <context context-type="sourcefile">src/app/components/manage/saved-views/saved-views.component.html</context>
          <context context-type="linenumber">82</context>
        </context-group>
      </trans-unit>
      <trans-unit id="6839066544204061364" datatype="html">
        <source>Use system language</source>
        <context-group purpose="location">
@@ -5736,7 +5729,7 @@
        <source>Open <x id="PH" equiv-text="this.title"/> filter</source>
        <context-group purpose="location">
          <context context-type="sourcefile">src/app/components/common/filterable-dropdown/filterable-dropdown.component.ts</context>
-          <context context-type="linenumber">788</context>
+          <context context-type="linenumber">823</context>
        </context-group>
      </trans-unit>
      <trans-unit id="7005745151564974365" datatype="html">
@@ -7489,7 +7482,7 @@
        </context-group>
        <context-group purpose="location">
          <context context-type="sourcefile">src/main.ts</context>
-          <context context-type="linenumber">411</context>
+          <context context-type="linenumber">416</context>
        </context-group>
      </trans-unit>
      <trans-unit id="5028777105388019087" datatype="html">
@@ -7684,6 +7677,13 @@
          <context context-type="linenumber">450</context>
        </context-group>
      </trans-unit>
      <trans-unit id="3823219296477075982" datatype="html">
        <source>Discard</source>
        <context-group purpose="location">
          <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.html</context>
          <context context-type="linenumber">452</context>
        </context-group>
      </trans-unit>
      <trans-unit id="1309556917227148591" datatype="html">
        <source>Document loading...</source>
        <context-group purpose="location">
@@ -11352,14 +11352,14 @@
        <source>Prev</source>
        <context-group purpose="location">
          <context context-type="sourcefile">src/main.ts</context>
-          <context context-type="linenumber">410</context>
+          <context context-type="linenumber">415</context>
        </context-group>
      </trans-unit>
      <trans-unit id="1241348629231510663" datatype="html">
        <source>End</source>
        <context-group purpose="location">
          <context context-type="sourcefile">src/main.ts</context>
-          <context context-type="linenumber">412</context>
+          <context context-type="linenumber">417</context>
        </context-group>
      </trans-unit>
    </body>
--- a/src-ui/package.json
+++ b/src-ui/package.json
@@ -1,6 +1,6 @@
 {
  "name": "paperless-ngx-ui",
-  "version": "2.20.10",
+  "version": "2.20.13",
  "scripts": {
    "preinstall": "npx only-allow pnpm",
    "ng": "ng",
@@ -11,17 +11,17 @@
  },
  "private": true,
  "dependencies": {
-    "@angular/cdk": "^21.2.0",
+    "@angular/cdk": "^21.2.2",
-    "@angular/common": "~21.2.0",
+    "@angular/common": "~21.2.4",
-    "@angular/compiler": "~21.2.0",
+    "@angular/compiler": "~21.2.4",
-    "@angular/core": "~21.2.0",
+    "@angular/core": "~21.2.4",
-    "@angular/forms": "~21.2.0",
+    "@angular/forms": "~21.2.4",
-    "@angular/localize": "~21.2.0",
+    "@angular/localize": "~21.2.4",
-    "@angular/platform-browser": "~21.2.0",
+    "@angular/platform-browser": "~21.2.4",
-    "@angular/platform-browser-dynamic": "~21.2.0",
+    "@angular/platform-browser-dynamic": "~21.2.4",
-    "@angular/router": "~21.2.0",
+    "@angular/router": "~21.2.4",
    "@ng-bootstrap/ng-bootstrap": "^20.0.0",
-    "@ng-select/ng-select": "^21.4.1",
+    "@ng-select/ng-select": "^21.5.2",
    "@ngneat/dirty-check-forms": "^3.0.3",
    "@popperjs/core": "^2.11.8",
    "bootstrap": "^5.3.8",
@@ -42,26 +42,26 @@
  "devDependencies": {
    "@angular-builders/custom-webpack": "^21.0.3",
    "@angular-builders/jest": "^21.0.3",
-    "@angular-devkit/core": "^21.2.0",
+    "@angular-devkit/core": "^21.2.2",
-    "@angular-devkit/schematics": "^21.2.0",
+    "@angular-devkit/schematics": "^21.2.2",
    "@angular-eslint/builder": "21.3.0",
    "@angular-eslint/eslint-plugin": "21.3.0",
    "@angular-eslint/eslint-plugin-template": "21.3.0",
    "@angular-eslint/schematics": "21.3.0",
    "@angular-eslint/template-parser": "21.3.0",
-    "@angular/build": "^21.2.0",
+    "@angular/build": "^21.2.2",
-    "@angular/cli": "~21.2.0",
+    "@angular/cli": "~21.2.2",
-    "@angular/compiler-cli": "~21.2.0",
+    "@angular/compiler-cli": "~21.2.4",
    "@codecov/webpack-plugin": "^1.9.1",
    "@playwright/test": "^1.58.2",
    "@types/jest": "^30.0.0",
-    "@types/node": "^25.3.3",
+    "@types/node": "^25.4.0",
-    "@typescript-eslint/eslint-plugin": "^8.54.0",
+    "@typescript-eslint/eslint-plugin": "^8.57.0",
-    "@typescript-eslint/parser": "^8.54.0",
+    "@typescript-eslint/parser": "^8.57.0",
-    "@typescript-eslint/utils": "^8.54.0",
+    "@typescript-eslint/utils": "^8.57.0",
-    "eslint": "^10.0.2",
+    "eslint": "^10.0.3",
-    "jest": "30.2.0",
+    "jest": "30.3.0",
-    "jest-environment-jsdom": "^30.2.0",
+    "jest-environment-jsdom": "^30.3.0",
    "jest-junit": "^16.0.0",
    "jest-preset-angular": "^16.1.1",
    "jest-websocket-mock": "^2.5.0",
--- a/src-ui/pnpm-lock.yaml
+++ b/src-ui/pnpm-lock.yaml
--- a/src-ui/src/app/components/admin/config/config.component.html
+++ b/src-ui/src/app/components/admin/config/config.component.html
@@ -59,7 +59,7 @@
    <div [ngbNavOutlet]="nav" class="border-start border-end border-bottom p-3 mb-3 shadow-sm"></div>
    <div class="btn-toolbar" role="toolbar">
        <div class="btn-group me-2">
-            <button type="button" (click)="discardChanges()" class="btn btn-outline-secondary" [disabled]="loading || (isDirty$ | async) === false" i18n>Discard</button>
+            <button type="button" (click)="discardChanges()" class="btn btn-outline-secondary" [disabled]="loading || (isDirty$ | async) === false" i18n>Cancel</button>
        </div>
        <div class="btn-group">
            <button type="submit" class="btn btn-primary" [disabled]="loading || !configForm.valid || (isDirty$ | async) === false" i18n>Save</button>
--- a/src-ui/src/app/components/common/edit-dialog/edit-dialog.component.ts
+++ b/src-ui/src/app/components/common/edit-dialog/edit-dialog.component.ts
@@ -31,8 +31,8 @@ export enum EditDialogMode {
@Directive()
 export abstract class EditDialogComponent<
-    T extends ObjectWithPermissions | ObjectWithId,
+  T extends ObjectWithPermissions | ObjectWithId,
-  >
+>
  extends LoadingComponentWithPermissions
  implements OnInit
 {
--- a/src-ui/src/app/components/common/filterable-dropdown/filterable-dropdown.component.spec.ts
+++ b/src-ui/src/app/components/common/filterable-dropdown/filterable-dropdown.component.spec.ts
@@ -631,6 +631,59 @@ describe('FilterableDropdownComponent & FilterableDropdownSelectionModel', () =>
    ])
  })
  it('deselecting a parent clears selected descendants', () => {
    const root: Tag = { id: 100, name: 'Root Tag' }
    const child: Tag = { id: 101, name: 'Child Tag', parent: root.id }
    const grandchild: Tag = {
      id: 102,
      name: 'Grandchild Tag',
      parent: child.id,
    }
    const other: Tag = { id: 103, name: 'Other Tag' }
    selectionModel.items = [root, child, grandchild, other]
    selectionModel.set(root.id, ToggleableItemState.Selected, false)
    selectionModel.set(child.id, ToggleableItemState.Selected, false)
    selectionModel.set(grandchild.id, ToggleableItemState.Selected, false)
    selectionModel.set(other.id, ToggleableItemState.Selected, false)
    selectionModel.toggle(root.id, false)
    expect(selectionModel.getSelectedItems()).toEqual([other])
  })
  it('un-excluding a parent clears excluded descendants', () => {
    const root: Tag = { id: 110, name: 'Root Tag' }
    const child: Tag = { id: 111, name: 'Child Tag', parent: root.id }
    const other: Tag = { id: 112, name: 'Other Tag' }
    selectionModel.items = [root, child, other]
    selectionModel.set(root.id, ToggleableItemState.Excluded, false)
    selectionModel.set(child.id, ToggleableItemState.Excluded, false)
    selectionModel.set(other.id, ToggleableItemState.Excluded, false)
    selectionModel.exclude(root.id, false)
    expect(selectionModel.getExcludedItems()).toEqual([other])
  })
  it('excluding a selected parent clears selected descendants', () => {
    const root: Tag = { id: 120, name: 'Root Tag' }
    const child: Tag = { id: 121, name: 'Child Tag', parent: root.id }
    const other: Tag = { id: 122, name: 'Other Tag' }
    selectionModel.manyToOne = true
    selectionModel.items = [root, child, other]
    selectionModel.set(root.id, ToggleableItemState.Selected, false)
    selectionModel.set(child.id, ToggleableItemState.Selected, false)
    selectionModel.set(other.id, ToggleableItemState.Selected, false)
    selectionModel.exclude(root.id, false)
    expect(selectionModel.getExcludedItems()).toEqual([root])
    expect(selectionModel.getSelectedItems()).toEqual([other])
  })
  it('resorts items immediately when document count sorting enabled', () => {
    const apple: Tag = { id: 55, name: 'Apple' }
    const zebra: Tag = { id: 56, name: 'Zebra' }
--- a/src-ui/src/app/components/common/filterable-dropdown/filterable-dropdown.component.ts
+++ b/src-ui/src/app/components/common/filterable-dropdown/filterable-dropdown.component.ts
@@ -235,6 +235,7 @@ export class FilterableDropdownSelectionModel {
      state == ToggleableItemState.Excluded
    ) {
      this.temporarySelectionStates.delete(id)
      this.clearDescendantSelections(id)
    }
    if (!id) {
@@ -261,6 +262,7 @@ export class FilterableDropdownSelectionModel {
      if (this.manyToOne || this.singleSelect) {
        this.temporarySelectionStates.set(id, ToggleableItemState.Excluded)
        this.clearDescendantSelections(id)
        if (this.singleSelect) {
          for (let key of this.temporarySelectionStates.keys()) {
@@ -281,9 +283,15 @@ export class FilterableDropdownSelectionModel {
          newState = ToggleableItemState.NotSelected
        }
        this.temporarySelectionStates.set(id, newState)
        if (newState == ToggleableItemState.Excluded) {
          this.clearDescendantSelections(id)
        }
      }
    } else if (!id || state == ToggleableItemState.Excluded) {
      this.temporarySelectionStates.delete(id)
      if (id) {
        this.clearDescendantSelections(id)
      }
    }
    if (fireEvent) {
@@ -295,6 +303,33 @@ export class FilterableDropdownSelectionModel {
    return this.selectionStates.get(id) || ToggleableItemState.NotSelected
  }
  // Forget the pending (temporary) state of every item nested beneath `id`.
  private clearDescendantSelections(id: number) {
    this.getDescendantIDs(id).forEach((descendantID) => {
      this.temporarySelectionStates.delete(descendantID)
    })
  }
  /**
   * Returns the ids of all items nested below `id` (children, grandchildren,
   * ...) in breadth-first order. The starting id itself is never included.
   *
   * Only items with a numeric `id` and a numeric `parent` take part in the
   * hierarchy; everything else is ignored.
   */
  private getDescendantIDs(id: number): number[] {
    // Index children by parent once so the walk below is linear in the number
    // of items instead of rescanning the whole list for every visited node.
    const childrenByParent = new Map<number, number[]>()
    for (const item of this._items) {
      if (typeof item?.id !== 'number') {
        continue
      }
      const parentID = (item as any)['parent']
      if (typeof parentID !== 'number') {
        continue
      }
      const siblings = childrenByParent.get(parentID)
      if (siblings) {
        siblings.push(item.id)
      } else {
        childrenByParent.set(parentID, [item.id])
      }
    }

    const descendants: number[] = []
    // Seeding with the root means a malformed parent chain that loops back
    // (A -> B -> A) terminates instead of growing the queue forever.
    const visited = new Set<number>([id])
    const queue: number[] = [id]
    // Cursor-based traversal avoids the O(n) cost of Array.shift().
    for (let head = 0; head < queue.length; head++) {
      for (const childID of childrenByParent.get(queue[head]) ?? []) {
        if (visited.has(childID)) {
          continue
        }
        visited.add(childID)
        descendants.push(childID)
        queue.push(childID)
      }
    }
    return descendants
  }
  get logicalOperator(): LogicalOperator {
    return this.temporaryLogicalOperator
  }
--- a/src-ui/src/app/components/document-detail/document-detail.component.spec.ts
+++ b/src-ui/src/app/components/document-detail/document-detail.component.spec.ts
@@ -1644,9 +1644,9 @@ describe('DocumentDetailComponent', () => {
    expect(
      fixture.debugElement.query(By.css('.preview-sticky img'))
    ).not.toBeUndefined()
-    ;(component.document.mime_type =
+    ;((component.document.mime_type =
      'application/vnd.openxmlformats-officedocument.wordprocessingml.document'),
-      fixture.detectChanges()
+      fixture.detectChanges())
    expect(component.archiveContentRenderType).toEqual(
      component.ContentRenderType.Other
    )
--- a/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.html
+++ b/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.html
@@ -15,7 +15,7 @@
      }
      @if (document && displayFields?.includes(DisplayField.TAGS)) {
-        <div class="tags d-flex flex-column text-end position-absolute me-1 fs-6">
+        <div class="tags d-flex flex-column text-end position-absolute me-1 fs-6" [class.tags-no-wrap]="document.tags.length > 3">
          @for (tagID of tagIDs; track tagID) {
            <pngx-tag [tagID]="tagID" (click)="clickTag.emit(tagID);$event.stopPropagation()" [clickable]="true" linkTitle="Toggle tag filter" i18n-linkTitle></pngx-tag>
          }
--- a/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.scss
+++ b/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.scss
@@ -72,4 +72,14 @@ a {
  max-width: 80%;
  row-gap: .2rem;
  line-height: 1;
  &.tags-no-wrap {
    ::ng-deep .badge {
      display: inline-block;
      max-width: 100%;
      white-space: nowrap;
      overflow: hidden;
      text-overflow: ellipsis;
    }
  }
 }
--- a/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.spec.ts
+++ b/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.spec.ts
@@ -82,6 +82,16 @@ describe('DocumentCardSmallComponent', () => {
    ).toHaveLength(6)
  })
  it('should clear hidden tag counter when tag count falls below the limit', () => {
    expect(component.moreTags).toEqual(3)
    component.document.tags = [1, 2, 3, 4, 5, 6]
    fixture.detectChanges()
    expect(component.moreTags).toBeNull()
    expect(fixture.nativeElement.textContent).not.toContain('+ 3')
  })
  it('should try to close the preview on mouse leave', () => {
    component.popupPreview = {
      close: jest.fn(),
--- a/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.ts
+++ b/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.ts
@@ -126,6 +126,7 @@ export class DocumentCardSmallComponent
      this.moreTags = this.document.tags.length - (limit - 1)
      return this.document.tags.slice(0, limit - 1)
    } else {
      this.moreTags = null
      return this.document.tags
    }
  }
--- a/src-ui/src/app/interceptors/auth-expiry.interceptor.spec.ts
+++ b/src-ui/src/app/interceptors/auth-expiry.interceptor.spec.ts
@@ -0,0 +1,122 @@
 import {
  HttpErrorResponse,
  HttpHandlerFn,
  HttpRequest,
 } from '@angular/common/http'
 import { throwError } from 'rxjs'
 import * as navUtils from '../utils/navigation'
 import { createAuthExpiryInterceptor } from './auth-expiry.interceptor'
 // Exercises the interceptor produced by createAuthExpiryInterceptor():
 // which failures trigger locationReload() and how often.
 describe('withAuthExpiryInterceptor', () => {
  let interceptor: ReturnType<typeof createAuthExpiryInterceptor>
  let dateNowSpy: jest.SpiedFunction<typeof Date.now>
  beforeEach(() => {
    // A fresh interceptor per test so the last-reload timestamp kept in its
    // closure never leaks between cases.
    interceptor = createAuthExpiryInterceptor()
    // Pin the clock: unless a test queues other values, Date.now() is 1000.
    dateNowSpy = jest.spyOn(Date, 'now').mockReturnValue(1000)
  })
  afterEach(() => {
    jest.restoreAllMocks()
  })
  it('reloads when an API request returns 401', () => {
    const reloadSpy = jest
      .spyOn(navUtils, 'locationReload')
      .mockImplementation(() => {})
    interceptor(
      new HttpRequest('GET', '/api/documents/'),
      failingHandler('/api/documents/', 401)
    ).subscribe({
      // The interceptor rethrows the error; swallow it so the test does not fail.
      error: () => undefined,
    })
    expect(reloadSpy).toHaveBeenCalledTimes(1)
  })
  it('does not reload for non-401 errors', () => {
    const reloadSpy = jest
      .spyOn(navUtils, 'locationReload')
      .mockImplementation(() => {})
    interceptor(
      new HttpRequest('GET', '/api/documents/'),
      failingHandler('/api/documents/', 500)
    ).subscribe({
      error: () => undefined,
    })
    expect(reloadSpy).not.toHaveBeenCalled()
  })
  it('does not reload for non-api 401 responses', () => {
    const reloadSpy = jest
      .spyOn(navUtils, 'locationReload')
      .mockImplementation(() => {})
    // The request URL does not contain '/api/', so the 401 must be ignored.
    interceptor(
      new HttpRequest('GET', '/accounts/profile/'),
      failingHandler('/accounts/profile/', 401)
    ).subscribe({
      error: () => undefined,
    })
    expect(reloadSpy).not.toHaveBeenCalled()
  })
  it('reloads only once even with multiple API 401 responses', () => {
    const reloadSpy = jest
      .spyOn(navUtils, 'locationReload')
      .mockImplementation(() => {})
    const request = new HttpRequest('GET', '/api/documents/')
    const handler = failingHandler('/api/documents/', 401)
    // Both failures observe the same mocked time (1000), so the second one
    // falls inside the cooldown window and must not reload again.
    interceptor(request, handler).subscribe({
      error: () => undefined,
    })
    interceptor(request, handler).subscribe({
      error: () => undefined,
    })
    expect(reloadSpy).toHaveBeenCalledTimes(1)
  })
  it('retries reload after cooldown for repeated API 401 responses', () => {
    const reloadSpy = jest
      .spyOn(navUtils, 'locationReload')
      .mockImplementation(() => {})
    // Queued clock readings, one per failing request:
    //   1000 -> first reload
    //   2500 -> 1500 ms after the reload, still within the cooldown
    //   3501 -> 2501 ms after the reload, so a second reload is expected
    dateNowSpy
      .mockReturnValueOnce(1000)
      .mockReturnValueOnce(2500)
      .mockReturnValueOnce(3501)
    const request = new HttpRequest('GET', '/api/documents/')
    const handler = failingHandler('/api/documents/', 401)
    interceptor(request, handler).subscribe({
      error: () => undefined,
    })
    interceptor(request, handler).subscribe({
      error: () => undefined,
    })
    interceptor(request, handler).subscribe({
      error: () => undefined,
    })
    expect(reloadSpy).toHaveBeenCalledTimes(2)
  })
 })
 // Test helper: an HttpHandlerFn that ignores the request and always fails
 // with an HttpErrorResponse carrying the given url and status.
 function failingHandler(url: string, status: number): HttpHandlerFn {
  // Passed as a factory so throwError builds a new error per subscription.
  const buildError = () => new HttpErrorResponse({ status, url })
  return (_request) => throwError(buildError)
 }
--- a/src-ui/src/app/interceptors/auth-expiry.interceptor.ts
+++ b/src-ui/src/app/interceptors/auth-expiry.interceptor.ts
@@ -0,0 +1,37 @@
 import {
  HttpErrorResponse,
  HttpEvent,
  HttpHandlerFn,
  HttpInterceptorFn,
  HttpRequest,
 } from '@angular/common/http'
 import { catchError, Observable, throwError } from 'rxjs'
 import { locationReload } from '../utils/navigation'
 /**
  * Creates an HTTP interceptor that calls locationReload() when a request
  * whose URL contains '/api/' fails with HTTP 401.
  *
  * Reload attempts are rate-limited: after one attempt, further qualifying
  * failures are ignored until the cooldown has elapsed. The error is always
  * rethrown to the subscriber, whether or not a reload was triggered.
  */
 export const createAuthExpiryInterceptor = (): HttpInterceptorFn => {
  // Minimum gap, in milliseconds, between two reload attempts.
  const reloadCooldownMs = 2000
  // -Infinity guarantees the first qualifying failure passes the cooldown check.
  let lastReloadAttempt = Number.NEGATIVE_INFINITY

  const handleFailure = (
    request: HttpRequest<unknown>,
    error: unknown
  ): Observable<never> => {
    const isApiAuthFailure =
      error instanceof HttpErrorResponse &&
      error.status === 401 &&
      request.url.includes('/api/')
    if (isApiAuthFailure) {
      // The clock is read only for qualifying failures.
      const now = Date.now()
      const elapsed = now - lastReloadAttempt
      if (elapsed >= reloadCooldownMs) {
        lastReloadAttempt = now
        locationReload()
      }
    }
    return throwError(() => error)
  }

  return (
    request: HttpRequest<unknown>,
    next: HttpHandlerFn
  ): Observable<HttpEvent<unknown>> =>
    next(request).pipe(
      catchError((error: unknown) => handleFailure(request, error))
    )
 }
 export const withAuthExpiryInterceptor = createAuthExpiryInterceptor()
--- a/src-ui/src/app/utils/color.ts
+++ b/src-ui/src/app/utils/color.ts
@@ -62,7 +62,7 @@ export function hslToRgb(h, s, l) {
 * @return  Array           The HSL representation
 */
 export function rgbToHsl(r, g, b) {
-  ;(r /= 255), (g /= 255), (b /= 255)
+  ;((r /= 255), (g /= 255), (b /= 255))
  var max = Math.max(r, g, b),
    min = Math.min(r, g, b)
  var h,
--- a/src-ui/src/environments/environment.prod.ts
+++ b/src-ui/src/environments/environment.prod.ts
@@ -6,7 +6,7 @@ export const environment = {
  apiVersion: '10', // match src/paperless/settings.py
  appTitle: 'Paperless-ngx',
  tag: 'prod',
-  version: '2.20.10',
+  version: '2.20.13',
  webSocketHost: window.location.host,
  webSocketProtocol: window.location.protocol == 'https:' ? 'wss:' : 'ws:',
  webSocketBaseUrl: base_url.pathname + 'ws/',
--- a/src-ui/src/main.ts
+++ b/src-ui/src/main.ts
@@ -154,6 +154,7 @@ import { DirtyDocGuard } from './app/guards/dirty-doc.guard'
 import { DirtySavedViewGuard } from './app/guards/dirty-saved-view.guard'
 import { PermissionsGuard } from './app/guards/permissions.guard'
 import { withApiVersionInterceptor } from './app/interceptors/api-version.interceptor'
 import { withAuthExpiryInterceptor } from './app/interceptors/auth-expiry.interceptor'
 import { withCsrfInterceptor } from './app/interceptors/csrf.interceptor'
 import { DocumentTitlePipe } from './app/pipes/document-title.pipe'
 import { FilterPipe } from './app/pipes/filter.pipe'
@@ -399,7 +400,11 @@ bootstrapApplication(AppComponent, {
    StoragePathNamePipe,
    provideHttpClient(
      withInterceptorsFromDi(),
-      withInterceptors([withCsrfInterceptor, withApiVersionInterceptor]),
+      withInterceptors([
        withCsrfInterceptor,
        withApiVersionInterceptor,
        withAuthExpiryInterceptor,
      ]),
      withFetch()
    ),
    provideUiTour({
--- a/src-ui/src/theme.scss
+++ b/src-ui/src/theme.scss
@@ -150,6 +150,15 @@ $form-check-radio-checked-bg-image-dark: url("data:image/svg+xml,<svg xmlns='htt
    background-color: var(--pngx-body-color-accent);
  }
  .list-group-item-action:not(.active):active {
    --bs-list-group-action-active-color: var(--bs-body-color);
    --bs-list-group-action-active-bg: var(--pngx-bg-darker);
  }
  .form-control:hover::file-selector-button {
    background-color:var(--pngx-bg-dark) !important
  }
  .search-container {
    input, input:focus, i-bs[name="search"] , ::placeholder {
      color: var(--pngx-primary-text-contrast) !important;
--- a/src/documents/bulk_edit.py
+++ b/src/documents/bulk_edit.py
@@ -576,8 +576,8 @@ def merge(
        except Exception:
            restore_archive_serial_numbers(backup)
            raise
-        else:
+    else:
-            consume_task.delay()
+        consume_task.delay()
    return "OK"
--- a/src/documents/classifier.py
+++ b/src/documents/classifier.py
@@ -9,6 +9,7 @@ from pathlib import Path
 from typing import TYPE_CHECKING
 if TYPE_CHECKING:
    from collections.abc import Callable
    from collections.abc import Iterator
    from datetime import datetime
@@ -191,7 +192,12 @@ class DocumentClassifier:
        target_file_temp.rename(target_file)
-    def train(self) -> bool:
+    def train(
        self,
        status_callback: Callable[[str], None] | None = None,
    ) -> bool:
        notify = status_callback if status_callback is not None else lambda _: None
        # Get non-inbox documents
        docs_queryset = (
            Document.objects.exclude(
@@ -213,6 +219,7 @@ class DocumentClassifier:
        # Step 1: Extract and preprocess training data from the database.
        logger.debug("Gathering data from database...")
        notify(f"Gathering data from {docs_queryset.count()} document(s)...")
        hasher = sha256()
        for doc in docs_queryset:
            y = -1
@@ -290,6 +297,7 @@ class DocumentClassifier:
        # Step 2: vectorize data
        logger.debug("Vectorizing data...")
        notify("Vectorizing document content...")
        def content_generator() -> Iterator[str]:
            """
@@ -316,6 +324,7 @@ class DocumentClassifier:
        # Step 3: train the classifiers
        if num_tags > 0:
            logger.debug("Training tags classifier...")
            notify(f"Training tags classifier ({num_tags} tag(s))...")
            if num_tags == 1:
                # Special case where only one tag has auto:
@@ -339,6 +348,9 @@ class DocumentClassifier:
        if num_correspondents > 0:
            logger.debug("Training correspondent classifier...")
            notify(
                f"Training correspondent classifier ({num_correspondents} correspondent(s))...",
            )
            self.correspondent_classifier = MLPClassifier(tol=0.01)
            self.correspondent_classifier.fit(data_vectorized, labels_correspondent)
        else:
@@ -349,6 +361,9 @@ class DocumentClassifier:
        if num_document_types > 0:
            logger.debug("Training document type classifier...")
            notify(
                f"Training document type classifier ({num_document_types} type(s))...",
            )
            self.document_type_classifier = MLPClassifier(tol=0.01)
            self.document_type_classifier.fit(data_vectorized, labels_document_type)
        else:
@@ -361,6 +376,7 @@ class DocumentClassifier:
            logger.debug(
                "Training storage paths classifier...",
            )
            notify(f"Training storage path classifier ({num_storage_paths} path(s))...")
            self.storage_path_classifier = MLPClassifier(tol=0.01)
            self.storage_path_classifier.fit(
                data_vectorized,
--- a/src/documents/consumer.py
+++ b/src/documents/consumer.py
@@ -51,11 +51,41 @@ from documents.templating.workflows import parse_w_workflow_placeholders
 from documents.utils import copy_basic_file_stats
 from documents.utils import copy_file_with_basic_stats
 from documents.utils import run_subprocess
-from paperless_mail.parsers import MailDocumentParser
+from paperless.parsers import ParserContext
 from paperless.parsers.mail import MailDocumentParser
 from paperless.parsers.remote import RemoteDocumentParser
 from paperless.parsers.tesseract import RasterisedDocumentParser
 from paperless.parsers.text import TextDocumentParser
 from paperless.parsers.tika import TikaDocumentParser
 LOGGING_NAME: Final[str] = "paperless.consumer"
 def _parser_cleanup(parser: DocumentParser) -> None:
    """
    Tear down a parser using whichever lifecycle protocol it implements.

    The new-style parsers listed below are context managers and release
    their resources through ``__exit__``; every other parser is torn down
    through its ``cleanup()`` method. This shim is temporary and goes away
    once all parsers have moved to the new style and the consumer uses it.

    TODO(stumpylog): Remove me in the future
    """
    context_managed_parsers = (
        MailDocumentParser,
        RasterisedDocumentParser,
        RemoteDocumentParser,
        TextDocumentParser,
        TikaDocumentParser,
    )
    if not isinstance(parser, context_managed_parsers):
        parser.cleanup()
        return
    # Equivalent to leaving a ``with`` block without an exception.
    parser.__exit__(None, None, None)
 class WorkflowTriggerPlugin(
    NoCleanupPluginMixin,
    NoSetupPluginMixin,
@@ -431,6 +461,23 @@ class ConsumerPlugin(
            progress_callback=progress_callback,
        )
        parser_is_new_style = isinstance(
            document_parser,
            (
                MailDocumentParser,
                RasterisedDocumentParser,
                RemoteDocumentParser,
                TextDocumentParser,
                TikaDocumentParser,
            ),
        )
        # New-style parsers use __enter__/__exit__ for resource management.
        # _parser_cleanup (below) handles __exit__; call __enter__ here.
        # TODO(stumpylog): Remove me in the future
        if parser_is_new_style:
            document_parser.__enter__()
        self.log.debug(f"Parser: {type(document_parser).__name__}")
        # Parse the document. This may take some time.
@@ -449,16 +496,14 @@ class ConsumerPlugin(
                ConsumerStatusShortMessage.PARSING_DOCUMENT,
            )
            self.log.debug(f"Parsing {self.filename}...")
-            if (
+
-                isinstance(document_parser, MailDocumentParser)
+            # TODO(stumpylog): Remove me in the future when all parsers use new protocol
-                and self.input_doc.mailrule_id
+            if parser_is_new_style:
-            ):
+                document_parser.configure(
-                document_parser.parse(
+                    ParserContext(mailrule_id=self.input_doc.mailrule_id),
                    self.working_copy,
                    mime_type,
                    self.filename,
                    self.input_doc.mailrule_id,
                )
                # TODO(stumpylog): Remove me in the future
                document_parser.parse(self.working_copy, mime_type)
            else:
                document_parser.parse(self.working_copy, mime_type, self.filename)
@@ -469,11 +514,15 @@ class ConsumerPlugin(
                ProgressStatusOptions.WORKING,
                ConsumerStatusShortMessage.GENERATING_THUMBNAIL,
            )
-            thumbnail = document_parser.get_thumbnail(
+            # TODO(stumpylog): Remove me in the future when all parsers use new protocol
-                self.working_copy,
+            if parser_is_new_style:
-                mime_type,
+                thumbnail = document_parser.get_thumbnail(self.working_copy, mime_type)
-                self.filename,
+            else:
-            )
+                thumbnail = document_parser.get_thumbnail(
                    self.working_copy,
                    mime_type,
                    self.filename,
                )
            text = document_parser.get_text()
            date = document_parser.get_date()
@@ -490,7 +539,7 @@ class ConsumerPlugin(
            page_count = document_parser.get_page_count(self.working_copy, mime_type)
        except ParseError as e:
-            document_parser.cleanup()
+            _parser_cleanup(document_parser)
            if tempdir:
                tempdir.cleanup()
            self._fail(
@@ -500,7 +549,7 @@ class ConsumerPlugin(
                exception=e,
            )
        except Exception as e:
-            document_parser.cleanup()
+            _parser_cleanup(document_parser)
            if tempdir:
                tempdir.cleanup()
            self._fail(
@@ -702,7 +751,7 @@ class ConsumerPlugin(
                exception=e,
            )
        finally:
-            document_parser.cleanup()
+            _parser_cleanup(document_parser)
            tempdir.cleanup()
        self.run_post_consume_script(document)
--- a/src/documents/index.py
+++ b/src/documents/index.py
@@ -477,7 +477,14 @@ class DelayedFullTextQuery(DelayedQuery):
        try:
            corrected = self.searcher.correct_query(q, q_str)
            if corrected.string != q_str:
-                suggested_correction = corrected.string
+                corrected_results = self.searcher.search(
                    corrected.query,
                    limit=1,
                    filter=MappedDocIdSet(self.filter_queryset, self.searcher.ixreader),
                    scored=False,
                )
                if len(corrected_results) > 0:
                    suggested_correction = corrected.string
        except Exception as e:
            logger.info(
                "Error while correcting query %s: %s",
--- a/src/documents/management/commands/document_create_classifier.py
+++ b/src/documents/management/commands/document_create_classifier.py
@@ -1,13 +1,32 @@
-from django.core.management.base import BaseCommand
+from __future__ import annotations
 import time
 from documents.management.commands.base import PaperlessCommand
 from documents.tasks import train_classifier
-class Command(BaseCommand):
+class Command(PaperlessCommand):
    help = (
        "Trains the classifier on your data and saves the resulting models to a "
        "file. The document consumer will then automatically use this new model."
    )
    supports_progress_bar = False
    supports_multiprocessing = False
-    def handle(self, *args, **options):
+    def handle(self, *args, **options) -> None:
-        train_classifier(scheduled=False)
+        start = time.monotonic()
        with (
            self.buffered_logging("paperless.tasks"),
            self.buffered_logging("paperless.classifier"),
        ):
            train_classifier(
                scheduled=False,
                status_callback=lambda msg: self.console.print(f"  {msg}"),
            )
        elapsed = time.monotonic() - start
        self.console.print(
            f"[green]✓[/green] Classifier training complete ({elapsed:.1f}s)",
        )
--- a/src/documents/management/commands/document_importer.py
+++ b/src/documents/management/commands/document_importer.py
@@ -205,7 +205,7 @@ class Command(CryptMixin, PaperlessCommand):
                ContentType.objects.all().delete()
                Permission.objects.all().delete()
                for manifest_path in self.manifest_paths:
-                    call_command("loaddata", manifest_path)
+                    call_command("loaddata", manifest_path, skip_checks=True)
        except (FieldDoesNotExist, DeserializationError, IntegrityError) as e:
            self.stdout.write(self.style.ERROR("Database import failed"))
            if (
--- a/src/documents/management/commands/document_thumbnails.py
+++ b/src/documents/management/commands/document_thumbnails.py
@@ -4,6 +4,11 @@ import shutil
 from documents.management.commands.base import PaperlessCommand
 from documents.models import Document
 from documents.parsers import get_parser_class_for_mime_type
 from paperless.parsers.mail import MailDocumentParser
 from paperless.parsers.remote import RemoteDocumentParser
 from paperless.parsers.tesseract import RasterisedDocumentParser
 from paperless.parsers.text import TextDocumentParser
 from paperless.parsers.tika import TikaDocumentParser
 logger = logging.getLogger("paperless.management.thumbnails")
@@ -22,15 +27,38 @@ def _process_document(doc_id: int) -> None:
    parser = parser_class(logging_group=None)
    parser_is_new_style = isinstance(
        parser,
        (
            MailDocumentParser,
            RasterisedDocumentParser,
            RemoteDocumentParser,
            TextDocumentParser,
            TikaDocumentParser,
        ),
    )
    # TODO(stumpylog): Remove branch in the future when all parsers use new protocol
    if parser_is_new_style:
        parser.__enter__()
    try:
-        thumb = parser.get_thumbnail(
+        # TODO(stumpylog): Remove branch in the future when all parsers use new protocol
-            document.source_path,
+        if parser_is_new_style:
-            document.mime_type,
+            thumb = parser.get_thumbnail(document.source_path, document.mime_type)
-            document.get_public_filename(),
+        else:
-        )
+            thumb = parser.get_thumbnail(
                document.source_path,
                document.mime_type,
                document.get_public_filename(),
            )
        shutil.move(thumb, document.thumbnail_path)
    finally:
-        parser.cleanup()
+        # TODO(stumpylog): Cleanup once all parsers are handled
        if parser_is_new_style:
            parser.__exit__(None, None, None)
        else:
            parser.cleanup()
 class Command(PaperlessCommand):
--- a/src/documents/serialisers.py
+++ b/src/documents/serialisers.py
@@ -797,6 +797,25 @@ class ReadWriteSerializerMethodField(serializers.SerializerMethodField):
        return {self.field_name: data}
 def validate_documentlink_targets(user, doc_ids):
    """
    Validate the documents referenced by a document link custom field value.

    Raises serializers.ValidationError when the number of stored documents
    matching doc_ids differs from len(doc_ids), i.e. an id is unknown or was
    listed more than once.

    When user is None only that existence check runs.  Otherwise
    PermissionDenied is raised unless has_perms_owner_aware grants the user
    "change_document" on every referenced document.
    """
    existing_count = Document.objects.filter(id__in=doc_ids).count()
    if existing_count != len(doc_ids):
        raise serializers.ValidationError(
            "Some documents in value don't exist or were specified twice.",
        )
    if user is None:
        return
    # select_related("owner") loads each owner together with its document.
    targets = Document.objects.filter(id__in=doc_ids).select_related("owner")
    lacks_permission = any(
        not has_perms_owner_aware(user, "change_document", target)
        for target in targets
    )
    if lacks_permission:
        raise PermissionDenied(
            _("Insufficient permissions."),
        )
 class CustomFieldInstanceSerializer(serializers.ModelSerializer):
    field = serializers.PrimaryKeyRelatedField(queryset=CustomField.objects.all())
    value = ReadWriteSerializerMethodField(allow_null=True)
@@ -887,12 +906,11 @@ class CustomFieldInstanceSerializer(serializers.ModelSerializer):
                        "Value must be a list",
                    )
                doc_ids = data["value"]
-                if Document.objects.filter(id__in=doc_ids).count() != len(
+                request = self.context.get("request")
-                    data["value"],
+                validate_documentlink_targets(
-                ):
+                    getattr(request, "user", None) if request is not None else None,
-                    raise serializers.ValidationError(
+                    doc_ids,
-                        "Some documents in value don't exist or were specified twice.",
+                )
                    )
        return data
@@ -1713,6 +1731,19 @@ class BulkEditSerializer(
                f"Some custom fields in {name} don't exist or were specified twice.",
            )
        if isinstance(custom_fields, dict):
            custom_field_map = CustomField.objects.in_bulk(ids)
            for raw_field_id, value in custom_fields.items():
                field = custom_field_map.get(int(raw_field_id))
                if (
                    field is not None
                    and field.data_type == CustomField.FieldDataType.DOCUMENTLINK
                    and value is not None
                ):
                    if not isinstance(value, list):
                        raise serializers.ValidationError("Value must be a list")
                    validate_documentlink_targets(self.user, value)
    def validate_method(self, method):
        if method == "set_correspondent":
            return bulk_edit.set_correspondent
--- a/src/documents/signals/handlers.py
+++ b/src/documents/signals/handlers.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 import hashlib
 import logging
 import shutil
 from pathlib import Path
@@ -403,6 +404,14 @@ class CannotMoveFilesException(Exception):
    pass
 def _path_matches_checksum(path: Path, checksum: str | None) -> bool:
    if checksum is None or not path.is_file():
        return False
    with path.open("rb") as f:
        return hashlib.md5(f.read()).hexdigest() == checksum
 def _filename_template_uses_custom_fields(doc: Document) -> bool:
    template = None
    if doc.storage_path is not None:
@@ -473,10 +482,12 @@ def update_filename_and_move_files(
            old_filename = instance.filename
            old_source_path = instance.source_path
            move_original = False
            original_already_moved = False
            old_archive_filename = instance.archive_filename
            old_archive_path = instance.archive_path
            move_archive = False
            archive_already_moved = False
            candidate_filename = generate_filename(instance)
            if len(str(candidate_filename)) > Document.MAX_STORED_FILENAME_LENGTH:
@@ -497,14 +508,23 @@ def update_filename_and_move_files(
                candidate_source_path.exists()
                and candidate_source_path != old_source_path
            ):
-                # Only fall back to unique search when there is an actual conflict
+                if not old_source_path.is_file() and _path_matches_checksum(
-                new_filename = generate_unique_filename(instance)
+                    candidate_source_path,
                    instance.checksum,
                ):
                    new_filename = candidate_filename
                    original_already_moved = True
                else:
                    # Only fall back to unique search when there is an actual conflict
                    new_filename = generate_unique_filename(instance)
            else:
                new_filename = candidate_filename
            # Need to convert to string to be able to save it to the db
            instance.filename = str(new_filename)
-            move_original = old_filename != instance.filename
+            move_original = (
                old_filename != instance.filename and not original_already_moved
            )
            if instance.has_archive_version:
                archive_candidate = generate_filename(instance, archive_filename=True)
@@ -525,24 +545,38 @@ def update_filename_and_move_files(
                    archive_candidate_path.exists()
                    and archive_candidate_path != old_archive_path
                ):
-                    new_archive_filename = generate_unique_filename(
+                    if not old_archive_path.is_file() and _path_matches_checksum(
-                        instance,
+                        archive_candidate_path,
-                        archive_filename=True,
+                        instance.archive_checksum,
-                    )
+                    ):
                        new_archive_filename = archive_candidate
                        archive_already_moved = True
                    else:
                        new_archive_filename = generate_unique_filename(
                            instance,
                            archive_filename=True,
                        )
                else:
                    new_archive_filename = archive_candidate
                instance.archive_filename = str(new_archive_filename)
-                move_archive = old_archive_filename != instance.archive_filename
+                move_archive = (
                    old_archive_filename != instance.archive_filename
                    and not archive_already_moved
                )
            else:
                move_archive = False
            if not move_original and not move_archive:
-                # Just update modified. Also, don't save() here to prevent infinite recursion.
+                updates = {"modified": timezone.now()}
-                Document.objects.filter(pk=instance.pk).update(
+                if old_filename != instance.filename:
-                    modified=timezone.now(),
+                    updates["filename"] = instance.filename
-                )
+                if old_archive_filename != instance.archive_filename:
                    updates["archive_filename"] = instance.archive_filename
                # Don't save() here to prevent infinite recursion.
                Document.objects.filter(pk=instance.pk).update(**updates)
                return
            if move_original:
@@ -932,8 +966,25 @@ def run_workflows(
            if not use_overrides:
                # limit title to 128 characters
                document.title = document.title[:128]
-                # save first before setting tags
+                # Save only the fields that workflow actions can set directly.
-                document.save()
+                # Deliberately excludes filename and archive_filename — those are
                # managed exclusively by update_filename_and_move_files via the
                # post_save signal. Writing stale in-memory values here would revert
                # a concurrent update_filename_and_move_files DB write, leaving the
                # DB pointing at the old path while the file is already at the new
                # one (see: https://github.com/paperless-ngx/paperless-ngx/issues/12386).
                # modified has auto_now=True but is not auto-added when update_fields
                # is specified, so it must be listed explicitly.
                document.save(
                    update_fields=[
                        "title",
                        "correspondent",
                        "document_type",
                        "storage_path",
                        "owner",
                        "modified",
                    ],
                )
                document.tags.set(doc_tag_ids)
            WorkflowRun.objects.create(
--- a/src/documents/tasks.py
+++ b/src/documents/tasks.py
@@ -65,6 +65,12 @@ from documents.signals.handlers import run_workflows
 from documents.signals.handlers import send_websocket_document_updated
 from documents.workflows.utils import get_workflows_for_trigger
 from paperless.config import AIConfig
 from paperless.parsers import ParserContext
 from paperless.parsers.mail import MailDocumentParser
 from paperless.parsers.remote import RemoteDocumentParser
 from paperless.parsers.tesseract import RasterisedDocumentParser
 from paperless.parsers.text import TextDocumentParser
 from paperless.parsers.tika import TikaDocumentParser
 from paperless_ai.indexing import llm_index_add_or_update_document
 from paperless_ai.indexing import llm_index_remove_document
 from paperless_ai.indexing import update_llm_index
@@ -100,7 +106,11 @@ def index_reindex(*, iter_wrapper: IterWrapper[Document] = _identity) -> None:
@shared_task
-def train_classifier(*, scheduled=True) -> None:
+def train_classifier(
    *,
    scheduled=True,
    status_callback: Callable[[str], None] | None = None,
 ) -> None:
    task = PaperlessTask.objects.create(
        type=PaperlessTask.TaskType.SCHEDULED_TASK
        if scheduled
@@ -136,7 +146,7 @@ def train_classifier(*, scheduled=True) -> None:
        classifier = DocumentClassifier()
    try:
-        if classifier.train():
+        if classifier.train(status_callback=status_callback):
            logger.info(
                f"Saving updated classifier model to {settings.MODEL_FILE}...",
            )
@@ -300,7 +310,9 @@ def update_document_content_maybe_archive_file(document_id) -> None:
    mime_type = document.mime_type
-    parser_class: type[DocumentParser] = get_parser_class_for_mime_type(mime_type)
+    parser_class: type[DocumentParser] | None = get_parser_class_for_mime_type(
        mime_type,
    )
    if not parser_class:
        logger.error(
@@ -311,14 +323,42 @@ def update_document_content_maybe_archive_file(document_id) -> None:
    parser: DocumentParser = parser_class(logging_group=uuid.uuid4())
-    try:
+    parser_is_new_style = isinstance(
-        parser.parse(document.source_path, mime_type, document.get_public_filename())
+        parser,
        (
            MailDocumentParser,
            RasterisedDocumentParser,
            RemoteDocumentParser,
            TextDocumentParser,
            TikaDocumentParser,
        ),
    )
-        thumbnail = parser.get_thumbnail(
+    # TODO(stumpylog): Remove branch in the future when all parsers use new protocol
-            document.source_path,
+    if parser_is_new_style:
-            mime_type,
+        parser.__enter__()
-            document.get_public_filename(),
+
-        )
+    try:
        # TODO(stumpylog): Remove branch in the future when all parsers use new protocol
        if parser_is_new_style:
            parser.configure(ParserContext())
            parser.parse(document.source_path, mime_type)
        else:
            parser.parse(
                document.source_path,
                mime_type,
                document.get_public_filename(),
            )
        # TODO(stumpylog): Remove branch in the future when all parsers use new protocol
        if parser_is_new_style:
            thumbnail = parser.get_thumbnail(document.source_path, mime_type)
        else:
            thumbnail = parser.get_thumbnail(
                document.source_path,
                mime_type,
                document.get_public_filename(),
            )
        with transaction.atomic():
            oldDocument = Document.objects.get(pk=document.pk)
@@ -399,7 +439,20 @@ def update_document_content_maybe_archive_file(document_id) -> None:
            f"Error while parsing document {document} (ID: {document_id})",
        )
    finally:
-        parser.cleanup()
+        # TODO(stumpylog): Remove branch in the future when all parsers use new protocol
        if isinstance(
            parser,
            (
                MailDocumentParser,
                RasterisedDocumentParser,
                RemoteDocumentParser,
                TextDocumentParser,
                TikaDocumentParser,
            ),
        ):
            parser.__exit__(None, None, None)
        else:
            parser.cleanup()
@shared_task
--- a/src/documents/tests/management/test_management_sanity_checker.py
+++ b/src/documents/tests/management/test_management_sanity_checker.py
@@ -163,13 +163,23 @@ class TestRenderResultsSummary:
 class TestDocumentSanityCheckerCommand:
    def test_no_issues(self, sample_doc: Document) -> None:
        out = StringIO()
-        call_command("document_sanity_checker", "--no-progress-bar", stdout=out)
+        call_command(
            "document_sanity_checker",
            "--no-progress-bar",
            stdout=out,
            skip_checks=True,
        )
        assert "No issues detected" in out.getvalue()
    def test_missing_original(self, sample_doc: Document) -> None:
        Path(sample_doc.source_path).unlink()
        out = StringIO()
-        call_command("document_sanity_checker", "--no-progress-bar", stdout=out)
+        call_command(
            "document_sanity_checker",
            "--no-progress-bar",
            stdout=out,
            skip_checks=True,
        )
        output = out.getvalue()
        assert "ERROR" in output
        assert "Original of document does not exist" in output
@@ -187,7 +197,12 @@ class TestDocumentSanityCheckerCommand:
        Path(doc.thumbnail_path).touch()
        out = StringIO()
-        call_command("document_sanity_checker", "--no-progress-bar", stdout=out)
+        call_command(
            "document_sanity_checker",
            "--no-progress-bar",
            stdout=out,
            skip_checks=True,
        )
        output = out.getvalue()
        assert "ERROR" in output
        assert "Checksum mismatch. Stored: abc, actual:" in output
--- a/src/documents/tests/test_api_app_config.py
+++ b/src/documents/tests/test_api_app_config.py
@@ -5,6 +5,7 @@ from unittest.mock import patch
 from django.contrib.auth.models import User
 from django.core.files.uploadedfile import SimpleUploadedFile
 from django.test import override_settings
 from rest_framework import status
 from rest_framework.test import APITestCase
@@ -693,3 +694,17 @@ class TestApiAppConfig(DirectoriesMixin, APITestCase):
                content_type="application/json",
            )
            mock_update.assert_called_once()
    @override_settings(LLM_ALLOW_INTERNAL_ENDPOINTS=False)
    def test_update_llm_endpoint_blocks_internal_endpoint_when_disallowed(self) -> None:
        """
        GIVEN:
            - LLM_ALLOW_INTERNAL_ENDPOINTS is overridden to False
        WHEN:
            - The config is patched with an LLM endpoint on 127.0.0.1
        THEN:
            - The request is rejected with HTTP 400
            - The response data mentions a "non-public address"
        """
        payload = json.dumps({"llm_endpoint": "http://127.0.0.1:11434"})
        response = self.client.patch(
            f"{self.ENDPOINT}1/",
            payload,
            content_type="application/json",
        )
        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
        # Compare case-insensitively against the rendered error payload.
        rendered_errors = str(response.data).lower()
        self.assertIn("non-public address", rendered_errors)
--- a/src/documents/tests/test_api_bulk_edit.py
+++ b/src/documents/tests/test_api_bulk_edit.py
@@ -262,6 +262,50 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
        self.assertEqual(kwargs["add_custom_fields"], [self.cf1.id])
        self.assertEqual(kwargs["remove_custom_fields"], [self.cf2.id])
    @mock.patch("documents.serialisers.bulk_edit.modify_custom_fields")
    def test_api_modify_custom_fields_documentlink_forbidden_for_unpermitted_target(
        self,
        m,
    ) -> None:
        """
        GIVEN:
            - A user holding the change_document permission who owns a source document
            - A target document owned by a different user
            - A document link custom field
        WHEN:
            - The user bulk-adds the document link field to the source document,
              pointing it at the target document
        THEN:
            - The request is rejected with HTTP 403
            - modify_custom_fields is never called
        """
        self.setup_mock(m, "modify_custom_fields")
        requester = User.objects.create_user(username="doc-owner")
        change_document = Permission.objects.get(codename="change_document")
        requester.user_permissions.add(change_document)
        target_owner = User.objects.create_user(username="other-user")
        source_doc = Document.objects.create(
            title="Source",
            checksum="source",
            owner=requester,
        )
        target_doc = Document.objects.create(
            title="Target",
            checksum="target",
            owner=target_owner,
        )
        link_field = CustomField.objects.create(
            data_type=CustomField.FieldDataType.DOCUMENTLINK,
            name="doclink",
        )
        payload = {
            "documents": [source_doc.id],
            "method": "modify_custom_fields",
            "parameters": {
                "add_custom_fields": {link_field.id: [target_doc.id]},
                "remove_custom_fields": [],
            },
        }

        self.client.force_authenticate(user=requester)
        response = self.client.post(
            "/api/documents/bulk_edit/",
            json.dumps(payload),
            content_type="application/json",
        )

        self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
        m.assert_not_called()
    @mock.patch("documents.serialisers.bulk_edit.modify_custom_fields")
    def test_api_modify_custom_fields_with_values(self, m) -> None:
        self.setup_mock(m, "modify_custom_fields")
--- a/src/documents/tests/test_api_custom_fields.py
+++ b/src/documents/tests/test_api_custom_fields.py
@@ -6,6 +6,7 @@ from unittest.mock import ANY
 from django.contrib.auth.models import Permission
 from django.contrib.auth.models import User
 from django.test import override_settings
 from guardian.shortcuts import assign_perm
 from rest_framework import status
 from rest_framework.test import APITestCase
@@ -1140,6 +1141,102 @@ class TestCustomFieldsAPI(DirectoriesMixin, APITestCase):
        self.assertEqual(resp.status_code, status.HTTP_200_OK)
        self.assertEqual(doc5.custom_fields.first().value, [1])
    def test_documentlink_patch_requires_change_permission_on_target_documents(
        self,
    ) -> None:
        """
        GIVEN:
            - A source document whose owner holds the change_document permission
            - A target document owned by a different user
            - A document link custom field
        WHEN:
            - The source owner PATCHes the source document, linking it to the target
        THEN:
            - The request is rejected with HTTP 403
            - No instance of the document link field is created
        """
        source_owner = User.objects.create_user(username="source-owner")
        change_document = Permission.objects.get(codename="change_document")
        source_owner.user_permissions.add(change_document)
        target_owner = User.objects.create_user(username="other-user")
        source_doc = Document.objects.create(
            owner=source_owner,
            title="Source",
            checksum="source",
            mime_type="application/pdf",
        )
        target_doc = Document.objects.create(
            owner=target_owner,
            title="Target",
            checksum="target",
            mime_type="application/pdf",
        )
        link_field = CustomField.objects.create(
            data_type=CustomField.FieldDataType.DOCUMENTLINK,
            name="Test Custom Field Doc Link",
        )
        payload = {
            "custom_fields": [
                {
                    "field": link_field.id,
                    "value": [target_doc.id],
                },
            ],
        }

        self.client.force_authenticate(user=source_owner)
        resp = self.client.patch(
            f"/api/documents/{source_doc.id}/",
            data=payload,
            format="json",
        )

        self.assertEqual(resp.status_code, status.HTTP_403_FORBIDDEN)
        created_links = CustomFieldInstance.objects.filter(field=link_field).count()
        self.assertEqual(created_links, 0)
    def test_documentlink_patch_allowed_with_change_permission_on_target_documents(
        self,
    ) -> None:
        """
        GIVEN:
            - A source document whose owner holds the change_document permission
            - A target document owned by a different user
            - The source owner is assigned change_document on the target document
        WHEN:
            - The source owner PATCHes the source document, linking it to the target
        THEN:
            - The request succeeds with HTTP 200
            - The target document's document link field holds the source document id
        """
        source_owner = User.objects.create_user(username="source-owner")
        change_document = Permission.objects.get(codename="change_document")
        source_owner.user_permissions.add(change_document)
        target_owner = User.objects.create_user(username="other-user")
        source_doc = Document.objects.create(
            owner=source_owner,
            title="Source",
            checksum="source",
            mime_type="application/pdf",
        )
        target_doc = Document.objects.create(
            owner=target_owner,
            title="Target",
            checksum="target",
            mime_type="application/pdf",
        )
        link_field = CustomField.objects.create(
            data_type=CustomField.FieldDataType.DOCUMENTLINK,
            name="Test Custom Field Doc Link",
        )
        # Object-level grant on the target document for the source owner.
        assign_perm("change_document", source_owner, target_doc)
        payload = {
            "custom_fields": [
                {
                    "field": link_field.id,
                    "value": [target_doc.id],
                },
            ],
        }

        self.client.force_authenticate(user=source_owner)
        resp = self.client.patch(
            f"/api/documents/{source_doc.id}/",
            data=payload,
            format="json",
        )

        self.assertEqual(resp.status_code, status.HTTP_200_OK)
        target_doc.refresh_from_db()
        back_reference = target_doc.custom_fields.get(field=link_field)
        self.assertEqual(back_reference.value, [source_doc.id])
    def test_custom_field_filters(self) -> None:
        custom_field_string = CustomField.objects.create(
            name="Test Custom Field String",
--- a/src/documents/tests/test_api_permissions.py
+++ b/src/documents/tests/test_api_permissions.py
@@ -888,6 +888,19 @@ class TestApiUser(DirectoriesMixin, APITestCase):
        self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
        response = self.client.post(
            f"{self.ENDPOINT}",
            json.dumps(
                {
                    "username": "user4",
                    "is_superuser": "true",
                },
            ),
            content_type="application/json",
        )
        self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
        self.client.force_authenticate(user2)
        response = self.client.patch(
@@ -920,6 +933,65 @@ class TestApiUser(DirectoriesMixin, APITestCase):
        returned_user1 = User.objects.get(pk=user1.pk)
        self.assertEqual(returned_user1.is_superuser, False)
    def test_only_superusers_can_create_or_alter_staff_status(self):
        """
        GIVEN:
            - A regular user holding every permission
            - A superuser
        WHEN:
            - The regular user tries to grant itself staff status
            - The regular user tries to create a new account with staff status
            - The superuser grants the regular user staff status
        THEN:
            - Both requests from the regular user are rejected with HTTP 403
            - The superuser's request succeeds and the staff flag is stored
        """
        regular_user = User.objects.create_user(username="user1")
        regular_user.user_permissions.add(*Permission.objects.all())
        superuser = User.objects.create_superuser(username="user2")
        detail_url = f"{self.ENDPOINT}{regular_user.pk}/"

        # A non-superuser may neither alter nor create staff accounts.
        self.client.force_authenticate(regular_user)
        response = self.client.patch(
            detail_url,
            json.dumps({"is_staff": "true"}),
            content_type="application/json",
        )
        self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)

        new_staff_account = {
            "username": "user3",
            "is_staff": 1,
        }
        response = self.client.post(
            f"{self.ENDPOINT}",
            json.dumps(new_staff_account),
            content_type="application/json",
        )
        self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)

        # The same change made by a superuser is accepted.
        self.client.force_authenticate(superuser)
        response = self.client.patch(
            detail_url,
            json.dumps({"is_staff": True}),
            content_type="application/json",
        )
        self.assertEqual(response.status_code, status.HTTP_200_OK)

        reloaded_user = User.objects.get(pk=regular_user.pk)
        self.assertEqual(reloaded_user.is_staff, True)
 class TestApiGroup(DirectoriesMixin, APITestCase):
    ENDPOINT = "/api/groups/"
--- a/src/documents/tests/test_api_schema.py
+++ b/src/documents/tests/test_api_schema.py
@@ -12,7 +12,12 @@ class TestApiSchema(APITestCase):
        Test that the schema is valid
        """
        try:
-            call_command("spectacular", "--validate", "--fail-on-warn")
+            call_command(
                "spectacular",
                "--validate",
                "--fail-on-warn",
                skip_checks=True,
            )
        except CommandError as e:
            self.fail(f"Schema validation failed: {e}")
--- a/src/documents/tests/test_api_search.py
+++ b/src/documents/tests/test_api_search.py
@@ -702,6 +702,40 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
        self.assertEqual(correction, None)
    def test_search_spelling_suggestion_suppressed_for_private_terms(self):
        """
        GIVEN:
            - 55 documents owned by one user whose content contains "treasury"
            - 55 documents owned by a second user whose content does not
            - The second user is granted the view_document permission
        WHEN:
            - The second user searches for the misspelling "treasurx"
        THEN:
            - No documents are returned
            - No corrected query is suggested
        """
        owner = User.objects.create_user("owner")
        attacker = User.objects.create_user("attacker")
        view_permission = Permission.objects.get(codename="view_document")
        attacker.user_permissions.add(view_permission)

        with AsyncWriter(index.open_index()) as writer:
            for offset in range(55):
                number = offset + 1
                private_doc = Document.objects.create(
                    checksum=f"p{offset}",
                    pk=100 + offset,
                    title=f"Private Document {number}",
                    content=f"treasury document {number}",
                    owner=owner,
                )
                visible_doc = Document.objects.create(
                    checksum=f"v{offset}",
                    pk=200 + offset,
                    title=f"Visible Document {number}",
                    content=f"public ledger {number}",
                    owner=attacker,
                )
                # Index the private document first, then the visible one.
                for doc in (private_doc, visible_doc):
                    index.update_document(writer, doc)

        self.client.force_authenticate(user=attacker)
        response = self.client.get("/api/documents/?query=treasurx")

        self.assertEqual(response.status_code, status.HTTP_200_OK)
        payload = response.data
        self.assertEqual(payload["count"], 0)
        self.assertIsNone(payload["corrected_query"])
    @mock.patch(
        "whoosh.searching.Searcher.correct_query",
        side_effect=Exception("Test error"),
@@ -772,6 +806,60 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
        self.assertEqual(results[0]["id"], d3.id)
        self.assertEqual(results[1]["id"], d1.id)
    def test_search_more_like_requires_view_permission_on_seed_document(
        self,
    ) -> None:
        """
        GIVEN:
            - A document owned by one user
            - Two documents owned by a second user, one with similar content
            - The second user is granted the view_document permission
        WHEN:
            - The second user requests more-like-this using the first user's
              document as the seed
        THEN:
            - The response is 403 with the body "Insufficient permissions."
        """
        owner = User.objects.create_user("owner")
        attacker = User.objects.create_user("attacker")
        view_permission = Permission.objects.get(codename="view_document")
        attacker.user_permissions.add(view_permission)

        private_seed = Document.objects.create(
            title="private bank statement",
            content="quarterly treasury bank statement wire transfer",
            checksum="seed",
            owner=owner,
            pk=10,
        )
        visible_doc = Document.objects.create(
            title="attacker-visible match",
            content="quarterly treasury bank statement wire transfer summary",
            checksum="visible",
            owner=attacker,
            pk=11,
        )
        other_doc = Document.objects.create(
            title="unrelated",
            content="completely different topic",
            checksum="other",
            owner=attacker,
            pk=12,
        )

        # Index all three documents in creation order.
        with AsyncWriter(index.open_index()) as writer:
            for doc in (private_seed, visible_doc, other_doc):
                index.update_document(writer, doc)

        self.client.force_authenticate(user=attacker)
        more_like_url = f"/api/documents/?more_like_id={private_seed.id}"
        response = self.client.get(more_like_url)

        self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
        self.assertEqual(response.content, b"Insufficient permissions.")
    def test_search_filtering(self) -> None:
        t = Tag.objects.create(name="tag")
        t2 = Tag.objects.create(name="tag2")
@@ -1356,6 +1444,83 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
        self.assertEqual(results["custom_fields"][0]["id"], custom_field1.id)
        self.assertEqual(results["workflows"][0]["id"], workflow1.id)
    def test_global_search_filters_owned_mail_objects(self) -> None:
        """
        GIVEN:
            - A user granted view_mailaccount and view_mailrule
            - Mail accounts and rules owned by that user, owned by another
              user, and created without an owner
        WHEN:
            - The first user runs a global search matching all of them
        THEN:
            - Only the user's own and the ownerless accounts and rules are
              returned; the other user's objects are absent
        """
        user1 = User.objects.create_user("mail-search-user")
        user2 = User.objects.create_user("other-mail-search-user")
        user1.user_permissions.add(
            Permission.objects.get(codename="view_mailaccount"),
            Permission.objects.get(codename="view_mailrule"),
        )

        # Connection settings shared by every account created below.
        imap_settings = {
            "password": "secret",
            "imap_port": 993,
            "imap_security": MailAccount.ImapSecurity.SSL,
            "character_set": "UTF-8",
        }
        own_account = MailAccount.objects.create(
            name="bank owned account",
            username="owner@example.com",
            imap_server="imap.owner.example.com",
            owner=user1,
            **imap_settings,
        )
        other_account = MailAccount.objects.create(
            name="bank other account",
            username="other@example.com",
            imap_server="imap.other.example.com",
            owner=user2,
            **imap_settings,
        )
        unowned_account = MailAccount.objects.create(
            name="bank shared account",
            username="shared@example.com",
            imap_server="imap.shared.example.com",
            **imap_settings,
        )

        move_action = MailRule.MailAction.MOVE
        own_rule = MailRule.objects.create(
            name="bank owned rule",
            account=own_account,
            action=move_action,
            owner=user1,
        )
        other_rule = MailRule.objects.create(
            name="bank other rule",
            account=other_account,
            action=move_action,
            owner=user2,
        )
        unowned_rule = MailRule.objects.create(
            name="bank shared rule",
            account=unowned_account,
            action=move_action,
        )

        self.client.force_authenticate(user1)
        response = self.client.get("/api/search/?query=bank")
        self.assertEqual(response.status_code, status.HTTP_200_OK)

        returned_account_ids = [
            account["id"] for account in response.data["mail_accounts"]
        ]
        returned_rule_ids = [rule["id"] for rule in response.data["mail_rules"]]

        self.assertCountEqual(
            returned_account_ids,
            [own_account.id, unowned_account.id],
        )
        self.assertCountEqual(
            returned_rule_ids,
            [own_rule.id, unowned_rule.id],
        )
        # Spell out the exclusion of the other user's objects explicitly.
        self.assertNotIn(other_account.id, returned_account_ids)
        self.assertNotIn(other_rule.id, returned_rule_ids)
    def test_global_search_bad_request(self) -> None:
        """
        WHEN:
--- a/src/documents/tests/test_api_status.py
+++ b/src/documents/tests/test_api_status.py
@@ -26,6 +26,23 @@ class TestSystemStatus(APITestCase):
        self.override = override_settings(MEDIA_ROOT=self.tmp_dir)
        self.override.enable()
        # Mock slow network calls so tests don't block on real Redis/Celery timeouts.
        # Individual tests that care about specific behaviour override these with
        # their own @mock.patch decorators (which take precedence).
        redis_patcher = mock.patch(
            "redis.Redis.execute_command",
            side_effect=Exception("Redis not available"),
        )
        self.mock_redis = redis_patcher.start()
        self.addCleanup(redis_patcher.stop)
        celery_patcher = mock.patch(
            "celery.app.control.Inspect.ping",
            side_effect=Exception("Celery not available"),
        )
        self.mock_celery_ping = celery_patcher.start()
        self.addCleanup(celery_patcher.stop)
    def tearDown(self) -> None:
        super().tearDown()
@@ -69,11 +86,18 @@ class TestSystemStatus(APITestCase):
        """
        response = self.client.get(self.ENDPOINT)
        self.assertEqual(response.status_code, status.HTTP_401_UNAUTHORIZED)
        self.assertEqual(response["WWW-Authenticate"], "Token")
        normal_user = User.objects.create_user(username="normal_user")
        self.client.force_login(normal_user)
        response = self.client.get(self.ENDPOINT)
        self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
    def test_system_status_with_bad_basic_auth_challenges(self) -> None:
        """
        GIVEN:
            - Client credentials set to the Authorization header "Basic invalid"
        WHEN:
            - The status endpoint is requested
        THEN:
            - The response is 401 and carries a Basic realm="api" challenge
        """
        self.client.credentials(HTTP_AUTHORIZATION="Basic invalid")

        response = self.client.get(self.ENDPOINT)

        self.assertEqual(response.status_code, status.HTTP_401_UNAUTHORIZED)
        challenge = response["WWW-Authenticate"]
        self.assertEqual(challenge, 'Basic realm="api"')
    def test_system_status_container_detection(self) -> None:
        """
        GIVEN:
@@ -84,13 +108,17 @@ class TestSystemStatus(APITestCase):
            - The response contains the correct install type
        """
        self.client.force_login(self.user)
-        os.environ["PNGX_CONTAINERIZED"] = "1"
+        with mock.patch.dict(os.environ, {"PNGX_CONTAINERIZED": "1"}, clear=False):
-        response = self.client.get(self.ENDPOINT)
+            response = self.client.get(self.ENDPOINT)
-        self.assertEqual(response.status_code, status.HTTP_200_OK)
+            self.assertEqual(response.status_code, status.HTTP_200_OK)
-        self.assertEqual(response.data["install_type"], "docker")
+            self.assertEqual(response.data["install_type"], "docker")
-        os.environ["KUBERNETES_SERVICE_HOST"] = "http://localhost"
+        with mock.patch.dict(
-        response = self.client.get(self.ENDPOINT)
+            os.environ,
-        self.assertEqual(response.data["install_type"], "kubernetes")
+            {"PNGX_CONTAINERIZED": "1", "KUBERNETES_SERVICE_HOST": "http://localhost"},
            clear=False,
        ):
            response = self.client.get(self.ENDPOINT)
            self.assertEqual(response.data["install_type"], "kubernetes")
    @mock.patch("redis.Redis.execute_command")
    def test_system_status_redis_ping(self, mock_ping) -> None:
--- a/src/documents/tests/test_consumer.py
+++ b/src/documents/tests/test_consumer.py
@@ -36,7 +36,6 @@ from documents.tests.utils import DummyProgressManager
 from documents.tests.utils import FileSystemAssertsMixin
 from documents.tests.utils import GetConsumerMixin
 from paperless_mail.models import MailRule
 from paperless_mail.parsers import MailDocumentParser
 class _BaseTestParser(DocumentParser):
@@ -642,6 +641,7 @@ class TestConsumer(
        self._assert_first_last_send_progress()
    @mock.patch("documents.consumer.generate_unique_filename")
    @override_settings(FILENAME_FORMAT="{pk}")
    def testFilenameHandlingFallsBackWhenGeneratedPathExceedsDbLimit(self, m):
        m.side_effect = lambda doc, archive_filename=False: Path(
            ("a" * 1100 + ".pdf") if not archive_filename else ("b" * 1100 + ".pdf"),
@@ -1090,7 +1090,7 @@ class TestConsumer(
            self.assertEqual(command[1], "--replace-input")
    @mock.patch("paperless_mail.models.MailRule.objects.get")
-    @mock.patch("paperless_mail.parsers.MailDocumentParser.parse")
+    @mock.patch("paperless.parsers.mail.MailDocumentParser.parse")
    @mock.patch("documents.parsers.document_consumer_declaration.send")
    def test_mail_parser_receives_mailrule(
        self,
@@ -1106,11 +1106,13 @@ class TestConsumer(
        THEN:
            - The mail parser should receive the mail rule
        """
        from paperless_mail.signals import get_parser as mail_get_parser
        mock_consumer_declaration_send.return_value = [
            (
                None,
                {
-                    "parser": MailDocumentParser,
+                    "parser": mail_get_parser,
                    "mime_types": {"message/rfc822": ".eml"},
                    "weight": 0,
                },
@@ -1122,9 +1124,10 @@ class TestConsumer(
        with self.get_consumer(
            filepath=(
                Path(__file__).parent.parent.parent
-                / Path("paperless_mail")
+                / Path("paperless")
                / Path("tests")
                / Path("samples")
                / Path("mail")
            ).resolve()
            / "html.eml",
            source=DocumentSource.MailFetch,
@@ -1135,12 +1138,10 @@ class TestConsumer(
                ConsumerError,
            ):
                consumer.run()
-                mock_mail_parser_parse.assert_called_once_with(
+            mock_mail_parser_parse.assert_called_once_with(
-                    consumer.working_copy,
+                consumer.working_copy,
-                    "message/rfc822",
+                "message/rfc822",
-                    file_name="sample.pdf",
+            )
                    mailrule=mock_mailrule_get.return_value,
                )
@mock.patch("documents.consumer.magic.from_file", fake_magic_from_file)
--- a/src/documents/tests/test_file_handling.py
+++ b/src/documents/tests/test_file_handling.py
@@ -1,4 +1,5 @@
 import datetime
 import hashlib
 import logging
 import tempfile
 from pathlib import Path
@@ -204,6 +205,52 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
            )
            self.assertEqual(document.filename, "none/none.pdf")
    @override_settings(FILENAME_FORMAT=None)
    def test_stale_save_recovers_already_moved_files(self) -> None:
        """
        GIVEN:
            - A document with original and archive files stored under "old/"
            - Two in-memory copies of that document loaded before any change
        WHEN:
            - The first copy is saved with a new storage path
            - The second, now stale, copy is saved with the same new storage path
        THEN:
            - The stored filenames point at "new/" after each save
            - Both files exist at the document's current paths
            - No files remain at the old locations
        """
        old_storage_path = StoragePath.objects.create(
            name="old-path",
            path="old/{{title}}",
        )
        new_storage_path = StoragePath.objects.create(
            name="new-path",
            path="new/{{title}}",
        )
        original_bytes = b"original"
        archive_bytes = b"archive"
        doc = Document.objects.create(
            title="document",
            mime_type="application/pdf",
            checksum=hashlib.md5(original_bytes).hexdigest(),
            archive_checksum=hashlib.md5(archive_bytes).hexdigest(),
            filename="old/document.pdf",
            archive_filename="old/document.pdf",
            storage_path=old_storage_path,
        )
        # Put real files on disk at the locations the document record refers to.
        create_source_path_directory(doc.source_path)
        doc.source_path.write_bytes(original_bytes)
        create_source_path_directory(doc.archive_path)
        doc.archive_path.write_bytes(archive_bytes)
        # Load both copies up front so stale_doc still carries the "old/"
        # filenames once fresh_doc has been saved.
        stale_doc = Document.objects.get(pk=doc.pk)
        fresh_doc = Document.objects.get(pk=doc.pk)
        fresh_doc.storage_path = new_storage_path
        fresh_doc.save()
        doc.refresh_from_db()
        self.assertEqual(doc.filename, "new/document.pdf")
        self.assertEqual(doc.archive_filename, "new/document.pdf")
        # Saving the stale copy must leave the database and the files consistent.
        stale_doc.storage_path = new_storage_path
        stale_doc.save()
        doc.refresh_from_db()
        self.assertEqual(doc.filename, "new/document.pdf")
        self.assertEqual(doc.archive_filename, "new/document.pdf")
        self.assertIsFile(doc.source_path)
        self.assertIsFile(doc.archive_path)
        # Nothing may be left behind at the old locations.
        self.assertIsNotFile(settings.ORIGINALS_DIR / "old" / "document.pdf")
        self.assertIsNotFile(settings.ARCHIVE_DIR / "old" / "document.pdf")
    @override_settings(FILENAME_FORMAT="{correspondent}/{correspondent}")
    def test_document_delete(self) -> None:
        document = Document()
--- a/src/documents/tests/test_management.py
+++ b/src/documents/tests/test_management.py
@@ -1,7 +1,10 @@
 from __future__ import annotations
 import filecmp
 import shutil
 from io import StringIO
 from pathlib import Path
 from typing import TYPE_CHECKING
 from unittest import mock
 import pytest
@@ -11,6 +14,9 @@ from django.core.management import call_command
 from django.test import TestCase
 from django.test import override_settings
 if TYPE_CHECKING:
    from pytest_mock import MockerFixture
 from documents.file_handling import generate_filename
 from documents.models import Document
 from documents.tasks import update_document_content_maybe_archive_file
@@ -35,7 +41,7 @@ class TestArchiver(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
        doc = self.make_models()
        shutil.copy(sample_file, Path(self.dirs.originals_dir) / f"{doc.id:07}.pdf")
-        call_command("document_archiver", "--processes", "1")
+        call_command("document_archiver", "--processes", "1", skip_checks=True)
    def test_handle_document(self) -> None:
        doc = self.make_models()
@@ -100,12 +106,12 @@ class TestArchiver(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
 class TestMakeIndex(TestCase):
    @mock.patch("documents.management.commands.document_index.index_reindex")
    def test_reindex(self, m) -> None:
-        call_command("document_index", "reindex")
+        call_command("document_index", "reindex", skip_checks=True)
        m.assert_called_once()
    @mock.patch("documents.management.commands.document_index.index_optimize")
    def test_optimize(self, m) -> None:
-        call_command("document_index", "optimize")
+        call_command("document_index", "optimize", skip_checks=True)
        m.assert_called_once()
@@ -122,7 +128,7 @@ class TestRenamer(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
        Path(doc.archive_path).touch()
        with override_settings(FILENAME_FORMAT="{correspondent}/{title}"):
-            call_command("document_renamer")
+            call_command("document_renamer", skip_checks=True)
        doc2 = Document.objects.get(id=doc.id)
@@ -135,14 +141,32 @@ class TestRenamer(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
@pytest.mark.management
-class TestCreateClassifier(TestCase):
+class TestCreateClassifier:
-    @mock.patch(
+    def test_create_classifier(self, mocker: MockerFixture) -> None:
-        "documents.management.commands.document_create_classifier.train_classifier",
+        m = mocker.patch(
-    )
+            "documents.management.commands.document_create_classifier.train_classifier",
-    def test_create_classifier(self, m) -> None:
+        )
        call_command("document_create_classifier")
-        m.assert_called_once()
+        call_command("document_create_classifier", skip_checks=True)
        m.assert_called_once_with(scheduled=False, status_callback=mocker.ANY)
        assert callable(m.call_args.kwargs["status_callback"])
    def test_create_classifier_callback_output(self, mocker: MockerFixture) -> None:
        """A message sent through the status callback shows up on the command's stdout."""

        def emit_status(**kwargs):
            # Stand in for train_classifier: report one phase via the callback.
            kwargs["status_callback"]("Vectorizing document content...")

        mocker.patch(
            "documents.management.commands.document_create_classifier.train_classifier",
            side_effect=emit_status,
        )

        captured = StringIO()
        call_command("document_create_classifier", skip_checks=True, stdout=captured)

        assert "Vectorizing document content..." in captured.getvalue()
@pytest.mark.management
@@ -152,7 +176,7 @@ class TestConvertMariaDBUUID(TestCase):
        m.alter_field.return_value = None
        stdout = StringIO()
-        call_command("convert_mariadb_uuid", stdout=stdout)
+        call_command("convert_mariadb_uuid", stdout=stdout, skip_checks=True)
        m.assert_called_once()
@@ -167,6 +191,6 @@ class TestPruneAuditLogs(TestCase):
            object_id=1,
            action=LogEntry.Action.CREATE,
        )
-        call_command("prune_audit_logs")
+        call_command("prune_audit_logs", skip_checks=True)
        self.assertEqual(LogEntry.objects.count(), 0)
--- a/src/documents/tests/test_management_exporter.py
+++ b/src/documents/tests/test_management_exporter.py
@@ -180,7 +180,7 @@ class TestExportImport(
        if data_only:
            args += ["--data-only"]
-        call_command(*args)
+        call_command(*args, skip_checks=True)
        with (self.target / "manifest.json").open() as f:
            manifest = json.load(f)
@@ -272,7 +272,12 @@ class TestExportImport(
            GroupObjectPermission.objects.all().delete()
            self.assertEqual(Document.objects.count(), 0)
-            call_command("document_importer", "--no-progress-bar", self.target)
+            call_command(
                "document_importer",
                "--no-progress-bar",
                self.target,
                skip_checks=True,
            )
            self.assertEqual(Document.objects.count(), 4)
            self.assertEqual(Tag.objects.count(), 1)
            self.assertEqual(Correspondent.objects.count(), 1)
@@ -438,7 +443,8 @@ class TestExportImport(
            filename="0000010.pdf",
            mime_type="application/pdf",
        )
-        self.assertRaises(FileNotFoundError, call_command, "document_exporter", target)
+        with self.assertRaises(FileNotFoundError):
            call_command("document_exporter", target, skip_checks=True)
    def test_export_zipped(self) -> None:
        """
@@ -458,7 +464,7 @@ class TestExportImport(
        args = ["document_exporter", self.target, "--zip"]
-        call_command(*args)
+        call_command(*args, skip_checks=True)
        expected_file = str(
            self.target / f"export-{timezone.localdate().isoformat()}.zip",
@@ -493,7 +499,7 @@ class TestExportImport(
        with override_settings(
            FILENAME_FORMAT="{created_year}/{correspondent}/{title}",
        ):
-            call_command(*args)
+            call_command(*args, skip_checks=True)
        expected_file = str(
            self.target / f"export-{timezone.localdate().isoformat()}.zip",
@@ -538,7 +544,7 @@ class TestExportImport(
        args = ["document_exporter", self.target, "--zip", "--delete"]
-        call_command(*args)
+        call_command(*args, skip_checks=True)
        expected_file = str(
            self.target / f"export-{timezone.localdate().isoformat()}.zip",
@@ -565,7 +571,7 @@ class TestExportImport(
        args = ["document_exporter", "/tmp/foo/bar"]
        with self.assertRaises(CommandError) as e:
-            call_command(*args)
+            call_command(*args, skip_checks=True)
        self.assertEqual("That path doesn't exist", str(e.exception))
@@ -583,7 +589,7 @@ class TestExportImport(
            args = ["document_exporter", tmp_file.name]
            with self.assertRaises(CommandError) as e:
-                call_command(*args)
+                call_command(*args, skip_checks=True)
            self.assertEqual("That path isn't a directory", str(e.exception))
@@ -602,7 +608,7 @@ class TestExportImport(
            args = ["document_exporter", tmp_dir]
            with self.assertRaises(CommandError) as e:
-                call_command(*args)
+                call_command(*args, skip_checks=True)
            self.assertEqual(
                "That path doesn't appear to be writable",
@@ -647,7 +653,12 @@ class TestExportImport(
            self.assertEqual(Document.objects.count(), 4)
            Document.objects.all().delete()
            self.assertEqual(Document.objects.count(), 0)
-            call_command("document_importer", "--no-progress-bar", self.target)
+            call_command(
                "document_importer",
                "--no-progress-bar",
                self.target,
                skip_checks=True,
            )
            self.assertEqual(Document.objects.count(), 4)
    def test_no_thumbnail(self) -> None:
@@ -690,7 +701,12 @@ class TestExportImport(
            self.assertEqual(Document.objects.count(), 4)
            Document.objects.all().delete()
            self.assertEqual(Document.objects.count(), 0)
-            call_command("document_importer", "--no-progress-bar", self.target)
+            call_command(
                "document_importer",
                "--no-progress-bar",
                self.target,
                skip_checks=True,
            )
            self.assertEqual(Document.objects.count(), 4)
    def test_split_manifest(self) -> None:
@@ -721,7 +737,12 @@ class TestExportImport(
            Document.objects.all().delete()
            CustomFieldInstance.objects.all().delete()
            self.assertEqual(Document.objects.count(), 0)
-            call_command("document_importer", "--no-progress-bar", self.target)
+            call_command(
                "document_importer",
                "--no-progress-bar",
                self.target,
                skip_checks=True,
            )
            self.assertEqual(Document.objects.count(), 4)
            self.assertEqual(CustomFieldInstance.objects.count(), 1)
@@ -746,7 +767,12 @@ class TestExportImport(
            self.assertEqual(Document.objects.count(), 4)
            Document.objects.all().delete()
            self.assertEqual(Document.objects.count(), 0)
-            call_command("document_importer", "--no-progress-bar", self.target)
+            call_command(
                "document_importer",
                "--no-progress-bar",
                self.target,
                skip_checks=True,
            )
            self.assertEqual(Document.objects.count(), 4)
    def test_folder_prefix_with_split(self) -> None:
@@ -771,7 +797,12 @@ class TestExportImport(
            self.assertEqual(Document.objects.count(), 4)
            Document.objects.all().delete()
            self.assertEqual(Document.objects.count(), 0)
-            call_command("document_importer", "--no-progress-bar", self.target)
+            call_command(
                "document_importer",
                "--no-progress-bar",
                self.target,
                skip_checks=True,
            )
            self.assertEqual(Document.objects.count(), 4)
    def test_import_db_transaction_failed(self) -> None:
@@ -813,7 +844,12 @@ class TestExportImport(
            self.user = User.objects.create(username="temp_admin")
            with self.assertRaises(IntegrityError):
-                call_command("document_importer", "--no-progress-bar", self.target)
+                call_command(
                    "document_importer",
                    "--no-progress-bar",
                    self.target,
                    skip_checks=True,
                )
            self.assertEqual(ContentType.objects.count(), num_content_type_objects)
            self.assertEqual(Permission.objects.count(), num_permission_objects + 1)
@@ -864,6 +900,7 @@ class TestExportImport(
            "--no-progress-bar",
            "--data-only",
            self.target,
            skip_checks=True,
        )
        self.assertEqual(Document.objects.all().count(), 4)
@@ -923,6 +960,7 @@ class TestCryptExportImport(
            "--passphrase",
            "securepassword",
            self.target,
            skip_checks=True,
        )
        self.assertIsFile(self.target / "metadata.json")
@@ -948,6 +986,7 @@ class TestCryptExportImport(
            "--passphrase",
            "securepassword",
            self.target,
            skip_checks=True,
        )
        account = MailAccount.objects.first()
@@ -976,6 +1015,7 @@ class TestCryptExportImport(
            "--passphrase",
            "securepassword",
            self.target,
            skip_checks=True,
        )
        with self.assertRaises(CommandError) as err:
@@ -983,6 +1023,7 @@ class TestCryptExportImport(
                "document_importer",
                "--no-progress-bar",
                self.target,
                skip_checks=True,
            )
            self.assertEqual(
                err.msg,
@@ -1014,6 +1055,7 @@ class TestCryptExportImport(
            "--no-progress-bar",
            str(self.target),
            stdout=stdout,
            skip_checks=True,
        )
        stdout.seek(0)
        self.assertIn(
--- a/src/documents/tests/test_management_fuzzy.py
+++ b/src/documents/tests/test_management_fuzzy.py
@@ -21,6 +21,7 @@ class TestFuzzyMatchCommand(TestCase):
            *args,
            stdout=stdout,
            stderr=stderr,
            skip_checks=True,
            **kwargs,
        )
        return stdout.getvalue(), stderr.getvalue()
--- a/src/documents/tests/test_management_importer.py
+++ b/src/documents/tests/test_management_importer.py
@@ -41,6 +41,7 @@ class TestCommandImport(
                "document_importer",
                "--no-progress-bar",
                str(self.dirs.scratch_dir),
                skip_checks=True,
            )
        self.assertIn(
            "That directory doesn't appear to contain a manifest.json file.",
@@ -67,6 +68,7 @@ class TestCommandImport(
                "document_importer",
                "--no-progress-bar",
                str(self.dirs.scratch_dir),
                skip_checks=True,
            )
        self.assertIn(
            "The manifest file contains a record which does not refer to an actual document file.",
@@ -96,6 +98,7 @@ class TestCommandImport(
                "document_importer",
                "--no-progress-bar",
                str(self.dirs.scratch_dir),
                skip_checks=True,
            )
        self.assertIn('The manifest file refers to "noexist.pdf"', str(e.exception))
@@ -157,7 +160,7 @@ class TestCommandImport(
            - CommandError is raised indicating the issue
        """
        with self.assertRaises(CommandError) as cm:
-            call_command("document_importer", Path("/tmp/notapath"))
+            call_command("document_importer", Path("/tmp/notapath"), skip_checks=True)
        self.assertIn("That path doesn't exist", str(cm.exception))
    def test_import_source_not_readable(self) -> None:
@@ -173,7 +176,7 @@ class TestCommandImport(
            path = Path(temp_dir)
            path.chmod(0o222)
            with self.assertRaises(CommandError) as cm:
-                call_command("document_importer", path)
+                call_command("document_importer", path, skip_checks=True)
            self.assertIn(
                "That path doesn't appear to be readable",
                str(cm.exception),
@@ -193,7 +196,12 @@ class TestCommandImport(
        self.assertIsNotFile(path)
        with self.assertRaises(CommandError) as e:
-            call_command("document_importer", "--no-progress-bar", str(path))
+            call_command(
                "document_importer",
                "--no-progress-bar",
                str(path),
                skip_checks=True,
            )
        self.assertIn("That path doesn't exist", str(e.exception))
    def test_import_files_exist(self) -> None:
@@ -218,6 +226,7 @@ class TestCommandImport(
                "--no-progress-bar",
                str(self.dirs.scratch_dir),
                stdout=stdout,
                skip_checks=True,
            )
        stdout.seek(0)
        self.assertIn(
@@ -246,6 +255,7 @@ class TestCommandImport(
                "--no-progress-bar",
                str(self.dirs.scratch_dir),
                stdout=stdout,
                skip_checks=True,
            )
        stdout.seek(0)
        self.assertIn(
@@ -282,6 +292,7 @@ class TestCommandImport(
                "--no-progress-bar",
                str(self.dirs.scratch_dir),
                stdout=stdout,
                skip_checks=True,
            )
        stdout.seek(0)
        self.assertIn(
@@ -309,6 +320,7 @@ class TestCommandImport(
                "--no-progress-bar",
                str(self.dirs.scratch_dir),
                stdout=stdout,
                skip_checks=True,
            )
        stdout.seek(0)
        stdout_str = str(stdout.read())
@@ -338,6 +350,7 @@ class TestCommandImport(
                "--no-progress-bar",
                str(self.dirs.scratch_dir),
                stdout=stdout,
                skip_checks=True,
            )
        stdout.seek(0)
        stdout_str = str(stdout.read())
@@ -377,6 +390,7 @@ class TestCommandImport(
                "--no-progress-bar",
                str(zip_path),
                stdout=stdout,
                skip_checks=True,
            )
        stdout.seek(0)
        stdout_str = str(stdout.read())
--- a/src/documents/tests/test_management_retagger.py
+++ b/src/documents/tests/test_management_retagger.py
@@ -139,7 +139,7 @@ class TestRetaggerTags(DirectoriesMixin):
    @pytest.mark.usefixtures("documents")
    def test_add_tags(self, tags: TagTuple) -> None:
        tag_first, tag_second, *_ = tags
-        call_command("document_retagger", "--tags")
+        call_command("document_retagger", "--tags", skip_checks=True)
        d_first, d_second, d_unrelated, d_auto = _get_docs()
        assert d_first.tags.count() == 1
@@ -158,7 +158,7 @@ class TestRetaggerTags(DirectoriesMixin):
        tag_first, tag_second, tag_inbox, tag_no_match, _ = tags
        d1.tags.add(tag_second)
-        call_command("document_retagger", "--tags", "--overwrite")
+        call_command("document_retagger", "--tags", "--overwrite", skip_checks=True)
        d_first, d_second, d_unrelated, d_auto = _get_docs()
@@ -180,7 +180,13 @@ class TestRetaggerTags(DirectoriesMixin):
        ],
    )
    def test_suggest_does_not_apply_tags(self, extra_args: list[str]) -> None:
-        call_command("document_retagger", "--tags", "--suggest", *extra_args)
+        call_command(
            "document_retagger",
            "--tags",
            "--suggest",
            *extra_args,
            skip_checks=True,
        )
        d_first, d_second, _, d_auto = _get_docs()
        assert d_first.tags.count() == 0
@@ -199,7 +205,7 @@ class TestRetaggerDocumentType(DirectoriesMixin):
    @pytest.mark.usefixtures("documents")
    def test_add_type(self, document_types: DocumentTypeTuple) -> None:
        dt_first, dt_second = document_types
-        call_command("document_retagger", "--document_type")
+        call_command("document_retagger", "--document_type", skip_checks=True)
        d_first, d_second, _, _ = _get_docs()
        assert d_first.document_type == dt_first
@@ -214,7 +220,13 @@ class TestRetaggerDocumentType(DirectoriesMixin):
        ],
    )
    def test_suggest_does_not_apply_document_type(self, extra_args: list[str]) -> None:
-        call_command("document_retagger", "--document_type", "--suggest", *extra_args)
+        call_command(
            "document_retagger",
            "--document_type",
            "--suggest",
            *extra_args,
            skip_checks=True,
        )
        d_first, d_second, _, _ = _get_docs()
        assert d_first.document_type is None
@@ -243,7 +255,12 @@ class TestRetaggerDocumentType(DirectoriesMixin):
        )
        doc = DocumentFactory(content="ambiguous content")
-        call_command("document_retagger", "--document_type", *use_first_flag)
+        call_command(
            "document_retagger",
            "--document_type",
            *use_first_flag,
            skip_checks=True,
        )
        doc.refresh_from_db()
        assert (doc.document_type is not None) is expects_assignment
@@ -260,7 +277,7 @@ class TestRetaggerCorrespondent(DirectoriesMixin):
    @pytest.mark.usefixtures("documents")
    def test_add_correspondent(self, correspondents: CorrespondentTuple) -> None:
        c_first, c_second = correspondents
-        call_command("document_retagger", "--correspondent")
+        call_command("document_retagger", "--correspondent", skip_checks=True)
        d_first, d_second, _, _ = _get_docs()
        assert d_first.correspondent == c_first
@@ -275,7 +292,13 @@ class TestRetaggerCorrespondent(DirectoriesMixin):
        ],
    )
    def test_suggest_does_not_apply_correspondent(self, extra_args: list[str]) -> None:
-        call_command("document_retagger", "--correspondent", "--suggest", *extra_args)
+        call_command(
            "document_retagger",
            "--correspondent",
            "--suggest",
            *extra_args,
            skip_checks=True,
        )
        d_first, d_second, _, _ = _get_docs()
        assert d_first.correspondent is None
@@ -304,7 +327,12 @@ class TestRetaggerCorrespondent(DirectoriesMixin):
        )
        doc = DocumentFactory(content="ambiguous content")
-        call_command("document_retagger", "--correspondent", *use_first_flag)
+        call_command(
            "document_retagger",
            "--correspondent",
            *use_first_flag,
            skip_checks=True,
        )
        doc.refresh_from_db()
        assert (doc.correspondent is not None) is expects_assignment
@@ -326,7 +354,7 @@ class TestRetaggerStoragePath(DirectoriesMixin):
        THEN matching documents get the correct path; existing path is unchanged
        """
        sp1, sp2, sp3 = storage_paths
-        call_command("document_retagger", "--storage_path")
+        call_command("document_retagger", "--storage_path", skip_checks=True)
        d_first, d_second, d_unrelated, d_auto = _get_docs()
        assert d_first.storage_path == sp2
@@ -342,7 +370,12 @@ class TestRetaggerStoragePath(DirectoriesMixin):
        THEN the existing path is replaced by the newly matched path
        """
        sp1, sp2, _ = storage_paths
-        call_command("document_retagger", "--storage_path", "--overwrite")
+        call_command(
            "document_retagger",
            "--storage_path",
            "--overwrite",
            skip_checks=True,
        )
        d_first, d_second, d_unrelated, d_auto = _get_docs()
        assert d_first.storage_path == sp2
@@ -373,7 +406,12 @@ class TestRetaggerStoragePath(DirectoriesMixin):
        )
        doc = DocumentFactory(content="ambiguous content")
-        call_command("document_retagger", "--storage_path", *use_first_flag)
+        call_command(
            "document_retagger",
            "--storage_path",
            *use_first_flag,
            skip_checks=True,
        )
        doc.refresh_from_db()
        assert (doc.storage_path is not None) is expects_assignment
@@ -402,7 +440,13 @@ class TestRetaggerIdRange(DirectoriesMixin):
        expected_count: int,
    ) -> None:
        DocumentFactory(content="NOT the first document")
-        call_command("document_retagger", "--tags", "--id-range", *id_range_args)
+        call_command(
            "document_retagger",
            "--tags",
            "--id-range",
            *id_range_args,
            skip_checks=True,
        )
        tag_first, *_ = tags
        assert Document.objects.filter(tags__id=tag_first.id).count() == expected_count
@@ -416,7 +460,7 @@ class TestRetaggerIdRange(DirectoriesMixin):
    )
    def test_id_range_invalid_arguments_raise(self, args: list[str]) -> None:
        with pytest.raises((CommandError, SystemExit)):
-            call_command("document_retagger", *args)
+            call_command("document_retagger", *args, skip_checks=True)
 # ---------------------------------------------------------------------------
@@ -430,12 +474,12 @@ class TestRetaggerEdgeCases(DirectoriesMixin):
    @pytest.mark.usefixtures("documents")
    def test_no_targets_exits_cleanly(self) -> None:
        """Calling the retagger with no classifier targets should not raise."""
-        call_command("document_retagger")
+        call_command("document_retagger", skip_checks=True)
    @pytest.mark.usefixtures("documents")
    def test_inbox_only_skips_non_inbox_documents(self) -> None:
        """--inbox-only must restrict processing to documents with an inbox tag."""
-        call_command("document_retagger", "--tags", "--inbox-only")
+        call_command("document_retagger", "--tags", "--inbox-only", skip_checks=True)
        d_first, _, d_unrelated, _ = _get_docs()
        assert d_first.tags.count() == 0
--- a/src/documents/tests/test_management_superuser.py
+++ b/src/documents/tests/test_management_superuser.py
@@ -20,6 +20,7 @@ class TestManageSuperUser(DirectoriesMixin, TestCase):
                "--no-color",
                stdout=out,
                stderr=StringIO(),
                skip_checks=True,
            )
        return out.getvalue()
--- a/src/documents/tests/test_management_thumbnails.py
+++ b/src/documents/tests/test_management_thumbnails.py
@@ -85,13 +85,20 @@ class TestMakeThumbnails(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
    def test_command(self) -> None:
        self.assertIsNotFile(self.d1.thumbnail_path)
        self.assertIsNotFile(self.d2.thumbnail_path)
-        call_command("document_thumbnails", "--processes", "1")
+        call_command("document_thumbnails", "--processes", "1", skip_checks=True)
        self.assertIsFile(self.d1.thumbnail_path)
        self.assertIsFile(self.d2.thumbnail_path)
    def test_command_documentid(self) -> None:
        self.assertIsNotFile(self.d1.thumbnail_path)
        self.assertIsNotFile(self.d2.thumbnail_path)
-        call_command("document_thumbnails", "--processes", "1", "-d", f"{self.d1.id}")
+        call_command(
            "document_thumbnails",
            "--processes",
            "1",
            "-d",
            f"{self.d1.id}",
            skip_checks=True,
        )
        self.assertIsFile(self.d1.thumbnail_path)
        self.assertIsNotFile(self.d2.thumbnail_path)
--- a/src/documents/tests/test_parsers.py
+++ b/src/documents/tests/test_parsers.py
@@ -9,9 +9,9 @@ from documents.parsers import get_default_file_extension
 from documents.parsers import get_parser_class_for_mime_type
 from documents.parsers import get_supported_file_extensions
 from documents.parsers import is_file_ext_supported
-from paperless_tesseract.parsers import RasterisedDocumentParser
+from paperless.parsers.tesseract import RasterisedDocumentParser
-from paperless_text.parsers import TextDocumentParser
+from paperless.parsers.text import TextDocumentParser
-from paperless_tika.parsers import TikaDocumentParser
+from paperless.parsers.tika import TikaDocumentParser
 class TestParserDiscovery(TestCase):
--- a/src/documents/tests/test_workflows.py
+++ b/src/documents/tests/test_workflows.py
@@ -28,6 +28,7 @@ from rest_framework.test import APIClient
 from rest_framework.test import APITestCase
 from documents.file_handling import create_source_path_directory
 from documents.file_handling import generate_filename
 from documents.file_handling import generate_unique_filename
 from documents.signals.handlers import run_workflows
 from documents.workflows.webhooks import send_webhook
@@ -905,6 +906,121 @@ class TestWorkflows(
        expected_str = f"Document matched {trigger} from {w}"
        self.assertIn(expected_str, cm.output[0])
    def test_workflow_assign_custom_field_keeps_storage_filename_in_sync(self) -> None:
        """
        GIVEN:
            - Existing document with a storage path template that depends on a custom field
            - Existing workflow triggered on document update assigning that custom field
        WHEN:
            - Workflow runs for the document
        THEN:
            - The database filename remains aligned with the moved file on disk
        """
        # The path template renders the value of "Custom Field 1", so changing
        # that field's value changes the document's target filename.
        # NOTE(review): presumably self.cf1 is the field named "Custom Field 1";
        # confirm against the test class setup.
        storage_path = StoragePath.objects.create(
            name="workflow-custom-field-path",
            path="{{ custom_fields|get_cf_value('Custom Field 1', 'none') }}/{{ title }}",
        )
        doc = Document.objects.create(
            title="workflow custom field sync",
            mime_type="application/pdf",
            checksum="workflow-custom-field-sync",
            storage_path=storage_path,
            original_filename="workflow-custom-field-sync.pdf",
        )
        # Starting value of the custom field, before the workflow changes it.
        CustomFieldInstance.objects.create(
            document=doc,
            field=self.cf1,
            value_text="initial",
        )
        # Put a real file at the generated location and record that filename on
        # the row with a queryset update, then reload the instance so it
        # reflects what is stored.
        generated = generate_unique_filename(doc)
        destination = (settings.ORIGINALS_DIR / generated).resolve()
        create_source_path_directory(destination)
        shutil.copy(self.SAMPLE_DIR / "simple.pdf", destination)
        Document.objects.filter(pk=doc.pk).update(filename=generated.as_posix())
        doc.refresh_from_db()
        # Workflow: on DOCUMENT_UPDATED, assign the value "cars" to self.cf1.
        trigger = WorkflowTrigger.objects.create(
            type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED,
        )
        action = WorkflowAction.objects.create(
            type=WorkflowAction.WorkflowActionType.ASSIGNMENT,
            assign_custom_fields_values={self.cf1.pk: "cars"},
        )
        action.assign_custom_fields.add(self.cf1.pk)
        workflow = Workflow.objects.create(
            name="Workflow custom field filename sync",
            order=0,
        )
        workflow.triggers.add(trigger)
        workflow.actions.add(action)
        workflow.save()
        # Invoke the workflow runner directly for the update trigger.
        run_workflows(WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED, doc)
        # Compare the stored filename with the one generated from the reloaded
        # document, and check that a file exists at the document's source path.
        doc.refresh_from_db()
        expected_filename = generate_filename(doc)
        self.assertEqual(Path(doc.filename), expected_filename)
        self.assertTrue(doc.source_path.is_file())
    def test_workflow_document_updated_does_not_overwrite_filename(self) -> None:
        """
        GIVEN:
            - A document whose filename has been updated in the DB by a concurrent
              bulk_update_documents task (simulating update_filename_and_move_files
              completing and writing the new filename to the DB)
            - A stale in-memory document instance still holding the old filename
            - An active DOCUMENT_UPDATED workflow
        WHEN:
            - run_workflows is called with the stale in-memory instance
              (as would happen in the second concurrent bulk_update_documents task)
        THEN:
            - The DB filename is NOT overwritten with the stale in-memory value
              (regression test for GH #12386 — the race window between
              refresh_from_db and document.save in run_workflows)
        """
        # Workflow: on DOCUMENT_UPDATED, assign a new title. The action does not
        # touch the filename field itself.
        trigger = WorkflowTrigger.objects.create(
            type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED,
        )
        action = WorkflowAction.objects.create(
            type=WorkflowAction.WorkflowActionType.ASSIGNMENT,
            assign_title="Updated by workflow",
        )
        workflow = Workflow.objects.create(name="Race condition test workflow", order=0)
        workflow.triggers.add(trigger)
        workflow.actions.add(action)
        workflow.save()
        # In-memory instance created with the "old" filename; no file is
        # written to disk in this test.
        doc = Document.objects.create(
            title="race condition test",
            mime_type="application/pdf",
            checksum="racecondition123",
            original_filename="old.pdf",
            filename="old/path/old.pdf",
        )
        # Simulate BUD-1 completing update_filename_and_move_files:
        # the DB now holds the new filename while BUD-2's in-memory instance is stale.
        new_filename = "new/path/new.pdf"
        Document.global_objects.filter(pk=doc.pk).update(filename=new_filename)
        # The stale instance still has filename="old/path/old.pdf" in memory.
        # Mock refresh_from_db so the stale value persists through run_workflows,
        # replicating the race window between refresh and save.
        # Mock update_filename_and_move_files to prevent file-not-found errors
        # since we are only testing DB state here.
        with (
            mock.patch(
                "documents.signals.handlers.update_filename_and_move_files",
            ),
            mock.patch.object(Document, "refresh_from_db"),
        ):
            run_workflows(WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED, doc)
        # The DB filename must not have been reverted to the stale old value.
        # (The refresh_from_db patch has been lifted by this point, so this
        # call really reloads the row.)
        doc.refresh_from_db()
        self.assertEqual(doc.filename, new_filename)
    def test_document_added_workflow(self) -> None:
        trigger = WorkflowTrigger.objects.create(
            type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
--- a/src/documents/views.py
+++ b/src/documents/views.py
@@ -7,6 +7,7 @@ import tempfile
 import zipfile
 from collections import defaultdict
 from collections import deque
 from contextlib import nullcontext
 from datetime import datetime
 from pathlib import Path
 from time import mktime
@@ -82,6 +83,7 @@ from rest_framework import serializers
 from rest_framework import status
 from rest_framework.decorators import action
 from rest_framework.exceptions import NotFound
 from rest_framework.exceptions import PermissionDenied
 from rest_framework.exceptions import ValidationError
 from rest_framework.filters import OrderingFilter
 from rest_framework.filters import SearchFilter
@@ -225,6 +227,7 @@ from paperless.celery import app as celery_app
 from paperless.config import AIConfig
 from paperless.config import GeneralConfig
 from paperless.models import ApplicationConfiguration
 from paperless.parsers import ParserProtocol
 from paperless.serialisers import GroupSerializer
 from paperless.serialisers import UserSerializer
 from paperless.views import StandardPagination
@@ -1084,9 +1087,11 @@ class DocumentViewSet(
        parser_class = get_parser_class_for_mime_type(mime_type)
        if parser_class:
            parser = parser_class(progress_callback=None, logging_group=None)
            cm = parser if isinstance(parser, ParserProtocol) else nullcontext(parser)
            try:
-                return parser.extract_metadata(file, mime_type)
+                with cm:
                    return parser.extract_metadata(file, mime_type)
            except Exception:  # pragma: no cover
                logger.exception(f"Issue getting metadata for {file}")
                # TODO: cover GPG errors, remove later.
@@ -1328,6 +1333,7 @@ class DocumentViewSet(
        methods=["get", "post", "delete"],
        detail=True,
        permission_classes=[PaperlessNotePermissions],
        pagination_class=None,
        filter_backends=[],
    )
    def notes(self, request, pk=None):
@@ -1528,13 +1534,17 @@ class DocumentViewSet(
        return Response(sorted(entries, key=lambda x: x["timestamp"], reverse=True))
    @extend_schema(
        operation_id="documents_email_document",
        deprecated=True,
    )
    @action(
        methods=["post"],
        detail=True,
        url_path="email",
        permission_classes=[IsAuthenticated, ViewDocumentsPermissions],
    )
-    # TODO: deprecated as of 2.19, remove in future release
+    # TODO: deprecated, remove with drop of support for API v9
    def email_document(self, request, pk=None):
        request_data = request.data.copy()
        request_data.setlist("documents", [pk])
@@ -1961,11 +1971,28 @@ class UnifiedSearchViewSet(DocumentViewSet):
        filtered_queryset = super().filter_queryset(queryset)
        if self._is_search_request():
            from documents import index
            if "query" in self.request.query_params:
                from documents import index
                query_class = index.DelayedFullTextQuery
            elif "more_like_id" in self.request.query_params:
                try:
                    more_like_doc_id = int(self.request.query_params["more_like_id"])
                    more_like_doc = Document.objects.select_related("owner").get(
                        pk=more_like_doc_id,
                    )
                except (TypeError, ValueError, Document.DoesNotExist):
                    raise PermissionDenied(_("Invalid more_like_id"))
                if not has_perms_owner_aware(
                    self.request.user,
                    "view_document",
                    more_like_doc,
                ):
                    raise PermissionDenied(_("Insufficient permissions."))
                from documents import index
                query_class = index.DelayedMoreLikeThisQuery
            else:
                raise ValueError
@@ -2001,6 +2028,8 @@ class UnifiedSearchViewSet(DocumentViewSet):
                    return response
            except NotFound:
                raise
            except PermissionDenied as e:
                return HttpResponseForbidden(str(e.detail))
            except Exception as e:
                logger.warning(f"An error occurred listing search results: {e!s}")
                return HttpResponseBadRequest(
@@ -2939,13 +2968,21 @@ class GlobalSearchView(PassUserMixin):
        )
        groups = groups[:OBJECT_LIMIT]
        mail_rules = (
-            MailRule.objects.filter(name__icontains=query)
+            get_objects_for_user_owner_aware(
                request.user,
                "view_mailrule",
                MailRule,
            ).filter(name__icontains=query)
            if request.user.has_perm("paperless_mail.view_mailrule")
            else []
        )
        mail_rules = mail_rules[:OBJECT_LIMIT]
        mail_accounts = (
-            MailAccount.objects.filter(name__icontains=query)
+            get_objects_for_user_owner_aware(
                request.user,
                "view_mailaccount",
                MailAccount,
            ).filter(name__icontains=query)
            if request.user.has_perm("paperless_mail.view_mailaccount")
            else []
        )
@@ -3919,7 +3956,7 @@ class CustomFieldViewSet(PermissionsAwareDocumentCountMixin, ModelViewSet):
    document_count_through = CustomFieldInstance
    document_count_source_field = "field_id"
-    queryset = CustomField.objects.all().order_by("-created")
+    queryset = CustomField.objects.all().order_by("name")
@extend_schema_view(
--- a/src/documents/workflows/webhooks.py
+++ b/src/documents/workflows/webhooks.py
@@ -1,12 +1,14 @@
 import ipaddress
 import logging
 import socket
 from urllib.parse import urlparse
 import httpx
 from celery import shared_task
 from django.conf import settings
 from paperless.network import format_host_for_url
 from paperless.network import is_public_ip
 from paperless.network import resolve_hostname_ips
 from paperless.network import validate_outbound_http_url
 logger = logging.getLogger("paperless.workflows.webhooks")
@@ -34,23 +36,19 @@ class WebhookTransport(httpx.HTTPTransport):
            raise httpx.ConnectError("No hostname in request URL")
        try:
-            addr_info = socket.getaddrinfo(hostname, None)
+            ips = resolve_hostname_ips(hostname)
-        except socket.gaierror as e:
+        except ValueError as e:
-            raise httpx.ConnectError(f"Could not resolve hostname: {hostname}") from e
+            raise httpx.ConnectError(str(e)) from e
        ips = [info[4][0] for info in addr_info if info and info[4]]
        if not ips:
            raise httpx.ConnectError(f"Could not resolve hostname: {hostname}")
        if not self.allow_internal:
            for ip_str in ips:
-                if not WebhookTransport.is_public_ip(ip_str):
+                if not is_public_ip(ip_str):
                    raise httpx.ConnectError(
                        f"Connection blocked: {hostname} resolves to a non-public address",
                    )
        ip_str = ips[0]
-        formatted_ip = self._format_ip_for_url(ip_str)
+        formatted_ip = format_host_for_url(ip_str)
        new_headers = httpx.Headers(request.headers)
        if "host" in new_headers:
@@ -69,40 +67,6 @@ class WebhookTransport(httpx.HTTPTransport):
        return super().handle_request(request)
    def _format_ip_for_url(self, ip: str) -> str:
        """
        Format IP address for use in URL (wrap IPv6 in brackets)

        IPv4 addresses are returned unchanged. A string that does not parse
        as an IP address is also returned unchanged rather than raising.
        """
        try:
            ip_obj = ipaddress.ip_address(ip)
            if ip_obj.version == 6:
                return f"[{ip}]"
            return ip
        except ValueError:
            # Not an IP literal: hand the input back untouched.
            return ip
    @staticmethod
    def is_public_ip(ip: str | int) -> bool:
        """
        Return True if ``ip`` parses as an IP address that is none of:
        private, loopback, link-local, multicast or unspecified.

        Input that cannot be parsed as an IP address yields False.
        """
        try:
            obj = ipaddress.ip_address(ip)
            return not (
                obj.is_private
                or obj.is_loopback
                or obj.is_link_local
                or obj.is_multicast
                or obj.is_unspecified
            )
        except ValueError:  # pragma: no cover
            # Unparseable input is treated as not public.
            return False
    @staticmethod
    def resolve_first_ip(host: str) -> str | None:
        """
        Resolve ``host`` and return the address of the first result.

        Returns None when the lookup yields no results or when any exception
        is raised during resolution; errors are swallowed, not propagated.
        """
        try:
            info = socket.getaddrinfo(host, None)
            # Each getaddrinfo entry is a 5-tuple whose last item is the
            # socket address; its first element is the host address.
            return info[0][4][0] if info else None
        except Exception:  # pragma: no cover
            return None
@shared_task(
    retry_backoff=True,
@@ -118,21 +82,24 @@ def send_webhook(
    *,
    as_json: bool = False,
 ):
-    p = urlparse(url)
+    try:
-    if p.scheme.lower() not in settings.WEBHOOKS_ALLOWED_SCHEMES or not p.hostname:
+        parsed = validate_outbound_http_url(
-        logger.warning("Webhook blocked: invalid scheme/hostname")
+            url,
            allowed_schemes=settings.WEBHOOKS_ALLOWED_SCHEMES,
            allowed_ports=settings.WEBHOOKS_ALLOWED_PORTS,
            # Internal-address checks happen in transport to preserve ConnectError behavior.
            allow_internal=True,
        )
    except ValueError as e:
        logger.warning("Webhook blocked: %s", e)
        raise
    hostname = parsed.hostname
    if hostname is None:  # pragma: no cover
        raise ValueError("Invalid URL scheme or hostname.")
    port = p.port or (443 if p.scheme == "https" else 80)
    if (
        len(settings.WEBHOOKS_ALLOWED_PORTS) > 0
        and port not in settings.WEBHOOKS_ALLOWED_PORTS
    ):
        logger.warning("Webhook blocked: port not permitted")
        raise ValueError("Destination port not permitted.")
    transport = WebhookTransport(
-        hostname=p.hostname,
+        hostname=hostname,
        allow_internal=settings.WEBHOOKS_ALLOW_INTERNAL_REQUESTS,
    )
--- a/src/locale/en_US/LC_MESSAGES/django.po
+++ b/src/locale/en_US/LC_MESSAGES/django.po
@@ -2,7 +2,7 @@ msgid ""
 msgstr ""
 "Project-Id-Version: paperless-ngx\n"
 "Report-Msgid-Bugs-To: \n"
-"POT-Creation-Date: 2026-03-10 23:46+0000\n"
+"POT-Creation-Date: 2026-03-21 09:25+0000\n"
 "PO-Revision-Date: 2022-02-17 04:17\n"
 "Last-Translator: \n"
 "Language-Team: English\n"
@@ -1299,7 +1299,9 @@ msgstr ""
 msgid "workflow runs"
 msgstr ""
-#: documents/serialisers.py:463 documents/serialisers.py:2470
+#: documents/serialisers.py:463 documents/serialisers.py:815
 #: documents/serialisers.py:2501 documents/views.py:1992
 #: paperless_mail/serialisers.py:143
 msgid "Insufficient permissions."
 msgstr ""
@@ -1307,39 +1309,39 @@ msgstr ""
 msgid "Invalid color."
 msgstr ""
-#: documents/serialisers.py:2093
+#: documents/serialisers.py:2124
 #, python-format
 msgid "File type %(type)s not supported"
 msgstr ""
-#: documents/serialisers.py:2137
+#: documents/serialisers.py:2168
 #, python-format
 msgid "Custom field id must be an integer: %(id)s"
 msgstr ""
-#: documents/serialisers.py:2144
+#: documents/serialisers.py:2175
 #, python-format
 msgid "Custom field with id %(id)s does not exist"
 msgstr ""
-#: documents/serialisers.py:2161 documents/serialisers.py:2171
+#: documents/serialisers.py:2192 documents/serialisers.py:2202
 msgid ""
 "Custom fields must be a list of integers or an object mapping ids to values."
 msgstr ""
-#: documents/serialisers.py:2166
+#: documents/serialisers.py:2197
 msgid "Some custom fields don't exist or were specified twice."
 msgstr ""
-#: documents/serialisers.py:2313
+#: documents/serialisers.py:2344
 msgid "Invalid variable detected."
 msgstr ""
-#: documents/serialisers.py:2526
+#: documents/serialisers.py:2557
 msgid "Duplicate document identifiers are not allowed."
 msgstr ""
-#: documents/serialisers.py:2556 documents/views.py:3561
+#: documents/serialisers.py:2587 documents/views.py:3598
 #, python-format
 msgid "Documents not found: %(ids)s"
 msgstr ""
@@ -1603,20 +1605,24 @@ msgstr ""
 msgid "Unable to parse URI {value}"
 msgstr ""
-#: documents/views.py:3573
+#: documents/views.py:1985
 msgid "Invalid more_like_id"
 msgstr ""
 #: documents/views.py:3610
 #, python-format
 msgid "Insufficient permissions to share document %(id)s."
 msgstr ""
-#: documents/views.py:3616
+#: documents/views.py:3653
 msgid "Bundle is already being processed."
 msgstr ""
-#: documents/views.py:3673
+#: documents/views.py:3710
 msgid "The share link bundle is still being prepared. Please try again later."
 msgstr ""
-#: documents/views.py:3683
+#: documents/views.py:3720
 msgid "The share link bundle is unavailable."
 msgstr ""
--- a/src/paperless/auth.py
+++ b/src/paperless/auth.py
@@ -83,3 +83,11 @@ class PaperlessBasicAuthentication(authentication.BasicAuthentication):
            raise exceptions.AuthenticationFailed("MFA required")
        return user_tuple
    def authenticate_header(self, request):
        """
        Pick the authentication challenge string for ``request``.

        If the request sent an ``Authorization`` header starting with
        "basic " (case-insensitive), defer to the parent class. Otherwise
        return the token authentication keyword.
        """
        # NOTE(review): presumably this avoids advertising a Basic challenge
        # to clients that did not attempt Basic auth; confirm the intent.
        auth_header = request.META.get("HTTP_AUTHORIZATION", "")
        if auth_header.lower().startswith("basic "):
            return super().authenticate_header(request)
        # Still 401 for anonymous API access
        return authentication.TokenAuthentication.keyword
--- a/src/paperless/celery.py
+++ b/src/paperless/celery.py
@@ -1,6 +1,7 @@
 import os
 from celery import Celery
 from celery.signals import worker_process_init
 # Set the default Django settings module for the 'celery' program.
 os.environ.setdefault("DJANGO_SETTINGS_MODULE", "paperless.settings")
@@ -15,3 +16,19 @@ app.config_from_object("django.conf:settings", namespace="CELERY")
 # Load task modules from all registered Django apps.
 app.autodiscover_tasks()
@worker_process_init.connect
 def on_worker_process_init(**kwargs) -> None:  # pragma: no cover
    """
    Register built-in parsers eagerly in each Celery worker process.
    This registers only the built-in parsers (no entrypoint discovery) so
    that workers can begin consuming documents immediately.  Entrypoint
    discovery for third-party parsers is deferred to the first call of
    get_parser_registry() inside a task, keeping worker_process_init
    well within its 4-second timeout budget.

    Keyword arguments passed by the signal are accepted and ignored.
    """
    # NOTE(review): function-scope import; presumably keeps the parser
    # registry from loading when this module is imported. Confirm.
    from paperless.parsers.registry import init_builtin_parsers
    init_builtin_parsers()
--- a/src/paperless/config.py
+++ b/src/paperless/config.py
@@ -188,6 +188,7 @@ class AIConfig(BaseConfig):
    llm_model: str = dataclasses.field(init=False)
    llm_api_key: str = dataclasses.field(init=False)
    llm_endpoint: str = dataclasses.field(init=False)
    llm_allow_internal_endpoints: bool = dataclasses.field(init=False)
    def __post_init__(self) -> None:
        app_config = self._get_config_instance()
@@ -203,6 +204,7 @@ class AIConfig(BaseConfig):
        self.llm_model = app_config.llm_model or settings.LLM_MODEL
        self.llm_api_key = app_config.llm_api_key or settings.LLM_API_KEY
        self.llm_endpoint = app_config.llm_endpoint or settings.LLM_ENDPOINT
        self.llm_allow_internal_endpoints = settings.LLM_ALLOW_INTERNAL_ENDPOINTS
    @property
    def llm_index_enabled(self) -> bool:
--- a/src/paperless/consumers.py
+++ b/src/paperless/consumers.py
@@ -1,62 +1,51 @@
 import json
 from typing import Any
-from asgiref.sync import async_to_sync
+from channels.generic.websocket import AsyncWebsocketConsumer
 from channels.exceptions import AcceptConnection
 from channels.exceptions import DenyConnection
 from channels.generic.websocket import WebsocketConsumer
-class StatusConsumer(WebsocketConsumer):
+class StatusConsumer(AsyncWebsocketConsumer):
-    def _authenticated(self):
+    def _authenticated(self) -> bool:
-        return "user" in self.scope and self.scope["user"].is_authenticated
+        user: Any = self.scope.get("user")
        return user is not None and user.is_authenticated
-    def _can_view(self, data):
+    async def _can_view(self, data: dict[str, Any]) -> bool:
-        user = self.scope.get("user") if self.scope.get("user") else None
+        user: Any = self.scope.get("user")
        if user is None:
            return False
        owner_id = data.get("owner_id")
        users_can_view = data.get("users_can_view", [])
        groups_can_view = data.get("groups_can_view", [])
        return (
            user.is_superuser
            or user.id == owner_id
            or user.id in users_can_view
            or any(
                user.groups.filter(pk=group_id).exists() for group_id in groups_can_view
            )
        )
-    def connect(self):
+        if user.is_superuser or user.id == owner_id or user.id in users_can_view:
            return True
        return await user.groups.filter(pk__in=groups_can_view).aexists()
    async def connect(self) -> None:
        if not self._authenticated():
-            raise DenyConnection
+            await self.close()
-        else:
+            return
-            async_to_sync(self.channel_layer.group_add)(
+        await self.channel_layer.group_add("status_updates", self.channel_name)
-                "status_updates",
+        await self.accept()
                self.channel_name,
            )
            raise AcceptConnection
-    def disconnect(self, close_code) -> None:
+    async def disconnect(self, code: int) -> None:
-        async_to_sync(self.channel_layer.group_discard)(
+        await self.channel_layer.group_discard("status_updates", self.channel_name)
            "status_updates",
            self.channel_name,
        )
-    def status_update(self, event) -> None:
+    async def status_update(self, event: dict[str, Any]) -> None:
        if not self._authenticated():
-            self.close()
+            await self.close()
-        else:
+        elif await self._can_view(event["data"]):
-            if self._can_view(event["data"]):
+            await self.send(json.dumps(event))
                self.send(json.dumps(event))
-    def documents_deleted(self, event) -> None:
+    async def documents_deleted(self, event: dict[str, Any]) -> None:
        if not self._authenticated():
-            self.close()
+            await self.close()
        else:
-            self.send(json.dumps(event))
+            await self.send(json.dumps(event))
-    def document_updated(self, event: Any) -> None:
+    async def document_updated(self, event: dict[str, Any]) -> None:
        if not self._authenticated():
-            self.close()
+            await self.close()
-        else:
+        elif await self._can_view(event["data"]):
-            if self._can_view(event["data"]):
+            await self.send(json.dumps(event))
                self.send(json.dumps(event))
--- a/src/paperless/network.py
+++ b/src/paperless/network.py
@@ -0,0 +1,76 @@
 import ipaddress
 import socket
 from collections.abc import Collection
 from urllib.parse import ParseResult
 from urllib.parse import urlparse
 def is_public_ip(ip: str | int) -> bool:
    try:
        obj = ipaddress.ip_address(ip)
        return not (
            obj.is_private
            or obj.is_loopback
            or obj.is_link_local
            or obj.is_multicast
            or obj.is_unspecified
        )
    except ValueError:  # pragma: no cover
        return False
 def resolve_hostname_ips(hostname: str) -> list[str]:
    """Resolve ``hostname`` and return the address of every record found.

    The list is returned exactly as ``socket.getaddrinfo`` reports it, so
    the same address may appear more than once.

    Raises
    ------
    ValueError
        If resolution fails or yields no usable address.
    """
    failure = f"Could not resolve hostname: {hostname}"
    try:
        records = socket.getaddrinfo(hostname, None)
    except socket.gaierror as e:
        raise ValueError(failure) from e
    addresses = []
    for record in records:
        # record[4] is the sockaddr tuple; its first item is the address.
        if record and record[4]:
            addresses.append(record[4][0])
    if not addresses:
        raise ValueError(failure)
    return addresses
 def format_host_for_url(host: str) -> str:
    """
    Return ``host`` in a form usable inside a URL.

    IPv6 literals are wrapped in square brackets; IPv4 addresses and
    anything that is not an IP address are returned unchanged.
    """
    try:
        needs_brackets = ipaddress.ip_address(host).version == 6
    except ValueError:
        # Not an IP literal (e.g. a DNS name): nothing to wrap.
        needs_brackets = False
    return f"[{host}]" if needs_brackets else host
 def validate_outbound_http_url(
    url: str,
    *,
    allowed_schemes: Collection[str] = ("http", "https"),
    allowed_ports: Collection[int] | None = None,
    allow_internal: bool = False,
 ) -> ParseResult:
    parsed = urlparse(url)
    scheme = parsed.scheme.lower()
    if scheme not in allowed_schemes or not parsed.hostname:
        raise ValueError("Invalid URL scheme or hostname.")
    default_port = 443 if scheme == "https" else 80
    try:
        port = parsed.port or default_port
    except ValueError as e:
        raise ValueError("Invalid URL scheme or hostname.") from e
    if allowed_ports and port not in allowed_ports:
        raise ValueError("Destination port not permitted.")
    if not allow_internal:
        for ip_str in resolve_hostname_ips(parsed.hostname):
            if not is_public_ip(ip_str):
                raise ValueError(
                    f"Connection blocked: {parsed.hostname} resolves to a non-public address",
                )
    return parsed
--- a/src/paperless/parsers/init.py
+++ b/src/paperless/parsers/init.py
@@ -0,0 +1,434 @@
 """
 Public interface for the Paperless-ngx parser plugin system.
 This module defines ParserProtocol — the structural contract that every
 document parser must satisfy, whether it is a built-in parser shipped with
 Paperless-ngx or a third-party parser installed via a Python entrypoint.
 Phase 1/2 scope: only the Protocol and its supporting types (MetadataEntry
 and ParserContext) are defined here. The transitional DocumentParser ABC
 (Phase 3) and concrete built-in parsers (Phase 3+) will be added in later
 phases, so there are intentionally no imports of parser implementations
 here.
 Usage example (third-party parser)::
    from paperless.parsers import ParserProtocol
    class MyParser:
        name = "my-parser"
        version = "1.0.0"
        author = "Acme Corp"
        url = "https://example.com/my-parser"
        @classmethod
        def supported_mime_types(cls) -> dict[str, str]:
            return {"application/x-my-format": ".myf"}
        @classmethod
        def score(cls, mime_type, filename, path=None):
            return 10
        # … implement remaining protocol methods …
    assert isinstance(MyParser(), ParserProtocol)
 """
 from __future__ import annotations
 from dataclasses import dataclass
 from typing import TYPE_CHECKING
 from typing import Protocol
 from typing import Self
 from typing import TypedDict
 from typing import runtime_checkable
 if TYPE_CHECKING:
    import datetime
    from pathlib import Path
    from types import TracebackType
 __all__ = [
    "MetadataEntry",
    "ParserContext",
    "ParserProtocol",
 ]
 class MetadataEntry(TypedDict):
    """A single metadata field extracted from a document.
    All four keys are required and every one of them is typed as ``str``.
    Values are always serialised to strings —
    type-specific conversion (dates, integers, lists) is the responsibility
    of the parser before returning.
    """
    namespace: str
    """URI of the metadata namespace (e.g. 'http://ns.adobe.com/pdf/1.3/')."""
    prefix: str
    """Conventional namespace prefix (e.g. 'pdf', 'xmp', 'dc')."""
    key: str
    """Field name within the namespace (e.g. 'Author', 'CreateDate')."""
    value: str
    """String representation of the field value."""
@dataclass(frozen=True, slots=True)
class ParserContext:
    """Immutable context handed to a parser before parse().
    The consumer assembles this from the ingestion event and Django
    settings, then calls ``parser.configure(context)`` before
    ``parser.parse()``.  Parsers read only the fields relevant to them;
    unneeded fields are ignored.
    ``frozen=True`` prevents accidental mutation after the consumer
    hands the context off.  ``slots=True`` keeps instances lightweight.
    Fields
    ------
    mailrule_id : int | None
        Primary key of the ``MailRule`` that triggered this ingestion,
        or ``None`` when the document did not arrive via a mail rule.
        Used by ``MailDocumentParser`` to select the PDF layout.
    Notes
    -----
    Future fields (not yet implemented):
    * ``output_type`` — PDF/A variant for archive generation
      (replaces ``settings.OCR_OUTPUT_TYPE`` reads inside parsers).
    * ``ocr_mode`` — skip-text, redo, force, etc.
      (replaces ``settings.OCR_MODE`` reads inside parsers).
    * ``ocr_language`` — Tesseract language string.
      (replaces ``settings.OCR_LANGUAGE`` reads inside parsers).
    When those fields are added the consumer will read from Django
    settings once and populate them here, decoupling parsers from
    ``settings.*`` entirely.
    """
    # Defaults to None, so a bare ParserContext() describes an ingestion
    # that was not triggered by a mail rule.
    mailrule_id: int | None = None
@runtime_checkable
class ParserProtocol(Protocol):
    """Structural contract for all Paperless-ngx document parsers.
    Both built-in parsers and third-party plugins (discovered via the
    "paperless_ngx.parsers" entrypoint group) must satisfy this Protocol.
    Because it is decorated with runtime_checkable, isinstance(obj,
    ParserProtocol) works at runtime. That check only verifies that the
    protocol members are present; signatures and return types are not
    inspected. It is useful for validation in ParserRegistry.discover.
    Parsers must expose four string attributes at the class level so the
    registry can log attribution information without instantiating the parser:
    name : str
        Human-readable parser name (e.g. "Tesseract OCR").
    version : str
        Semantic version string (e.g. "1.2.3").
    author : str
        Author or organisation name.
    url : str
        URL for documentation, source code, or issue tracker.
    """
    # ------------------------------------------------------------------
    # Class-level identity (checked by the registry, not Protocol methods)
    # ------------------------------------------------------------------
    name: str
    version: str
    author: str
    url: str
    # ------------------------------------------------------------------
    # Class methods
    # ------------------------------------------------------------------
    @classmethod
    def supported_mime_types(cls) -> dict[str, str]:
        """Return a mapping of supported MIME types to preferred file extensions.
        The keys are MIME type strings (e.g. "application/pdf"), and the
        values are the preferred file extension including the leading dot
        (e.g. ".pdf").  The registry uses this mapping both to decide whether
        a parser is a candidate for a given file and to determine the default
        extension when creating archive copies.
        Returns
        -------
        dict[str, str]
            {mime_type: extension} mapping — may be empty if the parser
            has been temporarily disabled.
        """
        ...
    @classmethod
    def score(
        cls,
        mime_type: str,
        filename: str,
        path: Path | None = None,
    ) -> int | None:
        """Return a priority score for handling this file, or None to decline.
        The registry calls this after confirming that the MIME type is in
        supported_mime_types. Parsers may inspect filename and optionally
        the file at path to refine their confidence level.
        A higher score wins. Return None to explicitly decline handling a file
        even though the MIME type is listed as supported (e.g. when a feature
        flag is disabled, or a required service is not configured).
        Parameters
        ----------
        mime_type:
            The detected MIME type of the file to be parsed.
        filename:
            The original filename, including extension.
        path:
            Optional filesystem path to the file. Parsers that need to
            inspect file content (e.g. magic-byte sniffing) may use this.
            May be None when scoring happens before the file is available locally.
        Returns
        -------
        int | None
            Priority score (higher wins), or None to decline.
        """
        ...
    # ------------------------------------------------------------------
    # Properties
    # ------------------------------------------------------------------
    @property
    def can_produce_archive(self) -> bool:
        """Whether this parser can produce a searchable PDF archive copy.
        If True, the consumption pipeline may request an archive version when
        processing the document, subject to the ARCHIVE_FILE_GENERATION
        setting. If False, only thumbnail and text extraction are performed.
        """
        ...
    @property
    def requires_pdf_rendition(self) -> bool:
        """Whether the parser must produce a PDF for the frontend to display.
        True for formats the browser cannot display natively (e.g. DOCX, ODT).
        When True, the pipeline always stores the PDF output regardless of the
        ARCHIVE_FILE_GENERATION setting, since the original format cannot be
        shown to the user.
        """
        ...
    # ------------------------------------------------------------------
    # Core parsing interface
    # ------------------------------------------------------------------
    def configure(self, context: ParserContext) -> None:
        """Apply source context before parse().
        Called by the consumer after instantiation and before parse().
        The Protocol itself supplies no behaviour: a parser with nothing to
        configure may implement this as a no-op, and other parsers read only
        the fields they need.
        Parameters
        ----------
        context:
            Immutable context assembled by the consumer for this
            specific ingestion event.
        """
        ...
    def parse(
        self,
        document_path: Path,
        mime_type: str,
        *,
        produce_archive: bool = True,
    ) -> None:
        """Parse document_path and populate internal state.
        After a successful call, callers retrieve results via get_text,
        get_date, and get_archive_path.
        Parameters
        ----------
        document_path:
            Absolute path to the document file to parse.
        mime_type:
            Detected MIME type of the document.
        produce_archive:
            When True (the default) and can_produce_archive is also True,
            the parser should produce a searchable PDF at the path returned
            by get_archive_path. Pass False when only text extraction and
            thumbnail generation are required and disk I/O should be minimised.
        Raises
        ------
        documents.parsers.ParseError
            If parsing fails for any reason.
        """
        ...
    # ------------------------------------------------------------------
    # Result accessors
    # ------------------------------------------------------------------
    def get_text(self) -> str | None:
        """Return the plain-text content extracted during parse.
        Returns
        -------
        str | None
            Extracted text, or None if no text could be found.
        """
        ...
    def get_date(self) -> datetime.datetime | None:
        """Return the document date detected during parse.
        Returns
        -------
        datetime.datetime | None
            Detected document date, or None if no date was found.
        """
        ...
    def get_archive_path(self) -> Path | None:
        """Return the path to the generated archive PDF, or None.
        Returns
        -------
        Path | None
            Path to the searchable PDF archive, or None if no archive was
            produced (e.g. because produce_archive=False or the parser does
            not support archive generation).
        """
        ...
    # ------------------------------------------------------------------
    # Thumbnail and metadata
    # ------------------------------------------------------------------
    def get_thumbnail(self, document_path: Path, mime_type: str) -> Path:
        """Generate and return the path to a thumbnail image for the document.
        May be called independently of parse. The returned path must point to
        an existing WebP image file inside the parser's temporary working
        directory.
        Parameters
        ----------
        document_path:
            Absolute path to the source document.
        mime_type:
            Detected MIME type of the document.
        Returns
        -------
        Path
            Path to the generated thumbnail image (WebP format preferred).
        """
        ...
    def get_page_count(
        self,
        document_path: Path,
        mime_type: str,
    ) -> int | None:
        """Return the number of pages in the document, if determinable.
        Parameters
        ----------
        document_path:
            Absolute path to the source document.
        mime_type:
            Detected MIME type of the document.
        Returns
        -------
        int | None
            Page count, or None if the parser cannot determine it.
        """
        ...
    def extract_metadata(
        self,
        document_path: Path,
        mime_type: str,
    ) -> list[MetadataEntry]:
        """Extract format-specific metadata from the document.
        Called by the API view layer on demand — not during the consumption
        pipeline. Results are returned to the frontend for per-file display.
        For documents with an archive version, this method is called twice:
        once for the original file (with its native MIME type) and once for
        the archive file (with ``"application/pdf"``). Parsers that produce
        archives should handle both cases.
        Implementations must not raise. A failure to read metadata is not
        fatal — log a warning and return whatever partial results were
        collected, or ``[]`` if none.
        Parameters
        ----------
        document_path:
            Absolute path to the file to extract metadata from.
        mime_type:
            MIME type of the file at ``document_path``. May be
            ``"application/pdf"`` when called for the archive version.
        Returns
        -------
        list[MetadataEntry]
            Zero or more metadata entries. Returns ``[]`` if no metadata
            could be extracted or the format does not support it.
        """
        ...
    # ------------------------------------------------------------------
    # Context manager
    # ------------------------------------------------------------------
    def __enter__(self) -> Self:
        """Enter the parser context, returning the parser instance.
        Implementations should perform any resource allocation here if not
        done in __init__ (e.g. creating API clients or temp directories).
        Returns
        -------
        Self
            The parser instance itself.
        """
        ...
    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> None:
        """Exit the parser context and release all resources.
        Implementations must clean up all temporary files and other resources
        regardless of whether an exception occurred.
        Parameters
        ----------
        exc_type:
            The exception class, or None if no exception was raised.
        exc_val:
            The exception instance, or None.
        exc_tb:
            The traceback, or None.
        """
        ...
--- a/src/paperless/parsers/mail.py
+++ b/src/paperless/parsers/mail.py
@@ -0,0 +1,834 @@
 """
 Built-in mail document parser.
 Handles message/rfc822 (EML) MIME type by:
 - Parsing the email using imap_tools
 - Generating a PDF via Gotenberg (for display and archive)
 - Extracting text via Tika for HTML content
 - Extracting metadata from email headers
 The parser always produces a PDF because EML files cannot be rendered
 natively in a browser (requires_pdf_rendition=True).
 """
 from __future__ import annotations
 import logging
 import re
 import shutil
 import tempfile
 from html import escape
 from pathlib import Path
 from typing import TYPE_CHECKING
 from typing import Self
 from bleach import clean
 from bleach import linkify
 from django.conf import settings
 from django.utils import timezone
 from django.utils.timezone import is_naive
 from django.utils.timezone import make_aware
 from gotenberg_client import GotenbergClient
 from gotenberg_client.constants import A4
 from gotenberg_client.options import Measurement
 from gotenberg_client.options import MeasurementUnitType
 from gotenberg_client.options import PageMarginsType
 from gotenberg_client.options import PdfAFormat
 from humanize import naturalsize
 from imap_tools import MailAttachment
 from imap_tools import MailMessage
 from tika_client import TikaClient
 from documents.parsers import ParseError
 from documents.parsers import make_thumbnail_from_pdf
 from paperless.models import OutputTypeChoices
 from paperless.version import __full_version_str__
 from paperless_mail.models import MailRule
 if TYPE_CHECKING:
    import datetime
    from types import TracebackType
    from paperless.parsers import MetadataEntry
    from paperless.parsers import ParserContext
 logger = logging.getLogger("paperless.parsing.mail")
 _SUPPORTED_MIME_TYPES: dict[str, str] = {
    "message/rfc822": ".eml",
 }
 class MailDocumentParser:
    """Parse .eml email files for Paperless-ngx.
    Uses imap_tools to parse .eml files, generates a PDF using Gotenberg,
    and sends the HTML part to a Tika server for text extraction.  Because
    EML files cannot be rendered natively in a browser, the parser always
    produces a PDF rendition (requires_pdf_rendition=True).
    Pass a ``ParserContext`` to ``configure()`` before ``parse()`` to
    apply mail-rule-specific PDF layout options:
        parser.configure(ParserContext(mailrule_id=rule.pk))
        parser.parse(path, mime_type)
    Class attributes
    ----------------
    name : str
        Human-readable parser name.
    version : str
        Semantic version string, kept in sync with Paperless-ngx releases.
    author : str
        Maintainer name.
    url : str
        Issue tracker / source URL.
    """
    name: str = "Paperless-ngx Mail Parser"
    version: str = __full_version_str__
    author: str = "Paperless-ngx Contributors"
    url: str = "https://github.com/paperless-ngx/paperless-ngx"
    # ------------------------------------------------------------------
    # Class methods
    # ------------------------------------------------------------------
    @classmethod
    def supported_mime_types(cls) -> dict[str, str]:
        """Return the MIME types this parser handles.

        The module-level mapping is returned as-is (it is not copied), so
        callers should treat the result as read-only.

        Returns
        -------
        dict[str, str]
            Mapping of MIME type to preferred file extension.
        """
        return _SUPPORTED_MIME_TYPES
    @classmethod
    def score(
        cls,
        mime_type: str,
        filename: str,
        path: Path | None = None,
    ) -> int | None:
        """Rate how well this parser suits the given file.

        Only ``mime_type`` is consulted; ``filename`` and ``path`` are
        accepted for protocol compatibility and ignored.

        Parameters
        ----------
        mime_type:
            Detected MIME type of the file.
        filename:
            Original filename including extension. Not inspected.
        path:
            Optional filesystem path. Not inspected.

        Returns
        -------
        int | None
            10 when the MIME type is one this parser supports, else None.
        """
        return 10 if mime_type in _SUPPORTED_MIME_TYPES else None
    # ------------------------------------------------------------------
    # Properties
    # ------------------------------------------------------------------
    @property
    def can_produce_archive(self) -> bool:
        """Whether this parser can produce a searchable PDF archive copy.

        Note that parse() still renders a PDF and exposes it through
        get_archive_path(); that file is the display rendition, not an
        optional archive.

        Returns
        -------
        bool
            Always False — the mail parser produces a display PDF
            (requires_pdf_rendition=True), not an optional OCR archive.
        """
        return False
    @property
    def requires_pdf_rendition(self) -> bool:
        """Whether the parser must produce a PDF for the frontend to display.

        Consistent with this, parse() generates the PDF unconditionally and
        does not consult its ``produce_archive`` argument.

        Returns
        -------
        bool
            Always True — EML files cannot be rendered natively in a browser,
            so a PDF conversion is always required for display.
        """
        return True
    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------
    def __init__(self, logging_group: object = None) -> None:
        """Set up empty parse results and a private scratch directory.

        A fresh ``paperless-``-prefixed directory is created beneath
        ``settings.SCRATCH_DIR`` (which is created first if missing).  It is
        removed again by ``__exit__``.

        Parameters
        ----------
        logging_group:
            Accepted but not used by this constructor.
        """
        # Results filled in later by configure() / parse().
        self._text: str | None = None
        self._date: datetime.datetime | None = None
        self._archive_path: Path | None = None
        self._mailrule_id: int | None = None

        scratch_root = settings.SCRATCH_DIR
        scratch_root.mkdir(parents=True, exist_ok=True)
        self._tempdir = Path(
            tempfile.mkdtemp(prefix="paperless-", dir=scratch_root),
        )
    def __enter__(self) -> Self:
        """Enter the context manager; no extra setup happens here.

        The scratch directory is already created in ``__init__``.

        Returns
        -------
        Self
            This parser instance.
        """
        return self
    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> None:
        """Delete the parser's temporary directory on leaving the context.

        The exception arguments are not inspected and nothing is returned,
        so an exception raised inside the ``with`` block keeps propagating.
        """
        logger.debug("Cleaning up temporary directory %s", self._tempdir)
        # ignore_errors=True: cleanup is best effort and must not raise.
        shutil.rmtree(self._tempdir, ignore_errors=True)
    # ------------------------------------------------------------------
    # Core parsing interface
    # ------------------------------------------------------------------
    def configure(self, context: ParserContext) -> None:
        """Remember the mail rule id from ``context`` for a later parse().

        Parameters
        ----------
        context:
            Ingestion context; only ``mailrule_id`` is read.  parse() uses it
            to look up the rule's PDF layout.
        """
        self._mailrule_id = context.mailrule_id
    def parse(
        self,
        document_path: Path,
        mime_type: str,
        *,
        produce_archive: bool = True,
    ) -> None:
        """Parse the given .eml into formatted text and a PDF rendition.

        Call ``configure(ParserContext(mailrule_id=...))`` before this method
        to apply mail-rule-specific PDF layout options.  The ``produce_archive``
        flag is accepted for protocol compatibility but is ignored —
        the mail parser always produces a PDF since EML files cannot be
        displayed natively.

        After a successful call the results are available through
        ``get_text()``, ``get_date()`` and ``get_archive_path()``.

        Parameters
        ----------
        document_path:
            Absolute path to the .eml file.
        mime_type:
            Detected MIME type of the document (should be "message/rfc822").
            Not inspected here.
        produce_archive:
            Accepted for protocol compatibility. The PDF rendition is always
            produced since EML files cannot be displayed natively in a browser.

        Raises
        ------
        documents.parsers.ParseError
            If the file cannot be parsed or PDF generation fails.
        MailRule.DoesNotExist
            If a mail rule id was configured but no such rule exists; the
            lookup error is not translated.
        """

        def strip_text(text: str) -> str:
            """Reduces the spacing of the given text string."""
            text = re.sub(r"\s+", " ", text)
            # NOTE(review): the substitution above already turned every
            # newline into a space, so this pattern can no longer match.
            # It is kept so the extracted text stays exactly as before.
            text = re.sub(r"(\n *)+", "\n", text)
            return text.strip()

        def build_formatted_text(mail_message: MailMessage) -> str:
            """Constructs a formatted string based on the given email.

            Only ``mail_message`` is read; nothing is taken from the
            enclosing scope apart from ``self`` (for the Tika call).
            """
            fmt_text = f"Subject: {mail_message.subject}\n\n"

            # from_values may be missing; fall back to an empty sender.
            sender = mail_message.from_values.full if mail_message.from_values else ""
            fmt_text += f"From: {sender}\n\n"

            to_list = [address.full for address in mail_message.to_values]
            fmt_text += f"To: {', '.join(to_list)}\n\n"

            if mail_message.cc_values:
                cc_list = [address.full for address in mail_message.cc_values]
                fmt_text += f"CC: {', '.join(cc_list)}\n\n"

            if mail_message.bcc_values:
                bcc_list = [address.full for address in mail_message.bcc_values]
                fmt_text += f"BCC: {', '.join(bcc_list)}\n\n"

            if mail_message.attachments:
                att = [
                    f"{a.filename} ({naturalsize(a.size, binary=True, format='%.2f')})"
                    for a in mail_message.attachments
                ]
                fmt_text += f"Attachments: {', '.join(att)}\n\n"

            if mail_message.html:
                fmt_text += "HTML content: " + strip_text(
                    self.tika_parse(mail_message.html),
                )

            fmt_text += f"\n\n{strip_text(mail_message.text)}"

            return fmt_text

        logger.debug("Parsing file %s into an email", document_path.name)
        mail = self.parse_file_to_message(document_path)

        logger.debug("Building formatted text from email")
        self._text = build_formatted_text(mail)

        # Store a timezone-aware date in either case.
        if is_naive(mail.date):
            self._date = make_aware(mail.date)
        else:
            self._date = mail.date

        logger.debug("Creating a PDF from the email")
        if self._mailrule_id:
            # A mail rule was configured: render with that rule's layout.
            rule = MailRule.objects.get(pk=self._mailrule_id)
            self._archive_path = self.generate_pdf(
                mail,
                MailRule.PdfLayout(rule.pdf_layout),
            )
        else:
            self._archive_path = self.generate_pdf(mail)
    # ------------------------------------------------------------------
    # Result accessors
    # ------------------------------------------------------------------
    def get_text(self) -> str | None:
        """Return the plain-text content extracted during parse.

        The text built by parse() starts with header summary lines
        (Subject, From, To, ...) followed by the message body.

        Returns
        -------
        str | None
            Extracted text, or None if parse has not been called yet.
        """
        return self._text
    def get_date(self) -> datetime.datetime | None:
        """Return the document date detected during parse.

        parse() converts a naive message date with ``make_aware`` before
        storing it, so a stored value is timezone-aware.

        Returns
        -------
        datetime.datetime | None
            Date from the email headers, or None if not detected.
        """
        return self._date
    def get_archive_path(self) -> Path | None:
        """Return the path to the generated archive PDF, or None.

        The path is set by parse(), and also by get_thumbnail() when that
        method has to render the PDF itself.

        Returns
        -------
        Path | None
            Path to the PDF produced by Gotenberg, or None if parse has not
            been called yet.
        """
        return self._archive_path
    # ------------------------------------------------------------------
    # Thumbnail and metadata
    # ------------------------------------------------------------------
    def get_thumbnail(
        self,
        document_path: Path,
        mime_type: str,
        file_name: str | None = None,
    ) -> Path:
        """Create a thumbnail from the email's PDF rendition.

        When no PDF has been rendered yet, the email at ``document_path`` is
        parsed and converted first (with the default layout), and the result
        is remembered as the archive path.

        Parameters
        ----------
        document_path:
            Absolute path to the source document.
        mime_type:
            Detected MIME type of the document. Not inspected.
        file_name:
            Kept for backward compatibility; not used.

        Returns
        -------
        Path
            Path to the generated WebP thumbnail inside the temporary directory.
        """
        pdf_path = self._archive_path
        if not pdf_path:
            message = self.parse_file_to_message(document_path)
            pdf_path = self.generate_pdf(message)
            self._archive_path = pdf_path

        return make_thumbnail_from_pdf(pdf_path, self._tempdir)
    def get_page_count(
        self,
        document_path: Path,
        mime_type: str,
    ) -> int | None:
        """Count the pages of the PDF rendered by an earlier parse().

        Neither argument is inspected; only the stored archive PDF is
        examined.

        Returns
        -------
        int | None
            Page count of the archive PDF, or ``None`` when no archive PDF
            is available (e.g. parse() has not been called yet).
        """
        if self._archive_path is None:
            return None

        from paperless.parsers.utils import get_page_count_for_pdf

        return get_page_count_for_pdf(self._archive_path, log=logger)
    def extract_metadata(
        self,
        document_path: Path,
        mime_type: str,
    ) -> list[MetadataEntry]:
        """Collect metadata entries from the email at ``document_path``.

        Every header becomes an entry with prefix "header" (multiple values
        joined with ", "). Two summary entries with an empty prefix are added:
        "attachments" (file names with human-readable sizes) and "date".

        Returns
        -------
        list[MetadataEntry]
            Entries ordered by (prefix, key), or ``[]`` when the file cannot
            be parsed (the failure is logged as a warning).
        """
        try:
            message = self.parse_file_to_message(document_path)
        except ParseError as exc:
            logger.warning(
                "Error while fetching document metadata for %s: %s",
                document_path,
                exc,
            )
            return []

        entries: list[MetadataEntry] = []
        for header_name, header_values in message.headers.items():
            joined = ", ".join(header_values)
            try:
                # Headers that cannot be represented as UTF-8 are dropped.
                joined.encode("utf-8")
            except UnicodeEncodeError as exc:  # pragma: no cover
                logger.debug("Skipping header %s: %s", header_name, exc)
            else:
                entries.append(
                    {
                        "namespace": "",
                        "prefix": "header",
                        "key": header_name,
                        "value": joined,
                    },
                )

        attachment_summary = ", ".join(
            [
                f"{attachment.filename}"
                f"({naturalsize(attachment.size, binary=True, format='%.2f')})"
                for attachment in message.attachments
            ],
        )
        entries.append(
            {
                "namespace": "",
                "prefix": "",
                "key": "attachments",
                "value": attachment_summary,
            },
        )

        formatted_date = message.date.strftime("%Y-%m-%d %H:%M:%S %Z")
        entries.append(
            {
                "namespace": "",
                "prefix": "",
                "key": "date",
                "value": formatted_date,
            },
        )

        return sorted(entries, key=lambda entry: (entry["prefix"], entry["key"]))
    # ------------------------------------------------------------------
    # Email-specific methods
    # ------------------------------------------------------------------
    def _settings_to_gotenberg_pdfa(self) -> PdfAFormat | None:
        """Map the configured OCR output type onto a Gotenberg PdfAFormat.

        Returns
        -------
        PdfAFormat | None
            ``A2b`` for PDF_A, PDF_A2 and (with a warning) PDF_A1, ``A3b`` for
            PDF_A3, and None for any other output type.
        """
        output_type = settings.OCR_OUTPUT_TYPE
        if output_type in {OutputTypeChoices.PDF_A, OutputTypeChoices.PDF_A2}:
            return PdfAFormat.A2b
        if output_type == OutputTypeChoices.PDF_A1:  # pragma: no cover
            logger.warning(
                "Gotenberg does not support PDF/A-1a, choosing PDF/A-2b instead",
            )
            return PdfAFormat.A2b
        if output_type == OutputTypeChoices.PDF_A3:  # pragma: no cover
            return PdfAFormat.A3b
        return None
    @staticmethod
    def parse_file_to_message(filepath: Path) -> MailMessage:
        """Parse the given .eml file into a MailMessage object.

        Parameters
        ----------
        filepath:
            Path to the .eml file.

        Returns
        -------
        MailMessage
            Parsed mail message.

        Raises
        ------
        documents.parsers.ParseError
            If the file cannot be read or parsed, or if the parsed message
            has no "from" value.
        """
        try:
            parsed = MailMessage.from_bytes(filepath.read_bytes())
            # Evaluated inside the try so that a failure while reading the
            # sender is still reported as a ParseError.
            sender_missing = parsed.from_values is None
        except Exception as err:
            raise ParseError(
                f"Could not parse {filepath}: {err}",
            ) from err
        # Raised outside the try block: previously this ParseError was caught
        # by the broad handler above and wrapped a second time, producing
        # "Could not parse X: Could not parse X: Missing 'from'".
        if sender_missing:
            raise ParseError(
                f"Could not parse {filepath}: Missing 'from'",
            )
        return parsed
    def tika_parse(self, html: str) -> str:
        """Extract plain text from HTML via the configured Tika server.

        Parameters
        ----------
        html:
            HTML string to parse.

        Returns
        -------
        str
            The stripped text content, or "" when Tika returned no content.

        Raises
        ------
        documents.parsers.ParseError
            If anything goes wrong while talking to the Tika server.
        """
        logger.info("Sending content to Tika server")
        try:
            with TikaClient(tika_url=settings.TIKA_ENDPOINT) as tika:
                response = tika.tika.as_text.from_buffer(html, "text/html")
                if response.content is None:
                    return ""
                return response.content.strip()
        except Exception as err:
            raise ParseError(
                f"Could not parse content with tika server at "
                f"{settings.TIKA_ENDPOINT}: {err}",
            ) from err
    def generate_pdf(
        self,
        mail_message: MailMessage,
        pdf_layout: MailRule.PdfLayout | None = None,
    ) -> Path:
        """Build the archive PDF for an email.

        The templated email rendition is always produced. When the message
        also carries HTML, a second PDF is rendered from it and both are
        merged through Gotenberg in the order the layout requests.

        Parameters
        ----------
        mail_message:
            Parsed email message.
        pdf_layout:
            Layout option for the PDF. Falls back to the
            EMAIL_PARSE_DEFAULT_LAYOUT setting if not provided.

        Returns
        -------
        Path
            Path to "merged.pdf" inside the temporary directory.

        Raises
        ------
        documents.parsers.ParseError
            If the merge request fails.
        """
        merged_pdf = Path(self._tempdir) / "merged.pdf"
        body_pdf = self.generate_pdf_from_mail(mail_message)

        if pdf_layout is None:
            pdf_layout = MailRule.PdfLayout(settings.EMAIL_PARSE_DEFAULT_LAYOUT)

        # Without HTML content the templated rendition is the whole archive.
        if not mail_message.html:
            merged_pdf.write_bytes(body_pdf.read_bytes())
            return merged_pdf

        html_pdf = self.generate_pdf_from_html(
            mail_message.html,
            mail_message.attachments,
        )

        logger.debug("Merging email text and HTML content into single PDF")

        with (
            GotenbergClient(
                host=settings.TIKA_GOTENBERG_ENDPOINT,
                timeout=settings.CELERY_TASK_TIME_LIMIT,
            ) as client,
            client.merge.merge() as route,
        ):
            # Configure requested PDF/A formatting, if any
            requested_pdfa = self._settings_to_gotenberg_pdfa()
            if requested_pdfa is not None:
                route.pdf_format(requested_pdfa)

            # Pick the page order; any unrecognised layout behaves like TEXT_HTML.
            if pdf_layout == MailRule.PdfLayout.HTML_TEXT:
                merge_order = [html_pdf, body_pdf]
            elif pdf_layout == MailRule.PdfLayout.HTML_ONLY:
                merge_order = [html_pdf]
            elif pdf_layout == MailRule.PdfLayout.TEXT_ONLY:
                merge_order = [body_pdf]
            else:
                merge_order = [body_pdf, html_pdf]
            route.merge(merge_order)

            try:
                merged = route.run()
                merged_pdf.write_bytes(merged.content)
            except Exception as err:
                raise ParseError(
                    f"Error while merging email HTML into PDF: {err}",
                ) from err

        return merged_pdf
    def mail_to_html(self, mail: MailMessage) -> Path:
        """Convert the given email into an HTML file using a template.

        Subject, sender, recipients, attachment summary, date and plain-text
        body are each escaped, cleaned and linkified before being passed to
        the "email_msg_template.html" template. A ``<field>_label`` entry is
        only added to the template context when the field is non-empty.

        Parameters
        ----------
        mail:
            Parsed mail message.

        Returns
        -------
        Path
            Path to the rendered HTML file inside the temporary directory.
        """

        def clean_html(text: str) -> str:
            """Attempt to clean, escape, and linkify the given HTML string."""
            if isinstance(text, list):
                text = "\n".join([str(e) for e in text])
            if not isinstance(text, str):
                text = str(text)
            text = escape(text)
            text = clean(text)
            text = linkify(text, parse_email=True)
            return text.replace("\n", "<br>")

        def join_addresses(addresses) -> str:
            """Join the full form of each address with ", "."""
            return ", ".join(address.full for address in addresses)

        attachment_summaries = [
            f"{a.filename} ({naturalsize(a.size, binary=True, format='%.2f')})"
            for a in mail.attachments
        ]

        # (context key, label shown in the template, raw value)
        labelled_fields = (
            ("subject", "Subject", mail.subject),
            ("from", "From", mail.from_values.full if mail.from_values else ""),
            ("to", "To", join_addresses(mail.to_values)),
            ("cc", "CC", join_addresses(mail.cc_values)),
            ("bcc", "BCC", join_addresses(mail.bcc_values)),
            ("attachments", "Attachments", ", ".join(attachment_summaries)),
        )

        data = {}
        for key, label, raw_value in labelled_fields:
            data[key] = clean_html(raw_value)
            if data[key]:
                data[f"{key}_label"] = label

        # timezone.localtime() raises ValueError for naive datetimes, and the
        # mail library may hand back a naive date (missing or zone-less Date
        # header). Interpret such a value in the current time zone first.
        sent_at = mail.date
        if timezone.is_naive(sent_at):
            sent_at = timezone.make_aware(sent_at)
        data["date"] = clean_html(
            timezone.localtime(sent_at).strftime("%Y-%m-%d %H:%M"),
        )
        data["content"] = clean_html(mail.text.strip())

        from django.template.loader import render_to_string

        html_file = Path(self._tempdir) / "email_as_html.html"
        html_file.write_text(render_to_string("email_msg_template.html", context=data))
        return html_file
    def generate_pdf_from_mail(self, mail: MailMessage) -> Path:
        """Render the templated email view to PDF through Gotenberg.

        The email is first written out as HTML (see ``mail_to_html``) and then
        converted by Gotenberg's Chromium route together with the
        ``output.css`` stylesheet, on A4 with 0.1 inch margins.

        Parameters
        ----------
        mail:
            Parsed mail message.

        Returns
        -------
        Path
            Path to "email_as_pdf.pdf" inside the temporary directory.

        Raises
        ------
        documents.parsers.ParseError
            If building or running the Gotenberg request fails.
        """
        logger.info("Converting mail to PDF")

        templates_dir = (
            Path(__file__).parent.parent.parent / "paperless_mail" / "templates"
        )
        stylesheet = templates_dir / "output.css"
        rendered_html = self.mail_to_html(mail)

        with (
            GotenbergClient(
                host=settings.TIKA_GOTENBERG_ENDPOINT,
                timeout=settings.CELERY_TASK_TIME_LIMIT,
            ) as client,
            client.chromium.html_to_pdf() as route,
        ):
            # Configure requested PDF/A formatting, if any
            requested_pdfa = self._settings_to_gotenberg_pdfa()
            if requested_pdfa is not None:
                route.pdf_format(requested_pdfa)

            try:
                route.index(rendered_html)
                route.resource(stylesheet)
                route.margins(
                    PageMarginsType(
                        **{
                            side: Measurement(0.1, MeasurementUnitType.Inches)
                            for side in ("top", "bottom", "left", "right")
                        },
                    ),
                )
                route.size(A4)
                route.scale(1.0)
                response = route.run()
            except Exception as err:
                raise ParseError(
                    f"Error while converting email to PDF: {err}",
                ) from err

        pdf_path = Path(self._tempdir) / "email_as_pdf.pdf"
        pdf_path.write_bytes(response.content)
        return pdf_path
    def generate_pdf_from_html(
        self,
        orig_html: str,
        attachments: list[MailAttachment],
    ) -> Path:
        """Generate a PDF from the HTML content of the email.

        ``<script`` / ``</script`` tags are rewritten to hidden ``<div>``
        elements, attachments that carry a Content-ID are written next to the
        index file and their ``cid:`` references are rewritten to the on-disk
        names, and the result is converted by Gotenberg's Chromium route.

        Parameters
        ----------
        orig_html:
            Raw HTML string from the email body.
        attachments:
            List of email attachments (used as inline resources). Attachments
            without a Content-ID cannot be referenced from the HTML and are
            skipped.

        Returns
        -------
        Path
            Path to "html.pdf" inside the temporary directory.

        Raises
        ------
        documents.parsers.ParseError
            If Gotenberg returns an error.
        """

        def clean_html_script(text: str) -> str:
            """Neutralise script tags by turning them into hidden divs."""
            compiled_open = re.compile(re.escape("<script"), re.IGNORECASE)
            text = compiled_open.sub("<div hidden ", text)
            compiled_close = re.compile(re.escape("</script"), re.IGNORECASE)
            text = compiled_close.sub("</div", text)
            return text

        logger.info("Converting message html to PDF")
        tempdir = Path(self._tempdir)
        html_clean = clean_html_script(orig_html)

        with (
            GotenbergClient(
                host=settings.TIKA_GOTENBERG_ENDPOINT,
                timeout=settings.CELERY_TASK_TIME_LIMIT,
            ) as client,
            client.chromium.html_to_pdf() as route,
        ):
            # Configure requested PDF/A formatting, if any
            pdf_a_format = self._settings_to_gotenberg_pdfa()
            if pdf_a_format is not None:
                route.pdf_format(pdf_a_format)

            # Add attachments as resources, cleaning the filename and replacing
            # it in the index file for inclusion
            for attachment in attachments:
                # Without a Content-ID the reference would be the bare token
                # "cid:"; replacing that with "cid" would corrupt every other
                # "cid:<id>" reference still present in the HTML, so such
                # attachments (which the HTML cannot reference anyway) are
                # left out.
                if not attachment.content_id:
                    continue

                # Clean the attachment name to be valid
                name_cid = f"cid:{attachment.content_id}"
                name_clean = "".join(e for e in name_cid if e.isalnum())

                # Write attachment payload to a temp file
                temp_file = tempdir / name_clean
                temp_file.write_bytes(attachment.payload)
                route.resource(temp_file)

                # Replace as needed the name with the clean name
                html_clean = html_clean.replace(name_cid, name_clean)

            # Store the cleaned up HTML once, after all references have been
            # rewritten. This is our index file, the main page basically.
            html_clean_file = tempdir / "index.html"
            html_clean_file.write_text(html_clean)
            route.index(html_clean_file)

            # Set page size, margins
            route.margins(
                PageMarginsType(
                    top=Measurement(0.1, MeasurementUnitType.Inches),
                    bottom=Measurement(0.1, MeasurementUnitType.Inches),
                    left=Measurement(0.1, MeasurementUnitType.Inches),
                    right=Measurement(0.1, MeasurementUnitType.Inches),
                ),
            ).size(A4).scale(1.0)

            try:
                response = route.run()
            except Exception as err:
                raise ParseError(
                    f"Error while converting document to PDF: {err}",
                ) from err

        html_pdf = tempdir / "html.pdf"
        html_pdf.write_bytes(response.content)
        return html_pdf
--- a/src/paperless/parsers/registry.py
+++ b/src/paperless/parsers/registry.py
@@ -0,0 +1,372 @@
 """
 Singleton registry that tracks all document parsers available to
 Paperless-ngx — both built-ins shipped with the application and third-party
 plugins installed via Python entrypoints.
 Public surface
 --------------
 get_parser_registry
    Lazy-initialise and return the shared ParserRegistry. This is the primary
    entry point for production code.
 init_builtin_parsers
    Register built-in parsers only, without entrypoint discovery. Safe to
    call from Celery worker_process_init where importing all entrypoints
    would be wasteful or cause side effects.
 reset_parser_registry
    Reset module-level state. For tests only.
 Entrypoint group
 ----------------
 Third-party parsers must advertise themselves under the
 "paperless_ngx.parsers" entrypoint group in their pyproject.toml::
    [project.entry-points."paperless_ngx.parsers"]
    my_parser = "my_package.parsers:MyParser"
 The loaded class must expose the following attributes at the class level
 (not just on instances) for the registry to accept it:
 name, version, author, url, supported_mime_types (callable), score (callable).
 """
 from __future__ import annotations
 import logging
 from importlib.metadata import entry_points
 from typing import TYPE_CHECKING
 if TYPE_CHECKING:
    from pathlib import Path
    from paperless.parsers import ParserProtocol
 logger = logging.getLogger("paperless.parsers.registry")
 # ---------------------------------------------------------------------------
 # Module-level singleton state
 # ---------------------------------------------------------------------------
 # The shared ParserRegistry instance. Created by get_parser_registry() or
 # init_builtin_parsers(); set back to None by reset_parser_registry().
 _registry: ParserRegistry | None = None
 # True once get_parser_registry() has run discover() and log_summary() on the
 # shared instance; cleared again by reset_parser_registry().
 _discovery_complete: bool = False
 # Attribute names that every registered external parser class must expose.
 # ParserRegistry.discover() checks each one with hasattr() and skips any
 # entrypoint whose loaded class is missing one or more of them.
 _REQUIRED_ATTRS: tuple[str, ...] = (
    "name",
    "version",
    "author",
    "url",
    "supported_mime_types",
    "score",
 )
 # ---------------------------------------------------------------------------
 # Module-level accessor functions
 # ---------------------------------------------------------------------------
 def get_parser_registry() -> ParserRegistry:
    """Return the process-wide ParserRegistry, initialising it on demand.

    Initialisation happens in two independent stages:

    1. If no registry exists yet, one is created and register_defaults
       installs the built-in parsers.
    2. If entrypoint discovery has not run yet, discover loads third-party
       plugins and log_summary emits the startup summary.

    Once both stages are done, later calls return the same instance directly.

    Returns
    -------
    ParserRegistry
        The shared registry singleton.
    """
    global _registry, _discovery_complete

    if _registry is None:
        _registry = ParserRegistry()
        _registry.register_defaults()

    if _discovery_complete:
        return _registry

    _registry.discover()
    _registry.log_summary()
    _discovery_complete = True
    return _registry
 def init_builtin_parsers() -> None:
    """Create the shared registry with built-in parsers only.

    No entrypoint discovery is performed, which makes this suitable for
    Celery worker_process_init handlers where importing every installed
    entrypoint would be wasteful, slow, or could cause side effects.

    Calling it again once a registry exists does nothing.

    Returns
    -------
    None
    """
    global _registry

    if _registry is not None:
        return

    _registry = ParserRegistry()
    _registry.register_defaults()
 def reset_parser_registry() -> None:
    """Drop the shared registry and forget that discovery has run.

    After this call the next get_parser_registry invocation rebuilds the
    registry from scratch, including entrypoint discovery.

    FOR TESTS ONLY. Do not call this in production code — resetting the
    registry mid-request causes all subsequent parser lookups to go through
    discovery again, which is expensive and may have unexpected side effects
    in multi-threaded environments.

    Returns
    -------
    None
    """
    global _registry, _discovery_complete

    _registry, _discovery_complete = None, False
 # ---------------------------------------------------------------------------
 # Registry class
 # ---------------------------------------------------------------------------
 class ParserRegistry:
    """Score-based lookup from a file to the parser class that should handle it.

    Two lists of parser classes are kept:

    _builtins
        Classes added through register_builtin (register_defaults fills this
        with the parsers shipped with the application).
    _external
        Classes loaded from installed Python entrypoints by discover.

    Resolution treats both lists uniformly: every eligible class is asked for
    a score and the highest score wins. When the winner comes from _external,
    its attribution details are logged so users can identify which
    third-party package handled their document.
    """

    def __init__(self) -> None:
        self._builtins: list[type[ParserProtocol]] = []
        self._external: list[type[ParserProtocol]] = []

    # ------------------------------------------------------------------
    # Registration
    # ------------------------------------------------------------------

    def register_builtin(self, parser_class: type[ParserProtocol]) -> None:
        """Append a parser class to the built-in list.

        Built-ins are never replaced by external parsers; scoring alone
        decides which parser wins for a given file.

        Parameters
        ----------
        parser_class:
            The parser class to register. Must satisfy ParserProtocol.
        """
        self._builtins.append(parser_class)

    def register_defaults(self) -> None:
        """Register the parsers that ship with Paperless-ngx.

        The parser modules are imported here, at call time. Registration
        order only affects log output and tie-breaking between built-ins;
        scoring determines the winner at runtime.
        """
        from paperless.parsers.mail import MailDocumentParser
        from paperless.parsers.remote import RemoteDocumentParser
        from paperless.parsers.tesseract import RasterisedDocumentParser
        from paperless.parsers.text import TextDocumentParser
        from paperless.parsers.tika import TikaDocumentParser

        shipped_parsers = (
            TextDocumentParser,
            RemoteDocumentParser,
            TikaDocumentParser,
            MailDocumentParser,
            RasterisedDocumentParser,
        )
        for shipped in shipped_parsers:
            self.register_builtin(shipped)

    # ------------------------------------------------------------------
    # Discovery
    # ------------------------------------------------------------------

    def discover(self) -> None:
        """Load third-party parsers from the "paperless_ngx.parsers" entrypoint group.

        Each entrypoint is loaded and the resulting class is checked for all
        names in _REQUIRED_ATTRS. Classes that pass are appended to _external
        and announced at INFO level. An entrypoint that fails to load (logged
        with traceback) or lacks attributes (logged as a warning) is skipped
        without affecting the others.

        Returns
        -------
        None
        """
        for entrypoint in entry_points(group="paperless_ngx.parsers"):
            try:
                candidate = entrypoint.load()
            except Exception:
                logger.exception(
                    "Failed to load parser entrypoint '%s' — skipping.",
                    entrypoint.name,
                )
                continue

            absent = [
                required
                for required in _REQUIRED_ATTRS
                if not hasattr(candidate, required)
            ]
            if absent:
                logger.warning(
                    "Parser loaded from entrypoint '%s' is missing required "
                    "attributes %r — skipping.",
                    entrypoint.name,
                    absent,
                )
            else:
                self._external.append(candidate)
                logger.info(
                    "Loaded third-party parser '%s' v%s by %s (entrypoint: '%s').",
                    candidate.name,
                    candidate.version,
                    candidate.author,
                    entrypoint.name,
                )

    # ------------------------------------------------------------------
    # Summary logging
    # ------------------------------------------------------------------

    def log_summary(self) -> None:
        """Log every registered parser, built-ins first.

        External parsers follow the built-ins; when there are none, a single
        informational line says so instead.

        Returns
        -------
        None
        """
        logger.info(
            "Built-in parsers (%d):",
            len(self._builtins),
        )
        for builtin in self._builtins:
            logger.info(
                "  [built-in] %s v%s — %s",
                getattr(builtin, "name", repr(builtin)),
                getattr(builtin, "version", "unknown"),
                getattr(builtin, "url", "built-in"),
            )

        if self._external:
            logger.info(
                "Third-party parsers (%d):",
                len(self._external),
            )
            for plugin in self._external:
                logger.info(
                    "  [external] %s v%s by %s — report issues at %s",
                    getattr(plugin, "name", repr(plugin)),
                    getattr(plugin, "version", "unknown"),
                    getattr(plugin, "author", "unknown"),
                    getattr(plugin, "url", "unknown"),
                )
        else:
            logger.info("No third-party parsers discovered.")

    # ------------------------------------------------------------------
    # Parser resolution
    # ------------------------------------------------------------------

    def get_parser_for_file(
        self,
        mime_type: str,
        filename: str,
        path: Path | None = None,
    ) -> type[ParserProtocol] | None:
        """Pick the highest-scoring parser class for a file.

        A class takes part when ``mime_type`` is contained in what its
        supported_mime_types classmethod returns and its score classmethod
        returns something other than None. Candidates are visited external
        first, then built-in, and a later candidate only replaces the current
        best when its score is strictly greater, so on a tie the earlier one
        (and therefore an external parser over a built-in) is kept.

        When an external parser is selected, its identity is logged at INFO
        level so operators can trace which package handled a document.

        Parameters
        ----------
        mime_type:
            The detected MIME type of the file.
        filename:
            The original filename, including extension.
        path:
            Optional filesystem path to the file. Forwarded to each
            parser's score method.

        Returns
        -------
        type[ParserProtocol] | None
            The winning parser class, or None if no parser can handle the file.
        """
        winner: type[ParserProtocol] | None = None
        top_score: int | None = None

        # Externals come first so that a tie resolves in their favour.
        for candidate in self._external + self._builtins:
            if mime_type in candidate.supported_mime_types():
                candidate_score = candidate.score(mime_type, filename, path)
                if candidate_score is not None and (
                    top_score is None or candidate_score > top_score
                ):
                    top_score, winner = candidate_score, candidate

        if winner is not None and winner in self._external:
            logger.info(
                "Document handled by third-party parser '%s' v%s — %s",
                getattr(winner, "name", repr(winner)),
                getattr(winner, "version", "unknown"),
                getattr(winner, "url", "unknown"),
            )

        return winner
--- a/src/paperless/parsers/remote.py
+++ b/src/paperless/parsers/remote.py
@@ -0,0 +1,433 @@
 """
 Built-in remote-OCR document parser.
 Handles documents by sending them to a configured remote OCR engine
 (currently Azure AI Vision / Document Intelligence) and retrieving both
 the extracted text and a searchable PDF with an embedded text layer.
 When no engine is configured, ``score()`` returns ``None`` so the parser
 is effectively invisible to the registry — the tesseract parser handles
 these MIME types instead.
 """
 from __future__ import annotations
 import logging
 import shutil
 import tempfile
 from pathlib import Path
 from typing import TYPE_CHECKING
 from typing import Self
 from django.conf import settings
 from paperless.version import __full_version_str__
 if TYPE_CHECKING:
    import datetime
    from types import TracebackType
    from paperless.parsers import MetadataEntry
    from paperless.parsers import ParserContext
 logger = logging.getLogger("paperless.parsing.remote")
 _SUPPORTED_MIME_TYPES: dict[str, str] = {
    "application/pdf": ".pdf",
    "image/png": ".png",
    "image/jpeg": ".jpg",
    "image/tiff": ".tiff",
    "image/bmp": ".bmp",
    "image/gif": ".gif",
    "image/webp": ".webp",
 }
 class RemoteEngineConfig:
    """Settings bundle for the remote OCR engine plus a usability check.
    Stores the engine identifier together with the credentials it needs so
    callers can ask a single question: "is this combination usable?".
    """
    def __init__(
        self,
        engine: str | None,
        api_key: str | None = None,
        endpoint: str | None = None,
    ) -> None:
        self.engine, self.api_key, self.endpoint = engine, api_key, endpoint
    def engine_is_valid(self) -> bool:
        """Report whether the configured engine can actually be used.
        Returns
        -------
        bool
            True only when the engine is a recognised one, an API key is
            present and, for ``azureai``, an endpoint is present as well.
        """
        # Only recognised engine identifiers are accepted.
        if self.engine not in ("azureai",):
            return False
        # Every engine needs an API key.
        if self.api_key is None:
            return False
        # Azure additionally requires the service endpoint.
        if self.engine == "azureai" and self.endpoint is None:
            return False
        return True
 class RemoteDocumentParser:
    """Parse documents via a remote OCR API (currently Azure AI Vision).
    This parser sends documents to a remote engine that returns both
    extracted text and a searchable PDF with an embedded text layer.
    It does not depend on Tesseract or ocrmypdf.
    Class attributes
    ----------------
    name : str
        Human-readable parser name.
    version : str
        Semantic version string, kept in sync with Paperless-ngx releases.
    author : str
        Maintainer name.
    url : str
        Issue tracker / source URL.
    """
    name: str = "Paperless-ngx Remote OCR Parser"
    version: str = __full_version_str__
    author: str = "Paperless-ngx Contributors"
    url: str = "https://github.com/paperless-ngx/paperless-ngx"
    # ------------------------------------------------------------------
    # Class methods
    # ------------------------------------------------------------------
    @classmethod
    def supported_mime_types(cls) -> dict[str, str]:
        """Return the MIME types this parser can handle.
        The full set is always returned regardless of whether a remote
        engine is configured.  The ``score()`` method handles the
        "am I active?" logic by returning ``None`` when not configured.
        Returns
        -------
        dict[str, str]
            Mapping of MIME type to preferred file extension.
        """
        return _SUPPORTED_MIME_TYPES
    @classmethod
    def score(
        cls,
        mime_type: str,
        filename: str,
        path: Path | None = None,
    ) -> int | None:
        """Return the priority score for handling this file, or None.
        Returns ``None`` when no valid remote engine is configured,
        making the parser invisible to the registry for this file.
        When configured, returns 20 — higher than the Tesseract parser's
        default of 10 — so the remote engine takes priority.
        Parameters
        ----------
        mime_type:
            Detected MIME type of the file.
        filename:
            Original filename including extension.
        path:
            Optional filesystem path. Not inspected by this parser.
        Returns
        -------
        int | None
            20 when the remote engine is configured and the MIME type is
            supported, otherwise None.
        """
        config = RemoteEngineConfig(
            engine=settings.REMOTE_OCR_ENGINE,
            api_key=settings.REMOTE_OCR_API_KEY,
            endpoint=settings.REMOTE_OCR_ENDPOINT,
        )
        if not config.engine_is_valid():
            return None
        if mime_type not in _SUPPORTED_MIME_TYPES:
            return None
        return 20
    # ------------------------------------------------------------------
    # Properties
    # ------------------------------------------------------------------
    @property
    def can_produce_archive(self) -> bool:
        """Whether this parser can produce a searchable PDF archive copy.
        Returns
        -------
        bool
            Always True — the remote engine always returns a PDF with an
            embedded text layer that serves as the archive copy.
        """
        return True
    @property
    def requires_pdf_rendition(self) -> bool:
        """Whether the parser must produce a PDF for the frontend to display.
        Returns
        -------
        bool
            Always False — all supported originals are displayable by
            the browser (PDF) or handled via the archive copy (images).
        """
        return False
    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------
    def __init__(self, logging_group: object = None) -> None:
        """Create a private scratch directory and reset the parse results.
        Parameters
        ----------
        logging_group:
            Opaque value forwarded to the thumbnail helper.
        """
        settings.SCRATCH_DIR.mkdir(parents=True, exist_ok=True)
        self._tempdir = Path(
            tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR),
        )
        self._logging_group = logging_group
        self._text: str | None = None
        self._archive_path: Path | None = None
    def __enter__(self) -> Self:
        return self
    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> None:
        """Remove the scratch directory; removal errors are ignored."""
        logger.debug("Cleaning up temporary directory %s", self._tempdir)
        shutil.rmtree(self._tempdir, ignore_errors=True)
    # ------------------------------------------------------------------
    # Core parsing interface
    # ------------------------------------------------------------------
    def configure(self, context: ParserContext) -> None:
        """Accept the parser context; this parser does not use it."""
        pass
    def parse(
        self,
        document_path: Path,
        mime_type: str,
        *,
        produce_archive: bool = True,
    ) -> None:
        """Send the document to the remote engine and store results.
        When no valid engine is configured a warning is logged and the
        text is set to the empty string.
        Parameters
        ----------
        document_path:
            Absolute path to the document file to parse.
        mime_type:
            Detected MIME type of the document.
        produce_archive:
            Ignored — the remote engine always returns a searchable PDF,
            which is stored as the archive copy regardless of this flag.
        """
        config = RemoteEngineConfig(
            engine=settings.REMOTE_OCR_ENGINE,
            api_key=settings.REMOTE_OCR_API_KEY,
            endpoint=settings.REMOTE_OCR_ENDPOINT,
        )
        if not config.engine_is_valid():
            logger.warning(
                "No valid remote parser engine is configured, content will be empty.",
            )
            self._text = ""
            return
        if config.engine == "azureai":
            self._text = self._azure_ai_vision_parse(document_path, config)
    # ------------------------------------------------------------------
    # Result accessors
    # ------------------------------------------------------------------
    def get_text(self) -> str | None:
        """Return the plain-text content extracted during parse."""
        return self._text
    def get_date(self) -> datetime.datetime | None:
        """Return the document date detected during parse.
        Returns
        -------
        datetime.datetime | None
            Always None — the remote parser does not detect dates.
        """
        return None
    def get_archive_path(self) -> Path | None:
        """Return the path to the generated archive PDF, or None.
        The path is only set once the PDF has been downloaded completely.
        """
        return self._archive_path
    # ------------------------------------------------------------------
    # Thumbnail and metadata
    # ------------------------------------------------------------------
    def get_thumbnail(self, document_path: Path, mime_type: str) -> Path:
        """Generate a thumbnail image for the document.
        Uses the archive PDF produced by the remote engine when available,
        otherwise falls back to the original document path (PDF inputs).
        Parameters
        ----------
        document_path:
            Absolute path to the source document.
        mime_type:
            Detected MIME type of the document.
        Returns
        -------
        Path
            Path to the generated WebP thumbnail inside the temp directory.
        """
        # make_thumbnail_from_pdf lives in documents.parsers for now;
        # it will move to paperless.parsers.utils when the tesseract
        # parser is migrated in a later phase.
        from documents.parsers import make_thumbnail_from_pdf
        return make_thumbnail_from_pdf(
            self._archive_path or document_path,
            self._tempdir,
            self._logging_group,
        )
    def get_page_count(
        self,
        document_path: Path,
        mime_type: str,
    ) -> int | None:
        """Return the number of pages in a PDF document.
        Parameters
        ----------
        document_path:
            Absolute path to the source document.
        mime_type:
            Detected MIME type of the document.
        Returns
        -------
        int | None
            Page count for PDF inputs, or ``None`` for other MIME types.
        """
        if mime_type != "application/pdf":
            return None
        from paperless.parsers.utils import get_page_count_for_pdf
        return get_page_count_for_pdf(document_path, log=logger)
    def extract_metadata(
        self,
        document_path: Path,
        mime_type: str,
    ) -> list[MetadataEntry]:
        """Extract format-specific metadata from the document.
        Delegates to the shared pikepdf-based extractor for PDF files.
        Returns ``[]`` for all other MIME types.
        Parameters
        ----------
        document_path:
            Absolute path to the file to extract metadata from.
        mime_type:
            MIME type of the file.  May be ``"application/pdf"`` when
            called for the archive version of an image original.
        Returns
        -------
        list[MetadataEntry]
            Zero or more metadata entries.
        """
        if mime_type != "application/pdf":
            return []
        from paperless.parsers.utils import extract_pdf_metadata
        return extract_pdf_metadata(document_path, log=logger)
    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    def _azure_ai_vision_parse(
        self,
        file: Path,
        config: RemoteEngineConfig,
    ) -> str | None:
        """Send ``file`` to Azure AI Document Intelligence and return text.
        Downloads the searchable PDF output from Azure into the scratch
        directory.  ``self._archive_path`` is set only after the PDF has
        been written completely; on any failure it is reset to ``None`` so
        callers never receive a path to a truncated archive.  Returns the
        extracted text content, or ``None`` on failure (the error is
        logged).
        Parameters
        ----------
        file:
            Absolute path to the document to analyse.
        config:
            Validated remote engine configuration.
        Returns
        -------
        str | None
            Extracted text, or None if the Azure call failed.
        """
        if TYPE_CHECKING:
            # Callers must have already validated config via engine_is_valid():
            # engine_is_valid() asserts api_key is not None and (for azureai)
            # endpoint is not None, so these casts are provably safe.
            assert config.endpoint is not None
            assert config.api_key is not None
        from azure.ai.documentintelligence import DocumentIntelligenceClient
        from azure.ai.documentintelligence.models import AnalyzeDocumentRequest
        from azure.ai.documentintelligence.models import AnalyzeOutputOption
        from azure.ai.documentintelligence.models import DocumentContentFormat
        from azure.core.credentials import AzureKeyCredential
        client = DocumentIntelligenceClient(
            endpoint=config.endpoint,
            credential=AzureKeyCredential(config.api_key),
        )
        # Write to a local path first; it is published on the instance only
        # once the download has finished without error.
        archive_path = self._tempdir / "archive.pdf"
        try:
            with file.open("rb") as f:
                analyze_request = AnalyzeDocumentRequest(bytes_source=f.read())
                poller = client.begin_analyze_document(
                    model_id="prebuilt-read",
                    body=analyze_request,
                    output_content_format=DocumentContentFormat.TEXT,
                    output=[AnalyzeOutputOption.PDF],
                    content_type="application/json",
                )
            poller.wait()
            result_id = poller.details["operation_id"]
            result = poller.result()
            with archive_path.open("wb") as f:
                for chunk in client.get_analyze_result_pdf(
                    model_id="prebuilt-read",
                    result_id=result_id,
                ):
                    f.write(chunk)
            self._archive_path = archive_path
            return result.content
        except Exception as e:
            logger.error("Azure AI Vision parsing failed: %s", e)
            # The file on disk may be missing or truncated (including one
            # left by an earlier successful call that "wb" just truncated),
            # so make sure no archive is advertised.
            self._archive_path = None
        finally:
            client.close()
        return None
--- a/src/paperless/parsers/tesseract.py
+++ b/src/paperless/parsers/tesseract.py
@@ -1,13 +1,18 @@
 from __future__ import annotations
 import logging
 import os
 import re
 import shutil
 import tempfile
 from pathlib import Path
 from typing import TYPE_CHECKING
 from typing import Any
 from typing import Self
 from django.conf import settings
 from PIL import Image
 from documents.parsers import DocumentParser
 from documents.parsers import ParseError
 from documents.parsers import make_thumbnail_from_pdf
 from documents.utils import maybe_override_pixel_limit
@@ -16,6 +21,28 @@ from paperless.config import OcrConfig
 from paperless.models import ArchiveFileChoices
 from paperless.models import CleanChoices
 from paperless.models import ModeChoices
 from paperless.parsers.utils import read_file_handle_unicode_errors
 from paperless.version import __full_version_str__
 if TYPE_CHECKING:
    import datetime
    from types import TracebackType
    from paperless.parsers import MetadataEntry
    from paperless.parsers import ParserContext
 logger = logging.getLogger("paperless.parsing.tesseract")
 _SUPPORTED_MIME_TYPES: dict[str, str] = {
    "application/pdf": ".pdf",
    "image/jpeg": ".jpg",
    "image/png": ".png",
    "image/tiff": ".tif",
    "image/gif": ".gif",
    "image/bmp": ".bmp",
    "image/webp": ".webp",
    "image/heic": ".heic",
 }
 class NoTextFoundException(Exception):
@@ -26,81 +53,125 @@ class RtlLanguageException(Exception):
    pass
-class RasterisedDocumentParser(DocumentParser):
+class RasterisedDocumentParser:
    """
    This parser uses Tesseract to try and get some text out of a rasterised
    image, whether it's a PDF, or other graphical format (JPEG, TIFF, etc.)
    """
-    logging_name = "paperless.parsing.tesseract"
+    name: str = "Paperless-ngx Tesseract OCR Parser"
    version: str = __full_version_str__
    author: str = "Paperless-ngx Contributors"
    url: str = "https://github.com/paperless-ngx/paperless-ngx"
-    def get_settings(self) -> OcrConfig:
+    # ------------------------------------------------------------------
-        """
+    # Class methods
-        This parser uses the OCR configuration settings to parse documents
+    # ------------------------------------------------------------------
        """
        return OcrConfig()
-    def get_page_count(self, document_path, mime_type):
+    @classmethod
-        page_count = None
+    def supported_mime_types(cls) -> dict[str, str]:
-        if mime_type == "application/pdf":
+        return _SUPPORTED_MIME_TYPES
            try:
                import pikepdf
-                with pikepdf.Pdf.open(document_path) as pdf:
+    @classmethod
-                    page_count = len(pdf.pages)
+    def score(
-            except Exception as e:
+        cls,
-                self.log.warning(
+        mime_type: str,
-                    f"Unable to determine PDF page count {document_path}: {e}",
+        filename: str,
-                )
+        path: Path | None = None,
-        return page_count
+    ) -> int | None:
        if mime_type in _SUPPORTED_MIME_TYPES:
            return 10
        return None
-    def extract_metadata(self, document_path, mime_type):
+    # ------------------------------------------------------------------
-        result = []
+    # Properties
-        if mime_type == "application/pdf":
+    # ------------------------------------------------------------------
            import pikepdf
-            namespace_pattern = re.compile(r"\{(.*)\}(.*)")
+    @property
    def can_produce_archive(self) -> bool:
        return True
-            pdf = pikepdf.open(document_path)
+    @property
-            meta = pdf.open_metadata()
+    def requires_pdf_rendition(self) -> bool:
-            for key, value in meta.items():
+        return False
                if isinstance(value, list):
                    value = " ".join([str(e) for e in value])
                value = str(value)
                try:
                    m = namespace_pattern.match(key)
                    if m is None:  # pragma: no cover
                        continue
                    namespace = m.group(1)
                    key_value = m.group(2)
                    try:
                        namespace.encode("utf-8")
                        key_value.encode("utf-8")
                    except UnicodeEncodeError as e:  # pragma: no cover
                        self.log.debug(f"Skipping metadata key {key}: {e}")
                        continue
                    result.append(
                        {
                            "namespace": namespace,
                            "prefix": meta.REVERSE_NS[namespace],
                            "key": key_value,
                            "value": value,
                        },
                    )
                except Exception as e:
                    self.log.warning(
                        f"Error while reading metadata {key}: {value}. Error: {e}",
                    )
        return result
-    def get_thumbnail(self, document_path, mime_type, file_name=None):
+    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------
    def __init__(self, logging_group: object = None) -> None:
        settings.SCRATCH_DIR.mkdir(parents=True, exist_ok=True)
        self.tempdir = Path(
            tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR),
        )
        self.settings = OcrConfig()
        self.archive_path: Path | None = None
        self.text: str | None = None
        self.date: datetime.datetime | None = None
        self.log = logger
    def __enter__(self) -> Self:
        return self
    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> None:
        logger.debug("Cleaning up temporary directory %s", self.tempdir)
        shutil.rmtree(self.tempdir, ignore_errors=True)
    # ------------------------------------------------------------------
    # Core parsing interface
    # ------------------------------------------------------------------
    def configure(self, context: ParserContext) -> None:
        pass
    # ------------------------------------------------------------------
    # Result accessors
    # ------------------------------------------------------------------
    def get_text(self) -> str | None:
        return self.text
    def get_date(self) -> datetime.datetime | None:
        return self.date
    def get_archive_path(self) -> Path | None:
        return self.archive_path
    # ------------------------------------------------------------------
    # Thumbnail, page count, and metadata
    # ------------------------------------------------------------------
    def get_thumbnail(self, document_path: Path, mime_type: str) -> Path:
        return make_thumbnail_from_pdf(
-            self.archive_path or document_path,
+            self.archive_path or Path(document_path),
            self.tempdir,
            self.logging_group,
        )
-    def is_image(self, mime_type) -> bool:
+    def get_page_count(self, document_path: Path, mime_type: str) -> int | None:
        if mime_type == "application/pdf":
            from paperless.parsers.utils import get_page_count_for_pdf
            return get_page_count_for_pdf(Path(document_path), log=self.log)
        return None
    def extract_metadata(
        self,
        document_path: Path,
        mime_type: str,
    ) -> list[MetadataEntry]:
        if mime_type != "application/pdf":
            return []
        from paperless.parsers.utils import extract_pdf_metadata
        return extract_pdf_metadata(Path(document_path), log=self.log)
    def is_image(self, mime_type: str) -> bool:
        return mime_type in [
            "image/png",
            "image/jpeg",
@@ -111,25 +182,25 @@ class RasterisedDocumentParser(DocumentParser):
            "image/heic",
        ]
-    def has_alpha(self, image) -> bool:
+    def has_alpha(self, image: Path) -> bool:
        with Image.open(image) as im:
            return im.mode in ("RGBA", "LA")
-    def remove_alpha(self, image_path: str) -> Path:
+    def remove_alpha(self, image_path: Path) -> Path:
        no_alpha_image = Path(self.tempdir) / "image-no-alpha"
        run_subprocess(
            [
                settings.CONVERT_BINARY,
                "-alpha",
                "off",
-                image_path,
+                str(image_path),
-                no_alpha_image,
+                str(no_alpha_image),
            ],
            logger=self.log,
        )
        return no_alpha_image
-    def get_dpi(self, image) -> int | None:
+    def get_dpi(self, image: Path) -> int | None:
        try:
            with Image.open(image) as im:
                x, _ = im.info["dpi"]
@@ -138,7 +209,7 @@ class RasterisedDocumentParser(DocumentParser):
            self.log.warning(f"Error while getting DPI from image {image}: {e}")
            return None
-    def calculate_a4_dpi(self, image) -> int | None:
+    def calculate_a4_dpi(self, image: Path) -> int | None:
        try:
            with Image.open(image) as im:
                width, _ = im.size
@@ -156,6 +227,7 @@ class RasterisedDocumentParser(DocumentParser):
        sidecar_file: Path | None,
        pdf_file: Path,
    ) -> str | None:
        text: str | None = None
        # When re-doing OCR, the sidecar contains ONLY the new text, not
        # the whole text, so do not utilize it in that case
        if (
@@ -163,7 +235,7 @@ class RasterisedDocumentParser(DocumentParser):
            and sidecar_file.is_file()
            and self.settings.mode != "redo"
        ):
-            text = self.read_file_handle_unicode_errors(sidecar_file)
+            text = read_file_handle_unicode_errors(sidecar_file)
            if "[OCR skipped on page" not in text:
                # This happens when there's already text in the input file.
@@ -191,12 +263,12 @@ class RasterisedDocumentParser(DocumentParser):
                        "-layout",
                        "-enc",
                        "UTF-8",
-                        pdf_file,
+                        str(pdf_file),
                        tmp.name,
                    ],
                    logger=self.log,
                )
-                text = self.read_file_handle_unicode_errors(Path(tmp.name))
+                text = read_file_handle_unicode_errors(Path(tmp.name))
            return post_process_text(text)
@@ -211,17 +283,15 @@ class RasterisedDocumentParser(DocumentParser):
    def construct_ocrmypdf_parameters(
        self,
-        input_file,
+        input_file: Path,
-        mime_type,
+        mime_type: str,
-        output_file,
+        output_file: Path,
-        sidecar_file,
+        sidecar_file: Path,
        *,
-        safe_fallback=False,
+        safe_fallback: bool = False,
-    ):
+    ) -> dict[str, Any]:
-        if TYPE_CHECKING:
+        ocrmypdf_args: dict[str, Any] = {
-            assert isinstance(self.settings, OcrConfig)
+            "input_file_or_options": input_file,
        ocrmypdf_args = {
            "input_file": input_file,
            "output_file": output_file,
            # need to use threads, since this will be run in daemonized
            # processes via the task library.
@@ -285,7 +355,7 @@ class RasterisedDocumentParser(DocumentParser):
                    "for compatibility with img2pdf",
                )
                # Replace the input file with the non-alpha
-                ocrmypdf_args["input_file"] = self.remove_alpha(input_file)
+                ocrmypdf_args["input_file_or_options"] = self.remove_alpha(input_file)
            if dpi:
                self.log.debug(f"Detected DPI for image {input_file}: {dpi}")
@@ -330,7 +400,13 @@ class RasterisedDocumentParser(DocumentParser):
        return ocrmypdf_args
-    def parse(self, document_path: Path, mime_type, file_name=None) -> None:
+    def parse(
        self,
        document_path: Path,
        mime_type: str,
        *,
        produce_archive: bool = True,
    ) -> None:
        # This forces tesseract to use one core per page.
        os.environ["OMP_THREAD_LIMIT"] = "1"
        VALID_TEXT_LENGTH = 50
@@ -458,7 +534,7 @@ class RasterisedDocumentParser(DocumentParser):
                self.text = ""
-def post_process_text(text):
+def post_process_text(text: str | None) -> str | None:
    if not text:
        return None
--- a/src/paperless/parsers/text.py
+++ b/src/paperless/parsers/text.py
@@ -0,0 +1,324 @@
 """
 Built-in plain-text document parser.
 Handles text/plain, text/csv, and application/csv MIME types by reading the
 file content directly.  Thumbnails are generated by rendering a page-sized
 WebP image from the first 100,000 characters using Pillow.
 """
 from __future__ import annotations
 import logging
 import shutil
 import tempfile
 from pathlib import Path
 from typing import TYPE_CHECKING
 from typing import Self
 from django.conf import settings
 from PIL import Image
 from PIL import ImageDraw
 from PIL import ImageFont
 from paperless.version import __full_version_str__
 if TYPE_CHECKING:
    import datetime
    from types import TracebackType
    from paperless.parsers import MetadataEntry
    from paperless.parsers import ParserContext
 logger = logging.getLogger("paperless.parsing.text")
 _SUPPORTED_MIME_TYPES: dict[str, str] = {
    "text/plain": ".txt",
    "text/csv": ".csv",
    "application/csv": ".csv",
 }
 class TextDocumentParser:
    """Parse plain-text documents (txt, csv) for Paperless-ngx.
    This parser reads the file content directly as UTF-8 text and renders a
    simple thumbnail using Pillow.  It does not perform OCR and does not
    produce a searchable PDF archive copy.
    Class attributes
    ----------------
    name : str
        Human-readable parser name.
    version : str
        Semantic version string, kept in sync with Paperless-ngx releases.
    author : str
        Maintainer name.
    url : str
        Issue tracker / source URL.
    """
    name: str = "Paperless-ngx Text Parser"
    version: str = __full_version_str__
    author: str = "Paperless-ngx Contributors"
    url: str = "https://github.com/paperless-ngx/paperless-ngx"
    # ------------------------------------------------------------------
    # Class methods
    # ------------------------------------------------------------------
    @classmethod
    def supported_mime_types(cls) -> dict[str, str]:
        """Return the MIME types this parser handles.
        Returns
        -------
        dict[str, str]
            Mapping of MIME type to preferred file extension.
        """
        return _SUPPORTED_MIME_TYPES
    @classmethod
    def score(
        cls,
        mime_type: str,
        filename: str,
        path: Path | None = None,
    ) -> int | None:
        """Return the priority score for handling this file.
        Parameters
        ----------
        mime_type:
            Detected MIME type of the file.
        filename:
            Original filename including extension.
        path:
            Optional filesystem path. Not inspected by this parser.
        Returns
        -------
        int | None
            10 if the MIME type is supported, otherwise None.
        """
        if mime_type in _SUPPORTED_MIME_TYPES:
            return 10
        return None
    # ------------------------------------------------------------------
    # Properties
    # ------------------------------------------------------------------
    @property
    def can_produce_archive(self) -> bool:
        """Whether this parser can produce a searchable PDF archive copy.
        Returns
        -------
        bool
            Always False — the text parser does not produce a PDF archive.
        """
        return False
    @property
    def requires_pdf_rendition(self) -> bool:
        """Whether the parser must produce a PDF for the frontend to display.
        Returns
        -------
        bool
            Always False — plain text files are displayable as-is.
        """
        return False
    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------
    def __init__(self, logging_group: object = None) -> None:
        """Create a private scratch directory for the thumbnail output.
        Parameters
        ----------
        logging_group:
            Accepted for interface compatibility; not used by this parser.
        """
        settings.SCRATCH_DIR.mkdir(parents=True, exist_ok=True)
        self._tempdir = Path(
            tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR),
        )
        self._text: str | None = None
    def __enter__(self) -> Self:
        return self
    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> None:
        """Remove the scratch directory; removal errors are ignored."""
        logger.debug("Cleaning up temporary directory %s", self._tempdir)
        shutil.rmtree(self._tempdir, ignore_errors=True)
    # ------------------------------------------------------------------
    # Core parsing interface
    # ------------------------------------------------------------------
    def configure(self, context: ParserContext) -> None:
        """Accept the parser context; this parser does not use it."""
        pass
    def parse(
        self,
        document_path: Path,
        mime_type: str,
        *,
        produce_archive: bool = True,
    ) -> None:
        """Read the document and store its text content.
        Parameters
        ----------
        document_path:
            Absolute path to the text file.
        mime_type:
            Detected MIME type of the document.
        produce_archive:
            Ignored — this parser never produces a PDF archive.
        Raises
        ------
        OSError
            If the file cannot be opened or read.  Invalid UTF-8 does not
            raise; the offending bytes are replaced instead.
        """
        self._text = self._read_text(document_path)
    # ------------------------------------------------------------------
    # Result accessors
    # ------------------------------------------------------------------
    def get_text(self) -> str | None:
        """Return the plain-text content extracted during parse.
        Returns
        -------
        str | None
            Extracted text, or None if parse has not been called yet.
        """
        return self._text
    def get_date(self) -> datetime.datetime | None:
        """Return the document date detected during parse.
        Returns
        -------
        datetime.datetime | None
            Always None — the text parser does not detect dates.
        """
        return None
    def get_archive_path(self) -> Path | None:
        """Return the path to a generated archive PDF, or None.
        Returns
        -------
        Path | None
            Always None — the text parser does not produce a PDF archive.
        """
        return None
    # ------------------------------------------------------------------
    # Thumbnail and metadata
    # ------------------------------------------------------------------
    def get_thumbnail(self, document_path: Path, mime_type: str) -> Path:
        """Render the first portion of the document as a WebP thumbnail.
        Files larger than 50 MiB are not read; a placeholder message is
        rendered instead.  Otherwise at most the first 100,000 characters
        are drawn onto a 500x700 white canvas.
        Parameters
        ----------
        document_path:
            Absolute path to the source document.
        mime_type:
            Detected MIME type of the document.
        Returns
        -------
        Path
            Path to the generated WebP thumbnail inside the temporary directory.
        """
        max_chars = 100_000
        file_size_limit = 50 * 1024 * 1024
        # Normalise once so that stat() and open() act on the same Path
        # object, even when a caller hands in a plain string.
        source = Path(document_path)
        if source.stat().st_size > file_size_limit:
            text = "[File too large to preview]"
        else:
            with source.open("r", encoding="utf-8", errors="replace") as f:
                text = f.read(max_chars)
        img = Image.new("RGB", (500, 700), color="white")
        draw = ImageDraw.Draw(img)
        font = ImageFont.truetype(
            font=settings.THUMBNAIL_FONT_NAME,
            size=20,
            layout_engine=ImageFont.Layout.BASIC,
        )
        draw.multiline_text((5, 5), text, font=font, fill="black", spacing=4)
        out_path = self._tempdir / "thumb.webp"
        img.save(out_path, format="WEBP")
        return out_path
    def get_page_count(
        self,
        document_path: Path,
        mime_type: str,
    ) -> int | None:
        """Return the number of pages in the document.
        Parameters
        ----------
        document_path:
            Absolute path to the source document.
        mime_type:
            Detected MIME type of the document.
        Returns
        -------
        int | None
            Always None — page count is not meaningful for plain text.
        """
        return None
    def extract_metadata(
        self,
        document_path: Path,
        mime_type: str,
    ) -> list[MetadataEntry]:
        """Extract format-specific metadata from the document.
        Returns
        -------
        list[MetadataEntry]
            Always ``[]`` — plain text files carry no structured metadata.
        """
        return []
    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    def _read_text(self, filepath: Path) -> str:
        """Read file content, replacing invalid UTF-8 bytes rather than failing.
        Both the strict attempt and the fallback read the file in text
        mode, so line endings are normalised the same way whether or not
        the file contains invalid bytes.
        Parameters
        ----------
        filepath:
            Path to the file to read.
        Returns
        -------
        str
            File content as a string.
        Raises
        ------
        OSError
            If the file cannot be opened or read.
        """
        try:
            return filepath.read_text(encoding="utf-8")
        except UnicodeDecodeError as exc:
            logger.warning(
                "Unicode error reading %s, replacing bad bytes: %s",
                filepath,
                exc,
            )
            # Re-read in text mode (not read_bytes().decode()) so newline
            # translation matches the strict path above.
            return filepath.read_text(encoding="utf-8", errors="replace")
--- a/src/paperless/parsers/tika.py
+++ b/src/paperless/parsers/tika.py
@@ -0,0 +1,452 @@
 """
 Built-in Tika document parser.
 Handles Office documents (DOCX, ODT, XLS, XLSX, PPT, PPTX, RTF, etc.) by
 sending them to an Apache Tika server for text extraction and a Gotenberg
 server for PDF conversion.  Because the source formats cannot be rendered by
 a browser natively, the parser always produces a PDF rendition for display.
 """
 from __future__ import annotations
 import logging
 import shutil
 import tempfile
 from contextlib import ExitStack
 from pathlib import Path
 from typing import TYPE_CHECKING
 from typing import Self
 import httpx
 from django.conf import settings
 from django.utils import timezone
 from gotenberg_client import GotenbergClient
 from gotenberg_client.options import PdfAFormat
 from tika_client import TikaClient
 from documents.parsers import ParseError
 from documents.parsers import make_thumbnail_from_pdf
 from paperless.config import OutputTypeConfig
 from paperless.models import OutputTypeChoices
 from paperless.version import __full_version_str__
 if TYPE_CHECKING:
    import datetime
    from types import TracebackType
    from paperless.parsers import MetadataEntry
    from paperless.parsers import ParserContext
 logger = logging.getLogger("paperless.parsing.tika")
 _SUPPORTED_MIME_TYPES: dict[str, str] = {
    "application/msword": ".doc",
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",
    "application/vnd.ms-excel": ".xls",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
    "application/vnd.ms-powerpoint": ".ppt",
    "application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx",
    "application/vnd.openxmlformats-officedocument.presentationml.slideshow": ".ppsx",
    "application/vnd.oasis.opendocument.presentation": ".odp",
    "application/vnd.oasis.opendocument.spreadsheet": ".ods",
    "application/vnd.oasis.opendocument.text": ".odt",
    "application/vnd.oasis.opendocument.graphics": ".odg",
    "text/rtf": ".rtf",
 }
 class TikaDocumentParser:
    """Parse Office documents via Apache Tika and Gotenberg for Paperless-ngx.
    Text extraction is handled by the Tika server.  PDF conversion for display
    is handled by Gotenberg (LibreOffice route).  Because the source formats
    cannot be rendered by a browser natively, ``requires_pdf_rendition`` is
    True and the PDF is always produced regardless of the ``produce_archive``
    flag passed to ``parse``.
    Both ``TikaClient`` and ``GotenbergClient`` are opened once in
    ``__enter__`` via an ``ExitStack`` and shared across ``parse``,
    ``extract_metadata``, and ``_convert_to_pdf`` calls, then closed via
    ``ExitStack.close()`` in ``__exit__``.  The parser must always be used
    as a context manager.
    Class attributes
    ----------------
    name : str
        Human-readable parser name.
    version : str
        Semantic version string, kept in sync with Paperless-ngx releases.
    author : str
        Maintainer name.
    url : str
        Issue tracker / source URL.
    """
    name: str = "Paperless-ngx Tika Parser"
    version: str = __full_version_str__
    author: str = "Paperless-ngx Contributors"
    url: str = "https://github.com/paperless-ngx/paperless-ngx"
    # ------------------------------------------------------------------
    # Class methods
    # ------------------------------------------------------------------
    @classmethod
    def supported_mime_types(cls) -> dict[str, str]:
        """Return the MIME types this parser handles.
        The module-level ``_SUPPORTED_MIME_TYPES`` mapping is returned as-is
        (not copied), so callers should treat the result as read-only.
        Returns
        -------
        dict[str, str]
            Mapping of MIME type to preferred file extension.
        """
        return _SUPPORTED_MIME_TYPES
    @classmethod
    def score(
        cls,
        mime_type: str,
        filename: str,
        path: Path | None = None,
    ) -> int | None:
        """Rate how well this parser suits the given file.
        A ``None`` result tells the registry to skip this parser; that is the
        outcome whenever Tika integration is switched off or the MIME type is
        not one of the supported Office formats.
        Parameters
        ----------
        mime_type:
            Detected MIME type of the file.
        filename:
            Original filename including extension.  Not inspected.
        path:
            Optional filesystem path.  Not inspected.
        Returns
        -------
        int | None
            10 if TIKA_ENABLED and the MIME type is supported, otherwise None.
        """
        handled = settings.TIKA_ENABLED and mime_type in _SUPPORTED_MIME_TYPES
        return 10 if handled else None
    # ------------------------------------------------------------------
    # Properties
    # ------------------------------------------------------------------
    @property
    def can_produce_archive(self) -> bool:
        """Whether this parser can produce a searchable PDF archive copy.
        Returns
        -------
        bool
            Always False — the PDF this parser generates is a display
            rendition, not an OCR archive.
        """
        return False
    @property
    def requires_pdf_rendition(self) -> bool:
        """Whether the parser must produce a PDF for the frontend to display.
        Returns
        -------
        bool
            Always True — Office formats cannot be rendered natively in a
            browser, so ``parse`` converts to PDF unconditionally.
        """
        return True
    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------
    def __init__(self, logging_group: object = None) -> None:
        """Create a private temporary directory and reset all parse state.
        No client is opened here; the Tika and Gotenberg clients stay
        ``None`` until ``__enter__`` runs.
        Parameters
        ----------
        logging_group:
            Not used by this constructor.
        """
        # The scratch root must exist before a directory can be created in it.
        settings.SCRATCH_DIR.mkdir(parents=True, exist_ok=True)
        self._tempdir = Path(
            tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR),
        )
        # Results populated by parse() (and, for the archive path, get_thumbnail()).
        self._text: str | None = None
        self._date: datetime.datetime | None = None
        self._archive_path: Path | None = None
        # Owns the lifetime of both HTTP clients; closed in __exit__.
        self._exit_stack = ExitStack()
        self._tika_client: TikaClient | None = None
        self._gotenberg_client: GotenbergClient | None = None
    def __enter__(self) -> Self:
        """Open the Tika and Gotenberg clients and return the parser.
        Both clients are registered on ``self._exit_stack`` so that
        ``__exit__`` closes them together.  When ``__enter__`` itself raises,
        the ``with`` statement never calls ``__exit__``, so a client that was
        already opened is closed here before the error propagates.
        Returns
        -------
        Self
            This parser, ready for ``parse`` and ``extract_metadata``.
        """
        try:
            self._tika_client = self._exit_stack.enter_context(
                TikaClient(
                    tika_url=settings.TIKA_ENDPOINT,
                    timeout=settings.CELERY_TASK_TIME_LIMIT,
                ),
            )
            self._gotenberg_client = self._exit_stack.enter_context(
                GotenbergClient(
                    host=settings.TIKA_GOTENBERG_ENDPOINT,
                    timeout=settings.CELERY_TASK_TIME_LIMIT,
                ),
            )
        except BaseException:
            # Release whatever was opened and drop references to closed clients.
            self._exit_stack.close()
            self._tika_client = None
            self._gotenberg_client = None
            raise
        return self
    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> None:
        """Close both clients and delete the temporary directory.
        The directory removal sits in a ``finally`` clause so that an error
        raised while closing a client cannot leave the scratch directory
        behind.  Exceptions from the ``with`` body are not suppressed.
        Parameters
        ----------
        exc_type, exc_val, exc_tb:
            Standard context-manager exception details.  Not inspected.
        """
        try:
            self._exit_stack.close()
        finally:
            logger.debug("Cleaning up temporary directory %s", self._tempdir)
            shutil.rmtree(self._tempdir, ignore_errors=True)
    # ------------------------------------------------------------------
    # Core parsing interface
    # ------------------------------------------------------------------
    def configure(self, context: ParserContext) -> None:
        """Accept a parser context; this parser does nothing with it.
        Parameters
        ----------
        context:
            Context object supplied by the caller.  Ignored.
        """
        pass
    def parse(
        self,
        document_path: Path,
        mime_type: str,
        *,
        produce_archive: bool = True,
    ) -> None:
        """Extract text and date via Tika, then render a PDF via Gotenberg.
        The file is first uploaded to Tika as multi-part form data.  If Tika
        answers with HTTP 500 the raw bytes are sent instead (workaround for
        TIKA-4110).  The PDF rendition is generated unconditionally because
        ``requires_pdf_rendition`` is True.
        Parameters
        ----------
        document_path:
            Absolute path to the document file to parse.
        mime_type:
            Detected MIME type of the document.
        produce_archive:
            Accepted for protocol compatibility but ignored; the PDF rendition
            is always produced since the source format cannot be displayed
            natively in the browser.
        Raises
        ------
        documents.parsers.ParseError
            If Tika or Gotenberg returns an error.
        """
        if TYPE_CHECKING:
            assert self._tika_client is not None
        logger.info("Sending %s to Tika server", document_path)
        try:
            try:
                parsed = self._tika_client.tika.as_text.from_file(
                    document_path,
                    mime_type,
                )
            except httpx.HTTPStatusError as status_err:
                # Workaround https://issues.apache.org/jira/browse/TIKA-4110
                # Tika fails with some files as multi-part form data
                status = status_err.response.status_code
                if status != httpx.codes.INTERNAL_SERVER_ERROR:  # pragma: no cover
                    raise
                parsed = self._tika_client.tika.as_text.from_buffer(
                    document_path.read_bytes(),
                    mime_type,
                )
        except Exception as err:
            raise ParseError(
                f"Could not parse {document_path} with tika server at "
                f"{settings.TIKA_ENDPOINT}: {err}",
            ) from err
        content = parsed.content
        self._text = content.strip() if content is not None else None
        created = parsed.created
        if created is not None and timezone.is_naive(created):
            created = timezone.make_aware(created)
        self._date = created
        # Always convert — requires_pdf_rendition=True means the browser
        # cannot display the source format natively.
        self._archive_path = self._convert_to_pdf(document_path)
    # ------------------------------------------------------------------
    # Result accessors
    # ------------------------------------------------------------------
    def get_text(self) -> str | None:
        """Return the plain-text content extracted during parse.
        Returns
        -------
        str | None
            Extracted text with surrounding whitespace stripped, or None if
            parse has not been called yet or Tika returned no content.
        """
        return self._text
    def get_date(self) -> datetime.datetime | None:
        """Return the document date detected during parse.
        Returns
        -------
        datetime.datetime | None
            Creation date from Tika metadata (made timezone-aware by parse
            when Tika returned a naive value), or None if not detected.
        """
        return self._date
    def get_archive_path(self) -> Path | None:
        """Return the path to the generated PDF rendition, or None.
        Returns
        -------
        Path | None
            Path to the PDF produced by Gotenberg, or None if neither parse
            nor get_thumbnail has generated it yet.
        """
        return self._archive_path
    # ------------------------------------------------------------------
    # Thumbnail and metadata
    # ------------------------------------------------------------------
    def get_thumbnail(self, document_path: Path, mime_type: str) -> Path:
        """Render a thumbnail from the document's PDF rendition.
        The PDF cached by an earlier ``parse`` call is reused when present;
        otherwise the document is converted now and the result is cached.
        Parameters
        ----------
        document_path:
            Absolute path to the source document.
        mime_type:
            Detected MIME type of the document.  Not inspected.
        Returns
        -------
        Path
            Path to the generated WebP thumbnail inside the temporary directory.
        """
        pdf = self._archive_path
        if pdf is None:
            pdf = self._convert_to_pdf(document_path)
            self._archive_path = pdf
        return make_thumbnail_from_pdf(pdf, self._tempdir)
    def get_page_count(
        self,
        document_path: Path,
        mime_type: str,
    ) -> int | None:
        """Count the pages of the PDF rendition, if one exists.
        Only the archive PDF created by an earlier ``parse`` (or
        ``get_thumbnail``) call is examined; the source document itself is
        never opened.
        Parameters
        ----------
        document_path:
            Absolute path to the source document.  Not inspected.
        mime_type:
            Detected MIME type of the document.  Not inspected.
        Returns
        -------
        int | None
            Page count of the archive PDF, or ``None`` when no archive PDF
            has been produced yet.
        """
        archive = self._archive_path
        if archive is None:
            return None
        from paperless.parsers.utils import get_page_count_for_pdf
        return get_page_count_for_pdf(archive, log=logger)
    def extract_metadata(
        self,
        document_path: Path,
        mime_type: str,
    ) -> list[MetadataEntry]:
        """Fetch document metadata from the Tika metadata endpoint.
        Every key/value pair Tika reports becomes one entry with empty
        ``namespace`` and ``prefix``.  Any failure is logged as a warning and
        yields an empty list instead of propagating.
        Parameters
        ----------
        document_path:
            Absolute path to the source document.
        mime_type:
            Detected MIME type of the document.
        Returns
        -------
        list[MetadataEntry]
            All key/value pairs returned by Tika, or ``[]`` on error.
        """
        if TYPE_CHECKING:
            assert self._tika_client is not None
        try:
            tika_result = self._tika_client.metadata.from_file(
                document_path,
                mime_type,
            )
            return [
                {"namespace": "", "prefix": "", "key": name, "value": content}
                for name, content in tika_result.data.items()
            ]
        except Exception as err:
            logger.warning(
                "Error while fetching document metadata for %s: %s",
                document_path,
                err,
            )
            return []
    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    def _convert_to_pdf(self, document_path: Path) -> Path:
        """Render the document to ``convert.pdf`` through Gotenberg's LibreOffice route.
        The requested PDF/A flavour follows the configured output type.
        Gotenberg has no PDF/A-1a support, so that choice is downgraded to
        PDF/A-2b with a warning.  Any other output type leaves the route's
        PDF format unset.
        Parameters
        ----------
        document_path:
            Absolute path to the source document.
        Returns
        -------
        Path
            Path to the generated PDF inside the temporary directory.
        Raises
        ------
        documents.parsers.ParseError
            If Gotenberg returns an error.
        """
        if TYPE_CHECKING:
            assert self._gotenberg_client is not None
        pdf_path = self._tempdir / "convert.pdf"
        logger.info("Converting %s to PDF as %s", document_path, pdf_path)
        with self._gotenberg_client.libre_office.to_pdf() as route:
            # Set the output format of the resulting PDF.
            # OutputTypeConfig reads the database-stored ApplicationConfiguration
            # first, then falls back to the PAPERLESS_OCR_OUTPUT_TYPE env var.
            output_type = OutputTypeConfig().output_type
            if output_type == OutputTypeChoices.PDF_A3:
                route.pdf_format(PdfAFormat.A3b)
            elif output_type == OutputTypeChoices.PDF_A1:
                logger.warning(
                    "Gotenberg does not support PDF/A-1a, choosing PDF/A-2b instead",
                )
                route.pdf_format(PdfAFormat.A2b)
            elif output_type in {
                OutputTypeChoices.PDF_A,
                OutputTypeChoices.PDF_A2,
            }:
                route.pdf_format(PdfAFormat.A2b)
            route.convert(document_path)
            try:
                response = route.run()
                pdf_path.write_bytes(response.content)
            except Exception as err:
                raise ParseError(
                    f"Error while converting document to PDF: {err}",
                ) from err
            return pdf_path
--- a/src/paperless/parsers/utils.py
+++ b/src/paperless/parsers/utils.py
@@ -0,0 +1,158 @@
 """
 Shared utilities for Paperless-ngx document parsers.
 Functions here are format-neutral helpers that multiple parsers need.
 Keeping them here avoids parsers inheriting from each other just to
 share implementation.
 """
 from __future__ import annotations
 import logging
 import re
 from typing import TYPE_CHECKING
 if TYPE_CHECKING:
    from pathlib import Path
    from paperless.parsers import MetadataEntry
 logger = logging.getLogger("paperless.parsers.utils")
 def read_file_handle_unicode_errors(
    filepath: Path,
    log: logging.Logger | None = None,
 ) -> str:
    """Read a file as UTF-8 text, replacing invalid bytes rather than raising.
    A strict decode is attempted first so well-formed files never log a
    warning.  On failure the file is read again in text mode with
    ``errors="replace"``.  Using text mode for the fallback (instead of
    decoding the raw bytes) keeps newline handling identical on both paths:
    CRLF line endings are translated to ``\\n`` whether or not the file
    contains invalid bytes.
    Parameters
    ----------
    filepath:
        Absolute path to the file to read.
    log:
        Logger to use for warnings.  Falls back to the module-level logger
        when omitted.
    Returns
    -------
    str
        File content as a string, with any invalid UTF-8 sequences replaced
        by the Unicode replacement character.
    """
    _log = log or logger
    try:
        return filepath.read_text(encoding="utf-8")
    except UnicodeDecodeError as e:
        _log.warning("Unicode error during text reading, continuing: %s", e)
        # Same text-mode read as above so newline translation matches.
        return filepath.read_text(encoding="utf-8", errors="replace")
 def get_page_count_for_pdf(
    document_path: Path,
    log: logging.Logger | None = None,
 ) -> int | None:
    """Count the pages of a PDF file with pikepdf.
    The file is opened only for the duration of the count.  Any error while
    opening or reading it is logged as a warning rather than raised.
    Parameters
    ----------
    document_path:
        Absolute path to the PDF file.
    log:
        Logger to use for warnings.  Falls back to the module-level logger
        when omitted.
    Returns
    -------
    int | None
        Page count, or ``None`` if the file cannot be opened or is not a
        valid PDF.
    """
    import pikepdf
    try:
        with pikepdf.Pdf.open(document_path) as pdf:
            page_total = len(pdf.pages)
    except Exception as err:
        (log or logger).warning(
            "Unable to determine PDF page count for %s: %s",
            document_path,
            err,
        )
        return None
    return page_total
 def extract_pdf_metadata(
    document_path: Path,
    log: logging.Logger | None = None,
 ) -> list[MetadataEntry]:
    """Extract XMP/PDF metadata from a PDF file using pikepdf.
    Reads all XMP metadata entries from the document and returns them as a
    list of ``MetadataEntry`` dicts.  The method never raises — any failure
    to open the file or read a specific key is logged and skipped.  The PDF
    is opened inside a ``with`` block so its file handle is released on
    every exit path.
    Parameters
    ----------
    document_path:
        Absolute path to the PDF file.
    log:
        Logger to use for warnings and debug messages.  Falls back to the
        module-level logger when omitted.
    Returns
    -------
    list[MetadataEntry]
        Zero or more metadata entries.  Returns ``[]`` if the file cannot
        be opened or contains no readable XMP metadata.
    """
    import pikepdf
    from paperless.parsers import MetadataEntry
    _log = log or logger
    result: list[MetadataEntry] = []
    # Keys look like "{namespace-uri}local-name"; split them into two parts.
    namespace_pattern = re.compile(r"\{(.*)\}(.*)")
    try:
        pdf = pikepdf.open(document_path)
    except Exception as e:
        _log.warning("Could not open PDF metadata for %s: %s", document_path, e)
        return []
    # Keep the PDF open while the metadata is read, then always close it.
    with pdf:
        try:
            meta = pdf.open_metadata()
        except Exception as e:
            _log.warning("Could not open PDF metadata for %s: %s", document_path, e)
            return []
        for key, value in meta.items():
            if isinstance(value, list):
                value = " ".join(str(e) for e in value)
            value = str(value)
            try:
                m = namespace_pattern.match(key)
                if m is None:
                    # Key is not namespace-qualified; nothing to report.
                    continue
                namespace = m.group(1)
                key_value = m.group(2)
                try:
                    namespace.encode("utf-8")
                    key_value.encode("utf-8")
                except UnicodeEncodeError as enc_err:  # pragma: no cover
                    _log.debug("Skipping metadata key %s: %s", key, enc_err)
                    continue
                result.append(
                    MetadataEntry(
                        namespace=namespace,
                        prefix=meta.REVERSE_NS[namespace],
                        key=key_value,
                        value=value,
                    ),
                )
            except Exception as e:
                # Covers e.g. a namespace missing from REVERSE_NS.
                _log.warning(
                    "Error reading metadata key %s value %s: %s",
                    key,
                    value,
                    e,
                )
    return result
--- a/src/paperless/serialisers.py
+++ b/src/paperless/serialisers.py
@@ -6,6 +6,7 @@ from allauth.mfa.models import Authenticator
 from allauth.mfa.totp.internal.auth import TOTP
 from allauth.socialaccount.models import SocialAccount
 from allauth.socialaccount.models import SocialApp
 from django.conf import settings
 from django.contrib.auth.models import Group
 from django.contrib.auth.models import Permission
 from django.contrib.auth.models import User
@@ -15,6 +16,7 @@ from rest_framework import serializers
 from rest_framework.authtoken.serializers import AuthTokenSerializer
 from paperless.models import ApplicationConfiguration
 from paperless.network import validate_outbound_http_url
 from paperless.validators import reject_dangerous_svg
 from paperless_mail.serialisers import ObfuscatedPasswordField
@@ -236,6 +238,22 @@ class ApplicationConfigurationSerializer(serializers.ModelSerializer):
            reject_dangerous_svg(file)
        return file
    def validate_llm_endpoint(self, value: str | None) -> str | None:
        """Check a configured LLM endpoint URL before it is saved.
        Empty values are passed through untouched.  Anything else is run
        through ``validate_outbound_http_url``; a ``ValueError`` from that
        check is re-raised as a serializer validation error.
        """
        if value:
            allow_internal = settings.LLM_ALLOW_INTERNAL_ENDPOINTS
            try:
                validate_outbound_http_url(value, allow_internal=allow_internal)
            except ValueError as exc:
                reason = exc.args[0]
                raise serializers.ValidationError(
                    f"Invalid LLM endpoint: {reason}, see logs for details",
                ) from exc
        return value
    class Meta:
        model = ApplicationConfiguration
        fields = "__all__"
--- a/src/paperless/settings/init.py
+++ b/src/paperless/settings/init.py
@@ -1112,3 +1112,7 @@ LLM_BACKEND = os.getenv("PAPERLESS_AI_LLM_BACKEND")  # "ollama" or "openai"
 LLM_MODEL = os.getenv("PAPERLESS_AI_LLM_MODEL")
 LLM_API_KEY = os.getenv("PAPERLESS_AI_LLM_API_KEY")
 LLM_ENDPOINT = os.getenv("PAPERLESS_AI_LLM_ENDPOINT")
 # Controls whether a configured LLM endpoint may point at an internal address.
 # NOTE(review): "true" looks like the fallback used when the env var is
 # unset — confirm against get_bool_from_env.
 LLM_ALLOW_INTERNAL_ENDPOINTS = get_bool_from_env(
    "PAPERLESS_AI_LLM_ALLOW_INTERNAL_ENDPOINTS",
    "true",
 )
--- a/src/paperless/tests/conftest.py
+++ b/src/paperless/tests/conftest.py
@@ -0,0 +1,48 @@
 """
 Fixtures defined here are available to every test module under
 src/paperless/tests/ (including sub-packages such as parsers/).
 Session-scoped fixtures for the shared samples directory live here so
 sub-package conftest files can reference them without duplicating path logic.
 Parser-specific fixtures (concrete parser instances, format-specific sample
 files) live in paperless/tests/parsers/conftest.py.
 """
 from __future__ import annotations
 from pathlib import Path
 from typing import TYPE_CHECKING
 import pytest
 from paperless.parsers.registry import reset_parser_registry
 if TYPE_CHECKING:
    from collections.abc import Generator
@pytest.fixture(scope="session")
 def samples_dir() -> Path:
    """Absolute path to the shared parser sample files directory.
    Sub-package conftest files derive format-specific paths from this root,
    e.g. ``samples_dir / "text" / "test.txt"``.
    Returns
    -------
    Path
        Directory containing all sample documents used by parser tests.
    """
    return (Path(__file__).parent / "samples").resolve()
@pytest.fixture(autouse=True)
 def clean_registry() -> Generator[None, None, None]:
    """Reset the parser registry before and after every test.
    Being ``autouse``, this wraps every test in scope without being
    requested.  It prevents registry state from leaking between tests that
    call get_parser_registry() or init_builtin_parsers().
    """
    # Setup: start each test from a clean registry.
    reset_parser_registry()
    yield
    # Teardown: discard whatever the test registered.
    reset_parser_registry()
--- a/src/paperless/tests/parsers/init.py
+++ b/src/paperless/tests/parsers/init.py
--- a/src/paperless/tests/parsers/conftest.py
+++ b/src/paperless/tests/parsers/conftest.py
@@ -0,0 +1,800 @@
 """
 Parser fixtures that are used across multiple test modules in this package
 are defined here.  Format-specific sample-file fixtures are grouped by parser
 so it is easy to see which files belong to which test module.
 """
 from __future__ import annotations
 from contextlib import contextmanager
 from typing import TYPE_CHECKING
 import pytest
 from django.test import override_settings
 from paperless.parsers.mail import MailDocumentParser
 from paperless.parsers.remote import RemoteDocumentParser
 from paperless.parsers.tesseract import RasterisedDocumentParser
 from paperless.parsers.text import TextDocumentParser
 from paperless.parsers.tika import TikaDocumentParser
 if TYPE_CHECKING:
    from collections.abc import Callable
    from collections.abc import Generator
    from pathlib import Path
    from unittest.mock import MagicMock
    from pytest_django.fixtures import SettingsWrapper
    from pytest_mock import MockerFixture
    #: Type for the ``make_tesseract_parser`` fixture factory.
    MakeTesseractParser = Callable[..., Generator[RasterisedDocumentParser, None, None]]
 # ------------------------------------------------------------------
 # Text parser sample files
 # ------------------------------------------------------------------
@pytest.fixture(scope="session")
 def text_samples_dir(samples_dir: Path) -> Path:
    """Absolute path to the text parser sample files directory.
    Returns
    -------
    Path
        ``<samples_dir>/text/``
    """
    return samples_dir / "text"
@pytest.fixture(scope="session")
 def sample_txt_file(text_samples_dir: Path) -> Path:
    """Path to a valid UTF-8 plain-text sample file.
    Returns
    -------
    Path
        Absolute path to ``text/test.txt``.
    """
    return text_samples_dir / "test.txt"
@pytest.fixture(scope="session")
 def malformed_txt_file(text_samples_dir: Path) -> Path:
    """Path to a text file containing invalid UTF-8 bytes.
    Returns
    -------
    Path
        Absolute path to ``text/decode_error.txt``.
    """
    return text_samples_dir / "decode_error.txt"
 # ------------------------------------------------------------------
 # Text parser instance
 # ------------------------------------------------------------------
@pytest.fixture()
 def text_parser() -> Generator[TextDocumentParser, None, None]:
    """Yield a TextDocumentParser entered as a context manager.
    The parser's context is exited when the fixture is torn down, which
    cleans up its temporary directory.
    Yields
    ------
    TextDocumentParser
        A ready-to-use parser instance.
    """
    with TextDocumentParser() as parser:
        yield parser
 # ------------------------------------------------------------------
 # Remote parser sample files
 # ------------------------------------------------------------------
@pytest.fixture(scope="session")
 def remote_samples_dir(samples_dir: Path) -> Path:
    """Absolute path to the remote parser sample files directory.
    Returns
    -------
    Path
        ``<samples_dir>/remote/``
    """
    return samples_dir / "remote"
@pytest.fixture(scope="session")
 def sample_pdf_file(remote_samples_dir: Path) -> Path:
    """Path to a simple digital PDF sample file.
    Returns
    -------
    Path
        Absolute path to ``remote/simple-digital.pdf``.
    """
    return remote_samples_dir / "simple-digital.pdf"
 # ------------------------------------------------------------------
 # Remote parser instance
 # ------------------------------------------------------------------
@pytest.fixture()
 def remote_parser() -> Generator[RemoteDocumentParser, None, None]:
    """Yield a RemoteDocumentParser entered as a context manager.
    The parser's context is exited when the fixture is torn down, which
    cleans up its temporary directory.
    Yields
    ------
    RemoteDocumentParser
        A ready-to-use parser instance.
    """
    with RemoteDocumentParser() as parser:
        yield parser
 # ------------------------------------------------------------------
 # Remote parser settings helpers
 # ------------------------------------------------------------------
@pytest.fixture()
 def azure_settings(settings: SettingsWrapper) -> SettingsWrapper:
    """Configure Django settings for a valid Azure AI OCR engine.
    Sets ``REMOTE_OCR_ENGINE``, ``REMOTE_OCR_API_KEY``, and
    ``REMOTE_OCR_ENDPOINT`` to test values.  Settings are restored
    automatically after the test by pytest-django.
    Returns
    -------
    SettingsWrapper
        The modified settings object (for chaining further overrides).
    """
    settings.REMOTE_OCR_ENGINE = "azureai"
    settings.REMOTE_OCR_API_KEY = "test-api-key"
    settings.REMOTE_OCR_ENDPOINT = "https://test.cognitiveservices.azure.com"
    return settings
@pytest.fixture()
 def no_engine_settings(settings: SettingsWrapper) -> SettingsWrapper:
    """Configure Django settings with no remote engine configured.
    Returns
    -------
    SettingsWrapper
        The modified settings object.
    """
    settings.REMOTE_OCR_ENGINE = None
    settings.REMOTE_OCR_API_KEY = None
    settings.REMOTE_OCR_ENDPOINT = None
    return settings
 # ------------------------------------------------------------------
 # Tika parser sample files
 # ------------------------------------------------------------------
@pytest.fixture(scope="session")
 def tika_samples_dir(samples_dir: Path) -> Path:
    """Absolute path to the Tika parser sample files directory.
    Returns
    -------
    Path
        ``<samples_dir>/tika/``
    """
    return samples_dir / "tika"
@pytest.fixture(scope="session")
 def sample_odt_file(tika_samples_dir: Path) -> Path:
    """Path to a sample ODT file.
    Returns
    -------
    Path
        Absolute path to ``tika/sample.odt``.
    """
    return tika_samples_dir / "sample.odt"
@pytest.fixture(scope="session")
 def sample_docx_file(tika_samples_dir: Path) -> Path:
    """Path to a sample DOCX file.
    Returns
    -------
    Path
        Absolute path to ``tika/sample.docx``.
    """
    return tika_samples_dir / "sample.docx"
@pytest.fixture(scope="session")
 def sample_doc_file(tika_samples_dir: Path) -> Path:
    """Path to a sample DOC file.
    Returns
    -------
    Path
        Absolute path to ``tika/sample.doc``.
    """
    return tika_samples_dir / "sample.doc"
@pytest.fixture(scope="session")
 def sample_broken_odt(tika_samples_dir: Path) -> Path:
    """Path to a broken ODT file that triggers the multi-part fallback.
    Returns
    -------
    Path
        Absolute path to ``tika/multi-part-broken.odt``.
    """
    return tika_samples_dir / "multi-part-broken.odt"
 # ------------------------------------------------------------------
 # Tika parser instance
 # ------------------------------------------------------------------
@pytest.fixture()
 def tika_parser() -> Generator[TikaDocumentParser, None, None]:
    """Yield a TikaDocumentParser entered as a context manager.
    Entering opens the parser's Tika and Gotenberg clients.  Fixture
    teardown exits the context, which closes the clients and removes the
    parser's temporary directory.
    Yields
    ------
    TikaDocumentParser
        A ready-to-use parser instance.
    """
    with TikaDocumentParser() as parser:
        yield parser
 # ------------------------------------------------------------------
 # Mail parser sample files
 # ------------------------------------------------------------------
@pytest.fixture(scope="session")
 def mail_samples_dir(samples_dir: Path) -> Path:
    """Absolute path to the mail parser sample files directory.
    Returns
    -------
    Path
        ``<samples_dir>/mail/``
    """
    return samples_dir / "mail"
@pytest.fixture(scope="session")
 def broken_email_file(mail_samples_dir: Path) -> Path:
    """Path to a broken/malformed EML sample file.
    Returns
    -------
    Path
        Absolute path to ``mail/broken.eml``.
    """
    return mail_samples_dir / "broken.eml"
@pytest.fixture(scope="session")
 def simple_txt_email_file(mail_samples_dir: Path) -> Path:
    """Path to a plain-text email sample file.
    Returns
    -------
    Path
        Absolute path to ``mail/simple_text.eml``.
    """
    return mail_samples_dir / "simple_text.eml"
@pytest.fixture(scope="session")
 def simple_txt_email_pdf_file(mail_samples_dir: Path) -> Path:
    """Path to the expected PDF rendition of the plain-text email.
    Returns
    -------
    Path
        Absolute path to ``mail/simple_text.eml.pdf``.
    """
    return mail_samples_dir / "simple_text.eml.pdf"
@pytest.fixture(scope="session")
 def simple_txt_email_thumbnail_file(mail_samples_dir: Path) -> Path:
    """Path to the expected thumbnail for the plain-text email.
    Returns
    -------
    Path
        Absolute path to ``mail/simple_text.eml.pdf.webp``.
    """
    return mail_samples_dir / "simple_text.eml.pdf.webp"
@pytest.fixture(scope="session")
def html_email_file(mail_samples_dir: Path) -> Path:
    """Locate the HTML email sample.

    Returns
    -------
    Path
        ``html.eml`` inside the mail samples directory.
    """
    return mail_samples_dir.joinpath("html.eml")
@pytest.fixture(scope="session")
def html_email_pdf_file(mail_samples_dir: Path) -> Path:
    """Locate the reference PDF rendition of the HTML email.

    Returns
    -------
    Path
        ``html.eml.pdf`` inside the mail samples directory.
    """
    return mail_samples_dir.joinpath("html.eml.pdf")
@pytest.fixture(scope="session")
def html_email_thumbnail_file(mail_samples_dir: Path) -> Path:
    """Locate the reference thumbnail of the HTML email.

    Returns
    -------
    Path
        ``html.eml.pdf.webp`` inside the mail samples directory.
    """
    return mail_samples_dir.joinpath("html.eml.pdf.webp")
@pytest.fixture(scope="session")
def html_email_html_file(mail_samples_dir: Path) -> Path:
    """Locate the HTML body belonging to the HTML email sample.

    Returns
    -------
    Path
        ``html.eml.html`` inside the mail samples directory.
    """
    return mail_samples_dir.joinpath("html.eml.html")
@pytest.fixture(scope="session")
def merged_pdf_first(mail_samples_dir: Path) -> Path:
    """Locate the first input PDF for the PDF-merge tests.

    Returns
    -------
    Path
        ``first.pdf`` inside the mail samples directory.
    """
    return mail_samples_dir.joinpath("first.pdf")
@pytest.fixture(scope="session")
def merged_pdf_second(mail_samples_dir: Path) -> Path:
    """Locate the second input PDF for the PDF-merge tests.

    Returns
    -------
    Path
        ``second.pdf`` inside the mail samples directory.
    """
    return mail_samples_dir.joinpath("second.pdf")
 # ------------------------------------------------------------------
 # Mail parser instance
 # ------------------------------------------------------------------
@pytest.fixture()
def mail_parser() -> Generator[MailDocumentParser, None, None]:
    """Provide a MailDocumentParser for the duration of one test.

    The parser is entered as a context manager, so its exit hook runs
    once the requesting test has finished.

    Yields
    ------
    MailDocumentParser
        The entered parser instance.
    """
    with MailDocumentParser() as mail:
        yield mail
@pytest.fixture(scope="session")
def nginx_base_url() -> Generator[str, None, None]:
    """Base URL of the nginx HTTP server the tests expect to be running.

    Yields
    ------
    str
        The fixed URL ``http://localhost:8080``.
    """
    base_url = "http://localhost:8080"
    yield base_url
 # ------------------------------------------------------------------
 # Tesseract parser sample files
 # ------------------------------------------------------------------
@pytest.fixture(scope="session")
def tesseract_samples_dir(samples_dir: Path) -> Path:
    """Directory holding the tesseract parser sample files.

    Returns
    -------
    Path
        The ``tesseract`` sub-directory of ``samples_dir``.
    """
    return samples_dir.joinpath("tesseract")
@pytest.fixture(scope="session")
def document_webp_file(tesseract_samples_dir: Path) -> Path:
    """Locate the WebP document sample.

    Returns
    -------
    Path
        ``document.webp`` inside the tesseract samples directory.
    """
    return tesseract_samples_dir.joinpath("document.webp")
@pytest.fixture(scope="session")
def encrypted_pdf_file(tesseract_samples_dir: Path) -> Path:
    """Locate the encrypted PDF sample.

    Returns
    -------
    Path
        ``encrypted.pdf`` inside the tesseract samples directory.
    """
    return tesseract_samples_dir.joinpath("encrypted.pdf")
@pytest.fixture(scope="session")
def multi_page_digital_pdf_file(tesseract_samples_dir: Path) -> Path:
    """Locate the multi-page digital PDF sample.

    Returns
    -------
    Path
        ``multi-page-digital.pdf`` inside the tesseract samples directory.
    """
    return tesseract_samples_dir.joinpath("multi-page-digital.pdf")
@pytest.fixture(scope="session")
def multi_page_images_alpha_rgb_tiff_file(tesseract_samples_dir: Path) -> Path:
    """Locate the multi-page RGB TIFF sample that has an alpha channel.

    Returns
    -------
    Path
        ``multi-page-images-alpha-rgb.tiff`` inside the tesseract samples
        directory.
    """
    return tesseract_samples_dir.joinpath("multi-page-images-alpha-rgb.tiff")
@pytest.fixture(scope="session")
def multi_page_images_alpha_tiff_file(tesseract_samples_dir: Path) -> Path:
    """Locate the multi-page TIFF sample that has an alpha channel.

    Returns
    -------
    Path
        ``multi-page-images-alpha.tiff`` inside the tesseract samples
        directory.
    """
    return tesseract_samples_dir.joinpath("multi-page-images-alpha.tiff")
@pytest.fixture(scope="session")
def multi_page_images_pdf_file(tesseract_samples_dir: Path) -> Path:
    """Locate the multi-page image-based PDF sample.

    Returns
    -------
    Path
        ``multi-page-images.pdf`` inside the tesseract samples directory.
    """
    return tesseract_samples_dir.joinpath("multi-page-images.pdf")
@pytest.fixture(scope="session")
def multi_page_images_tiff_file(tesseract_samples_dir: Path) -> Path:
    """Locate the multi-page TIFF sample.

    Returns
    -------
    Path
        ``multi-page-images.tiff`` inside the tesseract samples directory.
    """
    return tesseract_samples_dir.joinpath("multi-page-images.tiff")
@pytest.fixture(scope="session")
def multi_page_mixed_pdf_file(tesseract_samples_dir: Path) -> Path:
    """Locate the multi-page mixed PDF sample.

    Returns
    -------
    Path
        ``multi-page-mixed.pdf`` inside the tesseract samples directory.
    """
    return tesseract_samples_dir.joinpath("multi-page-mixed.pdf")
@pytest.fixture(scope="session")
def no_text_alpha_png_file(tesseract_samples_dir: Path) -> Path:
    """Locate the text-free PNG sample that has an alpha channel.

    Returns
    -------
    Path
        ``no-text-alpha.png`` inside the tesseract samples directory.
    """
    return tesseract_samples_dir.joinpath("no-text-alpha.png")
@pytest.fixture(scope="session")
def rotated_pdf_file(tesseract_samples_dir: Path) -> Path:
    """Locate the rotated PDF sample.

    Returns
    -------
    Path
        ``rotated.pdf`` inside the tesseract samples directory.
    """
    return tesseract_samples_dir.joinpath("rotated.pdf")
@pytest.fixture(scope="session")
def rtl_test_pdf_file(tesseract_samples_dir: Path) -> Path:
    """Locate the RTL test PDF sample.

    Returns
    -------
    Path
        ``rtl-test.pdf`` inside the tesseract samples directory.
    """
    return tesseract_samples_dir.joinpath("rtl-test.pdf")
@pytest.fixture(scope="session")
def signed_pdf_file(tesseract_samples_dir: Path) -> Path:
    """Locate the signed PDF sample.

    Returns
    -------
    Path
        ``signed.pdf`` inside the tesseract samples directory.
    """
    return tesseract_samples_dir.joinpath("signed.pdf")
@pytest.fixture(scope="session")
def simple_alpha_png_file(tesseract_samples_dir: Path) -> Path:
    """Locate the simple PNG sample that has an alpha channel.

    Returns
    -------
    Path
        ``simple-alpha.png`` inside the tesseract samples directory.
    """
    return tesseract_samples_dir.joinpath("simple-alpha.png")
@pytest.fixture(scope="session")
def simple_digital_pdf_file(tesseract_samples_dir: Path) -> Path:
    """Locate the simple digital PDF sample.

    Returns
    -------
    Path
        ``simple-digital.pdf`` inside the tesseract samples directory.
    """
    return tesseract_samples_dir.joinpath("simple-digital.pdf")
@pytest.fixture(scope="session")
def simple_no_dpi_png_file(tesseract_samples_dir: Path) -> Path:
    """Locate the simple PNG sample that carries no DPI information.

    Returns
    -------
    Path
        ``simple-no-dpi.png`` inside the tesseract samples directory.
    """
    return tesseract_samples_dir.joinpath("simple-no-dpi.png")
@pytest.fixture(scope="session")
def simple_bmp_file(tesseract_samples_dir: Path) -> Path:
    """Locate the simple BMP sample.

    Returns
    -------
    Path
        ``simple.bmp`` inside the tesseract samples directory.
    """
    return tesseract_samples_dir.joinpath("simple.bmp")
@pytest.fixture(scope="session")
def simple_gif_file(tesseract_samples_dir: Path) -> Path:
    """Locate the simple GIF sample.

    Returns
    -------
    Path
        ``simple.gif`` inside the tesseract samples directory.
    """
    return tesseract_samples_dir.joinpath("simple.gif")
@pytest.fixture(scope="session")
def simple_heic_file(tesseract_samples_dir: Path) -> Path:
    """Locate the simple HEIC sample.

    Returns
    -------
    Path
        ``simple.heic`` inside the tesseract samples directory.
    """
    return tesseract_samples_dir.joinpath("simple.heic")
@pytest.fixture(scope="session")
def simple_jpg_file(tesseract_samples_dir: Path) -> Path:
    """Locate the simple JPG sample.

    Returns
    -------
    Path
        ``simple.jpg`` inside the tesseract samples directory.
    """
    return tesseract_samples_dir.joinpath("simple.jpg")
@pytest.fixture(scope="session")
def simple_png_file(tesseract_samples_dir: Path) -> Path:
    """Locate the simple PNG sample.

    Returns
    -------
    Path
        ``simple.png`` inside the tesseract samples directory.
    """
    return tesseract_samples_dir.joinpath("simple.png")
@pytest.fixture(scope="session")
def simple_tif_file(tesseract_samples_dir: Path) -> Path:
    """Locate the simple TIF sample.

    Returns
    -------
    Path
        ``simple.tif`` inside the tesseract samples directory.
    """
    return tesseract_samples_dir.joinpath("simple.tif")
@pytest.fixture(scope="session")
def single_page_mixed_pdf_file(tesseract_samples_dir: Path) -> Path:
    """Locate the single-page mixed PDF sample.

    Returns
    -------
    Path
        ``single-page-mixed.pdf`` inside the tesseract samples directory.
    """
    return tesseract_samples_dir.joinpath("single-page-mixed.pdf")
@pytest.fixture(scope="session")
def with_form_pdf_file(tesseract_samples_dir: Path) -> Path:
    """Locate the PDF sample that contains a form.

    Returns
    -------
    Path
        ``with-form.pdf`` inside the tesseract samples directory.
    """
    return tesseract_samples_dir.joinpath("with-form.pdf")
 # ------------------------------------------------------------------
 # Tesseract parser instance and settings helpers
 # ------------------------------------------------------------------
@pytest.fixture()
def null_app_config(mocker: MockerFixture) -> MagicMock:
    """Build a stand-in OCR config whose listed fields are all ``None``.

    The mock is intended to make the parser use Django settings rather
    than values read from the database.

    Returns
    -------
    MagicMock
        Mock config object with every listed field preset to ``None``.
    """
    config_fields = (
        "output_type",
        "pages",
        "language",
        "mode",
        "skip_archive_file",
        "image_dpi",
        "unpaper_clean",
        "deskew",
        "rotate_pages",
        "rotate_pages_threshold",
        "max_image_pixels",
        "color_conversion_strategy",
        "user_args",
    )
    # dict.fromkeys maps every field name to None, matching explicit kwargs.
    return mocker.MagicMock(**dict.fromkeys(config_fields))
@pytest.fixture()
def tesseract_parser(
    mocker: MockerFixture,
    null_app_config: MagicMock,
) -> Generator[RasterisedDocumentParser, None, None]:
    """Provide a RasterisedDocumentParser for the duration of one test.

    ``BaseConfig._get_config_instance`` is patched to hand back
    ``null_app_config`` before the parser is created. The parser is
    entered as a context manager, so its exit hook runs once the
    requesting test has finished.

    Yields
    ------
    RasterisedDocumentParser
        The entered parser instance.
    """
    config_target = "paperless.config.BaseConfig._get_config_instance"
    mocker.patch(config_target, return_value=null_app_config)
    with RasterisedDocumentParser() as rasterised:
        yield rasterised
@pytest.fixture()
def make_tesseract_parser(
    mocker: MockerFixture,
    null_app_config: MagicMock,
) -> MakeTesseractParser:
    """Provide a factory for parsers built under overridden Django settings.

    ``BaseConfig._get_config_instance`` is patched to hand back
    ``null_app_config``. Each call to the returned factory gives a
    context manager that applies the keyword arguments through
    ``override_settings`` and yields a fresh ``RasterisedDocumentParser``.

    Returns
    -------
    MakeTesseractParser
        Context-manager factory accepting Django settings overrides as
        keyword arguments.
    """
    config_target = "paperless.config.BaseConfig._get_config_instance"
    mocker.patch(config_target, return_value=null_app_config)

    @contextmanager
    def _make_parser(**django_settings_overrides):
        # Settings are overridden first so the parser is created under them.
        with override_settings(
            **django_settings_overrides,
        ), RasterisedDocumentParser() as configured_parser:
            yield configured_parser

    return _make_parser
--- a/src/paperless/tests/parsers/test_mail_parser.py
+++ b/src/paperless/tests/parsers/test_mail_parser.py
@@ -12,7 +12,64 @@ from pytest_httpx import HTTPXMock
 from pytest_mock import MockerFixture
 from documents.parsers import ParseError
-from paperless_mail.parsers import MailDocumentParser
+from paperless.parsers import ParserContext
 from paperless.parsers import ParserProtocol
 from paperless.parsers.mail import MailDocumentParser
 class TestMailParserProtocol:
    """Contract checks: MailDocumentParser must behave like a ParserProtocol."""

    def test_isinstance_satisfies_protocol(
        self,
        mail_parser: MailDocumentParser,
    ) -> None:
        """A parser instance passes the ``isinstance`` check."""
        satisfies_protocol = isinstance(mail_parser, ParserProtocol)
        assert satisfies_protocol

    def test_supported_mime_types(self) -> None:
        """The supported types are a dict that includes ``message/rfc822``."""
        supported = MailDocumentParser.supported_mime_types()
        assert isinstance(supported, dict)
        assert "message/rfc822" in supported

    @pytest.mark.parametrize(
        ("mime_type", "expected"),
        [
            ("message/rfc822", 10),
            ("application/pdf", None),
            ("text/plain", None),
        ],
    )
    def test_score(self, mime_type: str, expected: int | None) -> None:
        """``score()`` gives 10 for RFC 822 mail and ``None`` otherwise."""
        actual = MailDocumentParser.score(mime_type, "email.eml")
        assert actual == expected

    def test_can_produce_archive_is_false(
        self,
        mail_parser: MailDocumentParser,
    ) -> None:
        """``can_produce_archive`` is exactly ``False``."""
        flag = mail_parser.can_produce_archive
        assert flag is False

    def test_requires_pdf_rendition_is_true(
        self,
        mail_parser: MailDocumentParser,
    ) -> None:
        """``requires_pdf_rendition`` is exactly ``True``."""
        flag = mail_parser.requires_pdf_rendition
        assert flag is True

    def test_get_page_count_returns_none_without_archive(
        self,
        mail_parser: MailDocumentParser,
        html_email_file: Path,
    ) -> None:
        """With no archive path set, the page count is ``None``."""
        page_count = mail_parser.get_page_count(html_email_file, "message/rfc822")
        assert page_count is None

    def test_get_page_count_returns_int_with_pdf_archive(
        self,
        mail_parser: MailDocumentParser,
        simple_txt_email_pdf_file: Path,
    ) -> None:
        """With the archive path pointed at a PDF, a positive int comes back."""
        # The test sets the private archive path directly instead of parsing.
        mail_parser._archive_path = simple_txt_email_pdf_file
        page_count = mail_parser.get_page_count(
            simple_txt_email_pdf_file,
            "message/rfc822",
        )
        assert isinstance(page_count, int)
        assert page_count > 0
 class TestEmailFileParsing:
@@ -24,7 +81,7 @@ class TestEmailFileParsing:
    def test_parse_error_missing_file(
        self,
        mail_parser: MailDocumentParser,
-        sample_dir: Path,
+        mail_samples_dir: Path,
    ) -> None:
        """
        GIVEN:
@@ -35,7 +92,7 @@ class TestEmailFileParsing:
            - An Exception is thrown
        """
        # Check if exception is raised when parsing fails.
-        test_file = sample_dir / "doesntexist.eml"
+        test_file = mail_samples_dir / "doesntexist.eml"
        assert not test_file.exists()
@@ -246,12 +303,12 @@ class TestEmailThumbnailGenerate:
        """
        mocked_return = "Passing the return value through.."
        mock_make_thumbnail_from_pdf = mocker.patch(
-            "paperless_mail.parsers.make_thumbnail_from_pdf",
+            "paperless.parsers.mail.make_thumbnail_from_pdf",
        )
        mock_make_thumbnail_from_pdf.return_value = mocked_return
        mock_generate_pdf = mocker.patch(
-            "paperless_mail.parsers.MailDocumentParser.generate_pdf",
+            "paperless.parsers.mail.MailDocumentParser.generate_pdf",
        )
        mock_generate_pdf.return_value = "Mocked return value.."
@@ -260,8 +317,7 @@ class TestEmailThumbnailGenerate:
        mock_generate_pdf.assert_called_once()
        mock_make_thumbnail_from_pdf.assert_called_once_with(
            "Mocked return value..",
-            mail_parser.tempdir,
+            mail_parser._tempdir,
            None,
        )
        assert mocked_return == thumb
@@ -373,7 +429,7 @@ class TestParser:
        """
        # Validate parsing returns the expected results
        mock_generate_pdf = mocker.patch(
-            "paperless_mail.parsers.MailDocumentParser.generate_pdf",
+            "paperless.parsers.mail.MailDocumentParser.generate_pdf",
        )
        mail_parser.parse(simple_txt_email_file, "message/rfc822")
@@ -385,7 +441,7 @@ class TestParser:
            "BCC: fdf@fvf.de\n\n"
            "\n\nThis is just a simple Text Mail."
        )
-        assert text_expected == mail_parser.text
+        assert text_expected == mail_parser.get_text()
        assert (
            datetime.datetime(
                2022,
@@ -396,7 +452,7 @@ class TestParser:
                43,
                tzinfo=datetime.timezone(datetime.timedelta(seconds=7200)),
            )
-            == mail_parser.date
+            == mail_parser.get_date()
        )
        # Just check if tried to generate archive, the unittest for generate_pdf() goes deeper.
@@ -419,7 +475,7 @@ class TestParser:
        """
        mock_generate_pdf = mocker.patch(
-            "paperless_mail.parsers.MailDocumentParser.generate_pdf",
+            "paperless.parsers.mail.MailDocumentParser.generate_pdf",
        )
        # Validate parsing returns the expected results
@@ -443,7 +499,7 @@ class TestParser:
        mail_parser.parse(html_email_file, "message/rfc822")
        mock_generate_pdf.assert_called_once()
-        assert text_expected == mail_parser.text
+        assert text_expected == mail_parser.get_text()
        assert (
            datetime.datetime(
                2022,
@@ -454,7 +510,7 @@ class TestParser:
                19,
                tzinfo=datetime.timezone(datetime.timedelta(seconds=7200)),
            )
-            == mail_parser.date
+            == mail_parser.get_date()
        )
    def test_generate_pdf_parse_error(
@@ -501,7 +557,7 @@ class TestParser:
        mail_parser.parse(simple_txt_email_file, "message/rfc822")
-        assert mail_parser.archive_path is not None
+        assert mail_parser.get_archive_path() is not None
    @pytest.mark.httpx_mock(can_send_already_matched_responses=True)
    def test_generate_pdf_html_email(
@@ -542,7 +598,7 @@ class TestParser:
        )
        mail_parser.parse(html_email_file, "message/rfc822")
-        assert mail_parser.archive_path is not None
+        assert mail_parser.get_archive_path() is not None
    def test_generate_pdf_html_email_html_to_pdf_failure(
        self,
@@ -712,10 +768,10 @@ class TestParser:
        def test_layout_option(layout_option, expected_calls, expected_pdf_names):
            mock_mailrule_get.return_value = mock.Mock(pdf_layout=layout_option)
            mail_parser.configure(ParserContext(mailrule_id=1))
            mail_parser.parse(
                document_path=html_email_file,
                mime_type="message/rfc822",
                mailrule_id=1,
            )
            args, _ = mock_merge_route.call_args
            assert len(args[0]) == expected_calls
--- a/src/paperless/tests/parsers/test_mail_parser_live.py
+++ b/src/paperless/tests/parsers/test_mail_parser_live.py
@@ -11,7 +11,7 @@ from PIL import Image
 from pytest_mock import MockerFixture
 from documents.tests.utils import util_call_with_backoff
-from paperless_mail.parsers import MailDocumentParser
+from paperless.parsers.mail import MailDocumentParser
 def extract_text(pdf_path: Path) -> str:
@@ -159,7 +159,7 @@ class TestParserLive:
            - The returned thumbnail image file shall match the expected hash
        """
        mock_generate_pdf = mocker.patch(
-            "paperless_mail.parsers.MailDocumentParser.generate_pdf",
+            "paperless.parsers.mail.MailDocumentParser.generate_pdf",
        )
        mock_generate_pdf.return_value = simple_txt_email_pdf_file
@@ -216,10 +216,10 @@ class TestParserLive:
            - The merged PDF shall contain text from both source PDFs
        """
        mock_generate_pdf_from_html = mocker.patch(
-            "paperless_mail.parsers.MailDocumentParser.generate_pdf_from_html",
+            "paperless.parsers.mail.MailDocumentParser.generate_pdf_from_html",
        )
        mock_generate_pdf_from_mail = mocker.patch(
-            "paperless_mail.parsers.MailDocumentParser.generate_pdf_from_mail",
+            "paperless.parsers.mail.MailDocumentParser.generate_pdf_from_mail",
        )
        mock_generate_pdf_from_mail.return_value = merged_pdf_first
        mock_generate_pdf_from_html.return_value = merged_pdf_second
--- a/src/paperless/tests/parsers/test_remote_parser.py
+++ b/src/paperless/tests/parsers/test_remote_parser.py
@@ -0,0 +1,497 @@
 """
 Tests for paperless.parsers.remote.RemoteDocumentParser.
 All tests use the context-manager protocol for parser lifecycle.
 Fixture layout
 --------------
 make_azure_mock  — factory (defined here; specific to this module)
 azure_client     — composes azure_settings + make_azure_mock + patch;
                   use when a test needs the client to succeed
 failing_azure_client
                 — composes azure_settings + patch with RuntimeError;
                   use when a test needs the client to fail
 """
 from __future__ import annotations
 from typing import TYPE_CHECKING
 from unittest.mock import Mock
 import pytest
 from paperless.parsers import ParserContext
 from paperless.parsers import ParserProtocol
 from paperless.parsers.remote import RemoteDocumentParser
 if TYPE_CHECKING:
    from collections.abc import Callable
    from pathlib import Path
    from pytest_django.fixtures import SettingsWrapper
    from pytest_mock import MockerFixture
 # ---------------------------------------------------------------------------
 # Module-local fixtures
 # ---------------------------------------------------------------------------
 # Dotted path handed to ``mocker.patch`` by the Azure client fixtures in this module.
 _AZURE_CLIENT_TARGET = "azure.ai.documentintelligence.DocumentIntelligenceClient"
 # Text the mock poller reports when ``make_azure_mock`` is called without an argument.
 _DEFAULT_TEXT = "Extracted text."
@pytest.fixture()
def make_azure_mock() -> Callable[..., Mock]:
    """Return a factory that builds a mock Azure DocumentIntelligenceClient.

    The factory takes an optional ``text`` argument, so the return type is
    ``Callable[..., Mock]``: a ``Callable[[str], Mock]`` annotation would
    reject the documented zero-argument call.

    Usage::

        mock_client = make_azure_mock()            # default extracted text
        mock_client = make_azure_mock("My text.")  # custom extracted text

    Returns
    -------
    Callable[..., Mock]
        Factory producing a configured mock client.
    """

    def _factory(text: str = _DEFAULT_TEXT) -> Mock:
        mock_client = Mock()
        mock_poller = Mock()
        # The poller's wait() gives None, and its details carry a fake
        # operation id.
        mock_poller.wait.return_value = None
        mock_poller.details = {"operation_id": "fake-op-id"}
        # result().content is the "extracted" text under test.
        mock_poller.result.return_value.content = text
        mock_client.begin_analyze_document.return_value = mock_poller
        # The archive PDF is delivered as an iterable of byte chunks.
        mock_client.get_analyze_result_pdf.return_value = [b"%PDF-1.4 FAKE"]
        return mock_client

    return _factory
@pytest.fixture()
def azure_client(
    azure_settings: SettingsWrapper,
    make_azure_mock: Callable[..., Mock],
    mocker: MockerFixture,
) -> Mock:
    """Patch the Azure DI client with a succeeding mock and return the instance.

    Implicitly applies ``azure_settings`` so tests using this fixture do not
    also need ``@pytest.mark.usefixtures("azure_settings")``.

    ``make_azure_mock`` is typed ``Callable[..., Mock]`` because it is
    called here with no arguments, relying on the factory's default text.

    Returns
    -------
    Mock
        The mock client that the patched class constructor returns.
    """
    mock_client = make_azure_mock()
    mocker.patch(_AZURE_CLIENT_TARGET, return_value=mock_client)
    return mock_client
@pytest.fixture()
def failing_azure_client(
    azure_settings: SettingsWrapper,
    mocker: MockerFixture,
) -> Mock:
    """Patch the Azure DI client so that analysing a document raises.

    Requesting this fixture also pulls in ``azure_settings``. The mock's
    ``begin_analyze_document`` raises ``RuntimeError``; the mock is
    returned so tests can assert on calls such as ``close()``.

    Returns
    -------
    Mock
        The failing mock client that the patched class constructor returns.
    """
    failure = RuntimeError("network failure")
    broken_client = Mock()
    broken_client.begin_analyze_document.side_effect = failure
    mocker.patch(_AZURE_CLIENT_TARGET, return_value=broken_client)
    return broken_client
 # ---------------------------------------------------------------------------
 # Protocol contract
 # ---------------------------------------------------------------------------
 class TestRemoteParserProtocol:
    """Contract checks: RemoteDocumentParser must behave like a ParserProtocol."""

    def test_isinstance_satisfies_protocol(
        self,
        remote_parser: RemoteDocumentParser,
    ) -> None:
        """A parser instance passes the ``isinstance`` check."""
        satisfies_protocol = isinstance(remote_parser, ParserProtocol)
        assert satisfies_protocol

    def test_class_attributes_present(self) -> None:
        """``name``, ``version``, ``author`` and ``url`` are non-empty strings."""
        for attribute in ("name", "version", "author", "url"):
            value = getattr(RemoteDocumentParser, attribute)
            assert isinstance(value, str) and value
 # ---------------------------------------------------------------------------
 # supported_mime_types
 # ---------------------------------------------------------------------------
 class TestRemoteParserSupportedMimeTypes:
    """supported_mime_types() reports the full set whatever the configuration."""

    def test_returns_dict(self) -> None:
        """The advertised MIME types come back as a dict."""
        assert isinstance(RemoteDocumentParser.supported_mime_types(), dict)

    def test_includes_all_expected_types(self) -> None:
        """The dict keys are exactly the seven expected MIME types."""
        wanted = {
            "application/pdf",
            "image/png",
            "image/jpeg",
            "image/tiff",
            "image/bmp",
            "image/gif",
            "image/webp",
        }
        advertised = set(RemoteDocumentParser.supported_mime_types().keys())
        assert wanted == advertised

    @pytest.mark.usefixtures("no_engine_settings")
    def test_returns_full_set_when_not_configured(self) -> None:
        """
        GIVEN: No remote engine is configured
        WHEN:  supported_mime_types() is called
        THEN:  The full MIME type dict is still returned (score() handles activation)
        """
        advertised = RemoteDocumentParser.supported_mime_types()
        assert len(advertised) == 7
 # ---------------------------------------------------------------------------
 # score()
 # ---------------------------------------------------------------------------
 class TestRemoteParserScore:
    """score() gates activation: ``None`` unless configured, otherwise 20."""

    @pytest.mark.usefixtures("azure_settings")
    @pytest.mark.parametrize(
        "mime_type",
        [
            pytest.param("application/pdf", id="pdf"),
            pytest.param("image/png", id="png"),
            pytest.param("image/jpeg", id="jpeg"),
            pytest.param("image/tiff", id="tiff"),
            pytest.param("image/bmp", id="bmp"),
            pytest.param("image/gif", id="gif"),
            pytest.param("image/webp", id="webp"),
        ],
    )
    def test_score_returns_20_when_configured(self, mime_type: str) -> None:
        """Each listed MIME type scores 20 under ``azure_settings``."""
        assert RemoteDocumentParser.score(mime_type, "doc.pdf") == 20

    @pytest.mark.usefixtures("no_engine_settings")
    @pytest.mark.parametrize(
        "mime_type",
        [
            pytest.param("application/pdf", id="pdf"),
            pytest.param("image/png", id="png"),
            pytest.param("image/jpeg", id="jpeg"),
        ],
    )
    def test_score_returns_none_when_no_engine(self, mime_type: str) -> None:
        """Under ``no_engine_settings`` the score is ``None``."""
        assert RemoteDocumentParser.score(mime_type, "doc.pdf") is None

    def test_score_returns_none_when_api_key_missing(
        self,
        settings: SettingsWrapper,
    ) -> None:
        """Engine and endpoint set but API key ``None`` gives ``None``."""
        settings.REMOTE_OCR_ENGINE = "azureai"
        settings.REMOTE_OCR_ENDPOINT = "https://test.cognitiveservices.azure.com"
        settings.REMOTE_OCR_API_KEY = None
        outcome = RemoteDocumentParser.score("application/pdf", "doc.pdf")
        assert outcome is None

    def test_score_returns_none_when_endpoint_missing(
        self,
        settings: SettingsWrapper,
    ) -> None:
        """Engine and API key set but endpoint ``None`` gives ``None``."""
        settings.REMOTE_OCR_ENGINE = "azureai"
        settings.REMOTE_OCR_ENDPOINT = None
        settings.REMOTE_OCR_API_KEY = "key"
        outcome = RemoteDocumentParser.score("application/pdf", "doc.pdf")
        assert outcome is None

    @pytest.mark.usefixtures("azure_settings")
    def test_score_returns_none_for_unsupported_mime_type(self) -> None:
        """``text/plain`` scores ``None`` even under ``azure_settings``."""
        assert RemoteDocumentParser.score("text/plain", "doc.txt") is None

    @pytest.mark.usefixtures("azure_settings")
    def test_score_higher_than_tesseract_default(self) -> None:
        """Remote parser (20) outranks the tesseract default (10) when configured."""
        remote_score = RemoteDocumentParser.score("application/pdf", "doc.pdf")
        assert remote_score is not None
        assert remote_score > 10
 # ---------------------------------------------------------------------------
 # Properties
 # ---------------------------------------------------------------------------
 class TestRemoteParserProperties:
    """Checks on the parser's capability flags."""

    def test_can_produce_archive_is_true(
        self,
        remote_parser: RemoteDocumentParser,
    ) -> None:
        """``can_produce_archive`` is exactly ``True``."""
        flag = remote_parser.can_produce_archive
        assert flag is True

    def test_requires_pdf_rendition_is_false(
        self,
        remote_parser: RemoteDocumentParser,
    ) -> None:
        """``requires_pdf_rendition`` is exactly ``False``."""
        flag = remote_parser.requires_pdf_rendition
        assert flag is False
 # ---------------------------------------------------------------------------
 # Lifecycle
 # ---------------------------------------------------------------------------
 class TestRemoteParserLifecycle:
    """The context-manager protocol must remove the parser's temp directory."""

    def test_context_manager_cleans_up_tempdir(self) -> None:
        """The temp directory exists inside the block and is gone afterwards."""
        with RemoteDocumentParser() as active_parser:
            scratch_dir = active_parser._tempdir
            assert scratch_dir.exists()
        assert not scratch_dir.exists()

    def test_context_manager_cleans_up_after_exception(self) -> None:
        """The temp directory is removed even when the block raises."""
        scratch_dir: Path | None = None
        # pytest.raises is the outer manager, so the parser exits first.
        with pytest.raises(RuntimeError), RemoteDocumentParser() as active_parser:
            scratch_dir = active_parser._tempdir
            raise RuntimeError("boom")
        assert scratch_dir is not None
        assert not scratch_dir.exists()
 # ---------------------------------------------------------------------------
 # parse() — happy path
 # ---------------------------------------------------------------------------
 class TestRemoteParserParse:
    """parse() behaviour when the Azure client succeeds or is unconfigured."""

    def test_parse_returns_text_from_azure(
        self,
        remote_parser: RemoteDocumentParser,
        sample_pdf_file: Path,
        azure_client: Mock,
    ) -> None:
        """The text reported by the mock client is exposed via ``get_text()``."""
        remote_parser.parse(sample_pdf_file, "application/pdf")
        extracted = remote_parser.get_text()
        assert extracted == _DEFAULT_TEXT

    def test_parse_sets_archive_path(
        self,
        remote_parser: RemoteDocumentParser,
        sample_pdf_file: Path,
        azure_client: Mock,
    ) -> None:
        """After parsing, the archive path is an existing ``.pdf`` file."""
        remote_parser.parse(sample_pdf_file, "application/pdf")
        archive_file = remote_parser.get_archive_path()
        assert archive_file is not None
        assert archive_file.exists()
        assert archive_file.suffix == ".pdf"

    def test_parse_closes_client_on_success(
        self,
        remote_parser: RemoteDocumentParser,
        sample_pdf_file: Path,
        azure_client: Mock,
    ) -> None:
        """The client's ``close()`` is called exactly once after parsing."""
        default_context = ParserContext()
        remote_parser.configure(default_context)
        remote_parser.parse(sample_pdf_file, "application/pdf")
        azure_client.close.assert_called_once()

    @pytest.mark.usefixtures("no_engine_settings")
    def test_parse_sets_empty_text_when_not_configured(
        self,
        remote_parser: RemoteDocumentParser,
        sample_pdf_file: Path,
    ) -> None:
        """Without an engine, parsing gives empty text and no archive path."""
        remote_parser.parse(sample_pdf_file, "application/pdf")
        extracted = remote_parser.get_text()
        assert extracted == ""
        archive_file = remote_parser.get_archive_path()
        assert archive_file is None

    def test_get_text_none_before_parse(
        self,
        remote_parser: RemoteDocumentParser,
    ) -> None:
        """Before any ``parse()`` call, ``get_text()`` is ``None``."""
        extracted = remote_parser.get_text()
        assert extracted is None

    def test_get_date_always_none(
        self,
        remote_parser: RemoteDocumentParser,
        sample_pdf_file: Path,
        azure_client: Mock,
    ) -> None:
        """``get_date()`` is ``None`` after a successful parse."""
        remote_parser.parse(sample_pdf_file, "application/pdf")
        parsed_date = remote_parser.get_date()
        assert parsed_date is None
 # ---------------------------------------------------------------------------
 # parse() — Azure failure path
 # ---------------------------------------------------------------------------
 class TestRemoteParserParseError:
    """parse() behaviour while the ``failing_azure_client`` fixture is active.

    NOTE(review): the fixture is defined outside this section; presumably it
    makes the Azure client fail during analysis — confirm in conftest.
    """

    def test_parse_returns_none_on_azure_error(
        self,
        remote_parser: RemoteDocumentParser,
        sample_pdf_file: Path,
        failing_azure_client: Mock,
    ) -> None:
        """get_text() remains None after a parse through the failing client."""
        remote_parser.parse(sample_pdf_file, "application/pdf")

        extracted = remote_parser.get_text()
        assert extracted is None

    def test_parse_closes_client_on_error(
        self,
        remote_parser: RemoteDocumentParser,
        sample_pdf_file: Path,
        failing_azure_client: Mock,
    ) -> None:
        """``close`` is still called exactly once on the failing client mock."""
        remote_parser.parse(sample_pdf_file, "application/pdf")

        failing_azure_client.close.assert_called_once()

    def test_parse_logs_error_on_azure_failure(
        self,
        remote_parser: RemoteDocumentParser,
        sample_pdf_file: Path,
        failing_azure_client: Mock,
        mocker: MockerFixture,
    ) -> None:
        """
        GIVEN:
            - The module-level logger of ``paperless.parsers.remote`` is patched
        WHEN:
            - The sample PDF is parsed through the failing client
        THEN:
            - ``logger.error`` is called exactly once
            - Its first positional argument mentions the Azure parsing failure
        """
        logger_mock = mocker.patch("paperless.parsers.remote.logger")

        remote_parser.parse(sample_pdf_file, "application/pdf")

        logger_mock.error.assert_called_once()
        # call_args.args is the positional-argument tuple of the last call.
        logged_message = logger_mock.error.call_args.args[0]
        assert "Azure AI Vision parsing failed" in logged_message
 # ---------------------------------------------------------------------------
 # get_page_count()
 # ---------------------------------------------------------------------------
 class TestRemoteParserPageCount:
    """get_page_count() for a PDF, a non-PDF MIME type, and an unreadable PDF."""

    def test_page_count_for_pdf(
        self,
        remote_parser: RemoteDocumentParser,
        sample_pdf_file: Path,
    ) -> None:
        """The sample PDF yields an integer page count of at least one."""
        page_total = remote_parser.get_page_count(
            sample_pdf_file,
            "application/pdf",
        )
        assert isinstance(page_total, int)
        assert page_total >= 1

    def test_page_count_returns_none_for_image_mime(
        self,
        remote_parser: RemoteDocumentParser,
        sample_pdf_file: Path,
    ) -> None:
        """Passing ``image/png`` as the MIME type yields None."""
        page_total = remote_parser.get_page_count(sample_pdf_file, "image/png")
        assert page_total is None

    def test_page_count_returns_none_for_invalid_pdf(
        self,
        remote_parser: RemoteDocumentParser,
        tmp_path: Path,
    ) -> None:
        """A file whose bytes are not a PDF yields None instead of raising."""
        corrupt_pdf = tmp_path.joinpath("bad.pdf")
        corrupt_pdf.write_bytes(b"not a pdf at all")

        page_total = remote_parser.get_page_count(corrupt_pdf, "application/pdf")
        assert page_total is None
 # ---------------------------------------------------------------------------
 # extract_metadata()
 # ---------------------------------------------------------------------------
 class TestRemoteParserMetadata:
    """extract_metadata() for non-PDF MIME types, valid PDFs and broken PDFs."""

    def test_extract_metadata_non_pdf_returns_empty(
        self,
        remote_parser: RemoteDocumentParser,
        sample_pdf_file: Path,
    ) -> None:
        """An ``image/png`` MIME type produces an empty list."""
        metadata = remote_parser.extract_metadata(sample_pdf_file, "image/png")
        assert metadata == []

    def test_extract_metadata_pdf_returns_list(
        self,
        remote_parser: RemoteDocumentParser,
        sample_pdf_file: Path,
    ) -> None:
        """The sample PDF produces a list."""
        metadata = remote_parser.extract_metadata(
            sample_pdf_file,
            "application/pdf",
        )
        assert isinstance(metadata, list)

    def test_extract_metadata_pdf_entries_have_required_keys(
        self,
        remote_parser: RemoteDocumentParser,
        sample_pdf_file: Path,
    ) -> None:
        """
        Every returned entry has the four expected keys and a string value.

        NOTE(review): an empty result passes without checking anything.
        """
        required_keys = ("namespace", "prefix", "key", "value")
        metadata = remote_parser.extract_metadata(
            sample_pdf_file,
            "application/pdf",
        )
        for item in metadata:
            for required in required_keys:
                assert required in item
            assert isinstance(item["value"], str)

    def test_extract_metadata_does_not_raise_on_invalid_pdf(
        self,
        remote_parser: RemoteDocumentParser,
        tmp_path: Path,
    ) -> None:
        """A file whose bytes are not a PDF produces an empty list."""
        corrupt_pdf = tmp_path.joinpath("bad.pdf")
        corrupt_pdf.write_bytes(b"not a pdf at all")

        metadata = remote_parser.extract_metadata(corrupt_pdf, "application/pdf")
        assert metadata == []
 # ---------------------------------------------------------------------------
 # Registry integration
 # ---------------------------------------------------------------------------
 class TestRemoteParserRegistry:
    """How RemoteDocumentParser shows up in the parser registry."""

    def test_registered_in_defaults(self) -> None:
        """register_defaults() puts RemoteDocumentParser into ``_builtins``."""
        from paperless.parsers.registry import ParserRegistry

        fresh_registry = ParserRegistry()
        fresh_registry.register_defaults()

        assert RemoteDocumentParser in fresh_registry._builtins

    @pytest.mark.usefixtures("azure_settings")
    def test_get_parser_returns_remote_when_configured(self) -> None:
        """With ``azure_settings`` applied, a PDF resolves to the remote parser."""
        from paperless.parsers.registry import get_parser_registry

        shared_registry = get_parser_registry()
        selected = shared_registry.get_parser_for_file("application/pdf", "doc.pdf")

        assert selected is RemoteDocumentParser

    @pytest.mark.usefixtures("no_engine_settings")
    def test_get_parser_returns_none_for_unsupported_type_when_not_configured(
        self,
    ) -> None:
        """With remote off and a truly unsupported MIME type, registry returns None."""
        from paperless.parsers.registry import ParserRegistry

        fresh_registry = ParserRegistry()
        fresh_registry.register_defaults()

        selected = fresh_registry.get_parser_for_file(
            "application/x-unknown-format",
            "doc.xyz",
        )

        assert selected is None
--- a/src/paperless/tests/parsers/test_tesseract_custom_settings.py
+++ b/src/paperless/tests/parsers/test_tesseract_custom_settings.py
@@ -10,7 +10,7 @@ from paperless.models import CleanChoices
 from paperless.models import ColorConvertChoices
 from paperless.models import ModeChoices
 from paperless.models import OutputTypeChoices
-from paperless_tesseract.parsers import RasterisedDocumentParser
+from paperless.parsers.tesseract import RasterisedDocumentParser
 class TestParserSettingsFromDb(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
--- a/src/paperless/tests/parsers/test_tesseract_parser.py
+++ b/src/paperless/tests/parsers/test_tesseract_parser.py
--- a/src/paperless/tests/parsers/test_text_parser.py
+++ b/src/paperless/tests/parsers/test_text_parser.py
@@ -0,0 +1,264 @@
 """
 Tests for paperless.parsers.text.TextDocumentParser.
 All tests use the context-manager protocol for parser lifecycle.  Sample
 files are provided by session-scoped fixtures defined in conftest.py.
 """
 from __future__ import annotations
 import tempfile
 from pathlib import Path
 import pytest
 from paperless.parsers import ParserContext
 from paperless.parsers import ParserProtocol
 from paperless.parsers.text import TextDocumentParser
 class TestTextParserProtocol:
    """Checks TextDocumentParser against the ParserProtocol contract."""

    def test_isinstance_satisfies_protocol(
        self,
        text_parser: TextDocumentParser,
    ) -> None:
        """The fixture-provided parser is an instance of ParserProtocol."""
        conforms = isinstance(text_parser, ParserProtocol)
        assert conforms

    def test_class_attributes_present(self) -> None:
        """name, version, author and url are non-empty strings on the class."""
        for attr_name in ("name", "version", "author", "url"):
            attr_value = getattr(TextDocumentParser, attr_name)
            assert isinstance(attr_value, str)
            assert attr_value

    def test_supported_mime_types_returns_dict(self) -> None:
        """supported_mime_types() is a dict containing the text and CSV types."""
        supported = TextDocumentParser.supported_mime_types()
        assert isinstance(supported, dict)
        for expected_type in ("text/plain", "text/csv", "application/csv"):
            assert expected_type in supported

    @pytest.mark.parametrize(
        ("mime_type", "expected"),
        [
            ("text/plain", 10),
            ("text/csv", 10),
            ("application/csv", 10),
            ("application/pdf", None),
            ("image/png", None),
        ],
    )
    def test_score(self, mime_type: str, expected: int | None) -> None:
        """score() gives 10 for the text/CSV types and None for the others."""
        actual = TextDocumentParser.score(mime_type, "file.txt")
        assert actual == expected

    def test_can_produce_archive_is_false(
        self,
        text_parser: TextDocumentParser,
    ) -> None:
        """can_produce_archive is the False singleton, not merely falsy."""
        flag = text_parser.can_produce_archive
        assert flag is False

    def test_requires_pdf_rendition_is_false(
        self,
        text_parser: TextDocumentParser,
    ) -> None:
        """requires_pdf_rendition is the False singleton, not merely falsy."""
        flag = text_parser.requires_pdf_rendition
        assert flag is False
 class TestTextParserLifecycle:
    """Context-manager use of TextDocumentParser and removal of its ``_tempdir``."""

    def test_context_manager_cleans_up_tempdir(self) -> None:
        """``_tempdir`` exists inside the ``with`` block and is gone after it."""
        with TextDocumentParser() as active_parser:
            workdir = active_parser._tempdir
            assert workdir.exists()

        assert not workdir.exists()

    def test_context_manager_cleans_up_after_exception(self) -> None:
        """``_tempdir`` is removed even when the ``with`` body raises."""
        workdir: Path | None = None

        # One combined ``with``: the parser exits first (innermost), then
        # pytest.raises consumes the RuntimeError.
        with pytest.raises(RuntimeError), TextDocumentParser() as active_parser:
            workdir = active_parser._tempdir
            raise RuntimeError("boom")

        # Proves the body actually ran far enough to capture the directory.
        assert workdir is not None
        assert not workdir.exists()
 class TestTextParserParse:
    """parse() and the accessors that expose its results."""

    def test_parse_valid_utf8(
        self,
        text_parser: TextDocumentParser,
        sample_txt_file: Path,
    ) -> None:
        """Parsing the sample text file yields its content verbatim."""
        text_parser.configure(ParserContext())
        text_parser.parse(sample_txt_file, "text/plain")

        extracted = text_parser.get_text()
        assert extracted == "This is a test file.\n"

    def test_parse_returns_none_for_archive_path(
        self,
        text_parser: TextDocumentParser,
        sample_txt_file: Path,
    ) -> None:
        """get_archive_path() is None after parsing the sample text file."""
        text_parser.configure(ParserContext())
        text_parser.parse(sample_txt_file, "text/plain")

        archive_path = text_parser.get_archive_path()
        assert archive_path is None

    def test_parse_returns_none_for_date(
        self,
        text_parser: TextDocumentParser,
        sample_txt_file: Path,
    ) -> None:
        """get_date() is None after parsing the sample text file."""
        text_parser.configure(ParserContext())
        text_parser.parse(sample_txt_file, "text/plain")

        detected_date = text_parser.get_date()
        assert detected_date is None

    def test_parse_invalid_utf8_bytes_replaced(
        self,
        text_parser: TextDocumentParser,
        malformed_txt_file: Path,
    ) -> None:
        """
        GIVEN:
            - A text file containing invalid UTF-8 byte sequences
        WHEN:
            - The file is parsed
        THEN:
            - Parsing succeeds
            - Invalid bytes are replaced with the Unicode replacement character
        """
        text_parser.configure(ParserContext())
        text_parser.parse(malformed_txt_file, "text/plain")

        extracted = text_parser.get_text()
        assert extracted == "Pantothens\ufffdure\n"

    def test_get_text_none_before_parse(
        self,
        text_parser: TextDocumentParser,
    ) -> None:
        """get_text() is None on a parser that has not parsed anything yet."""
        text_before_parse = text_parser.get_text()
        assert text_before_parse is None
 class TestTextParserThumbnail:
    """Verify thumbnail generation and the page-count accessor."""

    def test_thumbnail_exists_and_is_file(
        self,
        text_parser: TextDocumentParser,
        sample_txt_file: Path,
    ) -> None:
        """get_thumbnail() returns a path to an existing regular file."""
        thumb = text_parser.get_thumbnail(sample_txt_file, "text/plain")
        assert thumb.exists()
        assert thumb.is_file()

    def test_thumbnail_large_file_does_not_read_all(
        self,
        text_parser: TextDocumentParser,
    ) -> None:
        """
        GIVEN:
            - A text file larger than 50 MB
        WHEN:
            - A thumbnail is requested
        THEN:
            - A thumbnail file is produced

        NOTE(review): only the existence of the thumbnail is asserted; the
        test does not itself measure how much of the input file is read.
        """
        large_file: Path | None = None
        try:
            # delete=False keeps the file on disk after the handle is closed
            # so the parser can reopen it by path.  The path is captured
            # before writing so the ``finally`` below removes the file even
            # if the 51 MiB write fails part-way.
            with tempfile.NamedTemporaryFile(
                delete=False,
                mode="w",
                encoding="utf-8",
                suffix=".txt",
            ) as tmp:
                large_file = Path(tmp.name)
                tmp.write("A" * (51 * 1024 * 1024))

            thumb = text_parser.get_thumbnail(large_file, "text/plain")
            assert thumb.exists()
            assert thumb.is_file()
        finally:
            if large_file is not None:
                large_file.unlink(missing_ok=True)

    def test_get_page_count_returns_none(
        self,
        text_parser: TextDocumentParser,
        sample_txt_file: Path,
    ) -> None:
        """get_page_count() is None for the sample text file."""
        assert text_parser.get_page_count(sample_txt_file, "text/plain") is None
 class TestTextParserMetadata:
    """extract_metadata() on the text parser."""

    def test_extract_metadata_returns_empty_list(
        self,
        text_parser: TextDocumentParser,
        sample_txt_file: Path,
    ) -> None:
        """The sample text file yields an empty list."""
        metadata = text_parser.extract_metadata(sample_txt_file, "text/plain")
        assert metadata == []

    def test_extract_metadata_returns_list_type(
        self,
        text_parser: TextDocumentParser,
        sample_txt_file: Path,
    ) -> None:
        """The return value is a ``list`` instance."""
        metadata = text_parser.extract_metadata(sample_txt_file, "text/plain")
        assert isinstance(metadata, list)

    def test_extract_metadata_ignores_mime_type(
        self,
        text_parser: TextDocumentParser,
        sample_txt_file: Path,
    ) -> None:
        """extract_metadata returns [] regardless of the mime_type argument."""
        for mime_type in ("application/pdf", "text/csv"):
            metadata = text_parser.extract_metadata(sample_txt_file, mime_type)
            assert metadata == []
 class TestTextParserRegistry:
    """How TextDocumentParser shows up in the parser registry."""

    def test_registered_in_defaults(self) -> None:
        """register_defaults() puts TextDocumentParser into ``_builtins``."""
        from paperless.parsers.registry import ParserRegistry

        fresh_registry = ParserRegistry()
        fresh_registry.register_defaults()

        assert TextDocumentParser in fresh_registry._builtins

    def test_get_parser_for_text_plain(self) -> None:
        """``text/plain`` resolves to TextDocumentParser."""
        from paperless.parsers.registry import get_parser_registry

        shared_registry = get_parser_registry()
        selected = shared_registry.get_parser_for_file("text/plain", "doc.txt")

        assert selected is TextDocumentParser

    def test_get_parser_for_text_csv(self) -> None:
        """``text/csv`` resolves to TextDocumentParser."""
        from paperless.parsers.registry import get_parser_registry

        shared_registry = get_parser_registry()
        selected = shared_registry.get_parser_for_file("text/csv", "data.csv")

        assert selected is TextDocumentParser

    def test_get_parser_for_unknown_type_returns_none(self) -> None:
        """An unknown MIME type resolves to no parser at all."""
        from paperless.parsers.registry import get_parser_registry

        shared_registry = get_parser_registry()
        selected = shared_registry.get_parser_for_file(
            "application/x-unknown-format",
            "doc.xyz",
        )

        assert selected is None
--- a/src/paperless/tests/parsers/test_tika_liva.py
+++ b/src/paperless/tests/parsers/test_tika_liva.py
@@ -4,7 +4,7 @@ from pathlib import Path
 import pytest
 from documents.tests.utils import util_call_with_backoff
-from paperless_tika.parsers import TikaDocumentParser
+from paperless.parsers.tika import TikaDocumentParser
@pytest.mark.skipif(
@@ -42,14 +42,15 @@ class TestTikaParserAgainstServer:
        )
        assert (
-            tika_parser.text
+            tika_parser.get_text()
            == "This is an ODT test document, created September 14, 2022"
        )
-        assert tika_parser.archive_path is not None
+        archive = tika_parser.get_archive_path()
-        assert b"PDF-" in tika_parser.archive_path.read_bytes()[:10]
+        assert archive is not None
+        assert b"PDF-" in archive.read_bytes()[:10]
        # TODO: Unsure what can set the Creation-Date field in a document, enable when possible
-        # self.assertEqual(tika_parser.date, datetime.datetime(2022, 9, 14))
+        # self.assertEqual(tika_parser.get_date(), datetime.datetime(2022, 9, 14))
    def test_basic_parse_docx(
        self,
@@ -74,14 +75,15 @@ class TestTikaParserAgainstServer:
        )
        assert (
-            tika_parser.text
+            tika_parser.get_text()
            == "This is an DOCX test document, also made September 14, 2022"
        )
-        assert tika_parser.archive_path is not None
+        archive = tika_parser.get_archive_path()
-        with Path(tika_parser.archive_path).open("rb") as f:
+        assert archive is not None
+        with archive.open("rb") as f:
            assert b"PDF-" in f.read()[:10]
-        # self.assertEqual(tika_parser.date, datetime.datetime(2022, 9, 14))
+        # self.assertEqual(tika_parser.get_date(), datetime.datetime(2022, 9, 14))
    def test_basic_parse_doc(
        self,
@@ -102,13 +104,12 @@ class TestTikaParserAgainstServer:
            [sample_doc_file, "application/msword"],
        )
-        assert tika_parser.text is not None
+        text = tika_parser.get_text()
-        assert (
+        assert text is not None
-            "This is a test document, saved in the older .doc format"
+        assert "This is a test document, saved in the older .doc format" in text
-            in tika_parser.text
+        archive = tika_parser.get_archive_path()
-        )
+        assert archive is not None
-        assert tika_parser.archive_path is not None
+        with archive.open("rb") as f:
-        with Path(tika_parser.archive_path).open("rb") as f:
            assert b"PDF-" in f.read()[:10]
    def test_tika_fails_multi_part(
@@ -133,6 +134,7 @@ class TestTikaParserAgainstServer:
            [sample_broken_odt, "application/vnd.oasis.opendocument.text"],
        )
-        assert tika_parser.archive_path is not None
+        archive = tika_parser.get_archive_path()
-        with Path(tika_parser.archive_path).open("rb") as f:
+        assert archive is not None
+        with archive.open("rb") as f:
            assert b"PDF-" in f.read()[:10]
--- a/src/paperless/tests/parsers/test_tika_parser.py
+++ b/src/paperless/tests/parsers/test_tika_parser.py
@@ -9,7 +9,80 @@ from pytest_django.fixtures import SettingsWrapper
 from pytest_httpx import HTTPXMock
 from documents.parsers import ParseError
-from paperless_tika.parsers import TikaDocumentParser
+from paperless.parsers import ParserContext
 from paperless.parsers import ParserProtocol
 from paperless.parsers.tika import TikaDocumentParser
 class TestTikaParserRegistryInterface:
    """Checks TikaDocumentParser against the ParserProtocol-facing interface."""

    def test_satisfies_parser_protocol(self) -> None:
        """A freshly constructed parser is an instance of ParserProtocol."""
        parser = TikaDocumentParser()
        assert isinstance(parser, ParserProtocol)

    def test_supported_mime_types_is_classmethod(self) -> None:
        """supported_mime_types() works on the class and gives a non-empty dict."""
        supported = TikaDocumentParser.supported_mime_types()
        assert isinstance(supported, dict)
        assert len(supported) > 0

    def test_score_returns_none_when_tika_disabled(
        self,
        settings: SettingsWrapper,
    ) -> None:
        """With TIKA_ENABLED off, an ODT document gets no score."""
        settings.TIKA_ENABLED = False

        odt_score = TikaDocumentParser.score(
            "application/vnd.oasis.opendocument.text",
            "sample.odt",
        )

        assert odt_score is None

    def test_score_returns_int_when_tika_enabled(
        self,
        settings: SettingsWrapper,
    ) -> None:
        """With TIKA_ENABLED on, an ODT document gets an integer score."""
        settings.TIKA_ENABLED = True

        odt_score = TikaDocumentParser.score(
            "application/vnd.oasis.opendocument.text",
            "sample.odt",
        )

        assert isinstance(odt_score, int)

    def test_score_returns_none_for_unsupported_mime(
        self,
        settings: SettingsWrapper,
    ) -> None:
        """Even with TIKA_ENABLED on, ``application/pdf`` gets no score."""
        settings.TIKA_ENABLED = True

        pdf_score = TikaDocumentParser.score("application/pdf", "doc.pdf")

        assert pdf_score is None

    def test_can_produce_archive_is_false(self) -> None:
        """can_produce_archive is the False singleton, not merely falsy."""
        parser = TikaDocumentParser()
        assert parser.can_produce_archive is False

    def test_requires_pdf_rendition_is_true(self) -> None:
        """requires_pdf_rendition is the True singleton, not merely truthy."""
        parser = TikaDocumentParser()
        assert parser.requires_pdf_rendition is True

    def test_get_page_count_returns_none_without_archive(
        self,
        tika_parser: TikaDocumentParser,
        sample_odt_file: Path,
    ) -> None:
        """get_page_count() is None when nothing has set an archive path."""
        page_total = tika_parser.get_page_count(
            sample_odt_file,
            "application/vnd.oasis.opendocument.text",
        )
        assert page_total is None

    def test_get_page_count_returns_int_with_pdf_archive(
        self,
        tika_parser: TikaDocumentParser,
        sample_pdf_file: Path,
    ) -> None:
        """With ``_archive_path`` set to a PDF, a positive int is returned."""
        # Set the private attribute directly so no parse() call is needed.
        tika_parser._archive_path = sample_pdf_file

        page_total = tika_parser.get_page_count(sample_pdf_file, "application/pdf")

        assert isinstance(page_total, int)
        assert page_total > 0
@pytest.mark.django_db()
@@ -34,14 +107,15 @@ class TestTikaParser:
        # Pretend convert to PDF response
        httpx_mock.add_response(content=b"PDF document")
+        tika_parser.configure(ParserContext())
        tika_parser.parse(sample_odt_file, "application/vnd.oasis.opendocument.text")
-        assert tika_parser.text == "the content"
+        assert tika_parser.get_text() == "the content"
-        assert tika_parser.archive_path is not None
+        assert tika_parser.get_archive_path() is not None
-        with Path(tika_parser.archive_path).open("rb") as f:
+        with Path(tika_parser.get_archive_path()).open("rb") as f:
            assert f.read() == b"PDF document"
-        assert tika_parser.date == datetime.datetime(
+        assert tika_parser.get_date() == datetime.datetime(
            2020,
            11,
            21,
@@ -89,7 +163,7 @@ class TestTikaParser:
        httpx_mock.add_response(status_code=HTTPStatus.INTERNAL_SERVER_ERROR)
        with pytest.raises(ParseError):
-            tika_parser.convert_to_pdf(sample_odt_file, None)
+            tika_parser._convert_to_pdf(sample_odt_file)
    @pytest.mark.parametrize(
        ("setting_value", "expected_form_value"),
@@ -106,7 +180,6 @@ class TestTikaParser:
        expected_form_value: str,
        httpx_mock: HTTPXMock,
        settings: SettingsWrapper,
-        tika_parser: TikaDocumentParser,
        sample_odt_file: Path,
    ) -> None:
        """
@@ -117,6 +190,8 @@ class TestTikaParser:
        THEN:
            - Request to Gotenberg contains the expected PDF/A format string
        """
        # Parser must be created after the setting is changed so that
        # OutputTypeConfig reads the correct value at __init__ time.
        settings.OCR_OUTPUT_TYPE = setting_value
        httpx_mock.add_response(
            status_code=codes.OK,
@@ -124,7 +199,8 @@ class TestTikaParser:
            method="POST",
        )
-        tika_parser.convert_to_pdf(sample_odt_file, None)
+        with TikaDocumentParser() as parser:
+            parser._convert_to_pdf(sample_odt_file)
        request = httpx_mock.get_request()
--- a/src/paperless/tests/samples/mail/broken.eml
+++ b/src/paperless/tests/samples/mail/broken.eml
--- a/Show More
+++ b/Show More