Locks down permissions to the job level with the least privilege we can get away with

2026-03-04 00:06:25 +00:00 · 2026-03-02 10:27:54 -08:00
55 changed files with 1146 additions and 2593 deletions
--- a/.github/workflows/ci-backend.yml
+++ b/.github/workflows/ci-backend.yml
@@ -22,6 +22,7 @@ on:
 concurrency:
  group: backend-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
+permissions: {}
 env:
  DEFAULT_UV_VERSION: "0.10.x"
  NLTK_DATA: "/usr/share/nltk_data"
@@ -29,24 +30,26 @@ jobs:
  test:
    name: "Python ${{ matrix.python-version }}"
    runs-on: ubuntu-24.04
+    permissions:
+      contents: read
    strategy:
      matrix:
        python-version: ['3.10', '3.11', '3.12']
      fail-fast: false
    steps:
      - name: Checkout
-        uses: actions/checkout@v6.0.2
+        uses: actions/checkout@v6
      - name: Start containers
        run: |
          docker compose --file docker/compose/docker-compose.ci-test.yml pull --quiet
          docker compose --file docker/compose/docker-compose.ci-test.yml up --detach
      - name: Set up Python
        id: setup-python
-        uses: actions/setup-python@v6.2.0
+        uses: actions/setup-python@v6
        with:
          python-version: "${{ matrix.python-version }}"
      - name: Install uv
-        uses: astral-sh/setup-uv@v7.3.1
+        uses: astral-sh/setup-uv@v7
        with:
          version: ${{ env.DEFAULT_UV_VERSION }}
          enable-cache: true
@@ -83,13 +86,13 @@ jobs:
            pytest
      - name: Upload test results to Codecov
        if: always()
-        uses: codecov/codecov-action@v5.5.2
+        uses: codecov/codecov-action@v5
        with:
          flags: backend-python-${{ matrix.python-version }}
          files: junit.xml
          report_type: test_results
      - name: Upload coverage to Codecov
-        uses: codecov/codecov-action@v5.5.2
+        uses: codecov/codecov-action@v5
        with:
          flags: backend-python-${{ matrix.python-version }}
          files: coverage.xml
@@ -102,18 +105,20 @@ jobs:
  typing:
    name: Check project typing
    runs-on: ubuntu-24.04
+    permissions:
+      contents: read
    env:
      DEFAULT_PYTHON: "3.12"
    steps:
      - name: Checkout
-        uses: actions/checkout@v6.0.2
+        uses: actions/checkout@v6.0.1
      - name: Set up Python
        id: setup-python
        uses: actions/setup-python@v6.2.0
        with:
          python-version: "${{ env.DEFAULT_PYTHON }}"
      - name: Install uv
-        uses: astral-sh/setup-uv@v7.3.1
+        uses: astral-sh/setup-uv@v7.2.1
        with:
          version: ${{ env.DEFAULT_UV_VERSION }}
          enable-cache: true
--- a/.github/workflows/ci-docker.yml
+++ b/.github/workflows/ci-docker.yml
@@ -15,6 +15,7 @@ on:
 concurrency:
  group: docker-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
+permissions: {}
 env:
  REGISTRY: ghcr.io
 jobs:
@@ -41,7 +42,7 @@ jobs:
      ref-name: ${{ steps.ref.outputs.name }}
    steps:
      - name: Checkout
-        uses: actions/checkout@v6.0.2
+        uses: actions/checkout@v6.0.1
      - name: Determine ref name
        id: ref
        run: |
@@ -130,7 +131,7 @@ jobs:
            type=semver,pattern={{major}}.{{minor}}
      - name: Build and push by digest
        id: build
-        uses: docker/build-push-action@v6.19.2
+        uses: docker/build-push-action@v6.18.0
        with:
          context: .
          file: ./Dockerfile
@@ -152,7 +153,7 @@ jobs:
          touch "/tmp/digests/${digest#sha256:}"
      - name: Upload digest
        if: steps.check-push.outputs.should-push == 'true'
-        uses: actions/upload-artifact@v7.0.0
+        uses: actions/upload-artifact@v6.0.0
        with:
          name: digests-${{ matrix.arch }}
          path: /tmp/digests/*
@@ -168,7 +169,7 @@ jobs:
      packages: write
    steps:
      - name: Download digests
-        uses: actions/download-artifact@v8.0.0
+        uses: actions/download-artifact@v7.0.0
        with:
          path: /tmp/digests
          pattern: digests-*
--- a/.github/workflows/ci-docs.yml
+++ b/.github/workflows/ci-docs.yml
@@ -21,10 +21,7 @@ on:
 concurrency:
  group: docs-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
-permissions:
-  contents: read
-  pages: write
-  id-token: write
+permissions: {}
 env:
  DEFAULT_UV_VERSION: "0.10.x"
  DEFAULT_PYTHON_VERSION: "3.12"
@@ -32,17 +29,19 @@ jobs:
  build:
    name: Build Documentation
    runs-on: ubuntu-24.04
+    permissions:
+      contents: read
    steps:
-      - uses: actions/configure-pages@v5.0.0
+      - uses: actions/configure-pages@v5
      - name: Checkout
-        uses: actions/checkout@v6.0.2
+        uses: actions/checkout@v6
      - name: Set up Python
        id: setup-python
-        uses: actions/setup-python@v6.2.0
+        uses: actions/setup-python@v6
        with:
          python-version: ${{ env.DEFAULT_PYTHON_VERSION }}
      - name: Install uv
-        uses: astral-sh/setup-uv@v7.3.1
+        uses: astral-sh/setup-uv@v7
        with:
          version: ${{ env.DEFAULT_UV_VERSION }}
          enable-cache: true
@@ -58,7 +57,7 @@ jobs:
            --frozen \
            zensical build --clean
      - name: Upload GitHub Pages artifact
-        uses: actions/upload-pages-artifact@v4.0.0
+        uses: actions/upload-pages-artifact@v4
        with:
          path: site
          name: github-pages-${{ github.run_id }}-${{ github.run_attempt }}
@@ -67,12 +66,16 @@ jobs:
    needs: build
    if: github.event_name == 'push' && github.ref == 'refs/heads/main'
    runs-on: ubuntu-24.04
+    permissions:
+      contents: read
+      pages: write
+      id-token: write
    environment:
      name: github-pages
      url: ${{ steps.deployment.outputs.page_url }}
    steps:
      - name: Deploy GitHub Pages
-        uses: actions/deploy-pages@v4.0.5
+        uses: actions/deploy-pages@v4
        id: deployment
        with:
          artifact_name: github-pages-${{ github.run_id }}-${{ github.run_attempt }}
--- a/.github/workflows/ci-frontend.yml
+++ b/.github/workflows/ci-frontend.yml
@@ -16,26 +16,29 @@ on:
 concurrency:
  group: frontend-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
+permissions: {}
 jobs:
  install-dependencies:
    name: Install Dependencies
    runs-on: ubuntu-24.04
+    permissions:
+      contents: read
    steps:
      - name: Checkout
-        uses: actions/checkout@v6.0.2
+        uses: actions/checkout@v6
      - name: Install pnpm
-        uses: pnpm/action-setup@v4.2.0
+        uses: pnpm/action-setup@v4
        with:
          version: 10
      - name: Use Node.js 24
-        uses: actions/setup-node@v6.2.0
+        uses: actions/setup-node@v6
        with:
          node-version: 24.x
          cache: 'pnpm'
          cache-dependency-path: 'src-ui/pnpm-lock.yaml'
      - name: Cache frontend dependencies
        id: cache-frontend-deps
-        uses: actions/cache@v5.0.3
+        uses: actions/cache@v5
        with:
          path: |
            ~/.pnpm-store
@@ -47,21 +50,23 @@ jobs:
    name: Lint
    needs: install-dependencies
    runs-on: ubuntu-24.04
+    permissions:
+      contents: read
    steps:
      - name: Checkout
-        uses: actions/checkout@v6.0.2
+        uses: actions/checkout@v6
      - name: Install pnpm
-        uses: pnpm/action-setup@v4.2.0
+        uses: pnpm/action-setup@v4
        with:
          version: 10
      - name: Use Node.js 24
-        uses: actions/setup-node@v6.2.0
+        uses: actions/setup-node@v6
        with:
          node-version: 24.x
          cache: 'pnpm'
          cache-dependency-path: 'src-ui/pnpm-lock.yaml'
      - name: Cache frontend dependencies
-        uses: actions/cache@v5.0.3
+        uses: actions/cache@v5
        with:
          path: |
            ~/.pnpm-store
@@ -75,6 +80,8 @@ jobs:
    name: "Unit Tests (${{ matrix.shard-index }}/${{ matrix.shard-count }})"
    needs: install-dependencies
    runs-on: ubuntu-24.04
+    permissions:
+      contents: read
    strategy:
      fail-fast: false
      matrix:
@@ -83,19 +90,19 @@ jobs:
        shard-count: [4]
    steps:
      - name: Checkout
-        uses: actions/checkout@v6.0.2
+        uses: actions/checkout@v6
      - name: Install pnpm
-        uses: pnpm/action-setup@v4.2.0
+        uses: pnpm/action-setup@v4
        with:
          version: 10
      - name: Use Node.js 24
-        uses: actions/setup-node@v6.2.0
+        uses: actions/setup-node@v6
        with:
          node-version: 24.x
          cache: 'pnpm'
          cache-dependency-path: 'src-ui/pnpm-lock.yaml'
      - name: Cache frontend dependencies
-        uses: actions/cache@v5.0.3
+        uses: actions/cache@v5
        with:
          path: |
            ~/.pnpm-store
@@ -107,13 +114,13 @@ jobs:
        run: cd src-ui && pnpm run test --max-workers=2 --shard=${{ matrix.shard-index }}/${{ matrix.shard-count }}
      - name: Upload test results to Codecov
        if: always()
-        uses: codecov/codecov-action@v5.5.2
+        uses: codecov/codecov-action@v5
        with:
          flags: frontend-node-${{ matrix.node-version }}
          directory: src-ui/
          report_type: test_results
      - name: Upload coverage to Codecov
-        uses: codecov/codecov-action@v5.5.2
+        uses: codecov/codecov-action@v5
        with:
          flags: frontend-node-${{ matrix.node-version }}
          directory: src-ui/coverage/
@@ -121,6 +128,8 @@ jobs:
    name: "E2E Tests (${{ matrix.shard-index }}/${{ matrix.shard-count }})"
    needs: install-dependencies
    runs-on: ubuntu-24.04
+    permissions:
+      contents: read
    container: mcr.microsoft.com/playwright:v1.58.2-noble
    env:
      PLAYWRIGHT_BROWSERS_PATH: /ms-playwright
@@ -133,19 +142,19 @@ jobs:
        shard-count: [2]
    steps:
      - name: Checkout
-        uses: actions/checkout@v6.0.2
+        uses: actions/checkout@v6
      - name: Install pnpm
-        uses: pnpm/action-setup@v4.2.0
+        uses: pnpm/action-setup@v4
        with:
          version: 10
      - name: Use Node.js 24
-        uses: actions/setup-node@v6.2.0
+        uses: actions/setup-node@v6
        with:
          node-version: 24.x
          cache: 'pnpm'
          cache-dependency-path: 'src-ui/pnpm-lock.yaml'
      - name: Cache frontend dependencies
-        uses: actions/cache@v5.0.3
+        uses: actions/cache@v5
        with:
          path: |
            ~/.pnpm-store
@@ -161,21 +170,23 @@ jobs:
    name: Bundle Analysis
    needs: [unit-tests, e2e-tests]
    runs-on: ubuntu-24.04
+    permissions:
+      contents: read
    steps:
      - name: Checkout
-        uses: actions/checkout@v6.0.2
+        uses: actions/checkout@v6
      - name: Install pnpm
-        uses: pnpm/action-setup@v4.2.0
+        uses: pnpm/action-setup@v4
        with:
          version: 10
      - name: Use Node.js 24
-        uses: actions/setup-node@v6.2.0
+        uses: actions/setup-node@v6
        with:
          node-version: 24.x
          cache: 'pnpm'
          cache-dependency-path: 'src-ui/pnpm-lock.yaml'
      - name: Cache frontend dependencies
-        uses: actions/cache@v5.0.3
+        uses: actions/cache@v5
        with:
          path: |
            ~/.pnpm-store
--- a/.github/workflows/ci-lint.yml
+++ b/.github/workflows/ci-lint.yml
@@ -9,10 +9,13 @@ on:
 concurrency:
  group: lint-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
+permissions: {}
 jobs:
  lint:
    name: Linting via prek
    runs-on: ubuntu-slim
+    permissions:
+      contents: read
    steps:
      - name: Checkout
        uses: actions/checkout@v6.0.2
--- a/.github/workflows/ci-release.yml
+++ b/.github/workflows/ci-release.yml
@@ -7,6 +7,7 @@ on:
 concurrency:
  group: release-${{ github.ref }}
  cancel-in-progress: false
+permissions: {}
 env:
  DEFAULT_UV_VERSION: "0.10.x"
  DEFAULT_PYTHON_VERSION: "3.12"
@@ -14,6 +15,10 @@ jobs:
  wait-for-docker:
    name: Wait for Docker Build
    runs-on: ubuntu-24.04
+    permissions:
+      # lewagon/wait-on-check-action reads workflow check runs
+      actions: read
+      contents: read
    steps:
      - name: Wait for Docker build
        uses: lewagon/wait-on-check-action@v1.5.0
@@ -26,16 +31,18 @@ jobs:
    name: Build Release
    needs: wait-for-docker
    runs-on: ubuntu-24.04
+    permissions:
+      contents: read
    steps:
      - name: Checkout
-        uses: actions/checkout@v6.0.2
+        uses: actions/checkout@v6
      # ---- Frontend Build ----
      - name: Install pnpm
-        uses: pnpm/action-setup@v4.2.0
+        uses: pnpm/action-setup@v4
        with:
          version: 10
      - name: Use Node.js 24
-        uses: actions/setup-node@v6.2.0
+        uses: actions/setup-node@v6
        with:
          node-version: 24.x
          cache: 'pnpm'
@@ -47,11 +54,11 @@ jobs:
      # ---- Backend Setup ----
      - name: Set up Python
        id: setup-python
-        uses: actions/setup-python@v6.2.0
+        uses: actions/setup-python@v6
        with:
          python-version: ${{ env.DEFAULT_PYTHON_VERSION }}
      - name: Install uv
-        uses: astral-sh/setup-uv@v7.3.1
+        uses: astral-sh/setup-uv@v7
        with:
          version: ${{ env.DEFAULT_UV_VERSION }}
          enable-cache: true
@@ -118,7 +125,7 @@ jobs:
          sudo chown -R 1000:1000 paperless-ngx/
          tar -cJf paperless-ngx.tar.xz paperless-ngx/
      - name: Upload release artifact
-        uses: actions/upload-artifact@v7.0.0
+        uses: actions/upload-artifact@v6
        with:
          name: release
          path: dist/paperless-ngx.tar.xz
@@ -127,13 +134,17 @@ jobs:
    name: Publish Release
    needs: build-release
    runs-on: ubuntu-24.04
+    permissions:
+      # release-drafter reads PRs to build the changelog and creates/publishes the release
+      contents: write
+      pull-requests: read
    outputs:
      prerelease: ${{ steps.get-version.outputs.prerelease }}
      changelog: ${{ steps.create-release.outputs.body }}
      version: ${{ steps.get-version.outputs.version }}
    steps:
      - name: Download release artifact
-        uses: actions/download-artifact@v8.0.0
+        uses: actions/download-artifact@v7
        with:
          name: release
          path: ./
@@ -148,7 +159,7 @@ jobs:
          fi
      - name: Create release and changelog
        id: create-release
-        uses: release-drafter/release-drafter@v6.2.0
+        uses: release-drafter/release-drafter@v6
        with:
          name: Paperless-ngx ${{ steps.get-version.outputs.version }}
          tag: ${{ steps.get-version.outputs.version }}
@@ -159,7 +170,7 @@ jobs:
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      - name: Upload release archive
-        uses: shogo82148/actions-upload-release-asset@v1.9.2
+        uses: shogo82148/actions-upload-release-asset@v1
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          upload_url: ${{ steps.create-release.outputs.upload_url }}
@@ -174,18 +185,23 @@ jobs:
    needs: publish-release
    if: needs.publish-release.outputs.prerelease == 'false'
    runs-on: ubuntu-24.04
+    permissions:
+      # git push of the changelog branch requires contents: write
+      # github.rest.pulls.create() and github.rest.issues.addLabels() require pull-requests: write
+      contents: write
+      pull-requests: write
    steps:
      - name: Checkout
-        uses: actions/checkout@v6.0.2
+        uses: actions/checkout@v6
        with:
          ref: main
      - name: Set up Python
        id: setup-python
-        uses: actions/setup-python@v6.2.0
+        uses: actions/setup-python@v6
        with:
          python-version: ${{ env.DEFAULT_PYTHON_VERSION }}
      - name: Install uv
-        uses: astral-sh/setup-uv@v7.3.1
+        uses: astral-sh/setup-uv@v7
        with:
          version: ${{ env.DEFAULT_UV_VERSION }}
          enable-cache: true
@@ -218,7 +234,7 @@ jobs:
          git commit -am "Changelog ${{ needs.publish-release.outputs.version }} - GHA"
          git push origin ${{ needs.publish-release.outputs.version }}-changelog
      - name: Create pull request
-        uses: actions/github-script@v8.0.0
+        uses: actions/github-script@v8
        with:
          script: |
            const { repo, owner } = context.repo;
--- a/.github/workflows/cleanup-tags.yml
+++ b/.github/workflows/cleanup-tags.yml
@@ -12,6 +12,7 @@ on:
 concurrency:
  group: registry-tags-cleanup
  cancel-in-progress: false
+permissions: {}
 jobs:
  cleanup-images:
    name: Cleanup Image Tags for ${{ matrix.primary-name }}
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -18,6 +18,7 @@ on:
    branches: [dev]
  schedule:
    - cron: '28 13 * * 5'
+permissions: {}
 jobs:
  analyze:
    name: Analyze
@@ -34,10 +35,10 @@ jobs:
        # Learn more about CodeQL language support at https://git.io/codeql-language-support
    steps:
      - name: Checkout repository
-        uses: actions/checkout@v6.0.2
+        uses: actions/checkout@v6
      # Initializes the CodeQL tools for scanning.
      - name: Initialize CodeQL
-        uses: github/codeql-action/init@v4.32.5
+        uses: github/codeql-action/init@v4
        with:
          languages: ${{ matrix.language }}
          # If you wish to specify custom queries, you can do so here or in a config file.
@@ -45,4 +46,4 @@ jobs:
          # Prefix the list here with "+" to use these queries and those in the config file.
          # queries: ./path/to/local/query, your-org/your-repo/queries@main
      - name: Perform CodeQL Analysis
-        uses: github/codeql-action/analyze@v4.32.5
+        uses: github/codeql-action/analyze@v4
--- a/.github/workflows/crowdin.yml
+++ b/.github/workflows/crowdin.yml
@@ -6,18 +6,23 @@ on:
  push:
    paths: ['src/locale/**', 'src-ui/messages.xlf', 'src-ui/src/locale/**']
    branches: [dev]
+permissions: {}
 jobs:
  synchronize-with-crowdin:
    name: Crowdin Sync
    if: github.repository_owner == 'paperless-ngx'
    runs-on: ubuntu-24.04
+    permissions:
+      # Crowdin action pushes translation branches and creates/updates PRs via GITHUB_TOKEN
+      contents: write
+      pull-requests: write
    steps:
      - name: Checkout
-        uses: actions/checkout@v6.0.2
+        uses: actions/checkout@v6
        with:
          token: ${{ secrets.PNGX_BOT_PAT }}
      - name: crowdin action
-        uses: crowdin/github-action@v2.15.0
+        uses: crowdin/github-action@v2
        with:
          upload_translations: false
          download_translations: true
--- a/.github/workflows/pr-bot.yml
+++ b/.github/workflows/pr-bot.yml
@@ -2,17 +2,19 @@ name: PR Bot
 on:
  pull_request_target:
    types: [opened]
-permissions:
-  contents: read
-  pull-requests: write
+permissions: {}
 jobs:
  pr-bot:
    name: Automated PR Bot
    runs-on: ubuntu-latest
+    permissions:
+      # labeler reads file paths; all steps add labels or post comments on PRs
+      contents: read
+      pull-requests: write
    steps:
      - name: Label PR by file path or branch name
        # see .github/labeler.yml for the labeler config
-        uses: actions/labeler@v6.0.1
+        uses: actions/labeler@v6
        with:
          repo-token: ${{ secrets.GITHUB_TOKEN }}
      - name: Label by size
@@ -26,7 +28,7 @@ jobs:
          fail_if_xl: 'false'
          excluded_files: /\.lock$/ /\.txt$/ ^src-ui/pnpm-lock\.yaml$ ^src-ui/messages\.xlf$ ^src/locale/en_US/LC_MESSAGES/django\.po$
      - name: Label by PR title
-        uses: actions/github-script@v8.0.0
+        uses: actions/github-script@v8
        with:
          script: |
            const pr = context.payload.pull_request;
@@ -52,7 +54,7 @@ jobs:
            }
      - name: Label bot-generated PRs
        if: ${{ contains(github.actor, 'dependabot') || contains(github.actor, 'crowdin-bot') }}
-        uses: actions/github-script@v8.0.0
+        uses: actions/github-script@v8
        with:
          script: |
            const pr = context.payload.pull_request;
@@ -77,7 +79,7 @@ jobs:
            }
      - name: Welcome comment
        if: ${{ !contains(github.actor, 'bot') }}
-        uses: actions/github-script@v8.0.0
+        uses: actions/github-script@v8
        with:
          script: |
            const pr = context.payload.pull_request;
--- a/.github/workflows/project-actions.yml
+++ b/.github/workflows/project-actions.yml
@@ -7,18 +7,19 @@ on:
    branches:
      - main
      - dev
-permissions:
-  contents: read
+permissions: {}
 jobs:
  pr_opened_or_reopened:
    name: pr_opened_or_reopened
    runs-on: ubuntu-24.04
    permissions:
+      # release-drafter reads its config file from the repo
+      contents: read
      # write permission is required for autolabeler
      pull-requests: write
    if: github.event_name == 'pull_request_target' && (github.event.action == 'opened' || github.event.action == 'reopened') && github.event.pull_request.user.login != 'dependabot'
    steps:
      - name: Label PR with release-drafter
-        uses: release-drafter/release-drafter@v6.2.0
+        uses: release-drafter/release-drafter@v6
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
--- a/.github/workflows/repo-maintenance.yml
+++ b/.github/workflows/repo-maintenance.yml
@@ -3,10 +3,7 @@ on:
  schedule:
    - cron: '0 3 * * *'
  workflow_dispatch:
-permissions:
-  issues: write
-  pull-requests: write
-  discussions: write
+permissions: {}
 concurrency:
  group: lock
 jobs:
@@ -14,8 +11,11 @@ jobs:
    name: 'Stale'
    if: github.repository_owner == 'paperless-ngx'
    runs-on: ubuntu-24.04
+    permissions:
+      issues: write
+      pull-requests: write
    steps:
-      - uses: actions/stale@v10.2.0
+      - uses: actions/stale@v10
        with:
          days-before-stale: 7
          days-before-close: 14
@@ -36,8 +36,12 @@ jobs:
    name: 'Lock Old Threads'
    if: github.repository_owner == 'paperless-ngx'
    runs-on: ubuntu-24.04
+    permissions:
+      issues: write
+      pull-requests: write
+      discussions: write
    steps:
-      - uses: dessant/lock-threads@v6.0.0
+      - uses: dessant/lock-threads@v6
        with:
          issue-inactive-days: '30'
          pr-inactive-days: '30'
@@ -56,8 +60,10 @@ jobs:
    name: 'Close Answered Discussions'
    if: github.repository_owner == 'paperless-ngx'
    runs-on: ubuntu-24.04
+    permissions:
+      discussions: write
    steps:
-      - uses: actions/github-script@v8.0.0
+      - uses: actions/github-script@v8
        with:
          script: |
            function sleep(ms) {
@@ -113,8 +119,10 @@ jobs:
    name: 'Close Outdated Discussions'
    if: github.repository_owner == 'paperless-ngx'
    runs-on: ubuntu-24.04
+    permissions:
+      discussions: write
    steps:
-      - uses: actions/github-script@v8.0.0
+      - uses: actions/github-script@v8
        with:
          script: |
            function sleep(ms) {
@@ -205,8 +213,10 @@ jobs:
    name: 'Close Unsupported Feature Requests'
    if: github.repository_owner == 'paperless-ngx'
    runs-on: ubuntu-24.04
+    permissions:
+      discussions: write
    steps:
-      - uses: actions/github-script@v8.0.0
+      - uses: actions/github-script@v8
        with:
          script: |
            function sleep(ms) {
--- a/.github/workflows/translate-strings.yml
+++ b/.github/workflows/translate-strings.yml
@@ -3,6 +3,7 @@ on:
  push:
    branches:
      - dev
+permissions: {}
 jobs:
  generate-translate-strings:
    name: Generate Translation Strings
@@ -11,7 +12,7 @@ jobs:
      contents: write
    steps:
      - name: Checkout code
-        uses: actions/checkout@v6.0.2
+        uses: actions/checkout@v6
        env:
          GH_REF: ${{ github.ref }} # sonar rule:githubactions:S7630 - avoid injection
        with:
@@ -19,13 +20,13 @@ jobs:
          ref: ${{ env.GH_REF }}
      - name: Set up Python
        id: setup-python
-        uses: actions/setup-python@v6.2.0
+        uses: actions/setup-python@v6
      - name: Install system dependencies
        run: |
          sudo apt-get update -qq
          sudo apt-get install -qq --no-install-recommends gettext
      - name: Install uv
-        uses: astral-sh/setup-uv@v7.3.1
+        uses: astral-sh/setup-uv@v7
        with:
          enable-cache: true
      - name: Install backend python dependencies
@@ -36,18 +37,18 @@ jobs:
      - name: Generate backend translation strings
        run: cd src/ && uv run manage.py makemessages -l en_US -i "samples*"
      - name: Install pnpm
-        uses: pnpm/action-setup@v4.2.0
+        uses: pnpm/action-setup@v4
        with:
          version: 10
      - name: Use Node.js 24
-        uses: actions/setup-node@v6.2.0
+        uses: actions/setup-node@v6
        with:
          node-version: 24.x
          cache: 'pnpm'
          cache-dependency-path: 'src-ui/pnpm-lock.yaml'
      - name: Cache frontend dependencies
        id: cache-frontend-deps
-        uses: actions/cache@v5.0.3
+        uses: actions/cache@v5
        with:
          path: |
            ~/.pnpm-store
@@ -63,7 +64,7 @@ jobs:
          cd src-ui
          pnpm run ng extract-i18n
      - name: Commit changes
-        uses: stefanzweifel/git-auto-commit-action@v7.1.0
+        uses: stefanzweifel/git-auto-commit-action@v7
        with:
          file_pattern: 'src-ui/messages.xlf src/locale/en_US/LC_MESSAGES/django.po'
          commit_message: "Auto translate strings"
--- a/4
+++ b/4
@@ -30,7 +30,7 @@ RUN set -eux \
 # Purpose: Installs s6-overlay and rootfs
 # Comments:
 #  - Don't leave anything extra in here either
-FROM ghcr.io/astral-sh/uv:0.10.7-python3.12-trixie-slim AS s6-overlay-base
+FROM ghcr.io/astral-sh/uv:0.10.5-python3.12-trixie-slim AS s6-overlay-base

 WORKDIR /usr/src/s6

@@ -45,7 +45,7 @@ ENV \
 ARG TARGETARCH
 ARG TARGETVARIANT
 # Lock this version
-ARG S6_OVERLAY_VERSION=3.2.2.0
+ARG S6_OVERLAY_VERSION=3.2.1.0

 ARG S6_BUILD_TIME_PKGS="curl \
                        xz-utils"
--- a/docker/compose/docker-compose.ci-test.yml
+++ b/docker/compose/docker-compose.ci-test.yml
@@ -4,7 +4,7 @@
 # correct networking for the tests
 services:
  gotenberg:
-    image: docker.io/gotenberg/gotenberg:8.27
+    image: docker.io/gotenberg/gotenberg:8.26
    hostname: gotenberg
    container_name: gotenberg
    network_mode: host
--- a/docker/compose/docker-compose.mariadb-tika.yml
+++ b/docker/compose/docker-compose.mariadb-tika.yml
@@ -72,7 +72,7 @@ services:
      PAPERLESS_TIKA_GOTENBERG_ENDPOINT: http://gotenberg:3000
      PAPERLESS_TIKA_ENDPOINT: http://tika:9998
  gotenberg:
-    image: docker.io/gotenberg/gotenberg:8.27
+    image: docker.io/gotenberg/gotenberg:8.26
    restart: unless-stopped
    # The gotenberg chromium route is used to convert .eml files. We do not
    # want to allow external content like tracking pixels or even javascript.
--- a/docker/compose/docker-compose.postgres-tika.yml
+++ b/docker/compose/docker-compose.postgres-tika.yml
@@ -66,7 +66,7 @@ services:
      PAPERLESS_TIKA_GOTENBERG_ENDPOINT: http://gotenberg:3000
      PAPERLESS_TIKA_ENDPOINT: http://tika:9998
  gotenberg:
-    image: docker.io/gotenberg/gotenberg:8.27
+    image: docker.io/gotenberg/gotenberg:8.26
    restart: unless-stopped
    # The gotenberg chromium route is used to convert .eml files. We do not
    # want to allow external content like tracking pixels or even javascript.
--- a/docker/compose/docker-compose.sqlite-tika.yml
+++ b/docker/compose/docker-compose.sqlite-tika.yml
@@ -55,7 +55,7 @@ services:
      PAPERLESS_TIKA_GOTENBERG_ENDPOINT: http://gotenberg:3000
      PAPERLESS_TIKA_ENDPOINT: http://tika:9998
  gotenberg:
-    image: docker.io/gotenberg/gotenberg:8.27
+    image: docker.io/gotenberg/gotenberg:8.26
    restart: unless-stopped
    # The gotenberg chromium route is used to convert .eml files. We do not
    # want to allow external content like tracking pixels or even javascript.
--- a/docs/assets/logo_full_black.png
+++ b/docs/assets/logo_full_black.png
--- a/docs/assets/logo_full_white.png
+++ b/docs/assets/logo_full_white.png
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -616,7 +616,7 @@ applied. You can use the following placeholders in the template with any trigger
 -   `{{added_day}}`: added day
 -   `{{added_time}}`: added time in HH:MM format
 -   `{{original_filename}}`: original file name without extension
-   `{{filename}}`: current file name without extension (for "added" workflows this may not be final yet, you can use `{{original_filename}}`)
+-   `{{filename}}`: current file name without extension
 -   `{{doc_title}}`: current document title (cannot be used in title assignment)

 The following placeholders are only available for "added" or "updated" triggers
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -111,7 +111,6 @@ docs = [
 testing = [
  "daphne",
  "factory-boy~=3.3.1",
-  "faker~=40.5.1",
  "imagehash",
  "pytest~=9.0.0",
  "pytest-cov~=7.0.0",
--- a/src-ui/messages.xlf
+++ b/src-ui/messages.xlf
@@ -1238,8 +1238,8 @@
          <context context-type="linenumber">82</context>
        </context-group>
      </trans-unit>
-      <trans-unit id="7860582931776068318" datatype="html">
-        <source>Add document version</source>
+      <trans-unit id="8035757452478567832" datatype="html">
+        <source>Update existing document</source>
        <context-group purpose="location">
          <context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
          <context context-type="linenumber">280</context>
@@ -8411,8 +8411,8 @@
          <context context-type="linenumber">832</context>
        </context-group>
      </trans-unit>
-      <trans-unit id="5203024009814367559" datatype="html">
-        <source>This operation will add rotated versions of the <x id="PH" equiv-text="this.list.selected.size"/> document(s).</source>
+      <trans-unit id="6390006284731990222" datatype="html">
+        <source>This operation will permanently rotate the original version of <x id="PH" equiv-text="this.list.selected.size"/> document(s).</source>
        <context-group purpose="location">
          <context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
          <context context-type="linenumber">833</context>
--- a/src-ui/src/app/components/admin/settings/settings.component.html
+++ b/src-ui/src/app/components/admin/settings/settings.component.html
@@ -277,7 +277,7 @@
              <div class="col">
                <select class="form-select" formControlName="pdfEditorDefaultEditMode">
                  <option [ngValue]="PdfEditorEditMode.Create" i18n>Create new document(s)</option>
-                  <option [ngValue]="PdfEditorEditMode.Update" i18n>Add document version</option>
+                  <option [ngValue]="PdfEditorEditMode.Update" i18n>Update existing document</option>
                </select>
              </div>
            </div>
--- a/src-ui/src/app/components/common/pdf-editor/pdf-editor.component.html
+++ b/src-ui/src/app/components/common/pdf-editor/pdf-editor.component.html
@@ -84,7 +84,7 @@
      <input type="radio" class="btn-check" [(ngModel)]="editMode" [value]="PdfEditorEditMode.Update" id="editModeUpdate" name="editmode" [disabled]="hasSplit()">
      <label for="editModeUpdate" class="btn btn-outline-primary btn-sm">
        <i-bs name="pencil"></i-bs>
-        <span class="form-check-label ms-2" i18n>Add document version</span>
+        <span class="form-check-label ms-2" i18n>Update existing document</span>
      </label>
    </div>
    @if (editMode === PdfEditorEditMode.Create) {
--- a/src-ui/src/app/components/common/pdf-editor/pdf-editor.component.spec.ts
+++ b/src-ui/src/app/components/common/pdf-editor/pdf-editor.component.spec.ts
@@ -3,7 +3,6 @@ import { provideHttpClientTesting } from '@angular/common/http/testing'
 import { ComponentFixture, TestBed } from '@angular/core/testing'
 import { NgbActiveModal } from '@ng-bootstrap/ng-bootstrap'
 import { NgxBootstrapIconsModule, allIcons } from 'ngx-bootstrap-icons'
-import { DocumentService } from 'src/app/services/rest/document.service'
 import { PDFEditorComponent } from './pdf-editor.component'

 describe('PDFEditorComponent', () => {
@@ -140,16 +139,4 @@ describe('PDFEditorComponent', () => {
    expect(component.pages[1].page).toBe(2)
    expect(component.pages[2].page).toBe(3)
  })
-
-  it('should include selected version in preview source when provided', () => {
-    const documentService = TestBed.inject(DocumentService)
-    const previewSpy = jest
-      .spyOn(documentService, 'getPreviewUrl')
-      .mockReturnValue('preview-version')
-    component.documentID = 3
-    component.versionID = 10
-
-    expect(component.pdfSrc).toBe('preview-version')
-    expect(previewSpy).toHaveBeenCalledWith(3, false, 10)
-  })
 })
--- a/src-ui/src/app/components/common/pdf-editor/pdf-editor.component.ts
+++ b/src-ui/src/app/components/common/pdf-editor/pdf-editor.component.ts
@@ -46,7 +46,6 @@ export class PDFEditorComponent extends ConfirmDialogComponent {
  activeModal: NgbActiveModal = inject(NgbActiveModal)

  documentID: number
-  versionID?: number
  pages: PageOperation[] = []
  totalPages = 0
  editMode: PdfEditorEditMode = this.settingsService.get(
@@ -56,11 +55,7 @@ export class PDFEditorComponent extends ConfirmDialogComponent {
  includeMetadata: boolean = true

  get pdfSrc(): string {
-    return this.documentService.getPreviewUrl(
-      this.documentID,
-      false,
-      this.versionID
-    )
+    return this.documentService.getPreviewUrl(this.documentID)
  }

  pdfLoaded(pdf: PngxPdfDocumentProxy) {
--- a/src-ui/src/app/components/document-detail/document-detail.component.spec.ts
+++ b/src-ui/src/app/components/document-detail/document-detail.component.spec.ts
@@ -1459,25 +1459,22 @@ describe('DocumentDetailComponent', () => {
    const closeSpy = jest.spyOn(openDocumentsService, 'closeDocument')
    const errorSpy = jest.spyOn(toastService, 'showError')
    initNormally()
-    component.selectedVersionId = 10
    component.editPdf()
    expect(modal).not.toBeUndefined()
    modal.componentInstance.documentID = doc.id
-    expect(modal.componentInstance.versionID).toBe(10)
    modal.componentInstance.pages = [{ page: 1, rotate: 0, splitAfter: false }]
    modal.componentInstance.confirm()
    let req = httpTestingController.expectOne(
      `${environment.apiBaseUrl}documents/bulk_edit/`
    )
    expect(req.request.body).toEqual({
-      documents: [10],
+      documents: [doc.id],
      method: 'edit_pdf',
      parameters: {
        operations: [{ page: 1, rotate: 0, doc: 0 }],
        delete_original: false,
        update_document: false,
        include_metadata: true,
-        source_mode: 'explicit_selection',
      },
    })
    req.error(new ErrorEvent('failed'))
@@ -1499,7 +1496,6 @@ describe('DocumentDetailComponent', () => {
    let modal: NgbModalRef
    modalService.activeInstances.subscribe((m) => (modal = m[0]))
    initNormally()
-    component.selectedVersionId = 10
    component.password = 'secret'
    component.removePassword()
    const dialog =
@@ -1512,14 +1508,13 @@ describe('DocumentDetailComponent', () => {
      `${environment.apiBaseUrl}documents/bulk_edit/`
    )
    expect(req.request.body).toEqual({
-      documents: [10],
+      documents: [doc.id],
      method: 'remove_password',
      parameters: {
        password: 'secret',
        update_document: false,
        include_metadata: false,
        delete_original: true,
-        source_mode: 'explicit_selection',
      },
    })
    req.flush(true)
--- a/src-ui/src/app/components/document-detail/document-detail.component.ts
+++ b/src-ui/src/app/components/document-detail/document-detail.component.ts
@@ -73,10 +73,7 @@ import {
 import { CorrespondentService } from 'src/app/services/rest/correspondent.service'
 import { CustomFieldsService } from 'src/app/services/rest/custom-fields.service'
 import { DocumentTypeService } from 'src/app/services/rest/document-type.service'
-import {
-  BulkEditSourceMode,
-  DocumentService,
-} from 'src/app/services/rest/document.service'
+import { DocumentService } from 'src/app/services/rest/document.service'
 import { SavedViewService } from 'src/app/services/rest/saved-view.service'
 import { StoragePathService } from 'src/app/services/rest/storage-path.service'
 import { TagService } from 'src/app/services/rest/tag.service'
@@ -1629,23 +1626,20 @@ export class DocumentDetailComponent
      size: 'xl',
      scrollable: true,
    })
-    const sourceDocumentId = this.selectedVersionId ?? this.document.id
    modal.componentInstance.title = $localize`PDF Editor`
    modal.componentInstance.btnCaption = $localize`Proceed`
    modal.componentInstance.documentID = this.document.id
-    modal.componentInstance.versionID = sourceDocumentId
    modal.componentInstance.confirmClicked
      .pipe(takeUntil(this.unsubscribeNotifier))
      .subscribe(() => {
        modal.componentInstance.buttonsEnabled = false
        this.documentsService
-          .bulkEdit([sourceDocumentId], 'edit_pdf', {
+          .bulkEdit([this.document.id], 'edit_pdf', {
            operations: modal.componentInstance.getOperations(),
            delete_original: modal.componentInstance.deleteOriginal,
            update_document:
              modal.componentInstance.editMode == PdfEditorEditMode.Update,
            include_metadata: modal.componentInstance.includeMetadata,
-            source_mode: BulkEditSourceMode.EXPLICIT_SELECTION,
          })
          .pipe(first(), takeUntil(this.unsubscribeNotifier))
          .subscribe({
@@ -1691,18 +1685,16 @@ export class DocumentDetailComponent
    modal.componentInstance.confirmClicked
      .pipe(takeUntil(this.unsubscribeNotifier))
      .subscribe(() => {
-        const sourceDocumentId = this.selectedVersionId ?? this.document.id
        const dialog =
          modal.componentInstance as PasswordRemovalConfirmDialogComponent
        dialog.buttonsEnabled = false
        this.networkActive = true
        this.documentsService
-          .bulkEdit([sourceDocumentId], 'remove_password', {
+          .bulkEdit([this.document.id], 'remove_password', {
            password: this.password,
            update_document: dialog.updateDocument,
            include_metadata: dialog.includeMetadata,
            delete_original: dialog.deleteOriginal,
-            source_mode: BulkEditSourceMode.EXPLICIT_SELECTION,
          })
          .pipe(first(), takeUntil(this.unsubscribeNotifier))
          .subscribe({
--- a/src-ui/src/app/components/document-list/bulk-editor/bulk-editor.component.ts
+++ b/src-ui/src/app/components/document-list/bulk-editor/bulk-editor.component.ts
@@ -830,7 +830,7 @@ export class BulkEditorComponent
    })
    const rotateDialog = modal.componentInstance as RotateConfirmDialogComponent
    rotateDialog.title = $localize`Rotate confirm`
-    rotateDialog.messageBold = $localize`This operation will add rotated versions of the ${this.list.selected.size} document(s).`
+    rotateDialog.messageBold = $localize`This operation will permanently rotate the original version of ${this.list.selected.size} document(s).`
    rotateDialog.btnClass = 'btn-danger'
    rotateDialog.btnCaption = $localize`Proceed`
    rotateDialog.documentID = Array.from(this.list.selected)[0]
--- a/src-ui/src/app/services/rest/document.service.ts
+++ b/src-ui/src/app/services/rest/document.service.ts
@@ -37,11 +37,6 @@ export interface SelectionData {
  selected_custom_fields: SelectionDataItem[]
 }

-export enum BulkEditSourceMode {
-  LATEST_VERSION = 'latest_version',
-  EXPLICIT_SELECTION = 'explicit_selection',
-}
-
@Injectable({
  providedIn: 'root',
 })
--- a/src/documents/bulk_edit.py
+++ b/src/documents/bulk_edit.py
@@ -29,21 +29,12 @@ from documents.plugins.helpers import DocumentsStatusManager
 from documents.tasks import bulk_update_documents
 from documents.tasks import consume_file
 from documents.tasks import update_document_content_maybe_archive_file
-from documents.versioning import get_latest_version_for_root
-from documents.versioning import get_root_document

 if TYPE_CHECKING:
    from django.contrib.auth.models import User

 logger: logging.Logger = logging.getLogger("paperless.bulk_edit")

-SourceMode = Literal["latest_version", "explicit_selection"]
-
-
-class SourceModeChoices:
-    LATEST_VERSION: SourceMode = "latest_version"
-    EXPLICIT_SELECTION: SourceMode = "explicit_selection"
-

@shared_task(bind=True)
 def restore_archive_serial_numbers_task(
@@ -81,21 +72,46 @@ def restore_archive_serial_numbers(backup: dict[int, int | None]) -> None:
    logger.info(f"Restored archive serial numbers for documents {list(backup.keys())}")


-def _resolve_root_and_source_doc(
-    doc: Document,
-    *,
-    source_mode: SourceMode = SourceModeChoices.LATEST_VERSION,
-) -> tuple[Document, Document]:
-    root_doc = get_root_document(doc)
+def _get_root_ids_by_doc_id(doc_ids: list[int]) -> dict[int, int]:
+    """
+    Resolve each provided document id to its root document id.

-    if source_mode == SourceModeChoices.EXPLICIT_SELECTION:
-        return root_doc, doc
+    - If the id is already a root document: root id is itself.
+    - If the id is a version document: root id is its `root_document_id`.
+    """
+    qs = Document.objects.filter(id__in=doc_ids).only("id", "root_document_id")
+    return {doc.id: doc.root_document_id or doc.id for doc in qs}

-    # Version IDs are explicit by default, only a selected root resolves to latest
-    if doc.root_document_id is not None:
-        return root_doc, doc

-    return root_doc, get_latest_version_for_root(root_doc)
+def _get_root_and_current_docs_by_root_id(
+    root_ids: set[int],
+) -> tuple[dict[int, Document], dict[int, Document]]:
+    """
+    Returns:
+      - root_docs: root_id -> root Document
+      - current_docs: root_id -> newest version Document (or root if none)
+    """
+    root_docs = {
+        doc.id: doc
+        for doc in Document.objects.filter(id__in=root_ids).select_related(
+            "owner",
+        )
+    }
+    latest_versions_by_root_id: dict[int, Document] = {}
+    for version_doc in Document.objects.filter(root_document_id__in=root_ids).order_by(
+        "root_document_id",
+        "-id",
+    ):
+        root_id = version_doc.root_document_id
+        if root_id is None:
+            continue
+        latest_versions_by_root_id.setdefault(root_id, version_doc)
+
+    current_docs: dict[int, Document] = {
+        root_id: latest_versions_by_root_id.get(root_id, root_docs[root_id])
+        for root_id in root_docs
+    }
+    return root_docs, current_docs


 def set_correspondent(
@@ -405,32 +421,21 @@ def rotate(
    doc_ids: list[int],
    degrees: int,
    *,
-    source_mode: SourceMode = SourceModeChoices.LATEST_VERSION,
    user: User | None = None,
 ) -> Literal["OK"]:
    logger.info(
        f"Attempting to rotate {len(doc_ids)} documents by {degrees} degrees.",
    )
-    docs_by_id = {
-        doc.id: doc
-        for doc in Document.objects.select_related("root_document").filter(
-            id__in=doc_ids,
-        )
-    }
-    docs_by_root_id: dict[int, tuple[Document, Document]] = {}
-    for doc_id in doc_ids:
-        doc = docs_by_id.get(doc_id)
-        if doc is None:
-            continue
-        root_doc, source_doc = _resolve_root_and_source_doc(
-            doc,
-            source_mode=source_mode,
-        )
-        docs_by_root_id.setdefault(root_doc.id, (root_doc, source_doc))
-
+    doc_to_root_id = _get_root_ids_by_doc_id(doc_ids)
+    root_ids = set(doc_to_root_id.values())
+    root_docs_by_id, current_docs_by_root_id = _get_root_and_current_docs_by_root_id(
+        root_ids,
+    )
    import pikepdf

-    for root_doc, source_doc in docs_by_root_id.values():
+    for root_id in root_ids:
+        root_doc = root_docs_by_id[root_id]
+        source_doc = current_docs_by_root_id[root_id]
        if source_doc.mime_type != "application/pdf":
            logger.warning(
                f"Document {root_doc.id} is not a PDF, skipping rotation.",
@@ -654,17 +659,25 @@ def delete_pages(
    doc_ids: list[int],
    pages: list[int],
    *,
-    source_mode: SourceMode = SourceModeChoices.LATEST_VERSION,
    user: User | None = None,
 ) -> Literal["OK"]:
    logger.info(
        f"Attempting to delete pages {pages} from {len(doc_ids)} documents",
    )
    doc = Document.objects.select_related("root_document").get(id=doc_ids[0])
-    root_doc, source_doc = _resolve_root_and_source_doc(
-        doc,
-        source_mode=source_mode,
+    root_doc: Document
+    if doc.root_document_id is None or doc.root_document is None:
+        root_doc = doc
+    else:
+        root_doc = doc.root_document
+
+    source_doc = (
+        Document.objects.filter(Q(id=root_doc.id) | Q(root_document=root_doc))
+        .order_by("-id")
+        .first()
    )
+    if source_doc is None:
+        source_doc = root_doc
    pages = sorted(pages)  # sort pages to avoid index issues
    import pikepdf

@@ -709,7 +722,6 @@ def edit_pdf(
    delete_original: bool = False,
    update_document: bool = False,
    include_metadata: bool = True,
-    source_mode: SourceMode = SourceModeChoices.LATEST_VERSION,
    user: User | None = None,
 ) -> Literal["OK"]:
    """
@@ -724,10 +736,19 @@ def edit_pdf(
        f"Editing PDF of document {doc_ids[0]} with {len(operations)} operations",
    )
    doc = Document.objects.select_related("root_document").get(id=doc_ids[0])
-    root_doc, source_doc = _resolve_root_and_source_doc(
-        doc,
-        source_mode=source_mode,
+    root_doc: Document
+    if doc.root_document_id is None or doc.root_document is None:
+        root_doc = doc
+    else:
+        root_doc = doc.root_document
+
+    source_doc = (
+        Document.objects.filter(Q(id=root_doc.id) | Q(root_document=root_doc))
+        .order_by("-id")
+        .first()
    )
+    if source_doc is None:
+        source_doc = root_doc
    import pikepdf

    pdf_docs: list[pikepdf.Pdf] = []
@@ -838,7 +859,6 @@ def remove_password(
    update_document: bool = False,
    delete_original: bool = False,
    include_metadata: bool = True,
-    source_mode: SourceMode = SourceModeChoices.LATEST_VERSION,
    user: User | None = None,
 ) -> Literal["OK"]:
    """
@@ -848,10 +868,19 @@ def remove_password(

    for doc_id in doc_ids:
        doc = Document.objects.select_related("root_document").get(id=doc_id)
-        root_doc, source_doc = _resolve_root_and_source_doc(
-            doc,
-            source_mode=source_mode,
+        root_doc: Document
+        if doc.root_document_id is None or doc.root_document is None:
+            root_doc = doc
+        else:
+            root_doc = doc.root_document
+
+        source_doc = (
+            Document.objects.filter(Q(id=root_doc.id) | Q(root_document=root_doc))
+            .order_by("-id")
+            .first()
        )
+        if source_doc is None:
+            source_doc = root_doc
        try:
            logger.info(
                f"Attempting password removal from document {doc_ids[0]}",
--- a/src/documents/management/commands/base.py
+++ b/src/documents/management/commands/base.py
@@ -6,14 +6,11 @@ Provides automatic progress bar and multiprocessing support with minimal boilerp

 from __future__ import annotations

-import logging
 import os
-from collections.abc import Callable
 from collections.abc import Iterable
 from collections.abc import Sized
 from concurrent.futures import ProcessPoolExecutor
 from concurrent.futures import as_completed
-from contextlib import contextmanager
 from dataclasses import dataclass
 from typing import TYPE_CHECKING
 from typing import Any
@@ -25,11 +22,7 @@ from django import db
 from django.core.management import CommandError
 from django.db.models import QuerySet
 from django_rich.management import RichCommand
-from rich import box
 from rich.console import Console
-from rich.console import Group
-from rich.console import RenderableType
-from rich.live import Live
 from rich.progress import BarColumn
 from rich.progress import MofNCompleteColumn
 from rich.progress import Progress
@@ -37,11 +30,11 @@ from rich.progress import SpinnerColumn
 from rich.progress import TextColumn
 from rich.progress import TimeElapsedColumn
 from rich.progress import TimeRemainingColumn
-from rich.table import Table
-from rich.text import Text

 if TYPE_CHECKING:
+    from collections.abc import Callable
    from collections.abc import Generator
+    from collections.abc import Iterable
    from collections.abc import Sequence

    from django.core.management import CommandParser
@@ -50,78 +43,6 @@ T = TypeVar("T")
 R = TypeVar("R")


-@dataclass(slots=True, frozen=True)
-class _BufferedRecord:
-    level: int
-    name: str
-    message: str
-
-
-class BufferingLogHandler(logging.Handler):
-    """Captures log records during a command run for deferred rendering.
-
-    Attach to a logger before a long operation and call ``render()``
-    afterwards to emit the buffered records via Rich, optionally filtered
-    by minimum level.
-    """
-
-    def __init__(self) -> None:
-        super().__init__()
-        self._records: list[_BufferedRecord] = []
-
-    def emit(self, record: logging.LogRecord) -> None:
-        self._records.append(
-            _BufferedRecord(
-                level=record.levelno,
-                name=record.name,
-                message=self.format(record),
-            ),
-        )
-
-    def render(
-        self,
-        console: Console,
-        *,
-        min_level: int = logging.DEBUG,
-        title: str = "Log Output",
-    ) -> None:
-        records = [r for r in self._records if r.level >= min_level]
-        if not records:
-            return
-
-        table = Table(
-            title=title,
-            show_header=True,
-            header_style="bold",
-            show_lines=False,
-            box=box.SIMPLE,
-        )
-        table.add_column("Level", style="bold", width=8)
-        table.add_column("Logger", style="dim")
-        table.add_column("Message", no_wrap=False)
-
-        _level_styles: dict[int, str] = {
-            logging.DEBUG: "dim",
-            logging.INFO: "cyan",
-            logging.WARNING: "yellow",
-            logging.ERROR: "red",
-            logging.CRITICAL: "bold red",
-        }
-
-        for record in records:
-            style = _level_styles.get(record.level, "")
-            table.add_row(
-                Text(logging.getLevelName(record.level), style=style),
-                record.name,
-                record.message,
-            )
-
-        console.print(table)
-
-    def clear(self) -> None:
-        self._records.clear()
-
-
@dataclass(frozen=True, slots=True)
 class ProcessResult(Generic[T, R]):
    """
@@ -170,23 +91,6 @@ class PaperlessCommand(RichCommand):
                for result in self.process_parallel(process_doc, ids):
                    if result.error:
                        self.console.print(f"[red]Failed: {result.error}[/red]")
-
-        class Command(PaperlessCommand):
-            help = "Import documents with live stats"
-
-            def handle(self, *args, **options):
-                stats = ImportStats()
-
-                def render_stats() -> Table:
-                    ...  # build Rich Table from stats
-
-                for item in self.track_with_stats(
-                    items,
-                    description="Importing...",
-                    stats_renderer=render_stats,
-                ):
-                    result = import_item(item)
-                    stats.imported += 1
    """

    supports_progress_bar: ClassVar[bool] = True
@@ -224,11 +128,13 @@ class PaperlessCommand(RichCommand):
        This is called by Django's command infrastructure after argument parsing
        but before handle(). We use it to set instance attributes from options.
        """
+        # Set progress bar state
        if self.supports_progress_bar:
            self.no_progress_bar = options.get("no_progress_bar", False)
        else:
            self.no_progress_bar = True

+        # Set multiprocessing state
        if self.supports_multiprocessing:
            self.process_count = options.get("processes", 1)
            if self.process_count < 1:
@@ -238,69 +144,9 @@ class PaperlessCommand(RichCommand):

        return super().execute(*args, **options)

-    @contextmanager
-    def buffered_logging(
-        self,
-        *logger_names: str,
-        level: int = logging.DEBUG,
-    ) -> Generator[BufferingLogHandler, None, None]:
-        """Context manager that captures log output from named loggers.
-
-        Installs a ``BufferingLogHandler`` on each named logger for the
-        duration of the block, suppressing propagation to avoid interleaving
-        with the Rich live display. The handler is removed on exit regardless
-        of whether an exception occurred.
-
-        Usage::
-
-            with self.buffered_logging("paperless", "documents") as log_buf:
-                # ... run progress loop ...
-            if options["verbose"]:
-                log_buf.render(self.console)
-        """
-        handler = BufferingLogHandler()
-        handler.setFormatter(logging.Formatter("%(message)s"))
-
-        loggers: list[logging.Logger] = []
-        original_propagate: dict[str, bool] = {}
-
-        for name in logger_names:
-            log = logging.getLogger(name)
-            log.addHandler(handler)
-            original_propagate[name] = log.propagate
-            log.propagate = False
-            loggers.append(log)
-
-        try:
-            yield handler
-        finally:
-            for log in loggers:
-                log.removeHandler(handler)
-                log.propagate = original_propagate[log.name]
-
-    @staticmethod
-    def _progress_columns() -> tuple[Any, ...]:
-        """
-        Return the standard set of progress bar columns.
-
-        Extracted so both _create_progress (standalone) and track_with_stats
-        (inside Live) use identical column configuration without duplication.
-        """
-        return (
-            SpinnerColumn(),
-            TextColumn("[progress.description]{task.description}"),
-            BarColumn(),
-            MofNCompleteColumn(),
-            TimeElapsedColumn(),
-            TimeRemainingColumn(),
-        )
-
    def _create_progress(self, description: str) -> Progress:
        """
-        Create a standalone Progress instance with its own stderr Console.
-
-        Use this for track(). For track_with_stats(), Progress is created
-        directly inside a Live context instead.
+        Create a configured Progress instance.

        Progress output is directed to stderr to match the convention that
        progress bars are transient UI feedback, not command output. This
@@ -315,7 +161,12 @@ class PaperlessCommand(RichCommand):
            A Progress instance configured with appropriate columns.
        """
        return Progress(
-            *self._progress_columns(),
+            SpinnerColumn(),
+            TextColumn("[progress.description]{task.description}"),
+            BarColumn(),
+            MofNCompleteColumn(),
+            TimeElapsedColumn(),
+            TimeRemainingColumn(),
            console=Console(stderr=True),
            transient=False,
        )
@@ -371,6 +222,7 @@ class PaperlessCommand(RichCommand):
            yield from iterable
            return

+        # Attempt to determine total if not provided
        if total is None:
            total = self._get_iterable_length(iterable)

@@ -380,87 +232,6 @@ class PaperlessCommand(RichCommand):
                yield item
                progress.advance(task_id)

-    def track_with_stats(
-        self,
-        iterable: Iterable[T],
-        *,
-        description: str = "Processing...",
-        stats_renderer: Callable[[], RenderableType],
-        total: int | None = None,
-    ) -> Generator[T, None, None]:
-        """
-        Iterate over items with a progress bar and a live-updating stats display.
-
-        The progress bar and stats renderable are combined in a single Live
-        context, so the stats panel re-renders in place below the progress bar
-        after each item is processed.
-
-        Respects --no-progress-bar flag. When disabled, yields items without
-        any display (stats are still updated by the caller's loop body, so
-        they will be accurate for any post-loop summary the caller prints).
-
-        Args:
-            iterable: The items to iterate over.
-            description: Text to display alongside the progress bar.
-            stats_renderer: Zero-argument callable that returns a Rich
-                renderable. Called after each item to refresh the display.
-                The caller typically closes over a mutable dataclass and
-                rebuilds a Table from it on each call.
-            total: Total number of items. If None, attempts to determine
-                automatically via .count() (for querysets) or len().
-
-        Yields:
-            Items from the iterable.
-
-        Example:
-            @dataclass
-            class Stats:
-                processed: int = 0
-                failed: int = 0
-
-            stats = Stats()
-
-            def render_stats() -> Table:
-                table = Table(box=None)
-                table.add_column("Processed")
-                table.add_column("Failed")
-                table.add_row(str(stats.processed), str(stats.failed))
-                return table
-
-            for item in self.track_with_stats(
-                items,
-                description="Importing...",
-                stats_renderer=render_stats,
-            ):
-                try:
-                    import_item(item)
-                    stats.processed += 1
-                except Exception:
-                    stats.failed += 1
-        """
-        if self.no_progress_bar:
-            yield from iterable
-            return
-
-        if total is None:
-            total = self._get_iterable_length(iterable)
-
-        stderr_console = Console(stderr=True)
-
-        # Progress is created without its own console so Live controls rendering.
-        progress = Progress(*self._progress_columns())
-        task_id = progress.add_task(description, total=total)
-
-        with Live(
-            Group(progress, stats_renderer()),
-            console=stderr_console,
-            refresh_per_second=4,
-        ) as live:
-            for item in iterable:
-                yield item
-                progress.advance(task_id)
-                live.update(Group(progress, stats_renderer()))
-
    def process_parallel(
        self,
        fn: Callable[[T], R],
@@ -498,7 +269,7 @@ class PaperlessCommand(RichCommand):
        total = len(items)

        if self.process_count == 1:
-            # Sequential execution in main process - critical for testing, so we don't fork in fork, etc
+            # Sequential execution in main process - critical for testing
            yield from self._process_sequential(fn, items, description, total)
        else:
            # Parallel execution with ProcessPoolExecutor
@@ -527,7 +298,6 @@ class PaperlessCommand(RichCommand):
        total: int,
    ) -> Generator[ProcessResult[T, R], None, None]:
        """Process items in parallel using ProcessPoolExecutor."""
-
        # Close database connections before forking - required for PostgreSQL
        db.connections.close_all()

--- a/src/documents/management/commands/document_index.py
+++ b/src/documents/management/commands/document_index.py
@@ -1,25 +1,22 @@
+from django.core.management import BaseCommand
 from django.db import transaction

-from documents.management.commands.base import PaperlessCommand
+from documents.management.commands.mixins import ProgressBarMixin
 from documents.tasks import index_optimize
 from documents.tasks import index_reindex


-class Command(PaperlessCommand):
+class Command(ProgressBarMixin, BaseCommand):
    help = "Manages the document index."

    def add_arguments(self, parser):
-        super().add_arguments(parser)
        parser.add_argument("command", choices=["reindex", "optimize"])
+        self.add_argument_progress_bar_mixin(parser)

    def handle(self, *args, **options):
+        self.handle_progress_bar_mixin(**options)
        with transaction.atomic():
            if options["command"] == "reindex":
-                index_reindex(
-                    iter_wrapper=lambda docs: self.track(
-                        docs,
-                        description="Indexing documents...",
-                    ),
-                )
+                index_reindex(progress_bar_disable=self.no_progress_bar)
            elif options["command"] == "optimize":
                index_optimize()
--- a/src/documents/management/commands/document_llmindex.py
+++ b/src/documents/management/commands/document_llmindex.py
@@ -1,22 +1,22 @@
-from typing import Any
+from django.core.management import BaseCommand
+from django.db import transaction

-from documents.management.commands.base import PaperlessCommand
+from documents.management.commands.mixins import ProgressBarMixin
 from documents.tasks import llmindex_index


-class Command(PaperlessCommand):
+class Command(ProgressBarMixin, BaseCommand):
    help = "Manages the LLM-based vector index for Paperless."

-    def add_arguments(self, parser: Any) -> None:
-        super().add_arguments(parser)
+    def add_arguments(self, parser):
        parser.add_argument("command", choices=["rebuild", "update"])
+        self.add_argument_progress_bar_mixin(parser)

-    def handle(self, *args: Any, **options: Any) -> None:
-        llmindex_index(
-            rebuild=options["command"] == "rebuild",
-            scheduled=False,
-            iter_wrapper=lambda docs: self.track(
-                docs,
-                description="Indexing documents...",
-            ),
-        )
+    def handle(self, *args, **options):
+        self.handle_progress_bar_mixin(**options)
+        with transaction.atomic():
+            llmindex_index(
+                progress_bar_disable=self.no_progress_bar,
+                rebuild=options["command"] == "rebuild",
+                scheduled=False,
+            )
--- a/src/documents/management/commands/document_retagger.py
+++ b/src/documents/management/commands/document_retagger.py
@@ -1,12 +1,4 @@
-from __future__ import annotations
-
 import logging
-from dataclasses import dataclass
-from dataclasses import field
-from typing import TYPE_CHECKING
-
-from rich.table import Table
-from rich.text import Text

 from documents.classifier import load_classifier
 from documents.management.commands.base import PaperlessCommand
@@ -16,162 +8,9 @@ from documents.signals.handlers import set_document_type
 from documents.signals.handlers import set_storage_path
 from documents.signals.handlers import set_tags

-if TYPE_CHECKING:
-    from rich.console import RenderableType
-
-    from documents.models import Correspondent
-    from documents.models import DocumentType
-    from documents.models import StoragePath
-    from documents.models import Tag
-
 logger = logging.getLogger("paperless.management.retagger")


-@dataclass(slots=True)
-class RetaggerStats:
-    """Cumulative counters updated as the retagger processes documents.
-
-    Mutable by design -- fields are incremented in the processing loop.
-    slots=True reduces per-instance memory overhead and speeds attribute access.
-    """
-
-    correspondents: int = 0
-    document_types: int = 0
-    tags_added: int = 0
-    tags_removed: int = 0
-    storage_paths: int = 0
-    documents_processed: int = 0
-
-
-@dataclass(slots=True)
-class DocumentSuggestion:
-    """Buffered classifier suggestions for a single document (suggest mode only).
-
-    Mutable by design -- fields are assigned incrementally as each setter runs.
-    """
-
-    document: Document
-    correspondent: Correspondent | None = None
-    document_type: DocumentType | None = None
-    tags_to_add: frozenset[Tag] = field(default_factory=frozenset)
-    tags_to_remove: frozenset[Tag] = field(default_factory=frozenset)
-    storage_path: StoragePath | None = None
-
-    @property
-    def has_suggestions(self) -> bool:
-        return bool(
-            self.correspondent is not None
-            or self.document_type is not None
-            or self.tags_to_add
-            or self.tags_to_remove
-            or self.storage_path is not None,
-        )
-
-
-def _build_stats_table(stats: RetaggerStats, *, suggest: bool) -> Table:
-    """
-    Build the live-updating stats table shown below the progress bar.
-
-    In suggest mode the labels read "would set / would add" to make clear
-    that nothing has been written to the database.
-    """
-    table = Table(box=None, padding=(0, 2), show_header=True, header_style="bold")
-
-    table.add_column("Documents")
-    table.add_column("Correspondents")
-    table.add_column("Doc Types")
-    table.add_column("Tags (+)")
-    table.add_column("Tags (-)")
-    table.add_column("Storage Paths")
-
-    verb = "would set" if suggest else "set"
-
-    table.add_row(
-        str(stats.documents_processed),
-        f"{stats.correspondents} {verb}",
-        f"{stats.document_types} {verb}",
-        f"+{stats.tags_added}",
-        f"-{stats.tags_removed}",
-        f"{stats.storage_paths} {verb}",
-    )
-
-    return table
-
-
-def _build_suggestion_table(
-    suggestions: list[DocumentSuggestion],
-    base_url: str | None,
-) -> Table:
-    """
-    Build the final suggestion table printed after the progress bar completes.
-
-    Only documents with at least one suggestion are included.
-    """
-    table = Table(
-        title="Suggested Changes",
-        show_header=True,
-        header_style="bold cyan",
-        show_lines=True,
-    )
-
-    table.add_column("Document", style="bold", no_wrap=False, min_width=20)
-    table.add_column("Correspondent")
-    table.add_column("Doc Type")
-    table.add_column("Tags")
-    table.add_column("Storage Path")
-
-    for suggestion in suggestions:
-        if not suggestion.has_suggestions:
-            continue
-
-        doc = suggestion.document
-
-        if base_url:
-            doc_cell = Text()
-            doc_cell.append(str(doc))
-            doc_cell.append(f"\n{base_url}/documents/{doc.pk}", style="dim")
-        else:
-            doc_cell = Text(f"{doc} [{doc.pk}]")
-
-        tag_parts: list[str] = []
-        for tag in sorted(suggestion.tags_to_add, key=lambda t: t.name):
-            tag_parts.append(f"[green]+{tag.name}[/green]")
-        for tag in sorted(suggestion.tags_to_remove, key=lambda t: t.name):
-            tag_parts.append(f"[red]-{tag.name}[/red]")
-        tag_cell = Text.from_markup(", ".join(tag_parts)) if tag_parts else Text("-")
-
-        table.add_row(
-            doc_cell,
-            str(suggestion.correspondent) if suggestion.correspondent else "-",
-            str(suggestion.document_type) if suggestion.document_type else "-",
-            tag_cell,
-            str(suggestion.storage_path) if suggestion.storage_path else "-",
-        )
-
-    return table
-
-
-def _build_summary_table(stats: RetaggerStats) -> Table:
-    """Build the final applied-changes summary table."""
-    table = Table(
-        title="Retagger Summary",
-        show_header=True,
-        header_style="bold cyan",
-    )
-
-    table.add_column("Metric", style="bold")
-    table.add_column("Count", justify="right")
-
-    table.add_row("Documents processed", str(stats.documents_processed))
-    table.add_row("Correspondents set", str(stats.correspondents))
-    table.add_row("Document types set", str(stats.document_types))
-    table.add_row("Tags added", str(stats.tags_added))
-    table.add_row("Tags removed", str(stats.tags_removed))
-    table.add_row("Storage paths set", str(stats.storage_paths))
-
-    return table
-
-
 class Command(PaperlessCommand):
    help = (
        "Using the current classification model, assigns correspondents, tags "
@@ -180,7 +19,7 @@ class Command(PaperlessCommand):
        "modified) after their initial import."
    )

-    def add_arguments(self, parser) -> None:
+    def add_arguments(self, parser):
        super().add_arguments(parser)
        parser.add_argument("-c", "--correspondent", default=False, action="store_true")
        parser.add_argument("-T", "--tags", default=False, action="store_true")
@@ -192,9 +31,9 @@ class Command(PaperlessCommand):
            default=False,
            action="store_true",
            help=(
-                "By default this command will not try to assign a correspondent "
-                "if more than one matches the document. Use this flag to pick "
-                "the first match instead."
+                "By default this command won't try to assign a correspondent "
+                "if more than one matches the document. Use this flag if "
+                "you'd rather it just pick the first one it finds."
            ),
        )
        parser.add_argument(
@@ -203,140 +42,91 @@ class Command(PaperlessCommand):
            default=False,
            action="store_true",
            help=(
-                "Overwrite any previously set correspondent, document type, and "
-                "remove tags that no longer match due to changed rules."
+                "If set, the document retagger will overwrite any previously "
+                "set correspondent, document and remove correspondents, types "
+                "and tags that do not match anymore due to changed rules."
            ),
        )
        parser.add_argument(
            "--suggest",
            default=False,
            action="store_true",
-            help="Show what would be changed without applying anything.",
+            help="Return the suggestion, don't change anything.",
        )
        parser.add_argument(
            "--base-url",
-            help="Base URL used to build document links in suggest output.",
+            help="The base URL to use to build the link to the documents.",
        )
        parser.add_argument(
            "--id-range",
-            help="Restrict retagging to documents within this ID range (inclusive).",
+            help="A range of document ids on which the retagging should be applied.",
            nargs=2,
            type=int,
        )

-    def handle(self, *args, **options) -> None:
-        suggest: bool = options["suggest"]
-        overwrite: bool = options["overwrite"]
-        use_first: bool = options["use_first"]
-        base_url: str | None = options["base_url"]
-
-        do_correspondent: bool = options["correspondent"]
-        do_document_type: bool = options["document_type"]
-        do_tags: bool = options["tags"]
-        do_storage_path: bool = options["storage_path"]
-
-        if not any([do_correspondent, do_document_type, do_tags, do_storage_path]):
-            self.console.print(
-                "[yellow]No classifier targets specified. "
-                "Use -c, -T, -t, or -s to select what to retag.[/yellow]",
-            )
-            return
-
+    def handle(self, *args, **options):
        if options["inbox_only"]:
            queryset = Document.objects.filter(tags__is_inbox_tag=True)
        else:
            queryset = Document.objects.all()

        if options["id_range"]:
-            lo, hi = options["id_range"]
-            queryset = queryset.filter(id__range=(lo, hi))
+            queryset = queryset.filter(
+                id__range=(options["id_range"][0], options["id_range"][1]),
+            )

        documents = queryset.distinct()
+
        classifier = load_classifier()

-        stats = RetaggerStats()
-        suggestions: list[DocumentSuggestion] = []
+        for document in self.track(documents, description="Retagging..."):
+            if options["correspondent"]:
+                set_correspondent(
+                    sender=None,
+                    document=document,
+                    classifier=classifier,
+                    replace=options["overwrite"],
+                    use_first=options["use_first"],
+                    suggest=options["suggest"],
+                    base_url=options["base_url"],
+                    stdout=self.stdout,
+                    style_func=self.style,
+                )

-        def render_stats() -> RenderableType:
-            return _build_stats_table(stats, suggest=suggest)
+            if options["document_type"]:
+                set_document_type(
+                    sender=None,
+                    document=document,
+                    classifier=classifier,
+                    replace=options["overwrite"],
+                    use_first=options["use_first"],
+                    suggest=options["suggest"],
+                    base_url=options["base_url"],
+                    stdout=self.stdout,
+                    style_func=self.style,
+                )

-        with self.buffered_logging(
-            "paperless",
-            "paperless.handlers",
-            "documents",
-        ) as log_buf:
-            for document in self.track_with_stats(
-                documents,
-                description="Retagging...",
-                stats_renderer=render_stats,
-            ):
-                suggestion = DocumentSuggestion(document=document)
+            if options["tags"]:
+                set_tags(
+                    sender=None,
+                    document=document,
+                    classifier=classifier,
+                    replace=options["overwrite"],
+                    suggest=options["suggest"],
+                    base_url=options["base_url"],
+                    stdout=self.stdout,
+                    style_func=self.style,
+                )

-                if do_correspondent:
-                    correspondent = set_correspondent(
-                        None,
-                        document,
-                        classifier=classifier,
-                        replace=overwrite,
-                        use_first=use_first,
-                        dry_run=suggest,
-                    )
-                    if correspondent is not None:
-                        stats.correspondents += 1
-                        suggestion.correspondent = correspondent
-
-                if do_document_type:
-                    document_type = set_document_type(
-                        None,
-                        document,
-                        classifier=classifier,
-                        replace=overwrite,
-                        use_first=use_first,
-                        dry_run=suggest,
-                    )
-                    if document_type is not None:
-                        stats.document_types += 1
-                        suggestion.document_type = document_type
-
-                if do_tags:
-                    tags_to_add, tags_to_remove = set_tags(
-                        None,
-                        document,
-                        classifier=classifier,
-                        replace=overwrite,
-                        dry_run=suggest,
-                    )
-                    stats.tags_added += len(tags_to_add)
-                    stats.tags_removed += len(tags_to_remove)
-                    suggestion.tags_to_add = frozenset(tags_to_add)
-                    suggestion.tags_to_remove = frozenset(tags_to_remove)
-
-                if do_storage_path:
-                    storage_path = set_storage_path(
-                        None,
-                        document,
-                        classifier=classifier,
-                        replace=overwrite,
-                        use_first=use_first,
-                        dry_run=suggest,
-                    )
-                    if storage_path is not None:
-                        stats.storage_paths += 1
-                        suggestion.storage_path = storage_path
-
-                stats.documents_processed += 1
-
-                if suggest:
-                    suggestions.append(suggestion)
-
-        # Post-loop output
-        if suggest:
-            visible = [s for s in suggestions if s.has_suggestions]
-            if visible:
-                self.console.print(_build_suggestion_table(visible, base_url))
-            else:
-                self.console.print("[green]No changes suggested.[/green]")
-        else:
-            self.console.print(_build_summary_table(stats))
-
-        log_buf.render(self.console, min_level=logging.INFO, title="Retagger Log")
+            if options["storage_path"]:
+                set_storage_path(
+                    sender=None,
+                    document=document,
+                    classifier=classifier,
+                    replace=options["overwrite"],
+                    use_first=options["use_first"],
+                    suggest=options["suggest"],
+                    base_url=options["base_url"],
+                    stdout=self.stdout,
+                    style_func=self.style,
+                )
--- a/src/documents/management/commands/document_sanity_checker.py
+++ b/src/documents/management/commands/document_sanity_checker.py
@@ -1,117 +1,17 @@
-"""Management command to check the document archive for issues."""
+from django.core.management.base import BaseCommand

-from __future__ import annotations
-
-import logging
-from typing import Any
-
-from rich.panel import Panel
-from rich.table import Table
-from rich.text import Text
-
-from documents.management.commands.base import PaperlessCommand
-from documents.models import Document
-from documents.sanity_checker import SanityCheckMessages
+from documents.management.commands.mixins import ProgressBarMixin
 from documents.sanity_checker import check_sanity

-_LEVEL_STYLE: dict[int, tuple[str, str]] = {
-    logging.ERROR: ("bold red", "ERROR"),
-    logging.WARNING: ("yellow", "WARN"),
-    logging.INFO: ("dim", "INFO"),
-}

-
-class Command(PaperlessCommand):
+class Command(ProgressBarMixin, BaseCommand):
    help = "This command checks your document archive for issues."

-    def _render_results(self, messages: SanityCheckMessages) -> None:
-        """Render sanity check results as a Rich table."""
+    def add_arguments(self, parser):
+        self.add_argument_progress_bar_mixin(parser)

-        if (
-            not messages.has_error
-            and not messages.has_warning
-            and not messages.has_info
-        ):
-            self.console.print(
-                Panel(
-                    "[green]No issues detected.[/green]",
-                    title="Sanity Check",
-                    border_style="green",
-                ),
-            )
-            return
+    def handle(self, *args, **options):
+        self.handle_progress_bar_mixin(**options)
+        messages = check_sanity(progress=self.use_progress_bar, scheduled=False)

-        # Build a lookup for document titles
-        doc_pks = [pk for pk in messages.document_pks() if pk is not None]
-        titles: dict[int, str] = {}
-        if doc_pks:
-            titles = dict(
-                Document.global_objects.filter(pk__in=doc_pks)
-                .only("pk", "title")
-                .values_list("pk", "title"),
-            )
-
-        table = Table(
-            title="Sanity Check Results",
-            show_lines=True,
-            title_style="bold",
-        )
-        table.add_column("Level", width=7, no_wrap=True)
-        table.add_column("Document", min_width=20)
-        table.add_column("Issue", ratio=1)
-
-        for doc_pk, doc_messages in messages.iter_messages():
-            if doc_pk is not None:
-                title = titles.get(doc_pk, "Unknown")
-                doc_label = f"#{doc_pk} {title}"
-            else:
-                doc_label = "(global)"
-
-            for msg in doc_messages:
-                style, label = _LEVEL_STYLE.get(
-                    msg["level"],
-                    ("dim", "INFO"),
-                )
-                table.add_row(
-                    Text(label, style=style),
-                    Text(doc_label),
-                    Text(str(msg["message"])),
-                )
-
-        self.console.print(table)
-
-        parts: list[str] = []
-
-        if messages.document_error_count:
-            parts.append(
-                f"{messages.document_error_count} document(s) with [bold red]errors[/bold red]",
-            )
-        if messages.document_warning_count:
-            parts.append(
-                f"{messages.document_warning_count} document(s) with [yellow]warnings[/yellow]",
-            )
-        if messages.document_info_count:
-            parts.append(f"{messages.document_info_count} document(s) with infos")
-        if messages.global_warning_count:
-            parts.append(
-                f"{messages.global_warning_count} global [yellow]warning(s)[/yellow]",
-            )
-
-        if parts:
-            if len(parts) > 1:
-                summary = ", ".join(parts[:-1]) + " and " + parts[-1]
-            else:
-                summary = parts[0]
-            self.console.print(f"\nFound {summary}.")
-        else:
-            self.console.print("\nNo issues found.")
-
-    def handle(self, *args: Any, **options: Any) -> None:
-        messages = check_sanity(
-            scheduled=False,
-            iter_wrapper=lambda docs: self.track(
-                docs,
-                description="Checking documents...",
-            ),
-        )
-        self._render_results(messages)
+        messages.log_messages()
--- a/src/documents/models.py
+++ b/src/documents/models.py
@@ -75,7 +75,7 @@ class MatchingModel(ModelWithOwner):

    is_insensitive = models.BooleanField(_("is insensitive"), default=True)

-    class Meta(ModelWithOwner.Meta):
+    class Meta:
        abstract = True
        ordering = ("name",)
        constraints = [
--- a/src/documents/sanity_checker.py
+++ b/src/documents/sanity_checker.py
@@ -1,174 +1,80 @@
-"""
-Sanity checker for the Paperless-ngx document archive.
-
-Verifies that all documents have valid files, correct checksums,
-and consistent metadata. Reports orphaned files in the media directory.
-
-Progress display is the caller's responsibility -- pass an ``iter_wrapper``
-to wrap the document queryset (e.g., with a progress bar). The default
-is an identity function that adds no overhead.
-"""
-
-from __future__ import annotations
-
 import hashlib
 import logging
 import uuid
 from collections import defaultdict
-from collections.abc import Callable
-from collections.abc import Iterable
-from collections.abc import Iterator
 from pathlib import Path
-from typing import TYPE_CHECKING
 from typing import Final
-from typing import TypedDict
-from typing import TypeVar

 from celery import states
 from django.conf import settings
 from django.utils import timezone
+from tqdm import tqdm

 from documents.models import Document
 from documents.models import PaperlessTask
 from paperless.config import GeneralConfig

-logger = logging.getLogger("paperless.sanity_checker")
-
-_T = TypeVar("_T")
-IterWrapper = Callable[[Iterable[_T]], Iterable[_T]]
-
-
-class MessageEntry(TypedDict):
-    """A single sanity check message with its severity level."""
-
-    level: int
-    message: str
-
-
-def _identity(iterable: Iterable[_T]) -> Iterable[_T]:
-    """Pass through an iterable unchanged (default iter_wrapper)."""
-    return iterable
-

 class SanityCheckMessages:
-    """Collects sanity check messages grouped by document primary key.
-
-    Messages are categorized as error, warning, or info. ``None`` is used
-    as the key for messages not associated with a specific document
-    (e.g., orphaned files).
-    """
-
    def __init__(self) -> None:
-        self._messages: dict[int | None, list[MessageEntry]] = defaultdict(list)
-        self.has_error: bool = False
-        self.has_warning: bool = False
-        self.has_info: bool = False
-        self.document_count: int = 0
-        self.document_error_count: int = 0
-        self.document_warning_count: int = 0
-        self.document_info_count: int = 0
-        self.global_warning_count: int = 0
+        self._messages: dict[int, list[dict]] = defaultdict(list)
+        self.has_error = False
+        self.has_warning = False

-    # -- Recording ----------------------------------------------------------
-
-    def error(self, doc_pk: int | None, message: str) -> None:
+    def error(self, doc_pk, message) -> None:
        self._messages[doc_pk].append({"level": logging.ERROR, "message": message})
        self.has_error = True
-        if doc_pk is not None:
-            self.document_count += 1
-            self.document_error_count += 1

-    def warning(self, doc_pk: int | None, message: str) -> None:
+    def warning(self, doc_pk, message) -> None:
        self._messages[doc_pk].append({"level": logging.WARNING, "message": message})
        self.has_warning = True

-        if doc_pk is not None:
-            self.document_count += 1
-            self.document_warning_count += 1
-        else:
-            # This is the only type of global message we do right now
-            self.global_warning_count += 1
-
-    def info(self, doc_pk: int | None, message: str) -> None:
+    def info(self, doc_pk, message) -> None:
        self._messages[doc_pk].append({"level": logging.INFO, "message": message})
-        self.has_info = True
-
-        if doc_pk is not None:
-            self.document_count += 1
-            self.document_info_count += 1
-
-    # -- Iteration / query --------------------------------------------------
-
-    def document_pks(self) -> list[int | None]:
-        """Return all document PKs (including None for global messages)."""
-        return list(self._messages.keys())
-
-    def iter_messages(self) -> Iterator[tuple[int | None, list[MessageEntry]]]:
-        """Iterate over (doc_pk, messages) pairs."""
-        yield from self._messages.items()
-
-    def __getitem__(self, item: int | None) -> list[MessageEntry]:
-        return self._messages[item]
-
-    # -- Summarize Helpers --------------------------------------------------
-
-    @property
-    def has_global_issues(self) -> bool:
-        return None in self._messages
-
-    @property
-    def total_issue_count(self) -> int:
-        """Total number of error and warning messages across all documents and global."""
-        return (
-            self.document_error_count
-            + self.document_warning_count
-            + self.global_warning_count
-        )
-
-    # -- Logging output (used by Celery task path) --------------------------

    def log_messages(self) -> None:
-        """Write all messages to the ``paperless.sanity_checker`` logger.
+        logger = logging.getLogger("paperless.sanity_checker")

-        This is the output path for headless / Celery execution.
-        Management commands use Rich rendering instead.
-        """
        if len(self._messages) == 0:
            logger.info("Sanity checker detected no issues.")
-            return
+        else:
+            # Query once
+            all_docs = Document.global_objects.all()

-        doc_pks = [pk for pk in self._messages if pk is not None]
-        titles: dict[int, str] = {}
-        if doc_pks:
-            titles = dict(
-                Document.global_objects.filter(pk__in=doc_pks)
-                .only("pk", "title")
-                .values_list("pk", "title"),
-            )
+            for doc_pk in self._messages:
+                if doc_pk is not None:
+                    doc = all_docs.get(pk=doc_pk)
+                    logger.info(
+                        f"Detected following issue(s) with document #{doc.pk},"
+                        f" titled {doc.title}",
+                    )
+                for msg in self._messages[doc_pk]:
+                    logger.log(msg["level"], msg["message"])

-        for doc_pk, entries in self._messages.items():
-            if doc_pk is not None:
-                title = titles.get(doc_pk, "Unknown")
-                logger.info(
-                    "Detected following issue(s) with document #%s, titled %s",
-                    doc_pk,
-                    title,
-                )
-            for msg in entries:
-                logger.log(msg["level"], msg["message"])
+    def __len__(self):
+        return len(self._messages)
+
+    def __getitem__(self, item):
+        return self._messages[item]


 class SanityCheckFailedException(Exception):
    pass


-# ---------------------------------------------------------------------------
-# Internal helpers
-# ---------------------------------------------------------------------------
+def check_sanity(*, progress=False, scheduled=True) -> SanityCheckMessages:
+    paperless_task = PaperlessTask.objects.create(
+        task_id=uuid.uuid4(),
+        type=PaperlessTask.TaskType.SCHEDULED_TASK
+        if scheduled
+        else PaperlessTask.TaskType.MANUAL_TASK,
+        task_name=PaperlessTask.TaskName.CHECK_SANITY,
+        status=states.STARTED,
+        date_created=timezone.now(),
+        date_started=timezone.now(),
+    )
+    messages = SanityCheckMessages()

-
-def _build_present_files() -> set[Path]:
-    """Collect all files in MEDIA_ROOT, excluding directories and ignorable files."""
    present_files = {
        x.resolve()
        for x in Path(settings.MEDIA_ROOT).glob("**/*")
@@ -176,178 +82,95 @@ def _build_present_files() -> set[Path]:
    }

    lockfile = Path(settings.MEDIA_LOCK).resolve()
-    present_files.discard(lockfile)
+    if lockfile in present_files:
+        present_files.remove(lockfile)

    general_config = GeneralConfig()
    app_logo = general_config.app_logo or settings.APP_LOGO
    if app_logo:
        logo_file = Path(settings.MEDIA_ROOT / Path(app_logo.lstrip("/"))).resolve()
-        present_files.discard(logo_file)
+        if logo_file in present_files:
+            present_files.remove(logo_file)

-    return present_files
-
-
-def _check_thumbnail(
-    doc: Document,
-    messages: SanityCheckMessages,
-    present_files: set[Path],
-) -> None:
-    """Verify the thumbnail exists and is readable."""
-    thumbnail_path: Final[Path] = Path(doc.thumbnail_path).resolve()
-    if not thumbnail_path.exists() or not thumbnail_path.is_file():
-        messages.error(doc.pk, "Thumbnail of document does not exist.")
-        return
-
-    present_files.discard(thumbnail_path)
-    try:
-        _ = thumbnail_path.read_bytes()
-    except OSError as e:
-        messages.error(doc.pk, f"Cannot read thumbnail file of document: {e}")
-
-
-def _check_original(
-    doc: Document,
-    messages: SanityCheckMessages,
-    present_files: set[Path],
-) -> None:
-    """Verify the original file exists, is readable, and has matching checksum."""
-    source_path: Final[Path] = Path(doc.source_path).resolve()
-    if not source_path.exists() or not source_path.is_file():
-        messages.error(doc.pk, "Original of document does not exist.")
-        return
-
-    present_files.discard(source_path)
-    try:
-        checksum = hashlib.md5(source_path.read_bytes()).hexdigest()
-    except OSError as e:
-        messages.error(doc.pk, f"Cannot read original file of document: {e}")
-    else:
-        if checksum != doc.checksum:
-            messages.error(
-                doc.pk,
-                f"Checksum mismatch. Stored: {doc.checksum}, actual: {checksum}.",
-            )
-
-
-def _check_archive(
-    doc: Document,
-    messages: SanityCheckMessages,
-    present_files: set[Path],
-) -> None:
-    """Verify archive file consistency: checksum/filename pairing and file integrity."""
-    if doc.archive_checksum is not None and doc.archive_filename is None:
-        messages.error(
-            doc.pk,
-            "Document has an archive file checksum, but no archive filename.",
-        )
-    elif doc.archive_checksum is None and doc.archive_filename is not None:
-        messages.error(
-            doc.pk,
-            "Document has an archive file, but its checksum is missing.",
-        )
-    elif doc.has_archive_version:
-        if TYPE_CHECKING:
-            assert isinstance(doc.archive_path, Path)
-        archive_path: Final[Path] = Path(doc.archive_path).resolve()
-        if not archive_path.exists() or not archive_path.is_file():
-            messages.error(doc.pk, "Archived version of document does not exist.")
-            return
-
-        present_files.discard(archive_path)
-        try:
-            checksum = hashlib.md5(archive_path.read_bytes()).hexdigest()
-        except OSError as e:
-            messages.error(
-                doc.pk,
-                f"Cannot read archive file of document: {e}",
-            )
+    for doc in tqdm(Document.global_objects.all(), disable=not progress):
+        # Check sanity of the thumbnail
+        thumbnail_path: Final[Path] = Path(doc.thumbnail_path).resolve()
+        if not thumbnail_path.exists() or not thumbnail_path.is_file():
+            messages.error(doc.pk, "Thumbnail of document does not exist.")
        else:
-            if checksum != doc.archive_checksum:
-                messages.error(
-                    doc.pk,
-                    "Checksum mismatch of archived document. "
-                    f"Stored: {doc.archive_checksum}, actual: {checksum}.",
-                )
+            if thumbnail_path in present_files:
+                present_files.remove(thumbnail_path)
+            try:
+                _ = thumbnail_path.read_bytes()
+            except OSError as e:
+                messages.error(doc.pk, f"Cannot read thumbnail file of document: {e}")

+        # Check sanity of the original file
+        # TODO: extract method
+        source_path: Final[Path] = Path(doc.source_path).resolve()
+        if not source_path.exists() or not source_path.is_file():
+            messages.error(doc.pk, "Original of document does not exist.")
+        else:
+            if source_path in present_files:
+                present_files.remove(source_path)
+            try:
+                checksum = hashlib.md5(source_path.read_bytes()).hexdigest()
+            except OSError as e:
+                messages.error(doc.pk, f"Cannot read original file of document: {e}")
+            else:
+                if checksum != doc.checksum:
+                    messages.error(
+                        doc.pk,
+                        "Checksum mismatch. "
+                        f"Stored: {doc.checksum}, actual: {checksum}.",
+                    )

-def _check_content(doc: Document, messages: SanityCheckMessages) -> None:
-    """Flag documents with no OCR content."""
-    if not doc.content:
-        messages.info(doc.pk, "Document contains no OCR data")
+        # Check sanity of the archive file.
+        if doc.archive_checksum is not None and doc.archive_filename is None:
+            messages.error(
+                doc.pk,
+                "Document has an archive file checksum, but no archive filename.",
+            )
+        elif doc.archive_checksum is None and doc.archive_filename is not None:
+            messages.error(
+                doc.pk,
+                "Document has an archive file, but its checksum is missing.",
+            )
+        elif doc.has_archive_version:
+            archive_path: Final[Path] = Path(doc.archive_path).resolve()
+            if not archive_path.exists() or not archive_path.is_file():
+                messages.error(doc.pk, "Archived version of document does not exist.")
+            else:
+                if archive_path in present_files:
+                    present_files.remove(archive_path)
+                try:
+                    checksum = hashlib.md5(archive_path.read_bytes()).hexdigest()
+                except OSError as e:
+                    messages.error(
+                        doc.pk,
+                        f"Cannot read archive file of document : {e}",
+                    )
+                else:
+                    if checksum != doc.archive_checksum:
+                        messages.error(
+                            doc.pk,
+                            "Checksum mismatch of archived document. "
+                            f"Stored: {doc.archive_checksum}, "
+                            f"actual: {checksum}.",
+                        )

-
-def _check_document(
-    doc: Document,
-    messages: SanityCheckMessages,
-    present_files: set[Path],
-) -> None:
-    """Run all checks for a single document."""
-    _check_thumbnail(doc, messages, present_files)
-    _check_original(doc, messages, present_files)
-    _check_archive(doc, messages, present_files)
-    _check_content(doc, messages)
-
-
-# ---------------------------------------------------------------------------
-# Public entry point
-# ---------------------------------------------------------------------------
-
-
-def check_sanity(
-    *,
-    scheduled: bool = True,
-    iter_wrapper: IterWrapper[Document] = _identity,
-) -> SanityCheckMessages:
-    """Run a full sanity check on the document archive.
-
-    Args:
-        scheduled: Whether this is a scheduled (automatic) or manual check.
-            Controls the task type recorded in the database.
-        iter_wrapper: A callable that wraps the document iterable, e.g.,
-            for progress bar display. Defaults to identity (no wrapping).
-
-    Returns:
-        A SanityCheckMessages instance containing all detected issues.
-    """
-    paperless_task = PaperlessTask.objects.create(
-        task_id=uuid.uuid4(),
-        type=(
-            PaperlessTask.TaskType.SCHEDULED_TASK
-            if scheduled
-            else PaperlessTask.TaskType.MANUAL_TASK
-        ),
-        task_name=PaperlessTask.TaskName.CHECK_SANITY,
-        status=states.STARTED,
-        date_created=timezone.now(),
-        date_started=timezone.now(),
-    )
-
-    messages = SanityCheckMessages()
-    present_files = _build_present_files()
-
-    documents = Document.global_objects.all()
-    for doc in iter_wrapper(documents):
-        _check_document(doc, messages, present_files)
+        # other document checks
+        if not doc.content:
+            messages.info(doc.pk, "Document contains no OCR data")

    for extra_file in present_files:
        messages.warning(None, f"Orphaned file in media dir: {extra_file}")

    paperless_task.status = states.SUCCESS if not messages.has_error else states.FAILURE
-    if messages.total_issue_count == 0:
-        paperless_task.result = "No issues found."
-    else:
-        parts: list[str] = []
-        if messages.document_error_count:
-            parts.append(f"{messages.document_error_count} document(s) with errors")
-        if messages.document_warning_count:
-            parts.append(f"{messages.document_warning_count} document(s) with warnings")
-        if messages.global_warning_count:
-            parts.append(f"{messages.global_warning_count} global warning(s)")
-        paperless_task.result = ", ".join(parts) + " found."
-        if messages.has_error:
-            paperless_task.result += " Check logs for details."
-
+    # result is a short summary giving the number of messages
+    paperless_task.result = f"{len(messages)} issues found."
+    if messages.has_error:
+        paperless_task.result += " Check logs for details."
    paperless_task.date_done = timezone.now()
    paperless_task.save(update_fields=["status", "result", "date_done"])
-
    return messages
--- a/src/documents/serialisers.py
+++ b/src/documents/serialisers.py
@@ -1724,15 +1724,6 @@ class BulkEditSerializer(
        except ValueError:
            raise serializers.ValidationError("invalid rotation degrees")

-    def _validate_source_mode(self, parameters) -> None:
-        source_mode = parameters.get(
-            "source_mode",
-            bulk_edit.SourceModeChoices.LATEST_VERSION,
-        )
-        if source_mode not in bulk_edit.SourceModeChoices.__dict__.values():
-            raise serializers.ValidationError("Invalid source_mode")
-        parameters["source_mode"] = source_mode
-
    def _validate_parameters_split(self, parameters) -> None:
        if "pages" not in parameters:
            raise serializers.ValidationError("pages not specified")
@@ -1833,9 +1824,6 @@ class BulkEditSerializer(
        method = attrs["method"]
        parameters = attrs["parameters"]

-        if "source_mode" in parameters:
-            self._validate_source_mode(parameters)
-
        if method == bulk_edit.set_correspondent:
            self._validate_parameters_correspondent(parameters)
        elif method == bulk_edit.set_document_type:
--- a/src/documents/signals/handlers.py
+++ b/src/documents/signals/handlers.py
@@ -4,7 +4,6 @@ import logging
 import shutil
 from pathlib import Path
 from typing import TYPE_CHECKING
-from typing import Any

 from celery import shared_task
 from celery import states
@@ -33,14 +32,12 @@ from documents.file_handling import create_source_path_directory
 from documents.file_handling import delete_empty_directories
 from documents.file_handling import generate_filename
 from documents.file_handling import generate_unique_filename
-from documents.models import Correspondent
 from documents.models import CustomField
 from documents.models import CustomFieldInstance
 from documents.models import Document
-from documents.models import DocumentType
+from documents.models import MatchingModel
 from documents.models import PaperlessTask
 from documents.models import SavedView
-from documents.models import StoragePath
 from documents.models import Tag
 from documents.models import UiSettings
 from documents.models import Workflow
@@ -84,41 +81,47 @@ def add_inbox_tags(sender, document: Document, logging_group=None, **kwargs) ->
    document.add_nested_tags(inbox_tags)


+def _suggestion_printer(
+    stdout,
+    style_func,
+    suggestion_type: str,
+    document: Document,
+    selected: MatchingModel | None,  # None if nothing matched
+    base_url: str | None = None,
+) -> None:
+    """
+    Small helper that writes one suggestion for a document to the given stdout.
+    """
+    doc_str = str(document)
+    if base_url is not None:  # with a base URL, the link goes on its own line
+        stdout.write(style_func.SUCCESS(doc_str))
+        stdout.write(style_func.SUCCESS(f"{base_url}/documents/{document.pk}"))
+    else:  # otherwise the primary key is appended to the document line
+        stdout.write(style_func.SUCCESS(f"{doc_str} [{document.pk}]"))
+    stdout.write(f"Suggest {suggestion_type}: {selected}")  # written unstyled
+
+
 def set_correspondent(
-    sender: object,
+    sender,
    document: Document,
    *,
-    logging_group: object = None,
+    logging_group=None,
    classifier: DocumentClassifier | None = None,
-    replace: bool = False,
-    use_first: bool = True,
-    dry_run: bool = False,
-    **kwargs: Any,
-) -> Correspondent | None:
-    """
-    Assign a correspondent to a document based on classifier results.
-
-    Args:
-        document: The document to classify.
-        logging_group: Optional logging group for structured log output.
-        classifier: The trained classifier. If None, only rule-based matching runs.
-        replace: If True, overwrite an existing correspondent assignment.
-        use_first: If True, pick the first match when multiple correspondents
-            match. If False, skip assignment when multiple match.
-        dry_run: If True, compute and return the selection without saving.
-        **kwargs: Absorbed for Django signal compatibility (e.g. sender, signal).
-
-    Returns:
-        The correspondent that was (or would be) assigned, or None if no match
-        was found or assignment was skipped.
-    """
+    replace=False,
+    use_first=True,
+    suggest=False,
+    base_url=None,
+    stdout=None,
+    style_func=None,
+    **kwargs,
+) -> None:
    if document.correspondent and not replace:
-        return None
+        return

    potential_correspondents = matching.match_correspondents(document, classifier)
+
    potential_count = len(potential_correspondents)
    selected = potential_correspondents[0] if potential_correspondents else None
-
    if potential_count > 1:
        if use_first:
            logger.debug(
@@ -132,53 +135,49 @@ def set_correspondent(
                f"not assigning any correspondent",
                extra={"group": logging_group},
            )
-            return None
+            return

-    if (selected or replace) and not dry_run:
-        logger.info(
-            f"Assigning correspondent {selected} to {document}",
-            extra={"group": logging_group},
-        )
-        document.correspondent = selected
-        document.save(update_fields=("correspondent",))
+    if selected or replace:
+        if suggest:
+            _suggestion_printer(
+                stdout,
+                style_func,
+                "correspondent",
+                document,
+                selected,
+                base_url,
+            )
+        else:
+            logger.info(
+                f"Assigning correspondent {selected} to {document}",
+                extra={"group": logging_group},
+            )

-    return selected
+            document.correspondent = selected
+            document.save(update_fields=("correspondent",))


 def set_document_type(
-    sender: object,
+    sender,
    document: Document,
    *,
-    logging_group: object = None,
+    logging_group=None,
    classifier: DocumentClassifier | None = None,
-    replace: bool = False,
-    use_first: bool = True,
-    dry_run: bool = False,
-    **kwargs: Any,
-) -> DocumentType | None:
-    """
-    Assign a document type to a document based on classifier results.
-
-    Args:
-        document: The document to classify.
-        logging_group: Optional logging group for structured log output.
-        classifier: The trained classifier. If None, only rule-based matching runs.
-        replace: If True, overwrite an existing document type assignment.
-        use_first: If True, pick the first match when multiple types match.
-            If False, skip assignment when multiple match.
-        dry_run: If True, compute and return the selection without saving.
-        **kwargs: Absorbed for Django signal compatibility (e.g. sender, signal).
-
-    Returns:
-        The document type that was (or would be) assigned, or None if no match
-        was found or assignment was skipped.
-    """
+    replace=False,
+    use_first=True,
+    suggest=False,
+    base_url=None,
+    stdout=None,
+    style_func=None,
+    **kwargs,
+) -> None:
    if document.document_type and not replace:
-        return None
+        return

-    potential_document_types = matching.match_document_types(document, classifier)
-    potential_count = len(potential_document_types)
-    selected = potential_document_types[0] if potential_document_types else None
+    potential_document_type = matching.match_document_types(document, classifier)
+
+    potential_count = len(potential_document_type)
+    selected = potential_document_type[0] if potential_document_type else None

    if potential_count > 1:
        if use_first:
@@ -193,64 +192,42 @@ def set_document_type(
                f"not assigning any document type",
                extra={"group": logging_group},
            )
-            return None
+            return

-    if (selected or replace) and not dry_run:
-        logger.info(
-            f"Assigning document type {selected} to {document}",
-            extra={"group": logging_group},
-        )
-        document.document_type = selected
-        document.save(update_fields=("document_type",))
+    if selected or replace:
+        if suggest:
+            _suggestion_printer(
+                stdout,
+                style_func,
+                "document type",
+                document,
+                selected,
+                base_url,
+            )
+        else:
+            logger.info(
+                f"Assigning document type {selected} to {document}",
+                extra={"group": logging_group},
+            )

-    return selected
+            document.document_type = selected
+            document.save(update_fields=("document_type",))


 def set_tags(
-    sender: object,
+    sender,
    document: Document,
    *,
-    logging_group: object = None,
+    logging_group=None,
    classifier: DocumentClassifier | None = None,
-    replace: bool = False,
-    dry_run: bool = False,
-    **kwargs: Any,
-) -> tuple[set[Tag], set[Tag]]:
-    """
-    Assign tags to a document based on classifier results.
-
-    When replace=True, existing auto-matched and rule-matched tags are removed
-    before applying the new set (inbox tags and manually-added tags are preserved).
-
-    Args:
-        document: The document to classify.
-        logging_group: Optional logging group for structured log output.
-        classifier: The trained classifier. If None, only rule-based matching runs.
-        replace: If True, remove existing classifier-managed tags before applying
-            new ones. Inbox tags and manually-added tags are always preserved.
-        dry_run: If True, compute what would change without saving anything.
-        **kwargs: Absorbed for Django signal compatibility (e.g. sender, signal).
-
-    Returns:
-        A two-tuple of (tags_added, tags_removed). In non-replace mode,
-        tags_removed is always an empty set. In dry_run mode, neither set
-        is applied to the database.
-    """
-    # Compute which tags would be removed under replace mode.
-    # The filter mirrors the .delete() call below: keep inbox tags and
-    # manually-added tags (match="" and not auto-matched).
+    replace=False,
+    suggest=False,
+    base_url=None,
+    stdout=None,
+    style_func=None,
+    **kwargs,
+) -> None:
    if replace:
-        tags_to_remove: set[Tag] = set(
-            document.tags.exclude(
-                is_inbox_tag=True,
-            ).exclude(
-                Q(match="") & ~Q(matching_algorithm=Tag.MATCH_AUTO),
-            ),
-        )
-    else:
-        tags_to_remove = set()
-
-    if replace and not dry_run:
        Document.tags.through.objects.filter(document=document).exclude(
            Q(tag__is_inbox_tag=True),
        ).exclude(
@@ -258,53 +235,65 @@ def set_tags(
        ).delete()

    current_tags = set(document.tags.all())
-    matched_tags = matching.match_tags(document, classifier)
-    tags_to_add = set(matched_tags) - current_tags

-    if tags_to_add and not dry_run:
+    matched_tags = matching.match_tags(document, classifier)
+
+    relevant_tags = set(matched_tags) - current_tags
+
+    if suggest:
+        extra_tags = current_tags - set(matched_tags)
+        extra_tags = [
+            t for t in extra_tags if t.matching_algorithm == MatchingModel.MATCH_AUTO
+        ]
+        if not relevant_tags and not extra_tags:
+            return
+        doc_str = style_func.SUCCESS(str(document))
+        if base_url:
+            stdout.write(doc_str)
+            stdout.write(f"{base_url}/documents/{document.pk}")
+        else:
+            stdout.write(doc_str + style_func.SUCCESS(f" [{document.pk}]"))
+        if relevant_tags:
+            stdout.write("Suggest tags: " + ", ".join([t.name for t in relevant_tags]))
+        if extra_tags:
+            stdout.write("Extra tags: " + ", ".join([t.name for t in extra_tags]))
+    else:
+        if not relevant_tags:
+            return
+
+        message = 'Tagging "{}" with "{}"'
        logger.info(
-            f'Tagging "{document}" with "{", ".join(t.name for t in tags_to_add)}"',
+            message.format(document, ", ".join([t.name for t in relevant_tags])),
            extra={"group": logging_group},
        )
-        document.add_nested_tags(tags_to_add)

-    return tags_to_add, tags_to_remove
+        document.add_nested_tags(relevant_tags)


 def set_storage_path(
-    sender: object,
+    sender,
    document: Document,
    *,
-    logging_group: object = None,
+    logging_group=None,
    classifier: DocumentClassifier | None = None,
-    replace: bool = False,
-    use_first: bool = True,
-    dry_run: bool = False,
-    **kwargs: Any,
-) -> StoragePath | None:
-    """
-    Assign a storage path to a document based on classifier results.
-
-    Args:
-        document: The document to classify.
-        logging_group: Optional logging group for structured log output.
-        classifier: The trained classifier. If None, only rule-based matching runs.
-        replace: If True, overwrite an existing storage path assignment.
-        use_first: If True, pick the first match when multiple paths match.
-            If False, skip assignment when multiple match.
-        dry_run: If True, compute and return the selection without saving.
-        **kwargs: Absorbed for Django signal compatibility (e.g. sender, signal).
-
-    Returns:
-        The storage path that was (or would be) assigned, or None if no match
-        was found or assignment was skipped.
-    """
+    replace=False,
+    use_first=True,
+    suggest=False,
+    base_url=None,
+    stdout=None,
+    style_func=None,
+    **kwargs,
+) -> None:
    if document.storage_path and not replace:
-        return None
+        return

-    potential_storage_paths = matching.match_storage_paths(document, classifier)
-    potential_count = len(potential_storage_paths)
-    selected = potential_storage_paths[0] if potential_storage_paths else None
+    potential_storage_path = matching.match_storage_paths(
+        document,
+        classifier,
+    )
+
+    potential_count = len(potential_storage_path)
+    selected = potential_storage_path[0] if potential_storage_path else None

    if potential_count > 1:
        if use_first:
@@ -319,17 +308,26 @@ def set_storage_path(
                f"not assigning any storage directory",
                extra={"group": logging_group},
            )
-            return None
+            return

-    if (selected or replace) and not dry_run:
-        logger.info(
-            f"Assigning storage path {selected} to {document}",
-            extra={"group": logging_group},
-        )
-        document.storage_path = selected
-        document.save(update_fields=("storage_path",))
+    if selected or replace:
+        if suggest:
+            _suggestion_printer(
+                stdout,
+                style_func,
+                "storage directory",
+                document,
+                selected,
+                base_url,
+            )
+        else:
+            logger.info(
+                f"Assigning storage path {selected} to {document}",
+                extra={"group": logging_group},
+            )

-    return selected
+            document.storage_path = selected
+            document.save(update_fields=("storage_path",))


 # see empty_trash in documents/tasks.py for signal handling
--- a/src/documents/tasks.py
+++ b/src/documents/tasks.py
@@ -4,13 +4,11 @@ import logging
 import shutil
 import uuid
 import zipfile
-from collections.abc import Callable
-from collections.abc import Iterable
 from pathlib import Path
 from tempfile import TemporaryDirectory
 from tempfile import mkstemp
-from typing import TypeVar

+import tqdm
 from celery import Task
 from celery import shared_task
 from celery import states
@@ -68,19 +66,11 @@ from paperless_ai.indexing import llm_index_add_or_update_document
 from paperless_ai.indexing import llm_index_remove_document
 from paperless_ai.indexing import update_llm_index

-_T = TypeVar("_T")
-IterWrapper = Callable[[Iterable[_T]], Iterable[_T]]
-
-
 if settings.AUDIT_LOG_ENABLED:
    from auditlog.models import LogEntry
 logger = logging.getLogger("paperless.tasks")


-def _identity(iterable: Iterable[_T]) -> Iterable[_T]:
-    return iterable
-
-
@shared_task
 def index_optimize() -> None:
    ix = index.open_index()
@@ -88,13 +78,13 @@ def index_optimize() -> None:
    writer.commit(optimize=True)


-def index_reindex(*, iter_wrapper: IterWrapper[Document] = _identity) -> None:
+def index_reindex(*, progress_bar_disable: bool = False) -> None:
    documents = Document.objects.all()

    ix = index.open_index(recreate=True)

    with AsyncWriter(ix) as writer:
-        for document in iter_wrapper(documents):
+        for document in tqdm.tqdm(documents, disable=progress_bar_disable):
            index.update_document(writer, document)


@@ -237,30 +227,20 @@ def consume_file(
@shared_task
 def sanity_check(*, scheduled=True, raise_on_error=True):
    messages = sanity_checker.check_sanity(scheduled=scheduled)
+
    messages.log_messages()

-    if not messages.has_error and not messages.has_warning and not messages.has_info:
-        return "No issues detected."
-
-    parts: list[str] = []
-    if messages.document_error_count:
-        parts.append(f"{messages.document_error_count} document(s) with errors")
-    if messages.document_warning_count:
-        parts.append(f"{messages.document_warning_count} document(s) with warnings")
-    if messages.document_info_count:
-        parts.append(f"{messages.document_info_count} document(s) with infos")
-    if messages.global_warning_count:
-        parts.append(f"{messages.global_warning_count} global warning(s)")
-
-    summary = ", ".join(parts) + " found."
-
    if messages.has_error:
-        message = summary + " Check logs for details."
+        message = "Sanity check exited with errors. See log."
        if raise_on_error:
            raise SanityCheckFailedException(message)
        return message
-
-    return summary
+    elif messages.has_warning:  # reached only when has_error is false
+        return "Sanity check exited with warnings. See log."
+    elif len(messages) > 0:  # messages exist, none set has_error/has_warning
+        return "Sanity check exited with infos. See log."
+    else:  # nothing was recorded at all
+        return "No issues detected."


@shared_task
@@ -285,6 +265,7 @@ def bulk_update_documents(document_ids) -> None:
    ai_config = AIConfig()
    if ai_config.llm_index_enabled:
        update_llm_index(
+            progress_bar_disable=True,
            rebuild=False,
        )

@@ -625,7 +606,7 @@ def update_document_parent_tags(tag: Tag, new_parent: Tag) -> None:
@shared_task
 def llmindex_index(
    *,
-    iter_wrapper: IterWrapper[Document] = _identity,
+    progress_bar_disable=True,
    rebuild=False,
    scheduled=True,
    auto=False,
@@ -648,7 +629,7 @@ def llmindex_index(

        try:
            result = update_llm_index(
-                iter_wrapper=iter_wrapper,
+                progress_bar_disable=progress_bar_disable,
                rebuild=rebuild,
            )
            task.status = states.SUCCESS
--- a/src/documents/tests/conftest.py
+++ b/src/documents/tests/conftest.py
@@ -1,96 +1,10 @@
-import shutil
 import zoneinfo
-from dataclasses import dataclass
-from pathlib import Path
-from typing import TYPE_CHECKING

-import filelock
 import pytest
 from django.contrib.auth import get_user_model
 from pytest_django.fixtures import SettingsWrapper
 from rest_framework.test import APIClient

-from documents.tests.factories import DocumentFactory
-
-if TYPE_CHECKING:
-    from documents.models import Document
-
-
-@dataclass(frozen=True, slots=True)
-class PaperlessDirs:
-    """Standard Paperless-ngx directory layout for tests."""
-
-    media: Path
-    originals: Path
-    archive: Path
-    thumbnails: Path
-
-
-@pytest.fixture(scope="session")
-def samples_dir() -> Path:
-    """Path to the shared test sample documents."""
-    return Path(__file__).parent / "samples" / "documents"
-
-
-@pytest.fixture()
-def paperless_dirs(tmp_path: Path) -> PaperlessDirs:
-    """Create and return the directory structure for testing."""
-    media = tmp_path / "media"
-    dirs = PaperlessDirs(
-        media=media,
-        originals=media / "documents" / "originals",
-        archive=media / "documents" / "archive",
-        thumbnails=media / "documents" / "thumbnails",
-    )
-    for d in (dirs.originals, dirs.archive, dirs.thumbnails):
-        d.mkdir(parents=True)
-    return dirs
-
-
-@pytest.fixture()
-def _media_settings(paperless_dirs: PaperlessDirs, settings) -> None:
-    """Configure Django settings to point at temp directories."""
-    settings.MEDIA_ROOT = paperless_dirs.media
-    settings.ORIGINALS_DIR = paperless_dirs.originals
-    settings.ARCHIVE_DIR = paperless_dirs.archive
-    settings.THUMBNAIL_DIR = paperless_dirs.thumbnails
-    settings.MEDIA_LOCK = paperless_dirs.media / "media.lock"
-    settings.IGNORABLE_FILES = {".DS_Store", "Thumbs.db", "desktop.ini"}
-    settings.APP_LOGO = ""
-
-
-@pytest.fixture()
-def sample_doc(
-    paperless_dirs: PaperlessDirs,
-    _media_settings: None,
-    samples_dir: Path,
-) -> "Document":
-    """Create a document with valid files and matching checksums."""
-    with filelock.FileLock(paperless_dirs.media / "media.lock"):
-        shutil.copy(
-            samples_dir / "originals" / "0000001.pdf",
-            paperless_dirs.originals / "0000001.pdf",
-        )
-        shutil.copy(
-            samples_dir / "archive" / "0000001.pdf",
-            paperless_dirs.archive / "0000001.pdf",
-        )
-        shutil.copy(
-            samples_dir / "thumbnails" / "0000001.webp",
-            paperless_dirs.thumbnails / "0000001.webp",
-        )
-
-    return DocumentFactory(
-        title="test",
-        checksum="42995833e01aea9b3edee44bbfdd7ce1",
-        archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b",
-        content="test content",
-        pk=1,
-        filename="0000001.pdf",
-        mime_type="application/pdf",
-        archive_filename="0000001.pdf",
-    )
-

@pytest.fixture()
 def settings_timezone(settings: SettingsWrapper) -> zoneinfo.ZoneInfo:
@@ -114,14 +28,3 @@ def authenticated_rest_api_client(rest_api_client: APIClient):
    user = UserModel.objects.create_user(username="testuser", password="password")
    rest_api_client.force_authenticate(user=user)
    yield rest_api_client
-
-
-@pytest.fixture(scope="session", autouse=True)
-def faker_session_locale():
-    """Set Faker locale for reproducibility."""
-    return "en_US"
-
-
-@pytest.fixture(scope="session", autouse=True)
-def faker_seed():
-    return 12345
--- a/src/documents/tests/factories.py
+++ b/src/documents/tests/factories.py
@@ -1,67 +1,17 @@
-"""
-Factory-boy factories for documents app models.
-"""
-
-from __future__ import annotations
-
-import factory
+from factory import Faker
 from factory.django import DjangoModelFactory

 from documents.models import Correspondent
 from documents.models import Document
-from documents.models import DocumentType
-from documents.models import MatchingModel
-from documents.models import StoragePath
-from documents.models import Tag


 class CorrespondentFactory(DjangoModelFactory):
    class Meta:
        model = Correspondent

-    name = factory.Sequence(lambda n: f"{factory.Faker('company')} {n}")
-    match = ""
-    matching_algorithm = MatchingModel.MATCH_NONE
-
-
-class DocumentTypeFactory(DjangoModelFactory):
-    class Meta:
-        model = DocumentType
-
-    name = factory.Sequence(lambda n: f"{factory.Faker('bs')} {n}")
-    match = ""
-    matching_algorithm = MatchingModel.MATCH_NONE
-
-
-class TagFactory(DjangoModelFactory):
-    class Meta:
-        model = Tag
-
-    name = factory.Sequence(lambda n: f"{factory.Faker('word')} {n}")
-    match = ""
-    matching_algorithm = MatchingModel.MATCH_NONE
-    is_inbox_tag = False
-
-
-class StoragePathFactory(DjangoModelFactory):
-    class Meta:
-        model = StoragePath
-
-    name = factory.Sequence(
-        lambda n: f"{factory.Faker('file_path', depth=2, extension='')} {n}",
-    )
-    path = factory.LazyAttribute(lambda o: f"{o.name}/{{title}}")
-    match = ""
-    matching_algorithm = MatchingModel.MATCH_NONE
+    name = Faker("name")


 class DocumentFactory(DjangoModelFactory):
    class Meta:
        model = Document
-
-    title = factory.Faker("sentence", nb_words=4)
-    checksum = factory.Faker("md5")
-    content = factory.Faker("paragraph")
-    correspondent = None
-    document_type = None
-    storage_path = None
--- a/src/documents/tests/management/test_management_sanity_checker.py
+++ b/src/documents/tests/management/test_management_sanity_checker.py
@@ -1,193 +0,0 @@
-"""Tests for the document_sanity_checker management command.
-
-Verifies Rich rendering (table, panel, summary) and end-to-end CLI behavior.
-"""
-
-from __future__ import annotations
-
-from io import StringIO
-from pathlib import Path
-from typing import TYPE_CHECKING
-
-import pytest
-from django.core.management import call_command
-from rich.console import Console
-
-from documents.management.commands.document_sanity_checker import Command
-from documents.sanity_checker import SanityCheckMessages
-from documents.tests.factories import DocumentFactory
-
-if TYPE_CHECKING:
-    from documents.models import Document
-    from documents.tests.conftest import PaperlessDirs
-
-
-def _render_to_string(messages: SanityCheckMessages) -> str:
-    """Render command output to a plain string for assertion."""
-    buf = StringIO()
-    cmd = Command()
-    cmd.console = Console(file=buf, width=120, no_color=True)
-    cmd._render_results(messages)
-    return buf.getvalue()
-
-
-# ---------------------------------------------------------------------------
-# Rich rendering
-# ---------------------------------------------------------------------------
-
-
-class TestRenderResultsNoIssues:
-    """No DB access needed -- renders an empty SanityCheckMessages."""
-
-    def test_shows_panel(self) -> None:
-        output = _render_to_string(SanityCheckMessages())
-        assert "No issues detected" in output
-        assert "Sanity Check" in output
-
-
-@pytest.mark.django_db
-class TestRenderResultsWithIssues:
-    def test_error_row(self, sample_doc: Document) -> None:
-        msgs = SanityCheckMessages()
-        msgs.error(sample_doc.pk, "Original missing")
-        output = _render_to_string(msgs)
-        assert "Sanity Check Results" in output
-        assert "ERROR" in output
-        assert "Original missing" in output
-        assert f"#{sample_doc.pk}" in output
-        assert sample_doc.title in output
-
-    def test_warning_row(self, sample_doc: Document) -> None:
-        msgs = SanityCheckMessages()
-        msgs.warning(sample_doc.pk, "Suspicious file")
-        output = _render_to_string(msgs)
-        assert "WARN" in output
-        assert "Suspicious file" in output
-
-    def test_info_row(self, sample_doc: Document) -> None:
-        msgs = SanityCheckMessages()
-        msgs.info(sample_doc.pk, "No OCR data")
-        output = _render_to_string(msgs)
-        assert "INFO" in output
-        assert "No OCR data" in output
-
-    @pytest.mark.usefixtures("_media_settings")
-    def test_global_message(self) -> None:
-        msgs = SanityCheckMessages()
-        msgs.warning(None, "Orphaned file: /tmp/stray.pdf")
-        output = _render_to_string(msgs)
-        assert "(global)" in output
-        assert "Orphaned file" in output
-
-    def test_multiple_messages_same_doc(self, sample_doc: Document) -> None:
-        msgs = SanityCheckMessages()
-        msgs.error(sample_doc.pk, "Thumbnail missing")
-        msgs.error(sample_doc.pk, "Checksum mismatch")
-        output = _render_to_string(msgs)
-        assert "Thumbnail missing" in output
-        assert "Checksum mismatch" in output
-
-    @pytest.mark.usefixtures("_media_settings")
-    def test_unknown_doc_pk(self) -> None:
-        msgs = SanityCheckMessages()
-        msgs.error(99999, "Ghost document")
-        output = _render_to_string(msgs)
-        assert "#99999" in output
-        assert "Unknown" in output
-
-
-@pytest.mark.django_db
-class TestRenderResultsSummary:
-    def test_errors_only(self, sample_doc: Document) -> None:
-        msgs = SanityCheckMessages()
-        msgs.error(sample_doc.pk, "broken")
-        output = _render_to_string(msgs)
-        assert "1 document(s) with" in output
-        assert "errors" in output
-
-    def test_warnings_only(self, sample_doc: Document) -> None:
-        msgs = SanityCheckMessages()
-        msgs.warning(sample_doc.pk, "odd")
-        output = _render_to_string(msgs)
-        assert "1 document(s) with" in output
-        assert "warnings" in output
-
-    def test_infos_only(self, sample_doc: Document) -> None:
-        msgs = SanityCheckMessages()
-        msgs.info(sample_doc.pk, "no OCR")
-        output = _render_to_string(msgs)
-        assert "1 document(s) with infos" in output
-
-    def test_empty_messages(self) -> None:
-        msgs = SanityCheckMessages()
-        output = _render_to_string(msgs)
-        assert "No issues detected." in output
-
-    def test_document_errors_and_global_warnings(self, sample_doc: Document) -> None:
-        msgs = SanityCheckMessages()
-        msgs.error(sample_doc.pk, "broken")
-        msgs.warning(None, "orphan")
-        output = _render_to_string(msgs)
-        assert "1 document(s) with" in output
-        assert "errors" in output
-        assert "1 global warning(s)" in output
-        assert "2 document(s)" not in output
-
-    def test_global_warnings_only(self) -> None:
-        msgs = SanityCheckMessages()
-        msgs.warning(None, "extra file")
-        output = _render_to_string(msgs)
-        assert "1 global warning(s)" in output
-        assert "document(s) with" not in output
-
-    def test_all_levels_combined(self, sample_doc: Document) -> None:
-        msgs = SanityCheckMessages()
-        msgs.error(sample_doc.pk, "broken")
-        msgs.warning(sample_doc.pk, "odd")
-        msgs.info(sample_doc.pk, "fyi")
-        msgs.warning(None, "extra file")
-        output = _render_to_string(msgs)
-        assert "1 document(s) with errors" in output
-        assert "1 document(s) with warnings" in output
-        assert "1 document(s) with infos" in output
-        assert "1 global warning(s)" in output
-
-
-# ---------------------------------------------------------------------------
-# End-to-end command execution
-# ---------------------------------------------------------------------------
-
-
-@pytest.mark.django_db
-@pytest.mark.management
-class TestDocumentSanityCheckerCommand:
-    def test_no_issues(self, sample_doc: Document) -> None:
-        out = StringIO()
-        call_command("document_sanity_checker", "--no-progress-bar", stdout=out)
-        assert "No issues detected" in out.getvalue()
-
-    def test_missing_original(self, sample_doc: Document) -> None:
-        Path(sample_doc.source_path).unlink()
-        out = StringIO()
-        call_command("document_sanity_checker", "--no-progress-bar", stdout=out)
-        output = out.getvalue()
-        assert "ERROR" in output
-        assert "Original of document does not exist" in output
-
-    @pytest.mark.usefixtures("_media_settings")
-    def test_checksum_mismatch(self, paperless_dirs: PaperlessDirs) -> None:
-        """Lightweight document with zero-byte files triggers checksum mismatch."""
-        doc = DocumentFactory(
-            title="test",
-            content="test",
-            filename="test.pdf",
-            checksum="abc",
-        )
-        Path(doc.source_path).touch()
-        Path(doc.thumbnail_path).touch()
-
-        out = StringIO()
-        call_command("document_sanity_checker", "--no-progress-bar", stdout=out)
-        output = out.getvalue()
-        assert "ERROR" in output
-        assert "Checksum mismatch. Stored: abc, actual:" in output
--- a/src/documents/tests/test_api_bulk_edit.py
+++ b/src/documents/tests/test_api_bulk_edit.py
@@ -1395,10 +1395,7 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
                {
                    "documents": [self.doc2.id],
                    "method": "edit_pdf",
-                    "parameters": {
-                        "operations": [{"page": 1}],
-                        "source_mode": "explicit_selection",
-                    },
+                    "parameters": {"operations": [{"page": 1}]},
                },
            ),
            content_type="application/json",
@@ -1410,7 +1407,6 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
        args, kwargs = m.call_args
        self.assertCountEqual(args[0], [self.doc2.id])
        self.assertEqual(kwargs["operations"], [{"page": 1}])
-        self.assertEqual(kwargs["source_mode"], "explicit_selection")
        self.assertEqual(kwargs["user"], self.user)

    def test_edit_pdf_invalid_params(self) -> None:
@@ -1576,24 +1572,6 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
            response.content,
        )

-        # invalid source mode
-        response = self.client.post(
-            "/api/documents/bulk_edit/",
-            json.dumps(
-                {
-                    "documents": [self.doc2.id],
-                    "method": "edit_pdf",
-                    "parameters": {
-                        "operations": [{"page": 1}],
-                        "source_mode": "not_a_mode",
-                    },
-                },
-            ),
-            content_type="application/json",
-        )
-        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
-        self.assertIn(b"Invalid source_mode", response.content)
-
    @mock.patch("documents.serialisers.bulk_edit.edit_pdf")
    def test_edit_pdf_page_out_of_bounds(self, m) -> None:
        """
--- a/src/documents/tests/test_bulk_edit.py
+++ b/src/documents/tests/test_bulk_edit.py
@@ -405,9 +405,7 @@ class TestBulkEdit(DirectoriesMixin, TestCase):
        self.assertTrue(Document.objects.filter(id=self.doc1.id).exists())
        self.assertFalse(Document.objects.filter(id=version.id).exists())

-    def test_resolve_root_and_source_doc_latest_version_prefers_newest_version(
-        self,
-    ) -> None:
+    def test_get_root_and_current_doc_mapping(self) -> None:
        version1 = Document.objects.create(
            checksum="B-v1",
            title="B version 1",
@@ -419,14 +417,18 @@ class TestBulkEdit(DirectoriesMixin, TestCase):
            root_document=self.doc2,
        )

-        root_doc, source_doc = bulk_edit._resolve_root_and_source_doc(
-            self.doc2,
-            source_mode="latest_version",
+        root_ids_by_doc_id = bulk_edit._get_root_ids_by_doc_id(
+            [self.doc2.id, version1.id, version2.id],
        )
+        self.assertEqual(root_ids_by_doc_id[self.doc2.id], self.doc2.id)
+        self.assertEqual(root_ids_by_doc_id[version1.id], self.doc2.id)
+        self.assertEqual(root_ids_by_doc_id[version2.id], self.doc2.id)

-        self.assertEqual(root_doc.id, self.doc2.id)
-        self.assertEqual(source_doc.id, version2.id)
-        self.assertNotEqual(source_doc.id, version1.id)
+        root_docs, current_docs = bulk_edit._get_root_and_current_docs_by_root_id(
+            {self.doc2.id},
+        )
+        self.assertEqual(root_docs[self.doc2.id].id, self.doc2.id)
+        self.assertEqual(current_docs[self.doc2.id].id, version2.id)

    @mock.patch("documents.tasks.bulk_update_documents.delay")
    def test_set_permissions(self, m) -> None:
@@ -1039,34 +1041,6 @@ class TestPDFActions(DirectoriesMixin, TestCase):
            self.assertIsNotNone(overrides)
            self.assertEqual(result, "OK")

-    @mock.patch("documents.data_models.magic.from_file", return_value="application/pdf")
-    @mock.patch("documents.tasks.consume_file.delay")
-    @mock.patch("pikepdf.open")
-    def test_rotate_explicit_selection_uses_root_source_when_root_selected(
-        self,
-        mock_open,
-        mock_consume_delay,
-        mock_magic,
-    ):
-        Document.objects.create(
-            checksum="B-v1",
-            title="B version 1",
-            root_document=self.doc2,
-        )
-        fake_pdf = mock.MagicMock()
-        fake_pdf.pages = [mock.Mock()]
-        mock_open.return_value.__enter__.return_value = fake_pdf
-
-        result = bulk_edit.rotate(
-            [self.doc2.id],
-            90,
-            source_mode="explicit_selection",
-        )
-
-        self.assertEqual(result, "OK")
-        mock_open.assert_called_once_with(self.doc2.source_path)
-        mock_consume_delay.assert_called_once()
-
    @mock.patch("documents.tasks.consume_file.delay")
    @mock.patch("pikepdf.Pdf.save")
    @mock.patch("documents.data_models.magic.from_file", return_value="application/pdf")
@@ -1091,34 +1065,6 @@ class TestPDFActions(DirectoriesMixin, TestCase):
        self.assertIsNotNone(overrides)
        self.assertEqual(result, "OK")

-    @mock.patch("documents.data_models.magic.from_file", return_value="application/pdf")
-    @mock.patch("documents.tasks.consume_file.delay")
-    @mock.patch("pikepdf.open")
-    def test_delete_pages_explicit_selection_uses_root_source_when_root_selected(
-        self,
-        mock_open,
-        mock_consume_delay,
-        mock_magic,
-    ):
-        Document.objects.create(
-            checksum="B-v1",
-            title="B version 1",
-            root_document=self.doc2,
-        )
-        fake_pdf = mock.MagicMock()
-        fake_pdf.pages = [mock.Mock(), mock.Mock()]
-        mock_open.return_value.__enter__.return_value = fake_pdf
-
-        result = bulk_edit.delete_pages(
-            [self.doc2.id],
-            [1],
-            source_mode="explicit_selection",
-        )
-
-        self.assertEqual(result, "OK")
-        mock_open.assert_called_once_with(self.doc2.source_path)
-        mock_consume_delay.assert_called_once()
-
    @mock.patch("documents.tasks.consume_file.delay")
    @mock.patch("pikepdf.Pdf.save")
    def test_delete_pages_with_error(self, mock_pdf_save, mock_consume_delay):
@@ -1267,40 +1213,6 @@ class TestPDFActions(DirectoriesMixin, TestCase):
        self.assertTrue(str(consumable.original_file).endswith("_edited.pdf"))
        self.assertIsNotNone(overrides)

-    @mock.patch("documents.data_models.magic.from_file", return_value="application/pdf")
-    @mock.patch("documents.tasks.consume_file.delay")
-    @mock.patch("pikepdf.new")
-    @mock.patch("pikepdf.open")
-    def test_edit_pdf_explicit_selection_uses_root_source_when_root_selected(
-        self,
-        mock_open,
-        mock_new,
-        mock_consume_delay,
-        mock_magic,
-    ):
-        Document.objects.create(
-            checksum="B-v1",
-            title="B version 1",
-            root_document=self.doc2,
-        )
-        fake_pdf = mock.MagicMock()
-        fake_pdf.pages = [mock.Mock()]
-        mock_open.return_value.__enter__.return_value = fake_pdf
-        output_pdf = mock.MagicMock()
-        output_pdf.pages = []
-        mock_new.return_value = output_pdf
-
-        result = bulk_edit.edit_pdf(
-            [self.doc2.id],
-            operations=[{"page": 1}],
-            update_document=True,
-            source_mode="explicit_selection",
-        )
-
-        self.assertEqual(result, "OK")
-        mock_open.assert_called_once_with(self.doc2.source_path)
-        mock_consume_delay.assert_called_once()
-
    @mock.patch("documents.bulk_edit.group")
    @mock.patch("documents.tasks.consume_file.s")
    def test_edit_pdf_without_metadata(
@@ -1421,34 +1333,6 @@ class TestPDFActions(DirectoriesMixin, TestCase):
        self.assertEqual(consumable.root_document_id, doc.id)
        self.assertIsNotNone(overrides)

-    @mock.patch("documents.data_models.magic.from_file", return_value="application/pdf")
-    @mock.patch("documents.tasks.consume_file.delay")
-    @mock.patch("pikepdf.open")
-    def test_remove_password_explicit_selection_uses_root_source_when_root_selected(
-        self,
-        mock_open,
-        mock_consume_delay,
-        mock_magic,
-    ) -> None:
-        Document.objects.create(
-            checksum="A-v1",
-            title="A version 1",
-            root_document=self.doc1,
-        )
-        fake_pdf = mock.MagicMock()
-        mock_open.return_value.__enter__.return_value = fake_pdf
-
-        result = bulk_edit.remove_password(
-            [self.doc1.id],
-            password="secret",
-            update_document=True,
-            source_mode="explicit_selection",
-        )
-
-        self.assertEqual(result, "OK")
-        mock_open.assert_called_once_with(self.doc1.source_path, password="secret")
-        mock_consume_delay.assert_called_once()
-
    @mock.patch("documents.bulk_edit.chord")
    @mock.patch("documents.bulk_edit.group")
    @mock.patch("documents.tasks.consume_file.s")
--- a/src/documents/tests/test_management.py
+++ b/src/documents/tests/test_management.py
@@ -134,7 +134,6 @@ class TestRenamer(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
        self.assertIsFile(doc2.archive_path)


-@pytest.mark.management
 class TestCreateClassifier(TestCase):
    @mock.patch(
        "documents.management.commands.document_create_classifier.train_classifier",
@@ -145,6 +144,32 @@ class TestCreateClassifier(TestCase):
        m.assert_called_once()


+@pytest.mark.management
+class TestSanityChecker(DirectoriesMixin, TestCase):
+    def test_no_issues(self) -> None:
+        with self.assertLogs() as capture:
+            call_command("document_sanity_checker")
+
+        self.assertEqual(len(capture.output), 1)
+        self.assertIn("Sanity checker detected no issues.", capture.output[0])
+
+    def test_errors(self) -> None:
+        doc = Document.objects.create(
+            title="test",
+            content="test",
+            filename="test.pdf",
+            checksum="abc",
+        )
+        Path(doc.source_path).touch()
+        Path(doc.thumbnail_path).touch()
+
+        with self.assertLogs() as capture:
+            call_command("document_sanity_checker")
+
+        self.assertEqual(len(capture.output), 2)
+        self.assertIn("Checksum mismatch. Stored: abc, actual:", capture.output[1])
+
+
@pytest.mark.management
 class TestConvertMariaDBUUID(TestCase):
    @mock.patch("django.db.connection.schema_editor")
--- a/src/documents/tests/test_management_exporter.py
+++ b/src/documents/tests/test_management_exporter.py
@@ -288,7 +288,7 @@ class TestExportImport(
            self.assertEqual(Permission.objects.count(), num_permission_objects)
            messages = check_sanity()
            # everything is alright after the test
-            self.assertEqual(messages.total_issue_count, 0)
+            self.assertEqual(len(messages), 0)

    def test_exporter_with_filename_format(self) -> None:
        shutil.rmtree(Path(self.dirs.media_dir) / "documents")
--- a/src/documents/tests/test_management_retagger.py
+++ b/src/documents/tests/test_management_retagger.py
@@ -1,442 +1,298 @@
-"""
-Tests for the document_retagger management command.
-"""
-
-from __future__ import annotations
-
 import pytest
 from django.core.management import call_command
 from django.core.management.base import CommandError
+from django.test import TestCase

 from documents.models import Correspondent
 from documents.models import Document
 from documents.models import DocumentType
-from documents.models import MatchingModel
 from documents.models import StoragePath
 from documents.models import Tag
-from documents.tests.factories import CorrespondentFactory
-from documents.tests.factories import DocumentFactory
-from documents.tests.factories import DocumentTypeFactory
-from documents.tests.factories import StoragePathFactory
-from documents.tests.factories import TagFactory
 from documents.tests.utils import DirectoriesMixin

-# ---------------------------------------------------------------------------
-# Module-level type aliases
-# ---------------------------------------------------------------------------
-
-StoragePathTuple = tuple[StoragePath, StoragePath, StoragePath]
-TagTuple = tuple[Tag, Tag, Tag, Tag, Tag]
-CorrespondentTuple = tuple[Correspondent, Correspondent]
-DocumentTypeTuple = tuple[DocumentType, DocumentType]
-DocumentTuple = tuple[Document, Document, Document, Document]
-
-
-# ---------------------------------------------------------------------------
-# Fixtures
-# ---------------------------------------------------------------------------
-
-
-@pytest.fixture()
-def storage_paths(db) -> StoragePathTuple:
-    """Three storage paths with varying match rules."""
-    sp1 = StoragePathFactory(
-        path="{created_data}/{title}",
-        match="auto document",
-        matching_algorithm=MatchingModel.MATCH_LITERAL,
-    )
-    sp2 = StoragePathFactory(
-        path="{title}",
-        match="^first|^unrelated",
-        matching_algorithm=MatchingModel.MATCH_REGEX,
-    )
-    sp3 = StoragePathFactory(
-        path="{title}",
-        match="^blah",
-        matching_algorithm=MatchingModel.MATCH_REGEX,
-    )
-    return sp1, sp2, sp3
-
-
-@pytest.fixture()
-def tags(db) -> TagTuple:
-    """Tags covering the common matching scenarios."""
-    tag_first = TagFactory(match="first", matching_algorithm=Tag.MATCH_ANY)
-    tag_second = TagFactory(match="second", matching_algorithm=Tag.MATCH_ANY)
-    tag_inbox = TagFactory(is_inbox_tag=True)
-    tag_no_match = TagFactory()
-    tag_auto = TagFactory(matching_algorithm=Tag.MATCH_AUTO)
-    return tag_first, tag_second, tag_inbox, tag_no_match, tag_auto
-
-
-@pytest.fixture()
-def correspondents(db) -> CorrespondentTuple:
-    """Two correspondents matching 'first' and 'second' content."""
-    c_first = CorrespondentFactory(
-        match="first",
-        matching_algorithm=MatchingModel.MATCH_ANY,
-    )
-    c_second = CorrespondentFactory(
-        match="second",
-        matching_algorithm=MatchingModel.MATCH_ANY,
-    )
-    return c_first, c_second
-
-
-@pytest.fixture()
-def document_types(db) -> DocumentTypeTuple:
-    """Two document types matching 'first' and 'second' content."""
-    dt_first = DocumentTypeFactory(
-        match="first",
-        matching_algorithm=MatchingModel.MATCH_ANY,
-    )
-    dt_second = DocumentTypeFactory(
-        match="second",
-        matching_algorithm=MatchingModel.MATCH_ANY,
-    )
-    return dt_first, dt_second
-
-
-@pytest.fixture()
-def documents(storage_paths: StoragePathTuple, tags: TagTuple) -> DocumentTuple:
-    """Four documents with varied content used across most retagger tests."""
-    _, _, sp3 = storage_paths
-    _, _, tag_inbox, tag_no_match, tag_auto = tags
-
-    d1 = DocumentFactory(checksum="A", title="A", content="first document")
-    d2 = DocumentFactory(checksum="B", title="B", content="second document")
-    d3 = DocumentFactory(
-        checksum="C",
-        title="C",
-        content="unrelated document",
-        storage_path=sp3,
-    )
-    d4 = DocumentFactory(checksum="D", title="D", content="auto document")
-
-    d3.tags.add(tag_inbox, tag_no_match)
-    d4.tags.add(tag_auto)
-
-    return d1, d2, d3, d4
-
-
-def _get_docs() -> DocumentTuple:
-    return (
-        Document.objects.get(title="A"),
-        Document.objects.get(title="B"),
-        Document.objects.get(title="C"),
-        Document.objects.get(title="D"),
-    )
-
-
-# ---------------------------------------------------------------------------
-# Tag assignment
-# ---------------------------------------------------------------------------
-

@pytest.mark.management
-@pytest.mark.django_db
-class TestRetaggerTags(DirectoriesMixin):
-    @pytest.mark.usefixtures("documents")
-    def test_add_tags(self, tags: TagTuple) -> None:
-        tag_first, tag_second, *_ = tags
+class TestRetagger(DirectoriesMixin, TestCase):
+    def make_models(self) -> None:
+        self.sp1 = StoragePath.objects.create(
+            name="dummy a",
+            path="{created_data}/{title}",
+            match="auto document",
+            matching_algorithm=StoragePath.MATCH_LITERAL,
+        )
+        self.sp2 = StoragePath.objects.create(
+            name="dummy b",
+            path="{title}",
+            match="^first|^unrelated",
+            matching_algorithm=StoragePath.MATCH_REGEX,
+        )
+
+        self.sp3 = StoragePath.objects.create(
+            name="dummy c",
+            path="{title}",
+            match="^blah",
+            matching_algorithm=StoragePath.MATCH_REGEX,
+        )
+
+        self.d1 = Document.objects.create(
+            checksum="A",
+            title="A",
+            content="first document",
+        )
+        self.d2 = Document.objects.create(
+            checksum="B",
+            title="B",
+            content="second document",
+        )
+        self.d3 = Document.objects.create(
+            checksum="C",
+            title="C",
+            content="unrelated document",
+            storage_path=self.sp3,
+        )
+        self.d4 = Document.objects.create(
+            checksum="D",
+            title="D",
+            content="auto document",
+        )
+
+        self.tag_first = Tag.objects.create(
+            name="tag1",
+            match="first",
+            matching_algorithm=Tag.MATCH_ANY,
+        )
+        self.tag_second = Tag.objects.create(
+            name="tag2",
+            match="second",
+            matching_algorithm=Tag.MATCH_ANY,
+        )
+        self.tag_inbox = Tag.objects.create(name="test", is_inbox_tag=True)
+        self.tag_no_match = Tag.objects.create(name="test2")
+        self.tag_auto = Tag.objects.create(
+            name="tagauto",
+            matching_algorithm=Tag.MATCH_AUTO,
+        )
+
+        self.d3.tags.add(self.tag_inbox)
+        self.d3.tags.add(self.tag_no_match)
+        self.d4.tags.add(self.tag_auto)
+
+        self.correspondent_first = Correspondent.objects.create(
+            name="c1",
+            match="first",
+            matching_algorithm=Correspondent.MATCH_ANY,
+        )
+        self.correspondent_second = Correspondent.objects.create(
+            name="c2",
+            match="second",
+            matching_algorithm=Correspondent.MATCH_ANY,
+        )
+
+        self.doctype_first = DocumentType.objects.create(
+            name="dt1",
+            match="first",
+            matching_algorithm=DocumentType.MATCH_ANY,
+        )
+        self.doctype_second = DocumentType.objects.create(
+            name="dt2",
+            match="second",
+            matching_algorithm=DocumentType.MATCH_ANY,
+        )
+
+    def get_updated_docs(self):
+        return (
+            Document.objects.get(title="A"),
+            Document.objects.get(title="B"),
+            Document.objects.get(title="C"),
+            Document.objects.get(title="D"),
+        )
+
+    def setUp(self) -> None:
+        super().setUp()
+        self.make_models()
+
+    def test_add_tags(self) -> None:
        call_command("document_retagger", "--tags")
-        d_first, d_second, d_unrelated, d_auto = _get_docs()
+        d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()

-        assert d_first.tags.count() == 1
-        assert d_second.tags.count() == 1
-        assert d_unrelated.tags.count() == 2
-        assert d_auto.tags.count() == 1
-        assert d_first.tags.first() == tag_first
-        assert d_second.tags.first() == tag_second
+        self.assertEqual(d_first.tags.count(), 1)
+        self.assertEqual(d_second.tags.count(), 1)
+        self.assertEqual(d_unrelated.tags.count(), 2)
+        self.assertEqual(d_auto.tags.count(), 1)

-    def test_overwrite_removes_stale_tags_and_preserves_inbox(
-        self,
-        documents: DocumentTuple,
-        tags: TagTuple,
-    ) -> None:
-        d1, *_ = documents
-        tag_first, tag_second, tag_inbox, tag_no_match, _ = tags
-        d1.tags.add(tag_second)
+        self.assertEqual(d_first.tags.first(), self.tag_first)
+        self.assertEqual(d_second.tags.first(), self.tag_second)
+
+    def test_add_type(self) -> None:
+        call_command("document_retagger", "--document_type")
+        d_first, d_second, _, _ = self.get_updated_docs()
+
+        self.assertEqual(d_first.document_type, self.doctype_first)
+        self.assertEqual(d_second.document_type, self.doctype_second)
+
+    def test_add_correspondent(self) -> None:
+        call_command("document_retagger", "--correspondent")
+        d_first, d_second, _, _ = self.get_updated_docs()
+
+        self.assertEqual(d_first.correspondent, self.correspondent_first)
+        self.assertEqual(d_second.correspondent, self.correspondent_second)
+
+    def test_overwrite_preserve_inbox(self) -> None:
+        self.d1.tags.add(self.tag_second)

        call_command("document_retagger", "--tags", "--overwrite")

-        d_first, d_second, d_unrelated, d_auto = _get_docs()
+        d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()

-        assert Tag.objects.filter(id=tag_second.id).exists()
-        assert list(d_first.tags.values_list("id", flat=True)) == [tag_first.id]
-        assert list(d_second.tags.values_list("id", flat=True)) == [tag_second.id]
-        assert set(d_unrelated.tags.values_list("id", flat=True)) == {
-            tag_inbox.id,
-            tag_no_match.id,
-        }
-        assert d_auto.tags.count() == 0
+        self.assertIsNotNone(Tag.objects.get(id=self.tag_second.id))

-    @pytest.mark.usefixtures("documents")
-    @pytest.mark.parametrize(
-        "extra_args",
-        [
-            pytest.param([], id="no_base_url"),
-            pytest.param(["--base-url=http://localhost"], id="with_base_url"),
-        ],
-    )
-    def test_suggest_does_not_apply_tags(self, extra_args: list[str]) -> None:
-        call_command("document_retagger", "--tags", "--suggest", *extra_args)
-        d_first, d_second, _, d_auto = _get_docs()
-
-        assert d_first.tags.count() == 0
-        assert d_second.tags.count() == 0
-        assert d_auto.tags.count() == 1
-
-
-# ---------------------------------------------------------------------------
-# Document type assignment
-# ---------------------------------------------------------------------------
-
-
-@pytest.mark.management
-@pytest.mark.django_db
-class TestRetaggerDocumentType(DirectoriesMixin):
-    @pytest.mark.usefixtures("documents")
-    def test_add_type(self, document_types: DocumentTypeTuple) -> None:
-        dt_first, dt_second = document_types
-        call_command("document_retagger", "--document_type")
-        d_first, d_second, _, _ = _get_docs()
-
-        assert d_first.document_type == dt_first
-        assert d_second.document_type == dt_second
-
-    @pytest.mark.usefixtures("documents", "document_types")
-    @pytest.mark.parametrize(
-        "extra_args",
-        [
-            pytest.param([], id="no_base_url"),
-            pytest.param(["--base-url=http://localhost"], id="with_base_url"),
-        ],
-    )
-    def test_suggest_does_not_apply_document_type(self, extra_args: list[str]) -> None:
-        call_command("document_retagger", "--document_type", "--suggest", *extra_args)
-        d_first, d_second, _, _ = _get_docs()
-
-        assert d_first.document_type is None
-        assert d_second.document_type is None
-
-    @pytest.mark.parametrize(
-        ("use_first_flag", "expects_assignment"),
-        [
-            pytest.param(["--use-first"], True, id="use_first_assigns_first_match"),
-            pytest.param([], False, id="no_use_first_skips_ambiguous_match"),
-        ],
-    )
-    def test_use_first_with_multiple_matches(
-        self,
-        use_first_flag: list[str],
-        *,
-        expects_assignment: bool,
-    ) -> None:
-        DocumentTypeFactory(
-            match="ambiguous",
-            matching_algorithm=MatchingModel.MATCH_ANY,
+        self.assertCountEqual(
+            [tag.id for tag in d_first.tags.all()],
+            [self.tag_first.id],
        )
-        DocumentTypeFactory(
-            match="ambiguous",
-            matching_algorithm=MatchingModel.MATCH_ANY,
+        self.assertCountEqual(
+            [tag.id for tag in d_second.tags.all()],
+            [self.tag_second.id],
        )
-        doc = DocumentFactory(content="ambiguous content")
-
-        call_command("document_retagger", "--document_type", *use_first_flag)
-
-        doc.refresh_from_db()
-        assert (doc.document_type is not None) is expects_assignment
-
-
-# ---------------------------------------------------------------------------
-# Correspondent assignment
-# ---------------------------------------------------------------------------
-
-
-@pytest.mark.management
-@pytest.mark.django_db
-class TestRetaggerCorrespondent(DirectoriesMixin):
-    @pytest.mark.usefixtures("documents")
-    def test_add_correspondent(self, correspondents: CorrespondentTuple) -> None:
-        c_first, c_second = correspondents
-        call_command("document_retagger", "--correspondent")
-        d_first, d_second, _, _ = _get_docs()
-
-        assert d_first.correspondent == c_first
-        assert d_second.correspondent == c_second
-
-    @pytest.mark.usefixtures("documents", "correspondents")
-    @pytest.mark.parametrize(
-        "extra_args",
-        [
-            pytest.param([], id="no_base_url"),
-            pytest.param(["--base-url=http://localhost"], id="with_base_url"),
-        ],
-    )
-    def test_suggest_does_not_apply_correspondent(self, extra_args: list[str]) -> None:
-        call_command("document_retagger", "--correspondent", "--suggest", *extra_args)
-        d_first, d_second, _, _ = _get_docs()
-
-        assert d_first.correspondent is None
-        assert d_second.correspondent is None
-
-    @pytest.mark.parametrize(
-        ("use_first_flag", "expects_assignment"),
-        [
-            pytest.param(["--use-first"], True, id="use_first_assigns_first_match"),
-            pytest.param([], False, id="no_use_first_skips_ambiguous_match"),
-        ],
-    )
-    def test_use_first_with_multiple_matches(
-        self,
-        use_first_flag: list[str],
-        *,
-        expects_assignment: bool,
-    ) -> None:
-        CorrespondentFactory(
-            match="ambiguous",
-            matching_algorithm=MatchingModel.MATCH_ANY,
+        self.assertCountEqual(
+            [tag.id for tag in d_unrelated.tags.all()],
+            [self.tag_inbox.id, self.tag_no_match.id],
        )
-        CorrespondentFactory(
-            match="ambiguous",
-            matching_algorithm=MatchingModel.MATCH_ANY,
+        self.assertEqual(d_auto.tags.count(), 0)
+
+    def test_add_tags_suggest(self) -> None:
+        call_command("document_retagger", "--tags", "--suggest")
+        d_first, d_second, _, d_auto = self.get_updated_docs()
+
+        self.assertEqual(d_first.tags.count(), 0)
+        self.assertEqual(d_second.tags.count(), 0)
+        self.assertEqual(d_auto.tags.count(), 1)
+
+    def test_add_type_suggest(self) -> None:
+        call_command("document_retagger", "--document_type", "--suggest")
+        d_first, d_second, _, _ = self.get_updated_docs()
+
+        self.assertIsNone(d_first.document_type)
+        self.assertIsNone(d_second.document_type)
+
+    def test_add_correspondent_suggest(self) -> None:
+        call_command("document_retagger", "--correspondent", "--suggest")
+        d_first, d_second, _, _ = self.get_updated_docs()
+
+        self.assertIsNone(d_first.correspondent)
+        self.assertIsNone(d_second.correspondent)
+
+    def test_add_tags_suggest_url(self) -> None:
+        call_command(
+            "document_retagger",
+            "--tags",
+            "--suggest",
+            "--base-url=http://localhost",
        )
-        doc = DocumentFactory(content="ambiguous content")
+        d_first, d_second, _, d_auto = self.get_updated_docs()

-        call_command("document_retagger", "--correspondent", *use_first_flag)
+        self.assertEqual(d_first.tags.count(), 0)
+        self.assertEqual(d_second.tags.count(), 0)
+        self.assertEqual(d_auto.tags.count(), 1)

-        doc.refresh_from_db()
-        assert (doc.correspondent is not None) is expects_assignment
+    def test_add_type_suggest_url(self) -> None:
+        call_command(
+            "document_retagger",
+            "--document_type",
+            "--suggest",
+            "--base-url=http://localhost",
+        )
+        d_first, d_second, _, _ = self.get_updated_docs()

+        self.assertIsNone(d_first.document_type)
+        self.assertIsNone(d_second.document_type)

-# ---------------------------------------------------------------------------
-# Storage path assignment
-# ---------------------------------------------------------------------------
+    def test_add_correspondent_suggest_url(self) -> None:
+        call_command(
+            "document_retagger",
+            "--correspondent",
+            "--suggest",
+            "--base-url=http://localhost",
+        )
+        d_first, d_second, _, _ = self.get_updated_docs()

+        self.assertIsNone(d_first.correspondent)
+        self.assertIsNone(d_second.correspondent)

-@pytest.mark.management
-@pytest.mark.django_db
-class TestRetaggerStoragePath(DirectoriesMixin):
-    @pytest.mark.usefixtures("documents")
-    def test_add_storage_path(self, storage_paths: StoragePathTuple) -> None:
+    def test_add_storage_path(self) -> None:
        """
-        GIVEN documents matching various storage path rules
-        WHEN document_retagger --storage_path is called
-        THEN matching documents get the correct path; existing path is unchanged
+        GIVEN:
+            - 2 storage paths with documents which match them
+            - 1 document which matches but has a storage path
+        WHEN:
+            - document retagger is called
+        THEN:
+            - Matching documents' storage paths are updated
+            - Non-matching documents have no storage path
+            - Existing storage path is left unchanged
        """
-        sp1, sp2, sp3 = storage_paths
-        call_command("document_retagger", "--storage_path")
-        d_first, d_second, d_unrelated, d_auto = _get_docs()
+        call_command(
+            "document_retagger",
+            "--storage_path",
+        )
+        d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()

-        assert d_first.storage_path == sp2
-        assert d_auto.storage_path == sp1
-        assert d_second.storage_path is None
-        assert d_unrelated.storage_path == sp3
+        self.assertEqual(d_first.storage_path, self.sp2)
+        self.assertEqual(d_auto.storage_path, self.sp1)
+        self.assertIsNone(d_second.storage_path)
+        self.assertEqual(d_unrelated.storage_path, self.sp3)

-    @pytest.mark.usefixtures("documents")
-    def test_overwrite_storage_path(self, storage_paths: StoragePathTuple) -> None:
+    def test_overwrite_storage_path(self) -> None:
        """
-        GIVEN a document with an existing storage path that matches a different rule
-        WHEN document_retagger --storage_path --overwrite is called
-        THEN the existing path is replaced by the newly matched path
+        GIVEN:
+            - 2 storage paths with documents which match them
+            - 1 document which matches but has a storage path
+        WHEN:
+            - document retagger is called with overwrite
+        THEN:
+            - Matching documents' storage paths are updated
+            - Non-matching documents have no storage path
+            - Existing storage path is overwritten
        """
-        sp1, sp2, _ = storage_paths
        call_command("document_retagger", "--storage_path", "--overwrite")
-        d_first, d_second, d_unrelated, d_auto = _get_docs()
+        d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()

-        assert d_first.storage_path == sp2
-        assert d_auto.storage_path == sp1
-        assert d_second.storage_path is None
-        assert d_unrelated.storage_path == sp2
+        self.assertEqual(d_first.storage_path, self.sp2)
+        self.assertEqual(d_auto.storage_path, self.sp1)
+        self.assertIsNone(d_second.storage_path)
+        self.assertEqual(d_unrelated.storage_path, self.sp2)

-    @pytest.mark.parametrize(
-        ("use_first_flag", "expects_assignment"),
-        [
-            pytest.param(["--use-first"], True, id="use_first_assigns_first_match"),
-            pytest.param([], False, id="no_use_first_skips_ambiguous_match"),
-        ],
-    )
-    def test_use_first_with_multiple_matches(
-        self,
-        use_first_flag: list[str],
-        *,
-        expects_assignment: bool,
-    ) -> None:
-        StoragePathFactory(
-            match="ambiguous",
-            matching_algorithm=MatchingModel.MATCH_ANY,
+    def test_id_range_parameter(self) -> None:
+        commandOutput = ""
+        Document.objects.create(
+            checksum="E",
+            title="E",
+            content="NOT the first document",
        )
-        StoragePathFactory(
-            match="ambiguous",
-            matching_algorithm=MatchingModel.MATCH_ANY,
-        )
-        doc = DocumentFactory(content="ambiguous content")
+        call_command("document_retagger", "--tags", "--id-range", "1", "2")
+        # The retagger shouldn't apply the 'first' tag to our new document
+        self.assertEqual(Document.objects.filter(tags__id=self.tag_first.id).count(), 1)

-        call_command("document_retagger", "--storage_path", *use_first_flag)
+        try:
+            commandOutput = call_command("document_retagger", "--tags", "--id-range")
+        except CommandError:
+            # Error is swallowed, so commandOutput is still "" here
+            None
+        self.assertIn(commandOutput, "Error: argument --id-range: expected 2 arguments")

-        doc.refresh_from_db()
-        assert (doc.storage_path is not None) is expects_assignment
+        try:
+            commandOutput = call_command(
+                "document_retagger",
+                "--tags",
+                "--id-range",
+                "a",
+                "b",
+            )
+        except CommandError:
+            # Error is swallowed; commandOutput keeps its previous value
+            None
+        self.assertIn(commandOutput, "error: argument --id-range: invalid int value:")

-
-# ---------------------------------------------------------------------------
-# ID range filtering
-# ---------------------------------------------------------------------------
-
-
-@pytest.mark.management
-@pytest.mark.django_db
-class TestRetaggerIdRange(DirectoriesMixin):
-    @pytest.mark.usefixtures("documents")
-    @pytest.mark.parametrize(
-        ("id_range_args", "expected_count"),
-        [
-            pytest.param(["1", "2"], 1, id="narrow_range_limits_scope"),
-            pytest.param(["1", "9999"], 2, id="wide_range_tags_all_matches"),
-        ],
-    )
-    def test_id_range_limits_scope(
-        self,
-        tags: TagTuple,
-        id_range_args: list[str],
-        expected_count: int,
-    ) -> None:
-        DocumentFactory(content="NOT the first document")
-        call_command("document_retagger", "--tags", "--id-range", *id_range_args)
-        tag_first, *_ = tags
-        assert Document.objects.filter(tags__id=tag_first.id).count() == expected_count
-
-    @pytest.mark.usefixtures("documents")
-    @pytest.mark.parametrize(
-        "args",
-        [
-            pytest.param(["--tags", "--id-range"], id="missing_both_values"),
-            pytest.param(["--tags", "--id-range", "a", "b"], id="non_integer_values"),
-        ],
-    )
-    def test_id_range_invalid_arguments_raise(self, args: list[str]) -> None:
-        with pytest.raises((CommandError, SystemExit)):
-            call_command("document_retagger", *args)
-
-
-# ---------------------------------------------------------------------------
-# Edge cases
-# ---------------------------------------------------------------------------
-
-
-@pytest.mark.management
-@pytest.mark.django_db
-class TestRetaggerEdgeCases(DirectoriesMixin):
-    @pytest.mark.usefixtures("documents")
-    def test_no_targets_exits_cleanly(self) -> None:
-        """Calling the retagger with no classifier targets should not raise."""
-        call_command("document_retagger")
-
-    @pytest.mark.usefixtures("documents")
-    def test_inbox_only_skips_non_inbox_documents(self) -> None:
-        """--inbox-only must restrict processing to documents with an inbox tag."""
-        call_command("document_retagger", "--tags", "--inbox-only")
-        d_first, _, d_unrelated, _ = _get_docs()
-
-        assert d_first.tags.count() == 0
-        assert d_unrelated.tags.count() == 2
+        call_command("document_retagger", "--tags", "--id-range", "1", "9999")
+        # With the wider range the new document is tagged too: 2 documents
+        self.assertEqual(Document.objects.filter(tags__id=self.tag_first.id).count(), 2)
--- a/src/documents/tests/test_sanity_check.py
+++ b/src/documents/tests/test_sanity_check.py
@@ -1,295 +1,192 @@
-"""Tests for the sanity checker module.
-
-Tests exercise ``check_sanity`` as a whole, verifying document validation,
-orphan detection, task recording, and the iter_wrapper contract.
-"""
-
-from __future__ import annotations
-
 import logging
+import shutil
 from pathlib import Path
-from typing import TYPE_CHECKING

-import pytest
+import filelock
+from django.conf import settings
+from django.test import TestCase
+from django.test import override_settings

 from documents.models import Document
-from documents.models import PaperlessTask
 from documents.sanity_checker import check_sanity
-
-if TYPE_CHECKING:
-    from collections.abc import Iterable
-
-    from documents.tests.conftest import PaperlessDirs
+from documents.tests.utils import DirectoriesMixin


-@pytest.mark.django_db
-class TestCheckSanityNoDocuments:
-    """Sanity checks against an empty archive."""
+class TestSanityCheck(DirectoriesMixin, TestCase):
+    def make_test_data(self):
+        with filelock.FileLock(settings.MEDIA_LOCK):
+            # just make sure that the lockfile is present.
+            shutil.copy(
+                (
+                    Path(__file__).parent
+                    / "samples"
+                    / "documents"
+                    / "originals"
+                    / "0000001.pdf"
+                ),
+                Path(self.dirs.originals_dir) / "0000001.pdf",
+            )
+            shutil.copy(
+                (
+                    Path(__file__).parent
+                    / "samples"
+                    / "documents"
+                    / "archive"
+                    / "0000001.pdf"
+                ),
+                Path(self.dirs.archive_dir) / "0000001.pdf",
+            )
+            shutil.copy(
+                (
+                    Path(__file__).parent
+                    / "samples"
+                    / "documents"
+                    / "thumbnails"
+                    / "0000001.webp"
+                ),
+                Path(self.dirs.thumbnail_dir) / "0000001.webp",
+            )

-    @pytest.mark.usefixtures("_media_settings")
-    def test_no_documents(self) -> None:
+        return Document.objects.create(
+            title="test",
+            checksum="42995833e01aea9b3edee44bbfdd7ce1",
+            archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b",
+            content="test",
+            pk=1,
+            filename="0000001.pdf",
+            mime_type="application/pdf",
+            archive_filename="0000001.pdf",
+        )
+
+    def assertSanityError(self, doc: Document, messageRegex) -> None:
        messages = check_sanity()
-        assert not messages.has_error
-        assert not messages.has_warning
-        assert messages.total_issue_count == 0
-
-    @pytest.mark.usefixtures("_media_settings")
-    def test_no_issues_logs_clean(self, caplog: pytest.LogCaptureFixture) -> None:
-        messages = check_sanity()
-        with caplog.at_level(logging.INFO, logger="paperless.sanity_checker"):
+        self.assertTrue(messages.has_error)
+        with self.assertLogs() as capture:
            messages.log_messages()
-        assert "Sanity checker detected no issues." in caplog.text
-
-
-@pytest.mark.django_db
-class TestCheckSanityHealthyDocument:
-    def test_no_errors(self, sample_doc: Document) -> None:
-        messages = check_sanity()
-        assert not messages.has_error
-        assert not messages.has_warning
-        assert messages.total_issue_count == 0
-
-
-@pytest.mark.django_db
-class TestCheckSanityThumbnail:
-    def test_missing(self, sample_doc: Document) -> None:
-        Path(sample_doc.thumbnail_path).unlink()
-        messages = check_sanity()
-        assert messages.has_error
-        assert any(
-            "Thumbnail of document does not exist" in m["message"]
-            for m in messages[sample_doc.pk]
-        )
-
-    def test_unreadable(self, sample_doc: Document) -> None:
-        thumb = Path(sample_doc.thumbnail_path)
-        thumb.chmod(0o000)
-        try:
-            messages = check_sanity()
-            assert messages.has_error
-            assert any(
-                "Cannot read thumbnail" in m["message"] for m in messages[sample_doc.pk]
+            self.assertEqual(
+                capture.records[0].message,
+                f"Detected following issue(s) with document #{doc.pk}, titled {doc.title}",
            )
-        finally:
-            thumb.chmod(0o644)
+            self.assertRegex(capture.records[1].message, messageRegex)

-
-@pytest.mark.django_db
-class TestCheckSanityOriginal:
-    def test_missing(self, sample_doc: Document) -> None:
-        Path(sample_doc.source_path).unlink()
+    def test_no_issues(self) -> None:
+        self.make_test_data()
        messages = check_sanity()
-        assert messages.has_error
-        assert any(
-            "Original of document does not exist" in m["message"]
-            for m in messages[sample_doc.pk]
-        )
-
-    def test_checksum_mismatch(self, sample_doc: Document) -> None:
-        sample_doc.checksum = "badhash"
-        sample_doc.save()
-        messages = check_sanity()
-        assert messages.has_error
-        assert any(
-            "Checksum mismatch" in m["message"] and "badhash" in m["message"]
-            for m in messages[sample_doc.pk]
-        )
-
-    def test_unreadable(self, sample_doc: Document) -> None:
-        src = Path(sample_doc.source_path)
-        src.chmod(0o000)
-        try:
-            messages = check_sanity()
-            assert messages.has_error
-            assert any(
-                "Cannot read original" in m["message"] for m in messages[sample_doc.pk]
+        self.assertFalse(messages.has_error)
+        self.assertFalse(messages.has_warning)
+        with self.assertLogs() as capture:
+            messages.log_messages()
+            self.assertEqual(len(capture.output), 1)
+            self.assertEqual(capture.records[0].levelno, logging.INFO)
+            self.assertEqual(
+                capture.records[0].message,
+                "Sanity checker detected no issues.",
            )
-        finally:
-            src.chmod(0o644)

+    def test_no_docs(self) -> None:
+        self.assertEqual(len(check_sanity()), 0)

-@pytest.mark.django_db
-class TestCheckSanityArchive:
-    def test_checksum_without_filename(self, sample_doc: Document) -> None:
-        sample_doc.archive_filename = None
-        sample_doc.save()
+    def test_success(self) -> None:
+        self.make_test_data()
+        self.assertEqual(len(check_sanity()), 0)
+
+    def test_no_thumbnail(self) -> None:
+        doc = self.make_test_data()
+        Path(doc.thumbnail_path).unlink()
+        self.assertSanityError(doc, "Thumbnail of document does not exist")
+
+    def test_thumbnail_no_access(self) -> None:
+        doc = self.make_test_data()
+        Path(doc.thumbnail_path).chmod(0o000)
+        self.assertSanityError(doc, "Cannot read thumbnail file of document")
+        Path(doc.thumbnail_path).chmod(0o777)
+
+    def test_no_original(self) -> None:
+        doc = self.make_test_data()
+        Path(doc.source_path).unlink()
+        self.assertSanityError(doc, "Original of document does not exist.")
+
+    def test_original_no_access(self) -> None:
+        doc = self.make_test_data()
+        Path(doc.source_path).chmod(0o000)
+        self.assertSanityError(doc, "Cannot read original file of document")
+        Path(doc.source_path).chmod(0o777)
+
+    def test_original_checksum_mismatch(self) -> None:
+        doc = self.make_test_data()
+        doc.checksum = "WOW"
+        doc.save()
+        self.assertSanityError(doc, "Checksum mismatch. Stored: WOW, actual: ")
+
+    def test_no_archive(self) -> None:
+        doc = self.make_test_data()
+        Path(doc.archive_path).unlink()
+        self.assertSanityError(doc, "Archived version of document does not exist.")
+
+    def test_archive_no_access(self) -> None:
+        doc = self.make_test_data()
+        Path(doc.archive_path).chmod(0o000)
+        self.assertSanityError(doc, "Cannot read archive file of document")
+        Path(doc.archive_path).chmod(0o777)
+
+    def test_archive_checksum_mismatch(self) -> None:
+        doc = self.make_test_data()
+        doc.archive_checksum = "WOW"
+        doc.save()
+        self.assertSanityError(doc, "Checksum mismatch of archived document")
+
+    def test_empty_content(self) -> None:
+        doc = self.make_test_data()
+        doc.content = ""
+        doc.save()
        messages = check_sanity()
-        assert messages.has_error
-        assert any(
-            "checksum, but no archive filename" in m["message"]
-            for m in messages[sample_doc.pk]
+        self.assertFalse(messages.has_error)
+        self.assertFalse(messages.has_warning)
+        self.assertEqual(len(messages), 1)
+        self.assertRegex(
+            messages[doc.pk][0]["message"],
+            "Document contains no OCR data",
        )

-    def test_filename_without_checksum(self, sample_doc: Document) -> None:
-        sample_doc.archive_checksum = None
-        sample_doc.save()
+    def test_orphaned_file(self) -> None:
+        self.make_test_data()
+        Path(self.dirs.originals_dir, "orphaned").touch()
        messages = check_sanity()
-        assert messages.has_error
-        assert any(
-            "checksum is missing" in m["message"] for m in messages[sample_doc.pk]
+        self.assertTrue(messages.has_warning)
+        self.assertRegex(
+            messages._messages[None][0]["message"],
+            "Orphaned file in media dir",
        )

-    def test_missing_file(self, sample_doc: Document) -> None:
-        Path(sample_doc.archive_path).unlink()
-        messages = check_sanity()
-        assert messages.has_error
-        assert any(
-            "Archived version of document does not exist" in m["message"]
-            for m in messages[sample_doc.pk]
-        )
-
-    def test_checksum_mismatch(self, sample_doc: Document) -> None:
-        sample_doc.archive_checksum = "wronghash"
-        sample_doc.save()
-        messages = check_sanity()
-        assert messages.has_error
-        assert any(
-            "Checksum mismatch of archived document" in m["message"]
-            for m in messages[sample_doc.pk]
-        )
-
-    def test_unreadable(self, sample_doc: Document) -> None:
-        archive = Path(sample_doc.archive_path)
-        archive.chmod(0o000)
-        try:
-            messages = check_sanity()
-            assert messages.has_error
-            assert any(
-                "Cannot read archive" in m["message"] for m in messages[sample_doc.pk]
-            )
-        finally:
-            archive.chmod(0o644)
-
-    def test_no_archive_at_all(self, sample_doc: Document) -> None:
-        """Document with neither archive checksum nor filename is valid."""
-        Path(sample_doc.archive_path).unlink()
-        sample_doc.archive_checksum = None
-        sample_doc.archive_filename = None
-        sample_doc.save()
-        messages = check_sanity()
-        assert not messages.has_error
-
-
-@pytest.mark.django_db
-class TestCheckSanityContent:
-    @pytest.mark.parametrize(
-        "content",
-        [
-            pytest.param("", id="empty-string"),
-        ],
+    @override_settings(
+        APP_LOGO="logo/logo.png",
    )
-    def test_no_content(self, sample_doc: Document, content: str) -> None:
-        sample_doc.content = content
-        sample_doc.save()
+    def test_ignore_logo(self) -> None:
+        self.make_test_data()
+        logo_dir = Path(self.dirs.media_dir, "logo")
+        logo_dir.mkdir(parents=True, exist_ok=True)
+        Path(self.dirs.media_dir, "logo", "logo.png").touch()
        messages = check_sanity()
-        assert not messages.has_error
-        assert not messages.has_warning
-        assert any("no OCR data" in m["message"] for m in messages[sample_doc.pk])
+        self.assertFalse(messages.has_warning)

-
-@pytest.mark.django_db
-class TestCheckSanityOrphans:
-    def test_orphaned_file(
-        self,
-        sample_doc: Document,
-        paperless_dirs: PaperlessDirs,
-    ) -> None:
-        (paperless_dirs.originals / "orphan.pdf").touch()
+    def test_ignore_ignorable_files(self) -> None:
+        self.make_test_data()
+        Path(self.dirs.media_dir, ".DS_Store").touch()
+        Path(self.dirs.media_dir, "desktop.ini").touch()
        messages = check_sanity()
-        assert messages.has_warning
-        assert any("Orphaned file" in m["message"] for m in messages[None])
+        self.assertFalse(messages.has_warning)

-    @pytest.mark.usefixtures("_media_settings")
-    def test_ignorable_files_not_flagged(
-        self,
-        paperless_dirs: PaperlessDirs,
-    ) -> None:
-        (paperless_dirs.media / ".DS_Store").touch()
-        (paperless_dirs.media / "desktop.ini").touch()
-        messages = check_sanity()
-        assert not messages.has_warning
+    def test_archive_filename_no_checksum(self) -> None:
+        doc = self.make_test_data()
+        doc.archive_checksum = None
+        doc.save()
+        self.assertSanityError(doc, "has an archive file, but its checksum is missing.")

-
-@pytest.mark.django_db
-class TestCheckSanityIterWrapper:
-    def test_wrapper_receives_documents(self, sample_doc: Document) -> None:
-        seen: list[Document] = []
-
-        def tracking(iterable: Iterable[Document]) -> Iterable[Document]:
-            for item in iterable:
-                seen.append(item)
-                yield item
-
-        check_sanity(iter_wrapper=tracking)
-        assert len(seen) == 1
-        assert seen[0].pk == sample_doc.pk
-
-    def test_default_works_without_wrapper(self, sample_doc: Document) -> None:
-        messages = check_sanity()
-        assert not messages.has_error
-
-
-@pytest.mark.django_db
-class TestCheckSanityTaskRecording:
-    @pytest.mark.parametrize(
-        ("expected_type", "scheduled"),
-        [
-            pytest.param(PaperlessTask.TaskType.SCHEDULED_TASK, True, id="scheduled"),
-            pytest.param(PaperlessTask.TaskType.MANUAL_TASK, False, id="manual"),
-        ],
-    )
-    @pytest.mark.usefixtures("_media_settings")
-    def test_task_type(self, expected_type: str, *, scheduled: bool) -> None:
-        check_sanity(scheduled=scheduled)
-        task = PaperlessTask.objects.latest("date_created")
-        assert task.task_name == PaperlessTask.TaskName.CHECK_SANITY
-        assert task.type == expected_type
-
-    def test_success_status(self, sample_doc: Document) -> None:
-        check_sanity()
-        task = PaperlessTask.objects.latest("date_created")
-        assert task.status == "SUCCESS"
-
-    def test_failure_status(self, sample_doc: Document) -> None:
-        Path(sample_doc.source_path).unlink()
-        check_sanity()
-        task = PaperlessTask.objects.latest("date_created")
-        assert task.status == "FAILURE"
-        assert "Check logs for details" in task.result
-
-
-@pytest.mark.django_db
-class TestCheckSanityLogMessages:
-    def test_logs_doc_issues(
-        self,
-        sample_doc: Document,
-        caplog: pytest.LogCaptureFixture,
-    ) -> None:
-        Path(sample_doc.source_path).unlink()
-        messages = check_sanity()
-        with caplog.at_level(logging.INFO, logger="paperless.sanity_checker"):
-            messages.log_messages()
-        assert f"document #{sample_doc.pk}" in caplog.text
-        assert "Original of document does not exist" in caplog.text
-
-    def test_logs_global_issues(
-        self,
-        sample_doc: Document,
-        paperless_dirs: PaperlessDirs,
-        caplog: pytest.LogCaptureFixture,
-    ) -> None:
-        (paperless_dirs.originals / "orphan.pdf").touch()
-        messages = check_sanity()
-        with caplog.at_level(logging.WARNING, logger="paperless.sanity_checker"):
-            messages.log_messages()
-        assert "Orphaned file" in caplog.text
-
-    @pytest.mark.usefixtures("_media_settings")
-    def test_logs_unknown_doc_pk(self, caplog: pytest.LogCaptureFixture) -> None:
-        """A doc PK not in the DB logs 'Unknown' as the title."""
-        messages = check_sanity()
-        messages.error(99999, "Ghost document")
-        with caplog.at_level(logging.INFO, logger="paperless.sanity_checker"):
-            messages.log_messages()
-        assert "#99999" in caplog.text
-        assert "Unknown" in caplog.text
+    def test_archive_checksum_no_filename(self) -> None:
+        doc = self.make_test_data()
+        doc.archive_filename = None
+        doc.save()
+        self.assertSanityError(
+            doc,
+            "has an archive file checksum, but no archive filename.",
+        )
--- a/src/documents/tests/test_tasks.py
+++ b/src/documents/tests/test_tasks.py
@@ -3,7 +3,6 @@ from datetime import timedelta
 from pathlib import Path
 from unittest import mock

-import pytest
 from celery import states
 from django.conf import settings
 from django.test import TestCase
@@ -106,83 +105,55 @@ class TestClassifier(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
            self.assertNotEqual(mtime2, mtime3)


-@pytest.mark.django_db
-class TestSanityCheck:
-    @pytest.fixture
-    def mock_check_sanity(self, mocker) -> mock.MagicMock:
-        return mocker.patch("documents.tasks.sanity_checker.check_sanity")
+class TestSanityCheck(DirectoriesMixin, TestCase):
+    @mock.patch("documents.tasks.sanity_checker.check_sanity")
+    def test_sanity_check_success(self, m) -> None:
+        m.return_value = SanityCheckMessages()
+        self.assertEqual(tasks.sanity_check(), "No issues detected.")
+        m.assert_called_once()

-    def test_sanity_check_success(self, mock_check_sanity: mock.MagicMock) -> None:
-        mock_check_sanity.return_value = SanityCheckMessages()
-        assert tasks.sanity_check() == "No issues detected."
-        mock_check_sanity.assert_called_once()
-
-    def test_sanity_check_error_raises(
-        self,
-        mock_check_sanity: mock.MagicMock,
-        sample_doc: Document,
-    ) -> None:
+    @mock.patch("documents.tasks.sanity_checker.check_sanity")
+    def test_sanity_check_error(self, m) -> None:
        messages = SanityCheckMessages()
-        messages.error(sample_doc.pk, "some error")
-        mock_check_sanity.return_value = messages
-        with pytest.raises(SanityCheckFailedException):
-            tasks.sanity_check()
-        mock_check_sanity.assert_called_once()
+        messages.error(None, "Some error")
+        m.return_value = messages
+        self.assertRaises(SanityCheckFailedException, tasks.sanity_check)
+        m.assert_called_once()

-    def test_sanity_check_error_no_raise(
-        self,
-        mock_check_sanity: mock.MagicMock,
-        sample_doc: Document,
-    ) -> None:
+    @mock.patch("documents.tasks.sanity_checker.check_sanity")
+    def test_sanity_check_error_no_raise(self, m) -> None:
        messages = SanityCheckMessages()
-        messages.error(sample_doc.pk, "some error")
-        mock_check_sanity.return_value = messages
+        messages.error(None, "Some error")
+        m.return_value = messages
+        # No exception should be raised
        result = tasks.sanity_check(raise_on_error=False)
-        assert "1 document(s) with errors" in result
-        assert "Check logs for details." in result
-        mock_check_sanity.assert_called_once()
+        self.assertEqual(
+            result,
+            "Sanity check exited with errors. See log.",
+        )
+        m.assert_called_once()

-    def test_sanity_check_warning_only(
-        self,
-        mock_check_sanity: mock.MagicMock,
-    ) -> None:
+    @mock.patch("documents.tasks.sanity_checker.check_sanity")
+    def test_sanity_check_warning(self, m) -> None:
        messages = SanityCheckMessages()
-        messages.warning(None, "extra file")
-        mock_check_sanity.return_value = messages
-        result = tasks.sanity_check()
-        assert result == "1 global warning(s) found."
-        mock_check_sanity.assert_called_once()
+        messages.warning(None, "Some warning")
+        m.return_value = messages
+        self.assertEqual(
+            tasks.sanity_check(),
+            "Sanity check exited with warnings. See log.",
+        )
+        m.assert_called_once()

-    def test_sanity_check_info_only(
-        self,
-        mock_check_sanity: mock.MagicMock,
-        sample_doc: Document,
-    ) -> None:
+    @mock.patch("documents.tasks.sanity_checker.check_sanity")
+    def test_sanity_check_info(self, m) -> None:
        messages = SanityCheckMessages()
-        messages.info(sample_doc.pk, "some info")
-        mock_check_sanity.return_value = messages
-        result = tasks.sanity_check()
-        assert result == "1 document(s) with infos found."
-        mock_check_sanity.assert_called_once()
-
-    def test_sanity_check_errors_warnings_and_infos(
-        self,
-        mock_check_sanity: mock.MagicMock,
-        sample_doc: Document,
-    ) -> None:
-        messages = SanityCheckMessages()
-        messages.error(sample_doc.pk, "broken")
-        messages.warning(sample_doc.pk, "odd")
-        messages.info(sample_doc.pk, "fyi")
-        messages.warning(None, "extra file")
-        mock_check_sanity.return_value = messages
-        result = tasks.sanity_check(raise_on_error=False)
-        assert "1 document(s) with errors" in result
-        assert "1 document(s) with warnings" in result
-        assert "1 document(s) with infos" in result
-        assert "1 global warning(s)" in result
-        assert "Check logs for details." in result
-        mock_check_sanity.assert_called_once()
+        messages.info(None, "Some info")
+        m.return_value = messages
+        self.assertEqual(
+            tasks.sanity_check(),
+            "Sanity check exited with infos. See log.",
+        )
+        m.assert_called_once()


 class TestBulkUpdate(DirectoriesMixin, TestCase):
--- a/src/paperless/views.py
+++ b/src/paperless/views.py
@@ -378,6 +378,7 @@ class ApplicationConfigurationViewSet(ModelViewSet):
        ):
            # AI index was just enabled and vector store file does not exist
            llmindex_index.delay(
+                progress_bar_disable=True,
                rebuild=True,
                scheduled=False,
                auto=True,
--- a/src/paperless_ai/indexing.py
+++ b/src/paperless_ai/indexing.py
@@ -1,13 +1,11 @@
 import logging
 import shutil
-from collections.abc import Callable
-from collections.abc import Iterable
 from datetime import timedelta
 from pathlib import Path
-from typing import TypeVar

 import faiss
 import llama_index.core.settings as llama_settings
+import tqdm
 from celery import states
 from django.conf import settings
 from django.utils import timezone
@@ -31,14 +29,6 @@ from paperless_ai.embedding import build_llm_index_text
 from paperless_ai.embedding import get_embedding_dim
 from paperless_ai.embedding import get_embedding_model

-_T = TypeVar("_T")
-IterWrapper = Callable[[Iterable[_T]], Iterable[_T]]
-
-
-def _identity(iterable: Iterable[_T]) -> Iterable[_T]:
-    return iterable
-
-
 logger = logging.getLogger("paperless_ai.indexing")


@@ -166,11 +156,7 @@ def vector_store_file_exists():
    return Path(settings.LLM_INDEX_DIR / "default__vector_store.json").exists()


-def update_llm_index(
-    *,
-    iter_wrapper: IterWrapper[Document] = _identity,
-    rebuild=False,
-) -> str:
+def update_llm_index(*, progress_bar_disable=False, rebuild=False) -> str:
    """
    Rebuild or update the LLM index.
    """
@@ -190,7 +176,7 @@ def update_llm_index(
        embed_model = get_embedding_model()
        llama_settings.Settings.embed_model = embed_model
        storage_context = get_or_create_storage_context(rebuild=True)
-        for document in iter_wrapper(documents):
+        for document in tqdm.tqdm(documents, disable=progress_bar_disable):
            document_nodes = build_document_node(document)
            nodes.extend(document_nodes)

@@ -198,7 +184,7 @@ def update_llm_index(
            nodes=nodes,
            storage_context=storage_context,
            embed_model=embed_model,
-            show_progress=False,
+            show_progress=not progress_bar_disable,
        )
        msg = "LLM index rebuilt successfully."
    else:
@@ -210,7 +196,7 @@ def update_llm_index(
            for node in index.docstore.get_nodes(all_node_ids)
        }

-        for document in iter_wrapper(documents):
+        for document in tqdm.tqdm(documents, disable=progress_bar_disable):
            doc_id = str(document.id)
            document_modified = document.modified.isoformat()

--- a/uv.lock
+++ b/uv.lock
@@ -1342,11 +1342,11 @@ wheels = [

 [[package]]
 name = "faker"
-version = "40.5.1"
+version = "40.1.2"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/03/2a/96fff3edcb10f6505143448a4b91535f77b74865cec45be52690ee280443/faker-40.5.1.tar.gz", hash = "sha256:70222361cd82aa10cb86066d1a4e8f47f2bcdc919615c412045a69c4e6da0cd3", size = 1952684, upload-time = "2026-02-23T21:34:38.362Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/5e/77/1c3ff07b6739b9a1d23ca01ec0a90a309a33b78e345a3eb52f9ce9240e36/faker-40.1.2.tar.gz", hash = "sha256:b76a68163aa5f171d260fc24827a8349bc1db672f6a665359e8d0095e8135d30", size = 1949802, upload-time = "2026-01-13T20:51:49.917Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/4d/a9/1eed4db92d0aec2f9bfdf1faae0ab0418b5e121dda5701f118a7a4f0cd6a/faker-40.5.1-py3-none-any.whl", hash = "sha256:c69640c1e13bad49b4bcebcbf1b52f9f1a872b6ea186c248ada34d798f1661bf", size = 1987053, upload-time = "2026-02-23T21:34:36.418Z" },
+    { url = "https://files.pythonhosted.org/packages/46/ec/91a434c8a53d40c3598966621dea9c50512bec6ce8e76fa1751015e74cef/faker-40.1.2-py3-none-any.whl", hash = "sha256:93503165c165d330260e4379fd6dc07c94da90c611ed3191a0174d2ab9966a42", size = 1985633, upload-time = "2026-01-13T20:51:47.982Z" },
 ]

 [[package]]
@@ -3121,7 +3121,6 @@ webserver = [
 dev = [
    { name = "daphne", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "factory-boy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "faker", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "imagehash", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "prek", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "pytest", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -3146,7 +3145,6 @@ lint = [
 testing = [
    { name = "daphne", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "factory-boy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "faker", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "imagehash", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "pytest", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "pytest-cov", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -3259,7 +3257,6 @@ provides-extras = ["mariadb", "postgres", "webserver"]
 dev = [
    { name = "daphne" },
    { name = "factory-boy", specifier = "~=3.3.1" },
-    { name = "faker", specifier = "~=40.5.1" },
    { name = "imagehash" },
    { name = "prek", specifier = "~=0.3.0" },
    { name = "pytest", specifier = "~=9.0.0" },
@@ -3282,7 +3279,6 @@ lint = [
 testing = [
    { name = "daphne" },
    { name = "factory-boy", specifier = "~=3.3.1" },
-    { name = "faker", specifier = "~=40.5.1" },
    { name = "imagehash" },
    { name = "pytest", specifier = "~=9.0.0" },
    { name = "pytest-cov", specifier = "~=7.0.0" },
@@ -5910,11 +5906,11 @@ wheels = [

 [[package]]
 name = "whitenoise"
-version = "6.12.0"
+version = "6.11.0"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/cb/2a/55b3f3a4ec326cd077c1c3defeee656b9298372a69229134d930151acd01/whitenoise-6.12.0.tar.gz", hash = "sha256:f723ebb76a112e98816ff80fcea0a6c9b8ecde835f8ddda25df7a30a3c2db6ad", size = 26841, upload-time = "2026-02-27T00:05:42.028Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/15/95/8c81ec6b6ebcbf8aca2de7603070ccf37dbb873b03f20708e0f7c1664bc6/whitenoise-6.11.0.tar.gz", hash = "sha256:0f5bfce6061ae6611cd9396a8231e088722e4fc67bc13a111be74c738d99375f", size = 26432, upload-time = "2025-09-18T09:16:10.995Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/db/eb/d5583a11486211f3ebd4b385545ae787f32363d453c19fffd81106c9c138/whitenoise-6.12.0-py3-none-any.whl", hash = "sha256:fc5e8c572e33ebf24795b47b6a7da8da3c00cff2349f5b04c02f28d0cc5a3cc2", size = 20302, upload-time = "2026-02-27T00:05:40.086Z" },
+    { url = "https://files.pythonhosted.org/packages/6c/e9/4366332f9295fe0647d7d3251ce18f5615fbcb12d02c79a26f8dba9221b3/whitenoise-6.11.0-py3-none-any.whl", hash = "sha256:b2aeb45950597236f53b5342b3121c5de69c8da0109362aee506ce88e022d258", size = 20197, upload-time = "2025-09-18T09:16:09.754Z" },
 ]

 [[package]]