Chore: convert test_adapter.py to pytest style

Chore: convert paperless unit tests to pytest style
docker(deps): Bump astral-sh/uv (#12191)
2026-03-04 00:06:25 +00:00 · 2026-03-03 13:45:01 -08:00 · 2026-03-03 13:27:20 -08:00 · 2026-03-03 07:56:35 -08:00 · 2026-03-03 07:14:59 -08:00 · 2026-03-02 13:45:10 -08:00
44 changed files with 2617 additions and 1282 deletions
--- a/.github/workflows/ci-backend.yml
+++ b/.github/workflows/ci-backend.yml
@@ -35,18 +35,18 @@ jobs:
      fail-fast: false
    steps:
      - name: Checkout
-        uses: actions/checkout@v6
+        uses: actions/checkout@v6.0.2
      - name: Start containers
        run: |
          docker compose --file docker/compose/docker-compose.ci-test.yml pull --quiet
          docker compose --file docker/compose/docker-compose.ci-test.yml up --detach
      - name: Set up Python
        id: setup-python
-        uses: actions/setup-python@v6
+        uses: actions/setup-python@v6.2.0
        with:
          python-version: "${{ matrix.python-version }}"
      - name: Install uv
-        uses: astral-sh/setup-uv@v7
+        uses: astral-sh/setup-uv@v7.3.1
        with:
          version: ${{ env.DEFAULT_UV_VERSION }}
          enable-cache: true
@@ -83,13 +83,13 @@ jobs:
            pytest
      - name: Upload test results to Codecov
        if: always()
-        uses: codecov/codecov-action@v5
+        uses: codecov/codecov-action@v5.5.2
        with:
          flags: backend-python-${{ matrix.python-version }}
          files: junit.xml
          report_type: test_results
      - name: Upload coverage to Codecov
-        uses: codecov/codecov-action@v5
+        uses: codecov/codecov-action@v5.5.2
        with:
          flags: backend-python-${{ matrix.python-version }}
          files: coverage.xml
@@ -106,14 +106,14 @@ jobs:
      DEFAULT_PYTHON: "3.12"
    steps:
      - name: Checkout
-        uses: actions/checkout@v6.0.1
+        uses: actions/checkout@v6.0.2
      - name: Set up Python
        id: setup-python
        uses: actions/setup-python@v6.2.0
        with:
          python-version: "${{ env.DEFAULT_PYTHON }}"
      - name: Install uv
-        uses: astral-sh/setup-uv@v7.2.1
+        uses: astral-sh/setup-uv@v7.3.1
        with:
          version: ${{ env.DEFAULT_UV_VERSION }}
          enable-cache: true
--- a/.github/workflows/ci-docker.yml
+++ b/.github/workflows/ci-docker.yml
@@ -41,7 +41,7 @@ jobs:
      ref-name: ${{ steps.ref.outputs.name }}
    steps:
      - name: Checkout
-        uses: actions/checkout@v6.0.1
+        uses: actions/checkout@v6.0.2
      - name: Determine ref name
        id: ref
        run: |
@@ -130,7 +130,7 @@ jobs:
            type=semver,pattern={{major}}.{{minor}}
      - name: Build and push by digest
        id: build
-        uses: docker/build-push-action@v6.18.0
+        uses: docker/build-push-action@v6.19.2
        with:
          context: .
          file: ./Dockerfile
@@ -152,7 +152,7 @@ jobs:
          touch "/tmp/digests/${digest#sha256:}"
      - name: Upload digest
        if: steps.check-push.outputs.should-push == 'true'
-        uses: actions/upload-artifact@v6.0.0
+        uses: actions/upload-artifact@v7.0.0
        with:
          name: digests-${{ matrix.arch }}
          path: /tmp/digests/*
@@ -168,7 +168,7 @@ jobs:
      packages: write
    steps:
      - name: Download digests
-        uses: actions/download-artifact@v7.0.0
+        uses: actions/download-artifact@v8.0.0
        with:
          path: /tmp/digests
          pattern: digests-*
--- a/.github/workflows/ci-docs.yml
+++ b/.github/workflows/ci-docs.yml
@@ -33,16 +33,16 @@ jobs:
    name: Build Documentation
    runs-on: ubuntu-24.04
    steps:
-      - uses: actions/configure-pages@v5
+      - uses: actions/configure-pages@v5.0.0
      - name: Checkout
-        uses: actions/checkout@v6
+        uses: actions/checkout@v6.0.2
      - name: Set up Python
        id: setup-python
-        uses: actions/setup-python@v6
+        uses: actions/setup-python@v6.2.0
        with:
          python-version: ${{ env.DEFAULT_PYTHON_VERSION }}
      - name: Install uv
-        uses: astral-sh/setup-uv@v7
+        uses: astral-sh/setup-uv@v7.3.1
        with:
          version: ${{ env.DEFAULT_UV_VERSION }}
          enable-cache: true
@@ -58,7 +58,7 @@ jobs:
            --frozen \
            zensical build --clean
      - name: Upload GitHub Pages artifact
-        uses: actions/upload-pages-artifact@v4
+        uses: actions/upload-pages-artifact@v4.0.0
        with:
          path: site
          name: github-pages-${{ github.run_id }}-${{ github.run_attempt }}
@@ -72,7 +72,7 @@ jobs:
      url: ${{ steps.deployment.outputs.page_url }}
    steps:
      - name: Deploy GitHub Pages
-        uses: actions/deploy-pages@v4
+        uses: actions/deploy-pages@v4.0.5
        id: deployment
        with:
          artifact_name: github-pages-${{ github.run_id }}-${{ github.run_attempt }}
--- a/.github/workflows/ci-frontend.yml
+++ b/.github/workflows/ci-frontend.yml
@@ -22,20 +22,20 @@ jobs:
    runs-on: ubuntu-24.04
    steps:
      - name: Checkout
-        uses: actions/checkout@v6
+        uses: actions/checkout@v6.0.2
      - name: Install pnpm
-        uses: pnpm/action-setup@v4
+        uses: pnpm/action-setup@v4.2.0
        with:
          version: 10
      - name: Use Node.js 24
-        uses: actions/setup-node@v6
+        uses: actions/setup-node@v6.2.0
        with:
          node-version: 24.x
          cache: 'pnpm'
          cache-dependency-path: 'src-ui/pnpm-lock.yaml'
      - name: Cache frontend dependencies
        id: cache-frontend-deps
-        uses: actions/cache@v5
+        uses: actions/cache@v5.0.3
        with:
          path: |
            ~/.pnpm-store
@@ -49,19 +49,19 @@ jobs:
    runs-on: ubuntu-24.04
    steps:
      - name: Checkout
-        uses: actions/checkout@v6
+        uses: actions/checkout@v6.0.2
      - name: Install pnpm
-        uses: pnpm/action-setup@v4
+        uses: pnpm/action-setup@v4.2.0
        with:
          version: 10
      - name: Use Node.js 24
-        uses: actions/setup-node@v6
+        uses: actions/setup-node@v6.2.0
        with:
          node-version: 24.x
          cache: 'pnpm'
          cache-dependency-path: 'src-ui/pnpm-lock.yaml'
      - name: Cache frontend dependencies
-        uses: actions/cache@v5
+        uses: actions/cache@v5.0.3
        with:
          path: |
            ~/.pnpm-store
@@ -83,19 +83,19 @@ jobs:
        shard-count: [4]
    steps:
      - name: Checkout
-        uses: actions/checkout@v6
+        uses: actions/checkout@v6.0.2
      - name: Install pnpm
-        uses: pnpm/action-setup@v4
+        uses: pnpm/action-setup@v4.2.0
        with:
          version: 10
      - name: Use Node.js 24
-        uses: actions/setup-node@v6
+        uses: actions/setup-node@v6.2.0
        with:
          node-version: 24.x
          cache: 'pnpm'
          cache-dependency-path: 'src-ui/pnpm-lock.yaml'
      - name: Cache frontend dependencies
-        uses: actions/cache@v5
+        uses: actions/cache@v5.0.3
        with:
          path: |
            ~/.pnpm-store
@@ -107,13 +107,13 @@ jobs:
        run: cd src-ui && pnpm run test --max-workers=2 --shard=${{ matrix.shard-index }}/${{ matrix.shard-count }}
      - name: Upload test results to Codecov
        if: always()
-        uses: codecov/codecov-action@v5
+        uses: codecov/codecov-action@v5.5.2
        with:
          flags: frontend-node-${{ matrix.node-version }}
          directory: src-ui/
          report_type: test_results
      - name: Upload coverage to Codecov
-        uses: codecov/codecov-action@v5
+        uses: codecov/codecov-action@v5.5.2
        with:
          flags: frontend-node-${{ matrix.node-version }}
          directory: src-ui/coverage/
@@ -133,19 +133,19 @@ jobs:
        shard-count: [2]
    steps:
      - name: Checkout
-        uses: actions/checkout@v6
+        uses: actions/checkout@v6.0.2
      - name: Install pnpm
-        uses: pnpm/action-setup@v4
+        uses: pnpm/action-setup@v4.2.0
        with:
          version: 10
      - name: Use Node.js 24
-        uses: actions/setup-node@v6
+        uses: actions/setup-node@v6.2.0
        with:
          node-version: 24.x
          cache: 'pnpm'
          cache-dependency-path: 'src-ui/pnpm-lock.yaml'
      - name: Cache frontend dependencies
-        uses: actions/cache@v5
+        uses: actions/cache@v5.0.3
        with:
          path: |
            ~/.pnpm-store
@@ -163,19 +163,19 @@ jobs:
    runs-on: ubuntu-24.04
    steps:
      - name: Checkout
-        uses: actions/checkout@v6
+        uses: actions/checkout@v6.0.2
      - name: Install pnpm
-        uses: pnpm/action-setup@v4
+        uses: pnpm/action-setup@v4.2.0
        with:
          version: 10
      - name: Use Node.js 24
-        uses: actions/setup-node@v6
+        uses: actions/setup-node@v6.2.0
        with:
          node-version: 24.x
          cache: 'pnpm'
          cache-dependency-path: 'src-ui/pnpm-lock.yaml'
      - name: Cache frontend dependencies
-        uses: actions/cache@v5
+        uses: actions/cache@v5.0.3
        with:
          path: |
            ~/.pnpm-store
--- a/.github/workflows/ci-release.yml
+++ b/.github/workflows/ci-release.yml
@@ -28,14 +28,14 @@ jobs:
    runs-on: ubuntu-24.04
    steps:
      - name: Checkout
-        uses: actions/checkout@v6
+        uses: actions/checkout@v6.0.2
      # ---- Frontend Build ----
      - name: Install pnpm
-        uses: pnpm/action-setup@v4
+        uses: pnpm/action-setup@v4.2.0
        with:
          version: 10
      - name: Use Node.js 24
-        uses: actions/setup-node@v6
+        uses: actions/setup-node@v6.2.0
        with:
          node-version: 24.x
          cache: 'pnpm'
@@ -47,11 +47,11 @@ jobs:
      # ---- Backend Setup ----
      - name: Set up Python
        id: setup-python
-        uses: actions/setup-python@v6
+        uses: actions/setup-python@v6.2.0
        with:
          python-version: ${{ env.DEFAULT_PYTHON_VERSION }}
      - name: Install uv
-        uses: astral-sh/setup-uv@v7
+        uses: astral-sh/setup-uv@v7.3.1
        with:
          version: ${{ env.DEFAULT_UV_VERSION }}
          enable-cache: true
@@ -118,7 +118,7 @@ jobs:
          sudo chown -R 1000:1000 paperless-ngx/
          tar -cJf paperless-ngx.tar.xz paperless-ngx/
      - name: Upload release artifact
-        uses: actions/upload-artifact@v6
+        uses: actions/upload-artifact@v7.0.0
        with:
          name: release
          path: dist/paperless-ngx.tar.xz
@@ -133,7 +133,7 @@ jobs:
      version: ${{ steps.get-version.outputs.version }}
    steps:
      - name: Download release artifact
-        uses: actions/download-artifact@v7
+        uses: actions/download-artifact@v8.0.0
        with:
          name: release
          path: ./
@@ -148,7 +148,7 @@ jobs:
          fi
      - name: Create release and changelog
        id: create-release
-        uses: release-drafter/release-drafter@v6
+        uses: release-drafter/release-drafter@v6.2.0
        with:
          name: Paperless-ngx ${{ steps.get-version.outputs.version }}
          tag: ${{ steps.get-version.outputs.version }}
@@ -159,7 +159,7 @@ jobs:
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      - name: Upload release archive
-        uses: shogo82148/actions-upload-release-asset@v1
+        uses: shogo82148/actions-upload-release-asset@v1.9.2
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          upload_url: ${{ steps.create-release.outputs.upload_url }}
@@ -176,16 +176,16 @@ jobs:
    runs-on: ubuntu-24.04
    steps:
      - name: Checkout
-        uses: actions/checkout@v6
+        uses: actions/checkout@v6.0.2
        with:
          ref: main
      - name: Set up Python
        id: setup-python
-        uses: actions/setup-python@v6
+        uses: actions/setup-python@v6.2.0
        with:
          python-version: ${{ env.DEFAULT_PYTHON_VERSION }}
      - name: Install uv
-        uses: astral-sh/setup-uv@v7
+        uses: astral-sh/setup-uv@v7.3.1
        with:
          version: ${{ env.DEFAULT_UV_VERSION }}
          enable-cache: true
@@ -218,7 +218,7 @@ jobs:
          git commit -am "Changelog ${{ needs.publish-release.outputs.version }} - GHA"
          git push origin ${{ needs.publish-release.outputs.version }}-changelog
      - name: Create pull request
-        uses: actions/github-script@v8
+        uses: actions/github-script@v8.0.0
        with:
          script: |
            const { repo, owner } = context.repo;
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -34,10 +34,10 @@ jobs:
        # Learn more about CodeQL language support at https://git.io/codeql-language-support
    steps:
      - name: Checkout repository
-        uses: actions/checkout@v6
+        uses: actions/checkout@v6.0.2
      # Initializes the CodeQL tools for scanning.
      - name: Initialize CodeQL
-        uses: github/codeql-action/init@v4
+        uses: github/codeql-action/init@v4.32.5
        with:
          languages: ${{ matrix.language }}
          # If you wish to specify custom queries, you can do so here or in a config file.
@@ -45,4 +45,4 @@ jobs:
          # Prefix the list here with "+" to use these queries and those in the config file.
          # queries: ./path/to/local/query, your-org/your-repo/queries@main
      - name: Perform CodeQL Analysis
-        uses: github/codeql-action/analyze@v4
+        uses: github/codeql-action/analyze@v4.32.5
--- a/.github/workflows/crowdin.yml
+++ b/.github/workflows/crowdin.yml
@@ -13,11 +13,11 @@ jobs:
    runs-on: ubuntu-24.04
    steps:
      - name: Checkout
-        uses: actions/checkout@v6
+        uses: actions/checkout@v6.0.2
        with:
          token: ${{ secrets.PNGX_BOT_PAT }}
      - name: crowdin action
-        uses: crowdin/github-action@v2
+        uses: crowdin/github-action@v2.15.0
        with:
          upload_translations: false
          download_translations: true
--- a/.github/workflows/pr-bot.yml
+++ b/.github/workflows/pr-bot.yml
@@ -12,7 +12,7 @@ jobs:
    steps:
      - name: Label PR by file path or branch name
        # see .github/labeler.yml for the labeler config
-        uses: actions/labeler@v6
+        uses: actions/labeler@v6.0.1
        with:
          repo-token: ${{ secrets.GITHUB_TOKEN }}
      - name: Label by size
@@ -26,7 +26,7 @@ jobs:
          fail_if_xl: 'false'
          excluded_files: /\.lock$/ /\.txt$/ ^src-ui/pnpm-lock\.yaml$ ^src-ui/messages\.xlf$ ^src/locale/en_US/LC_MESSAGES/django\.po$
      - name: Label by PR title
-        uses: actions/github-script@v8
+        uses: actions/github-script@v8.0.0
        with:
          script: |
            const pr = context.payload.pull_request;
@@ -52,7 +52,7 @@ jobs:
            }
      - name: Label bot-generated PRs
        if: ${{ contains(github.actor, 'dependabot') || contains(github.actor, 'crowdin-bot') }}
-        uses: actions/github-script@v8
+        uses: actions/github-script@v8.0.0
        with:
          script: |
            const pr = context.payload.pull_request;
@@ -77,7 +77,7 @@ jobs:
            }
      - name: Welcome comment
        if: ${{ !contains(github.actor, 'bot') }}
-        uses: actions/github-script@v8
+        uses: actions/github-script@v8.0.0
        with:
          script: |
            const pr = context.payload.pull_request;
--- a/.github/workflows/project-actions.yml
+++ b/.github/workflows/project-actions.yml
@@ -19,6 +19,6 @@ jobs:
    if: github.event_name == 'pull_request_target' && (github.event.action == 'opened' || github.event.action == 'reopened') && github.event.pull_request.user.login != 'dependabot'
    steps:
      - name: Label PR with release-drafter
-        uses: release-drafter/release-drafter@v6
+        uses: release-drafter/release-drafter@v6.2.0
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
--- a/.github/workflows/repo-maintenance.yml
+++ b/.github/workflows/repo-maintenance.yml
@@ -15,7 +15,7 @@ jobs:
    if: github.repository_owner == 'paperless-ngx'
    runs-on: ubuntu-24.04
    steps:
-      - uses: actions/stale@v10
+      - uses: actions/stale@v10.2.0
        with:
          days-before-stale: 7
          days-before-close: 14
@@ -37,7 +37,7 @@ jobs:
    if: github.repository_owner == 'paperless-ngx'
    runs-on: ubuntu-24.04
    steps:
-      - uses: dessant/lock-threads@v6
+      - uses: dessant/lock-threads@v6.0.0
        with:
          issue-inactive-days: '30'
          pr-inactive-days: '30'
@@ -57,7 +57,7 @@ jobs:
    if: github.repository_owner == 'paperless-ngx'
    runs-on: ubuntu-24.04
    steps:
-      - uses: actions/github-script@v8
+      - uses: actions/github-script@v8.0.0
        with:
          script: |
            function sleep(ms) {
@@ -114,7 +114,7 @@ jobs:
    if: github.repository_owner == 'paperless-ngx'
    runs-on: ubuntu-24.04
    steps:
-      - uses: actions/github-script@v8
+      - uses: actions/github-script@v8.0.0
        with:
          script: |
            function sleep(ms) {
@@ -206,7 +206,7 @@ jobs:
    if: github.repository_owner == 'paperless-ngx'
    runs-on: ubuntu-24.04
    steps:
-      - uses: actions/github-script@v8
+      - uses: actions/github-script@v8.0.0
        with:
          script: |
            function sleep(ms) {
--- a/.github/workflows/translate-strings.yml
+++ b/.github/workflows/translate-strings.yml
@@ -11,7 +11,7 @@ jobs:
      contents: write
    steps:
      - name: Checkout code
-        uses: actions/checkout@v6
+        uses: actions/checkout@v6.0.2
        env:
          GH_REF: ${{ github.ref }} # sonar rule:githubactions:S7630 - avoid injection
        with:
@@ -19,13 +19,13 @@ jobs:
          ref: ${{ env.GH_REF }}
      - name: Set up Python
        id: setup-python
-        uses: actions/setup-python@v6
+        uses: actions/setup-python@v6.2.0
      - name: Install system dependencies
        run: |
          sudo apt-get update -qq
          sudo apt-get install -qq --no-install-recommends gettext
      - name: Install uv
-        uses: astral-sh/setup-uv@v7
+        uses: astral-sh/setup-uv@v7.3.1
        with:
          enable-cache: true
      - name: Install backend python dependencies
@@ -36,18 +36,18 @@ jobs:
      - name: Generate backend translation strings
        run: cd src/ && uv run manage.py makemessages -l en_US -i "samples*"
      - name: Install pnpm
-        uses: pnpm/action-setup@v4
+        uses: pnpm/action-setup@v4.2.0
        with:
          version: 10
      - name: Use Node.js 24
-        uses: actions/setup-node@v6
+        uses: actions/setup-node@v6.2.0
        with:
          node-version: 24.x
          cache: 'pnpm'
          cache-dependency-path: 'src-ui/pnpm-lock.yaml'
      - name: Cache frontend dependencies
        id: cache-frontend-deps
-        uses: actions/cache@v5
+        uses: actions/cache@v5.0.3
        with:
          path: |
            ~/.pnpm-store
@@ -63,7 +63,7 @@ jobs:
          cd src-ui
          pnpm run ng extract-i18n
      - name: Commit changes
-        uses: stefanzweifel/git-auto-commit-action@v7
+        uses: stefanzweifel/git-auto-commit-action@v7.1.0
        with:
          file_pattern: 'src-ui/messages.xlf src/locale/en_US/LC_MESSAGES/django.po'
          commit_message: "Auto translate strings"
--- a/Dockerfile
+++ b/Dockerfile
@@ -30,7 +30,7 @@ RUN set -eux \
 # Purpose: Installs s6-overlay and rootfs
 # Comments:
 #  - Don't leave anything extra in here either
-FROM ghcr.io/astral-sh/uv:0.10.5-python3.12-trixie-slim AS s6-overlay-base
+FROM ghcr.io/astral-sh/uv:0.10.7-python3.12-trixie-slim AS s6-overlay-base

 WORKDIR /usr/src/s6

@@ -45,7 +45,7 @@ ENV \
 ARG TARGETARCH
 ARG TARGETVARIANT
 # Lock this version
-ARG S6_OVERLAY_VERSION=3.2.1.0
+ARG S6_OVERLAY_VERSION=3.2.2.0

 ARG S6_BUILD_TIME_PKGS="curl \
                        xz-utils"
--- a/docker/compose/docker-compose.ci-test.yml
+++ b/docker/compose/docker-compose.ci-test.yml
@@ -4,7 +4,7 @@
 # correct networking for the tests
 services:
  gotenberg:
-    image: docker.io/gotenberg/gotenberg:8.26
+    image: docker.io/gotenberg/gotenberg:8.27
    hostname: gotenberg
    container_name: gotenberg
    network_mode: host
--- a/docker/compose/docker-compose.mariadb-tika.yml
+++ b/docker/compose/docker-compose.mariadb-tika.yml
@@ -72,7 +72,7 @@ services:
      PAPERLESS_TIKA_GOTENBERG_ENDPOINT: http://gotenberg:3000
      PAPERLESS_TIKA_ENDPOINT: http://tika:9998
  gotenberg:
-    image: docker.io/gotenberg/gotenberg:8.26
+    image: docker.io/gotenberg/gotenberg:8.27
    restart: unless-stopped
    # The gotenberg chromium route is used to convert .eml files. We do not
    # want to allow external content like tracking pixels or even javascript.
--- a/docker/compose/docker-compose.postgres-tika.yml
+++ b/docker/compose/docker-compose.postgres-tika.yml
@@ -66,7 +66,7 @@ services:
      PAPERLESS_TIKA_GOTENBERG_ENDPOINT: http://gotenberg:3000
      PAPERLESS_TIKA_ENDPOINT: http://tika:9998
  gotenberg:
-    image: docker.io/gotenberg/gotenberg:8.26
+    image: docker.io/gotenberg/gotenberg:8.27
    restart: unless-stopped
    # The gotenberg chromium route is used to convert .eml files. We do not
    # want to allow external content like tracking pixels or even javascript.
--- a/docker/compose/docker-compose.sqlite-tika.yml
+++ b/docker/compose/docker-compose.sqlite-tika.yml
@@ -55,7 +55,7 @@ services:
      PAPERLESS_TIKA_GOTENBERG_ENDPOINT: http://gotenberg:3000
      PAPERLESS_TIKA_ENDPOINT: http://tika:9998
  gotenberg:
-    image: docker.io/gotenberg/gotenberg:8.26
+    image: docker.io/gotenberg/gotenberg:8.27
    restart: unless-stopped
    # The gotenberg chromium route is used to convert .eml files. We do not
    # want to allow external content like tracking pixels or even javascript.
--- a/docs/assets/logo_full_black.png
+++ b/docs/assets/logo_full_black.png
--- a/docs/assets/logo_full_white.png
+++ b/docs/assets/logo_full_white.png
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -616,7 +616,7 @@ applied. You can use the following placeholders in the template with any trigger
 -   `{{added_day}}`: added day
 -   `{{added_time}}`: added time in HH:MM format
 -   `{{original_filename}}`: original file name without extension
--   `{{filename}}`: current file name without extension
+-   `{{filename}}`: current file name without extension (for "added" workflows this may not be final yet, you can use `{{original_filename}}`)
 -   `{{doc_title}}`: current document title (cannot be used in title assignment)

 The following placeholders are only available for "added" or "updated" triggers
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -111,6 +111,7 @@ docs = [
 testing = [
  "daphne",
  "factory-boy~=3.3.1",
+  "faker~=40.5.1",
  "imagehash",
  "pytest~=9.0.0",
  "pytest-cov~=7.0.0",
--- a/src/documents/management/commands/base.py
+++ b/src/documents/management/commands/base.py
@@ -6,11 +6,14 @@ Provides automatic progress bar and multiprocessing support with minimal boilerp

 from __future__ import annotations

+import logging
 import os
+from collections.abc import Callable
 from collections.abc import Iterable
 from collections.abc import Sized
 from concurrent.futures import ProcessPoolExecutor
 from concurrent.futures import as_completed
+from contextlib import contextmanager
 from dataclasses import dataclass
 from typing import TYPE_CHECKING
 from typing import Any
@@ -22,7 +25,11 @@ from django import db
 from django.core.management import CommandError
 from django.db.models import QuerySet
 from django_rich.management import RichCommand
+from rich import box
 from rich.console import Console
+from rich.console import Group
+from rich.console import RenderableType
+from rich.live import Live
 from rich.progress import BarColumn
 from rich.progress import MofNCompleteColumn
 from rich.progress import Progress
@@ -30,11 +37,11 @@ from rich.progress import SpinnerColumn
 from rich.progress import TextColumn
 from rich.progress import TimeElapsedColumn
 from rich.progress import TimeRemainingColumn
+from rich.table import Table
+from rich.text import Text

 if TYPE_CHECKING:
-    from collections.abc import Callable
    from collections.abc import Generator
-    from collections.abc import Iterable
    from collections.abc import Sequence

    from django.core.management import CommandParser
@@ -43,6 +50,78 @@ T = TypeVar("T")
 R = TypeVar("R")


+@dataclass(slots=True, frozen=True)
+class _BufferedRecord:
+    level: int
+    name: str
+    message: str
+
+
+class BufferingLogHandler(logging.Handler):
+    """Captures log records during a command run for deferred rendering.
+
+    Attach to a logger before a long operation and call ``render()``
+    afterwards to emit the buffered records via Rich, optionally filtered
+    by minimum level.
+    """
+
+    def __init__(self) -> None:
+        super().__init__()
+        self._records: list[_BufferedRecord] = []
+
+    def emit(self, record: logging.LogRecord) -> None:
+        self._records.append(
+            _BufferedRecord(
+                level=record.levelno,
+                name=record.name,
+                message=self.format(record),
+            ),
+        )
+
+    def render(
+        self,
+        console: Console,
+        *,
+        min_level: int = logging.DEBUG,
+        title: str = "Log Output",
+    ) -> None:
+        records = [r for r in self._records if r.level >= min_level]
+        if not records:
+            return
+
+        table = Table(
+            title=title,
+            show_header=True,
+            header_style="bold",
+            show_lines=False,
+            box=box.SIMPLE,
+        )
+        table.add_column("Level", style="bold", width=8)
+        table.add_column("Logger", style="dim")
+        table.add_column("Message", no_wrap=False)
+
+        _level_styles: dict[int, str] = {
+            logging.DEBUG: "dim",
+            logging.INFO: "cyan",
+            logging.WARNING: "yellow",
+            logging.ERROR: "red",
+            logging.CRITICAL: "bold red",
+        }
+
+        for record in records:
+            style = _level_styles.get(record.level, "")
+            table.add_row(
+                Text(logging.getLevelName(record.level), style=style),
+                record.name,
+                record.message,
+            )
+
+        console.print(table)
+
+    def clear(self) -> None:
+        self._records.clear()
+
+
 @dataclass(frozen=True, slots=True)
 class ProcessResult(Generic[T, R]):
    """
@@ -91,6 +170,23 @@ class PaperlessCommand(RichCommand):
                for result in self.process_parallel(process_doc, ids):
                    if result.error:
                        self.console.print(f"[red]Failed: {result.error}[/red]")
+
+        class Command(PaperlessCommand):
+            help = "Import documents with live stats"
+
+            def handle(self, *args, **options):
+                stats = ImportStats()
+
+                def render_stats() -> Table:
+                    ...  # build Rich Table from stats
+
+                for item in self.track_with_stats(
+                    items,
+                    description="Importing...",
+                    stats_renderer=render_stats,
+                ):
+                    result = import_item(item)
+                    stats.imported += 1
    """

    supports_progress_bar: ClassVar[bool] = True
@@ -128,13 +224,11 @@ class PaperlessCommand(RichCommand):
        This is called by Django's command infrastructure after argument parsing
        but before handle(). We use it to set instance attributes from options.
        """
-        # Set progress bar state
        if self.supports_progress_bar:
            self.no_progress_bar = options.get("no_progress_bar", False)
        else:
            self.no_progress_bar = True

-        # Set multiprocessing state
        if self.supports_multiprocessing:
            self.process_count = options.get("processes", 1)
            if self.process_count < 1:
@@ -144,9 +238,69 @@ class PaperlessCommand(RichCommand):

        return super().execute(*args, **options)

+    @contextmanager
+    def buffered_logging(
+        self,
+        *logger_names: str,
+        level: int = logging.DEBUG,
+    ) -> Generator[BufferingLogHandler, None, None]:
+        """Context manager that captures log output from named loggers.
+
+        Installs a ``BufferingLogHandler`` on each named logger for the
+        duration of the block, suppressing propagation to avoid interleaving
+        with the Rich live display. The handler is removed on exit regardless
+        of whether an exception occurred.
+
+        Usage::
+
+            with self.buffered_logging("paperless", "documents") as log_buf:
+                # ... run progress loop ...
+            if options["verbose"]:
+                log_buf.render(self.console)
+        """
+        handler = BufferingLogHandler()
+        handler.setFormatter(logging.Formatter("%(message)s"))
+
+        loggers: list[logging.Logger] = []
+        original_propagate: dict[str, bool] = {}
+
+        for name in logger_names:
+            log = logging.getLogger(name)
+            log.addHandler(handler)
+            original_propagate[name] = log.propagate
+            log.propagate = False
+            loggers.append(log)
+
+        try:
+            yield handler
+        finally:
+            for log in loggers:
+                log.removeHandler(handler)
+                log.propagate = original_propagate[log.name]
+
+    @staticmethod
+    def _progress_columns() -> tuple[Any, ...]:
+        """
+        Return the standard set of progress bar columns.
+
+        Extracted so both _create_progress (standalone) and track_with_stats
+        (inside Live) use identical column configuration without duplication.
+        """
+        return (
+            SpinnerColumn(),
+            TextColumn("[progress.description]{task.description}"),
+            BarColumn(),
+            MofNCompleteColumn(),
+            TimeElapsedColumn(),
+            TimeRemainingColumn(),
+        )
+
    def _create_progress(self, description: str) -> Progress:
        """
-        Create a configured Progress instance.
+        Create a standalone Progress instance with its own stderr Console.
+
+        Use this for track(). For track_with_stats(), Progress is created
+        directly inside a Live context instead.

        Progress output is directed to stderr to match the convention that
        progress bars are transient UI feedback, not command output. This
@@ -161,12 +315,7 @@ class PaperlessCommand(RichCommand):
            A Progress instance configured with appropriate columns.
        """
        return Progress(
-            SpinnerColumn(),
-            TextColumn("[progress.description]{task.description}"),
-            BarColumn(),
-            MofNCompleteColumn(),
-            TimeElapsedColumn(),
-            TimeRemainingColumn(),
+            *self._progress_columns(),
            console=Console(stderr=True),
            transient=False,
        )
@@ -222,7 +371,6 @@ class PaperlessCommand(RichCommand):
            yield from iterable
            return

-        # Attempt to determine total if not provided
        if total is None:
            total = self._get_iterable_length(iterable)

@@ -232,6 +380,87 @@ class PaperlessCommand(RichCommand):
                yield item
                progress.advance(task_id)

+    def track_with_stats(
+        self,
+        iterable: Iterable[T],
+        *,
+        description: str = "Processing...",
+        stats_renderer: Callable[[], RenderableType],
+        total: int | None = None,
+    ) -> Generator[T, None, None]:
+        """
+        Yield items while showing a progress bar with a stats panel below it.
+
+        The bar and the renderable returned by ``stats_renderer`` are stacked
+        in one Group and shown through a single Live display on stderr. The
+        group is rebuilt after every item, so the panel always reflects the
+        caller's latest counters.
+
+        When --no-progress-bar is in effect nothing is displayed and the
+        items are passed straight through. The caller's loop body still runs,
+        so any counters it maintains stay accurate for a post-loop summary.
+
+        Args:
+            iterable: The items to iterate over.
+            description: Label shown next to the progress bar.
+            stats_renderer: Zero-argument callable returning a Rich
+                renderable. It is invoked once when the display starts and
+                once after each item. Typically a closure over a mutable
+                stats object that builds a Table from it.
+            total: Number of items. When None, it is looked up with
+                _get_iterable_length().
+
+        Yields:
+            Each item of the iterable, unchanged and in order.
+
+        Example:
+            counts = {"ok": 0, "failed": 0}
+
+            def render_stats() -> Table:
+                table = Table(box=None)
+                table.add_column("OK")
+                table.add_column("Failed")
+                table.add_row(str(counts["ok"]), str(counts["failed"]))
+                return table
+
+            for item in self.track_with_stats(
+                items,
+                description="Importing...",
+                stats_renderer=render_stats,
+            ):
+                try:
+                    import_item(item)
+                    counts["ok"] += 1
+                except Exception:
+                    counts["failed"] += 1
+        """
+        if self.no_progress_bar:
+            yield from iterable
+            return
+
+        item_total = self._get_iterable_length(iterable) if total is None else total
+
+        # The Progress gets no console of its own: the surrounding Live owns
+        # the stderr console and is responsible for all rendering.
+        progress = Progress(*self._progress_columns())
+        task_id = progress.add_task(description, total=item_total)
+
+        def compose() -> Group:
+            # Rebuilt on every call so the stats panel shows current values.
+            return Group(progress, stats_renderer())
+
+        with Live(
+            compose(),
+            console=Console(stderr=True),
+            refresh_per_second=4,
+        ) as live:
+            for item in iterable:
+                yield item
+                progress.advance(task_id)
+                live.update(compose())
+
    def process_parallel(
        self,
        fn: Callable[[T], R],
@@ -269,7 +498,7 @@ class PaperlessCommand(RichCommand):
        total = len(items)

        if self.process_count == 1:
-            # Sequential execution in main process - critical for testing
+            # Sequential execution in main process - critical for testing, so we don't fork in fork, etc
            yield from self._process_sequential(fn, items, description, total)
        else:
            # Parallel execution with ProcessPoolExecutor
@@ -298,6 +527,7 @@ class PaperlessCommand(RichCommand):
        total: int,
    ) -> Generator[ProcessResult[T, R], None, None]:
        """Process items in parallel using ProcessPoolExecutor."""
+
        # Close database connections before forking - required for PostgreSQL
        db.connections.close_all()

--- a/src/documents/management/commands/document_index.py
+++ b/src/documents/management/commands/document_index.py
@@ -1,22 +1,25 @@
-from django.core.management import BaseCommand
 from django.db import transaction

-from documents.management.commands.mixins import ProgressBarMixin
+from documents.management.commands.base import PaperlessCommand
 from documents.tasks import index_optimize
 from documents.tasks import index_reindex


-class Command(ProgressBarMixin, BaseCommand):
+class Command(PaperlessCommand):
    help = "Manages the document index."

    def add_arguments(self, parser):
+        super().add_arguments(parser)
        parser.add_argument("command", choices=["reindex", "optimize"])
-        self.add_argument_progress_bar_mixin(parser)

    def handle(self, *args, **options):
-        self.handle_progress_bar_mixin(**options)
        with transaction.atomic():
            if options["command"] == "reindex":
-                index_reindex(progress_bar_disable=self.no_progress_bar)
+                index_reindex(
+                    iter_wrapper=lambda docs: self.track(
+                        docs,
+                        description="Indexing documents...",
+                    ),
+                )
            elif options["command"] == "optimize":
                index_optimize()
--- a/src/documents/management/commands/document_llmindex.py
+++ b/src/documents/management/commands/document_llmindex.py
@@ -1,22 +1,22 @@
-from django.core.management import BaseCommand
-from django.db import transaction
+from typing import Any

-from documents.management.commands.mixins import ProgressBarMixin
+from documents.management.commands.base import PaperlessCommand
 from documents.tasks import llmindex_index


-class Command(ProgressBarMixin, BaseCommand):
+class Command(PaperlessCommand):
    help = "Manages the LLM-based vector index for Paperless."

-    def add_arguments(self, parser):
+    def add_arguments(self, parser: Any) -> None:
+        super().add_arguments(parser)
        parser.add_argument("command", choices=["rebuild", "update"])
-        self.add_argument_progress_bar_mixin(parser)

-    def handle(self, *args, **options):
-        self.handle_progress_bar_mixin(**options)
-        with transaction.atomic():
-            llmindex_index(
-                progress_bar_disable=self.no_progress_bar,
-                rebuild=options["command"] == "rebuild",
-                scheduled=False,
-            )
+    def handle(self, *args: Any, **options: Any) -> None:
+        llmindex_index(
+            rebuild=options["command"] == "rebuild",
+            scheduled=False,
+            iter_wrapper=lambda docs: self.track(
+                docs,
+                description="Indexing documents...",
+            ),
+        )
--- a/src/documents/management/commands/document_retagger.py
+++ b/src/documents/management/commands/document_retagger.py
@@ -1,4 +1,12 @@
+from __future__ import annotations
+
 import logging
+from dataclasses import dataclass
+from dataclasses import field
+from typing import TYPE_CHECKING
+
+from rich.table import Table
+from rich.text import Text

 from documents.classifier import load_classifier
 from documents.management.commands.base import PaperlessCommand
@@ -8,9 +16,162 @@ from documents.signals.handlers import set_document_type
 from documents.signals.handlers import set_storage_path
 from documents.signals.handlers import set_tags

+if TYPE_CHECKING:
+    from rich.console import RenderableType
+
+    from documents.models import Correspondent
+    from documents.models import DocumentType
+    from documents.models import StoragePath
+    from documents.models import Tag
+
 logger = logging.getLogger("paperless.management.retagger")


+@dataclass(slots=True)
+class RetaggerStats:
+    """Cumulative counters updated as the retagger processes documents.
+
+    Mutable by design -- fields are incremented in the processing loop.
+    slots=True reduces per-instance memory overhead and speeds attribute access.
+
+    The counters are filled in the same way in suggest mode, where they
+    describe proposed rather than applied changes.
+    """
+
+    # Documents for which the correspondent setter returned a value.
+    correspondents: int = 0
+    # Documents for which the document type setter returned a value.
+    document_types: int = 0
+    # Sum of the "tags to add" collection sizes over all documents.
+    tags_added: int = 0
+    # Sum of the "tags to remove" collection sizes over all documents.
+    tags_removed: int = 0
+    # Documents for which the storage path setter returned a value.
+    storage_paths: int = 0
+    # Documents the loop has finished, whether or not anything changed.
+    documents_processed: int = 0
+
+
+@dataclass(slots=True)
+class DocumentSuggestion:
+    """Classifier suggestions collected for one document (suggest mode only).
+
+    Mutable by design -- each field is filled in as the matching setter runs.
+    A field left at its default means nothing was suggested for it.
+    """
+
+    document: Document
+    correspondent: Correspondent | None = None
+    document_type: DocumentType | None = None
+    tags_to_add: frozenset[Tag] = field(default_factory=frozenset)
+    tags_to_remove: frozenset[Tag] = field(default_factory=frozenset)
+    storage_path: StoragePath | None = None
+
+    @property
+    def has_suggestions(self) -> bool:
+        """Whether at least one field carries a proposed change."""
+        if self.tags_to_add or self.tags_to_remove:
+            return True
+        single_valued = (self.correspondent, self.document_type, self.storage_path)
+        return any(value is not None for value in single_valued)
+
+
+def _build_stats_table(stats: RetaggerStats, *, suggest: bool) -> Table:
+    """
+    Render the current counters as the one-row table shown under the bar.
+
+    The correspondent, document type and storage path cells carry a verb:
+    "would set" in suggest mode, to signal that nothing has been written,
+    and "set" otherwise. The tag cells show signed counts.
+    """
+    verb = "would set" if suggest else "set"
+
+    table = Table(box=None, padding=(0, 2), show_header=True, header_style="bold")
+    for heading in (
+        "Documents",
+        "Correspondents",
+        "Doc Types",
+        "Tags (+)",
+        "Tags (-)",
+        "Storage Paths",
+    ):
+        table.add_column(heading)
+
+    cells = (
+        str(stats.documents_processed),
+        f"{stats.correspondents} {verb}",
+        f"{stats.document_types} {verb}",
+        f"+{stats.tags_added}",
+        f"-{stats.tags_removed}",
+        f"{stats.storage_paths} {verb}",
+    )
+    table.add_row(*cells)
+
+    return table
+
+
+def _build_suggestion_table(
+    suggestions: list[DocumentSuggestion],
+    base_url: str | None,
+) -> Table:
+    """
+    Build the final suggestion table printed after the progress bar completes.
+
+    Only documents with at least one suggestion are included.
+
+    Every cell is a ``Text`` instance rather than a plain ``str``. Rich
+    parses console markup in bare strings, and document titles, tag names,
+    correspondents, etc. are user-controlled: a name containing ``[...]``
+    would otherwise be restyled, swallowed, or raise ``MarkupError``.
+
+    Args:
+        suggestions: Buffered per-document suggestions.
+        base_url: When set, a dimmed ``{base_url}/documents/{pk}`` link is
+            shown under each document; otherwise the pk is appended in
+            square brackets.
+
+    Returns:
+        The populated table, one row per document that has suggestions.
+    """
+    table = Table(
+        title="Suggested Changes",
+        show_header=True,
+        header_style="bold cyan",
+        show_lines=True,
+    )
+
+    table.add_column("Document", style="bold", no_wrap=False, min_width=20)
+    table.add_column("Correspondent")
+    table.add_column("Doc Type")
+    table.add_column("Tags")
+    table.add_column("Storage Path")
+
+    def plain_cell(value: object | None) -> Text:
+        # Literal text, never markup; a missing value renders as "-".
+        return Text(str(value)) if value else Text("-")
+
+    for suggestion in suggestions:
+        if not suggestion.has_suggestions:
+            continue
+
+        doc = suggestion.document
+
+        if base_url:
+            doc_cell = Text()
+            doc_cell.append(str(doc))
+            doc_cell.append(f"\n{base_url}/documents/{doc.pk}", style="dim")
+        else:
+            doc_cell = Text(f"{doc} [{doc.pk}]")
+
+        # Styled spans instead of markup, so tag names are never parsed.
+        styled_tags = [
+            (f"+{tag.name}", "green")
+            for tag in sorted(suggestion.tags_to_add, key=lambda t: t.name)
+        ] + [
+            (f"-{tag.name}", "red")
+            for tag in sorted(suggestion.tags_to_remove, key=lambda t: t.name)
+        ]
+        tag_cell = Text()
+        for index, (label, style) in enumerate(styled_tags):
+            if index:
+                tag_cell.append(", ")
+            tag_cell.append(label, style=style)
+        if not styled_tags:
+            tag_cell.append("-")
+
+        table.add_row(
+            doc_cell,
+            plain_cell(suggestion.correspondent),
+            plain_cell(suggestion.document_type),
+            tag_cell,
+            plain_cell(suggestion.storage_path),
+        )
+
+    return table
+
+
+def _build_summary_table(stats: RetaggerStats) -> Table:
+    """Render the final counters as a two-column "Metric / Count" table."""
+    table = Table(
+        title="Retagger Summary",
+        show_header=True,
+        header_style="bold cyan",
+    )
+
+    table.add_column("Metric", style="bold")
+    table.add_column("Count", justify="right")
+
+    # One row per counter, in display order.
+    rows = (
+        ("Documents processed", stats.documents_processed),
+        ("Correspondents set", stats.correspondents),
+        ("Document types set", stats.document_types),
+        ("Tags added", stats.tags_added),
+        ("Tags removed", stats.tags_removed),
+        ("Storage paths set", stats.storage_paths),
+    )
+    for metric, count in rows:
+        table.add_row(metric, str(count))
+
+    return table
+
+
 class Command(PaperlessCommand):
    help = (
        "Using the current classification model, assigns correspondents, tags "
@@ -19,7 +180,7 @@ class Command(PaperlessCommand):
        "modified) after their initial import."
    )

-    def add_arguments(self, parser):
+    def add_arguments(self, parser) -> None:
        super().add_arguments(parser)
        parser.add_argument("-c", "--correspondent", default=False, action="store_true")
        parser.add_argument("-T", "--tags", default=False, action="store_true")
@@ -31,9 +192,9 @@ class Command(PaperlessCommand):
            default=False,
            action="store_true",
            help=(
-                "By default this command won't try to assign a correspondent "
-                "if more than one matches the document. Use this flag if "
-                "you'd rather it just pick the first one it finds."
+                "By default this command will not try to assign a correspondent "
+                "if more than one matches the document. Use this flag to pick "
+                "the first match instead."
            ),
        )
        parser.add_argument(
@@ -42,91 +203,140 @@ class Command(PaperlessCommand):
            default=False,
            action="store_true",
            help=(
-                "If set, the document retagger will overwrite any previously "
-                "set correspondent, document and remove correspondents, types "
-                "and tags that do not match anymore due to changed rules."
+                "Overwrite any previously set correspondent, document type, and "
+                "remove tags that no longer match due to changed rules."
            ),
        )
        parser.add_argument(
            "--suggest",
            default=False,
            action="store_true",
-            help="Return the suggestion, don't change anything.",
+            help="Show what would be changed without applying anything.",
        )
        parser.add_argument(
            "--base-url",
-            help="The base URL to use to build the link to the documents.",
+            help="Base URL used to build document links in suggest output.",
        )
        parser.add_argument(
            "--id-range",
-            help="A range of document ids on which the retagging should be applied.",
+            help="Restrict retagging to documents within this ID range (inclusive).",
            nargs=2,
            type=int,
        )

-    def handle(self, *args, **options):
+    def handle(self, *args, **options) -> None:
+        suggest: bool = options["suggest"]
+        overwrite: bool = options["overwrite"]
+        use_first: bool = options["use_first"]
+        base_url: str | None = options["base_url"]
+
+        do_correspondent: bool = options["correspondent"]
+        do_document_type: bool = options["document_type"]
+        do_tags: bool = options["tags"]
+        do_storage_path: bool = options["storage_path"]
+
+        if not any([do_correspondent, do_document_type, do_tags, do_storage_path]):
+            self.console.print(
+                "[yellow]No classifier targets specified. "
+                "Use -c, -T, -t, or -s to select what to retag.[/yellow]",
+            )
+            return
+
        if options["inbox_only"]:
            queryset = Document.objects.filter(tags__is_inbox_tag=True)
        else:
            queryset = Document.objects.all()

        if options["id_range"]:
-            queryset = queryset.filter(
-                id__range=(options["id_range"][0], options["id_range"][1]),
-            )
+            lo, hi = options["id_range"]
+            queryset = queryset.filter(id__range=(lo, hi))

        documents = queryset.distinct()
-
        classifier = load_classifier()

-        for document in self.track(documents, description="Retagging..."):
-            if options["correspondent"]:
-                set_correspondent(
-                    sender=None,
-                    document=document,
-                    classifier=classifier,
-                    replace=options["overwrite"],
-                    use_first=options["use_first"],
-                    suggest=options["suggest"],
-                    base_url=options["base_url"],
-                    stdout=self.stdout,
-                    style_func=self.style,
-                )
+        stats = RetaggerStats()
+        suggestions: list[DocumentSuggestion] = []

-            if options["document_type"]:
-                set_document_type(
-                    sender=None,
-                    document=document,
-                    classifier=classifier,
-                    replace=options["overwrite"],
-                    use_first=options["use_first"],
-                    suggest=options["suggest"],
-                    base_url=options["base_url"],
-                    stdout=self.stdout,
-                    style_func=self.style,
-                )
+        def render_stats() -> RenderableType:
+            return _build_stats_table(stats, suggest=suggest)

-            if options["tags"]:
-                set_tags(
-                    sender=None,
-                    document=document,
-                    classifier=classifier,
-                    replace=options["overwrite"],
-                    suggest=options["suggest"],
-                    base_url=options["base_url"],
-                    stdout=self.stdout,
-                    style_func=self.style,
-                )
+        with self.buffered_logging(
+            "paperless",
+            "paperless.handlers",
+            "documents",
+        ) as log_buf:
+            for document in self.track_with_stats(
+                documents,
+                description="Retagging...",
+                stats_renderer=render_stats,
+            ):
+                suggestion = DocumentSuggestion(document=document)

-            if options["storage_path"]:
-                set_storage_path(
-                    sender=None,
-                    document=document,
-                    classifier=classifier,
-                    replace=options["overwrite"],
-                    use_first=options["use_first"],
-                    suggest=options["suggest"],
-                    base_url=options["base_url"],
-                    stdout=self.stdout,
-                    style_func=self.style,
-                )
+                if do_correspondent:
+                    correspondent = set_correspondent(
+                        None,
+                        document,
+                        classifier=classifier,
+                        replace=overwrite,
+                        use_first=use_first,
+                        dry_run=suggest,
+                    )
+                    if correspondent is not None:
+                        stats.correspondents += 1
+                        suggestion.correspondent = correspondent
+
+                if do_document_type:
+                    document_type = set_document_type(
+                        None,
+                        document,
+                        classifier=classifier,
+                        replace=overwrite,
+                        use_first=use_first,
+                        dry_run=suggest,
+                    )
+                    if document_type is not None:
+                        stats.document_types += 1
+                        suggestion.document_type = document_type
+
+                if do_tags:
+                    tags_to_add, tags_to_remove = set_tags(
+                        None,
+                        document,
+                        classifier=classifier,
+                        replace=overwrite,
+                        dry_run=suggest,
+                    )
+                    stats.tags_added += len(tags_to_add)
+                    stats.tags_removed += len(tags_to_remove)
+                    suggestion.tags_to_add = frozenset(tags_to_add)
+                    suggestion.tags_to_remove = frozenset(tags_to_remove)
+
+                if do_storage_path:
+                    storage_path = set_storage_path(
+                        None,
+                        document,
+                        classifier=classifier,
+                        replace=overwrite,
+                        use_first=use_first,
+                        dry_run=suggest,
+                    )
+                    if storage_path is not None:
+                        stats.storage_paths += 1
+                        suggestion.storage_path = storage_path
+
+                stats.documents_processed += 1
+
+                if suggest:
+                    suggestions.append(suggestion)
+
+        # Post-loop output
+        if suggest:
+            visible = [s for s in suggestions if s.has_suggestions]
+            if visible:
+                self.console.print(_build_suggestion_table(visible, base_url))
+            else:
+                self.console.print("[green]No changes suggested.[/green]")
+        else:
+            self.console.print(_build_summary_table(stats))
+
+        log_buf.render(self.console, min_level=logging.INFO, title="Retagger Log")
--- a/src/documents/management/commands/document_sanity_checker.py
+++ b/src/documents/management/commands/document_sanity_checker.py
@@ -1,17 +1,117 @@
-from django.core.management.base import BaseCommand
+"""Management command to check the document archive for issues."""

-from documents.management.commands.mixins import ProgressBarMixin
+from __future__ import annotations
+
+import logging
+from typing import Any
+
+from rich.panel import Panel
+from rich.table import Table
+from rich.text import Text
+
+from documents.management.commands.base import PaperlessCommand
+from documents.models import Document
+from documents.sanity_checker import SanityCheckMessages
 from documents.sanity_checker import check_sanity

+_LEVEL_STYLE: dict[int, tuple[str, str]] = {
+    logging.ERROR: ("bold red", "ERROR"),
+    logging.WARNING: ("yellow", "WARN"),
+    logging.INFO: ("dim", "INFO"),
+}

-class Command(ProgressBarMixin, BaseCommand):
+
+class Command(PaperlessCommand):
    help = "This command checks your document archive for issues."

-    def add_arguments(self, parser):
-        self.add_argument_progress_bar_mixin(parser)
+    def _render_results(self, messages: SanityCheckMessages) -> None:
+        """Render sanity check results as a Rich table."""

-    def handle(self, *args, **options):
-        self.handle_progress_bar_mixin(**options)
-        messages = check_sanity(progress=self.use_progress_bar, scheduled=False)
+        if (
+            not messages.has_error
+            and not messages.has_warning
+            and not messages.has_info
+        ):
+            self.console.print(
+                Panel(
+                    "[green]No issues detected.[/green]",
+                    title="Sanity Check",
+                    border_style="green",
+                ),
+            )
+            return

-        messages.log_messages()
+        # Build a lookup for document titles
+        doc_pks = [pk for pk in messages.document_pks() if pk is not None]
+        titles: dict[int, str] = {}
+        if doc_pks:
+            titles = dict(
+                Document.global_objects.filter(pk__in=doc_pks)
+                .only("pk", "title")
+                .values_list("pk", "title"),
+            )
+
+        table = Table(
+            title="Sanity Check Results",
+            show_lines=True,
+            title_style="bold",
+        )
+        table.add_column("Level", width=7, no_wrap=True)
+        table.add_column("Document", min_width=20)
+        table.add_column("Issue", ratio=1)
+
+        for doc_pk, doc_messages in messages.iter_messages():
+            if doc_pk is not None:
+                title = titles.get(doc_pk, "Unknown")
+                doc_label = f"#{doc_pk} {title}"
+            else:
+                doc_label = "(global)"
+
+            for msg in doc_messages:
+                style, label = _LEVEL_STYLE.get(
+                    msg["level"],
+                    ("dim", "INFO"),
+                )
+                table.add_row(
+                    Text(label, style=style),
+                    Text(doc_label),
+                    Text(str(msg["message"])),
+                )
+
+        self.console.print(table)
+
+        parts: list[str] = []
+
+        if messages.document_error_count:
+            parts.append(
+                f"{messages.document_error_count} document(s) with [bold red]errors[/bold red]",
+            )
+        if messages.document_warning_count:
+            parts.append(
+                f"{messages.document_warning_count} document(s) with [yellow]warnings[/yellow]",
+            )
+        if messages.document_info_count:
+            parts.append(f"{messages.document_info_count} document(s) with infos")
+        if messages.global_warning_count:
+            parts.append(
+                f"{messages.global_warning_count} global [yellow]warning(s)[/yellow]",
+            )
+
+        if parts:
+            if len(parts) > 1:
+                summary = ", ".join(parts[:-1]) + " and " + parts[-1]
+            else:
+                summary = parts[0]
+            self.console.print(f"\nFound {summary}.")
+        else:
+            self.console.print("\nNo issues found.")
+
+    def handle(self, *args: Any, **options: Any) -> None:
+        messages = check_sanity(
+            scheduled=False,
+            iter_wrapper=lambda docs: self.track(
+                docs,
+                description="Checking documents...",
+            ),
+        )
+        self._render_results(messages)
--- a/src/documents/models.py
+++ b/src/documents/models.py
@@ -75,7 +75,7 @@ class MatchingModel(ModelWithOwner):

    is_insensitive = models.BooleanField(_("is insensitive"), default=True)

-    class Meta:
+    class Meta(ModelWithOwner.Meta):
        abstract = True
        ordering = ("name",)
        constraints = [
--- a/src/documents/sanity_checker.py
+++ b/src/documents/sanity_checker.py
@@ -1,80 +1,174 @@
+"""
+Sanity checker for the Paperless-ngx document archive.
+
+Verifies that all documents have valid files, correct checksums,
+and consistent metadata. Reports orphaned files in the media directory.
+
+Progress display is the caller's responsibility -- pass an ``iter_wrapper``
+to wrap the document queryset (e.g., with a progress bar). The default
+is an identity function that adds no overhead.
+"""
+
+from __future__ import annotations
+
 import hashlib
 import logging
 import uuid
 from collections import defaultdict
+from collections.abc import Callable
+from collections.abc import Iterable
+from collections.abc import Iterator
 from pathlib import Path
+from typing import TYPE_CHECKING
 from typing import Final
+from typing import TypedDict
+from typing import TypeVar

 from celery import states
 from django.conf import settings
 from django.utils import timezone
-from tqdm import tqdm

 from documents.models import Document
 from documents.models import PaperlessTask
 from paperless.config import GeneralConfig

+logger = logging.getLogger("paperless.sanity_checker")
+
+_T = TypeVar("_T")
+IterWrapper = Callable[[Iterable[_T]], Iterable[_T]]
+
+
+class MessageEntry(TypedDict):
+    """A single sanity check message with its severity level."""
+
+    # Severity as a ``logging`` level constant (ERROR, WARNING or INFO).
+    level: int
+    # Human-readable description of the issue.
+    message: str
+
+
+def _identity(iterable: Iterable[_T]) -> Iterable[_T]:
+    """Pass through an iterable unchanged (default iter_wrapper).
+
+    Returns the very same object it was given; nothing is copied or consumed.
+    """
+    return iterable
+

 class SanityCheckMessages:
-    def __init__(self) -> None:
-        self._messages: dict[int, list[dict]] = defaultdict(list)
-        self.has_error = False
-        self.has_warning = False
+    """Collects sanity check messages grouped by document primary key.

-    def error(self, doc_pk, message) -> None:
+    Messages are categorized as error, warning, or info. ``None`` is used
+    as the key for messages not associated with a specific document
+    (e.g., orphaned files).
+    """
+
+    def __init__(self) -> None:
+        self._messages: dict[int | None, list[MessageEntry]] = defaultdict(list)
+        self.has_error: bool = False
+        self.has_warning: bool = False
+        self.has_info: bool = False
+        self.document_count: int = 0
+        self.document_error_count: int = 0
+        self.document_warning_count: int = 0
+        self.document_info_count: int = 0
+        self.global_warning_count: int = 0
+
+    # -- Recording ----------------------------------------------------------
+
+    def error(self, doc_pk: int | None, message: str) -> None:
        self._messages[doc_pk].append({"level": logging.ERROR, "message": message})
        self.has_error = True
+        if doc_pk is not None:
+            self.document_count += 1
+            self.document_error_count += 1

-    def warning(self, doc_pk, message) -> None:
+    def warning(self, doc_pk: int | None, message: str) -> None:
        self._messages[doc_pk].append({"level": logging.WARNING, "message": message})
        self.has_warning = True

-    def info(self, doc_pk, message) -> None:
+        if doc_pk is not None:
+            self.document_count += 1
+            self.document_warning_count += 1
+        else:
+            # This is the only type of global message we do right now
+            self.global_warning_count += 1
+
+    def info(self, doc_pk: int | None, message: str) -> None:
        self._messages[doc_pk].append({"level": logging.INFO, "message": message})
+        self.has_info = True
+
+        if doc_pk is not None:
+            self.document_count += 1
+            self.document_info_count += 1
+
+    # -- Iteration / query --------------------------------------------------
+
+    def document_pks(self) -> list[int | None]:
+        """List every key that has messages, in insertion order.
+
+        ``None`` appears in the result when global messages were recorded.
+        """
+        return [*self._messages]
+
+    def iter_messages(self) -> Iterator[tuple[int | None, list[MessageEntry]]]:
+        """Yield one (doc_pk, messages) pair per key, in insertion order."""
+        for doc_pk, entries in self._messages.items():
+            yield doc_pk, entries
+
+    def __getitem__(self, item: int | None) -> list[MessageEntry]:
+        """
+        Return the messages recorded for ``item`` without side effects.
+
+        The backing store is a ``defaultdict``, so plain indexing would
+        insert an empty list for a key that was never recorded. That would
+        make a mere lookup change what the key-based checks and iteration
+        report. An unknown key therefore yields a detached empty list.
+        """
+        return self._messages.get(item, [])
+
+    # -- Summarize Helpers --------------------------------------------------
+
+    @property
+    def has_global_issues(self) -> bool:
+        """True when the ``None`` key (non-document messages) is present."""
+        return None in self._messages
+
+    @property
+    def total_issue_count(self) -> int:
+        """Sum of the document error, document warning and global warning counters."""
+        counters = (
+            self.document_error_count,
+            self.document_warning_count,
+            self.global_warning_count,
+        )
+        return sum(counters)
+
+    # -- Logging output (used by Celery task path) --------------------------

    def log_messages(self) -> None:
-        logger = logging.getLogger("paperless.sanity_checker")
+        """Write all messages to the ``paperless.sanity_checker`` logger.

+        This is the output path for headless / Celery execution.
+        Management commands use Rich rendering instead.
+        """
        if len(self._messages) == 0:
            logger.info("Sanity checker detected no issues.")
-        else:
-            # Query once
-            all_docs = Document.global_objects.all()
+            return

-            for doc_pk in self._messages:
-                if doc_pk is not None:
-                    doc = all_docs.get(pk=doc_pk)
-                    logger.info(
-                        f"Detected following issue(s) with document #{doc.pk},"
-                        f" titled {doc.title}",
-                    )
-                for msg in self._messages[doc_pk]:
-                    logger.log(msg["level"], msg["message"])
+        doc_pks = [pk for pk in self._messages if pk is not None]
+        titles: dict[int, str] = {}
+        if doc_pks:
+            titles = dict(
+                Document.global_objects.filter(pk__in=doc_pks)
+                .only("pk", "title")
+                .values_list("pk", "title"),
+            )

-    def __len__(self):
-        return len(self._messages)
-
-    def __getitem__(self, item):
-        return self._messages[item]
+        for doc_pk, entries in self._messages.items():
+            if doc_pk is not None:
+                title = titles.get(doc_pk, "Unknown")
+                logger.info(
+                    "Detected following issue(s) with document #%s, titled %s",
+                    doc_pk,
+                    title,
+                )
+            for msg in entries:
+                logger.log(msg["level"], msg["message"])


 class SanityCheckFailedException(Exception):
    pass


-def check_sanity(*, progress=False, scheduled=True) -> SanityCheckMessages:
-    paperless_task = PaperlessTask.objects.create(
-        task_id=uuid.uuid4(),
-        type=PaperlessTask.TaskType.SCHEDULED_TASK
-        if scheduled
-        else PaperlessTask.TaskType.MANUAL_TASK,
-        task_name=PaperlessTask.TaskName.CHECK_SANITY,
-        status=states.STARTED,
-        date_created=timezone.now(),
-        date_started=timezone.now(),
-    )
-    messages = SanityCheckMessages()
+# ---------------------------------------------------------------------------
+# Internal helpers
+# ---------------------------------------------------------------------------

+
+def _build_present_files() -> set[Path]:
+    """Collect all files in MEDIA_ROOT, excluding directories and ignorable files."""
    present_files = {
        x.resolve()
        for x in Path(settings.MEDIA_ROOT).glob("**/*")
@@ -82,95 +176,178 @@ def check_sanity(*, progress=False, scheduled=True) -> SanityCheckMessages:
    }

    lockfile = Path(settings.MEDIA_LOCK).resolve()
-    if lockfile in present_files:
-        present_files.remove(lockfile)
+    present_files.discard(lockfile)

    general_config = GeneralConfig()
    app_logo = general_config.app_logo or settings.APP_LOGO
    if app_logo:
        logo_file = Path(settings.MEDIA_ROOT / Path(app_logo.lstrip("/"))).resolve()
-        if logo_file in present_files:
-            present_files.remove(logo_file)
+        present_files.discard(logo_file)

-    for doc in tqdm(Document.global_objects.all(), disable=not progress):
-        # Check sanity of the thumbnail
-        thumbnail_path: Final[Path] = Path(doc.thumbnail_path).resolve()
-        if not thumbnail_path.exists() or not thumbnail_path.is_file():
-            messages.error(doc.pk, "Thumbnail of document does not exist.")
-        else:
-            if thumbnail_path in present_files:
-                present_files.remove(thumbnail_path)
-            try:
-                _ = thumbnail_path.read_bytes()
-            except OSError as e:
-                messages.error(doc.pk, f"Cannot read thumbnail file of document: {e}")
+    return present_files

-        # Check sanity of the original file
-        # TODO: extract method
-        source_path: Final[Path] = Path(doc.source_path).resolve()
-        if not source_path.exists() or not source_path.is_file():
-            messages.error(doc.pk, "Original of document does not exist.")
-        else:
-            if source_path in present_files:
-                present_files.remove(source_path)
-            try:
-                checksum = hashlib.md5(source_path.read_bytes()).hexdigest()
-            except OSError as e:
-                messages.error(doc.pk, f"Cannot read original file of document: {e}")
-            else:
-                if checksum != doc.checksum:
-                    messages.error(
-                        doc.pk,
-                        "Checksum mismatch. "
-                        f"Stored: {doc.checksum}, actual: {checksum}.",
-                    )

-        # Check sanity of the archive file.
-        if doc.archive_checksum is not None and doc.archive_filename is None:
+def _check_thumbnail(
+    doc: Document,
+    messages: SanityCheckMessages,
+    present_files: set[Path],
+) -> None:
+    """Verify the thumbnail exists and is readable.
+
+    Args:
+        doc: Document whose ``thumbnail_path`` is checked.
+        messages: Collector that receives an error entry keyed by ``doc.pk``
+            when the thumbnail is missing or cannot be read.
+        present_files: Set of media files not yet matched to a document.
+            Mutated in place: the thumbnail is discarded from it once found.
+    """
+    # Resolved so it compares equal to the resolved paths in present_files.
+    thumbnail_path: Final[Path] = Path(doc.thumbnail_path).resolve()
+    if not thumbnail_path.exists() or not thumbnail_path.is_file():
+        messages.error(doc.pk, "Thumbnail of document does not exist.")
+        return
+
+    # The file belongs to a document, so it must not be reported as orphaned.
+    present_files.discard(thumbnail_path)
+    try:
+        # The bytes are thrown away; the read only proves the file is readable.
+        _ = thumbnail_path.read_bytes()
+    except OSError as e:
+        messages.error(doc.pk, f"Cannot read thumbnail file of document: {e}")
+
+
+def _check_original(
+    doc: Document,
+    messages: SanityCheckMessages,
+    present_files: set[Path],
+) -> None:
+    """Verify the original file exists, is readable, and has matching checksum."""
+    source_path: Final[Path] = Path(doc.source_path).resolve()
+    if not source_path.exists() or not source_path.is_file():
+        messages.error(doc.pk, "Original of document does not exist.")
+        return
+
+    present_files.discard(source_path)
+    try:
+        checksum = hashlib.md5(source_path.read_bytes()).hexdigest()
+    except OSError as e:
+        messages.error(doc.pk, f"Cannot read original file of document: {e}")
+    else:
+        if checksum != doc.checksum:
            messages.error(
                doc.pk,
-                "Document has an archive file checksum, but no archive filename.",
+                f"Checksum mismatch. Stored: {doc.checksum}, actual: {checksum}.",
            )
-        elif doc.archive_checksum is None and doc.archive_filename is not None:
+
+
+def _check_archive(
+    doc: Document,
+    messages: SanityCheckMessages,
+    present_files: set[Path],
+) -> None:
+    """Verify archive file consistency: checksum/filename pairing and file integrity."""
+    if doc.archive_checksum is not None and doc.archive_filename is None:
+        messages.error(
+            doc.pk,
+            "Document has an archive file checksum, but no archive filename.",
+        )
+    elif doc.archive_checksum is None and doc.archive_filename is not None:
+        messages.error(
+            doc.pk,
+            "Document has an archive file, but its checksum is missing.",
+        )
+    elif doc.has_archive_version:
+        if TYPE_CHECKING:
+            assert isinstance(doc.archive_path, Path)
+        archive_path: Final[Path] = Path(doc.archive_path).resolve()
+        if not archive_path.exists() or not archive_path.is_file():
+            messages.error(doc.pk, "Archived version of document does not exist.")
+            return
+
+        present_files.discard(archive_path)
+        try:
+            checksum = hashlib.md5(archive_path.read_bytes()).hexdigest()
+        except OSError as e:
            messages.error(
                doc.pk,
-                "Document has an archive file, but its checksum is missing.",
+                f"Cannot read archive file of document: {e}",
            )
-        elif doc.has_archive_version:
-            archive_path: Final[Path] = Path(doc.archive_path).resolve()
-            if not archive_path.exists() or not archive_path.is_file():
-                messages.error(doc.pk, "Archived version of document does not exist.")
-            else:
-                if archive_path in present_files:
-                    present_files.remove(archive_path)
-                try:
-                    checksum = hashlib.md5(archive_path.read_bytes()).hexdigest()
-                except OSError as e:
-                    messages.error(
-                        doc.pk,
-                        f"Cannot read archive file of document : {e}",
-                    )
-                else:
-                    if checksum != doc.archive_checksum:
-                        messages.error(
-                            doc.pk,
-                            "Checksum mismatch of archived document. "
-                            f"Stored: {doc.archive_checksum}, "
-                            f"actual: {checksum}.",
-                        )
+        else:
+            if checksum != doc.archive_checksum:
+                messages.error(
+                    doc.pk,
+                    "Checksum mismatch of archived document. "
+                    f"Stored: {doc.archive_checksum}, actual: {checksum}.",
+                )

-        # other document checks
-        if not doc.content:
-            messages.info(doc.pk, "Document contains no OCR data")
+
+def _check_content(doc: Document, messages: SanityCheckMessages) -> None:
+    """Flag documents with no OCR content.
+
+    Records an info-level message keyed by ``doc.pk`` when ``doc.content``
+    is falsy; otherwise does nothing.
+    """
+    if not doc.content:
+        messages.info(doc.pk, "Document contains no OCR data")
+
+
+def _check_document(
+    doc: Document,
+    messages: SanityCheckMessages,
+    present_files: set[Path],
+) -> None:
+    """Run all checks for a single document.
+
+    Args:
+        doc: The document to check.
+        messages: Collector that the individual checks append their findings to.
+        present_files: Set of media files not yet matched to a document; the
+            file-based checks remove the files they account for.
+    """
+    _check_thumbnail(doc, messages, present_files)
+    _check_original(doc, messages, present_files)
+    _check_archive(doc, messages, present_files)
+    # The content check inspects the database field only, so it needs no file set.
+    _check_content(doc, messages)
+
+
+# ---------------------------------------------------------------------------
+# Public entry point
+# ---------------------------------------------------------------------------
+
+
+def check_sanity(
+    *,
+    scheduled: bool = True,
+    iter_wrapper: IterWrapper[Document] = _identity,
+) -> SanityCheckMessages:
+    """Run a full sanity check on the document archive.
+
+    Args:
+        scheduled: Whether this is a scheduled (automatic) or manual check.
+            Controls the task type recorded in the database.
+        iter_wrapper: A callable that wraps the document iterable, e.g.,
+            for progress bar display. Defaults to identity (no wrapping).
+
+    Returns:
+        A SanityCheckMessages instance containing all detected issues.
+    """
+    paperless_task = PaperlessTask.objects.create(
+        task_id=uuid.uuid4(),
+        type=(
+            PaperlessTask.TaskType.SCHEDULED_TASK
+            if scheduled
+            else PaperlessTask.TaskType.MANUAL_TASK
+        ),
+        task_name=PaperlessTask.TaskName.CHECK_SANITY,
+        status=states.STARTED,
+        date_created=timezone.now(),
+        date_started=timezone.now(),
+    )
+
+    messages = SanityCheckMessages()
+    present_files = _build_present_files()
+
+    documents = Document.global_objects.all()
+    for doc in iter_wrapper(documents):
+        _check_document(doc, messages, present_files)

    for extra_file in present_files:
        messages.warning(None, f"Orphaned file in media dir: {extra_file}")

    paperless_task.status = states.SUCCESS if not messages.has_error else states.FAILURE
-    # result is concatenated messages
-    paperless_task.result = f"{len(messages)} issues found."
-    if messages.has_error:
-        paperless_task.result += " Check logs for details."
+    if messages.total_issue_count == 0:
+        paperless_task.result = "No issues found."
+    else:
+        parts: list[str] = []
+        if messages.document_error_count:
+            parts.append(f"{messages.document_error_count} document(s) with errors")
+        if messages.document_warning_count:
+            parts.append(f"{messages.document_warning_count} document(s) with warnings")
+        if messages.global_warning_count:
+            parts.append(f"{messages.global_warning_count} global warning(s)")
+        paperless_task.result = ", ".join(parts) + " found."
+        if messages.has_error:
+            paperless_task.result += " Check logs for details."
+
    paperless_task.date_done = timezone.now()
    paperless_task.save(update_fields=["status", "result", "date_done"])
+
    return messages
--- a/src/documents/signals/handlers.py
+++ b/src/documents/signals/handlers.py
@@ -4,6 +4,7 @@ import logging
 import shutil
 from pathlib import Path
 from typing import TYPE_CHECKING
+from typing import Any

 from celery import shared_task
 from celery import states
@@ -32,12 +33,14 @@ from documents.file_handling import create_source_path_directory
 from documents.file_handling import delete_empty_directories
 from documents.file_handling import generate_filename
 from documents.file_handling import generate_unique_filename
+from documents.models import Correspondent
 from documents.models import CustomField
 from documents.models import CustomFieldInstance
 from documents.models import Document
-from documents.models import MatchingModel
+from documents.models import DocumentType
 from documents.models import PaperlessTask
 from documents.models import SavedView
+from documents.models import StoragePath
 from documents.models import Tag
 from documents.models import UiSettings
 from documents.models import Workflow
@@ -81,47 +84,41 @@ def add_inbox_tags(sender, document: Document, logging_group=None, **kwargs) ->
    document.add_nested_tags(inbox_tags)


-def _suggestion_printer(
-    stdout,
-    style_func,
-    suggestion_type: str,
-    document: Document,
-    selected: MatchingModel,
-    base_url: str | None = None,
-) -> None:
-    """
-    Smaller helper to reduce duplication when just outputting suggestions to the console
-    """
-    doc_str = str(document)
-    if base_url is not None:
-        stdout.write(style_func.SUCCESS(doc_str))
-        stdout.write(style_func.SUCCESS(f"{base_url}/documents/{document.pk}"))
-    else:
-        stdout.write(style_func.SUCCESS(f"{doc_str} [{document.pk}]"))
-    stdout.write(f"Suggest {suggestion_type}: {selected}")
-
-
 def set_correspondent(
-    sender,
+    sender: object,
    document: Document,
    *,
-    logging_group=None,
+    logging_group: object = None,
    classifier: DocumentClassifier | None = None,
-    replace=False,
-    use_first=True,
-    suggest=False,
-    base_url=None,
-    stdout=None,
-    style_func=None,
-    **kwargs,
-) -> None:
+    replace: bool = False,
+    use_first: bool = True,
+    dry_run: bool = False,
+    **kwargs: Any,
+) -> Correspondent | None:
+    """
+    Assign a correspondent to a document based on classifier results.
+
+    Args:
+        document: The document to classify.
+        logging_group: Optional logging group for structured log output.
+        classifier: The trained classifier. If None, only rule-based matching runs.
+        replace: If True, overwrite an existing correspondent assignment.
+        use_first: If True, pick the first match when multiple correspondents
+            match. If False, skip assignment when multiple match.
+        dry_run: If True, compute and return the selection without saving.
+        **kwargs: Absorbed for Django signal compatibility (e.g. sender, signal).
+
+    Returns:
+        The correspondent that was (or would be) assigned, or None if no match
+        was found or assignment was skipped.
+    """
    if document.correspondent and not replace:
-        return
+        return None

    potential_correspondents = matching.match_correspondents(document, classifier)
-
    potential_count = len(potential_correspondents)
    selected = potential_correspondents[0] if potential_correspondents else None
+
    if potential_count > 1:
        if use_first:
            logger.debug(
@@ -135,49 +132,53 @@ def set_correspondent(
                f"not assigning any correspondent",
                extra={"group": logging_group},
            )
-            return
+            return None

-    if selected or replace:
-        if suggest:
-            _suggestion_printer(
-                stdout,
-                style_func,
-                "correspondent",
-                document,
-                selected,
-                base_url,
-            )
-        else:
-            logger.info(
-                f"Assigning correspondent {selected} to {document}",
-                extra={"group": logging_group},
-            )
+    if (selected or replace) and not dry_run:
+        logger.info(
+            f"Assigning correspondent {selected} to {document}",
+            extra={"group": logging_group},
+        )
+        document.correspondent = selected
+        document.save(update_fields=("correspondent",))

-            document.correspondent = selected
-            document.save(update_fields=("correspondent",))
+    return selected


 def set_document_type(
-    sender,
+    sender: object,
    document: Document,
    *,
-    logging_group=None,
+    logging_group: object = None,
    classifier: DocumentClassifier | None = None,
-    replace=False,
-    use_first=True,
-    suggest=False,
-    base_url=None,
-    stdout=None,
-    style_func=None,
-    **kwargs,
-) -> None:
+    replace: bool = False,
+    use_first: bool = True,
+    dry_run: bool = False,
+    **kwargs: Any,
+) -> DocumentType | None:
+    """
+    Assign a document type to a document based on classifier results.
+
+    Args:
+        document: The document to classify.
+        logging_group: Optional logging group for structured log output.
+        classifier: The trained classifier. If None, only rule-based matching runs.
+        replace: If True, overwrite an existing document type assignment.
+        use_first: If True, pick the first match when multiple types match.
+            If False, skip assignment when multiple match.
+        dry_run: If True, compute and return the selection without saving.
+        **kwargs: Absorbed for Django signal compatibility (e.g. sender, signal).
+
+    Returns:
+        The document type that was (or would be) assigned, or None if no match
+        was found or assignment was skipped.
+    """
    if document.document_type and not replace:
-        return
+        return None

-    potential_document_type = matching.match_document_types(document, classifier)
-
-    potential_count = len(potential_document_type)
-    selected = potential_document_type[0] if potential_document_type else None
+    potential_document_types = matching.match_document_types(document, classifier)
+    potential_count = len(potential_document_types)
+    selected = potential_document_types[0] if potential_document_types else None

    if potential_count > 1:
        if use_first:
@@ -192,42 +193,64 @@ def set_document_type(
                f"not assigning any document type",
                extra={"group": logging_group},
            )
-            return
+            return None

-    if selected or replace:
-        if suggest:
-            _suggestion_printer(
-                stdout,
-                style_func,
-                "document type",
-                document,
-                selected,
-                base_url,
-            )
-        else:
-            logger.info(
-                f"Assigning document type {selected} to {document}",
-                extra={"group": logging_group},
-            )
+    if (selected or replace) and not dry_run:
+        logger.info(
+            f"Assigning document type {selected} to {document}",
+            extra={"group": logging_group},
+        )
+        document.document_type = selected
+        document.save(update_fields=("document_type",))

-            document.document_type = selected
-            document.save(update_fields=("document_type",))
+    return selected


 def set_tags(
-    sender,
+    sender: object,
    document: Document,
    *,
-    logging_group=None,
+    logging_group: object = None,
    classifier: DocumentClassifier | None = None,
-    replace=False,
-    suggest=False,
-    base_url=None,
-    stdout=None,
-    style_func=None,
-    **kwargs,
-) -> None:
+    replace: bool = False,
+    dry_run: bool = False,
+    **kwargs: Any,
+) -> tuple[set[Tag], set[Tag]]:
+    """
+    Assign tags to a document based on classifier results.
+
+    When replace=True, existing auto-matched and rule-matched tags are removed
+    before applying the new set (inbox tags and manually-added tags are preserved).
+
+    Args:
+        document: The document to classify.
+        logging_group: Optional logging group for structured log output.
+        classifier: The trained classifier. If None, only rule-based matching runs.
+        replace: If True, remove existing classifier-managed tags before applying
+            new ones. Inbox tags and manually-added tags are always preserved.
+        dry_run: If True, compute what would change without saving anything.
+        **kwargs: Absorbed for Django signal compatibility (e.g. sender, signal).
+
+    Returns:
+        A two-tuple of (tags_added, tags_removed). In non-replace mode,
+        tags_removed is always an empty set. In dry_run mode, neither set
+        is applied to the database.
+    """
+    # Compute which tags would be removed under replace mode.
+    # The filter mirrors the .delete() call below: keep inbox tags and
+    # manually-added tags (match="" and not auto-matched).
    if replace:
+        tags_to_remove: set[Tag] = set(
+            document.tags.exclude(
+                is_inbox_tag=True,
+            ).exclude(
+                Q(match="") & ~Q(matching_algorithm=Tag.MATCH_AUTO),
+            ),
+        )
+    else:
+        tags_to_remove = set()
+
+    if replace and not dry_run:
        Document.tags.through.objects.filter(document=document).exclude(
            Q(tag__is_inbox_tag=True),
        ).exclude(
@@ -235,65 +258,53 @@ def set_tags(
        ).delete()

    current_tags = set(document.tags.all())
-
    matched_tags = matching.match_tags(document, classifier)
+    tags_to_add = set(matched_tags) - current_tags

-    relevant_tags = set(matched_tags) - current_tags
-
-    if suggest:
-        extra_tags = current_tags - set(matched_tags)
-        extra_tags = [
-            t for t in extra_tags if t.matching_algorithm == MatchingModel.MATCH_AUTO
-        ]
-        if not relevant_tags and not extra_tags:
-            return
-        doc_str = style_func.SUCCESS(str(document))
-        if base_url:
-            stdout.write(doc_str)
-            stdout.write(f"{base_url}/documents/{document.pk}")
-        else:
-            stdout.write(doc_str + style_func.SUCCESS(f" [{document.pk}]"))
-        if relevant_tags:
-            stdout.write("Suggest tags: " + ", ".join([t.name for t in relevant_tags]))
-        if extra_tags:
-            stdout.write("Extra tags: " + ", ".join([t.name for t in extra_tags]))
-    else:
-        if not relevant_tags:
-            return
-
-        message = 'Tagging "{}" with "{}"'
+    if tags_to_add and not dry_run:
        logger.info(
-            message.format(document, ", ".join([t.name for t in relevant_tags])),
+            f'Tagging "{document}" with "{", ".join(t.name for t in tags_to_add)}"',
            extra={"group": logging_group},
        )
+        document.add_nested_tags(tags_to_add)

-        document.add_nested_tags(relevant_tags)
+    return tags_to_add, tags_to_remove


 def set_storage_path(
-    sender,
+    sender: object,
    document: Document,
    *,
-    logging_group=None,
+    logging_group: object = None,
    classifier: DocumentClassifier | None = None,
-    replace=False,
-    use_first=True,
-    suggest=False,
-    base_url=None,
-    stdout=None,
-    style_func=None,
-    **kwargs,
-) -> None:
+    replace: bool = False,
+    use_first: bool = True,
+    dry_run: bool = False,
+    **kwargs: Any,
+) -> StoragePath | None:
+    """
+    Assign a storage path to a document based on classifier results.
+
+    Args:
+        document: The document to classify.
+        logging_group: Optional logging group for structured log output.
+        classifier: The trained classifier. If None, only rule-based matching runs.
+        replace: If True, overwrite an existing storage path assignment.
+        use_first: If True, pick the first match when multiple paths match.
+            If False, skip assignment when multiple match.
+        dry_run: If True, compute and return the selection without saving.
+        **kwargs: Absorbed for Django signal compatibility (e.g. sender, signal).
+
+    Returns:
+        The storage path that was (or would be) assigned, or None if no match
+        was found or assignment was skipped.
+    """
    if document.storage_path and not replace:
-        return
+        return None

-    potential_storage_path = matching.match_storage_paths(
-        document,
-        classifier,
-    )
-
-    potential_count = len(potential_storage_path)
-    selected = potential_storage_path[0] if potential_storage_path else None
+    potential_storage_paths = matching.match_storage_paths(document, classifier)
+    potential_count = len(potential_storage_paths)
+    selected = potential_storage_paths[0] if potential_storage_paths else None

    if potential_count > 1:
        if use_first:
@@ -308,26 +319,17 @@ def set_storage_path(
                f"not assigning any storage directory",
                extra={"group": logging_group},
            )
-            return
+            return None

-    if selected or replace:
-        if suggest:
-            _suggestion_printer(
-                stdout,
-                style_func,
-                "storage directory",
-                document,
-                selected,
-                base_url,
-            )
-        else:
-            logger.info(
-                f"Assigning storage path {selected} to {document}",
-                extra={"group": logging_group},
-            )
+    if (selected or replace) and not dry_run:
+        logger.info(
+            f"Assigning storage path {selected} to {document}",
+            extra={"group": logging_group},
+        )
+        document.storage_path = selected
+        document.save(update_fields=("storage_path",))

-            document.storage_path = selected
-            document.save(update_fields=("storage_path",))
+    return selected


 # see empty_trash in documents/tasks.py for signal handling
--- a/src/documents/tasks.py
+++ b/src/documents/tasks.py
@@ -4,11 +4,13 @@ import logging
 import shutil
 import uuid
 import zipfile
+from collections.abc import Callable
+from collections.abc import Iterable
 from pathlib import Path
 from tempfile import TemporaryDirectory
 from tempfile import mkstemp
+from typing import TypeVar

-import tqdm
 from celery import Task
 from celery import shared_task
 from celery import states
@@ -66,11 +68,19 @@ from paperless_ai.indexing import llm_index_add_or_update_document
 from paperless_ai.indexing import llm_index_remove_document
 from paperless_ai.indexing import update_llm_index

+_T = TypeVar("_T")
+IterWrapper = Callable[[Iterable[_T]], Iterable[_T]]
+
+
 if settings.AUDIT_LOG_ENABLED:
    from auditlog.models import LogEntry
 logger = logging.getLogger("paperless.tasks")


+def _identity(iterable: Iterable[_T]) -> Iterable[_T]:
+    """Return ``iterable`` unchanged.
+
+    Used as the default ``iter_wrapper`` so callers that do not supply a
+    wrapper (e.g. for progress display) iterate the documents directly.
+    """
+    return iterable
+
+
@shared_task
 def index_optimize() -> None:
    ix = index.open_index()
@@ -78,13 +88,13 @@ def index_optimize() -> None:
    writer.commit(optimize=True)


-def index_reindex(*, progress_bar_disable=False) -> None:
+def index_reindex(*, iter_wrapper: IterWrapper[Document] = _identity) -> None:
    documents = Document.objects.all()

    ix = index.open_index(recreate=True)

    with AsyncWriter(ix) as writer:
-        for document in tqdm.tqdm(documents, disable=progress_bar_disable):
+        for document in iter_wrapper(documents):
            index.update_document(writer, document)


@@ -227,20 +237,30 @@ def consume_file(
@shared_task
 def sanity_check(*, scheduled=True, raise_on_error=True):
    messages = sanity_checker.check_sanity(scheduled=scheduled)
-
    messages.log_messages()

+    if not messages.has_error and not messages.has_warning and not messages.has_info:
+        return "No issues detected."
+
+    parts: list[str] = []
+    if messages.document_error_count:
+        parts.append(f"{messages.document_error_count} document(s) with errors")
+    if messages.document_warning_count:
+        parts.append(f"{messages.document_warning_count} document(s) with warnings")
+    if messages.document_info_count:
+        parts.append(f"{messages.document_info_count} document(s) with infos")
+    if messages.global_warning_count:
+        parts.append(f"{messages.global_warning_count} global warning(s)")
+
+    summary = ", ".join(parts) + " found."
+
    if messages.has_error:
-        message = "Sanity check exited with errors. See log."
+        message = summary + " Check logs for details."
        if raise_on_error:
            raise SanityCheckFailedException(message)
        return message
-    elif messages.has_warning:
-        return "Sanity check exited with warnings. See log."
-    elif len(messages) > 0:
-        return "Sanity check exited with infos. See log."
-    else:
-        return "No issues detected."
+
+    return summary


@shared_task
@@ -265,7 +285,6 @@ def bulk_update_documents(document_ids) -> None:
    ai_config = AIConfig()
    if ai_config.llm_index_enabled:
        update_llm_index(
-            progress_bar_disable=True,
            rebuild=False,
        )

@@ -606,7 +625,7 @@ def update_document_parent_tags(tag: Tag, new_parent: Tag) -> None:
@shared_task
 def llmindex_index(
    *,
-    progress_bar_disable=True,
+    iter_wrapper: IterWrapper[Document] = _identity,
    rebuild=False,
    scheduled=True,
    auto=False,
@@ -629,7 +648,7 @@ def llmindex_index(

        try:
            result = update_llm_index(
-                progress_bar_disable=progress_bar_disable,
+                iter_wrapper=iter_wrapper,
                rebuild=rebuild,
            )
            task.status = states.SUCCESS
--- a/src/documents/tests/conftest.py
+++ b/src/documents/tests/conftest.py
@@ -1,10 +1,96 @@
+import shutil
 import zoneinfo
+from dataclasses import dataclass
+from pathlib import Path
+from typing import TYPE_CHECKING

+import filelock
 import pytest
 from django.contrib.auth import get_user_model
 from pytest_django.fixtures import SettingsWrapper
 from rest_framework.test import APIClient

+from documents.tests.factories import DocumentFactory
+
+if TYPE_CHECKING:
+    from documents.models import Document
+
+
+@dataclass(frozen=True, slots=True)
+class PaperlessDirs:
+    """Standard Paperless-ngx directory layout for tests."""
+
+    # Root media directory.
+    media: Path
+    # Directory holding original document files.
+    originals: Path
+    # Directory holding archived document versions.
+    archive: Path
+    # Directory holding document thumbnails.
+    thumbnails: Path
+
+
+@pytest.fixture(scope="session")
+def samples_dir() -> Path:
+    """Path to the shared test sample documents.
+
+    Resolved relative to this conftest file: ``samples/documents`` next to it.
+    """
+    return Path(__file__).parent / "samples" / "documents"
+
+
+@pytest.fixture()
+def paperless_dirs(tmp_path: Path) -> PaperlessDirs:
+    """Create and return the directory structure for testing.
+
+    Everything lives under ``tmp_path / "media"``, so each test that requests
+    this fixture gets its own directory tree.
+    """
+    media = tmp_path / "media"
+    dirs = PaperlessDirs(
+        media=media,
+        originals=media / "documents" / "originals",
+        archive=media / "documents" / "archive",
+        thumbnails=media / "documents" / "thumbnails",
+    )
+    for d in (dirs.originals, dirs.archive, dirs.thumbnails):
+        # parents=True also creates media/ and media/documents/ on the way.
+        d.mkdir(parents=True)
+    return dirs
+
+
+@pytest.fixture()
+def _media_settings(paperless_dirs: PaperlessDirs, settings: SettingsWrapper) -> None:
+    """Configure Django settings to point at temp directories.
+
+    Returns nothing; the fixture is requested purely for its side effect of
+    overriding the media-related settings via the ``settings`` fixture.
+    """
+    settings.MEDIA_ROOT = paperless_dirs.media
+    settings.ORIGINALS_DIR = paperless_dirs.originals
+    settings.ARCHIVE_DIR = paperless_dirs.archive
+    settings.THUMBNAIL_DIR = paperless_dirs.thumbnails
+    # Lock file path inside the temporary media directory.
+    settings.MEDIA_LOCK = paperless_dirs.media / "media.lock"
+    settings.IGNORABLE_FILES = {".DS_Store", "Thumbs.db", "desktop.ini"}
+    # Empty value: no application logo is configured through settings.
+    settings.APP_LOGO = ""
+
+
+@pytest.fixture()
+def sample_doc(
+    paperless_dirs: PaperlessDirs,
+    _media_settings: None,
+    samples_dir: Path,
+) -> "Document":
+    """Create a document with valid files and matching checksums.
+
+    Copies the sample original, archive and thumbnail for document
+    ``0000001`` into the temporary media tree, then creates the matching
+    database row. ``_media_settings`` is requested only for its side effect
+    on Django settings.
+    """
+    # Hold the media lock file while the sample files are copied into place.
+    with filelock.FileLock(paperless_dirs.media / "media.lock"):
+        shutil.copy(
+            samples_dir / "originals" / "0000001.pdf",
+            paperless_dirs.originals / "0000001.pdf",
+        )
+        shutil.copy(
+            samples_dir / "archive" / "0000001.pdf",
+            paperless_dirs.archive / "0000001.pdf",
+        )
+        shutil.copy(
+            samples_dir / "thumbnails" / "0000001.webp",
+            paperless_dirs.thumbnails / "0000001.webp",
+        )
+
+    # Hard-coded checksums; intended to match the sample files copied above.
+    return DocumentFactory(
+        title="test",
+        checksum="42995833e01aea9b3edee44bbfdd7ce1",
+        archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b",
+        content="test content",
+        pk=1,
+        filename="0000001.pdf",
+        mime_type="application/pdf",
+        archive_filename="0000001.pdf",
+    )
+

@pytest.fixture()
 def settings_timezone(settings: SettingsWrapper) -> zoneinfo.ZoneInfo:
@@ -28,3 +114,14 @@ def authenticated_rest_api_client(rest_api_client: APIClient):
    user = UserModel.objects.create_user(username="testuser", password="password")
    rest_api_client.force_authenticate(user=user)
    yield rest_api_client
+
+
+@pytest.fixture(scope="session", autouse=True)
+def faker_session_locale():
+    """Set Faker locale for reproducibility.
+
+    Returns the locale string ``en_US``. Presumably picked up by Faker's
+    pytest plugin through this fixture name -- confirm against the plugin docs.
+    """
+    return "en_US"
+
+
+@pytest.fixture(scope="session", autouse=True)
+def faker_seed():
+    """Return the fixed seed value ``12345``.
+
+    Presumably consumed by Faker's pytest plugin through this fixture name to
+    make generated data repeatable -- confirm against the plugin docs.
+    """
+    return 12345
--- a/src/documents/tests/factories.py
+++ b/src/documents/tests/factories.py
@@ -1,17 +1,67 @@
-from factory import Faker
+"""
+Factory-boy factories for documents app models.
+"""
+
+from __future__ import annotations
+
+import factory
 from factory.django import DjangoModelFactory

 from documents.models import Correspondent
 from documents.models import Document
+from documents.models import DocumentType
+from documents.models import MatchingModel
+from documents.models import StoragePath
+from documents.models import Tag


 class CorrespondentFactory(DjangoModelFactory):
+    """
+    Build ``Correspondent`` rows with unique, human-readable names.
+
+    ``name`` is "<fake company> <sequence number>"; matching is disabled by
+    default (empty ``match``, ``MATCH_NONE``). Any field, as well as the
+    ``base_name`` parameter, can be overridden per call.
+    """
+
     class Meta:
         model = Correspondent

-    name = Faker("name")
+    class Params:
+        # Not a model field: fake company used as the readable part of ``name``.
+        base_name = factory.Faker("company")
+
+    # A Faker declaration is only evaluated when it is a factory attribute.
+    # Interpolating ``factory.Faker(...)`` into an f-string would embed the
+    # declaration object's repr, so the resolved parameter is read instead.
+    name = factory.LazyAttributeSequence(lambda o, n: f"{o.base_name} {n}")
+    match = ""
+    matching_algorithm = MatchingModel.MATCH_NONE
+
+
+class DocumentTypeFactory(DjangoModelFactory):
+    """
+    Build ``DocumentType`` rows with unique, human-readable names.
+
+    ``name`` is "<fake phrase> <sequence number>"; matching is disabled by
+    default (empty ``match``, ``MATCH_NONE``). Any field, as well as the
+    ``base_name`` parameter, can be overridden per call.
+    """
+
+    class Meta:
+        model = DocumentType
+
+    class Params:
+        # Not a model field: fake phrase used as the readable part of ``name``.
+        base_name = factory.Faker("bs")
+
+    # Read the resolved Faker value; formatting ``factory.Faker(...)`` directly
+    # in an f-string would embed the declaration object's repr.
+    name = factory.LazyAttributeSequence(lambda o, n: f"{o.base_name} {n}")
+    match = ""
+    matching_algorithm = MatchingModel.MATCH_NONE
+
+
+class TagFactory(DjangoModelFactory):
+    """
+    Build ``Tag`` rows with unique, human-readable names.
+
+    ``name`` is "<fake word> <sequence number>"; by default the tag is not an
+    inbox tag and matching is disabled (empty ``match``, ``MATCH_NONE``). Any
+    field, as well as the ``base_name`` parameter, can be overridden per call.
+    """
+
+    class Meta:
+        model = Tag
+
+    class Params:
+        # Not a model field: fake word used as the readable part of ``name``.
+        base_name = factory.Faker("word")
+
+    # Read the resolved Faker value; formatting ``factory.Faker(...)`` directly
+    # in an f-string would embed the declaration object's repr.
+    name = factory.LazyAttributeSequence(lambda o, n: f"{o.base_name} {n}")
+    match = ""
+    matching_algorithm = MatchingModel.MATCH_NONE
+    is_inbox_tag = False
+
+
+class StoragePathFactory(DjangoModelFactory):
+    """
+    Build ``StoragePath`` rows with unique names and a derived path template.
+
+    ``name`` is "<fake file path> <sequence number>" and ``path`` defaults to
+    "<name>/{title}"; matching is disabled by default (empty ``match``,
+    ``MATCH_NONE``). Any field, as well as the ``base_name`` parameter, can be
+    overridden per call.
+    """
+
+    class Meta:
+        model = StoragePath
+
+    class Params:
+        # Not a model field: fake path used as the readable part of ``name``.
+        base_name = factory.Faker("file_path", depth=2, extension="")
+
+    # Read the resolved Faker value; formatting ``factory.Faker(...)`` directly
+    # in an f-string would embed the declaration object's repr, which would
+    # also leak into the derived ``path`` below.
+    name = factory.LazyAttributeSequence(lambda o, n: f"{o.base_name} {n}")
+    # The doubled braces keep a literal "{title}" placeholder in the template.
+    path = factory.LazyAttribute(lambda o: f"{o.name}/{{title}}")
+    match = ""
+    matching_algorithm = MatchingModel.MATCH_NONE


 class DocumentFactory(DjangoModelFactory):
+    """
+    Build ``Document`` rows with Faker-generated text fields.
+
+    By default the title is a fake sentence, the checksum a fake MD5 hex
+    digest and the content a fake paragraph. Correspondent, document type and
+    storage path default to ``None``; pass them explicitly when a test needs
+    them.
+    """
+
     class Meta:
         model = Document
+
+    title = factory.Faker("sentence", nb_words=4)
+    checksum = factory.Faker("md5")
+    content = factory.Faker("paragraph")
+    correspondent = None
+    document_type = None
+    storage_path = None
--- a/src/documents/tests/management/test_management_sanity_checker.py
+++ b/src/documents/tests/management/test_management_sanity_checker.py
@@ -0,0 +1,193 @@
+"""Tests for the document_sanity_checker management command.
+
+Verifies Rich rendering (table, panel, summary) and end-to-end CLI behavior.
+"""
+
+from __future__ import annotations
+
+from io import StringIO
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+import pytest
+from django.core.management import call_command
+from rich.console import Console
+
+from documents.management.commands.document_sanity_checker import Command
+from documents.sanity_checker import SanityCheckMessages
+from documents.tests.factories import DocumentFactory
+
+if TYPE_CHECKING:
+    from documents.models import Document
+    from documents.tests.conftest import PaperlessDirs
+
+
+def _render_to_string(messages: SanityCheckMessages) -> str:
+    """
+    Render ``messages`` through the command and return the plain-text output.
+
+    The command's console is replaced by a colourless, 120-column Rich console
+    writing into an in-memory buffer, so assertions can match on raw text.
+    """
+    sink = StringIO()
+    command = Command()
+    command.console = Console(file=sink, width=120, no_color=True)
+    command._render_results(messages)
+    return sink.getvalue()
+
+
+# ---------------------------------------------------------------------------
+# Rich rendering
+# ---------------------------------------------------------------------------
+
+
+class TestRenderResultsNoIssues:
+    """Rendering of an empty ``SanityCheckMessages``; no DB access needed."""
+
+    def test_shows_panel(self) -> None:
+        """An empty result renders both the title and the all-clear text."""
+        rendered = _render_to_string(SanityCheckMessages())
+        for expected in ("No issues detected", "Sanity Check"):
+            assert expected in rendered
+
+
+@pytest.mark.django_db
+class TestRenderResultsWithIssues:
+    """Rendered output when messages are present, per level and per target."""
+
+    def test_error_row(self, sample_doc: Document) -> None:
+        """An error shows the level, the message, the document pk and its title."""
+        msgs = SanityCheckMessages()
+        msgs.error(sample_doc.pk, "Original missing")
+        output = _render_to_string(msgs)
+        assert "Sanity Check Results" in output
+        assert "ERROR" in output
+        assert "Original missing" in output
+        assert f"#{sample_doc.pk}" in output
+        assert sample_doc.title in output
+
+    def test_warning_row(self, sample_doc: Document) -> None:
+        """A warning is rendered with the WARN level label."""
+        msgs = SanityCheckMessages()
+        msgs.warning(sample_doc.pk, "Suspicious file")
+        output = _render_to_string(msgs)
+        assert "WARN" in output
+        assert "Suspicious file" in output
+
+    def test_info_row(self, sample_doc: Document) -> None:
+        """An info message is rendered with the INFO level label."""
+        msgs = SanityCheckMessages()
+        msgs.info(sample_doc.pk, "No OCR data")
+        output = _render_to_string(msgs)
+        assert "INFO" in output
+        assert "No OCR data" in output
+
+    @pytest.mark.usefixtures("_media_settings")
+    def test_global_message(self) -> None:
+        """A message recorded with pk ``None`` is labelled "(global)"."""
+        msgs = SanityCheckMessages()
+        msgs.warning(None, "Orphaned file: /tmp/stray.pdf")
+        output = _render_to_string(msgs)
+        assert "(global)" in output
+        assert "Orphaned file" in output
+
+    def test_multiple_messages_same_doc(self, sample_doc: Document) -> None:
+        """Several messages for one document are all rendered."""
+        msgs = SanityCheckMessages()
+        msgs.error(sample_doc.pk, "Thumbnail missing")
+        msgs.error(sample_doc.pk, "Checksum mismatch")
+        output = _render_to_string(msgs)
+        assert "Thumbnail missing" in output
+        assert "Checksum mismatch" in output
+
+    @pytest.mark.usefixtures("_media_settings")
+    def test_unknown_doc_pk(self) -> None:
+        """A pk with no document row (none is created here) renders "Unknown"."""
+        msgs = SanityCheckMessages()
+        msgs.error(99999, "Ghost document")
+        output = _render_to_string(msgs)
+        assert "#99999" in output
+        assert "Unknown" in output
+
+
+@pytest.mark.django_db
+class TestRenderResultsSummary:
+    """Summary text: per-level document counts and the global warning count."""
+
+    def test_errors_only(self, sample_doc: Document) -> None:
+        """One document with an error is counted once under errors."""
+        msgs = SanityCheckMessages()
+        msgs.error(sample_doc.pk, "broken")
+        output = _render_to_string(msgs)
+        assert "1 document(s) with" in output
+        assert "errors" in output
+
+    def test_warnings_only(self, sample_doc: Document) -> None:
+        """One document with a warning is counted once under warnings."""
+        msgs = SanityCheckMessages()
+        msgs.warning(sample_doc.pk, "odd")
+        output = _render_to_string(msgs)
+        assert "1 document(s) with" in output
+        assert "warnings" in output
+
+    def test_infos_only(self, sample_doc: Document) -> None:
+        """One document with an info message is counted once under infos."""
+        msgs = SanityCheckMessages()
+        msgs.info(sample_doc.pk, "no OCR")
+        output = _render_to_string(msgs)
+        assert "1 document(s) with infos" in output
+
+    def test_empty_messages(self) -> None:
+        """With no messages the all-clear sentence is shown."""
+        msgs = SanityCheckMessages()
+        output = _render_to_string(msgs)
+        assert "No issues detected." in output
+
+    def test_document_errors_and_global_warnings(self, sample_doc: Document) -> None:
+        """A global warning is reported separately and is not counted as a document."""
+        msgs = SanityCheckMessages()
+        msgs.error(sample_doc.pk, "broken")
+        msgs.warning(None, "orphan")
+        output = _render_to_string(msgs)
+        assert "1 document(s) with" in output
+        assert "errors" in output
+        assert "1 global warning(s)" in output
+        # The global entry must not inflate the document count.
+        assert "2 document(s)" not in output
+
+    def test_global_warnings_only(self) -> None:
+        """Only global warnings: no per-document summary line appears."""
+        msgs = SanityCheckMessages()
+        msgs.warning(None, "extra file")
+        output = _render_to_string(msgs)
+        assert "1 global warning(s)" in output
+        assert "document(s) with" not in output
+
+    def test_all_levels_combined(self, sample_doc: Document) -> None:
+        """One document carrying every level is counted once per level."""
+        msgs = SanityCheckMessages()
+        msgs.error(sample_doc.pk, "broken")
+        msgs.warning(sample_doc.pk, "odd")
+        msgs.info(sample_doc.pk, "fyi")
+        msgs.warning(None, "extra file")
+        output = _render_to_string(msgs)
+        assert "1 document(s) with errors" in output
+        assert "1 document(s) with warnings" in output
+        assert "1 document(s) with infos" in output
+        assert "1 global warning(s)" in output
+
+
+# ---------------------------------------------------------------------------
+# End-to-end command execution
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.django_db
+@pytest.mark.management
+class TestDocumentSanityCheckerCommand:
+    """Run ``document_sanity_checker`` via ``call_command`` and inspect stdout."""
+
+    @staticmethod
+    def _run_command() -> str:
+        """Invoke the command without a progress bar; return captured stdout."""
+        sink = StringIO()
+        call_command("document_sanity_checker", "--no-progress-bar", stdout=sink)
+        return sink.getvalue()
+
+    def test_no_issues(self, sample_doc: Document) -> None:
+        """A document with intact files yields the all-clear message."""
+        assert "No issues detected" in self._run_command()
+
+    def test_missing_original(self, sample_doc: Document) -> None:
+        """Deleting the original file is reported as an error."""
+        Path(sample_doc.source_path).unlink()
+        report = self._run_command()
+        assert "ERROR" in report
+        assert "Original of document does not exist" in report
+
+    @pytest.mark.usefixtures("_media_settings")
+    def test_checksum_mismatch(self, paperless_dirs: PaperlessDirs) -> None:
+        """Lightweight document with zero-byte files triggers checksum mismatch."""
+        broken_doc = DocumentFactory(
+            title="test",
+            content="test",
+            filename="test.pdf",
+            checksum="abc",
+        )
+        # Create empty original and thumbnail files at the document's paths.
+        for file_path in (broken_doc.source_path, broken_doc.thumbnail_path):
+            Path(file_path).touch()
+
+        report = self._run_command()
+        assert "ERROR" in report
+        assert "Checksum mismatch. Stored: abc, actual:" in report
--- a/src/documents/tests/test_management.py
+++ b/src/documents/tests/test_management.py
@@ -134,6 +134,7 @@ class TestRenamer(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
        self.assertIsFile(doc2.archive_path)


+@pytest.mark.management
 class TestCreateClassifier(TestCase):
    @mock.patch(
        "documents.management.commands.document_create_classifier.train_classifier",
@@ -144,32 +145,6 @@ class TestCreateClassifier(TestCase):
        m.assert_called_once()


-@pytest.mark.management
-class TestSanityChecker(DirectoriesMixin, TestCase):
-    def test_no_issues(self) -> None:
-        with self.assertLogs() as capture:
-            call_command("document_sanity_checker")
-
-        self.assertEqual(len(capture.output), 1)
-        self.assertIn("Sanity checker detected no issues.", capture.output[0])
-
-    def test_errors(self) -> None:
-        doc = Document.objects.create(
-            title="test",
-            content="test",
-            filename="test.pdf",
-            checksum="abc",
-        )
-        Path(doc.source_path).touch()
-        Path(doc.thumbnail_path).touch()
-
-        with self.assertLogs() as capture:
-            call_command("document_sanity_checker")
-
-        self.assertEqual(len(capture.output), 2)
-        self.assertIn("Checksum mismatch. Stored: abc, actual:", capture.output[1])
-
-
@pytest.mark.management
 class TestConvertMariaDBUUID(TestCase):
    @mock.patch("django.db.connection.schema_editor")
--- a/src/documents/tests/test_management_exporter.py
+++ b/src/documents/tests/test_management_exporter.py
@@ -288,7 +288,7 @@ class TestExportImport(
            self.assertEqual(Permission.objects.count(), num_permission_objects)
            messages = check_sanity()
            # everything is alright after the test
-            self.assertEqual(len(messages), 0)
+            self.assertEqual(messages.total_issue_count, 0)

    def test_exporter_with_filename_format(self) -> None:
        shutil.rmtree(Path(self.dirs.media_dir) / "documents")
--- a/src/documents/tests/test_management_retagger.py
+++ b/src/documents/tests/test_management_retagger.py
@@ -1,298 +1,442 @@
+"""
+Tests for the document_retagger management command.
+"""
+
+from __future__ import annotations
+
 import pytest
 from django.core.management import call_command
 from django.core.management.base import CommandError
-from django.test import TestCase

 from documents.models import Correspondent
 from documents.models import Document
 from documents.models import DocumentType
+from documents.models import MatchingModel
 from documents.models import StoragePath
 from documents.models import Tag
+from documents.tests.factories import CorrespondentFactory
+from documents.tests.factories import DocumentFactory
+from documents.tests.factories import DocumentTypeFactory
+from documents.tests.factories import StoragePathFactory
+from documents.tests.factories import TagFactory
 from documents.tests.utils import DirectoriesMixin

+# ---------------------------------------------------------------------------
+# Module-level type aliases
+# ---------------------------------------------------------------------------
+
+StoragePathTuple = tuple[StoragePath, StoragePath, StoragePath]
+TagTuple = tuple[Tag, Tag, Tag, Tag, Tag]
+CorrespondentTuple = tuple[Correspondent, Correspondent]
+DocumentTypeTuple = tuple[DocumentType, DocumentType]
+DocumentTuple = tuple[Document, Document, Document, Document]
+
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture()
+def storage_paths(db) -> StoragePathTuple:
+    """
+    Three storage paths with varying match rules.
+
+    Returned in the order ``(sp1, sp2, sp3)``:
+    a literal match on "auto document", the regex ``^first|^unrelated`` and
+    the regex ``^blah``. Names are left to ``StoragePathFactory``.
+    """
+    # NOTE(review): "{created_data}" may be a typo for a date placeholder --
+    # confirm whether this placeholder name is intentional.
+    sp1 = StoragePathFactory(
+        path="{created_data}/{title}",
+        match="auto document",
+        matching_algorithm=MatchingModel.MATCH_LITERAL,
+    )
+    sp2 = StoragePathFactory(
+        path="{title}",
+        match="^first|^unrelated",
+        matching_algorithm=MatchingModel.MATCH_REGEX,
+    )
+    sp3 = StoragePathFactory(
+        path="{title}",
+        match="^blah",
+        matching_algorithm=MatchingModel.MATCH_REGEX,
+    )
+    return sp1, sp2, sp3
+
+
+@pytest.fixture()
+def tags(db) -> TagTuple:
+    """
+    Tags covering the common matching scenarios.
+
+    Returned in the order ``(first, second, inbox, no_match, auto)``:
+    two tags matching the words "first" / "second" (``MATCH_ANY``), an inbox
+    tag, a tag with the factory's default (no) matching, and a tag using
+    ``MATCH_AUTO``.
+    """
+    tag_first = TagFactory(match="first", matching_algorithm=Tag.MATCH_ANY)
+    tag_second = TagFactory(match="second", matching_algorithm=Tag.MATCH_ANY)
+    tag_inbox = TagFactory(is_inbox_tag=True)
+    tag_no_match = TagFactory()
+    tag_auto = TagFactory(matching_algorithm=Tag.MATCH_AUTO)
+    return tag_first, tag_second, tag_inbox, tag_no_match, tag_auto
+
+
+@pytest.fixture()
+def correspondents(db) -> CorrespondentTuple:
+    """
+    Two correspondents matching 'first' and 'second' content.
+
+    Both use ``MATCH_ANY``; returned in the order ``(first, second)``.
+    """
+    c_first = CorrespondentFactory(
+        match="first",
+        matching_algorithm=MatchingModel.MATCH_ANY,
+    )
+    c_second = CorrespondentFactory(
+        match="second",
+        matching_algorithm=MatchingModel.MATCH_ANY,
+    )
+    return c_first, c_second
+
+
+@pytest.fixture()
+def document_types(db) -> DocumentTypeTuple:
+    """
+    Two document types matching 'first' and 'second' content.
+
+    Both use ``MATCH_ANY``; returned in the order ``(first, second)``.
+    """
+    dt_first = DocumentTypeFactory(
+        match="first",
+        matching_algorithm=MatchingModel.MATCH_ANY,
+    )
+    dt_second = DocumentTypeFactory(
+        match="second",
+        matching_algorithm=MatchingModel.MATCH_ANY,
+    )
+    return dt_first, dt_second
+
+
+@pytest.fixture()
+def documents(storage_paths: StoragePathTuple, tags: TagTuple) -> DocumentTuple:
+    """
+    Four documents with varied content used across most retagger tests.
+
+    Returned in the order ``(d1, d2, d3, d4)`` with titles "A" to "D" and
+    contents "first document", "second document", "unrelated document" and
+    "auto document". Only d3 starts with a storage path (sp3) and with the
+    inbox and no-match tags; d4 starts with the auto tag.
+    """
+    _, _, sp3 = storage_paths
+    _, _, tag_inbox, tag_no_match, tag_auto = tags
+
+    d1 = DocumentFactory(checksum="A", title="A", content="first document")
+    d2 = DocumentFactory(checksum="B", title="B", content="second document")
+    d3 = DocumentFactory(
+        checksum="C",
+        title="C",
+        content="unrelated document",
+        storage_path=sp3,
+    )
+    d4 = DocumentFactory(checksum="D", title="D", content="auto document")
+
+    # Tags can only be attached once the documents exist.
+    d3.tags.add(tag_inbox, tag_no_match)
+    d4.tags.add(tag_auto)
+
+    return d1, d2, d3, d4
+
+
+def _get_docs() -> DocumentTuple:
+    """Re-read the documents titled A, B, C and D from the database, in that order."""
+    fetch = Document.objects.get
+    return fetch(title="A"), fetch(title="B"), fetch(title="C"), fetch(title="D")
+
+
+# ---------------------------------------------------------------------------
+# Tag assignment
+# ---------------------------------------------------------------------------
+

@pytest.mark.management
-class TestRetagger(DirectoriesMixin, TestCase):
-    def make_models(self) -> None:
-        self.sp1 = StoragePath.objects.create(
-            name="dummy a",
-            path="{created_data}/{title}",
-            match="auto document",
-            matching_algorithm=StoragePath.MATCH_LITERAL,
-        )
-        self.sp2 = StoragePath.objects.create(
-            name="dummy b",
-            path="{title}",
-            match="^first|^unrelated",
-            matching_algorithm=StoragePath.MATCH_REGEX,
-        )
-
-        self.sp3 = StoragePath.objects.create(
-            name="dummy c",
-            path="{title}",
-            match="^blah",
-            matching_algorithm=StoragePath.MATCH_REGEX,
-        )
-
-        self.d1 = Document.objects.create(
-            checksum="A",
-            title="A",
-            content="first document",
-        )
-        self.d2 = Document.objects.create(
-            checksum="B",
-            title="B",
-            content="second document",
-        )
-        self.d3 = Document.objects.create(
-            checksum="C",
-            title="C",
-            content="unrelated document",
-            storage_path=self.sp3,
-        )
-        self.d4 = Document.objects.create(
-            checksum="D",
-            title="D",
-            content="auto document",
-        )
-
-        self.tag_first = Tag.objects.create(
-            name="tag1",
-            match="first",
-            matching_algorithm=Tag.MATCH_ANY,
-        )
-        self.tag_second = Tag.objects.create(
-            name="tag2",
-            match="second",
-            matching_algorithm=Tag.MATCH_ANY,
-        )
-        self.tag_inbox = Tag.objects.create(name="test", is_inbox_tag=True)
-        self.tag_no_match = Tag.objects.create(name="test2")
-        self.tag_auto = Tag.objects.create(
-            name="tagauto",
-            matching_algorithm=Tag.MATCH_AUTO,
-        )
-
-        self.d3.tags.add(self.tag_inbox)
-        self.d3.tags.add(self.tag_no_match)
-        self.d4.tags.add(self.tag_auto)
-
-        self.correspondent_first = Correspondent.objects.create(
-            name="c1",
-            match="first",
-            matching_algorithm=Correspondent.MATCH_ANY,
-        )
-        self.correspondent_second = Correspondent.objects.create(
-            name="c2",
-            match="second",
-            matching_algorithm=Correspondent.MATCH_ANY,
-        )
-
-        self.doctype_first = DocumentType.objects.create(
-            name="dt1",
-            match="first",
-            matching_algorithm=DocumentType.MATCH_ANY,
-        )
-        self.doctype_second = DocumentType.objects.create(
-            name="dt2",
-            match="second",
-            matching_algorithm=DocumentType.MATCH_ANY,
-        )
-
-    def get_updated_docs(self):
-        return (
-            Document.objects.get(title="A"),
-            Document.objects.get(title="B"),
-            Document.objects.get(title="C"),
-            Document.objects.get(title="D"),
-        )
-
-    def setUp(self) -> None:
-        super().setUp()
-        self.make_models()
-
-    def test_add_tags(self) -> None:
+@pytest.mark.django_db
+class TestRetaggerTags(DirectoriesMixin):
+    @pytest.mark.usefixtures("documents")
+    def test_add_tags(self, tags: TagTuple) -> None:
+        """
+        GIVEN the four standard documents and the standard tags
+        WHEN document_retagger --tags is called
+        THEN A and B each gain exactly their matching tag; C and D keep their tag counts
+        """
+        tag_first, tag_second, *_ = tags
        call_command("document_retagger", "--tags")
-        d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
+        d_first, d_second, d_unrelated, d_auto = _get_docs()

-        self.assertEqual(d_first.tags.count(), 1)
-        self.assertEqual(d_second.tags.count(), 1)
-        self.assertEqual(d_unrelated.tags.count(), 2)
-        self.assertEqual(d_auto.tags.count(), 1)
+        assert d_first.tags.count() == 1
+        assert d_second.tags.count() == 1
+        assert d_unrelated.tags.count() == 2
+        assert d_auto.tags.count() == 1
+        assert d_first.tags.first() == tag_first
+        assert d_second.tags.first() == tag_second

-        self.assertEqual(d_first.tags.first(), self.tag_first)
-        self.assertEqual(d_second.tags.first(), self.tag_second)
-
-    def test_add_type(self) -> None:
-        call_command("document_retagger", "--document_type")
-        d_first, d_second, _, _ = self.get_updated_docs()
-
-        self.assertEqual(d_first.document_type, self.doctype_first)
-        self.assertEqual(d_second.document_type, self.doctype_second)
-
-    def test_add_correspondent(self) -> None:
-        call_command("document_retagger", "--correspondent")
-        d_first, d_second, _, _ = self.get_updated_docs()
-
-        self.assertEqual(d_first.correspondent, self.correspondent_first)
-        self.assertEqual(d_second.correspondent, self.correspondent_second)
-
-    def test_overwrite_preserve_inbox(self) -> None:
-        self.d1.tags.add(self.tag_second)
+    def test_overwrite_removes_stale_tags_and_preserves_inbox(
+        self,
+        documents: DocumentTuple,
+        tags: TagTuple,
+    ) -> None:
+        """
+        GIVEN document A additionally carrying the non-matching 'second' tag
+        WHEN document_retagger --tags --overwrite is called
+        THEN A keeps only its matching tag, the tag row itself still exists,
+             C keeps its inbox and no-match tags, and D ends with no tags
+        """
+        d1, *_ = documents
+        tag_first, tag_second, tag_inbox, tag_no_match, _ = tags
+        d1.tags.add(tag_second)

        call_command("document_retagger", "--tags", "--overwrite")

-        d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
+        d_first, d_second, d_unrelated, d_auto = _get_docs()

-        self.assertIsNotNone(Tag.objects.get(id=self.tag_second.id))
+        assert Tag.objects.filter(id=tag_second.id).exists()
+        assert list(d_first.tags.values_list("id", flat=True)) == [tag_first.id]
+        assert list(d_second.tags.values_list("id", flat=True)) == [tag_second.id]
+        assert set(d_unrelated.tags.values_list("id", flat=True)) == {
+            tag_inbox.id,
+            tag_no_match.id,
+        }
+        assert d_auto.tags.count() == 0

-        self.assertCountEqual(
-            [tag.id for tag in d_first.tags.all()],
-            [self.tag_first.id],
+    @pytest.mark.usefixtures("documents")
+    @pytest.mark.parametrize(
+        "extra_args",
+        [
+            pytest.param([], id="no_base_url"),
+            pytest.param(["--base-url=http://localhost"], id="with_base_url"),
+        ],
+    )
+    def test_suggest_does_not_apply_tags(self, extra_args: list[str]) -> None:
+        """
+        GIVEN the four standard documents
+        WHEN document_retagger --tags --suggest is called, with or without --base-url
+        THEN tag counts are unchanged: A and B stay untagged, D keeps its one tag
+        """
+        call_command("document_retagger", "--tags", "--suggest", *extra_args)
+        d_first, d_second, _, d_auto = _get_docs()
+
+        assert d_first.tags.count() == 0
+        assert d_second.tags.count() == 0
+        assert d_auto.tags.count() == 1
+
+
+# ---------------------------------------------------------------------------
+# Document type assignment
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.management
+@pytest.mark.django_db
+class TestRetaggerDocumentType(DirectoriesMixin):
+    @pytest.mark.usefixtures("documents")
+    def test_add_type(self, document_types: DocumentTypeTuple) -> None:
+        """
+        GIVEN document types matching "first" and "second"
+        WHEN document_retagger --document_type is called
+        THEN documents A and B receive the respective document type
+        """
+        dt_first, dt_second = document_types
+        call_command("document_retagger", "--document_type")
+        d_first, d_second, _, _ = _get_docs()
+
+        assert d_first.document_type == dt_first
+        assert d_second.document_type == dt_second
+
+    @pytest.mark.usefixtures("documents", "document_types")
+    @pytest.mark.parametrize(
+        "extra_args",
+        [
+            pytest.param([], id="no_base_url"),
+            pytest.param(["--base-url=http://localhost"], id="with_base_url"),
+        ],
+    )
+    def test_suggest_does_not_apply_document_type(self, extra_args: list[str]) -> None:
+        """
+        GIVEN matching document types exist
+        WHEN document_retagger --document_type --suggest is called, with or without --base-url
+        THEN documents A and B still have no document type
+        """
+        call_command("document_retagger", "--document_type", "--suggest", *extra_args)
+        d_first, d_second, _, _ = _get_docs()
+
+        assert d_first.document_type is None
+        assert d_second.document_type is None
+
+    @pytest.mark.parametrize(
+        ("use_first_flag", "expects_assignment"),
+        [
+            pytest.param(["--use-first"], True, id="use_first_assigns_first_match"),
+            pytest.param([], False, id="no_use_first_skips_ambiguous_match"),
+        ],
+    )
+    def test_use_first_with_multiple_matches(
+        self,
+        use_first_flag: list[str],
+        *,
+        expects_assignment: bool,
+    ) -> None:
+        """
+        GIVEN two document types that both match the word "ambiguous"
+        WHEN document_retagger --document_type is called with or without --use-first
+        THEN a document type is assigned only when --use-first is given
+        """
+        DocumentTypeFactory(
+            match="ambiguous",
+            matching_algorithm=MatchingModel.MATCH_ANY,
        )
-        self.assertCountEqual(
-            [tag.id for tag in d_second.tags.all()],
-            [self.tag_second.id],
+        DocumentTypeFactory(
+            match="ambiguous",
+            matching_algorithm=MatchingModel.MATCH_ANY,
        )
-        self.assertCountEqual(
-            [tag.id for tag in d_unrelated.tags.all()],
-            [self.tag_inbox.id, self.tag_no_match.id],
+        doc = DocumentFactory(content="ambiguous content")
+
+        call_command("document_retagger", "--document_type", *use_first_flag)
+
+        doc.refresh_from_db()
+        assert (doc.document_type is not None) is expects_assignment
+
+
+# ---------------------------------------------------------------------------
+# Correspondent assignment
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.management
+@pytest.mark.django_db
+class TestRetaggerCorrespondent(DirectoriesMixin):
+    @pytest.mark.usefixtures("documents")
+    def test_add_correspondent(self, correspondents: CorrespondentTuple) -> None:
+        """
+        GIVEN correspondents matching "first" and "second"
+        WHEN document_retagger --correspondent is called
+        THEN documents A and B receive the respective correspondent
+        """
+        c_first, c_second = correspondents
+        call_command("document_retagger", "--correspondent")
+        d_first, d_second, _, _ = _get_docs()
+
+        assert d_first.correspondent == c_first
+        assert d_second.correspondent == c_second
+
+    @pytest.mark.usefixtures("documents", "correspondents")
+    @pytest.mark.parametrize(
+        "extra_args",
+        [
+            pytest.param([], id="no_base_url"),
+            pytest.param(["--base-url=http://localhost"], id="with_base_url"),
+        ],
+    )
+    def test_suggest_does_not_apply_correspondent(self, extra_args: list[str]) -> None:
+        """
+        GIVEN matching correspondents exist
+        WHEN document_retagger --correspondent --suggest is called, with or without --base-url
+        THEN documents A and B still have no correspondent
+        """
+        call_command("document_retagger", "--correspondent", "--suggest", *extra_args)
+        d_first, d_second, _, _ = _get_docs()
+
+        assert d_first.correspondent is None
+        assert d_second.correspondent is None
+
+    @pytest.mark.parametrize(
+        ("use_first_flag", "expects_assignment"),
+        [
+            pytest.param(["--use-first"], True, id="use_first_assigns_first_match"),
+            pytest.param([], False, id="no_use_first_skips_ambiguous_match"),
+        ],
+    )
+    def test_use_first_with_multiple_matches(
+        self,
+        use_first_flag: list[str],
+        *,
+        expects_assignment: bool,
+    ) -> None:
+        """
+        GIVEN two correspondents that both match the word "ambiguous"
+        WHEN document_retagger --correspondent is called with or without --use-first
+        THEN a correspondent is assigned only when --use-first is given
+        """
+        CorrespondentFactory(
+            match="ambiguous",
+            matching_algorithm=MatchingModel.MATCH_ANY,
        )
-        self.assertEqual(d_auto.tags.count(), 0)
-
-    def test_add_tags_suggest(self) -> None:
-        call_command("document_retagger", "--tags", "--suggest")
-        d_first, d_second, _, d_auto = self.get_updated_docs()
-
-        self.assertEqual(d_first.tags.count(), 0)
-        self.assertEqual(d_second.tags.count(), 0)
-        self.assertEqual(d_auto.tags.count(), 1)
-
-    def test_add_type_suggest(self) -> None:
-        call_command("document_retagger", "--document_type", "--suggest")
-        d_first, d_second, _, _ = self.get_updated_docs()
-
-        self.assertIsNone(d_first.document_type)
-        self.assertIsNone(d_second.document_type)
-
-    def test_add_correspondent_suggest(self) -> None:
-        call_command("document_retagger", "--correspondent", "--suggest")
-        d_first, d_second, _, _ = self.get_updated_docs()
-
-        self.assertIsNone(d_first.correspondent)
-        self.assertIsNone(d_second.correspondent)
-
-    def test_add_tags_suggest_url(self) -> None:
-        call_command(
-            "document_retagger",
-            "--tags",
-            "--suggest",
-            "--base-url=http://localhost",
+        CorrespondentFactory(
+            match="ambiguous",
+            matching_algorithm=MatchingModel.MATCH_ANY,
        )
-        d_first, d_second, _, d_auto = self.get_updated_docs()
+        doc = DocumentFactory(content="ambiguous content")

-        self.assertEqual(d_first.tags.count(), 0)
-        self.assertEqual(d_second.tags.count(), 0)
-        self.assertEqual(d_auto.tags.count(), 1)
+        call_command("document_retagger", "--correspondent", *use_first_flag)

-    def test_add_type_suggest_url(self) -> None:
-        call_command(
-            "document_retagger",
-            "--document_type",
-            "--suggest",
-            "--base-url=http://localhost",
-        )
-        d_first, d_second, _, _ = self.get_updated_docs()
+        doc.refresh_from_db()
+        assert (doc.correspondent is not None) is expects_assignment

-        self.assertIsNone(d_first.document_type)
-        self.assertIsNone(d_second.document_type)

-    def test_add_correspondent_suggest_url(self) -> None:
-        call_command(
-            "document_retagger",
-            "--correspondent",
-            "--suggest",
-            "--base-url=http://localhost",
-        )
-        d_first, d_second, _, _ = self.get_updated_docs()
+# ---------------------------------------------------------------------------
+# Storage path assignment
+# ---------------------------------------------------------------------------

-        self.assertIsNone(d_first.correspondent)
-        self.assertIsNone(d_second.correspondent)

-    def test_add_storage_path(self) -> None:
+@pytest.mark.management
+@pytest.mark.django_db
+class TestRetaggerStoragePath(DirectoriesMixin):
+    @pytest.mark.usefixtures("documents")
+    def test_add_storage_path(self, storage_paths: StoragePathTuple) -> None:
        """
-        GIVEN:
-            - 2 storage paths with documents which match them
-            - 1 document which matches but has a storage path
-        WHEN:
-            - document retagger is called
-        THEN:
-            - Matching document's storage paths updated
-            - Non-matching documents have no storage path
-            - Existing storage patch left unchanged
+        GIVEN documents matching various storage path rules
+        WHEN document_retagger --storage_path is called
+        THEN matching documents get the correct path; existing path is unchanged
        """
-        call_command(
-            "document_retagger",
-            "--storage_path",
-        )
-        d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
+        sp1, sp2, sp3 = storage_paths
+        call_command("document_retagger", "--storage_path")
+        d_first, d_second, d_unrelated, d_auto = _get_docs()

-        self.assertEqual(d_first.storage_path, self.sp2)
-        self.assertEqual(d_auto.storage_path, self.sp1)
-        self.assertIsNone(d_second.storage_path)
-        self.assertEqual(d_unrelated.storage_path, self.sp3)
+        assert d_first.storage_path == sp2
+        assert d_auto.storage_path == sp1
+        assert d_second.storage_path is None
+        assert d_unrelated.storage_path == sp3

-    def test_overwrite_storage_path(self) -> None:
+    @pytest.mark.usefixtures("documents")
+    def test_overwrite_storage_path(self, storage_paths: StoragePathTuple) -> None:
         """
-        GIVEN:
-            - 2 storage paths with documents which match them
-            - 1 document which matches but has a storage path
-        WHEN:
-            - document retagger is called with overwrite
-        THEN:
-            - Matching document's storage paths updated
-            - Non-matching documents have no storage path
-            - Existing storage patch overwritten
+        GIVEN a document with an existing storage path that matches a different rule
+        WHEN document_retagger --storage_path --overwrite is called
+        THEN the existing path is replaced by the newly matched path
         """
+        sp1, sp2, _ = storage_paths  # third path is not asserted on here
         call_command("document_retagger", "--storage_path", "--overwrite")
-        d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
+        d_first, d_second, d_unrelated, d_auto = _get_docs()

-        self.assertEqual(d_first.storage_path, self.sp2)
-        self.assertEqual(d_auto.storage_path, self.sp1)
-        self.assertIsNone(d_second.storage_path)
-        self.assertEqual(d_unrelated.storage_path, self.sp2)
+        assert d_first.storage_path == sp2
+        assert d_auto.storage_path == sp1
+        assert d_second.storage_path is None
+        assert d_unrelated.storage_path == sp2  # pre-existing path was replaced

-    def test_id_range_parameter(self) -> None:
-        commandOutput = ""
-        Document.objects.create(
-            checksum="E",
-            title="E",
-            content="NOT the first document",
+    @pytest.mark.parametrize(
+        ("use_first_flag", "expects_assignment"),
+        [
+            pytest.param(["--use-first"], True, id="use_first_assigns_first_match"),
+            pytest.param([], False, id="no_use_first_skips_ambiguous_match"),
+        ],
+    )
+    def test_use_first_with_multiple_matches(
+        self,
+        use_first_flag: list[str],
+        *,
+        expects_assignment: bool,
+    ) -> None:
+        StoragePathFactory(
+            match="ambiguous",
+            matching_algorithm=MatchingModel.MATCH_ANY,
         )
-        call_command("document_retagger", "--tags", "--id-range", "1", "2")
-        # The retagger shouldn`t apply the 'first' tag to our new document
-        self.assertEqual(Document.objects.filter(tags__id=self.tag_first.id).count(), 1)
+        StoragePathFactory(
+            match="ambiguous",
+            matching_algorithm=MatchingModel.MATCH_ANY,
+        )
+        doc = DocumentFactory(content="ambiguous content")  # fits both rules above

-        try:
-            commandOutput = call_command("document_retagger", "--tags", "--id-range")
-        except CommandError:
-            # Just ignore the error
-            None
-        self.assertIn(commandOutput, "Error: argument --id-range: expected 2 arguments")
+        call_command("document_retagger", "--storage_path", *use_first_flag)

-        try:
-            commandOutput = call_command(
-                "document_retagger",
-                "--tags",
-                "--id-range",
-                "a",
-                "b",
-            )
-        except CommandError:
-            # Just ignore the error
-            None
-        self.assertIn(commandOutput, "error: argument --id-range: invalid int value:")
+        doc.refresh_from_db()  # reload before inspecting storage_path
+        assert (doc.storage_path is not None) is expects_assignment

-        call_command("document_retagger", "--tags", "--id-range", "1", "9999")
-        # Now we should have 2 documents
-        self.assertEqual(Document.objects.filter(tags__id=self.tag_first.id).count(), 2)
+
+# ---------------------------------------------------------------------------
+# ID range filtering
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.management
+@pytest.mark.django_db
+class TestRetaggerIdRange(DirectoriesMixin):
+    @pytest.mark.usefixtures("documents")
+    @pytest.mark.parametrize(
+        ("id_range_args", "expected_count"),
+        [
+            pytest.param(["1", "2"], 1, id="narrow_range_limits_scope"),
+            pytest.param(["1", "9999"], 2, id="wide_range_tags_all_matches"),
+        ],
+    )
+    def test_id_range_limits_scope(
+        self,
+        tags: TagTuple,
+        id_range_args: list[str],
+        expected_count: int,
+    ) -> None:
+        DocumentFactory(content="NOT the first document")  # added after the fixtures
+        call_command("document_retagger", "--tags", "--id-range", *id_range_args)
+        tag_first, *_ = tags  # only the first tag is counted below
+        assert Document.objects.filter(tags__id=tag_first.id).count() == expected_count
+
+    @pytest.mark.usefixtures("documents")
+    @pytest.mark.parametrize(
+        "args",
+        [
+            pytest.param(["--tags", "--id-range"], id="missing_both_values"),
+            pytest.param(["--tags", "--id-range", "a", "b"], id="non_integer_values"),
+        ],
+    )
+    def test_id_range_invalid_arguments_raise(self, args: list[str]) -> None:
+        with pytest.raises((CommandError, SystemExit)):  # either counts as rejection
+            call_command("document_retagger", *args)
+
+
+# ---------------------------------------------------------------------------
+# Edge cases
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.management
+@pytest.mark.django_db
+class TestRetaggerEdgeCases(DirectoriesMixin):
+    @pytest.mark.usefixtures("documents")
+    def test_no_targets_exits_cleanly(self) -> None:
+        """Calling the retagger with no classifier targets should not raise."""
+        call_command("document_retagger")  # passes only if nothing is raised
+
+    @pytest.mark.usefixtures("documents")
+    def test_inbox_only_skips_non_inbox_documents(self) -> None:
+        """--inbox-only must restrict processing to documents with an inbox tag."""
+        call_command("document_retagger", "--tags", "--inbox-only")
+        d_first, _, d_unrelated, _ = _get_docs()
+
+        assert d_first.tags.count() == 0  # NOTE(review): assumed non-inbox; confirm
+        assert d_unrelated.tags.count() == 2  # NOTE(review): assumed inbox; confirm
--- a/src/documents/tests/test_sanity_check.py
+++ b/src/documents/tests/test_sanity_check.py
@@ -1,192 +1,295 @@
-import logging
-import shutil
-from pathlib import Path
+"""Tests for the sanity checker module.

-import filelock
-from django.conf import settings
-from django.test import TestCase
-from django.test import override_settings
+Tests exercise ``check_sanity`` as a whole, verifying document validation,
+orphan detection, task recording, and the iter_wrapper contract.
+"""
+
+from __future__ import annotations
+
+import logging
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+import pytest

 from documents.models import Document
+from documents.models import PaperlessTask
 from documents.sanity_checker import check_sanity
-from documents.tests.utils import DirectoriesMixin
+
+if TYPE_CHECKING:
+    from collections.abc import Iterable
+
+    from documents.tests.conftest import PaperlessDirs


-class TestSanityCheck(DirectoriesMixin, TestCase):
-    def make_test_data(self):
-        with filelock.FileLock(settings.MEDIA_LOCK):
-            # just make sure that the lockfile is present.
-            shutil.copy(
-                (
-                    Path(__file__).parent
-                    / "samples"
-                    / "documents"
-                    / "originals"
-                    / "0000001.pdf"
-                ),
-                Path(self.dirs.originals_dir) / "0000001.pdf",
-            )
-            shutil.copy(
-                (
-                    Path(__file__).parent
-                    / "samples"
-                    / "documents"
-                    / "archive"
-                    / "0000001.pdf"
-                ),
-                Path(self.dirs.archive_dir) / "0000001.pdf",
-            )
-            shutil.copy(
-                (
-                    Path(__file__).parent
-                    / "samples"
-                    / "documents"
-                    / "thumbnails"
-                    / "0000001.webp"
-                ),
-                Path(self.dirs.thumbnail_dir) / "0000001.webp",
-            )
+@pytest.mark.django_db
+class TestCheckSanityNoDocuments:
+    """Sanity checks against an empty archive."""

-        return Document.objects.create(
-            title="test",
-            checksum="42995833e01aea9b3edee44bbfdd7ce1",
-            archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b",
-            content="test",
-            pk=1,
-            filename="0000001.pdf",
-            mime_type="application/pdf",
-            archive_filename="0000001.pdf",
-        )
-
-    def assertSanityError(self, doc: Document, messageRegex) -> None:
+    @pytest.mark.usefixtures("_media_settings")
+    def test_no_documents(self) -> None:
        messages = check_sanity()
-        self.assertTrue(messages.has_error)
-        with self.assertLogs() as capture:
+        assert not messages.has_error
+        assert not messages.has_warning
+        assert messages.total_issue_count == 0
+
+    @pytest.mark.usefixtures("_media_settings")
+    def test_no_issues_logs_clean(self, caplog: pytest.LogCaptureFixture) -> None:
+        messages = check_sanity()
+        with caplog.at_level(logging.INFO, logger="paperless.sanity_checker"):
            messages.log_messages()
-            self.assertEqual(
-                capture.records[0].message,
-                f"Detected following issue(s) with document #{doc.pk}, titled {doc.title}",
-            )
-            self.assertRegex(capture.records[1].message, messageRegex)
+        assert "Sanity checker detected no issues." in caplog.text

-    def test_no_issues(self) -> None:
-        self.make_test_data()
+
+@pytest.mark.django_db
+class TestCheckSanityHealthyDocument:
+    def test_no_errors(self, sample_doc: Document) -> None:
        messages = check_sanity()
-        self.assertFalse(messages.has_error)
-        self.assertFalse(messages.has_warning)
-        with self.assertLogs() as capture:
-            messages.log_messages()
-            self.assertEqual(len(capture.output), 1)
-            self.assertEqual(capture.records[0].levelno, logging.INFO)
-            self.assertEqual(
-                capture.records[0].message,
-                "Sanity checker detected no issues.",
-            )
+        assert not messages.has_error
+        assert not messages.has_warning
+        assert messages.total_issue_count == 0

-    def test_no_docs(self) -> None:
-        self.assertEqual(len(check_sanity()), 0)

-    def test_success(self) -> None:
-        self.make_test_data()
-        self.assertEqual(len(check_sanity()), 0)
-
-    def test_no_thumbnail(self) -> None:
-        doc = self.make_test_data()
-        Path(doc.thumbnail_path).unlink()
-        self.assertSanityError(doc, "Thumbnail of document does not exist")
-
-    def test_thumbnail_no_access(self) -> None:
-        doc = self.make_test_data()
-        Path(doc.thumbnail_path).chmod(0o000)
-        self.assertSanityError(doc, "Cannot read thumbnail file of document")
-        Path(doc.thumbnail_path).chmod(0o777)
-
-    def test_no_original(self) -> None:
-        doc = self.make_test_data()
-        Path(doc.source_path).unlink()
-        self.assertSanityError(doc, "Original of document does not exist.")
-
-    def test_original_no_access(self) -> None:
-        doc = self.make_test_data()
-        Path(doc.source_path).chmod(0o000)
-        self.assertSanityError(doc, "Cannot read original file of document")
-        Path(doc.source_path).chmod(0o777)
-
-    def test_original_checksum_mismatch(self) -> None:
-        doc = self.make_test_data()
-        doc.checksum = "WOW"
-        doc.save()
-        self.assertSanityError(doc, "Checksum mismatch. Stored: WOW, actual: ")
-
-    def test_no_archive(self) -> None:
-        doc = self.make_test_data()
-        Path(doc.archive_path).unlink()
-        self.assertSanityError(doc, "Archived version of document does not exist.")
-
-    def test_archive_no_access(self) -> None:
-        doc = self.make_test_data()
-        Path(doc.archive_path).chmod(0o000)
-        self.assertSanityError(doc, "Cannot read archive file of document")
-        Path(doc.archive_path).chmod(0o777)
-
-    def test_archive_checksum_mismatch(self) -> None:
-        doc = self.make_test_data()
-        doc.archive_checksum = "WOW"
-        doc.save()
-        self.assertSanityError(doc, "Checksum mismatch of archived document")
-
-    def test_empty_content(self) -> None:
-        doc = self.make_test_data()
-        doc.content = ""
-        doc.save()
+@pytest.mark.django_db
+class TestCheckSanityThumbnail:
+    def test_missing(self, sample_doc: Document) -> None:
+        Path(sample_doc.thumbnail_path).unlink()  # delete only the thumbnail
         messages = check_sanity()
-        self.assertFalse(messages.has_error)
-        self.assertFalse(messages.has_warning)
-        self.assertEqual(len(messages), 1)
-        self.assertRegex(
-            messages[doc.pk][0]["message"],
-            "Document contains no OCR data",
+        assert messages.has_error
+        assert any(
+            "Thumbnail of document does not exist" in m["message"]
+            for m in messages[sample_doc.pk]
         )

-    def test_orphaned_file(self) -> None:
-        self.make_test_data()
-        Path(self.dirs.originals_dir, "orphaned").touch()
+    def test_unreadable(self, sample_doc: Document) -> None:
+        thumb = Path(sample_doc.thumbnail_path)
+        thumb.chmod(0o000)  # NOTE(review): likely ineffective when run as root
+        try:
+            messages = check_sanity()
+            assert messages.has_error
+            assert any(
+                "Cannot read thumbnail" in m["message"] for m in messages[sample_doc.pk]
+            )
+        finally:
+            thumb.chmod(0o644)  # runs even if an assert above fails
+
+
+@pytest.mark.django_db
+class TestCheckSanityOriginal:
+    def test_missing(self, sample_doc: Document) -> None:
+        Path(sample_doc.source_path).unlink()  # delete only the original file
         messages = check_sanity()
-        self.assertTrue(messages.has_warning)
-        self.assertRegex(
-            messages._messages[None][0]["message"],
-            "Orphaned file in media dir",
+        assert messages.has_error
+        assert any(
+            "Original of document does not exist" in m["message"]
+            for m in messages[sample_doc.pk]
         )

-    @override_settings(
-        APP_LOGO="logo/logo.png",
+    def test_checksum_mismatch(self, sample_doc: Document) -> None:
+        sample_doc.checksum = "badhash"  # value is expected back in the message
+        sample_doc.save()
+        messages = check_sanity()
+        assert messages.has_error
+        assert any(
+            "Checksum mismatch" in m["message"] and "badhash" in m["message"]
+            for m in messages[sample_doc.pk]
+        )
+
+    def test_unreadable(self, sample_doc: Document) -> None:
+        src = Path(sample_doc.source_path)
+        src.chmod(0o000)  # NOTE(review): likely ineffective when run as root
+        try:
+            messages = check_sanity()
+            assert messages.has_error
+            assert any(
+                "Cannot read original" in m["message"] for m in messages[sample_doc.pk]
+            )
+        finally:
+            src.chmod(0o644)  # runs even if an assert above fails
+
+
+@pytest.mark.django_db
+class TestCheckSanityArchive:
+    def test_checksum_without_filename(self, sample_doc: Document) -> None:
+        sample_doc.archive_filename = None  # archive_checksum is left untouched
+        sample_doc.save()
+        messages = check_sanity()
+        assert messages.has_error
+        assert any(
+            "checksum, but no archive filename" in m["message"]
+            for m in messages[sample_doc.pk]
+        )
+
+    def test_filename_without_checksum(self, sample_doc: Document) -> None:
+        sample_doc.archive_checksum = None  # archive_filename is left untouched
+        sample_doc.save()
+        messages = check_sanity()
+        assert messages.has_error
+        assert any(
+            "checksum is missing" in m["message"] for m in messages[sample_doc.pk]
+        )
+
+    def test_missing_file(self, sample_doc: Document) -> None:
+        Path(sample_doc.archive_path).unlink()  # DB fields still reference it
+        messages = check_sanity()
+        assert messages.has_error
+        assert any(
+            "Archived version of document does not exist" in m["message"]
+            for m in messages[sample_doc.pk]
+        )
+
+    def test_checksum_mismatch(self, sample_doc: Document) -> None:
+        sample_doc.archive_checksum = "wronghash"
+        sample_doc.save()
+        messages = check_sanity()
+        assert messages.has_error
+        assert any(
+            "Checksum mismatch of archived document" in m["message"]
+            for m in messages[sample_doc.pk]
+        )
+
+    def test_unreadable(self, sample_doc: Document) -> None:
+        archive = Path(sample_doc.archive_path)
+        archive.chmod(0o000)  # NOTE(review): likely ineffective when run as root
+        try:
+            messages = check_sanity()
+            assert messages.has_error
+            assert any(
+                "Cannot read archive" in m["message"] for m in messages[sample_doc.pk]
+            )
+        finally:
+            archive.chmod(0o644)  # runs even if an assert above fails
+
+    def test_no_archive_at_all(self, sample_doc: Document) -> None:
+        """Document with neither archive checksum nor filename is valid."""
+        Path(sample_doc.archive_path).unlink()  # drop the file as well as the fields
+        sample_doc.archive_checksum = None
+        sample_doc.archive_filename = None
+        sample_doc.save()
+        messages = check_sanity()
+        assert not messages.has_error
+
+
+@pytest.mark.django_db
+class TestCheckSanityContent:
+    @pytest.mark.parametrize(
+        "content",
+        [
+            pytest.param("", id="empty-string"),
+        ],
    )
-    def test_ignore_logo(self) -> None:
-        self.make_test_data()
-        logo_dir = Path(self.dirs.media_dir, "logo")
-        logo_dir.mkdir(parents=True, exist_ok=True)
-        Path(self.dirs.media_dir, "logo", "logo.png").touch()
+    def test_no_content(self, sample_doc: Document, content: str) -> None:
+        sample_doc.content = content
+        sample_doc.save()
        messages = check_sanity()
-        self.assertFalse(messages.has_warning)
+        assert not messages.has_error
+        assert not messages.has_warning
+        assert any("no OCR data" in m["message"] for m in messages[sample_doc.pk])

-    def test_ignore_ignorable_files(self) -> None:
-        self.make_test_data()
-        Path(self.dirs.media_dir, ".DS_Store").touch()
-        Path(self.dirs.media_dir, "desktop.ini").touch()
+
+@pytest.mark.django_db
+class TestCheckSanityOrphans:
+    def test_orphaned_file(
+        self,
+        sample_doc: Document,
+        paperless_dirs: PaperlessDirs,
+    ) -> None:
+        (paperless_dirs.originals / "orphan.pdf").touch()
        messages = check_sanity()
-        self.assertFalse(messages.has_warning)
+        assert messages.has_warning
+        assert any("Orphaned file" in m["message"] for m in messages[None])

-    def test_archive_filename_no_checksum(self) -> None:
-        doc = self.make_test_data()
-        doc.archive_checksum = None
-        doc.save()
-        self.assertSanityError(doc, "has an archive file, but its checksum is missing.")
+    @pytest.mark.usefixtures("_media_settings")
+    def test_ignorable_files_not_flagged(
+        self,
+        paperless_dirs: PaperlessDirs,
+    ) -> None:
+        (paperless_dirs.media / ".DS_Store").touch()
+        (paperless_dirs.media / "desktop.ini").touch()
+        messages = check_sanity()
+        assert not messages.has_warning

-    def test_archive_checksum_no_filename(self) -> None:
-        doc = self.make_test_data()
-        doc.archive_filename = None
-        doc.save()
-        self.assertSanityError(
-            doc,
-            "has an archive file checksum, but no archive filename.",
-        )
+
+@pytest.mark.django_db
+class TestCheckSanityIterWrapper:
+    def test_wrapper_receives_documents(self, sample_doc: Document) -> None:
+        seen: list[Document] = []
+
+        def tracking(iterable: Iterable[Document]) -> Iterable[Document]:
+            for item in iterable:
+                seen.append(item)  # record, then hand the item on unchanged
+                yield item
+
+        check_sanity(iter_wrapper=tracking)
+        assert len(seen) == 1  # only sample_doc is expected to be iterated
+        assert seen[0].pk == sample_doc.pk
+
+    def test_default_works_without_wrapper(self, sample_doc: Document) -> None:
+        messages = check_sanity()  # iter_wrapper deliberately not passed
+        assert not messages.has_error
+
+
+@pytest.mark.django_db
+class TestCheckSanityTaskRecording:
+    @pytest.mark.parametrize(
+        ("expected_type", "scheduled"),
+        [
+            pytest.param(PaperlessTask.TaskType.SCHEDULED_TASK, True, id="scheduled"),
+            pytest.param(PaperlessTask.TaskType.MANUAL_TASK, False, id="manual"),
+        ],
+    )
+    @pytest.mark.usefixtures("_media_settings")
+    def test_task_type(self, expected_type: str, *, scheduled: bool) -> None:
+        check_sanity(scheduled=scheduled)
+        task = PaperlessTask.objects.latest("date_created")  # most recent task row
+        assert task.task_name == PaperlessTask.TaskName.CHECK_SANITY
+        assert task.type == expected_type
+
+    def test_success_status(self, sample_doc: Document) -> None:
+        check_sanity()  # sample_doc is left intact, so no error is expected
+        task = PaperlessTask.objects.latest("date_created")
+        assert task.status == "SUCCESS"
+
+    def test_failure_status(self, sample_doc: Document) -> None:
+        Path(sample_doc.source_path).unlink()  # missing original yields an error
+        check_sanity()
+        task = PaperlessTask.objects.latest("date_created")
+        assert task.status == "FAILURE"
+        assert "Check logs for details" in task.result
+
+
+@pytest.mark.django_db
+class TestCheckSanityLogMessages:
+    def test_logs_doc_issues(
+        self,
+        sample_doc: Document,
+        caplog: pytest.LogCaptureFixture,
+    ) -> None:
+        Path(sample_doc.source_path).unlink()  # make the original go missing
+        messages = check_sanity()
+        with caplog.at_level(logging.INFO, logger="paperless.sanity_checker"):
+            messages.log_messages()  # only this call's output is asserted on
+        assert f"document #{sample_doc.pk}" in caplog.text
+        assert "Original of document does not exist" in caplog.text
+
+    def test_logs_global_issues(
+        self,
+        sample_doc: Document,
+        paperless_dirs: PaperlessDirs,
+        caplog: pytest.LogCaptureFixture,
+    ) -> None:
+        (paperless_dirs.originals / "orphan.pdf").touch()  # extra, empty file
+        messages = check_sanity()
+        with caplog.at_level(logging.WARNING, logger="paperless.sanity_checker"):
+            messages.log_messages()
+        assert "Orphaned file" in caplog.text
+
+    @pytest.mark.usefixtures("_media_settings")
+    def test_logs_unknown_doc_pk(self, caplog: pytest.LogCaptureFixture) -> None:
+        """A doc PK not in the DB logs 'Unknown' as the title."""
+        messages = check_sanity()
+        messages.error(99999, "Ghost document")  # injected by hand after the check
+        with caplog.at_level(logging.INFO, logger="paperless.sanity_checker"):
+            messages.log_messages()
+        assert "#99999" in caplog.text
+        assert "Unknown" in caplog.text
--- a/src/documents/tests/test_tasks.py
+++ b/src/documents/tests/test_tasks.py
@@ -3,6 +3,7 @@ from datetime import timedelta
 from pathlib import Path
 from unittest import mock

+import pytest
 from celery import states
 from django.conf import settings
 from django.test import TestCase
@@ -105,55 +106,83 @@ class TestClassifier(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
            self.assertNotEqual(mtime2, mtime3)


-class TestSanityCheck(DirectoriesMixin, TestCase):
-    @mock.patch("documents.tasks.sanity_checker.check_sanity")
-    def test_sanity_check_success(self, m) -> None:
-        m.return_value = SanityCheckMessages()
-        self.assertEqual(tasks.sanity_check(), "No issues detected.")
-        m.assert_called_once()
+@pytest.mark.django_db
+class TestSanityCheck:
+    @pytest.fixture
+    def mock_check_sanity(self, mocker) -> mock.MagicMock:
+        return mocker.patch("documents.tasks.sanity_checker.check_sanity")

-    @mock.patch("documents.tasks.sanity_checker.check_sanity")
-    def test_sanity_check_error(self, m) -> None:
-        messages = SanityCheckMessages()
-        messages.error(None, "Some error")
-        m.return_value = messages
-        self.assertRaises(SanityCheckFailedException, tasks.sanity_check)
-        m.assert_called_once()
+    def test_sanity_check_success(self, mock_check_sanity: mock.MagicMock) -> None:
+        mock_check_sanity.return_value = SanityCheckMessages()
+        assert tasks.sanity_check() == "No issues detected."
+        mock_check_sanity.assert_called_once()

-    @mock.patch("documents.tasks.sanity_checker.check_sanity")
-    def test_sanity_check_error_no_raise(self, m) -> None:
+    def test_sanity_check_error_raises(
+        self,
+        mock_check_sanity: mock.MagicMock,
+        sample_doc: Document,
+    ) -> None:
        messages = SanityCheckMessages()
-        messages.error(None, "Some error")
-        m.return_value = messages
-        # No exception should be raised
+        messages.error(sample_doc.pk, "some error")
+        mock_check_sanity.return_value = messages
+        with pytest.raises(SanityCheckFailedException):
+            tasks.sanity_check()
+        mock_check_sanity.assert_called_once()
+
+    def test_sanity_check_error_no_raise(
+        self,
+        mock_check_sanity: mock.MagicMock,
+        sample_doc: Document,
+    ) -> None:
+        messages = SanityCheckMessages()
+        messages.error(sample_doc.pk, "some error")
+        mock_check_sanity.return_value = messages
        result = tasks.sanity_check(raise_on_error=False)
-        self.assertEqual(
-            result,
-            "Sanity check exited with errors. See log.",
-        )
-        m.assert_called_once()
+        assert "1 document(s) with errors" in result
+        assert "Check logs for details." in result
+        mock_check_sanity.assert_called_once()

-    @mock.patch("documents.tasks.sanity_checker.check_sanity")
-    def test_sanity_check_warning(self, m) -> None:
+    def test_sanity_check_warning_only(
+        self,
+        mock_check_sanity: mock.MagicMock,
+    ) -> None:
        messages = SanityCheckMessages()
-        messages.warning(None, "Some warning")
-        m.return_value = messages
-        self.assertEqual(
-            tasks.sanity_check(),
-            "Sanity check exited with warnings. See log.",
-        )
-        m.assert_called_once()
+        messages.warning(None, "extra file")
+        mock_check_sanity.return_value = messages
+        result = tasks.sanity_check()
+        assert result == "1 global warning(s) found."
+        mock_check_sanity.assert_called_once()

-    @mock.patch("documents.tasks.sanity_checker.check_sanity")
-    def test_sanity_check_info(self, m) -> None:
+    def test_sanity_check_info_only(
+        self,
+        mock_check_sanity: mock.MagicMock,
+        sample_doc: Document,
+    ) -> None:
        messages = SanityCheckMessages()
-        messages.info(None, "Some info")
-        m.return_value = messages
-        self.assertEqual(
-            tasks.sanity_check(),
-            "Sanity check exited with infos. See log.",
-        )
-        m.assert_called_once()
+        messages.info(sample_doc.pk, "some info")
+        mock_check_sanity.return_value = messages
+        result = tasks.sanity_check()
+        assert result == "1 document(s) with infos found."
+        mock_check_sanity.assert_called_once()
+
+    def test_sanity_check_errors_warnings_and_infos(
+        self,
+        mock_check_sanity: mock.MagicMock,
+        sample_doc: Document,
+    ) -> None:
+        messages = SanityCheckMessages()
+        messages.error(sample_doc.pk, "broken")
+        messages.warning(sample_doc.pk, "odd")
+        messages.info(sample_doc.pk, "fyi")
+        messages.warning(None, "extra file")
+        mock_check_sanity.return_value = messages
+        result = tasks.sanity_check(raise_on_error=False)
+        assert "1 document(s) with errors" in result
+        assert "1 document(s) with warnings" in result
+        assert "1 document(s) with infos" in result
+        assert "1 global warning(s)" in result
+        assert "Check logs for details." in result
+        mock_check_sanity.assert_called_once()


 class TestBulkUpdate(DirectoriesMixin, TestCase):
--- a/src/paperless/tests/test_adapter.py
+++ b/src/paperless/tests/test_adapter.py
@@ -1,107 +1,100 @@
-from unittest import mock
+import logging

+import pytest
 from allauth.account.adapter import get_adapter
 from allauth.core import context
 from allauth.socialaccount.adapter import get_adapter as get_social_adapter
-from django.conf import settings
 from django.contrib.auth.models import AnonymousUser
 from django.contrib.auth.models import Group
 from django.contrib.auth.models import User
 from django.forms import ValidationError
 from django.http import HttpRequest
-from django.test import TestCase
-from django.test import override_settings
 from django.urls import reverse
+from pytest_django.fixtures import SettingsWrapper
+from pytest_mock import MockerFixture
 from rest_framework.authtoken.models import Token

 from paperless.adapter import DrfTokenStrategy


-class TestCustomAccountAdapter(TestCase):
-    def test_is_open_for_signup(self) -> None:
+@pytest.mark.django_db
+class TestCustomAccountAdapter:
+    def test_is_open_for_signup(self, settings: SettingsWrapper) -> None:
        adapter = get_adapter()

        # With no accounts, signups should be allowed
-        self.assertTrue(adapter.is_open_for_signup(None))
+        assert adapter.is_open_for_signup(None)

        User.objects.create_user("testuser")

-        # Test when ACCOUNT_ALLOW_SIGNUPS is True
        settings.ACCOUNT_ALLOW_SIGNUPS = True
-        self.assertTrue(adapter.is_open_for_signup(None))
+        assert adapter.is_open_for_signup(None)

-        # Test when ACCOUNT_ALLOW_SIGNUPS is False
        settings.ACCOUNT_ALLOW_SIGNUPS = False
-        self.assertFalse(adapter.is_open_for_signup(None))
+        assert not adapter.is_open_for_signup(None)

-    def test_is_safe_url(self) -> None:
+    def test_is_safe_url(self, settings: SettingsWrapper) -> None:
        request = HttpRequest()
-        request.get_host = mock.Mock(return_value="example.com")
+        request.get_host = lambda: "example.com"
        with context.request_context(request):
            adapter = get_adapter()
-            with override_settings(ALLOWED_HOSTS=["*"]):
-                # True because request host is same
-                url = "https://example.com"
-                self.assertTrue(adapter.is_safe_url(url))

-            url = "https://evil.com"
+            settings.ALLOWED_HOSTS = ["*"]
+            # True because request host is same
+            assert adapter.is_safe_url("https://example.com")
            # False despite wildcard because request host is different
-            self.assertFalse(adapter.is_safe_url(url))
+            assert not adapter.is_safe_url("https://evil.com")

            settings.ALLOWED_HOSTS = ["example.com"]
-            url = "https://example.com"
            # True because request host is same
-            self.assertTrue(adapter.is_safe_url(url))
+            assert adapter.is_safe_url("https://example.com")

            settings.ALLOWED_HOSTS = ["*", "example.com"]
-            url = "//evil.com"
            # False because request host is not in allowed hosts
-            self.assertFalse(adapter.is_safe_url(url))
+            assert not adapter.is_safe_url("//evil.com")

-    @mock.patch("allauth.core.internal.ratelimit.consume", return_value=True)
-    def test_pre_authenticate(self, mock_consume) -> None:
+    def test_pre_authenticate(
+        self,
+        settings: SettingsWrapper,
+        mocker: MockerFixture,
+    ) -> None:
+        mocker.patch("allauth.core.internal.ratelimit.consume", return_value=True)
        adapter = get_adapter()
        request = HttpRequest()
-        request.get_host = mock.Mock(return_value="example.com")
+        request.get_host = lambda: "example.com"

        settings.DISABLE_REGULAR_LOGIN = False
        adapter.pre_authenticate(request)

        settings.DISABLE_REGULAR_LOGIN = True
-        with self.assertRaises(ValidationError):
+        with pytest.raises(ValidationError):
            adapter.pre_authenticate(request)

-    def test_get_reset_password_from_key_url(self) -> None:
+    def test_get_reset_password_from_key_url(self, settings: SettingsWrapper) -> None:
        request = HttpRequest()
-        request.get_host = mock.Mock(return_value="foo.org")
+        request.get_host = lambda: "foo.org"
        with context.request_context(request):
            adapter = get_adapter()

-            # Test when PAPERLESS_URL is None
-            with override_settings(
-                PAPERLESS_URL=None,
-                ACCOUNT_DEFAULT_HTTP_PROTOCOL="https",
-            ):
-                expected_url = f"https://foo.org{reverse('account_reset_password_from_key', kwargs={'uidb36': 'UID', 'key': 'KEY'})}"
-                self.assertEqual(
-                    adapter.get_reset_password_from_key_url("UID-KEY"),
-                    expected_url,
-                )
+            settings.PAPERLESS_URL = None
+            settings.ACCOUNT_DEFAULT_HTTP_PROTOCOL = "https"
+            expected_url = f"https://foo.org{reverse('account_reset_password_from_key', kwargs={'uidb36': 'UID', 'key': 'KEY'})}"
+            assert adapter.get_reset_password_from_key_url("UID-KEY") == expected_url

-            # Test when PAPERLESS_URL is not None
-            with override_settings(PAPERLESS_URL="https://bar.com"):
-                expected_url = f"https://bar.com{reverse('account_reset_password_from_key', kwargs={'uidb36': 'UID', 'key': 'KEY'})}"
-                self.assertEqual(
-                    adapter.get_reset_password_from_key_url("UID-KEY"),
-                    expected_url,
-                )
+            settings.PAPERLESS_URL = "https://bar.com"
+            expected_url = f"https://bar.com{reverse('account_reset_password_from_key', kwargs={'uidb36': 'UID', 'key': 'KEY'})}"
+            assert adapter.get_reset_password_from_key_url("UID-KEY") == expected_url

-    @override_settings(ACCOUNT_DEFAULT_GROUPS=["group1", "group2"])
-    def test_save_user_adds_groups(self) -> None:
+    def test_save_user_adds_groups(
+        self,
+        settings: SettingsWrapper,
+        mocker: MockerFixture,
+    ) -> None:
+        settings.ACCOUNT_DEFAULT_GROUPS = ["group1", "group2"]
        Group.objects.create(name="group1")
        user = User.objects.create_user("testuser")
        adapter = get_adapter()
-        form = mock.Mock(
+        form = mocker.MagicMock(
            cleaned_data={
                "username": "testuser",
                "email": "user@example.com",
@@ -110,88 +103,81 @@ class TestCustomAccountAdapter(TestCase):

        user = adapter.save_user(HttpRequest(), user, form, commit=True)

-        self.assertEqual(user.groups.count(), 1)
-        self.assertTrue(user.groups.filter(name="group1").exists())
-        self.assertFalse(user.groups.filter(name="group2").exists())
+        assert user.groups.count() == 1
+        assert user.groups.filter(name="group1").exists()
+        assert not user.groups.filter(name="group2").exists()

-    def test_fresh_install_save_creates_superuser(self) -> None:
+    def test_fresh_install_save_creates_superuser(self, mocker: MockerFixture) -> None:
        adapter = get_adapter()
-        form = mock.Mock(
+        form = mocker.MagicMock(
            cleaned_data={
                "username": "testuser",
                "email": "user@paperless-ngx.com",
            },
        )
        user = adapter.save_user(HttpRequest(), User(), form, commit=True)
-        self.assertTrue(user.is_superuser)
+        assert user.is_superuser

-        # Next time, it should not create a superuser
-        form = mock.Mock(
+        form = mocker.MagicMock(
            cleaned_data={
                "username": "testuser2",
                "email": "user2@paperless-ngx.com",
            },
        )
        user2 = adapter.save_user(HttpRequest(), User(), form, commit=True)
-        self.assertFalse(user2.is_superuser)
+        assert not user2.is_superuser


-class TestCustomSocialAccountAdapter(TestCase):
-    def test_is_open_for_signup(self) -> None:
+class TestCustomSocialAccountAdapter:
+    @pytest.mark.django_db
+    def test_is_open_for_signup(self, settings: SettingsWrapper) -> None:
        adapter = get_social_adapter()

-        # Test when SOCIALACCOUNT_ALLOW_SIGNUPS is True
        settings.SOCIALACCOUNT_ALLOW_SIGNUPS = True
-        self.assertTrue(adapter.is_open_for_signup(None, None))
+        assert adapter.is_open_for_signup(None, None)

-        # Test when SOCIALACCOUNT_ALLOW_SIGNUPS is False
        settings.SOCIALACCOUNT_ALLOW_SIGNUPS = False
-        self.assertFalse(adapter.is_open_for_signup(None, None))
+        assert not adapter.is_open_for_signup(None, None)

    def test_get_connect_redirect_url(self) -> None:
        adapter = get_social_adapter()
-        request = None
-        socialaccount = None
+        assert adapter.get_connect_redirect_url(None, None) == reverse("base")

-        # Test the default URL
-        expected_url = reverse("base")
-        self.assertEqual(
-            adapter.get_connect_redirect_url(request, socialaccount),
-            expected_url,
-        )
-
-    @override_settings(SOCIAL_ACCOUNT_DEFAULT_GROUPS=["group1", "group2"])
-    def test_save_user_adds_groups(self) -> None:
+    @pytest.mark.django_db
+    def test_save_user_adds_groups(
+        self,
+        settings: SettingsWrapper,
+        mocker: MockerFixture,
+    ) -> None:
+        settings.SOCIAL_ACCOUNT_DEFAULT_GROUPS = ["group1", "group2"]
        Group.objects.create(name="group1")
        adapter = get_social_adapter()
-        request = HttpRequest()
        user = User.objects.create_user("testuser")
-        sociallogin = mock.Mock(
-            user=user,
-        )
+        sociallogin = mocker.MagicMock(user=user)

-        user = adapter.save_user(request, sociallogin, None)
+        user = adapter.save_user(HttpRequest(), sociallogin, None)

-        self.assertEqual(user.groups.count(), 1)
-        self.assertTrue(user.groups.filter(name="group1").exists())
-        self.assertFalse(user.groups.filter(name="group2").exists())
+        assert user.groups.count() == 1
+        assert user.groups.filter(name="group1").exists()
+        assert not user.groups.filter(name="group2").exists()

-    def test_error_logged_on_authentication_error(self) -> None:
+    def test_error_logged_on_authentication_error(
+        self,
+        caplog: pytest.LogCaptureFixture,
+    ) -> None:
        adapter = get_social_adapter()
-        request = HttpRequest()
-        with self.assertLogs("paperless.auth", level="INFO") as log_cm:
+        with caplog.at_level(logging.INFO, logger="paperless.auth"):
            adapter.on_authentication_error(
-                request,
+                HttpRequest(),
                provider="test-provider",
                error="Error",
                exception="Test authentication error",
            )
-        self.assertTrue(
-            any("Test authentication error" in message for message in log_cm.output),
-        )
+        assert any("Test authentication error" in msg for msg in caplog.messages)


-class TestDrfTokenStrategy(TestCase):
+@pytest.mark.django_db
+class TestDrfTokenStrategy:
    def test_create_access_token_creates_new_token(self) -> None:
        """
        GIVEN:
@@ -201,7 +187,6 @@ class TestDrfTokenStrategy(TestCase):
        THEN:
            - A new token is created and its key is returned
        """
-
        user = User.objects.create_user("testuser")
        request = HttpRequest()
        request.user = user
@@ -209,13 +194,9 @@ class TestDrfTokenStrategy(TestCase):
        strategy = DrfTokenStrategy()
        token_key = strategy.create_access_token(request)

-        # Verify a token was created
-        self.assertIsNotNone(token_key)
-        self.assertTrue(Token.objects.filter(user=user).exists())
-
-        # Verify the returned key matches the created token
-        token = Token.objects.get(user=user)
-        self.assertEqual(token_key, token.key)
+        assert token_key is not None
+        assert Token.objects.filter(user=user).exists()
+        assert token_key == Token.objects.get(user=user).key

    def test_create_access_token_returns_existing_token(self) -> None:
        """
@@ -226,7 +207,6 @@ class TestDrfTokenStrategy(TestCase):
        THEN:
            - The same token key is returned (no new token created)
        """
-
        user = User.objects.create_user("testuser")
        existing_token = Token.objects.create(user=user)

@@ -236,11 +216,8 @@ class TestDrfTokenStrategy(TestCase):
        strategy = DrfTokenStrategy()
        token_key = strategy.create_access_token(request)

-        # Verify the existing token key is returned
-        self.assertEqual(token_key, existing_token.key)
-
-        # Verify only one token exists (no duplicate created)
-        self.assertEqual(Token.objects.filter(user=user).count(), 1)
+        assert token_key == existing_token.key
+        assert Token.objects.filter(user=user).count() == 1

    def test_create_access_token_returns_none_for_unauthenticated_user(self) -> None:
        """
@@ -251,12 +228,11 @@ class TestDrfTokenStrategy(TestCase):
        THEN:
            - None is returned and no token is created
        """
-
        request = HttpRequest()
        request.user = AnonymousUser()

        strategy = DrfTokenStrategy()
        token_key = strategy.create_access_token(request)

-        self.assertIsNone(token_key)
-        self.assertEqual(Token.objects.count(), 0)
+        assert token_key is None
+        assert Token.objects.count() == 0
--- a/src/paperless/tests/test_checks.py
+++ b/src/paperless/tests/test_checks.py
@@ -1,15 +1,12 @@
 import os
+from dataclasses import dataclass
 from pathlib import Path
-from unittest import mock

 import pytest
 from django.core.checks import Warning
-from django.test import TestCase
-from django.test import override_settings
+from pytest_django.fixtures import SettingsWrapper
 from pytest_mock import MockerFixture

-from documents.tests.utils import DirectoriesMixin
-from documents.tests.utils import FileSystemAssertsMixin
 from paperless.checks import audit_log_check
 from paperless.checks import binaries_check
 from paperless.checks import check_deprecated_db_settings
@@ -18,54 +15,84 @@ from paperless.checks import paths_check
 from paperless.checks import settings_values_check


-class TestChecks(DirectoriesMixin, TestCase):
-    def test_binaries(self) -> None:
-        self.assertEqual(binaries_check(None), [])
+@dataclass(frozen=True, slots=True)
+class PaperlessTestDirs:
+    data_dir: Path
+    media_dir: Path
+    consumption_dir: Path

-    @override_settings(CONVERT_BINARY="uuuhh")
-    def test_binaries_fail(self) -> None:
-        self.assertEqual(len(binaries_check(None)), 1)

-    def test_paths_check(self) -> None:
-        self.assertEqual(paths_check(None), [])
+# TODO: consolidate with documents/tests/conftest.py PaperlessDirs/paperless_dirs
+#       once the paperless and documents test suites are ready to share fixtures.
+@pytest.fixture()
+def directories(tmp_path: Path, settings: SettingsWrapper) -> PaperlessTestDirs:
+    data_dir = tmp_path / "data"
+    media_dir = tmp_path / "media"
+    consumption_dir = tmp_path / "consumption"

-    @override_settings(
-        MEDIA_ROOT=Path("uuh"),
-        DATA_DIR=Path("whatever"),
-        CONSUMPTION_DIR=Path("idontcare"),
+    for d in (data_dir, media_dir, consumption_dir):
+        d.mkdir()
+
+    settings.DATA_DIR = data_dir
+    settings.MEDIA_ROOT = media_dir
+    settings.CONSUMPTION_DIR = consumption_dir
+
+    return PaperlessTestDirs(
+        data_dir=data_dir,
+        media_dir=media_dir,
+        consumption_dir=consumption_dir,
    )
-    def test_paths_check_dont_exist(self) -> None:
-        msgs = paths_check(None)
-        self.assertEqual(len(msgs), 3, str(msgs))

-        for msg in msgs:
-            self.assertTrue(msg.msg.endswith("is set but doesn't exist."))

-    def test_paths_check_no_access(self) -> None:
-        Path(self.dirs.data_dir).chmod(0o000)
-        Path(self.dirs.media_dir).chmod(0o000)
-        Path(self.dirs.consumption_dir).chmod(0o000)
+class TestChecks:
+    def test_binaries(self) -> None:
+        assert binaries_check(None) == []

-        self.addCleanup(os.chmod, self.dirs.data_dir, 0o777)
-        self.addCleanup(os.chmod, self.dirs.media_dir, 0o777)
-        self.addCleanup(os.chmod, self.dirs.consumption_dir, 0o777)
+    def test_binaries_fail(self, settings: SettingsWrapper) -> None:
+        settings.CONVERT_BINARY = "uuuhh"
+        assert len(binaries_check(None)) == 1
+
+    @pytest.mark.usefixtures("directories")
+    def test_paths_check(self) -> None:
+        assert paths_check(None) == []
+
+    def test_paths_check_dont_exist(self, settings: SettingsWrapper) -> None:
+        settings.MEDIA_ROOT = Path("uuh")
+        settings.DATA_DIR = Path("whatever")
+        settings.CONSUMPTION_DIR = Path("idontcare")

        msgs = paths_check(None)
-        self.assertEqual(len(msgs), 3)

+        assert len(msgs) == 3, str(msgs)
        for msg in msgs:
-            self.assertTrue(msg.msg.endswith("is not writeable"))
+            assert msg.msg.endswith("is set but doesn't exist.")

-    @override_settings(DEBUG=False)
-    def test_debug_disabled(self) -> None:
-        self.assertEqual(debug_mode_check(None), [])
+    def test_paths_check_no_access(self, directories: PaperlessTestDirs) -> None:
+        directories.data_dir.chmod(0o000)
+        directories.media_dir.chmod(0o000)
+        directories.consumption_dir.chmod(0o000)

-    @override_settings(DEBUG=True)
-    def test_debug_enabled(self) -> None:
-        self.assertEqual(len(debug_mode_check(None)), 1)
+        try:
+            msgs = paths_check(None)
+        finally:
+            directories.data_dir.chmod(0o777)
+            directories.media_dir.chmod(0o777)
+            directories.consumption_dir.chmod(0o777)
+
+        assert len(msgs) == 3
+        for msg in msgs:
+            assert msg.msg.endswith("is not writeable")
+
+    def test_debug_disabled(self, settings: SettingsWrapper) -> None:
+        settings.DEBUG = False
+        assert debug_mode_check(None) == []
+
+    def test_debug_enabled(self, settings: SettingsWrapper) -> None:
+        settings.DEBUG = True
+        assert len(debug_mode_check(None)) == 1


-class TestSettingsChecksAgainstDefaults(DirectoriesMixin, TestCase):
+class TestSettingsChecksAgainstDefaults:
    def test_all_valid(self) -> None:
        """
        GIVEN:
@@ -76,104 +103,71 @@ class TestSettingsChecksAgainstDefaults(DirectoriesMixin, TestCase):
            - No system check errors reported
        """
        msgs = settings_values_check(None)
-        self.assertEqual(len(msgs), 0)
+        assert len(msgs) == 0


-class TestOcrSettingsChecks(DirectoriesMixin, TestCase):
-    @override_settings(OCR_OUTPUT_TYPE="notapdf")
-    def test_invalid_output_type(self) -> None:
+class TestOcrSettingsChecks:
+    @pytest.mark.parametrize(
+        ("setting", "value", "expected_msg"),
+        [
+            pytest.param(
+                "OCR_OUTPUT_TYPE",
+                "notapdf",
+                'OCR output type "notapdf"',
+                id="invalid-output-type",
+            ),
+            pytest.param(
+                "OCR_MODE",
+                "makeitso",
+                'OCR output mode "makeitso"',
+                id="invalid-mode",
+            ),
+            pytest.param(
+                "OCR_MODE",
+                "skip_noarchive",
+                "deprecated",
+                id="deprecated-mode",
+            ),
+            pytest.param(
+                "OCR_SKIP_ARCHIVE_FILE",
+                "invalid",
+                'OCR_SKIP_ARCHIVE_FILE setting "invalid"',
+                id="invalid-skip-archive-file",
+            ),
+            pytest.param(
+                "OCR_CLEAN",
+                "cleanme",
+                'OCR clean mode "cleanme"',
+                id="invalid-clean",
+            ),
+        ],
+    )
+    def test_invalid_setting_produces_one_error(
+        self,
+        settings: SettingsWrapper,
+        setting: str,
+        value: str,
+        expected_msg: str,
+    ) -> None:
        """
        GIVEN:
            - Default settings
-            - OCR output type is invalid
+            - One OCR setting is set to an invalid value
        WHEN:
            - Settings are validated
        THEN:
-            - system check error reported for OCR output type
+            - Exactly one system check error is reported containing the expected message
        """
+        setattr(settings, setting, value)
+
        msgs = settings_values_check(None)
-        self.assertEqual(len(msgs), 1)

-        msg = msgs[0]
-
-        self.assertIn('OCR output type "notapdf"', msg.msg)
-
-    @override_settings(OCR_MODE="makeitso")
-    def test_invalid_ocr_type(self) -> None:
-        """
-        GIVEN:
-            - Default settings
-            - OCR type is invalid
-        WHEN:
-            - Settings are validated
-        THEN:
-            - system check error reported for OCR type
-        """
-        msgs = settings_values_check(None)
-        self.assertEqual(len(msgs), 1)
-
-        msg = msgs[0]
-
-        self.assertIn('OCR output mode "makeitso"', msg.msg)
-
-    @override_settings(OCR_MODE="skip_noarchive")
-    def test_deprecated_ocr_type(self) -> None:
-        """
-        GIVEN:
-            - Default settings
-            - OCR type is deprecated
-        WHEN:
-            - Settings are validated
-        THEN:
-            - deprecation warning reported for OCR type
-        """
-        msgs = settings_values_check(None)
-        self.assertEqual(len(msgs), 1)
-
-        msg = msgs[0]
-
-        self.assertIn("deprecated", msg.msg)
-
-    @override_settings(OCR_SKIP_ARCHIVE_FILE="invalid")
-    def test_invalid_ocr_skip_archive_file(self) -> None:
-        """
-        GIVEN:
-            - Default settings
-            - OCR_SKIP_ARCHIVE_FILE is invalid
-        WHEN:
-            - Settings are validated
-        THEN:
-            - system check error reported for OCR_SKIP_ARCHIVE_FILE
-        """
-        msgs = settings_values_check(None)
-        self.assertEqual(len(msgs), 1)
-
-        msg = msgs[0]
-
-        self.assertIn('OCR_SKIP_ARCHIVE_FILE setting "invalid"', msg.msg)
-
-    @override_settings(OCR_CLEAN="cleanme")
-    def test_invalid_ocr_clean(self) -> None:
-        """
-        GIVEN:
-            - Default settings
-            - OCR cleaning type is invalid
-        WHEN:
-            - Settings are validated
-        THEN:
-            - system check error reported for OCR cleaning type
-        """
-        msgs = settings_values_check(None)
-        self.assertEqual(len(msgs), 1)
-
-        msg = msgs[0]
-
-        self.assertIn('OCR clean mode "cleanme"', msg.msg)
+        assert len(msgs) == 1
+        assert expected_msg in msgs[0].msg


-class TestTimezoneSettingsChecks(DirectoriesMixin, TestCase):
-    @override_settings(TIME_ZONE="TheMoon\\MyCrater")
-    def test_invalid_timezone(self) -> None:
+class TestTimezoneSettingsChecks:
+    def test_invalid_timezone(self, settings: SettingsWrapper) -> None:
        """
        GIVEN:
            - Default settings
@@ -183,17 +177,16 @@ class TestTimezoneSettingsChecks(DirectoriesMixin, TestCase):
        THEN:
            - system check error reported for timezone
        """
+        settings.TIME_ZONE = "TheMoon\\MyCrater"
+
        msgs = settings_values_check(None)
-        self.assertEqual(len(msgs), 1)

-        msg = msgs[0]
-
-        self.assertIn('Timezone "TheMoon\\MyCrater"', msg.msg)
+        assert len(msgs) == 1
+        assert 'Timezone "TheMoon\\MyCrater"' in msgs[0].msg


-class TestEmailCertSettingsChecks(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
-    @override_settings(EMAIL_CERTIFICATE_FILE=Path("/tmp/not_actually_here.pem"))
-    def test_not_valid_file(self) -> None:
+class TestEmailCertSettingsChecks:
+    def test_not_valid_file(self, settings: SettingsWrapper) -> None:
        """
        GIVEN:
            - Default settings
@@ -203,19 +196,22 @@ class TestEmailCertSettingsChecks(DirectoriesMixin, FileSystemAssertsMixin, Test
        THEN:
            - system check error reported for email certificate
        """
-        self.assertIsNotFile("/tmp/not_actually_here.pem")
+        cert_path = Path("/tmp/not_actually_here.pem")
+        assert not cert_path.is_file()
+        settings.EMAIL_CERTIFICATE_FILE = cert_path

        msgs = settings_values_check(None)

-        self.assertEqual(len(msgs), 1)
-
-        msg = msgs[0]
-
-        self.assertIn("Email cert /tmp/not_actually_here.pem is not a file", msg.msg)
+        assert len(msgs) == 1
+        assert "Email cert /tmp/not_actually_here.pem is not a file" in msgs[0].msg


-class TestAuditLogChecks(TestCase):
-    def test_was_enabled_once(self) -> None:
+class TestAuditLogChecks:
+    def test_was_enabled_once(
+        self,
+        settings: SettingsWrapper,
+        mocker: MockerFixture,
+    ) -> None:
        """
        GIVEN:
            - Audit log is not enabled
@@ -224,23 +220,18 @@ class TestAuditLogChecks(TestCase):
        THEN:
            - system check error reported for disabling audit log
        """
-        introspect_mock = mock.MagicMock()
+        settings.AUDIT_LOG_ENABLED = False
+        introspect_mock = mocker.MagicMock()
        introspect_mock.introspection.table_names.return_value = ["auditlog_logentry"]
-        with override_settings(AUDIT_LOG_ENABLED=False):
-            with mock.patch.dict(
-                "paperless.checks.connections",
-                {"default": introspect_mock},
-            ):
-                msgs = audit_log_check(None)
+        mocker.patch.dict(
+            "paperless.checks.connections",
+            {"default": introspect_mock},
+        )

-                self.assertEqual(len(msgs), 1)
+        msgs = audit_log_check(None)

-                msg = msgs[0]
-
-                self.assertIn(
-                    ("auditlog table was found but audit log is disabled."),
-                    msg.msg,
-                )
+        assert len(msgs) == 1
+        assert "auditlog table was found but audit log is disabled." in msgs[0].msg


 DEPRECATED_VARS: dict[str, str] = {
@@ -269,20 +260,16 @@ class TestDeprecatedDbSettings:
    @pytest.mark.parametrize(
        ("env_var", "db_option_key"),
        [
-            ("PAPERLESS_DB_TIMEOUT", "timeout"),
-            ("PAPERLESS_DB_POOLSIZE", "pool.min_size / pool.max_size"),
-            ("PAPERLESS_DBSSLMODE", "sslmode"),
-            ("PAPERLESS_DBSSLROOTCERT", "sslrootcert"),
-            ("PAPERLESS_DBSSLCERT", "sslcert"),
-            ("PAPERLESS_DBSSLKEY", "sslkey"),
-        ],
-        ids=[
-            "db-timeout",
-            "db-poolsize",
-            "ssl-mode",
-            "ssl-rootcert",
-            "ssl-cert",
-            "ssl-key",
+            pytest.param("PAPERLESS_DB_TIMEOUT", "timeout", id="db-timeout"),
+            pytest.param(
+                "PAPERLESS_DB_POOLSIZE",
+                "pool.min_size / pool.max_size",
+                id="db-poolsize",
+            ),
+            pytest.param("PAPERLESS_DBSSLMODE", "sslmode", id="ssl-mode"),
+            pytest.param("PAPERLESS_DBSSLROOTCERT", "sslrootcert", id="ssl-rootcert"),
+            pytest.param("PAPERLESS_DBSSLCERT", "sslcert", id="ssl-cert"),
+            pytest.param("PAPERLESS_DBSSLKEY", "sslkey", id="ssl-key"),
        ],
    )
    def test_single_deprecated_var_produces_one_warning(
--- a/src/paperless/tests/test_utils.py
+++ b/src/paperless/tests/test_utils.py
@@ -9,35 +9,50 @@ from paperless.utils import ocr_to_dateparser_languages
@pytest.mark.parametrize(
    ("ocr_language", "expected"),
    [
-        # One language
-        ("eng", ["en"]),
-        # Multiple languages
-        ("fra+ita+lao", ["fr", "it", "lo"]),
-        # Languages that don't have a two-letter equivalent
-        ("fil", ["fil"]),
-        # Languages with a script part supported by dateparser
-        ("aze_cyrl+srp_latn", ["az-Cyrl", "sr-Latn"]),
-        # Languages with a script part not supported by dateparser
-        # In this case, default to the language without script
-        ("deu_frak", ["de"]),
-        # Traditional and simplified chinese don't have the same name in dateparser,
-        # so they're converted to the general chinese language
-        ("chi_tra+chi_sim", ["zh"]),
-        # If a language is not supported by dateparser, fallback to the supported ones
-        ("eng+unsupported_language+por", ["en", "pt"]),
-        # If no language is supported, fallback to default
-        ("unsupported1+unsupported2", []),
-        # Duplicate languages, should not duplicate in result
-        ("eng+eng", ["en"]),
-        # Language with script, but script is not mapped
-        ("ita_unknownscript", ["it"]),
+        pytest.param("eng", ["en"], id="single-language"),
+        pytest.param("fra+ita+lao", ["fr", "it", "lo"], id="multiple-languages"),
+        pytest.param("fil", ["fil"], id="no-two-letter-equivalent"),
+        pytest.param(
+            "aze_cyrl+srp_latn",
+            ["az-Cyrl", "sr-Latn"],
+            id="script-supported-by-dateparser",
+        ),
+        pytest.param(
+            "deu_frak",
+            ["de"],
+            id="script-not-supported-falls-back-to-language",
+        ),
+        pytest.param(
+            "chi_tra+chi_sim",
+            ["zh"],
+            id="chinese-variants-collapse-to-general",
+        ),
+        pytest.param(
+            "eng+unsupported_language+por",
+            ["en", "pt"],
+            id="unsupported-language-skipped",
+        ),
+        pytest.param(
+            "unsupported1+unsupported2",
+            [],
+            id="all-unsupported-returns-empty",
+        ),
+        pytest.param("eng+eng", ["en"], id="duplicates-deduplicated"),
+        pytest.param(
+            "ita_unknownscript",
+            ["it"],
+            id="unknown-script-falls-back-to-language",
+        ),
    ],
 )
-def test_ocr_to_dateparser_languages(ocr_language, expected):
+def test_ocr_to_dateparser_languages(ocr_language: str, expected: list[str]) -> None:
    assert sorted(ocr_to_dateparser_languages(ocr_language)) == sorted(expected)


-def test_ocr_to_dateparser_languages_exception(monkeypatch, caplog):
+def test_ocr_to_dateparser_languages_exception(
+    monkeypatch: pytest.MonkeyPatch,
+    caplog: pytest.LogCaptureFixture,
+) -> None:
    # Patch LocaleDataLoader.get_locale_map to raise an exception
    class DummyLoader:
        def get_locale_map(self, locales=None):
--- a/src/paperless/tests/test_views.py
+++ b/src/paperless/tests/test_views.py
@@ -1,24 +1,31 @@
-import tempfile
 from pathlib import Path

-from django.test import override_settings
+from django.test import Client
+from pytest_django.fixtures import SettingsWrapper


-def test_favicon_view(client):
-    with tempfile.TemporaryDirectory() as tmpdir:
-        static_dir = Path(tmpdir)
-        favicon_path = static_dir / "paperless" / "img" / "favicon.ico"
-        favicon_path.parent.mkdir(parents=True, exist_ok=True)
-        favicon_path.write_bytes(b"FAKE ICON DATA")
+def test_favicon_view(
+    client: Client,
+    tmp_path: Path,
+    settings: SettingsWrapper,
+) -> None:
+    favicon_path = tmp_path / "paperless" / "img" / "favicon.ico"
+    favicon_path.parent.mkdir(parents=True)
+    favicon_path.write_bytes(b"FAKE ICON DATA")

-        with override_settings(STATIC_ROOT=static_dir):
-            response = client.get("/favicon.ico")
-            assert response.status_code == 200
-            assert response["Content-Type"] == "image/x-icon"
-            assert b"".join(response.streaming_content) == b"FAKE ICON DATA"
+    settings.STATIC_ROOT = tmp_path
+
+    response = client.get("/favicon.ico")
+    assert response.status_code == 200
+    assert response["Content-Type"] == "image/x-icon"
+    assert b"".join(response.streaming_content) == b"FAKE ICON DATA"


-def test_favicon_view_missing_file(client):
-    with override_settings(STATIC_ROOT=Path(tempfile.mkdtemp())):
-        response = client.get("/favicon.ico")
-        assert response.status_code == 404
+def test_favicon_view_missing_file(
+    client: Client,
+    tmp_path: Path,
+    settings: SettingsWrapper,
+) -> None:
+    settings.STATIC_ROOT = tmp_path
+    response = client.get("/favicon.ico")
+    assert response.status_code == 404
--- a/src/paperless/views.py
+++ b/src/paperless/views.py
@@ -378,7 +378,6 @@ class ApplicationConfigurationViewSet(ModelViewSet):
        ):
            # AI index was just enabled and vector store file does not exist
            llmindex_index.delay(
-                progress_bar_disable=True,
                rebuild=True,
                scheduled=False,
                auto=True,
--- a/src/paperless_ai/indexing.py
+++ b/src/paperless_ai/indexing.py
@@ -1,11 +1,13 @@
 import logging
 import shutil
+from collections.abc import Callable
+from collections.abc import Iterable
 from datetime import timedelta
 from pathlib import Path
+from typing import TypeVar

 import faiss
 import llama_index.core.settings as llama_settings
-import tqdm
 from celery import states
 from django.conf import settings
 from django.utils import timezone
@@ -29,6 +31,14 @@ from paperless_ai.embedding import build_llm_index_text
 from paperless_ai.embedding import get_embedding_dim
 from paperless_ai.embedding import get_embedding_model

+_T = TypeVar("_T")
+IterWrapper = Callable[[Iterable[_T]], Iterable[_T]]
+
+
+def _identity(iterable: Iterable[_T]) -> Iterable[_T]:
+    return iterable
+
+
 logger = logging.getLogger("paperless_ai.indexing")


@@ -156,7 +166,11 @@ def vector_store_file_exists():
    return Path(settings.LLM_INDEX_DIR / "default__vector_store.json").exists()


-def update_llm_index(*, progress_bar_disable=False, rebuild=False) -> str:
+def update_llm_index(
+    *,
+    iter_wrapper: IterWrapper[Document] = _identity,
+    rebuild=False,
+) -> str:
    """
    Rebuild or update the LLM index.
    """
@@ -176,7 +190,7 @@ def update_llm_index(*, progress_bar_disable=False, rebuild=False) -> str:
        embed_model = get_embedding_model()
        llama_settings.Settings.embed_model = embed_model
        storage_context = get_or_create_storage_context(rebuild=True)
-        for document in tqdm.tqdm(documents, disable=progress_bar_disable):
+        for document in iter_wrapper(documents):
            document_nodes = build_document_node(document)
            nodes.extend(document_nodes)

@@ -184,7 +198,7 @@ def update_llm_index(*, progress_bar_disable=False, rebuild=False) -> str:
            nodes=nodes,
            storage_context=storage_context,
            embed_model=embed_model,
-            show_progress=not progress_bar_disable,
+            show_progress=False,
        )
        msg = "LLM index rebuilt successfully."
    else:
@@ -196,7 +210,7 @@ def update_llm_index(*, progress_bar_disable=False, rebuild=False) -> str:
            for node in index.docstore.get_nodes(all_node_ids)
        }

-        for document in tqdm.tqdm(documents, disable=progress_bar_disable):
+        for document in iter_wrapper(documents):
            doc_id = str(document.id)
            document_modified = document.modified.isoformat()

--- a/uv.lock
+++ b/uv.lock
@@ -1342,11 +1342,11 @@ wheels = [

 [[package]]
 name = "faker"
-version = "40.1.2"
+version = "40.5.1"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/5e/77/1c3ff07b6739b9a1d23ca01ec0a90a309a33b78e345a3eb52f9ce9240e36/faker-40.1.2.tar.gz", hash = "sha256:b76a68163aa5f171d260fc24827a8349bc1db672f6a665359e8d0095e8135d30", size = 1949802, upload-time = "2026-01-13T20:51:49.917Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/03/2a/96fff3edcb10f6505143448a4b91535f77b74865cec45be52690ee280443/faker-40.5.1.tar.gz", hash = "sha256:70222361cd82aa10cb86066d1a4e8f47f2bcdc919615c412045a69c4e6da0cd3", size = 1952684, upload-time = "2026-02-23T21:34:38.362Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/46/ec/91a434c8a53d40c3598966621dea9c50512bec6ce8e76fa1751015e74cef/faker-40.1.2-py3-none-any.whl", hash = "sha256:93503165c165d330260e4379fd6dc07c94da90c611ed3191a0174d2ab9966a42", size = 1985633, upload-time = "2026-01-13T20:51:47.982Z" },
+    { url = "https://files.pythonhosted.org/packages/4d/a9/1eed4db92d0aec2f9bfdf1faae0ab0418b5e121dda5701f118a7a4f0cd6a/faker-40.5.1-py3-none-any.whl", hash = "sha256:c69640c1e13bad49b4bcebcbf1b52f9f1a872b6ea186c248ada34d798f1661bf", size = 1987053, upload-time = "2026-02-23T21:34:36.418Z" },
 ]

 [[package]]
@@ -3121,6 +3121,7 @@ webserver = [
 dev = [
    { name = "daphne", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "factory-boy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "faker", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "imagehash", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "prek", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "pytest", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -3145,6 +3146,7 @@ lint = [
 testing = [
    { name = "daphne", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "factory-boy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "faker", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "imagehash", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "pytest", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "pytest-cov", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -3257,6 +3259,7 @@ provides-extras = ["mariadb", "postgres", "webserver"]
 dev = [
    { name = "daphne" },
    { name = "factory-boy", specifier = "~=3.3.1" },
+    { name = "faker", specifier = "~=40.5.1" },
    { name = "imagehash" },
    { name = "prek", specifier = "~=0.3.0" },
    { name = "pytest", specifier = "~=9.0.0" },
@@ -3279,6 +3282,7 @@ lint = [
 testing = [
    { name = "daphne" },
    { name = "factory-boy", specifier = "~=3.3.1" },
+    { name = "faker", specifier = "~=40.5.1" },
    { name = "imagehash" },
    { name = "pytest", specifier = "~=9.0.0" },
    { name = "pytest-cov", specifier = "~=7.0.0" },
@@ -5906,11 +5910,11 @@ wheels = [

 [[package]]
 name = "whitenoise"
-version = "6.11.0"
+version = "6.12.0"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/15/95/8c81ec6b6ebcbf8aca2de7603070ccf37dbb873b03f20708e0f7c1664bc6/whitenoise-6.11.0.tar.gz", hash = "sha256:0f5bfce6061ae6611cd9396a8231e088722e4fc67bc13a111be74c738d99375f", size = 26432, upload-time = "2025-09-18T09:16:10.995Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/cb/2a/55b3f3a4ec326cd077c1c3defeee656b9298372a69229134d930151acd01/whitenoise-6.12.0.tar.gz", hash = "sha256:f723ebb76a112e98816ff80fcea0a6c9b8ecde835f8ddda25df7a30a3c2db6ad", size = 26841, upload-time = "2026-02-27T00:05:42.028Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/6c/e9/4366332f9295fe0647d7d3251ce18f5615fbcb12d02c79a26f8dba9221b3/whitenoise-6.11.0-py3-none-any.whl", hash = "sha256:b2aeb45950597236f53b5342b3121c5de69c8da0109362aee506ce88e022d258", size = 20197, upload-time = "2025-09-18T09:16:09.754Z" },
+    { url = "https://files.pythonhosted.org/packages/db/eb/d5583a11486211f3ebd4b385545ae787f32363d453c19fffd81106c9c138/whitenoise-6.12.0-py3-none-any.whl", hash = "sha256:fc5e8c572e33ebf24795b47b6a7da8da3c00cff2349f5b04c02f28d0cc5a3cc2", size = 20302, upload-time = "2026-02-27T00:05:40.086Z" },
 ]

 [[package]]
Author	SHA1	Message	Date
Trenton H	57ea7a716b	Chore: convert test_adapter.py to pytest style	2026-03-03 13:45:01 -08:00
Trenton H	ba7b538398	Chore: convert paperless unit tests to pytest style	2026-03-03 13:27:20 -08:00
dependabot[bot]	9c0f112e94	docker(deps): Bump astral-sh/uv (#12191 ) Bumps [astral-sh/uv](https://github.com/astral-sh/uv) from 0.10.5-python3.12-trixie-slim to 0.10.7-python3.12-trixie-slim. - [Release notes](https://github.com/astral-sh/uv/releases) - [Changelog](https://github.com/astral-sh/uv/blob/main/CHANGELOG.md) - [Commits](https://github.com/astral-sh/uv/compare/0.10.5...0.10.7) --- updated-dependencies: - dependency-name: astral-sh/uv dependency-version: 0.10.7-python3.12-trixie-slim dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2026-03-03 07:56:35 -08:00
Trenton H	43406f44f2	Feature: Improve the retagger output using rich (#12194 )	2026-03-03 07:14:59 -08:00
shamoon	b7ca3550b1	Merge branch 'main' into dev	2026-03-02 13:45:10 -08:00
shamoon	0e97419e0e	Chore: add existing logo for temporary url resolution	2026-03-02 13:43:24 -08:00
dependabot[bot]	10cb2ac183	Chore(deps): Bump the actions group across 1 directory with 6 updates (#12224 ) Bumps the actions group with 6 updates in the / directory: \| Package \| From \| To \| \| --- \| --- \| --- \| \| [astral-sh/setup-uv](https://github.com/astral-sh/setup-uv) \| `7.3.0` \| `7.3.1` \| \| [actions/upload-artifact](https://github.com/actions/upload-artifact) \| `6.0.0` \| `7.0.0` \| \| [actions/download-artifact](https://github.com/actions/download-artifact) \| `7.0.0` \| `8.0.0` \| \| [github/codeql-action](https://github.com/github/codeql-action) \| `4.32.3` \| `4.32.5` \| \| [crowdin/github-action](https://github.com/crowdin/github-action) \| `2.14.0` \| `2.15.0` \| \| [actions/stale](https://github.com/actions/stale) \| `10.1.1` \| `10.2.0` \| Updates `astral-sh/setup-uv` from 7.3.0 to 7.3.1 - [Release notes](https://github.com/astral-sh/setup-uv/releases) - [Commits](https://github.com/astral-sh/setup-uv/compare/v7.3.0...v7.3.1) Updates `actions/upload-artifact` from 6.0.0 to 7.0.0 - [Release notes](https://github.com/actions/upload-artifact/releases) - [Commits](https://github.com/actions/upload-artifact/compare/v6.0.0...v7.0.0) Updates `actions/download-artifact` from 7.0.0 to 8.0.0 - [Release notes](https://github.com/actions/download-artifact/releases) - [Commits](https://github.com/actions/download-artifact/compare/v7.0.0...v8.0.0) Updates `github/codeql-action` from 4.32.3 to 4.32.5 - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/v4.32.3...v4.32.5) Updates `crowdin/github-action` from 2.14.0 to 2.15.0 - [Release notes](https://github.com/crowdin/github-action/releases) - [Commits](https://github.com/crowdin/github-action/compare/v2.14.0...v2.15.0) Updates `actions/stale` from 10.1.1 to 10.2.0 - [Release notes](https://github.com/actions/stale/releases) - 
[Changelog](https://github.com/actions/stale/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/stale/compare/v10.1.1...v10.2.0) --- updated-dependencies: - dependency-name: astral-sh/setup-uv dependency-version: 7.3.1 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: actions - dependency-name: actions/upload-artifact dependency-version: 7.0.0 dependency-type: direct:production update-type: version-update:semver-major dependency-group: actions - dependency-name: actions/download-artifact dependency-version: 8.0.0 dependency-type: direct:production update-type: version-update:semver-major dependency-group: actions - dependency-name: github/codeql-action dependency-version: 4.32.5 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: actions - dependency-name: crowdin/github-action dependency-version: 2.15.0 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: actions - dependency-name: actions/stale dependency-version: 10.2.0 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: actions ... Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2026-03-02 11:55:24 -08:00
Trenton H	1d7cd5a7ad	Chore: Updates actions to the most specific version released (#12222 )	2026-03-02 11:34:57 -08:00
Trenton H	e58a35d40c	Feature: Transition sanity check to rich and improve output (#12182 )	2026-03-02 10:53:39 -08:00
Trenton H	20a9cd40e8	Feature: Switch all indexing to use rich (#12193 )	2026-03-02 10:41:09 -08:00
dependabot[bot]	b94ce85b46	Chore(deps): Bump whitenoise in the django-ecosystem group (#12192 ) Bumps the django-ecosystem group with 1 update: [whitenoise](https://github.com/evansd/whitenoise). Updates `whitenoise` from 6.11.0 to 6.12.0 - [Changelog](https://github.com/evansd/whitenoise/blob/main/docs/changelog.rst) - [Commits](https://github.com/evansd/whitenoise/compare/6.11.0...6.12.0) --- updated-dependencies: - dependency-name: whitenoise dependency-version: 6.12.0 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: django-ecosystem ... Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2026-03-02 10:25:11 -08:00
dependabot[bot]	484bef00c1	docker-compose(deps): Bump gotenberg/gotenberg in /docker/compose (#12190 ) Bumps gotenberg/gotenberg from 8.26 to 8.27. --- updated-dependencies: - dependency-name: gotenberg/gotenberg dependency-version: '8.27' dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2026-03-02 10:14:48 -08:00
Trenton H	317a177537	Chore: Updates s6-overlay to 3.2.2.0 (#12189 )	2026-03-02 09:00:03 -08:00
shamoon	7ff51452f0	Documentation: small note re filename vs original_filename	2026-03-01 11:45:06 -08:00