mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-03-03 07:46:25 +00:00
Compare commits
1 Commits
fix-doc-ve
...
chore/lock
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f2a48bb929 |
19
.github/workflows/ci-backend.yml
vendored
19
.github/workflows/ci-backend.yml
vendored
@@ -22,6 +22,7 @@ on:
|
||||
concurrency:
|
||||
group: backend-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
permissions: {}
|
||||
env:
|
||||
DEFAULT_UV_VERSION: "0.10.x"
|
||||
NLTK_DATA: "/usr/share/nltk_data"
|
||||
@@ -29,24 +30,26 @@ jobs:
|
||||
test:
|
||||
name: "Python ${{ matrix.python-version }}"
|
||||
runs-on: ubuntu-24.04
|
||||
permissions:
|
||||
contents: read
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ['3.10', '3.11', '3.12']
|
||||
fail-fast: false
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6.0.2
|
||||
uses: actions/checkout@v6
|
||||
- name: Start containers
|
||||
run: |
|
||||
docker compose --file docker/compose/docker-compose.ci-test.yml pull --quiet
|
||||
docker compose --file docker/compose/docker-compose.ci-test.yml up --detach
|
||||
- name: Set up Python
|
||||
id: setup-python
|
||||
uses: actions/setup-python@v6.2.0
|
||||
uses: actions/setup-python@v6
|
||||
with:
|
||||
python-version: "${{ matrix.python-version }}"
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v7.3.1
|
||||
uses: astral-sh/setup-uv@v7
|
||||
with:
|
||||
version: ${{ env.DEFAULT_UV_VERSION }}
|
||||
enable-cache: true
|
||||
@@ -83,13 +86,13 @@ jobs:
|
||||
pytest
|
||||
- name: Upload test results to Codecov
|
||||
if: always()
|
||||
uses: codecov/codecov-action@v5.5.2
|
||||
uses: codecov/codecov-action@v5
|
||||
with:
|
||||
flags: backend-python-${{ matrix.python-version }}
|
||||
files: junit.xml
|
||||
report_type: test_results
|
||||
- name: Upload coverage to Codecov
|
||||
uses: codecov/codecov-action@v5.5.2
|
||||
uses: codecov/codecov-action@v5
|
||||
with:
|
||||
flags: backend-python-${{ matrix.python-version }}
|
||||
files: coverage.xml
|
||||
@@ -102,18 +105,20 @@ jobs:
|
||||
typing:
|
||||
name: Check project typing
|
||||
runs-on: ubuntu-24.04
|
||||
permissions:
|
||||
contents: read
|
||||
env:
|
||||
DEFAULT_PYTHON: "3.12"
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6.0.2
|
||||
uses: actions/checkout@v6.0.1
|
||||
- name: Set up Python
|
||||
id: setup-python
|
||||
uses: actions/setup-python@v6.2.0
|
||||
with:
|
||||
python-version: "${{ env.DEFAULT_PYTHON }}"
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v7.3.1
|
||||
uses: astral-sh/setup-uv@v7.2.1
|
||||
with:
|
||||
version: ${{ env.DEFAULT_UV_VERSION }}
|
||||
enable-cache: true
|
||||
|
||||
9
.github/workflows/ci-docker.yml
vendored
9
.github/workflows/ci-docker.yml
vendored
@@ -15,6 +15,7 @@ on:
|
||||
concurrency:
|
||||
group: docker-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
permissions: {}
|
||||
env:
|
||||
REGISTRY: ghcr.io
|
||||
jobs:
|
||||
@@ -41,7 +42,7 @@ jobs:
|
||||
ref-name: ${{ steps.ref.outputs.name }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6.0.2
|
||||
uses: actions/checkout@v6.0.1
|
||||
- name: Determine ref name
|
||||
id: ref
|
||||
run: |
|
||||
@@ -130,7 +131,7 @@ jobs:
|
||||
type=semver,pattern={{major}}.{{minor}}
|
||||
- name: Build and push by digest
|
||||
id: build
|
||||
uses: docker/build-push-action@v6.19.2
|
||||
uses: docker/build-push-action@v6.18.0
|
||||
with:
|
||||
context: .
|
||||
file: ./Dockerfile
|
||||
@@ -152,7 +153,7 @@ jobs:
|
||||
touch "/tmp/digests/${digest#sha256:}"
|
||||
- name: Upload digest
|
||||
if: steps.check-push.outputs.should-push == 'true'
|
||||
uses: actions/upload-artifact@v7.0.0
|
||||
uses: actions/upload-artifact@v6.0.0
|
||||
with:
|
||||
name: digests-${{ matrix.arch }}
|
||||
path: /tmp/digests/*
|
||||
@@ -168,7 +169,7 @@ jobs:
|
||||
packages: write
|
||||
steps:
|
||||
- name: Download digests
|
||||
uses: actions/download-artifact@v8.0.0
|
||||
uses: actions/download-artifact@v7.0.0
|
||||
with:
|
||||
path: /tmp/digests
|
||||
pattern: digests-*
|
||||
|
||||
23
.github/workflows/ci-docs.yml
vendored
23
.github/workflows/ci-docs.yml
vendored
@@ -21,10 +21,7 @@ on:
|
||||
concurrency:
|
||||
group: docs-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
permissions:
|
||||
contents: read
|
||||
pages: write
|
||||
id-token: write
|
||||
permissions: {}
|
||||
env:
|
||||
DEFAULT_UV_VERSION: "0.10.x"
|
||||
DEFAULT_PYTHON_VERSION: "3.12"
|
||||
@@ -32,17 +29,19 @@ jobs:
|
||||
build:
|
||||
name: Build Documentation
|
||||
runs-on: ubuntu-24.04
|
||||
permissions:
|
||||
contents: read
|
||||
steps:
|
||||
- uses: actions/configure-pages@v5.0.0
|
||||
- uses: actions/configure-pages@v5
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6.0.2
|
||||
uses: actions/checkout@v6
|
||||
- name: Set up Python
|
||||
id: setup-python
|
||||
uses: actions/setup-python@v6.2.0
|
||||
uses: actions/setup-python@v6
|
||||
with:
|
||||
python-version: ${{ env.DEFAULT_PYTHON_VERSION }}
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v7.3.1
|
||||
uses: astral-sh/setup-uv@v7
|
||||
with:
|
||||
version: ${{ env.DEFAULT_UV_VERSION }}
|
||||
enable-cache: true
|
||||
@@ -58,7 +57,7 @@ jobs:
|
||||
--frozen \
|
||||
zensical build --clean
|
||||
- name: Upload GitHub Pages artifact
|
||||
uses: actions/upload-pages-artifact@v4.0.0
|
||||
uses: actions/upload-pages-artifact@v4
|
||||
with:
|
||||
path: site
|
||||
name: github-pages-${{ github.run_id }}-${{ github.run_attempt }}
|
||||
@@ -67,12 +66,16 @@ jobs:
|
||||
needs: build
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
|
||||
runs-on: ubuntu-24.04
|
||||
permissions:
|
||||
contents: read
|
||||
pages: write
|
||||
id-token: write
|
||||
environment:
|
||||
name: github-pages
|
||||
url: ${{ steps.deployment.outputs.page_url }}
|
||||
steps:
|
||||
- name: Deploy GitHub Pages
|
||||
uses: actions/deploy-pages@v4.0.5
|
||||
uses: actions/deploy-pages@v4
|
||||
id: deployment
|
||||
with:
|
||||
artifact_name: github-pages-${{ github.run_id }}-${{ github.run_attempt }}
|
||||
|
||||
55
.github/workflows/ci-frontend.yml
vendored
55
.github/workflows/ci-frontend.yml
vendored
@@ -16,26 +16,29 @@ on:
|
||||
concurrency:
|
||||
group: frontend-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
permissions: {}
|
||||
jobs:
|
||||
install-dependencies:
|
||||
name: Install Dependencies
|
||||
runs-on: ubuntu-24.04
|
||||
permissions:
|
||||
contents: read
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6.0.2
|
||||
uses: actions/checkout@v6
|
||||
- name: Install pnpm
|
||||
uses: pnpm/action-setup@v4.2.0
|
||||
uses: pnpm/action-setup@v4
|
||||
with:
|
||||
version: 10
|
||||
- name: Use Node.js 24
|
||||
uses: actions/setup-node@v6.2.0
|
||||
uses: actions/setup-node@v6
|
||||
with:
|
||||
node-version: 24.x
|
||||
cache: 'pnpm'
|
||||
cache-dependency-path: 'src-ui/pnpm-lock.yaml'
|
||||
- name: Cache frontend dependencies
|
||||
id: cache-frontend-deps
|
||||
uses: actions/cache@v5.0.3
|
||||
uses: actions/cache@v5
|
||||
with:
|
||||
path: |
|
||||
~/.pnpm-store
|
||||
@@ -47,21 +50,23 @@ jobs:
|
||||
name: Lint
|
||||
needs: install-dependencies
|
||||
runs-on: ubuntu-24.04
|
||||
permissions:
|
||||
contents: read
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6.0.2
|
||||
uses: actions/checkout@v6
|
||||
- name: Install pnpm
|
||||
uses: pnpm/action-setup@v4.2.0
|
||||
uses: pnpm/action-setup@v4
|
||||
with:
|
||||
version: 10
|
||||
- name: Use Node.js 24
|
||||
uses: actions/setup-node@v6.2.0
|
||||
uses: actions/setup-node@v6
|
||||
with:
|
||||
node-version: 24.x
|
||||
cache: 'pnpm'
|
||||
cache-dependency-path: 'src-ui/pnpm-lock.yaml'
|
||||
- name: Cache frontend dependencies
|
||||
uses: actions/cache@v5.0.3
|
||||
uses: actions/cache@v5
|
||||
with:
|
||||
path: |
|
||||
~/.pnpm-store
|
||||
@@ -75,6 +80,8 @@ jobs:
|
||||
name: "Unit Tests (${{ matrix.shard-index }}/${{ matrix.shard-count }})"
|
||||
needs: install-dependencies
|
||||
runs-on: ubuntu-24.04
|
||||
permissions:
|
||||
contents: read
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
@@ -83,19 +90,19 @@ jobs:
|
||||
shard-count: [4]
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6.0.2
|
||||
uses: actions/checkout@v6
|
||||
- name: Install pnpm
|
||||
uses: pnpm/action-setup@v4.2.0
|
||||
uses: pnpm/action-setup@v4
|
||||
with:
|
||||
version: 10
|
||||
- name: Use Node.js 24
|
||||
uses: actions/setup-node@v6.2.0
|
||||
uses: actions/setup-node@v6
|
||||
with:
|
||||
node-version: 24.x
|
||||
cache: 'pnpm'
|
||||
cache-dependency-path: 'src-ui/pnpm-lock.yaml'
|
||||
- name: Cache frontend dependencies
|
||||
uses: actions/cache@v5.0.3
|
||||
uses: actions/cache@v5
|
||||
with:
|
||||
path: |
|
||||
~/.pnpm-store
|
||||
@@ -107,13 +114,13 @@ jobs:
|
||||
run: cd src-ui && pnpm run test --max-workers=2 --shard=${{ matrix.shard-index }}/${{ matrix.shard-count }}
|
||||
- name: Upload test results to Codecov
|
||||
if: always()
|
||||
uses: codecov/codecov-action@v5.5.2
|
||||
uses: codecov/codecov-action@v5
|
||||
with:
|
||||
flags: frontend-node-${{ matrix.node-version }}
|
||||
directory: src-ui/
|
||||
report_type: test_results
|
||||
- name: Upload coverage to Codecov
|
||||
uses: codecov/codecov-action@v5.5.2
|
||||
uses: codecov/codecov-action@v5
|
||||
with:
|
||||
flags: frontend-node-${{ matrix.node-version }}
|
||||
directory: src-ui/coverage/
|
||||
@@ -121,6 +128,8 @@ jobs:
|
||||
name: "E2E Tests (${{ matrix.shard-index }}/${{ matrix.shard-count }})"
|
||||
needs: install-dependencies
|
||||
runs-on: ubuntu-24.04
|
||||
permissions:
|
||||
contents: read
|
||||
container: mcr.microsoft.com/playwright:v1.58.2-noble
|
||||
env:
|
||||
PLAYWRIGHT_BROWSERS_PATH: /ms-playwright
|
||||
@@ -133,19 +142,19 @@ jobs:
|
||||
shard-count: [2]
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6.0.2
|
||||
uses: actions/checkout@v6
|
||||
- name: Install pnpm
|
||||
uses: pnpm/action-setup@v4.2.0
|
||||
uses: pnpm/action-setup@v4
|
||||
with:
|
||||
version: 10
|
||||
- name: Use Node.js 24
|
||||
uses: actions/setup-node@v6.2.0
|
||||
uses: actions/setup-node@v6
|
||||
with:
|
||||
node-version: 24.x
|
||||
cache: 'pnpm'
|
||||
cache-dependency-path: 'src-ui/pnpm-lock.yaml'
|
||||
- name: Cache frontend dependencies
|
||||
uses: actions/cache@v5.0.3
|
||||
uses: actions/cache@v5
|
||||
with:
|
||||
path: |
|
||||
~/.pnpm-store
|
||||
@@ -161,21 +170,23 @@ jobs:
|
||||
name: Bundle Analysis
|
||||
needs: [unit-tests, e2e-tests]
|
||||
runs-on: ubuntu-24.04
|
||||
permissions:
|
||||
contents: read
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6.0.2
|
||||
uses: actions/checkout@v6
|
||||
- name: Install pnpm
|
||||
uses: pnpm/action-setup@v4.2.0
|
||||
uses: pnpm/action-setup@v4
|
||||
with:
|
||||
version: 10
|
||||
- name: Use Node.js 24
|
||||
uses: actions/setup-node@v6.2.0
|
||||
uses: actions/setup-node@v6
|
||||
with:
|
||||
node-version: 24.x
|
||||
cache: 'pnpm'
|
||||
cache-dependency-path: 'src-ui/pnpm-lock.yaml'
|
||||
- name: Cache frontend dependencies
|
||||
uses: actions/cache@v5.0.3
|
||||
uses: actions/cache@v5
|
||||
with:
|
||||
path: |
|
||||
~/.pnpm-store
|
||||
|
||||
3
.github/workflows/ci-lint.yml
vendored
3
.github/workflows/ci-lint.yml
vendored
@@ -9,10 +9,13 @@ on:
|
||||
concurrency:
|
||||
group: lint-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
permissions: {}
|
||||
jobs:
|
||||
lint:
|
||||
name: Linting via prek
|
||||
runs-on: ubuntu-slim
|
||||
permissions:
|
||||
contents: read
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6.0.2
|
||||
|
||||
42
.github/workflows/ci-release.yml
vendored
42
.github/workflows/ci-release.yml
vendored
@@ -7,6 +7,7 @@ on:
|
||||
concurrency:
|
||||
group: release-${{ github.ref }}
|
||||
cancel-in-progress: false
|
||||
permissions: {}
|
||||
env:
|
||||
DEFAULT_UV_VERSION: "0.10.x"
|
||||
DEFAULT_PYTHON_VERSION: "3.12"
|
||||
@@ -14,6 +15,10 @@ jobs:
|
||||
wait-for-docker:
|
||||
name: Wait for Docker Build
|
||||
runs-on: ubuntu-24.04
|
||||
permissions:
|
||||
# lewagon/wait-on-check-action reads workflow check runs
|
||||
actions: read
|
||||
contents: read
|
||||
steps:
|
||||
- name: Wait for Docker build
|
||||
uses: lewagon/wait-on-check-action@v1.5.0
|
||||
@@ -26,16 +31,18 @@ jobs:
|
||||
name: Build Release
|
||||
needs: wait-for-docker
|
||||
runs-on: ubuntu-24.04
|
||||
permissions:
|
||||
contents: read
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6.0.2
|
||||
uses: actions/checkout@v6
|
||||
# ---- Frontend Build ----
|
||||
- name: Install pnpm
|
||||
uses: pnpm/action-setup@v4.2.0
|
||||
uses: pnpm/action-setup@v4
|
||||
with:
|
||||
version: 10
|
||||
- name: Use Node.js 24
|
||||
uses: actions/setup-node@v6.2.0
|
||||
uses: actions/setup-node@v6
|
||||
with:
|
||||
node-version: 24.x
|
||||
cache: 'pnpm'
|
||||
@@ -47,11 +54,11 @@ jobs:
|
||||
# ---- Backend Setup ----
|
||||
- name: Set up Python
|
||||
id: setup-python
|
||||
uses: actions/setup-python@v6.2.0
|
||||
uses: actions/setup-python@v6
|
||||
with:
|
||||
python-version: ${{ env.DEFAULT_PYTHON_VERSION }}
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v7.3.1
|
||||
uses: astral-sh/setup-uv@v7
|
||||
with:
|
||||
version: ${{ env.DEFAULT_UV_VERSION }}
|
||||
enable-cache: true
|
||||
@@ -118,7 +125,7 @@ jobs:
|
||||
sudo chown -R 1000:1000 paperless-ngx/
|
||||
tar -cJf paperless-ngx.tar.xz paperless-ngx/
|
||||
- name: Upload release artifact
|
||||
uses: actions/upload-artifact@v7.0.0
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
name: release
|
||||
path: dist/paperless-ngx.tar.xz
|
||||
@@ -127,13 +134,17 @@ jobs:
|
||||
name: Publish Release
|
||||
needs: build-release
|
||||
runs-on: ubuntu-24.04
|
||||
permissions:
|
||||
# release-drafter reads PRs to build the changelog and creates/publishes the release
|
||||
contents: write
|
||||
pull-requests: read
|
||||
outputs:
|
||||
prerelease: ${{ steps.get-version.outputs.prerelease }}
|
||||
changelog: ${{ steps.create-release.outputs.body }}
|
||||
version: ${{ steps.get-version.outputs.version }}
|
||||
steps:
|
||||
- name: Download release artifact
|
||||
uses: actions/download-artifact@v8.0.0
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: release
|
||||
path: ./
|
||||
@@ -148,7 +159,7 @@ jobs:
|
||||
fi
|
||||
- name: Create release and changelog
|
||||
id: create-release
|
||||
uses: release-drafter/release-drafter@v6.2.0
|
||||
uses: release-drafter/release-drafter@v6
|
||||
with:
|
||||
name: Paperless-ngx ${{ steps.get-version.outputs.version }}
|
||||
tag: ${{ steps.get-version.outputs.version }}
|
||||
@@ -159,7 +170,7 @@ jobs:
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
- name: Upload release archive
|
||||
uses: shogo82148/actions-upload-release-asset@v1.9.2
|
||||
uses: shogo82148/actions-upload-release-asset@v1
|
||||
with:
|
||||
github_token: ${{ secrets.GITHUB_TOKEN }}
|
||||
upload_url: ${{ steps.create-release.outputs.upload_url }}
|
||||
@@ -174,18 +185,23 @@ jobs:
|
||||
needs: publish-release
|
||||
if: needs.publish-release.outputs.prerelease == 'false'
|
||||
runs-on: ubuntu-24.04
|
||||
permissions:
|
||||
# git push of the changelog branch requires contents: write
|
||||
# github.rest.pulls.create() and github.rest.issues.addLabels() require pull-requests: write
|
||||
contents: write
|
||||
pull-requests: write
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6.0.2
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
ref: main
|
||||
- name: Set up Python
|
||||
id: setup-python
|
||||
uses: actions/setup-python@v6.2.0
|
||||
uses: actions/setup-python@v6
|
||||
with:
|
||||
python-version: ${{ env.DEFAULT_PYTHON_VERSION }}
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v7.3.1
|
||||
uses: astral-sh/setup-uv@v7
|
||||
with:
|
||||
version: ${{ env.DEFAULT_UV_VERSION }}
|
||||
enable-cache: true
|
||||
@@ -218,7 +234,7 @@ jobs:
|
||||
git commit -am "Changelog ${{ needs.publish-release.outputs.version }} - GHA"
|
||||
git push origin ${{ needs.publish-release.outputs.version }}-changelog
|
||||
- name: Create pull request
|
||||
uses: actions/github-script@v8.0.0
|
||||
uses: actions/github-script@v8
|
||||
with:
|
||||
script: |
|
||||
const { repo, owner } = context.repo;
|
||||
|
||||
1
.github/workflows/cleanup-tags.yml
vendored
1
.github/workflows/cleanup-tags.yml
vendored
@@ -12,6 +12,7 @@ on:
|
||||
concurrency:
|
||||
group: registry-tags-cleanup
|
||||
cancel-in-progress: false
|
||||
permissions: {}
|
||||
jobs:
|
||||
cleanup-images:
|
||||
name: Cleanup Image Tags for ${{ matrix.primary-name }}
|
||||
|
||||
7
.github/workflows/codeql-analysis.yml
vendored
7
.github/workflows/codeql-analysis.yml
vendored
@@ -18,6 +18,7 @@ on:
|
||||
branches: [dev]
|
||||
schedule:
|
||||
- cron: '28 13 * * 5'
|
||||
permissions: {}
|
||||
jobs:
|
||||
analyze:
|
||||
name: Analyze
|
||||
@@ -34,10 +35,10 @@ jobs:
|
||||
# Learn more about CodeQL language support at https://git.io/codeql-language-support
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v6.0.2
|
||||
uses: actions/checkout@v6
|
||||
# Initializes the CodeQL tools for scanning.
|
||||
- name: Initialize CodeQL
|
||||
uses: github/codeql-action/init@v4.32.5
|
||||
uses: github/codeql-action/init@v4
|
||||
with:
|
||||
languages: ${{ matrix.language }}
|
||||
# If you wish to specify custom queries, you can do so here or in a config file.
|
||||
@@ -45,4 +46,4 @@ jobs:
|
||||
# Prefix the list here with "+" to use these queries and those in the config file.
|
||||
# queries: ./path/to/local/query, your-org/your-repo/queries@main
|
||||
- name: Perform CodeQL Analysis
|
||||
uses: github/codeql-action/analyze@v4.32.5
|
||||
uses: github/codeql-action/analyze@v4
|
||||
|
||||
9
.github/workflows/crowdin.yml
vendored
9
.github/workflows/crowdin.yml
vendored
@@ -6,18 +6,23 @@ on:
|
||||
push:
|
||||
paths: ['src/locale/**', 'src-ui/messages.xlf', 'src-ui/src/locale/**']
|
||||
branches: [dev]
|
||||
permissions: {}
|
||||
jobs:
|
||||
synchronize-with-crowdin:
|
||||
name: Crowdin Sync
|
||||
if: github.repository_owner == 'paperless-ngx'
|
||||
runs-on: ubuntu-24.04
|
||||
permissions:
|
||||
# Crowdin action pushes translation branches and creates/updates PRs via GITHUB_TOKEN
|
||||
contents: write
|
||||
pull-requests: write
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6.0.2
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
token: ${{ secrets.PNGX_BOT_PAT }}
|
||||
- name: crowdin action
|
||||
uses: crowdin/github-action@v2.15.0
|
||||
uses: crowdin/github-action@v2
|
||||
with:
|
||||
upload_translations: false
|
||||
download_translations: true
|
||||
|
||||
16
.github/workflows/pr-bot.yml
vendored
16
.github/workflows/pr-bot.yml
vendored
@@ -2,17 +2,19 @@ name: PR Bot
|
||||
on:
|
||||
pull_request_target:
|
||||
types: [opened]
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: write
|
||||
permissions: {}
|
||||
jobs:
|
||||
pr-bot:
|
||||
name: Automated PR Bot
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
# labeler reads file paths; all steps add labels or post comments on PRs
|
||||
contents: read
|
||||
pull-requests: write
|
||||
steps:
|
||||
- name: Label PR by file path or branch name
|
||||
# see .github/labeler.yml for the labeler config
|
||||
uses: actions/labeler@v6.0.1
|
||||
uses: actions/labeler@v6
|
||||
with:
|
||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
- name: Label by size
|
||||
@@ -26,7 +28,7 @@ jobs:
|
||||
fail_if_xl: 'false'
|
||||
excluded_files: /\.lock$/ /\.txt$/ ^src-ui/pnpm-lock\.yaml$ ^src-ui/messages\.xlf$ ^src/locale/en_US/LC_MESSAGES/django\.po$
|
||||
- name: Label by PR title
|
||||
uses: actions/github-script@v8.0.0
|
||||
uses: actions/github-script@v8
|
||||
with:
|
||||
script: |
|
||||
const pr = context.payload.pull_request;
|
||||
@@ -52,7 +54,7 @@ jobs:
|
||||
}
|
||||
- name: Label bot-generated PRs
|
||||
if: ${{ contains(github.actor, 'dependabot') || contains(github.actor, 'crowdin-bot') }}
|
||||
uses: actions/github-script@v8.0.0
|
||||
uses: actions/github-script@v8
|
||||
with:
|
||||
script: |
|
||||
const pr = context.payload.pull_request;
|
||||
@@ -77,7 +79,7 @@ jobs:
|
||||
}
|
||||
- name: Welcome comment
|
||||
if: ${{ !contains(github.actor, 'bot') }}
|
||||
uses: actions/github-script@v8.0.0
|
||||
uses: actions/github-script@v8
|
||||
with:
|
||||
script: |
|
||||
const pr = context.payload.pull_request;
|
||||
|
||||
7
.github/workflows/project-actions.yml
vendored
7
.github/workflows/project-actions.yml
vendored
@@ -7,18 +7,19 @@ on:
|
||||
branches:
|
||||
- main
|
||||
- dev
|
||||
permissions:
|
||||
contents: read
|
||||
permissions: {}
|
||||
jobs:
|
||||
pr_opened_or_reopened:
|
||||
name: pr_opened_or_reopened
|
||||
runs-on: ubuntu-24.04
|
||||
permissions:
|
||||
# release-drafter reads its config file from the repo
|
||||
contents: read
|
||||
# write permission is required for autolabeler
|
||||
pull-requests: write
|
||||
if: github.event_name == 'pull_request_target' && (github.event.action == 'opened' || github.event.action == 'reopened') && github.event.pull_request.user.login != 'dependabot'
|
||||
steps:
|
||||
- name: Label PR with release-drafter
|
||||
uses: release-drafter/release-drafter@v6.2.0
|
||||
uses: release-drafter/release-drafter@v6
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
28
.github/workflows/repo-maintenance.yml
vendored
28
.github/workflows/repo-maintenance.yml
vendored
@@ -3,10 +3,7 @@ on:
|
||||
schedule:
|
||||
- cron: '0 3 * * *'
|
||||
workflow_dispatch:
|
||||
permissions:
|
||||
issues: write
|
||||
pull-requests: write
|
||||
discussions: write
|
||||
permissions: {}
|
||||
concurrency:
|
||||
group: lock
|
||||
jobs:
|
||||
@@ -14,8 +11,11 @@ jobs:
|
||||
name: 'Stale'
|
||||
if: github.repository_owner == 'paperless-ngx'
|
||||
runs-on: ubuntu-24.04
|
||||
permissions:
|
||||
issues: write
|
||||
pull-requests: write
|
||||
steps:
|
||||
- uses: actions/stale@v10.2.0
|
||||
- uses: actions/stale@v10
|
||||
with:
|
||||
days-before-stale: 7
|
||||
days-before-close: 14
|
||||
@@ -36,8 +36,12 @@ jobs:
|
||||
name: 'Lock Old Threads'
|
||||
if: github.repository_owner == 'paperless-ngx'
|
||||
runs-on: ubuntu-24.04
|
||||
permissions:
|
||||
issues: write
|
||||
pull-requests: write
|
||||
discussions: write
|
||||
steps:
|
||||
- uses: dessant/lock-threads@v6.0.0
|
||||
- uses: dessant/lock-threads@v6
|
||||
with:
|
||||
issue-inactive-days: '30'
|
||||
pr-inactive-days: '30'
|
||||
@@ -56,8 +60,10 @@ jobs:
|
||||
name: 'Close Answered Discussions'
|
||||
if: github.repository_owner == 'paperless-ngx'
|
||||
runs-on: ubuntu-24.04
|
||||
permissions:
|
||||
discussions: write
|
||||
steps:
|
||||
- uses: actions/github-script@v8.0.0
|
||||
- uses: actions/github-script@v8
|
||||
with:
|
||||
script: |
|
||||
function sleep(ms) {
|
||||
@@ -113,8 +119,10 @@ jobs:
|
||||
name: 'Close Outdated Discussions'
|
||||
if: github.repository_owner == 'paperless-ngx'
|
||||
runs-on: ubuntu-24.04
|
||||
permissions:
|
||||
discussions: write
|
||||
steps:
|
||||
- uses: actions/github-script@v8.0.0
|
||||
- uses: actions/github-script@v8
|
||||
with:
|
||||
script: |
|
||||
function sleep(ms) {
|
||||
@@ -205,8 +213,10 @@ jobs:
|
||||
name: 'Close Unsupported Feature Requests'
|
||||
if: github.repository_owner == 'paperless-ngx'
|
||||
runs-on: ubuntu-24.04
|
||||
permissions:
|
||||
discussions: write
|
||||
steps:
|
||||
- uses: actions/github-script@v8.0.0
|
||||
- uses: actions/github-script@v8
|
||||
with:
|
||||
script: |
|
||||
function sleep(ms) {
|
||||
|
||||
15
.github/workflows/translate-strings.yml
vendored
15
.github/workflows/translate-strings.yml
vendored
@@ -3,6 +3,7 @@ on:
|
||||
push:
|
||||
branches:
|
||||
- dev
|
||||
permissions: {}
|
||||
jobs:
|
||||
generate-translate-strings:
|
||||
name: Generate Translation Strings
|
||||
@@ -11,7 +12,7 @@ jobs:
|
||||
contents: write
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v6.0.2
|
||||
uses: actions/checkout@v6
|
||||
env:
|
||||
GH_REF: ${{ github.ref }} # sonar rule:githubactions:S7630 - avoid injection
|
||||
with:
|
||||
@@ -19,13 +20,13 @@ jobs:
|
||||
ref: ${{ env.GH_REF }}
|
||||
- name: Set up Python
|
||||
id: setup-python
|
||||
uses: actions/setup-python@v6.2.0
|
||||
uses: actions/setup-python@v6
|
||||
- name: Install system dependencies
|
||||
run: |
|
||||
sudo apt-get update -qq
|
||||
sudo apt-get install -qq --no-install-recommends gettext
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v7.3.1
|
||||
uses: astral-sh/setup-uv@v7
|
||||
with:
|
||||
enable-cache: true
|
||||
- name: Install backend python dependencies
|
||||
@@ -36,18 +37,18 @@ jobs:
|
||||
- name: Generate backend translation strings
|
||||
run: cd src/ && uv run manage.py makemessages -l en_US -i "samples*"
|
||||
- name: Install pnpm
|
||||
uses: pnpm/action-setup@v4.2.0
|
||||
uses: pnpm/action-setup@v4
|
||||
with:
|
||||
version: 10
|
||||
- name: Use Node.js 24
|
||||
uses: actions/setup-node@v6.2.0
|
||||
uses: actions/setup-node@v6
|
||||
with:
|
||||
node-version: 24.x
|
||||
cache: 'pnpm'
|
||||
cache-dependency-path: 'src-ui/pnpm-lock.yaml'
|
||||
- name: Cache frontend dependencies
|
||||
id: cache-frontend-deps
|
||||
uses: actions/cache@v5.0.3
|
||||
uses: actions/cache@v5
|
||||
with:
|
||||
path: |
|
||||
~/.pnpm-store
|
||||
@@ -63,7 +64,7 @@ jobs:
|
||||
cd src-ui
|
||||
pnpm run ng extract-i18n
|
||||
- name: Commit changes
|
||||
uses: stefanzweifel/git-auto-commit-action@v7.1.0
|
||||
uses: stefanzweifel/git-auto-commit-action@v7
|
||||
with:
|
||||
file_pattern: 'src-ui/messages.xlf src/locale/en_US/LC_MESSAGES/django.po'
|
||||
commit_message: "Auto translate strings"
|
||||
|
||||
@@ -45,7 +45,7 @@ ENV \
|
||||
ARG TARGETARCH
|
||||
ARG TARGETVARIANT
|
||||
# Lock this version
|
||||
ARG S6_OVERLAY_VERSION=3.2.2.0
|
||||
ARG S6_OVERLAY_VERSION=3.2.1.0
|
||||
|
||||
ARG S6_BUILD_TIME_PKGS="curl \
|
||||
xz-utils"
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
# correct networking for the tests
|
||||
services:
|
||||
gotenberg:
|
||||
image: docker.io/gotenberg/gotenberg:8.27
|
||||
image: docker.io/gotenberg/gotenberg:8.26
|
||||
hostname: gotenberg
|
||||
container_name: gotenberg
|
||||
network_mode: host
|
||||
|
||||
@@ -72,7 +72,7 @@ services:
|
||||
PAPERLESS_TIKA_GOTENBERG_ENDPOINT: http://gotenberg:3000
|
||||
PAPERLESS_TIKA_ENDPOINT: http://tika:9998
|
||||
gotenberg:
|
||||
image: docker.io/gotenberg/gotenberg:8.27
|
||||
image: docker.io/gotenberg/gotenberg:8.26
|
||||
restart: unless-stopped
|
||||
# The gotenberg chromium route is used to convert .eml files. We do not
|
||||
# want to allow external content like tracking pixels or even javascript.
|
||||
|
||||
@@ -66,7 +66,7 @@ services:
|
||||
PAPERLESS_TIKA_GOTENBERG_ENDPOINT: http://gotenberg:3000
|
||||
PAPERLESS_TIKA_ENDPOINT: http://tika:9998
|
||||
gotenberg:
|
||||
image: docker.io/gotenberg/gotenberg:8.27
|
||||
image: docker.io/gotenberg/gotenberg:8.26
|
||||
restart: unless-stopped
|
||||
# The gotenberg chromium route is used to convert .eml files. We do not
|
||||
# want to allow external content like tracking pixels or even javascript.
|
||||
|
||||
@@ -55,7 +55,7 @@ services:
|
||||
PAPERLESS_TIKA_GOTENBERG_ENDPOINT: http://gotenberg:3000
|
||||
PAPERLESS_TIKA_ENDPOINT: http://tika:9998
|
||||
gotenberg:
|
||||
image: docker.io/gotenberg/gotenberg:8.27
|
||||
image: docker.io/gotenberg/gotenberg:8.26
|
||||
restart: unless-stopped
|
||||
# The gotenberg chromium route is used to convert .eml files. We do not
|
||||
# want to allow external content like tracking pixels or even javascript.
|
||||
|
||||
@@ -262,10 +262,6 @@ your files differently, you can do that by adjusting the
|
||||
or using [storage paths (see below)](#storage-paths). Paperless adds the
|
||||
correct file extension e.g. `.pdf`, `.jpg` automatically.
|
||||
|
||||
When a document has file versions, each version uses the same naming rules and
|
||||
storage path resolution as any other document file, with an added version suffix
|
||||
such as `_v1`, `_v2`, etc.
|
||||
|
||||
This variable allows you to configure the filename (folders are allowed)
|
||||
using placeholders. For example, configuring this to
|
||||
|
||||
@@ -357,8 +353,6 @@ If paperless detects that two documents share the same filename,
|
||||
paperless will automatically append `_01`, `_02`, etc to the filename.
|
||||
This happens if all the placeholders in a filename evaluate to the same
|
||||
value.
|
||||
For versioned files, this counter is appended after the version suffix
|
||||
(for example `statement_v2_01.pdf`).
|
||||
|
||||
If there are any errors in the placeholders included in `PAPERLESS_FILENAME_FORMAT`,
|
||||
paperless will fall back to using the default naming scheme instead.
|
||||
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 57 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 61 KiB |
@@ -95,7 +95,6 @@ Think of versions as **file history** for a document.
|
||||
|
||||
- Versions track the underlying file and extracted text content (OCR/text).
|
||||
- Metadata such as tags, correspondent, document type, storage path and custom fields stay on the "root" document.
|
||||
- Version files follow normal filename formatting (including storage paths) and add a `_vN` suffix (for example `_v1`, `_v2`).
|
||||
- By default, search and document content use the latest version.
|
||||
- In document detail, selecting a version switches the preview, file metadata and content (and download etc buttons) to that version.
|
||||
- Deleting a non-root version keeps metadata and falls back to the latest remaining version.
|
||||
@@ -617,7 +616,7 @@ applied. You can use the following placeholders in the template with any trigger
|
||||
- `{{added_day}}`: added day
|
||||
- `{{added_time}}`: added time in HH:MM format
|
||||
- `{{original_filename}}`: original file name without extension
|
||||
- `{{filename}}`: current file name without extension (for "added" workflows this may not be final yet, you can use `{{original_filename}}`)
|
||||
- `{{filename}}`: current file name without extension
|
||||
- `{{doc_title}}`: current document title (cannot be used in title assignment)
|
||||
|
||||
The following placeholders are only available for "added" or "updated" triggers
|
||||
|
||||
@@ -11,7 +11,6 @@ import magic
|
||||
from django.conf import settings
|
||||
from django.contrib.auth.models import User
|
||||
from django.db import transaction
|
||||
from django.db.models import Max
|
||||
from django.db.models import Q
|
||||
from django.utils import timezone
|
||||
from filelock import FileLock
|
||||
@@ -124,6 +123,22 @@ class ConsumerPluginMixin:
|
||||
|
||||
self.filename = self.metadata.filename or self.input_doc.original_file.name
|
||||
|
||||
if input_doc.root_document_id:
|
||||
self.log.debug(
|
||||
f"Document root document id: {input_doc.root_document_id}",
|
||||
)
|
||||
root_document = Document.objects.get(pk=input_doc.root_document_id)
|
||||
version_index = Document.objects.filter(root_document=root_document).count()
|
||||
filename_path = Path(self.filename)
|
||||
if filename_path.suffix:
|
||||
self.filename = str(
|
||||
filename_path.with_name(
|
||||
f"{filename_path.stem}_v{version_index}{filename_path.suffix}",
|
||||
),
|
||||
)
|
||||
else:
|
||||
self.filename = f"{self.filename}_v{version_index}"
|
||||
|
||||
def _send_progress(
|
||||
self,
|
||||
current_progress: int,
|
||||
@@ -178,19 +193,9 @@ class ConsumerPlugin(
|
||||
mime_type: str,
|
||||
) -> Document:
|
||||
self.log.debug("Saving record for updated version to database")
|
||||
root_doc_frozen = Document.objects.select_for_update().get(pk=root_doc.pk)
|
||||
next_version_index = (
|
||||
Document.global_objects.filter(
|
||||
root_document_id=root_doc_frozen.pk,
|
||||
).aggregate(
|
||||
max_index=Max("version_index"),
|
||||
)["max_index"]
|
||||
or 0
|
||||
)
|
||||
version_doc = Document.objects.get(pk=root_doc_frozen.pk)
|
||||
version_doc = Document.objects.get(pk=root_doc.pk)
|
||||
setattr(version_doc, "pk", None)
|
||||
version_doc.root_document = root_doc_frozen
|
||||
version_doc.version_index = next_version_index + 1
|
||||
version_doc.root_document = root_doc
|
||||
file_for_checksum = (
|
||||
self.unmodified_original
|
||||
if self.unmodified_original is not None
|
||||
@@ -203,6 +208,7 @@ class ConsumerPlugin(
|
||||
version_doc.page_count = page_count
|
||||
version_doc.mime_type = mime_type
|
||||
version_doc.original_filename = self.filename
|
||||
version_doc.storage_path = root_doc.storage_path
|
||||
# Clear unique file path fields so they can be generated uniquely later
|
||||
version_doc.filename = None
|
||||
version_doc.archive_filename = None
|
||||
|
||||
@@ -128,18 +128,11 @@ def generate_filename(
|
||||
counter=0,
|
||||
archive_filename=False,
|
||||
) -> Path:
|
||||
# version docs use the root document for formatting, just with a suffix
|
||||
context_doc = doc if doc.root_document_id is None else doc.root_document
|
||||
version_suffix = (
|
||||
f"_v{doc.version_index}"
|
||||
if doc.root_document_id is not None and doc.version_index is not None
|
||||
else ""
|
||||
)
|
||||
base_path: Path | None = None
|
||||
|
||||
# Determine the source of the format string
|
||||
if context_doc.storage_path is not None:
|
||||
filename_format = context_doc.storage_path.path
|
||||
if doc.storage_path is not None:
|
||||
filename_format = doc.storage_path.path
|
||||
elif settings.FILENAME_FORMAT is not None:
|
||||
# Maybe convert old to new style
|
||||
filename_format = convert_format_str_to_template_format(
|
||||
@@ -150,7 +143,7 @@ def generate_filename(
|
||||
|
||||
# If we have one, render it
|
||||
if filename_format is not None:
|
||||
rendered_path: str | None = format_filename(context_doc, filename_format)
|
||||
rendered_path: str | None = format_filename(doc, filename_format)
|
||||
if rendered_path:
|
||||
base_path = Path(rendered_path)
|
||||
|
||||
@@ -164,7 +157,7 @@ def generate_filename(
|
||||
base_filename = base_path.name
|
||||
|
||||
# Build the final filename with counter and filetype
|
||||
final_filename = f"{base_filename}{version_suffix}{counter_str}{filetype_str}"
|
||||
final_filename = f"{base_filename}{counter_str}{filetype_str}"
|
||||
|
||||
# If we have a directory component, include it
|
||||
if str(directory) != ".":
|
||||
@@ -173,9 +166,7 @@ def generate_filename(
|
||||
full_path = Path(final_filename)
|
||||
else:
|
||||
# No template, use document ID
|
||||
final_filename = (
|
||||
f"{context_doc.pk:07}{version_suffix}{counter_str}{filetype_str}"
|
||||
)
|
||||
final_filename = f"{doc.pk:07}{counter_str}{filetype_str}"
|
||||
full_path = Path(final_filename)
|
||||
|
||||
return full_path
|
||||
|
||||
@@ -1,25 +1,22 @@
|
||||
from django.core.management import BaseCommand
|
||||
from django.db import transaction
|
||||
|
||||
from documents.management.commands.base import PaperlessCommand
|
||||
from documents.management.commands.mixins import ProgressBarMixin
|
||||
from documents.tasks import index_optimize
|
||||
from documents.tasks import index_reindex
|
||||
|
||||
|
||||
class Command(PaperlessCommand):
|
||||
class Command(ProgressBarMixin, BaseCommand):
|
||||
help = "Manages the document index."
|
||||
|
||||
def add_arguments(self, parser):
|
||||
super().add_arguments(parser)
|
||||
parser.add_argument("command", choices=["reindex", "optimize"])
|
||||
self.add_argument_progress_bar_mixin(parser)
|
||||
|
||||
def handle(self, *args, **options):
|
||||
self.handle_progress_bar_mixin(**options)
|
||||
with transaction.atomic():
|
||||
if options["command"] == "reindex":
|
||||
index_reindex(
|
||||
iter_wrapper=lambda docs: self.track(
|
||||
docs,
|
||||
description="Indexing documents...",
|
||||
),
|
||||
)
|
||||
index_reindex(progress_bar_disable=self.no_progress_bar)
|
||||
elif options["command"] == "optimize":
|
||||
index_optimize()
|
||||
|
||||
@@ -1,22 +1,22 @@
|
||||
from typing import Any
|
||||
from django.core.management import BaseCommand
|
||||
from django.db import transaction
|
||||
|
||||
from documents.management.commands.base import PaperlessCommand
|
||||
from documents.management.commands.mixins import ProgressBarMixin
|
||||
from documents.tasks import llmindex_index
|
||||
|
||||
|
||||
class Command(PaperlessCommand):
|
||||
class Command(ProgressBarMixin, BaseCommand):
|
||||
help = "Manages the LLM-based vector index for Paperless."
|
||||
|
||||
def add_arguments(self, parser: Any) -> None:
|
||||
super().add_arguments(parser)
|
||||
def add_arguments(self, parser):
|
||||
parser.add_argument("command", choices=["rebuild", "update"])
|
||||
self.add_argument_progress_bar_mixin(parser)
|
||||
|
||||
def handle(self, *args: Any, **options: Any) -> None:
|
||||
llmindex_index(
|
||||
rebuild=options["command"] == "rebuild",
|
||||
scheduled=False,
|
||||
iter_wrapper=lambda docs: self.track(
|
||||
docs,
|
||||
description="Indexing documents...",
|
||||
),
|
||||
)
|
||||
def handle(self, *args, **options):
|
||||
self.handle_progress_bar_mixin(**options)
|
||||
with transaction.atomic():
|
||||
llmindex_index(
|
||||
progress_bar_disable=self.no_progress_bar,
|
||||
rebuild=options["command"] == "rebuild",
|
||||
scheduled=False,
|
||||
)
|
||||
|
||||
@@ -1,117 +1,17 @@
|
||||
"""Management command to check the document archive for issues."""
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from rich.panel import Panel
|
||||
from rich.table import Table
|
||||
from rich.text import Text
|
||||
|
||||
from documents.management.commands.base import PaperlessCommand
|
||||
from documents.models import Document
|
||||
from documents.sanity_checker import SanityCheckMessages
|
||||
from documents.management.commands.mixins import ProgressBarMixin
|
||||
from documents.sanity_checker import check_sanity
|
||||
|
||||
_LEVEL_STYLE: dict[int, tuple[str, str]] = {
|
||||
logging.ERROR: ("bold red", "ERROR"),
|
||||
logging.WARNING: ("yellow", "WARN"),
|
||||
logging.INFO: ("dim", "INFO"),
|
||||
}
|
||||
|
||||
|
||||
class Command(PaperlessCommand):
|
||||
class Command(ProgressBarMixin, BaseCommand):
|
||||
help = "This command checks your document archive for issues."
|
||||
|
||||
def _render_results(self, messages: SanityCheckMessages) -> None:
|
||||
"""Render sanity check results as a Rich table."""
|
||||
def add_arguments(self, parser):
|
||||
self.add_argument_progress_bar_mixin(parser)
|
||||
|
||||
if (
|
||||
not messages.has_error
|
||||
and not messages.has_warning
|
||||
and not messages.has_info
|
||||
):
|
||||
self.console.print(
|
||||
Panel(
|
||||
"[green]No issues detected.[/green]",
|
||||
title="Sanity Check",
|
||||
border_style="green",
|
||||
),
|
||||
)
|
||||
return
|
||||
def handle(self, *args, **options):
|
||||
self.handle_progress_bar_mixin(**options)
|
||||
messages = check_sanity(progress=self.use_progress_bar, scheduled=False)
|
||||
|
||||
# Build a lookup for document titles
|
||||
doc_pks = [pk for pk in messages.document_pks() if pk is not None]
|
||||
titles: dict[int, str] = {}
|
||||
if doc_pks:
|
||||
titles = dict(
|
||||
Document.global_objects.filter(pk__in=doc_pks)
|
||||
.only("pk", "title")
|
||||
.values_list("pk", "title"),
|
||||
)
|
||||
|
||||
table = Table(
|
||||
title="Sanity Check Results",
|
||||
show_lines=True,
|
||||
title_style="bold",
|
||||
)
|
||||
table.add_column("Level", width=7, no_wrap=True)
|
||||
table.add_column("Document", min_width=20)
|
||||
table.add_column("Issue", ratio=1)
|
||||
|
||||
for doc_pk, doc_messages in messages.iter_messages():
|
||||
if doc_pk is not None:
|
||||
title = titles.get(doc_pk, "Unknown")
|
||||
doc_label = f"#{doc_pk} {title}"
|
||||
else:
|
||||
doc_label = "(global)"
|
||||
|
||||
for msg in doc_messages:
|
||||
style, label = _LEVEL_STYLE.get(
|
||||
msg["level"],
|
||||
("dim", "INFO"),
|
||||
)
|
||||
table.add_row(
|
||||
Text(label, style=style),
|
||||
Text(doc_label),
|
||||
Text(str(msg["message"])),
|
||||
)
|
||||
|
||||
self.console.print(table)
|
||||
|
||||
parts: list[str] = []
|
||||
|
||||
if messages.document_error_count:
|
||||
parts.append(
|
||||
f"{messages.document_error_count} document(s) with [bold red]errors[/bold red]",
|
||||
)
|
||||
if messages.document_warning_count:
|
||||
parts.append(
|
||||
f"{messages.document_warning_count} document(s) with [yellow]warnings[/yellow]",
|
||||
)
|
||||
if messages.document_info_count:
|
||||
parts.append(f"{messages.document_info_count} document(s) with infos")
|
||||
if messages.global_warning_count:
|
||||
parts.append(
|
||||
f"{messages.global_warning_count} global [yellow]warning(s)[/yellow]",
|
||||
)
|
||||
|
||||
if parts:
|
||||
if len(parts) > 1:
|
||||
summary = ", ".join(parts[:-1]) + " and " + parts[-1]
|
||||
else:
|
||||
summary = parts[0]
|
||||
self.console.print(f"\nFound {summary}.")
|
||||
else:
|
||||
self.console.print("\nNo issues found.")
|
||||
|
||||
def handle(self, *args: Any, **options: Any) -> None:
|
||||
messages = check_sanity(
|
||||
scheduled=False,
|
||||
iter_wrapper=lambda docs: self.track(
|
||||
docs,
|
||||
description="Checking documents...",
|
||||
),
|
||||
)
|
||||
self._render_results(messages)
|
||||
messages.log_messages()
|
||||
|
||||
@@ -1,37 +0,0 @@
|
||||
# Generated by Django 5.2.11 on 2026-03-02 17:48
|
||||
|
||||
from django.conf import settings
|
||||
from django.db import migrations
|
||||
from django.db import models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
dependencies = [
|
||||
("documents", "0013_document_root_document"),
|
||||
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AddField(
|
||||
model_name="document",
|
||||
name="version_index",
|
||||
field=models.PositiveIntegerField(
|
||||
blank=True,
|
||||
db_index=True,
|
||||
help_text="Index of this version within the root document.",
|
||||
null=True,
|
||||
verbose_name="version index",
|
||||
),
|
||||
),
|
||||
migrations.AddConstraint(
|
||||
model_name="document",
|
||||
constraint=models.UniqueConstraint(
|
||||
condition=models.Q(
|
||||
("root_document__isnull", False),
|
||||
("version_index__isnull", False),
|
||||
),
|
||||
fields=("root_document", "version_index"),
|
||||
name="documents_document_root_version_index_uniq",
|
||||
),
|
||||
),
|
||||
]
|
||||
@@ -317,14 +317,6 @@ class Document(SoftDeleteModel, ModelWithOwner): # type: ignore[django-manager-
|
||||
verbose_name=_("root document for this version"),
|
||||
)
|
||||
|
||||
version_index = models.PositiveIntegerField(
|
||||
_("version index"),
|
||||
blank=True,
|
||||
null=True,
|
||||
db_index=True,
|
||||
help_text=_("Index of this version within the root document."),
|
||||
)
|
||||
|
||||
version_label = models.CharField(
|
||||
_("version label"),
|
||||
max_length=64,
|
||||
@@ -337,16 +329,6 @@ class Document(SoftDeleteModel, ModelWithOwner): # type: ignore[django-manager-
|
||||
ordering = ("-created",)
|
||||
verbose_name = _("document")
|
||||
verbose_name_plural = _("documents")
|
||||
constraints = [
|
||||
models.UniqueConstraint(
|
||||
fields=["root_document", "version_index"],
|
||||
condition=models.Q(
|
||||
root_document__isnull=False,
|
||||
version_index__isnull=False,
|
||||
),
|
||||
name="documents_document_root_version_index_uniq",
|
||||
),
|
||||
]
|
||||
|
||||
def __str__(self) -> str:
|
||||
created = self.created.isoformat()
|
||||
|
||||
@@ -1,174 +1,80 @@
|
||||
"""
|
||||
Sanity checker for the Paperless-ngx document archive.
|
||||
|
||||
Verifies that all documents have valid files, correct checksums,
|
||||
and consistent metadata. Reports orphaned files in the media directory.
|
||||
|
||||
Progress display is the caller's responsibility -- pass an ``iter_wrapper``
|
||||
to wrap the document queryset (e.g., with a progress bar). The default
|
||||
is an identity function that adds no overhead.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import logging
|
||||
import uuid
|
||||
from collections import defaultdict
|
||||
from collections.abc import Callable
|
||||
from collections.abc import Iterable
|
||||
from collections.abc import Iterator
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
from typing import Final
|
||||
from typing import TypedDict
|
||||
from typing import TypeVar
|
||||
|
||||
from celery import states
|
||||
from django.conf import settings
|
||||
from django.utils import timezone
|
||||
from tqdm import tqdm
|
||||
|
||||
from documents.models import Document
|
||||
from documents.models import PaperlessTask
|
||||
from paperless.config import GeneralConfig
|
||||
|
||||
logger = logging.getLogger("paperless.sanity_checker")
|
||||
|
||||
_T = TypeVar("_T")
|
||||
IterWrapper = Callable[[Iterable[_T]], Iterable[_T]]
|
||||
|
||||
|
||||
class MessageEntry(TypedDict):
|
||||
"""A single sanity check message with its severity level."""
|
||||
|
||||
level: int
|
||||
message: str
|
||||
|
||||
|
||||
def _identity(iterable: Iterable[_T]) -> Iterable[_T]:
|
||||
"""Pass through an iterable unchanged (default iter_wrapper)."""
|
||||
return iterable
|
||||
|
||||
|
||||
class SanityCheckMessages:
|
||||
"""Collects sanity check messages grouped by document primary key.
|
||||
|
||||
Messages are categorized as error, warning, or info. ``None`` is used
|
||||
as the key for messages not associated with a specific document
|
||||
(e.g., orphaned files).
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self._messages: dict[int | None, list[MessageEntry]] = defaultdict(list)
|
||||
self.has_error: bool = False
|
||||
self.has_warning: bool = False
|
||||
self.has_info: bool = False
|
||||
self.document_count: int = 0
|
||||
self.document_error_count: int = 0
|
||||
self.document_warning_count: int = 0
|
||||
self.document_info_count: int = 0
|
||||
self.global_warning_count: int = 0
|
||||
self._messages: dict[int, list[dict]] = defaultdict(list)
|
||||
self.has_error = False
|
||||
self.has_warning = False
|
||||
|
||||
# -- Recording ----------------------------------------------------------
|
||||
|
||||
def error(self, doc_pk: int | None, message: str) -> None:
|
||||
def error(self, doc_pk, message) -> None:
|
||||
self._messages[doc_pk].append({"level": logging.ERROR, "message": message})
|
||||
self.has_error = True
|
||||
if doc_pk is not None:
|
||||
self.document_count += 1
|
||||
self.document_error_count += 1
|
||||
|
||||
def warning(self, doc_pk: int | None, message: str) -> None:
|
||||
def warning(self, doc_pk, message) -> None:
|
||||
self._messages[doc_pk].append({"level": logging.WARNING, "message": message})
|
||||
self.has_warning = True
|
||||
|
||||
if doc_pk is not None:
|
||||
self.document_count += 1
|
||||
self.document_warning_count += 1
|
||||
else:
|
||||
# This is the only type of global message we do right now
|
||||
self.global_warning_count += 1
|
||||
|
||||
def info(self, doc_pk: int | None, message: str) -> None:
|
||||
def info(self, doc_pk, message) -> None:
|
||||
self._messages[doc_pk].append({"level": logging.INFO, "message": message})
|
||||
self.has_info = True
|
||||
|
||||
if doc_pk is not None:
|
||||
self.document_count += 1
|
||||
self.document_info_count += 1
|
||||
|
||||
# -- Iteration / query --------------------------------------------------
|
||||
|
||||
def document_pks(self) -> list[int | None]:
|
||||
"""Return all document PKs (including None for global messages)."""
|
||||
return list(self._messages.keys())
|
||||
|
||||
def iter_messages(self) -> Iterator[tuple[int | None, list[MessageEntry]]]:
|
||||
"""Iterate over (doc_pk, messages) pairs."""
|
||||
yield from self._messages.items()
|
||||
|
||||
def __getitem__(self, item: int | None) -> list[MessageEntry]:
|
||||
return self._messages[item]
|
||||
|
||||
# -- Summarize Helpers --------------------------------------------------
|
||||
|
||||
@property
|
||||
def has_global_issues(self) -> bool:
|
||||
return None in self._messages
|
||||
|
||||
@property
|
||||
def total_issue_count(self) -> int:
|
||||
"""Total number of error and warning messages across all documents and global."""
|
||||
return (
|
||||
self.document_error_count
|
||||
+ self.document_warning_count
|
||||
+ self.global_warning_count
|
||||
)
|
||||
|
||||
# -- Logging output (used by Celery task path) --------------------------
|
||||
|
||||
def log_messages(self) -> None:
|
||||
"""Write all messages to the ``paperless.sanity_checker`` logger.
|
||||
logger = logging.getLogger("paperless.sanity_checker")
|
||||
|
||||
This is the output path for headless / Celery execution.
|
||||
Management commands use Rich rendering instead.
|
||||
"""
|
||||
if len(self._messages) == 0:
|
||||
logger.info("Sanity checker detected no issues.")
|
||||
return
|
||||
else:
|
||||
# Query once
|
||||
all_docs = Document.global_objects.all()
|
||||
|
||||
doc_pks = [pk for pk in self._messages if pk is not None]
|
||||
titles: dict[int, str] = {}
|
||||
if doc_pks:
|
||||
titles = dict(
|
||||
Document.global_objects.filter(pk__in=doc_pks)
|
||||
.only("pk", "title")
|
||||
.values_list("pk", "title"),
|
||||
)
|
||||
for doc_pk in self._messages:
|
||||
if doc_pk is not None:
|
||||
doc = all_docs.get(pk=doc_pk)
|
||||
logger.info(
|
||||
f"Detected following issue(s) with document #{doc.pk},"
|
||||
f" titled {doc.title}",
|
||||
)
|
||||
for msg in self._messages[doc_pk]:
|
||||
logger.log(msg["level"], msg["message"])
|
||||
|
||||
for doc_pk, entries in self._messages.items():
|
||||
if doc_pk is not None:
|
||||
title = titles.get(doc_pk, "Unknown")
|
||||
logger.info(
|
||||
"Detected following issue(s) with document #%s, titled %s",
|
||||
doc_pk,
|
||||
title,
|
||||
)
|
||||
for msg in entries:
|
||||
logger.log(msg["level"], msg["message"])
|
||||
def __len__(self):
|
||||
return len(self._messages)
|
||||
|
||||
def __getitem__(self, item):
|
||||
return self._messages[item]
|
||||
|
||||
|
||||
class SanityCheckFailedException(Exception):
|
||||
pass
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Internal helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
def check_sanity(*, progress=False, scheduled=True) -> SanityCheckMessages:
|
||||
paperless_task = PaperlessTask.objects.create(
|
||||
task_id=uuid.uuid4(),
|
||||
type=PaperlessTask.TaskType.SCHEDULED_TASK
|
||||
if scheduled
|
||||
else PaperlessTask.TaskType.MANUAL_TASK,
|
||||
task_name=PaperlessTask.TaskName.CHECK_SANITY,
|
||||
status=states.STARTED,
|
||||
date_created=timezone.now(),
|
||||
date_started=timezone.now(),
|
||||
)
|
||||
messages = SanityCheckMessages()
|
||||
|
||||
|
||||
def _build_present_files() -> set[Path]:
|
||||
"""Collect all files in MEDIA_ROOT, excluding directories and ignorable files."""
|
||||
present_files = {
|
||||
x.resolve()
|
||||
for x in Path(settings.MEDIA_ROOT).glob("**/*")
|
||||
@@ -176,178 +82,95 @@ def _build_present_files() -> set[Path]:
|
||||
}
|
||||
|
||||
lockfile = Path(settings.MEDIA_LOCK).resolve()
|
||||
present_files.discard(lockfile)
|
||||
if lockfile in present_files:
|
||||
present_files.remove(lockfile)
|
||||
|
||||
general_config = GeneralConfig()
|
||||
app_logo = general_config.app_logo or settings.APP_LOGO
|
||||
if app_logo:
|
||||
logo_file = Path(settings.MEDIA_ROOT / Path(app_logo.lstrip("/"))).resolve()
|
||||
present_files.discard(logo_file)
|
||||
if logo_file in present_files:
|
||||
present_files.remove(logo_file)
|
||||
|
||||
return present_files
|
||||
|
||||
|
||||
def _check_thumbnail(
|
||||
doc: Document,
|
||||
messages: SanityCheckMessages,
|
||||
present_files: set[Path],
|
||||
) -> None:
|
||||
"""Verify the thumbnail exists and is readable."""
|
||||
thumbnail_path: Final[Path] = Path(doc.thumbnail_path).resolve()
|
||||
if not thumbnail_path.exists() or not thumbnail_path.is_file():
|
||||
messages.error(doc.pk, "Thumbnail of document does not exist.")
|
||||
return
|
||||
|
||||
present_files.discard(thumbnail_path)
|
||||
try:
|
||||
_ = thumbnail_path.read_bytes()
|
||||
except OSError as e:
|
||||
messages.error(doc.pk, f"Cannot read thumbnail file of document: {e}")
|
||||
|
||||
|
||||
def _check_original(
|
||||
doc: Document,
|
||||
messages: SanityCheckMessages,
|
||||
present_files: set[Path],
|
||||
) -> None:
|
||||
"""Verify the original file exists, is readable, and has matching checksum."""
|
||||
source_path: Final[Path] = Path(doc.source_path).resolve()
|
||||
if not source_path.exists() or not source_path.is_file():
|
||||
messages.error(doc.pk, "Original of document does not exist.")
|
||||
return
|
||||
|
||||
present_files.discard(source_path)
|
||||
try:
|
||||
checksum = hashlib.md5(source_path.read_bytes()).hexdigest()
|
||||
except OSError as e:
|
||||
messages.error(doc.pk, f"Cannot read original file of document: {e}")
|
||||
else:
|
||||
if checksum != doc.checksum:
|
||||
messages.error(
|
||||
doc.pk,
|
||||
f"Checksum mismatch. Stored: {doc.checksum}, actual: {checksum}.",
|
||||
)
|
||||
|
||||
|
||||
def _check_archive(
|
||||
doc: Document,
|
||||
messages: SanityCheckMessages,
|
||||
present_files: set[Path],
|
||||
) -> None:
|
||||
"""Verify archive file consistency: checksum/filename pairing and file integrity."""
|
||||
if doc.archive_checksum is not None and doc.archive_filename is None:
|
||||
messages.error(
|
||||
doc.pk,
|
||||
"Document has an archive file checksum, but no archive filename.",
|
||||
)
|
||||
elif doc.archive_checksum is None and doc.archive_filename is not None:
|
||||
messages.error(
|
||||
doc.pk,
|
||||
"Document has an archive file, but its checksum is missing.",
|
||||
)
|
||||
elif doc.has_archive_version:
|
||||
if TYPE_CHECKING:
|
||||
assert isinstance(doc.archive_path, Path)
|
||||
archive_path: Final[Path] = Path(doc.archive_path).resolve()
|
||||
if not archive_path.exists() or not archive_path.is_file():
|
||||
messages.error(doc.pk, "Archived version of document does not exist.")
|
||||
return
|
||||
|
||||
present_files.discard(archive_path)
|
||||
try:
|
||||
checksum = hashlib.md5(archive_path.read_bytes()).hexdigest()
|
||||
except OSError as e:
|
||||
messages.error(
|
||||
doc.pk,
|
||||
f"Cannot read archive file of document: {e}",
|
||||
)
|
||||
for doc in tqdm(Document.global_objects.all(), disable=not progress):
|
||||
# Check sanity of the thumbnail
|
||||
thumbnail_path: Final[Path] = Path(doc.thumbnail_path).resolve()
|
||||
if not thumbnail_path.exists() or not thumbnail_path.is_file():
|
||||
messages.error(doc.pk, "Thumbnail of document does not exist.")
|
||||
else:
|
||||
if checksum != doc.archive_checksum:
|
||||
messages.error(
|
||||
doc.pk,
|
||||
"Checksum mismatch of archived document. "
|
||||
f"Stored: {doc.archive_checksum}, actual: {checksum}.",
|
||||
)
|
||||
if thumbnail_path in present_files:
|
||||
present_files.remove(thumbnail_path)
|
||||
try:
|
||||
_ = thumbnail_path.read_bytes()
|
||||
except OSError as e:
|
||||
messages.error(doc.pk, f"Cannot read thumbnail file of document: {e}")
|
||||
|
||||
# Check sanity of the original file
|
||||
# TODO: extract method
|
||||
source_path: Final[Path] = Path(doc.source_path).resolve()
|
||||
if not source_path.exists() or not source_path.is_file():
|
||||
messages.error(doc.pk, "Original of document does not exist.")
|
||||
else:
|
||||
if source_path in present_files:
|
||||
present_files.remove(source_path)
|
||||
try:
|
||||
checksum = hashlib.md5(source_path.read_bytes()).hexdigest()
|
||||
except OSError as e:
|
||||
messages.error(doc.pk, f"Cannot read original file of document: {e}")
|
||||
else:
|
||||
if checksum != doc.checksum:
|
||||
messages.error(
|
||||
doc.pk,
|
||||
"Checksum mismatch. "
|
||||
f"Stored: {doc.checksum}, actual: {checksum}.",
|
||||
)
|
||||
|
||||
def _check_content(doc: Document, messages: SanityCheckMessages) -> None:
|
||||
"""Flag documents with no OCR content."""
|
||||
if not doc.content:
|
||||
messages.info(doc.pk, "Document contains no OCR data")
|
||||
# Check sanity of the archive file.
|
||||
if doc.archive_checksum is not None and doc.archive_filename is None:
|
||||
messages.error(
|
||||
doc.pk,
|
||||
"Document has an archive file checksum, but no archive filename.",
|
||||
)
|
||||
elif doc.archive_checksum is None and doc.archive_filename is not None:
|
||||
messages.error(
|
||||
doc.pk,
|
||||
"Document has an archive file, but its checksum is missing.",
|
||||
)
|
||||
elif doc.has_archive_version:
|
||||
archive_path: Final[Path] = Path(doc.archive_path).resolve()
|
||||
if not archive_path.exists() or not archive_path.is_file():
|
||||
messages.error(doc.pk, "Archived version of document does not exist.")
|
||||
else:
|
||||
if archive_path in present_files:
|
||||
present_files.remove(archive_path)
|
||||
try:
|
||||
checksum = hashlib.md5(archive_path.read_bytes()).hexdigest()
|
||||
except OSError as e:
|
||||
messages.error(
|
||||
doc.pk,
|
||||
f"Cannot read archive file of document : {e}",
|
||||
)
|
||||
else:
|
||||
if checksum != doc.archive_checksum:
|
||||
messages.error(
|
||||
doc.pk,
|
||||
"Checksum mismatch of archived document. "
|
||||
f"Stored: {doc.archive_checksum}, "
|
||||
f"actual: {checksum}.",
|
||||
)
|
||||
|
||||
|
||||
def _check_document(
|
||||
doc: Document,
|
||||
messages: SanityCheckMessages,
|
||||
present_files: set[Path],
|
||||
) -> None:
|
||||
"""Run all checks for a single document."""
|
||||
_check_thumbnail(doc, messages, present_files)
|
||||
_check_original(doc, messages, present_files)
|
||||
_check_archive(doc, messages, present_files)
|
||||
_check_content(doc, messages)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public entry point
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def check_sanity(
|
||||
*,
|
||||
scheduled: bool = True,
|
||||
iter_wrapper: IterWrapper[Document] = _identity,
|
||||
) -> SanityCheckMessages:
|
||||
"""Run a full sanity check on the document archive.
|
||||
|
||||
Args:
|
||||
scheduled: Whether this is a scheduled (automatic) or manual check.
|
||||
Controls the task type recorded in the database.
|
||||
iter_wrapper: A callable that wraps the document iterable, e.g.,
|
||||
for progress bar display. Defaults to identity (no wrapping).
|
||||
|
||||
Returns:
|
||||
A SanityCheckMessages instance containing all detected issues.
|
||||
"""
|
||||
paperless_task = PaperlessTask.objects.create(
|
||||
task_id=uuid.uuid4(),
|
||||
type=(
|
||||
PaperlessTask.TaskType.SCHEDULED_TASK
|
||||
if scheduled
|
||||
else PaperlessTask.TaskType.MANUAL_TASK
|
||||
),
|
||||
task_name=PaperlessTask.TaskName.CHECK_SANITY,
|
||||
status=states.STARTED,
|
||||
date_created=timezone.now(),
|
||||
date_started=timezone.now(),
|
||||
)
|
||||
|
||||
messages = SanityCheckMessages()
|
||||
present_files = _build_present_files()
|
||||
|
||||
documents = Document.global_objects.all()
|
||||
for doc in iter_wrapper(documents):
|
||||
_check_document(doc, messages, present_files)
|
||||
# other document checks
|
||||
if not doc.content:
|
||||
messages.info(doc.pk, "Document contains no OCR data")
|
||||
|
||||
for extra_file in present_files:
|
||||
messages.warning(None, f"Orphaned file in media dir: {extra_file}")
|
||||
|
||||
paperless_task.status = states.SUCCESS if not messages.has_error else states.FAILURE
|
||||
if messages.total_issue_count == 0:
|
||||
paperless_task.result = "No issues found."
|
||||
else:
|
||||
parts: list[str] = []
|
||||
if messages.document_error_count:
|
||||
parts.append(f"{messages.document_error_count} document(s) with errors")
|
||||
if messages.document_warning_count:
|
||||
parts.append(f"{messages.document_warning_count} document(s) with warnings")
|
||||
if messages.global_warning_count:
|
||||
parts.append(f"{messages.global_warning_count} global warning(s)")
|
||||
paperless_task.result = ", ".join(parts) + " found."
|
||||
if messages.has_error:
|
||||
paperless_task.result += " Check logs for details."
|
||||
|
||||
# result is concatenated messages
|
||||
paperless_task.result = f"{len(messages)} issues found."
|
||||
if messages.has_error:
|
||||
paperless_task.result += " Check logs for details."
|
||||
paperless_task.date_done = timezone.now()
|
||||
paperless_task.save(update_fields=["status", "result", "date_done"])
|
||||
|
||||
return messages
|
||||
|
||||
@@ -596,16 +596,6 @@ def update_filename_and_move_files(
|
||||
root=settings.ARCHIVE_DIR,
|
||||
)
|
||||
|
||||
# Keep version files in sync with root
|
||||
if instance.root_document_id is None:
|
||||
for version_doc in Document.objects.filter(root_document_id=instance.pk).only(
|
||||
"pk",
|
||||
):
|
||||
update_filename_and_move_files(
|
||||
Document,
|
||||
version_doc,
|
||||
)
|
||||
|
||||
|
||||
@shared_task
|
||||
def process_cf_select_update(custom_field: CustomField) -> None:
|
||||
|
||||
@@ -4,13 +4,11 @@ import logging
|
||||
import shutil
|
||||
import uuid
|
||||
import zipfile
|
||||
from collections.abc import Callable
|
||||
from collections.abc import Iterable
|
||||
from pathlib import Path
|
||||
from tempfile import TemporaryDirectory
|
||||
from tempfile import mkstemp
|
||||
from typing import TypeVar
|
||||
|
||||
import tqdm
|
||||
from celery import Task
|
||||
from celery import shared_task
|
||||
from celery import states
|
||||
@@ -68,19 +66,11 @@ from paperless_ai.indexing import llm_index_add_or_update_document
|
||||
from paperless_ai.indexing import llm_index_remove_document
|
||||
from paperless_ai.indexing import update_llm_index
|
||||
|
||||
_T = TypeVar("_T")
|
||||
IterWrapper = Callable[[Iterable[_T]], Iterable[_T]]
|
||||
|
||||
|
||||
if settings.AUDIT_LOG_ENABLED:
|
||||
from auditlog.models import LogEntry
|
||||
logger = logging.getLogger("paperless.tasks")
|
||||
|
||||
|
||||
def _identity(iterable: Iterable[_T]) -> Iterable[_T]:
|
||||
return iterable
|
||||
|
||||
|
||||
@shared_task
|
||||
def index_optimize() -> None:
|
||||
ix = index.open_index()
|
||||
@@ -88,13 +78,13 @@ def index_optimize() -> None:
|
||||
writer.commit(optimize=True)
|
||||
|
||||
|
||||
def index_reindex(*, iter_wrapper: IterWrapper[Document] = _identity) -> None:
|
||||
def index_reindex(*, progress_bar_disable=False) -> None:
|
||||
documents = Document.objects.all()
|
||||
|
||||
ix = index.open_index(recreate=True)
|
||||
|
||||
with AsyncWriter(ix) as writer:
|
||||
for document in iter_wrapper(documents):
|
||||
for document in tqdm.tqdm(documents, disable=progress_bar_disable):
|
||||
index.update_document(writer, document)
|
||||
|
||||
|
||||
@@ -237,30 +227,20 @@ def consume_file(
|
||||
@shared_task
|
||||
def sanity_check(*, scheduled=True, raise_on_error=True):
|
||||
messages = sanity_checker.check_sanity(scheduled=scheduled)
|
||||
|
||||
messages.log_messages()
|
||||
|
||||
if not messages.has_error and not messages.has_warning and not messages.has_info:
|
||||
return "No issues detected."
|
||||
|
||||
parts: list[str] = []
|
||||
if messages.document_error_count:
|
||||
parts.append(f"{messages.document_error_count} document(s) with errors")
|
||||
if messages.document_warning_count:
|
||||
parts.append(f"{messages.document_warning_count} document(s) with warnings")
|
||||
if messages.document_info_count:
|
||||
parts.append(f"{messages.document_info_count} document(s) with infos")
|
||||
if messages.global_warning_count:
|
||||
parts.append(f"{messages.global_warning_count} global warning(s)")
|
||||
|
||||
summary = ", ".join(parts) + " found."
|
||||
|
||||
if messages.has_error:
|
||||
message = summary + " Check logs for details."
|
||||
message = "Sanity check exited with errors. See log."
|
||||
if raise_on_error:
|
||||
raise SanityCheckFailedException(message)
|
||||
return message
|
||||
|
||||
return summary
|
||||
elif messages.has_warning:
|
||||
return "Sanity check exited with warnings. See log."
|
||||
elif len(messages) > 0:
|
||||
return "Sanity check exited with infos. See log."
|
||||
else:
|
||||
return "No issues detected."
|
||||
|
||||
|
||||
@shared_task
|
||||
@@ -285,6 +265,7 @@ def bulk_update_documents(document_ids) -> None:
|
||||
ai_config = AIConfig()
|
||||
if ai_config.llm_index_enabled:
|
||||
update_llm_index(
|
||||
progress_bar_disable=True,
|
||||
rebuild=False,
|
||||
)
|
||||
|
||||
@@ -625,7 +606,7 @@ def update_document_parent_tags(tag: Tag, new_parent: Tag) -> None:
|
||||
@shared_task
|
||||
def llmindex_index(
|
||||
*,
|
||||
iter_wrapper: IterWrapper[Document] = _identity,
|
||||
progress_bar_disable=True,
|
||||
rebuild=False,
|
||||
scheduled=True,
|
||||
auto=False,
|
||||
@@ -648,7 +629,7 @@ def llmindex_index(
|
||||
|
||||
try:
|
||||
result = update_llm_index(
|
||||
iter_wrapper=iter_wrapper,
|
||||
progress_bar_disable=progress_bar_disable,
|
||||
rebuild=rebuild,
|
||||
)
|
||||
task.status = states.SUCCESS
|
||||
|
||||
@@ -1,96 +1,10 @@
|
||||
import shutil
|
||||
import zoneinfo
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import filelock
|
||||
import pytest
|
||||
from django.contrib.auth import get_user_model
|
||||
from pytest_django.fixtures import SettingsWrapper
|
||||
from rest_framework.test import APIClient
|
||||
|
||||
from documents.tests.factories import DocumentFactory
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from documents.models import Document
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class PaperlessDirs:
|
||||
"""Standard Paperless-ngx directory layout for tests."""
|
||||
|
||||
media: Path
|
||||
originals: Path
|
||||
archive: Path
|
||||
thumbnails: Path
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def samples_dir() -> Path:
|
||||
"""Path to the shared test sample documents."""
|
||||
return Path(__file__).parent / "samples" / "documents"
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def paperless_dirs(tmp_path: Path) -> PaperlessDirs:
|
||||
"""Create and return the directory structure for testing."""
|
||||
media = tmp_path / "media"
|
||||
dirs = PaperlessDirs(
|
||||
media=media,
|
||||
originals=media / "documents" / "originals",
|
||||
archive=media / "documents" / "archive",
|
||||
thumbnails=media / "documents" / "thumbnails",
|
||||
)
|
||||
for d in (dirs.originals, dirs.archive, dirs.thumbnails):
|
||||
d.mkdir(parents=True)
|
||||
return dirs
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def _media_settings(paperless_dirs: PaperlessDirs, settings) -> None:
|
||||
"""Configure Django settings to point at temp directories."""
|
||||
settings.MEDIA_ROOT = paperless_dirs.media
|
||||
settings.ORIGINALS_DIR = paperless_dirs.originals
|
||||
settings.ARCHIVE_DIR = paperless_dirs.archive
|
||||
settings.THUMBNAIL_DIR = paperless_dirs.thumbnails
|
||||
settings.MEDIA_LOCK = paperless_dirs.media / "media.lock"
|
||||
settings.IGNORABLE_FILES = {".DS_Store", "Thumbs.db", "desktop.ini"}
|
||||
settings.APP_LOGO = ""
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def sample_doc(
|
||||
paperless_dirs: PaperlessDirs,
|
||||
_media_settings: None,
|
||||
samples_dir: Path,
|
||||
) -> "Document":
|
||||
"""Create a document with valid files and matching checksums."""
|
||||
with filelock.FileLock(paperless_dirs.media / "media.lock"):
|
||||
shutil.copy(
|
||||
samples_dir / "originals" / "0000001.pdf",
|
||||
paperless_dirs.originals / "0000001.pdf",
|
||||
)
|
||||
shutil.copy(
|
||||
samples_dir / "archive" / "0000001.pdf",
|
||||
paperless_dirs.archive / "0000001.pdf",
|
||||
)
|
||||
shutil.copy(
|
||||
samples_dir / "thumbnails" / "0000001.webp",
|
||||
paperless_dirs.thumbnails / "0000001.webp",
|
||||
)
|
||||
|
||||
return DocumentFactory(
|
||||
title="test",
|
||||
checksum="42995833e01aea9b3edee44bbfdd7ce1",
|
||||
archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b",
|
||||
content="test content",
|
||||
pk=1,
|
||||
filename="0000001.pdf",
|
||||
mime_type="application/pdf",
|
||||
archive_filename="0000001.pdf",
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def settings_timezone(settings: SettingsWrapper) -> zoneinfo.ZoneInfo:
|
||||
|
||||
@@ -1,193 +0,0 @@
|
||||
"""Tests for the document_sanity_checker management command.
|
||||
|
||||
Verifies Rich rendering (table, panel, summary) and end-to-end CLI behavior.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from io import StringIO
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import pytest
|
||||
from django.core.management import call_command
|
||||
from rich.console import Console
|
||||
|
||||
from documents.management.commands.document_sanity_checker import Command
|
||||
from documents.sanity_checker import SanityCheckMessages
|
||||
from documents.tests.factories import DocumentFactory
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from documents.models import Document
|
||||
from documents.tests.conftest import PaperlessDirs
|
||||
|
||||
|
||||
def _render_to_string(messages: SanityCheckMessages) -> str:
|
||||
"""Render command output to a plain string for assertion."""
|
||||
buf = StringIO()
|
||||
cmd = Command()
|
||||
cmd.console = Console(file=buf, width=120, no_color=True)
|
||||
cmd._render_results(messages)
|
||||
return buf.getvalue()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Rich rendering
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestRenderResultsNoIssues:
|
||||
"""No DB access needed -- renders an empty SanityCheckMessages."""
|
||||
|
||||
def test_shows_panel(self) -> None:
|
||||
output = _render_to_string(SanityCheckMessages())
|
||||
assert "No issues detected" in output
|
||||
assert "Sanity Check" in output
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
class TestRenderResultsWithIssues:
|
||||
def test_error_row(self, sample_doc: Document) -> None:
|
||||
msgs = SanityCheckMessages()
|
||||
msgs.error(sample_doc.pk, "Original missing")
|
||||
output = _render_to_string(msgs)
|
||||
assert "Sanity Check Results" in output
|
||||
assert "ERROR" in output
|
||||
assert "Original missing" in output
|
||||
assert f"#{sample_doc.pk}" in output
|
||||
assert sample_doc.title in output
|
||||
|
||||
def test_warning_row(self, sample_doc: Document) -> None:
|
||||
msgs = SanityCheckMessages()
|
||||
msgs.warning(sample_doc.pk, "Suspicious file")
|
||||
output = _render_to_string(msgs)
|
||||
assert "WARN" in output
|
||||
assert "Suspicious file" in output
|
||||
|
||||
def test_info_row(self, sample_doc: Document) -> None:
|
||||
msgs = SanityCheckMessages()
|
||||
msgs.info(sample_doc.pk, "No OCR data")
|
||||
output = _render_to_string(msgs)
|
||||
assert "INFO" in output
|
||||
assert "No OCR data" in output
|
||||
|
||||
@pytest.mark.usefixtures("_media_settings")
|
||||
def test_global_message(self) -> None:
|
||||
msgs = SanityCheckMessages()
|
||||
msgs.warning(None, "Orphaned file: /tmp/stray.pdf")
|
||||
output = _render_to_string(msgs)
|
||||
assert "(global)" in output
|
||||
assert "Orphaned file" in output
|
||||
|
||||
def test_multiple_messages_same_doc(self, sample_doc: Document) -> None:
|
||||
msgs = SanityCheckMessages()
|
||||
msgs.error(sample_doc.pk, "Thumbnail missing")
|
||||
msgs.error(sample_doc.pk, "Checksum mismatch")
|
||||
output = _render_to_string(msgs)
|
||||
assert "Thumbnail missing" in output
|
||||
assert "Checksum mismatch" in output
|
||||
|
||||
@pytest.mark.usefixtures("_media_settings")
|
||||
def test_unknown_doc_pk(self) -> None:
|
||||
msgs = SanityCheckMessages()
|
||||
msgs.error(99999, "Ghost document")
|
||||
output = _render_to_string(msgs)
|
||||
assert "#99999" in output
|
||||
assert "Unknown" in output
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
class TestRenderResultsSummary:
|
||||
def test_errors_only(self, sample_doc: Document) -> None:
|
||||
msgs = SanityCheckMessages()
|
||||
msgs.error(sample_doc.pk, "broken")
|
||||
output = _render_to_string(msgs)
|
||||
assert "1 document(s) with" in output
|
||||
assert "errors" in output
|
||||
|
||||
def test_warnings_only(self, sample_doc: Document) -> None:
|
||||
msgs = SanityCheckMessages()
|
||||
msgs.warning(sample_doc.pk, "odd")
|
||||
output = _render_to_string(msgs)
|
||||
assert "1 document(s) with" in output
|
||||
assert "warnings" in output
|
||||
|
||||
def test_infos_only(self, sample_doc: Document) -> None:
|
||||
msgs = SanityCheckMessages()
|
||||
msgs.info(sample_doc.pk, "no OCR")
|
||||
output = _render_to_string(msgs)
|
||||
assert "1 document(s) with infos" in output
|
||||
|
||||
def test_empty_messages(self) -> None:
|
||||
msgs = SanityCheckMessages()
|
||||
output = _render_to_string(msgs)
|
||||
assert "No issues detected." in output
|
||||
|
||||
def test_document_errors_and_global_warnings(self, sample_doc: Document) -> None:
|
||||
msgs = SanityCheckMessages()
|
||||
msgs.error(sample_doc.pk, "broken")
|
||||
msgs.warning(None, "orphan")
|
||||
output = _render_to_string(msgs)
|
||||
assert "1 document(s) with" in output
|
||||
assert "errors" in output
|
||||
assert "1 global warning(s)" in output
|
||||
assert "2 document(s)" not in output
|
||||
|
||||
def test_global_warnings_only(self) -> None:
|
||||
msgs = SanityCheckMessages()
|
||||
msgs.warning(None, "extra file")
|
||||
output = _render_to_string(msgs)
|
||||
assert "1 global warning(s)" in output
|
||||
assert "document(s) with" not in output
|
||||
|
||||
def test_all_levels_combined(self, sample_doc: Document) -> None:
|
||||
msgs = SanityCheckMessages()
|
||||
msgs.error(sample_doc.pk, "broken")
|
||||
msgs.warning(sample_doc.pk, "odd")
|
||||
msgs.info(sample_doc.pk, "fyi")
|
||||
msgs.warning(None, "extra file")
|
||||
output = _render_to_string(msgs)
|
||||
assert "1 document(s) with errors" in output
|
||||
assert "1 document(s) with warnings" in output
|
||||
assert "1 document(s) with infos" in output
|
||||
assert "1 global warning(s)" in output
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# End-to-end command execution
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
@pytest.mark.management
|
||||
class TestDocumentSanityCheckerCommand:
|
||||
def test_no_issues(self, sample_doc: Document) -> None:
|
||||
out = StringIO()
|
||||
call_command("document_sanity_checker", "--no-progress-bar", stdout=out)
|
||||
assert "No issues detected" in out.getvalue()
|
||||
|
||||
def test_missing_original(self, sample_doc: Document) -> None:
|
||||
Path(sample_doc.source_path).unlink()
|
||||
out = StringIO()
|
||||
call_command("document_sanity_checker", "--no-progress-bar", stdout=out)
|
||||
output = out.getvalue()
|
||||
assert "ERROR" in output
|
||||
assert "Original of document does not exist" in output
|
||||
|
||||
@pytest.mark.usefixtures("_media_settings")
|
||||
def test_checksum_mismatch(self, paperless_dirs: PaperlessDirs) -> None:
|
||||
"""Lightweight document with zero-byte files triggers checksum mismatch."""
|
||||
doc = DocumentFactory(
|
||||
title="test",
|
||||
content="test",
|
||||
filename="test.pdf",
|
||||
checksum="abc",
|
||||
)
|
||||
Path(doc.source_path).touch()
|
||||
Path(doc.thumbnail_path).touch()
|
||||
|
||||
out = StringIO()
|
||||
call_command("document_sanity_checker", "--no-progress-bar", stdout=out)
|
||||
output = out.getvalue()
|
||||
assert "ERROR" in output
|
||||
assert "Checksum mismatch. Stored: abc, actual:" in output
|
||||
@@ -699,13 +699,6 @@ class TestConsumer(
|
||||
self.assertIsNotNone(root_doc)
|
||||
assert root_doc is not None
|
||||
|
||||
root_storage_path = StoragePath.objects.create(
|
||||
name="version-root-path",
|
||||
path="root/{{title}}",
|
||||
)
|
||||
root_doc.storage_path = root_storage_path
|
||||
root_doc.save()
|
||||
|
||||
actor = User.objects.create_user(
|
||||
username="actor",
|
||||
email="actor@example.com",
|
||||
@@ -742,7 +735,7 @@ class TestConsumer(
|
||||
)
|
||||
consumer.setup()
|
||||
try:
|
||||
self.assertEqual(consumer.filename, version_file.name)
|
||||
self.assertTrue(consumer.filename.endswith("_v0.pdf"))
|
||||
consumer.run()
|
||||
finally:
|
||||
consumer.cleanup()
|
||||
@@ -752,9 +745,8 @@ class TestConsumer(
|
||||
version = versions.first()
|
||||
assert version is not None
|
||||
assert version.original_filename is not None
|
||||
self.assertEqual(version.version_index, 1)
|
||||
self.assertEqual(version.version_label, "v2")
|
||||
self.assertEqual(version.original_filename, version_file.name)
|
||||
self.assertTrue(version.original_filename.endswith("_v0.pdf"))
|
||||
self.assertTrue(bool(version.content))
|
||||
|
||||
@override_settings(AUDIT_LOG_ENABLED=True)
|
||||
@@ -803,7 +795,7 @@ class TestConsumer(
|
||||
)
|
||||
consumer.setup()
|
||||
try:
|
||||
self.assertEqual(consumer.filename, "valid_pdf_version-upload")
|
||||
self.assertEqual(consumer.filename, "valid_pdf_version-upload_v0")
|
||||
consumer.run()
|
||||
finally:
|
||||
consumer.cleanup()
|
||||
@@ -813,67 +805,9 @@ class TestConsumer(
|
||||
)
|
||||
self.assertIsNotNone(version)
|
||||
assert version is not None
|
||||
self.assertEqual(version.version_index, 1)
|
||||
self.assertEqual(version.original_filename, "valid_pdf_version-upload")
|
||||
self.assertEqual(version.original_filename, "valid_pdf_version-upload_v0")
|
||||
self.assertTrue(bool(version.content))
|
||||
|
||||
@override_settings(AUDIT_LOG_ENABLED=True)
|
||||
@mock.patch("documents.consumer.load_classifier")
|
||||
def test_consume_version_index_monotonic_after_version_deletion(self, m) -> None:
|
||||
m.return_value = MagicMock()
|
||||
|
||||
with self.get_consumer(self.get_test_file()) as consumer:
|
||||
consumer.run()
|
||||
|
||||
root_doc = Document.objects.first()
|
||||
self.assertIsNotNone(root_doc)
|
||||
assert root_doc is not None
|
||||
|
||||
def consume_version(version_file: Path) -> Document:
|
||||
status = DummyProgressManager(version_file.name, None)
|
||||
overrides = DocumentMetadataOverrides()
|
||||
doc = ConsumableDocument(
|
||||
DocumentSource.ApiUpload,
|
||||
original_file=version_file,
|
||||
root_document_id=root_doc.pk,
|
||||
)
|
||||
preflight = ConsumerPreflightPlugin(
|
||||
doc,
|
||||
overrides,
|
||||
status, # type: ignore[arg-type]
|
||||
self.dirs.scratch_dir,
|
||||
"task-id",
|
||||
)
|
||||
preflight.setup()
|
||||
preflight.run()
|
||||
|
||||
consumer = ConsumerPlugin(
|
||||
doc,
|
||||
overrides,
|
||||
status, # type: ignore[arg-type]
|
||||
self.dirs.scratch_dir,
|
||||
"task-id",
|
||||
)
|
||||
consumer.setup()
|
||||
try:
|
||||
consumer.run()
|
||||
finally:
|
||||
consumer.cleanup()
|
||||
|
||||
version = (
|
||||
Document.objects.filter(root_document=root_doc).order_by("-id").first()
|
||||
)
|
||||
assert version is not None
|
||||
return version
|
||||
|
||||
v1 = consume_version(self.get_test_file2())
|
||||
self.assertEqual(v1.version_index, 1)
|
||||
v1.delete()
|
||||
|
||||
# The next version should have version_index 2, even though version_index 1 was deleted
|
||||
v2 = consume_version(self.get_test_file())
|
||||
self.assertEqual(v2.version_index, 2)
|
||||
|
||||
@mock.patch("documents.consumer.load_classifier")
|
||||
def testClassifyDocument(self, m) -> None:
|
||||
correspondent = Correspondent.objects.create(
|
||||
|
||||
@@ -77,58 +77,6 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
settings.ORIGINALS_DIR / "test" / "test.pdf",
|
||||
)
|
||||
|
||||
@override_settings(FILENAME_FORMAT=None)
|
||||
def test_root_storage_path_change_updates_version_files(self) -> None:
|
||||
old_storage_path = StoragePath.objects.create(
|
||||
name="old-path",
|
||||
path="old/{{title}}",
|
||||
)
|
||||
new_storage_path = StoragePath.objects.create(
|
||||
name="new-path",
|
||||
path="new/{{title}}",
|
||||
)
|
||||
|
||||
root_doc = Document.objects.create(
|
||||
title="rootdoc",
|
||||
mime_type="application/pdf",
|
||||
checksum="root-checksum",
|
||||
storage_path=old_storage_path,
|
||||
)
|
||||
version_doc = Document.objects.create(
|
||||
title="version-title",
|
||||
mime_type="application/pdf",
|
||||
checksum="version-checksum",
|
||||
root_document=root_doc,
|
||||
version_index=1,
|
||||
)
|
||||
|
||||
Document.objects.filter(pk=root_doc.pk).update(
|
||||
filename=generate_filename(root_doc),
|
||||
)
|
||||
Document.objects.filter(pk=version_doc.pk).update(
|
||||
filename=generate_filename(version_doc),
|
||||
)
|
||||
root_doc.refresh_from_db()
|
||||
version_doc.refresh_from_db()
|
||||
|
||||
create_source_path_directory(root_doc.source_path)
|
||||
Path(root_doc.source_path).touch()
|
||||
create_source_path_directory(version_doc.source_path)
|
||||
Path(version_doc.source_path).touch()
|
||||
|
||||
root_doc.storage_path = new_storage_path
|
||||
root_doc.save()
|
||||
|
||||
root_doc.refresh_from_db()
|
||||
version_doc.refresh_from_db()
|
||||
|
||||
self.assertEqual(root_doc.filename, "new/rootdoc.pdf")
|
||||
self.assertEqual(version_doc.filename, "new/rootdoc_v1.pdf")
|
||||
self.assertIsFile(root_doc.source_path)
|
||||
self.assertIsFile(version_doc.source_path)
|
||||
self.assertIsNotFile(settings.ORIGINALS_DIR / "old" / "rootdoc.pdf")
|
||||
self.assertIsNotFile(settings.ORIGINALS_DIR / "old" / "rootdoc_v1.pdf")
|
||||
|
||||
@override_settings(FILENAME_FORMAT="{correspondent}/{correspondent}")
|
||||
def test_file_renaming_missing_permissions(self) -> None:
|
||||
document = Document()
|
||||
@@ -1274,94 +1222,6 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
|
||||
Path("logs.pdf"),
|
||||
)
|
||||
|
||||
@override_settings(FILENAME_FORMAT="{title}")
|
||||
def test_version_index_suffix_for_template_filename(self) -> None:
|
||||
root_doc = Document.objects.create(
|
||||
title="the_doc",
|
||||
mime_type="application/pdf",
|
||||
checksum="root-checksum",
|
||||
)
|
||||
version_doc = Document.objects.create(
|
||||
title="the_doc",
|
||||
mime_type="application/pdf",
|
||||
checksum="version-checksum",
|
||||
root_document=root_doc,
|
||||
version_index=1,
|
||||
)
|
||||
|
||||
self.assertEqual(generate_filename(version_doc), Path("the_doc_v1.pdf"))
|
||||
self.assertEqual(
|
||||
generate_filename(version_doc, counter=1),
|
||||
Path("the_doc_v1_01.pdf"),
|
||||
)
|
||||
|
||||
@override_settings(FILENAME_FORMAT=None)
|
||||
def test_version_index_suffix_for_default_filename(self) -> None:
|
||||
root_doc = Document.objects.create(
|
||||
title="root",
|
||||
mime_type="text/plain",
|
||||
checksum="root-checksum",
|
||||
)
|
||||
version_doc = Document.objects.create(
|
||||
title="root",
|
||||
mime_type="text/plain",
|
||||
checksum="version-checksum",
|
||||
root_document=root_doc,
|
||||
version_index=2,
|
||||
)
|
||||
|
||||
self.assertEqual(
|
||||
generate_filename(version_doc),
|
||||
Path(f"{root_doc.pk:07d}_v2.txt"),
|
||||
)
|
||||
self.assertEqual(
|
||||
generate_filename(version_doc, archive_filename=True),
|
||||
Path(f"{root_doc.pk:07d}_v2.pdf"),
|
||||
)
|
||||
|
||||
@override_settings(FILENAME_FORMAT="{original_name}")
|
||||
def test_version_index_suffix_with_original_name_placeholder(self) -> None:
|
||||
root_doc = Document.objects.create(
|
||||
title="root",
|
||||
mime_type="application/pdf",
|
||||
checksum="root-checksum",
|
||||
original_filename="root-upload.pdf",
|
||||
)
|
||||
version_doc = Document.objects.create(
|
||||
title="root",
|
||||
mime_type="application/pdf",
|
||||
checksum="version-checksum",
|
||||
root_document=root_doc,
|
||||
version_index=1,
|
||||
original_filename="version-upload.pdf",
|
||||
)
|
||||
|
||||
self.assertEqual(generate_filename(version_doc), Path("root-upload_v1.pdf"))
|
||||
|
||||
def test_version_index_suffix_with_storage_path(self) -> None:
|
||||
storage_path = StoragePath.objects.create(
|
||||
name="vtest",
|
||||
path="folder/{{title}}",
|
||||
)
|
||||
root_doc = Document.objects.create(
|
||||
title="storage_doc",
|
||||
mime_type="application/pdf",
|
||||
checksum="root-checksum",
|
||||
storage_path=storage_path,
|
||||
)
|
||||
version_doc = Document.objects.create(
|
||||
title="version_title_should_not_be_used",
|
||||
mime_type="application/pdf",
|
||||
checksum="version-checksum",
|
||||
root_document=root_doc,
|
||||
version_index=3,
|
||||
)
|
||||
|
||||
self.assertEqual(
|
||||
generate_filename(version_doc),
|
||||
Path("folder/storage_doc_v3.pdf"),
|
||||
)
|
||||
|
||||
@override_settings(
|
||||
FILENAME_FORMAT="XX{correspondent}/{title}",
|
||||
FILENAME_FORMAT_REMOVE_NONE=True,
|
||||
|
||||
@@ -134,7 +134,6 @@ class TestRenamer(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
self.assertIsFile(doc2.archive_path)
|
||||
|
||||
|
||||
@pytest.mark.management
|
||||
class TestCreateClassifier(TestCase):
|
||||
@mock.patch(
|
||||
"documents.management.commands.document_create_classifier.train_classifier",
|
||||
@@ -145,6 +144,32 @@ class TestCreateClassifier(TestCase):
|
||||
m.assert_called_once()
|
||||
|
||||
|
||||
@pytest.mark.management
|
||||
class TestSanityChecker(DirectoriesMixin, TestCase):
|
||||
def test_no_issues(self) -> None:
|
||||
with self.assertLogs() as capture:
|
||||
call_command("document_sanity_checker")
|
||||
|
||||
self.assertEqual(len(capture.output), 1)
|
||||
self.assertIn("Sanity checker detected no issues.", capture.output[0])
|
||||
|
||||
def test_errors(self) -> None:
|
||||
doc = Document.objects.create(
|
||||
title="test",
|
||||
content="test",
|
||||
filename="test.pdf",
|
||||
checksum="abc",
|
||||
)
|
||||
Path(doc.source_path).touch()
|
||||
Path(doc.thumbnail_path).touch()
|
||||
|
||||
with self.assertLogs() as capture:
|
||||
call_command("document_sanity_checker")
|
||||
|
||||
self.assertEqual(len(capture.output), 2)
|
||||
self.assertIn("Checksum mismatch. Stored: abc, actual:", capture.output[1])
|
||||
|
||||
|
||||
@pytest.mark.management
|
||||
class TestConvertMariaDBUUID(TestCase):
|
||||
@mock.patch("django.db.connection.schema_editor")
|
||||
|
||||
@@ -288,7 +288,7 @@ class TestExportImport(
|
||||
self.assertEqual(Permission.objects.count(), num_permission_objects)
|
||||
messages = check_sanity()
|
||||
# everything is alright after the test
|
||||
self.assertEqual(messages.total_issue_count, 0)
|
||||
self.assertEqual(len(messages), 0)
|
||||
|
||||
def test_exporter_with_filename_format(self) -> None:
|
||||
shutil.rmtree(Path(self.dirs.media_dir) / "documents")
|
||||
|
||||
@@ -1,295 +1,192 @@
|
||||
"""Tests for the sanity checker module.
|
||||
|
||||
Tests exercise ``check_sanity`` as a whole, verifying document validation,
|
||||
orphan detection, task recording, and the iter_wrapper contract.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import pytest
|
||||
import filelock
|
||||
from django.conf import settings
|
||||
from django.test import TestCase
|
||||
from django.test import override_settings
|
||||
|
||||
from documents.models import Document
|
||||
from documents.models import PaperlessTask
|
||||
from documents.sanity_checker import check_sanity
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Iterable
|
||||
|
||||
from documents.tests.conftest import PaperlessDirs
|
||||
from documents.tests.utils import DirectoriesMixin
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
class TestCheckSanityNoDocuments:
|
||||
"""Sanity checks against an empty archive."""
|
||||
class TestSanityCheck(DirectoriesMixin, TestCase):
|
||||
def make_test_data(self):
|
||||
with filelock.FileLock(settings.MEDIA_LOCK):
|
||||
# just make sure that the lockfile is present.
|
||||
shutil.copy(
|
||||
(
|
||||
Path(__file__).parent
|
||||
/ "samples"
|
||||
/ "documents"
|
||||
/ "originals"
|
||||
/ "0000001.pdf"
|
||||
),
|
||||
Path(self.dirs.originals_dir) / "0000001.pdf",
|
||||
)
|
||||
shutil.copy(
|
||||
(
|
||||
Path(__file__).parent
|
||||
/ "samples"
|
||||
/ "documents"
|
||||
/ "archive"
|
||||
/ "0000001.pdf"
|
||||
),
|
||||
Path(self.dirs.archive_dir) / "0000001.pdf",
|
||||
)
|
||||
shutil.copy(
|
||||
(
|
||||
Path(__file__).parent
|
||||
/ "samples"
|
||||
/ "documents"
|
||||
/ "thumbnails"
|
||||
/ "0000001.webp"
|
||||
),
|
||||
Path(self.dirs.thumbnail_dir) / "0000001.webp",
|
||||
)
|
||||
|
||||
@pytest.mark.usefixtures("_media_settings")
|
||||
def test_no_documents(self) -> None:
|
||||
return Document.objects.create(
|
||||
title="test",
|
||||
checksum="42995833e01aea9b3edee44bbfdd7ce1",
|
||||
archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b",
|
||||
content="test",
|
||||
pk=1,
|
||||
filename="0000001.pdf",
|
||||
mime_type="application/pdf",
|
||||
archive_filename="0000001.pdf",
|
||||
)
|
||||
|
||||
def assertSanityError(self, doc: Document, messageRegex) -> None:
|
||||
messages = check_sanity()
|
||||
assert not messages.has_error
|
||||
assert not messages.has_warning
|
||||
assert messages.total_issue_count == 0
|
||||
|
||||
@pytest.mark.usefixtures("_media_settings")
|
||||
def test_no_issues_logs_clean(self, caplog: pytest.LogCaptureFixture) -> None:
|
||||
messages = check_sanity()
|
||||
with caplog.at_level(logging.INFO, logger="paperless.sanity_checker"):
|
||||
self.assertTrue(messages.has_error)
|
||||
with self.assertLogs() as capture:
|
||||
messages.log_messages()
|
||||
assert "Sanity checker detected no issues." in caplog.text
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
class TestCheckSanityHealthyDocument:
|
||||
def test_no_errors(self, sample_doc: Document) -> None:
|
||||
messages = check_sanity()
|
||||
assert not messages.has_error
|
||||
assert not messages.has_warning
|
||||
assert messages.total_issue_count == 0
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
class TestCheckSanityThumbnail:
|
||||
def test_missing(self, sample_doc: Document) -> None:
|
||||
Path(sample_doc.thumbnail_path).unlink()
|
||||
messages = check_sanity()
|
||||
assert messages.has_error
|
||||
assert any(
|
||||
"Thumbnail of document does not exist" in m["message"]
|
||||
for m in messages[sample_doc.pk]
|
||||
)
|
||||
|
||||
def test_unreadable(self, sample_doc: Document) -> None:
|
||||
thumb = Path(sample_doc.thumbnail_path)
|
||||
thumb.chmod(0o000)
|
||||
try:
|
||||
messages = check_sanity()
|
||||
assert messages.has_error
|
||||
assert any(
|
||||
"Cannot read thumbnail" in m["message"] for m in messages[sample_doc.pk]
|
||||
self.assertEqual(
|
||||
capture.records[0].message,
|
||||
f"Detected following issue(s) with document #{doc.pk}, titled {doc.title}",
|
||||
)
|
||||
finally:
|
||||
thumb.chmod(0o644)
|
||||
self.assertRegex(capture.records[1].message, messageRegex)
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
class TestCheckSanityOriginal:
|
||||
def test_missing(self, sample_doc: Document) -> None:
|
||||
Path(sample_doc.source_path).unlink()
|
||||
def test_no_issues(self) -> None:
|
||||
self.make_test_data()
|
||||
messages = check_sanity()
|
||||
assert messages.has_error
|
||||
assert any(
|
||||
"Original of document does not exist" in m["message"]
|
||||
for m in messages[sample_doc.pk]
|
||||
)
|
||||
|
||||
def test_checksum_mismatch(self, sample_doc: Document) -> None:
|
||||
sample_doc.checksum = "badhash"
|
||||
sample_doc.save()
|
||||
messages = check_sanity()
|
||||
assert messages.has_error
|
||||
assert any(
|
||||
"Checksum mismatch" in m["message"] and "badhash" in m["message"]
|
||||
for m in messages[sample_doc.pk]
|
||||
)
|
||||
|
||||
def test_unreadable(self, sample_doc: Document) -> None:
|
||||
src = Path(sample_doc.source_path)
|
||||
src.chmod(0o000)
|
||||
try:
|
||||
messages = check_sanity()
|
||||
assert messages.has_error
|
||||
assert any(
|
||||
"Cannot read original" in m["message"] for m in messages[sample_doc.pk]
|
||||
self.assertFalse(messages.has_error)
|
||||
self.assertFalse(messages.has_warning)
|
||||
with self.assertLogs() as capture:
|
||||
messages.log_messages()
|
||||
self.assertEqual(len(capture.output), 1)
|
||||
self.assertEqual(capture.records[0].levelno, logging.INFO)
|
||||
self.assertEqual(
|
||||
capture.records[0].message,
|
||||
"Sanity checker detected no issues.",
|
||||
)
|
||||
finally:
|
||||
src.chmod(0o644)
|
||||
|
||||
def test_no_docs(self) -> None:
|
||||
self.assertEqual(len(check_sanity()), 0)
|
||||
|
||||
@pytest.mark.django_db
|
||||
class TestCheckSanityArchive:
|
||||
def test_checksum_without_filename(self, sample_doc: Document) -> None:
|
||||
sample_doc.archive_filename = None
|
||||
sample_doc.save()
|
||||
def test_success(self) -> None:
|
||||
self.make_test_data()
|
||||
self.assertEqual(len(check_sanity()), 0)
|
||||
|
||||
def test_no_thumbnail(self) -> None:
|
||||
doc = self.make_test_data()
|
||||
Path(doc.thumbnail_path).unlink()
|
||||
self.assertSanityError(doc, "Thumbnail of document does not exist")
|
||||
|
||||
def test_thumbnail_no_access(self) -> None:
|
||||
doc = self.make_test_data()
|
||||
Path(doc.thumbnail_path).chmod(0o000)
|
||||
self.assertSanityError(doc, "Cannot read thumbnail file of document")
|
||||
Path(doc.thumbnail_path).chmod(0o777)
|
||||
|
||||
def test_no_original(self) -> None:
|
||||
doc = self.make_test_data()
|
||||
Path(doc.source_path).unlink()
|
||||
self.assertSanityError(doc, "Original of document does not exist.")
|
||||
|
||||
def test_original_no_access(self) -> None:
|
||||
doc = self.make_test_data()
|
||||
Path(doc.source_path).chmod(0o000)
|
||||
self.assertSanityError(doc, "Cannot read original file of document")
|
||||
Path(doc.source_path).chmod(0o777)
|
||||
|
||||
def test_original_checksum_mismatch(self) -> None:
|
||||
doc = self.make_test_data()
|
||||
doc.checksum = "WOW"
|
||||
doc.save()
|
||||
self.assertSanityError(doc, "Checksum mismatch. Stored: WOW, actual: ")
|
||||
|
||||
def test_no_archive(self) -> None:
|
||||
doc = self.make_test_data()
|
||||
Path(doc.archive_path).unlink()
|
||||
self.assertSanityError(doc, "Archived version of document does not exist.")
|
||||
|
||||
def test_archive_no_access(self) -> None:
|
||||
doc = self.make_test_data()
|
||||
Path(doc.archive_path).chmod(0o000)
|
||||
self.assertSanityError(doc, "Cannot read archive file of document")
|
||||
Path(doc.archive_path).chmod(0o777)
|
||||
|
||||
def test_archive_checksum_mismatch(self) -> None:
|
||||
doc = self.make_test_data()
|
||||
doc.archive_checksum = "WOW"
|
||||
doc.save()
|
||||
self.assertSanityError(doc, "Checksum mismatch of archived document")
|
||||
|
||||
def test_empty_content(self) -> None:
|
||||
doc = self.make_test_data()
|
||||
doc.content = ""
|
||||
doc.save()
|
||||
messages = check_sanity()
|
||||
assert messages.has_error
|
||||
assert any(
|
||||
"checksum, but no archive filename" in m["message"]
|
||||
for m in messages[sample_doc.pk]
|
||||
self.assertFalse(messages.has_error)
|
||||
self.assertFalse(messages.has_warning)
|
||||
self.assertEqual(len(messages), 1)
|
||||
self.assertRegex(
|
||||
messages[doc.pk][0]["message"],
|
||||
"Document contains no OCR data",
|
||||
)
|
||||
|
||||
def test_filename_without_checksum(self, sample_doc: Document) -> None:
|
||||
sample_doc.archive_checksum = None
|
||||
sample_doc.save()
|
||||
def test_orphaned_file(self) -> None:
|
||||
self.make_test_data()
|
||||
Path(self.dirs.originals_dir, "orphaned").touch()
|
||||
messages = check_sanity()
|
||||
assert messages.has_error
|
||||
assert any(
|
||||
"checksum is missing" in m["message"] for m in messages[sample_doc.pk]
|
||||
self.assertTrue(messages.has_warning)
|
||||
self.assertRegex(
|
||||
messages._messages[None][0]["message"],
|
||||
"Orphaned file in media dir",
|
||||
)
|
||||
|
||||
def test_missing_file(self, sample_doc: Document) -> None:
|
||||
Path(sample_doc.archive_path).unlink()
|
||||
messages = check_sanity()
|
||||
assert messages.has_error
|
||||
assert any(
|
||||
"Archived version of document does not exist" in m["message"]
|
||||
for m in messages[sample_doc.pk]
|
||||
)
|
||||
|
||||
def test_checksum_mismatch(self, sample_doc: Document) -> None:
|
||||
sample_doc.archive_checksum = "wronghash"
|
||||
sample_doc.save()
|
||||
messages = check_sanity()
|
||||
assert messages.has_error
|
||||
assert any(
|
||||
"Checksum mismatch of archived document" in m["message"]
|
||||
for m in messages[sample_doc.pk]
|
||||
)
|
||||
|
||||
def test_unreadable(self, sample_doc: Document) -> None:
|
||||
archive = Path(sample_doc.archive_path)
|
||||
archive.chmod(0o000)
|
||||
try:
|
||||
messages = check_sanity()
|
||||
assert messages.has_error
|
||||
assert any(
|
||||
"Cannot read archive" in m["message"] for m in messages[sample_doc.pk]
|
||||
)
|
||||
finally:
|
||||
archive.chmod(0o644)
|
||||
|
||||
def test_no_archive_at_all(self, sample_doc: Document) -> None:
|
||||
"""Document with neither archive checksum nor filename is valid."""
|
||||
Path(sample_doc.archive_path).unlink()
|
||||
sample_doc.archive_checksum = None
|
||||
sample_doc.archive_filename = None
|
||||
sample_doc.save()
|
||||
messages = check_sanity()
|
||||
assert not messages.has_error
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
class TestCheckSanityContent:
|
||||
@pytest.mark.parametrize(
|
||||
"content",
|
||||
[
|
||||
pytest.param("", id="empty-string"),
|
||||
],
|
||||
@override_settings(
|
||||
APP_LOGO="logo/logo.png",
|
||||
)
|
||||
def test_no_content(self, sample_doc: Document, content: str) -> None:
|
||||
sample_doc.content = content
|
||||
sample_doc.save()
|
||||
def test_ignore_logo(self) -> None:
|
||||
self.make_test_data()
|
||||
logo_dir = Path(self.dirs.media_dir, "logo")
|
||||
logo_dir.mkdir(parents=True, exist_ok=True)
|
||||
Path(self.dirs.media_dir, "logo", "logo.png").touch()
|
||||
messages = check_sanity()
|
||||
assert not messages.has_error
|
||||
assert not messages.has_warning
|
||||
assert any("no OCR data" in m["message"] for m in messages[sample_doc.pk])
|
||||
self.assertFalse(messages.has_warning)
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
class TestCheckSanityOrphans:
|
||||
def test_orphaned_file(
|
||||
self,
|
||||
sample_doc: Document,
|
||||
paperless_dirs: PaperlessDirs,
|
||||
) -> None:
|
||||
(paperless_dirs.originals / "orphan.pdf").touch()
|
||||
def test_ignore_ignorable_files(self) -> None:
|
||||
self.make_test_data()
|
||||
Path(self.dirs.media_dir, ".DS_Store").touch()
|
||||
Path(self.dirs.media_dir, "desktop.ini").touch()
|
||||
messages = check_sanity()
|
||||
assert messages.has_warning
|
||||
assert any("Orphaned file" in m["message"] for m in messages[None])
|
||||
self.assertFalse(messages.has_warning)
|
||||
|
||||
@pytest.mark.usefixtures("_media_settings")
|
||||
def test_ignorable_files_not_flagged(
|
||||
self,
|
||||
paperless_dirs: PaperlessDirs,
|
||||
) -> None:
|
||||
(paperless_dirs.media / ".DS_Store").touch()
|
||||
(paperless_dirs.media / "desktop.ini").touch()
|
||||
messages = check_sanity()
|
||||
assert not messages.has_warning
|
||||
def test_archive_filename_no_checksum(self) -> None:
|
||||
doc = self.make_test_data()
|
||||
doc.archive_checksum = None
|
||||
doc.save()
|
||||
self.assertSanityError(doc, "has an archive file, but its checksum is missing.")
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
class TestCheckSanityIterWrapper:
|
||||
def test_wrapper_receives_documents(self, sample_doc: Document) -> None:
|
||||
seen: list[Document] = []
|
||||
|
||||
def tracking(iterable: Iterable[Document]) -> Iterable[Document]:
|
||||
for item in iterable:
|
||||
seen.append(item)
|
||||
yield item
|
||||
|
||||
check_sanity(iter_wrapper=tracking)
|
||||
assert len(seen) == 1
|
||||
assert seen[0].pk == sample_doc.pk
|
||||
|
||||
def test_default_works_without_wrapper(self, sample_doc: Document) -> None:
|
||||
messages = check_sanity()
|
||||
assert not messages.has_error
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
class TestCheckSanityTaskRecording:
|
||||
@pytest.mark.parametrize(
|
||||
("expected_type", "scheduled"),
|
||||
[
|
||||
pytest.param(PaperlessTask.TaskType.SCHEDULED_TASK, True, id="scheduled"),
|
||||
pytest.param(PaperlessTask.TaskType.MANUAL_TASK, False, id="manual"),
|
||||
],
|
||||
)
|
||||
@pytest.mark.usefixtures("_media_settings")
|
||||
def test_task_type(self, expected_type: str, *, scheduled: bool) -> None:
|
||||
check_sanity(scheduled=scheduled)
|
||||
task = PaperlessTask.objects.latest("date_created")
|
||||
assert task.task_name == PaperlessTask.TaskName.CHECK_SANITY
|
||||
assert task.type == expected_type
|
||||
|
||||
def test_success_status(self, sample_doc: Document) -> None:
|
||||
check_sanity()
|
||||
task = PaperlessTask.objects.latest("date_created")
|
||||
assert task.status == "SUCCESS"
|
||||
|
||||
def test_failure_status(self, sample_doc: Document) -> None:
|
||||
Path(sample_doc.source_path).unlink()
|
||||
check_sanity()
|
||||
task = PaperlessTask.objects.latest("date_created")
|
||||
assert task.status == "FAILURE"
|
||||
assert "Check logs for details" in task.result
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
class TestCheckSanityLogMessages:
|
||||
def test_logs_doc_issues(
|
||||
self,
|
||||
sample_doc: Document,
|
||||
caplog: pytest.LogCaptureFixture,
|
||||
) -> None:
|
||||
Path(sample_doc.source_path).unlink()
|
||||
messages = check_sanity()
|
||||
with caplog.at_level(logging.INFO, logger="paperless.sanity_checker"):
|
||||
messages.log_messages()
|
||||
assert f"document #{sample_doc.pk}" in caplog.text
|
||||
assert "Original of document does not exist" in caplog.text
|
||||
|
||||
def test_logs_global_issues(
|
||||
self,
|
||||
sample_doc: Document,
|
||||
paperless_dirs: PaperlessDirs,
|
||||
caplog: pytest.LogCaptureFixture,
|
||||
) -> None:
|
||||
(paperless_dirs.originals / "orphan.pdf").touch()
|
||||
messages = check_sanity()
|
||||
with caplog.at_level(logging.WARNING, logger="paperless.sanity_checker"):
|
||||
messages.log_messages()
|
||||
assert "Orphaned file" in caplog.text
|
||||
|
||||
@pytest.mark.usefixtures("_media_settings")
|
||||
def test_logs_unknown_doc_pk(self, caplog: pytest.LogCaptureFixture) -> None:
|
||||
"""A doc PK not in the DB logs 'Unknown' as the title."""
|
||||
messages = check_sanity()
|
||||
messages.error(99999, "Ghost document")
|
||||
with caplog.at_level(logging.INFO, logger="paperless.sanity_checker"):
|
||||
messages.log_messages()
|
||||
assert "#99999" in caplog.text
|
||||
assert "Unknown" in caplog.text
|
||||
def test_archive_checksum_no_filename(self) -> None:
|
||||
doc = self.make_test_data()
|
||||
doc.archive_filename = None
|
||||
doc.save()
|
||||
self.assertSanityError(
|
||||
doc,
|
||||
"has an archive file checksum, but no archive filename.",
|
||||
)
|
||||
|
||||
@@ -3,7 +3,6 @@ from datetime import timedelta
|
||||
from pathlib import Path
|
||||
from unittest import mock
|
||||
|
||||
import pytest
|
||||
from celery import states
|
||||
from django.conf import settings
|
||||
from django.test import TestCase
|
||||
@@ -106,83 +105,55 @@ class TestClassifier(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
self.assertNotEqual(mtime2, mtime3)
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
class TestSanityCheck:
|
||||
@pytest.fixture
|
||||
def mock_check_sanity(self, mocker) -> mock.MagicMock:
|
||||
return mocker.patch("documents.tasks.sanity_checker.check_sanity")
|
||||
class TestSanityCheck(DirectoriesMixin, TestCase):
|
||||
@mock.patch("documents.tasks.sanity_checker.check_sanity")
|
||||
def test_sanity_check_success(self, m) -> None:
|
||||
m.return_value = SanityCheckMessages()
|
||||
self.assertEqual(tasks.sanity_check(), "No issues detected.")
|
||||
m.assert_called_once()
|
||||
|
||||
def test_sanity_check_success(self, mock_check_sanity: mock.MagicMock) -> None:
|
||||
mock_check_sanity.return_value = SanityCheckMessages()
|
||||
assert tasks.sanity_check() == "No issues detected."
|
||||
mock_check_sanity.assert_called_once()
|
||||
|
||||
def test_sanity_check_error_raises(
|
||||
self,
|
||||
mock_check_sanity: mock.MagicMock,
|
||||
sample_doc: Document,
|
||||
) -> None:
|
||||
@mock.patch("documents.tasks.sanity_checker.check_sanity")
|
||||
def test_sanity_check_error(self, m) -> None:
|
||||
messages = SanityCheckMessages()
|
||||
messages.error(sample_doc.pk, "some error")
|
||||
mock_check_sanity.return_value = messages
|
||||
with pytest.raises(SanityCheckFailedException):
|
||||
tasks.sanity_check()
|
||||
mock_check_sanity.assert_called_once()
|
||||
messages.error(None, "Some error")
|
||||
m.return_value = messages
|
||||
self.assertRaises(SanityCheckFailedException, tasks.sanity_check)
|
||||
m.assert_called_once()
|
||||
|
||||
def test_sanity_check_error_no_raise(
|
||||
self,
|
||||
mock_check_sanity: mock.MagicMock,
|
||||
sample_doc: Document,
|
||||
) -> None:
|
||||
@mock.patch("documents.tasks.sanity_checker.check_sanity")
|
||||
def test_sanity_check_error_no_raise(self, m) -> None:
|
||||
messages = SanityCheckMessages()
|
||||
messages.error(sample_doc.pk, "some error")
|
||||
mock_check_sanity.return_value = messages
|
||||
messages.error(None, "Some error")
|
||||
m.return_value = messages
|
||||
# No exception should be raised
|
||||
result = tasks.sanity_check(raise_on_error=False)
|
||||
assert "1 document(s) with errors" in result
|
||||
assert "Check logs for details." in result
|
||||
mock_check_sanity.assert_called_once()
|
||||
self.assertEqual(
|
||||
result,
|
||||
"Sanity check exited with errors. See log.",
|
||||
)
|
||||
m.assert_called_once()
|
||||
|
||||
def test_sanity_check_warning_only(
|
||||
self,
|
||||
mock_check_sanity: mock.MagicMock,
|
||||
) -> None:
|
||||
@mock.patch("documents.tasks.sanity_checker.check_sanity")
|
||||
def test_sanity_check_warning(self, m) -> None:
|
||||
messages = SanityCheckMessages()
|
||||
messages.warning(None, "extra file")
|
||||
mock_check_sanity.return_value = messages
|
||||
result = tasks.sanity_check()
|
||||
assert result == "1 global warning(s) found."
|
||||
mock_check_sanity.assert_called_once()
|
||||
messages.warning(None, "Some warning")
|
||||
m.return_value = messages
|
||||
self.assertEqual(
|
||||
tasks.sanity_check(),
|
||||
"Sanity check exited with warnings. See log.",
|
||||
)
|
||||
m.assert_called_once()
|
||||
|
||||
def test_sanity_check_info_only(
|
||||
self,
|
||||
mock_check_sanity: mock.MagicMock,
|
||||
sample_doc: Document,
|
||||
) -> None:
|
||||
@mock.patch("documents.tasks.sanity_checker.check_sanity")
|
||||
def test_sanity_check_info(self, m) -> None:
|
||||
messages = SanityCheckMessages()
|
||||
messages.info(sample_doc.pk, "some info")
|
||||
mock_check_sanity.return_value = messages
|
||||
result = tasks.sanity_check()
|
||||
assert result == "1 document(s) with infos found."
|
||||
mock_check_sanity.assert_called_once()
|
||||
|
||||
def test_sanity_check_errors_warnings_and_infos(
|
||||
self,
|
||||
mock_check_sanity: mock.MagicMock,
|
||||
sample_doc: Document,
|
||||
) -> None:
|
||||
messages = SanityCheckMessages()
|
||||
messages.error(sample_doc.pk, "broken")
|
||||
messages.warning(sample_doc.pk, "odd")
|
||||
messages.info(sample_doc.pk, "fyi")
|
||||
messages.warning(None, "extra file")
|
||||
mock_check_sanity.return_value = messages
|
||||
result = tasks.sanity_check(raise_on_error=False)
|
||||
assert "1 document(s) with errors" in result
|
||||
assert "1 document(s) with warnings" in result
|
||||
assert "1 document(s) with infos" in result
|
||||
assert "1 global warning(s)" in result
|
||||
assert "Check logs for details." in result
|
||||
mock_check_sanity.assert_called_once()
|
||||
messages.info(None, "Some info")
|
||||
m.return_value = messages
|
||||
self.assertEqual(
|
||||
tasks.sanity_check(),
|
||||
"Sanity check exited with infos. See log.",
|
||||
)
|
||||
m.assert_called_once()
|
||||
|
||||
|
||||
class TestBulkUpdate(DirectoriesMixin, TestCase):
|
||||
|
||||
@@ -378,6 +378,7 @@ class ApplicationConfigurationViewSet(ModelViewSet):
|
||||
):
|
||||
# AI index was just enabled and vector store file does not exist
|
||||
llmindex_index.delay(
|
||||
progress_bar_disable=True,
|
||||
rebuild=True,
|
||||
scheduled=False,
|
||||
auto=True,
|
||||
|
||||
@@ -1,13 +1,11 @@
|
||||
import logging
|
||||
import shutil
|
||||
from collections.abc import Callable
|
||||
from collections.abc import Iterable
|
||||
from datetime import timedelta
|
||||
from pathlib import Path
|
||||
from typing import TypeVar
|
||||
|
||||
import faiss
|
||||
import llama_index.core.settings as llama_settings
|
||||
import tqdm
|
||||
from celery import states
|
||||
from django.conf import settings
|
||||
from django.utils import timezone
|
||||
@@ -31,14 +29,6 @@ from paperless_ai.embedding import build_llm_index_text
|
||||
from paperless_ai.embedding import get_embedding_dim
|
||||
from paperless_ai.embedding import get_embedding_model
|
||||
|
||||
_T = TypeVar("_T")
|
||||
IterWrapper = Callable[[Iterable[_T]], Iterable[_T]]
|
||||
|
||||
|
||||
def _identity(iterable: Iterable[_T]) -> Iterable[_T]:
|
||||
return iterable
|
||||
|
||||
|
||||
logger = logging.getLogger("paperless_ai.indexing")
|
||||
|
||||
|
||||
@@ -166,11 +156,7 @@ def vector_store_file_exists():
|
||||
return Path(settings.LLM_INDEX_DIR / "default__vector_store.json").exists()
|
||||
|
||||
|
||||
def update_llm_index(
|
||||
*,
|
||||
iter_wrapper: IterWrapper[Document] = _identity,
|
||||
rebuild=False,
|
||||
) -> str:
|
||||
def update_llm_index(*, progress_bar_disable=False, rebuild=False) -> str:
|
||||
"""
|
||||
Rebuild or update the LLM index.
|
||||
"""
|
||||
@@ -190,7 +176,7 @@ def update_llm_index(
|
||||
embed_model = get_embedding_model()
|
||||
llama_settings.Settings.embed_model = embed_model
|
||||
storage_context = get_or_create_storage_context(rebuild=True)
|
||||
for document in iter_wrapper(documents):
|
||||
for document in tqdm.tqdm(documents, disable=progress_bar_disable):
|
||||
document_nodes = build_document_node(document)
|
||||
nodes.extend(document_nodes)
|
||||
|
||||
@@ -198,7 +184,7 @@ def update_llm_index(
|
||||
nodes=nodes,
|
||||
storage_context=storage_context,
|
||||
embed_model=embed_model,
|
||||
show_progress=False,
|
||||
show_progress=not progress_bar_disable,
|
||||
)
|
||||
msg = "LLM index rebuilt successfully."
|
||||
else:
|
||||
@@ -210,7 +196,7 @@ def update_llm_index(
|
||||
for node in index.docstore.get_nodes(all_node_ids)
|
||||
}
|
||||
|
||||
for document in iter_wrapper(documents):
|
||||
for document in tqdm.tqdm(documents, disable=progress_bar_disable):
|
||||
doc_id = str(document.id)
|
||||
document_modified = document.modified.isoformat()
|
||||
|
||||
|
||||
6
uv.lock
generated
6
uv.lock
generated
@@ -5906,11 +5906,11 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "whitenoise"
|
||||
version = "6.12.0"
|
||||
version = "6.11.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/cb/2a/55b3f3a4ec326cd077c1c3defeee656b9298372a69229134d930151acd01/whitenoise-6.12.0.tar.gz", hash = "sha256:f723ebb76a112e98816ff80fcea0a6c9b8ecde835f8ddda25df7a30a3c2db6ad", size = 26841, upload-time = "2026-02-27T00:05:42.028Z" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/15/95/8c81ec6b6ebcbf8aca2de7603070ccf37dbb873b03f20708e0f7c1664bc6/whitenoise-6.11.0.tar.gz", hash = "sha256:0f5bfce6061ae6611cd9396a8231e088722e4fc67bc13a111be74c738d99375f", size = 26432, upload-time = "2025-09-18T09:16:10.995Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/db/eb/d5583a11486211f3ebd4b385545ae787f32363d453c19fffd81106c9c138/whitenoise-6.12.0-py3-none-any.whl", hash = "sha256:fc5e8c572e33ebf24795b47b6a7da8da3c00cff2349f5b04c02f28d0cc5a3cc2", size = 20302, upload-time = "2026-02-27T00:05:40.086Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/6c/e9/4366332f9295fe0647d7d3251ce18f5615fbcb12d02c79a26f8dba9221b3/whitenoise-6.11.0-py3-none-any.whl", hash = "sha256:b2aeb45950597236f53b5342b3121c5de69c8da0109362aee506ce88e022d258", size = 20197, upload-time = "2025-09-18T09:16:09.754Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
||||
Reference in New Issue
Block a user