mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-03-04 00:06:25 +00:00
Compare commits
14 Commits
feature-ve
...
chore/pyte
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
57ea7a716b | ||
|
|
ba7b538398 | ||
|
|
9c0f112e94 | ||
|
|
43406f44f2 | ||
|
|
b7ca3550b1 | ||
|
|
0e97419e0e | ||
|
|
10cb2ac183 | ||
|
|
1d7cd5a7ad | ||
|
|
e58a35d40c | ||
|
|
20a9cd40e8 | ||
|
|
b94ce85b46 | ||
|
|
484bef00c1 | ||
|
|
317a177537 | ||
|
|
7ff51452f0 |
14
.github/workflows/ci-backend.yml
vendored
14
.github/workflows/ci-backend.yml
vendored
@@ -35,18 +35,18 @@ jobs:
|
||||
fail-fast: false
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v6.0.2
|
||||
- name: Start containers
|
||||
run: |
|
||||
docker compose --file docker/compose/docker-compose.ci-test.yml pull --quiet
|
||||
docker compose --file docker/compose/docker-compose.ci-test.yml up --detach
|
||||
- name: Set up Python
|
||||
id: setup-python
|
||||
uses: actions/setup-python@v6
|
||||
uses: actions/setup-python@v6.2.0
|
||||
with:
|
||||
python-version: "${{ matrix.python-version }}"
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v7
|
||||
uses: astral-sh/setup-uv@v7.3.1
|
||||
with:
|
||||
version: ${{ env.DEFAULT_UV_VERSION }}
|
||||
enable-cache: true
|
||||
@@ -83,13 +83,13 @@ jobs:
|
||||
pytest
|
||||
- name: Upload test results to Codecov
|
||||
if: always()
|
||||
uses: codecov/codecov-action@v5
|
||||
uses: codecov/codecov-action@v5.5.2
|
||||
with:
|
||||
flags: backend-python-${{ matrix.python-version }}
|
||||
files: junit.xml
|
||||
report_type: test_results
|
||||
- name: Upload coverage to Codecov
|
||||
uses: codecov/codecov-action@v5
|
||||
uses: codecov/codecov-action@v5.5.2
|
||||
with:
|
||||
flags: backend-python-${{ matrix.python-version }}
|
||||
files: coverage.xml
|
||||
@@ -106,14 +106,14 @@ jobs:
|
||||
DEFAULT_PYTHON: "3.12"
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6.0.1
|
||||
uses: actions/checkout@v6.0.2
|
||||
- name: Set up Python
|
||||
id: setup-python
|
||||
uses: actions/setup-python@v6.2.0
|
||||
with:
|
||||
python-version: "${{ env.DEFAULT_PYTHON }}"
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v7.2.1
|
||||
uses: astral-sh/setup-uv@v7.3.1
|
||||
with:
|
||||
version: ${{ env.DEFAULT_UV_VERSION }}
|
||||
enable-cache: true
|
||||
|
||||
8
.github/workflows/ci-docker.yml
vendored
8
.github/workflows/ci-docker.yml
vendored
@@ -41,7 +41,7 @@ jobs:
|
||||
ref-name: ${{ steps.ref.outputs.name }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6.0.1
|
||||
uses: actions/checkout@v6.0.2
|
||||
- name: Determine ref name
|
||||
id: ref
|
||||
run: |
|
||||
@@ -130,7 +130,7 @@ jobs:
|
||||
type=semver,pattern={{major}}.{{minor}}
|
||||
- name: Build and push by digest
|
||||
id: build
|
||||
uses: docker/build-push-action@v6.18.0
|
||||
uses: docker/build-push-action@v6.19.2
|
||||
with:
|
||||
context: .
|
||||
file: ./Dockerfile
|
||||
@@ -152,7 +152,7 @@ jobs:
|
||||
touch "/tmp/digests/${digest#sha256:}"
|
||||
- name: Upload digest
|
||||
if: steps.check-push.outputs.should-push == 'true'
|
||||
uses: actions/upload-artifact@v6.0.0
|
||||
uses: actions/upload-artifact@v7.0.0
|
||||
with:
|
||||
name: digests-${{ matrix.arch }}
|
||||
path: /tmp/digests/*
|
||||
@@ -168,7 +168,7 @@ jobs:
|
||||
packages: write
|
||||
steps:
|
||||
- name: Download digests
|
||||
uses: actions/download-artifact@v7.0.0
|
||||
uses: actions/download-artifact@v8.0.0
|
||||
with:
|
||||
path: /tmp/digests
|
||||
pattern: digests-*
|
||||
|
||||
12
.github/workflows/ci-docs.yml
vendored
12
.github/workflows/ci-docs.yml
vendored
@@ -33,16 +33,16 @@ jobs:
|
||||
name: Build Documentation
|
||||
runs-on: ubuntu-24.04
|
||||
steps:
|
||||
- uses: actions/configure-pages@v5
|
||||
- uses: actions/configure-pages@v5.0.0
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v6.0.2
|
||||
- name: Set up Python
|
||||
id: setup-python
|
||||
uses: actions/setup-python@v6
|
||||
uses: actions/setup-python@v6.2.0
|
||||
with:
|
||||
python-version: ${{ env.DEFAULT_PYTHON_VERSION }}
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v7
|
||||
uses: astral-sh/setup-uv@v7.3.1
|
||||
with:
|
||||
version: ${{ env.DEFAULT_UV_VERSION }}
|
||||
enable-cache: true
|
||||
@@ -58,7 +58,7 @@ jobs:
|
||||
--frozen \
|
||||
zensical build --clean
|
||||
- name: Upload GitHub Pages artifact
|
||||
uses: actions/upload-pages-artifact@v4
|
||||
uses: actions/upload-pages-artifact@v4.0.0
|
||||
with:
|
||||
path: site
|
||||
name: github-pages-${{ github.run_id }}-${{ github.run_attempt }}
|
||||
@@ -72,7 +72,7 @@ jobs:
|
||||
url: ${{ steps.deployment.outputs.page_url }}
|
||||
steps:
|
||||
- name: Deploy GitHub Pages
|
||||
uses: actions/deploy-pages@v4
|
||||
uses: actions/deploy-pages@v4.0.5
|
||||
id: deployment
|
||||
with:
|
||||
artifact_name: github-pages-${{ github.run_id }}-${{ github.run_attempt }}
|
||||
|
||||
44
.github/workflows/ci-frontend.yml
vendored
44
.github/workflows/ci-frontend.yml
vendored
@@ -22,20 +22,20 @@ jobs:
|
||||
runs-on: ubuntu-24.04
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v6.0.2
|
||||
- name: Install pnpm
|
||||
uses: pnpm/action-setup@v4
|
||||
uses: pnpm/action-setup@v4.2.0
|
||||
with:
|
||||
version: 10
|
||||
- name: Use Node.js 24
|
||||
uses: actions/setup-node@v6
|
||||
uses: actions/setup-node@v6.2.0
|
||||
with:
|
||||
node-version: 24.x
|
||||
cache: 'pnpm'
|
||||
cache-dependency-path: 'src-ui/pnpm-lock.yaml'
|
||||
- name: Cache frontend dependencies
|
||||
id: cache-frontend-deps
|
||||
uses: actions/cache@v5
|
||||
uses: actions/cache@v5.0.3
|
||||
with:
|
||||
path: |
|
||||
~/.pnpm-store
|
||||
@@ -49,19 +49,19 @@ jobs:
|
||||
runs-on: ubuntu-24.04
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v6.0.2
|
||||
- name: Install pnpm
|
||||
uses: pnpm/action-setup@v4
|
||||
uses: pnpm/action-setup@v4.2.0
|
||||
with:
|
||||
version: 10
|
||||
- name: Use Node.js 24
|
||||
uses: actions/setup-node@v6
|
||||
uses: actions/setup-node@v6.2.0
|
||||
with:
|
||||
node-version: 24.x
|
||||
cache: 'pnpm'
|
||||
cache-dependency-path: 'src-ui/pnpm-lock.yaml'
|
||||
- name: Cache frontend dependencies
|
||||
uses: actions/cache@v5
|
||||
uses: actions/cache@v5.0.3
|
||||
with:
|
||||
path: |
|
||||
~/.pnpm-store
|
||||
@@ -83,19 +83,19 @@ jobs:
|
||||
shard-count: [4]
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v6.0.2
|
||||
- name: Install pnpm
|
||||
uses: pnpm/action-setup@v4
|
||||
uses: pnpm/action-setup@v4.2.0
|
||||
with:
|
||||
version: 10
|
||||
- name: Use Node.js 24
|
||||
uses: actions/setup-node@v6
|
||||
uses: actions/setup-node@v6.2.0
|
||||
with:
|
||||
node-version: 24.x
|
||||
cache: 'pnpm'
|
||||
cache-dependency-path: 'src-ui/pnpm-lock.yaml'
|
||||
- name: Cache frontend dependencies
|
||||
uses: actions/cache@v5
|
||||
uses: actions/cache@v5.0.3
|
||||
with:
|
||||
path: |
|
||||
~/.pnpm-store
|
||||
@@ -107,13 +107,13 @@ jobs:
|
||||
run: cd src-ui && pnpm run test --max-workers=2 --shard=${{ matrix.shard-index }}/${{ matrix.shard-count }}
|
||||
- name: Upload test results to Codecov
|
||||
if: always()
|
||||
uses: codecov/codecov-action@v5
|
||||
uses: codecov/codecov-action@v5.5.2
|
||||
with:
|
||||
flags: frontend-node-${{ matrix.node-version }}
|
||||
directory: src-ui/
|
||||
report_type: test_results
|
||||
- name: Upload coverage to Codecov
|
||||
uses: codecov/codecov-action@v5
|
||||
uses: codecov/codecov-action@v5.5.2
|
||||
with:
|
||||
flags: frontend-node-${{ matrix.node-version }}
|
||||
directory: src-ui/coverage/
|
||||
@@ -133,19 +133,19 @@ jobs:
|
||||
shard-count: [2]
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v6.0.2
|
||||
- name: Install pnpm
|
||||
uses: pnpm/action-setup@v4
|
||||
uses: pnpm/action-setup@v4.2.0
|
||||
with:
|
||||
version: 10
|
||||
- name: Use Node.js 24
|
||||
uses: actions/setup-node@v6
|
||||
uses: actions/setup-node@v6.2.0
|
||||
with:
|
||||
node-version: 24.x
|
||||
cache: 'pnpm'
|
||||
cache-dependency-path: 'src-ui/pnpm-lock.yaml'
|
||||
- name: Cache frontend dependencies
|
||||
uses: actions/cache@v5
|
||||
uses: actions/cache@v5.0.3
|
||||
with:
|
||||
path: |
|
||||
~/.pnpm-store
|
||||
@@ -163,19 +163,19 @@ jobs:
|
||||
runs-on: ubuntu-24.04
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v6.0.2
|
||||
- name: Install pnpm
|
||||
uses: pnpm/action-setup@v4
|
||||
uses: pnpm/action-setup@v4.2.0
|
||||
with:
|
||||
version: 10
|
||||
- name: Use Node.js 24
|
||||
uses: actions/setup-node@v6
|
||||
uses: actions/setup-node@v6.2.0
|
||||
with:
|
||||
node-version: 24.x
|
||||
cache: 'pnpm'
|
||||
cache-dependency-path: 'src-ui/pnpm-lock.yaml'
|
||||
- name: Cache frontend dependencies
|
||||
uses: actions/cache@v5
|
||||
uses: actions/cache@v5.0.3
|
||||
with:
|
||||
path: |
|
||||
~/.pnpm-store
|
||||
|
||||
26
.github/workflows/ci-release.yml
vendored
26
.github/workflows/ci-release.yml
vendored
@@ -28,14 +28,14 @@ jobs:
|
||||
runs-on: ubuntu-24.04
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v6.0.2
|
||||
# ---- Frontend Build ----
|
||||
- name: Install pnpm
|
||||
uses: pnpm/action-setup@v4
|
||||
uses: pnpm/action-setup@v4.2.0
|
||||
with:
|
||||
version: 10
|
||||
- name: Use Node.js 24
|
||||
uses: actions/setup-node@v6
|
||||
uses: actions/setup-node@v6.2.0
|
||||
with:
|
||||
node-version: 24.x
|
||||
cache: 'pnpm'
|
||||
@@ -47,11 +47,11 @@ jobs:
|
||||
# ---- Backend Setup ----
|
||||
- name: Set up Python
|
||||
id: setup-python
|
||||
uses: actions/setup-python@v6
|
||||
uses: actions/setup-python@v6.2.0
|
||||
with:
|
||||
python-version: ${{ env.DEFAULT_PYTHON_VERSION }}
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v7
|
||||
uses: astral-sh/setup-uv@v7.3.1
|
||||
with:
|
||||
version: ${{ env.DEFAULT_UV_VERSION }}
|
||||
enable-cache: true
|
||||
@@ -118,7 +118,7 @@ jobs:
|
||||
sudo chown -R 1000:1000 paperless-ngx/
|
||||
tar -cJf paperless-ngx.tar.xz paperless-ngx/
|
||||
- name: Upload release artifact
|
||||
uses: actions/upload-artifact@v6
|
||||
uses: actions/upload-artifact@v7.0.0
|
||||
with:
|
||||
name: release
|
||||
path: dist/paperless-ngx.tar.xz
|
||||
@@ -133,7 +133,7 @@ jobs:
|
||||
version: ${{ steps.get-version.outputs.version }}
|
||||
steps:
|
||||
- name: Download release artifact
|
||||
uses: actions/download-artifact@v7
|
||||
uses: actions/download-artifact@v8.0.0
|
||||
with:
|
||||
name: release
|
||||
path: ./
|
||||
@@ -148,7 +148,7 @@ jobs:
|
||||
fi
|
||||
- name: Create release and changelog
|
||||
id: create-release
|
||||
uses: release-drafter/release-drafter@v6
|
||||
uses: release-drafter/release-drafter@v6.2.0
|
||||
with:
|
||||
name: Paperless-ngx ${{ steps.get-version.outputs.version }}
|
||||
tag: ${{ steps.get-version.outputs.version }}
|
||||
@@ -159,7 +159,7 @@ jobs:
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
- name: Upload release archive
|
||||
uses: shogo82148/actions-upload-release-asset@v1
|
||||
uses: shogo82148/actions-upload-release-asset@v1.9.2
|
||||
with:
|
||||
github_token: ${{ secrets.GITHUB_TOKEN }}
|
||||
upload_url: ${{ steps.create-release.outputs.upload_url }}
|
||||
@@ -176,16 +176,16 @@ jobs:
|
||||
runs-on: ubuntu-24.04
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v6.0.2
|
||||
with:
|
||||
ref: main
|
||||
- name: Set up Python
|
||||
id: setup-python
|
||||
uses: actions/setup-python@v6
|
||||
uses: actions/setup-python@v6.2.0
|
||||
with:
|
||||
python-version: ${{ env.DEFAULT_PYTHON_VERSION }}
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v7
|
||||
uses: astral-sh/setup-uv@v7.3.1
|
||||
with:
|
||||
version: ${{ env.DEFAULT_UV_VERSION }}
|
||||
enable-cache: true
|
||||
@@ -218,7 +218,7 @@ jobs:
|
||||
git commit -am "Changelog ${{ needs.publish-release.outputs.version }} - GHA"
|
||||
git push origin ${{ needs.publish-release.outputs.version }}-changelog
|
||||
- name: Create pull request
|
||||
uses: actions/github-script@v8
|
||||
uses: actions/github-script@v8.0.0
|
||||
with:
|
||||
script: |
|
||||
const { repo, owner } = context.repo;
|
||||
|
||||
6
.github/workflows/codeql-analysis.yml
vendored
6
.github/workflows/codeql-analysis.yml
vendored
@@ -34,10 +34,10 @@ jobs:
|
||||
# Learn more about CodeQL language support at https://git.io/codeql-language-support
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v6.0.2
|
||||
# Initializes the CodeQL tools for scanning.
|
||||
- name: Initialize CodeQL
|
||||
uses: github/codeql-action/init@v4
|
||||
uses: github/codeql-action/init@v4.32.5
|
||||
with:
|
||||
languages: ${{ matrix.language }}
|
||||
# If you wish to specify custom queries, you can do so here or in a config file.
|
||||
@@ -45,4 +45,4 @@ jobs:
|
||||
# Prefix the list here with "+" to use these queries and those in the config file.
|
||||
# queries: ./path/to/local/query, your-org/your-repo/queries@main
|
||||
- name: Perform CodeQL Analysis
|
||||
uses: github/codeql-action/analyze@v4
|
||||
uses: github/codeql-action/analyze@v4.32.5
|
||||
|
||||
4
.github/workflows/crowdin.yml
vendored
4
.github/workflows/crowdin.yml
vendored
@@ -13,11 +13,11 @@ jobs:
|
||||
runs-on: ubuntu-24.04
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v6.0.2
|
||||
with:
|
||||
token: ${{ secrets.PNGX_BOT_PAT }}
|
||||
- name: crowdin action
|
||||
uses: crowdin/github-action@v2
|
||||
uses: crowdin/github-action@v2.15.0
|
||||
with:
|
||||
upload_translations: false
|
||||
download_translations: true
|
||||
|
||||
8
.github/workflows/pr-bot.yml
vendored
8
.github/workflows/pr-bot.yml
vendored
@@ -12,7 +12,7 @@ jobs:
|
||||
steps:
|
||||
- name: Label PR by file path or branch name
|
||||
# see .github/labeler.yml for the labeler config
|
||||
uses: actions/labeler@v6
|
||||
uses: actions/labeler@v6.0.1
|
||||
with:
|
||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
- name: Label by size
|
||||
@@ -26,7 +26,7 @@ jobs:
|
||||
fail_if_xl: 'false'
|
||||
excluded_files: /\.lock$/ /\.txt$/ ^src-ui/pnpm-lock\.yaml$ ^src-ui/messages\.xlf$ ^src/locale/en_US/LC_MESSAGES/django\.po$
|
||||
- name: Label by PR title
|
||||
uses: actions/github-script@v8
|
||||
uses: actions/github-script@v8.0.0
|
||||
with:
|
||||
script: |
|
||||
const pr = context.payload.pull_request;
|
||||
@@ -52,7 +52,7 @@ jobs:
|
||||
}
|
||||
- name: Label bot-generated PRs
|
||||
if: ${{ contains(github.actor, 'dependabot') || contains(github.actor, 'crowdin-bot') }}
|
||||
uses: actions/github-script@v8
|
||||
uses: actions/github-script@v8.0.0
|
||||
with:
|
||||
script: |
|
||||
const pr = context.payload.pull_request;
|
||||
@@ -77,7 +77,7 @@ jobs:
|
||||
}
|
||||
- name: Welcome comment
|
||||
if: ${{ !contains(github.actor, 'bot') }}
|
||||
uses: actions/github-script@v8
|
||||
uses: actions/github-script@v8.0.0
|
||||
with:
|
||||
script: |
|
||||
const pr = context.payload.pull_request;
|
||||
|
||||
2
.github/workflows/project-actions.yml
vendored
2
.github/workflows/project-actions.yml
vendored
@@ -19,6 +19,6 @@ jobs:
|
||||
if: github.event_name == 'pull_request_target' && (github.event.action == 'opened' || github.event.action == 'reopened') && github.event.pull_request.user.login != 'dependabot'
|
||||
steps:
|
||||
- name: Label PR with release-drafter
|
||||
uses: release-drafter/release-drafter@v6
|
||||
uses: release-drafter/release-drafter@v6.2.0
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
10
.github/workflows/repo-maintenance.yml
vendored
10
.github/workflows/repo-maintenance.yml
vendored
@@ -15,7 +15,7 @@ jobs:
|
||||
if: github.repository_owner == 'paperless-ngx'
|
||||
runs-on: ubuntu-24.04
|
||||
steps:
|
||||
- uses: actions/stale@v10
|
||||
- uses: actions/stale@v10.2.0
|
||||
with:
|
||||
days-before-stale: 7
|
||||
days-before-close: 14
|
||||
@@ -37,7 +37,7 @@ jobs:
|
||||
if: github.repository_owner == 'paperless-ngx'
|
||||
runs-on: ubuntu-24.04
|
||||
steps:
|
||||
- uses: dessant/lock-threads@v6
|
||||
- uses: dessant/lock-threads@v6.0.0
|
||||
with:
|
||||
issue-inactive-days: '30'
|
||||
pr-inactive-days: '30'
|
||||
@@ -57,7 +57,7 @@ jobs:
|
||||
if: github.repository_owner == 'paperless-ngx'
|
||||
runs-on: ubuntu-24.04
|
||||
steps:
|
||||
- uses: actions/github-script@v8
|
||||
- uses: actions/github-script@v8.0.0
|
||||
with:
|
||||
script: |
|
||||
function sleep(ms) {
|
||||
@@ -114,7 +114,7 @@ jobs:
|
||||
if: github.repository_owner == 'paperless-ngx'
|
||||
runs-on: ubuntu-24.04
|
||||
steps:
|
||||
- uses: actions/github-script@v8
|
||||
- uses: actions/github-script@v8.0.0
|
||||
with:
|
||||
script: |
|
||||
function sleep(ms) {
|
||||
@@ -206,7 +206,7 @@ jobs:
|
||||
if: github.repository_owner == 'paperless-ngx'
|
||||
runs-on: ubuntu-24.04
|
||||
steps:
|
||||
- uses: actions/github-script@v8
|
||||
- uses: actions/github-script@v8.0.0
|
||||
with:
|
||||
script: |
|
||||
function sleep(ms) {
|
||||
|
||||
14
.github/workflows/translate-strings.yml
vendored
14
.github/workflows/translate-strings.yml
vendored
@@ -11,7 +11,7 @@ jobs:
|
||||
contents: write
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v6.0.2
|
||||
env:
|
||||
GH_REF: ${{ github.ref }} # sonar rule:githubactions:S7630 - avoid injection
|
||||
with:
|
||||
@@ -19,13 +19,13 @@ jobs:
|
||||
ref: ${{ env.GH_REF }}
|
||||
- name: Set up Python
|
||||
id: setup-python
|
||||
uses: actions/setup-python@v6
|
||||
uses: actions/setup-python@v6.2.0
|
||||
- name: Install system dependencies
|
||||
run: |
|
||||
sudo apt-get update -qq
|
||||
sudo apt-get install -qq --no-install-recommends gettext
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v7
|
||||
uses: astral-sh/setup-uv@v7.3.1
|
||||
with:
|
||||
enable-cache: true
|
||||
- name: Install backend python dependencies
|
||||
@@ -36,18 +36,18 @@ jobs:
|
||||
- name: Generate backend translation strings
|
||||
run: cd src/ && uv run manage.py makemessages -l en_US -i "samples*"
|
||||
- name: Install pnpm
|
||||
uses: pnpm/action-setup@v4
|
||||
uses: pnpm/action-setup@v4.2.0
|
||||
with:
|
||||
version: 10
|
||||
- name: Use Node.js 24
|
||||
uses: actions/setup-node@v6
|
||||
uses: actions/setup-node@v6.2.0
|
||||
with:
|
||||
node-version: 24.x
|
||||
cache: 'pnpm'
|
||||
cache-dependency-path: 'src-ui/pnpm-lock.yaml'
|
||||
- name: Cache frontend dependencies
|
||||
id: cache-frontend-deps
|
||||
uses: actions/cache@v5
|
||||
uses: actions/cache@v5.0.3
|
||||
with:
|
||||
path: |
|
||||
~/.pnpm-store
|
||||
@@ -63,7 +63,7 @@ jobs:
|
||||
cd src-ui
|
||||
pnpm run ng extract-i18n
|
||||
- name: Commit changes
|
||||
uses: stefanzweifel/git-auto-commit-action@v7
|
||||
uses: stefanzweifel/git-auto-commit-action@v7.1.0
|
||||
with:
|
||||
file_pattern: 'src-ui/messages.xlf src/locale/en_US/LC_MESSAGES/django.po'
|
||||
commit_message: "Auto translate strings"
|
||||
|
||||
@@ -30,7 +30,7 @@ RUN set -eux \
|
||||
# Purpose: Installs s6-overlay and rootfs
|
||||
# Comments:
|
||||
# - Don't leave anything extra in here either
|
||||
FROM ghcr.io/astral-sh/uv:0.10.5-python3.12-trixie-slim AS s6-overlay-base
|
||||
FROM ghcr.io/astral-sh/uv:0.10.7-python3.12-trixie-slim AS s6-overlay-base
|
||||
|
||||
WORKDIR /usr/src/s6
|
||||
|
||||
@@ -45,7 +45,7 @@ ENV \
|
||||
ARG TARGETARCH
|
||||
ARG TARGETVARIANT
|
||||
# Lock this version
|
||||
ARG S6_OVERLAY_VERSION=3.2.1.0
|
||||
ARG S6_OVERLAY_VERSION=3.2.2.0
|
||||
|
||||
ARG S6_BUILD_TIME_PKGS="curl \
|
||||
xz-utils"
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
# correct networking for the tests
|
||||
services:
|
||||
gotenberg:
|
||||
image: docker.io/gotenberg/gotenberg:8.26
|
||||
image: docker.io/gotenberg/gotenberg:8.27
|
||||
hostname: gotenberg
|
||||
container_name: gotenberg
|
||||
network_mode: host
|
||||
|
||||
@@ -72,7 +72,7 @@ services:
|
||||
PAPERLESS_TIKA_GOTENBERG_ENDPOINT: http://gotenberg:3000
|
||||
PAPERLESS_TIKA_ENDPOINT: http://tika:9998
|
||||
gotenberg:
|
||||
image: docker.io/gotenberg/gotenberg:8.26
|
||||
image: docker.io/gotenberg/gotenberg:8.27
|
||||
restart: unless-stopped
|
||||
# The gotenberg chromium route is used to convert .eml files. We do not
|
||||
# want to allow external content like tracking pixels or even javascript.
|
||||
|
||||
@@ -66,7 +66,7 @@ services:
|
||||
PAPERLESS_TIKA_GOTENBERG_ENDPOINT: http://gotenberg:3000
|
||||
PAPERLESS_TIKA_ENDPOINT: http://tika:9998
|
||||
gotenberg:
|
||||
image: docker.io/gotenberg/gotenberg:8.26
|
||||
image: docker.io/gotenberg/gotenberg:8.27
|
||||
restart: unless-stopped
|
||||
# The gotenberg chromium route is used to convert .eml files. We do not
|
||||
# want to allow external content like tracking pixels or even javascript.
|
||||
|
||||
@@ -55,7 +55,7 @@ services:
|
||||
PAPERLESS_TIKA_GOTENBERG_ENDPOINT: http://gotenberg:3000
|
||||
PAPERLESS_TIKA_ENDPOINT: http://tika:9998
|
||||
gotenberg:
|
||||
image: docker.io/gotenberg/gotenberg:8.26
|
||||
image: docker.io/gotenberg/gotenberg:8.27
|
||||
restart: unless-stopped
|
||||
# The gotenberg chromium route is used to convert .eml files. We do not
|
||||
# want to allow external content like tracking pixels or even javascript.
|
||||
|
||||
BIN
docs/assets/logo_full_black.png
Normal file
BIN
docs/assets/logo_full_black.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 57 KiB |
BIN
docs/assets/logo_full_white.png
Normal file
BIN
docs/assets/logo_full_white.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 61 KiB |
@@ -616,7 +616,7 @@ applied. You can use the following placeholders in the template with any trigger
|
||||
- `{{added_day}}`: added day
|
||||
- `{{added_time}}`: added time in HH:MM format
|
||||
- `{{original_filename}}`: original file name without extension
|
||||
- `{{filename}}`: current file name without extension
|
||||
- `{{filename}}`: current file name without extension (for "added" workflows this may not be final yet; use `{{original_filename}}` instead)
|
||||
- `{{doc_title}}`: current document title (cannot be used in title assignment)
|
||||
|
||||
The following placeholders are only available for "added" or "updated" triggers
|
||||
|
||||
@@ -111,6 +111,7 @@ docs = [
|
||||
testing = [
|
||||
"daphne",
|
||||
"factory-boy~=3.3.1",
|
||||
"faker~=40.5.1",
|
||||
"imagehash",
|
||||
"pytest~=9.0.0",
|
||||
"pytest-cov~=7.0.0",
|
||||
|
||||
@@ -6,11 +6,14 @@ Provides automatic progress bar and multiprocessing support with minimal boilerp
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
from collections.abc import Callable
|
||||
from collections.abc import Iterable
|
||||
from collections.abc import Sized
|
||||
from concurrent.futures import ProcessPoolExecutor
|
||||
from concurrent.futures import as_completed
|
||||
from contextlib import contextmanager
|
||||
from dataclasses import dataclass
|
||||
from typing import TYPE_CHECKING
|
||||
from typing import Any
|
||||
@@ -22,7 +25,11 @@ from django import db
|
||||
from django.core.management import CommandError
|
||||
from django.db.models import QuerySet
|
||||
from django_rich.management import RichCommand
|
||||
from rich import box
|
||||
from rich.console import Console
|
||||
from rich.console import Group
|
||||
from rich.console import RenderableType
|
||||
from rich.live import Live
|
||||
from rich.progress import BarColumn
|
||||
from rich.progress import MofNCompleteColumn
|
||||
from rich.progress import Progress
|
||||
@@ -30,11 +37,11 @@ from rich.progress import SpinnerColumn
|
||||
from rich.progress import TextColumn
|
||||
from rich.progress import TimeElapsedColumn
|
||||
from rich.progress import TimeRemainingColumn
|
||||
from rich.table import Table
|
||||
from rich.text import Text
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Callable
|
||||
from collections.abc import Generator
|
||||
from collections.abc import Iterable
|
||||
from collections.abc import Sequence
|
||||
|
||||
from django.core.management import CommandParser
|
||||
@@ -43,6 +50,78 @@ T = TypeVar("T")
|
||||
R = TypeVar("R")
|
||||
|
||||
|
||||
@dataclass(slots=True, frozen=True)
|
||||
class _BufferedRecord:
|
||||
level: int
|
||||
name: str
|
||||
message: str
|
||||
|
||||
|
||||
class BufferingLogHandler(logging.Handler):
|
||||
"""Captures log records during a command run for deferred rendering.
|
||||
|
||||
Attach to a logger before a long operation and call ``render()``
|
||||
afterwards to emit the buffered records via Rich, optionally filtered
|
||||
by minimum level.
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
super().__init__()
|
||||
self._records: list[_BufferedRecord] = []
|
||||
|
||||
def emit(self, record: logging.LogRecord) -> None:
|
||||
self._records.append(
|
||||
_BufferedRecord(
|
||||
level=record.levelno,
|
||||
name=record.name,
|
||||
message=self.format(record),
|
||||
),
|
||||
)
|
||||
|
||||
def render(
|
||||
self,
|
||||
console: Console,
|
||||
*,
|
||||
min_level: int = logging.DEBUG,
|
||||
title: str = "Log Output",
|
||||
) -> None:
|
||||
records = [r for r in self._records if r.level >= min_level]
|
||||
if not records:
|
||||
return
|
||||
|
||||
table = Table(
|
||||
title=title,
|
||||
show_header=True,
|
||||
header_style="bold",
|
||||
show_lines=False,
|
||||
box=box.SIMPLE,
|
||||
)
|
||||
table.add_column("Level", style="bold", width=8)
|
||||
table.add_column("Logger", style="dim")
|
||||
table.add_column("Message", no_wrap=False)
|
||||
|
||||
_level_styles: dict[int, str] = {
|
||||
logging.DEBUG: "dim",
|
||||
logging.INFO: "cyan",
|
||||
logging.WARNING: "yellow",
|
||||
logging.ERROR: "red",
|
||||
logging.CRITICAL: "bold red",
|
||||
}
|
||||
|
||||
for record in records:
|
||||
style = _level_styles.get(record.level, "")
|
||||
table.add_row(
|
||||
Text(logging.getLevelName(record.level), style=style),
|
||||
record.name,
|
||||
record.message,
|
||||
)
|
||||
|
||||
console.print(table)
|
||||
|
||||
def clear(self) -> None:
|
||||
self._records.clear()
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class ProcessResult(Generic[T, R]):
|
||||
"""
|
||||
@@ -91,6 +170,23 @@ class PaperlessCommand(RichCommand):
|
||||
for result in self.process_parallel(process_doc, ids):
|
||||
if result.error:
|
||||
self.console.print(f"[red]Failed: {result.error}[/red]")
|
||||
|
||||
class Command(PaperlessCommand):
|
||||
help = "Import documents with live stats"
|
||||
|
||||
def handle(self, *args, **options):
|
||||
stats = ImportStats()
|
||||
|
||||
def render_stats() -> Table:
|
||||
... # build Rich Table from stats
|
||||
|
||||
for item in self.track_with_stats(
|
||||
items,
|
||||
description="Importing...",
|
||||
stats_renderer=render_stats,
|
||||
):
|
||||
result = import_item(item)
|
||||
stats.imported += 1
|
||||
"""
|
||||
|
||||
supports_progress_bar: ClassVar[bool] = True
|
||||
@@ -128,13 +224,11 @@ class PaperlessCommand(RichCommand):
|
||||
This is called by Django's command infrastructure after argument parsing
|
||||
but before handle(). We use it to set instance attributes from options.
|
||||
"""
|
||||
# Set progress bar state
|
||||
if self.supports_progress_bar:
|
||||
self.no_progress_bar = options.get("no_progress_bar", False)
|
||||
else:
|
||||
self.no_progress_bar = True
|
||||
|
||||
# Set multiprocessing state
|
||||
if self.supports_multiprocessing:
|
||||
self.process_count = options.get("processes", 1)
|
||||
if self.process_count < 1:
|
||||
@@ -144,9 +238,69 @@ class PaperlessCommand(RichCommand):
|
||||
|
||||
return super().execute(*args, **options)
|
||||
|
||||
@contextmanager
def buffered_logging(
    self,
    *logger_names: str,
    level: int = logging.DEBUG,
) -> Generator[BufferingLogHandler, None, None]:
    """Context manager that captures log output from named loggers.

    Installs a ``BufferingLogHandler`` on each named logger for the
    duration of the block, suppressing propagation to avoid interleaving
    with the Rich live display. The handler is removed and each logger's
    original ``propagate`` value is restored on exit regardless of whether
    an exception occurred.

    Args:
        *logger_names: Names of the loggers to capture. A name given more
            than once (or two names resolving to the same logger) is only
            patched once, so the restored ``propagate`` value is always the
            one the logger had before entering the block.
        level: Minimum level the buffering handler accepts; records below
            it are not buffered.

    Yields:
        The handler holding the captured records.

    Usage::

        with self.buffered_logging("paperless", "documents") as log_buf:
            # ... run progress loop ...
        if options["verbose"]:
            log_buf.render(self.console)
    """
    handler = BufferingLogHandler()
    handler.setFormatter(logging.Formatter("%(message)s"))
    handler.setLevel(level)

    # (logger, original propagate) pairs. Keyed by logger identity rather
    # than requested name: logging.getLogger("") returns the root logger,
    # whose .name is "root", so a name-keyed lookup cannot be relied upon.
    patched: list[tuple[logging.Logger, bool]] = []

    try:
        for name in logger_names:
            log = logging.getLogger(name)
            if any(log is seen for seen, _ in patched):
                # Already patched; recording propagate again would save the
                # suppressed value and leave it disabled after exit.
                continue
            patched.append((log, log.propagate))
            log.addHandler(handler)
            log.propagate = False

        yield handler
    finally:
        for log, propagate in patched:
            log.removeHandler(handler)
            log.propagate = propagate
|
||||
|
||||
@staticmethod
def _progress_columns() -> tuple[Any, ...]:
    """
    Build the column layout shared by every progress bar in this class.

    Kept in one place so that _create_progress (standalone) and
    track_with_stats (inside Live) cannot drift apart in how they
    configure their columns.
    """
    description_column = TextColumn("[progress.description]{task.description}")
    layout = [
        SpinnerColumn(),
        description_column,
        BarColumn(),
        MofNCompleteColumn(),
        TimeElapsedColumn(),
        TimeRemainingColumn(),
    ]
    return tuple(layout)
|
||||
|
||||
def _create_progress(self, description: str) -> Progress:
|
||||
"""
|
||||
Create a configured Progress instance.
|
||||
Create a standalone Progress instance with its own stderr Console.
|
||||
|
||||
Use this for track(). For track_with_stats(), Progress is created
|
||||
directly inside a Live context instead.
|
||||
|
||||
Progress output is directed to stderr to match the convention that
|
||||
progress bars are transient UI feedback, not command output. This
|
||||
@@ -161,12 +315,7 @@ class PaperlessCommand(RichCommand):
|
||||
A Progress instance configured with appropriate columns.
|
||||
"""
|
||||
return Progress(
|
||||
SpinnerColumn(),
|
||||
TextColumn("[progress.description]{task.description}"),
|
||||
BarColumn(),
|
||||
MofNCompleteColumn(),
|
||||
TimeElapsedColumn(),
|
||||
TimeRemainingColumn(),
|
||||
*self._progress_columns(),
|
||||
console=Console(stderr=True),
|
||||
transient=False,
|
||||
)
|
||||
@@ -222,7 +371,6 @@ class PaperlessCommand(RichCommand):
|
||||
yield from iterable
|
||||
return
|
||||
|
||||
# Attempt to determine total if not provided
|
||||
if total is None:
|
||||
total = self._get_iterable_length(iterable)
|
||||
|
||||
@@ -232,6 +380,87 @@ class PaperlessCommand(RichCommand):
|
||||
yield item
|
||||
progress.advance(task_id)
|
||||
|
||||
def track_with_stats(
    self,
    iterable: Iterable[T],
    *,
    description: str = "Processing...",
    stats_renderer: Callable[[], RenderableType],
    total: int | None = None,
) -> Generator[T, None, None]:
    """
    Yield items while showing a progress bar with a stats panel beneath it.

    The bar and the renderable returned by ``stats_renderer`` are grouped
    into one Live display written to stderr. Each time the consumer asks
    for the next item, the bar is advanced and the stats renderable is
    rebuilt, so counters changed in the consumer's loop body show up in
    place.

    When ``self.no_progress_bar`` is set, nothing is displayed and the
    items are passed straight through; ``stats_renderer`` is never called
    in that case.

    Args:
        iterable: The items to iterate over.
        description: Label shown next to the progress bar.
        stats_renderer: Zero-argument callable returning a Rich
            renderable. It is called once when the display is created
            and once more after every item.
        total: Number of items. When None, the value comes from
            ``self._get_iterable_length(iterable)``.

    Yields:
        The items of ``iterable``, unchanged and in order.

    Example:
        stats = Stats()  # any mutable object with counters

        def render_stats() -> Table:
            table = Table(box=None)
            table.add_column("Processed")
            table.add_column("Failed")
            table.add_row(str(stats.processed), str(stats.failed))
            return table

        for item in self.track_with_stats(
            items,
            description="Importing...",
            stats_renderer=render_stats,
        ):
            try:
                import_item(item)
                stats.processed += 1
            except Exception:
                stats.failed += 1
    """
    if self.no_progress_bar:
        yield from iterable
        return

    if total is None:
        total = self._get_iterable_length(iterable)

    # The Progress object is only used as a renderable here; the Live
    # display below owns the console and does the actual drawing.
    bar = Progress(*self._progress_columns())
    bar_task = bar.add_task(description, total=total)

    def compose() -> Group:
        # Rebuild the group so the stats renderable reflects current values.
        return Group(bar, stats_renderer())

    display = Live(
        compose(),
        console=Console(stderr=True),
        refresh_per_second=4,
    )
    with display:
        for element in iterable:
            yield element
            # Runs once the consumer has finished with ``element``.
            bar.advance(bar_task)
            display.update(compose())
|
||||
|
||||
def process_parallel(
|
||||
self,
|
||||
fn: Callable[[T], R],
|
||||
@@ -269,7 +498,7 @@ class PaperlessCommand(RichCommand):
|
||||
total = len(items)
|
||||
|
||||
if self.process_count == 1:
|
||||
# Sequential execution in main process - critical for testing
|
||||
# Sequential execution in main process - critical for testing, so we don't fork in fork, etc
|
||||
yield from self._process_sequential(fn, items, description, total)
|
||||
else:
|
||||
# Parallel execution with ProcessPoolExecutor
|
||||
@@ -298,6 +527,7 @@ class PaperlessCommand(RichCommand):
|
||||
total: int,
|
||||
) -> Generator[ProcessResult[T, R], None, None]:
|
||||
"""Process items in parallel using ProcessPoolExecutor."""
|
||||
|
||||
# Close database connections before forking - required for PostgreSQL
|
||||
db.connections.close_all()
|
||||
|
||||
|
||||
@@ -1,22 +1,25 @@
|
||||
from django.core.management import BaseCommand
|
||||
from django.db import transaction
|
||||
|
||||
from documents.management.commands.mixins import ProgressBarMixin
|
||||
from documents.management.commands.base import PaperlessCommand
|
||||
from documents.tasks import index_optimize
|
||||
from documents.tasks import index_reindex
|
||||
|
||||
|
||||
class Command(ProgressBarMixin, BaseCommand):
|
||||
class Command(PaperlessCommand):
|
||||
help = "Manages the document index."
|
||||
|
||||
def add_arguments(self, parser):
|
||||
super().add_arguments(parser)
|
||||
parser.add_argument("command", choices=["reindex", "optimize"])
|
||||
self.add_argument_progress_bar_mixin(parser)
|
||||
|
||||
def handle(self, *args, **options):
|
||||
self.handle_progress_bar_mixin(**options)
|
||||
with transaction.atomic():
|
||||
if options["command"] == "reindex":
|
||||
index_reindex(progress_bar_disable=self.no_progress_bar)
|
||||
index_reindex(
|
||||
iter_wrapper=lambda docs: self.track(
|
||||
docs,
|
||||
description="Indexing documents...",
|
||||
),
|
||||
)
|
||||
elif options["command"] == "optimize":
|
||||
index_optimize()
|
||||
|
||||
@@ -1,22 +1,22 @@
|
||||
from django.core.management import BaseCommand
|
||||
from django.db import transaction
|
||||
from typing import Any
|
||||
|
||||
from documents.management.commands.mixins import ProgressBarMixin
|
||||
from documents.management.commands.base import PaperlessCommand
|
||||
from documents.tasks import llmindex_index
|
||||
|
||||
|
||||
class Command(ProgressBarMixin, BaseCommand):
|
||||
class Command(PaperlessCommand):
|
||||
help = "Manages the LLM-based vector index for Paperless."
|
||||
|
||||
def add_arguments(self, parser):
|
||||
def add_arguments(self, parser: Any) -> None:
|
||||
super().add_arguments(parser)
|
||||
parser.add_argument("command", choices=["rebuild", "update"])
|
||||
self.add_argument_progress_bar_mixin(parser)
|
||||
|
||||
def handle(self, *args, **options):
|
||||
self.handle_progress_bar_mixin(**options)
|
||||
with transaction.atomic():
|
||||
llmindex_index(
|
||||
progress_bar_disable=self.no_progress_bar,
|
||||
rebuild=options["command"] == "rebuild",
|
||||
scheduled=False,
|
||||
)
|
||||
def handle(self, *args: Any, **options: Any) -> None:
|
||||
llmindex_index(
|
||||
rebuild=options["command"] == "rebuild",
|
||||
scheduled=False,
|
||||
iter_wrapper=lambda docs: self.track(
|
||||
docs,
|
||||
description="Indexing documents...",
|
||||
),
|
||||
)
|
||||
|
||||
@@ -1,4 +1,12 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
from dataclasses import field
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from rich.table import Table
|
||||
from rich.text import Text
|
||||
|
||||
from documents.classifier import load_classifier
|
||||
from documents.management.commands.base import PaperlessCommand
|
||||
@@ -8,9 +16,162 @@ from documents.signals.handlers import set_document_type
|
||||
from documents.signals.handlers import set_storage_path
|
||||
from documents.signals.handlers import set_tags
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from rich.console import RenderableType
|
||||
|
||||
from documents.models import Correspondent
|
||||
from documents.models import DocumentType
|
||||
from documents.models import StoragePath
|
||||
from documents.models import Tag
|
||||
|
||||
logger = logging.getLogger("paperless.management.retagger")
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class RetaggerStats:
|
||||
"""Cumulative counters updated as the retagger processes documents.
|
||||
|
||||
Mutable by design -- fields are incremented in the processing loop.
|
||||
slots=True reduces per-instance memory overhead and speeds attribute access.
|
||||
"""
|
||||
|
||||
correspondents: int = 0
|
||||
document_types: int = 0
|
||||
tags_added: int = 0
|
||||
tags_removed: int = 0
|
||||
storage_paths: int = 0
|
||||
documents_processed: int = 0
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class DocumentSuggestion:
|
||||
"""Buffered classifier suggestions for a single document (suggest mode only).
|
||||
|
||||
Mutable by design -- fields are assigned incrementally as each setter runs.
|
||||
"""
|
||||
|
||||
document: Document
|
||||
correspondent: Correspondent | None = None
|
||||
document_type: DocumentType | None = None
|
||||
tags_to_add: frozenset[Tag] = field(default_factory=frozenset)
|
||||
tags_to_remove: frozenset[Tag] = field(default_factory=frozenset)
|
||||
storage_path: StoragePath | None = None
|
||||
|
||||
@property
|
||||
def has_suggestions(self) -> bool:
|
||||
return bool(
|
||||
self.correspondent is not None
|
||||
or self.document_type is not None
|
||||
or self.tags_to_add
|
||||
or self.tags_to_remove
|
||||
or self.storage_path is not None,
|
||||
)
|
||||
|
||||
|
||||
def _build_stats_table(stats: RetaggerStats, *, suggest: bool) -> Table:
    """
    Render the current counters as the one-row table shown under the bar.

    Args:
        stats: Counters to display.
        suggest: When True, the correspondent, document type and storage
            path cells read "N would set" instead of "N set", signalling
            that nothing has been written.

    Returns:
        A borderless table with one header row and one value row.
    """
    action = "would set" if suggest else "set"

    # (column header, cell text) pairs, in display order.
    layout = (
        ("Documents", str(stats.documents_processed)),
        ("Correspondents", f"{stats.correspondents} {action}"),
        ("Doc Types", f"{stats.document_types} {action}"),
        ("Tags (+)", f"+{stats.tags_added}"),
        ("Tags (-)", f"-{stats.tags_removed}"),
        ("Storage Paths", f"{stats.storage_paths} {action}"),
    )

    grid = Table(box=None, padding=(0, 2), show_header=True, header_style="bold")
    for header, _cell in layout:
        grid.add_column(header)
    grid.add_row(*(cell for _header, cell in layout))
    return grid
|
||||
|
||||
|
||||
def _build_suggestion_table(
    suggestions: list[DocumentSuggestion],
    base_url: str | None,
) -> Table:
    """
    Build the table of proposed changes printed after the run completes.

    Entries whose ``has_suggestions`` is False are left out. All names
    taken from the database (tags, correspondent, document type, storage
    path) are placed into ``Text`` objects rather than markup strings, so
    square brackets in a name are shown literally instead of being parsed
    as Rich console markup.

    Args:
        suggestions: Per-document proposals collected in suggest mode.
        base_url: When given, a dimmed ``<base_url>/documents/<pk>`` link
            is shown under the document name; otherwise the primary key
            is appended in square brackets.

    Returns:
        A table with one row per document that has a proposal.
    """
    table = Table(
        title="Suggested Changes",
        show_header=True,
        header_style="bold cyan",
        show_lines=True,
    )

    table.add_column("Document", style="bold", no_wrap=False, min_width=20)
    table.add_column("Correspondent")
    table.add_column("Doc Type")
    table.add_column("Tags")
    table.add_column("Storage Path")

    def _name_cell(value: object) -> Text:
        # Plain str cells are parsed as markup by Table; Text is not.
        return Text(str(value)) if value else Text("-")

    # Drop trailing slashes so the link never contains "//documents".
    link_root = base_url.rstrip("/") if base_url else None

    for suggestion in suggestions:
        if not suggestion.has_suggestions:
            continue

        doc = suggestion.document

        if base_url:
            doc_cell = Text()
            doc_cell.append(str(doc))
            doc_cell.append(f"\n{link_root}/documents/{doc.pk}", style="dim")
        else:
            doc_cell = Text(f"{doc} [{doc.pk}]")

        # (label, style) pairs: additions first, then removals, each by name.
        tag_parts: list[tuple[str, str]] = [
            (f"+{tag.name}", "green")
            for tag in sorted(suggestion.tags_to_add, key=lambda t: t.name)
        ]
        tag_parts.extend(
            (f"-{tag.name}", "red")
            for tag in sorted(suggestion.tags_to_remove, key=lambda t: t.name)
        )

        if tag_parts:
            tag_cell = Text()
            for position, (label, style) in enumerate(tag_parts):
                if position:
                    tag_cell.append(", ")
                # Styled append keeps the tag name literal (no markup parsing).
                tag_cell.append(label, style=style)
        else:
            tag_cell = Text("-")

        table.add_row(
            doc_cell,
            _name_cell(suggestion.correspondent),
            _name_cell(suggestion.document_type),
            tag_cell,
            _name_cell(suggestion.storage_path),
        )

    return table
|
||||
|
||||
|
||||
def _build_summary_table(stats: RetaggerStats) -> Table:
    """Render the final counters as a two-column "Metric / Count" table."""
    summary = Table(
        title="Retagger Summary",
        show_header=True,
        header_style="bold cyan",
    )
    summary.add_column("Metric", style="bold")
    summary.add_column("Count", justify="right")

    # (label, value) pairs in the order they are displayed.
    metrics = (
        ("Documents processed", stats.documents_processed),
        ("Correspondents set", stats.correspondents),
        ("Document types set", stats.document_types),
        ("Tags added", stats.tags_added),
        ("Tags removed", stats.tags_removed),
        ("Storage paths set", stats.storage_paths),
    )
    for label, value in metrics:
        summary.add_row(label, str(value))

    return summary
|
||||
|
||||
|
||||
class Command(PaperlessCommand):
|
||||
help = (
|
||||
"Using the current classification model, assigns correspondents, tags "
|
||||
@@ -19,7 +180,7 @@ class Command(PaperlessCommand):
|
||||
"modified) after their initial import."
|
||||
)
|
||||
|
||||
def add_arguments(self, parser):
|
||||
def add_arguments(self, parser) -> None:
|
||||
super().add_arguments(parser)
|
||||
parser.add_argument("-c", "--correspondent", default=False, action="store_true")
|
||||
parser.add_argument("-T", "--tags", default=False, action="store_true")
|
||||
@@ -31,9 +192,9 @@ class Command(PaperlessCommand):
|
||||
default=False,
|
||||
action="store_true",
|
||||
help=(
|
||||
"By default this command won't try to assign a correspondent "
|
||||
"if more than one matches the document. Use this flag if "
|
||||
"you'd rather it just pick the first one it finds."
|
||||
"By default this command will not try to assign a correspondent "
|
||||
"if more than one matches the document. Use this flag to pick "
|
||||
"the first match instead."
|
||||
),
|
||||
)
|
||||
parser.add_argument(
|
||||
@@ -42,91 +203,140 @@ class Command(PaperlessCommand):
|
||||
default=False,
|
||||
action="store_true",
|
||||
help=(
|
||||
"If set, the document retagger will overwrite any previously "
|
||||
"set correspondent, document and remove correspondents, types "
|
||||
"and tags that do not match anymore due to changed rules."
|
||||
"Overwrite any previously set correspondent, document type, and "
|
||||
"remove tags that no longer match due to changed rules."
|
||||
),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--suggest",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="Return the suggestion, don't change anything.",
|
||||
help="Show what would be changed without applying anything.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--base-url",
|
||||
help="The base URL to use to build the link to the documents.",
|
||||
help="Base URL used to build document links in suggest output.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--id-range",
|
||||
help="A range of document ids on which the retagging should be applied.",
|
||||
help="Restrict retagging to documents within this ID range (inclusive).",
|
||||
nargs=2,
|
||||
type=int,
|
||||
)
|
||||
|
||||
def handle(self, *args, **options):
|
||||
def handle(self, *args, **options) -> None:
|
||||
suggest: bool = options["suggest"]
|
||||
overwrite: bool = options["overwrite"]
|
||||
use_first: bool = options["use_first"]
|
||||
base_url: str | None = options["base_url"]
|
||||
|
||||
do_correspondent: bool = options["correspondent"]
|
||||
do_document_type: bool = options["document_type"]
|
||||
do_tags: bool = options["tags"]
|
||||
do_storage_path: bool = options["storage_path"]
|
||||
|
||||
if not any([do_correspondent, do_document_type, do_tags, do_storage_path]):
|
||||
self.console.print(
|
||||
"[yellow]No classifier targets specified. "
|
||||
"Use -c, -T, -t, or -s to select what to retag.[/yellow]",
|
||||
)
|
||||
return
|
||||
|
||||
if options["inbox_only"]:
|
||||
queryset = Document.objects.filter(tags__is_inbox_tag=True)
|
||||
else:
|
||||
queryset = Document.objects.all()
|
||||
|
||||
if options["id_range"]:
|
||||
queryset = queryset.filter(
|
||||
id__range=(options["id_range"][0], options["id_range"][1]),
|
||||
)
|
||||
lo, hi = options["id_range"]
|
||||
queryset = queryset.filter(id__range=(lo, hi))
|
||||
|
||||
documents = queryset.distinct()
|
||||
|
||||
classifier = load_classifier()
|
||||
|
||||
for document in self.track(documents, description="Retagging..."):
|
||||
if options["correspondent"]:
|
||||
set_correspondent(
|
||||
sender=None,
|
||||
document=document,
|
||||
classifier=classifier,
|
||||
replace=options["overwrite"],
|
||||
use_first=options["use_first"],
|
||||
suggest=options["suggest"],
|
||||
base_url=options["base_url"],
|
||||
stdout=self.stdout,
|
||||
style_func=self.style,
|
||||
)
|
||||
stats = RetaggerStats()
|
||||
suggestions: list[DocumentSuggestion] = []
|
||||
|
||||
if options["document_type"]:
|
||||
set_document_type(
|
||||
sender=None,
|
||||
document=document,
|
||||
classifier=classifier,
|
||||
replace=options["overwrite"],
|
||||
use_first=options["use_first"],
|
||||
suggest=options["suggest"],
|
||||
base_url=options["base_url"],
|
||||
stdout=self.stdout,
|
||||
style_func=self.style,
|
||||
)
|
||||
def render_stats() -> RenderableType:
|
||||
return _build_stats_table(stats, suggest=suggest)
|
||||
|
||||
if options["tags"]:
|
||||
set_tags(
|
||||
sender=None,
|
||||
document=document,
|
||||
classifier=classifier,
|
||||
replace=options["overwrite"],
|
||||
suggest=options["suggest"],
|
||||
base_url=options["base_url"],
|
||||
stdout=self.stdout,
|
||||
style_func=self.style,
|
||||
)
|
||||
with self.buffered_logging(
|
||||
"paperless",
|
||||
"paperless.handlers",
|
||||
"documents",
|
||||
) as log_buf:
|
||||
for document in self.track_with_stats(
|
||||
documents,
|
||||
description="Retagging...",
|
||||
stats_renderer=render_stats,
|
||||
):
|
||||
suggestion = DocumentSuggestion(document=document)
|
||||
|
||||
if options["storage_path"]:
|
||||
set_storage_path(
|
||||
sender=None,
|
||||
document=document,
|
||||
classifier=classifier,
|
||||
replace=options["overwrite"],
|
||||
use_first=options["use_first"],
|
||||
suggest=options["suggest"],
|
||||
base_url=options["base_url"],
|
||||
stdout=self.stdout,
|
||||
style_func=self.style,
|
||||
)
|
||||
if do_correspondent:
|
||||
correspondent = set_correspondent(
|
||||
None,
|
||||
document,
|
||||
classifier=classifier,
|
||||
replace=overwrite,
|
||||
use_first=use_first,
|
||||
dry_run=suggest,
|
||||
)
|
||||
if correspondent is not None:
|
||||
stats.correspondents += 1
|
||||
suggestion.correspondent = correspondent
|
||||
|
||||
if do_document_type:
|
||||
document_type = set_document_type(
|
||||
None,
|
||||
document,
|
||||
classifier=classifier,
|
||||
replace=overwrite,
|
||||
use_first=use_first,
|
||||
dry_run=suggest,
|
||||
)
|
||||
if document_type is not None:
|
||||
stats.document_types += 1
|
||||
suggestion.document_type = document_type
|
||||
|
||||
if do_tags:
|
||||
tags_to_add, tags_to_remove = set_tags(
|
||||
None,
|
||||
document,
|
||||
classifier=classifier,
|
||||
replace=overwrite,
|
||||
dry_run=suggest,
|
||||
)
|
||||
stats.tags_added += len(tags_to_add)
|
||||
stats.tags_removed += len(tags_to_remove)
|
||||
suggestion.tags_to_add = frozenset(tags_to_add)
|
||||
suggestion.tags_to_remove = frozenset(tags_to_remove)
|
||||
|
||||
if do_storage_path:
|
||||
storage_path = set_storage_path(
|
||||
None,
|
||||
document,
|
||||
classifier=classifier,
|
||||
replace=overwrite,
|
||||
use_first=use_first,
|
||||
dry_run=suggest,
|
||||
)
|
||||
if storage_path is not None:
|
||||
stats.storage_paths += 1
|
||||
suggestion.storage_path = storage_path
|
||||
|
||||
stats.documents_processed += 1
|
||||
|
||||
if suggest:
|
||||
suggestions.append(suggestion)
|
||||
|
||||
# Post-loop output
|
||||
if suggest:
|
||||
visible = [s for s in suggestions if s.has_suggestions]
|
||||
if visible:
|
||||
self.console.print(_build_suggestion_table(visible, base_url))
|
||||
else:
|
||||
self.console.print("[green]No changes suggested.[/green]")
|
||||
else:
|
||||
self.console.print(_build_summary_table(stats))
|
||||
|
||||
log_buf.render(self.console, min_level=logging.INFO, title="Retagger Log")
|
||||
|
||||
@@ -1,17 +1,117 @@
|
||||
from django.core.management.base import BaseCommand
|
||||
"""Management command to check the document archive for issues."""
|
||||
|
||||
from documents.management.commands.mixins import ProgressBarMixin
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from rich.panel import Panel
|
||||
from rich.table import Table
|
||||
from rich.text import Text
|
||||
|
||||
from documents.management.commands.base import PaperlessCommand
|
||||
from documents.models import Document
|
||||
from documents.sanity_checker import SanityCheckMessages
|
||||
from documents.sanity_checker import check_sanity
|
||||
|
||||
_LEVEL_STYLE: dict[int, tuple[str, str]] = {
|
||||
logging.ERROR: ("bold red", "ERROR"),
|
||||
logging.WARNING: ("yellow", "WARN"),
|
||||
logging.INFO: ("dim", "INFO"),
|
||||
}
|
||||
|
||||
class Command(ProgressBarMixin, BaseCommand):
|
||||
|
||||
class Command(PaperlessCommand):
|
||||
help = "This command checks your document archive for issues."
|
||||
|
||||
def add_arguments(self, parser):
|
||||
self.add_argument_progress_bar_mixin(parser)
|
||||
def _render_results(self, messages: SanityCheckMessages) -> None:
|
||||
"""Render sanity check results as a Rich table."""
|
||||
|
||||
def handle(self, *args, **options):
|
||||
self.handle_progress_bar_mixin(**options)
|
||||
messages = check_sanity(progress=self.use_progress_bar, scheduled=False)
|
||||
if (
|
||||
not messages.has_error
|
||||
and not messages.has_warning
|
||||
and not messages.has_info
|
||||
):
|
||||
self.console.print(
|
||||
Panel(
|
||||
"[green]No issues detected.[/green]",
|
||||
title="Sanity Check",
|
||||
border_style="green",
|
||||
),
|
||||
)
|
||||
return
|
||||
|
||||
messages.log_messages()
|
||||
# Build a lookup for document titles
|
||||
doc_pks = [pk for pk in messages.document_pks() if pk is not None]
|
||||
titles: dict[int, str] = {}
|
||||
if doc_pks:
|
||||
titles = dict(
|
||||
Document.global_objects.filter(pk__in=doc_pks)
|
||||
.only("pk", "title")
|
||||
.values_list("pk", "title"),
|
||||
)
|
||||
|
||||
table = Table(
|
||||
title="Sanity Check Results",
|
||||
show_lines=True,
|
||||
title_style="bold",
|
||||
)
|
||||
table.add_column("Level", width=7, no_wrap=True)
|
||||
table.add_column("Document", min_width=20)
|
||||
table.add_column("Issue", ratio=1)
|
||||
|
||||
for doc_pk, doc_messages in messages.iter_messages():
|
||||
if doc_pk is not None:
|
||||
title = titles.get(doc_pk, "Unknown")
|
||||
doc_label = f"#{doc_pk} {title}"
|
||||
else:
|
||||
doc_label = "(global)"
|
||||
|
||||
for msg in doc_messages:
|
||||
style, label = _LEVEL_STYLE.get(
|
||||
msg["level"],
|
||||
("dim", "INFO"),
|
||||
)
|
||||
table.add_row(
|
||||
Text(label, style=style),
|
||||
Text(doc_label),
|
||||
Text(str(msg["message"])),
|
||||
)
|
||||
|
||||
self.console.print(table)
|
||||
|
||||
parts: list[str] = []
|
||||
|
||||
if messages.document_error_count:
|
||||
parts.append(
|
||||
f"{messages.document_error_count} document(s) with [bold red]errors[/bold red]",
|
||||
)
|
||||
if messages.document_warning_count:
|
||||
parts.append(
|
||||
f"{messages.document_warning_count} document(s) with [yellow]warnings[/yellow]",
|
||||
)
|
||||
if messages.document_info_count:
|
||||
parts.append(f"{messages.document_info_count} document(s) with infos")
|
||||
if messages.global_warning_count:
|
||||
parts.append(
|
||||
f"{messages.global_warning_count} global [yellow]warning(s)[/yellow]",
|
||||
)
|
||||
|
||||
if parts:
|
||||
if len(parts) > 1:
|
||||
summary = ", ".join(parts[:-1]) + " and " + parts[-1]
|
||||
else:
|
||||
summary = parts[0]
|
||||
self.console.print(f"\nFound {summary}.")
|
||||
else:
|
||||
self.console.print("\nNo issues found.")
|
||||
|
||||
def handle(self, *args: Any, **options: Any) -> None:
|
||||
messages = check_sanity(
|
||||
scheduled=False,
|
||||
iter_wrapper=lambda docs: self.track(
|
||||
docs,
|
||||
description="Checking documents...",
|
||||
),
|
||||
)
|
||||
self._render_results(messages)
|
||||
|
||||
@@ -75,7 +75,7 @@ class MatchingModel(ModelWithOwner):
|
||||
|
||||
is_insensitive = models.BooleanField(_("is insensitive"), default=True)
|
||||
|
||||
class Meta:
|
||||
class Meta(ModelWithOwner.Meta):
|
||||
abstract = True
|
||||
ordering = ("name",)
|
||||
constraints = [
|
||||
|
||||
@@ -1,80 +1,174 @@
|
||||
"""
|
||||
Sanity checker for the Paperless-ngx document archive.
|
||||
|
||||
Verifies that all documents have valid files, correct checksums,
|
||||
and consistent metadata. Reports orphaned files in the media directory.
|
||||
|
||||
Progress display is the caller's responsibility -- pass an ``iter_wrapper``
|
||||
to wrap the document queryset (e.g., with a progress bar). The default
|
||||
is an identity function that adds no overhead.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import logging
|
||||
import uuid
|
||||
from collections import defaultdict
|
||||
from collections.abc import Callable
|
||||
from collections.abc import Iterable
|
||||
from collections.abc import Iterator
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
from typing import Final
|
||||
from typing import TypedDict
|
||||
from typing import TypeVar
|
||||
|
||||
from celery import states
|
||||
from django.conf import settings
|
||||
from django.utils import timezone
|
||||
from tqdm import tqdm
|
||||
|
||||
from documents.models import Document
|
||||
from documents.models import PaperlessTask
|
||||
from paperless.config import GeneralConfig
|
||||
|
||||
logger = logging.getLogger("paperless.sanity_checker")
|
||||
|
||||
_T = TypeVar("_T")
|
||||
IterWrapper = Callable[[Iterable[_T]], Iterable[_T]]
|
||||
|
||||
|
||||
class MessageEntry(TypedDict):
    """One sanity check finding: a severity level plus its text."""

    # Severity; the recording methods store logging.ERROR / WARNING / INFO here.
    level: int
    # Human-readable description of the finding.
    message: str
|
||||
|
||||
|
||||
def _identity(iterable: Iterable[_T]) -> Iterable[_T]:
|
||||
"""Pass through an iterable unchanged (default iter_wrapper)."""
|
||||
return iterable
|
||||
|
||||
|
||||
class SanityCheckMessages:
|
||||
def __init__(self) -> None:
|
||||
self._messages: dict[int, list[dict]] = defaultdict(list)
|
||||
self.has_error = False
|
||||
self.has_warning = False
|
||||
"""Collects sanity check messages grouped by document primary key.
|
||||
|
||||
def error(self, doc_pk, message) -> None:
|
||||
Messages are categorized as error, warning, or info. ``None`` is used
|
||||
as the key for messages not associated with a specific document
|
||||
(e.g., orphaned files).
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self._messages: dict[int | None, list[MessageEntry]] = defaultdict(list)
|
||||
self.has_error: bool = False
|
||||
self.has_warning: bool = False
|
||||
self.has_info: bool = False
|
||||
self.document_count: int = 0
|
||||
self.document_error_count: int = 0
|
||||
self.document_warning_count: int = 0
|
||||
self.document_info_count: int = 0
|
||||
self.global_warning_count: int = 0
|
||||
|
||||
# -- Recording ----------------------------------------------------------
|
||||
|
||||
def error(self, doc_pk: int | None, message: str) -> None:
|
||||
self._messages[doc_pk].append({"level": logging.ERROR, "message": message})
|
||||
self.has_error = True
|
||||
if doc_pk is not None:
|
||||
self.document_count += 1
|
||||
self.document_error_count += 1
|
||||
|
||||
def warning(self, doc_pk, message) -> None:
|
||||
def warning(self, doc_pk: int | None, message: str) -> None:
|
||||
self._messages[doc_pk].append({"level": logging.WARNING, "message": message})
|
||||
self.has_warning = True
|
||||
|
||||
def info(self, doc_pk, message) -> None:
|
||||
if doc_pk is not None:
|
||||
self.document_count += 1
|
||||
self.document_warning_count += 1
|
||||
else:
|
||||
# This is the only type of global message we do right now
|
||||
self.global_warning_count += 1
|
||||
|
||||
def info(self, doc_pk: int | None, message: str) -> None:
|
||||
self._messages[doc_pk].append({"level": logging.INFO, "message": message})
|
||||
self.has_info = True
|
||||
|
||||
if doc_pk is not None:
|
||||
self.document_count += 1
|
||||
self.document_info_count += 1
|
||||
|
||||
# -- Iteration / query --------------------------------------------------
|
||||
|
||||
def document_pks(self) -> list[int | None]:
|
||||
"""Return all document PKs (including None for global messages)."""
|
||||
return list(self._messages.keys())
|
||||
|
||||
def iter_messages(self) -> Iterator[tuple[int | None, list[MessageEntry]]]:
|
||||
"""Iterate over (doc_pk, messages) pairs."""
|
||||
yield from self._messages.items()
|
||||
|
||||
def __getitem__(self, item: int | None) -> list[MessageEntry]:
|
||||
return self._messages[item]
|
||||
|
||||
# -- Summarize Helpers --------------------------------------------------
|
||||
|
||||
@property
|
||||
def has_global_issues(self) -> bool:
|
||||
return None in self._messages
|
||||
|
||||
@property
|
||||
def total_issue_count(self) -> int:
|
||||
"""Total number of error and warning messages across all documents and global."""
|
||||
return (
|
||||
self.document_error_count
|
||||
+ self.document_warning_count
|
||||
+ self.global_warning_count
|
||||
)
|
||||
|
||||
# -- Logging output (used by Celery task path) --------------------------
|
||||
|
||||
def log_messages(self) -> None:
|
||||
logger = logging.getLogger("paperless.sanity_checker")
|
||||
"""Write all messages to the ``paperless.sanity_checker`` logger.
|
||||
|
||||
This is the output path for headless / Celery execution.
|
||||
Management commands use Rich rendering instead.
|
||||
"""
|
||||
if len(self._messages) == 0:
|
||||
logger.info("Sanity checker detected no issues.")
|
||||
else:
|
||||
# Query once
|
||||
all_docs = Document.global_objects.all()
|
||||
return
|
||||
|
||||
for doc_pk in self._messages:
|
||||
if doc_pk is not None:
|
||||
doc = all_docs.get(pk=doc_pk)
|
||||
logger.info(
|
||||
f"Detected following issue(s) with document #{doc.pk},"
|
||||
f" titled {doc.title}",
|
||||
)
|
||||
for msg in self._messages[doc_pk]:
|
||||
logger.log(msg["level"], msg["message"])
|
||||
doc_pks = [pk for pk in self._messages if pk is not None]
|
||||
titles: dict[int, str] = {}
|
||||
if doc_pks:
|
||||
titles = dict(
|
||||
Document.global_objects.filter(pk__in=doc_pks)
|
||||
.only("pk", "title")
|
||||
.values_list("pk", "title"),
|
||||
)
|
||||
|
||||
def __len__(self):
|
||||
return len(self._messages)
|
||||
|
||||
def __getitem__(self, item):
|
||||
return self._messages[item]
|
||||
for doc_pk, entries in self._messages.items():
|
||||
if doc_pk is not None:
|
||||
title = titles.get(doc_pk, "Unknown")
|
||||
logger.info(
|
||||
"Detected following issue(s) with document #%s, titled %s",
|
||||
doc_pk,
|
||||
title,
|
||||
)
|
||||
for msg in entries:
|
||||
logger.log(msg["level"], msg["message"])
|
||||
|
||||
|
||||
class SanityCheckFailedException(Exception):
|
||||
pass
|
||||
|
||||
|
||||
def check_sanity(*, progress=False, scheduled=True) -> SanityCheckMessages:
|
||||
paperless_task = PaperlessTask.objects.create(
|
||||
task_id=uuid.uuid4(),
|
||||
type=PaperlessTask.TaskType.SCHEDULED_TASK
|
||||
if scheduled
|
||||
else PaperlessTask.TaskType.MANUAL_TASK,
|
||||
task_name=PaperlessTask.TaskName.CHECK_SANITY,
|
||||
status=states.STARTED,
|
||||
date_created=timezone.now(),
|
||||
date_started=timezone.now(),
|
||||
)
|
||||
messages = SanityCheckMessages()
|
||||
# ---------------------------------------------------------------------------
|
||||
# Internal helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _build_present_files() -> set[Path]:
|
||||
"""Collect all files in MEDIA_ROOT, excluding directories and ignorable files."""
|
||||
present_files = {
|
||||
x.resolve()
|
||||
for x in Path(settings.MEDIA_ROOT).glob("**/*")
|
||||
@@ -82,95 +176,178 @@ def check_sanity(*, progress=False, scheduled=True) -> SanityCheckMessages:
|
||||
}
|
||||
|
||||
lockfile = Path(settings.MEDIA_LOCK).resolve()
|
||||
if lockfile in present_files:
|
||||
present_files.remove(lockfile)
|
||||
present_files.discard(lockfile)
|
||||
|
||||
general_config = GeneralConfig()
|
||||
app_logo = general_config.app_logo or settings.APP_LOGO
|
||||
if app_logo:
|
||||
logo_file = Path(settings.MEDIA_ROOT / Path(app_logo.lstrip("/"))).resolve()
|
||||
if logo_file in present_files:
|
||||
present_files.remove(logo_file)
|
||||
present_files.discard(logo_file)
|
||||
|
||||
for doc in tqdm(Document.global_objects.all(), disable=not progress):
|
||||
# Check sanity of the thumbnail
|
||||
thumbnail_path: Final[Path] = Path(doc.thumbnail_path).resolve()
|
||||
if not thumbnail_path.exists() or not thumbnail_path.is_file():
|
||||
messages.error(doc.pk, "Thumbnail of document does not exist.")
|
||||
else:
|
||||
if thumbnail_path in present_files:
|
||||
present_files.remove(thumbnail_path)
|
||||
try:
|
||||
_ = thumbnail_path.read_bytes()
|
||||
except OSError as e:
|
||||
messages.error(doc.pk, f"Cannot read thumbnail file of document: {e}")
|
||||
return present_files
|
||||
|
||||
# Check sanity of the original file
|
||||
# TODO: extract method
|
||||
source_path: Final[Path] = Path(doc.source_path).resolve()
|
||||
if not source_path.exists() or not source_path.is_file():
|
||||
messages.error(doc.pk, "Original of document does not exist.")
|
||||
else:
|
||||
if source_path in present_files:
|
||||
present_files.remove(source_path)
|
||||
try:
|
||||
checksum = hashlib.md5(source_path.read_bytes()).hexdigest()
|
||||
except OSError as e:
|
||||
messages.error(doc.pk, f"Cannot read original file of document: {e}")
|
||||
else:
|
||||
if checksum != doc.checksum:
|
||||
messages.error(
|
||||
doc.pk,
|
||||
"Checksum mismatch. "
|
||||
f"Stored: {doc.checksum}, actual: {checksum}.",
|
||||
)
|
||||
|
||||
# Check sanity of the archive file.
|
||||
if doc.archive_checksum is not None and doc.archive_filename is None:
|
||||
def _check_thumbnail(
|
||||
doc: Document,
|
||||
messages: SanityCheckMessages,
|
||||
present_files: set[Path],
|
||||
) -> None:
|
||||
"""Verify the thumbnail exists and is readable."""
|
||||
thumbnail_path: Final[Path] = Path(doc.thumbnail_path).resolve()
|
||||
if not thumbnail_path.exists() or not thumbnail_path.is_file():
|
||||
messages.error(doc.pk, "Thumbnail of document does not exist.")
|
||||
return
|
||||
|
||||
present_files.discard(thumbnail_path)
|
||||
try:
|
||||
_ = thumbnail_path.read_bytes()
|
||||
except OSError as e:
|
||||
messages.error(doc.pk, f"Cannot read thumbnail file of document: {e}")
|
||||
|
||||
|
||||
def _check_original(
|
||||
doc: Document,
|
||||
messages: SanityCheckMessages,
|
||||
present_files: set[Path],
|
||||
) -> None:
|
||||
"""Verify the original file exists, is readable, and has matching checksum."""
|
||||
source_path: Final[Path] = Path(doc.source_path).resolve()
|
||||
if not source_path.exists() or not source_path.is_file():
|
||||
messages.error(doc.pk, "Original of document does not exist.")
|
||||
return
|
||||
|
||||
present_files.discard(source_path)
|
||||
try:
|
||||
checksum = hashlib.md5(source_path.read_bytes()).hexdigest()
|
||||
except OSError as e:
|
||||
messages.error(doc.pk, f"Cannot read original file of document: {e}")
|
||||
else:
|
||||
if checksum != doc.checksum:
|
||||
messages.error(
|
||||
doc.pk,
|
||||
"Document has an archive file checksum, but no archive filename.",
|
||||
f"Checksum mismatch. Stored: {doc.checksum}, actual: {checksum}.",
|
||||
)
|
||||
elif doc.archive_checksum is None and doc.archive_filename is not None:
|
||||
|
||||
|
||||
def _check_archive(
|
||||
doc: Document,
|
||||
messages: SanityCheckMessages,
|
||||
present_files: set[Path],
|
||||
) -> None:
|
||||
"""Verify archive file consistency: checksum/filename pairing and file integrity."""
|
||||
if doc.archive_checksum is not None and doc.archive_filename is None:
|
||||
messages.error(
|
||||
doc.pk,
|
||||
"Document has an archive file checksum, but no archive filename.",
|
||||
)
|
||||
elif doc.archive_checksum is None and doc.archive_filename is not None:
|
||||
messages.error(
|
||||
doc.pk,
|
||||
"Document has an archive file, but its checksum is missing.",
|
||||
)
|
||||
elif doc.has_archive_version:
|
||||
if TYPE_CHECKING:
|
||||
assert isinstance(doc.archive_path, Path)
|
||||
archive_path: Final[Path] = Path(doc.archive_path).resolve()
|
||||
if not archive_path.exists() or not archive_path.is_file():
|
||||
messages.error(doc.pk, "Archived version of document does not exist.")
|
||||
return
|
||||
|
||||
present_files.discard(archive_path)
|
||||
try:
|
||||
checksum = hashlib.md5(archive_path.read_bytes()).hexdigest()
|
||||
except OSError as e:
|
||||
messages.error(
|
||||
doc.pk,
|
||||
"Document has an archive file, but its checksum is missing.",
|
||||
f"Cannot read archive file of document: {e}",
|
||||
)
|
||||
elif doc.has_archive_version:
|
||||
archive_path: Final[Path] = Path(doc.archive_path).resolve()
|
||||
if not archive_path.exists() or not archive_path.is_file():
|
||||
messages.error(doc.pk, "Archived version of document does not exist.")
|
||||
else:
|
||||
if archive_path in present_files:
|
||||
present_files.remove(archive_path)
|
||||
try:
|
||||
checksum = hashlib.md5(archive_path.read_bytes()).hexdigest()
|
||||
except OSError as e:
|
||||
messages.error(
|
||||
doc.pk,
|
||||
f"Cannot read archive file of document : {e}",
|
||||
)
|
||||
else:
|
||||
if checksum != doc.archive_checksum:
|
||||
messages.error(
|
||||
doc.pk,
|
||||
"Checksum mismatch of archived document. "
|
||||
f"Stored: {doc.archive_checksum}, "
|
||||
f"actual: {checksum}.",
|
||||
)
|
||||
else:
|
||||
if checksum != doc.archive_checksum:
|
||||
messages.error(
|
||||
doc.pk,
|
||||
"Checksum mismatch of archived document. "
|
||||
f"Stored: {doc.archive_checksum}, actual: {checksum}.",
|
||||
)
|
||||
|
||||
# other document checks
|
||||
if not doc.content:
|
||||
messages.info(doc.pk, "Document contains no OCR data")
|
||||
|
||||
def _check_content(doc: Document, messages: SanityCheckMessages) -> None:
|
||||
"""Flag documents with no OCR content."""
|
||||
if not doc.content:
|
||||
messages.info(doc.pk, "Document contains no OCR data")
|
||||
|
||||
|
||||
def _check_document(
|
||||
doc: Document,
|
||||
messages: SanityCheckMessages,
|
||||
present_files: set[Path],
|
||||
) -> None:
|
||||
"""Run all checks for a single document."""
|
||||
_check_thumbnail(doc, messages, present_files)
|
||||
_check_original(doc, messages, present_files)
|
||||
_check_archive(doc, messages, present_files)
|
||||
_check_content(doc, messages)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public entry point
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def check_sanity(
|
||||
*,
|
||||
scheduled: bool = True,
|
||||
iter_wrapper: IterWrapper[Document] = _identity,
|
||||
) -> SanityCheckMessages:
|
||||
"""Run a full sanity check on the document archive.
|
||||
|
||||
Args:
|
||||
scheduled: Whether this is a scheduled (automatic) or manual check.
|
||||
Controls the task type recorded in the database.
|
||||
iter_wrapper: A callable that wraps the document iterable, e.g.,
|
||||
for progress bar display. Defaults to identity (no wrapping).
|
||||
|
||||
Returns:
|
||||
A SanityCheckMessages instance containing all detected issues.
|
||||
"""
|
||||
paperless_task = PaperlessTask.objects.create(
|
||||
task_id=uuid.uuid4(),
|
||||
type=(
|
||||
PaperlessTask.TaskType.SCHEDULED_TASK
|
||||
if scheduled
|
||||
else PaperlessTask.TaskType.MANUAL_TASK
|
||||
),
|
||||
task_name=PaperlessTask.TaskName.CHECK_SANITY,
|
||||
status=states.STARTED,
|
||||
date_created=timezone.now(),
|
||||
date_started=timezone.now(),
|
||||
)
|
||||
|
||||
messages = SanityCheckMessages()
|
||||
present_files = _build_present_files()
|
||||
|
||||
documents = Document.global_objects.all()
|
||||
for doc in iter_wrapper(documents):
|
||||
_check_document(doc, messages, present_files)
|
||||
|
||||
for extra_file in present_files:
|
||||
messages.warning(None, f"Orphaned file in media dir: {extra_file}")
|
||||
|
||||
paperless_task.status = states.SUCCESS if not messages.has_error else states.FAILURE
|
||||
# result is concatenated messages
|
||||
paperless_task.result = f"{len(messages)} issues found."
|
||||
if messages.has_error:
|
||||
paperless_task.result += " Check logs for details."
|
||||
if messages.total_issue_count == 0:
|
||||
paperless_task.result = "No issues found."
|
||||
else:
|
||||
parts: list[str] = []
|
||||
if messages.document_error_count:
|
||||
parts.append(f"{messages.document_error_count} document(s) with errors")
|
||||
if messages.document_warning_count:
|
||||
parts.append(f"{messages.document_warning_count} document(s) with warnings")
|
||||
if messages.global_warning_count:
|
||||
parts.append(f"{messages.global_warning_count} global warning(s)")
|
||||
paperless_task.result = ", ".join(parts) + " found."
|
||||
if messages.has_error:
|
||||
paperless_task.result += " Check logs for details."
|
||||
|
||||
paperless_task.date_done = timezone.now()
|
||||
paperless_task.save(update_fields=["status", "result", "date_done"])
|
||||
|
||||
return messages
|
||||
|
||||
@@ -4,6 +4,7 @@ import logging
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
from typing import Any
|
||||
|
||||
from celery import shared_task
|
||||
from celery import states
|
||||
@@ -32,12 +33,14 @@ from documents.file_handling import create_source_path_directory
|
||||
from documents.file_handling import delete_empty_directories
|
||||
from documents.file_handling import generate_filename
|
||||
from documents.file_handling import generate_unique_filename
|
||||
from documents.models import Correspondent
|
||||
from documents.models import CustomField
|
||||
from documents.models import CustomFieldInstance
|
||||
from documents.models import Document
|
||||
from documents.models import MatchingModel
|
||||
from documents.models import DocumentType
|
||||
from documents.models import PaperlessTask
|
||||
from documents.models import SavedView
|
||||
from documents.models import StoragePath
|
||||
from documents.models import Tag
|
||||
from documents.models import UiSettings
|
||||
from documents.models import Workflow
|
||||
@@ -81,47 +84,41 @@ def add_inbox_tags(sender, document: Document, logging_group=None, **kwargs) ->
|
||||
document.add_nested_tags(inbox_tags)
|
||||
|
||||
|
||||
def _suggestion_printer(
|
||||
stdout,
|
||||
style_func,
|
||||
suggestion_type: str,
|
||||
document: Document,
|
||||
selected: MatchingModel,
|
||||
base_url: str | None = None,
|
||||
) -> None:
|
||||
"""
|
||||
Smaller helper to reduce duplication when just outputting suggestions to the console
|
||||
"""
|
||||
doc_str = str(document)
|
||||
if base_url is not None:
|
||||
stdout.write(style_func.SUCCESS(doc_str))
|
||||
stdout.write(style_func.SUCCESS(f"{base_url}/documents/{document.pk}"))
|
||||
else:
|
||||
stdout.write(style_func.SUCCESS(f"{doc_str} [{document.pk}]"))
|
||||
stdout.write(f"Suggest {suggestion_type}: {selected}")
|
||||
|
||||
|
||||
def set_correspondent(
|
||||
sender,
|
||||
sender: object,
|
||||
document: Document,
|
||||
*,
|
||||
logging_group=None,
|
||||
logging_group: object = None,
|
||||
classifier: DocumentClassifier | None = None,
|
||||
replace=False,
|
||||
use_first=True,
|
||||
suggest=False,
|
||||
base_url=None,
|
||||
stdout=None,
|
||||
style_func=None,
|
||||
**kwargs,
|
||||
) -> None:
|
||||
replace: bool = False,
|
||||
use_first: bool = True,
|
||||
dry_run: bool = False,
|
||||
**kwargs: Any,
|
||||
) -> Correspondent | None:
|
||||
"""
|
||||
Assign a correspondent to a document based on classifier results.
|
||||
|
||||
Args:
|
||||
document: The document to classify.
|
||||
logging_group: Optional logging group for structured log output.
|
||||
classifier: The trained classifier. If None, only rule-based matching runs.
|
||||
replace: If True, overwrite an existing correspondent assignment.
|
||||
use_first: If True, pick the first match when multiple correspondents
|
||||
match. If False, skip assignment when multiple match.
|
||||
dry_run: If True, compute and return the selection without saving.
|
||||
**kwargs: Absorbed for Django signal compatibility (e.g. sender, signal).
|
||||
|
||||
Returns:
|
||||
The correspondent that was (or would be) assigned, or None if no match
|
||||
was found or assignment was skipped.
|
||||
"""
|
||||
if document.correspondent and not replace:
|
||||
return
|
||||
return None
|
||||
|
||||
potential_correspondents = matching.match_correspondents(document, classifier)
|
||||
|
||||
potential_count = len(potential_correspondents)
|
||||
selected = potential_correspondents[0] if potential_correspondents else None
|
||||
|
||||
if potential_count > 1:
|
||||
if use_first:
|
||||
logger.debug(
|
||||
@@ -135,49 +132,53 @@ def set_correspondent(
|
||||
f"not assigning any correspondent",
|
||||
extra={"group": logging_group},
|
||||
)
|
||||
return
|
||||
return None
|
||||
|
||||
if selected or replace:
|
||||
if suggest:
|
||||
_suggestion_printer(
|
||||
stdout,
|
||||
style_func,
|
||||
"correspondent",
|
||||
document,
|
||||
selected,
|
||||
base_url,
|
||||
)
|
||||
else:
|
||||
logger.info(
|
||||
f"Assigning correspondent {selected} to {document}",
|
||||
extra={"group": logging_group},
|
||||
)
|
||||
if (selected or replace) and not dry_run:
|
||||
logger.info(
|
||||
f"Assigning correspondent {selected} to {document}",
|
||||
extra={"group": logging_group},
|
||||
)
|
||||
document.correspondent = selected
|
||||
document.save(update_fields=("correspondent",))
|
||||
|
||||
document.correspondent = selected
|
||||
document.save(update_fields=("correspondent",))
|
||||
return selected
|
||||
|
||||
|
||||
def set_document_type(
|
||||
sender,
|
||||
sender: object,
|
||||
document: Document,
|
||||
*,
|
||||
logging_group=None,
|
||||
logging_group: object = None,
|
||||
classifier: DocumentClassifier | None = None,
|
||||
replace=False,
|
||||
use_first=True,
|
||||
suggest=False,
|
||||
base_url=None,
|
||||
stdout=None,
|
||||
style_func=None,
|
||||
**kwargs,
|
||||
) -> None:
|
||||
replace: bool = False,
|
||||
use_first: bool = True,
|
||||
dry_run: bool = False,
|
||||
**kwargs: Any,
|
||||
) -> DocumentType | None:
|
||||
"""
|
||||
Assign a document type to a document based on classifier results.
|
||||
|
||||
Args:
|
||||
document: The document to classify.
|
||||
logging_group: Optional logging group for structured log output.
|
||||
classifier: The trained classifier. If None, only rule-based matching runs.
|
||||
replace: If True, overwrite an existing document type assignment.
|
||||
use_first: If True, pick the first match when multiple types match.
|
||||
If False, skip assignment when multiple match.
|
||||
dry_run: If True, compute and return the selection without saving.
|
||||
**kwargs: Absorbed for Django signal compatibility (e.g. sender, signal).
|
||||
|
||||
Returns:
|
||||
The document type that was (or would be) assigned, or None if no match
|
||||
was found or assignment was skipped.
|
||||
"""
|
||||
if document.document_type and not replace:
|
||||
return
|
||||
return None
|
||||
|
||||
potential_document_type = matching.match_document_types(document, classifier)
|
||||
|
||||
potential_count = len(potential_document_type)
|
||||
selected = potential_document_type[0] if potential_document_type else None
|
||||
potential_document_types = matching.match_document_types(document, classifier)
|
||||
potential_count = len(potential_document_types)
|
||||
selected = potential_document_types[0] if potential_document_types else None
|
||||
|
||||
if potential_count > 1:
|
||||
if use_first:
|
||||
@@ -192,42 +193,64 @@ def set_document_type(
|
||||
f"not assigning any document type",
|
||||
extra={"group": logging_group},
|
||||
)
|
||||
return
|
||||
return None
|
||||
|
||||
if selected or replace:
|
||||
if suggest:
|
||||
_suggestion_printer(
|
||||
stdout,
|
||||
style_func,
|
||||
"document type",
|
||||
document,
|
||||
selected,
|
||||
base_url,
|
||||
)
|
||||
else:
|
||||
logger.info(
|
||||
f"Assigning document type {selected} to {document}",
|
||||
extra={"group": logging_group},
|
||||
)
|
||||
if (selected or replace) and not dry_run:
|
||||
logger.info(
|
||||
f"Assigning document type {selected} to {document}",
|
||||
extra={"group": logging_group},
|
||||
)
|
||||
document.document_type = selected
|
||||
document.save(update_fields=("document_type",))
|
||||
|
||||
document.document_type = selected
|
||||
document.save(update_fields=("document_type",))
|
||||
return selected
|
||||
|
||||
|
||||
def set_tags(
|
||||
sender,
|
||||
sender: object,
|
||||
document: Document,
|
||||
*,
|
||||
logging_group=None,
|
||||
logging_group: object = None,
|
||||
classifier: DocumentClassifier | None = None,
|
||||
replace=False,
|
||||
suggest=False,
|
||||
base_url=None,
|
||||
stdout=None,
|
||||
style_func=None,
|
||||
**kwargs,
|
||||
) -> None:
|
||||
replace: bool = False,
|
||||
dry_run: bool = False,
|
||||
**kwargs: Any,
|
||||
) -> tuple[set[Tag], set[Tag]]:
|
||||
"""
|
||||
Assign tags to a document based on classifier results.
|
||||
|
||||
When replace=True, existing auto-matched and rule-matched tags are removed
|
||||
before applying the new set (inbox tags and manually-added tags are preserved).
|
||||
|
||||
Args:
|
||||
document: The document to classify.
|
||||
logging_group: Optional logging group for structured log output.
|
||||
classifier: The trained classifier. If None, only rule-based matching runs.
|
||||
replace: If True, remove existing classifier-managed tags before applying
|
||||
new ones. Inbox tags and manually-added tags are always preserved.
|
||||
dry_run: If True, compute what would change without saving anything.
|
||||
**kwargs: Absorbed for Django signal compatibility (e.g. sender, signal).
|
||||
|
||||
Returns:
|
||||
A two-tuple of (tags_added, tags_removed). In non-replace mode,
|
||||
tags_removed is always an empty set. In dry_run mode, neither set
|
||||
is applied to the database.
|
||||
"""
|
||||
# Compute which tags would be removed under replace mode.
|
||||
# The filter mirrors the .delete() call below: keep inbox tags and
|
||||
# manually-added tags (match="" and not auto-matched).
|
||||
if replace:
|
||||
tags_to_remove: set[Tag] = set(
|
||||
document.tags.exclude(
|
||||
is_inbox_tag=True,
|
||||
).exclude(
|
||||
Q(match="") & ~Q(matching_algorithm=Tag.MATCH_AUTO),
|
||||
),
|
||||
)
|
||||
else:
|
||||
tags_to_remove = set()
|
||||
|
||||
if replace and not dry_run:
|
||||
Document.tags.through.objects.filter(document=document).exclude(
|
||||
Q(tag__is_inbox_tag=True),
|
||||
).exclude(
|
||||
@@ -235,65 +258,53 @@ def set_tags(
|
||||
).delete()
|
||||
|
||||
current_tags = set(document.tags.all())
|
||||
|
||||
matched_tags = matching.match_tags(document, classifier)
|
||||
tags_to_add = set(matched_tags) - current_tags
|
||||
|
||||
relevant_tags = set(matched_tags) - current_tags
|
||||
|
||||
if suggest:
|
||||
extra_tags = current_tags - set(matched_tags)
|
||||
extra_tags = [
|
||||
t for t in extra_tags if t.matching_algorithm == MatchingModel.MATCH_AUTO
|
||||
]
|
||||
if not relevant_tags and not extra_tags:
|
||||
return
|
||||
doc_str = style_func.SUCCESS(str(document))
|
||||
if base_url:
|
||||
stdout.write(doc_str)
|
||||
stdout.write(f"{base_url}/documents/{document.pk}")
|
||||
else:
|
||||
stdout.write(doc_str + style_func.SUCCESS(f" [{document.pk}]"))
|
||||
if relevant_tags:
|
||||
stdout.write("Suggest tags: " + ", ".join([t.name for t in relevant_tags]))
|
||||
if extra_tags:
|
||||
stdout.write("Extra tags: " + ", ".join([t.name for t in extra_tags]))
|
||||
else:
|
||||
if not relevant_tags:
|
||||
return
|
||||
|
||||
message = 'Tagging "{}" with "{}"'
|
||||
if tags_to_add and not dry_run:
|
||||
logger.info(
|
||||
message.format(document, ", ".join([t.name for t in relevant_tags])),
|
||||
f'Tagging "{document}" with "{", ".join(t.name for t in tags_to_add)}"',
|
||||
extra={"group": logging_group},
|
||||
)
|
||||
document.add_nested_tags(tags_to_add)
|
||||
|
||||
document.add_nested_tags(relevant_tags)
|
||||
return tags_to_add, tags_to_remove
|
||||
|
||||
|
||||
def set_storage_path(
|
||||
sender,
|
||||
sender: object,
|
||||
document: Document,
|
||||
*,
|
||||
logging_group=None,
|
||||
logging_group: object = None,
|
||||
classifier: DocumentClassifier | None = None,
|
||||
replace=False,
|
||||
use_first=True,
|
||||
suggest=False,
|
||||
base_url=None,
|
||||
stdout=None,
|
||||
style_func=None,
|
||||
**kwargs,
|
||||
) -> None:
|
||||
replace: bool = False,
|
||||
use_first: bool = True,
|
||||
dry_run: bool = False,
|
||||
**kwargs: Any,
|
||||
) -> StoragePath | None:
|
||||
"""
|
||||
Assign a storage path to a document based on classifier results.
|
||||
|
||||
Args:
|
||||
document: The document to classify.
|
||||
logging_group: Optional logging group for structured log output.
|
||||
classifier: The trained classifier. If None, only rule-based matching runs.
|
||||
replace: If True, overwrite an existing storage path assignment.
|
||||
use_first: If True, pick the first match when multiple paths match.
|
||||
If False, skip assignment when multiple match.
|
||||
dry_run: If True, compute and return the selection without saving.
|
||||
**kwargs: Absorbed for Django signal compatibility (e.g. sender, signal).
|
||||
|
||||
Returns:
|
||||
The storage path that was (or would be) assigned, or None if no match
|
||||
was found or assignment was skipped.
|
||||
"""
|
||||
if document.storage_path and not replace:
|
||||
return
|
||||
return None
|
||||
|
||||
potential_storage_path = matching.match_storage_paths(
|
||||
document,
|
||||
classifier,
|
||||
)
|
||||
|
||||
potential_count = len(potential_storage_path)
|
||||
selected = potential_storage_path[0] if potential_storage_path else None
|
||||
potential_storage_paths = matching.match_storage_paths(document, classifier)
|
||||
potential_count = len(potential_storage_paths)
|
||||
selected = potential_storage_paths[0] if potential_storage_paths else None
|
||||
|
||||
if potential_count > 1:
|
||||
if use_first:
|
||||
@@ -308,26 +319,17 @@ def set_storage_path(
|
||||
f"not assigning any storage directory",
|
||||
extra={"group": logging_group},
|
||||
)
|
||||
return
|
||||
return None
|
||||
|
||||
if selected or replace:
|
||||
if suggest:
|
||||
_suggestion_printer(
|
||||
stdout,
|
||||
style_func,
|
||||
"storage directory",
|
||||
document,
|
||||
selected,
|
||||
base_url,
|
||||
)
|
||||
else:
|
||||
logger.info(
|
||||
f"Assigning storage path {selected} to {document}",
|
||||
extra={"group": logging_group},
|
||||
)
|
||||
if (selected or replace) and not dry_run:
|
||||
logger.info(
|
||||
f"Assigning storage path {selected} to {document}",
|
||||
extra={"group": logging_group},
|
||||
)
|
||||
document.storage_path = selected
|
||||
document.save(update_fields=("storage_path",))
|
||||
|
||||
document.storage_path = selected
|
||||
document.save(update_fields=("storage_path",))
|
||||
return selected
|
||||
|
||||
|
||||
# see empty_trash in documents/tasks.py for signal handling
|
||||
|
||||
@@ -4,11 +4,13 @@ import logging
|
||||
import shutil
|
||||
import uuid
|
||||
import zipfile
|
||||
from collections.abc import Callable
|
||||
from collections.abc import Iterable
|
||||
from pathlib import Path
|
||||
from tempfile import TemporaryDirectory
|
||||
from tempfile import mkstemp
|
||||
from typing import TypeVar
|
||||
|
||||
import tqdm
|
||||
from celery import Task
|
||||
from celery import shared_task
|
||||
from celery import states
|
||||
@@ -66,11 +68,19 @@ from paperless_ai.indexing import llm_index_add_or_update_document
|
||||
from paperless_ai.indexing import llm_index_remove_document
|
||||
from paperless_ai.indexing import update_llm_index
|
||||
|
||||
_T = TypeVar("_T")
|
||||
IterWrapper = Callable[[Iterable[_T]], Iterable[_T]]
|
||||
|
||||
|
||||
if settings.AUDIT_LOG_ENABLED:
|
||||
from auditlog.models import LogEntry
|
||||
logger = logging.getLogger("paperless.tasks")
|
||||
|
||||
|
||||
def _identity(iterable: Iterable[_T]) -> Iterable[_T]:
|
||||
return iterable
|
||||
|
||||
|
||||
@shared_task
|
||||
def index_optimize() -> None:
|
||||
ix = index.open_index()
|
||||
@@ -78,13 +88,13 @@ def index_optimize() -> None:
|
||||
writer.commit(optimize=True)
|
||||
|
||||
|
||||
def index_reindex(*, progress_bar_disable=False) -> None:
|
||||
def index_reindex(*, iter_wrapper: IterWrapper[Document] = _identity) -> None:
|
||||
documents = Document.objects.all()
|
||||
|
||||
ix = index.open_index(recreate=True)
|
||||
|
||||
with AsyncWriter(ix) as writer:
|
||||
for document in tqdm.tqdm(documents, disable=progress_bar_disable):
|
||||
for document in iter_wrapper(documents):
|
||||
index.update_document(writer, document)
|
||||
|
||||
|
||||
@@ -227,20 +237,30 @@ def consume_file(
|
||||
@shared_task
|
||||
def sanity_check(*, scheduled=True, raise_on_error=True):
|
||||
messages = sanity_checker.check_sanity(scheduled=scheduled)
|
||||
|
||||
messages.log_messages()
|
||||
|
||||
if not messages.has_error and not messages.has_warning and not messages.has_info:
|
||||
return "No issues detected."
|
||||
|
||||
parts: list[str] = []
|
||||
if messages.document_error_count:
|
||||
parts.append(f"{messages.document_error_count} document(s) with errors")
|
||||
if messages.document_warning_count:
|
||||
parts.append(f"{messages.document_warning_count} document(s) with warnings")
|
||||
if messages.document_info_count:
|
||||
parts.append(f"{messages.document_info_count} document(s) with infos")
|
||||
if messages.global_warning_count:
|
||||
parts.append(f"{messages.global_warning_count} global warning(s)")
|
||||
|
||||
summary = ", ".join(parts) + " found."
|
||||
|
||||
if messages.has_error:
|
||||
message = "Sanity check exited with errors. See log."
|
||||
message = summary + " Check logs for details."
|
||||
if raise_on_error:
|
||||
raise SanityCheckFailedException(message)
|
||||
return message
|
||||
elif messages.has_warning:
|
||||
return "Sanity check exited with warnings. See log."
|
||||
elif len(messages) > 0:
|
||||
return "Sanity check exited with infos. See log."
|
||||
else:
|
||||
return "No issues detected."
|
||||
|
||||
return summary
|
||||
|
||||
|
||||
@shared_task
|
||||
@@ -265,7 +285,6 @@ def bulk_update_documents(document_ids) -> None:
|
||||
ai_config = AIConfig()
|
||||
if ai_config.llm_index_enabled:
|
||||
update_llm_index(
|
||||
progress_bar_disable=True,
|
||||
rebuild=False,
|
||||
)
|
||||
|
||||
@@ -606,7 +625,7 @@ def update_document_parent_tags(tag: Tag, new_parent: Tag) -> None:
|
||||
@shared_task
|
||||
def llmindex_index(
|
||||
*,
|
||||
progress_bar_disable=True,
|
||||
iter_wrapper: IterWrapper[Document] = _identity,
|
||||
rebuild=False,
|
||||
scheduled=True,
|
||||
auto=False,
|
||||
@@ -629,7 +648,7 @@ def llmindex_index(
|
||||
|
||||
try:
|
||||
result = update_llm_index(
|
||||
progress_bar_disable=progress_bar_disable,
|
||||
iter_wrapper=iter_wrapper,
|
||||
rebuild=rebuild,
|
||||
)
|
||||
task.status = states.SUCCESS
|
||||
|
||||
@@ -1,10 +1,96 @@
|
||||
import shutil
|
||||
import zoneinfo
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import filelock
|
||||
import pytest
|
||||
from django.contrib.auth import get_user_model
|
||||
from pytest_django.fixtures import SettingsWrapper
|
||||
from rest_framework.test import APIClient
|
||||
|
||||
from documents.tests.factories import DocumentFactory
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from documents.models import Document
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class PaperlessDirs:
|
||||
"""Standard Paperless-ngx directory layout for tests."""
|
||||
|
||||
media: Path
|
||||
originals: Path
|
||||
archive: Path
|
||||
thumbnails: Path
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def samples_dir() -> Path:
|
||||
"""Path to the shared test sample documents."""
|
||||
return Path(__file__).parent / "samples" / "documents"
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def paperless_dirs(tmp_path: Path) -> PaperlessDirs:
|
||||
"""Create and return the directory structure for testing."""
|
||||
media = tmp_path / "media"
|
||||
dirs = PaperlessDirs(
|
||||
media=media,
|
||||
originals=media / "documents" / "originals",
|
||||
archive=media / "documents" / "archive",
|
||||
thumbnails=media / "documents" / "thumbnails",
|
||||
)
|
||||
for d in (dirs.originals, dirs.archive, dirs.thumbnails):
|
||||
d.mkdir(parents=True)
|
||||
return dirs
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def _media_settings(paperless_dirs: PaperlessDirs, settings) -> None:
|
||||
"""Configure Django settings to point at temp directories."""
|
||||
settings.MEDIA_ROOT = paperless_dirs.media
|
||||
settings.ORIGINALS_DIR = paperless_dirs.originals
|
||||
settings.ARCHIVE_DIR = paperless_dirs.archive
|
||||
settings.THUMBNAIL_DIR = paperless_dirs.thumbnails
|
||||
settings.MEDIA_LOCK = paperless_dirs.media / "media.lock"
|
||||
settings.IGNORABLE_FILES = {".DS_Store", "Thumbs.db", "desktop.ini"}
|
||||
settings.APP_LOGO = ""
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def sample_doc(
|
||||
paperless_dirs: PaperlessDirs,
|
||||
_media_settings: None,
|
||||
samples_dir: Path,
|
||||
) -> "Document":
|
||||
"""Create a document with valid files and matching checksums."""
|
||||
with filelock.FileLock(paperless_dirs.media / "media.lock"):
|
||||
shutil.copy(
|
||||
samples_dir / "originals" / "0000001.pdf",
|
||||
paperless_dirs.originals / "0000001.pdf",
|
||||
)
|
||||
shutil.copy(
|
||||
samples_dir / "archive" / "0000001.pdf",
|
||||
paperless_dirs.archive / "0000001.pdf",
|
||||
)
|
||||
shutil.copy(
|
||||
samples_dir / "thumbnails" / "0000001.webp",
|
||||
paperless_dirs.thumbnails / "0000001.webp",
|
||||
)
|
||||
|
||||
return DocumentFactory(
|
||||
title="test",
|
||||
checksum="42995833e01aea9b3edee44bbfdd7ce1",
|
||||
archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b",
|
||||
content="test content",
|
||||
pk=1,
|
||||
filename="0000001.pdf",
|
||||
mime_type="application/pdf",
|
||||
archive_filename="0000001.pdf",
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def settings_timezone(settings: SettingsWrapper) -> zoneinfo.ZoneInfo:
|
||||
@@ -28,3 +114,14 @@ def authenticated_rest_api_client(rest_api_client: APIClient):
|
||||
user = UserModel.objects.create_user(username="testuser", password="password")
|
||||
rest_api_client.force_authenticate(user=user)
|
||||
yield rest_api_client
|
||||
|
||||
|
||||
@pytest.fixture(scope="session", autouse=True)
|
||||
def faker_session_locale():
|
||||
"""Set Faker locale for reproducibility."""
|
||||
return "en_US"
|
||||
|
||||
|
||||
@pytest.fixture(scope="session", autouse=True)
|
||||
def faker_seed():
|
||||
return 12345
|
||||
|
||||
@@ -1,17 +1,67 @@
|
||||
from factory import Faker
|
||||
"""
|
||||
Factory-boy factories for documents app models.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import factory
|
||||
from factory.django import DjangoModelFactory
|
||||
|
||||
from documents.models import Correspondent
|
||||
from documents.models import Document
|
||||
from documents.models import DocumentType
|
||||
from documents.models import MatchingModel
|
||||
from documents.models import StoragePath
|
||||
from documents.models import Tag
|
||||
|
||||
|
||||
class CorrespondentFactory(DjangoModelFactory):
    """Factory for ``Correspondent`` rows with matching disabled by default.

    ``name`` is a Faker company name followed by the factory sequence number,
    so successive instances get distinct names. Override ``match`` and
    ``matching_algorithm`` to build a correspondent that actually matches.
    """

    class Meta:
        model = Correspondent

    class Params:
        # Declared as a parameter so factory_boy evaluates the Faker provider.
        # Interpolating ``factory.Faker(...)`` directly into an f-string only
        # embeds the declaration object's repr, never a generated value.
        company = factory.Faker("company")

    # The sequence suffix keeps names distinct even if Faker repeats a value.
    name = factory.LazyAttributeSequence(lambda o, n: f"{o.company} {n}")
    match = ""
    matching_algorithm = MatchingModel.MATCH_NONE
|
||||
|
||||
|
||||
class DocumentTypeFactory(DjangoModelFactory):
    """Factory for ``DocumentType`` rows with matching disabled by default.

    ``name`` is a Faker ``bs`` phrase followed by the factory sequence number,
    so successive instances get distinct names.
    """

    class Meta:
        model = DocumentType

    class Params:
        # Declared as a parameter so factory_boy evaluates the Faker provider.
        # Interpolating ``factory.Faker(...)`` directly into an f-string only
        # embeds the declaration object's repr, never a generated value.
        bs = factory.Faker("bs")

    # The sequence suffix keeps names distinct even if Faker repeats a value.
    name = factory.LazyAttributeSequence(lambda o, n: f"{o.bs} {n}")
    match = ""
    matching_algorithm = MatchingModel.MATCH_NONE
|
||||
|
||||
|
||||
class TagFactory(DjangoModelFactory):
    """Factory for ``Tag`` rows: non-inbox, matching disabled by default.

    ``name`` is a Faker word followed by the factory sequence number, so
    successive instances get distinct names.
    """

    class Meta:
        model = Tag

    class Params:
        # Declared as a parameter so factory_boy evaluates the Faker provider.
        # Interpolating ``factory.Faker(...)`` directly into an f-string only
        # embeds the declaration object's repr, never a generated value.
        word = factory.Faker("word")

    # The sequence suffix keeps names distinct even if Faker repeats a value.
    name = factory.LazyAttributeSequence(lambda o, n: f"{o.word} {n}")
    match = ""
    matching_algorithm = MatchingModel.MATCH_NONE
    is_inbox_tag = False
|
||||
|
||||
|
||||
class StoragePathFactory(DjangoModelFactory):
    """Factory for ``StoragePath`` rows with matching disabled by default.

    ``name`` is a Faker-generated file path followed by the factory sequence
    number; ``path`` defaults to ``"<name>/{title}"`` and is derived from
    whatever ``name`` ends up being (including an explicit override).
    """

    class Meta:
        model = StoragePath

    class Params:
        # Declared as a parameter so factory_boy evaluates the Faker provider.
        # Interpolating ``factory.Faker(...)`` directly into an f-string only
        # embeds the declaration object's repr, never a generated value.
        file_path = factory.Faker("file_path", depth=2, extension="")

    # The sequence suffix keeps names distinct even if Faker repeats a value.
    name = factory.LazyAttributeSequence(lambda o, n: f"{o.file_path} {n}")
    # ``{{title}}`` renders as the literal placeholder ``{title}``.
    path = factory.LazyAttribute(lambda o: f"{o.name}/{{title}}")
    match = ""
    matching_algorithm = MatchingModel.MATCH_NONE
|
||||
|
||||
|
||||
class DocumentFactory(DjangoModelFactory):
    """Factory for ``Document`` rows with Faker-generated text fields.

    Title, checksum and content come from Faker providers; the three
    relations default to ``None`` and can be supplied per call.
    """

    class Meta:
        model = Document

    # Generated text fields (order kept: each draws from the Faker stream).
    title = factory.Faker("sentence", nb_words=4)
    checksum = factory.Faker("md5")
    content = factory.Faker("paragraph")

    # Relations are left unset unless the caller passes them in.
    correspondent = None
    document_type = None
    storage_path = None
|
||||
|
||||
193
src/documents/tests/management/test_management_sanity_checker.py
Normal file
193
src/documents/tests/management/test_management_sanity_checker.py
Normal file
@@ -0,0 +1,193 @@
|
||||
"""Tests for the document_sanity_checker management command.
|
||||
|
||||
Verifies Rich rendering (table, panel, summary) and end-to-end CLI behavior.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from io import StringIO
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import pytest
|
||||
from django.core.management import call_command
|
||||
from rich.console import Console
|
||||
|
||||
from documents.management.commands.document_sanity_checker import Command
|
||||
from documents.sanity_checker import SanityCheckMessages
|
||||
from documents.tests.factories import DocumentFactory
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from documents.models import Document
|
||||
from documents.tests.conftest import PaperlessDirs
|
||||
|
||||
|
||||
def _render_to_string(messages: SanityCheckMessages) -> str:
    """Run the command's result renderer and return what it printed.

    The command's console is swapped for a colourless, 120-column Rich
    console writing into an in-memory buffer, so tests can assert on text.
    """
    command = Command()
    sink = StringIO()
    command.console = Console(file=sink, width=120, no_color=True)
    command._render_results(messages)
    return sink.getvalue()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Rich rendering
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestRenderResultsNoIssues:
    """Rendering of an empty ``SanityCheckMessages``; no database required."""

    def test_shows_panel(self) -> None:
        rendered = _render_to_string(SanityCheckMessages())

        assert "No issues detected" in rendered
        assert "Sanity Check" in rendered
|
||||
|
||||
|
||||
@pytest.mark.django_db
class TestRenderResultsWithIssues:
    """Rendering of per-document and global messages at each severity."""

    def test_error_row(self, sample_doc: Document) -> None:
        collected = SanityCheckMessages()
        collected.error(sample_doc.pk, "Original missing")

        rendered = _render_to_string(collected)

        assert "Sanity Check Results" in rendered
        assert "ERROR" in rendered
        assert "Original missing" in rendered
        assert f"#{sample_doc.pk}" in rendered
        assert sample_doc.title in rendered

    def test_warning_row(self, sample_doc: Document) -> None:
        collected = SanityCheckMessages()
        collected.warning(sample_doc.pk, "Suspicious file")

        rendered = _render_to_string(collected)

        assert "WARN" in rendered
        assert "Suspicious file" in rendered

    def test_info_row(self, sample_doc: Document) -> None:
        collected = SanityCheckMessages()
        collected.info(sample_doc.pk, "No OCR data")

        rendered = _render_to_string(collected)

        assert "INFO" in rendered
        assert "No OCR data" in rendered

    @pytest.mark.usefixtures("_media_settings")
    def test_global_message(self) -> None:
        # A ``None`` document id marks the message as not tied to a document.
        collected = SanityCheckMessages()
        collected.warning(None, "Orphaned file: /tmp/stray.pdf")

        rendered = _render_to_string(collected)

        assert "(global)" in rendered
        assert "Orphaned file" in rendered

    def test_multiple_messages_same_doc(self, sample_doc: Document) -> None:
        collected = SanityCheckMessages()
        collected.error(sample_doc.pk, "Thumbnail missing")
        collected.error(sample_doc.pk, "Checksum mismatch")

        rendered = _render_to_string(collected)

        assert "Thumbnail missing" in rendered
        assert "Checksum mismatch" in rendered

    @pytest.mark.usefixtures("_media_settings")
    def test_unknown_doc_pk(self) -> None:
        # No document with this primary key is created by the test.
        collected = SanityCheckMessages()
        collected.error(99999, "Ghost document")

        rendered = _render_to_string(collected)

        assert "#99999" in rendered
        assert "Unknown" in rendered
|
||||
|
||||
|
||||
@pytest.mark.django_db
class TestRenderResultsSummary:
    """Summary text for each combination of document and global messages."""

    def test_errors_only(self, sample_doc: Document) -> None:
        collected = SanityCheckMessages()
        collected.error(sample_doc.pk, "broken")

        rendered = _render_to_string(collected)

        assert "1 document(s) with" in rendered
        assert "errors" in rendered

    def test_warnings_only(self, sample_doc: Document) -> None:
        collected = SanityCheckMessages()
        collected.warning(sample_doc.pk, "odd")

        rendered = _render_to_string(collected)

        assert "1 document(s) with" in rendered
        assert "warnings" in rendered

    def test_infos_only(self, sample_doc: Document) -> None:
        collected = SanityCheckMessages()
        collected.info(sample_doc.pk, "no OCR")

        rendered = _render_to_string(collected)

        assert "1 document(s) with infos" in rendered

    def test_empty_messages(self) -> None:
        rendered = _render_to_string(SanityCheckMessages())

        assert "No issues detected." in rendered

    def test_document_errors_and_global_warnings(self, sample_doc: Document) -> None:
        collected = SanityCheckMessages()
        collected.error(sample_doc.pk, "broken")
        collected.warning(None, "orphan")

        rendered = _render_to_string(collected)

        assert "1 document(s) with" in rendered
        assert "errors" in rendered
        assert "1 global warning(s)" in rendered
        # The global warning must not be counted as a second document.
        assert "2 document(s)" not in rendered

    def test_global_warnings_only(self) -> None:
        collected = SanityCheckMessages()
        collected.warning(None, "extra file")

        rendered = _render_to_string(collected)

        assert "1 global warning(s)" in rendered
        assert "document(s) with" not in rendered

    def test_all_levels_combined(self, sample_doc: Document) -> None:
        collected = SanityCheckMessages()
        collected.error(sample_doc.pk, "broken")
        collected.warning(sample_doc.pk, "odd")
        collected.info(sample_doc.pk, "fyi")
        collected.warning(None, "extra file")

        rendered = _render_to_string(collected)

        assert "1 document(s) with errors" in rendered
        assert "1 document(s) with warnings" in rendered
        assert "1 document(s) with infos" in rendered
        assert "1 global warning(s)" in rendered
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# End-to-end command execution
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.django_db
@pytest.mark.management
class TestDocumentSanityCheckerCommand:
    """End-to-end runs of ``document_sanity_checker`` via ``call_command``."""

    def test_no_issues(self, sample_doc: Document) -> None:
        captured = StringIO()

        call_command("document_sanity_checker", "--no-progress-bar", stdout=captured)

        assert "No issues detected" in captured.getvalue()

    def test_missing_original(self, sample_doc: Document) -> None:
        # Delete the original file so the checker has something to report.
        Path(sample_doc.source_path).unlink()
        captured = StringIO()

        call_command("document_sanity_checker", "--no-progress-bar", stdout=captured)

        report = captured.getvalue()
        assert "ERROR" in report
        assert "Original of document does not exist" in report

    @pytest.mark.usefixtures("_media_settings")
    def test_checksum_mismatch(self, paperless_dirs: PaperlessDirs) -> None:
        """A document whose files are empty cannot match its stored checksum."""
        document = DocumentFactory(
            title="test",
            content="test",
            filename="test.pdf",
            checksum="abc",
        )
        # Zero-byte original and thumbnail: both exist, but the stored
        # checksum "abc" will not match the original's actual checksum.
        Path(document.source_path).touch()
        Path(document.thumbnail_path).touch()
        captured = StringIO()

        call_command("document_sanity_checker", "--no-progress-bar", stdout=captured)

        report = captured.getvalue()
        assert "ERROR" in report
        assert "Checksum mismatch. Stored: abc, actual:" in report
|
||||
@@ -134,6 +134,7 @@ class TestRenamer(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
self.assertIsFile(doc2.archive_path)
|
||||
|
||||
|
||||
@pytest.mark.management
|
||||
class TestCreateClassifier(TestCase):
|
||||
@mock.patch(
|
||||
"documents.management.commands.document_create_classifier.train_classifier",
|
||||
@@ -144,32 +145,6 @@ class TestCreateClassifier(TestCase):
|
||||
m.assert_called_once()
|
||||
|
||||
|
||||
@pytest.mark.management
|
||||
class TestSanityChecker(DirectoriesMixin, TestCase):
|
||||
def test_no_issues(self) -> None:
|
||||
with self.assertLogs() as capture:
|
||||
call_command("document_sanity_checker")
|
||||
|
||||
self.assertEqual(len(capture.output), 1)
|
||||
self.assertIn("Sanity checker detected no issues.", capture.output[0])
|
||||
|
||||
def test_errors(self) -> None:
|
||||
doc = Document.objects.create(
|
||||
title="test",
|
||||
content="test",
|
||||
filename="test.pdf",
|
||||
checksum="abc",
|
||||
)
|
||||
Path(doc.source_path).touch()
|
||||
Path(doc.thumbnail_path).touch()
|
||||
|
||||
with self.assertLogs() as capture:
|
||||
call_command("document_sanity_checker")
|
||||
|
||||
self.assertEqual(len(capture.output), 2)
|
||||
self.assertIn("Checksum mismatch. Stored: abc, actual:", capture.output[1])
|
||||
|
||||
|
||||
@pytest.mark.management
|
||||
class TestConvertMariaDBUUID(TestCase):
|
||||
@mock.patch("django.db.connection.schema_editor")
|
||||
|
||||
@@ -288,7 +288,7 @@ class TestExportImport(
|
||||
self.assertEqual(Permission.objects.count(), num_permission_objects)
|
||||
messages = check_sanity()
|
||||
# everything is alright after the test
|
||||
self.assertEqual(len(messages), 0)
|
||||
self.assertEqual(messages.total_issue_count, 0)
|
||||
|
||||
def test_exporter_with_filename_format(self) -> None:
|
||||
shutil.rmtree(Path(self.dirs.media_dir) / "documents")
|
||||
|
||||
@@ -1,298 +1,442 @@
|
||||
"""
|
||||
Tests for the document_retagger management command.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
from django.core.management import call_command
|
||||
from django.core.management.base import CommandError
|
||||
from django.test import TestCase
|
||||
|
||||
from documents.models import Correspondent
|
||||
from documents.models import Document
|
||||
from documents.models import DocumentType
|
||||
from documents.models import MatchingModel
|
||||
from documents.models import StoragePath
|
||||
from documents.models import Tag
|
||||
from documents.tests.factories import CorrespondentFactory
|
||||
from documents.tests.factories import DocumentFactory
|
||||
from documents.tests.factories import DocumentTypeFactory
|
||||
from documents.tests.factories import StoragePathFactory
|
||||
from documents.tests.factories import TagFactory
|
||||
from documents.tests.utils import DirectoriesMixin
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Module-level type aliases
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
StoragePathTuple = tuple[StoragePath, StoragePath, StoragePath]
|
||||
TagTuple = tuple[Tag, Tag, Tag, Tag, Tag]
|
||||
CorrespondentTuple = tuple[Correspondent, Correspondent]
|
||||
DocumentTypeTuple = tuple[DocumentType, DocumentType]
|
||||
DocumentTuple = tuple[Document, Document, Document, Document]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fixtures
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture()
def storage_paths(db) -> StoragePathTuple:
    """Create three storage paths: one literal matcher and two regex matchers.

    Returned in creation order as ``(literal, first_or_unrelated, blah)``.
    """
    literal_auto = StoragePathFactory(
        path="{created_data}/{title}",
        match="auto document",
        matching_algorithm=MatchingModel.MATCH_LITERAL,
    )
    regex_first_or_unrelated = StoragePathFactory(
        path="{title}",
        match="^first|^unrelated",
        matching_algorithm=MatchingModel.MATCH_REGEX,
    )
    regex_blah = StoragePathFactory(
        path="{title}",
        match="^blah",
        matching_algorithm=MatchingModel.MATCH_REGEX,
    )
    return literal_auto, regex_first_or_unrelated, regex_blah
|
||||
|
||||
|
||||
@pytest.fixture()
def tags(db) -> TagTuple:
    """Create the five tags shared by the retagger tests.

    Returned in creation order: matches "first", matches "second", inbox tag,
    tag with factory-default matching, and tag using ``MATCH_AUTO``.
    """
    matches_first = TagFactory(match="first", matching_algorithm=Tag.MATCH_ANY)
    matches_second = TagFactory(match="second", matching_algorithm=Tag.MATCH_ANY)
    inbox = TagFactory(is_inbox_tag=True)
    unmatched = TagFactory()
    auto = TagFactory(matching_algorithm=Tag.MATCH_AUTO)
    return matches_first, matches_second, inbox, unmatched, auto
|
||||
|
||||
|
||||
@pytest.fixture()
def correspondents(db) -> CorrespondentTuple:
    """Create two ``MATCH_ANY`` correspondents keyed on "first" and "second"."""
    matches_first = CorrespondentFactory(
        match="first",
        matching_algorithm=MatchingModel.MATCH_ANY,
    )
    matches_second = CorrespondentFactory(
        match="second",
        matching_algorithm=MatchingModel.MATCH_ANY,
    )
    return matches_first, matches_second
|
||||
|
||||
|
||||
@pytest.fixture()
def document_types(db) -> DocumentTypeTuple:
    """Create two ``MATCH_ANY`` document types keyed on "first" and "second"."""
    matches_first = DocumentTypeFactory(
        match="first",
        matching_algorithm=MatchingModel.MATCH_ANY,
    )
    matches_second = DocumentTypeFactory(
        match="second",
        matching_algorithm=MatchingModel.MATCH_ANY,
    )
    return matches_first, matches_second
|
||||
|
||||
|
||||
@pytest.fixture()
def documents(storage_paths: StoragePathTuple, tags: TagTuple) -> DocumentTuple:
    """Create documents A-D, the shared data set for the retagger tests.

    C starts with the third storage path plus the inbox and no-match tags;
    D starts with the auto tag. A and B start with no tags or storage path.
    """
    preset_storage_path = storage_paths[2]
    tag_inbox, tag_no_match, tag_auto = tags[2:]

    # Created in A, B, C, D order; the returned tuple follows the same order.
    doc_a = DocumentFactory(checksum="A", title="A", content="first document")
    doc_b = DocumentFactory(checksum="B", title="B", content="second document")
    doc_c = DocumentFactory(
        checksum="C",
        title="C",
        content="unrelated document",
        storage_path=preset_storage_path,
    )
    doc_d = DocumentFactory(checksum="D", title="D", content="auto document")

    doc_c.tags.add(tag_inbox, tag_no_match)
    doc_d.tags.add(tag_auto)

    return doc_a, doc_b, doc_c, doc_d
|
||||
|
||||
|
||||
def _get_docs() -> DocumentTuple:
    """Fetch documents titled A, B, C and D from the database, in that order."""
    doc_a = Document.objects.get(title="A")
    doc_b = Document.objects.get(title="B")
    doc_c = Document.objects.get(title="C")
    doc_d = Document.objects.get(title="D")
    return doc_a, doc_b, doc_c, doc_d
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tag assignment
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.management
|
||||
class TestRetagger(DirectoriesMixin, TestCase):
|
||||
def make_models(self) -> None:
|
||||
self.sp1 = StoragePath.objects.create(
|
||||
name="dummy a",
|
||||
path="{created_data}/{title}",
|
||||
match="auto document",
|
||||
matching_algorithm=StoragePath.MATCH_LITERAL,
|
||||
)
|
||||
self.sp2 = StoragePath.objects.create(
|
||||
name="dummy b",
|
||||
path="{title}",
|
||||
match="^first|^unrelated",
|
||||
matching_algorithm=StoragePath.MATCH_REGEX,
|
||||
)
|
||||
|
||||
self.sp3 = StoragePath.objects.create(
|
||||
name="dummy c",
|
||||
path="{title}",
|
||||
match="^blah",
|
||||
matching_algorithm=StoragePath.MATCH_REGEX,
|
||||
)
|
||||
|
||||
self.d1 = Document.objects.create(
|
||||
checksum="A",
|
||||
title="A",
|
||||
content="first document",
|
||||
)
|
||||
self.d2 = Document.objects.create(
|
||||
checksum="B",
|
||||
title="B",
|
||||
content="second document",
|
||||
)
|
||||
self.d3 = Document.objects.create(
|
||||
checksum="C",
|
||||
title="C",
|
||||
content="unrelated document",
|
||||
storage_path=self.sp3,
|
||||
)
|
||||
self.d4 = Document.objects.create(
|
||||
checksum="D",
|
||||
title="D",
|
||||
content="auto document",
|
||||
)
|
||||
|
||||
self.tag_first = Tag.objects.create(
|
||||
name="tag1",
|
||||
match="first",
|
||||
matching_algorithm=Tag.MATCH_ANY,
|
||||
)
|
||||
self.tag_second = Tag.objects.create(
|
||||
name="tag2",
|
||||
match="second",
|
||||
matching_algorithm=Tag.MATCH_ANY,
|
||||
)
|
||||
self.tag_inbox = Tag.objects.create(name="test", is_inbox_tag=True)
|
||||
self.tag_no_match = Tag.objects.create(name="test2")
|
||||
self.tag_auto = Tag.objects.create(
|
||||
name="tagauto",
|
||||
matching_algorithm=Tag.MATCH_AUTO,
|
||||
)
|
||||
|
||||
self.d3.tags.add(self.tag_inbox)
|
||||
self.d3.tags.add(self.tag_no_match)
|
||||
self.d4.tags.add(self.tag_auto)
|
||||
|
||||
self.correspondent_first = Correspondent.objects.create(
|
||||
name="c1",
|
||||
match="first",
|
||||
matching_algorithm=Correspondent.MATCH_ANY,
|
||||
)
|
||||
self.correspondent_second = Correspondent.objects.create(
|
||||
name="c2",
|
||||
match="second",
|
||||
matching_algorithm=Correspondent.MATCH_ANY,
|
||||
)
|
||||
|
||||
self.doctype_first = DocumentType.objects.create(
|
||||
name="dt1",
|
||||
match="first",
|
||||
matching_algorithm=DocumentType.MATCH_ANY,
|
||||
)
|
||||
self.doctype_second = DocumentType.objects.create(
|
||||
name="dt2",
|
||||
match="second",
|
||||
matching_algorithm=DocumentType.MATCH_ANY,
|
||||
)
|
||||
|
||||
def get_updated_docs(self):
|
||||
return (
|
||||
Document.objects.get(title="A"),
|
||||
Document.objects.get(title="B"),
|
||||
Document.objects.get(title="C"),
|
||||
Document.objects.get(title="D"),
|
||||
)
|
||||
|
||||
def setUp(self) -> None:
|
||||
super().setUp()
|
||||
self.make_models()
|
||||
|
||||
def test_add_tags(self) -> None:
|
||||
@pytest.mark.django_db
|
||||
class TestRetaggerTags(DirectoriesMixin):
|
||||
@pytest.mark.usefixtures("documents")
|
||||
def test_add_tags(self, tags: TagTuple) -> None:
|
||||
tag_first, tag_second, *_ = tags
|
||||
call_command("document_retagger", "--tags")
|
||||
d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
|
||||
d_first, d_second, d_unrelated, d_auto = _get_docs()
|
||||
|
||||
self.assertEqual(d_first.tags.count(), 1)
|
||||
self.assertEqual(d_second.tags.count(), 1)
|
||||
self.assertEqual(d_unrelated.tags.count(), 2)
|
||||
self.assertEqual(d_auto.tags.count(), 1)
|
||||
assert d_first.tags.count() == 1
|
||||
assert d_second.tags.count() == 1
|
||||
assert d_unrelated.tags.count() == 2
|
||||
assert d_auto.tags.count() == 1
|
||||
assert d_first.tags.first() == tag_first
|
||||
assert d_second.tags.first() == tag_second
|
||||
|
||||
self.assertEqual(d_first.tags.first(), self.tag_first)
|
||||
self.assertEqual(d_second.tags.first(), self.tag_second)
|
||||
|
||||
def test_add_type(self) -> None:
|
||||
call_command("document_retagger", "--document_type")
|
||||
d_first, d_second, _, _ = self.get_updated_docs()
|
||||
|
||||
self.assertEqual(d_first.document_type, self.doctype_first)
|
||||
self.assertEqual(d_second.document_type, self.doctype_second)
|
||||
|
||||
def test_add_correspondent(self) -> None:
|
||||
call_command("document_retagger", "--correspondent")
|
||||
d_first, d_second, _, _ = self.get_updated_docs()
|
||||
|
||||
self.assertEqual(d_first.correspondent, self.correspondent_first)
|
||||
self.assertEqual(d_second.correspondent, self.correspondent_second)
|
||||
|
||||
def test_overwrite_preserve_inbox(self) -> None:
|
||||
self.d1.tags.add(self.tag_second)
|
||||
def test_overwrite_removes_stale_tags_and_preserves_inbox(
|
||||
self,
|
||||
documents: DocumentTuple,
|
||||
tags: TagTuple,
|
||||
) -> None:
|
||||
d1, *_ = documents
|
||||
tag_first, tag_second, tag_inbox, tag_no_match, _ = tags
|
||||
d1.tags.add(tag_second)
|
||||
|
||||
call_command("document_retagger", "--tags", "--overwrite")
|
||||
|
||||
d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
|
||||
d_first, d_second, d_unrelated, d_auto = _get_docs()
|
||||
|
||||
self.assertIsNotNone(Tag.objects.get(id=self.tag_second.id))
|
||||
assert Tag.objects.filter(id=tag_second.id).exists()
|
||||
assert list(d_first.tags.values_list("id", flat=True)) == [tag_first.id]
|
||||
assert list(d_second.tags.values_list("id", flat=True)) == [tag_second.id]
|
||||
assert set(d_unrelated.tags.values_list("id", flat=True)) == {
|
||||
tag_inbox.id,
|
||||
tag_no_match.id,
|
||||
}
|
||||
assert d_auto.tags.count() == 0
|
||||
|
||||
self.assertCountEqual(
|
||||
[tag.id for tag in d_first.tags.all()],
|
||||
[self.tag_first.id],
|
||||
@pytest.mark.usefixtures("documents")
|
||||
@pytest.mark.parametrize(
|
||||
"extra_args",
|
||||
[
|
||||
pytest.param([], id="no_base_url"),
|
||||
pytest.param(["--base-url=http://localhost"], id="with_base_url"),
|
||||
],
|
||||
)
|
||||
def test_suggest_does_not_apply_tags(self, extra_args: list[str]) -> None:
|
||||
call_command("document_retagger", "--tags", "--suggest", *extra_args)
|
||||
d_first, d_second, _, d_auto = _get_docs()
|
||||
|
||||
assert d_first.tags.count() == 0
|
||||
assert d_second.tags.count() == 0
|
||||
assert d_auto.tags.count() == 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Document type assignment
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.management
|
||||
@pytest.mark.django_db
|
||||
class TestRetaggerDocumentType(DirectoriesMixin):
|
||||
@pytest.mark.usefixtures("documents")
|
||||
def test_add_type(self, document_types: DocumentTypeTuple) -> None:
|
||||
dt_first, dt_second = document_types
|
||||
call_command("document_retagger", "--document_type")
|
||||
d_first, d_second, _, _ = _get_docs()
|
||||
|
||||
assert d_first.document_type == dt_first
|
||||
assert d_second.document_type == dt_second
|
||||
|
||||
@pytest.mark.usefixtures("documents", "document_types")
|
||||
@pytest.mark.parametrize(
|
||||
"extra_args",
|
||||
[
|
||||
pytest.param([], id="no_base_url"),
|
||||
pytest.param(["--base-url=http://localhost"], id="with_base_url"),
|
||||
],
|
||||
)
|
||||
def test_suggest_does_not_apply_document_type(self, extra_args: list[str]) -> None:
|
||||
call_command("document_retagger", "--document_type", "--suggest", *extra_args)
|
||||
d_first, d_second, _, _ = _get_docs()
|
||||
|
||||
assert d_first.document_type is None
|
||||
assert d_second.document_type is None
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
("use_first_flag", "expects_assignment"),
|
||||
[
|
||||
pytest.param(["--use-first"], True, id="use_first_assigns_first_match"),
|
||||
pytest.param([], False, id="no_use_first_skips_ambiguous_match"),
|
||||
],
|
||||
)
|
||||
def test_use_first_with_multiple_matches(
|
||||
self,
|
||||
use_first_flag: list[str],
|
||||
*,
|
||||
expects_assignment: bool,
|
||||
) -> None:
|
||||
DocumentTypeFactory(
|
||||
match="ambiguous",
|
||||
matching_algorithm=MatchingModel.MATCH_ANY,
|
||||
)
|
||||
self.assertCountEqual(
|
||||
[tag.id for tag in d_second.tags.all()],
|
||||
[self.tag_second.id],
|
||||
DocumentTypeFactory(
|
||||
match="ambiguous",
|
||||
matching_algorithm=MatchingModel.MATCH_ANY,
|
||||
)
|
||||
self.assertCountEqual(
|
||||
[tag.id for tag in d_unrelated.tags.all()],
|
||||
[self.tag_inbox.id, self.tag_no_match.id],
|
||||
doc = DocumentFactory(content="ambiguous content")
|
||||
|
||||
call_command("document_retagger", "--document_type", *use_first_flag)
|
||||
|
||||
doc.refresh_from_db()
|
||||
assert (doc.document_type is not None) is expects_assignment
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Correspondent assignment
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.management
|
||||
@pytest.mark.django_db
|
||||
class TestRetaggerCorrespondent(DirectoriesMixin):
|
||||
@pytest.mark.usefixtures("documents")
|
||||
def test_add_correspondent(self, correspondents: CorrespondentTuple) -> None:
|
||||
c_first, c_second = correspondents
|
||||
call_command("document_retagger", "--correspondent")
|
||||
d_first, d_second, _, _ = _get_docs()
|
||||
|
||||
assert d_first.correspondent == c_first
|
||||
assert d_second.correspondent == c_second
|
||||
|
||||
@pytest.mark.usefixtures("documents", "correspondents")
|
||||
@pytest.mark.parametrize(
|
||||
"extra_args",
|
||||
[
|
||||
pytest.param([], id="no_base_url"),
|
||||
pytest.param(["--base-url=http://localhost"], id="with_base_url"),
|
||||
],
|
||||
)
|
||||
def test_suggest_does_not_apply_correspondent(self, extra_args: list[str]) -> None:
|
||||
call_command("document_retagger", "--correspondent", "--suggest", *extra_args)
|
||||
d_first, d_second, _, _ = _get_docs()
|
||||
|
||||
assert d_first.correspondent is None
|
||||
assert d_second.correspondent is None
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
("use_first_flag", "expects_assignment"),
|
||||
[
|
||||
pytest.param(["--use-first"], True, id="use_first_assigns_first_match"),
|
||||
pytest.param([], False, id="no_use_first_skips_ambiguous_match"),
|
||||
],
|
||||
)
|
||||
def test_use_first_with_multiple_matches(
|
||||
self,
|
||||
use_first_flag: list[str],
|
||||
*,
|
||||
expects_assignment: bool,
|
||||
) -> None:
|
||||
CorrespondentFactory(
|
||||
match="ambiguous",
|
||||
matching_algorithm=MatchingModel.MATCH_ANY,
|
||||
)
|
||||
self.assertEqual(d_auto.tags.count(), 0)
|
||||
|
||||
def test_add_tags_suggest(self) -> None:
|
||||
call_command("document_retagger", "--tags", "--suggest")
|
||||
d_first, d_second, _, d_auto = self.get_updated_docs()
|
||||
|
||||
self.assertEqual(d_first.tags.count(), 0)
|
||||
self.assertEqual(d_second.tags.count(), 0)
|
||||
self.assertEqual(d_auto.tags.count(), 1)
|
||||
|
||||
def test_add_type_suggest(self) -> None:
|
||||
call_command("document_retagger", "--document_type", "--suggest")
|
||||
d_first, d_second, _, _ = self.get_updated_docs()
|
||||
|
||||
self.assertIsNone(d_first.document_type)
|
||||
self.assertIsNone(d_second.document_type)
|
||||
|
||||
def test_add_correspondent_suggest(self) -> None:
|
||||
call_command("document_retagger", "--correspondent", "--suggest")
|
||||
d_first, d_second, _, _ = self.get_updated_docs()
|
||||
|
||||
self.assertIsNone(d_first.correspondent)
|
||||
self.assertIsNone(d_second.correspondent)
|
||||
|
||||
def test_add_tags_suggest_url(self) -> None:
|
||||
call_command(
|
||||
"document_retagger",
|
||||
"--tags",
|
||||
"--suggest",
|
||||
"--base-url=http://localhost",
|
||||
CorrespondentFactory(
|
||||
match="ambiguous",
|
||||
matching_algorithm=MatchingModel.MATCH_ANY,
|
||||
)
|
||||
d_first, d_second, _, d_auto = self.get_updated_docs()
|
||||
doc = DocumentFactory(content="ambiguous content")
|
||||
|
||||
self.assertEqual(d_first.tags.count(), 0)
|
||||
self.assertEqual(d_second.tags.count(), 0)
|
||||
self.assertEqual(d_auto.tags.count(), 1)
|
||||
call_command("document_retagger", "--correspondent", *use_first_flag)
|
||||
|
||||
def test_add_type_suggest_url(self) -> None:
|
||||
call_command(
|
||||
"document_retagger",
|
||||
"--document_type",
|
||||
"--suggest",
|
||||
"--base-url=http://localhost",
|
||||
)
|
||||
d_first, d_second, _, _ = self.get_updated_docs()
|
||||
doc.refresh_from_db()
|
||||
assert (doc.correspondent is not None) is expects_assignment
|
||||
|
||||
self.assertIsNone(d_first.document_type)
|
||||
self.assertIsNone(d_second.document_type)
|
||||
|
||||
def test_add_correspondent_suggest_url(self) -> None:
|
||||
call_command(
|
||||
"document_retagger",
|
||||
"--correspondent",
|
||||
"--suggest",
|
||||
"--base-url=http://localhost",
|
||||
)
|
||||
d_first, d_second, _, _ = self.get_updated_docs()
|
||||
# ---------------------------------------------------------------------------
|
||||
# Storage path assignment
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
self.assertIsNone(d_first.correspondent)
|
||||
self.assertIsNone(d_second.correspondent)
|
||||
|
||||
def test_add_storage_path(self) -> None:
|
||||
@pytest.mark.management
|
||||
@pytest.mark.django_db
|
||||
class TestRetaggerStoragePath(DirectoriesMixin):
|
||||
@pytest.mark.usefixtures("documents")
|
||||
def test_add_storage_path(self, storage_paths: StoragePathTuple) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- 2 storage paths with documents which match them
|
||||
- 1 document which matches but has a storage path
|
||||
WHEN:
|
||||
- document retagger is called
|
||||
THEN:
|
||||
- Matching document's storage paths updated
|
||||
- Non-matching documents have no storage path
|
||||
- Existing storage path left unchanged
|
||||
GIVEN documents matching various storage path rules
|
||||
WHEN document_retagger --storage_path is called
|
||||
THEN matching documents get the correct path; existing path is unchanged
|
||||
"""
|
||||
call_command(
|
||||
"document_retagger",
|
||||
"--storage_path",
|
||||
)
|
||||
d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
|
||||
sp1, sp2, sp3 = storage_paths
|
||||
call_command("document_retagger", "--storage_path")
|
||||
d_first, d_second, d_unrelated, d_auto = _get_docs()
|
||||
|
||||
self.assertEqual(d_first.storage_path, self.sp2)
|
||||
self.assertEqual(d_auto.storage_path, self.sp1)
|
||||
self.assertIsNone(d_second.storage_path)
|
||||
self.assertEqual(d_unrelated.storage_path, self.sp3)
|
||||
assert d_first.storage_path == sp2
|
||||
assert d_auto.storage_path == sp1
|
||||
assert d_second.storage_path is None
|
||||
assert d_unrelated.storage_path == sp3
|
||||
|
||||
def test_overwrite_storage_path(self) -> None:
|
||||
@pytest.mark.usefixtures("documents")
|
||||
def test_overwrite_storage_path(self, storage_paths: StoragePathTuple) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- 2 storage paths with documents which match them
|
||||
- 1 document which matches but has a storage path
|
||||
WHEN:
|
||||
- document retagger is called with overwrite
|
||||
THEN:
|
||||
- Matching document's storage paths updated
|
||||
- Non-matching documents have no storage path
|
||||
- Existing storage path overwritten
|
||||
GIVEN a document with an existing storage path that matches a different rule
|
||||
WHEN document_retagger --storage_path --overwrite is called
|
||||
THEN the existing path is replaced by the newly matched path
|
||||
"""
|
||||
sp1, sp2, _ = storage_paths
|
||||
call_command("document_retagger", "--storage_path", "--overwrite")
|
||||
d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
|
||||
d_first, d_second, d_unrelated, d_auto = _get_docs()
|
||||
|
||||
self.assertEqual(d_first.storage_path, self.sp2)
|
||||
self.assertEqual(d_auto.storage_path, self.sp1)
|
||||
self.assertIsNone(d_second.storage_path)
|
||||
self.assertEqual(d_unrelated.storage_path, self.sp2)
|
||||
assert d_first.storage_path == sp2
|
||||
assert d_auto.storage_path == sp1
|
||||
assert d_second.storage_path is None
|
||||
assert d_unrelated.storage_path == sp2
|
||||
|
||||
def test_id_range_parameter(self) -> None:
|
||||
commandOutput = ""
|
||||
Document.objects.create(
|
||||
checksum="E",
|
||||
title="E",
|
||||
content="NOT the first document",
|
||||
@pytest.mark.parametrize(
|
||||
("use_first_flag", "expects_assignment"),
|
||||
[
|
||||
pytest.param(["--use-first"], True, id="use_first_assigns_first_match"),
|
||||
pytest.param([], False, id="no_use_first_skips_ambiguous_match"),
|
||||
],
|
||||
)
|
||||
def test_use_first_with_multiple_matches(
|
||||
self,
|
||||
use_first_flag: list[str],
|
||||
*,
|
||||
expects_assignment: bool,
|
||||
) -> None:
|
||||
StoragePathFactory(
|
||||
match="ambiguous",
|
||||
matching_algorithm=MatchingModel.MATCH_ANY,
|
||||
)
|
||||
call_command("document_retagger", "--tags", "--id-range", "1", "2")
|
||||
# The retagger shouldn`t apply the 'first' tag to our new document
|
||||
self.assertEqual(Document.objects.filter(tags__id=self.tag_first.id).count(), 1)
|
||||
StoragePathFactory(
|
||||
match="ambiguous",
|
||||
matching_algorithm=MatchingModel.MATCH_ANY,
|
||||
)
|
||||
doc = DocumentFactory(content="ambiguous content")
|
||||
|
||||
try:
|
||||
commandOutput = call_command("document_retagger", "--tags", "--id-range")
|
||||
except CommandError:
|
||||
# Just ignore the error
|
||||
None
|
||||
self.assertIn(commandOutput, "Error: argument --id-range: expected 2 arguments")
|
||||
call_command("document_retagger", "--storage_path", *use_first_flag)
|
||||
|
||||
try:
|
||||
commandOutput = call_command(
|
||||
"document_retagger",
|
||||
"--tags",
|
||||
"--id-range",
|
||||
"a",
|
||||
"b",
|
||||
)
|
||||
except CommandError:
|
||||
# Just ignore the error
|
||||
None
|
||||
self.assertIn(commandOutput, "error: argument --id-range: invalid int value:")
|
||||
doc.refresh_from_db()
|
||||
assert (doc.storage_path is not None) is expects_assignment
|
||||
|
||||
call_command("document_retagger", "--tags", "--id-range", "1", "9999")
|
||||
# Now we should have 2 documents
|
||||
self.assertEqual(Document.objects.filter(tags__id=self.tag_first.id).count(), 2)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ID range filtering
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.management
|
||||
@pytest.mark.django_db
|
||||
class TestRetaggerIdRange(DirectoriesMixin):
|
||||
@pytest.mark.usefixtures("documents")
|
||||
@pytest.mark.parametrize(
|
||||
("id_range_args", "expected_count"),
|
||||
[
|
||||
pytest.param(["1", "2"], 1, id="narrow_range_limits_scope"),
|
||||
pytest.param(["1", "9999"], 2, id="wide_range_tags_all_matches"),
|
||||
],
|
||||
)
|
||||
def test_id_range_limits_scope(
|
||||
self,
|
||||
tags: TagTuple,
|
||||
id_range_args: list[str],
|
||||
expected_count: int,
|
||||
) -> None:
|
||||
DocumentFactory(content="NOT the first document")
|
||||
call_command("document_retagger", "--tags", "--id-range", *id_range_args)
|
||||
tag_first, *_ = tags
|
||||
assert Document.objects.filter(tags__id=tag_first.id).count() == expected_count
|
||||
|
||||
@pytest.mark.usefixtures("documents")
|
||||
@pytest.mark.parametrize(
|
||||
"args",
|
||||
[
|
||||
pytest.param(["--tags", "--id-range"], id="missing_both_values"),
|
||||
pytest.param(["--tags", "--id-range", "a", "b"], id="non_integer_values"),
|
||||
],
|
||||
)
|
||||
def test_id_range_invalid_arguments_raise(self, args: list[str]) -> None:
|
||||
with pytest.raises((CommandError, SystemExit)):
|
||||
call_command("document_retagger", *args)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Edge cases
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.management
|
||||
@pytest.mark.django_db
|
||||
class TestRetaggerEdgeCases(DirectoriesMixin):
|
||||
@pytest.mark.usefixtures("documents")
|
||||
def test_no_targets_exits_cleanly(self) -> None:
|
||||
"""Calling the retagger with no classifier targets should not raise."""
|
||||
call_command("document_retagger")
|
||||
|
||||
@pytest.mark.usefixtures("documents")
|
||||
def test_inbox_only_skips_non_inbox_documents(self) -> None:
|
||||
"""--inbox-only must restrict processing to documents with an inbox tag."""
|
||||
call_command("document_retagger", "--tags", "--inbox-only")
|
||||
d_first, _, d_unrelated, _ = _get_docs()
|
||||
|
||||
assert d_first.tags.count() == 0
|
||||
assert d_unrelated.tags.count() == 2
|
||||
|
||||
@@ -1,192 +1,295 @@
|
||||
import logging
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
"""Tests for the sanity checker module.
|
||||
|
||||
import filelock
|
||||
from django.conf import settings
|
||||
from django.test import TestCase
|
||||
from django.test import override_settings
|
||||
Tests exercise ``check_sanity`` as a whole, verifying document validation,
|
||||
orphan detection, task recording, and the iter_wrapper contract.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import pytest
|
||||
|
||||
from documents.models import Document
|
||||
from documents.models import PaperlessTask
|
||||
from documents.sanity_checker import check_sanity
|
||||
from documents.tests.utils import DirectoriesMixin
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Iterable
|
||||
|
||||
from documents.tests.conftest import PaperlessDirs
|
||||
|
||||
|
||||
class TestSanityCheck(DirectoriesMixin, TestCase):
|
||||
def make_test_data(self):
|
||||
with filelock.FileLock(settings.MEDIA_LOCK):
|
||||
# just make sure that the lockfile is present.
|
||||
shutil.copy(
|
||||
(
|
||||
Path(__file__).parent
|
||||
/ "samples"
|
||||
/ "documents"
|
||||
/ "originals"
|
||||
/ "0000001.pdf"
|
||||
),
|
||||
Path(self.dirs.originals_dir) / "0000001.pdf",
|
||||
)
|
||||
shutil.copy(
|
||||
(
|
||||
Path(__file__).parent
|
||||
/ "samples"
|
||||
/ "documents"
|
||||
/ "archive"
|
||||
/ "0000001.pdf"
|
||||
),
|
||||
Path(self.dirs.archive_dir) / "0000001.pdf",
|
||||
)
|
||||
shutil.copy(
|
||||
(
|
||||
Path(__file__).parent
|
||||
/ "samples"
|
||||
/ "documents"
|
||||
/ "thumbnails"
|
||||
/ "0000001.webp"
|
||||
),
|
||||
Path(self.dirs.thumbnail_dir) / "0000001.webp",
|
||||
)
|
||||
@pytest.mark.django_db
|
||||
class TestCheckSanityNoDocuments:
|
||||
"""Sanity checks against an empty archive."""
|
||||
|
||||
return Document.objects.create(
|
||||
title="test",
|
||||
checksum="42995833e01aea9b3edee44bbfdd7ce1",
|
||||
archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b",
|
||||
content="test",
|
||||
pk=1,
|
||||
filename="0000001.pdf",
|
||||
mime_type="application/pdf",
|
||||
archive_filename="0000001.pdf",
|
||||
)
|
||||
|
||||
def assertSanityError(self, doc: Document, messageRegex) -> None:
|
||||
@pytest.mark.usefixtures("_media_settings")
|
||||
def test_no_documents(self) -> None:
|
||||
messages = check_sanity()
|
||||
self.assertTrue(messages.has_error)
|
||||
with self.assertLogs() as capture:
|
||||
assert not messages.has_error
|
||||
assert not messages.has_warning
|
||||
assert messages.total_issue_count == 0
|
||||
|
||||
@pytest.mark.usefixtures("_media_settings")
|
||||
def test_no_issues_logs_clean(self, caplog: pytest.LogCaptureFixture) -> None:
|
||||
messages = check_sanity()
|
||||
with caplog.at_level(logging.INFO, logger="paperless.sanity_checker"):
|
||||
messages.log_messages()
|
||||
self.assertEqual(
|
||||
capture.records[0].message,
|
||||
f"Detected following issue(s) with document #{doc.pk}, titled {doc.title}",
|
||||
)
|
||||
self.assertRegex(capture.records[1].message, messageRegex)
|
||||
assert "Sanity checker detected no issues." in caplog.text
|
||||
|
||||
def test_no_issues(self) -> None:
|
||||
self.make_test_data()
|
||||
|
||||
@pytest.mark.django_db
|
||||
class TestCheckSanityHealthyDocument:
|
||||
def test_no_errors(self, sample_doc: Document) -> None:
|
||||
messages = check_sanity()
|
||||
self.assertFalse(messages.has_error)
|
||||
self.assertFalse(messages.has_warning)
|
||||
with self.assertLogs() as capture:
|
||||
messages.log_messages()
|
||||
self.assertEqual(len(capture.output), 1)
|
||||
self.assertEqual(capture.records[0].levelno, logging.INFO)
|
||||
self.assertEqual(
|
||||
capture.records[0].message,
|
||||
"Sanity checker detected no issues.",
|
||||
)
|
||||
assert not messages.has_error
|
||||
assert not messages.has_warning
|
||||
assert messages.total_issue_count == 0
|
||||
|
||||
def test_no_docs(self) -> None:
|
||||
self.assertEqual(len(check_sanity()), 0)
|
||||
|
||||
def test_success(self) -> None:
|
||||
self.make_test_data()
|
||||
self.assertEqual(len(check_sanity()), 0)
|
||||
|
||||
def test_no_thumbnail(self) -> None:
|
||||
doc = self.make_test_data()
|
||||
Path(doc.thumbnail_path).unlink()
|
||||
self.assertSanityError(doc, "Thumbnail of document does not exist")
|
||||
|
||||
def test_thumbnail_no_access(self) -> None:
|
||||
doc = self.make_test_data()
|
||||
Path(doc.thumbnail_path).chmod(0o000)
|
||||
self.assertSanityError(doc, "Cannot read thumbnail file of document")
|
||||
Path(doc.thumbnail_path).chmod(0o777)
|
||||
|
||||
def test_no_original(self) -> None:
|
||||
doc = self.make_test_data()
|
||||
Path(doc.source_path).unlink()
|
||||
self.assertSanityError(doc, "Original of document does not exist.")
|
||||
|
||||
def test_original_no_access(self) -> None:
|
||||
doc = self.make_test_data()
|
||||
Path(doc.source_path).chmod(0o000)
|
||||
self.assertSanityError(doc, "Cannot read original file of document")
|
||||
Path(doc.source_path).chmod(0o777)
|
||||
|
||||
def test_original_checksum_mismatch(self) -> None:
|
||||
doc = self.make_test_data()
|
||||
doc.checksum = "WOW"
|
||||
doc.save()
|
||||
self.assertSanityError(doc, "Checksum mismatch. Stored: WOW, actual: ")
|
||||
|
||||
def test_no_archive(self) -> None:
|
||||
doc = self.make_test_data()
|
||||
Path(doc.archive_path).unlink()
|
||||
self.assertSanityError(doc, "Archived version of document does not exist.")
|
||||
|
||||
def test_archive_no_access(self) -> None:
|
||||
doc = self.make_test_data()
|
||||
Path(doc.archive_path).chmod(0o000)
|
||||
self.assertSanityError(doc, "Cannot read archive file of document")
|
||||
Path(doc.archive_path).chmod(0o777)
|
||||
|
||||
def test_archive_checksum_mismatch(self) -> None:
|
||||
doc = self.make_test_data()
|
||||
doc.archive_checksum = "WOW"
|
||||
doc.save()
|
||||
self.assertSanityError(doc, "Checksum mismatch of archived document")
|
||||
|
||||
def test_empty_content(self) -> None:
|
||||
doc = self.make_test_data()
|
||||
doc.content = ""
|
||||
doc.save()
|
||||
@pytest.mark.django_db
|
||||
class TestCheckSanityThumbnail:
|
||||
def test_missing(self, sample_doc: Document) -> None:
|
||||
Path(sample_doc.thumbnail_path).unlink()
|
||||
messages = check_sanity()
|
||||
self.assertFalse(messages.has_error)
|
||||
self.assertFalse(messages.has_warning)
|
||||
self.assertEqual(len(messages), 1)
|
||||
self.assertRegex(
|
||||
messages[doc.pk][0]["message"],
|
||||
"Document contains no OCR data",
|
||||
assert messages.has_error
|
||||
assert any(
|
||||
"Thumbnail of document does not exist" in m["message"]
|
||||
for m in messages[sample_doc.pk]
|
||||
)
|
||||
|
||||
def test_orphaned_file(self) -> None:
|
||||
self.make_test_data()
|
||||
Path(self.dirs.originals_dir, "orphaned").touch()
|
||||
def test_unreadable(self, sample_doc: Document) -> None:
|
||||
thumb = Path(sample_doc.thumbnail_path)
|
||||
thumb.chmod(0o000)
|
||||
try:
|
||||
messages = check_sanity()
|
||||
assert messages.has_error
|
||||
assert any(
|
||||
"Cannot read thumbnail" in m["message"] for m in messages[sample_doc.pk]
|
||||
)
|
||||
finally:
|
||||
thumb.chmod(0o644)
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
class TestCheckSanityOriginal:
|
||||
def test_missing(self, sample_doc: Document) -> None:
|
||||
Path(sample_doc.source_path).unlink()
|
||||
messages = check_sanity()
|
||||
self.assertTrue(messages.has_warning)
|
||||
self.assertRegex(
|
||||
messages._messages[None][0]["message"],
|
||||
"Orphaned file in media dir",
|
||||
assert messages.has_error
|
||||
assert any(
|
||||
"Original of document does not exist" in m["message"]
|
||||
for m in messages[sample_doc.pk]
|
||||
)
|
||||
|
||||
@override_settings(
|
||||
APP_LOGO="logo/logo.png",
|
||||
def test_checksum_mismatch(self, sample_doc: Document) -> None:
|
||||
sample_doc.checksum = "badhash"
|
||||
sample_doc.save()
|
||||
messages = check_sanity()
|
||||
assert messages.has_error
|
||||
assert any(
|
||||
"Checksum mismatch" in m["message"] and "badhash" in m["message"]
|
||||
for m in messages[sample_doc.pk]
|
||||
)
|
||||
|
||||
def test_unreadable(self, sample_doc: Document) -> None:
|
||||
src = Path(sample_doc.source_path)
|
||||
src.chmod(0o000)
|
||||
try:
|
||||
messages = check_sanity()
|
||||
assert messages.has_error
|
||||
assert any(
|
||||
"Cannot read original" in m["message"] for m in messages[sample_doc.pk]
|
||||
)
|
||||
finally:
|
||||
src.chmod(0o644)
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
class TestCheckSanityArchive:
|
||||
def test_checksum_without_filename(self, sample_doc: Document) -> None:
|
||||
sample_doc.archive_filename = None
|
||||
sample_doc.save()
|
||||
messages = check_sanity()
|
||||
assert messages.has_error
|
||||
assert any(
|
||||
"checksum, but no archive filename" in m["message"]
|
||||
for m in messages[sample_doc.pk]
|
||||
)
|
||||
|
||||
def test_filename_without_checksum(self, sample_doc: Document) -> None:
|
||||
sample_doc.archive_checksum = None
|
||||
sample_doc.save()
|
||||
messages = check_sanity()
|
||||
assert messages.has_error
|
||||
assert any(
|
||||
"checksum is missing" in m["message"] for m in messages[sample_doc.pk]
|
||||
)
|
||||
|
||||
def test_missing_file(self, sample_doc: Document) -> None:
|
||||
Path(sample_doc.archive_path).unlink()
|
||||
messages = check_sanity()
|
||||
assert messages.has_error
|
||||
assert any(
|
||||
"Archived version of document does not exist" in m["message"]
|
||||
for m in messages[sample_doc.pk]
|
||||
)
|
||||
|
||||
def test_checksum_mismatch(self, sample_doc: Document) -> None:
|
||||
sample_doc.archive_checksum = "wronghash"
|
||||
sample_doc.save()
|
||||
messages = check_sanity()
|
||||
assert messages.has_error
|
||||
assert any(
|
||||
"Checksum mismatch of archived document" in m["message"]
|
||||
for m in messages[sample_doc.pk]
|
||||
)
|
||||
|
||||
def test_unreadable(self, sample_doc: Document) -> None:
|
||||
archive = Path(sample_doc.archive_path)
|
||||
archive.chmod(0o000)
|
||||
try:
|
||||
messages = check_sanity()
|
||||
assert messages.has_error
|
||||
assert any(
|
||||
"Cannot read archive" in m["message"] for m in messages[sample_doc.pk]
|
||||
)
|
||||
finally:
|
||||
archive.chmod(0o644)
|
||||
|
||||
def test_no_archive_at_all(self, sample_doc: Document) -> None:
|
||||
"""Document with neither archive checksum nor filename is valid."""
|
||||
Path(sample_doc.archive_path).unlink()
|
||||
sample_doc.archive_checksum = None
|
||||
sample_doc.archive_filename = None
|
||||
sample_doc.save()
|
||||
messages = check_sanity()
|
||||
assert not messages.has_error
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
class TestCheckSanityContent:
|
||||
@pytest.mark.parametrize(
|
||||
"content",
|
||||
[
|
||||
pytest.param("", id="empty-string"),
|
||||
],
|
||||
)
|
||||
def test_ignore_logo(self) -> None:
|
||||
self.make_test_data()
|
||||
logo_dir = Path(self.dirs.media_dir, "logo")
|
||||
logo_dir.mkdir(parents=True, exist_ok=True)
|
||||
Path(self.dirs.media_dir, "logo", "logo.png").touch()
|
||||
def test_no_content(self, sample_doc: Document, content: str) -> None:
|
||||
sample_doc.content = content
|
||||
sample_doc.save()
|
||||
messages = check_sanity()
|
||||
self.assertFalse(messages.has_warning)
|
||||
assert not messages.has_error
|
||||
assert not messages.has_warning
|
||||
assert any("no OCR data" in m["message"] for m in messages[sample_doc.pk])
|
||||
|
||||
def test_ignore_ignorable_files(self) -> None:
|
||||
self.make_test_data()
|
||||
Path(self.dirs.media_dir, ".DS_Store").touch()
|
||||
Path(self.dirs.media_dir, "desktop.ini").touch()
|
||||
|
||||
@pytest.mark.django_db
|
||||
class TestCheckSanityOrphans:
|
||||
def test_orphaned_file(
|
||||
self,
|
||||
sample_doc: Document,
|
||||
paperless_dirs: PaperlessDirs,
|
||||
) -> None:
|
||||
(paperless_dirs.originals / "orphan.pdf").touch()
|
||||
messages = check_sanity()
|
||||
self.assertFalse(messages.has_warning)
|
||||
assert messages.has_warning
|
||||
assert any("Orphaned file" in m["message"] for m in messages[None])
|
||||
|
||||
def test_archive_filename_no_checksum(self) -> None:
|
||||
doc = self.make_test_data()
|
||||
doc.archive_checksum = None
|
||||
doc.save()
|
||||
self.assertSanityError(doc, "has an archive file, but its checksum is missing.")
|
||||
@pytest.mark.usefixtures("_media_settings")
|
||||
def test_ignorable_files_not_flagged(
|
||||
self,
|
||||
paperless_dirs: PaperlessDirs,
|
||||
) -> None:
|
||||
(paperless_dirs.media / ".DS_Store").touch()
|
||||
(paperless_dirs.media / "desktop.ini").touch()
|
||||
messages = check_sanity()
|
||||
assert not messages.has_warning
|
||||
|
||||
def test_archive_checksum_no_filename(self) -> None:
|
||||
doc = self.make_test_data()
|
||||
doc.archive_filename = None
|
||||
doc.save()
|
||||
self.assertSanityError(
|
||||
doc,
|
||||
"has an archive file checksum, but no archive filename.",
|
||||
)
|
||||
|
||||
@pytest.mark.django_db
|
||||
class TestCheckSanityIterWrapper:
|
||||
def test_wrapper_receives_documents(self, sample_doc: Document) -> None:
|
||||
seen: list[Document] = []
|
||||
|
||||
def tracking(iterable: Iterable[Document]) -> Iterable[Document]:
|
||||
for item in iterable:
|
||||
seen.append(item)
|
||||
yield item
|
||||
|
||||
check_sanity(iter_wrapper=tracking)
|
||||
assert len(seen) == 1
|
||||
assert seen[0].pk == sample_doc.pk
|
||||
|
||||
def test_default_works_without_wrapper(self, sample_doc: Document) -> None:
|
||||
messages = check_sanity()
|
||||
assert not messages.has_error
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
class TestCheckSanityTaskRecording:
|
||||
@pytest.mark.parametrize(
|
||||
("expected_type", "scheduled"),
|
||||
[
|
||||
pytest.param(PaperlessTask.TaskType.SCHEDULED_TASK, True, id="scheduled"),
|
||||
pytest.param(PaperlessTask.TaskType.MANUAL_TASK, False, id="manual"),
|
||||
],
|
||||
)
|
||||
@pytest.mark.usefixtures("_media_settings")
|
||||
def test_task_type(self, expected_type: str, *, scheduled: bool) -> None:
|
||||
check_sanity(scheduled=scheduled)
|
||||
task = PaperlessTask.objects.latest("date_created")
|
||||
assert task.task_name == PaperlessTask.TaskName.CHECK_SANITY
|
||||
assert task.type == expected_type
|
||||
|
||||
def test_success_status(self, sample_doc: Document) -> None:
|
||||
check_sanity()
|
||||
task = PaperlessTask.objects.latest("date_created")
|
||||
assert task.status == "SUCCESS"
|
||||
|
||||
def test_failure_status(self, sample_doc: Document) -> None:
|
||||
Path(sample_doc.source_path).unlink()
|
||||
check_sanity()
|
||||
task = PaperlessTask.objects.latest("date_created")
|
||||
assert task.status == "FAILURE"
|
||||
assert "Check logs for details" in task.result
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
class TestCheckSanityLogMessages:
|
||||
def test_logs_doc_issues(
|
||||
self,
|
||||
sample_doc: Document,
|
||||
caplog: pytest.LogCaptureFixture,
|
||||
) -> None:
|
||||
Path(sample_doc.source_path).unlink()
|
||||
messages = check_sanity()
|
||||
with caplog.at_level(logging.INFO, logger="paperless.sanity_checker"):
|
||||
messages.log_messages()
|
||||
assert f"document #{sample_doc.pk}" in caplog.text
|
||||
assert "Original of document does not exist" in caplog.text
|
||||
|
||||
def test_logs_global_issues(
|
||||
self,
|
||||
sample_doc: Document,
|
||||
paperless_dirs: PaperlessDirs,
|
||||
caplog: pytest.LogCaptureFixture,
|
||||
) -> None:
|
||||
(paperless_dirs.originals / "orphan.pdf").touch()
|
||||
messages = check_sanity()
|
||||
with caplog.at_level(logging.WARNING, logger="paperless.sanity_checker"):
|
||||
messages.log_messages()
|
||||
assert "Orphaned file" in caplog.text
|
||||
|
||||
@pytest.mark.usefixtures("_media_settings")
|
||||
def test_logs_unknown_doc_pk(self, caplog: pytest.LogCaptureFixture) -> None:
|
||||
"""A doc PK not in the DB logs 'Unknown' as the title."""
|
||||
messages = check_sanity()
|
||||
messages.error(99999, "Ghost document")
|
||||
with caplog.at_level(logging.INFO, logger="paperless.sanity_checker"):
|
||||
messages.log_messages()
|
||||
assert "#99999" in caplog.text
|
||||
assert "Unknown" in caplog.text
|
||||
|
||||
@@ -3,6 +3,7 @@ from datetime import timedelta
|
||||
from pathlib import Path
|
||||
from unittest import mock
|
||||
|
||||
import pytest
|
||||
from celery import states
|
||||
from django.conf import settings
|
||||
from django.test import TestCase
|
||||
@@ -105,55 +106,83 @@ class TestClassifier(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
self.assertNotEqual(mtime2, mtime3)
|
||||
|
||||
|
||||
class TestSanityCheck(DirectoriesMixin, TestCase):
|
||||
@mock.patch("documents.tasks.sanity_checker.check_sanity")
|
||||
def test_sanity_check_success(self, m) -> None:
|
||||
m.return_value = SanityCheckMessages()
|
||||
self.assertEqual(tasks.sanity_check(), "No issues detected.")
|
||||
m.assert_called_once()
|
||||
@pytest.mark.django_db
|
||||
class TestSanityCheck:
|
||||
@pytest.fixture
|
||||
def mock_check_sanity(self, mocker) -> mock.MagicMock:
|
||||
return mocker.patch("documents.tasks.sanity_checker.check_sanity")
|
||||
|
||||
@mock.patch("documents.tasks.sanity_checker.check_sanity")
|
||||
def test_sanity_check_error(self, m) -> None:
|
||||
messages = SanityCheckMessages()
|
||||
messages.error(None, "Some error")
|
||||
m.return_value = messages
|
||||
self.assertRaises(SanityCheckFailedException, tasks.sanity_check)
|
||||
m.assert_called_once()
|
||||
def test_sanity_check_success(self, mock_check_sanity: mock.MagicMock) -> None:
|
||||
mock_check_sanity.return_value = SanityCheckMessages()
|
||||
assert tasks.sanity_check() == "No issues detected."
|
||||
mock_check_sanity.assert_called_once()
|
||||
|
||||
@mock.patch("documents.tasks.sanity_checker.check_sanity")
|
||||
def test_sanity_check_error_no_raise(self, m) -> None:
|
||||
def test_sanity_check_error_raises(
|
||||
self,
|
||||
mock_check_sanity: mock.MagicMock,
|
||||
sample_doc: Document,
|
||||
) -> None:
|
||||
messages = SanityCheckMessages()
|
||||
messages.error(None, "Some error")
|
||||
m.return_value = messages
|
||||
# No exception should be raised
|
||||
messages.error(sample_doc.pk, "some error")
|
||||
mock_check_sanity.return_value = messages
|
||||
with pytest.raises(SanityCheckFailedException):
|
||||
tasks.sanity_check()
|
||||
mock_check_sanity.assert_called_once()
|
||||
|
||||
def test_sanity_check_error_no_raise(
|
||||
self,
|
||||
mock_check_sanity: mock.MagicMock,
|
||||
sample_doc: Document,
|
||||
) -> None:
|
||||
messages = SanityCheckMessages()
|
||||
messages.error(sample_doc.pk, "some error")
|
||||
mock_check_sanity.return_value = messages
|
||||
result = tasks.sanity_check(raise_on_error=False)
|
||||
self.assertEqual(
|
||||
result,
|
||||
"Sanity check exited with errors. See log.",
|
||||
)
|
||||
m.assert_called_once()
|
||||
assert "1 document(s) with errors" in result
|
||||
assert "Check logs for details." in result
|
||||
mock_check_sanity.assert_called_once()
|
||||
|
||||
@mock.patch("documents.tasks.sanity_checker.check_sanity")
|
||||
def test_sanity_check_warning(self, m) -> None:
|
||||
def test_sanity_check_warning_only(
|
||||
self,
|
||||
mock_check_sanity: mock.MagicMock,
|
||||
) -> None:
|
||||
messages = SanityCheckMessages()
|
||||
messages.warning(None, "Some warning")
|
||||
m.return_value = messages
|
||||
self.assertEqual(
|
||||
tasks.sanity_check(),
|
||||
"Sanity check exited with warnings. See log.",
|
||||
)
|
||||
m.assert_called_once()
|
||||
messages.warning(None, "extra file")
|
||||
mock_check_sanity.return_value = messages
|
||||
result = tasks.sanity_check()
|
||||
assert result == "1 global warning(s) found."
|
||||
mock_check_sanity.assert_called_once()
|
||||
|
||||
@mock.patch("documents.tasks.sanity_checker.check_sanity")
|
||||
def test_sanity_check_info(self, m) -> None:
|
||||
def test_sanity_check_info_only(
|
||||
self,
|
||||
mock_check_sanity: mock.MagicMock,
|
||||
sample_doc: Document,
|
||||
) -> None:
|
||||
messages = SanityCheckMessages()
|
||||
messages.info(None, "Some info")
|
||||
m.return_value = messages
|
||||
self.assertEqual(
|
||||
tasks.sanity_check(),
|
||||
"Sanity check exited with infos. See log.",
|
||||
)
|
||||
m.assert_called_once()
|
||||
messages.info(sample_doc.pk, "some info")
|
||||
mock_check_sanity.return_value = messages
|
||||
result = tasks.sanity_check()
|
||||
assert result == "1 document(s) with infos found."
|
||||
mock_check_sanity.assert_called_once()
|
||||
|
||||
def test_sanity_check_errors_warnings_and_infos(
|
||||
self,
|
||||
mock_check_sanity: mock.MagicMock,
|
||||
sample_doc: Document,
|
||||
) -> None:
|
||||
messages = SanityCheckMessages()
|
||||
messages.error(sample_doc.pk, "broken")
|
||||
messages.warning(sample_doc.pk, "odd")
|
||||
messages.info(sample_doc.pk, "fyi")
|
||||
messages.warning(None, "extra file")
|
||||
mock_check_sanity.return_value = messages
|
||||
result = tasks.sanity_check(raise_on_error=False)
|
||||
assert "1 document(s) with errors" in result
|
||||
assert "1 document(s) with warnings" in result
|
||||
assert "1 document(s) with infos" in result
|
||||
assert "1 global warning(s)" in result
|
||||
assert "Check logs for details." in result
|
||||
mock_check_sanity.assert_called_once()
|
||||
|
||||
|
||||
class TestBulkUpdate(DirectoriesMixin, TestCase):
|
||||
|
||||
@@ -1,107 +1,100 @@
|
||||
from unittest import mock
|
||||
import logging
|
||||
|
||||
import pytest
|
||||
from allauth.account.adapter import get_adapter
|
||||
from allauth.core import context
|
||||
from allauth.socialaccount.adapter import get_adapter as get_social_adapter
|
||||
from django.conf import settings
|
||||
from django.contrib.auth.models import AnonymousUser
|
||||
from django.contrib.auth.models import Group
|
||||
from django.contrib.auth.models import User
|
||||
from django.forms import ValidationError
|
||||
from django.http import HttpRequest
|
||||
from django.test import TestCase
|
||||
from django.test import override_settings
|
||||
from django.urls import reverse
|
||||
from pytest_django.fixtures import SettingsWrapper
|
||||
from pytest_mock import MockerFixture
|
||||
from rest_framework.authtoken.models import Token
|
||||
|
||||
from paperless.adapter import DrfTokenStrategy
|
||||
|
||||
|
||||
class TestCustomAccountAdapter(TestCase):
|
||||
def test_is_open_for_signup(self) -> None:
|
||||
@pytest.mark.django_db
|
||||
class TestCustomAccountAdapter:
|
||||
def test_is_open_for_signup(self, settings: SettingsWrapper) -> None:
|
||||
adapter = get_adapter()
|
||||
|
||||
# With no accounts, signups should be allowed
|
||||
self.assertTrue(adapter.is_open_for_signup(None))
|
||||
assert adapter.is_open_for_signup(None)
|
||||
|
||||
User.objects.create_user("testuser")
|
||||
|
||||
# Test when ACCOUNT_ALLOW_SIGNUPS is True
|
||||
settings.ACCOUNT_ALLOW_SIGNUPS = True
|
||||
self.assertTrue(adapter.is_open_for_signup(None))
|
||||
assert adapter.is_open_for_signup(None)
|
||||
|
||||
# Test when ACCOUNT_ALLOW_SIGNUPS is False
|
||||
settings.ACCOUNT_ALLOW_SIGNUPS = False
|
||||
self.assertFalse(adapter.is_open_for_signup(None))
|
||||
assert not adapter.is_open_for_signup(None)
|
||||
|
||||
def test_is_safe_url(self) -> None:
|
||||
def test_is_safe_url(self, settings: SettingsWrapper) -> None:
|
||||
request = HttpRequest()
|
||||
request.get_host = mock.Mock(return_value="example.com")
|
||||
request.get_host = lambda: "example.com"
|
||||
with context.request_context(request):
|
||||
adapter = get_adapter()
|
||||
with override_settings(ALLOWED_HOSTS=["*"]):
|
||||
# True because request host is same
|
||||
url = "https://example.com"
|
||||
self.assertTrue(adapter.is_safe_url(url))
|
||||
|
||||
url = "https://evil.com"
|
||||
settings.ALLOWED_HOSTS = ["*"]
|
||||
# True because request host is same
|
||||
assert adapter.is_safe_url("https://example.com")
|
||||
# False despite wildcard because request host is different
|
||||
self.assertFalse(adapter.is_safe_url(url))
|
||||
assert not adapter.is_safe_url("https://evil.com")
|
||||
|
||||
settings.ALLOWED_HOSTS = ["example.com"]
|
||||
url = "https://example.com"
|
||||
# True because request host is same
|
||||
self.assertTrue(adapter.is_safe_url(url))
|
||||
assert adapter.is_safe_url("https://example.com")
|
||||
|
||||
settings.ALLOWED_HOSTS = ["*", "example.com"]
|
||||
url = "//evil.com"
|
||||
# False because request host is not in allowed hosts
|
||||
self.assertFalse(adapter.is_safe_url(url))
|
||||
assert not adapter.is_safe_url("//evil.com")
|
||||
|
||||
@mock.patch("allauth.core.internal.ratelimit.consume", return_value=True)
|
||||
def test_pre_authenticate(self, mock_consume) -> None:
|
||||
def test_pre_authenticate(
|
||||
self,
|
||||
settings: SettingsWrapper,
|
||||
mocker: MockerFixture,
|
||||
) -> None:
|
||||
mocker.patch("allauth.core.internal.ratelimit.consume", return_value=True)
|
||||
adapter = get_adapter()
|
||||
request = HttpRequest()
|
||||
request.get_host = mock.Mock(return_value="example.com")
|
||||
request.get_host = lambda: "example.com"
|
||||
|
||||
settings.DISABLE_REGULAR_LOGIN = False
|
||||
adapter.pre_authenticate(request)
|
||||
|
||||
settings.DISABLE_REGULAR_LOGIN = True
|
||||
with self.assertRaises(ValidationError):
|
||||
with pytest.raises(ValidationError):
|
||||
adapter.pre_authenticate(request)
|
||||
|
||||
def test_get_reset_password_from_key_url(self) -> None:
|
||||
def test_get_reset_password_from_key_url(self, settings: SettingsWrapper) -> None:
|
||||
request = HttpRequest()
|
||||
request.get_host = mock.Mock(return_value="foo.org")
|
||||
request.get_host = lambda: "foo.org"
|
||||
with context.request_context(request):
|
||||
adapter = get_adapter()
|
||||
|
||||
# Test when PAPERLESS_URL is None
|
||||
with override_settings(
|
||||
PAPERLESS_URL=None,
|
||||
ACCOUNT_DEFAULT_HTTP_PROTOCOL="https",
|
||||
):
|
||||
expected_url = f"https://foo.org{reverse('account_reset_password_from_key', kwargs={'uidb36': 'UID', 'key': 'KEY'})}"
|
||||
self.assertEqual(
|
||||
adapter.get_reset_password_from_key_url("UID-KEY"),
|
||||
expected_url,
|
||||
)
|
||||
settings.PAPERLESS_URL = None
|
||||
settings.ACCOUNT_DEFAULT_HTTP_PROTOCOL = "https"
|
||||
expected_url = f"https://foo.org{reverse('account_reset_password_from_key', kwargs={'uidb36': 'UID', 'key': 'KEY'})}"
|
||||
assert adapter.get_reset_password_from_key_url("UID-KEY") == expected_url
|
||||
|
||||
# Test when PAPERLESS_URL is not None
|
||||
with override_settings(PAPERLESS_URL="https://bar.com"):
|
||||
expected_url = f"https://bar.com{reverse('account_reset_password_from_key', kwargs={'uidb36': 'UID', 'key': 'KEY'})}"
|
||||
self.assertEqual(
|
||||
adapter.get_reset_password_from_key_url("UID-KEY"),
|
||||
expected_url,
|
||||
)
|
||||
settings.PAPERLESS_URL = "https://bar.com"
|
||||
expected_url = f"https://bar.com{reverse('account_reset_password_from_key', kwargs={'uidb36': 'UID', 'key': 'KEY'})}"
|
||||
assert adapter.get_reset_password_from_key_url("UID-KEY") == expected_url
|
||||
|
||||
@override_settings(ACCOUNT_DEFAULT_GROUPS=["group1", "group2"])
|
||||
def test_save_user_adds_groups(self) -> None:
|
||||
def test_save_user_adds_groups(
|
||||
self,
|
||||
settings: SettingsWrapper,
|
||||
mocker: MockerFixture,
|
||||
) -> None:
|
||||
settings.ACCOUNT_DEFAULT_GROUPS = ["group1", "group2"]
|
||||
Group.objects.create(name="group1")
|
||||
user = User.objects.create_user("testuser")
|
||||
adapter = get_adapter()
|
||||
form = mock.Mock(
|
||||
form = mocker.MagicMock(
|
||||
cleaned_data={
|
||||
"username": "testuser",
|
||||
"email": "user@example.com",
|
||||
@@ -110,88 +103,81 @@ class TestCustomAccountAdapter(TestCase):
|
||||
|
||||
user = adapter.save_user(HttpRequest(), user, form, commit=True)
|
||||
|
||||
self.assertEqual(user.groups.count(), 1)
|
||||
self.assertTrue(user.groups.filter(name="group1").exists())
|
||||
self.assertFalse(user.groups.filter(name="group2").exists())
|
||||
assert user.groups.count() == 1
|
||||
assert user.groups.filter(name="group1").exists()
|
||||
assert not user.groups.filter(name="group2").exists()
|
||||
|
||||
def test_fresh_install_save_creates_superuser(self) -> None:
|
||||
def test_fresh_install_save_creates_superuser(self, mocker: MockerFixture) -> None:
|
||||
adapter = get_adapter()
|
||||
form = mock.Mock(
|
||||
form = mocker.MagicMock(
|
||||
cleaned_data={
|
||||
"username": "testuser",
|
||||
"email": "user@paperless-ngx.com",
|
||||
},
|
||||
)
|
||||
user = adapter.save_user(HttpRequest(), User(), form, commit=True)
|
||||
self.assertTrue(user.is_superuser)
|
||||
assert user.is_superuser
|
||||
|
||||
# Next time, it should not create a superuser
|
||||
form = mock.Mock(
|
||||
form = mocker.MagicMock(
|
||||
cleaned_data={
|
||||
"username": "testuser2",
|
||||
"email": "user2@paperless-ngx.com",
|
||||
},
|
||||
)
|
||||
user2 = adapter.save_user(HttpRequest(), User(), form, commit=True)
|
||||
self.assertFalse(user2.is_superuser)
|
||||
assert not user2.is_superuser
|
||||
|
||||
|
||||
class TestCustomSocialAccountAdapter(TestCase):
|
||||
def test_is_open_for_signup(self) -> None:
|
||||
class TestCustomSocialAccountAdapter:
|
||||
@pytest.mark.django_db
|
||||
def test_is_open_for_signup(self, settings: SettingsWrapper) -> None:
|
||||
adapter = get_social_adapter()
|
||||
|
||||
# Test when SOCIALACCOUNT_ALLOW_SIGNUPS is True
|
||||
settings.SOCIALACCOUNT_ALLOW_SIGNUPS = True
|
||||
self.assertTrue(adapter.is_open_for_signup(None, None))
|
||||
assert adapter.is_open_for_signup(None, None)
|
||||
|
||||
# Test when SOCIALACCOUNT_ALLOW_SIGNUPS is False
|
||||
settings.SOCIALACCOUNT_ALLOW_SIGNUPS = False
|
||||
self.assertFalse(adapter.is_open_for_signup(None, None))
|
||||
assert not adapter.is_open_for_signup(None, None)
|
||||
|
||||
def test_get_connect_redirect_url(self) -> None:
|
||||
adapter = get_social_adapter()
|
||||
request = None
|
||||
socialaccount = None
|
||||
assert adapter.get_connect_redirect_url(None, None) == reverse("base")
|
||||
|
||||
# Test the default URL
|
||||
expected_url = reverse("base")
|
||||
self.assertEqual(
|
||||
adapter.get_connect_redirect_url(request, socialaccount),
|
||||
expected_url,
|
||||
)
|
||||
|
||||
@override_settings(SOCIAL_ACCOUNT_DEFAULT_GROUPS=["group1", "group2"])
|
||||
def test_save_user_adds_groups(self) -> None:
|
||||
@pytest.mark.django_db
|
||||
def test_save_user_adds_groups(
|
||||
self,
|
||||
settings: SettingsWrapper,
|
||||
mocker: MockerFixture,
|
||||
) -> None:
|
||||
settings.SOCIAL_ACCOUNT_DEFAULT_GROUPS = ["group1", "group2"]
|
||||
Group.objects.create(name="group1")
|
||||
adapter = get_social_adapter()
|
||||
request = HttpRequest()
|
||||
user = User.objects.create_user("testuser")
|
||||
sociallogin = mock.Mock(
|
||||
user=user,
|
||||
)
|
||||
sociallogin = mocker.MagicMock(user=user)
|
||||
|
||||
user = adapter.save_user(request, sociallogin, None)
|
||||
user = adapter.save_user(HttpRequest(), sociallogin, None)
|
||||
|
||||
self.assertEqual(user.groups.count(), 1)
|
||||
self.assertTrue(user.groups.filter(name="group1").exists())
|
||||
self.assertFalse(user.groups.filter(name="group2").exists())
|
||||
assert user.groups.count() == 1
|
||||
assert user.groups.filter(name="group1").exists()
|
||||
assert not user.groups.filter(name="group2").exists()
|
||||
|
||||
def test_error_logged_on_authentication_error(self) -> None:
|
||||
def test_error_logged_on_authentication_error(
|
||||
self,
|
||||
caplog: pytest.LogCaptureFixture,
|
||||
) -> None:
|
||||
adapter = get_social_adapter()
|
||||
request = HttpRequest()
|
||||
with self.assertLogs("paperless.auth", level="INFO") as log_cm:
|
||||
with caplog.at_level(logging.INFO, logger="paperless.auth"):
|
||||
adapter.on_authentication_error(
|
||||
request,
|
||||
HttpRequest(),
|
||||
provider="test-provider",
|
||||
error="Error",
|
||||
exception="Test authentication error",
|
||||
)
|
||||
self.assertTrue(
|
||||
any("Test authentication error" in message for message in log_cm.output),
|
||||
)
|
||||
assert any("Test authentication error" in msg for msg in caplog.messages)
|
||||
|
||||
|
||||
class TestDrfTokenStrategy(TestCase):
|
||||
@pytest.mark.django_db
|
||||
class TestDrfTokenStrategy:
|
||||
def test_create_access_token_creates_new_token(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
@@ -201,7 +187,6 @@ class TestDrfTokenStrategy(TestCase):
|
||||
THEN:
|
||||
- A new token is created and its key is returned
|
||||
"""
|
||||
|
||||
user = User.objects.create_user("testuser")
|
||||
request = HttpRequest()
|
||||
request.user = user
|
||||
@@ -209,13 +194,9 @@ class TestDrfTokenStrategy(TestCase):
|
||||
strategy = DrfTokenStrategy()
|
||||
token_key = strategy.create_access_token(request)
|
||||
|
||||
# Verify a token was created
|
||||
self.assertIsNotNone(token_key)
|
||||
self.assertTrue(Token.objects.filter(user=user).exists())
|
||||
|
||||
# Verify the returned key matches the created token
|
||||
token = Token.objects.get(user=user)
|
||||
self.assertEqual(token_key, token.key)
|
||||
assert token_key is not None
|
||||
assert Token.objects.filter(user=user).exists()
|
||||
assert token_key == Token.objects.get(user=user).key
|
||||
|
||||
def test_create_access_token_returns_existing_token(self) -> None:
|
||||
"""
|
||||
@@ -226,7 +207,6 @@ class TestDrfTokenStrategy(TestCase):
|
||||
THEN:
|
||||
- The same token key is returned (no new token created)
|
||||
"""
|
||||
|
||||
user = User.objects.create_user("testuser")
|
||||
existing_token = Token.objects.create(user=user)
|
||||
|
||||
@@ -236,11 +216,8 @@ class TestDrfTokenStrategy(TestCase):
|
||||
strategy = DrfTokenStrategy()
|
||||
token_key = strategy.create_access_token(request)
|
||||
|
||||
# Verify the existing token key is returned
|
||||
self.assertEqual(token_key, existing_token.key)
|
||||
|
||||
# Verify only one token exists (no duplicate created)
|
||||
self.assertEqual(Token.objects.filter(user=user).count(), 1)
|
||||
assert token_key == existing_token.key
|
||||
assert Token.objects.filter(user=user).count() == 1
|
||||
|
||||
def test_create_access_token_returns_none_for_unauthenticated_user(self) -> None:
|
||||
"""
|
||||
@@ -251,12 +228,11 @@ class TestDrfTokenStrategy(TestCase):
|
||||
THEN:
|
||||
- None is returned and no token is created
|
||||
"""
|
||||
|
||||
request = HttpRequest()
|
||||
request.user = AnonymousUser()
|
||||
|
||||
strategy = DrfTokenStrategy()
|
||||
token_key = strategy.create_access_token(request)
|
||||
|
||||
self.assertIsNone(token_key)
|
||||
self.assertEqual(Token.objects.count(), 0)
|
||||
assert token_key is None
|
||||
assert Token.objects.count() == 0
|
||||
|
||||
@@ -1,15 +1,12 @@
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from unittest import mock
|
||||
|
||||
import pytest
|
||||
from django.core.checks import Warning
|
||||
from django.test import TestCase
|
||||
from django.test import override_settings
|
||||
from pytest_django.fixtures import SettingsWrapper
|
||||
from pytest_mock import MockerFixture
|
||||
|
||||
from documents.tests.utils import DirectoriesMixin
|
||||
from documents.tests.utils import FileSystemAssertsMixin
|
||||
from paperless.checks import audit_log_check
|
||||
from paperless.checks import binaries_check
|
||||
from paperless.checks import check_deprecated_db_settings
|
||||
@@ -18,54 +15,84 @@ from paperless.checks import paths_check
|
||||
from paperless.checks import settings_values_check
|
||||
|
||||
|
||||
class TestChecks(DirectoriesMixin, TestCase):
|
||||
def test_binaries(self) -> None:
|
||||
self.assertEqual(binaries_check(None), [])
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class PaperlessTestDirs:
|
||||
data_dir: Path
|
||||
media_dir: Path
|
||||
consumption_dir: Path
|
||||
|
||||
@override_settings(CONVERT_BINARY="uuuhh")
|
||||
def test_binaries_fail(self) -> None:
|
||||
self.assertEqual(len(binaries_check(None)), 1)
|
||||
|
||||
def test_paths_check(self) -> None:
|
||||
self.assertEqual(paths_check(None), [])
|
||||
# TODO: consolidate with documents/tests/conftest.py PaperlessDirs/paperless_dirs
|
||||
# once the paperless and documents test suites are ready to share fixtures.
|
||||
@pytest.fixture()
|
||||
def directories(tmp_path: Path, settings: SettingsWrapper) -> PaperlessTestDirs:
|
||||
data_dir = tmp_path / "data"
|
||||
media_dir = tmp_path / "media"
|
||||
consumption_dir = tmp_path / "consumption"
|
||||
|
||||
@override_settings(
|
||||
MEDIA_ROOT=Path("uuh"),
|
||||
DATA_DIR=Path("whatever"),
|
||||
CONSUMPTION_DIR=Path("idontcare"),
|
||||
for d in (data_dir, media_dir, consumption_dir):
|
||||
d.mkdir()
|
||||
|
||||
settings.DATA_DIR = data_dir
|
||||
settings.MEDIA_ROOT = media_dir
|
||||
settings.CONSUMPTION_DIR = consumption_dir
|
||||
|
||||
return PaperlessTestDirs(
|
||||
data_dir=data_dir,
|
||||
media_dir=media_dir,
|
||||
consumption_dir=consumption_dir,
|
||||
)
|
||||
def test_paths_check_dont_exist(self) -> None:
|
||||
msgs = paths_check(None)
|
||||
self.assertEqual(len(msgs), 3, str(msgs))
|
||||
|
||||
for msg in msgs:
|
||||
self.assertTrue(msg.msg.endswith("is set but doesn't exist."))
|
||||
|
||||
def test_paths_check_no_access(self) -> None:
|
||||
Path(self.dirs.data_dir).chmod(0o000)
|
||||
Path(self.dirs.media_dir).chmod(0o000)
|
||||
Path(self.dirs.consumption_dir).chmod(0o000)
|
||||
class TestChecks:
|
||||
def test_binaries(self) -> None:
|
||||
assert binaries_check(None) == []
|
||||
|
||||
self.addCleanup(os.chmod, self.dirs.data_dir, 0o777)
|
||||
self.addCleanup(os.chmod, self.dirs.media_dir, 0o777)
|
||||
self.addCleanup(os.chmod, self.dirs.consumption_dir, 0o777)
|
||||
def test_binaries_fail(self, settings: SettingsWrapper) -> None:
|
||||
settings.CONVERT_BINARY = "uuuhh"
|
||||
assert len(binaries_check(None)) == 1
|
||||
|
||||
@pytest.mark.usefixtures("directories")
|
||||
def test_paths_check(self) -> None:
|
||||
assert paths_check(None) == []
|
||||
|
||||
def test_paths_check_dont_exist(self, settings: SettingsWrapper) -> None:
|
||||
settings.MEDIA_ROOT = Path("uuh")
|
||||
settings.DATA_DIR = Path("whatever")
|
||||
settings.CONSUMPTION_DIR = Path("idontcare")
|
||||
|
||||
msgs = paths_check(None)
|
||||
self.assertEqual(len(msgs), 3)
|
||||
|
||||
assert len(msgs) == 3, str(msgs)
|
||||
for msg in msgs:
|
||||
self.assertTrue(msg.msg.endswith("is not writeable"))
|
||||
assert msg.msg.endswith("is set but doesn't exist.")
|
||||
|
||||
@override_settings(DEBUG=False)
|
||||
def test_debug_disabled(self) -> None:
|
||||
self.assertEqual(debug_mode_check(None), [])
|
||||
def test_paths_check_no_access(self, directories: PaperlessTestDirs) -> None:
|
||||
directories.data_dir.chmod(0o000)
|
||||
directories.media_dir.chmod(0o000)
|
||||
directories.consumption_dir.chmod(0o000)
|
||||
|
||||
@override_settings(DEBUG=True)
|
||||
def test_debug_enabled(self) -> None:
|
||||
self.assertEqual(len(debug_mode_check(None)), 1)
|
||||
try:
|
||||
msgs = paths_check(None)
|
||||
finally:
|
||||
directories.data_dir.chmod(0o777)
|
||||
directories.media_dir.chmod(0o777)
|
||||
directories.consumption_dir.chmod(0o777)
|
||||
|
||||
assert len(msgs) == 3
|
||||
for msg in msgs:
|
||||
assert msg.msg.endswith("is not writeable")
|
||||
|
||||
def test_debug_disabled(self, settings: SettingsWrapper) -> None:
|
||||
settings.DEBUG = False
|
||||
assert debug_mode_check(None) == []
|
||||
|
||||
def test_debug_enabled(self, settings: SettingsWrapper) -> None:
|
||||
settings.DEBUG = True
|
||||
assert len(debug_mode_check(None)) == 1
|
||||
|
||||
|
||||
class TestSettingsChecksAgainstDefaults(DirectoriesMixin, TestCase):
|
||||
class TestSettingsChecksAgainstDefaults:
|
||||
def test_all_valid(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
@@ -76,104 +103,71 @@ class TestSettingsChecksAgainstDefaults(DirectoriesMixin, TestCase):
|
||||
- No system check errors reported
|
||||
"""
|
||||
msgs = settings_values_check(None)
|
||||
self.assertEqual(len(msgs), 0)
|
||||
assert len(msgs) == 0
|
||||
|
||||
|
||||
class TestOcrSettingsChecks(DirectoriesMixin, TestCase):
|
||||
@override_settings(OCR_OUTPUT_TYPE="notapdf")
|
||||
def test_invalid_output_type(self) -> None:
|
||||
class TestOcrSettingsChecks:
|
||||
@pytest.mark.parametrize(
|
||||
("setting", "value", "expected_msg"),
|
||||
[
|
||||
pytest.param(
|
||||
"OCR_OUTPUT_TYPE",
|
||||
"notapdf",
|
||||
'OCR output type "notapdf"',
|
||||
id="invalid-output-type",
|
||||
),
|
||||
pytest.param(
|
||||
"OCR_MODE",
|
||||
"makeitso",
|
||||
'OCR output mode "makeitso"',
|
||||
id="invalid-mode",
|
||||
),
|
||||
pytest.param(
|
||||
"OCR_MODE",
|
||||
"skip_noarchive",
|
||||
"deprecated",
|
||||
id="deprecated-mode",
|
||||
),
|
||||
pytest.param(
|
||||
"OCR_SKIP_ARCHIVE_FILE",
|
||||
"invalid",
|
||||
'OCR_SKIP_ARCHIVE_FILE setting "invalid"',
|
||||
id="invalid-skip-archive-file",
|
||||
),
|
||||
pytest.param(
|
||||
"OCR_CLEAN",
|
||||
"cleanme",
|
||||
'OCR clean mode "cleanme"',
|
||||
id="invalid-clean",
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_invalid_setting_produces_one_error(
|
||||
self,
|
||||
settings: SettingsWrapper,
|
||||
setting: str,
|
||||
value: str,
|
||||
expected_msg: str,
|
||||
) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- Default settings
|
||||
- OCR output type is invalid
|
||||
- One OCR setting is set to an invalid value
|
||||
WHEN:
|
||||
- Settings are validated
|
||||
THEN:
|
||||
- system check error reported for OCR output type
|
||||
- Exactly one system check error is reported containing the expected message
|
||||
"""
|
||||
setattr(settings, setting, value)
|
||||
|
||||
msgs = settings_values_check(None)
|
||||
self.assertEqual(len(msgs), 1)
|
||||
|
||||
msg = msgs[0]
|
||||
|
||||
self.assertIn('OCR output type "notapdf"', msg.msg)
|
||||
|
||||
@override_settings(OCR_MODE="makeitso")
|
||||
def test_invalid_ocr_type(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- Default settings
|
||||
- OCR type is invalid
|
||||
WHEN:
|
||||
- Settings are validated
|
||||
THEN:
|
||||
- system check error reported for OCR type
|
||||
"""
|
||||
msgs = settings_values_check(None)
|
||||
self.assertEqual(len(msgs), 1)
|
||||
|
||||
msg = msgs[0]
|
||||
|
||||
self.assertIn('OCR output mode "makeitso"', msg.msg)
|
||||
|
||||
@override_settings(OCR_MODE="skip_noarchive")
|
||||
def test_deprecated_ocr_type(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- Default settings
|
||||
- OCR type is deprecated
|
||||
WHEN:
|
||||
- Settings are validated
|
||||
THEN:
|
||||
- deprecation warning reported for OCR type
|
||||
"""
|
||||
msgs = settings_values_check(None)
|
||||
self.assertEqual(len(msgs), 1)
|
||||
|
||||
msg = msgs[0]
|
||||
|
||||
self.assertIn("deprecated", msg.msg)
|
||||
|
||||
@override_settings(OCR_SKIP_ARCHIVE_FILE="invalid")
|
||||
def test_invalid_ocr_skip_archive_file(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- Default settings
|
||||
- OCR_SKIP_ARCHIVE_FILE is invalid
|
||||
WHEN:
|
||||
- Settings are validated
|
||||
THEN:
|
||||
- system check error reported for OCR_SKIP_ARCHIVE_FILE
|
||||
"""
|
||||
msgs = settings_values_check(None)
|
||||
self.assertEqual(len(msgs), 1)
|
||||
|
||||
msg = msgs[0]
|
||||
|
||||
self.assertIn('OCR_SKIP_ARCHIVE_FILE setting "invalid"', msg.msg)
|
||||
|
||||
@override_settings(OCR_CLEAN="cleanme")
|
||||
def test_invalid_ocr_clean(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- Default settings
|
||||
- OCR cleaning type is invalid
|
||||
WHEN:
|
||||
- Settings are validated
|
||||
THEN:
|
||||
- system check error reported for OCR cleaning type
|
||||
"""
|
||||
msgs = settings_values_check(None)
|
||||
self.assertEqual(len(msgs), 1)
|
||||
|
||||
msg = msgs[0]
|
||||
|
||||
self.assertIn('OCR clean mode "cleanme"', msg.msg)
|
||||
assert len(msgs) == 1
|
||||
assert expected_msg in msgs[0].msg
|
||||
|
||||
|
||||
class TestTimezoneSettingsChecks(DirectoriesMixin, TestCase):
|
||||
@override_settings(TIME_ZONE="TheMoon\\MyCrater")
|
||||
def test_invalid_timezone(self) -> None:
|
||||
class TestTimezoneSettingsChecks:
|
||||
def test_invalid_timezone(self, settings: SettingsWrapper) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- Default settings
|
||||
@@ -183,17 +177,16 @@ class TestTimezoneSettingsChecks(DirectoriesMixin, TestCase):
|
||||
THEN:
|
||||
- system check error reported for timezone
|
||||
"""
|
||||
settings.TIME_ZONE = "TheMoon\\MyCrater"
|
||||
|
||||
msgs = settings_values_check(None)
|
||||
self.assertEqual(len(msgs), 1)
|
||||
|
||||
msg = msgs[0]
|
||||
|
||||
self.assertIn('Timezone "TheMoon\\MyCrater"', msg.msg)
|
||||
assert len(msgs) == 1
|
||||
assert 'Timezone "TheMoon\\MyCrater"' in msgs[0].msg
|
||||
|
||||
|
||||
class TestEmailCertSettingsChecks(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
@override_settings(EMAIL_CERTIFICATE_FILE=Path("/tmp/not_actually_here.pem"))
|
||||
def test_not_valid_file(self) -> None:
|
||||
class TestEmailCertSettingsChecks:
|
||||
def test_not_valid_file(self, settings: SettingsWrapper) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- Default settings
|
||||
@@ -203,19 +196,22 @@ class TestEmailCertSettingsChecks(DirectoriesMixin, FileSystemAssertsMixin, Test
|
||||
THEN:
|
||||
- system check error reported for email certificate
|
||||
"""
|
||||
self.assertIsNotFile("/tmp/not_actually_here.pem")
|
||||
cert_path = Path("/tmp/not_actually_here.pem")
|
||||
assert not cert_path.is_file()
|
||||
settings.EMAIL_CERTIFICATE_FILE = cert_path
|
||||
|
||||
msgs = settings_values_check(None)
|
||||
|
||||
self.assertEqual(len(msgs), 1)
|
||||
|
||||
msg = msgs[0]
|
||||
|
||||
self.assertIn("Email cert /tmp/not_actually_here.pem is not a file", msg.msg)
|
||||
assert len(msgs) == 1
|
||||
assert "Email cert /tmp/not_actually_here.pem is not a file" in msgs[0].msg
|
||||
|
||||
|
||||
class TestAuditLogChecks(TestCase):
|
||||
def test_was_enabled_once(self) -> None:
|
||||
class TestAuditLogChecks:
|
||||
def test_was_enabled_once(
|
||||
self,
|
||||
settings: SettingsWrapper,
|
||||
mocker: MockerFixture,
|
||||
) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- Audit log is not enabled
|
||||
@@ -224,23 +220,18 @@ class TestAuditLogChecks(TestCase):
|
||||
THEN:
|
||||
- system check error reported for disabling audit log
|
||||
"""
|
||||
introspect_mock = mock.MagicMock()
|
||||
settings.AUDIT_LOG_ENABLED = False
|
||||
introspect_mock = mocker.MagicMock()
|
||||
introspect_mock.introspection.table_names.return_value = ["auditlog_logentry"]
|
||||
with override_settings(AUDIT_LOG_ENABLED=False):
|
||||
with mock.patch.dict(
|
||||
"paperless.checks.connections",
|
||||
{"default": introspect_mock},
|
||||
):
|
||||
msgs = audit_log_check(None)
|
||||
mocker.patch.dict(
|
||||
"paperless.checks.connections",
|
||||
{"default": introspect_mock},
|
||||
)
|
||||
|
||||
self.assertEqual(len(msgs), 1)
|
||||
msgs = audit_log_check(None)
|
||||
|
||||
msg = msgs[0]
|
||||
|
||||
self.assertIn(
|
||||
("auditlog table was found but audit log is disabled."),
|
||||
msg.msg,
|
||||
)
|
||||
assert len(msgs) == 1
|
||||
assert "auditlog table was found but audit log is disabled." in msgs[0].msg
|
||||
|
||||
|
||||
DEPRECATED_VARS: dict[str, str] = {
|
||||
@@ -269,20 +260,16 @@ class TestDeprecatedDbSettings:
|
||||
@pytest.mark.parametrize(
|
||||
("env_var", "db_option_key"),
|
||||
[
|
||||
("PAPERLESS_DB_TIMEOUT", "timeout"),
|
||||
("PAPERLESS_DB_POOLSIZE", "pool.min_size / pool.max_size"),
|
||||
("PAPERLESS_DBSSLMODE", "sslmode"),
|
||||
("PAPERLESS_DBSSLROOTCERT", "sslrootcert"),
|
||||
("PAPERLESS_DBSSLCERT", "sslcert"),
|
||||
("PAPERLESS_DBSSLKEY", "sslkey"),
|
||||
],
|
||||
ids=[
|
||||
"db-timeout",
|
||||
"db-poolsize",
|
||||
"ssl-mode",
|
||||
"ssl-rootcert",
|
||||
"ssl-cert",
|
||||
"ssl-key",
|
||||
pytest.param("PAPERLESS_DB_TIMEOUT", "timeout", id="db-timeout"),
|
||||
pytest.param(
|
||||
"PAPERLESS_DB_POOLSIZE",
|
||||
"pool.min_size / pool.max_size",
|
||||
id="db-poolsize",
|
||||
),
|
||||
pytest.param("PAPERLESS_DBSSLMODE", "sslmode", id="ssl-mode"),
|
||||
pytest.param("PAPERLESS_DBSSLROOTCERT", "sslrootcert", id="ssl-rootcert"),
|
||||
pytest.param("PAPERLESS_DBSSLCERT", "sslcert", id="ssl-cert"),
|
||||
pytest.param("PAPERLESS_DBSSLKEY", "sslkey", id="ssl-key"),
|
||||
],
|
||||
)
|
||||
def test_single_deprecated_var_produces_one_warning(
|
||||
|
||||
@@ -9,35 +9,50 @@ from paperless.utils import ocr_to_dateparser_languages
|
||||
@pytest.mark.parametrize(
|
||||
("ocr_language", "expected"),
|
||||
[
|
||||
# One language
|
||||
("eng", ["en"]),
|
||||
# Multiple languages
|
||||
("fra+ita+lao", ["fr", "it", "lo"]),
|
||||
# Languages that don't have a two-letter equivalent
|
||||
("fil", ["fil"]),
|
||||
# Languages with a script part supported by dateparser
|
||||
("aze_cyrl+srp_latn", ["az-Cyrl", "sr-Latn"]),
|
||||
# Languages with a script part not supported by dateparser
|
||||
# In this case, default to the language without script
|
||||
("deu_frak", ["de"]),
|
||||
# Traditional and simplified chinese don't have the same name in dateparser,
|
||||
# so they're converted to the general chinese language
|
||||
("chi_tra+chi_sim", ["zh"]),
|
||||
# If a language is not supported by dateparser, fallback to the supported ones
|
||||
("eng+unsupported_language+por", ["en", "pt"]),
|
||||
# If no language is supported, fallback to default
|
||||
("unsupported1+unsupported2", []),
|
||||
# Duplicate languages, should not duplicate in result
|
||||
("eng+eng", ["en"]),
|
||||
# Language with script, but script is not mapped
|
||||
("ita_unknownscript", ["it"]),
|
||||
pytest.param("eng", ["en"], id="single-language"),
|
||||
pytest.param("fra+ita+lao", ["fr", "it", "lo"], id="multiple-languages"),
|
||||
pytest.param("fil", ["fil"], id="no-two-letter-equivalent"),
|
||||
pytest.param(
|
||||
"aze_cyrl+srp_latn",
|
||||
["az-Cyrl", "sr-Latn"],
|
||||
id="script-supported-by-dateparser",
|
||||
),
|
||||
pytest.param(
|
||||
"deu_frak",
|
||||
["de"],
|
||||
id="script-not-supported-falls-back-to-language",
|
||||
),
|
||||
pytest.param(
|
||||
"chi_tra+chi_sim",
|
||||
["zh"],
|
||||
id="chinese-variants-collapse-to-general",
|
||||
),
|
||||
pytest.param(
|
||||
"eng+unsupported_language+por",
|
||||
["en", "pt"],
|
||||
id="unsupported-language-skipped",
|
||||
),
|
||||
pytest.param(
|
||||
"unsupported1+unsupported2",
|
||||
[],
|
||||
id="all-unsupported-returns-empty",
|
||||
),
|
||||
pytest.param("eng+eng", ["en"], id="duplicates-deduplicated"),
|
||||
pytest.param(
|
||||
"ita_unknownscript",
|
||||
["it"],
|
||||
id="unknown-script-falls-back-to-language",
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_ocr_to_dateparser_languages(ocr_language, expected):
|
||||
def test_ocr_to_dateparser_languages(ocr_language: str, expected: list[str]) -> None:
|
||||
assert sorted(ocr_to_dateparser_languages(ocr_language)) == sorted(expected)
|
||||
|
||||
|
||||
def test_ocr_to_dateparser_languages_exception(monkeypatch, caplog):
|
||||
def test_ocr_to_dateparser_languages_exception(
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
caplog: pytest.LogCaptureFixture,
|
||||
) -> None:
|
||||
# Patch LocaleDataLoader.get_locale_map to raise an exception
|
||||
class DummyLoader:
|
||||
def get_locale_map(self, locales=None):
|
||||
|
||||
@@ -1,24 +1,31 @@
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
from django.test import override_settings
|
||||
from django.test import Client
|
||||
from pytest_django.fixtures import SettingsWrapper
|
||||
|
||||
|
||||
def test_favicon_view(client):
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
static_dir = Path(tmpdir)
|
||||
favicon_path = static_dir / "paperless" / "img" / "favicon.ico"
|
||||
favicon_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
favicon_path.write_bytes(b"FAKE ICON DATA")
|
||||
def test_favicon_view(
|
||||
client: Client,
|
||||
tmp_path: Path,
|
||||
settings: SettingsWrapper,
|
||||
) -> None:
|
||||
favicon_path = tmp_path / "paperless" / "img" / "favicon.ico"
|
||||
favicon_path.parent.mkdir(parents=True)
|
||||
favicon_path.write_bytes(b"FAKE ICON DATA")
|
||||
|
||||
with override_settings(STATIC_ROOT=static_dir):
|
||||
response = client.get("/favicon.ico")
|
||||
assert response.status_code == 200
|
||||
assert response["Content-Type"] == "image/x-icon"
|
||||
assert b"".join(response.streaming_content) == b"FAKE ICON DATA"
|
||||
settings.STATIC_ROOT = tmp_path
|
||||
|
||||
response = client.get("/favicon.ico")
|
||||
assert response.status_code == 200
|
||||
assert response["Content-Type"] == "image/x-icon"
|
||||
assert b"".join(response.streaming_content) == b"FAKE ICON DATA"
|
||||
|
||||
|
||||
def test_favicon_view_missing_file(client):
|
||||
with override_settings(STATIC_ROOT=Path(tempfile.mkdtemp())):
|
||||
response = client.get("/favicon.ico")
|
||||
assert response.status_code == 404
|
||||
def test_favicon_view_missing_file(
|
||||
client: Client,
|
||||
tmp_path: Path,
|
||||
settings: SettingsWrapper,
|
||||
) -> None:
|
||||
settings.STATIC_ROOT = tmp_path
|
||||
response = client.get("/favicon.ico")
|
||||
assert response.status_code == 404
|
||||
|
||||
@@ -378,7 +378,6 @@ class ApplicationConfigurationViewSet(ModelViewSet):
|
||||
):
|
||||
# AI index was just enabled and vector store file does not exist
|
||||
llmindex_index.delay(
|
||||
progress_bar_disable=True,
|
||||
rebuild=True,
|
||||
scheduled=False,
|
||||
auto=True,
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
import logging
|
||||
import shutil
|
||||
from collections.abc import Callable
|
||||
from collections.abc import Iterable
|
||||
from datetime import timedelta
|
||||
from pathlib import Path
|
||||
from typing import TypeVar
|
||||
|
||||
import faiss
|
||||
import llama_index.core.settings as llama_settings
|
||||
import tqdm
|
||||
from celery import states
|
||||
from django.conf import settings
|
||||
from django.utils import timezone
|
||||
@@ -29,6 +31,14 @@ from paperless_ai.embedding import build_llm_index_text
|
||||
from paperless_ai.embedding import get_embedding_dim
|
||||
from paperless_ai.embedding import get_embedding_model
|
||||
|
||||
_T = TypeVar("_T")
|
||||
IterWrapper = Callable[[Iterable[_T]], Iterable[_T]]
|
||||
|
||||
|
||||
def _identity(iterable: Iterable[_T]) -> Iterable[_T]:
|
||||
return iterable
|
||||
|
||||
|
||||
logger = logging.getLogger("paperless_ai.indexing")
|
||||
|
||||
|
||||
@@ -156,7 +166,11 @@ def vector_store_file_exists():
|
||||
return Path(settings.LLM_INDEX_DIR / "default__vector_store.json").exists()
|
||||
|
||||
|
||||
def update_llm_index(*, progress_bar_disable=False, rebuild=False) -> str:
|
||||
def update_llm_index(
|
||||
*,
|
||||
iter_wrapper: IterWrapper[Document] = _identity,
|
||||
rebuild=False,
|
||||
) -> str:
|
||||
"""
|
||||
Rebuild or update the LLM index.
|
||||
"""
|
||||
@@ -176,7 +190,7 @@ def update_llm_index(*, progress_bar_disable=False, rebuild=False) -> str:
|
||||
embed_model = get_embedding_model()
|
||||
llama_settings.Settings.embed_model = embed_model
|
||||
storage_context = get_or_create_storage_context(rebuild=True)
|
||||
for document in tqdm.tqdm(documents, disable=progress_bar_disable):
|
||||
for document in iter_wrapper(documents):
|
||||
document_nodes = build_document_node(document)
|
||||
nodes.extend(document_nodes)
|
||||
|
||||
@@ -184,7 +198,7 @@ def update_llm_index(*, progress_bar_disable=False, rebuild=False) -> str:
|
||||
nodes=nodes,
|
||||
storage_context=storage_context,
|
||||
embed_model=embed_model,
|
||||
show_progress=not progress_bar_disable,
|
||||
show_progress=False,
|
||||
)
|
||||
msg = "LLM index rebuilt successfully."
|
||||
else:
|
||||
@@ -196,7 +210,7 @@ def update_llm_index(*, progress_bar_disable=False, rebuild=False) -> str:
|
||||
for node in index.docstore.get_nodes(all_node_ids)
|
||||
}
|
||||
|
||||
for document in tqdm.tqdm(documents, disable=progress_bar_disable):
|
||||
for document in iter_wrapper(documents):
|
||||
doc_id = str(document.id)
|
||||
document_modified = document.modified.isoformat()
|
||||
|
||||
|
||||
16
uv.lock
generated
16
uv.lock
generated
@@ -1342,11 +1342,11 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "faker"
|
||||
version = "40.1.2"
|
||||
version = "40.5.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/5e/77/1c3ff07b6739b9a1d23ca01ec0a90a309a33b78e345a3eb52f9ce9240e36/faker-40.1.2.tar.gz", hash = "sha256:b76a68163aa5f171d260fc24827a8349bc1db672f6a665359e8d0095e8135d30", size = 1949802, upload-time = "2026-01-13T20:51:49.917Z" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/03/2a/96fff3edcb10f6505143448a4b91535f77b74865cec45be52690ee280443/faker-40.5.1.tar.gz", hash = "sha256:70222361cd82aa10cb86066d1a4e8f47f2bcdc919615c412045a69c4e6da0cd3", size = 1952684, upload-time = "2026-02-23T21:34:38.362Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/46/ec/91a434c8a53d40c3598966621dea9c50512bec6ce8e76fa1751015e74cef/faker-40.1.2-py3-none-any.whl", hash = "sha256:93503165c165d330260e4379fd6dc07c94da90c611ed3191a0174d2ab9966a42", size = 1985633, upload-time = "2026-01-13T20:51:47.982Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/4d/a9/1eed4db92d0aec2f9bfdf1faae0ab0418b5e121dda5701f118a7a4f0cd6a/faker-40.5.1-py3-none-any.whl", hash = "sha256:c69640c1e13bad49b4bcebcbf1b52f9f1a872b6ea186c248ada34d798f1661bf", size = 1987053, upload-time = "2026-02-23T21:34:36.418Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -3121,6 +3121,7 @@ webserver = [
|
||||
dev = [
|
||||
{ name = "daphne", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "factory-boy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "faker", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "imagehash", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "prek", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "pytest", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
@@ -3145,6 +3146,7 @@ lint = [
|
||||
testing = [
|
||||
{ name = "daphne", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "factory-boy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "faker", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "imagehash", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "pytest", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "pytest-cov", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
@@ -3257,6 +3259,7 @@ provides-extras = ["mariadb", "postgres", "webserver"]
|
||||
dev = [
|
||||
{ name = "daphne" },
|
||||
{ name = "factory-boy", specifier = "~=3.3.1" },
|
||||
{ name = "faker", specifier = "~=40.5.1" },
|
||||
{ name = "imagehash" },
|
||||
{ name = "prek", specifier = "~=0.3.0" },
|
||||
{ name = "pytest", specifier = "~=9.0.0" },
|
||||
@@ -3279,6 +3282,7 @@ lint = [
|
||||
testing = [
|
||||
{ name = "daphne" },
|
||||
{ name = "factory-boy", specifier = "~=3.3.1" },
|
||||
{ name = "faker", specifier = "~=40.5.1" },
|
||||
{ name = "imagehash" },
|
||||
{ name = "pytest", specifier = "~=9.0.0" },
|
||||
{ name = "pytest-cov", specifier = "~=7.0.0" },
|
||||
@@ -5906,11 +5910,11 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "whitenoise"
|
||||
version = "6.11.0"
|
||||
version = "6.12.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/15/95/8c81ec6b6ebcbf8aca2de7603070ccf37dbb873b03f20708e0f7c1664bc6/whitenoise-6.11.0.tar.gz", hash = "sha256:0f5bfce6061ae6611cd9396a8231e088722e4fc67bc13a111be74c738d99375f", size = 26432, upload-time = "2025-09-18T09:16:10.995Z" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/cb/2a/55b3f3a4ec326cd077c1c3defeee656b9298372a69229134d930151acd01/whitenoise-6.12.0.tar.gz", hash = "sha256:f723ebb76a112e98816ff80fcea0a6c9b8ecde835f8ddda25df7a30a3c2db6ad", size = 26841, upload-time = "2026-02-27T00:05:42.028Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/6c/e9/4366332f9295fe0647d7d3251ce18f5615fbcb12d02c79a26f8dba9221b3/whitenoise-6.11.0-py3-none-any.whl", hash = "sha256:b2aeb45950597236f53b5342b3121c5de69c8da0109362aee506ce88e022d258", size = 20197, upload-time = "2025-09-18T09:16:09.754Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/db/eb/d5583a11486211f3ebd4b385545ae787f32363d453c19fffd81106c9c138/whitenoise-6.12.0-py3-none-any.whl", hash = "sha256:fc5e8c572e33ebf24795b47b6a7da8da3c00cff2349f5b04c02f28d0cc5a3cc2", size = 20302, upload-time = "2026-02-27T00:05:40.086Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
||||
Reference in New Issue
Block a user