mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-03-04 08:16:25 +00:00
Compare commits
20 Commits
chore/lock
...
feature-py
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
56bb68ee3d | ||
|
|
81b99fb4fc | ||
|
|
e72859dc62 | ||
|
|
92c9e3720f | ||
|
|
d79b8806de | ||
|
|
5498503d60 | ||
|
|
16b58c2de5 | ||
|
|
c724fbb5d9 | ||
|
|
9c0f112e94 | ||
|
|
43406f44f2 | ||
|
|
b7ca3550b1 | ||
|
|
0e97419e0e | ||
|
|
10cb2ac183 | ||
|
|
1d7cd5a7ad | ||
|
|
e58a35d40c | ||
|
|
20a9cd40e8 | ||
|
|
b94ce85b46 | ||
|
|
484bef00c1 | ||
|
|
317a177537 | ||
|
|
7ff51452f0 |
18
.codecov.yml
18
.codecov.yml
@@ -14,10 +14,6 @@ component_management:
|
|||||||
# https://docs.codecov.com/docs/carryforward-flags
|
# https://docs.codecov.com/docs/carryforward-flags
|
||||||
flags:
|
flags:
|
||||||
# Backend Python versions
|
# Backend Python versions
|
||||||
backend-python-3.10:
|
|
||||||
paths:
|
|
||||||
- src/**
|
|
||||||
carryforward: true
|
|
||||||
backend-python-3.11:
|
backend-python-3.11:
|
||||||
paths:
|
paths:
|
||||||
- src/**
|
- src/**
|
||||||
@@ -26,6 +22,14 @@ flags:
|
|||||||
paths:
|
paths:
|
||||||
- src/**
|
- src/**
|
||||||
carryforward: true
|
carryforward: true
|
||||||
|
backend-python-3.13:
|
||||||
|
paths:
|
||||||
|
- src/**
|
||||||
|
carryforward: true
|
||||||
|
backend-python-3.14:
|
||||||
|
paths:
|
||||||
|
- src/**
|
||||||
|
carryforward: true
|
||||||
# Frontend (shards merge into single flag)
|
# Frontend (shards merge into single flag)
|
||||||
frontend-node-24.x:
|
frontend-node-24.x:
|
||||||
paths:
|
paths:
|
||||||
@@ -41,9 +45,10 @@ coverage:
|
|||||||
project:
|
project:
|
||||||
backend:
|
backend:
|
||||||
flags:
|
flags:
|
||||||
- backend-python-3.10
|
|
||||||
- backend-python-3.11
|
- backend-python-3.11
|
||||||
- backend-python-3.12
|
- backend-python-3.12
|
||||||
|
- backend-python-3.13
|
||||||
|
- backend-python-3.14
|
||||||
paths:
|
paths:
|
||||||
- src/**
|
- src/**
|
||||||
# https://docs.codecov.com/docs/commit-status#threshold
|
# https://docs.codecov.com/docs/commit-status#threshold
|
||||||
@@ -59,9 +64,10 @@ coverage:
|
|||||||
patch:
|
patch:
|
||||||
backend:
|
backend:
|
||||||
flags:
|
flags:
|
||||||
- backend-python-3.10
|
|
||||||
- backend-python-3.11
|
- backend-python-3.11
|
||||||
- backend-python-3.12
|
- backend-python-3.12
|
||||||
|
- backend-python-3.13
|
||||||
|
- backend-python-3.14
|
||||||
paths:
|
paths:
|
||||||
- src/**
|
- src/**
|
||||||
target: 100%
|
target: 100%
|
||||||
|
|||||||
21
.github/workflows/ci-backend.yml
vendored
21
.github/workflows/ci-backend.yml
vendored
@@ -22,7 +22,6 @@ on:
|
|||||||
concurrency:
|
concurrency:
|
||||||
group: backend-${{ github.event.pull_request.number || github.ref }}
|
group: backend-${{ github.event.pull_request.number || github.ref }}
|
||||||
cancel-in-progress: true
|
cancel-in-progress: true
|
||||||
permissions: {}
|
|
||||||
env:
|
env:
|
||||||
DEFAULT_UV_VERSION: "0.10.x"
|
DEFAULT_UV_VERSION: "0.10.x"
|
||||||
NLTK_DATA: "/usr/share/nltk_data"
|
NLTK_DATA: "/usr/share/nltk_data"
|
||||||
@@ -30,26 +29,24 @@ jobs:
|
|||||||
test:
|
test:
|
||||||
name: "Python ${{ matrix.python-version }}"
|
name: "Python ${{ matrix.python-version }}"
|
||||||
runs-on: ubuntu-24.04
|
runs-on: ubuntu-24.04
|
||||||
permissions:
|
|
||||||
contents: read
|
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
python-version: ['3.10', '3.11', '3.12']
|
python-version: ['3.11', '3.12', '3.13', '3.14']
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v6.0.2
|
||||||
- name: Start containers
|
- name: Start containers
|
||||||
run: |
|
run: |
|
||||||
docker compose --file docker/compose/docker-compose.ci-test.yml pull --quiet
|
docker compose --file docker/compose/docker-compose.ci-test.yml pull --quiet
|
||||||
docker compose --file docker/compose/docker-compose.ci-test.yml up --detach
|
docker compose --file docker/compose/docker-compose.ci-test.yml up --detach
|
||||||
- name: Set up Python
|
- name: Set up Python
|
||||||
id: setup-python
|
id: setup-python
|
||||||
uses: actions/setup-python@v6
|
uses: actions/setup-python@v6.2.0
|
||||||
with:
|
with:
|
||||||
python-version: "${{ matrix.python-version }}"
|
python-version: "${{ matrix.python-version }}"
|
||||||
- name: Install uv
|
- name: Install uv
|
||||||
uses: astral-sh/setup-uv@v7
|
uses: astral-sh/setup-uv@v7.3.1
|
||||||
with:
|
with:
|
||||||
version: ${{ env.DEFAULT_UV_VERSION }}
|
version: ${{ env.DEFAULT_UV_VERSION }}
|
||||||
enable-cache: true
|
enable-cache: true
|
||||||
@@ -86,13 +83,13 @@ jobs:
|
|||||||
pytest
|
pytest
|
||||||
- name: Upload test results to Codecov
|
- name: Upload test results to Codecov
|
||||||
if: always()
|
if: always()
|
||||||
uses: codecov/codecov-action@v5
|
uses: codecov/codecov-action@v5.5.2
|
||||||
with:
|
with:
|
||||||
flags: backend-python-${{ matrix.python-version }}
|
flags: backend-python-${{ matrix.python-version }}
|
||||||
files: junit.xml
|
files: junit.xml
|
||||||
report_type: test_results
|
report_type: test_results
|
||||||
- name: Upload coverage to Codecov
|
- name: Upload coverage to Codecov
|
||||||
uses: codecov/codecov-action@v5
|
uses: codecov/codecov-action@v5.5.2
|
||||||
with:
|
with:
|
||||||
flags: backend-python-${{ matrix.python-version }}
|
flags: backend-python-${{ matrix.python-version }}
|
||||||
files: coverage.xml
|
files: coverage.xml
|
||||||
@@ -105,20 +102,18 @@ jobs:
|
|||||||
typing:
|
typing:
|
||||||
name: Check project typing
|
name: Check project typing
|
||||||
runs-on: ubuntu-24.04
|
runs-on: ubuntu-24.04
|
||||||
permissions:
|
|
||||||
contents: read
|
|
||||||
env:
|
env:
|
||||||
DEFAULT_PYTHON: "3.12"
|
DEFAULT_PYTHON: "3.12"
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v6.0.1
|
uses: actions/checkout@v6.0.2
|
||||||
- name: Set up Python
|
- name: Set up Python
|
||||||
id: setup-python
|
id: setup-python
|
||||||
uses: actions/setup-python@v6.2.0
|
uses: actions/setup-python@v6.2.0
|
||||||
with:
|
with:
|
||||||
python-version: "${{ env.DEFAULT_PYTHON }}"
|
python-version: "${{ env.DEFAULT_PYTHON }}"
|
||||||
- name: Install uv
|
- name: Install uv
|
||||||
uses: astral-sh/setup-uv@v7.2.1
|
uses: astral-sh/setup-uv@v7.3.1
|
||||||
with:
|
with:
|
||||||
version: ${{ env.DEFAULT_UV_VERSION }}
|
version: ${{ env.DEFAULT_UV_VERSION }}
|
||||||
enable-cache: true
|
enable-cache: true
|
||||||
|
|||||||
9
.github/workflows/ci-docker.yml
vendored
9
.github/workflows/ci-docker.yml
vendored
@@ -15,7 +15,6 @@ on:
|
|||||||
concurrency:
|
concurrency:
|
||||||
group: docker-${{ github.event.pull_request.number || github.ref }}
|
group: docker-${{ github.event.pull_request.number || github.ref }}
|
||||||
cancel-in-progress: true
|
cancel-in-progress: true
|
||||||
permissions: {}
|
|
||||||
env:
|
env:
|
||||||
REGISTRY: ghcr.io
|
REGISTRY: ghcr.io
|
||||||
jobs:
|
jobs:
|
||||||
@@ -42,7 +41,7 @@ jobs:
|
|||||||
ref-name: ${{ steps.ref.outputs.name }}
|
ref-name: ${{ steps.ref.outputs.name }}
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v6.0.1
|
uses: actions/checkout@v6.0.2
|
||||||
- name: Determine ref name
|
- name: Determine ref name
|
||||||
id: ref
|
id: ref
|
||||||
run: |
|
run: |
|
||||||
@@ -131,7 +130,7 @@ jobs:
|
|||||||
type=semver,pattern={{major}}.{{minor}}
|
type=semver,pattern={{major}}.{{minor}}
|
||||||
- name: Build and push by digest
|
- name: Build and push by digest
|
||||||
id: build
|
id: build
|
||||||
uses: docker/build-push-action@v6.18.0
|
uses: docker/build-push-action@v6.19.2
|
||||||
with:
|
with:
|
||||||
context: .
|
context: .
|
||||||
file: ./Dockerfile
|
file: ./Dockerfile
|
||||||
@@ -153,7 +152,7 @@ jobs:
|
|||||||
touch "/tmp/digests/${digest#sha256:}"
|
touch "/tmp/digests/${digest#sha256:}"
|
||||||
- name: Upload digest
|
- name: Upload digest
|
||||||
if: steps.check-push.outputs.should-push == 'true'
|
if: steps.check-push.outputs.should-push == 'true'
|
||||||
uses: actions/upload-artifact@v6.0.0
|
uses: actions/upload-artifact@v7.0.0
|
||||||
with:
|
with:
|
||||||
name: digests-${{ matrix.arch }}
|
name: digests-${{ matrix.arch }}
|
||||||
path: /tmp/digests/*
|
path: /tmp/digests/*
|
||||||
@@ -169,7 +168,7 @@ jobs:
|
|||||||
packages: write
|
packages: write
|
||||||
steps:
|
steps:
|
||||||
- name: Download digests
|
- name: Download digests
|
||||||
uses: actions/download-artifact@v7.0.0
|
uses: actions/download-artifact@v8.0.0
|
||||||
with:
|
with:
|
||||||
path: /tmp/digests
|
path: /tmp/digests
|
||||||
pattern: digests-*
|
pattern: digests-*
|
||||||
|
|||||||
23
.github/workflows/ci-docs.yml
vendored
23
.github/workflows/ci-docs.yml
vendored
@@ -21,7 +21,10 @@ on:
|
|||||||
concurrency:
|
concurrency:
|
||||||
group: docs-${{ github.event.pull_request.number || github.ref }}
|
group: docs-${{ github.event.pull_request.number || github.ref }}
|
||||||
cancel-in-progress: true
|
cancel-in-progress: true
|
||||||
permissions: {}
|
permissions:
|
||||||
|
contents: read
|
||||||
|
pages: write
|
||||||
|
id-token: write
|
||||||
env:
|
env:
|
||||||
DEFAULT_UV_VERSION: "0.10.x"
|
DEFAULT_UV_VERSION: "0.10.x"
|
||||||
DEFAULT_PYTHON_VERSION: "3.12"
|
DEFAULT_PYTHON_VERSION: "3.12"
|
||||||
@@ -29,19 +32,17 @@ jobs:
|
|||||||
build:
|
build:
|
||||||
name: Build Documentation
|
name: Build Documentation
|
||||||
runs-on: ubuntu-24.04
|
runs-on: ubuntu-24.04
|
||||||
permissions:
|
|
||||||
contents: read
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/configure-pages@v5
|
- uses: actions/configure-pages@v5.0.0
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v6.0.2
|
||||||
- name: Set up Python
|
- name: Set up Python
|
||||||
id: setup-python
|
id: setup-python
|
||||||
uses: actions/setup-python@v6
|
uses: actions/setup-python@v6.2.0
|
||||||
with:
|
with:
|
||||||
python-version: ${{ env.DEFAULT_PYTHON_VERSION }}
|
python-version: ${{ env.DEFAULT_PYTHON_VERSION }}
|
||||||
- name: Install uv
|
- name: Install uv
|
||||||
uses: astral-sh/setup-uv@v7
|
uses: astral-sh/setup-uv@v7.3.1
|
||||||
with:
|
with:
|
||||||
version: ${{ env.DEFAULT_UV_VERSION }}
|
version: ${{ env.DEFAULT_UV_VERSION }}
|
||||||
enable-cache: true
|
enable-cache: true
|
||||||
@@ -57,7 +58,7 @@ jobs:
|
|||||||
--frozen \
|
--frozen \
|
||||||
zensical build --clean
|
zensical build --clean
|
||||||
- name: Upload GitHub Pages artifact
|
- name: Upload GitHub Pages artifact
|
||||||
uses: actions/upload-pages-artifact@v4
|
uses: actions/upload-pages-artifact@v4.0.0
|
||||||
with:
|
with:
|
||||||
path: site
|
path: site
|
||||||
name: github-pages-${{ github.run_id }}-${{ github.run_attempt }}
|
name: github-pages-${{ github.run_id }}-${{ github.run_attempt }}
|
||||||
@@ -66,16 +67,12 @@ jobs:
|
|||||||
needs: build
|
needs: build
|
||||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
|
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
|
||||||
runs-on: ubuntu-24.04
|
runs-on: ubuntu-24.04
|
||||||
permissions:
|
|
||||||
contents: read
|
|
||||||
pages: write
|
|
||||||
id-token: write
|
|
||||||
environment:
|
environment:
|
||||||
name: github-pages
|
name: github-pages
|
||||||
url: ${{ steps.deployment.outputs.page_url }}
|
url: ${{ steps.deployment.outputs.page_url }}
|
||||||
steps:
|
steps:
|
||||||
- name: Deploy GitHub Pages
|
- name: Deploy GitHub Pages
|
||||||
uses: actions/deploy-pages@v4
|
uses: actions/deploy-pages@v4.0.5
|
||||||
id: deployment
|
id: deployment
|
||||||
with:
|
with:
|
||||||
artifact_name: github-pages-${{ github.run_id }}-${{ github.run_attempt }}
|
artifact_name: github-pages-${{ github.run_id }}-${{ github.run_attempt }}
|
||||||
|
|||||||
55
.github/workflows/ci-frontend.yml
vendored
55
.github/workflows/ci-frontend.yml
vendored
@@ -16,29 +16,26 @@ on:
|
|||||||
concurrency:
|
concurrency:
|
||||||
group: frontend-${{ github.event.pull_request.number || github.ref }}
|
group: frontend-${{ github.event.pull_request.number || github.ref }}
|
||||||
cancel-in-progress: true
|
cancel-in-progress: true
|
||||||
permissions: {}
|
|
||||||
jobs:
|
jobs:
|
||||||
install-dependencies:
|
install-dependencies:
|
||||||
name: Install Dependencies
|
name: Install Dependencies
|
||||||
runs-on: ubuntu-24.04
|
runs-on: ubuntu-24.04
|
||||||
permissions:
|
|
||||||
contents: read
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v6.0.2
|
||||||
- name: Install pnpm
|
- name: Install pnpm
|
||||||
uses: pnpm/action-setup@v4
|
uses: pnpm/action-setup@v4.2.0
|
||||||
with:
|
with:
|
||||||
version: 10
|
version: 10
|
||||||
- name: Use Node.js 24
|
- name: Use Node.js 24
|
||||||
uses: actions/setup-node@v6
|
uses: actions/setup-node@v6.2.0
|
||||||
with:
|
with:
|
||||||
node-version: 24.x
|
node-version: 24.x
|
||||||
cache: 'pnpm'
|
cache: 'pnpm'
|
||||||
cache-dependency-path: 'src-ui/pnpm-lock.yaml'
|
cache-dependency-path: 'src-ui/pnpm-lock.yaml'
|
||||||
- name: Cache frontend dependencies
|
- name: Cache frontend dependencies
|
||||||
id: cache-frontend-deps
|
id: cache-frontend-deps
|
||||||
uses: actions/cache@v5
|
uses: actions/cache@v5.0.3
|
||||||
with:
|
with:
|
||||||
path: |
|
path: |
|
||||||
~/.pnpm-store
|
~/.pnpm-store
|
||||||
@@ -50,23 +47,21 @@ jobs:
|
|||||||
name: Lint
|
name: Lint
|
||||||
needs: install-dependencies
|
needs: install-dependencies
|
||||||
runs-on: ubuntu-24.04
|
runs-on: ubuntu-24.04
|
||||||
permissions:
|
|
||||||
contents: read
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v6.0.2
|
||||||
- name: Install pnpm
|
- name: Install pnpm
|
||||||
uses: pnpm/action-setup@v4
|
uses: pnpm/action-setup@v4.2.0
|
||||||
with:
|
with:
|
||||||
version: 10
|
version: 10
|
||||||
- name: Use Node.js 24
|
- name: Use Node.js 24
|
||||||
uses: actions/setup-node@v6
|
uses: actions/setup-node@v6.2.0
|
||||||
with:
|
with:
|
||||||
node-version: 24.x
|
node-version: 24.x
|
||||||
cache: 'pnpm'
|
cache: 'pnpm'
|
||||||
cache-dependency-path: 'src-ui/pnpm-lock.yaml'
|
cache-dependency-path: 'src-ui/pnpm-lock.yaml'
|
||||||
- name: Cache frontend dependencies
|
- name: Cache frontend dependencies
|
||||||
uses: actions/cache@v5
|
uses: actions/cache@v5.0.3
|
||||||
with:
|
with:
|
||||||
path: |
|
path: |
|
||||||
~/.pnpm-store
|
~/.pnpm-store
|
||||||
@@ -80,8 +75,6 @@ jobs:
|
|||||||
name: "Unit Tests (${{ matrix.shard-index }}/${{ matrix.shard-count }})"
|
name: "Unit Tests (${{ matrix.shard-index }}/${{ matrix.shard-count }})"
|
||||||
needs: install-dependencies
|
needs: install-dependencies
|
||||||
runs-on: ubuntu-24.04
|
runs-on: ubuntu-24.04
|
||||||
permissions:
|
|
||||||
contents: read
|
|
||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
@@ -90,19 +83,19 @@ jobs:
|
|||||||
shard-count: [4]
|
shard-count: [4]
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v6.0.2
|
||||||
- name: Install pnpm
|
- name: Install pnpm
|
||||||
uses: pnpm/action-setup@v4
|
uses: pnpm/action-setup@v4.2.0
|
||||||
with:
|
with:
|
||||||
version: 10
|
version: 10
|
||||||
- name: Use Node.js 24
|
- name: Use Node.js 24
|
||||||
uses: actions/setup-node@v6
|
uses: actions/setup-node@v6.2.0
|
||||||
with:
|
with:
|
||||||
node-version: 24.x
|
node-version: 24.x
|
||||||
cache: 'pnpm'
|
cache: 'pnpm'
|
||||||
cache-dependency-path: 'src-ui/pnpm-lock.yaml'
|
cache-dependency-path: 'src-ui/pnpm-lock.yaml'
|
||||||
- name: Cache frontend dependencies
|
- name: Cache frontend dependencies
|
||||||
uses: actions/cache@v5
|
uses: actions/cache@v5.0.3
|
||||||
with:
|
with:
|
||||||
path: |
|
path: |
|
||||||
~/.pnpm-store
|
~/.pnpm-store
|
||||||
@@ -114,13 +107,13 @@ jobs:
|
|||||||
run: cd src-ui && pnpm run test --max-workers=2 --shard=${{ matrix.shard-index }}/${{ matrix.shard-count }}
|
run: cd src-ui && pnpm run test --max-workers=2 --shard=${{ matrix.shard-index }}/${{ matrix.shard-count }}
|
||||||
- name: Upload test results to Codecov
|
- name: Upload test results to Codecov
|
||||||
if: always()
|
if: always()
|
||||||
uses: codecov/codecov-action@v5
|
uses: codecov/codecov-action@v5.5.2
|
||||||
with:
|
with:
|
||||||
flags: frontend-node-${{ matrix.node-version }}
|
flags: frontend-node-${{ matrix.node-version }}
|
||||||
directory: src-ui/
|
directory: src-ui/
|
||||||
report_type: test_results
|
report_type: test_results
|
||||||
- name: Upload coverage to Codecov
|
- name: Upload coverage to Codecov
|
||||||
uses: codecov/codecov-action@v5
|
uses: codecov/codecov-action@v5.5.2
|
||||||
with:
|
with:
|
||||||
flags: frontend-node-${{ matrix.node-version }}
|
flags: frontend-node-${{ matrix.node-version }}
|
||||||
directory: src-ui/coverage/
|
directory: src-ui/coverage/
|
||||||
@@ -128,8 +121,6 @@ jobs:
|
|||||||
name: "E2E Tests (${{ matrix.shard-index }}/${{ matrix.shard-count }})"
|
name: "E2E Tests (${{ matrix.shard-index }}/${{ matrix.shard-count }})"
|
||||||
needs: install-dependencies
|
needs: install-dependencies
|
||||||
runs-on: ubuntu-24.04
|
runs-on: ubuntu-24.04
|
||||||
permissions:
|
|
||||||
contents: read
|
|
||||||
container: mcr.microsoft.com/playwright:v1.58.2-noble
|
container: mcr.microsoft.com/playwright:v1.58.2-noble
|
||||||
env:
|
env:
|
||||||
PLAYWRIGHT_BROWSERS_PATH: /ms-playwright
|
PLAYWRIGHT_BROWSERS_PATH: /ms-playwright
|
||||||
@@ -142,19 +133,19 @@ jobs:
|
|||||||
shard-count: [2]
|
shard-count: [2]
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v6.0.2
|
||||||
- name: Install pnpm
|
- name: Install pnpm
|
||||||
uses: pnpm/action-setup@v4
|
uses: pnpm/action-setup@v4.2.0
|
||||||
with:
|
with:
|
||||||
version: 10
|
version: 10
|
||||||
- name: Use Node.js 24
|
- name: Use Node.js 24
|
||||||
uses: actions/setup-node@v6
|
uses: actions/setup-node@v6.2.0
|
||||||
with:
|
with:
|
||||||
node-version: 24.x
|
node-version: 24.x
|
||||||
cache: 'pnpm'
|
cache: 'pnpm'
|
||||||
cache-dependency-path: 'src-ui/pnpm-lock.yaml'
|
cache-dependency-path: 'src-ui/pnpm-lock.yaml'
|
||||||
- name: Cache frontend dependencies
|
- name: Cache frontend dependencies
|
||||||
uses: actions/cache@v5
|
uses: actions/cache@v5.0.3
|
||||||
with:
|
with:
|
||||||
path: |
|
path: |
|
||||||
~/.pnpm-store
|
~/.pnpm-store
|
||||||
@@ -170,23 +161,21 @@ jobs:
|
|||||||
name: Bundle Analysis
|
name: Bundle Analysis
|
||||||
needs: [unit-tests, e2e-tests]
|
needs: [unit-tests, e2e-tests]
|
||||||
runs-on: ubuntu-24.04
|
runs-on: ubuntu-24.04
|
||||||
permissions:
|
|
||||||
contents: read
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v6.0.2
|
||||||
- name: Install pnpm
|
- name: Install pnpm
|
||||||
uses: pnpm/action-setup@v4
|
uses: pnpm/action-setup@v4.2.0
|
||||||
with:
|
with:
|
||||||
version: 10
|
version: 10
|
||||||
- name: Use Node.js 24
|
- name: Use Node.js 24
|
||||||
uses: actions/setup-node@v6
|
uses: actions/setup-node@v6.2.0
|
||||||
with:
|
with:
|
||||||
node-version: 24.x
|
node-version: 24.x
|
||||||
cache: 'pnpm'
|
cache: 'pnpm'
|
||||||
cache-dependency-path: 'src-ui/pnpm-lock.yaml'
|
cache-dependency-path: 'src-ui/pnpm-lock.yaml'
|
||||||
- name: Cache frontend dependencies
|
- name: Cache frontend dependencies
|
||||||
uses: actions/cache@v5
|
uses: actions/cache@v5.0.3
|
||||||
with:
|
with:
|
||||||
path: |
|
path: |
|
||||||
~/.pnpm-store
|
~/.pnpm-store
|
||||||
|
|||||||
3
.github/workflows/ci-lint.yml
vendored
3
.github/workflows/ci-lint.yml
vendored
@@ -9,13 +9,10 @@ on:
|
|||||||
concurrency:
|
concurrency:
|
||||||
group: lint-${{ github.event.pull_request.number || github.ref }}
|
group: lint-${{ github.event.pull_request.number || github.ref }}
|
||||||
cancel-in-progress: true
|
cancel-in-progress: true
|
||||||
permissions: {}
|
|
||||||
jobs:
|
jobs:
|
||||||
lint:
|
lint:
|
||||||
name: Linting via prek
|
name: Linting via prek
|
||||||
runs-on: ubuntu-slim
|
runs-on: ubuntu-slim
|
||||||
permissions:
|
|
||||||
contents: read
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v6.0.2
|
uses: actions/checkout@v6.0.2
|
||||||
|
|||||||
42
.github/workflows/ci-release.yml
vendored
42
.github/workflows/ci-release.yml
vendored
@@ -7,7 +7,6 @@ on:
|
|||||||
concurrency:
|
concurrency:
|
||||||
group: release-${{ github.ref }}
|
group: release-${{ github.ref }}
|
||||||
cancel-in-progress: false
|
cancel-in-progress: false
|
||||||
permissions: {}
|
|
||||||
env:
|
env:
|
||||||
DEFAULT_UV_VERSION: "0.10.x"
|
DEFAULT_UV_VERSION: "0.10.x"
|
||||||
DEFAULT_PYTHON_VERSION: "3.12"
|
DEFAULT_PYTHON_VERSION: "3.12"
|
||||||
@@ -15,10 +14,6 @@ jobs:
|
|||||||
wait-for-docker:
|
wait-for-docker:
|
||||||
name: Wait for Docker Build
|
name: Wait for Docker Build
|
||||||
runs-on: ubuntu-24.04
|
runs-on: ubuntu-24.04
|
||||||
permissions:
|
|
||||||
# lewagon/wait-on-check-action reads workflow check runs
|
|
||||||
actions: read
|
|
||||||
contents: read
|
|
||||||
steps:
|
steps:
|
||||||
- name: Wait for Docker build
|
- name: Wait for Docker build
|
||||||
uses: lewagon/wait-on-check-action@v1.5.0
|
uses: lewagon/wait-on-check-action@v1.5.0
|
||||||
@@ -31,18 +26,16 @@ jobs:
|
|||||||
name: Build Release
|
name: Build Release
|
||||||
needs: wait-for-docker
|
needs: wait-for-docker
|
||||||
runs-on: ubuntu-24.04
|
runs-on: ubuntu-24.04
|
||||||
permissions:
|
|
||||||
contents: read
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v6.0.2
|
||||||
# ---- Frontend Build ----
|
# ---- Frontend Build ----
|
||||||
- name: Install pnpm
|
- name: Install pnpm
|
||||||
uses: pnpm/action-setup@v4
|
uses: pnpm/action-setup@v4.2.0
|
||||||
with:
|
with:
|
||||||
version: 10
|
version: 10
|
||||||
- name: Use Node.js 24
|
- name: Use Node.js 24
|
||||||
uses: actions/setup-node@v6
|
uses: actions/setup-node@v6.2.0
|
||||||
with:
|
with:
|
||||||
node-version: 24.x
|
node-version: 24.x
|
||||||
cache: 'pnpm'
|
cache: 'pnpm'
|
||||||
@@ -54,11 +47,11 @@ jobs:
|
|||||||
# ---- Backend Setup ----
|
# ---- Backend Setup ----
|
||||||
- name: Set up Python
|
- name: Set up Python
|
||||||
id: setup-python
|
id: setup-python
|
||||||
uses: actions/setup-python@v6
|
uses: actions/setup-python@v6.2.0
|
||||||
with:
|
with:
|
||||||
python-version: ${{ env.DEFAULT_PYTHON_VERSION }}
|
python-version: ${{ env.DEFAULT_PYTHON_VERSION }}
|
||||||
- name: Install uv
|
- name: Install uv
|
||||||
uses: astral-sh/setup-uv@v7
|
uses: astral-sh/setup-uv@v7.3.1
|
||||||
with:
|
with:
|
||||||
version: ${{ env.DEFAULT_UV_VERSION }}
|
version: ${{ env.DEFAULT_UV_VERSION }}
|
||||||
enable-cache: true
|
enable-cache: true
|
||||||
@@ -125,7 +118,7 @@ jobs:
|
|||||||
sudo chown -R 1000:1000 paperless-ngx/
|
sudo chown -R 1000:1000 paperless-ngx/
|
||||||
tar -cJf paperless-ngx.tar.xz paperless-ngx/
|
tar -cJf paperless-ngx.tar.xz paperless-ngx/
|
||||||
- name: Upload release artifact
|
- name: Upload release artifact
|
||||||
uses: actions/upload-artifact@v6
|
uses: actions/upload-artifact@v7.0.0
|
||||||
with:
|
with:
|
||||||
name: release
|
name: release
|
||||||
path: dist/paperless-ngx.tar.xz
|
path: dist/paperless-ngx.tar.xz
|
||||||
@@ -134,17 +127,13 @@ jobs:
|
|||||||
name: Publish Release
|
name: Publish Release
|
||||||
needs: build-release
|
needs: build-release
|
||||||
runs-on: ubuntu-24.04
|
runs-on: ubuntu-24.04
|
||||||
permissions:
|
|
||||||
# release-drafter reads PRs to build the changelog and creates/publishes the release
|
|
||||||
contents: write
|
|
||||||
pull-requests: read
|
|
||||||
outputs:
|
outputs:
|
||||||
prerelease: ${{ steps.get-version.outputs.prerelease }}
|
prerelease: ${{ steps.get-version.outputs.prerelease }}
|
||||||
changelog: ${{ steps.create-release.outputs.body }}
|
changelog: ${{ steps.create-release.outputs.body }}
|
||||||
version: ${{ steps.get-version.outputs.version }}
|
version: ${{ steps.get-version.outputs.version }}
|
||||||
steps:
|
steps:
|
||||||
- name: Download release artifact
|
- name: Download release artifact
|
||||||
uses: actions/download-artifact@v7
|
uses: actions/download-artifact@v8.0.0
|
||||||
with:
|
with:
|
||||||
name: release
|
name: release
|
||||||
path: ./
|
path: ./
|
||||||
@@ -159,7 +148,7 @@ jobs:
|
|||||||
fi
|
fi
|
||||||
- name: Create release and changelog
|
- name: Create release and changelog
|
||||||
id: create-release
|
id: create-release
|
||||||
uses: release-drafter/release-drafter@v6
|
uses: release-drafter/release-drafter@v6.2.0
|
||||||
with:
|
with:
|
||||||
name: Paperless-ngx ${{ steps.get-version.outputs.version }}
|
name: Paperless-ngx ${{ steps.get-version.outputs.version }}
|
||||||
tag: ${{ steps.get-version.outputs.version }}
|
tag: ${{ steps.get-version.outputs.version }}
|
||||||
@@ -170,7 +159,7 @@ jobs:
|
|||||||
env:
|
env:
|
||||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||||
- name: Upload release archive
|
- name: Upload release archive
|
||||||
uses: shogo82148/actions-upload-release-asset@v1
|
uses: shogo82148/actions-upload-release-asset@v1.9.2
|
||||||
with:
|
with:
|
||||||
github_token: ${{ secrets.GITHUB_TOKEN }}
|
github_token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
upload_url: ${{ steps.create-release.outputs.upload_url }}
|
upload_url: ${{ steps.create-release.outputs.upload_url }}
|
||||||
@@ -185,23 +174,18 @@ jobs:
|
|||||||
needs: publish-release
|
needs: publish-release
|
||||||
if: needs.publish-release.outputs.prerelease == 'false'
|
if: needs.publish-release.outputs.prerelease == 'false'
|
||||||
runs-on: ubuntu-24.04
|
runs-on: ubuntu-24.04
|
||||||
permissions:
|
|
||||||
# git push of the changelog branch requires contents: write
|
|
||||||
# github.rest.pulls.create() and github.rest.issues.addLabels() require pull-requests: write
|
|
||||||
contents: write
|
|
||||||
pull-requests: write
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v6.0.2
|
||||||
with:
|
with:
|
||||||
ref: main
|
ref: main
|
||||||
- name: Set up Python
|
- name: Set up Python
|
||||||
id: setup-python
|
id: setup-python
|
||||||
uses: actions/setup-python@v6
|
uses: actions/setup-python@v6.2.0
|
||||||
with:
|
with:
|
||||||
python-version: ${{ env.DEFAULT_PYTHON_VERSION }}
|
python-version: ${{ env.DEFAULT_PYTHON_VERSION }}
|
||||||
- name: Install uv
|
- name: Install uv
|
||||||
uses: astral-sh/setup-uv@v7
|
uses: astral-sh/setup-uv@v7.3.1
|
||||||
with:
|
with:
|
||||||
version: ${{ env.DEFAULT_UV_VERSION }}
|
version: ${{ env.DEFAULT_UV_VERSION }}
|
||||||
enable-cache: true
|
enable-cache: true
|
||||||
@@ -234,7 +218,7 @@ jobs:
|
|||||||
git commit -am "Changelog ${{ needs.publish-release.outputs.version }} - GHA"
|
git commit -am "Changelog ${{ needs.publish-release.outputs.version }} - GHA"
|
||||||
git push origin ${{ needs.publish-release.outputs.version }}-changelog
|
git push origin ${{ needs.publish-release.outputs.version }}-changelog
|
||||||
- name: Create pull request
|
- name: Create pull request
|
||||||
uses: actions/github-script@v8
|
uses: actions/github-script@v8.0.0
|
||||||
with:
|
with:
|
||||||
script: |
|
script: |
|
||||||
const { repo, owner } = context.repo;
|
const { repo, owner } = context.repo;
|
||||||
|
|||||||
1
.github/workflows/cleanup-tags.yml
vendored
1
.github/workflows/cleanup-tags.yml
vendored
@@ -12,7 +12,6 @@ on:
|
|||||||
concurrency:
|
concurrency:
|
||||||
group: registry-tags-cleanup
|
group: registry-tags-cleanup
|
||||||
cancel-in-progress: false
|
cancel-in-progress: false
|
||||||
permissions: {}
|
|
||||||
jobs:
|
jobs:
|
||||||
cleanup-images:
|
cleanup-images:
|
||||||
name: Cleanup Image Tags for ${{ matrix.primary-name }}
|
name: Cleanup Image Tags for ${{ matrix.primary-name }}
|
||||||
|
|||||||
7
.github/workflows/codeql-analysis.yml
vendored
7
.github/workflows/codeql-analysis.yml
vendored
@@ -18,7 +18,6 @@ on:
|
|||||||
branches: [dev]
|
branches: [dev]
|
||||||
schedule:
|
schedule:
|
||||||
- cron: '28 13 * * 5'
|
- cron: '28 13 * * 5'
|
||||||
permissions: {}
|
|
||||||
jobs:
|
jobs:
|
||||||
analyze:
|
analyze:
|
||||||
name: Analyze
|
name: Analyze
|
||||||
@@ -35,10 +34,10 @@ jobs:
|
|||||||
# Learn more about CodeQL language support at https://git.io/codeql-language-support
|
# Learn more about CodeQL language support at https://git.io/codeql-language-support
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout repository
|
- name: Checkout repository
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v6.0.2
|
||||||
# Initializes the CodeQL tools for scanning.
|
# Initializes the CodeQL tools for scanning.
|
||||||
- name: Initialize CodeQL
|
- name: Initialize CodeQL
|
||||||
uses: github/codeql-action/init@v4
|
uses: github/codeql-action/init@v4.32.5
|
||||||
with:
|
with:
|
||||||
languages: ${{ matrix.language }}
|
languages: ${{ matrix.language }}
|
||||||
# If you wish to specify custom queries, you can do so here or in a config file.
|
# If you wish to specify custom queries, you can do so here or in a config file.
|
||||||
@@ -46,4 +45,4 @@ jobs:
|
|||||||
# Prefix the list here with "+" to use these queries and those in the config file.
|
# Prefix the list here with "+" to use these queries and those in the config file.
|
||||||
# queries: ./path/to/local/query, your-org/your-repo/queries@main
|
# queries: ./path/to/local/query, your-org/your-repo/queries@main
|
||||||
- name: Perform CodeQL Analysis
|
- name: Perform CodeQL Analysis
|
||||||
uses: github/codeql-action/analyze@v4
|
uses: github/codeql-action/analyze@v4.32.5
|
||||||
|
|||||||
9
.github/workflows/crowdin.yml
vendored
9
.github/workflows/crowdin.yml
vendored
@@ -6,23 +6,18 @@ on:
|
|||||||
push:
|
push:
|
||||||
paths: ['src/locale/**', 'src-ui/messages.xlf', 'src-ui/src/locale/**']
|
paths: ['src/locale/**', 'src-ui/messages.xlf', 'src-ui/src/locale/**']
|
||||||
branches: [dev]
|
branches: [dev]
|
||||||
permissions: {}
|
|
||||||
jobs:
|
jobs:
|
||||||
synchronize-with-crowdin:
|
synchronize-with-crowdin:
|
||||||
name: Crowdin Sync
|
name: Crowdin Sync
|
||||||
if: github.repository_owner == 'paperless-ngx'
|
if: github.repository_owner == 'paperless-ngx'
|
||||||
runs-on: ubuntu-24.04
|
runs-on: ubuntu-24.04
|
||||||
permissions:
|
|
||||||
# Crowdin action pushes translation branches and creates/updates PRs via GITHUB_TOKEN
|
|
||||||
contents: write
|
|
||||||
pull-requests: write
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v6.0.2
|
||||||
with:
|
with:
|
||||||
token: ${{ secrets.PNGX_BOT_PAT }}
|
token: ${{ secrets.PNGX_BOT_PAT }}
|
||||||
- name: crowdin action
|
- name: crowdin action
|
||||||
uses: crowdin/github-action@v2
|
uses: crowdin/github-action@v2.15.0
|
||||||
with:
|
with:
|
||||||
upload_translations: false
|
upload_translations: false
|
||||||
download_translations: true
|
download_translations: true
|
||||||
|
|||||||
16
.github/workflows/pr-bot.yml
vendored
16
.github/workflows/pr-bot.yml
vendored
@@ -2,19 +2,17 @@ name: PR Bot
|
|||||||
on:
|
on:
|
||||||
pull_request_target:
|
pull_request_target:
|
||||||
types: [opened]
|
types: [opened]
|
||||||
permissions: {}
|
permissions:
|
||||||
|
contents: read
|
||||||
|
pull-requests: write
|
||||||
jobs:
|
jobs:
|
||||||
pr-bot:
|
pr-bot:
|
||||||
name: Automated PR Bot
|
name: Automated PR Bot
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
permissions:
|
|
||||||
# labeler reads file paths; all steps add labels or post comments on PRs
|
|
||||||
contents: read
|
|
||||||
pull-requests: write
|
|
||||||
steps:
|
steps:
|
||||||
- name: Label PR by file path or branch name
|
- name: Label PR by file path or branch name
|
||||||
# see .github/labeler.yml for the labeler config
|
# see .github/labeler.yml for the labeler config
|
||||||
uses: actions/labeler@v6
|
uses: actions/labeler@v6.0.1
|
||||||
with:
|
with:
|
||||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
- name: Label by size
|
- name: Label by size
|
||||||
@@ -28,7 +26,7 @@ jobs:
|
|||||||
fail_if_xl: 'false'
|
fail_if_xl: 'false'
|
||||||
excluded_files: /\.lock$/ /\.txt$/ ^src-ui/pnpm-lock\.yaml$ ^src-ui/messages\.xlf$ ^src/locale/en_US/LC_MESSAGES/django\.po$
|
excluded_files: /\.lock$/ /\.txt$/ ^src-ui/pnpm-lock\.yaml$ ^src-ui/messages\.xlf$ ^src/locale/en_US/LC_MESSAGES/django\.po$
|
||||||
- name: Label by PR title
|
- name: Label by PR title
|
||||||
uses: actions/github-script@v8
|
uses: actions/github-script@v8.0.0
|
||||||
with:
|
with:
|
||||||
script: |
|
script: |
|
||||||
const pr = context.payload.pull_request;
|
const pr = context.payload.pull_request;
|
||||||
@@ -54,7 +52,7 @@ jobs:
|
|||||||
}
|
}
|
||||||
- name: Label bot-generated PRs
|
- name: Label bot-generated PRs
|
||||||
if: ${{ contains(github.actor, 'dependabot') || contains(github.actor, 'crowdin-bot') }}
|
if: ${{ contains(github.actor, 'dependabot') || contains(github.actor, 'crowdin-bot') }}
|
||||||
uses: actions/github-script@v8
|
uses: actions/github-script@v8.0.0
|
||||||
with:
|
with:
|
||||||
script: |
|
script: |
|
||||||
const pr = context.payload.pull_request;
|
const pr = context.payload.pull_request;
|
||||||
@@ -79,7 +77,7 @@ jobs:
|
|||||||
}
|
}
|
||||||
- name: Welcome comment
|
- name: Welcome comment
|
||||||
if: ${{ !contains(github.actor, 'bot') }}
|
if: ${{ !contains(github.actor, 'bot') }}
|
||||||
uses: actions/github-script@v8
|
uses: actions/github-script@v8.0.0
|
||||||
with:
|
with:
|
||||||
script: |
|
script: |
|
||||||
const pr = context.payload.pull_request;
|
const pr = context.payload.pull_request;
|
||||||
|
|||||||
7
.github/workflows/project-actions.yml
vendored
7
.github/workflows/project-actions.yml
vendored
@@ -7,19 +7,18 @@ on:
|
|||||||
branches:
|
branches:
|
||||||
- main
|
- main
|
||||||
- dev
|
- dev
|
||||||
permissions: {}
|
permissions:
|
||||||
|
contents: read
|
||||||
jobs:
|
jobs:
|
||||||
pr_opened_or_reopened:
|
pr_opened_or_reopened:
|
||||||
name: pr_opened_or_reopened
|
name: pr_opened_or_reopened
|
||||||
runs-on: ubuntu-24.04
|
runs-on: ubuntu-24.04
|
||||||
permissions:
|
permissions:
|
||||||
# release-drafter reads its config file from the repo
|
|
||||||
contents: read
|
|
||||||
# write permission is required for autolabeler
|
# write permission is required for autolabeler
|
||||||
pull-requests: write
|
pull-requests: write
|
||||||
if: github.event_name == 'pull_request_target' && (github.event.action == 'opened' || github.event.action == 'reopened') && github.event.pull_request.user.login != 'dependabot'
|
if: github.event_name == 'pull_request_target' && (github.event.action == 'opened' || github.event.action == 'reopened') && github.event.pull_request.user.login != 'dependabot'
|
||||||
steps:
|
steps:
|
||||||
- name: Label PR with release-drafter
|
- name: Label PR with release-drafter
|
||||||
uses: release-drafter/release-drafter@v6
|
uses: release-drafter/release-drafter@v6.2.0
|
||||||
env:
|
env:
|
||||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
|||||||
28
.github/workflows/repo-maintenance.yml
vendored
28
.github/workflows/repo-maintenance.yml
vendored
@@ -3,7 +3,10 @@ on:
|
|||||||
schedule:
|
schedule:
|
||||||
- cron: '0 3 * * *'
|
- cron: '0 3 * * *'
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
permissions: {}
|
permissions:
|
||||||
|
issues: write
|
||||||
|
pull-requests: write
|
||||||
|
discussions: write
|
||||||
concurrency:
|
concurrency:
|
||||||
group: lock
|
group: lock
|
||||||
jobs:
|
jobs:
|
||||||
@@ -11,11 +14,8 @@ jobs:
|
|||||||
name: 'Stale'
|
name: 'Stale'
|
||||||
if: github.repository_owner == 'paperless-ngx'
|
if: github.repository_owner == 'paperless-ngx'
|
||||||
runs-on: ubuntu-24.04
|
runs-on: ubuntu-24.04
|
||||||
permissions:
|
|
||||||
issues: write
|
|
||||||
pull-requests: write
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/stale@v10
|
- uses: actions/stale@v10.2.0
|
||||||
with:
|
with:
|
||||||
days-before-stale: 7
|
days-before-stale: 7
|
||||||
days-before-close: 14
|
days-before-close: 14
|
||||||
@@ -36,12 +36,8 @@ jobs:
|
|||||||
name: 'Lock Old Threads'
|
name: 'Lock Old Threads'
|
||||||
if: github.repository_owner == 'paperless-ngx'
|
if: github.repository_owner == 'paperless-ngx'
|
||||||
runs-on: ubuntu-24.04
|
runs-on: ubuntu-24.04
|
||||||
permissions:
|
|
||||||
issues: write
|
|
||||||
pull-requests: write
|
|
||||||
discussions: write
|
|
||||||
steps:
|
steps:
|
||||||
- uses: dessant/lock-threads@v6
|
- uses: dessant/lock-threads@v6.0.0
|
||||||
with:
|
with:
|
||||||
issue-inactive-days: '30'
|
issue-inactive-days: '30'
|
||||||
pr-inactive-days: '30'
|
pr-inactive-days: '30'
|
||||||
@@ -60,10 +56,8 @@ jobs:
|
|||||||
name: 'Close Answered Discussions'
|
name: 'Close Answered Discussions'
|
||||||
if: github.repository_owner == 'paperless-ngx'
|
if: github.repository_owner == 'paperless-ngx'
|
||||||
runs-on: ubuntu-24.04
|
runs-on: ubuntu-24.04
|
||||||
permissions:
|
|
||||||
discussions: write
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/github-script@v8
|
- uses: actions/github-script@v8.0.0
|
||||||
with:
|
with:
|
||||||
script: |
|
script: |
|
||||||
function sleep(ms) {
|
function sleep(ms) {
|
||||||
@@ -119,10 +113,8 @@ jobs:
|
|||||||
name: 'Close Outdated Discussions'
|
name: 'Close Outdated Discussions'
|
||||||
if: github.repository_owner == 'paperless-ngx'
|
if: github.repository_owner == 'paperless-ngx'
|
||||||
runs-on: ubuntu-24.04
|
runs-on: ubuntu-24.04
|
||||||
permissions:
|
|
||||||
discussions: write
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/github-script@v8
|
- uses: actions/github-script@v8.0.0
|
||||||
with:
|
with:
|
||||||
script: |
|
script: |
|
||||||
function sleep(ms) {
|
function sleep(ms) {
|
||||||
@@ -213,10 +205,8 @@ jobs:
|
|||||||
name: 'Close Unsupported Feature Requests'
|
name: 'Close Unsupported Feature Requests'
|
||||||
if: github.repository_owner == 'paperless-ngx'
|
if: github.repository_owner == 'paperless-ngx'
|
||||||
runs-on: ubuntu-24.04
|
runs-on: ubuntu-24.04
|
||||||
permissions:
|
|
||||||
discussions: write
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/github-script@v8
|
- uses: actions/github-script@v8.0.0
|
||||||
with:
|
with:
|
||||||
script: |
|
script: |
|
||||||
function sleep(ms) {
|
function sleep(ms) {
|
||||||
|
|||||||
15
.github/workflows/translate-strings.yml
vendored
15
.github/workflows/translate-strings.yml
vendored
@@ -3,7 +3,6 @@ on:
|
|||||||
push:
|
push:
|
||||||
branches:
|
branches:
|
||||||
- dev
|
- dev
|
||||||
permissions: {}
|
|
||||||
jobs:
|
jobs:
|
||||||
generate-translate-strings:
|
generate-translate-strings:
|
||||||
name: Generate Translation Strings
|
name: Generate Translation Strings
|
||||||
@@ -12,7 +11,7 @@ jobs:
|
|||||||
contents: write
|
contents: write
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v6.0.2
|
||||||
env:
|
env:
|
||||||
GH_REF: ${{ github.ref }} # sonar rule:githubactions:S7630 - avoid injection
|
GH_REF: ${{ github.ref }} # sonar rule:githubactions:S7630 - avoid injection
|
||||||
with:
|
with:
|
||||||
@@ -20,13 +19,13 @@ jobs:
|
|||||||
ref: ${{ env.GH_REF }}
|
ref: ${{ env.GH_REF }}
|
||||||
- name: Set up Python
|
- name: Set up Python
|
||||||
id: setup-python
|
id: setup-python
|
||||||
uses: actions/setup-python@v6
|
uses: actions/setup-python@v6.2.0
|
||||||
- name: Install system dependencies
|
- name: Install system dependencies
|
||||||
run: |
|
run: |
|
||||||
sudo apt-get update -qq
|
sudo apt-get update -qq
|
||||||
sudo apt-get install -qq --no-install-recommends gettext
|
sudo apt-get install -qq --no-install-recommends gettext
|
||||||
- name: Install uv
|
- name: Install uv
|
||||||
uses: astral-sh/setup-uv@v7
|
uses: astral-sh/setup-uv@v7.3.1
|
||||||
with:
|
with:
|
||||||
enable-cache: true
|
enable-cache: true
|
||||||
- name: Install backend python dependencies
|
- name: Install backend python dependencies
|
||||||
@@ -37,18 +36,18 @@ jobs:
|
|||||||
- name: Generate backend translation strings
|
- name: Generate backend translation strings
|
||||||
run: cd src/ && uv run manage.py makemessages -l en_US -i "samples*"
|
run: cd src/ && uv run manage.py makemessages -l en_US -i "samples*"
|
||||||
- name: Install pnpm
|
- name: Install pnpm
|
||||||
uses: pnpm/action-setup@v4
|
uses: pnpm/action-setup@v4.2.0
|
||||||
with:
|
with:
|
||||||
version: 10
|
version: 10
|
||||||
- name: Use Node.js 24
|
- name: Use Node.js 24
|
||||||
uses: actions/setup-node@v6
|
uses: actions/setup-node@v6.2.0
|
||||||
with:
|
with:
|
||||||
node-version: 24.x
|
node-version: 24.x
|
||||||
cache: 'pnpm'
|
cache: 'pnpm'
|
||||||
cache-dependency-path: 'src-ui/pnpm-lock.yaml'
|
cache-dependency-path: 'src-ui/pnpm-lock.yaml'
|
||||||
- name: Cache frontend dependencies
|
- name: Cache frontend dependencies
|
||||||
id: cache-frontend-deps
|
id: cache-frontend-deps
|
||||||
uses: actions/cache@v5
|
uses: actions/cache@v5.0.3
|
||||||
with:
|
with:
|
||||||
path: |
|
path: |
|
||||||
~/.pnpm-store
|
~/.pnpm-store
|
||||||
@@ -64,7 +63,7 @@ jobs:
|
|||||||
cd src-ui
|
cd src-ui
|
||||||
pnpm run ng extract-i18n
|
pnpm run ng extract-i18n
|
||||||
- name: Commit changes
|
- name: Commit changes
|
||||||
uses: stefanzweifel/git-auto-commit-action@v7
|
uses: stefanzweifel/git-auto-commit-action@v7.1.0
|
||||||
with:
|
with:
|
||||||
file_pattern: 'src-ui/messages.xlf src/locale/en_US/LC_MESSAGES/django.po'
|
file_pattern: 'src-ui/messages.xlf src/locale/en_US/LC_MESSAGES/django.po'
|
||||||
commit_message: "Auto translate strings"
|
commit_message: "Auto translate strings"
|
||||||
|
|||||||
@@ -13,7 +13,9 @@ If you want to implement something big:
|
|||||||
|
|
||||||
## Python
|
## Python
|
||||||
|
|
||||||
Paperless supports python 3.10 - 3.12 at this time. We format Python code with [ruff](https://docs.astral.sh/ruff/formatter/).
|
Paperless-ngx currently supports Python 3.11, 3.12, 3.13, and 3.14. As a policy, we aim to support at least the three most recent Python versions, and drop support for versions as they reach end-of-life. Older versions may be supported if dependencies permit, but this is not guaranteed.
|
||||||
|
|
||||||
|
We format Python code with [ruff](https://docs.astral.sh/ruff/formatter/).
|
||||||
|
|
||||||
## Branches
|
## Branches
|
||||||
|
|
||||||
|
|||||||
@@ -30,7 +30,7 @@ RUN set -eux \
|
|||||||
# Purpose: Installs s6-overlay and rootfs
|
# Purpose: Installs s6-overlay and rootfs
|
||||||
# Comments:
|
# Comments:
|
||||||
# - Don't leave anything extra in here either
|
# - Don't leave anything extra in here either
|
||||||
FROM ghcr.io/astral-sh/uv:0.10.5-python3.12-trixie-slim AS s6-overlay-base
|
FROM ghcr.io/astral-sh/uv:0.10.7-python3.12-trixie-slim AS s6-overlay-base
|
||||||
|
|
||||||
WORKDIR /usr/src/s6
|
WORKDIR /usr/src/s6
|
||||||
|
|
||||||
@@ -45,7 +45,7 @@ ENV \
|
|||||||
ARG TARGETARCH
|
ARG TARGETARCH
|
||||||
ARG TARGETVARIANT
|
ARG TARGETVARIANT
|
||||||
# Lock this version
|
# Lock this version
|
||||||
ARG S6_OVERLAY_VERSION=3.2.1.0
|
ARG S6_OVERLAY_VERSION=3.2.2.0
|
||||||
|
|
||||||
ARG S6_BUILD_TIME_PKGS="curl \
|
ARG S6_BUILD_TIME_PKGS="curl \
|
||||||
xz-utils"
|
xz-utils"
|
||||||
|
|||||||
@@ -4,7 +4,7 @@
|
|||||||
# correct networking for the tests
|
# correct networking for the tests
|
||||||
services:
|
services:
|
||||||
gotenberg:
|
gotenberg:
|
||||||
image: docker.io/gotenberg/gotenberg:8.26
|
image: docker.io/gotenberg/gotenberg:8.27
|
||||||
hostname: gotenberg
|
hostname: gotenberg
|
||||||
container_name: gotenberg
|
container_name: gotenberg
|
||||||
network_mode: host
|
network_mode: host
|
||||||
|
|||||||
@@ -72,7 +72,7 @@ services:
|
|||||||
PAPERLESS_TIKA_GOTENBERG_ENDPOINT: http://gotenberg:3000
|
PAPERLESS_TIKA_GOTENBERG_ENDPOINT: http://gotenberg:3000
|
||||||
PAPERLESS_TIKA_ENDPOINT: http://tika:9998
|
PAPERLESS_TIKA_ENDPOINT: http://tika:9998
|
||||||
gotenberg:
|
gotenberg:
|
||||||
image: docker.io/gotenberg/gotenberg:8.26
|
image: docker.io/gotenberg/gotenberg:8.27
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
# The gotenberg chromium route is used to convert .eml files. We do not
|
# The gotenberg chromium route is used to convert .eml files. We do not
|
||||||
# want to allow external content like tracking pixels or even javascript.
|
# want to allow external content like tracking pixels or even javascript.
|
||||||
|
|||||||
@@ -66,7 +66,7 @@ services:
|
|||||||
PAPERLESS_TIKA_GOTENBERG_ENDPOINT: http://gotenberg:3000
|
PAPERLESS_TIKA_GOTENBERG_ENDPOINT: http://gotenberg:3000
|
||||||
PAPERLESS_TIKA_ENDPOINT: http://tika:9998
|
PAPERLESS_TIKA_ENDPOINT: http://tika:9998
|
||||||
gotenberg:
|
gotenberg:
|
||||||
image: docker.io/gotenberg/gotenberg:8.26
|
image: docker.io/gotenberg/gotenberg:8.27
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
# The gotenberg chromium route is used to convert .eml files. We do not
|
# The gotenberg chromium route is used to convert .eml files. We do not
|
||||||
# want to allow external content like tracking pixels or even javascript.
|
# want to allow external content like tracking pixels or even javascript.
|
||||||
|
|||||||
@@ -55,7 +55,7 @@ services:
|
|||||||
PAPERLESS_TIKA_GOTENBERG_ENDPOINT: http://gotenberg:3000
|
PAPERLESS_TIKA_GOTENBERG_ENDPOINT: http://gotenberg:3000
|
||||||
PAPERLESS_TIKA_ENDPOINT: http://tika:9998
|
PAPERLESS_TIKA_ENDPOINT: http://tika:9998
|
||||||
gotenberg:
|
gotenberg:
|
||||||
image: docker.io/gotenberg/gotenberg:8.26
|
image: docker.io/gotenberg/gotenberg:8.27
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
# The gotenberg chromium route is used to convert .eml files. We do not
|
# The gotenberg chromium route is used to convert .eml files. We do not
|
||||||
# want to allow external content like tracking pixels or even javascript.
|
# want to allow external content like tracking pixels or even javascript.
|
||||||
|
|||||||
BIN
docs/assets/logo_full_black.png
Normal file
BIN
docs/assets/logo_full_black.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 57 KiB |
BIN
docs/assets/logo_full_white.png
Normal file
BIN
docs/assets/logo_full_white.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 61 KiB |
@@ -172,7 +172,7 @@ to enable polling and disable inotify. See [here](configuration.md#polling).
|
|||||||
#### Prerequisites
|
#### Prerequisites
|
||||||
|
|
||||||
- Paperless runs on Linux only, Windows is not supported.
|
- Paperless runs on Linux only, Windows is not supported.
|
||||||
- Python 3 is required with versions 3.10 - 3.12 currently supported. Newer versions may work, but some dependencies may not be fully compatible.
|
- Python 3.11, 3.12, 3.13, or 3.14 is required. As a policy, Paperless-ngx aims to support at least the three most recent Python versions and drops support for versions as they reach end-of-life. Newer versions may work, but some dependencies may not be fully compatible.
|
||||||
|
|
||||||
#### Installation
|
#### Installation
|
||||||
|
|
||||||
|
|||||||
@@ -616,7 +616,7 @@ applied. You can use the following placeholders in the template with any trigger
|
|||||||
- `{{added_day}}`: added day
|
- `{{added_day}}`: added day
|
||||||
- `{{added_time}}`: added time in HH:MM format
|
- `{{added_time}}`: added time in HH:MM format
|
||||||
- `{{original_filename}}`: original file name without extension
|
- `{{original_filename}}`: original file name without extension
|
||||||
- `{{filename}}`: current file name without extension
|
- `{{filename}}`: current file name without extension (for "added" workflows this may not be final yet, you can use `{{original_filename}}`)
|
||||||
- `{{doc_title}}`: current document title (cannot be used in title assignment)
|
- `{{doc_title}}`: current document title (cannot be used in title assignment)
|
||||||
|
|
||||||
The following placeholders are only available for "added" or "updated" triggers
|
The following placeholders are only available for "added" or "updated" triggers
|
||||||
|
|||||||
@@ -3,10 +3,9 @@ name = "paperless-ngx"
|
|||||||
version = "2.20.9"
|
version = "2.20.9"
|
||||||
description = "A community-supported supercharged document management system: scan, index and archive all your physical documents"
|
description = "A community-supported supercharged document management system: scan, index and archive all your physical documents"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.10"
|
requires-python = ">=3.11"
|
||||||
classifiers = [
|
classifiers = [
|
||||||
"Programming Language :: Python :: 3 :: Only",
|
"Programming Language :: Python :: 3 :: Only",
|
||||||
"Programming Language :: Python :: 3.10",
|
|
||||||
"Programming Language :: Python :: 3.11",
|
"Programming Language :: Python :: 3.11",
|
||||||
"Programming Language :: Python :: 3.12",
|
"Programming Language :: Python :: 3.12",
|
||||||
"Programming Language :: Python :: 3.13",
|
"Programming Language :: Python :: 3.13",
|
||||||
@@ -111,6 +110,7 @@ docs = [
|
|||||||
testing = [
|
testing = [
|
||||||
"daphne",
|
"daphne",
|
||||||
"factory-boy~=3.3.1",
|
"factory-boy~=3.3.1",
|
||||||
|
"faker~=40.5.1",
|
||||||
"imagehash",
|
"imagehash",
|
||||||
"pytest~=9.0.0",
|
"pytest~=9.0.0",
|
||||||
"pytest-cov~=7.0.0",
|
"pytest-cov~=7.0.0",
|
||||||
@@ -176,7 +176,7 @@ torch = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
[tool.ruff]
|
[tool.ruff]
|
||||||
target-version = "py310"
|
target-version = "py311"
|
||||||
line-length = 88
|
line-length = 88
|
||||||
src = [
|
src = [
|
||||||
"src",
|
"src",
|
||||||
|
|||||||
@@ -1238,8 +1238,8 @@
|
|||||||
<context context-type="linenumber">82</context>
|
<context context-type="linenumber">82</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
<trans-unit id="8035757452478567832" datatype="html">
|
<trans-unit id="7860582931776068318" datatype="html">
|
||||||
<source>Update existing document</source>
|
<source>Add document version</source>
|
||||||
<context-group purpose="location">
|
<context-group purpose="location">
|
||||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||||
<context context-type="linenumber">280</context>
|
<context context-type="linenumber">280</context>
|
||||||
@@ -8411,8 +8411,8 @@
|
|||||||
<context context-type="linenumber">832</context>
|
<context context-type="linenumber">832</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
<trans-unit id="6390006284731990222" datatype="html">
|
<trans-unit id="5203024009814367559" datatype="html">
|
||||||
<source>This operation will permanently rotate the original version of <x id="PH" equiv-text="this.list.selected.size"/> document(s).</source>
|
<source>This operation will add rotated versions of the <x id="PH" equiv-text="this.list.selected.size"/> document(s).</source>
|
||||||
<context-group purpose="location">
|
<context-group purpose="location">
|
||||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||||
<context context-type="linenumber">833</context>
|
<context context-type="linenumber">833</context>
|
||||||
|
|||||||
@@ -277,7 +277,7 @@
|
|||||||
<div class="col">
|
<div class="col">
|
||||||
<select class="form-select" formControlName="pdfEditorDefaultEditMode">
|
<select class="form-select" formControlName="pdfEditorDefaultEditMode">
|
||||||
<option [ngValue]="PdfEditorEditMode.Create" i18n>Create new document(s)</option>
|
<option [ngValue]="PdfEditorEditMode.Create" i18n>Create new document(s)</option>
|
||||||
<option [ngValue]="PdfEditorEditMode.Update" i18n>Update existing document</option>
|
<option [ngValue]="PdfEditorEditMode.Update" i18n>Add document version</option>
|
||||||
</select>
|
</select>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -84,7 +84,7 @@
|
|||||||
<input type="radio" class="btn-check" [(ngModel)]="editMode" [value]="PdfEditorEditMode.Update" id="editModeUpdate" name="editmode" [disabled]="hasSplit()">
|
<input type="radio" class="btn-check" [(ngModel)]="editMode" [value]="PdfEditorEditMode.Update" id="editModeUpdate" name="editmode" [disabled]="hasSplit()">
|
||||||
<label for="editModeUpdate" class="btn btn-outline-primary btn-sm">
|
<label for="editModeUpdate" class="btn btn-outline-primary btn-sm">
|
||||||
<i-bs name="pencil"></i-bs>
|
<i-bs name="pencil"></i-bs>
|
||||||
<span class="form-check-label ms-2" i18n>Update existing document</span>
|
<span class="form-check-label ms-2" i18n>Add document version</span>
|
||||||
</label>
|
</label>
|
||||||
</div>
|
</div>
|
||||||
@if (editMode === PdfEditorEditMode.Create) {
|
@if (editMode === PdfEditorEditMode.Create) {
|
||||||
|
|||||||
@@ -830,7 +830,7 @@ export class BulkEditorComponent
|
|||||||
})
|
})
|
||||||
const rotateDialog = modal.componentInstance as RotateConfirmDialogComponent
|
const rotateDialog = modal.componentInstance as RotateConfirmDialogComponent
|
||||||
rotateDialog.title = $localize`Rotate confirm`
|
rotateDialog.title = $localize`Rotate confirm`
|
||||||
rotateDialog.messageBold = $localize`This operation will permanently rotate the original version of ${this.list.selected.size} document(s).`
|
rotateDialog.messageBold = $localize`This operation will add rotated versions of the ${this.list.selected.size} document(s).`
|
||||||
rotateDialog.btnClass = 'btn-danger'
|
rotateDialog.btnClass = 'btn-danger'
|
||||||
rotateDialog.btnCaption = $localize`Proceed`
|
rotateDialog.btnCaption = $localize`Proceed`
|
||||||
rotateDialog.documentID = Array.from(this.list.selected)[0]
|
rotateDialog.documentID = Array.from(this.list.selected)[0]
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
|
from datetime import UTC
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from datetime import timezone
|
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
@@ -139,7 +139,7 @@ def thumbnail_last_modified(request: Any, pk: int) -> datetime | None:
|
|||||||
# No cache, get the timestamp and cache the datetime
|
# No cache, get the timestamp and cache the datetime
|
||||||
last_modified = datetime.fromtimestamp(
|
last_modified = datetime.fromtimestamp(
|
||||||
doc.thumbnail_path.stat().st_mtime,
|
doc.thumbnail_path.stat().st_mtime,
|
||||||
tz=timezone.utc,
|
tz=UTC,
|
||||||
)
|
)
|
||||||
cache.set(doc_key, last_modified, CACHE_50_MINUTES)
|
cache.set(doc_key, last_modified, CACHE_50_MINUTES)
|
||||||
return last_modified
|
return last_modified
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ import datetime
|
|||||||
import hashlib
|
import hashlib
|
||||||
import os
|
import os
|
||||||
import tempfile
|
import tempfile
|
||||||
from enum import Enum
|
from enum import StrEnum
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
from typing import Final
|
from typing import Final
|
||||||
@@ -81,7 +81,7 @@ class ConsumerError(Exception):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class ConsumerStatusShortMessage(str, Enum):
|
class ConsumerStatusShortMessage(StrEnum):
|
||||||
DOCUMENT_ALREADY_EXISTS = "document_already_exists"
|
DOCUMENT_ALREADY_EXISTS = "document_already_exists"
|
||||||
DOCUMENT_ALREADY_EXISTS_IN_TRASH = "document_already_exists_in_trash"
|
DOCUMENT_ALREADY_EXISTS_IN_TRASH = "document_already_exists_in_trash"
|
||||||
ASN_ALREADY_EXISTS = "asn_already_exists"
|
ASN_ALREADY_EXISTS = "asn_already_exists"
|
||||||
|
|||||||
@@ -5,10 +5,10 @@ import math
|
|||||||
import re
|
import re
|
||||||
from collections import Counter
|
from collections import Counter
|
||||||
from contextlib import contextmanager
|
from contextlib import contextmanager
|
||||||
|
from datetime import UTC
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from datetime import time
|
from datetime import time
|
||||||
from datetime import timedelta
|
from datetime import timedelta
|
||||||
from datetime import timezone
|
|
||||||
from shutil import rmtree
|
from shutil import rmtree
|
||||||
from time import sleep
|
from time import sleep
|
||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
@@ -437,7 +437,7 @@ class ManualResults:
|
|||||||
class LocalDateParser(English):
|
class LocalDateParser(English):
|
||||||
def reverse_timezone_offset(self, d):
|
def reverse_timezone_offset(self, d):
|
||||||
return (d.replace(tzinfo=django_timezone.get_current_timezone())).astimezone(
|
return (d.replace(tzinfo=django_timezone.get_current_timezone())).astimezone(
|
||||||
timezone.utc,
|
UTC,
|
||||||
)
|
)
|
||||||
|
|
||||||
def date_from(self, *args, **kwargs):
|
def date_from(self, *args, **kwargs):
|
||||||
@@ -641,8 +641,8 @@ def rewrite_natural_date_keywords(query_string: str) -> str:
|
|||||||
end = datetime(local_now.year - 1, 12, 31, 23, 59, 59, tzinfo=tz)
|
end = datetime(local_now.year - 1, 12, 31, 23, 59, 59, tzinfo=tz)
|
||||||
|
|
||||||
# Convert to UTC and format
|
# Convert to UTC and format
|
||||||
start_str = start.astimezone(timezone.utc).strftime("%Y%m%d%H%M%S")
|
start_str = start.astimezone(UTC).strftime("%Y%m%d%H%M%S")
|
||||||
end_str = end.astimezone(timezone.utc).strftime("%Y%m%d%H%M%S")
|
end_str = end.astimezone(UTC).strftime("%Y%m%d%H%M%S")
|
||||||
return f"{field}:[{start_str} TO {end_str}]"
|
return f"{field}:[{start_str} TO {end_str}]"
|
||||||
|
|
||||||
return re.sub(pattern, repl, query_string, flags=re.IGNORECASE)
|
return re.sub(pattern, repl, query_string, flags=re.IGNORECASE)
|
||||||
|
|||||||
@@ -6,11 +6,14 @@ Provides automatic progress bar and multiprocessing support with minimal boilerp
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
import os
|
import os
|
||||||
|
from collections.abc import Callable
|
||||||
from collections.abc import Iterable
|
from collections.abc import Iterable
|
||||||
from collections.abc import Sized
|
from collections.abc import Sized
|
||||||
from concurrent.futures import ProcessPoolExecutor
|
from concurrent.futures import ProcessPoolExecutor
|
||||||
from concurrent.futures import as_completed
|
from concurrent.futures import as_completed
|
||||||
|
from contextlib import contextmanager
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
from typing import Any
|
from typing import Any
|
||||||
@@ -22,7 +25,11 @@ from django import db
|
|||||||
from django.core.management import CommandError
|
from django.core.management import CommandError
|
||||||
from django.db.models import QuerySet
|
from django.db.models import QuerySet
|
||||||
from django_rich.management import RichCommand
|
from django_rich.management import RichCommand
|
||||||
|
from rich import box
|
||||||
from rich.console import Console
|
from rich.console import Console
|
||||||
|
from rich.console import Group
|
||||||
|
from rich.console import RenderableType
|
||||||
|
from rich.live import Live
|
||||||
from rich.progress import BarColumn
|
from rich.progress import BarColumn
|
||||||
from rich.progress import MofNCompleteColumn
|
from rich.progress import MofNCompleteColumn
|
||||||
from rich.progress import Progress
|
from rich.progress import Progress
|
||||||
@@ -30,11 +37,11 @@ from rich.progress import SpinnerColumn
|
|||||||
from rich.progress import TextColumn
|
from rich.progress import TextColumn
|
||||||
from rich.progress import TimeElapsedColumn
|
from rich.progress import TimeElapsedColumn
|
||||||
from rich.progress import TimeRemainingColumn
|
from rich.progress import TimeRemainingColumn
|
||||||
|
from rich.table import Table
|
||||||
|
from rich.text import Text
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from collections.abc import Callable
|
|
||||||
from collections.abc import Generator
|
from collections.abc import Generator
|
||||||
from collections.abc import Iterable
|
|
||||||
from collections.abc import Sequence
|
from collections.abc import Sequence
|
||||||
|
|
||||||
from django.core.management import CommandParser
|
from django.core.management import CommandParser
|
||||||
@@ -43,6 +50,78 @@ T = TypeVar("T")
|
|||||||
R = TypeVar("R")
|
R = TypeVar("R")
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(slots=True, frozen=True)
|
||||||
|
class _BufferedRecord:
|
||||||
|
level: int
|
||||||
|
name: str
|
||||||
|
message: str
|
||||||
|
|
||||||
|
|
||||||
|
class BufferingLogHandler(logging.Handler):
    """Logging handler that stores records in memory for later display.

    Attach an instance to one or more loggers before a long-running
    operation, then call ``render()`` once the operation has finished to
    print the collected records as a Rich table. ``clear()`` empties the
    buffer so the handler can be reused.
    """

    def __init__(self) -> None:
        super().__init__()
        # Records are kept in arrival order; render() never mutates the list.
        self._records: list[_BufferedRecord] = []

    def emit(self, record: logging.LogRecord) -> None:
        """Buffer ``record``; the message is formatted with this handler's formatter."""
        self._records.append(
            _BufferedRecord(
                level=record.levelno,
                name=record.name,
                message=self.format(record),
            ),
        )

    def render(
        self,
        console: Console,
        *,
        min_level: int = logging.DEBUG,
        title: str = "Log Output",
    ) -> None:
        """Print the buffered records as a table on ``console``.

        Args:
            console: Rich console the table is printed to.
            min_level: Records whose level is below this value are omitted.
            title: Title shown above the table.

        Nothing is printed when no buffered record passes the level filter.
        """
        records = [r for r in self._records if r.level >= min_level]
        if not records:
            return

        table = Table(
            title=title,
            show_header=True,
            header_style="bold",
            show_lines=False,
            box=box.SIMPLE,
        )
        table.add_column("Level", style="bold", width=8)
        table.add_column("Logger", style="dim")
        table.add_column("Message", no_wrap=False)

        # Style of the "Level" cell per standard level; other levels are unstyled.
        level_styles: dict[int, str] = {
            logging.DEBUG: "dim",
            logging.INFO: "cyan",
            logging.WARNING: "yellow",
            logging.ERROR: "red",
            logging.CRITICAL: "bold red",
        }

        for record in records:
            style = level_styles.get(record.level, "")
            table.add_row(
                Text(logging.getLevelName(record.level), style=style),
                # Logger names and messages are arbitrary text. Wrapping them
                # in Text keeps Rich from parsing square brackets in them as
                # console markup, which would drop text or raise MarkupError.
                Text(record.name),
                Text(record.message),
            )

        console.print(table)

    def clear(self) -> None:
        """Discard every buffered record."""
        self._records.clear()
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True, slots=True)
|
@dataclass(frozen=True, slots=True)
|
||||||
class ProcessResult(Generic[T, R]):
|
class ProcessResult(Generic[T, R]):
|
||||||
"""
|
"""
|
||||||
@@ -91,6 +170,23 @@ class PaperlessCommand(RichCommand):
|
|||||||
for result in self.process_parallel(process_doc, ids):
|
for result in self.process_parallel(process_doc, ids):
|
||||||
if result.error:
|
if result.error:
|
||||||
self.console.print(f"[red]Failed: {result.error}[/red]")
|
self.console.print(f"[red]Failed: {result.error}[/red]")
|
||||||
|
|
||||||
|
class Command(PaperlessCommand):
|
||||||
|
help = "Import documents with live stats"
|
||||||
|
|
||||||
|
def handle(self, *args, **options):
|
||||||
|
stats = ImportStats()
|
||||||
|
|
||||||
|
def render_stats() -> Table:
|
||||||
|
... # build Rich Table from stats
|
||||||
|
|
||||||
|
for item in self.track_with_stats(
|
||||||
|
items,
|
||||||
|
description="Importing...",
|
||||||
|
stats_renderer=render_stats,
|
||||||
|
):
|
||||||
|
result = import_item(item)
|
||||||
|
stats.imported += 1
|
||||||
"""
|
"""
|
||||||
|
|
||||||
supports_progress_bar: ClassVar[bool] = True
|
supports_progress_bar: ClassVar[bool] = True
|
||||||
@@ -128,13 +224,11 @@ class PaperlessCommand(RichCommand):
|
|||||||
This is called by Django's command infrastructure after argument parsing
|
This is called by Django's command infrastructure after argument parsing
|
||||||
but before handle(). We use it to set instance attributes from options.
|
but before handle(). We use it to set instance attributes from options.
|
||||||
"""
|
"""
|
||||||
# Set progress bar state
|
|
||||||
if self.supports_progress_bar:
|
if self.supports_progress_bar:
|
||||||
self.no_progress_bar = options.get("no_progress_bar", False)
|
self.no_progress_bar = options.get("no_progress_bar", False)
|
||||||
else:
|
else:
|
||||||
self.no_progress_bar = True
|
self.no_progress_bar = True
|
||||||
|
|
||||||
# Set multiprocessing state
|
|
||||||
if self.supports_multiprocessing:
|
if self.supports_multiprocessing:
|
||||||
self.process_count = options.get("processes", 1)
|
self.process_count = options.get("processes", 1)
|
||||||
if self.process_count < 1:
|
if self.process_count < 1:
|
||||||
@@ -144,9 +238,69 @@ class PaperlessCommand(RichCommand):
|
|||||||
|
|
||||||
return super().execute(*args, **options)
|
return super().execute(*args, **options)
|
||||||
|
|
||||||
|
@contextmanager
def buffered_logging(
    self,
    *logger_names: str,
    level: int = logging.DEBUG,
) -> Generator[BufferingLogHandler, None, None]:
    """Context manager that captures log output from named loggers.

    Installs one shared ``BufferingLogHandler`` on each named logger for the
    duration of the block and turns off propagation on those loggers, so
    their records do not reach ancestor handlers and interleave with the
    Rich live display. On exit the handler is removed and every logger's
    original ``propagate`` flag is restored, whether or not an exception
    occurred.

    Args:
        *logger_names: Names of the loggers to capture. A logger named more
            than once is only patched once.
        level: Minimum level the buffering handler accepts. Records below it
            are not buffered. The loggers' own levels are left untouched.

    Yields:
        The handler holding the captured records.

    Usage::

        with self.buffered_logging("paperless", "documents") as log_buf:
            # ... run progress loop ...
        if options["verbose"]:
            log_buf.render(self.console)
    """
    handler = BufferingLogHandler()
    handler.setLevel(level)
    handler.setFormatter(logging.Formatter("%(message)s"))

    # (logger, original propagate flag) for every logger patched so far.
    # Keeping the logger object itself avoids looking loggers up by name
    # again, which fails for the root logger ("" resolves to name "root").
    patched: list[tuple[logging.Logger, bool]] = []

    try:
        for name in logger_names:
            log = logging.getLogger(name)
            # Skip a logger that is already patched; otherwise the saved
            # "original" flag would be the False value written below.
            if any(log is seen for seen, _ in patched):
                continue
            patched.append((log, log.propagate))
            log.addHandler(handler)
            log.propagate = False

        yield handler
    finally:
        for log, propagate in patched:
            log.removeHandler(handler)
            log.propagate = propagate
|
||||||
|
|
||||||
|
@staticmethod
def _progress_columns() -> tuple[Any, ...]:
    """
    Build the column set shared by every progress bar of this command.

    Both _create_progress (standalone bar) and track_with_stats (bar inside
    a Live display) unpack this tuple, so the two layouts cannot drift apart.
    A fresh set of column objects is created on every call.
    """
    columns: list[Any] = [
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        BarColumn(),
        MofNCompleteColumn(),
        TimeElapsedColumn(),
        TimeRemainingColumn(),
    ]
    return tuple(columns)
|
||||||
|
|
||||||
def _create_progress(self, description: str) -> Progress:
|
def _create_progress(self, description: str) -> Progress:
|
||||||
"""
|
"""
|
||||||
Create a configured Progress instance.
|
Create a standalone Progress instance with its own stderr Console.
|
||||||
|
|
||||||
|
Use this for track(). For track_with_stats(), Progress is created
|
||||||
|
directly inside a Live context instead.
|
||||||
|
|
||||||
Progress output is directed to stderr to match the convention that
|
Progress output is directed to stderr to match the convention that
|
||||||
progress bars are transient UI feedback, not command output. This
|
progress bars are transient UI feedback, not command output. This
|
||||||
@@ -161,12 +315,7 @@ class PaperlessCommand(RichCommand):
|
|||||||
A Progress instance configured with appropriate columns.
|
A Progress instance configured with appropriate columns.
|
||||||
"""
|
"""
|
||||||
return Progress(
|
return Progress(
|
||||||
SpinnerColumn(),
|
*self._progress_columns(),
|
||||||
TextColumn("[progress.description]{task.description}"),
|
|
||||||
BarColumn(),
|
|
||||||
MofNCompleteColumn(),
|
|
||||||
TimeElapsedColumn(),
|
|
||||||
TimeRemainingColumn(),
|
|
||||||
console=Console(stderr=True),
|
console=Console(stderr=True),
|
||||||
transient=False,
|
transient=False,
|
||||||
)
|
)
|
||||||
@@ -222,7 +371,6 @@ class PaperlessCommand(RichCommand):
|
|||||||
yield from iterable
|
yield from iterable
|
||||||
return
|
return
|
||||||
|
|
||||||
# Attempt to determine total if not provided
|
|
||||||
if total is None:
|
if total is None:
|
||||||
total = self._get_iterable_length(iterable)
|
total = self._get_iterable_length(iterable)
|
||||||
|
|
||||||
@@ -232,6 +380,87 @@ class PaperlessCommand(RichCommand):
|
|||||||
yield item
|
yield item
|
||||||
progress.advance(task_id)
|
progress.advance(task_id)
|
||||||
|
|
||||||
|
def track_with_stats(
    self,
    iterable: Iterable[T],
    *,
    description: str = "Processing...",
    stats_renderer: Callable[[], RenderableType],
    total: int | None = None,
) -> Generator[T, None, None]:
    """
    Yield items while showing a progress bar with a stats display beneath it.

    The bar and the renderable returned by ``stats_renderer`` are grouped
    into one Live display written to stderr. After each yielded item has
    been handled by the caller, the bar advances by one and the stats
    renderable is rebuilt, so the panel refreshes in place.

    When ``self.no_progress_bar`` is set, items are passed through without
    any display and ``stats_renderer`` is never called. The caller's loop
    body still runs, so whatever counters it maintains stay accurate for a
    summary printed after the loop.

    Args:
        iterable: The items to iterate over.
        description: Label shown next to the progress bar.
        stats_renderer: Zero-argument callable returning a Rich renderable.
            It is invoked once before the first item and once after every
            item. Typically a closure over a mutable stats object.
        total: Number of items. When None, the value returned by
            ``self._get_iterable_length(iterable)`` is used.

    Yields:
        Each item of ``iterable``, in order.

    Example:
        @dataclass
        class Stats:
            processed: int = 0
            failed: int = 0

        stats = Stats()

        def render_stats() -> Table:
            table = Table(box=None)
            table.add_column("Processed")
            table.add_column("Failed")
            table.add_row(str(stats.processed), str(stats.failed))
            return table

        for item in self.track_with_stats(
            items,
            description="Importing...",
            stats_renderer=render_stats,
        ):
            try:
                import_item(item)
                stats.processed += 1
            except Exception:
                stats.failed += 1
    """
    if self.no_progress_bar:
        yield from iterable
        return

    item_count = total if total is not None else self._get_iterable_length(iterable)

    display_console = Console(stderr=True)

    # The bar gets no console of its own: the Live display below owns rendering.
    bar = Progress(*self._progress_columns())
    bar_task = bar.add_task(description, total=item_count)

    def _frame() -> Group:
        # Bar on top, freshly rendered stats underneath.
        return Group(bar, stats_renderer())

    with Live(_frame(), console=display_console, refresh_per_second=4) as display:
        for element in iterable:
            yield element
            bar.advance(bar_task)
            display.update(_frame())
|
||||||
|
|
||||||
def process_parallel(
|
def process_parallel(
|
||||||
self,
|
self,
|
||||||
fn: Callable[[T], R],
|
fn: Callable[[T], R],
|
||||||
@@ -269,7 +498,7 @@ class PaperlessCommand(RichCommand):
|
|||||||
total = len(items)
|
total = len(items)
|
||||||
|
|
||||||
if self.process_count == 1:
|
if self.process_count == 1:
|
||||||
# Sequential execution in main process - critical for testing
|
# Sequential execution in main process - critical for testing, so we don't fork in fork, etc
|
||||||
yield from self._process_sequential(fn, items, description, total)
|
yield from self._process_sequential(fn, items, description, total)
|
||||||
else:
|
else:
|
||||||
# Parallel execution with ProcessPoolExecutor
|
# Parallel execution with ProcessPoolExecutor
|
||||||
@@ -298,6 +527,7 @@ class PaperlessCommand(RichCommand):
|
|||||||
total: int,
|
total: int,
|
||||||
) -> Generator[ProcessResult[T, R], None, None]:
|
) -> Generator[ProcessResult[T, R], None, None]:
|
||||||
"""Process items in parallel using ProcessPoolExecutor."""
|
"""Process items in parallel using ProcessPoolExecutor."""
|
||||||
|
|
||||||
# Close database connections before forking - required for PostgreSQL
|
# Close database connections before forking - required for PostgreSQL
|
||||||
db.connections.close_all()
|
db.connections.close_all()
|
||||||
|
|
||||||
|
|||||||
@@ -1,22 +1,25 @@
|
|||||||
from django.core.management import BaseCommand
|
|
||||||
from django.db import transaction
|
from django.db import transaction
|
||||||
|
|
||||||
from documents.management.commands.mixins import ProgressBarMixin
|
from documents.management.commands.base import PaperlessCommand
|
||||||
from documents.tasks import index_optimize
|
from documents.tasks import index_optimize
|
||||||
from documents.tasks import index_reindex
|
from documents.tasks import index_reindex
|
||||||
|
|
||||||
|
|
||||||
class Command(ProgressBarMixin, BaseCommand):
|
class Command(PaperlessCommand):
|
||||||
help = "Manages the document index."
|
help = "Manages the document index."
|
||||||
|
|
||||||
def add_arguments(self, parser):
|
def add_arguments(self, parser):
|
||||||
|
super().add_arguments(parser)
|
||||||
parser.add_argument("command", choices=["reindex", "optimize"])
|
parser.add_argument("command", choices=["reindex", "optimize"])
|
||||||
self.add_argument_progress_bar_mixin(parser)
|
|
||||||
|
|
||||||
def handle(self, *args, **options):
|
def handle(self, *args, **options):
|
||||||
self.handle_progress_bar_mixin(**options)
|
|
||||||
with transaction.atomic():
|
with transaction.atomic():
|
||||||
if options["command"] == "reindex":
|
if options["command"] == "reindex":
|
||||||
index_reindex(progress_bar_disable=self.no_progress_bar)
|
index_reindex(
|
||||||
|
iter_wrapper=lambda docs: self.track(
|
||||||
|
docs,
|
||||||
|
description="Indexing documents...",
|
||||||
|
),
|
||||||
|
)
|
||||||
elif options["command"] == "optimize":
|
elif options["command"] == "optimize":
|
||||||
index_optimize()
|
index_optimize()
|
||||||
|
|||||||
@@ -1,22 +1,22 @@
|
|||||||
from django.core.management import BaseCommand
|
from typing import Any
|
||||||
from django.db import transaction
|
|
||||||
|
|
||||||
from documents.management.commands.mixins import ProgressBarMixin
|
from documents.management.commands.base import PaperlessCommand
|
||||||
from documents.tasks import llmindex_index
|
from documents.tasks import llmindex_index
|
||||||
|
|
||||||
|
|
||||||
class Command(ProgressBarMixin, BaseCommand):
|
class Command(PaperlessCommand):
|
||||||
help = "Manages the LLM-based vector index for Paperless."
|
help = "Manages the LLM-based vector index for Paperless."
|
||||||
|
|
||||||
def add_arguments(self, parser):
|
def add_arguments(self, parser: Any) -> None:
|
||||||
|
super().add_arguments(parser)
|
||||||
parser.add_argument("command", choices=["rebuild", "update"])
|
parser.add_argument("command", choices=["rebuild", "update"])
|
||||||
self.add_argument_progress_bar_mixin(parser)
|
|
||||||
|
|
||||||
def handle(self, *args, **options):
|
def handle(self, *args: Any, **options: Any) -> None:
|
||||||
self.handle_progress_bar_mixin(**options)
|
llmindex_index(
|
||||||
with transaction.atomic():
|
rebuild=options["command"] == "rebuild",
|
||||||
llmindex_index(
|
scheduled=False,
|
||||||
progress_bar_disable=self.no_progress_bar,
|
iter_wrapper=lambda docs: self.track(
|
||||||
rebuild=options["command"] == "rebuild",
|
docs,
|
||||||
scheduled=False,
|
description="Indexing documents...",
|
||||||
)
|
),
|
||||||
|
)
|
||||||
|
|||||||
@@ -1,4 +1,12 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from dataclasses import field
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
|
from rich.table import Table
|
||||||
|
from rich.text import Text
|
||||||
|
|
||||||
from documents.classifier import load_classifier
|
from documents.classifier import load_classifier
|
||||||
from documents.management.commands.base import PaperlessCommand
|
from documents.management.commands.base import PaperlessCommand
|
||||||
@@ -8,9 +16,162 @@ from documents.signals.handlers import set_document_type
|
|||||||
from documents.signals.handlers import set_storage_path
|
from documents.signals.handlers import set_storage_path
|
||||||
from documents.signals.handlers import set_tags
|
from documents.signals.handlers import set_tags
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from rich.console import RenderableType
|
||||||
|
|
||||||
|
from documents.models import Correspondent
|
||||||
|
from documents.models import DocumentType
|
||||||
|
from documents.models import StoragePath
|
||||||
|
from documents.models import Tag
|
||||||
|
|
||||||
logger = logging.getLogger("paperless.management.retagger")
|
logger = logging.getLogger("paperless.management.retagger")
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(slots=True)
|
||||||
|
class RetaggerStats:
|
||||||
|
"""Cumulative counters updated as the retagger processes documents.
|
||||||
|
|
||||||
|
Mutable by design -- fields are incremented in the processing loop.
|
||||||
|
slots=True reduces per-instance memory overhead and speeds attribute access.
|
||||||
|
"""
|
||||||
|
|
||||||
|
correspondents: int = 0
|
||||||
|
document_types: int = 0
|
||||||
|
tags_added: int = 0
|
||||||
|
tags_removed: int = 0
|
||||||
|
storage_paths: int = 0
|
||||||
|
documents_processed: int = 0
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(slots=True)
|
||||||
|
class DocumentSuggestion:
|
||||||
|
"""Buffered classifier suggestions for a single document (suggest mode only).
|
||||||
|
|
||||||
|
Mutable by design -- fields are assigned incrementally as each setter runs.
|
||||||
|
"""
|
||||||
|
|
||||||
|
document: Document
|
||||||
|
correspondent: Correspondent | None = None
|
||||||
|
document_type: DocumentType | None = None
|
||||||
|
tags_to_add: frozenset[Tag] = field(default_factory=frozenset)
|
||||||
|
tags_to_remove: frozenset[Tag] = field(default_factory=frozenset)
|
||||||
|
storage_path: StoragePath | None = None
|
||||||
|
|
||||||
|
@property
|
||||||
|
def has_suggestions(self) -> bool:
|
||||||
|
return bool(
|
||||||
|
self.correspondent is not None
|
||||||
|
or self.document_type is not None
|
||||||
|
or self.tags_to_add
|
||||||
|
or self.tags_to_remove
|
||||||
|
or self.storage_path is not None,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _build_stats_table(stats: RetaggerStats, *, suggest: bool) -> Table:
    """
    Render the current counters as the one-row table shown under the progress bar.

    In suggest mode the correspondent, document type and storage path cells
    read "<n> would set" instead of "<n> set", making it clear that nothing
    has been written. Tag counts are always shown as signed numbers.
    """
    action = "would set" if suggest else "set"

    # Column heading -> cell text, in display order.
    cells: dict[str, str] = {
        "Documents": str(stats.documents_processed),
        "Correspondents": f"{stats.correspondents} {action}",
        "Doc Types": f"{stats.document_types} {action}",
        "Tags (+)": f"+{stats.tags_added}",
        "Tags (-)": f"-{stats.tags_removed}",
        "Storage Paths": f"{stats.storage_paths} {action}",
    }

    live_table = Table(box=None, padding=(0, 2), show_header=True, header_style="bold")
    for heading in cells:
        live_table.add_column(heading)
    live_table.add_row(*cells.values())

    return live_table
|
||||||
|
|
||||||
|
|
||||||
|
def _build_suggestion_table(
    suggestions: list[DocumentSuggestion],
    base_url: str | None,
) -> Table:
    """
    Build the final suggestion table printed after the progress bar completes.

    Only documents with at least one suggestion get a row.

    Args:
        suggestions: Per-document classifier suggestions.
        base_url: When given, each document cell carries a second line with
            ``<base_url>/documents/<pk>``; a trailing slash on ``base_url``
            is ignored. Otherwise the primary key is appended in brackets.

    Returns:
        A table with one row per document that has suggestions.
    """
    table = Table(
        title="Suggested Changes",
        show_header=True,
        header_style="bold cyan",
        show_lines=True,
    )

    table.add_column("Document", style="bold", no_wrap=False, min_width=20)
    table.add_column("Correspondent")
    table.add_column("Doc Type")
    table.add_column("Tags")
    table.add_column("Storage Path")

    def _plain_cell(value: object) -> Text:
        # User-defined names are wrapped in Text so that square brackets in
        # them are shown literally instead of being parsed as Rich markup.
        return Text(str(value)) if value else Text("-")

    for suggestion in suggestions:
        if not suggestion.has_suggestions:
            continue

        doc = suggestion.document

        if base_url:
            doc_cell = Text()
            doc_cell.append(str(doc))
            doc_cell.append(
                f"\n{base_url.rstrip('/')}/documents/{doc.pk}",
                style="dim",
            )
        else:
            doc_cell = Text(f"{doc} [{doc.pk}]")

        # Additions first (green), then removals (red), each sorted by name.
        tag_changes: list[tuple[str, str]] = [
            (f"+{tag.name}", "green")
            for tag in sorted(suggestion.tags_to_add, key=lambda t: t.name)
        ]
        tag_changes.extend(
            (f"-{tag.name}", "red")
            for tag in sorted(suggestion.tags_to_remove, key=lambda t: t.name)
        )

        if tag_changes:
            # Styled segments are appended directly rather than going through
            # a markup string, so tag names can never break the markup.
            tag_cell = Text()
            for position, (label, colour) in enumerate(tag_changes):
                if position:
                    tag_cell.append(", ")
                tag_cell.append(label, style=colour)
        else:
            tag_cell = Text("-")

        table.add_row(
            doc_cell,
            _plain_cell(suggestion.correspondent),
            _plain_cell(suggestion.document_type),
            tag_cell,
            _plain_cell(suggestion.storage_path),
        )

    return table
|
||||||
|
|
||||||
|
|
||||||
|
def _build_summary_table(stats: RetaggerStats) -> Table:
    """Render the closing summary: one "Metric" / "Count" row per counter."""
    summary = Table(
        title="Retagger Summary",
        show_header=True,
        header_style="bold cyan",
    )
    summary.add_column("Metric", style="bold")
    summary.add_column("Count", justify="right")

    # (label, value) pairs in display order.
    rows: tuple[tuple[str, int], ...] = (
        ("Documents processed", stats.documents_processed),
        ("Correspondents set", stats.correspondents),
        ("Document types set", stats.document_types),
        ("Tags added", stats.tags_added),
        ("Tags removed", stats.tags_removed),
        ("Storage paths set", stats.storage_paths),
    )
    for metric, count in rows:
        summary.add_row(metric, str(count))

    return summary
|
||||||
|
|
||||||
|
|
||||||
class Command(PaperlessCommand):
|
class Command(PaperlessCommand):
|
||||||
help = (
|
help = (
|
||||||
"Using the current classification model, assigns correspondents, tags "
|
"Using the current classification model, assigns correspondents, tags "
|
||||||
@@ -19,7 +180,7 @@ class Command(PaperlessCommand):
|
|||||||
"modified) after their initial import."
|
"modified) after their initial import."
|
||||||
)
|
)
|
||||||
|
|
||||||
def add_arguments(self, parser):
|
def add_arguments(self, parser) -> None:
|
||||||
super().add_arguments(parser)
|
super().add_arguments(parser)
|
||||||
parser.add_argument("-c", "--correspondent", default=False, action="store_true")
|
parser.add_argument("-c", "--correspondent", default=False, action="store_true")
|
||||||
parser.add_argument("-T", "--tags", default=False, action="store_true")
|
parser.add_argument("-T", "--tags", default=False, action="store_true")
|
||||||
@@ -31,9 +192,9 @@ class Command(PaperlessCommand):
|
|||||||
default=False,
|
default=False,
|
||||||
action="store_true",
|
action="store_true",
|
||||||
help=(
|
help=(
|
||||||
"By default this command won't try to assign a correspondent "
|
"By default this command will not try to assign a correspondent "
|
||||||
"if more than one matches the document. Use this flag if "
|
"if more than one matches the document. Use this flag to pick "
|
||||||
"you'd rather it just pick the first one it finds."
|
"the first match instead."
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
@@ -42,91 +203,140 @@ class Command(PaperlessCommand):
|
|||||||
default=False,
|
default=False,
|
||||||
action="store_true",
|
action="store_true",
|
||||||
help=(
|
help=(
|
||||||
"If set, the document retagger will overwrite any previously "
|
"Overwrite any previously set correspondent, document type, and "
|
||||||
"set correspondent, document and remove correspondents, types "
|
"remove tags that no longer match due to changed rules."
|
||||||
"and tags that do not match anymore due to changed rules."
|
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--suggest",
|
"--suggest",
|
||||||
default=False,
|
default=False,
|
||||||
action="store_true",
|
action="store_true",
|
||||||
help="Return the suggestion, don't change anything.",
|
help="Show what would be changed without applying anything.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--base-url",
|
"--base-url",
|
||||||
help="The base URL to use to build the link to the documents.",
|
help="Base URL used to build document links in suggest output.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--id-range",
|
"--id-range",
|
||||||
help="A range of document ids on which the retagging should be applied.",
|
help="Restrict retagging to documents within this ID range (inclusive).",
|
||||||
nargs=2,
|
nargs=2,
|
||||||
type=int,
|
type=int,
|
||||||
)
|
)
|
||||||
|
|
||||||
def handle(self, *args, **options):
|
def handle(self, *args, **options) -> None:
|
||||||
|
suggest: bool = options["suggest"]
|
||||||
|
overwrite: bool = options["overwrite"]
|
||||||
|
use_first: bool = options["use_first"]
|
||||||
|
base_url: str | None = options["base_url"]
|
||||||
|
|
||||||
|
do_correspondent: bool = options["correspondent"]
|
||||||
|
do_document_type: bool = options["document_type"]
|
||||||
|
do_tags: bool = options["tags"]
|
||||||
|
do_storage_path: bool = options["storage_path"]
|
||||||
|
|
||||||
|
if not any([do_correspondent, do_document_type, do_tags, do_storage_path]):
|
||||||
|
self.console.print(
|
||||||
|
"[yellow]No classifier targets specified. "
|
||||||
|
"Use -c, -T, -t, or -s to select what to retag.[/yellow]",
|
||||||
|
)
|
||||||
|
return
|
||||||
|
|
||||||
if options["inbox_only"]:
|
if options["inbox_only"]:
|
||||||
queryset = Document.objects.filter(tags__is_inbox_tag=True)
|
queryset = Document.objects.filter(tags__is_inbox_tag=True)
|
||||||
else:
|
else:
|
||||||
queryset = Document.objects.all()
|
queryset = Document.objects.all()
|
||||||
|
|
||||||
if options["id_range"]:
|
if options["id_range"]:
|
||||||
queryset = queryset.filter(
|
lo, hi = options["id_range"]
|
||||||
id__range=(options["id_range"][0], options["id_range"][1]),
|
queryset = queryset.filter(id__range=(lo, hi))
|
||||||
)
|
|
||||||
|
|
||||||
documents = queryset.distinct()
|
documents = queryset.distinct()
|
||||||
|
|
||||||
classifier = load_classifier()
|
classifier = load_classifier()
|
||||||
|
|
||||||
for document in self.track(documents, description="Retagging..."):
|
stats = RetaggerStats()
|
||||||
if options["correspondent"]:
|
suggestions: list[DocumentSuggestion] = []
|
||||||
set_correspondent(
|
|
||||||
sender=None,
|
|
||||||
document=document,
|
|
||||||
classifier=classifier,
|
|
||||||
replace=options["overwrite"],
|
|
||||||
use_first=options["use_first"],
|
|
||||||
suggest=options["suggest"],
|
|
||||||
base_url=options["base_url"],
|
|
||||||
stdout=self.stdout,
|
|
||||||
style_func=self.style,
|
|
||||||
)
|
|
||||||
|
|
||||||
if options["document_type"]:
|
def render_stats() -> RenderableType:
|
||||||
set_document_type(
|
return _build_stats_table(stats, suggest=suggest)
|
||||||
sender=None,
|
|
||||||
document=document,
|
|
||||||
classifier=classifier,
|
|
||||||
replace=options["overwrite"],
|
|
||||||
use_first=options["use_first"],
|
|
||||||
suggest=options["suggest"],
|
|
||||||
base_url=options["base_url"],
|
|
||||||
stdout=self.stdout,
|
|
||||||
style_func=self.style,
|
|
||||||
)
|
|
||||||
|
|
||||||
if options["tags"]:
|
with self.buffered_logging(
|
||||||
set_tags(
|
"paperless",
|
||||||
sender=None,
|
"paperless.handlers",
|
||||||
document=document,
|
"documents",
|
||||||
classifier=classifier,
|
) as log_buf:
|
||||||
replace=options["overwrite"],
|
for document in self.track_with_stats(
|
||||||
suggest=options["suggest"],
|
documents,
|
||||||
base_url=options["base_url"],
|
description="Retagging...",
|
||||||
stdout=self.stdout,
|
stats_renderer=render_stats,
|
||||||
style_func=self.style,
|
):
|
||||||
)
|
suggestion = DocumentSuggestion(document=document)
|
||||||
|
|
||||||
if options["storage_path"]:
|
if do_correspondent:
|
||||||
set_storage_path(
|
correspondent = set_correspondent(
|
||||||
sender=None,
|
None,
|
||||||
document=document,
|
document,
|
||||||
classifier=classifier,
|
classifier=classifier,
|
||||||
replace=options["overwrite"],
|
replace=overwrite,
|
||||||
use_first=options["use_first"],
|
use_first=use_first,
|
||||||
suggest=options["suggest"],
|
dry_run=suggest,
|
||||||
base_url=options["base_url"],
|
)
|
||||||
stdout=self.stdout,
|
if correspondent is not None:
|
||||||
style_func=self.style,
|
stats.correspondents += 1
|
||||||
)
|
suggestion.correspondent = correspondent
|
||||||
|
|
||||||
|
if do_document_type:
|
||||||
|
document_type = set_document_type(
|
||||||
|
None,
|
||||||
|
document,
|
||||||
|
classifier=classifier,
|
||||||
|
replace=overwrite,
|
||||||
|
use_first=use_first,
|
||||||
|
dry_run=suggest,
|
||||||
|
)
|
||||||
|
if document_type is not None:
|
||||||
|
stats.document_types += 1
|
||||||
|
suggestion.document_type = document_type
|
||||||
|
|
||||||
|
if do_tags:
|
||||||
|
tags_to_add, tags_to_remove = set_tags(
|
||||||
|
None,
|
||||||
|
document,
|
||||||
|
classifier=classifier,
|
||||||
|
replace=overwrite,
|
||||||
|
dry_run=suggest,
|
||||||
|
)
|
||||||
|
stats.tags_added += len(tags_to_add)
|
||||||
|
stats.tags_removed += len(tags_to_remove)
|
||||||
|
suggestion.tags_to_add = frozenset(tags_to_add)
|
||||||
|
suggestion.tags_to_remove = frozenset(tags_to_remove)
|
||||||
|
|
||||||
|
if do_storage_path:
|
||||||
|
storage_path = set_storage_path(
|
||||||
|
None,
|
||||||
|
document,
|
||||||
|
classifier=classifier,
|
||||||
|
replace=overwrite,
|
||||||
|
use_first=use_first,
|
||||||
|
dry_run=suggest,
|
||||||
|
)
|
||||||
|
if storage_path is not None:
|
||||||
|
stats.storage_paths += 1
|
||||||
|
suggestion.storage_path = storage_path
|
||||||
|
|
||||||
|
stats.documents_processed += 1
|
||||||
|
|
||||||
|
if suggest:
|
||||||
|
suggestions.append(suggestion)
|
||||||
|
|
||||||
|
# Post-loop output
|
||||||
|
if suggest:
|
||||||
|
visible = [s for s in suggestions if s.has_suggestions]
|
||||||
|
if visible:
|
||||||
|
self.console.print(_build_suggestion_table(visible, base_url))
|
||||||
|
else:
|
||||||
|
self.console.print("[green]No changes suggested.[/green]")
|
||||||
|
else:
|
||||||
|
self.console.print(_build_summary_table(stats))
|
||||||
|
|
||||||
|
log_buf.render(self.console, min_level=logging.INFO, title="Retagger Log")
|
||||||
|
|||||||
@@ -1,17 +1,117 @@
|
|||||||
from django.core.management.base import BaseCommand
|
"""Management command to check the document archive for issues."""
|
||||||
|
|
||||||
from documents.management.commands.mixins import ProgressBarMixin
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from rich.panel import Panel
|
||||||
|
from rich.table import Table
|
||||||
|
from rich.text import Text
|
||||||
|
|
||||||
|
from documents.management.commands.base import PaperlessCommand
|
||||||
|
from documents.models import Document
|
||||||
|
from documents.sanity_checker import SanityCheckMessages
|
||||||
from documents.sanity_checker import check_sanity
|
from documents.sanity_checker import check_sanity
|
||||||
|
|
||||||
|
_LEVEL_STYLE: dict[int, tuple[str, str]] = {
|
||||||
|
logging.ERROR: ("bold red", "ERROR"),
|
||||||
|
logging.WARNING: ("yellow", "WARN"),
|
||||||
|
logging.INFO: ("dim", "INFO"),
|
||||||
|
}
|
||||||
|
|
||||||
class Command(ProgressBarMixin, BaseCommand):
|
|
||||||
|
class Command(PaperlessCommand):
|
||||||
help = "This command checks your document archive for issues."
|
help = "This command checks your document archive for issues."
|
||||||
|
|
||||||
def add_arguments(self, parser):
|
def _render_results(self, messages: SanityCheckMessages) -> None:
|
||||||
self.add_argument_progress_bar_mixin(parser)
|
"""Render sanity check results as a Rich table."""
|
||||||
|
|
||||||
def handle(self, *args, **options):
|
if (
|
||||||
self.handle_progress_bar_mixin(**options)
|
not messages.has_error
|
||||||
messages = check_sanity(progress=self.use_progress_bar, scheduled=False)
|
and not messages.has_warning
|
||||||
|
and not messages.has_info
|
||||||
|
):
|
||||||
|
self.console.print(
|
||||||
|
Panel(
|
||||||
|
"[green]No issues detected.[/green]",
|
||||||
|
title="Sanity Check",
|
||||||
|
border_style="green",
|
||||||
|
),
|
||||||
|
)
|
||||||
|
return
|
||||||
|
|
||||||
messages.log_messages()
|
# Build a lookup for document titles
|
||||||
|
doc_pks = [pk for pk in messages.document_pks() if pk is not None]
|
||||||
|
titles: dict[int, str] = {}
|
||||||
|
if doc_pks:
|
||||||
|
titles = dict(
|
||||||
|
Document.global_objects.filter(pk__in=doc_pks)
|
||||||
|
.only("pk", "title")
|
||||||
|
.values_list("pk", "title"),
|
||||||
|
)
|
||||||
|
|
||||||
|
table = Table(
|
||||||
|
title="Sanity Check Results",
|
||||||
|
show_lines=True,
|
||||||
|
title_style="bold",
|
||||||
|
)
|
||||||
|
table.add_column("Level", width=7, no_wrap=True)
|
||||||
|
table.add_column("Document", min_width=20)
|
||||||
|
table.add_column("Issue", ratio=1)
|
||||||
|
|
||||||
|
for doc_pk, doc_messages in messages.iter_messages():
|
||||||
|
if doc_pk is not None:
|
||||||
|
title = titles.get(doc_pk, "Unknown")
|
||||||
|
doc_label = f"#{doc_pk} {title}"
|
||||||
|
else:
|
||||||
|
doc_label = "(global)"
|
||||||
|
|
||||||
|
for msg in doc_messages:
|
||||||
|
style, label = _LEVEL_STYLE.get(
|
||||||
|
msg["level"],
|
||||||
|
("dim", "INFO"),
|
||||||
|
)
|
||||||
|
table.add_row(
|
||||||
|
Text(label, style=style),
|
||||||
|
Text(doc_label),
|
||||||
|
Text(str(msg["message"])),
|
||||||
|
)
|
||||||
|
|
||||||
|
self.console.print(table)
|
||||||
|
|
||||||
|
parts: list[str] = []
|
||||||
|
|
||||||
|
if messages.document_error_count:
|
||||||
|
parts.append(
|
||||||
|
f"{messages.document_error_count} document(s) with [bold red]errors[/bold red]",
|
||||||
|
)
|
||||||
|
if messages.document_warning_count:
|
||||||
|
parts.append(
|
||||||
|
f"{messages.document_warning_count} document(s) with [yellow]warnings[/yellow]",
|
||||||
|
)
|
||||||
|
if messages.document_info_count:
|
||||||
|
parts.append(f"{messages.document_info_count} document(s) with infos")
|
||||||
|
if messages.global_warning_count:
|
||||||
|
parts.append(
|
||||||
|
f"{messages.global_warning_count} global [yellow]warning(s)[/yellow]",
|
||||||
|
)
|
||||||
|
|
||||||
|
if parts:
|
||||||
|
if len(parts) > 1:
|
||||||
|
summary = ", ".join(parts[:-1]) + " and " + parts[-1]
|
||||||
|
else:
|
||||||
|
summary = parts[0]
|
||||||
|
self.console.print(f"\nFound {summary}.")
|
||||||
|
else:
|
||||||
|
self.console.print("\nNo issues found.")
|
||||||
|
|
||||||
|
def handle(self, *args: Any, **options: Any) -> None:
|
||||||
|
messages = check_sanity(
|
||||||
|
scheduled=False,
|
||||||
|
iter_wrapper=lambda docs: self.track(
|
||||||
|
docs,
|
||||||
|
description="Checking documents...",
|
||||||
|
),
|
||||||
|
)
|
||||||
|
self._render_results(messages)
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
# Generated by Django 5.2.7 on 2026-01-15 22:08
|
# Generated by Django 5.2.11 on 2026-03-03 16:27
|
||||||
|
|
||||||
import datetime
|
import datetime
|
||||||
|
|
||||||
@@ -21,6 +21,207 @@ class Migration(migrations.Migration):
|
|||||||
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
|
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
replaces = [
|
||||||
|
("documents", "0001_initial"),
|
||||||
|
("documents", "0002_auto_20151226_1316"),
|
||||||
|
("documents", "0003_sender"),
|
||||||
|
("documents", "0004_auto_20160114_1844"),
|
||||||
|
(
|
||||||
|
"documents",
|
||||||
|
"0004_auto_20160114_1844_squashed_0011_auto_20160303_1929",
|
||||||
|
),
|
||||||
|
("documents", "0005_auto_20160123_0313"),
|
||||||
|
("documents", "0006_auto_20160123_0430"),
|
||||||
|
("documents", "0007_auto_20160126_2114"),
|
||||||
|
("documents", "0008_document_file_type"),
|
||||||
|
("documents", "0009_auto_20160214_0040"),
|
||||||
|
("documents", "0010_log"),
|
||||||
|
("documents", "0011_auto_20160303_1929"),
|
||||||
|
("documents", "0012_auto_20160305_0040"),
|
||||||
|
("documents", "0013_auto_20160325_2111"),
|
||||||
|
("documents", "0014_document_checksum"),
|
||||||
|
("documents", "0015_add_insensitive_to_match"),
|
||||||
|
(
|
||||||
|
"documents",
|
||||||
|
"0015_add_insensitive_to_match_squashed_0018_auto_20170715_1712",
|
||||||
|
),
|
||||||
|
("documents", "0016_auto_20170325_1558"),
|
||||||
|
("documents", "0017_auto_20170512_0507"),
|
||||||
|
("documents", "0018_auto_20170715_1712"),
|
||||||
|
("documents", "0019_add_consumer_user"),
|
||||||
|
("documents", "0020_document_added"),
|
||||||
|
("documents", "0021_document_storage_type"),
|
||||||
|
("documents", "0022_auto_20181007_1420"),
|
||||||
|
("documents", "0023_document_current_filename"),
|
||||||
|
("documents", "1000_update_paperless_all"),
|
||||||
|
("documents", "1001_auto_20201109_1636"),
|
||||||
|
("documents", "1002_auto_20201111_1105"),
|
||||||
|
("documents", "1003_mime_types"),
|
||||||
|
("documents", "1004_sanity_check_schedule"),
|
||||||
|
("documents", "1005_checksums"),
|
||||||
|
("documents", "1006_auto_20201208_2209"),
|
||||||
|
(
|
||||||
|
"documents",
|
||||||
|
"1006_auto_20201208_2209_squashed_1011_auto_20210101_2340",
|
||||||
|
),
|
||||||
|
("documents", "1007_savedview_savedviewfilterrule"),
|
||||||
|
("documents", "1008_auto_20201216_1736"),
|
||||||
|
("documents", "1009_auto_20201216_2005"),
|
||||||
|
("documents", "1010_auto_20210101_2159"),
|
||||||
|
("documents", "1011_auto_20210101_2340"),
|
||||||
|
("documents", "1012_fix_archive_files"),
|
||||||
|
("documents", "1013_migrate_tag_colour"),
|
||||||
|
("documents", "1014_auto_20210228_1614"),
|
||||||
|
("documents", "1015_remove_null_characters"),
|
||||||
|
("documents", "1016_auto_20210317_1351"),
|
||||||
|
(
|
||||||
|
"documents",
|
||||||
|
"1016_auto_20210317_1351_squashed_1020_merge_20220518_1839",
|
||||||
|
),
|
||||||
|
("documents", "1017_alter_savedviewfilterrule_rule_type"),
|
||||||
|
("documents", "1018_alter_savedviewfilterrule_value"),
|
||||||
|
("documents", "1019_storagepath_document_storage_path"),
|
||||||
|
("documents", "1019_uisettings"),
|
||||||
|
("documents", "1020_merge_20220518_1839"),
|
||||||
|
("documents", "1021_webp_thumbnail_conversion"),
|
||||||
|
("documents", "1022_paperlesstask"),
|
||||||
|
(
|
||||||
|
"documents",
|
||||||
|
"1022_paperlesstask_squashed_1036_alter_savedviewfilterrule_rule_type",
|
||||||
|
),
|
||||||
|
("documents", "1023_add_comments"),
|
||||||
|
("documents", "1024_document_original_filename"),
|
||||||
|
("documents", "1025_alter_savedviewfilterrule_rule_type"),
|
||||||
|
("documents", "1026_transition_to_celery"),
|
||||||
|
(
|
||||||
|
"documents",
|
||||||
|
"1027_remove_paperlesstask_attempted_task_and_more",
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"documents",
|
||||||
|
"1028_remove_paperlesstask_task_args_and_more",
|
||||||
|
),
|
||||||
|
("documents", "1029_alter_document_archive_serial_number"),
|
||||||
|
("documents", "1030_alter_paperlesstask_task_file_name"),
|
||||||
|
(
|
||||||
|
"documents",
|
||||||
|
"1031_remove_savedview_user_correspondent_owner_and_more",
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"documents",
|
||||||
|
"1032_alter_correspondent_matching_algorithm_and_more",
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"documents",
|
||||||
|
"1033_alter_documenttype_options_alter_tag_options_and_more",
|
||||||
|
),
|
||||||
|
("documents", "1034_alter_savedviewfilterrule_rule_type"),
|
||||||
|
("documents", "1035_rename_comment_note"),
|
||||||
|
("documents", "1036_alter_savedviewfilterrule_rule_type"),
|
||||||
|
("documents", "1037_webp_encrypted_thumbnail_conversion"),
|
||||||
|
("documents", "1038_sharelink"),
|
||||||
|
("documents", "1039_consumptiontemplate"),
|
||||||
|
(
|
||||||
|
"documents",
|
||||||
|
"1040_customfield_customfieldinstance_and_more",
|
||||||
|
),
|
||||||
|
("documents", "1041_alter_consumptiontemplate_sources"),
|
||||||
|
(
|
||||||
|
"documents",
|
||||||
|
"1042_consumptiontemplate_assign_custom_fields_and_more",
|
||||||
|
),
|
||||||
|
("documents", "1043_alter_savedviewfilterrule_rule_type"),
|
||||||
|
(
|
||||||
|
"documents",
|
||||||
|
"1044_workflow_workflowaction_workflowtrigger_and_more",
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"documents",
|
||||||
|
"1045_alter_customfieldinstance_value_monetary",
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"documents",
|
||||||
|
"1045_alter_customfieldinstance_value_monetary_squashed_1049_document_deleted_at_document_restored_at",
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"documents",
|
||||||
|
"1046_workflowaction_remove_all_correspondents_and_more",
|
||||||
|
),
|
||||||
|
("documents", "1047_savedview_display_mode_and_more"),
|
||||||
|
("documents", "1048_alter_savedviewfilterrule_rule_type"),
|
||||||
|
(
|
||||||
|
"documents",
|
||||||
|
"1049_document_deleted_at_document_restored_at",
|
||||||
|
),
|
||||||
|
("documents", "1050_customfield_extra_data_and_more"),
|
||||||
|
(
|
||||||
|
"documents",
|
||||||
|
"1051_alter_correspondent_owner_alter_document_owner_and_more",
|
||||||
|
),
|
||||||
|
("documents", "1052_document_transaction_id"),
|
||||||
|
("documents", "1053_document_page_count"),
|
||||||
|
(
|
||||||
|
"documents",
|
||||||
|
"1054_customfieldinstance_value_monetary_amount_and_more",
|
||||||
|
),
|
||||||
|
("documents", "1055_alter_storagepath_path"),
|
||||||
|
(
|
||||||
|
"documents",
|
||||||
|
"1056_customfieldinstance_deleted_at_and_more",
|
||||||
|
),
|
||||||
|
("documents", "1057_paperlesstask_owner"),
|
||||||
|
(
|
||||||
|
"documents",
|
||||||
|
"1058_workflowtrigger_schedule_date_custom_field_and_more",
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"documents",
|
||||||
|
"1059_workflowactionemail_workflowactionwebhook_and_more",
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"documents",
|
||||||
|
"1060_alter_customfieldinstance_value_select",
|
||||||
|
),
|
||||||
|
("documents", "1061_workflowactionwebhook_as_json"),
|
||||||
|
("documents", "1062_alter_savedviewfilterrule_rule_type"),
|
||||||
|
(
|
||||||
|
"documents",
|
||||||
|
"1063_paperlesstask_type_alter_paperlesstask_task_name_and_more",
|
||||||
|
),
|
||||||
|
("documents", "1064_delete_log"),
|
||||||
|
(
|
||||||
|
"documents",
|
||||||
|
"1065_workflowaction_assign_custom_fields_values",
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"documents",
|
||||||
|
"1066_alter_workflowtrigger_schedule_offset_days",
|
||||||
|
),
|
||||||
|
("documents", "1067_alter_document_created"),
|
||||||
|
("documents", "1068_alter_document_created"),
|
||||||
|
(
|
||||||
|
"documents",
|
||||||
|
"1069_workflowtrigger_filter_has_storage_path_and_more",
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"documents",
|
||||||
|
"1070_customfieldinstance_value_long_text_and_more",
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"documents",
|
||||||
|
"1071_tag_tn_ancestors_count_tag_tn_ancestors_pks_and_more",
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"documents",
|
||||||
|
"1072_workflowtrigger_filter_custom_field_query_and_more",
|
||||||
|
),
|
||||||
|
("documents", "1073_migrate_workflow_title_jinja"),
|
||||||
|
(
|
||||||
|
"documents",
|
||||||
|
"1074_workflowrun_deleted_at_workflowrun_restored_at_and_more",
|
||||||
|
),
|
||||||
|
]
|
||||||
|
|
||||||
operations = [
|
operations = [
|
||||||
migrations.CreateModel(
|
migrations.CreateModel(
|
||||||
name="WorkflowActionEmail",
|
name="WorkflowActionEmail",
|
||||||
@@ -185,70 +386,6 @@ class Migration(migrations.Migration):
|
|||||||
"abstract": False,
|
"abstract": False,
|
||||||
},
|
},
|
||||||
),
|
),
|
||||||
migrations.CreateModel(
|
|
||||||
name="CustomField",
|
|
||||||
fields=[
|
|
||||||
(
|
|
||||||
"id",
|
|
||||||
models.AutoField(
|
|
||||||
auto_created=True,
|
|
||||||
primary_key=True,
|
|
||||||
serialize=False,
|
|
||||||
verbose_name="ID",
|
|
||||||
),
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"created",
|
|
||||||
models.DateTimeField(
|
|
||||||
db_index=True,
|
|
||||||
default=django.utils.timezone.now,
|
|
||||||
editable=False,
|
|
||||||
verbose_name="created",
|
|
||||||
),
|
|
||||||
),
|
|
||||||
("name", models.CharField(max_length=128)),
|
|
||||||
(
|
|
||||||
"data_type",
|
|
||||||
models.CharField(
|
|
||||||
choices=[
|
|
||||||
("string", "String"),
|
|
||||||
("url", "URL"),
|
|
||||||
("date", "Date"),
|
|
||||||
("boolean", "Boolean"),
|
|
||||||
("integer", "Integer"),
|
|
||||||
("float", "Float"),
|
|
||||||
("monetary", "Monetary"),
|
|
||||||
("documentlink", "Document Link"),
|
|
||||||
("select", "Select"),
|
|
||||||
("longtext", "Long Text"),
|
|
||||||
],
|
|
||||||
editable=False,
|
|
||||||
max_length=50,
|
|
||||||
verbose_name="data type",
|
|
||||||
),
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"extra_data",
|
|
||||||
models.JSONField(
|
|
||||||
blank=True,
|
|
||||||
help_text="Extra data for the custom field, such as select options",
|
|
||||||
null=True,
|
|
||||||
verbose_name="extra data",
|
|
||||||
),
|
|
||||||
),
|
|
||||||
],
|
|
||||||
options={
|
|
||||||
"verbose_name": "custom field",
|
|
||||||
"verbose_name_plural": "custom fields",
|
|
||||||
"ordering": ("created",),
|
|
||||||
"constraints": [
|
|
||||||
models.UniqueConstraint(
|
|
||||||
fields=("name",),
|
|
||||||
name="documents_customfield_unique_name",
|
|
||||||
),
|
|
||||||
],
|
|
||||||
},
|
|
||||||
),
|
|
||||||
migrations.CreateModel(
|
migrations.CreateModel(
|
||||||
name="DocumentType",
|
name="DocumentType",
|
||||||
fields=[
|
fields=[
|
||||||
@@ -733,17 +870,6 @@ class Migration(migrations.Migration):
|
|||||||
verbose_name="correspondent",
|
verbose_name="correspondent",
|
||||||
),
|
),
|
||||||
),
|
),
|
||||||
(
|
|
||||||
"owner",
|
|
||||||
models.ForeignKey(
|
|
||||||
blank=True,
|
|
||||||
default=None,
|
|
||||||
null=True,
|
|
||||||
on_delete=django.db.models.deletion.SET_NULL,
|
|
||||||
to=settings.AUTH_USER_MODEL,
|
|
||||||
verbose_name="owner",
|
|
||||||
),
|
|
||||||
),
|
|
||||||
(
|
(
|
||||||
"document_type",
|
"document_type",
|
||||||
models.ForeignKey(
|
models.ForeignKey(
|
||||||
@@ -767,12 +893,14 @@ class Migration(migrations.Migration):
|
|||||||
),
|
),
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
"tags",
|
"owner",
|
||||||
models.ManyToManyField(
|
models.ForeignKey(
|
||||||
blank=True,
|
blank=True,
|
||||||
related_name="documents",
|
default=None,
|
||||||
to="documents.tag",
|
null=True,
|
||||||
verbose_name="tags",
|
on_delete=django.db.models.deletion.SET_NULL,
|
||||||
|
to=settings.AUTH_USER_MODEL,
|
||||||
|
verbose_name="owner",
|
||||||
),
|
),
|
||||||
),
|
),
|
||||||
],
|
],
|
||||||
@@ -782,6 +910,140 @@ class Migration(migrations.Migration):
|
|||||||
"ordering": ("-created",),
|
"ordering": ("-created",),
|
||||||
},
|
},
|
||||||
),
|
),
|
||||||
|
migrations.AddField(
|
||||||
|
model_name="document",
|
||||||
|
name="tags",
|
||||||
|
field=models.ManyToManyField(
|
||||||
|
blank=True,
|
||||||
|
related_name="documents",
|
||||||
|
to="documents.tag",
|
||||||
|
verbose_name="tags",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
migrations.CreateModel(
|
||||||
|
name="Note",
|
||||||
|
fields=[
|
||||||
|
(
|
||||||
|
"id",
|
||||||
|
models.AutoField(
|
||||||
|
auto_created=True,
|
||||||
|
primary_key=True,
|
||||||
|
serialize=False,
|
||||||
|
verbose_name="ID",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
("deleted_at", models.DateTimeField(blank=True, null=True)),
|
||||||
|
("restored_at", models.DateTimeField(blank=True, null=True)),
|
||||||
|
("transaction_id", models.UUIDField(blank=True, null=True)),
|
||||||
|
(
|
||||||
|
"note",
|
||||||
|
models.TextField(
|
||||||
|
blank=True,
|
||||||
|
help_text="Note for the document",
|
||||||
|
verbose_name="content",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"created",
|
||||||
|
models.DateTimeField(
|
||||||
|
db_index=True,
|
||||||
|
default=django.utils.timezone.now,
|
||||||
|
verbose_name="created",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"document",
|
||||||
|
models.ForeignKey(
|
||||||
|
blank=True,
|
||||||
|
null=True,
|
||||||
|
on_delete=django.db.models.deletion.CASCADE,
|
||||||
|
related_name="notes",
|
||||||
|
to="documents.document",
|
||||||
|
verbose_name="document",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"user",
|
||||||
|
models.ForeignKey(
|
||||||
|
blank=True,
|
||||||
|
null=True,
|
||||||
|
on_delete=django.db.models.deletion.SET_NULL,
|
||||||
|
related_name="notes",
|
||||||
|
to=settings.AUTH_USER_MODEL,
|
||||||
|
verbose_name="user",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
],
|
||||||
|
options={
|
||||||
|
"verbose_name": "note",
|
||||||
|
"verbose_name_plural": "notes",
|
||||||
|
"ordering": ("created",),
|
||||||
|
},
|
||||||
|
),
|
||||||
|
migrations.CreateModel(
|
||||||
|
name="CustomField",
|
||||||
|
fields=[
|
||||||
|
(
|
||||||
|
"id",
|
||||||
|
models.AutoField(
|
||||||
|
auto_created=True,
|
||||||
|
primary_key=True,
|
||||||
|
serialize=False,
|
||||||
|
verbose_name="ID",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"created",
|
||||||
|
models.DateTimeField(
|
||||||
|
db_index=True,
|
||||||
|
default=django.utils.timezone.now,
|
||||||
|
editable=False,
|
||||||
|
verbose_name="created",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
("name", models.CharField(max_length=128)),
|
||||||
|
(
|
||||||
|
"data_type",
|
||||||
|
models.CharField(
|
||||||
|
choices=[
|
||||||
|
("string", "String"),
|
||||||
|
("url", "URL"),
|
||||||
|
("date", "Date"),
|
||||||
|
("boolean", "Boolean"),
|
||||||
|
("integer", "Integer"),
|
||||||
|
("float", "Float"),
|
||||||
|
("monetary", "Monetary"),
|
||||||
|
("documentlink", "Document Link"),
|
||||||
|
("select", "Select"),
|
||||||
|
("longtext", "Long Text"),
|
||||||
|
],
|
||||||
|
editable=False,
|
||||||
|
max_length=50,
|
||||||
|
verbose_name="data type",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"extra_data",
|
||||||
|
models.JSONField(
|
||||||
|
blank=True,
|
||||||
|
help_text="Extra data for the custom field, such as select options",
|
||||||
|
null=True,
|
||||||
|
verbose_name="extra data",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
],
|
||||||
|
options={
|
||||||
|
"verbose_name": "custom field",
|
||||||
|
"verbose_name_plural": "custom fields",
|
||||||
|
"ordering": ("created",),
|
||||||
|
"constraints": [
|
||||||
|
models.UniqueConstraint(
|
||||||
|
fields=("name",),
|
||||||
|
name="documents_customfield_unique_name",
|
||||||
|
),
|
||||||
|
],
|
||||||
|
},
|
||||||
|
),
|
||||||
migrations.CreateModel(
|
migrations.CreateModel(
|
||||||
name="CustomFieldInstance",
|
name="CustomFieldInstance",
|
||||||
fields=[
|
fields=[
|
||||||
@@ -880,66 +1142,6 @@ class Migration(migrations.Migration):
|
|||||||
"ordering": ("created",),
|
"ordering": ("created",),
|
||||||
},
|
},
|
||||||
),
|
),
|
||||||
migrations.CreateModel(
|
|
||||||
name="Note",
|
|
||||||
fields=[
|
|
||||||
(
|
|
||||||
"id",
|
|
||||||
models.AutoField(
|
|
||||||
auto_created=True,
|
|
||||||
primary_key=True,
|
|
||||||
serialize=False,
|
|
||||||
verbose_name="ID",
|
|
||||||
),
|
|
||||||
),
|
|
||||||
("deleted_at", models.DateTimeField(blank=True, null=True)),
|
|
||||||
("restored_at", models.DateTimeField(blank=True, null=True)),
|
|
||||||
("transaction_id", models.UUIDField(blank=True, null=True)),
|
|
||||||
(
|
|
||||||
"note",
|
|
||||||
models.TextField(
|
|
||||||
blank=True,
|
|
||||||
help_text="Note for the document",
|
|
||||||
verbose_name="content",
|
|
||||||
),
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"created",
|
|
||||||
models.DateTimeField(
|
|
||||||
db_index=True,
|
|
||||||
default=django.utils.timezone.now,
|
|
||||||
verbose_name="created",
|
|
||||||
),
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"document",
|
|
||||||
models.ForeignKey(
|
|
||||||
blank=True,
|
|
||||||
null=True,
|
|
||||||
on_delete=django.db.models.deletion.CASCADE,
|
|
||||||
related_name="notes",
|
|
||||||
to="documents.document",
|
|
||||||
verbose_name="document",
|
|
||||||
),
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"user",
|
|
||||||
models.ForeignKey(
|
|
||||||
blank=True,
|
|
||||||
null=True,
|
|
||||||
on_delete=django.db.models.deletion.SET_NULL,
|
|
||||||
related_name="notes",
|
|
||||||
to=settings.AUTH_USER_MODEL,
|
|
||||||
verbose_name="user",
|
|
||||||
),
|
|
||||||
),
|
|
||||||
],
|
|
||||||
options={
|
|
||||||
"verbose_name": "note",
|
|
||||||
"verbose_name_plural": "notes",
|
|
||||||
"ordering": ("created",),
|
|
||||||
},
|
|
||||||
),
|
|
||||||
migrations.CreateModel(
|
migrations.CreateModel(
|
||||||
name="PaperlessTask",
|
name="PaperlessTask",
|
||||||
fields=[
|
fields=[
|
||||||
@@ -986,7 +1188,6 @@ class Migration(migrations.Migration):
|
|||||||
("train_classifier", "Train Classifier"),
|
("train_classifier", "Train Classifier"),
|
||||||
("check_sanity", "Check Sanity"),
|
("check_sanity", "Check Sanity"),
|
||||||
("index_optimize", "Index Optimize"),
|
("index_optimize", "Index Optimize"),
|
||||||
("llmindex_update", "LLM Index Update"),
|
|
||||||
],
|
],
|
||||||
help_text="Name of the task that was run",
|
help_text="Name of the task that was run",
|
||||||
max_length=255,
|
max_length=255,
|
||||||
@@ -1380,6 +1581,7 @@ class Migration(migrations.Migration):
|
|||||||
verbose_name="Workflow Action Type",
|
verbose_name="Workflow Action Type",
|
||||||
),
|
),
|
||||||
),
|
),
|
||||||
|
("order", models.PositiveIntegerField(default=0, verbose_name="order")),
|
||||||
(
|
(
|
||||||
"assign_title",
|
"assign_title",
|
||||||
models.TextField(
|
models.TextField(
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
# Generated by Django 5.2.9 on 2026-01-20 18:46
|
# Generated by Django 5.2.11 on 2026-03-03 16:27
|
||||||
|
|
||||||
import django.db.models.deletion
|
import django.db.models.deletion
|
||||||
from django.db import migrations
|
from django.db import migrations
|
||||||
@@ -9,8 +9,14 @@ class Migration(migrations.Migration):
|
|||||||
initial = True
|
initial = True
|
||||||
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
("documents", "0001_initial"),
|
("documents", "0001_squashed"),
|
||||||
("paperless_mail", "0001_initial"),
|
("paperless_mail", "0001_squashed"),
|
||||||
|
]
|
||||||
|
|
||||||
|
# This migration needs a "replaces", but it doesn't matter which.
|
||||||
|
# Chose the last 2.20.x migration
|
||||||
|
replaces = [
|
||||||
|
("documents", "1075_workflowaction_order"),
|
||||||
]
|
]
|
||||||
|
|
||||||
operations = [
|
operations = [
|
||||||
@@ -6,7 +6,7 @@ from django.db import models
|
|||||||
|
|
||||||
class Migration(migrations.Migration):
|
class Migration(migrations.Migration):
|
||||||
dependencies = [
|
dependencies = [
|
||||||
("documents", "0002_initial"),
|
("documents", "0002_squashed"),
|
||||||
]
|
]
|
||||||
|
|
||||||
operations = [
|
operations = [
|
||||||
|
|||||||
@@ -0,0 +1,30 @@
|
|||||||
|
# Generated by Django 5.2.11 on 2026-03-03 16:42
|
||||||
|
|
||||||
|
from django.db import migrations
|
||||||
|
from django.db import models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
dependencies = [
|
||||||
|
("documents", "0013_document_root_document"),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name="paperlesstask",
|
||||||
|
name="task_name",
|
||||||
|
field=models.CharField(
|
||||||
|
choices=[
|
||||||
|
("consume_file", "Consume File"),
|
||||||
|
("train_classifier", "Train Classifier"),
|
||||||
|
("check_sanity", "Check Sanity"),
|
||||||
|
("index_optimize", "Index Optimize"),
|
||||||
|
("llmindex_update", "LLM Index Update"),
|
||||||
|
],
|
||||||
|
help_text="Name of the task that was run",
|
||||||
|
max_length=255,
|
||||||
|
null=True,
|
||||||
|
verbose_name="Task Name",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
]
|
||||||
@@ -75,7 +75,7 @@ class MatchingModel(ModelWithOwner):
|
|||||||
|
|
||||||
is_insensitive = models.BooleanField(_("is insensitive"), default=True)
|
is_insensitive = models.BooleanField(_("is insensitive"), default=True)
|
||||||
|
|
||||||
class Meta:
|
class Meta(ModelWithOwner.Meta):
|
||||||
abstract = True
|
abstract = True
|
||||||
ordering = ("name",)
|
ordering = ("name",)
|
||||||
constraints = [
|
constraints = [
|
||||||
|
|||||||
@@ -5,11 +5,7 @@ from abc import abstractmethod
|
|||||||
from collections.abc import Iterator
|
from collections.abc import Iterator
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from types import TracebackType
|
from types import TracebackType
|
||||||
|
from typing import Self
|
||||||
try:
|
|
||||||
from typing import Self
|
|
||||||
except ImportError:
|
|
||||||
from typing_extensions import Self
|
|
||||||
|
|
||||||
import dateparser
|
import dateparser
|
||||||
|
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ if TYPE_CHECKING:
|
|||||||
from channels_redis.pubsub import RedisPubSubChannelLayer
|
from channels_redis.pubsub import RedisPubSubChannelLayer
|
||||||
|
|
||||||
|
|
||||||
class ProgressStatusOptions(str, enum.Enum):
|
class ProgressStatusOptions(enum.StrEnum):
|
||||||
STARTED = "STARTED"
|
STARTED = "STARTED"
|
||||||
WORKING = "WORKING"
|
WORKING = "WORKING"
|
||||||
SUCCESS = "SUCCESS"
|
SUCCESS = "SUCCESS"
|
||||||
|
|||||||
@@ -1,80 +1,174 @@
|
|||||||
|
"""
|
||||||
|
Sanity checker for the Paperless-ngx document archive.
|
||||||
|
|
||||||
|
Verifies that all documents have valid files, correct checksums,
|
||||||
|
and consistent metadata. Reports orphaned files in the media directory.
|
||||||
|
|
||||||
|
Progress display is the caller's responsibility -- pass an ``iter_wrapper``
|
||||||
|
to wrap the document queryset (e.g., with a progress bar). The default
|
||||||
|
is an identity function that adds no overhead.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
import hashlib
|
import hashlib
|
||||||
import logging
|
import logging
|
||||||
import uuid
|
import uuid
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
from collections.abc import Callable
|
||||||
|
from collections.abc import Iterable
|
||||||
|
from collections.abc import Iterator
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
from typing import Final
|
from typing import Final
|
||||||
|
from typing import TypedDict
|
||||||
|
from typing import TypeVar
|
||||||
|
|
||||||
from celery import states
|
from celery import states
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django.utils import timezone
|
from django.utils import timezone
|
||||||
from tqdm import tqdm
|
|
||||||
|
|
||||||
from documents.models import Document
|
from documents.models import Document
|
||||||
from documents.models import PaperlessTask
|
from documents.models import PaperlessTask
|
||||||
from paperless.config import GeneralConfig
|
from paperless.config import GeneralConfig
|
||||||
|
|
||||||
|
logger = logging.getLogger("paperless.sanity_checker")
|
||||||
|
|
||||||
|
_T = TypeVar("_T")
|
||||||
|
IterWrapper = Callable[[Iterable[_T]], Iterable[_T]]
|
||||||
|
|
||||||
|
|
||||||
|
class MessageEntry(TypedDict):
|
||||||
|
"""A single sanity check message with its severity level."""
|
||||||
|
|
||||||
|
level: int
|
||||||
|
message: str
|
||||||
|
|
||||||
|
|
||||||
|
def _identity(iterable: Iterable[_T]) -> Iterable[_T]:
|
||||||
|
"""Pass through an iterable unchanged (default iter_wrapper)."""
|
||||||
|
return iterable
|
||||||
|
|
||||||
|
|
||||||
class SanityCheckMessages:
|
class SanityCheckMessages:
|
||||||
def __init__(self) -> None:
|
"""Collects sanity check messages grouped by document primary key.
|
||||||
self._messages: dict[int, list[dict]] = defaultdict(list)
|
|
||||||
self.has_error = False
|
|
||||||
self.has_warning = False
|
|
||||||
|
|
||||||
def error(self, doc_pk, message) -> None:
|
Messages are categorized as error, warning, or info. ``None`` is used
|
||||||
|
as the key for messages not associated with a specific document
|
||||||
|
(e.g., orphaned files).
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self) -> None:
|
||||||
|
self._messages: dict[int | None, list[MessageEntry]] = defaultdict(list)
|
||||||
|
self.has_error: bool = False
|
||||||
|
self.has_warning: bool = False
|
||||||
|
self.has_info: bool = False
|
||||||
|
self.document_count: int = 0
|
||||||
|
self.document_error_count: int = 0
|
||||||
|
self.document_warning_count: int = 0
|
||||||
|
self.document_info_count: int = 0
|
||||||
|
self.global_warning_count: int = 0
|
||||||
|
|
||||||
|
# -- Recording ----------------------------------------------------------
|
||||||
|
|
||||||
|
def error(self, doc_pk: int | None, message: str) -> None:
|
||||||
self._messages[doc_pk].append({"level": logging.ERROR, "message": message})
|
self._messages[doc_pk].append({"level": logging.ERROR, "message": message})
|
||||||
self.has_error = True
|
self.has_error = True
|
||||||
|
if doc_pk is not None:
|
||||||
|
self.document_count += 1
|
||||||
|
self.document_error_count += 1
|
||||||
|
|
||||||
def warning(self, doc_pk, message) -> None:
|
def warning(self, doc_pk: int | None, message: str) -> None:
|
||||||
self._messages[doc_pk].append({"level": logging.WARNING, "message": message})
|
self._messages[doc_pk].append({"level": logging.WARNING, "message": message})
|
||||||
self.has_warning = True
|
self.has_warning = True
|
||||||
|
|
||||||
def info(self, doc_pk, message) -> None:
|
if doc_pk is not None:
|
||||||
|
self.document_count += 1
|
||||||
|
self.document_warning_count += 1
|
||||||
|
else:
|
||||||
|
# This is the only type of global message we do right now
|
||||||
|
self.global_warning_count += 1
|
||||||
|
|
||||||
|
def info(self, doc_pk: int | None, message: str) -> None:
|
||||||
self._messages[doc_pk].append({"level": logging.INFO, "message": message})
|
self._messages[doc_pk].append({"level": logging.INFO, "message": message})
|
||||||
|
self.has_info = True
|
||||||
|
|
||||||
|
if doc_pk is not None:
|
||||||
|
self.document_count += 1
|
||||||
|
self.document_info_count += 1
|
||||||
|
|
||||||
|
# -- Iteration / query --------------------------------------------------
|
||||||
|
|
||||||
|
def document_pks(self) -> list[int | None]:
|
||||||
|
"""Return all document PKs (including None for global messages)."""
|
||||||
|
return list(self._messages.keys())
|
||||||
|
|
||||||
|
def iter_messages(self) -> Iterator[tuple[int | None, list[MessageEntry]]]:
|
||||||
|
"""Iterate over (doc_pk, messages) pairs."""
|
||||||
|
yield from self._messages.items()
|
||||||
|
|
||||||
|
def __getitem__(self, item: int | None) -> list[MessageEntry]:
|
||||||
|
return self._messages[item]
|
||||||
|
|
||||||
|
# -- Summarize Helpers --------------------------------------------------
|
||||||
|
|
||||||
|
@property
|
||||||
|
def has_global_issues(self) -> bool:
|
||||||
|
return None in self._messages
|
||||||
|
|
||||||
|
@property
|
||||||
|
def total_issue_count(self) -> int:
|
||||||
|
"""Total number of error and warning messages across all documents and global."""
|
||||||
|
return (
|
||||||
|
self.document_error_count
|
||||||
|
+ self.document_warning_count
|
||||||
|
+ self.global_warning_count
|
||||||
|
)
|
||||||
|
|
||||||
|
# -- Logging output (used by Celery task path) --------------------------
|
||||||
|
|
||||||
def log_messages(self) -> None:
|
def log_messages(self) -> None:
|
||||||
logger = logging.getLogger("paperless.sanity_checker")
|
"""Write all messages to the ``paperless.sanity_checker`` logger.
|
||||||
|
|
||||||
|
This is the output path for headless / Celery execution.
|
||||||
|
Management commands use Rich rendering instead.
|
||||||
|
"""
|
||||||
if len(self._messages) == 0:
|
if len(self._messages) == 0:
|
||||||
logger.info("Sanity checker detected no issues.")
|
logger.info("Sanity checker detected no issues.")
|
||||||
else:
|
return
|
||||||
# Query once
|
|
||||||
all_docs = Document.global_objects.all()
|
|
||||||
|
|
||||||
for doc_pk in self._messages:
|
doc_pks = [pk for pk in self._messages if pk is not None]
|
||||||
if doc_pk is not None:
|
titles: dict[int, str] = {}
|
||||||
doc = all_docs.get(pk=doc_pk)
|
if doc_pks:
|
||||||
logger.info(
|
titles = dict(
|
||||||
f"Detected following issue(s) with document #{doc.pk},"
|
Document.global_objects.filter(pk__in=doc_pks)
|
||||||
f" titled {doc.title}",
|
.only("pk", "title")
|
||||||
)
|
.values_list("pk", "title"),
|
||||||
for msg in self._messages[doc_pk]:
|
)
|
||||||
logger.log(msg["level"], msg["message"])
|
|
||||||
|
|
||||||
def __len__(self):
|
for doc_pk, entries in self._messages.items():
|
||||||
return len(self._messages)
|
if doc_pk is not None:
|
||||||
|
title = titles.get(doc_pk, "Unknown")
|
||||||
def __getitem__(self, item):
|
logger.info(
|
||||||
return self._messages[item]
|
"Detected following issue(s) with document #%s, titled %s",
|
||||||
|
doc_pk,
|
||||||
|
title,
|
||||||
|
)
|
||||||
|
for msg in entries:
|
||||||
|
logger.log(msg["level"], msg["message"])
|
||||||
|
|
||||||
|
|
||||||
class SanityCheckFailedException(Exception):
|
class SanityCheckFailedException(Exception):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
def check_sanity(*, progress=False, scheduled=True) -> SanityCheckMessages:
|
# ---------------------------------------------------------------------------
|
||||||
paperless_task = PaperlessTask.objects.create(
|
# Internal helpers
|
||||||
task_id=uuid.uuid4(),
|
# ---------------------------------------------------------------------------
|
||||||
type=PaperlessTask.TaskType.SCHEDULED_TASK
|
|
||||||
if scheduled
|
|
||||||
else PaperlessTask.TaskType.MANUAL_TASK,
|
|
||||||
task_name=PaperlessTask.TaskName.CHECK_SANITY,
|
|
||||||
status=states.STARTED,
|
|
||||||
date_created=timezone.now(),
|
|
||||||
date_started=timezone.now(),
|
|
||||||
)
|
|
||||||
messages = SanityCheckMessages()
|
|
||||||
|
|
||||||
|
|
||||||
|
def _build_present_files() -> set[Path]:
|
||||||
|
"""Collect all files in MEDIA_ROOT, excluding directories and ignorable files."""
|
||||||
present_files = {
|
present_files = {
|
||||||
x.resolve()
|
x.resolve()
|
||||||
for x in Path(settings.MEDIA_ROOT).glob("**/*")
|
for x in Path(settings.MEDIA_ROOT).glob("**/*")
|
||||||
@@ -82,95 +176,178 @@ def check_sanity(*, progress=False, scheduled=True) -> SanityCheckMessages:
|
|||||||
}
|
}
|
||||||
|
|
||||||
lockfile = Path(settings.MEDIA_LOCK).resolve()
|
lockfile = Path(settings.MEDIA_LOCK).resolve()
|
||||||
if lockfile in present_files:
|
present_files.discard(lockfile)
|
||||||
present_files.remove(lockfile)
|
|
||||||
|
|
||||||
general_config = GeneralConfig()
|
general_config = GeneralConfig()
|
||||||
app_logo = general_config.app_logo or settings.APP_LOGO
|
app_logo = general_config.app_logo or settings.APP_LOGO
|
||||||
if app_logo:
|
if app_logo:
|
||||||
logo_file = Path(settings.MEDIA_ROOT / Path(app_logo.lstrip("/"))).resolve()
|
logo_file = Path(settings.MEDIA_ROOT / Path(app_logo.lstrip("/"))).resolve()
|
||||||
if logo_file in present_files:
|
present_files.discard(logo_file)
|
||||||
present_files.remove(logo_file)
|
|
||||||
|
|
||||||
for doc in tqdm(Document.global_objects.all(), disable=not progress):
|
return present_files
|
||||||
# Check sanity of the thumbnail
|
|
||||||
thumbnail_path: Final[Path] = Path(doc.thumbnail_path).resolve()
|
|
||||||
if not thumbnail_path.exists() or not thumbnail_path.is_file():
|
|
||||||
messages.error(doc.pk, "Thumbnail of document does not exist.")
|
|
||||||
else:
|
|
||||||
if thumbnail_path in present_files:
|
|
||||||
present_files.remove(thumbnail_path)
|
|
||||||
try:
|
|
||||||
_ = thumbnail_path.read_bytes()
|
|
||||||
except OSError as e:
|
|
||||||
messages.error(doc.pk, f"Cannot read thumbnail file of document: {e}")
|
|
||||||
|
|
||||||
# Check sanity of the original file
|
|
||||||
# TODO: extract method
|
|
||||||
source_path: Final[Path] = Path(doc.source_path).resolve()
|
|
||||||
if not source_path.exists() or not source_path.is_file():
|
|
||||||
messages.error(doc.pk, "Original of document does not exist.")
|
|
||||||
else:
|
|
||||||
if source_path in present_files:
|
|
||||||
present_files.remove(source_path)
|
|
||||||
try:
|
|
||||||
checksum = hashlib.md5(source_path.read_bytes()).hexdigest()
|
|
||||||
except OSError as e:
|
|
||||||
messages.error(doc.pk, f"Cannot read original file of document: {e}")
|
|
||||||
else:
|
|
||||||
if checksum != doc.checksum:
|
|
||||||
messages.error(
|
|
||||||
doc.pk,
|
|
||||||
"Checksum mismatch. "
|
|
||||||
f"Stored: {doc.checksum}, actual: {checksum}.",
|
|
||||||
)
|
|
||||||
|
|
||||||
# Check sanity of the archive file.
|
def _check_thumbnail(
|
||||||
if doc.archive_checksum is not None and doc.archive_filename is None:
|
doc: Document,
|
||||||
|
messages: SanityCheckMessages,
|
||||||
|
present_files: set[Path],
|
||||||
|
) -> None:
|
||||||
|
"""Verify the thumbnail exists and is readable."""
|
||||||
|
thumbnail_path: Final[Path] = Path(doc.thumbnail_path).resolve()
|
||||||
|
if not thumbnail_path.exists() or not thumbnail_path.is_file():
|
||||||
|
messages.error(doc.pk, "Thumbnail of document does not exist.")
|
||||||
|
return
|
||||||
|
|
||||||
|
present_files.discard(thumbnail_path)
|
||||||
|
try:
|
||||||
|
_ = thumbnail_path.read_bytes()
|
||||||
|
except OSError as e:
|
||||||
|
messages.error(doc.pk, f"Cannot read thumbnail file of document: {e}")
|
||||||
|
|
||||||
|
|
||||||
|
def _check_original(
|
||||||
|
doc: Document,
|
||||||
|
messages: SanityCheckMessages,
|
||||||
|
present_files: set[Path],
|
||||||
|
) -> None:
|
||||||
|
"""Verify the original file exists, is readable, and has matching checksum."""
|
||||||
|
source_path: Final[Path] = Path(doc.source_path).resolve()
|
||||||
|
if not source_path.exists() or not source_path.is_file():
|
||||||
|
messages.error(doc.pk, "Original of document does not exist.")
|
||||||
|
return
|
||||||
|
|
||||||
|
present_files.discard(source_path)
|
||||||
|
try:
|
||||||
|
checksum = hashlib.md5(source_path.read_bytes()).hexdigest()
|
||||||
|
except OSError as e:
|
||||||
|
messages.error(doc.pk, f"Cannot read original file of document: {e}")
|
||||||
|
else:
|
||||||
|
if checksum != doc.checksum:
|
||||||
messages.error(
|
messages.error(
|
||||||
doc.pk,
|
doc.pk,
|
||||||
"Document has an archive file checksum, but no archive filename.",
|
f"Checksum mismatch. Stored: {doc.checksum}, actual: {checksum}.",
|
||||||
)
|
)
|
||||||
elif doc.archive_checksum is None and doc.archive_filename is not None:
|
|
||||||
|
|
||||||
|
def _check_archive(
|
||||||
|
doc: Document,
|
||||||
|
messages: SanityCheckMessages,
|
||||||
|
present_files: set[Path],
|
||||||
|
) -> None:
|
||||||
|
"""Verify archive file consistency: checksum/filename pairing and file integrity."""
|
||||||
|
if doc.archive_checksum is not None and doc.archive_filename is None:
|
||||||
|
messages.error(
|
||||||
|
doc.pk,
|
||||||
|
"Document has an archive file checksum, but no archive filename.",
|
||||||
|
)
|
||||||
|
elif doc.archive_checksum is None and doc.archive_filename is not None:
|
||||||
|
messages.error(
|
||||||
|
doc.pk,
|
||||||
|
"Document has an archive file, but its checksum is missing.",
|
||||||
|
)
|
||||||
|
elif doc.has_archive_version:
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
assert isinstance(doc.archive_path, Path)
|
||||||
|
archive_path: Final[Path] = Path(doc.archive_path).resolve()
|
||||||
|
if not archive_path.exists() or not archive_path.is_file():
|
||||||
|
messages.error(doc.pk, "Archived version of document does not exist.")
|
||||||
|
return
|
||||||
|
|
||||||
|
present_files.discard(archive_path)
|
||||||
|
try:
|
||||||
|
checksum = hashlib.md5(archive_path.read_bytes()).hexdigest()
|
||||||
|
except OSError as e:
|
||||||
messages.error(
|
messages.error(
|
||||||
doc.pk,
|
doc.pk,
|
||||||
"Document has an archive file, but its checksum is missing.",
|
f"Cannot read archive file of document: {e}",
|
||||||
)
|
)
|
||||||
elif doc.has_archive_version:
|
else:
|
||||||
archive_path: Final[Path] = Path(doc.archive_path).resolve()
|
if checksum != doc.archive_checksum:
|
||||||
if not archive_path.exists() or not archive_path.is_file():
|
messages.error(
|
||||||
messages.error(doc.pk, "Archived version of document does not exist.")
|
doc.pk,
|
||||||
else:
|
"Checksum mismatch of archived document. "
|
||||||
if archive_path in present_files:
|
f"Stored: {doc.archive_checksum}, actual: {checksum}.",
|
||||||
present_files.remove(archive_path)
|
)
|
||||||
try:
|
|
||||||
checksum = hashlib.md5(archive_path.read_bytes()).hexdigest()
|
|
||||||
except OSError as e:
|
|
||||||
messages.error(
|
|
||||||
doc.pk,
|
|
||||||
f"Cannot read archive file of document : {e}",
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
if checksum != doc.archive_checksum:
|
|
||||||
messages.error(
|
|
||||||
doc.pk,
|
|
||||||
"Checksum mismatch of archived document. "
|
|
||||||
f"Stored: {doc.archive_checksum}, "
|
|
||||||
f"actual: {checksum}.",
|
|
||||||
)
|
|
||||||
|
|
||||||
# other document checks
|
|
||||||
if not doc.content:
|
def _check_content(doc: Document, messages: SanityCheckMessages) -> None:
|
||||||
messages.info(doc.pk, "Document contains no OCR data")
|
"""Flag documents with no OCR content."""
|
||||||
|
if not doc.content:
|
||||||
|
messages.info(doc.pk, "Document contains no OCR data")
|
||||||
|
|
||||||
|
|
||||||
|
def _check_document(
|
||||||
|
doc: Document,
|
||||||
|
messages: SanityCheckMessages,
|
||||||
|
present_files: set[Path],
|
||||||
|
) -> None:
|
||||||
|
"""Run all checks for a single document."""
|
||||||
|
_check_thumbnail(doc, messages, present_files)
|
||||||
|
_check_original(doc, messages, present_files)
|
||||||
|
_check_archive(doc, messages, present_files)
|
||||||
|
_check_content(doc, messages)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Public entry point
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def check_sanity(
|
||||||
|
*,
|
||||||
|
scheduled: bool = True,
|
||||||
|
iter_wrapper: IterWrapper[Document] = _identity,
|
||||||
|
) -> SanityCheckMessages:
|
||||||
|
"""Run a full sanity check on the document archive.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
scheduled: Whether this is a scheduled (automatic) or manual check.
|
||||||
|
Controls the task type recorded in the database.
|
||||||
|
iter_wrapper: A callable that wraps the document iterable, e.g.,
|
||||||
|
for progress bar display. Defaults to identity (no wrapping).
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A SanityCheckMessages instance containing all detected issues.
|
||||||
|
"""
|
||||||
|
paperless_task = PaperlessTask.objects.create(
|
||||||
|
task_id=uuid.uuid4(),
|
||||||
|
type=(
|
||||||
|
PaperlessTask.TaskType.SCHEDULED_TASK
|
||||||
|
if scheduled
|
||||||
|
else PaperlessTask.TaskType.MANUAL_TASK
|
||||||
|
),
|
||||||
|
task_name=PaperlessTask.TaskName.CHECK_SANITY,
|
||||||
|
status=states.STARTED,
|
||||||
|
date_created=timezone.now(),
|
||||||
|
date_started=timezone.now(),
|
||||||
|
)
|
||||||
|
|
||||||
|
messages = SanityCheckMessages()
|
||||||
|
present_files = _build_present_files()
|
||||||
|
|
||||||
|
documents = Document.global_objects.all()
|
||||||
|
for doc in iter_wrapper(documents):
|
||||||
|
_check_document(doc, messages, present_files)
|
||||||
|
|
||||||
for extra_file in present_files:
|
for extra_file in present_files:
|
||||||
messages.warning(None, f"Orphaned file in media dir: {extra_file}")
|
messages.warning(None, f"Orphaned file in media dir: {extra_file}")
|
||||||
|
|
||||||
paperless_task.status = states.SUCCESS if not messages.has_error else states.FAILURE
|
paperless_task.status = states.SUCCESS if not messages.has_error else states.FAILURE
|
||||||
# result is concatenated messages
|
if messages.total_issue_count == 0:
|
||||||
paperless_task.result = f"{len(messages)} issues found."
|
paperless_task.result = "No issues found."
|
||||||
if messages.has_error:
|
else:
|
||||||
paperless_task.result += " Check logs for details."
|
parts: list[str] = []
|
||||||
|
if messages.document_error_count:
|
||||||
|
parts.append(f"{messages.document_error_count} document(s) with errors")
|
||||||
|
if messages.document_warning_count:
|
||||||
|
parts.append(f"{messages.document_warning_count} document(s) with warnings")
|
||||||
|
if messages.global_warning_count:
|
||||||
|
parts.append(f"{messages.global_warning_count} global warning(s)")
|
||||||
|
paperless_task.result = ", ".join(parts) + " found."
|
||||||
|
if messages.has_error:
|
||||||
|
paperless_task.result += " Check logs for details."
|
||||||
|
|
||||||
paperless_task.date_done = timezone.now()
|
paperless_task.date_done = timezone.now()
|
||||||
paperless_task.save(update_fields=["status", "result", "date_done"])
|
paperless_task.save(update_fields=["status", "result", "date_done"])
|
||||||
|
|
||||||
return messages
|
return messages
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import logging
|
|||||||
import shutil
|
import shutil
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
from celery import shared_task
|
from celery import shared_task
|
||||||
from celery import states
|
from celery import states
|
||||||
@@ -32,12 +33,14 @@ from documents.file_handling import create_source_path_directory
|
|||||||
from documents.file_handling import delete_empty_directories
|
from documents.file_handling import delete_empty_directories
|
||||||
from documents.file_handling import generate_filename
|
from documents.file_handling import generate_filename
|
||||||
from documents.file_handling import generate_unique_filename
|
from documents.file_handling import generate_unique_filename
|
||||||
|
from documents.models import Correspondent
|
||||||
from documents.models import CustomField
|
from documents.models import CustomField
|
||||||
from documents.models import CustomFieldInstance
|
from documents.models import CustomFieldInstance
|
||||||
from documents.models import Document
|
from documents.models import Document
|
||||||
from documents.models import MatchingModel
|
from documents.models import DocumentType
|
||||||
from documents.models import PaperlessTask
|
from documents.models import PaperlessTask
|
||||||
from documents.models import SavedView
|
from documents.models import SavedView
|
||||||
|
from documents.models import StoragePath
|
||||||
from documents.models import Tag
|
from documents.models import Tag
|
||||||
from documents.models import UiSettings
|
from documents.models import UiSettings
|
||||||
from documents.models import Workflow
|
from documents.models import Workflow
|
||||||
@@ -81,47 +84,41 @@ def add_inbox_tags(sender, document: Document, logging_group=None, **kwargs) ->
|
|||||||
document.add_nested_tags(inbox_tags)
|
document.add_nested_tags(inbox_tags)
|
||||||
|
|
||||||
|
|
||||||
def _suggestion_printer(
|
|
||||||
stdout,
|
|
||||||
style_func,
|
|
||||||
suggestion_type: str,
|
|
||||||
document: Document,
|
|
||||||
selected: MatchingModel,
|
|
||||||
base_url: str | None = None,
|
|
||||||
) -> None:
|
|
||||||
"""
|
|
||||||
Smaller helper to reduce duplication when just outputting suggestions to the console
|
|
||||||
"""
|
|
||||||
doc_str = str(document)
|
|
||||||
if base_url is not None:
|
|
||||||
stdout.write(style_func.SUCCESS(doc_str))
|
|
||||||
stdout.write(style_func.SUCCESS(f"{base_url}/documents/{document.pk}"))
|
|
||||||
else:
|
|
||||||
stdout.write(style_func.SUCCESS(f"{doc_str} [{document.pk}]"))
|
|
||||||
stdout.write(f"Suggest {suggestion_type}: {selected}")
|
|
||||||
|
|
||||||
|
|
||||||
def set_correspondent(
|
def set_correspondent(
|
||||||
sender,
|
sender: object,
|
||||||
document: Document,
|
document: Document,
|
||||||
*,
|
*,
|
||||||
logging_group=None,
|
logging_group: object = None,
|
||||||
classifier: DocumentClassifier | None = None,
|
classifier: DocumentClassifier | None = None,
|
||||||
replace=False,
|
replace: bool = False,
|
||||||
use_first=True,
|
use_first: bool = True,
|
||||||
suggest=False,
|
dry_run: bool = False,
|
||||||
base_url=None,
|
**kwargs: Any,
|
||||||
stdout=None,
|
) -> Correspondent | None:
|
||||||
style_func=None,
|
"""
|
||||||
**kwargs,
|
Assign a correspondent to a document based on classifier results.
|
||||||
) -> None:
|
|
||||||
|
Args:
|
||||||
|
document: The document to classify.
|
||||||
|
logging_group: Optional logging group for structured log output.
|
||||||
|
classifier: The trained classifier. If None, only rule-based matching runs.
|
||||||
|
replace: If True, overwrite an existing correspondent assignment.
|
||||||
|
use_first: If True, pick the first match when multiple correspondents
|
||||||
|
match. If False, skip assignment when multiple match.
|
||||||
|
dry_run: If True, compute and return the selection without saving.
|
||||||
|
**kwargs: Absorbed for Django signal compatibility (e.g. sender, signal).
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
The correspondent that was (or would be) assigned, or None if no match
|
||||||
|
was found or assignment was skipped.
|
||||||
|
"""
|
||||||
if document.correspondent and not replace:
|
if document.correspondent and not replace:
|
||||||
return
|
return None
|
||||||
|
|
||||||
potential_correspondents = matching.match_correspondents(document, classifier)
|
potential_correspondents = matching.match_correspondents(document, classifier)
|
||||||
|
|
||||||
potential_count = len(potential_correspondents)
|
potential_count = len(potential_correspondents)
|
||||||
selected = potential_correspondents[0] if potential_correspondents else None
|
selected = potential_correspondents[0] if potential_correspondents else None
|
||||||
|
|
||||||
if potential_count > 1:
|
if potential_count > 1:
|
||||||
if use_first:
|
if use_first:
|
||||||
logger.debug(
|
logger.debug(
|
||||||
@@ -135,49 +132,53 @@ def set_correspondent(
|
|||||||
f"not assigning any correspondent",
|
f"not assigning any correspondent",
|
||||||
extra={"group": logging_group},
|
extra={"group": logging_group},
|
||||||
)
|
)
|
||||||
return
|
return None
|
||||||
|
|
||||||
if selected or replace:
|
if (selected or replace) and not dry_run:
|
||||||
if suggest:
|
logger.info(
|
||||||
_suggestion_printer(
|
f"Assigning correspondent {selected} to {document}",
|
||||||
stdout,
|
extra={"group": logging_group},
|
||||||
style_func,
|
)
|
||||||
"correspondent",
|
document.correspondent = selected
|
||||||
document,
|
document.save(update_fields=("correspondent",))
|
||||||
selected,
|
|
||||||
base_url,
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
logger.info(
|
|
||||||
f"Assigning correspondent {selected} to {document}",
|
|
||||||
extra={"group": logging_group},
|
|
||||||
)
|
|
||||||
|
|
||||||
document.correspondent = selected
|
return selected
|
||||||
document.save(update_fields=("correspondent",))
|
|
||||||
|
|
||||||
|
|
||||||
def set_document_type(
|
def set_document_type(
|
||||||
sender,
|
sender: object,
|
||||||
document: Document,
|
document: Document,
|
||||||
*,
|
*,
|
||||||
logging_group=None,
|
logging_group: object = None,
|
||||||
classifier: DocumentClassifier | None = None,
|
classifier: DocumentClassifier | None = None,
|
||||||
replace=False,
|
replace: bool = False,
|
||||||
use_first=True,
|
use_first: bool = True,
|
||||||
suggest=False,
|
dry_run: bool = False,
|
||||||
base_url=None,
|
**kwargs: Any,
|
||||||
stdout=None,
|
) -> DocumentType | None:
|
||||||
style_func=None,
|
"""
|
||||||
**kwargs,
|
Assign a document type to a document based on classifier results.
|
||||||
) -> None:
|
|
||||||
|
Args:
|
||||||
|
document: The document to classify.
|
||||||
|
logging_group: Optional logging group for structured log output.
|
||||||
|
classifier: The trained classifier. If None, only rule-based matching runs.
|
||||||
|
replace: If True, overwrite an existing document type assignment.
|
||||||
|
use_first: If True, pick the first match when multiple types match.
|
||||||
|
If False, skip assignment when multiple match.
|
||||||
|
dry_run: If True, compute and return the selection without saving.
|
||||||
|
**kwargs: Absorbed for Django signal compatibility (e.g. sender, signal).
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
The document type that was (or would be) assigned, or None if no match
|
||||||
|
was found or assignment was skipped.
|
||||||
|
"""
|
||||||
if document.document_type and not replace:
|
if document.document_type and not replace:
|
||||||
return
|
return None
|
||||||
|
|
||||||
potential_document_type = matching.match_document_types(document, classifier)
|
potential_document_types = matching.match_document_types(document, classifier)
|
||||||
|
potential_count = len(potential_document_types)
|
||||||
potential_count = len(potential_document_type)
|
selected = potential_document_types[0] if potential_document_types else None
|
||||||
selected = potential_document_type[0] if potential_document_type else None
|
|
||||||
|
|
||||||
if potential_count > 1:
|
if potential_count > 1:
|
||||||
if use_first:
|
if use_first:
|
||||||
@@ -192,42 +193,64 @@ def set_document_type(
|
|||||||
f"not assigning any document type",
|
f"not assigning any document type",
|
||||||
extra={"group": logging_group},
|
extra={"group": logging_group},
|
||||||
)
|
)
|
||||||
return
|
return None
|
||||||
|
|
||||||
if selected or replace:
|
if (selected or replace) and not dry_run:
|
||||||
if suggest:
|
logger.info(
|
||||||
_suggestion_printer(
|
f"Assigning document type {selected} to {document}",
|
||||||
stdout,
|
extra={"group": logging_group},
|
||||||
style_func,
|
)
|
||||||
"document type",
|
document.document_type = selected
|
||||||
document,
|
document.save(update_fields=("document_type",))
|
||||||
selected,
|
|
||||||
base_url,
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
logger.info(
|
|
||||||
f"Assigning document type {selected} to {document}",
|
|
||||||
extra={"group": logging_group},
|
|
||||||
)
|
|
||||||
|
|
||||||
document.document_type = selected
|
return selected
|
||||||
document.save(update_fields=("document_type",))
|
|
||||||
|
|
||||||
|
|
||||||
def set_tags(
|
def set_tags(
|
||||||
sender,
|
sender: object,
|
||||||
document: Document,
|
document: Document,
|
||||||
*,
|
*,
|
||||||
logging_group=None,
|
logging_group: object = None,
|
||||||
classifier: DocumentClassifier | None = None,
|
classifier: DocumentClassifier | None = None,
|
||||||
replace=False,
|
replace: bool = False,
|
||||||
suggest=False,
|
dry_run: bool = False,
|
||||||
base_url=None,
|
**kwargs: Any,
|
||||||
stdout=None,
|
) -> tuple[set[Tag], set[Tag]]:
|
||||||
style_func=None,
|
"""
|
||||||
**kwargs,
|
Assign tags to a document based on classifier results.
|
||||||
) -> None:
|
|
||||||
|
When replace=True, existing auto-matched and rule-matched tags are removed
|
||||||
|
before applying the new set (inbox tags and manually-added tags are preserved).
|
||||||
|
|
||||||
|
Args:
|
||||||
|
document: The document to classify.
|
||||||
|
logging_group: Optional logging group for structured log output.
|
||||||
|
classifier: The trained classifier. If None, only rule-based matching runs.
|
||||||
|
replace: If True, remove existing classifier-managed tags before applying
|
||||||
|
new ones. Inbox tags and manually-added tags are always preserved.
|
||||||
|
dry_run: If True, compute what would change without saving anything.
|
||||||
|
**kwargs: Absorbed for Django signal compatibility (e.g. sender, signal).
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A two-tuple of (tags_added, tags_removed). In non-replace mode,
|
||||||
|
tags_removed is always an empty set. In dry_run mode, neither set
|
||||||
|
is applied to the database.
|
||||||
|
"""
|
||||||
|
# Compute which tags would be removed under replace mode.
|
||||||
|
# The filter mirrors the .delete() call below: keep inbox tags and
|
||||||
|
# manually-added tags (match="" and not auto-matched).
|
||||||
if replace:
|
if replace:
|
||||||
|
tags_to_remove: set[Tag] = set(
|
||||||
|
document.tags.exclude(
|
||||||
|
is_inbox_tag=True,
|
||||||
|
).exclude(
|
||||||
|
Q(match="") & ~Q(matching_algorithm=Tag.MATCH_AUTO),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
tags_to_remove = set()
|
||||||
|
|
||||||
|
if replace and not dry_run:
|
||||||
Document.tags.through.objects.filter(document=document).exclude(
|
Document.tags.through.objects.filter(document=document).exclude(
|
||||||
Q(tag__is_inbox_tag=True),
|
Q(tag__is_inbox_tag=True),
|
||||||
).exclude(
|
).exclude(
|
||||||
@@ -235,65 +258,53 @@ def set_tags(
|
|||||||
).delete()
|
).delete()
|
||||||
|
|
||||||
current_tags = set(document.tags.all())
|
current_tags = set(document.tags.all())
|
||||||
|
|
||||||
matched_tags = matching.match_tags(document, classifier)
|
matched_tags = matching.match_tags(document, classifier)
|
||||||
|
tags_to_add = set(matched_tags) - current_tags
|
||||||
|
|
||||||
relevant_tags = set(matched_tags) - current_tags
|
if tags_to_add and not dry_run:
|
||||||
|
|
||||||
if suggest:
|
|
||||||
extra_tags = current_tags - set(matched_tags)
|
|
||||||
extra_tags = [
|
|
||||||
t for t in extra_tags if t.matching_algorithm == MatchingModel.MATCH_AUTO
|
|
||||||
]
|
|
||||||
if not relevant_tags and not extra_tags:
|
|
||||||
return
|
|
||||||
doc_str = style_func.SUCCESS(str(document))
|
|
||||||
if base_url:
|
|
||||||
stdout.write(doc_str)
|
|
||||||
stdout.write(f"{base_url}/documents/{document.pk}")
|
|
||||||
else:
|
|
||||||
stdout.write(doc_str + style_func.SUCCESS(f" [{document.pk}]"))
|
|
||||||
if relevant_tags:
|
|
||||||
stdout.write("Suggest tags: " + ", ".join([t.name for t in relevant_tags]))
|
|
||||||
if extra_tags:
|
|
||||||
stdout.write("Extra tags: " + ", ".join([t.name for t in extra_tags]))
|
|
||||||
else:
|
|
||||||
if not relevant_tags:
|
|
||||||
return
|
|
||||||
|
|
||||||
message = 'Tagging "{}" with "{}"'
|
|
||||||
logger.info(
|
logger.info(
|
||||||
message.format(document, ", ".join([t.name for t in relevant_tags])),
|
f'Tagging "{document}" with "{", ".join(t.name for t in tags_to_add)}"',
|
||||||
extra={"group": logging_group},
|
extra={"group": logging_group},
|
||||||
)
|
)
|
||||||
|
document.add_nested_tags(tags_to_add)
|
||||||
|
|
||||||
document.add_nested_tags(relevant_tags)
|
return tags_to_add, tags_to_remove
|
||||||
|
|
||||||
|
|
||||||
def set_storage_path(
|
def set_storage_path(
|
||||||
sender,
|
sender: object,
|
||||||
document: Document,
|
document: Document,
|
||||||
*,
|
*,
|
||||||
logging_group=None,
|
logging_group: object = None,
|
||||||
classifier: DocumentClassifier | None = None,
|
classifier: DocumentClassifier | None = None,
|
||||||
replace=False,
|
replace: bool = False,
|
||||||
use_first=True,
|
use_first: bool = True,
|
||||||
suggest=False,
|
dry_run: bool = False,
|
||||||
base_url=None,
|
**kwargs: Any,
|
||||||
stdout=None,
|
) -> StoragePath | None:
|
||||||
style_func=None,
|
"""
|
||||||
**kwargs,
|
Assign a storage path to a document based on classifier results.
|
||||||
) -> None:
|
|
||||||
|
Args:
|
||||||
|
document: The document to classify.
|
||||||
|
logging_group: Optional logging group for structured log output.
|
||||||
|
classifier: The trained classifier. If None, only rule-based matching runs.
|
||||||
|
replace: If True, overwrite an existing storage path assignment.
|
||||||
|
use_first: If True, pick the first match when multiple paths match.
|
||||||
|
If False, skip assignment when multiple match.
|
||||||
|
dry_run: If True, compute and return the selection without saving.
|
||||||
|
**kwargs: Absorbed for Django signal compatibility (e.g. sender, signal).
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
The storage path that was (or would be) assigned, or None if no match
|
||||||
|
was found or assignment was skipped.
|
||||||
|
"""
|
||||||
if document.storage_path and not replace:
|
if document.storage_path and not replace:
|
||||||
return
|
return None
|
||||||
|
|
||||||
potential_storage_path = matching.match_storage_paths(
|
potential_storage_paths = matching.match_storage_paths(document, classifier)
|
||||||
document,
|
potential_count = len(potential_storage_paths)
|
||||||
classifier,
|
selected = potential_storage_paths[0] if potential_storage_paths else None
|
||||||
)
|
|
||||||
|
|
||||||
potential_count = len(potential_storage_path)
|
|
||||||
selected = potential_storage_path[0] if potential_storage_path else None
|
|
||||||
|
|
||||||
if potential_count > 1:
|
if potential_count > 1:
|
||||||
if use_first:
|
if use_first:
|
||||||
@@ -308,26 +319,17 @@ def set_storage_path(
|
|||||||
f"not assigning any storage directory",
|
f"not assigning any storage directory",
|
||||||
extra={"group": logging_group},
|
extra={"group": logging_group},
|
||||||
)
|
)
|
||||||
return
|
return None
|
||||||
|
|
||||||
if selected or replace:
|
if (selected or replace) and not dry_run:
|
||||||
if suggest:
|
logger.info(
|
||||||
_suggestion_printer(
|
f"Assigning storage path {selected} to {document}",
|
||||||
stdout,
|
extra={"group": logging_group},
|
||||||
style_func,
|
)
|
||||||
"storage directory",
|
document.storage_path = selected
|
||||||
document,
|
document.save(update_fields=("storage_path",))
|
||||||
selected,
|
|
||||||
base_url,
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
logger.info(
|
|
||||||
f"Assigning storage path {selected} to {document}",
|
|
||||||
extra={"group": logging_group},
|
|
||||||
)
|
|
||||||
|
|
||||||
document.storage_path = selected
|
return selected
|
||||||
document.save(update_fields=("storage_path",))
|
|
||||||
|
|
||||||
|
|
||||||
# see empty_trash in documents/tasks.py for signal handling
|
# see empty_trash in documents/tasks.py for signal handling
|
||||||
|
|||||||
@@ -4,11 +4,13 @@ import logging
|
|||||||
import shutil
|
import shutil
|
||||||
import uuid
|
import uuid
|
||||||
import zipfile
|
import zipfile
|
||||||
|
from collections.abc import Callable
|
||||||
|
from collections.abc import Iterable
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from tempfile import TemporaryDirectory
|
from tempfile import TemporaryDirectory
|
||||||
from tempfile import mkstemp
|
from tempfile import mkstemp
|
||||||
|
from typing import TypeVar
|
||||||
|
|
||||||
import tqdm
|
|
||||||
from celery import Task
|
from celery import Task
|
||||||
from celery import shared_task
|
from celery import shared_task
|
||||||
from celery import states
|
from celery import states
|
||||||
@@ -66,11 +68,19 @@ from paperless_ai.indexing import llm_index_add_or_update_document
|
|||||||
from paperless_ai.indexing import llm_index_remove_document
|
from paperless_ai.indexing import llm_index_remove_document
|
||||||
from paperless_ai.indexing import update_llm_index
|
from paperless_ai.indexing import update_llm_index
|
||||||
|
|
||||||
|
_T = TypeVar("_T")
|
||||||
|
IterWrapper = Callable[[Iterable[_T]], Iterable[_T]]
|
||||||
|
|
||||||
|
|
||||||
if settings.AUDIT_LOG_ENABLED:
|
if settings.AUDIT_LOG_ENABLED:
|
||||||
from auditlog.models import LogEntry
|
from auditlog.models import LogEntry
|
||||||
logger = logging.getLogger("paperless.tasks")
|
logger = logging.getLogger("paperless.tasks")
|
||||||
|
|
||||||
|
|
||||||
|
def _identity(iterable: Iterable[_T]) -> Iterable[_T]:
|
||||||
|
return iterable
|
||||||
|
|
||||||
|
|
||||||
@shared_task
|
@shared_task
|
||||||
def index_optimize() -> None:
|
def index_optimize() -> None:
|
||||||
ix = index.open_index()
|
ix = index.open_index()
|
||||||
@@ -78,13 +88,13 @@ def index_optimize() -> None:
|
|||||||
writer.commit(optimize=True)
|
writer.commit(optimize=True)
|
||||||
|
|
||||||
|
|
||||||
def index_reindex(*, progress_bar_disable=False) -> None:
|
def index_reindex(*, iter_wrapper: IterWrapper[Document] = _identity) -> None:
|
||||||
documents = Document.objects.all()
|
documents = Document.objects.all()
|
||||||
|
|
||||||
ix = index.open_index(recreate=True)
|
ix = index.open_index(recreate=True)
|
||||||
|
|
||||||
with AsyncWriter(ix) as writer:
|
with AsyncWriter(ix) as writer:
|
||||||
for document in tqdm.tqdm(documents, disable=progress_bar_disable):
|
for document in iter_wrapper(documents):
|
||||||
index.update_document(writer, document)
|
index.update_document(writer, document)
|
||||||
|
|
||||||
|
|
||||||
@@ -227,20 +237,30 @@ def consume_file(
|
|||||||
@shared_task
|
@shared_task
|
||||||
def sanity_check(*, scheduled=True, raise_on_error=True):
|
def sanity_check(*, scheduled=True, raise_on_error=True):
|
||||||
messages = sanity_checker.check_sanity(scheduled=scheduled)
|
messages = sanity_checker.check_sanity(scheduled=scheduled)
|
||||||
|
|
||||||
messages.log_messages()
|
messages.log_messages()
|
||||||
|
|
||||||
|
if not messages.has_error and not messages.has_warning and not messages.has_info:
|
||||||
|
return "No issues detected."
|
||||||
|
|
||||||
|
parts: list[str] = []
|
||||||
|
if messages.document_error_count:
|
||||||
|
parts.append(f"{messages.document_error_count} document(s) with errors")
|
||||||
|
if messages.document_warning_count:
|
||||||
|
parts.append(f"{messages.document_warning_count} document(s) with warnings")
|
||||||
|
if messages.document_info_count:
|
||||||
|
parts.append(f"{messages.document_info_count} document(s) with infos")
|
||||||
|
if messages.global_warning_count:
|
||||||
|
parts.append(f"{messages.global_warning_count} global warning(s)")
|
||||||
|
|
||||||
|
summary = ", ".join(parts) + " found."
|
||||||
|
|
||||||
if messages.has_error:
|
if messages.has_error:
|
||||||
message = "Sanity check exited with errors. See log."
|
message = summary + " Check logs for details."
|
||||||
if raise_on_error:
|
if raise_on_error:
|
||||||
raise SanityCheckFailedException(message)
|
raise SanityCheckFailedException(message)
|
||||||
return message
|
return message
|
||||||
elif messages.has_warning:
|
|
||||||
return "Sanity check exited with warnings. See log."
|
return summary
|
||||||
elif len(messages) > 0:
|
|
||||||
return "Sanity check exited with infos. See log."
|
|
||||||
else:
|
|
||||||
return "No issues detected."
|
|
||||||
|
|
||||||
|
|
||||||
@shared_task
|
@shared_task
|
||||||
@@ -265,7 +285,6 @@ def bulk_update_documents(document_ids) -> None:
|
|||||||
ai_config = AIConfig()
|
ai_config = AIConfig()
|
||||||
if ai_config.llm_index_enabled:
|
if ai_config.llm_index_enabled:
|
||||||
update_llm_index(
|
update_llm_index(
|
||||||
progress_bar_disable=True,
|
|
||||||
rebuild=False,
|
rebuild=False,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -606,7 +625,7 @@ def update_document_parent_tags(tag: Tag, new_parent: Tag) -> None:
|
|||||||
@shared_task
|
@shared_task
|
||||||
def llmindex_index(
|
def llmindex_index(
|
||||||
*,
|
*,
|
||||||
progress_bar_disable=True,
|
iter_wrapper: IterWrapper[Document] = _identity,
|
||||||
rebuild=False,
|
rebuild=False,
|
||||||
scheduled=True,
|
scheduled=True,
|
||||||
auto=False,
|
auto=False,
|
||||||
@@ -629,7 +648,7 @@ def llmindex_index(
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
result = update_llm_index(
|
result = update_llm_index(
|
||||||
progress_bar_disable=progress_bar_disable,
|
iter_wrapper=iter_wrapper,
|
||||||
rebuild=rebuild,
|
rebuild=rebuild,
|
||||||
)
|
)
|
||||||
task.status = states.SUCCESS
|
task.status = states.SUCCESS
|
||||||
|
|||||||
@@ -1,10 +1,96 @@
|
|||||||
|
import shutil
|
||||||
import zoneinfo
|
import zoneinfo
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
|
import filelock
|
||||||
import pytest
|
import pytest
|
||||||
from django.contrib.auth import get_user_model
|
from django.contrib.auth import get_user_model
|
||||||
from pytest_django.fixtures import SettingsWrapper
|
from pytest_django.fixtures import SettingsWrapper
|
||||||
from rest_framework.test import APIClient
|
from rest_framework.test import APIClient
|
||||||
|
|
||||||
|
from documents.tests.factories import DocumentFactory
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from documents.models import Document
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True, slots=True)
|
||||||
|
class PaperlessDirs:
|
||||||
|
"""Standard Paperless-ngx directory layout for tests."""
|
||||||
|
|
||||||
|
media: Path
|
||||||
|
originals: Path
|
||||||
|
archive: Path
|
||||||
|
thumbnails: Path
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
|
||||||
|
def samples_dir() -> Path:
|
||||||
|
"""Path to the shared test sample documents."""
|
||||||
|
return Path(__file__).parent / "samples" / "documents"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
|
||||||
|
def paperless_dirs(tmp_path: Path) -> PaperlessDirs:
|
||||||
|
"""Create and return the directory structure for testing."""
|
||||||
|
media = tmp_path / "media"
|
||||||
|
dirs = PaperlessDirs(
|
||||||
|
media=media,
|
||||||
|
originals=media / "documents" / "originals",
|
||||||
|
archive=media / "documents" / "archive",
|
||||||
|
thumbnails=media / "documents" / "thumbnails",
|
||||||
|
)
|
||||||
|
for d in (dirs.originals, dirs.archive, dirs.thumbnails):
|
||||||
|
d.mkdir(parents=True)
|
||||||
|
return dirs
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
|
||||||
|
def _media_settings(paperless_dirs: PaperlessDirs, settings) -> None:
|
||||||
|
"""Configure Django settings to point at temp directories."""
|
||||||
|
settings.MEDIA_ROOT = paperless_dirs.media
|
||||||
|
settings.ORIGINALS_DIR = paperless_dirs.originals
|
||||||
|
settings.ARCHIVE_DIR = paperless_dirs.archive
|
||||||
|
settings.THUMBNAIL_DIR = paperless_dirs.thumbnails
|
||||||
|
settings.MEDIA_LOCK = paperless_dirs.media / "media.lock"
|
||||||
|
settings.IGNORABLE_FILES = {".DS_Store", "Thumbs.db", "desktop.ini"}
|
||||||
|
settings.APP_LOGO = ""
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
|
||||||
|
def sample_doc(
|
||||||
|
paperless_dirs: PaperlessDirs,
|
||||||
|
_media_settings: None,
|
||||||
|
samples_dir: Path,
|
||||||
|
) -> "Document":
|
||||||
|
"""Create a document with valid files and matching checksums."""
|
||||||
|
with filelock.FileLock(paperless_dirs.media / "media.lock"):
|
||||||
|
shutil.copy(
|
||||||
|
samples_dir / "originals" / "0000001.pdf",
|
||||||
|
paperless_dirs.originals / "0000001.pdf",
|
||||||
|
)
|
||||||
|
shutil.copy(
|
||||||
|
samples_dir / "archive" / "0000001.pdf",
|
||||||
|
paperless_dirs.archive / "0000001.pdf",
|
||||||
|
)
|
||||||
|
shutil.copy(
|
||||||
|
samples_dir / "thumbnails" / "0000001.webp",
|
||||||
|
paperless_dirs.thumbnails / "0000001.webp",
|
||||||
|
)
|
||||||
|
|
||||||
|
return DocumentFactory(
|
||||||
|
title="test",
|
||||||
|
checksum="42995833e01aea9b3edee44bbfdd7ce1",
|
||||||
|
archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b",
|
||||||
|
content="test content",
|
||||||
|
pk=1,
|
||||||
|
filename="0000001.pdf",
|
||||||
|
mime_type="application/pdf",
|
||||||
|
archive_filename="0000001.pdf",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture()
|
@pytest.fixture()
|
||||||
def settings_timezone(settings: SettingsWrapper) -> zoneinfo.ZoneInfo:
|
def settings_timezone(settings: SettingsWrapper) -> zoneinfo.ZoneInfo:
|
||||||
@@ -28,3 +114,14 @@ def authenticated_rest_api_client(rest_api_client: APIClient):
|
|||||||
user = UserModel.objects.create_user(username="testuser", password="password")
|
user = UserModel.objects.create_user(username="testuser", password="password")
|
||||||
rest_api_client.force_authenticate(user=user)
|
rest_api_client.force_authenticate(user=user)
|
||||||
yield rest_api_client
|
yield rest_api_client
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session", autouse=True)
|
||||||
|
def faker_session_locale():
|
||||||
|
"""Set Faker locale for reproducibility."""
|
||||||
|
return "en_US"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session", autouse=True)
|
||||||
|
def faker_seed():
|
||||||
|
return 12345
|
||||||
|
|||||||
@@ -24,7 +24,7 @@ def base_config() -> DateParserConfig:
|
|||||||
12,
|
12,
|
||||||
0,
|
0,
|
||||||
0,
|
0,
|
||||||
tzinfo=datetime.timezone.utc,
|
tzinfo=datetime.UTC,
|
||||||
),
|
),
|
||||||
filename_date_order="YMD",
|
filename_date_order="YMD",
|
||||||
content_date_order="DMY",
|
content_date_order="DMY",
|
||||||
@@ -45,7 +45,7 @@ def config_with_ignore_dates() -> DateParserConfig:
|
|||||||
12,
|
12,
|
||||||
0,
|
0,
|
||||||
0,
|
0,
|
||||||
tzinfo=datetime.timezone.utc,
|
tzinfo=datetime.UTC,
|
||||||
),
|
),
|
||||||
filename_date_order="DMY",
|
filename_date_order="DMY",
|
||||||
content_date_order="MDY",
|
content_date_order="MDY",
|
||||||
|
|||||||
@@ -101,50 +101,50 @@ class TestFilterDate:
|
|||||||
[
|
[
|
||||||
# Valid Dates
|
# Valid Dates
|
||||||
pytest.param(
|
pytest.param(
|
||||||
datetime.datetime(2024, 1, 10, tzinfo=datetime.timezone.utc),
|
datetime.datetime(2024, 1, 10, tzinfo=datetime.UTC),
|
||||||
datetime.datetime(2024, 1, 10, tzinfo=datetime.timezone.utc),
|
datetime.datetime(2024, 1, 10, tzinfo=datetime.UTC),
|
||||||
id="valid_past_date",
|
id="valid_past_date",
|
||||||
),
|
),
|
||||||
pytest.param(
|
pytest.param(
|
||||||
datetime.datetime(2024, 1, 15, 12, 0, 0, tzinfo=datetime.timezone.utc),
|
datetime.datetime(2024, 1, 15, 12, 0, 0, tzinfo=datetime.UTC),
|
||||||
datetime.datetime(2024, 1, 15, 12, 0, 0, tzinfo=datetime.timezone.utc),
|
datetime.datetime(2024, 1, 15, 12, 0, 0, tzinfo=datetime.UTC),
|
||||||
id="exactly_at_reference",
|
id="exactly_at_reference",
|
||||||
),
|
),
|
||||||
pytest.param(
|
pytest.param(
|
||||||
datetime.datetime(1901, 1, 1, tzinfo=datetime.timezone.utc),
|
datetime.datetime(1901, 1, 1, tzinfo=datetime.UTC),
|
||||||
datetime.datetime(1901, 1, 1, tzinfo=datetime.timezone.utc),
|
datetime.datetime(1901, 1, 1, tzinfo=datetime.UTC),
|
||||||
id="year_1901_valid",
|
id="year_1901_valid",
|
||||||
),
|
),
|
||||||
# Date is > reference_time
|
# Date is > reference_time
|
||||||
pytest.param(
|
pytest.param(
|
||||||
datetime.datetime(2024, 1, 16, tzinfo=datetime.timezone.utc),
|
datetime.datetime(2024, 1, 16, tzinfo=datetime.UTC),
|
||||||
None,
|
None,
|
||||||
id="future_date_day_after",
|
id="future_date_day_after",
|
||||||
),
|
),
|
||||||
# date.date() in ignore_dates
|
# date.date() in ignore_dates
|
||||||
pytest.param(
|
pytest.param(
|
||||||
datetime.datetime(2024, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc),
|
datetime.datetime(2024, 1, 1, 0, 0, 0, tzinfo=datetime.UTC),
|
||||||
None,
|
None,
|
||||||
id="ignored_date_midnight_jan1",
|
id="ignored_date_midnight_jan1",
|
||||||
),
|
),
|
||||||
pytest.param(
|
pytest.param(
|
||||||
datetime.datetime(2024, 1, 1, 10, 30, 0, tzinfo=datetime.timezone.utc),
|
datetime.datetime(2024, 1, 1, 10, 30, 0, tzinfo=datetime.UTC),
|
||||||
None,
|
None,
|
||||||
id="ignored_date_midday_jan1",
|
id="ignored_date_midday_jan1",
|
||||||
),
|
),
|
||||||
pytest.param(
|
pytest.param(
|
||||||
datetime.datetime(2024, 12, 25, 15, 0, 0, tzinfo=datetime.timezone.utc),
|
datetime.datetime(2024, 12, 25, 15, 0, 0, tzinfo=datetime.UTC),
|
||||||
None,
|
None,
|
||||||
id="ignored_date_dec25_future",
|
id="ignored_date_dec25_future",
|
||||||
),
|
),
|
||||||
# date.year <= 1900
|
# date.year <= 1900
|
||||||
pytest.param(
|
pytest.param(
|
||||||
datetime.datetime(1899, 12, 31, tzinfo=datetime.timezone.utc),
|
datetime.datetime(1899, 12, 31, tzinfo=datetime.UTC),
|
||||||
None,
|
None,
|
||||||
id="year_1899",
|
id="year_1899",
|
||||||
),
|
),
|
||||||
pytest.param(
|
pytest.param(
|
||||||
datetime.datetime(1900, 1, 1, tzinfo=datetime.timezone.utc),
|
datetime.datetime(1900, 1, 1, tzinfo=datetime.UTC),
|
||||||
None,
|
None,
|
||||||
id="year_1900_boundary",
|
id="year_1900_boundary",
|
||||||
),
|
),
|
||||||
@@ -176,7 +176,7 @@ class TestFilterDate:
|
|||||||
1,
|
1,
|
||||||
12,
|
12,
|
||||||
0,
|
0,
|
||||||
tzinfo=datetime.timezone.utc,
|
tzinfo=datetime.UTC,
|
||||||
)
|
)
|
||||||
another_ignored = datetime.datetime(
|
another_ignored = datetime.datetime(
|
||||||
2024,
|
2024,
|
||||||
@@ -184,7 +184,7 @@ class TestFilterDate:
|
|||||||
25,
|
25,
|
||||||
15,
|
15,
|
||||||
30,
|
30,
|
||||||
tzinfo=datetime.timezone.utc,
|
tzinfo=datetime.UTC,
|
||||||
)
|
)
|
||||||
allowed_date = datetime.datetime(
|
allowed_date = datetime.datetime(
|
||||||
2024,
|
2024,
|
||||||
@@ -192,7 +192,7 @@ class TestFilterDate:
|
|||||||
2,
|
2,
|
||||||
12,
|
12,
|
||||||
0,
|
0,
|
||||||
tzinfo=datetime.timezone.utc,
|
tzinfo=datetime.UTC,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert parser._filter_date(ignored_date) is None
|
assert parser._filter_date(ignored_date) is None
|
||||||
@@ -204,7 +204,7 @@ class TestFilterDate:
|
|||||||
regex_parser: RegexDateParserPlugin,
|
regex_parser: RegexDateParserPlugin,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Should work with timezone-aware datetimes."""
|
"""Should work with timezone-aware datetimes."""
|
||||||
date_utc = datetime.datetime(2024, 1, 10, 12, 0, tzinfo=datetime.timezone.utc)
|
date_utc = datetime.datetime(2024, 1, 10, 12, 0, tzinfo=datetime.UTC)
|
||||||
|
|
||||||
result = regex_parser._filter_date(date_utc)
|
result = regex_parser._filter_date(date_utc)
|
||||||
|
|
||||||
@@ -221,8 +221,8 @@ class TestRegexDateParser:
|
|||||||
"report-2023-12-25.txt",
|
"report-2023-12-25.txt",
|
||||||
"Event recorded on 25/12/2022.",
|
"Event recorded on 25/12/2022.",
|
||||||
[
|
[
|
||||||
datetime.datetime(2023, 12, 25, tzinfo=datetime.timezone.utc),
|
datetime.datetime(2023, 12, 25, tzinfo=datetime.UTC),
|
||||||
datetime.datetime(2022, 12, 25, tzinfo=datetime.timezone.utc),
|
datetime.datetime(2022, 12, 25, tzinfo=datetime.UTC),
|
||||||
],
|
],
|
||||||
id="filename-y-m-d_and_content-d-m-y",
|
id="filename-y-m-d_and_content-d-m-y",
|
||||||
),
|
),
|
||||||
@@ -230,8 +230,8 @@ class TestRegexDateParser:
|
|||||||
"img_2023.01.02.jpg",
|
"img_2023.01.02.jpg",
|
||||||
"Taken on 01/02/2023",
|
"Taken on 01/02/2023",
|
||||||
[
|
[
|
||||||
datetime.datetime(2023, 1, 2, tzinfo=datetime.timezone.utc),
|
datetime.datetime(2023, 1, 2, tzinfo=datetime.UTC),
|
||||||
datetime.datetime(2023, 2, 1, tzinfo=datetime.timezone.utc),
|
datetime.datetime(2023, 2, 1, tzinfo=datetime.UTC),
|
||||||
],
|
],
|
||||||
id="ambiguous-dates-respect-orders",
|
id="ambiguous-dates-respect-orders",
|
||||||
),
|
),
|
||||||
@@ -239,7 +239,7 @@ class TestRegexDateParser:
|
|||||||
"notes.txt",
|
"notes.txt",
|
||||||
"bad date 99/99/9999 and 25/12/2022",
|
"bad date 99/99/9999 and 25/12/2022",
|
||||||
[
|
[
|
||||||
datetime.datetime(2022, 12, 25, tzinfo=datetime.timezone.utc),
|
datetime.datetime(2022, 12, 25, tzinfo=datetime.UTC),
|
||||||
],
|
],
|
||||||
id="parse-exception-skips-bad-and-yields-good",
|
id="parse-exception-skips-bad-and-yields-good",
|
||||||
),
|
),
|
||||||
@@ -275,24 +275,24 @@ class TestRegexDateParser:
|
|||||||
or "2023.12.25" in date_string
|
or "2023.12.25" in date_string
|
||||||
or "2023-12-25" in date_string
|
or "2023-12-25" in date_string
|
||||||
):
|
):
|
||||||
return datetime.datetime(2023, 12, 25, tzinfo=datetime.timezone.utc)
|
return datetime.datetime(2023, 12, 25, tzinfo=datetime.UTC)
|
||||||
|
|
||||||
# content DMY 25/12/2022
|
# content DMY 25/12/2022
|
||||||
if "25/12/2022" in date_string or "25-12-2022" in date_string:
|
if "25/12/2022" in date_string or "25-12-2022" in date_string:
|
||||||
return datetime.datetime(2022, 12, 25, tzinfo=datetime.timezone.utc)
|
return datetime.datetime(2022, 12, 25, tzinfo=datetime.UTC)
|
||||||
|
|
||||||
# filename YMD 2023.01.02
|
# filename YMD 2023.01.02
|
||||||
if "2023.01.02" in date_string or "2023-01-02" in date_string:
|
if "2023.01.02" in date_string or "2023-01-02" in date_string:
|
||||||
return datetime.datetime(2023, 1, 2, tzinfo=datetime.timezone.utc)
|
return datetime.datetime(2023, 1, 2, tzinfo=datetime.UTC)
|
||||||
|
|
||||||
# ambiguous 01/02/2023 -> respect DATE_ORDER setting
|
# ambiguous 01/02/2023 -> respect DATE_ORDER setting
|
||||||
if "01/02/2023" in date_string:
|
if "01/02/2023" in date_string:
|
||||||
if date_order == "DMY":
|
if date_order == "DMY":
|
||||||
return datetime.datetime(2023, 2, 1, tzinfo=datetime.timezone.utc)
|
return datetime.datetime(2023, 2, 1, tzinfo=datetime.UTC)
|
||||||
if date_order == "YMD":
|
if date_order == "YMD":
|
||||||
return datetime.datetime(2023, 1, 2, tzinfo=datetime.timezone.utc)
|
return datetime.datetime(2023, 1, 2, tzinfo=datetime.UTC)
|
||||||
# fallback
|
# fallback
|
||||||
return datetime.datetime(2023, 2, 1, tzinfo=datetime.timezone.utc)
|
return datetime.datetime(2023, 2, 1, tzinfo=datetime.UTC)
|
||||||
|
|
||||||
# simulate parse failure for malformed input
|
# simulate parse failure for malformed input
|
||||||
if "99/99/9999" in date_string or "bad date" in date_string:
|
if "99/99/9999" in date_string or "bad date" in date_string:
|
||||||
@@ -328,7 +328,7 @@ class TestRegexDateParser:
|
|||||||
12,
|
12,
|
||||||
0,
|
0,
|
||||||
0,
|
0,
|
||||||
tzinfo=datetime.timezone.utc,
|
tzinfo=datetime.UTC,
|
||||||
),
|
),
|
||||||
filename_date_order="YMD",
|
filename_date_order="YMD",
|
||||||
content_date_order="DMY",
|
content_date_order="DMY",
|
||||||
@@ -344,13 +344,13 @@ class TestRegexDateParser:
|
|||||||
) -> datetime.datetime | None:
|
) -> datetime.datetime | None:
|
||||||
if "10/12/2023" in date_string or "10-12-2023" in date_string:
|
if "10/12/2023" in date_string or "10-12-2023" in date_string:
|
||||||
# ignored date
|
# ignored date
|
||||||
return datetime.datetime(2023, 12, 10, tzinfo=datetime.timezone.utc)
|
return datetime.datetime(2023, 12, 10, tzinfo=datetime.UTC)
|
||||||
if "01/02/2024" in date_string or "01-02-2024" in date_string:
|
if "01/02/2024" in date_string or "01-02-2024" in date_string:
|
||||||
# future relative to reference_time -> filtered
|
# future relative to reference_time -> filtered
|
||||||
return datetime.datetime(2024, 2, 1, tzinfo=datetime.timezone.utc)
|
return datetime.datetime(2024, 2, 1, tzinfo=datetime.UTC)
|
||||||
if "05/01/2023" in date_string or "05-01-2023" in date_string:
|
if "05/01/2023" in date_string or "05-01-2023" in date_string:
|
||||||
# valid
|
# valid
|
||||||
return datetime.datetime(2023, 1, 5, tzinfo=datetime.timezone.utc)
|
return datetime.datetime(2023, 1, 5, tzinfo=datetime.UTC)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
mocker.patch(target, side_effect=fake_parse)
|
mocker.patch(target, side_effect=fake_parse)
|
||||||
@@ -358,7 +358,7 @@ class TestRegexDateParser:
|
|||||||
content = "Ignored: 10/12/2023, Future: 01/02/2024, Keep: 05/01/2023"
|
content = "Ignored: 10/12/2023, Future: 01/02/2024, Keep: 05/01/2023"
|
||||||
results = list(parser.parse("whatever.txt", content))
|
results = list(parser.parse("whatever.txt", content))
|
||||||
|
|
||||||
assert results == [datetime.datetime(2023, 1, 5, tzinfo=datetime.timezone.utc)]
|
assert results == [datetime.datetime(2023, 1, 5, tzinfo=datetime.UTC)]
|
||||||
|
|
||||||
def test_parse_handles_no_matches_and_returns_empty_list(
|
def test_parse_handles_no_matches_and_returns_empty_list(
|
||||||
self,
|
self,
|
||||||
@@ -392,7 +392,7 @@ class TestRegexDateParser:
|
|||||||
12,
|
12,
|
||||||
0,
|
0,
|
||||||
0,
|
0,
|
||||||
tzinfo=datetime.timezone.utc,
|
tzinfo=datetime.UTC,
|
||||||
),
|
),
|
||||||
filename_date_order=None,
|
filename_date_order=None,
|
||||||
content_date_order="DMY",
|
content_date_order="DMY",
|
||||||
@@ -409,9 +409,9 @@ class TestRegexDateParser:
|
|||||||
) -> datetime.datetime | None:
|
) -> datetime.datetime | None:
|
||||||
# return distinct datetimes so we can tell which source was parsed
|
# return distinct datetimes so we can tell which source was parsed
|
||||||
if "25/12/2022" in date_string:
|
if "25/12/2022" in date_string:
|
||||||
return datetime.datetime(2022, 12, 25, tzinfo=datetime.timezone.utc)
|
return datetime.datetime(2022, 12, 25, tzinfo=datetime.UTC)
|
||||||
if "2023-12-25" in date_string:
|
if "2023-12-25" in date_string:
|
||||||
return datetime.datetime(2023, 12, 25, tzinfo=datetime.timezone.utc)
|
return datetime.datetime(2023, 12, 25, tzinfo=datetime.UTC)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
mock = mocker.patch(target, side_effect=fake_parse)
|
mock = mocker.patch(target, side_effect=fake_parse)
|
||||||
@@ -429,5 +429,5 @@ class TestRegexDateParser:
|
|||||||
assert "25/12/2022" in called_date_string
|
assert "25/12/2022" in called_date_string
|
||||||
# And the parser should have yielded the corresponding datetime
|
# And the parser should have yielded the corresponding datetime
|
||||||
assert results == [
|
assert results == [
|
||||||
datetime.datetime(2022, 12, 25, tzinfo=datetime.timezone.utc),
|
datetime.datetime(2022, 12, 25, tzinfo=datetime.UTC),
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -1,17 +1,67 @@
|
|||||||
from factory import Faker
|
"""
|
||||||
|
Factory-boy factories for documents app models.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import factory
|
||||||
from factory.django import DjangoModelFactory
|
from factory.django import DjangoModelFactory
|
||||||
|
|
||||||
from documents.models import Correspondent
|
from documents.models import Correspondent
|
||||||
from documents.models import Document
|
from documents.models import Document
|
||||||
|
from documents.models import DocumentType
|
||||||
|
from documents.models import MatchingModel
|
||||||
|
from documents.models import StoragePath
|
||||||
|
from documents.models import Tag
|
||||||
|
|
||||||
|
|
||||||
class CorrespondentFactory(DjangoModelFactory):
|
class CorrespondentFactory(DjangoModelFactory):
|
||||||
class Meta:
|
class Meta:
|
||||||
model = Correspondent
|
model = Correspondent
|
||||||
|
|
||||||
name = Faker("name")
|
name = factory.Sequence(lambda n: f"{factory.Faker('company')} {n}")
|
||||||
|
match = ""
|
||||||
|
matching_algorithm = MatchingModel.MATCH_NONE
|
||||||
|
|
||||||
|
|
||||||
|
class DocumentTypeFactory(DjangoModelFactory):
|
||||||
|
class Meta:
|
||||||
|
model = DocumentType
|
||||||
|
|
||||||
|
name = factory.Sequence(lambda n: f"{factory.Faker('bs')} {n}")
|
||||||
|
match = ""
|
||||||
|
matching_algorithm = MatchingModel.MATCH_NONE
|
||||||
|
|
||||||
|
|
||||||
|
class TagFactory(DjangoModelFactory):
|
||||||
|
class Meta:
|
||||||
|
model = Tag
|
||||||
|
|
||||||
|
name = factory.Sequence(lambda n: f"{factory.Faker('word')} {n}")
|
||||||
|
match = ""
|
||||||
|
matching_algorithm = MatchingModel.MATCH_NONE
|
||||||
|
is_inbox_tag = False
|
||||||
|
|
||||||
|
|
||||||
|
class StoragePathFactory(DjangoModelFactory):
|
||||||
|
class Meta:
|
||||||
|
model = StoragePath
|
||||||
|
|
||||||
|
name = factory.Sequence(
|
||||||
|
lambda n: f"{factory.Faker('file_path', depth=2, extension='')} {n}",
|
||||||
|
)
|
||||||
|
path = factory.LazyAttribute(lambda o: f"{o.name}/{{title}}")
|
||||||
|
match = ""
|
||||||
|
matching_algorithm = MatchingModel.MATCH_NONE
|
||||||
|
|
||||||
|
|
||||||
class DocumentFactory(DjangoModelFactory):
|
class DocumentFactory(DjangoModelFactory):
|
||||||
class Meta:
|
class Meta:
|
||||||
model = Document
|
model = Document
|
||||||
|
|
||||||
|
title = factory.Faker("sentence", nb_words=4)
|
||||||
|
checksum = factory.Faker("md5")
|
||||||
|
content = factory.Faker("paragraph")
|
||||||
|
correspondent = None
|
||||||
|
document_type = None
|
||||||
|
storage_path = None
|
||||||
|
|||||||
193
src/documents/tests/management/test_management_sanity_checker.py
Normal file
193
src/documents/tests/management/test_management_sanity_checker.py
Normal file
@@ -0,0 +1,193 @@
|
|||||||
|
"""Tests for the document_sanity_checker management command.
|
||||||
|
|
||||||
|
Verifies Rich rendering (table, panel, summary) and end-to-end CLI behavior.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from io import StringIO
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from django.core.management import call_command
|
||||||
|
from rich.console import Console
|
||||||
|
|
||||||
|
from documents.management.commands.document_sanity_checker import Command
|
||||||
|
from documents.sanity_checker import SanityCheckMessages
|
||||||
|
from documents.tests.factories import DocumentFactory
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from documents.models import Document
|
||||||
|
from documents.tests.conftest import PaperlessDirs
|
||||||
|
|
||||||
|
|
||||||
|
def _render_to_string(messages: SanityCheckMessages) -> str:
|
||||||
|
"""Render command output to a plain string for assertion."""
|
||||||
|
buf = StringIO()
|
||||||
|
cmd = Command()
|
||||||
|
cmd.console = Console(file=buf, width=120, no_color=True)
|
||||||
|
cmd._render_results(messages)
|
||||||
|
return buf.getvalue()
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Rich rendering
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestRenderResultsNoIssues:
|
||||||
|
"""No DB access needed -- renders an empty SanityCheckMessages."""
|
||||||
|
|
||||||
|
def test_shows_panel(self) -> None:
|
||||||
|
output = _render_to_string(SanityCheckMessages())
|
||||||
|
assert "No issues detected" in output
|
||||||
|
assert "Sanity Check" in output
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db
|
||||||
|
class TestRenderResultsWithIssues:
|
||||||
|
def test_error_row(self, sample_doc: Document) -> None:
|
||||||
|
msgs = SanityCheckMessages()
|
||||||
|
msgs.error(sample_doc.pk, "Original missing")
|
||||||
|
output = _render_to_string(msgs)
|
||||||
|
assert "Sanity Check Results" in output
|
||||||
|
assert "ERROR" in output
|
||||||
|
assert "Original missing" in output
|
||||||
|
assert f"#{sample_doc.pk}" in output
|
||||||
|
assert sample_doc.title in output
|
||||||
|
|
||||||
|
def test_warning_row(self, sample_doc: Document) -> None:
|
||||||
|
msgs = SanityCheckMessages()
|
||||||
|
msgs.warning(sample_doc.pk, "Suspicious file")
|
||||||
|
output = _render_to_string(msgs)
|
||||||
|
assert "WARN" in output
|
||||||
|
assert "Suspicious file" in output
|
||||||
|
|
||||||
|
def test_info_row(self, sample_doc: Document) -> None:
|
||||||
|
msgs = SanityCheckMessages()
|
||||||
|
msgs.info(sample_doc.pk, "No OCR data")
|
||||||
|
output = _render_to_string(msgs)
|
||||||
|
assert "INFO" in output
|
||||||
|
assert "No OCR data" in output
|
||||||
|
|
||||||
|
@pytest.mark.usefixtures("_media_settings")
|
||||||
|
def test_global_message(self) -> None:
|
||||||
|
msgs = SanityCheckMessages()
|
||||||
|
msgs.warning(None, "Orphaned file: /tmp/stray.pdf")
|
||||||
|
output = _render_to_string(msgs)
|
||||||
|
assert "(global)" in output
|
||||||
|
assert "Orphaned file" in output
|
||||||
|
|
||||||
|
def test_multiple_messages_same_doc(self, sample_doc: Document) -> None:
|
||||||
|
msgs = SanityCheckMessages()
|
||||||
|
msgs.error(sample_doc.pk, "Thumbnail missing")
|
||||||
|
msgs.error(sample_doc.pk, "Checksum mismatch")
|
||||||
|
output = _render_to_string(msgs)
|
||||||
|
assert "Thumbnail missing" in output
|
||||||
|
assert "Checksum mismatch" in output
|
||||||
|
|
||||||
|
@pytest.mark.usefixtures("_media_settings")
|
||||||
|
def test_unknown_doc_pk(self) -> None:
|
||||||
|
msgs = SanityCheckMessages()
|
||||||
|
msgs.error(99999, "Ghost document")
|
||||||
|
output = _render_to_string(msgs)
|
||||||
|
assert "#99999" in output
|
||||||
|
assert "Unknown" in output
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db
|
||||||
|
class TestRenderResultsSummary:
|
||||||
|
def test_errors_only(self, sample_doc: Document) -> None:
|
||||||
|
msgs = SanityCheckMessages()
|
||||||
|
msgs.error(sample_doc.pk, "broken")
|
||||||
|
output = _render_to_string(msgs)
|
||||||
|
assert "1 document(s) with" in output
|
||||||
|
assert "errors" in output
|
||||||
|
|
||||||
|
def test_warnings_only(self, sample_doc: Document) -> None:
|
||||||
|
msgs = SanityCheckMessages()
|
||||||
|
msgs.warning(sample_doc.pk, "odd")
|
||||||
|
output = _render_to_string(msgs)
|
||||||
|
assert "1 document(s) with" in output
|
||||||
|
assert "warnings" in output
|
||||||
|
|
||||||
|
def test_infos_only(self, sample_doc: Document) -> None:
|
||||||
|
msgs = SanityCheckMessages()
|
||||||
|
msgs.info(sample_doc.pk, "no OCR")
|
||||||
|
output = _render_to_string(msgs)
|
||||||
|
assert "1 document(s) with infos" in output
|
||||||
|
|
||||||
|
def test_empty_messages(self) -> None:
|
||||||
|
msgs = SanityCheckMessages()
|
||||||
|
output = _render_to_string(msgs)
|
||||||
|
assert "No issues detected." in output
|
||||||
|
|
||||||
|
def test_document_errors_and_global_warnings(self, sample_doc: Document) -> None:
|
||||||
|
msgs = SanityCheckMessages()
|
||||||
|
msgs.error(sample_doc.pk, "broken")
|
||||||
|
msgs.warning(None, "orphan")
|
||||||
|
output = _render_to_string(msgs)
|
||||||
|
assert "1 document(s) with" in output
|
||||||
|
assert "errors" in output
|
||||||
|
assert "1 global warning(s)" in output
|
||||||
|
assert "2 document(s)" not in output
|
||||||
|
|
||||||
|
def test_global_warnings_only(self) -> None:
|
||||||
|
msgs = SanityCheckMessages()
|
||||||
|
msgs.warning(None, "extra file")
|
||||||
|
output = _render_to_string(msgs)
|
||||||
|
assert "1 global warning(s)" in output
|
||||||
|
assert "document(s) with" not in output
|
||||||
|
|
||||||
|
def test_all_levels_combined(self, sample_doc: Document) -> None:
|
||||||
|
msgs = SanityCheckMessages()
|
||||||
|
msgs.error(sample_doc.pk, "broken")
|
||||||
|
msgs.warning(sample_doc.pk, "odd")
|
||||||
|
msgs.info(sample_doc.pk, "fyi")
|
||||||
|
msgs.warning(None, "extra file")
|
||||||
|
output = _render_to_string(msgs)
|
||||||
|
assert "1 document(s) with errors" in output
|
||||||
|
assert "1 document(s) with warnings" in output
|
||||||
|
assert "1 document(s) with infos" in output
|
||||||
|
assert "1 global warning(s)" in output
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# End-to-end command execution
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db
|
||||||
|
@pytest.mark.management
|
||||||
|
class TestDocumentSanityCheckerCommand:
|
||||||
|
def test_no_issues(self, sample_doc: Document) -> None:
|
||||||
|
out = StringIO()
|
||||||
|
call_command("document_sanity_checker", "--no-progress-bar", stdout=out)
|
||||||
|
assert "No issues detected" in out.getvalue()
|
||||||
|
|
||||||
|
def test_missing_original(self, sample_doc: Document) -> None:
|
||||||
|
Path(sample_doc.source_path).unlink()
|
||||||
|
out = StringIO()
|
||||||
|
call_command("document_sanity_checker", "--no-progress-bar", stdout=out)
|
||||||
|
output = out.getvalue()
|
||||||
|
assert "ERROR" in output
|
||||||
|
assert "Original of document does not exist" in output
|
||||||
|
|
||||||
|
@pytest.mark.usefixtures("_media_settings")
|
||||||
|
def test_checksum_mismatch(self, paperless_dirs: PaperlessDirs) -> None:
|
||||||
|
"""Lightweight document with zero-byte files triggers checksum mismatch."""
|
||||||
|
doc = DocumentFactory(
|
||||||
|
title="test",
|
||||||
|
content="test",
|
||||||
|
filename="test.pdf",
|
||||||
|
checksum="abc",
|
||||||
|
)
|
||||||
|
Path(doc.source_path).touch()
|
||||||
|
Path(doc.thumbnail_path).touch()
|
||||||
|
|
||||||
|
out = StringIO()
|
||||||
|
call_command("document_sanity_checker", "--no-progress-bar", stdout=out)
|
||||||
|
output = out.getvalue()
|
||||||
|
assert "ERROR" in output
|
||||||
|
assert "Checksum mismatch. Stored: abc, actual:" in output
|
||||||
@@ -336,7 +336,11 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
added=d1,
|
added=d1,
|
||||||
)
|
)
|
||||||
|
|
||||||
self.assertEqual(generate_filename(doc1), Path("1232-01-09.pdf"))
|
# Account for 3.14 padding changes
|
||||||
|
expected_year: str = d1.strftime("%Y")
|
||||||
|
expected_filename: Path = Path(f"{expected_year}-01-09.pdf")
|
||||||
|
|
||||||
|
self.assertEqual(generate_filename(doc1), expected_filename)
|
||||||
|
|
||||||
doc1.added = timezone.make_aware(datetime.datetime(2020, 11, 16, 1, 1, 1))
|
doc1.added = timezone.make_aware(datetime.datetime(2020, 11, 16, 1, 1, 1))
|
||||||
|
|
||||||
|
|||||||
@@ -21,7 +21,7 @@ class TestDateLocalization:
|
|||||||
14,
|
14,
|
||||||
30,
|
30,
|
||||||
5,
|
5,
|
||||||
tzinfo=datetime.timezone.utc,
|
tzinfo=datetime.UTC,
|
||||||
)
|
)
|
||||||
|
|
||||||
TEST_DATETIME_STRING: str = "2023-10-26T14:30:05+00:00"
|
TEST_DATETIME_STRING: str = "2023-10-26T14:30:05+00:00"
|
||||||
|
|||||||
@@ -134,6 +134,7 @@ class TestRenamer(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
self.assertIsFile(doc2.archive_path)
|
self.assertIsFile(doc2.archive_path)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.management
|
||||||
class TestCreateClassifier(TestCase):
|
class TestCreateClassifier(TestCase):
|
||||||
@mock.patch(
|
@mock.patch(
|
||||||
"documents.management.commands.document_create_classifier.train_classifier",
|
"documents.management.commands.document_create_classifier.train_classifier",
|
||||||
@@ -144,32 +145,6 @@ class TestCreateClassifier(TestCase):
|
|||||||
m.assert_called_once()
|
m.assert_called_once()
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.management
|
|
||||||
class TestSanityChecker(DirectoriesMixin, TestCase):
|
|
||||||
def test_no_issues(self) -> None:
|
|
||||||
with self.assertLogs() as capture:
|
|
||||||
call_command("document_sanity_checker")
|
|
||||||
|
|
||||||
self.assertEqual(len(capture.output), 1)
|
|
||||||
self.assertIn("Sanity checker detected no issues.", capture.output[0])
|
|
||||||
|
|
||||||
def test_errors(self) -> None:
|
|
||||||
doc = Document.objects.create(
|
|
||||||
title="test",
|
|
||||||
content="test",
|
|
||||||
filename="test.pdf",
|
|
||||||
checksum="abc",
|
|
||||||
)
|
|
||||||
Path(doc.source_path).touch()
|
|
||||||
Path(doc.thumbnail_path).touch()
|
|
||||||
|
|
||||||
with self.assertLogs() as capture:
|
|
||||||
call_command("document_sanity_checker")
|
|
||||||
|
|
||||||
self.assertEqual(len(capture.output), 2)
|
|
||||||
self.assertIn("Checksum mismatch. Stored: abc, actual:", capture.output[1])
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.management
|
@pytest.mark.management
|
||||||
class TestConvertMariaDBUUID(TestCase):
|
class TestConvertMariaDBUUID(TestCase):
|
||||||
@mock.patch("django.db.connection.schema_editor")
|
@mock.patch("django.db.connection.schema_editor")
|
||||||
|
|||||||
@@ -288,7 +288,7 @@ class TestExportImport(
|
|||||||
self.assertEqual(Permission.objects.count(), num_permission_objects)
|
self.assertEqual(Permission.objects.count(), num_permission_objects)
|
||||||
messages = check_sanity()
|
messages = check_sanity()
|
||||||
# everything is alright after the test
|
# everything is alright after the test
|
||||||
self.assertEqual(len(messages), 0)
|
self.assertEqual(messages.total_issue_count, 0)
|
||||||
|
|
||||||
def test_exporter_with_filename_format(self) -> None:
|
def test_exporter_with_filename_format(self) -> None:
|
||||||
shutil.rmtree(Path(self.dirs.media_dir) / "documents")
|
shutil.rmtree(Path(self.dirs.media_dir) / "documents")
|
||||||
|
|||||||
@@ -1,298 +1,442 @@
|
|||||||
|
"""
|
||||||
|
Tests for the document_retagger management command.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from django.core.management import call_command
|
from django.core.management import call_command
|
||||||
from django.core.management.base import CommandError
|
from django.core.management.base import CommandError
|
||||||
from django.test import TestCase
|
|
||||||
|
|
||||||
from documents.models import Correspondent
|
from documents.models import Correspondent
|
||||||
from documents.models import Document
|
from documents.models import Document
|
||||||
from documents.models import DocumentType
|
from documents.models import DocumentType
|
||||||
|
from documents.models import MatchingModel
|
||||||
from documents.models import StoragePath
|
from documents.models import StoragePath
|
||||||
from documents.models import Tag
|
from documents.models import Tag
|
||||||
|
from documents.tests.factories import CorrespondentFactory
|
||||||
|
from documents.tests.factories import DocumentFactory
|
||||||
|
from documents.tests.factories import DocumentTypeFactory
|
||||||
|
from documents.tests.factories import StoragePathFactory
|
||||||
|
from documents.tests.factories import TagFactory
|
||||||
from documents.tests.utils import DirectoriesMixin
|
from documents.tests.utils import DirectoriesMixin
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Module-level type aliases
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
StoragePathTuple = tuple[StoragePath, StoragePath, StoragePath]
|
||||||
|
TagTuple = tuple[Tag, Tag, Tag, Tag, Tag]
|
||||||
|
CorrespondentTuple = tuple[Correspondent, Correspondent]
|
||||||
|
DocumentTypeTuple = tuple[DocumentType, DocumentType]
|
||||||
|
DocumentTuple = tuple[Document, Document, Document, Document]
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Fixtures
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
|
||||||
|
def storage_paths(db) -> StoragePathTuple:
|
||||||
|
"""Three storage paths with varying match rules."""
|
||||||
|
sp1 = StoragePathFactory(
|
||||||
|
path="{created_data}/{title}",
|
||||||
|
match="auto document",
|
||||||
|
matching_algorithm=MatchingModel.MATCH_LITERAL,
|
||||||
|
)
|
||||||
|
sp2 = StoragePathFactory(
|
||||||
|
path="{title}",
|
||||||
|
match="^first|^unrelated",
|
||||||
|
matching_algorithm=MatchingModel.MATCH_REGEX,
|
||||||
|
)
|
||||||
|
sp3 = StoragePathFactory(
|
||||||
|
path="{title}",
|
||||||
|
match="^blah",
|
||||||
|
matching_algorithm=MatchingModel.MATCH_REGEX,
|
||||||
|
)
|
||||||
|
return sp1, sp2, sp3
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
|
||||||
|
def tags(db) -> TagTuple:
|
||||||
|
"""Tags covering the common matching scenarios."""
|
||||||
|
tag_first = TagFactory(match="first", matching_algorithm=Tag.MATCH_ANY)
|
||||||
|
tag_second = TagFactory(match="second", matching_algorithm=Tag.MATCH_ANY)
|
||||||
|
tag_inbox = TagFactory(is_inbox_tag=True)
|
||||||
|
tag_no_match = TagFactory()
|
||||||
|
tag_auto = TagFactory(matching_algorithm=Tag.MATCH_AUTO)
|
||||||
|
return tag_first, tag_second, tag_inbox, tag_no_match, tag_auto
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
|
||||||
|
def correspondents(db) -> CorrespondentTuple:
|
||||||
|
"""Two correspondents matching 'first' and 'second' content."""
|
||||||
|
c_first = CorrespondentFactory(
|
||||||
|
match="first",
|
||||||
|
matching_algorithm=MatchingModel.MATCH_ANY,
|
||||||
|
)
|
||||||
|
c_second = CorrespondentFactory(
|
||||||
|
match="second",
|
||||||
|
matching_algorithm=MatchingModel.MATCH_ANY,
|
||||||
|
)
|
||||||
|
return c_first, c_second
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
|
||||||
|
def document_types(db) -> DocumentTypeTuple:
|
||||||
|
"""Two document types matching 'first' and 'second' content."""
|
||||||
|
dt_first = DocumentTypeFactory(
|
||||||
|
match="first",
|
||||||
|
matching_algorithm=MatchingModel.MATCH_ANY,
|
||||||
|
)
|
||||||
|
dt_second = DocumentTypeFactory(
|
||||||
|
match="second",
|
||||||
|
matching_algorithm=MatchingModel.MATCH_ANY,
|
||||||
|
)
|
||||||
|
return dt_first, dt_second
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
|
||||||
|
def documents(storage_paths: StoragePathTuple, tags: TagTuple) -> DocumentTuple:
|
||||||
|
"""Four documents with varied content used across most retagger tests."""
|
||||||
|
_, _, sp3 = storage_paths
|
||||||
|
_, _, tag_inbox, tag_no_match, tag_auto = tags
|
||||||
|
|
||||||
|
d1 = DocumentFactory(checksum="A", title="A", content="first document")
|
||||||
|
d2 = DocumentFactory(checksum="B", title="B", content="second document")
|
||||||
|
d3 = DocumentFactory(
|
||||||
|
checksum="C",
|
||||||
|
title="C",
|
||||||
|
content="unrelated document",
|
||||||
|
storage_path=sp3,
|
||||||
|
)
|
||||||
|
d4 = DocumentFactory(checksum="D", title="D", content="auto document")
|
||||||
|
|
||||||
|
d3.tags.add(tag_inbox, tag_no_match)
|
||||||
|
d4.tags.add(tag_auto)
|
||||||
|
|
||||||
|
return d1, d2, d3, d4
|
||||||
|
|
||||||
|
|
||||||
|
def _get_docs() -> DocumentTuple:
|
||||||
|
return (
|
||||||
|
Document.objects.get(title="A"),
|
||||||
|
Document.objects.get(title="B"),
|
||||||
|
Document.objects.get(title="C"),
|
||||||
|
Document.objects.get(title="D"),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Tag assignment
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.management
|
@pytest.mark.management
|
||||||
class TestRetagger(DirectoriesMixin, TestCase):
|
@pytest.mark.django_db
|
||||||
def make_models(self) -> None:
|
class TestRetaggerTags(DirectoriesMixin):
|
||||||
self.sp1 = StoragePath.objects.create(
|
@pytest.mark.usefixtures("documents")
|
||||||
name="dummy a",
|
def test_add_tags(self, tags: TagTuple) -> None:
|
||||||
path="{created_data}/{title}",
|
tag_first, tag_second, *_ = tags
|
||||||
match="auto document",
|
|
||||||
matching_algorithm=StoragePath.MATCH_LITERAL,
|
|
||||||
)
|
|
||||||
self.sp2 = StoragePath.objects.create(
|
|
||||||
name="dummy b",
|
|
||||||
path="{title}",
|
|
||||||
match="^first|^unrelated",
|
|
||||||
matching_algorithm=StoragePath.MATCH_REGEX,
|
|
||||||
)
|
|
||||||
|
|
||||||
self.sp3 = StoragePath.objects.create(
|
|
||||||
name="dummy c",
|
|
||||||
path="{title}",
|
|
||||||
match="^blah",
|
|
||||||
matching_algorithm=StoragePath.MATCH_REGEX,
|
|
||||||
)
|
|
||||||
|
|
||||||
self.d1 = Document.objects.create(
|
|
||||||
checksum="A",
|
|
||||||
title="A",
|
|
||||||
content="first document",
|
|
||||||
)
|
|
||||||
self.d2 = Document.objects.create(
|
|
||||||
checksum="B",
|
|
||||||
title="B",
|
|
||||||
content="second document",
|
|
||||||
)
|
|
||||||
self.d3 = Document.objects.create(
|
|
||||||
checksum="C",
|
|
||||||
title="C",
|
|
||||||
content="unrelated document",
|
|
||||||
storage_path=self.sp3,
|
|
||||||
)
|
|
||||||
self.d4 = Document.objects.create(
|
|
||||||
checksum="D",
|
|
||||||
title="D",
|
|
||||||
content="auto document",
|
|
||||||
)
|
|
||||||
|
|
||||||
self.tag_first = Tag.objects.create(
|
|
||||||
name="tag1",
|
|
||||||
match="first",
|
|
||||||
matching_algorithm=Tag.MATCH_ANY,
|
|
||||||
)
|
|
||||||
self.tag_second = Tag.objects.create(
|
|
||||||
name="tag2",
|
|
||||||
match="second",
|
|
||||||
matching_algorithm=Tag.MATCH_ANY,
|
|
||||||
)
|
|
||||||
self.tag_inbox = Tag.objects.create(name="test", is_inbox_tag=True)
|
|
||||||
self.tag_no_match = Tag.objects.create(name="test2")
|
|
||||||
self.tag_auto = Tag.objects.create(
|
|
||||||
name="tagauto",
|
|
||||||
matching_algorithm=Tag.MATCH_AUTO,
|
|
||||||
)
|
|
||||||
|
|
||||||
self.d3.tags.add(self.tag_inbox)
|
|
||||||
self.d3.tags.add(self.tag_no_match)
|
|
||||||
self.d4.tags.add(self.tag_auto)
|
|
||||||
|
|
||||||
self.correspondent_first = Correspondent.objects.create(
|
|
||||||
name="c1",
|
|
||||||
match="first",
|
|
||||||
matching_algorithm=Correspondent.MATCH_ANY,
|
|
||||||
)
|
|
||||||
self.correspondent_second = Correspondent.objects.create(
|
|
||||||
name="c2",
|
|
||||||
match="second",
|
|
||||||
matching_algorithm=Correspondent.MATCH_ANY,
|
|
||||||
)
|
|
||||||
|
|
||||||
self.doctype_first = DocumentType.objects.create(
|
|
||||||
name="dt1",
|
|
||||||
match="first",
|
|
||||||
matching_algorithm=DocumentType.MATCH_ANY,
|
|
||||||
)
|
|
||||||
self.doctype_second = DocumentType.objects.create(
|
|
||||||
name="dt2",
|
|
||||||
match="second",
|
|
||||||
matching_algorithm=DocumentType.MATCH_ANY,
|
|
||||||
)
|
|
||||||
|
|
||||||
def get_updated_docs(self):
|
|
||||||
return (
|
|
||||||
Document.objects.get(title="A"),
|
|
||||||
Document.objects.get(title="B"),
|
|
||||||
Document.objects.get(title="C"),
|
|
||||||
Document.objects.get(title="D"),
|
|
||||||
)
|
|
||||||
|
|
||||||
def setUp(self) -> None:
|
|
||||||
super().setUp()
|
|
||||||
self.make_models()
|
|
||||||
|
|
||||||
def test_add_tags(self) -> None:
|
|
||||||
call_command("document_retagger", "--tags")
|
call_command("document_retagger", "--tags")
|
||||||
d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
|
d_first, d_second, d_unrelated, d_auto = _get_docs()
|
||||||
|
|
||||||
self.assertEqual(d_first.tags.count(), 1)
|
assert d_first.tags.count() == 1
|
||||||
self.assertEqual(d_second.tags.count(), 1)
|
assert d_second.tags.count() == 1
|
||||||
self.assertEqual(d_unrelated.tags.count(), 2)
|
assert d_unrelated.tags.count() == 2
|
||||||
self.assertEqual(d_auto.tags.count(), 1)
|
assert d_auto.tags.count() == 1
|
||||||
|
assert d_first.tags.first() == tag_first
|
||||||
|
assert d_second.tags.first() == tag_second
|
||||||
|
|
||||||
self.assertEqual(d_first.tags.first(), self.tag_first)
|
def test_overwrite_removes_stale_tags_and_preserves_inbox(
|
||||||
self.assertEqual(d_second.tags.first(), self.tag_second)
|
self,
|
||||||
|
documents: DocumentTuple,
|
||||||
def test_add_type(self) -> None:
|
tags: TagTuple,
|
||||||
call_command("document_retagger", "--document_type")
|
) -> None:
|
||||||
d_first, d_second, _, _ = self.get_updated_docs()
|
d1, *_ = documents
|
||||||
|
tag_first, tag_second, tag_inbox, tag_no_match, _ = tags
|
||||||
self.assertEqual(d_first.document_type, self.doctype_first)
|
d1.tags.add(tag_second)
|
||||||
self.assertEqual(d_second.document_type, self.doctype_second)
|
|
||||||
|
|
||||||
def test_add_correspondent(self) -> None:
|
|
||||||
call_command("document_retagger", "--correspondent")
|
|
||||||
d_first, d_second, _, _ = self.get_updated_docs()
|
|
||||||
|
|
||||||
self.assertEqual(d_first.correspondent, self.correspondent_first)
|
|
||||||
self.assertEqual(d_second.correspondent, self.correspondent_second)
|
|
||||||
|
|
||||||
def test_overwrite_preserve_inbox(self) -> None:
|
|
||||||
self.d1.tags.add(self.tag_second)
|
|
||||||
|
|
||||||
call_command("document_retagger", "--tags", "--overwrite")
|
call_command("document_retagger", "--tags", "--overwrite")
|
||||||
|
|
||||||
d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
|
d_first, d_second, d_unrelated, d_auto = _get_docs()
|
||||||
|
|
||||||
self.assertIsNotNone(Tag.objects.get(id=self.tag_second.id))
|
assert Tag.objects.filter(id=tag_second.id).exists()
|
||||||
|
assert list(d_first.tags.values_list("id", flat=True)) == [tag_first.id]
|
||||||
|
assert list(d_second.tags.values_list("id", flat=True)) == [tag_second.id]
|
||||||
|
assert set(d_unrelated.tags.values_list("id", flat=True)) == {
|
||||||
|
tag_inbox.id,
|
||||||
|
tag_no_match.id,
|
||||||
|
}
|
||||||
|
assert d_auto.tags.count() == 0
|
||||||
|
|
||||||
self.assertCountEqual(
|
@pytest.mark.usefixtures("documents")
|
||||||
[tag.id for tag in d_first.tags.all()],
|
@pytest.mark.parametrize(
|
||||||
[self.tag_first.id],
|
"extra_args",
|
||||||
|
[
|
||||||
|
pytest.param([], id="no_base_url"),
|
||||||
|
pytest.param(["--base-url=http://localhost"], id="with_base_url"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_suggest_does_not_apply_tags(self, extra_args: list[str]) -> None:
|
||||||
|
call_command("document_retagger", "--tags", "--suggest", *extra_args)
|
||||||
|
d_first, d_second, _, d_auto = _get_docs()
|
||||||
|
|
||||||
|
assert d_first.tags.count() == 0
|
||||||
|
assert d_second.tags.count() == 0
|
||||||
|
assert d_auto.tags.count() == 1
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Document type assignment
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.management
|
||||||
|
@pytest.mark.django_db
|
||||||
|
class TestRetaggerDocumentType(DirectoriesMixin):
|
||||||
|
@pytest.mark.usefixtures("documents")
|
||||||
|
def test_add_type(self, document_types: DocumentTypeTuple) -> None:
|
||||||
|
dt_first, dt_second = document_types
|
||||||
|
call_command("document_retagger", "--document_type")
|
||||||
|
d_first, d_second, _, _ = _get_docs()
|
||||||
|
|
||||||
|
assert d_first.document_type == dt_first
|
||||||
|
assert d_second.document_type == dt_second
|
||||||
|
|
||||||
|
@pytest.mark.usefixtures("documents", "document_types")
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"extra_args",
|
||||||
|
[
|
||||||
|
pytest.param([], id="no_base_url"),
|
||||||
|
pytest.param(["--base-url=http://localhost"], id="with_base_url"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_suggest_does_not_apply_document_type(self, extra_args: list[str]) -> None:
|
||||||
|
call_command("document_retagger", "--document_type", "--suggest", *extra_args)
|
||||||
|
d_first, d_second, _, _ = _get_docs()
|
||||||
|
|
||||||
|
assert d_first.document_type is None
|
||||||
|
assert d_second.document_type is None
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
("use_first_flag", "expects_assignment"),
|
||||||
|
[
|
||||||
|
pytest.param(["--use-first"], True, id="use_first_assigns_first_match"),
|
||||||
|
pytest.param([], False, id="no_use_first_skips_ambiguous_match"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_use_first_with_multiple_matches(
|
||||||
|
self,
|
||||||
|
use_first_flag: list[str],
|
||||||
|
*,
|
||||||
|
expects_assignment: bool,
|
||||||
|
) -> None:
|
||||||
|
DocumentTypeFactory(
|
||||||
|
match="ambiguous",
|
||||||
|
matching_algorithm=MatchingModel.MATCH_ANY,
|
||||||
)
|
)
|
||||||
self.assertCountEqual(
|
DocumentTypeFactory(
|
||||||
[tag.id for tag in d_second.tags.all()],
|
match="ambiguous",
|
||||||
[self.tag_second.id],
|
matching_algorithm=MatchingModel.MATCH_ANY,
|
||||||
)
|
)
|
||||||
self.assertCountEqual(
|
doc = DocumentFactory(content="ambiguous content")
|
||||||
[tag.id for tag in d_unrelated.tags.all()],
|
|
||||||
[self.tag_inbox.id, self.tag_no_match.id],
|
call_command("document_retagger", "--document_type", *use_first_flag)
|
||||||
|
|
||||||
|
doc.refresh_from_db()
|
||||||
|
assert (doc.document_type is not None) is expects_assignment
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Correspondent assignment
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.management
|
||||||
|
@pytest.mark.django_db
|
||||||
|
class TestRetaggerCorrespondent(DirectoriesMixin):
|
||||||
|
@pytest.mark.usefixtures("documents")
|
||||||
|
def test_add_correspondent(self, correspondents: CorrespondentTuple) -> None:
|
||||||
|
c_first, c_second = correspondents
|
||||||
|
call_command("document_retagger", "--correspondent")
|
||||||
|
d_first, d_second, _, _ = _get_docs()
|
||||||
|
|
||||||
|
assert d_first.correspondent == c_first
|
||||||
|
assert d_second.correspondent == c_second
|
||||||
|
|
||||||
|
@pytest.mark.usefixtures("documents", "correspondents")
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"extra_args",
|
||||||
|
[
|
||||||
|
pytest.param([], id="no_base_url"),
|
||||||
|
pytest.param(["--base-url=http://localhost"], id="with_base_url"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_suggest_does_not_apply_correspondent(self, extra_args: list[str]) -> None:
|
||||||
|
call_command("document_retagger", "--correspondent", "--suggest", *extra_args)
|
||||||
|
d_first, d_second, _, _ = _get_docs()
|
||||||
|
|
||||||
|
assert d_first.correspondent is None
|
||||||
|
assert d_second.correspondent is None
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
("use_first_flag", "expects_assignment"),
|
||||||
|
[
|
||||||
|
pytest.param(["--use-first"], True, id="use_first_assigns_first_match"),
|
||||||
|
pytest.param([], False, id="no_use_first_skips_ambiguous_match"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_use_first_with_multiple_matches(
|
||||||
|
self,
|
||||||
|
use_first_flag: list[str],
|
||||||
|
*,
|
||||||
|
expects_assignment: bool,
|
||||||
|
) -> None:
|
||||||
|
CorrespondentFactory(
|
||||||
|
match="ambiguous",
|
||||||
|
matching_algorithm=MatchingModel.MATCH_ANY,
|
||||||
)
|
)
|
||||||
self.assertEqual(d_auto.tags.count(), 0)
|
CorrespondentFactory(
|
||||||
|
match="ambiguous",
|
||||||
def test_add_tags_suggest(self) -> None:
|
matching_algorithm=MatchingModel.MATCH_ANY,
|
||||||
call_command("document_retagger", "--tags", "--suggest")
|
|
||||||
d_first, d_second, _, d_auto = self.get_updated_docs()
|
|
||||||
|
|
||||||
self.assertEqual(d_first.tags.count(), 0)
|
|
||||||
self.assertEqual(d_second.tags.count(), 0)
|
|
||||||
self.assertEqual(d_auto.tags.count(), 1)
|
|
||||||
|
|
||||||
def test_add_type_suggest(self) -> None:
|
|
||||||
call_command("document_retagger", "--document_type", "--suggest")
|
|
||||||
d_first, d_second, _, _ = self.get_updated_docs()
|
|
||||||
|
|
||||||
self.assertIsNone(d_first.document_type)
|
|
||||||
self.assertIsNone(d_second.document_type)
|
|
||||||
|
|
||||||
def test_add_correspondent_suggest(self) -> None:
|
|
||||||
call_command("document_retagger", "--correspondent", "--suggest")
|
|
||||||
d_first, d_second, _, _ = self.get_updated_docs()
|
|
||||||
|
|
||||||
self.assertIsNone(d_first.correspondent)
|
|
||||||
self.assertIsNone(d_second.correspondent)
|
|
||||||
|
|
||||||
def test_add_tags_suggest_url(self) -> None:
|
|
||||||
call_command(
|
|
||||||
"document_retagger",
|
|
||||||
"--tags",
|
|
||||||
"--suggest",
|
|
||||||
"--base-url=http://localhost",
|
|
||||||
)
|
)
|
||||||
d_first, d_second, _, d_auto = self.get_updated_docs()
|
doc = DocumentFactory(content="ambiguous content")
|
||||||
|
|
||||||
self.assertEqual(d_first.tags.count(), 0)
|
call_command("document_retagger", "--correspondent", *use_first_flag)
|
||||||
self.assertEqual(d_second.tags.count(), 0)
|
|
||||||
self.assertEqual(d_auto.tags.count(), 1)
|
|
||||||
|
|
||||||
def test_add_type_suggest_url(self) -> None:
|
doc.refresh_from_db()
|
||||||
call_command(
|
assert (doc.correspondent is not None) is expects_assignment
|
||||||
"document_retagger",
|
|
||||||
"--document_type",
|
|
||||||
"--suggest",
|
|
||||||
"--base-url=http://localhost",
|
|
||||||
)
|
|
||||||
d_first, d_second, _, _ = self.get_updated_docs()
|
|
||||||
|
|
||||||
self.assertIsNone(d_first.document_type)
|
|
||||||
self.assertIsNone(d_second.document_type)
|
|
||||||
|
|
||||||
def test_add_correspondent_suggest_url(self) -> None:
|
# ---------------------------------------------------------------------------
|
||||||
call_command(
|
# Storage path assignment
|
||||||
"document_retagger",
|
# ---------------------------------------------------------------------------
|
||||||
"--correspondent",
|
|
||||||
"--suggest",
|
|
||||||
"--base-url=http://localhost",
|
|
||||||
)
|
|
||||||
d_first, d_second, _, _ = self.get_updated_docs()
|
|
||||||
|
|
||||||
self.assertIsNone(d_first.correspondent)
|
|
||||||
self.assertIsNone(d_second.correspondent)
|
|
||||||
|
|
||||||
def test_add_storage_path(self) -> None:
|
@pytest.mark.management
|
||||||
|
@pytest.mark.django_db
|
||||||
|
class TestRetaggerStoragePath(DirectoriesMixin):
|
||||||
|
@pytest.mark.usefixtures("documents")
|
||||||
|
def test_add_storage_path(self, storage_paths: StoragePathTuple) -> None:
|
||||||
"""
|
"""
|
||||||
GIVEN:
|
GIVEN documents matching various storage path rules
|
||||||
- 2 storage paths with documents which match them
|
WHEN document_retagger --storage_path is called
|
||||||
- 1 document which matches but has a storage path
|
THEN matching documents get the correct path; existing path is unchanged
|
||||||
WHEN:
|
|
||||||
- document retagger is called
|
|
||||||
THEN:
|
|
||||||
- Matching document's storage paths updated
|
|
||||||
- Non-matching documents have no storage path
|
|
||||||
- Existing storage patch left unchanged
|
|
||||||
"""
|
"""
|
||||||
call_command(
|
sp1, sp2, sp3 = storage_paths
|
||||||
"document_retagger",
|
call_command("document_retagger", "--storage_path")
|
||||||
"--storage_path",
|
d_first, d_second, d_unrelated, d_auto = _get_docs()
|
||||||
)
|
|
||||||
d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
|
|
||||||
|
|
||||||
self.assertEqual(d_first.storage_path, self.sp2)
|
assert d_first.storage_path == sp2
|
||||||
self.assertEqual(d_auto.storage_path, self.sp1)
|
assert d_auto.storage_path == sp1
|
||||||
self.assertIsNone(d_second.storage_path)
|
assert d_second.storage_path is None
|
||||||
self.assertEqual(d_unrelated.storage_path, self.sp3)
|
assert d_unrelated.storage_path == sp3
|
||||||
|
|
||||||
def test_overwrite_storage_path(self) -> None:
|
@pytest.mark.usefixtures("documents")
|
||||||
|
def test_overwrite_storage_path(self, storage_paths: StoragePathTuple) -> None:
|
||||||
"""
|
"""
|
||||||
GIVEN:
|
GIVEN a document with an existing storage path that matches a different rule
|
||||||
- 2 storage paths with documents which match them
|
WHEN document_retagger --storage_path --overwrite is called
|
||||||
- 1 document which matches but has a storage path
|
THEN the existing path is replaced by the newly matched path
|
||||||
WHEN:
|
|
||||||
- document retagger is called with overwrite
|
|
||||||
THEN:
|
|
||||||
- Matching document's storage paths updated
|
|
||||||
- Non-matching documents have no storage path
|
|
||||||
- Existing storage patch overwritten
|
|
||||||
"""
|
"""
|
||||||
|
sp1, sp2, _ = storage_paths
|
||||||
call_command("document_retagger", "--storage_path", "--overwrite")
|
call_command("document_retagger", "--storage_path", "--overwrite")
|
||||||
d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
|
d_first, d_second, d_unrelated, d_auto = _get_docs()
|
||||||
|
|
||||||
self.assertEqual(d_first.storage_path, self.sp2)
|
assert d_first.storage_path == sp2
|
||||||
self.assertEqual(d_auto.storage_path, self.sp1)
|
assert d_auto.storage_path == sp1
|
||||||
self.assertIsNone(d_second.storage_path)
|
assert d_second.storage_path is None
|
||||||
self.assertEqual(d_unrelated.storage_path, self.sp2)
|
assert d_unrelated.storage_path == sp2
|
||||||
|
|
||||||
def test_id_range_parameter(self) -> None:
|
@pytest.mark.parametrize(
|
||||||
commandOutput = ""
|
("use_first_flag", "expects_assignment"),
|
||||||
Document.objects.create(
|
[
|
||||||
checksum="E",
|
pytest.param(["--use-first"], True, id="use_first_assigns_first_match"),
|
||||||
title="E",
|
pytest.param([], False, id="no_use_first_skips_ambiguous_match"),
|
||||||
content="NOT the first document",
|
],
|
||||||
|
)
|
||||||
|
def test_use_first_with_multiple_matches(
|
||||||
|
self,
|
||||||
|
use_first_flag: list[str],
|
||||||
|
*,
|
||||||
|
expects_assignment: bool,
|
||||||
|
) -> None:
|
||||||
|
StoragePathFactory(
|
||||||
|
match="ambiguous",
|
||||||
|
matching_algorithm=MatchingModel.MATCH_ANY,
|
||||||
)
|
)
|
||||||
call_command("document_retagger", "--tags", "--id-range", "1", "2")
|
StoragePathFactory(
|
||||||
# The retagger shouldn`t apply the 'first' tag to our new document
|
match="ambiguous",
|
||||||
self.assertEqual(Document.objects.filter(tags__id=self.tag_first.id).count(), 1)
|
matching_algorithm=MatchingModel.MATCH_ANY,
|
||||||
|
)
|
||||||
|
doc = DocumentFactory(content="ambiguous content")
|
||||||
|
|
||||||
try:
|
call_command("document_retagger", "--storage_path", *use_first_flag)
|
||||||
commandOutput = call_command("document_retagger", "--tags", "--id-range")
|
|
||||||
except CommandError:
|
|
||||||
# Just ignore the error
|
|
||||||
None
|
|
||||||
self.assertIn(commandOutput, "Error: argument --id-range: expected 2 arguments")
|
|
||||||
|
|
||||||
try:
|
doc.refresh_from_db()
|
||||||
commandOutput = call_command(
|
assert (doc.storage_path is not None) is expects_assignment
|
||||||
"document_retagger",
|
|
||||||
"--tags",
|
|
||||||
"--id-range",
|
|
||||||
"a",
|
|
||||||
"b",
|
|
||||||
)
|
|
||||||
except CommandError:
|
|
||||||
# Just ignore the error
|
|
||||||
None
|
|
||||||
self.assertIn(commandOutput, "error: argument --id-range: invalid int value:")
|
|
||||||
|
|
||||||
call_command("document_retagger", "--tags", "--id-range", "1", "9999")
|
|
||||||
# Now we should have 2 documents
|
# ---------------------------------------------------------------------------
|
||||||
self.assertEqual(Document.objects.filter(tags__id=self.tag_first.id).count(), 2)
|
# ID range filtering
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.management
|
||||||
|
@pytest.mark.django_db
|
||||||
|
class TestRetaggerIdRange(DirectoriesMixin):
|
||||||
|
@pytest.mark.usefixtures("documents")
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
("id_range_args", "expected_count"),
|
||||||
|
[
|
||||||
|
pytest.param(["1", "2"], 1, id="narrow_range_limits_scope"),
|
||||||
|
pytest.param(["1", "9999"], 2, id="wide_range_tags_all_matches"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_id_range_limits_scope(
|
||||||
|
self,
|
||||||
|
tags: TagTuple,
|
||||||
|
id_range_args: list[str],
|
||||||
|
expected_count: int,
|
||||||
|
) -> None:
|
||||||
|
DocumentFactory(content="NOT the first document")
|
||||||
|
call_command("document_retagger", "--tags", "--id-range", *id_range_args)
|
||||||
|
tag_first, *_ = tags
|
||||||
|
assert Document.objects.filter(tags__id=tag_first.id).count() == expected_count
|
||||||
|
|
||||||
|
@pytest.mark.usefixtures("documents")
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"args",
|
||||||
|
[
|
||||||
|
pytest.param(["--tags", "--id-range"], id="missing_both_values"),
|
||||||
|
pytest.param(["--tags", "--id-range", "a", "b"], id="non_integer_values"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_id_range_invalid_arguments_raise(self, args: list[str]) -> None:
|
||||||
|
with pytest.raises((CommandError, SystemExit)):
|
||||||
|
call_command("document_retagger", *args)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Edge cases
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.management
@pytest.mark.django_db
class TestRetaggerEdgeCases(DirectoriesMixin):
    """Edge-case invocations of the ``document_retagger`` command."""

    @pytest.mark.usefixtures("documents")
    def test_no_targets_exits_cleanly(self) -> None:
        """Running the command without any classifier target must not raise."""
        call_command("document_retagger")

    @pytest.mark.usefixtures("documents")
    def test_inbox_only_skips_non_inbox_documents(self) -> None:
        """With ``--inbox-only`` the first document stays untagged while the
        unrelated document keeps its two tags."""
        call_command("document_retagger", "--tags", "--inbox-only")

        # _get_docs() yields exactly four documents; only two are inspected.
        first_doc, _, unrelated_doc, _ = _get_docs()

        first_tag_total = first_doc.tags.count()
        unrelated_tag_total = unrelated_doc.tags.count()
        assert first_tag_total == 0
        assert unrelated_tag_total == 2
|
||||||
|
|||||||
@@ -1,192 +1,295 @@
|
|||||||
import logging
|
"""Tests for the sanity checker module.
|
||||||
import shutil
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
import filelock
|
Tests exercise ``check_sanity`` as a whole, verifying document validation,
|
||||||
from django.conf import settings
|
orphan detection, task recording, and the iter_wrapper contract.
|
||||||
from django.test import TestCase
|
"""
|
||||||
from django.test import override_settings
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
from documents.models import Document
|
from documents.models import Document
|
||||||
|
from documents.models import PaperlessTask
|
||||||
from documents.sanity_checker import check_sanity
|
from documents.sanity_checker import check_sanity
|
||||||
from documents.tests.utils import DirectoriesMixin
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from collections.abc import Iterable
|
||||||
|
|
||||||
|
from documents.tests.conftest import PaperlessDirs
|
||||||
|
|
||||||
|
|
||||||
class TestSanityCheck(DirectoriesMixin, TestCase):
|
@pytest.mark.django_db
|
||||||
def make_test_data(self):
|
class TestCheckSanityNoDocuments:
|
||||||
with filelock.FileLock(settings.MEDIA_LOCK):
|
"""Sanity checks against an empty archive."""
|
||||||
# just make sure that the lockfile is present.
|
|
||||||
shutil.copy(
|
|
||||||
(
|
|
||||||
Path(__file__).parent
|
|
||||||
/ "samples"
|
|
||||||
/ "documents"
|
|
||||||
/ "originals"
|
|
||||||
/ "0000001.pdf"
|
|
||||||
),
|
|
||||||
Path(self.dirs.originals_dir) / "0000001.pdf",
|
|
||||||
)
|
|
||||||
shutil.copy(
|
|
||||||
(
|
|
||||||
Path(__file__).parent
|
|
||||||
/ "samples"
|
|
||||||
/ "documents"
|
|
||||||
/ "archive"
|
|
||||||
/ "0000001.pdf"
|
|
||||||
),
|
|
||||||
Path(self.dirs.archive_dir) / "0000001.pdf",
|
|
||||||
)
|
|
||||||
shutil.copy(
|
|
||||||
(
|
|
||||||
Path(__file__).parent
|
|
||||||
/ "samples"
|
|
||||||
/ "documents"
|
|
||||||
/ "thumbnails"
|
|
||||||
/ "0000001.webp"
|
|
||||||
),
|
|
||||||
Path(self.dirs.thumbnail_dir) / "0000001.webp",
|
|
||||||
)
|
|
||||||
|
|
||||||
return Document.objects.create(
|
@pytest.mark.usefixtures("_media_settings")
|
||||||
title="test",
|
def test_no_documents(self) -> None:
|
||||||
checksum="42995833e01aea9b3edee44bbfdd7ce1",
|
|
||||||
archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b",
|
|
||||||
content="test",
|
|
||||||
pk=1,
|
|
||||||
filename="0000001.pdf",
|
|
||||||
mime_type="application/pdf",
|
|
||||||
archive_filename="0000001.pdf",
|
|
||||||
)
|
|
||||||
|
|
||||||
def assertSanityError(self, doc: Document, messageRegex) -> None:
|
|
||||||
messages = check_sanity()
|
messages = check_sanity()
|
||||||
self.assertTrue(messages.has_error)
|
assert not messages.has_error
|
||||||
with self.assertLogs() as capture:
|
assert not messages.has_warning
|
||||||
|
assert messages.total_issue_count == 0
|
||||||
|
|
||||||
|
@pytest.mark.usefixtures("_media_settings")
|
||||||
|
def test_no_issues_logs_clean(self, caplog: pytest.LogCaptureFixture) -> None:
|
||||||
|
messages = check_sanity()
|
||||||
|
with caplog.at_level(logging.INFO, logger="paperless.sanity_checker"):
|
||||||
messages.log_messages()
|
messages.log_messages()
|
||||||
self.assertEqual(
|
assert "Sanity checker detected no issues." in caplog.text
|
||||||
capture.records[0].message,
|
|
||||||
f"Detected following issue(s) with document #{doc.pk}, titled {doc.title}",
|
|
||||||
)
|
|
||||||
self.assertRegex(capture.records[1].message, messageRegex)
|
|
||||||
|
|
||||||
def test_no_issues(self) -> None:
|
|
||||||
self.make_test_data()
|
@pytest.mark.django_db
|
||||||
|
class TestCheckSanityHealthyDocument:
|
||||||
|
def test_no_errors(self, sample_doc: Document) -> None:
|
||||||
messages = check_sanity()
|
messages = check_sanity()
|
||||||
self.assertFalse(messages.has_error)
|
assert not messages.has_error
|
||||||
self.assertFalse(messages.has_warning)
|
assert not messages.has_warning
|
||||||
with self.assertLogs() as capture:
|
assert messages.total_issue_count == 0
|
||||||
messages.log_messages()
|
|
||||||
self.assertEqual(len(capture.output), 1)
|
|
||||||
self.assertEqual(capture.records[0].levelno, logging.INFO)
|
|
||||||
self.assertEqual(
|
|
||||||
capture.records[0].message,
|
|
||||||
"Sanity checker detected no issues.",
|
|
||||||
)
|
|
||||||
|
|
||||||
def test_no_docs(self) -> None:
|
|
||||||
self.assertEqual(len(check_sanity()), 0)
|
|
||||||
|
|
||||||
def test_success(self) -> None:
|
@pytest.mark.django_db
|
||||||
self.make_test_data()
|
class TestCheckSanityThumbnail:
|
||||||
self.assertEqual(len(check_sanity()), 0)
|
def test_missing(self, sample_doc: Document) -> None:
|
||||||
|
Path(sample_doc.thumbnail_path).unlink()
|
||||||
def test_no_thumbnail(self) -> None:
|
|
||||||
doc = self.make_test_data()
|
|
||||||
Path(doc.thumbnail_path).unlink()
|
|
||||||
self.assertSanityError(doc, "Thumbnail of document does not exist")
|
|
||||||
|
|
||||||
def test_thumbnail_no_access(self) -> None:
|
|
||||||
doc = self.make_test_data()
|
|
||||||
Path(doc.thumbnail_path).chmod(0o000)
|
|
||||||
self.assertSanityError(doc, "Cannot read thumbnail file of document")
|
|
||||||
Path(doc.thumbnail_path).chmod(0o777)
|
|
||||||
|
|
||||||
def test_no_original(self) -> None:
|
|
||||||
doc = self.make_test_data()
|
|
||||||
Path(doc.source_path).unlink()
|
|
||||||
self.assertSanityError(doc, "Original of document does not exist.")
|
|
||||||
|
|
||||||
def test_original_no_access(self) -> None:
|
|
||||||
doc = self.make_test_data()
|
|
||||||
Path(doc.source_path).chmod(0o000)
|
|
||||||
self.assertSanityError(doc, "Cannot read original file of document")
|
|
||||||
Path(doc.source_path).chmod(0o777)
|
|
||||||
|
|
||||||
def test_original_checksum_mismatch(self) -> None:
|
|
||||||
doc = self.make_test_data()
|
|
||||||
doc.checksum = "WOW"
|
|
||||||
doc.save()
|
|
||||||
self.assertSanityError(doc, "Checksum mismatch. Stored: WOW, actual: ")
|
|
||||||
|
|
||||||
def test_no_archive(self) -> None:
|
|
||||||
doc = self.make_test_data()
|
|
||||||
Path(doc.archive_path).unlink()
|
|
||||||
self.assertSanityError(doc, "Archived version of document does not exist.")
|
|
||||||
|
|
||||||
def test_archive_no_access(self) -> None:
|
|
||||||
doc = self.make_test_data()
|
|
||||||
Path(doc.archive_path).chmod(0o000)
|
|
||||||
self.assertSanityError(doc, "Cannot read archive file of document")
|
|
||||||
Path(doc.archive_path).chmod(0o777)
|
|
||||||
|
|
||||||
def test_archive_checksum_mismatch(self) -> None:
|
|
||||||
doc = self.make_test_data()
|
|
||||||
doc.archive_checksum = "WOW"
|
|
||||||
doc.save()
|
|
||||||
self.assertSanityError(doc, "Checksum mismatch of archived document")
|
|
||||||
|
|
||||||
def test_empty_content(self) -> None:
|
|
||||||
doc = self.make_test_data()
|
|
||||||
doc.content = ""
|
|
||||||
doc.save()
|
|
||||||
messages = check_sanity()
|
messages = check_sanity()
|
||||||
self.assertFalse(messages.has_error)
|
assert messages.has_error
|
||||||
self.assertFalse(messages.has_warning)
|
assert any(
|
||||||
self.assertEqual(len(messages), 1)
|
"Thumbnail of document does not exist" in m["message"]
|
||||||
self.assertRegex(
|
for m in messages[sample_doc.pk]
|
||||||
messages[doc.pk][0]["message"],
|
|
||||||
"Document contains no OCR data",
|
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_orphaned_file(self) -> None:
|
def test_unreadable(self, sample_doc: Document) -> None:
|
||||||
self.make_test_data()
|
thumb = Path(sample_doc.thumbnail_path)
|
||||||
Path(self.dirs.originals_dir, "orphaned").touch()
|
thumb.chmod(0o000)
|
||||||
|
try:
|
||||||
|
messages = check_sanity()
|
||||||
|
assert messages.has_error
|
||||||
|
assert any(
|
||||||
|
"Cannot read thumbnail" in m["message"] for m in messages[sample_doc.pk]
|
||||||
|
)
|
||||||
|
finally:
|
||||||
|
thumb.chmod(0o644)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db
|
||||||
|
class TestCheckSanityOriginal:
|
||||||
|
def test_missing(self, sample_doc: Document) -> None:
|
||||||
|
Path(sample_doc.source_path).unlink()
|
||||||
messages = check_sanity()
|
messages = check_sanity()
|
||||||
self.assertTrue(messages.has_warning)
|
assert messages.has_error
|
||||||
self.assertRegex(
|
assert any(
|
||||||
messages._messages[None][0]["message"],
|
"Original of document does not exist" in m["message"]
|
||||||
"Orphaned file in media dir",
|
for m in messages[sample_doc.pk]
|
||||||
)
|
)
|
||||||
|
|
||||||
@override_settings(
|
def test_checksum_mismatch(self, sample_doc: Document) -> None:
|
||||||
APP_LOGO="logo/logo.png",
|
sample_doc.checksum = "badhash"
|
||||||
|
sample_doc.save()
|
||||||
|
messages = check_sanity()
|
||||||
|
assert messages.has_error
|
||||||
|
assert any(
|
||||||
|
"Checksum mismatch" in m["message"] and "badhash" in m["message"]
|
||||||
|
for m in messages[sample_doc.pk]
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_unreadable(self, sample_doc: Document) -> None:
|
||||||
|
src = Path(sample_doc.source_path)
|
||||||
|
src.chmod(0o000)
|
||||||
|
try:
|
||||||
|
messages = check_sanity()
|
||||||
|
assert messages.has_error
|
||||||
|
assert any(
|
||||||
|
"Cannot read original" in m["message"] for m in messages[sample_doc.pk]
|
||||||
|
)
|
||||||
|
finally:
|
||||||
|
src.chmod(0o644)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db
|
||||||
|
class TestCheckSanityArchive:
|
||||||
|
def test_checksum_without_filename(self, sample_doc: Document) -> None:
|
||||||
|
sample_doc.archive_filename = None
|
||||||
|
sample_doc.save()
|
||||||
|
messages = check_sanity()
|
||||||
|
assert messages.has_error
|
||||||
|
assert any(
|
||||||
|
"checksum, but no archive filename" in m["message"]
|
||||||
|
for m in messages[sample_doc.pk]
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_filename_without_checksum(self, sample_doc: Document) -> None:
|
||||||
|
sample_doc.archive_checksum = None
|
||||||
|
sample_doc.save()
|
||||||
|
messages = check_sanity()
|
||||||
|
assert messages.has_error
|
||||||
|
assert any(
|
||||||
|
"checksum is missing" in m["message"] for m in messages[sample_doc.pk]
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_missing_file(self, sample_doc: Document) -> None:
|
||||||
|
Path(sample_doc.archive_path).unlink()
|
||||||
|
messages = check_sanity()
|
||||||
|
assert messages.has_error
|
||||||
|
assert any(
|
||||||
|
"Archived version of document does not exist" in m["message"]
|
||||||
|
for m in messages[sample_doc.pk]
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_checksum_mismatch(self, sample_doc: Document) -> None:
|
||||||
|
sample_doc.archive_checksum = "wronghash"
|
||||||
|
sample_doc.save()
|
||||||
|
messages = check_sanity()
|
||||||
|
assert messages.has_error
|
||||||
|
assert any(
|
||||||
|
"Checksum mismatch of archived document" in m["message"]
|
||||||
|
for m in messages[sample_doc.pk]
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_unreadable(self, sample_doc: Document) -> None:
|
||||||
|
archive = Path(sample_doc.archive_path)
|
||||||
|
archive.chmod(0o000)
|
||||||
|
try:
|
||||||
|
messages = check_sanity()
|
||||||
|
assert messages.has_error
|
||||||
|
assert any(
|
||||||
|
"Cannot read archive" in m["message"] for m in messages[sample_doc.pk]
|
||||||
|
)
|
||||||
|
finally:
|
||||||
|
archive.chmod(0o644)
|
||||||
|
|
||||||
|
def test_no_archive_at_all(self, sample_doc: Document) -> None:
|
||||||
|
"""Document with neither archive checksum nor filename is valid."""
|
||||||
|
Path(sample_doc.archive_path).unlink()
|
||||||
|
sample_doc.archive_checksum = None
|
||||||
|
sample_doc.archive_filename = None
|
||||||
|
sample_doc.save()
|
||||||
|
messages = check_sanity()
|
||||||
|
assert not messages.has_error
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db
|
||||||
|
class TestCheckSanityContent:
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"content",
|
||||||
|
[
|
||||||
|
pytest.param("", id="empty-string"),
|
||||||
|
],
|
||||||
)
|
)
|
||||||
def test_ignore_logo(self) -> None:
|
def test_no_content(self, sample_doc: Document, content: str) -> None:
|
||||||
self.make_test_data()
|
sample_doc.content = content
|
||||||
logo_dir = Path(self.dirs.media_dir, "logo")
|
sample_doc.save()
|
||||||
logo_dir.mkdir(parents=True, exist_ok=True)
|
|
||||||
Path(self.dirs.media_dir, "logo", "logo.png").touch()
|
|
||||||
messages = check_sanity()
|
messages = check_sanity()
|
||||||
self.assertFalse(messages.has_warning)
|
assert not messages.has_error
|
||||||
|
assert not messages.has_warning
|
||||||
|
assert any("no OCR data" in m["message"] for m in messages[sample_doc.pk])
|
||||||
|
|
||||||
def test_ignore_ignorable_files(self) -> None:
|
|
||||||
self.make_test_data()
|
@pytest.mark.django_db
|
||||||
Path(self.dirs.media_dir, ".DS_Store").touch()
|
class TestCheckSanityOrphans:
|
||||||
Path(self.dirs.media_dir, "desktop.ini").touch()
|
def test_orphaned_file(
|
||||||
|
self,
|
||||||
|
sample_doc: Document,
|
||||||
|
paperless_dirs: PaperlessDirs,
|
||||||
|
) -> None:
|
||||||
|
(paperless_dirs.originals / "orphan.pdf").touch()
|
||||||
messages = check_sanity()
|
messages = check_sanity()
|
||||||
self.assertFalse(messages.has_warning)
|
assert messages.has_warning
|
||||||
|
assert any("Orphaned file" in m["message"] for m in messages[None])
|
||||||
|
|
||||||
def test_archive_filename_no_checksum(self) -> None:
|
@pytest.mark.usefixtures("_media_settings")
|
||||||
doc = self.make_test_data()
|
def test_ignorable_files_not_flagged(
|
||||||
doc.archive_checksum = None
|
self,
|
||||||
doc.save()
|
paperless_dirs: PaperlessDirs,
|
||||||
self.assertSanityError(doc, "has an archive file, but its checksum is missing.")
|
) -> None:
|
||||||
|
(paperless_dirs.media / ".DS_Store").touch()
|
||||||
|
(paperless_dirs.media / "desktop.ini").touch()
|
||||||
|
messages = check_sanity()
|
||||||
|
assert not messages.has_warning
|
||||||
|
|
||||||
def test_archive_checksum_no_filename(self) -> None:
|
|
||||||
doc = self.make_test_data()
|
@pytest.mark.django_db
|
||||||
doc.archive_filename = None
|
class TestCheckSanityIterWrapper:
|
||||||
doc.save()
|
def test_wrapper_receives_documents(self, sample_doc: Document) -> None:
|
||||||
self.assertSanityError(
|
seen: list[Document] = []
|
||||||
doc,
|
|
||||||
"has an archive file checksum, but no archive filename.",
|
def tracking(iterable: Iterable[Document]) -> Iterable[Document]:
|
||||||
)
|
for item in iterable:
|
||||||
|
seen.append(item)
|
||||||
|
yield item
|
||||||
|
|
||||||
|
check_sanity(iter_wrapper=tracking)
|
||||||
|
assert len(seen) == 1
|
||||||
|
assert seen[0].pk == sample_doc.pk
|
||||||
|
|
||||||
|
def test_default_works_without_wrapper(self, sample_doc: Document) -> None:
|
||||||
|
messages = check_sanity()
|
||||||
|
assert not messages.has_error
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db
|
||||||
|
class TestCheckSanityTaskRecording:
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
("expected_type", "scheduled"),
|
||||||
|
[
|
||||||
|
pytest.param(PaperlessTask.TaskType.SCHEDULED_TASK, True, id="scheduled"),
|
||||||
|
pytest.param(PaperlessTask.TaskType.MANUAL_TASK, False, id="manual"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
@pytest.mark.usefixtures("_media_settings")
|
||||||
|
def test_task_type(self, expected_type: str, *, scheduled: bool) -> None:
|
||||||
|
check_sanity(scheduled=scheduled)
|
||||||
|
task = PaperlessTask.objects.latest("date_created")
|
||||||
|
assert task.task_name == PaperlessTask.TaskName.CHECK_SANITY
|
||||||
|
assert task.type == expected_type
|
||||||
|
|
||||||
|
def test_success_status(self, sample_doc: Document) -> None:
|
||||||
|
check_sanity()
|
||||||
|
task = PaperlessTask.objects.latest("date_created")
|
||||||
|
assert task.status == "SUCCESS"
|
||||||
|
|
||||||
|
def test_failure_status(self, sample_doc: Document) -> None:
|
||||||
|
Path(sample_doc.source_path).unlink()
|
||||||
|
check_sanity()
|
||||||
|
task = PaperlessTask.objects.latest("date_created")
|
||||||
|
assert task.status == "FAILURE"
|
||||||
|
assert "Check logs for details" in task.result
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db
|
||||||
|
class TestCheckSanityLogMessages:
|
||||||
|
def test_logs_doc_issues(
|
||||||
|
self,
|
||||||
|
sample_doc: Document,
|
||||||
|
caplog: pytest.LogCaptureFixture,
|
||||||
|
) -> None:
|
||||||
|
Path(sample_doc.source_path).unlink()
|
||||||
|
messages = check_sanity()
|
||||||
|
with caplog.at_level(logging.INFO, logger="paperless.sanity_checker"):
|
||||||
|
messages.log_messages()
|
||||||
|
assert f"document #{sample_doc.pk}" in caplog.text
|
||||||
|
assert "Original of document does not exist" in caplog.text
|
||||||
|
|
||||||
|
def test_logs_global_issues(
|
||||||
|
self,
|
||||||
|
sample_doc: Document,
|
||||||
|
paperless_dirs: PaperlessDirs,
|
||||||
|
caplog: pytest.LogCaptureFixture,
|
||||||
|
) -> None:
|
||||||
|
(paperless_dirs.originals / "orphan.pdf").touch()
|
||||||
|
messages = check_sanity()
|
||||||
|
with caplog.at_level(logging.WARNING, logger="paperless.sanity_checker"):
|
||||||
|
messages.log_messages()
|
||||||
|
assert "Orphaned file" in caplog.text
|
||||||
|
|
||||||
|
@pytest.mark.usefixtures("_media_settings")
|
||||||
|
def test_logs_unknown_doc_pk(self, caplog: pytest.LogCaptureFixture) -> None:
|
||||||
|
"""A doc PK not in the DB logs 'Unknown' as the title."""
|
||||||
|
messages = check_sanity()
|
||||||
|
messages.error(99999, "Ghost document")
|
||||||
|
with caplog.at_level(logging.INFO, logger="paperless.sanity_checker"):
|
||||||
|
messages.log_messages()
|
||||||
|
assert "#99999" in caplog.text
|
||||||
|
assert "Unknown" in caplog.text
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ from datetime import timedelta
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from unittest import mock
|
from unittest import mock
|
||||||
|
|
||||||
|
import pytest
|
||||||
from celery import states
|
from celery import states
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django.test import TestCase
|
from django.test import TestCase
|
||||||
@@ -105,55 +106,83 @@ class TestClassifier(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
self.assertNotEqual(mtime2, mtime3)
|
self.assertNotEqual(mtime2, mtime3)
|
||||||
|
|
||||||
|
|
||||||
class TestSanityCheck(DirectoriesMixin, TestCase):
|
@pytest.mark.django_db
|
||||||
@mock.patch("documents.tasks.sanity_checker.check_sanity")
|
class TestSanityCheck:
|
||||||
def test_sanity_check_success(self, m) -> None:
|
@pytest.fixture
|
||||||
m.return_value = SanityCheckMessages()
|
def mock_check_sanity(self, mocker) -> mock.MagicMock:
|
||||||
self.assertEqual(tasks.sanity_check(), "No issues detected.")
|
return mocker.patch("documents.tasks.sanity_checker.check_sanity")
|
||||||
m.assert_called_once()
|
|
||||||
|
|
||||||
@mock.patch("documents.tasks.sanity_checker.check_sanity")
|
def test_sanity_check_success(self, mock_check_sanity: mock.MagicMock) -> None:
|
||||||
def test_sanity_check_error(self, m) -> None:
|
mock_check_sanity.return_value = SanityCheckMessages()
|
||||||
messages = SanityCheckMessages()
|
assert tasks.sanity_check() == "No issues detected."
|
||||||
messages.error(None, "Some error")
|
mock_check_sanity.assert_called_once()
|
||||||
m.return_value = messages
|
|
||||||
self.assertRaises(SanityCheckFailedException, tasks.sanity_check)
|
|
||||||
m.assert_called_once()
|
|
||||||
|
|
||||||
@mock.patch("documents.tasks.sanity_checker.check_sanity")
|
def test_sanity_check_error_raises(
|
||||||
def test_sanity_check_error_no_raise(self, m) -> None:
|
self,
|
||||||
|
mock_check_sanity: mock.MagicMock,
|
||||||
|
sample_doc: Document,
|
||||||
|
) -> None:
|
||||||
messages = SanityCheckMessages()
|
messages = SanityCheckMessages()
|
||||||
messages.error(None, "Some error")
|
messages.error(sample_doc.pk, "some error")
|
||||||
m.return_value = messages
|
mock_check_sanity.return_value = messages
|
||||||
# No exception should be raised
|
with pytest.raises(SanityCheckFailedException):
|
||||||
|
tasks.sanity_check()
|
||||||
|
mock_check_sanity.assert_called_once()
|
||||||
|
|
||||||
|
def test_sanity_check_error_no_raise(
|
||||||
|
self,
|
||||||
|
mock_check_sanity: mock.MagicMock,
|
||||||
|
sample_doc: Document,
|
||||||
|
) -> None:
|
||||||
|
messages = SanityCheckMessages()
|
||||||
|
messages.error(sample_doc.pk, "some error")
|
||||||
|
mock_check_sanity.return_value = messages
|
||||||
result = tasks.sanity_check(raise_on_error=False)
|
result = tasks.sanity_check(raise_on_error=False)
|
||||||
self.assertEqual(
|
assert "1 document(s) with errors" in result
|
||||||
result,
|
assert "Check logs for details." in result
|
||||||
"Sanity check exited with errors. See log.",
|
mock_check_sanity.assert_called_once()
|
||||||
)
|
|
||||||
m.assert_called_once()
|
|
||||||
|
|
||||||
@mock.patch("documents.tasks.sanity_checker.check_sanity")
|
def test_sanity_check_warning_only(
|
||||||
def test_sanity_check_warning(self, m) -> None:
|
self,
|
||||||
|
mock_check_sanity: mock.MagicMock,
|
||||||
|
) -> None:
|
||||||
messages = SanityCheckMessages()
|
messages = SanityCheckMessages()
|
||||||
messages.warning(None, "Some warning")
|
messages.warning(None, "extra file")
|
||||||
m.return_value = messages
|
mock_check_sanity.return_value = messages
|
||||||
self.assertEqual(
|
result = tasks.sanity_check()
|
||||||
tasks.sanity_check(),
|
assert result == "1 global warning(s) found."
|
||||||
"Sanity check exited with warnings. See log.",
|
mock_check_sanity.assert_called_once()
|
||||||
)
|
|
||||||
m.assert_called_once()
|
|
||||||
|
|
||||||
@mock.patch("documents.tasks.sanity_checker.check_sanity")
|
def test_sanity_check_info_only(
|
||||||
def test_sanity_check_info(self, m) -> None:
|
self,
|
||||||
|
mock_check_sanity: mock.MagicMock,
|
||||||
|
sample_doc: Document,
|
||||||
|
) -> None:
|
||||||
messages = SanityCheckMessages()
|
messages = SanityCheckMessages()
|
||||||
messages.info(None, "Some info")
|
messages.info(sample_doc.pk, "some info")
|
||||||
m.return_value = messages
|
mock_check_sanity.return_value = messages
|
||||||
self.assertEqual(
|
result = tasks.sanity_check()
|
||||||
tasks.sanity_check(),
|
assert result == "1 document(s) with infos found."
|
||||||
"Sanity check exited with infos. See log.",
|
mock_check_sanity.assert_called_once()
|
||||||
)
|
|
||||||
m.assert_called_once()
|
def test_sanity_check_errors_warnings_and_infos(
|
||||||
|
self,
|
||||||
|
mock_check_sanity: mock.MagicMock,
|
||||||
|
sample_doc: Document,
|
||||||
|
) -> None:
|
||||||
|
messages = SanityCheckMessages()
|
||||||
|
messages.error(sample_doc.pk, "broken")
|
||||||
|
messages.warning(sample_doc.pk, "odd")
|
||||||
|
messages.info(sample_doc.pk, "fyi")
|
||||||
|
messages.warning(None, "extra file")
|
||||||
|
mock_check_sanity.return_value = messages
|
||||||
|
result = tasks.sanity_check(raise_on_error=False)
|
||||||
|
assert "1 document(s) with errors" in result
|
||||||
|
assert "1 document(s) with warnings" in result
|
||||||
|
assert "1 document(s) with infos" in result
|
||||||
|
assert "1 global warning(s)" in result
|
||||||
|
assert "Check logs for details." in result
|
||||||
|
mock_check_sanity.assert_called_once()
|
||||||
|
|
||||||
|
|
||||||
class TestBulkUpdate(DirectoriesMixin, TestCase):
|
class TestBulkUpdate(DirectoriesMixin, TestCase):
|
||||||
|
|||||||
@@ -4666,7 +4666,7 @@ class TestDateWorkflowLocalization(
|
|||||||
14,
|
14,
|
||||||
30,
|
30,
|
||||||
5,
|
5,
|
||||||
tzinfo=datetime.timezone.utc,
|
tzinfo=datetime.UTC,
|
||||||
)
|
)
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from enum import Enum
|
from enum import StrEnum
|
||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
@@ -11,7 +11,7 @@ if TYPE_CHECKING:
|
|||||||
from django.http import HttpRequest
|
from django.http import HttpRequest
|
||||||
|
|
||||||
|
|
||||||
class VersionResolutionError(str, Enum):
|
class VersionResolutionError(StrEnum):
|
||||||
INVALID = "invalid"
|
INVALID = "invalid"
|
||||||
NOT_FOUND = "not_found"
|
NOT_FOUND = "not_found"
|
||||||
|
|
||||||
|
|||||||
@@ -204,6 +204,61 @@ def audit_log_check(app_configs, **kwargs):
|
|||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
@register()
def check_v3_minimum_upgrade_version(
    app_configs: object,
    **kwargs: object,
) -> list[Error]:
    """Enforce that upgrades to v3 must start from v2.20.9.

    v3 squashes all prior migrations into 0001_squashed and 0002_squashed.
    If a user skips v2.20.9, the data migration in 1075_workflowaction_order
    never runs and the squash may apply schema changes against an incomplete
    database state.

    Args:
        app_configs: Unused; present because system checks receive it.
        **kwargs: Unused; accepted for system-check signature compatibility.

    Returns:
        An empty list when there is no ``django_migrations`` table, no
        applied ``documents`` migrations, a squashed (v3) migration is already
        recorded, ``1075_workflowaction_order`` is recorded, or the database
        cannot be queried. Otherwise a single ``Error`` with id
        ``paperless.E002``.
    """
    # OperationalError is a subclass of DatabaseError, so catching the base
    # class alone covers both.
    from django.db import DatabaseError

    try:
        connection = connections["default"]
        all_tables = connection.introspection.table_names()

        # No migration bookkeeping at all: nothing to enforce.
        if "django_migrations" not in all_tables:
            return []

        with connection.cursor() as cursor:
            cursor.execute(
                "SELECT name FROM django_migrations WHERE app = %s",
                ["documents"],
            )
            applied: set[str] = {row[0] for row in cursor.fetchall()}

        # No documents migrations recorded: nothing to enforce.
        if not applied:
            return []

        # Already in a valid v3 state
        if {"0001_squashed", "0002_squashed"} & applied:
            return []

        # On v2.20.9 exactly — squash will pick up cleanly from here
        if "1075_workflowaction_order" in applied:
            return []

    except DatabaseError:
        # Best effort: if the database cannot be inspected, do not block
        # startup from this check.
        return []

    return [
        Error(
            "Cannot upgrade to Paperless-ngx v3 from this version.",
            hint=(
                # Adjacent literals are concatenated verbatim, so each
                # sentence carries its own trailing space.
                "Upgrading to v3 can only be performed from v2.20.9. "
                "Please upgrade to v2.20.9, run migrations, then upgrade to v3. "
                "See https://docs.paperless-ngx.com/setup/#upgrading for details."
            ),
            id="paperless.E002",
        ),
    ]
|
||||||
|
|
||||||
|
|
||||||
@register()
|
@register()
|
||||||
def check_deprecated_db_settings(
|
def check_deprecated_db_settings(
|
||||||
app_configs: object,
|
app_configs: object,
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ from pathlib import Path
|
|||||||
from unittest import mock
|
from unittest import mock
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
from django.core.checks import Error
|
||||||
from django.core.checks import Warning
|
from django.core.checks import Warning
|
||||||
from django.test import TestCase
|
from django.test import TestCase
|
||||||
from django.test import override_settings
|
from django.test import override_settings
|
||||||
@@ -13,6 +14,7 @@ from documents.tests.utils import FileSystemAssertsMixin
|
|||||||
from paperless.checks import audit_log_check
|
from paperless.checks import audit_log_check
|
||||||
from paperless.checks import binaries_check
|
from paperless.checks import binaries_check
|
||||||
from paperless.checks import check_deprecated_db_settings
|
from paperless.checks import check_deprecated_db_settings
|
||||||
|
from paperless.checks import check_v3_minimum_upgrade_version
|
||||||
from paperless.checks import debug_mode_check
|
from paperless.checks import debug_mode_check
|
||||||
from paperless.checks import paths_check
|
from paperless.checks import paths_check
|
||||||
from paperless.checks import settings_values_check
|
from paperless.checks import settings_values_check
|
||||||
@@ -395,3 +397,240 @@ class TestDeprecatedDbSettings:
|
|||||||
|
|
||||||
assert len(result) == 1
|
assert len(result) == 1
|
||||||
assert "PAPERLESS_DBSSLCERT" in result[0].msg
|
assert "PAPERLESS_DBSSLCERT" in result[0].msg
|
||||||
|
|
||||||
|
|
||||||
|
class TestV3MinimumUpgradeVersionCheck:
|
||||||
|
"""Test suite for check_v3_minimum_upgrade_version system check."""
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def build_conn_mock(self, mocker: MockerFixture):
|
||||||
|
"""Factory fixture that builds a connections['default'] mock.
|
||||||
|
|
||||||
|
Usage::
|
||||||
|
|
||||||
|
conn = build_conn_mock(tables=["django_migrations"], applied=["1075_..."])
|
||||||
|
"""
|
||||||
|
|
||||||
|
def _build(tables: list[str], applied: list[str]) -> mock.MagicMock:
|
||||||
|
conn = mocker.MagicMock()
|
||||||
|
conn.introspection.table_names.return_value = tables
|
||||||
|
cursor = conn.cursor.return_value.__enter__.return_value
|
||||||
|
cursor.fetchall.return_value = [(name,) for name in applied]
|
||||||
|
return conn
|
||||||
|
|
||||||
|
return _build
|
||||||
|
|
||||||
|
def test_no_migrations_table_fresh_install(
|
||||||
|
self,
|
||||||
|
mocker: MockerFixture,
|
||||||
|
build_conn_mock,
|
||||||
|
) -> None:
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- No django_migrations table exists in the database
|
||||||
|
WHEN:
|
||||||
|
- The v3 upgrade check runs
|
||||||
|
THEN:
|
||||||
|
- No errors are reported (fresh install, nothing to enforce)
|
||||||
|
"""
|
||||||
|
mocker.patch.dict(
|
||||||
|
"paperless.checks.connections",
|
||||||
|
{"default": build_conn_mock([], [])},
|
||||||
|
)
|
||||||
|
assert check_v3_minimum_upgrade_version(None) == []
|
||||||
|
|
||||||
|
def test_no_documents_migrations_fresh_install(
|
||||||
|
self,
|
||||||
|
mocker: MockerFixture,
|
||||||
|
build_conn_mock,
|
||||||
|
) -> None:
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- django_migrations table exists but has no documents app rows
|
||||||
|
WHEN:
|
||||||
|
- The v3 upgrade check runs
|
||||||
|
THEN:
|
||||||
|
- No errors are reported (fresh install, nothing to enforce)
|
||||||
|
"""
|
||||||
|
mocker.patch.dict(
|
||||||
|
"paperless.checks.connections",
|
||||||
|
{"default": build_conn_mock(["django_migrations"], [])},
|
||||||
|
)
|
||||||
|
assert check_v3_minimum_upgrade_version(None) == []
|
||||||
|
|
||||||
|
def test_v3_state_with_0001_squashed(
|
||||||
|
self,
|
||||||
|
mocker: MockerFixture,
|
||||||
|
build_conn_mock,
|
||||||
|
) -> None:
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- 0001_squashed is recorded in django_migrations
|
||||||
|
WHEN:
|
||||||
|
- The v3 upgrade check runs
|
||||||
|
THEN:
|
||||||
|
- No errors are reported (DB is already in a valid v3 state)
|
||||||
|
"""
|
||||||
|
mocker.patch.dict(
|
||||||
|
"paperless.checks.connections",
|
||||||
|
{
|
||||||
|
"default": build_conn_mock(
|
||||||
|
["django_migrations"],
|
||||||
|
["0001_squashed", "0002_squashed", "0003_workflowaction_order"],
|
||||||
|
),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
assert check_v3_minimum_upgrade_version(None) == []
|
||||||
|
|
||||||
|
def test_v3_state_with_0002_squashed_only(
    self,
    mocker: MockerFixture,
    build_conn_mock,
) -> None:
    """
    GIVEN:
        - The only migration recorded in django_migrations is
          0002_squashed
    WHEN:
        - check_v3_minimum_upgrade_version is invoked
    THEN:
        - The check returns an empty list; 0002_squashed by itself is
          enough to confirm a valid v3 state
    """
    v3_db = build_conn_mock(["django_migrations"], ["0002_squashed"])
    mocker.patch.dict("paperless.checks.connections", {"default": v3_db})

    reported = check_v3_minimum_upgrade_version(None)

    assert reported == []
|
||||||
|
|
||||||
|
def test_v2_20_9_state_ready_to_upgrade(
    self,
    mocker: MockerFixture,
    build_conn_mock,
) -> None:
    """
    GIVEN:
        - django_migrations contains 1075_workflowaction_order, the
          final migration shipped with v2.20.9
    WHEN:
        - check_v3_minimum_upgrade_version is invoked
    THEN:
        - The check returns an empty list, because the squashed
          migrations can continue cleanly from this state
    """
    applied_migrations = [
        "1074_workflowrun_deleted_at_workflowrun_restored_at_and_more",
        "1075_workflowaction_order",
    ]
    v2_20_9_db = build_conn_mock(["django_migrations"], applied_migrations)
    mocker.patch.dict("paperless.checks.connections", {"default": v2_20_9_db})

    reported = check_v3_minimum_upgrade_version(None)

    assert reported == []
|
||||||
|
|
||||||
|
def test_v2_20_8_raises_error(
    self,
    mocker: MockerFixture,
    build_conn_mock,
) -> None:
    """
    GIVEN:
        - Migration 1074 (the final v2.20.8 migration) has been
          applied, while 1075 has not
    WHEN:
        - check_v3_minimum_upgrade_version is invoked
    THEN:
        - Exactly one Error is returned and its id is paperless.E002
    """
    applied_migrations = [
        "1074_workflowrun_deleted_at_workflowrun_restored_at_and_more",
    ]
    v2_20_8_db = build_conn_mock(["django_migrations"], applied_migrations)
    mocker.patch.dict("paperless.checks.connections", {"default": v2_20_8_db})

    reported = check_v3_minimum_upgrade_version(None)

    assert len(reported) == 1
    only_error = reported[0]
    assert isinstance(only_error, Error)
    assert only_error.id == "paperless.E002"
|
||||||
|
|
||||||
|
def test_very_old_version_raises_error(
    self,
    mocker: MockerFixture,
    build_conn_mock,
) -> None:
    """
    GIVEN:
        - django_migrations only lists old migrations, far behind
          the v2.20.9 state
    WHEN:
        - check_v3_minimum_upgrade_version is invoked
    THEN:
        - Exactly one Error is returned and its id is paperless.E002
    """
    applied_migrations = ["1000_update_paperless_all", "1022_paperlesstask"]
    outdated_db = build_conn_mock(["django_migrations"], applied_migrations)
    mocker.patch.dict("paperless.checks.connections", {"default": outdated_db})

    reported = check_v3_minimum_upgrade_version(None)

    assert len(reported) == 1
    only_error = reported[0]
    assert isinstance(only_error, Error)
    assert only_error.id == "paperless.E002"
|
||||||
|
|
||||||
|
def test_error_hint_mentions_v2_20_9(
    self,
    mocker: MockerFixture,
    build_conn_mock,
) -> None:
    """
    GIVEN:
        - The database is at an old v2 state that predates v2.20.9
    WHEN:
        - check_v3_minimum_upgrade_version is invoked
    THEN:
        - The hint of the returned error names v2.20.9 explicitly, so
          users know which version to install first
    """
    outdated_db = build_conn_mock(["django_migrations"], ["1022_paperlesstask"])
    mocker.patch.dict("paperless.checks.connections", {"default": outdated_db})

    reported = check_v3_minimum_upgrade_version(None)

    assert len(reported) == 1
    hint_text = reported[0].hint
    assert "v2.20.9" in hint_text
|
||||||
|
|
||||||
|
def test_db_error_is_swallowed(self, mocker: MockerFixture) -> None:
    """
    GIVEN:
        - Listing the database tables raises a DatabaseError
    WHEN:
        - check_v3_minimum_upgrade_version is invoked
    THEN:
        - The exception does not escape the check, and the result is
          an empty list
    """
    from django.db import DatabaseError

    failing_conn = mocker.MagicMock()
    # Table introspection fails with the configured error when called
    failure = DatabaseError("connection refused")
    failing_conn.introspection.table_names.side_effect = failure
    mocker.patch.dict("paperless.checks.connections", {"default": failing_conn})

    reported = check_v3_minimum_upgrade_version(None)

    assert reported == []
|
||||||
|
|
||||||
|
def test_operational_error_is_swallowed(self, mocker: MockerFixture) -> None:
    """
    GIVEN:
        - Listing the database tables raises an OperationalError
    WHEN:
        - check_v3_minimum_upgrade_version is invoked
    THEN:
        - The exception does not escape the check, and the result is
          an empty list
    """
    from django.db import OperationalError

    failing_conn = mocker.MagicMock()
    # Table introspection fails with the configured error when called
    failure = OperationalError("DB unavailable")
    failing_conn.introspection.table_names.side_effect = failure
    mocker.patch.dict("paperless.checks.connections", {"default": failing_conn})

    reported = check_v3_minimum_upgrade_version(None)

    assert reported == []
|
||||||
|
|||||||
@@ -378,7 +378,6 @@ class ApplicationConfigurationViewSet(ModelViewSet):
|
|||||||
):
|
):
|
||||||
# AI index was just enabled and vector store file does not exist
|
# AI index was just enabled and vector store file does not exist
|
||||||
llmindex_index.delay(
|
llmindex_index.delay(
|
||||||
progress_bar_disable=True,
|
|
||||||
rebuild=True,
|
rebuild=True,
|
||||||
scheduled=False,
|
scheduled=False,
|
||||||
auto=True,
|
auto=True,
|
||||||
|
|||||||
@@ -1,11 +1,13 @@
|
|||||||
import logging
|
import logging
|
||||||
import shutil
|
import shutil
|
||||||
|
from collections.abc import Callable
|
||||||
|
from collections.abc import Iterable
|
||||||
from datetime import timedelta
|
from datetime import timedelta
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from typing import TypeVar
|
||||||
|
|
||||||
import faiss
|
import faiss
|
||||||
import llama_index.core.settings as llama_settings
|
import llama_index.core.settings as llama_settings
|
||||||
import tqdm
|
|
||||||
from celery import states
|
from celery import states
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django.utils import timezone
|
from django.utils import timezone
|
||||||
@@ -29,6 +31,14 @@ from paperless_ai.embedding import build_llm_index_text
|
|||||||
from paperless_ai.embedding import get_embedding_dim
|
from paperless_ai.embedding import get_embedding_dim
|
||||||
from paperless_ai.embedding import get_embedding_model
|
from paperless_ai.embedding import get_embedding_model
|
||||||
|
|
||||||
|
_T = TypeVar("_T")
|
||||||
|
IterWrapper = Callable[[Iterable[_T]], Iterable[_T]]
|
||||||
|
|
||||||
|
|
||||||
|
def _identity(iterable: Iterable[_T]) -> Iterable[_T]:
|
||||||
|
return iterable
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger("paperless_ai.indexing")
|
logger = logging.getLogger("paperless_ai.indexing")
|
||||||
|
|
||||||
|
|
||||||
@@ -156,7 +166,11 @@ def vector_store_file_exists():
|
|||||||
return Path(settings.LLM_INDEX_DIR / "default__vector_store.json").exists()
|
return Path(settings.LLM_INDEX_DIR / "default__vector_store.json").exists()
|
||||||
|
|
||||||
|
|
||||||
def update_llm_index(*, progress_bar_disable=False, rebuild=False) -> str:
|
def update_llm_index(
|
||||||
|
*,
|
||||||
|
iter_wrapper: IterWrapper[Document] = _identity,
|
||||||
|
rebuild=False,
|
||||||
|
) -> str:
|
||||||
"""
|
"""
|
||||||
Rebuild or update the LLM index.
|
Rebuild or update the LLM index.
|
||||||
"""
|
"""
|
||||||
@@ -176,7 +190,7 @@ def update_llm_index(*, progress_bar_disable=False, rebuild=False) -> str:
|
|||||||
embed_model = get_embedding_model()
|
embed_model = get_embedding_model()
|
||||||
llama_settings.Settings.embed_model = embed_model
|
llama_settings.Settings.embed_model = embed_model
|
||||||
storage_context = get_or_create_storage_context(rebuild=True)
|
storage_context = get_or_create_storage_context(rebuild=True)
|
||||||
for document in tqdm.tqdm(documents, disable=progress_bar_disable):
|
for document in iter_wrapper(documents):
|
||||||
document_nodes = build_document_node(document)
|
document_nodes = build_document_node(document)
|
||||||
nodes.extend(document_nodes)
|
nodes.extend(document_nodes)
|
||||||
|
|
||||||
@@ -184,7 +198,7 @@ def update_llm_index(*, progress_bar_disable=False, rebuild=False) -> str:
|
|||||||
nodes=nodes,
|
nodes=nodes,
|
||||||
storage_context=storage_context,
|
storage_context=storage_context,
|
||||||
embed_model=embed_model,
|
embed_model=embed_model,
|
||||||
show_progress=not progress_bar_disable,
|
show_progress=False,
|
||||||
)
|
)
|
||||||
msg = "LLM index rebuilt successfully."
|
msg = "LLM index rebuilt successfully."
|
||||||
else:
|
else:
|
||||||
@@ -196,7 +210,7 @@ def update_llm_index(*, progress_bar_disable=False, rebuild=False) -> str:
|
|||||||
for node in index.docstore.get_nodes(all_node_ids)
|
for node in index.docstore.get_nodes(all_node_ids)
|
||||||
}
|
}
|
||||||
|
|
||||||
for document in tqdm.tqdm(documents, disable=progress_bar_disable):
|
for document in iter_wrapper(documents):
|
||||||
doc_id = str(document.id)
|
doc_id = str(document.id)
|
||||||
document_modified = document.modified.isoformat()
|
document_modified = document.modified.isoformat()
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
# Generated by Django 5.2.9 on 2026-01-20 18:46
|
# Generated by Django 5.2.11 on 2026-03-03 16:27
|
||||||
|
|
||||||
import django.db.models.deletion
|
import django.db.models.deletion
|
||||||
import django.utils.timezone
|
import django.utils.timezone
|
||||||
@@ -15,6 +15,50 @@ class Migration(migrations.Migration):
|
|||||||
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
|
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
replaces = [
|
||||||
|
("paperless_mail", "0001_initial"),
|
||||||
|
("paperless_mail", "0001_initial_squashed_0009_mailrule_assign_tags"),
|
||||||
|
("paperless_mail", "0002_auto_20201117_1334"),
|
||||||
|
("paperless_mail", "0003_auto_20201118_1940"),
|
||||||
|
("paperless_mail", "0004_mailrule_order"),
|
||||||
|
("paperless_mail", "0005_help_texts"),
|
||||||
|
("paperless_mail", "0006_auto_20210101_2340"),
|
||||||
|
("paperless_mail", "0007_auto_20210106_0138"),
|
||||||
|
("paperless_mail", "0008_auto_20210516_0940"),
|
||||||
|
("paperless_mail", "0009_alter_mailrule_action_alter_mailrule_folder"),
|
||||||
|
("paperless_mail", "0009_mailrule_assign_tags"),
|
||||||
|
("paperless_mail", "0010_auto_20220311_1602"),
|
||||||
|
("paperless_mail", "0011_remove_mailrule_assign_tag"),
|
||||||
|
(
|
||||||
|
"paperless_mail",
|
||||||
|
"0011_remove_mailrule_assign_tag_squashed_0024_alter_mailrule_name_and_more",
|
||||||
|
),
|
||||||
|
("paperless_mail", "0012_alter_mailrule_assign_tags"),
|
||||||
|
("paperless_mail", "0013_merge_20220412_1051"),
|
||||||
|
("paperless_mail", "0014_alter_mailrule_action"),
|
||||||
|
("paperless_mail", "0015_alter_mailrule_action"),
|
||||||
|
("paperless_mail", "0016_mailrule_consumption_scope"),
|
||||||
|
("paperless_mail", "0017_mailaccount_owner_mailrule_owner"),
|
||||||
|
("paperless_mail", "0018_processedmail"),
|
||||||
|
("paperless_mail", "0019_mailrule_filter_to"),
|
||||||
|
("paperless_mail", "0020_mailaccount_is_token"),
|
||||||
|
("paperless_mail", "0021_alter_mailaccount_password"),
|
||||||
|
("paperless_mail", "0022_mailrule_assign_owner_from_rule_and_more"),
|
||||||
|
("paperless_mail", "0023_remove_mailrule_filter_attachment_filename_and_more"),
|
||||||
|
("paperless_mail", "0024_alter_mailrule_name_and_more"),
|
||||||
|
(
|
||||||
|
"paperless_mail",
|
||||||
|
"0025_alter_mailaccount_owner_alter_mailrule_owner_and_more",
|
||||||
|
),
|
||||||
|
("paperless_mail", "0026_mailrule_enabled"),
|
||||||
|
(
|
||||||
|
"paperless_mail",
|
||||||
|
"0027_mailaccount_expiration_mailaccount_account_type_and_more",
|
||||||
|
),
|
||||||
|
("paperless_mail", "0028_alter_mailaccount_password_and_more"),
|
||||||
|
("paperless_mail", "0029_mailrule_pdf_layout"),
|
||||||
|
]
|
||||||
|
|
||||||
operations = [
|
operations = [
|
||||||
migrations.CreateModel(
|
migrations.CreateModel(
|
||||||
name="MailAccount",
|
name="MailAccount",
|
||||||
@@ -6,7 +6,7 @@ from django.db import models
|
|||||||
|
|
||||||
class Migration(migrations.Migration):
|
class Migration(migrations.Migration):
|
||||||
dependencies = [
|
dependencies = [
|
||||||
("paperless_mail", "0001_initial"),
|
("paperless_mail", "0001_squashed"),
|
||||||
]
|
]
|
||||||
|
|
||||||
operations = [
|
operations = [
|
||||||
|
|||||||
Reference in New Issue
Block a user