Mirror of https://github.com/paperless-ngx/paperless-ngx.git (synced 2026-03-30 04:42:45 +00:00)

Compare commits: 111 commits, feature-di ... chore/plug
Commit SHAs:

3d0d243057, 9383471fa0, 0060b46c8b, b153ec803b, 38dba60ceb, ae0474450f, 8efb01010c, d18bbfa9c3,
ec76d3c762, bdc0a58242, b049ad9626, 7a192d021f, 1e30490a46, bd9e529a63, 79def8a200, 701735f6e5,
07f54bfdab, 0f84af27d0, 9646b8c67d, e590d7df69, cc71aad058, 3cbdf5d0b7, f84e0097e5, 7dbf8bdd4a,
d2a752a196, 2cb155e717, 9e9fc6213c, a9756f9462, c2b8b22fb4, d671e34559, f7c12d550a, 68fc898042,
2cbe6ae892, b0bb31654f, 0f7c02de5e, 95dea787f2, b6501b0c47, d162c83eb7, d3ac75741f, 87ebd13abc,
3abff21d1f, 0a08499fc7, 330ee696a8, b98697ab8b, 7e94dd8208, 79da72f69c, 261ae9d8ce, 0e2c191524,
ab4656692d, 03e2c352c2, 2d46ed9692, 8d23d17ae8, aea2927a02, a86c9d32fe, d53dcad4f6, 736b08ad09,
ca5879a54e, 4d4f30b5f8, 85fecac401, 7942edfdf4, 470018c011, 54679a093a, 58ebcc21be, 1caa3eb8aa,
866c9fd858, 2bb4af2be6, 6b8ff9763d, 6034f17c87, 48cd1cce6a, 1e00ad5f30, 5f26c01c6f, 92e133eeb0,
06b2d5102c, 9d69705e26, 01abacab52, 88b8f9b326, 365ff99934, d86cfdb088, 40255cfdbb, c2e1085418,
ee0d1a3094, f15394fa5c, 773eb25f7d, d919c341b1, e2947ccff2, 61841a767b, 15db023caa, 45b363659e,
7494161c95, 5331312699, b5a002b8ed, dd8573242d, 86fa74c115, ba0a80a8ad, b7b9e83f37, 217b5df591,
3efc9a5733, e19f341974, 2b4ea570ef, 86573fc1a0, 3856ec19c0, 60319c6d37, 1221e7f21c, 3e32e90355,
63cb75564e, 6955d6c07f, d85ee29976, 0c7d56c5e7, 0bcf904e3a, bcc2f11152, e18b1fd99d
.github/ISSUE_TEMPLATE/bug-report.yml (3 changes, vendored)

@@ -21,6 +21,7 @@ body:
         - [The installation instructions](https://docs.paperless-ngx.com/setup/#installation).
         - [Existing issues and discussions](https://github.com/paperless-ngx/paperless-ngx/search?q=&type=issues).
         - Disable any custom container initialization scripts, if using
+        - Remove any third-party parser plugins — issues caused by or requiring changes to a third-party plugin will be closed without investigation.

         If you encounter issues while installing or configuring Paperless-ngx, please post in the ["Support" section of the discussions](https://github.com/paperless-ngx/paperless-ngx/discussions/new?category=support).
   - type: textarea
@@ -120,5 +121,7 @@ body:
           required: true
         - label: I have already searched for relevant existing issues and discussions before opening this report.
           required: true
+        - label: I have reproduced this issue with all third-party parser plugins removed. I understand that issues caused by third-party plugins will be closed without investigation.
+          required: true
         - label: I have updated the title field above with a concise description.
           required: true
.github/dependabot.yml (27 changes, vendored)

@@ -12,6 +12,8 @@ updates:
     open-pull-requests-limit: 10
     schedule:
       interval: "monthly"
+    cooldown:
+      default-days: 7
     labels:
       - "frontend"
       - "dependencies"
@@ -36,7 +38,9 @@ updates:
     directory: "/"
     # Check for updates once a week
     schedule:
-      interval: "weekly"
+      interval: "monthly"
+    cooldown:
+      default-days: 7
     labels:
       - "backend"
       - "dependencies"
@@ -97,6 +101,8 @@ updates:
     schedule:
       # Check for updates to GitHub Actions every month
      interval: "monthly"
+    cooldown:
+      default-days: 7
     labels:
       - "ci-cd"
       - "dependencies"
@@ -112,7 +118,9 @@ updates:
       - "/"
       - "/.devcontainer/"
     schedule:
-      interval: "weekly"
+      interval: "monthly"
+    cooldown:
+      default-days: 7
     open-pull-requests-limit: 5
     labels:
       - "dependencies"
@@ -123,7 +131,9 @@ updates:
   - package-ecosystem: "docker-compose"
     directory: "/docker/compose/"
     schedule:
-      interval: "weekly"
+      interval: "monthly"
+    cooldown:
+      default-days: 7
     open-pull-requests-limit: 5
     labels:
       - "dependencies"
@@ -147,3 +157,14 @@ updates:
       postgres:
         patterns:
           - "docker.io/library/postgres*"
+      greenmail:
+        patterns:
+          - "docker.io/greenmail*"
+  - package-ecosystem: "pre-commit" # See documentation for possible values
+    directory: "/" # Location of package manifests
+    schedule:
+      interval: "monthly"
+    groups:
+      pre-commit-dependencies:
+        patterns:
+          - "*"
.github/workflows/ci-backend.yml (86 changes, vendored)

@@ -3,21 +3,9 @@ on:
   push:
     branches-ignore:
       - 'translations**'
-    paths:
-      - 'src/**'
-      - 'pyproject.toml'
-      - 'uv.lock'
-      - 'docker/compose/docker-compose.ci-test.yml'
-      - '.github/workflows/ci-backend.yml'
   pull_request:
     branches-ignore:
       - 'translations**'
-    paths:
-      - 'src/**'
-      - 'pyproject.toml'
-      - 'uv.lock'
-      - 'docker/compose/docker-compose.ci-test.yml'
-      - '.github/workflows/ci-backend.yml'
   workflow_dispatch:
 concurrency:
   group: backend-${{ github.event.pull_request.number || github.ref }}
@@ -26,7 +14,55 @@ env:
   DEFAULT_UV_VERSION: "0.10.x"
   NLTK_DATA: "/usr/share/nltk_data"
 jobs:
+  changes:
+    name: Detect Backend Changes
+    runs-on: ubuntu-slim
+    outputs:
+      backend_changed: ${{ steps.force.outputs.run_all == 'true' || steps.filter.outputs.backend == 'true' }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v6.0.2
+        with:
+          fetch-depth: 0
+      - name: Decide run mode
+        id: force
+        run: |
+          if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
+            echo "run_all=true" >> "$GITHUB_OUTPUT"
+          elif [[ "${{ github.event_name }}" == "push" && ( "${{ github.ref_name }}" == "main" || "${{ github.ref_name }}" == "dev" ) ]]; then
+            echo "run_all=true" >> "$GITHUB_OUTPUT"
+          else
+            echo "run_all=false" >> "$GITHUB_OUTPUT"
+          fi
+      - name: Set diff range
+        id: range
+        if: steps.force.outputs.run_all != 'true'
+        run: |
+          if [[ "${{ github.event_name }}" == "pull_request" ]]; then
+            echo "base=${{ github.event.pull_request.base.sha }}" >> "$GITHUB_OUTPUT"
+          elif [[ "${{ github.event.created }}" == "true" ]]; then
+            echo "base=${{ github.event.repository.default_branch }}" >> "$GITHUB_OUTPUT"
+          else
+            echo "base=${{ github.event.before }}" >> "$GITHUB_OUTPUT"
+          fi
+          echo "ref=${{ github.sha }}" >> "$GITHUB_OUTPUT"
+      - name: Detect changes
+        id: filter
+        if: steps.force.outputs.run_all != 'true'
+        uses: dorny/paths-filter@v3.0.2
+        with:
+          base: ${{ steps.range.outputs.base }}
+          ref: ${{ steps.range.outputs.ref }}
+          filters: |
+            backend:
+              - 'src/**'
+              - 'pyproject.toml'
+              - 'uv.lock'
+              - 'docker/compose/docker-compose.ci-test.yml'
+              - '.github/workflows/ci-backend.yml'
   test:
+    needs: changes
+    if: needs.changes.outputs.backend_changed == 'true'
     name: "Python ${{ matrix.python-version }}"
     runs-on: ubuntu-24.04
     strategy:
@@ -100,6 +136,8 @@ jobs:
           docker compose --file docker/compose/docker-compose.ci-test.yml logs
           docker compose --file docker/compose/docker-compose.ci-test.yml down
   typing:
+    needs: changes
+    if: needs.changes.outputs.backend_changed == 'true'
     name: Check project typing
     runs-on: ubuntu-24.04
     env:
@@ -150,3 +188,27 @@ jobs:
           --show-error-codes \
           --warn-unused-configs \
           src/ | uv run mypy-baseline filter
+  gate:
+    name: Backend CI Gate
+    needs: [changes, test, typing]
+    if: always()
+    runs-on: ubuntu-slim
+    steps:
+      - name: Check gate
+        run: |
+          if [[ "${{ needs.changes.outputs.backend_changed }}" != "true" ]]; then
+            echo "No backend-relevant changes detected."
+            exit 0
+          fi
+
+          if [[ "${{ needs.test.result }}" != "success" ]]; then
+            echo "::error::Backend test job result: ${{ needs.test.result }}"
+            exit 1
+          fi
+
+          if [[ "${{ needs.typing.result }}" != "success" ]]; then
+            echo "::error::Backend typing job result: ${{ needs.typing.result }}"
+            exit 1
+          fi
+
+          echo "Backend checks passed."
.github/workflows/ci-docker.yml (18 changes, vendored)

@@ -104,9 +104,9 @@ jobs:
           echo "repository=${repo_name}"
           echo "name=${repo_name}" >> $GITHUB_OUTPUT
       - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3.12.0
+        uses: docker/setup-buildx-action@v4.0.0
       - name: Login to GitHub Container Registry
-        uses: docker/login-action@v3.7.0
+        uses: docker/login-action@v4.0.0
         with:
           registry: ${{ env.REGISTRY }}
           username: ${{ github.actor }}
@@ -119,7 +119,7 @@ jobs:
           sudo rm -rf "$AGENT_TOOLSDIRECTORY"
       - name: Docker metadata
         id: docker-meta
-        uses: docker/metadata-action@v5.10.0
+        uses: docker/metadata-action@v6.0.0
         with:
           images: |
             ${{ env.REGISTRY }}/${{ steps.repo.outputs.name }}
@@ -130,7 +130,7 @@ jobs:
             type=semver,pattern={{major}}.{{minor}}
       - name: Build and push by digest
         id: build
-        uses: docker/build-push-action@v6.19.2
+        uses: docker/build-push-action@v7.0.0
         with:
           context: .
           file: ./Dockerfile
@@ -179,29 +179,29 @@ jobs:
           echo "Downloaded digests:"
           ls -la /tmp/digests/
       - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3.12.0
+        uses: docker/setup-buildx-action@v4.0.0
       - name: Login to GitHub Container Registry
-        uses: docker/login-action@v3.7.0
+        uses: docker/login-action@v4.0.0
         with:
           registry: ${{ env.REGISTRY }}
           username: ${{ github.actor }}
           password: ${{ secrets.GITHUB_TOKEN }}
       - name: Login to Docker Hub
         if: needs.build-arch.outputs.push-external == 'true'
-        uses: docker/login-action@v3.7.0
+        uses: docker/login-action@v4.0.0
         with:
           username: ${{ secrets.DOCKERHUB_USERNAME }}
           password: ${{ secrets.DOCKERHUB_TOKEN }}
       - name: Login to Quay.io
         if: needs.build-arch.outputs.push-external == 'true'
-        uses: docker/login-action@v3.7.0
+        uses: docker/login-action@v4.0.0
         with:
           registry: quay.io
           username: ${{ secrets.QUAY_USERNAME }}
           password: ${{ secrets.QUAY_ROBOT_TOKEN }}
       - name: Docker metadata
         id: docker-meta
-        uses: docker/metadata-action@v5.10.0
+        uses: docker/metadata-action@v6.0.0
         with:
           images: |
             ${{ env.REGISTRY }}/${{ needs.build-arch.outputs.repository }}
.github/workflows/ci-docs.yml (88 changes, vendored)

@@ -1,22 +1,9 @@
 name: Documentation
 on:
   push:
-    branches:
-      - main
-      - dev
-    paths:
-      - 'docs/**'
-      - 'zensical.toml'
-      - 'pyproject.toml'
-      - 'uv.lock'
-      - '.github/workflows/ci-docs.yml'
+    branches-ignore:
+      - 'translations**'
   pull_request:
-    paths:
-      - 'docs/**'
-      - 'zensical.toml'
-      - 'pyproject.toml'
-      - 'uv.lock'
-      - '.github/workflows/ci-docs.yml'
   workflow_dispatch:
 concurrency:
   group: docs-${{ github.event.pull_request.number || github.ref }}
@@ -29,7 +16,55 @@ env:
   DEFAULT_UV_VERSION: "0.10.x"
   DEFAULT_PYTHON_VERSION: "3.12"
 jobs:
+  changes:
+    name: Detect Docs Changes
+    runs-on: ubuntu-slim
+    outputs:
+      docs_changed: ${{ steps.force.outputs.run_all == 'true' || steps.filter.outputs.docs == 'true' }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v6.0.2
+        with:
+          fetch-depth: 0
+      - name: Decide run mode
+        id: force
+        run: |
+          if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
+            echo "run_all=true" >> "$GITHUB_OUTPUT"
+          elif [[ "${{ github.event_name }}" == "push" && ( "${{ github.ref_name }}" == "main" || "${{ github.ref_name }}" == "dev" ) ]]; then
+            echo "run_all=true" >> "$GITHUB_OUTPUT"
+          else
+            echo "run_all=false" >> "$GITHUB_OUTPUT"
+          fi
+      - name: Set diff range
+        id: range
+        if: steps.force.outputs.run_all != 'true'
+        run: |
+          if [[ "${{ github.event_name }}" == "pull_request" ]]; then
+            echo "base=${{ github.event.pull_request.base.sha }}" >> "$GITHUB_OUTPUT"
+          elif [[ "${{ github.event.created }}" == "true" ]]; then
+            echo "base=${{ github.event.repository.default_branch }}" >> "$GITHUB_OUTPUT"
+          else
+            echo "base=${{ github.event.before }}" >> "$GITHUB_OUTPUT"
+          fi
+          echo "ref=${{ github.sha }}" >> "$GITHUB_OUTPUT"
+      - name: Detect changes
+        id: filter
+        if: steps.force.outputs.run_all != 'true'
+        uses: dorny/paths-filter@v3.0.2
+        with:
+          base: ${{ steps.range.outputs.base }}
+          ref: ${{ steps.range.outputs.ref }}
+          filters: |
+            docs:
+              - 'docs/**'
+              - 'zensical.toml'
+              - 'pyproject.toml'
+              - 'uv.lock'
+              - '.github/workflows/ci-docs.yml'
   build:
+    needs: changes
+    if: needs.changes.outputs.docs_changed == 'true'
     name: Build Documentation
     runs-on: ubuntu-24.04
     steps:
@@ -64,8 +99,8 @@ jobs:
           name: github-pages-${{ github.run_id }}-${{ github.run_attempt }}
   deploy:
     name: Deploy Documentation
-    needs: build
-    if: github.event_name == 'push' && github.ref == 'refs/heads/main'
+    needs: [changes, build]
+    if: github.event_name == 'push' && github.ref == 'refs/heads/main' && needs.changes.outputs.docs_changed == 'true'
     runs-on: ubuntu-24.04
     environment:
       name: github-pages
@@ -76,3 +111,22 @@ jobs:
         id: deployment
         with:
           artifact_name: github-pages-${{ github.run_id }}-${{ github.run_attempt }}
+  gate:
+    name: Docs CI Gate
+    needs: [changes, build]
+    if: always()
+    runs-on: ubuntu-slim
+    steps:
+      - name: Check gate
+        run: |
+          if [[ "${{ needs.changes.outputs.docs_changed }}" != "true" ]]; then
+            echo "No docs-relevant changes detected."
+            exit 0
+          fi
+
+          if [[ "${{ needs.build.result }}" != "success" ]]; then
+            echo "::error::Docs build job result: ${{ needs.build.result }}"
+            exit 1
+          fi
+
+          echo "Docs checks passed."
.github/workflows/ci-frontend.yml (112 changes, vendored)

@@ -3,21 +3,60 @@ on:
   push:
     branches-ignore:
       - 'translations**'
-    paths:
-      - 'src-ui/**'
-      - '.github/workflows/ci-frontend.yml'
   pull_request:
     branches-ignore:
       - 'translations**'
-    paths:
-      - 'src-ui/**'
-      - '.github/workflows/ci-frontend.yml'
   workflow_dispatch:
 concurrency:
   group: frontend-${{ github.event.pull_request.number || github.ref }}
   cancel-in-progress: true
 jobs:
+  changes:
+    name: Detect Frontend Changes
+    runs-on: ubuntu-slim
+    outputs:
+      frontend_changed: ${{ steps.force.outputs.run_all == 'true' || steps.filter.outputs.frontend == 'true' }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v6.0.2
+        with:
+          fetch-depth: 0
+      - name: Decide run mode
+        id: force
+        run: |
+          if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
+            echo "run_all=true" >> "$GITHUB_OUTPUT"
+          elif [[ "${{ github.event_name }}" == "push" && ( "${{ github.ref_name }}" == "main" || "${{ github.ref_name }}" == "dev" ) ]]; then
+            echo "run_all=true" >> "$GITHUB_OUTPUT"
+          else
+            echo "run_all=false" >> "$GITHUB_OUTPUT"
+          fi
+      - name: Set diff range
+        id: range
+        if: steps.force.outputs.run_all != 'true'
+        run: |
+          if [[ "${{ github.event_name }}" == "pull_request" ]]; then
+            echo "base=${{ github.event.pull_request.base.sha }}" >> "$GITHUB_OUTPUT"
+          elif [[ "${{ github.event.created }}" == "true" ]]; then
+            echo "base=${{ github.event.repository.default_branch }}" >> "$GITHUB_OUTPUT"
+          else
+            echo "base=${{ github.event.before }}" >> "$GITHUB_OUTPUT"
+          fi
+          echo "ref=${{ github.sha }}" >> "$GITHUB_OUTPUT"
+      - name: Detect changes
+        id: filter
+        if: steps.force.outputs.run_all != 'true'
+        uses: dorny/paths-filter@v3.0.2
+        with:
+          base: ${{ steps.range.outputs.base }}
+          ref: ${{ steps.range.outputs.ref }}
+          filters: |
+            frontend:
+              - 'src-ui/**'
+              - '.github/workflows/ci-frontend.yml'
   install-dependencies:
+    needs: changes
+    if: needs.changes.outputs.frontend_changed == 'true'
     name: Install Dependencies
     runs-on: ubuntu-24.04
     steps:
@@ -28,7 +67,7 @@ jobs:
         with:
           version: 10
       - name: Use Node.js 24
-        uses: actions/setup-node@v6.2.0
+        uses: actions/setup-node@v6.3.0
         with:
           node-version: 24.x
           cache: 'pnpm'
@@ -45,7 +84,8 @@ jobs:
         run: cd src-ui && pnpm install
   lint:
     name: Lint
-    needs: install-dependencies
+    needs: [changes, install-dependencies]
+    if: needs.changes.outputs.frontend_changed == 'true'
     runs-on: ubuntu-24.04
     steps:
       - name: Checkout
@@ -55,7 +95,7 @@ jobs:
         with:
           version: 10
       - name: Use Node.js 24
-        uses: actions/setup-node@v6.2.0
+        uses: actions/setup-node@v6.3.0
         with:
           node-version: 24.x
           cache: 'pnpm'
@@ -73,7 +113,8 @@ jobs:
         run: cd src-ui && pnpm run lint
   unit-tests:
     name: "Unit Tests (${{ matrix.shard-index }}/${{ matrix.shard-count }})"
-    needs: install-dependencies
+    needs: [changes, install-dependencies]
+    if: needs.changes.outputs.frontend_changed == 'true'
     runs-on: ubuntu-24.04
     strategy:
       fail-fast: false
@@ -89,7 +130,7 @@ jobs:
         with:
           version: 10
       - name: Use Node.js 24
-        uses: actions/setup-node@v6.2.0
+        uses: actions/setup-node@v6.3.0
         with:
           node-version: 24.x
           cache: 'pnpm'
@@ -119,7 +160,8 @@ jobs:
           directory: src-ui/coverage/
   e2e-tests:
     name: "E2E Tests (${{ matrix.shard-index }}/${{ matrix.shard-count }})"
-    needs: install-dependencies
+    needs: [changes, install-dependencies]
+    if: needs.changes.outputs.frontend_changed == 'true'
     runs-on: ubuntu-24.04
     container: mcr.microsoft.com/playwright:v1.58.2-noble
     env:
@@ -139,7 +181,7 @@ jobs:
         with:
           version: 10
       - name: Use Node.js 24
-        uses: actions/setup-node@v6.2.0
+        uses: actions/setup-node@v6.3.0
         with:
           node-version: 24.x
           cache: 'pnpm'
@@ -159,7 +201,8 @@ jobs:
         run: cd src-ui && pnpm exec playwright test --shard ${{ matrix.shard-index }}/${{ matrix.shard-count }}
   bundle-analysis:
     name: Bundle Analysis
-    needs: [unit-tests, e2e-tests]
+    needs: [changes, unit-tests, e2e-tests]
+    if: needs.changes.outputs.frontend_changed == 'true'
     runs-on: ubuntu-24.04
     steps:
       - name: Checkout
@@ -171,7 +214,7 @@ jobs:
         with:
           version: 10
       - name: Use Node.js 24
-        uses: actions/setup-node@v6.2.0
+        uses: actions/setup-node@v6.3.0
         with:
           node-version: 24.x
           cache: 'pnpm'
@@ -189,3 +232,42 @@ jobs:
         env:
           CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
         run: cd src-ui && pnpm run build --configuration=production
+  gate:
+    name: Frontend CI Gate
+    needs: [changes, install-dependencies, lint, unit-tests, e2e-tests, bundle-analysis]
+    if: always()
+    runs-on: ubuntu-slim
+    steps:
+      - name: Check gate
+        run: |
+          if [[ "${{ needs.changes.outputs.frontend_changed }}" != "true" ]]; then
+            echo "No frontend-relevant changes detected."
+            exit 0
+          fi
+
+          if [[ "${{ needs['install-dependencies'].result }}" != "success" ]]; then
+            echo "::error::Frontend install job result: ${{ needs['install-dependencies'].result }}"
+            exit 1
+          fi
+
+          if [[ "${{ needs.lint.result }}" != "success" ]]; then
+            echo "::error::Frontend lint job result: ${{ needs.lint.result }}"
+            exit 1
+          fi
+
+          if [[ "${{ needs['unit-tests'].result }}" != "success" ]]; then
+            echo "::error::Frontend unit-tests job result: ${{ needs['unit-tests'].result }}"
+            exit 1
+          fi
+
+          if [[ "${{ needs['e2e-tests'].result }}" != "success" ]]; then
+            echo "::error::Frontend e2e-tests job result: ${{ needs['e2e-tests'].result }}"
+            exit 1
+          fi
+
+          if [[ "${{ needs['bundle-analysis'].result }}" != "success" ]]; then
+            echo "::error::Frontend bundle-analysis job result: ${{ needs['bundle-analysis'].result }}"
+            exit 1
+          fi
+
+          echo "Frontend checks passed."
.github/workflows/ci-release.yml (2 changes, vendored)

@@ -35,7 +35,7 @@ jobs:
         with:
           version: 10
       - name: Use Node.js 24
-        uses: actions/setup-node@v6.2.0
+        uses: actions/setup-node@v6.3.0
         with:
           node-version: 24.x
           cache: 'pnpm'
.github/workflows/translate-strings.yml (2 changes, vendored)

@@ -40,7 +40,7 @@ jobs:
         with:
           version: 10
       - name: Use Node.js 24
-        uses: actions/setup-node@v6.2.0
+        uses: actions/setup-node@v6.3.0
         with:
           node-version: 24.x
           cache: 'pnpm'
@@ -2437,17 +2437,3 @@
src/paperless_tesseract/tests/test_parser_custom_settings.py:0: error: Item "None" of "ApplicationConfiguration | None" has no attribute "unpaper_clean" [union-attr]
src/paperless_tesseract/tests/test_parser_custom_settings.py:0: error: Item "None" of "ApplicationConfiguration | None" has no attribute "unpaper_clean" [union-attr]
src/paperless_tesseract/tests/test_parser_custom_settings.py:0: error: Item "None" of "ApplicationConfiguration | None" has no attribute "user_args" [union-attr]
src/paperless_text/parsers.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def]
src/paperless_text/parsers.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def]
src/paperless_text/parsers.py:0: error: Incompatible types in assignment (expression has type "str", variable has type "None") [assignment]
src/paperless_text/signals.py:0: error: Function is missing a type annotation [no-untyped-def]
src/paperless_text/signals.py:0: error: Function is missing a type annotation [no-untyped-def]
src/paperless_tika/parsers.py:0: error: Argument 1 to "make_thumbnail_from_pdf" has incompatible type "None"; expected "Path" [arg-type]
src/paperless_tika/parsers.py:0: error: Function is missing a return type annotation [no-untyped-def]
src/paperless_tika/parsers.py:0: error: Function is missing a type annotation [no-untyped-def]
src/paperless_tika/parsers.py:0: error: Function is missing a type annotation [no-untyped-def]
src/paperless_tika/parsers.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def]
src/paperless_tika/parsers.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def]
src/paperless_tika/parsers.py:0: error: Incompatible types in assignment (expression has type "str | None", variable has type "None") [assignment]
src/paperless_tika/signals.py:0: error: Function is missing a type annotation [no-untyped-def]
src/paperless_tika/signals.py:0: error: Function is missing a type annotation [no-untyped-def]
.pre-commit-config.yaml

@@ -29,7 +29,7 @@ repos:
       - id: check-case-conflict
       - id: detect-private-key
   - repo: https://github.com/codespell-project/codespell
-    rev: v2.4.1
+    rev: v2.4.2
    hooks:
       - id: codespell
         additional_dependencies: [tomli]
@@ -46,11 +46,11 @@ repos:
           - ts
           - markdown
         additional_dependencies:
-          - prettier@3.3.3
-          - 'prettier-plugin-organize-imports@4.1.0'
+          - prettier@3.8.1
+          - 'prettier-plugin-organize-imports@4.3.0'
   # Python hooks
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.15.0
+    rev: v0.15.6
     hooks:
       - id: ruff-check
       - id: ruff-format
@@ -65,7 +65,7 @@ repos:
       - id: hadolint
   # Shell script hooks
   - repo: https://github.com/lovesegfault/beautysh
-    rev: v6.4.2
+    rev: v6.4.3
     hooks:
       - id: beautysh
         types: [file]
@@ -5,14 +5,6 @@ const config = {
   singleQuote: true,
   // https://prettier.io/docs/en/options.html#trailing-commas
   trailingComma: 'es5',
-  overrides: [
-    {
-      files: ['docs/*.md'],
-      options: {
-        tabWidth: 4,
-      },
-    },
-  ],
   plugins: [require('prettier-plugin-organize-imports')],
 }
Dockerfile

@@ -30,7 +30,7 @@ RUN set -eux \
 # Purpose: Installs s6-overlay and rootfs
 # Comments:
 #  - Don't leave anything extra in here either
-FROM ghcr.io/astral-sh/uv:0.10.7-python3.12-trixie-slim AS s6-overlay-base
+FROM ghcr.io/astral-sh/uv:0.10.9-python3.12-trixie-slim AS s6-overlay-base

 WORKDIR /usr/src/s6
docker/compose/docker-compose.ci-test.yml

@@ -18,13 +18,13 @@ services:
       - "--log-level=warn"
       - "--log-format=text"
   tika:
-    image: docker.io/apache/tika:latest
+    image: docker.io/apache/tika:3.2.3.0
     hostname: tika
     container_name: tika
     network_mode: host
     restart: unless-stopped
   greenmail:
-    image: greenmail/standalone:2.1.8
+    image: docker.io/greenmail/standalone:2.1.8
     hostname: greenmail
     container_name: greenmail
     environment:
@@ -56,6 +56,7 @@ services:
     environment:
       PAPERLESS_REDIS: redis://broker:6379
       PAPERLESS_DBHOST: db
+      PAPERLESS_DBENGINE: postgres
     env_file:
       - stack.env
     volumes:

@@ -62,6 +62,7 @@ services:
     environment:
       PAPERLESS_REDIS: redis://broker:6379
       PAPERLESS_DBHOST: db
+      PAPERLESS_DBENGINE: postgresql
       PAPERLESS_TIKA_ENABLED: 1
       PAPERLESS_TIKA_GOTENBERG_ENDPOINT: http://gotenberg:3000
       PAPERLESS_TIKA_ENDPOINT: http://tika:9998

@@ -56,6 +56,7 @@ services:
     environment:
       PAPERLESS_REDIS: redis://broker:6379
       PAPERLESS_DBHOST: db
+      PAPERLESS_DBENGINE: postgresql
 volumes:
   data:
   media:

@@ -51,6 +51,7 @@ services:
     env_file: docker-compose.env
     environment:
       PAPERLESS_REDIS: redis://broker:6379
+      PAPERLESS_DBENGINE: sqlite
       PAPERLESS_TIKA_ENABLED: 1
       PAPERLESS_TIKA_GOTENBERG_ENDPOINT: http://gotenberg:3000
       PAPERLESS_TIKA_ENDPOINT: http://tika:9998

@@ -42,6 +42,7 @@ services:
     env_file: docker-compose.env
     environment:
       PAPERLESS_REDIS: redis://broker:6379
+      PAPERLESS_DBENGINE: sqlite
 volumes:
   data:
   media:
@@ -10,8 +10,10 @@ cd "${PAPERLESS_SRC_DIR}"

 # The whole migrate, with flock, needs to run as the right user
 if [[ -n "${USER_IS_NON_ROOT}" ]]; then
+	python3 manage.py check --tag compatibility paperless || exit 1
 	exec s6-setlock -n "${data_dir}/migration_lock" python3 manage.py migrate --skip-checks --no-input
 else
+	s6-setuidgid paperless python3 manage.py check --tag compatibility paperless || exit 1
 	exec s6-setuidgid paperless \
 		s6-setlock -n "${data_dir}/migration_lock" \
 		python3 manage.py migrate --skip-checks --no-input
@@ -2,6 +2,17 @@
 # shellcheck shell=bash
 declare -r log_prefix="[init-user]"

+# When the container is started as a non-root user (e.g. via `user: 999:999`
+# in Docker Compose), usermod/groupmod require root and are meaningless.
+# USERMAP_* variables only apply to the root-started path.
+if [[ -n "${USER_IS_NON_ROOT}" ]]; then
+	if [[ -n "${USERMAP_UID}" || -n "${USERMAP_GID}" ]]; then
+		echo "${log_prefix} WARNING: USERMAP_UID/USERMAP_GID are set but have no effect when the container is started as a non-root user"
+	fi
+	echo "${log_prefix} Running as non-root user ($(id --user):$(id --group)), skipping UID/GID remapping"
+	exit 0
+fi
+
 declare -r usermap_original_uid=$(id -u paperless)
 declare -r usermap_original_gid=$(id -g paperless)
 declare -r usermap_new_uid=${USERMAP_UID:-$usermap_original_uid}
docs/administration.md

@@ -10,16 +10,16 @@ consuming documents at that time.

Options available to any installation of paperless:

- Use the [document exporter](#exporter). The document exporter exports all your documents,
  thumbnails, metadata, and database contents to a specific folder. You may import your
  documents and settings into a fresh instance of paperless again or store your
  documents in another DMS with this export.

  The document exporter is also able to update an already existing
  export. Therefore, incremental backups with `rsync` are entirely
  possible.

  The exporter does not include API tokens and they will need to be re-generated after importing.

!!! caution

@@ -29,28 +29,27 @@ Options available to any installation of paperless:

Options available to docker installations:

- Backup the docker volumes. These usually reside within
  `/var/lib/docker/volumes` on the host and you need to be root in
  order to access them.

  Paperless uses 4 volumes:

  - `paperless_media`: This is where your documents are stored.
  - `paperless_data`: This is where auxiliary data is stored. This
    folder also contains the SQLite database, if you use it.
  - `paperless_pgdata`: Exists only if you use PostgreSQL and
    contains the database.
  - `paperless_dbdata`: Exists only if you use MariaDB and contains
    the database.

Options available to bare-metal and non-docker installations:

- Backup the entire paperless folder. This ensures that if your
  paperless instance crashes at some point or your disk fails, you can
  simply copy the folder back into place and it works.

  When using PostgreSQL or MariaDB, you'll also have to backup the
  database.

### Restoring {#migrating-restoring}

@@ -509,19 +508,19 @@ collection for issues.

The issues detected by the sanity checker are as follows:

- Missing original files.
- Missing archive files.
- Inaccessible original files due to improper permissions.
- Inaccessible archive files due to improper permissions.
- Corrupted original documents by comparing their checksum against
  what is stored in the database.
- Corrupted archive documents by comparing their checksum against what
  is stored in the database.
- Missing thumbnails.
- Inaccessible thumbnails due to improper permissions.
- Documents without any content (warning).
- Orphaned files in the media directory (warning). These are files
  that are not referenced by any document in paperless.

```
document_sanity_checker
```
docs/advanced_usage.md

@@ -25,20 +25,20 @@ documents.

The following algorithms are available:

- **None:** No matching will be performed.
- **Any:** Looks for any occurrence of any word provided in match in
  the PDF. If you define the match as `Bank1 Bank2`, it will match
  documents containing either of these terms.
- **All:** Requires that every word provided appears in the PDF,
  albeit not in the order provided.
- **Exact:** Matches only if the match appears exactly as provided
  (i.e. preserve ordering) in the PDF.
- **Regular expression:** Parses the match as a regular expression and
  tries to find a match within the document.
- **Fuzzy match:** Uses a partial matching based on locating the tag text
  inside the document, using a [partial ratio](https://rapidfuzz.github.io/RapidFuzz/Usage/fuzz.html#partial-ratio)
- **Auto:** Tries to automatically match new documents. This does not
  require you to set a match. See the [notes below](#automatic-matching).

When using the _any_ or _all_ matching algorithms, you can search for
terms that consist of multiple words by enclosing them in double quotes.
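As a sketch of how quoting behaves (the correspondent name "ACME Energy" is a hypothetical value, not from the upstream docs):

```
Matching algorithm: Any
Match: "ACME Energy" invoice
```

With the _any_ algorithm, this matches documents containing either the exact phrase "ACME Energy" or the single word "invoice".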
@@ -69,33 +69,33 @@ Paperless tries to hide much of the involved complexity with this
approach. However, there are a couple of caveats you need to keep in mind
when using this feature:

- Changes to your documents are not immediately reflected by the
  matching algorithm. The neural network needs to be _trained_ on your
  documents after changes. Paperless periodically (default: once each
  hour) checks for changes and does this automatically for you.
- The Auto matching algorithm only takes documents into account which
  are NOT placed in your inbox (i.e. have any inbox tags assigned to
  them). This ensures that the neural network only learns from
  documents which you have correctly tagged before.
- The matching algorithm can only work if there is a correlation
  between the tag, correspondent, document type, or storage path and
  the document itself. Your bank statements usually contain your bank
  account number and the name of the bank, so this works reasonably
  well. However, tags such as "TODO" cannot be automatically
  assigned.
- The matching algorithm needs a reasonable number of documents to
  identify when to assign tags, correspondents, storage paths, and
  types. If one out of a thousand documents has the correspondent
  "Very obscure web shop I bought something five years ago", it will
  probably not assign this correspondent automatically if you buy
  something from them again. The more documents, the better.
- Paperless also needs a reasonable amount of negative examples to
  decide when not to assign a certain tag, correspondent, document
  type, or storage path. This will usually be the case as you start
  filling up paperless with documents. Example: If all your documents
  are either from "Webshop" or "Bank", paperless will assign one
  of these correspondents to ANY new document, if both are set to
  automatic matching.

## Hooking into the consumption process {#consume-hooks}
@@ -243,12 +243,12 @@ webserver:

Troubleshooting:

- Monitor the Docker Compose log:
  `cd ~/paperless-ngx; docker compose logs -f`
- Check your script's permissions, e.g. in case of a permission error:
  `sudo chmod 755 post-consumption-example.sh`
- Pipe your script's output to a log file, e.g.:
  `echo "${DOCUMENT_ID}" | tee --append /usr/src/paperless/scripts/post-consumption-example.log`

## File name handling {#file-name-handling}
@@ -307,35 +307,35 @@ will create a directory structure as follows:

Paperless provides the following variables for use within filenames:

- `{{ asn }}`: The archive serial number of the document, or "none".
- `{{ correspondent }}`: The name of the correspondent, or "none".
- `{{ document_type }}`: The name of the document type, or "none".
- `{{ tag_list }}`: A comma separated list of all tags assigned to the
  document.
- `{{ title }}`: The title of the document.
- `{{ created }}`: The full date (ISO 8601 format, e.g. `2024-03-14`) the document was created.
- `{{ created_year }}`: Year created only, formatted as the year with
  century.
- `{{ created_year_short }}`: Year created only, formatted as the year
  without century, zero padded.
- `{{ created_month }}`: Month created only (number 01-12).
- `{{ created_month_name }}`: Month created name, as per locale
- `{{ created_month_name_short }}`: Month created abbreviated name, as per
  locale
- `{{ created_day }}`: Day created only (number 01-31).
- `{{ added }}`: The full date (ISO format) the document was added to
  paperless.
- `{{ added_year }}`: Year added only.
- `{{ added_year_short }}`: Year added only, formatted as the year without
  century, zero padded.
- `{{ added_month }}`: Month added only (number 01-12).
- `{{ added_month_name }}`: Month added name, as per locale
- `{{ added_month_name_short }}`: Month added abbreviated name, as per
  locale
- `{{ added_day }}`: Day added only (number 01-31).
- `{{ owner_username }}`: Username of document owner, if any, or "none"
- `{{ original_name }}`: Document original filename, minus the extension, if any, or "none"
- `{{ doc_pk }}`: The paperless identifier (primary key) for the document.
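To illustrate, a minimal sketch of a format string combining a few of these variables (the exact layout is up to you; this assumes the variable is set in an environment file):

```
PAPERLESS_FILENAME_FORMAT={{ created_year }}/{{ correspondent }}/{{ title }}
```

With this format, a document from correspondent "Bank" titled "Statement" created in 2024 would be stored as `2024/Bank/Statement.pdf`.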
!!! warning
@@ -388,10 +388,10 @@ before empty placeholders are removed as well, empty directories are omitted.
When a single storage layout is not sufficient for your use case, storage paths allow for more complex
structure to set precisely where each document is stored in the file system.

- Each storage path is a [`PAPERLESS_FILENAME_FORMAT`](configuration.md#PAPERLESS_FILENAME_FORMAT) and
  follows the rules described above
- Each document is assigned a storage path using the matching algorithms described above, but can be
  overwritten at any time

For example, you could define the following two storage paths:
@@ -457,13 +457,13 @@ The `get_cf_value` filter retrieves a value from custom field data with optional

###### Parameters

- `custom_fields`: This _must_ be the provided custom field data
- `name` (str): Name of the custom field to retrieve
- `default` (str, optional): Default value to return if field is not found or has no value

###### Returns

- `str | None`: The field value, default value, or `None` if neither exists

###### Examples
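A plausible invocation, sketched from the parameters above (the field name "Invoice Number" is hypothetical):

```
{{ custom_fields | get_cf_value("Invoice Number", "unknown") }}
```

If the document carries an "Invoice Number" custom field with a value, that value is inserted; otherwise the default `"unknown"` is used.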
@@ -487,12 +487,12 @@ The `datetime` filter formats a datetime string or datetime object using Python'

###### Parameters

- `value` (str | datetime): Date/time value to format (strings will be parsed automatically)
- `format` (str): Python strftime format string

###### Returns

- `str`: Formatted datetime string

###### Examples
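For instance, a sketch using the `created` variable from the list above and a standard strftime pattern:

```
{{ created | datetime("%Y-%m-%d") }}
```

For a document created on 14 March 2024 this renders as `2024-03-14`.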
@@ -525,13 +525,13 @@ An ISO string can also be provided to control the output format.

###### Parameters

- `value` (date | datetime | str): Date, datetime object or ISO string to format (datetime should be timezone-aware)
- `format` (str): Format type - either a Babel preset ('short', 'medium', 'long', 'full') or custom pattern
- `locale` (str): Locale code for localization (e.g., 'en_US', 'fr_FR', 'de_DE')

###### Returns

- `str`: Localized, formatted date string

###### Examples
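A sketch of one possible call; the filter's name does not appear in this excerpt, so `localize_date` here is an assumption:

```
{{ created | localize_date("long", "de_DE") }}
```

With the `long` Babel preset and a German locale, a date such as 2024-01-15 would render roughly as `15. Januar 2024`.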
@@ -565,15 +565,15 @@ See the [supported format codes](https://unicode.org/reports/tr35/tr35-dates.htm

### Format Presets

- **short**: Abbreviated format (e.g., "1/15/24")
- **medium**: Medium-length format (e.g., "Jan 15, 2024")
- **long**: Long format with full month name (e.g., "January 15, 2024")
- **full**: Full format including day of week (e.g., "Monday, January 15, 2024")

#### Additional Variables

- `{{ tag_name_list }}`: A list of tag names applied to the document, ordered by the tag name. Note this is a list, not a single string
- `{{ custom_fields }}`: A mapping of custom field names to their type and value. A user can access the mapping by field name or check if a field is applied by checking its existence in the variable.

!!! tip
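To illustrate the existence check described above, a sketch (the "Vendor" field name is hypothetical):

```
{% if "Vendor" in custom_fields %}{{ custom_fields | get_cf_value("Vendor") }}{% else %}unfiled{% endif %}
```

This only emits the field value when the document actually carries a "Vendor" custom field, and falls back to a fixed string otherwise.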
@@ -675,15 +675,15 @@ installation, you can use volumes to accomplish this:

```yaml
services:
  # ...
  webserver:
    environment:
      - PAPERLESS_ENABLE_FLOWER
    ports:
      - 5555:5555 # (2)!
    # ...
    volumes:
      - /path/to/my/flowerconfig.py:/usr/src/paperless/src/paperless/flowerconfig.py:ro # (1)!
```

1. Note the `:ro` tag means the file will be mounted as read only.
@@ -714,15 +714,90 @@ For example, using Docker Compose:

```yaml
services:
  # ...
  webserver:
    # ...
    volumes:
      - /path/to/my/scripts:/custom-cont-init.d:ro # (1)!
```

1. Note the `:ro` tag means the folder will be mounted as read only. This is for extra security against changes

## Installing third-party parser plugins {#parser-plugins}

Third-party parser plugins extend Paperless-ngx to support additional file
formats. A plugin is a Python package that advertises itself under the
`paperless_ngx.parsers` entry point group. Refer to the
[developer documentation](development.md#making-custom-parsers) for how to
create one.

!!! warning "Third-party plugins are not officially supported"

    The Paperless-ngx maintainers do not provide support for third-party
    plugins. Issues caused by or requiring changes to a third-party plugin
    will be closed without further investigation. Always reproduce problems
    with all plugins removed before filing a bug report.

### Docker

Use a [custom container initialization script](#custom-container-initialization)
to install the package before the webserver starts. Create a shell script and
mount it into `/custom-cont-init.d`:

```bash
#!/bin/bash
# /path/to/my/scripts/install-parsers.sh

pip install my-paperless-parser-package
```

Mount it in your `docker-compose.yml`:

```yaml
services:
  webserver:
    # ...
    volumes:
      - /path/to/my/scripts:/custom-cont-init.d:ro
```

The script runs as `root` before the webserver starts, so the package will be
available when Paperless-ngx discovers plugins at startup.

### Bare metal

Install the package into the same Python environment that runs Paperless-ngx.
If you followed the standard bare-metal install guide, that is the `paperless`
user's environment:

```bash
sudo -Hu paperless pip3 install my-paperless-parser-package
```

If you are using `uv` or a virtual environment, activate it first and then run:

```bash
uv pip install my-paperless-parser-package
# or
pip install my-paperless-parser-package
```

Restart all Paperless-ngx services after installation so the new plugin is
discovered.

### Verifying installation

On the next startup, check the application logs for a line confirming
discovery:

```
Loaded third-party parser 'My Parser' v1.0.0 by Acme Corp (entrypoint: 'my_parser').
```

If this line does not appear, verify that the package is installed in the
correct environment and that its `pyproject.toml` declares the
`paperless_ngx.parsers` entry point.
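For reference, a sketch of what that declaration might look like in the plugin's `pyproject.toml`. The entry point group is the one named above; the package, module, and callable names are hypothetical:

```toml
# pyproject.toml of a hypothetical third-party parser package
[project]
name = "my-paperless-parser-package"
version = "1.0.0"

# Advertise the parser under the group Paperless-ngx scans at startup.
[project.entry-points."paperless_ngx.parsers"]
my_parser = "my_paperless_parser:get_parser"  # hypothetical module:callable
```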
## MySQL Caveats {#mysql-caveats}
|
||||
|
||||
### Case Sensitivity
|
||||
@@ -771,16 +846,16 @@ Paperless is able to utilize barcodes for automatically performing some tasks.
|
||||
|
||||
At this time, the library utilized for detection of barcodes supports the following types:
|
||||
|
||||
- AN-13/UPC-A
|
||||
- UPC-E
|
||||
- EAN-8
|
||||
- Code 128
|
||||
- Code 93
|
||||
- Code 39
|
||||
- Codabar
|
||||
- Interleaved 2 of 5
|
||||
- QR Code
|
||||
- SQ Code
|
||||
- AN-13/UPC-A
|
||||
- UPC-E
|
||||
- EAN-8
|
||||
- Code 128
|
||||
- Code 93
|
||||
- Code 39
|
||||
- Codabar
|
||||
- Interleaved 2 of 5
|
||||
- QR Code
|
||||
- SQ Code
|
||||
|
||||
For usage in Paperless, the type of barcode does not matter, only the contents of it.
|
||||
|
||||
@@ -793,8 +868,8 @@ below.
|
||||
If document splitting is enabled, Paperless splits _after_ a separator barcode by default.
|
||||
This means:
|
||||
|
||||
- any page containing the configured separator barcode starts a new document, starting with the **next** page
|
||||
- pages containing the separator barcode are discarded
|
||||
- any page containing the configured separator barcode starts a new document, starting with the **next** page
|
||||
- pages containing the separator barcode are discarded
|
||||
|
||||
This is intended for dedicated separator sheets such as PATCH-T pages.
|
||||
|
||||
@@ -831,10 +906,10 @@ to `true`.
|
||||
When enabled, documents will be split at pages containing tag barcodes, similar to how
|
||||
ASN barcodes work. Key features:
|
||||
|
||||
- The page with the tag barcode is **retained** in the resulting document
- **Each split document extracts its own tags** - only tags on pages within that document are assigned
- Multiple tag barcodes can trigger multiple splits in the same document
- Works seamlessly with ASN barcodes - each split document gets its own ASN and tags

This is useful for batch scanning where you place tag barcode pages between different
documents to both separate and categorize them in a single operation.
@@ -996,9 +1071,9 @@ If using docker, you'll need to add the following volume mounts to your `docker-

```yaml
webserver:
  volumes:
    - /home/user/.gnupg/pubring.gpg:/usr/src/paperless/.gnupg/pubring.gpg
    - <path to gpg-agent socket>:/usr/src/paperless/.gnupg/S.gpg-agent
```

For a 'bare-metal' installation no further configuration is necessary. If you
@@ -1006,9 +1081,9 @@ want to use a separate `GNUPG_HOME`, you can do so by configuring the [PAPERLESS

### Troubleshooting

- Make sure that `gpg-agent` is running on your host machine
- Make sure that encryption and decryption work from inside the container using the `gpg` commands from above.
- Check that all files in `/usr/src/paperless/.gnupg` have correct permissions

```shell
paperless@9da1865df327:~/.gnupg$ ls -al
```

270
docs/api.md
@@ -66,10 +66,10 @@ Full text searching is available on the `/api/documents/` endpoint. Two
specific query parameters cause the API to return full text search
results:

- `/api/documents/?query=your%20search%20query`: Search for a document
  using a full text query. For details on the syntax, see [Basic Usage - Searching](usage.md#basic-usage_searching).
- `/api/documents/?more_like_id=1234`: Search for documents similar to
  the document with id 1234.

Pagination works exactly the same as it does for normal requests on this
endpoint.
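
As an illustration, a full text query from Python might look like the
following sketch; the host and token are placeholders for your installation:

```python
import requests

# Search documents matching a full text query; adjust host and token.
resp = requests.get(
    "http://localhost:8000/api/documents/",
    params={"query": "invoice 2024"},
    headers={"Authorization": "Token YOUR_API_TOKEN"},
)
for result in resp.json()["results"]:
    print(result["id"], result["title"])
```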

@@ -106,12 +106,12 @@ attribute with various information about the search results:
}
```

- `score` is an indication of how well this document matches the query
  relative to the other search results.
- `highlights` is an excerpt from the document content and highlights
  the search terms with `<span>` tags as shown above.
- `rank` is the index of the search results. The first result will
  have rank 0.

### Filtering by custom fields

@@ -122,33 +122,33 @@ use cases:

1. Documents with a custom field "due" (date) between Aug 1, 2024 and
   Sept 1, 2024 (inclusive):

`?custom_field_query=["due", "range", ["2024-08-01", "2024-09-01"]]`
|
||||
`?custom_field_query=["due", "range", ["2024-08-01", "2024-09-01"]]`
|
||||
|
||||
2. Documents with a custom field "customer" (text) that equals "bob"
|
||||
(case sensitive):
|
||||
|
||||
`?custom_field_query=["customer", "exact", "bob"]`
|
||||
`?custom_field_query=["customer", "exact", "bob"]`
|
||||
|
||||
3. Documents with a custom field "answered" (boolean) set to `true`:
|
||||
|
||||
`?custom_field_query=["answered", "exact", true]`
|
||||
`?custom_field_query=["answered", "exact", true]`
|
||||
|
||||
4. Documents with a custom field "favorite animal" (select) set to either
|
||||
"cat" or "dog":
|
||||
|
||||
`?custom_field_query=["favorite animal", "in", ["cat", "dog"]]`
|
||||
`?custom_field_query=["favorite animal", "in", ["cat", "dog"]]`
|
||||
|
||||
5. Documents with a custom field "address" (text) that is empty:
|
||||
|
||||
`?custom_field_query=["OR", [["address", "isnull", true], ["address", "exact", ""]]]`
|
||||
`?custom_field_query=["OR", [["address", "isnull", true], ["address", "exact", ""]]]`
|
||||
|
||||
6. Documents that don't have a field called "foo":
|
||||
|
||||
`?custom_field_query=["foo", "exists", false]`
|
||||
`?custom_field_query=["foo", "exists", false]`
|
||||
|
||||
7. Documents that have document links "references" to both document 3 and 7:
|
||||
|
||||
`?custom_field_query=["references", "contains", [3, 7]]`
|
||||
`?custom_field_query=["references", "contains", [3, 7]]`

All field types support basic operations including `exact`, `in`, `isnull`,
and `exists`. String, URL, and monetary fields support case-insensitive
@@ -164,8 +164,8 @@ Get auto completions for a partial search term.

Query parameters:

- `term`: The incomplete term.
- `limit`: Amount of results. Defaults to 10.
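
For example, assuming the autocomplete route `/api/search/autocomplete/`
(host and token are placeholders):

```python
import requests

# Fetch up to 5 completions for the partial term "inv".
resp = requests.get(
    "http://localhost:8000/api/search/autocomplete/",
    params={"term": "inv", "limit": 5},
    headers={"Authorization": "Token YOUR_API_TOKEN"},
)
print(resp.json())
```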

Results returned by the endpoint are ordered by importance of the term
in the document index. The first result is the term that has the highest
@@ -189,19 +189,19 @@ from there.

The endpoint supports the following optional form fields:

- `title`: Specify a title that the consumer should use for the
  document.
- `created`: Specify a DateTime where the document was created (e.g.
  "2016-04-19" or "2016-04-19 06:15:00+02:00").
- `correspondent`: Specify the ID of a correspondent that the consumer
  should use for the document.
- `document_type`: Similar to correspondent.
- `storage_path`: Similar to correspondent.
- `tags`: Similar to correspondent. Specify this multiple times to
  have multiple tags added to the document.
- `archive_serial_number`: An optional archive serial number to set.
- `custom_fields`: Either an array of custom field ids to assign (with an empty
  value) to the document or an object mapping field id -> value.
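
A minimal upload sketch using these fields, assuming the documented
`/api/documents/post_document/` route; the host and token are placeholders:

```python
import requests

# Upload a file with a title and two tags; the list value for "tags"
# is sent as repeated form fields, matching "specify this multiple times".
with open("invoice.pdf", "rb") as f:
    resp = requests.post(
        "http://localhost:8000/api/documents/post_document/",
        headers={"Authorization": "Token YOUR_API_TOKEN"},
        files={"document": f},
        data={"title": "Invoice", "tags": [1, 2]},
    )
print(resp.json())  # UUID of the consumption task
```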
The endpoint will immediately return HTTP 200 if the document consumption
process was started successfully, with the UUID of the consumption task
@@ -215,16 +215,16 @@ consumption including the ID of a created document if consumption succeeded.

Document versions are file-level versions linked to one root document.

- Root document metadata (title, tags, correspondent, document type, storage path, custom fields, permissions) remains shared.
- Version-specific file data (file, mime type, checksums, archive info, extracted text content) belongs to the selected/latest version.

Version-aware endpoints:

- `GET /api/documents/{id}/`: returns root document data; `content` resolves to latest version content by default. Use `?version={version_id}` to resolve content for a specific version.
- `PATCH /api/documents/{id}/`: content updates target the selected version (`?version={version_id}`) or latest version by default; non-content metadata updates target the root document.
- `GET /api/documents/{id}/download/`, `GET /api/documents/{id}/preview/`, `GET /api/documents/{id}/thumb/`, `GET /api/documents/{id}/metadata/`: accept `?version={version_id}`.
- `POST /api/documents/{id}/update_version/`: uploads a new version using multipart form field `document` and optional `version_label`.
- `DELETE /api/documents/{root_id}/versions/{version_id}/`: deletes a non-root version.
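
For example, uploading a new version of document 123 might look like this
sketch; the host and token are placeholders:

```python
import requests

# Attach a new file version to the root document with id 123.
with open("invoice-signed.pdf", "rb") as f:
    resp = requests.post(
        "http://localhost:8000/api/documents/123/update_version/",
        headers={"Authorization": "Token YOUR_API_TOKEN"},
        files={"document": f},
        data={"version_label": "signed copy"},
    )
resp.raise_for_status()
```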
## Permissions

@@ -282,74 +282,38 @@ a json payload of the format:

The following methods are supported:

- `set_correspondent`
  - Requires `parameters`: `{ "correspondent": CORRESPONDENT_ID }`
- `set_document_type`
  - Requires `parameters`: `{ "document_type": DOCUMENT_TYPE_ID }`
- `set_storage_path`
  - Requires `parameters`: `{ "storage_path": STORAGE_PATH_ID }`
- `add_tag`
  - Requires `parameters`: `{ "tag": TAG_ID }`
- `remove_tag`
  - Requires `parameters`: `{ "tag": TAG_ID }`
- `modify_tags`
  - Requires `parameters`: `{ "add_tags": [LIST_OF_TAG_IDS] }` and `{ "remove_tags": [LIST_OF_TAG_IDS] }`
- `delete`
  - No `parameters` required
- `reprocess`
  - No `parameters` required
- `set_permissions`
  - Requires `parameters`:
    - `"set_permissions": PERMISSIONS_OBJ` (see format [above](#permissions)) and / or
    - `"owner": OWNER_ID or null`
    - `"merge": true or false` (defaults to false)
  - The `merge` flag determines if the supplied permissions will overwrite all existing permissions (including
    removing them) or be merged with existing permissions.
- `edit_pdf`
  - Requires `parameters`:
    - `"doc_ids": [DOCUMENT_ID]` A list of a single document ID to edit.
    - `"operations": [OPERATION, ...]` A list of operations to perform on the documents. Each operation is a dictionary
      with the following keys:
      - `"page": PAGE_NUMBER` The page number to edit (1-based).
      - `"rotate": DEGREES` Optional rotation in degrees (90, 180, 270).
      - `"doc": OUTPUT_DOCUMENT_INDEX` Optional index of the output document for split operations.
  - Optional `parameters`:
    - `"delete_original": true` to delete the original documents after editing.
    - `"update_document": true` to add the edited PDF as a new version of the root document.
    - `"include_metadata": true` to copy metadata from the original document to the edited document.
- `remove_password`
  - Requires `parameters`:
    - `"password": "PASSWORD_STRING"` The password to remove from the PDF documents.
  - Optional `parameters`:
    - `"update_document": true` to add the password-less PDF as a new version of the root document.
    - `"delete_original": true` to delete the original document after editing.
    - `"include_metadata": true` to copy metadata from the original document to the new password-less document.
- `merge`
  - No additional `parameters` required.
  - The ordering of the merged document is determined by the list of IDs.
  - Optional `parameters`:
    - `"metadata_document_id": DOC_ID` apply metadata (tags, correspondent, etc.) from this document to the merged document.
    - `"delete_originals": true` to delete the original documents. This requires the calling user to be the owner of
      all documents that are merged.
- `split`
  - Requires `parameters`:
    - `"pages": [..]` The list should be a list of pages and/or ranges, separated by commas, e.g. `"[1,2-3,4,5-7]"`
  - Optional `parameters`:
    - `"delete_originals": true` to delete the original document after consumption. This requires the calling user to be the owner of
      the document.
  - The split operation only accepts a single document.
- `rotate`
  - Requires `parameters`:
    - `"degrees": DEGREES`. Must be an integer, i.e. 90, 180, 270
- `delete_pages`
  - Requires `parameters`:
    - `"pages": [..]` The list should be a list of integers, e.g. `"[2,3,4]"`
  - The delete_pages operation only accepts a single document.
- `modify_custom_fields`
  - Requires `parameters`:
    - `"add_custom_fields": { CUSTOM_FIELD_ID: VALUE }`: JSON object consisting of custom field id:value pairs to add to the document; can also be a list of custom field IDs
      to add with empty values.
    - `"remove_custom_fields": [CUSTOM_FIELD_ID]`: custom field ids to remove from the document.
#### Document-editing operations

Beginning with version 10, the API provides individual endpoints for document-editing operations (`merge`, `rotate`, `edit_pdf`, etc.), so their documentation can be found in the API spec / viewer. Legacy document-editing methods via `/api/documents/bulk_edit/` remain supported for compatibility but are deprecated; clients should migrate to the individual endpoints before they are removed in a future version.

### Objects

@@ -369,41 +333,38 @@ operations, using the endpoint: `/api/bulk_edit_objects/`, which requires a json

## API Versioning

The REST API is versioned.

- Versioning ensures that changes to the API don't break older
  clients.
- Clients specify the specific version of the API they wish to use
  with every request and Paperless will handle the request using the
  specified API version.
- Even if the underlying data model changes, supported older API
  versions continue to serve compatible data.
- If no version is specified, Paperless serves the configured default
  API version (currently `10`).
- Supported API versions are currently `9` and `10`.

API versions are specified by submitting an additional HTTP `Accept`
header with every request:

```
Accept: application/json; version=10
```
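
For instance, a client can pin the API version and read back the version
headers described below; the host and token are placeholders:

```python
import requests

# Pin API version 10 via the Accept header and inspect the version headers.
resp = requests.get(
    "http://localhost:8000/api/documents/",
    headers={
        "Authorization": "Token YOUR_API_TOKEN",
        "Accept": "application/json; version=10",
    },
)
print(resp.headers.get("X-Api-Version"), resp.headers.get("X-Version"))
```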

If an invalid version is specified, Paperless responds with
`406 Not Acceptable` and an error message in the body.

If a client wishes to verify whether it is compatible with any given
server, the following procedure should be performed:

1. Perform an _authenticated_ request against any API endpoint. The
   server will add two custom headers to the response:

   ```
   X-Api-Version: 10
   X-Version: <server-version>
   ```

2. Determine whether the client is compatible with this server based on
@@ -423,51 +384,56 @@ Initial API version.

#### Version 2

- Added field `Tag.color`. This read/write string field contains a hex
  color such as `#a6cee3`.
- Added read-only field `Tag.text_color`. This field contains the text
  color to use for a specific tag, which is either black or white
  depending on the brightness of `Tag.color`.
- Removed field `Tag.colour`.

#### Version 3

- Permissions endpoints have been added.
- The format of the `/api/ui_settings/` has changed.

#### Version 4

- Consumption templates were refactored to workflows and API endpoints
  changed as such.

#### Version 5

- Added bulk deletion methods for documents and objects.

#### Version 6

- Moved acknowledge tasks endpoint to be under `/api/tasks/acknowledge/`.

#### Version 7

- The format of select type custom fields has changed to return the options
  as an array of objects with `id` and `label` fields as opposed to a simple
  list of strings. When creating or updating a custom field value of a
  document for a select type custom field, the value should be the `id` of
  the option, whereas previously it was the index of the option.

#### Version 8

- The user field of document notes now returns a simplified user object
  rather than just the user ID.

#### Version 9

- The document `created` field is now a date, not a datetime. The
  `created_date` field is considered deprecated and will be removed in a
  future version.

#### Version 10

- The `show_on_dashboard` and `show_in_sidebar` fields of saved views have been
  removed. Relevant settings are now stored in the UISettings model. Compatibility is maintained
  for versions < 10 until support for API v9 is dropped.
- Document-editing operations such as `merge`, `rotate`, and `edit_pdf` have been
  moved from the bulk edit endpoint to their own individual endpoints. Using these methods via
  the bulk edit endpoint is still supported for compatibility with versions < 10 until support
  for API v9 is dropped.

10177
docs/changelog.md
File diff suppressed because it is too large
@@ -8,17 +8,17 @@ common [OCR](#ocr) related settings and some frontend settings. If set, these wi
preference over the settings via environment variables. If not set, the environment setting
or applicable default will be utilized instead.

- If you run paperless on docker, `paperless.conf` is not used.
  Rather, configure paperless by copying necessary options to
  `docker-compose.env`.

- If you are running paperless on anything else, paperless will search
  for the configuration file in these locations and use the first one
  it finds:
  - The environment variable `PAPERLESS_CONFIGURATION_PATH`
  - `/path/to/paperless/paperless.conf`
  - `/etc/paperless.conf`
  - `/usr/local/etc/paperless.conf`

## Required services

@@ -674,6 +674,9 @@ See the corresponding [django-allauth documentation](https://docs.allauth.org/en
for a list of provider configurations. You will also need to include the relevant Django 'application' inside the
[PAPERLESS_APPS](#PAPERLESS_APPS) setting to activate that specific authentication provider (e.g. `allauth.socialaccount.providers.openid_connect` for the [OpenID Connect provider](https://docs.allauth.org/en/latest/socialaccount/providers/openid_connect.html)).

    For OpenID Connect providers, set `settings.token_auth_method` if your identity provider
    requires a specific token endpoint authentication method.

    Defaults to None, which does not enable any third party authentication systems.

#### [`PAPERLESS_SOCIAL_AUTO_SIGNUP=<bool>`](#PAPERLESS_SOCIAL_AUTO_SIGNUP) {#PAPERLESS_SOCIAL_AUTO_SIGNUP}

@@ -1947,6 +1950,12 @@ current backend. If not supplied, defaults to "gpt-3.5-turbo" for OpenAI and "ll

    Defaults to None.

#### [`PAPERLESS_AI_LLM_ALLOW_INTERNAL_ENDPOINTS=<bool>`](#PAPERLESS_AI_LLM_ALLOW_INTERNAL_ENDPOINTS) {#PAPERLESS_AI_LLM_ALLOW_INTERNAL_ENDPOINTS}

: If set to false, Paperless blocks AI endpoint URLs that resolve to non-public addresses (e.g. localhost).

    Defaults to true, which allows internal endpoints.

#### [`PAPERLESS_AI_LLM_INDEX_TASK_CRON=<cron expression>`](#PAPERLESS_AI_LLM_INDEX_TASK_CRON) {#PAPERLESS_AI_LLM_INDEX_TASK_CRON}

: Configures the schedule to update the AI embeddings of text content and metadata for all documents. Only performed if

@@ -6,23 +6,23 @@ on Paperless-ngx.
Check out the source from GitHub. The repository is organized in the
following way:

- `main` always represents the latest release and will only see
  changes when a new release is made.
- `dev` contains the code that will be in the next release.
- `feature-X` contains bigger changes that will be in some release, but
  not necessarily the next one.

When making functional changes to Paperless-ngx, _always_ make your changes
on the `dev` branch.

Apart from that, the folder structure is as follows:

- `docs/` - Documentation.
- `src-ui/` - Code of the front end.
- `src/` - Code of the back end.
- `scripts/` - Various scripts that help with different parts of
  development.
- `docker/` - Files required to build the docker image.

## Contributing to Paperless-ngx

@@ -94,18 +94,17 @@ first-time setup.
```

7. You can now either ...

   - install Redis or
   - use the included `scripts/start_services.sh` to use Docker to fire
     up a Redis instance (and some other services such as Tika,
     Gotenberg and a database server) or
   - spin up a bare Redis container

     ```bash
     docker run -d -p 6379:6379 --restart unless-stopped redis:latest
     ```

8. Continue with either back-end or front-end development – or both :-).

@@ -118,9 +117,9 @@ work well for development, but you can use whatever you want.
Configure the IDE to use the `src/`-folder as the base source folder.
Configure the following launch configurations in your IDE:

- `uv run manage.py runserver`
- `uv run manage.py document_consumer`
- `uv run celery --app paperless worker -l DEBUG` (or any other log level)

To start them all:

@@ -146,11 +145,11 @@ pnpm ng build --configuration production

### Testing

- Run `pytest` in the `src/` directory to execute all tests. This also
  generates an HTML coverage report. When running tests, `paperless.conf`
  is loaded as well. However, the tests rely on the default
  configuration. This is not ideal. But for now, make sure no settings
  except for DEBUG are overridden when testing.

!!! note

@@ -254,14 +253,14 @@ these parts have to be translated separately.

### Front end localization

- The Angular front end does localization according to the [Angular
  documentation](https://angular.io/guide/i18n).
- The source language of the project is "en_US".
- The source strings end up in the file `src-ui/messages.xlf`.
- The translated strings need to be placed in the
  `src-ui/src/locale/` folder.
- In order to extract added or changed strings from the source files,
  call `ng extract-i18n`.

Adding new languages requires adding the translated files in the
`src-ui/src/locale/` folder and adjusting a couple files.
@@ -307,18 +306,18 @@ A majority of the strings that appear in the back end appear only when
the admin is used. However, some of these are still shown on the front
end (such as error messages).

- The django application does localization according to the [Django
  documentation](https://docs.djangoproject.com/en/3.1/topics/i18n/translation/).
- The source language of the project is "en_US".
- Localization files end up in the folder `src/locale/`.
- In order to extract strings from the application, call
  `uv run manage.py makemessages -l en_US`. This is important after
  making changes to translatable strings.
- The message files need to be compiled for them to show up in the
  application. Call `uv run manage.py compilemessages` to do this.
  The generated files don't get committed into git, since these are
  derived artifacts. The build pipeline takes care of executing this
  command.

Adding new languages requires adding the translated files in the
`src/locale/`-folder and adjusting the file
@@ -371,122 +370,363 @@ docker build --file Dockerfile --tag paperless:local .

## Extending Paperless-ngx

Paperless-ngx supports third-party document parsers via a Python entry point
plugin system. Plugins are distributed as ordinary Python packages and
discovered automatically at startup — no changes to the Paperless-ngx source
are required.

!!! warning "Third-party plugins are not officially supported"

    The Paperless-ngx maintainers do not provide support for third-party
    plugins. Issues that are caused by or require changes to a third-party
    plugin will be closed without further investigation. If you believe you
    have found a bug in Paperless-ngx itself (not in a plugin), please
    reproduce it with all third-party plugins removed before filing an issue.

### Making custom parsers

Paperless-ngx uses parsers to add documents. A parser is responsible for:

- Extracting plain-text content from the document
- Generating a thumbnail image
- _optional:_ Detecting the document's creation date
- _optional:_ Producing a searchable PDF archive copy

Custom parsers are distributed as ordinary Python packages and registered
via a [setuptools entry point](https://setuptools.pypa.io/en/latest/userguide/entry_point.html).
No changes to the Paperless-ngx source are required.

#### 1. Implementing the parser class

Your parser must satisfy the `ParserProtocol` structural interface defined in
`paperless.parsers`. The simplest approach is to write a plain class — no base
class is required, only the right attributes and methods.

**Class-level identity attributes**

The registry reads these before instantiating the parser, so they must be
plain class attributes (not instance attributes or properties):

```python
class MyCustomParser:
    name = "My Format Parser"  # human-readable name shown in logs
    version = "1.0.0"  # semantic version string
    author = "Acme Corp"  # author / organisation
    url = "https://example.com/my-parser"  # docs or issue tracker
```

**Declaring supported MIME types**

Return a `dict` mapping MIME type strings to preferred file extensions
(including the leading dot). Paperless-ngx uses the extension when storing
archive copies and serving files for download.

```python
@classmethod
def supported_mime_types(cls) -> dict[str, str]:
    return {
        "application/x-my-format": ".myf",
        "application/x-my-format-alt": ".myf",
    }
```

**Scoring**

When more than one parser can handle a file, the registry calls `score()` on
each candidate and picks the one with the highest result. Return `None` to
decline handling a file even though the MIME type is listed as supported (for
example, when a required external service is not configured).

| Score  | Meaning                                            |
| ------ | -------------------------------------------------- |
| `None` | Decline — do not handle this file                  |
| `10`   | Default priority used by all built-in parsers      |
| `> 10` | Override a built-in parser for the same MIME type  |

```python
@classmethod
def score(
    cls,
    mime_type: str,
    filename: str,
    path: "Path | None" = None,
) -> int | None:
    # Inspect filename or file bytes here if needed.
    return 10
```

**Archive and rendition flags**

```python
@property
def can_produce_archive(self) -> bool:
    """True if parse() can produce a searchable PDF archive copy."""
    return True  # or False if your parser doesn't produce PDFs

@property
def requires_pdf_rendition(self) -> bool:
    """True if the original format cannot be displayed by a browser
    (e.g. DOCX, ODT) and the PDF output must always be kept."""
    return False
```

**Context manager — temp directory lifecycle**

Paperless-ngx always uses parsers as context managers. Create a temporary
working directory in `__enter__` (or `__init__`) and remove it in `__exit__`
regardless of whether an exception occurred. Store intermediate files,
thumbnails, and archive PDFs inside this directory.

```python
import shutil
import tempfile
from pathlib import Path
from types import TracebackType
from typing import Self

from django.conf import settings


class MyCustomParser:
    ...

    def __init__(self, logging_group: object = None) -> None:
        settings.SCRATCH_DIR.mkdir(parents=True, exist_ok=True)
        self._tempdir = Path(
            tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR)
        )
        self._text: str | None = None
        self._archive_path: Path | None = None

    def __enter__(self) -> Self:
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> None:
        shutil.rmtree(self._tempdir, ignore_errors=True)
```

**Optional context — `configure()`**

The consumer calls `configure()` with a `ParserContext` after instantiation
and before `parse()`. If your parser doesn't need context, a no-op
implementation is fine:

```python
from paperless.parsers import ParserContext

def configure(self, context: ParserContext) -> None:
    pass  # override if you need context.mailrule_id, etc.
```

**Parsing**

`parse()` is the core method. It must not return a value; instead, store
results in instance attributes and expose them via the accessor methods below.
Raise `documents.parsers.ParseError` on any unrecoverable failure.

```python
from documents.parsers import ParseError

def parse(
    self,
    document_path: Path,
    mime_type: str,
    *,
    produce_archive: bool = True,
) -> None:
    try:
        self._text = extract_text_from_my_format(document_path)
    except Exception as e:
        raise ParseError(f"Failed to parse {document_path}: {e}") from e

    if produce_archive and self.can_produce_archive:
        archive = self._tempdir / "archived.pdf"
        convert_to_pdf(document_path, archive)
        self._archive_path = archive
```

**Result accessors**

```python
def get_text(self) -> str | None:
    return self._text

def get_date(self) -> "datetime.datetime | None":
    # Return a datetime extracted from the document, or None to let
    # Paperless-ngx use its default date-guessing logic.
    return None

def get_archive_path(self) -> Path | None:
    return self._archive_path
```

**Thumbnail**

`get_thumbnail()` may be called independently of `parse()`. Return the path
to a WebP image inside `self._tempdir`. The image should be roughly 500 × 700
pixels.

```python
def get_thumbnail(self, document_path: Path, mime_type: str) -> Path:
    thumb = self._tempdir / "thumb.webp"
    render_thumbnail(document_path, thumb)
    return thumb
```

**Optional methods**

These are called by the API on demand, not during the consumption pipeline.
Implement them if your format supports the information; otherwise return
`None` / `[]`.

```python
def get_page_count(self, document_path: Path, mime_type: str) -> int | None:
    return count_pages(document_path)

def extract_metadata(
    self,
    document_path: Path,
    mime_type: str,
) -> "list[MetadataEntry]":
    # Must never raise. Return [] if metadata cannot be read.
    from paperless.parsers import MetadataEntry

    return [
        MetadataEntry(
            namespace="https://example.com/ns/",
            prefix="ex",
            key="Author",
            value="Alice",
        )
    ]
```

#### 2. Registering via entry point

Add the following to your package's `pyproject.toml`. The key (left of `=`)
is an arbitrary name used only in log output; the value is the
`module:ClassName` import path.

```toml
[project.entry-points."paperless_ngx.parsers"]
my_parser = "my_package.parsers:MyCustomParser"
```

Install your package into the same Python environment as Paperless-ngx (or
add it to the Docker image), and the parser will be discovered automatically
on the next startup. No configuration changes are needed.

To verify discovery, check the application logs at startup for a line like:

```
Loaded third-party parser 'My Format Parser' v1.0.0 by Acme Corp (entrypoint: 'my_parser').
```

#### 3. Utilities

`paperless.parsers.utils` provides helpers you can import directly:

| Function                                 | Description                                                      |
| ---------------------------------------- | ---------------------------------------------------------------- |
| `read_file_handle_unicode_errors(path)` | Read a file as UTF-8, replacing invalid bytes instead of raising |
| `get_page_count_for_pdf(path)`           | Count pages in a PDF using pikepdf                               |
| `extract_pdf_metadata(path)`             | Extract XMP metadata from a PDF as a `list[MetadataEntry]`       |

#### Minimal example

A complete, working parser for a hypothetical plain-XML format:

```python
from __future__ import annotations

import shutil
import tempfile
import xml.etree.ElementTree as ET
from pathlib import Path
from typing import Self

from django.conf import settings

from documents.parsers import ParseError
from paperless.parsers import ParserContext


class XmlDocumentParser:
    name = "XML Parser"
    version = "1.0.0"
    author = "Acme Corp"
    url = "https://example.com/xml-parser"

    @classmethod
    def supported_mime_types(cls) -> dict[str, str]:
        return {"application/xml": ".xml", "text/xml": ".xml"}

    @classmethod
    def score(cls, mime_type: str, filename: str, path: Path | None = None) -> int | None:
        return 10

    @property
    def can_produce_archive(self) -> bool:
        return False

    @property
    def requires_pdf_rendition(self) -> bool:
        return False

    def __init__(self, logging_group: object = None) -> None:
        settings.SCRATCH_DIR.mkdir(parents=True, exist_ok=True)
        self._tempdir = Path(tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR))
        self._text: str | None = None

    def __enter__(self) -> Self:
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        shutil.rmtree(self._tempdir, ignore_errors=True)

    def configure(self, context: ParserContext) -> None:
        pass

    def parse(self, document_path: Path, mime_type: str, *, produce_archive: bool = True) -> None:
        try:
            tree = ET.parse(document_path)
            self._text = " ".join(tree.getroot().itertext())
        except ET.ParseError as e:
            raise ParseError(f"XML parse error: {e}") from e

    def get_text(self) -> str | None:
        return self._text

    def get_date(self):
        return None

    def get_archive_path(self) -> Path | None:
        return None

    def get_thumbnail(self, document_path: Path, mime_type: str) -> Path:
        from PIL import Image, ImageDraw

        img = Image.new("RGB", (500, 700), color="white")
        ImageDraw.Draw(img).text((10, 10), "XML Document", fill="black")
        out = self._tempdir / "thumb.webp"
        img.save(out, format="WEBP")
        return out

    def get_page_count(self, document_path: Path, mime_type: str) -> int | None:
        return None

    def extract_metadata(self, document_path: Path, mime_type: str) -> list:
        return []
```

### Developing date parser plugins

Paperless-ngx uses a plugin system for date parsing, allowing you to extend or replace the default date parsing behavior. Plugins are discovered using [Python entry points](https://setuptools.pypa.io/en/latest/userguide/entry_point.html).

#### Creating a Date Parser Plugin

To create a custom date parser plugin, you need to:

@@ -494,7 +734,7 @@ To create a custom date parser plugin, you need to:
2. Implement the required abstract method
3. Register your plugin via an entry point

##### 1. Implementing the Parser Class

Your parser must extend `documents.plugins.date_parsing.DateParserPluginBase` and implement the `parse` method:

@@ -534,16 +774,16 @@ class MyDateParserPlugin(DateParserPluginBase):
        yield another_datetime
```

##### 2. Configuration and Helper Methods

Your parser instance is initialized with a `DateParserConfig` object accessible via `self.config`. This provides:

- `languages: list[str]` - List of language codes for date parsing
- `timezone_str: str` - Timezone string for date localization
- `ignore_dates: set[datetime.date]` - Dates that should be filtered out
- `reference_time: datetime.datetime` - Current time for filtering future dates
- `filename_date_order: str | None` - Date order preference for filenames (e.g., "DMY", "MDY")
- `content_date_order: str` - Date order preference for content
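
To make these attributes concrete, here is an illustrative sketch of a plugin
that consults them while yielding candidate dates. The `parse` signature and
the exact `_filter_date` arguments are simplified here and may differ from the
real base class (see the helper description below); the date pattern is
hypothetical:

```python
import datetime
import re
from collections.abc import Iterator

from documents.plugins.date_parsing import DateParserPluginBase


class DottedDateParserPlugin(DateParserPluginBase):
    # Hypothetical plugin matching DD.MM.YYYY dates in document content.
    DATE_RE = re.compile(r"\b(\d{2})\.(\d{2})\.(\d{4})\b")

    def parse(self, text: str) -> Iterator[datetime.datetime]:
        for day, month, year in self.DATE_RE.findall(text):
            candidate = datetime.datetime(int(year), int(month), int(day))
            # The _filter_date helper applies self.config.ignore_dates,
            # reference_time and timezone handling for us.
            filtered = self._filter_date(candidate)
            if filtered is not None:
                yield filtered
```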
The base class provides two helper methods you can use:

@@ -567,11 +807,11 @@ def _filter_date(
    """
```

##### 3. Resource Management (Optional)

If your plugin needs to acquire or release resources (database connections, API clients, etc.), override the context manager methods. Paperless-ngx will always use plugins as context managers, ensuring resources can be released even in the event of errors.

##### 4. Registering Your Plugin

Register your plugin using a setuptools entry point in your package's `pyproject.toml`:

@@ -582,7 +822,7 @@ my_parser = "my_package.parsers:MyDateParserPlugin"

The entry point name (e.g., `"my_parser"`) is used for sorting when multiple plugins are found. Paperless-ngx will use the first plugin alphabetically by name if multiple plugins are discovered.

#### Plugin Discovery

Paperless-ngx automatically discovers and loads date parser plugins at runtime. The discovery process:

@@ -593,7 +833,7 @@ Paperless-ngx automatically discovers and loads date parser plugins at runtime.

If multiple plugins are installed, a warning is logged indicating which plugin was selected.

#### Example: Simple Date Parser

Here's a minimal example that only looks for ISO 8601 dates:

@@ -625,3 +865,30 @@ class ISODateParserPlugin(DateParserPluginBase):
        if filtered_date is not None:
            yield filtered_date
```

## Using Visual Studio Code devcontainer

Another easy way to get started with development is to use Visual Studio
Code devcontainers. This approach will create a preconfigured development
environment with all of the required tools and dependencies.
[Learn more about devcontainers](https://code.visualstudio.com/docs/devcontainers/containers).
The .devcontainer/vscode/tasks.json and .devcontainer/vscode/launch.json files
contain more information about the specific tasks and launch configurations (see the
non-standard "description" field).

To get started:

1. Clone the repository on your machine and open the Paperless-ngx folder in VS Code.

2. VS Code will prompt you with "Reopen in container". Do so and wait for the environment to start.

3. In case your host operating system is Windows:

   - The Source Control view in Visual Studio Code might show: "The detected Git repository is potentially unsafe as the folder is owned by someone other than the current user." Use "Manage Unsafe Repositories" to fix this.
   - Git might have detected modifications for all files, because Windows is using CRLF line endings. Run `git checkout .` in the container's terminal to fix this issue.

4. Initialize the project by running the task **Project Setup: Run all Init Tasks**. This
   will initialize the database tables and create a superuser. Then you can compile the front end
   for production or run the frontend in debug mode.

5. The project is ready for debugging: start either the fullstack debug or individual debug
   processes. To spin up the project without debugging, run the task **Project Start: Run all Services**.

34
docs/faq.md
@@ -44,28 +44,28 @@ system. On Linux, chances are high that this location is

You can always drag those files out of that folder to use them
elsewhere. Here are a couple notes about that.

- Paperless-ngx never modifies your original documents. It keeps
  checksums of all documents and uses a scheduled sanity checker to
  check that they remain the same.
- By default, paperless uses the internal ID of each document as its
  filename. This might not be very convenient for export. However, you
  can adjust the way files are stored in paperless by
  [configuring the filename format](advanced_usage.md#file-name-handling).
- [The exporter](administration.md#exporter) is
  another easy way to get your files out of paperless with reasonable
  file names.

## _What file types does paperless-ngx support?_
**A:** Currently, the following files are supported:

- PDF documents, PNG images, JPEG images, TIFF images, GIF images and
  WebP images are processed with OCR and converted into PDF documents.
- Plain text documents are supported as well and are added verbatim to
  paperless.
- With the optional Tika integration enabled (see [Tika configuration](https://docs.paperless-ngx.com/configuration#tika)),
  Paperless also supports various Office documents (.docx, .doc, .odt,
  .ppt, .pptx, .odp, .xls, .xlsx, .ods).

Paperless-ngx determines the type of a file by inspecting its content
rather than its file extension. However, files processed via the

@@ -28,36 +28,36 @@ physical documents into a searchable online archive so you can keep, well, _less
## Features

- **Organize and index** your scanned documents with tags, correspondents, types, and more.
- _Your_ data is stored locally on _your_ server and is never transmitted or shared in any way, unless you explicitly choose to do so.
- Performs **OCR** on your documents, adding searchable and selectable text, even to documents scanned with only images.
- Utilizes the open-source Tesseract engine to recognize more than 100 languages.
- _New!_ Supports remote OCR with Azure AI (opt-in).
- Documents are saved in PDF/A format, which is designed for long-term storage, alongside the unaltered originals.
- Uses machine learning to automatically add tags, correspondents and document types to your documents.
- **New**: Paperless-ngx can now leverage AI (Large Language Models or LLMs) for document suggestions. This is an optional feature that is disabled by default.
- Supports PDF documents, images, plain text files, Office documents (Word, Excel, PowerPoint, and LibreOffice equivalents)[^1] and more.
- Paperless stores your documents plain on disk. Filenames and folders are managed by paperless, and their format can be configured freely, with different configurations assigned to different documents.
- **Beautiful, modern web application** that features:
  - Customizable dashboard with statistics.
  - Filtering by tags, correspondents, types, and more.
  - Bulk editing of tags, correspondents, types and more.
  - Drag-and-drop uploading of documents throughout the app.
  - Customizable views can be saved and displayed on the dashboard and / or sidebar.
  - Support for custom fields of various data types.
  - Shareable public links with optional expiration.
- **Full text search** helps you find what you need:
  - Auto completion suggests relevant words from your documents.
  - Results are sorted by relevance to your search query.
  - Highlighting shows you which parts of the document matched the query.
  - Searching for similar documents ("More like this").
- **Email processing**[^1]: import documents from your email accounts:
  - Configure multiple accounts and rules for each account.
  - After processing, paperless can perform actions on the messages such as marking as read, deleting and more.
- A built-in robust **multi-user permissions** system that supports 'global' permissions as well as per-document or per-object permissions.
- A powerful workflow system that gives you even more control.
- **Optimized** for multi-core systems: Paperless-ngx consumes multiple documents in parallel.
- The integrated sanity checker makes sure that your document archive is in good health.

[^1]: Office document and email consumption support is optional and provided by Apache Tika (see [configuration](https://docs.paperless-ngx.com/configuration/#tika))
@@ -42,12 +42,12 @@ The `CONSUMER_BARCODE_SCANNER` setting has been removed. zxing-cpp is now the on
### Action Required

- If you were already using `CONSUMER_BARCODE_SCANNER=ZXING`, simply remove the setting.
- If you had `CONSUMER_BARCODE_SCANNER=PYZBAR` or were using the default, no functional changes are needed beyond
  removing the setting. zxing-cpp supports all the same barcode formats and you should see improved detection
  reliability.
- The `libzbar0` / `libzbar-dev` system packages are no longer required and can be removed from any custom Docker
  images or host installations.

## Database Engine
@@ -103,3 +103,30 @@ Multiple options are combined in a single value:

```bash
PAPERLESS_DB_OPTIONS="sslmode=require;sslrootcert=/certs/ca.pem;pool.max_size=10"
```

## OpenID Connect Token Endpoint Authentication
Some existing OpenID Connect setups may require an explicit token endpoint authentication method after upgrading to v3.

### Action Required

If OIDC login fails at the callback with an `invalid_client` error, add `token_auth_method` to the provider `settings` in
[`PAPERLESS_SOCIALACCOUNT_PROVIDERS`](configuration.md#PAPERLESS_SOCIALACCOUNT_PROVIDERS).

For example:

```json
{
  "openid_connect": {
    "APPS": [
      {
        ...
        "settings": {
          "server_url": "https://login.example.com",
          "token_auth_method": "client_secret_basic"
        }
      }
    ]
  }
}
```

docs/setup.md

@@ -44,8 +44,8 @@ account. In short, it automates the [Docker Compose setup](#docker) described be
#### Prerequisites

- Docker and Docker Compose must be [installed](https://docs.docker.com/engine/install/){:target="\_blank"}.
- macOS users will need [GNU sed](https://formulae.brew.sh/formula/gnu-sed) with support for running as `sed` as well as [wget](https://formulae.brew.sh/formula/wget).

#### Run the installation script
@@ -63,7 +63,7 @@ credentials you provided during the installation script.
#### Prerequisites

- Docker and Docker Compose must be [installed](https://docs.docker.com/engine/install/){:target="\_blank"}.

#### Installation
@@ -101,7 +101,7 @@ credentials you provided during the installation script.

```yaml
ports:
  - 8010:8000
```

3. Modify `docker-compose.env` with any configuration options you need.
@@ -140,24 +140,17 @@ a [superuser](usage.md#superusers) account.
!!! warning

    It is not possible to run the container rootless if additional languages are specified via `PAPERLESS_OCR_LANGUAGES`.

If you want to run Paperless as a rootless container, set `user:` in `docker-compose.yml` to the UID and GID of your host user (use `id -u` and `id -g` to find these values). The container process starts directly as that user with no internal privilege remapping:

```yaml
webserver:
  image: ghcr.io/paperless-ngx/paperless-ngx:latest
  user: '1000:1000'
```

Do not combine this with `USERMAP_UID` or `USERMAP_GID`, which are intended for the non-rootless case described in step 3.

**File systems without inotify support (e.g. NFS)**
@@ -171,26 +164,25 @@ to enable polling and disable inotify. See [here](configuration.md#polling).
#### Prerequisites

- Paperless runs on Linux only, Windows is not supported.
- Python 3.11, 3.12, 3.13, or 3.14 is required. As a policy, Paperless-ngx aims to support at least the three most recent Python versions and drops support for versions as they reach end-of-life. Newer versions may work, but some dependencies may not be fully compatible.

#### Installation
1. Install dependencies. Paperless requires the following packages:

    - `python3`
    - `python3-pip`
    - `python3-dev`
    - `default-libmysqlclient-dev` for MariaDB
    - `pkg-config` for mysqlclient (python dependency)
    - `fonts-liberation` for generating thumbnails for plain text
      files
    - `imagemagick` >= 6 for PDF conversion
    - `gnupg` for handling encrypted documents
    - `libpq-dev` for PostgreSQL
    - `libmagic-dev` for mime type detection
    - `mariadb-client` for MariaDB compile time
    - `poppler-utils` for barcode detection

    Use this list for your preferred package management:

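    The package-manager commands themselves are elided from this diff hunk; on Debian/Ubuntu, one way to assemble the list above might be:

    ```bash
    # Sketch only; mirrors the package list above, not the elided original.
    sudo apt-get install python3 python3-pip python3-dev imagemagick \
      fonts-liberation gnupg libpq-dev default-libmysqlclient-dev \
      pkg-config libmagic-dev mariadb-client poppler-utils
    ```
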
@@ -200,18 +192,17 @@ to enable polling and disable inotify. See [here](configuration.md#polling).

    These dependencies are required for OCRmyPDF, which is used for text
    recognition.

    - `unpaper`
    - `ghostscript`
    - `icc-profiles-free`
    - `qpdf`
    - `liblept5`
    - `libxml2`
    - `pngquant` (suggested for certain PDF image optimizations)
    - `zlib1g`
    - `tesseract-ocr` >= 4.0.0 for OCR
    - `tesseract-ocr` language packs (`tesseract-ocr-eng`,
      `tesseract-ocr-deu`, etc)

    Use this list for your preferred package management:

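    As above, the exact commands are elided here; a Debian/Ubuntu sketch covering this list might be:

    ```bash
    # Sketch only; package names are taken from the list above.
    sudo apt-get install unpaper ghostscript icc-profiles-free qpdf \
      liblept5 libxml2 pngquant zlib1g tesseract-ocr tesseract-ocr-eng
    ```
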
@@ -220,16 +211,14 @@ to enable polling and disable inotify. See [here](configuration.md#polling).

    On Raspberry Pi, these libraries are required as well:

    - `libatlas-base-dev`
    - `libxslt1-dev`
    - `mime-support`

    You will also need these for installing some of the python dependencies:

    - `build-essential`
    - `python3-setuptools`
    - `python3-wheel`

    Use this list for your preferred package management:

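    A Debian/Ubuntu sketch for these (the real commands are elided from this hunk):

    ```bash
    sudo apt-get install libatlas-base-dev libxslt1-dev mime-support \
      build-essential python3-setuptools python3-wheel
    ```
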
@@ -279,44 +268,41 @@ to enable polling and disable inotify. See [here](configuration.md#polling).
6. Configure Paperless-ngx. See [configuration](configuration.md) for details.
   Edit the included `paperless.conf` and adjust the settings to your
   needs. Required settings for getting Paperless-ngx running are:

    - [`PAPERLESS_REDIS`](configuration.md#PAPERLESS_REDIS) should point to your Redis server, such as
      `redis://localhost:6379`.
    - [`PAPERLESS_DBENGINE`](configuration.md#PAPERLESS_DBENGINE) is optional, and should be one of `postgres`,
      `mariadb`, or `sqlite`
    - [`PAPERLESS_DBHOST`](configuration.md#PAPERLESS_DBHOST) should be the hostname on which your
      PostgreSQL server is running. Do not configure this to use
      SQLite instead. Also configure port, database name, user and
      password as necessary.
    - [`PAPERLESS_CONSUMPTION_DIR`](configuration.md#PAPERLESS_CONSUMPTION_DIR) should point to the folder
      that Paperless-ngx should watch for incoming documents.
      Likewise, [`PAPERLESS_DATA_DIR`](configuration.md#PAPERLESS_DATA_DIR) and
      [`PAPERLESS_MEDIA_ROOT`](configuration.md#PAPERLESS_MEDIA_ROOT) define where Paperless-ngx stores its data.
      If needed, these can point to the same directory.
    - [`PAPERLESS_SECRET_KEY`](configuration.md#PAPERLESS_SECRET_KEY) should be a random sequence of
      characters. It's used for authentication; if it is not set to a
      unique, secret value, third parties can forge authentication credentials.
    - Set [`PAPERLESS_URL`](configuration.md#PAPERLESS_URL) if you are behind a reverse proxy. This should
      point to your domain. Please see
      [configuration](configuration.md) for more
      information.

    You can make many more adjustments, especially for OCR.
    The following options are recommended for most users:

    - Set [`PAPERLESS_OCR_LANGUAGE`](configuration.md#PAPERLESS_OCR_LANGUAGE) to the language most of your
      documents are written in.
    - Set [`PAPERLESS_TIME_ZONE`](configuration.md#PAPERLESS_TIME_ZONE) to your local time zone.

!!! warning

    Ensure your Redis instance [is secured](https://redis.io/docs/latest/operate/oss_and_stack/management/security/).

7. Create the following directories if they do not already exist:

    - `/opt/paperless/media`
    - `/opt/paperless/data`
    - `/opt/paperless/consume`

    Adjust these paths if you configured different folders.
    Then verify that the `paperless` user has write permissions:

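    The verification commands are elided from this diff; a typical sequence, assuming the default paths above and a `paperless` system user, might be:

    ```bash
    # Hypothetical sketch; adjust paths and user/group to your setup.
    sudo mkdir -p /opt/paperless/{media,data,consume}
    sudo chown -R paperless:paperless /opt/paperless
    ```
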
@@ -391,11 +377,10 @@ to enable polling and disable inotify. See [here](configuration.md#polling).
starting point.

Paperless needs:

- The `webserver` script to run the webserver.
- The `consumer` script to watch the input folder.
- The `taskqueue` script for background workers (document consumption, etc.).
- The `scheduler` script for periodic tasks such as email checking.

!!! note
@@ -501,19 +486,19 @@ your setup depending on how you installed Paperless.

This section describes how to update an existing Paperless Docker
installation. Keep these points in mind:

- Read the [changelog](changelog.md) and
  take note of breaking changes.
- Decide whether to stay on SQLite or migrate to PostgreSQL.
  Both work fine with Paperless-ngx.
  However, if you already have a database server running
  for other services, you might as well use it for Paperless too.
- The task scheduler of Paperless, which is used to execute periodic
  tasks such as email checking and maintenance, requires a
  [Redis](https://redis.io/) message broker instance. The
  Docker Compose route takes care of that.
- The layout of the folder structure for your documents and data
  remains the same, so you can plug your old Docker volumes into
  paperless-ngx and expect it to find everything where it should be.

Migration to Paperless-ngx is then performed in a few simple steps:
@@ -598,7 +583,6 @@ commands as well.

1. Stop and remove the Paperless container.
2. If using an external database, stop that container.
3. Update Redis configuration.

    1. If `REDIS_URL` is already set, change it to [`PAPERLESS_REDIS`](configuration.md#PAPERLESS_REDIS)
       and continue to step 4.

@@ -610,22 +594,18 @@ commands as well.

    the new Redis container.

4. Update user mapping.

    1. If set, change the environment variable `PUID` to `USERMAP_UID`.
    1. If set, change the environment variable `PGID` to `USERMAP_GID`.

5. Update configuration paths.

    1. Set the environment variable [`PAPERLESS_DATA_DIR`](configuration.md#PAPERLESS_DATA_DIR) to `/config`.

6. Update media paths.

    1. Set the environment variable [`PAPERLESS_MEDIA_ROOT`](configuration.md#PAPERLESS_MEDIA_ROOT) to
       `/data/media`.

7. Update timezone.

    1. Set the environment variable [`PAPERLESS_TIME_ZONE`](configuration.md#PAPERLESS_TIME_ZONE) to the same
       value as `TZ`.

@@ -639,33 +619,33 @@ commands as well.

Paperless runs on Raspberry Pi. Some tasks can be slow on lower-powered
hardware, but a few settings can improve performance:

- Stick with SQLite to save some resources. See [troubleshooting](troubleshooting.md#log-reports-creating-paperlesstask-failed)
  if you encounter issues with SQLite locking.
- If you do not need the filesystem-based consumer, consider disabling it
  entirely by setting [`PAPERLESS_CONSUMER_DISABLE`](configuration.md#PAPERLESS_CONSUMER_DISABLE) to `true`.
- Consider setting [`PAPERLESS_OCR_PAGES`](configuration.md#PAPERLESS_OCR_PAGES) to 1, so that Paperless
  OCRs only the first page of your documents. In most cases, this page
  contains enough information to be able to find it.
- [`PAPERLESS_TASK_WORKERS`](configuration.md#PAPERLESS_TASK_WORKERS) and [`PAPERLESS_THREADS_PER_WORKER`](configuration.md#PAPERLESS_THREADS_PER_WORKER) are
  configured to use all cores. The Raspberry Pi models 3 and up have 4
  cores, meaning that Paperless will use 2 workers and 2 threads per
  worker. This may result in sluggish response times during
  consumption, so you might want to lower these settings (example: 2
  workers and 1 thread to always have some computing power left for
  other tasks).
- Keep [`PAPERLESS_OCR_MODE`](configuration.md#PAPERLESS_OCR_MODE) at its default value `skip` and consider
  OCRing your documents before feeding them into Paperless. Some
  scanners are able to do this!
- Set [`PAPERLESS_OCR_SKIP_ARCHIVE_FILE`](configuration.md#PAPERLESS_OCR_SKIP_ARCHIVE_FILE) to `with_text` to skip archive
  file generation for already OCRed documents, or `always` to skip it
  for all documents.
- If you want to perform OCR on the device, consider using
  `PAPERLESS_OCR_CLEAN=none`. This will speed up OCR times and use
  less memory at the expense of slightly worse OCR results.
- If using Docker, consider setting [`PAPERLESS_WEBSERVER_WORKERS`](configuration.md#PAPERLESS_WEBSERVER_WORKERS) to 1. This will save some memory.
- Consider setting [`PAPERLESS_ENABLE_NLTK`](configuration.md#PAPERLESS_ENABLE_NLTK) to false, to disable the
  more advanced language processing, which can take more memory and
  processing time.

For details, refer to [configuration](configuration.md).
@@ -4,27 +4,27 @@

Check for the following issues:

- Ensure that the directory you're putting your documents in is the
  folder paperless is watching. With Docker, this is configured
  in the `docker-compose.yml` file. Without Docker, look at the
  `CONSUMPTION_DIR` setting. Don't adjust this setting if you're
  using Docker.

- Ensure that redis is up and running. Paperless does its task
  processing asynchronously, and for documents to arrive at the task
  processor, it needs redis to run.

- Ensure that the task processor is running. Docker does this
  automatically. Manually invoke the task processor by executing

  ```shell-session
  celery --app paperless worker
  ```

- Look at the output of paperless and inspect it for any errors.

- Go to the admin interface, and check if there are failed tasks. If
  so, the tasks will contain an error message.

## Consumer warns `OCR for XX failed`
@@ -78,12 +78,12 @@ Ensure that `chown` is possible on these directories.

This indicates that the Auto matching algorithm found no documents to
learn from. This may have two reasons:

- You don't use the Auto matching algorithm: The error can be safely
  ignored in this case.
- You are using the Auto matching algorithm: The classifier explicitly
  excludes documents with Inbox tags. Verify that there are documents
  in your archive without inbox tags. The algorithm will only learn
  from documents not in your inbox.

## UserWarning in sklearn on every single document
@@ -127,10 +127,10 @@ change in the `docker-compose.yml` file:

```yaml
# The gotenberg chromium route is used to convert .eml files. We do not
# want to allow external content like tracking pixels or even javascript.
command:
  - 'gotenberg'
  - '--chromium-disable-javascript=true'
  - '--chromium-allow-list=file:///tmp/.*'
  - '--api-timeout=60s'
```

## Permission denied errors in the consumption directory

docs/usage.md

@@ -14,42 +14,42 @@ for finding and managing your documents.

Paperless essentially consists of two different parts for managing your
documents:

- The _consumer_ watches a specified folder and adds all documents in
  that folder to paperless.
- The _web server_ (web UI) provides a UI that you use to manage and
  search documents.

Each document has data fields that you can assign to it:

- A _Document_ is a piece of paper that sometimes contains valuable
  information.
- The _correspondent_ of a document is the person, institution or
  company that a document either originates from, or is sent to.
- A _tag_ is a label that you can assign to documents. Think of labels
  as more powerful folders: Multiple documents can be grouped together
  with a single tag, however, a single document can also have multiple
  tags. This is not possible with folders. The reason folders are not
  implemented in paperless is simply that tags are much more versatile
  than folders.
- A _document type_ is used to demarcate the type of a document such
  as letter, bank statement, invoice, contract, etc. It is used to
  identify what a document is about.
- The document _storage path_ is the location where the document files
  are stored. See [Storage Paths](advanced_usage.md#storage-paths) for
  more information.
- The _date added_ of a document is the date the document was scanned
  into paperless. You cannot and should not change this date.
- The _date created_ of a document is the date the document was
  initially issued. This can be the date you bought a product, the
  date you signed a contract, or the date a letter was sent to you.
- The _archive serial number_ (short: ASN) of a document is the
  identifier of the document in your physical document binders. See
  [recommended workflow](#usage-recommended-workflow) below.
- The _content_ of a document is the text that was OCR'ed from the
  document. This text is fed into the search engine and is used for
  matching tags, correspondents and document types.
- Paperless-ngx also supports _custom fields_ which can be used to
  store additional metadata about a document.

## The Web UI
@@ -93,12 +93,12 @@ download the document or share it via a share link.

Think of versions as **file history** for a document.

- Versions track the underlying file and extracted text content (OCR/text).
- Metadata such as tags, correspondent, document type, storage path and custom fields stay on the "root" document.
- Version files follow normal filename formatting (including storage paths) and add a `_vN` suffix (for example `_v1`, `_v2`).
- By default, search and document content use the latest version.
- In document detail, selecting a version switches the preview, file metadata, content, and the download and related buttons to that version.
- Deleting a non-root version keeps metadata and falls back to the latest remaining version.

### Management Lists
@@ -218,21 +218,20 @@ patterns can include wildcards and multiple patterns separated by a comma.

The actions all ensure that the same mail is not consumed twice by
different means. These are as follows:

- **Delete:** Immediately deletes mail that paperless has consumed
  documents from. Use with caution.
- **Mark as read:** Mark consumed mail as read. Paperless will not
  consume documents from already read mails. If you read a mail before
  paperless sees it, it will be ignored.
- **Flag:** Sets the 'important' flag on mails with consumed
  documents. Paperless will not consume flagged mails.
- **Move to folder:** Moves consumed mails out of the way so that
  paperless won't consume them again.
- **Add custom Tag:** Adds a custom tag to mails with consumed
  documents (the IMAP standard calls these "keywords"). Paperless
  will not consume mails already tagged. Not all mail servers support
  this feature!
- **Apple Mail support:** Apple Mail clients allow differently colored tags. For this to work use `apple:<color>` (e.g. _apple:green_) as a custom tag. Available colors are _red_, _orange_, _yellow_, _blue_, _green_, _violet_ and _grey_.

!!! warning
@@ -325,12 +324,12 @@ or using [email](#workflow-action-email) or [webhook](#workflow-action-webhook)

"Share links" are public links to files (or an archive of files) and can be created and managed under the 'Send' button on the document detail screen or from the bulk editor.

- Share links do not require a user to log in and thus link directly to a file or bundled download.
- Links are unique and are of the form `{paperless-url}/share/{randomly-generated-slug}`.
- Links can optionally have an expiration time set.
- After a link expires or is deleted, users will be redirected to the regular paperless-ngx login.
- From the document detail screen you can create a share link for that single document.
- From the bulk editor you can create a **share link bundle** for any selection. Paperless-ngx prepares a ZIP archive in the background and exposes a single share link. You can revisit the "Manage share link bundles" dialog to monitor progress, retry failed bundles, or delete links.

!!! tip
@@ -514,25 +513,25 @@ flowchart TD

Workflows allow you to filter by:

- Source, e.g. documents uploaded via consume folder, API (& the web UI) and mail fetch
- File name, including wildcards, e.g. \*.pdf will apply to all PDFs.
- File path, including wildcards. Note that enabling `PAPERLESS_CONSUMER_RECURSIVE` would allow, for
  example, automatically assigning documents to different owners based on the upload directory.
- Mail rule. Choosing this option will force 'mail fetch' to be the workflow source.
- Content matching (`Added`, `Updated` and `Scheduled` triggers only). Filter document content using the matching settings.

There are also 'advanced' filters available for `Added`, `Updated` and `Scheduled` triggers:
- Any Tags: Filter for documents with any of the specified tags.
- All Tags: Filter for documents with all of the specified tags.
- No Tags: Filter for documents with none of the specified tags.
- Document type: Filter documents with this document type.
- Not Document types: Filter documents without any of these document types.
- Correspondent: Filter documents with this correspondent.
- Not Correspondents: Filter documents without any of these correspondents.
- Storage path: Filter documents with this storage path.
- Not Storage paths: Filter documents without any of these storage paths.
- Custom field query: Filter documents with a custom field query (the same as used for the document list filters).

### Workflow Actions
@@ -544,37 +543,37 @@ The following workflow action types are available:

"Assignment" actions can assign:

- Title, see [workflow placeholders](usage.md#workflow-placeholders) below
- Tags, correspondent, document type and storage path
- Document owner
- View and / or edit permissions to users or groups
- Custom fields. Note that no value for the field will be set

##### Removal {#workflow-action-removal}

"Removal" actions can remove either all of, or specific sets of, the following:

- Tags, correspondents, document types or storage paths
- Document owner
- View and / or edit permissions
- Custom fields

##### Email {#workflow-action-email}

"Email" actions can send documents via email. This action requires a mail server to be [configured](configuration.md#email-sending). You can specify:

- The recipient email address(es) separated by commas
- The subject and body of the email, which can include placeholders, see [placeholders](usage.md#workflow-placeholders) below
- Whether to include the document as an attachment

##### Webhook {#workflow-action-webhook}

"Webhook" actions send a POST request to a specified URL. You can specify:

- The URL to send the request to
- The request body as text or as key-value pairs, which can include placeholders, see [placeholders](usage.md#workflow-placeholders) below.
- Encoding for the request body, either JSON or form data
- The request headers as key-value pairs

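For instance, a JSON-encoded body assembled from key-value pairs might look like the following (the field names are illustrative, not a fixed schema):

```json
{
  "event": "document added",
  "id": "{{doc_id}}",
  "title": "{{doc_title}}",
  "url": "{{doc_url}}"
}
```
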
For security reasons, webhooks can be limited to specific ports and disallowed from connecting to local URLs. See the relevant
[configuration settings](configuration.md#workflow-webhooks) to change this behavior. If you are allowing non-admins to create workflows,

@@ -605,33 +604,33 @@ The available inputs differ depending on the type of workflow trigger.

This is because at the time of consumption (when the text is to be set), no automatic tags etc. have been
applied. You can use the following placeholders in the template with any trigger type:

- `{{correspondent}}`: assigned correspondent name
- `{{document_type}}`: assigned document type name
- `{{owner_username}}`: assigned owner username
- `{{added}}`: added datetime
- `{{added_year}}`: added year
- `{{added_year_short}}`: added year, short form
- `{{added_month}}`: added month
- `{{added_month_name}}`: added month name
- `{{added_month_name_short}}`: added month short name
- `{{added_day}}`: added day
- `{{added_time}}`: added time in HH:MM format
- `{{original_filename}}`: original file name without extension
- `{{filename}}`: current file name without extension (for "added" workflows this may not be final yet; you can use `{{original_filename}}` instead)
- `{{doc_title}}`: current document title (cannot be used in title assignment)

The following placeholders are only available for "added" or "updated" triggers:

- `{{created}}`: created datetime
- `{{created_year}}`: created year
- `{{created_year_short}}`: created year, short form
- `{{created_month}}`: created month
- `{{created_month_name}}`: created month name
- `{{created_month_name_short}}`: created month short name
- `{{created_day}}`: created day
- `{{created_time}}`: created time in HH:MM format
- `{{doc_url}}`: URL to the document in the web UI. Requires the `PAPERLESS_URL` setting to be set.
- `{{doc_id}}`: Document ID

##### Examples
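The original examples are elided from this hunk; one illustrative title template, using only placeholders from the lists above, might be:

```
Invoice {{created_year}}-{{created_month}} from {{correspondent}}
```
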
@@ -676,26 +675,26 @@ Multiple fields may be attached to a document but the same field name cannot be

The following custom field types are supported:

- `Text`: any text
- `Boolean`: true / false (checked / unchecked) field
- `Date`: date
- `URL`: a valid URL
- `Integer`: integer number, e.g. 12
- `Number`: float number, e.g. 12.3456
- `Monetary`: [ISO 4217 currency code](https://en.wikipedia.org/wiki/ISO_4217#List_of_ISO_4217_currency_codes) and a number with exactly two decimals, e.g. USD12.30
- `Document Link`: reference(s) to other document(s) displayed as links, automatically creates a symmetrical link in reverse
- `Select`: a pre-defined list of strings from which the user can choose

## PDF Actions

Paperless-ngx supports basic editing operations for PDFs (these operations currently cannot be performed on non-PDF files). When viewing an individual document you can
open the 'PDF Editor' to use a simple UI for re-arranging, rotating, deleting pages and splitting documents.

- Merging documents: available when selecting multiple documents for 'bulk editing'.
- Rotating documents: available when selecting multiple documents for 'bulk editing' and via the PDF editor on an individual document's details page.
- Splitting documents: via the PDF editor on an individual document's details page.
- Deleting pages: via the PDF editor on an individual document's details page.
- Re-arranging pages: via the PDF editor on an individual document's details page.

!!! important
@@ -773,18 +772,18 @@ the system.

Here are a couple examples of tags and types that you could use in your
collection.

- An `inbox` tag for newly added documents that you haven't manually
  edited yet.
- A tag `car` for everything car related (repairs, registration,
  insurance, etc)
- A tag `todo` for documents that you still need to do something with,
  such as reply, or perform some task online.
- A tag `bank account x` for all bank statements related to that
  account.
- A tag `mail` for anything that you added to paperless via its mail
  processing capabilities.
- A tag `missing_metadata` when you still need to add some metadata to
  a document, but can't or don't want to do this right now.

## Searching {#basic-usage_searching}
|
||||
|
||||
@@ -873,8 +872,8 @@ The following diagram shows how easy it is to manage your documents.

### Preparations in paperless

- Create an inbox tag that gets assigned to all new documents.
- Create a TODO tag.

### Processing of the physical documents
@@ -948,15 +947,15 @@ Some documents require attention and require you to act on the document.

You may take two different approaches to handle these documents, based on how regularly you intend to scan documents and use paperless.

- If you scan and process your documents in paperless regularly, assign a TODO tag to all scanned documents that you need to process. Create a saved view on the dashboard that shows all documents with this tag.
- If you do not scan documents regularly and use paperless solely for archiving, create a physical todo box next to your physical inbox and put documents you need to process in the TODO box. When you have performed the task associated with the document, move it to the inbox.
## Remote OCR

@@ -977,64 +976,63 @@ or page limitations (e.g. with a free tier).
Paperless-ngx consists of the following components:

- **The webserver:** This serves the administration pages, the API, and the new frontend. This is the main tool you'll be using to interact with paperless. You may start the webserver directly with

    ```shell-session
    cd /path/to/paperless/src/
    granian --interface asgi --ws "paperless.asgi:application"
    ```

    or by any other means, such as another ASGI-capable server.

- **The consumer:** This is what watches your consumption folder for documents. However, the consumer itself does not really consume your documents; it notifies a task processor that a new file is ready for consumption. I suppose it should be named differently. This was also used to check your emails, but that's now done elsewhere as well.

    Start the consumer with the management command `document_consumer`:

    ```shell-session
    cd /path/to/paperless/src/
    python3 manage.py document_consumer
    ```

- **The task processor:** Paperless relies on [Celery - Distributed Task Queue](https://docs.celeryq.dev/en/stable/index.html) for doing most of the heavy lifting. This is a task queue that accepts tasks from multiple sources and processes them in parallel. It also comes with a scheduler that executes certain commands periodically.

    This task processor is responsible for:

    - Consuming documents. When the consumer finds new documents, it notifies the task processor to start a consumption task.
    - The task processor also performs the consumption of any documents you upload through the web interface.
    - Consuming emails. It periodically checks your configured accounts for new emails and notifies the task processor to consume the attachment of an email.
    - Maintaining the search index and the automatic matching algorithm. These are things that paperless needs to do from time to time in order to operate properly.

    This allows paperless to process multiple documents from your consumption folder in parallel! On a modern multi-core system, this makes the consumption process with full OCR blazingly fast.

    The task processor comes with a built-in admin interface that you can use to check whether any of the tasks fail and inspect the errors (i.e., wrong email credentials, errors during consuming a specific file, etc).

- A [redis](https://redis.io/) message broker: This is a really lightweight service that is responsible for getting the tasks from the webserver and the consumer to the task scheduler. These run in different processes (maybe even on different machines!), and therefore, this is necessary.

- Optional: A database server. Paperless supports PostgreSQL, MariaDB and SQLite for storing its data.
@@ -1,6 +1,6 @@
[project]
name = "paperless-ngx"
-version = "2.20.10"
+version = "2.20.13"
description = "A community-supported supercharged document management system: scan, index and archive all your physical documents"
readme = "README.md"
requires-python = ">=3.11"
@@ -26,7 +26,7 @@ dependencies = [
# WARNING: django does not use semver.
# Only patch versions are guaranteed to not introduce breaking changes.
"django~=5.2.10",
-"django-allauth[mfa,socialaccount]~=65.14.0",
+"django-allauth[mfa,socialaccount]~=65.15.0",
"django-auditlog~=3.4.1",
"django-cachalot~=2.9.0",
"django-celery-results~=2.6.0",
@@ -42,13 +42,14 @@ dependencies = [
"djangorestframework~=3.16",
"djangorestframework-guardian~=0.4.0",
"drf-spectacular~=0.28",
-"drf-spectacular-sidecar~=2026.1.1",
+"drf-spectacular-sidecar~=2026.3.1",
"drf-writable-nested~=0.7.1",
"faiss-cpu>=1.10",
-"filelock~=3.24.3",
+"filelock~=3.25.2",
"flower~=2.0.1",
"gotenberg-client~=0.13.1",
"httpx-oauth~=0.16",
"ijson>=3.2",
"imap-tools~=1.11.0",
"jinja2~=3.1.5",
"langdetect~=1.0.9",
@@ -59,7 +60,7 @@ dependencies = [
"llama-index-llms-openai>=0.6.13",
"llama-index-vector-stores-faiss>=0.5.2",
"nltk~=3.9.1",
-"ocrmypdf~=16.13.0",
+"ocrmypdf~=17.3.0",
"openai>=1.76",
"pathvalidate~=3.3.1",
"pdf2image~=1.17.0",
@@ -71,7 +72,7 @@ dependencies = [
"rapidfuzz~=3.14.0",
"redis[hiredis]~=5.2.1",
"regex>=2025.9.18",
-"scikit-learn~=1.7.0",
+"scikit-learn~=1.8.0",
"sentence-transformers>=4.1",
"setproctitle~=1.3.4",
"tika-client~=0.10.0",
@@ -110,7 +111,7 @@ docs = [
testing = [
"daphne",
"factory-boy~=3.3.1",
-"faker~=40.5.1",
+"faker~=40.8.0",
"imagehash",
"pytest~=9.0.0",
"pytest-cov~=7.0.0",
@@ -247,15 +248,13 @@ lint.per-file-ignores."docker/wait-for-redis.py" = [
lint.per-file-ignores."src/documents/models.py" = [
"SIM115",
]
lint.per-file-ignores."src/paperless_tesseract/tests/test_parser.py" = [
"RUF001",
]

lint.isort.force-single-line = true

[tool.codespell]
write-changes = true
ignore-words-list = "criterias,afterall,valeu,ureue,equest,ure,assertIn,Oktober,commitish"
-skip = "src-ui/src/locale/*,src-ui/pnpm-lock.yaml,src-ui/e2e/*,src/paperless_mail/tests/samples/*,src/documents/tests/samples/*,*.po,*.json"
+skip = "src-ui/src/locale/*,src-ui/pnpm-lock.yaml,src-ui/e2e/*,src/paperless_mail/tests/samples/*,src/paperless/tests/samples/mail/*,src/documents/tests/samples/*,*.po,*.json"

[tool.pytest]
minversion = "9.0"
@@ -270,10 +269,6 @@ testpaths = [
"src/documents/tests/",
"src/paperless/tests/",
"src/paperless_mail/tests/",
"src/paperless_tesseract/tests/",
"src/paperless_tika/tests",
"src/paperless_text/tests/",
"src/paperless_remote/tests/",
"src/paperless_ai/tests",
]
@@ -19,6 +19,4 @@ following additional information about it:
* Correspondent: ${DOCUMENT_CORRESPONDENT}
* Tags: ${DOCUMENT_TAGS}

-It was consumed with the passphrase ${PASSPHRASE}
-
"
File diff suppressed because it is too large
@@ -1,6 +1,6 @@
{
"name": "paperless-ngx-ui",
-"version": "2.20.10",
+"version": "2.20.13",
"scripts": {
"preinstall": "npx only-allow pnpm",
"ng": "ng",
@@ -11,17 +11,17 @@
},
"private": true,
"dependencies": {
-"@angular/cdk": "^21.2.0",
-"@angular/common": "~21.2.0",
-"@angular/compiler": "~21.2.0",
-"@angular/core": "~21.2.0",
-"@angular/forms": "~21.2.0",
-"@angular/localize": "~21.2.0",
-"@angular/platform-browser": "~21.2.0",
-"@angular/platform-browser-dynamic": "~21.2.0",
-"@angular/router": "~21.2.0",
+"@angular/cdk": "^21.2.2",
+"@angular/common": "~21.2.4",
+"@angular/compiler": "~21.2.4",
+"@angular/core": "~21.2.4",
+"@angular/forms": "~21.2.4",
+"@angular/localize": "~21.2.4",
+"@angular/platform-browser": "~21.2.4",
+"@angular/platform-browser-dynamic": "~21.2.4",
+"@angular/router": "~21.2.4",
"@ng-bootstrap/ng-bootstrap": "^20.0.0",
-"@ng-select/ng-select": "^21.4.1",
+"@ng-select/ng-select": "^21.5.2",
"@ngneat/dirty-check-forms": "^3.0.3",
"@popperjs/core": "^2.11.8",
"bootstrap": "^5.3.8",
@@ -42,26 +42,26 @@
"devDependencies": {
"@angular-builders/custom-webpack": "^21.0.3",
"@angular-builders/jest": "^21.0.3",
-"@angular-devkit/core": "^21.2.0",
-"@angular-devkit/schematics": "^21.2.0",
+"@angular-devkit/core": "^21.2.2",
+"@angular-devkit/schematics": "^21.2.2",
"@angular-eslint/builder": "21.3.0",
"@angular-eslint/eslint-plugin": "21.3.0",
"@angular-eslint/eslint-plugin-template": "21.3.0",
"@angular-eslint/schematics": "21.3.0",
"@angular-eslint/template-parser": "21.3.0",
-"@angular/build": "^21.2.0",
-"@angular/cli": "~21.2.0",
-"@angular/compiler-cli": "~21.2.0",
+"@angular/build": "^21.2.2",
+"@angular/cli": "~21.2.2",
+"@angular/compiler-cli": "~21.2.4",
"@codecov/webpack-plugin": "^1.9.1",
"@playwright/test": "^1.58.2",
"@types/jest": "^30.0.0",
-"@types/node": "^25.3.3",
-"@typescript-eslint/eslint-plugin": "^8.54.0",
-"@typescript-eslint/parser": "^8.54.0",
-"@typescript-eslint/utils": "^8.54.0",
-"eslint": "^10.0.2",
-"jest": "30.2.0",
-"jest-environment-jsdom": "^30.2.0",
+"@types/node": "^25.4.0",
+"@typescript-eslint/eslint-plugin": "^8.57.0",
+"@typescript-eslint/parser": "^8.57.0",
+"@typescript-eslint/utils": "^8.57.0",
+"eslint": "^10.0.3",
+"jest": "30.3.0",
+"jest-environment-jsdom": "^30.3.0",
"jest-junit": "^16.0.0",
"jest-preset-angular": "^16.1.1",
"jest-websocket-mock": "^2.5.0",
src-ui/pnpm-lock.yaml (generated, 1858 lines changed): file diff suppressed because it is too large
@@ -59,7 +59,7 @@
<div [ngbNavOutlet]="nav" class="border-start border-end border-bottom p-3 mb-3 shadow-sm"></div>
<div class="btn-toolbar" role="toolbar">
<div class="btn-group me-2">
-<button type="button" (click)="discardChanges()" class="btn btn-outline-secondary" [disabled]="loading || (isDirty$ | async) === false" i18n>Discard</button>
+<button type="button" (click)="discardChanges()" class="btn btn-outline-secondary" [disabled]="loading || (isDirty$ | async) === false" i18n>Cancel</button>
</div>
<div class="btn-group">
<button type="submit" class="btn btn-primary" [disabled]="loading || !configForm.valid || (isDirty$ | async) === false" i18n>Save</button>
@@ -1,7 +1,7 @@
<nav class="navbar navbar-dark fixed-top bg-primary flex-md-nowrap p-0 shadow-sm">
<button class="navbar-toggler d-md-none collapsed border-0" type="button" data-toggle="collapse"
data-target="#sidebarMenu" aria-controls="sidebarMenu" aria-expanded="false" aria-label="Toggle navigation"
-(click)="isMenuCollapsed = !isMenuCollapsed">
+(click)="mobileSearchHidden = false; isMenuCollapsed = !isMenuCollapsed">
<span class="navbar-toggler-icon"></span>
</button>
<a class="navbar-brand d-flex align-items-center me-0 px-3 py-3 order-sm-0"
@@ -24,7 +24,8 @@
}
</div>
</a>
-<div class="search-container flex-grow-1 py-2 pb-3 pb-sm-2 px-3 ps-md-4 me-sm-auto order-3 order-sm-1">
+<div class="search-container flex-grow-1 py-2 pb-3 pb-sm-2 px-3 ps-md-4 me-sm-auto order-3 order-sm-1"
+  [class.mobile-hidden]="mobileSearchHidden">
<div class="col-12 col-md-7">
<pngx-global-search></pngx-global-search>
</div>
@@ -378,7 +379,7 @@
</div>
</nav>

-<main role="main" class="ms-sm-auto px-md-4"
+<main role="main" class="ms-sm-auto px-md-4" [class.mobile-search-hidden]="mobileSearchHidden"
[ngClass]="slimSidebarEnabled ? 'col-slim' : 'col-md-9 col-lg-10 col-xxxl-11'">
<router-outlet></router-outlet>
</main>
@@ -44,6 +44,23 @@
.sidebar {
  top: 3.5rem;
}

.search-container {
  max-height: 4.5rem;
  overflow: hidden;
  transition: max-height .2s ease, opacity .2s ease, padding-top .2s ease, padding-bottom .2s ease;

  &.mobile-hidden {
    max-height: 0;
    opacity: 0;
    padding-top: 0 !important;
    padding-bottom: 0 !important;
  }
}

main.mobile-search-hidden {
  padding-top: 56px;
}
}

main {
@@ -293,6 +293,59 @@ describe('AppFrameComponent', () => {
  expect(component.isMenuCollapsed).toBeTruthy()
})

it('should hide mobile search when scrolling down and show it when scrolling up', () => {
  Object.defineProperty(globalThis, 'innerWidth', {
    value: 767,
  })

  component.ngOnInit()

  Object.defineProperty(globalThis, 'scrollY', {
    configurable: true,
    value: 40,
  })
  component.onWindowScroll()
  expect(component.mobileSearchHidden).toBe(true)

  Object.defineProperty(globalThis, 'scrollY', {
    configurable: true,
    value: 0,
  })
  component.onWindowScroll()
  expect(component.mobileSearchHidden).toBe(false)
})

it('should keep mobile search visible on desktop scroll or resize', () => {
  Object.defineProperty(globalThis, 'innerWidth', {
    value: 1024,
  })
  component.ngOnInit()
  component.mobileSearchHidden = true

  component.onWindowScroll()

  expect(component.mobileSearchHidden).toBe(false)

  component.mobileSearchHidden = true
  component.onWindowResize()
})

it('should keep mobile search visible while the mobile menu is expanded', () => {
  Object.defineProperty(globalThis, 'innerWidth', {
    value: 767,
  })
  component.ngOnInit()
  component.isMenuCollapsed = false

  Object.defineProperty(globalThis, 'scrollY', {
    configurable: true,
    value: 40,
  })
  component.onWindowScroll()

  expect(component.mobileSearchHidden).toBe(false)
})

it('should support close document & navigate on close current doc', () => {
  const closeSpy = jest.spyOn(openDocumentsService, 'closeDocument')
  closeSpy.mockReturnValue(of(true))
@@ -51,6 +51,8 @@ import { ComponentWithPermissions } from '../with-permissions/with-permissions.c
import { GlobalSearchComponent } from './global-search/global-search.component'
import { ToastsDropdownComponent } from './toasts-dropdown/toasts-dropdown.component'

// Minimum scroll distance (px) before the mobile search bar is hidden or shown
const SCROLL_THRESHOLD = 16

@Component({
  selector: 'pngx-app-frame',
  templateUrl: './app-frame.component.html',
@@ -94,6 +96,10 @@ export class AppFrameComponent

  slimSidebarAnimating: boolean = false

  public mobileSearchHidden: boolean = false

  private lastScrollY: number = 0

  constructor() {
    super()
    const permissionsService = this.permissionsService
@@ -111,6 +117,8 @@ export class AppFrameComponent
  }

  ngOnInit(): void {
    this.lastScrollY = window.scrollY

    if (this.settingsService.get(SETTINGS_KEYS.UPDATE_CHECKING_ENABLED)) {
      this.checkForUpdates()
    }
@@ -263,6 +271,38 @@ export class AppFrameComponent
    return this.settingsService.get(SETTINGS_KEYS.AI_ENABLED)
  }

  @HostListener('window:resize')
  onWindowResize(): void {
    // Leaving the mobile viewport always restores the search bar
    if (!this.isMobileViewport()) {
      this.mobileSearchHidden = false
    }
  }

  @HostListener('window:scroll')
  onWindowScroll(): void {
    const currentScrollY = window.scrollY

    // On desktop, or while the mobile menu is expanded, the search bar stays visible
    if (!this.isMobileViewport() || this.isMenuCollapsed === false) {
      this.mobileSearchHidden = false
      this.lastScrollY = currentScrollY
      return
    }

    const delta = currentScrollY - this.lastScrollY

    // Hysteresis: show when at the top or scrolling up, hide when scrolling down
    if (currentScrollY <= 0 || delta < -SCROLL_THRESHOLD) {
      this.mobileSearchHidden = false
    } else if (currentScrollY > SCROLL_THRESHOLD && delta > SCROLL_THRESHOLD) {
      this.mobileSearchHidden = true
    }

    this.lastScrollY = currentScrollY
  }

  private isMobileViewport(): boolean {
    return window.innerWidth < 768
  }

  closeMenu() {
    this.isMenuCollapsed = true
  }
@@ -31,8 +31,8 @@ export enum EditDialogMode {

@Directive()
export abstract class EditDialogComponent<
    T extends ObjectWithPermissions | ObjectWithId,
  >
  extends LoadingComponentWithPermissions
  implements OnInit
{
@@ -631,6 +631,59 @@ describe('FilterableDropdownComponent & FilterableDropdownSelectionModel', () =>
  ])
})

it('deselecting a parent clears selected descendants', () => {
  const root: Tag = { id: 100, name: 'Root Tag' }
  const child: Tag = { id: 101, name: 'Child Tag', parent: root.id }
  const grandchild: Tag = {
    id: 102,
    name: 'Grandchild Tag',
    parent: child.id,
  }
  const other: Tag = { id: 103, name: 'Other Tag' }

  selectionModel.items = [root, child, grandchild, other]
  selectionModel.set(root.id, ToggleableItemState.Selected, false)
  selectionModel.set(child.id, ToggleableItemState.Selected, false)
  selectionModel.set(grandchild.id, ToggleableItemState.Selected, false)
  selectionModel.set(other.id, ToggleableItemState.Selected, false)

  selectionModel.toggle(root.id, false)

  expect(selectionModel.getSelectedItems()).toEqual([other])
})

it('un-excluding a parent clears excluded descendants', () => {
  const root: Tag = { id: 110, name: 'Root Tag' }
  const child: Tag = { id: 111, name: 'Child Tag', parent: root.id }
  const other: Tag = { id: 112, name: 'Other Tag' }

  selectionModel.items = [root, child, other]
  selectionModel.set(root.id, ToggleableItemState.Excluded, false)
  selectionModel.set(child.id, ToggleableItemState.Excluded, false)
  selectionModel.set(other.id, ToggleableItemState.Excluded, false)

  selectionModel.exclude(root.id, false)

  expect(selectionModel.getExcludedItems()).toEqual([other])
})

it('excluding a selected parent clears selected descendants', () => {
  const root: Tag = { id: 120, name: 'Root Tag' }
  const child: Tag = { id: 121, name: 'Child Tag', parent: root.id }
  const other: Tag = { id: 122, name: 'Other Tag' }

  selectionModel.manyToOne = true
  selectionModel.items = [root, child, other]
  selectionModel.set(root.id, ToggleableItemState.Selected, false)
  selectionModel.set(child.id, ToggleableItemState.Selected, false)
  selectionModel.set(other.id, ToggleableItemState.Selected, false)

  selectionModel.exclude(root.id, false)

  expect(selectionModel.getExcludedItems()).toEqual([root])
  expect(selectionModel.getSelectedItems()).toEqual([other])
})

it('resorts items immediately when document count sorting enabled', () => {
  const apple: Tag = { id: 55, name: 'Apple' }
  const zebra: Tag = { id: 56, name: 'Zebra' }
@@ -235,6 +235,7 @@ export class FilterableDropdownSelectionModel {
  state == ToggleableItemState.Excluded
) {
  this.temporarySelectionStates.delete(id)
  this.clearDescendantSelections(id)
}

if (!id) {
@@ -261,6 +262,7 @@ export class FilterableDropdownSelectionModel {

if (this.manyToOne || this.singleSelect) {
  this.temporarySelectionStates.set(id, ToggleableItemState.Excluded)
  this.clearDescendantSelections(id)

  if (this.singleSelect) {
    for (let key of this.temporarySelectionStates.keys()) {
@@ -281,9 +283,15 @@ export class FilterableDropdownSelectionModel {
      newState = ToggleableItemState.NotSelected
    }
    this.temporarySelectionStates.set(id, newState)
    if (newState == ToggleableItemState.Excluded) {
      this.clearDescendantSelections(id)
    }
  }
} else if (!id || state == ToggleableItemState.Excluded) {
  this.temporarySelectionStates.delete(id)
  if (id) {
    this.clearDescendantSelections(id)
  }
}

if (fireEvent) {
@@ -295,6 +303,33 @@ export class FilterableDropdownSelectionModel {
  return this.selectionStates.get(id) || ToggleableItemState.NotSelected
}

// Drop any temporary selection state held by descendants of the given item
private clearDescendantSelections(id: number) {
  for (const descendantID of this.getDescendantIDs(id)) {
    this.temporarySelectionStates.delete(descendantID)
  }
}

// Breadth-first walk over `parent` links to collect all descendant IDs
private getDescendantIDs(id: number): number[] {
  const descendants: number[] = []
  const queue: number[] = [id]

  while (queue.length) {
    const parentID = queue.shift()
    for (const item of this._items) {
      if (
        typeof item?.id === 'number' &&
        typeof (item as any)['parent'] === 'number' &&
        (item as any)['parent'] === parentID
      ) {
        descendants.push(item.id)
        queue.push(item.id)
      }
    }
  }

  return descendants
}

get logicalOperator(): LogicalOperator {
  return this.temporaryLogicalOperator
}
@@ -950,8 +950,8 @@ describe('DocumentDetailComponent', () => {

it('should support reprocess, confirm and close modal after started', () => {
  initNormally()
- const bulkEditSpy = jest.spyOn(documentService, 'bulkEdit')
- bulkEditSpy.mockReturnValue(of(true))
+ const reprocessSpy = jest.spyOn(documentService, 'reprocessDocuments')
+ reprocessSpy.mockReturnValue(of(true))
  let openModal: NgbModalRef
  modalService.activeInstances.subscribe((modal) => (openModal = modal[0]))
  const modalSpy = jest.spyOn(modalService, 'open')
@@ -959,7 +959,7 @@ describe('DocumentDetailComponent', () => {
  component.reprocess()
  const modalCloseSpy = jest.spyOn(openModal, 'close')
  openModal.componentInstance.confirmClicked.next()
- expect(bulkEditSpy).toHaveBeenCalledWith([doc.id], 'reprocess', {})
+ expect(reprocessSpy).toHaveBeenCalledWith([doc.id])
  expect(modalSpy).toHaveBeenCalled()
  expect(toastSpy).toHaveBeenCalled()
  expect(modalCloseSpy).toHaveBeenCalled()
@@ -967,13 +967,13 @@ describe('DocumentDetailComponent', () => {

it('should show error if redo ocr call fails', () => {
  initNormally()
- const bulkEditSpy = jest.spyOn(documentService, 'bulkEdit')
+ const reprocessSpy = jest.spyOn(documentService, 'reprocessDocuments')
  let openModal: NgbModalRef
  modalService.activeInstances.subscribe((modal) => (openModal = modal[0]))
  const toastSpy = jest.spyOn(toastService, 'showError')
  component.reprocess()
  const modalCloseSpy = jest.spyOn(openModal, 'close')
- bulkEditSpy.mockReturnValue(throwError(() => new Error('error occurred')))
+ reprocessSpy.mockReturnValue(throwError(() => new Error('error occurred')))
  openModal.componentInstance.confirmClicked.next()
  expect(toastSpy).toHaveBeenCalled()
  expect(modalCloseSpy).not.toHaveBeenCalled()
@@ -1644,9 +1644,9 @@ describe('DocumentDetailComponent', () => {
  expect(
    fixture.debugElement.query(By.css('.preview-sticky img'))
  ).not.toBeUndefined()
- ;(component.document.mime_type =
+ ;((component.document.mime_type =
    'application/vnd.openxmlformats-officedocument.wordprocessingml.document'),
- fixture.detectChanges()
+ fixture.detectChanges())
  expect(component.archiveContentRenderType).toEqual(
    component.ContentRenderType.Other
  )
@@ -1669,18 +1669,15 @@ describe('DocumentDetailComponent', () => {
  modal.componentInstance.pages = [{ page: 1, rotate: 0, splitAfter: false }]
  modal.componentInstance.confirm()
  let req = httpTestingController.expectOne(
-   `${environment.apiBaseUrl}documents/bulk_edit/`
+   `${environment.apiBaseUrl}documents/edit_pdf/`
  )
  expect(req.request.body).toEqual({
    documents: [10],
-   method: 'edit_pdf',
-   parameters: {
-     operations: [{ page: 1, rotate: 0, doc: 0 }],
-     delete_original: false,
-     update_document: false,
-     include_metadata: true,
-     source_mode: 'explicit_selection',
-   },
+   operations: [{ page: 1, rotate: 0, doc: 0 }],
+   delete_original: false,
+   update_document: false,
+   include_metadata: true,
+   source_mode: 'explicit_selection',
  })
  req.error(new ErrorEvent('failed'))
  expect(errorSpy).toHaveBeenCalled()
@@ -1691,7 +1688,7 @@ describe('DocumentDetailComponent', () => {
  modal.componentInstance.deleteOriginal = true
  modal.componentInstance.confirm()
  req = httpTestingController.expectOne(
-   `${environment.apiBaseUrl}documents/bulk_edit/`
+   `${environment.apiBaseUrl}documents/edit_pdf/`
  )
  req.flush(true)
  expect(closeSpy).toHaveBeenCalled()
@@ -1711,18 +1708,15 @@ describe('DocumentDetailComponent', () => {
  dialog.deleteOriginal = true
  dialog.confirm()
  const req = httpTestingController.expectOne(
-   `${environment.apiBaseUrl}documents/bulk_edit/`
+   `${environment.apiBaseUrl}documents/remove_password/`
  )
  expect(req.request.body).toEqual({
    documents: [10],
-   method: 'remove_password',
-   parameters: {
-     password: 'secret',
-     update_document: false,
-     include_metadata: false,
-     delete_original: true,
-     source_mode: 'explicit_selection',
-   },
+   password: 'secret',
+   update_document: false,
+   include_metadata: false,
+   delete_original: true,
+   source_mode: 'explicit_selection',
  })
  req.flush(true)
})
@@ -1737,7 +1731,7 @@ describe('DocumentDetailComponent', () => {

  expect(errorSpy).toHaveBeenCalled()
  httpTestingController.expectNone(
-   `${environment.apiBaseUrl}documents/bulk_edit/`
+   `${environment.apiBaseUrl}documents/remove_password/`
  )
})

@@ -1753,7 +1747,7 @@ describe('DocumentDetailComponent', () => {
  modal.componentInstance as PasswordRemovalConfirmDialogComponent
  dialog.confirm()
  const req = httpTestingController.expectOne(
-   `${environment.apiBaseUrl}documents/bulk_edit/`
+   `${environment.apiBaseUrl}documents/remove_password/`
  )
  req.error(new ErrorEvent('failed'))

@@ -1774,7 +1768,7 @@ describe('DocumentDetailComponent', () => {
  modal.componentInstance as PasswordRemovalConfirmDialogComponent
  dialog.confirm()
  const req = httpTestingController.expectOne(
-   `${environment.apiBaseUrl}documents/bulk_edit/`
+   `${environment.apiBaseUrl}documents/remove_password/`
  )
  req.flush(true)
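The dedicated service methods these tests rely on (`reprocessDocuments`, `deleteDocuments`, `rotateDocuments`, and so on) live in `document.service.ts`, whose diff is suppressed earlier in this changeset. The sketch below is a hypothetical reconstruction inferred purely from the endpoints and request bodies the tests assert; names and signatures are assumptions, not the actual implementation.

```typescript
// Hypothetical sketch of the new dedicated DocumentService methods,
// inferred from the test expectations in this changeset.
import { HttpClient } from '@angular/common/http'
import { Observable } from 'rxjs'

export class DocumentServiceSketch {
  constructor(
    private http: HttpClient,
    private baseUrl: string // e.g. environment.apiBaseUrl
  ) {}

  // Tests expect POST documents/reprocess/ with { documents }
  reprocessDocuments(documents: number[]): Observable<unknown> {
    return this.http.post(`${this.baseUrl}documents/reprocess/`, { documents })
  }

  // Tests expect POST documents/delete/ with { documents }
  deleteDocuments(documents: number[]): Observable<unknown> {
    return this.http.post(`${this.baseUrl}documents/delete/`, { documents })
  }

  // Tests expect POST documents/rotate/ with degrees and a source_mode flag
  rotateDocuments(documents: number[], degrees: number): Observable<unknown> {
    return this.http.post(`${this.baseUrl}documents/rotate/`, {
      documents,
      degrees,
      source_mode: 'latest_version',
    })
  }
}
```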
@@ -1379,27 +1379,25 @@ export class DocumentDetailComponent
modal.componentInstance.btnCaption = $localize`Proceed`
modal.componentInstance.confirmClicked.subscribe(() => {
  modal.componentInstance.buttonsEnabled = false
- this.documentsService
-   .bulkEdit([this.document.id], 'reprocess', {})
-   .subscribe({
+ this.documentsService.reprocessDocuments([this.document.id]).subscribe({
    next: () => {
      this.toastService.showInfo(
        $localize`Reprocess operation for "${this.document.title}" will begin in the background.`
      )
      if (modal) {
        modal.close()
      }
    },
    error: (error) => {
      if (modal) {
        modal.componentInstance.buttonsEnabled = true
      }
      this.toastService.showError(
        $localize`Error executing operation`,
        error
      )
    },
  })
})
}
@@ -1766,7 +1764,7 @@ export class DocumentDetailComponent
.subscribe(() => {
  modal.componentInstance.buttonsEnabled = false
  this.documentsService
-   .bulkEdit([sourceDocumentId], 'edit_pdf', {
+   .editPdfDocuments([sourceDocumentId], {
      operations: modal.componentInstance.getOperations(),
      delete_original: modal.componentInstance.deleteOriginal,
      update_document:
@@ -1824,7 +1822,7 @@ export class DocumentDetailComponent
dialog.buttonsEnabled = false
this.networkActive = true
this.documentsService
- .bulkEdit([sourceDocumentId], 'remove_password', {
+ .removePasswordDocuments([sourceDocumentId], {
    password: this.password,
    update_document: dialog.updateDocument,
    include_metadata: dialog.includeMetadata,
@@ -1,3 +1,4 @@
+import { DatePipe } from '@angular/common'
import { provideHttpClient, withInterceptorsFromDi } from '@angular/common/http'
import {
  HttpTestingController,
@@ -138,6 +139,7 @@ describe('BulkEditorComponent', () => {
    },
  },
  FilterPipe,
+ DatePipe,
  SettingsService,
  {
    provide: UserService,
@@ -849,13 +851,11 @@ describe('BulkEditorComponent', () => {
  expect(modal).not.toBeUndefined()
  modal.componentInstance.confirm()
  let req = httpTestingController.expectOne(
-   `${environment.apiBaseUrl}documents/bulk_edit/`
+   `${environment.apiBaseUrl}documents/delete/`
  )
  req.flush(true)
  expect(req.request.body).toEqual({
    documents: [3, 4],
-   method: 'delete',
-   parameters: {},
  })
  httpTestingController.match(
    `${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
@@ -868,7 +868,7 @@ describe('BulkEditorComponent', () => {
  fixture.detectChanges()
  component.applyDelete()
  req = httpTestingController.expectOne(
-   `${environment.apiBaseUrl}documents/bulk_edit/`
+   `${environment.apiBaseUrl}documents/delete/`
  )
})

@@ -944,13 +944,11 @@ describe('BulkEditorComponent', () => {
  expect(modal).not.toBeUndefined()
  modal.componentInstance.confirm()
  let req = httpTestingController.expectOne(
-   `${environment.apiBaseUrl}documents/bulk_edit/`
+   `${environment.apiBaseUrl}documents/reprocess/`
  )
  req.flush(true)
  expect(req.request.body).toEqual({
    documents: [3, 4],
-   method: 'reprocess',
-   parameters: {},
  })
  httpTestingController.match(
    `${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
@@ -979,13 +977,13 @@ describe('BulkEditorComponent', () => {
  modal.componentInstance.rotate()
  modal.componentInstance.confirm()
  let req = httpTestingController.expectOne(
-   `${environment.apiBaseUrl}documents/bulk_edit/`
+   `${environment.apiBaseUrl}documents/rotate/`
  )
  req.flush(true)
  expect(req.request.body).toEqual({
    documents: [3, 4],
-   method: 'rotate',
-   parameters: { degrees: 90 },
+   degrees: 90,
+   source_mode: 'latest_version',
  })
  httpTestingController.match(
    `${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
@@ -1021,13 +1019,12 @@ describe('BulkEditorComponent', () => {
  modal.componentInstance.metadataDocumentID = 3
  modal.componentInstance.confirm()
  let req = httpTestingController.expectOne(
-   `${environment.apiBaseUrl}documents/bulk_edit/`
+   `${environment.apiBaseUrl}documents/merge/`
  )
  req.flush(true)
  expect(req.request.body).toEqual({
    documents: [3, 4],
-   method: 'merge',
-   parameters: { metadata_document_id: 3 },
+   metadata_document_id: 3,
  })
  httpTestingController.match(
    `${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
@@ -1040,13 +1037,13 @@ describe('BulkEditorComponent', () => {
  modal.componentInstance.deleteOriginals = true
  modal.componentInstance.confirm()
  req = httpTestingController.expectOne(
-   `${environment.apiBaseUrl}documents/bulk_edit/`
+   `${environment.apiBaseUrl}documents/merge/`
  )
  req.flush(true)
  expect(req.request.body).toEqual({
    documents: [3, 4],
-   method: 'merge',
-   parameters: { metadata_document_id: 3, delete_originals: true },
+   metadata_document_id: 3,
+   delete_originals: true,
  })
  httpTestingController.match(
    `${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
@@ -1061,13 +1058,13 @@ describe('BulkEditorComponent', () => {
  modal.componentInstance.archiveFallback = true
  modal.componentInstance.confirm()
  req = httpTestingController.expectOne(
-   `${environment.apiBaseUrl}documents/bulk_edit/`
+   `${environment.apiBaseUrl}documents/merge/`
  )
  req.flush(true)
  expect(req.request.body).toEqual({
    documents: [3, 4],
-   method: 'merge',
-   parameters: { metadata_document_id: 3, archive_fallback: true },
+   metadata_document_id: 3,
+   archive_fallback: true,
  })
  httpTestingController.match(
    `${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
@@ -12,7 +12,7 @@ import {
} from '@ng-bootstrap/ng-bootstrap'
import { saveAs } from 'file-saver'
import { NgxBootstrapIconsModule } from 'ngx-bootstrap-icons'
-import { first, map, Subject, switchMap, takeUntil } from 'rxjs'
+import { first, map, Observable, Subject, switchMap, takeUntil } from 'rxjs'
import { ConfirmDialogComponent } from 'src/app/components/common/confirm-dialog/confirm-dialog.component'
import { CustomField } from 'src/app/data/custom-field'
import { MatchingModel } from 'src/app/data/matching-model'
@@ -29,7 +29,9 @@ import { CorrespondentService } from 'src/app/services/rest/correspondent.servic
import { CustomFieldsService } from 'src/app/services/rest/custom-fields.service'
import { DocumentTypeService } from 'src/app/services/rest/document-type.service'
import {
+ DocumentBulkEditMethod,
  DocumentService,
+ MergeDocumentsRequest,
  SelectionDataItem,
} from 'src/app/services/rest/document.service'
import { SavedViewService } from 'src/app/services/rest/saved-view.service'
@@ -255,9 +257,9 @@ export class BulkEditorComponent
  this.unsubscribeNotifier.complete()
}

-private executeBulkOperation(
+private executeBulkEditMethod(
  modal: NgbModalRef,
- method: string,
+ method: DocumentBulkEditMethod,
  args: any,
  overrideDocumentIDs?: number[]
) {
@@ -272,32 +274,55 @@ export class BulkEditorComponent
  )
  .pipe(first())
  .subscribe({
-   next: () => {
-     if (args['delete_originals']) {
-       this.list.selected.clear()
-     }
-     this.list.reload()
-     this.list.reduceSelectionToFilter()
-     this.list.selected.forEach((id) => {
-       this.openDocumentService.refreshDocument(id)
-     })
-     this.savedViewService.maybeRefreshDocumentCounts()
-     if (modal) {
-       modal.close()
-     }
-   },
-   error: (error) => {
-     if (modal) {
-       modal.componentInstance.buttonsEnabled = true
-     }
-     this.toastService.showError(
-       $localize`Error executing bulk operation`,
-       error
-     )
-   },
+   next: () => this.handleOperationSuccess(modal),
+   error: (error) => this.handleOperationError(modal, error),
  })
}

// Runs a request against one of the dedicated document-action endpoints,
// sharing the success/error handling with the generic bulk-edit path
private executeDocumentAction(
  modal: NgbModalRef,
  request: Observable<any>,
  options: { deleteOriginals?: boolean } = {}
) {
  if (modal) {
    modal.componentInstance.buttonsEnabled = false
  }
  request.pipe(first()).subscribe({
    next: () => {
      this.handleOperationSuccess(modal, options.deleteOriginals ?? false)
    },
    error: (error) => this.handleOperationError(modal, error),
  })
}

private handleOperationSuccess(
  modal: NgbModalRef,
  clearSelection: boolean = false
) {
  if (clearSelection) {
    this.list.selected.clear()
  }
  this.list.reload()
  this.list.reduceSelectionToFilter()
  this.list.selected.forEach((id) => {
    this.openDocumentService.refreshDocument(id)
  })
  this.savedViewService.maybeRefreshDocumentCounts()
  if (modal) {
    modal.close()
  }
}

private handleOperationError(modal: NgbModalRef, error: any) {
  if (modal) {
    modal.componentInstance.buttonsEnabled = true
  }
  this.toastService.showError(
    $localize`Error executing bulk operation`,
    error
  )
}

private applySelectionData(
  items: SelectionDataItem[],
  selectionModel: FilterableDropdownSelectionModel
@@ -446,13 +471,13 @@ export class BulkEditorComponent
  modal.componentInstance.confirmClicked
    .pipe(takeUntil(this.unsubscribeNotifier))
    .subscribe(() => {
-     this.executeBulkOperation(modal, 'modify_tags', {
+     this.executeBulkEditMethod(modal, 'modify_tags', {
        add_tags: changedTags.itemsToAdd.map((t) => t.id),
        remove_tags: changedTags.itemsToRemove.map((t) => t.id),
      })
    })
} else {
- this.executeBulkOperation(null, 'modify_tags', {
+ this.executeBulkEditMethod(null, 'modify_tags', {
    add_tags: changedTags.itemsToAdd.map((t) => t.id),
    remove_tags: changedTags.itemsToRemove.map((t) => t.id),
  })
@@ -486,12 +511,12 @@ export class BulkEditorComponent
  modal.componentInstance.confirmClicked
    .pipe(takeUntil(this.unsubscribeNotifier))
    .subscribe(() => {
-     this.executeBulkOperation(modal, 'set_correspondent', {
+     this.executeBulkEditMethod(modal, 'set_correspondent', {
        correspondent: correspondent ? correspondent.id : null,
      })
    })
} else {
- this.executeBulkOperation(null, 'set_correspondent', {
+ this.executeBulkEditMethod(null, 'set_correspondent', {
    correspondent: correspondent ? correspondent.id : null,
  })
}
@@ -524,12 +549,12 @@ export class BulkEditorComponent
  modal.componentInstance.confirmClicked
    .pipe(takeUntil(this.unsubscribeNotifier))
    .subscribe(() => {
-     this.executeBulkOperation(modal, 'set_document_type', {
+     this.executeBulkEditMethod(modal, 'set_document_type', {
        document_type: documentType ? documentType.id : null,
      })
    })
} else {
- this.executeBulkOperation(null, 'set_document_type', {
+ this.executeBulkEditMethod(null, 'set_document_type', {
    document_type: documentType ? documentType.id : null,
  })
}
@@ -562,12 +587,12 @@ export class BulkEditorComponent
  modal.componentInstance.confirmClicked
    .pipe(takeUntil(this.unsubscribeNotifier))
    .subscribe(() => {
-     this.executeBulkOperation(modal, 'set_storage_path', {
+     this.executeBulkEditMethod(modal, 'set_storage_path', {
        storage_path: storagePath ? storagePath.id : null,
      })
    })
} else {
- this.executeBulkOperation(null, 'set_storage_path', {
+ this.executeBulkEditMethod(null, 'set_storage_path', {
    storage_path: storagePath ? storagePath.id : null,
  })
}
@@ -624,7 +649,7 @@ export class BulkEditorComponent
  modal.componentInstance.confirmClicked
    .pipe(takeUntil(this.unsubscribeNotifier))
    .subscribe(() => {
-     this.executeBulkOperation(modal, 'modify_custom_fields', {
+     this.executeBulkEditMethod(modal, 'modify_custom_fields', {
        add_custom_fields: changedCustomFields.itemsToAdd.map((f) => f.id),
        remove_custom_fields: changedCustomFields.itemsToRemove.map(
          (f) => f.id
@@ -632,7 +657,7 @@ export class BulkEditorComponent
      })
    })
} else {
- this.executeBulkOperation(null, 'modify_custom_fields', {
+ this.executeBulkEditMethod(null, 'modify_custom_fields', {
    add_custom_fields: changedCustomFields.itemsToAdd.map((f) => f.id),
    remove_custom_fields: changedCustomFields.itemsToRemove.map(
      (f) => f.id
@@ -762,10 +787,16 @@ export class BulkEditorComponent
  .pipe(takeUntil(this.unsubscribeNotifier))
  .subscribe(() => {
    modal.componentInstance.buttonsEnabled = false
-   this.executeBulkOperation(modal, 'delete', {})
+   this.executeDocumentAction(
+     modal,
+     this.documentService.deleteDocuments(Array.from(this.list.selected))
+   )
  })
} else {
- this.executeBulkOperation(null, 'delete', {})
+ this.executeDocumentAction(
+   null,
+   this.documentService.deleteDocuments(Array.from(this.list.selected))
+ )
}
}

@@ -804,7 +835,12 @@ export class BulkEditorComponent
  .pipe(takeUntil(this.unsubscribeNotifier))
  .subscribe(() => {
    modal.componentInstance.buttonsEnabled = false
-   this.executeBulkOperation(modal, 'reprocess', {})
+   this.executeDocumentAction(
+     modal,
+     this.documentService.reprocessDocuments(
+       Array.from(this.list.selected)
+     )
+   )
  })
}

@@ -815,7 +851,7 @@ export class BulkEditorComponent
  modal.componentInstance.confirmClicked.subscribe(
    ({ permissions, merge }) => {
      modal.componentInstance.buttonsEnabled = false
-     this.executeBulkOperation(modal, 'set_permissions', {
+     this.executeBulkEditMethod(modal, 'set_permissions', {
        ...permissions,
        merge,
      })
@@ -838,9 +874,13 @@ export class BulkEditorComponent
  .pipe(takeUntil(this.unsubscribeNotifier))
  .subscribe(() => {
    rotateDialog.buttonsEnabled = false
-   this.executeBulkOperation(modal, 'rotate', {
-     degrees: rotateDialog.degrees,
-   })
+   this.executeDocumentAction(
+     modal,
+     this.documentService.rotateDocuments(
+       Array.from(this.list.selected),
+       rotateDialog.degrees
+     )
+   )
  })
}

@@ -856,18 +896,22 @@ export class BulkEditorComponent
  mergeDialog.confirmClicked
    .pipe(takeUntil(this.unsubscribeNotifier))
    .subscribe(() => {
-     const args = {}
+     const args: MergeDocumentsRequest = {}
      if (mergeDialog.metadataDocumentID > -1) {
-       args['metadata_document_id'] = mergeDialog.metadataDocumentID
+       args.metadata_document_id = mergeDialog.metadataDocumentID
      }
      if (mergeDialog.deleteOriginals) {
-       args['delete_originals'] = true
+       args.delete_originals = true
      }
      if (mergeDialog.archiveFallback) {
-       args['archive_fallback'] = true
+       args.archive_fallback = true
      }
      mergeDialog.buttonsEnabled = false
-     this.executeBulkOperation(modal, 'merge', args, mergeDialog.documentIDs)
+     this.executeDocumentAction(
+       modal,
+       this.documentService.mergeDocuments(mergeDialog.documentIDs, args),
+       { deleteOriginals: !!args.delete_originals }
+     )
      this.toastService.showInfo(
        $localize`Merged document will be queued for consumption.`
      )
|
||||
}
|
||||
|
||||
@if (document && displayFields?.includes(DisplayField.TAGS)) {
|
||||
<div class="tags d-flex flex-column text-end position-absolute me-1 fs-6">
|
||||
<div class="tags d-flex flex-column text-end position-absolute me-1 fs-6" [class.tags-no-wrap]="document.tags.length > 3">
|
||||
@for (tagID of tagIDs; track tagID) {
|
||||
<pngx-tag [tagID]="tagID" (click)="clickTag.emit(tagID);$event.stopPropagation()" [clickable]="true" linkTitle="Toggle tag filter" i18n-linkTitle></pngx-tag>
|
||||
}
|
||||
|
||||
@@ -72,4 +72,14 @@ a {
|
||||
max-width: 80%;
|
||||
row-gap: .2rem;
|
||||
line-height: 1;
|
||||
|
||||
&.tags-no-wrap {
|
||||
::ng-deep .badge {
|
||||
display: inline-block;
|
||||
max-width: 100%;
|
||||
white-space: nowrap;
|
||||
overflow: hidden;
|
||||
text-overflow: ellipsis;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -82,6 +82,16 @@ describe('DocumentCardSmallComponent', () => {
  ).toHaveLength(6)
})

it('should clear hidden tag counter when tag count falls below the limit', () => {
  expect(component.moreTags).toEqual(3)

  component.document.tags = [1, 2, 3, 4, 5, 6]
  fixture.detectChanges()

  expect(component.moreTags).toBeNull()
  expect(fixture.nativeElement.textContent).not.toContain('+ 3')
})

it('should try to close the preview on mouse leave', () => {
  component.popupPreview = {
    close: jest.fn(),
@@ -126,6 +126,7 @@ export class DocumentCardSmallComponent
    this.moreTags = this.document.tags.length - (limit - 1)
    return this.document.tags.slice(0, limit - 1)
  } else {
+   this.moreTags = null
    return this.document.tags
  }
}
@@ -56,13 +56,20 @@ $paperless-card-breakpoints: (

.sticky-top {
  z-index: 990; // below main navbar
- top: calc(7rem - 2px); // height of navbar (mobile)
+ top: calc(7rem - 2px); // height of navbar + search row (mobile)
  transition: top 0.2s ease;

  @media (min-width: 580px) {
    top: 3.5rem; // height of navbar
  }
}

@media (max-width: 579.98px) {
  :host-context(main.mobile-search-hidden) .sticky-top {
    top: calc(3.5rem - 2px); // height of navbar only when search is hidden
  }
}

.table .form-check {
  padding: 0.2rem;
  min-height: 0;
src-ui/src/app/interceptors/auth-expiry.interceptor.spec.ts (new file, 122 lines)

@@ -0,0 +1,122 @@
import {
  HttpErrorResponse,
  HttpHandlerFn,
  HttpRequest,
} from '@angular/common/http'
import { throwError } from 'rxjs'
import * as navUtils from '../utils/navigation'
import { createAuthExpiryInterceptor } from './auth-expiry.interceptor'

describe('withAuthExpiryInterceptor', () => {
  let interceptor: ReturnType<typeof createAuthExpiryInterceptor>
  let dateNowSpy: jest.SpiedFunction<typeof Date.now>

  beforeEach(() => {
    interceptor = createAuthExpiryInterceptor()
    dateNowSpy = jest.spyOn(Date, 'now').mockReturnValue(1000)
  })

  afterEach(() => {
    jest.restoreAllMocks()
  })

  it('reloads when an API request returns 401', () => {
    const reloadSpy = jest
      .spyOn(navUtils, 'locationReload')
      .mockImplementation(() => {})

    interceptor(
      new HttpRequest('GET', '/api/documents/'),
      failingHandler('/api/documents/', 401)
    ).subscribe({
      error: () => undefined,
    })

    expect(reloadSpy).toHaveBeenCalledTimes(1)
  })

  it('does not reload for non-401 errors', () => {
    const reloadSpy = jest
      .spyOn(navUtils, 'locationReload')
      .mockImplementation(() => {})

    interceptor(
      new HttpRequest('GET', '/api/documents/'),
      failingHandler('/api/documents/', 500)
    ).subscribe({
      error: () => undefined,
    })

    expect(reloadSpy).not.toHaveBeenCalled()
  })

  it('does not reload for non-api 401 responses', () => {
    const reloadSpy = jest
      .spyOn(navUtils, 'locationReload')
      .mockImplementation(() => {})

    interceptor(
      new HttpRequest('GET', '/accounts/profile/'),
      failingHandler('/accounts/profile/', 401)
    ).subscribe({
      error: () => undefined,
    })

    expect(reloadSpy).not.toHaveBeenCalled()
  })

  it('reloads only once even with multiple API 401 responses', () => {
    const reloadSpy = jest
      .spyOn(navUtils, 'locationReload')
      .mockImplementation(() => {})

    const request = new HttpRequest('GET', '/api/documents/')
    const handler = failingHandler('/api/documents/', 401)

    interceptor(request, handler).subscribe({
      error: () => undefined,
    })
    interceptor(request, handler).subscribe({
      error: () => undefined,
    })

    expect(reloadSpy).toHaveBeenCalledTimes(1)
  })

  it('retries reload after cooldown for repeated API 401 responses', () => {
    const reloadSpy = jest
      .spyOn(navUtils, 'locationReload')
      .mockImplementation(() => {})

    dateNowSpy
      .mockReturnValueOnce(1000)
      .mockReturnValueOnce(2500)
      .mockReturnValueOnce(3501)

    const request = new HttpRequest('GET', '/api/documents/')
    const handler = failingHandler('/api/documents/', 401)

    interceptor(request, handler).subscribe({
      error: () => undefined,
    })
    interceptor(request, handler).subscribe({
      error: () => undefined,
    })
    interceptor(request, handler).subscribe({
      error: () => undefined,
    })

    expect(reloadSpy).toHaveBeenCalledTimes(2)
  })
})

function failingHandler(url: string, status: number): HttpHandlerFn {
  return (_request) =>
    throwError(
      () =>
        new HttpErrorResponse({
          status,
          url,
        })
    )
}
37
src-ui/src/app/interceptors/auth-expiry.interceptor.ts
Normal file
37
src-ui/src/app/interceptors/auth-expiry.interceptor.ts
Normal file
@@ -0,0 +1,37 @@
|
||||
import {
  HttpErrorResponse,
  HttpEvent,
  HttpHandlerFn,
  HttpInterceptorFn,
  HttpRequest,
} from '@angular/common/http'
import { catchError, Observable, throwError } from 'rxjs'
import { locationReload } from '../utils/navigation'

export const createAuthExpiryInterceptor = (): HttpInterceptorFn => {
  let lastReloadAttempt = Number.NEGATIVE_INFINITY

  return (
    request: HttpRequest<unknown>,
    next: HttpHandlerFn
  ): Observable<HttpEvent<unknown>> =>
    next(request).pipe(
      catchError((error: unknown) => {
        if (
          error instanceof HttpErrorResponse &&
          error.status === 401 &&
          request.url.includes('/api/')
        ) {
          const now = Date.now()
          if (now - lastReloadAttempt >= 2000) {
            lastReloadAttempt = now
            locationReload()
          }
        }

        return throwError(() => error)
      })
    )
}

export const withAuthExpiryInterceptor = createAuthExpiryInterceptor()
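
Worth noting for reviewers: because createAuthExpiryInterceptor() returns a closure over lastReloadAttempt, the 2-second cooldown is shared by every request flowing through the resulting HttpClient, so a burst of concurrent 401s triggers at most one reload. A minimal wiring sketch (import paths assumed; the real registration appears in the main.ts hunk further down this diff):

import { bootstrapApplication } from '@angular/platform-browser'
import { provideHttpClient, withInterceptors } from '@angular/common/http'
import { AppComponent } from './app/app.component'
import { withAuthExpiryInterceptor } from './app/interceptors/auth-expiry.interceptor'

// Register the functional interceptor once; its closure state lives for the
// lifetime of the application injector.
bootstrapApplication(AppComponent, {
  providers: [
    provideHttpClient(withInterceptors([withAuthExpiryInterceptor])),
  ],
})
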
@@ -21,6 +21,7 @@ import {
  FILTER_HAS_TAGS_ANY,
} from '../data/filter-rule-type'
import { SavedView } from '../data/saved-view'
import { DOCUMENT_LIST_SERVICE } from '../data/storage-keys'
import { SETTINGS_KEYS } from '../data/ui-settings'
import { PermissionsGuard } from '../guards/permissions.guard'
import { DocumentListViewService } from './document-list-view.service'
@@ -248,6 +249,29 @@ describe('DocumentListViewService', () => {
    expect(documentListViewService.sortReverse).toBeTruthy()
  })

  it('restores only known list view state fields from local storage', () => {
    try {
      localStorage.setItem(
        DOCUMENT_LIST_SERVICE.CURRENT_VIEW_CONFIG,
        '{"currentPage":3,"sortField":"title","sortReverse":false,"__proto__":{"polluted":true},"injected":"ignored"}'
      )

      const restoredService = TestBed.runInInjectionContext(
        () => new DocumentListViewService()
      )

      expect(restoredService.currentPage).toEqual(3)
      expect(restoredService.sortField).toEqual('title')
      expect(restoredService.sortReverse).toBeFalsy()
      expect(
        (restoredService as any).activeListViewState.injected
      ).toBeUndefined()
      expect(({} as any).polluted).toBeUndefined()
    } finally {
      localStorage.removeItem(DOCUMENT_LIST_SERVICE.CURRENT_VIEW_CONFIG)
    }
  })

  it('should load from query params', () => {
    expect(documentListViewService.currentPage).toEqual(1)
    const page = 2
@@ -24,6 +24,20 @@ const LIST_DEFAULT_DISPLAY_FIELDS: DisplayField[] = DEFAULT_DISPLAY_FIELDS.map(
  (f) => f.id
).filter((f) => f !== DisplayField.ADDED)

const RESTORABLE_LIST_VIEW_STATE_KEYS: (keyof ListViewState)[] = [
  'title',
  'documents',
  'currentPage',
  'collectionSize',
  'sortField',
  'sortReverse',
  'filterRules',
  'selected',
  'pageSize',
  'displayMode',
  'displayFields',
]

/**
 * Captures the current state of the list view.
 */
@@ -112,6 +126,32 @@ export class DocumentListViewService {

  private displayFieldsInitialized: boolean = false

  private restoreListViewState(savedState: unknown): ListViewState {
    const newState = this.defaultListViewState()

    if (
      !savedState ||
      typeof savedState !== 'object' ||
      Array.isArray(savedState)
    ) {
      return newState
    }

    const parsedState = savedState as Partial<
      Record<keyof ListViewState, unknown>
    >
    const mutableState = newState as Record<keyof ListViewState, unknown>

    for (const key of RESTORABLE_LIST_VIEW_STATE_KEYS) {
      const value = parsedState[key]
      if (value != null) {
        mutableState[key] = value
      }
    }

    return newState
  }

  get activeSavedViewId() {
    return this._activeSavedViewId
  }
@@ -127,14 +167,7 @@ export class DocumentListViewService {
    if (documentListViewConfigJson) {
      try {
        let savedState: ListViewState = JSON.parse(documentListViewConfigJson)
        // Remove null elements from the restored state
        Object.keys(savedState).forEach((k) => {
          if (savedState[k] == null) {
            delete savedState[k]
          }
        })
        // only use restored state attributes instead of defaults if they are not null
        let newState = Object.assign(this.defaultListViewState(), savedState)
        let newState = this.restoreListViewState(savedState)
        this.listViewStates.set(null, newState)
      } catch (e) {
        localStorage.removeItem(DOCUMENT_LIST_SERVICE.CURRENT_VIEW_CONFIG)
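
The allow-list restore above generalizes; a standalone sketch (hypothetical names, not the project's code) of the same defensive pattern, which ignores both prototype-pollution keys like "__proto__" and arbitrary injected fields in persisted JSON:

interface ViewState {
  currentPage: number
  sortField: string
}

const RESTORABLE_KEYS: (keyof ViewState)[] = ['currentPage', 'sortField']

function restoreState(raw: string, defaults: ViewState): ViewState {
  const state: ViewState = { ...defaults }
  let parsed: unknown
  try {
    parsed = JSON.parse(raw)
  } catch {
    return state // malformed JSON falls back to defaults
  }
  if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {
    return state
  }
  for (const key of RESTORABLE_KEYS) {
    const value = (parsed as Record<string, unknown>)[key]
    if (value != null) {
      // Copy only known keys; '__proto__' or injected keys are never assigned.
      (state as Record<keyof ViewState, unknown>)[key] = value
    }
  }
  return state
}
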
@@ -230,6 +230,88 @@ describe(`DocumentService`, () => {
    })
  })

  it('should call appropriate api endpoint for delete documents', () => {
    const ids = [1, 2, 3]
    subscription = service.deleteDocuments(ids).subscribe()
    const req = httpTestingController.expectOne(
      `${environment.apiBaseUrl}${endpoint}/delete/`
    )
    expect(req.request.method).toEqual('POST')
    expect(req.request.body).toEqual({
      documents: ids,
    })
  })

  it('should call appropriate api endpoint for reprocess documents', () => {
    const ids = [1, 2, 3]
    subscription = service.reprocessDocuments(ids).subscribe()
    const req = httpTestingController.expectOne(
      `${environment.apiBaseUrl}${endpoint}/reprocess/`
    )
    expect(req.request.method).toEqual('POST')
    expect(req.request.body).toEqual({
      documents: ids,
    })
  })

  it('should call appropriate api endpoint for rotate documents', () => {
    const ids = [1, 2, 3]
    subscription = service.rotateDocuments(ids, 90).subscribe()
    const req = httpTestingController.expectOne(
      `${environment.apiBaseUrl}${endpoint}/rotate/`
    )
    expect(req.request.method).toEqual('POST')
    expect(req.request.body).toEqual({
      documents: ids,
      degrees: 90,
      source_mode: 'latest_version',
    })
  })

  it('should call appropriate api endpoint for merge documents', () => {
    const ids = [1, 2, 3]
    const args = { metadata_document_id: 1, delete_originals: true }
    subscription = service.mergeDocuments(ids, args).subscribe()
    const req = httpTestingController.expectOne(
      `${environment.apiBaseUrl}${endpoint}/merge/`
    )
    expect(req.request.method).toEqual('POST')
    expect(req.request.body).toEqual({
      documents: ids,
      metadata_document_id: 1,
      delete_originals: true,
    })
  })

  it('should call appropriate api endpoint for edit pdf', () => {
    const ids = [1]
    const args = { operations: [{ page: 1, rotate: 90, doc: 0 }] }
    subscription = service.editPdfDocuments(ids, args).subscribe()
    const req = httpTestingController.expectOne(
      `${environment.apiBaseUrl}${endpoint}/edit_pdf/`
    )
    expect(req.request.method).toEqual('POST')
    expect(req.request.body).toEqual({
      documents: ids,
      operations: [{ page: 1, rotate: 90, doc: 0 }],
    })
  })

  it('should call appropriate api endpoint for remove password', () => {
    const ids = [1]
    const args = { password: 'secret', update_document: true }
    subscription = service.removePasswordDocuments(ids, args).subscribe()
    const req = httpTestingController.expectOne(
      `${environment.apiBaseUrl}${endpoint}/remove_password/`
    )
    expect(req.request.method).toEqual('POST')
    expect(req.request.body).toEqual({
      documents: ids,
      password: 'secret',
      update_document: true,
    })
  })

  it('should return the correct preview URL for a single document', () => {
    let url = service.getPreviewUrl(documents[0].id)
    expect(url).toEqual(
@@ -42,6 +42,45 @@ export enum BulkEditSourceMode {
  EXPLICIT_SELECTION = 'explicit_selection',
}

export type DocumentBulkEditMethod =
  | 'set_correspondent'
  | 'set_document_type'
  | 'set_storage_path'
  | 'add_tag'
  | 'remove_tag'
  | 'modify_tags'
  | 'modify_custom_fields'
  | 'set_permissions'

export interface MergeDocumentsRequest {
  metadata_document_id?: number
  delete_originals?: boolean
  archive_fallback?: boolean
  source_mode?: BulkEditSourceMode
}

export interface EditPdfOperation {
  page: number
  rotate?: number
  doc?: number
}

export interface EditPdfDocumentsRequest {
  operations: EditPdfOperation[]
  delete_original?: boolean
  update_document?: boolean
  include_metadata?: boolean
  source_mode?: BulkEditSourceMode
}

export interface RemovePasswordDocumentsRequest {
  password: string
  update_document?: boolean
  delete_original?: boolean
  include_metadata?: boolean
  source_mode?: BulkEditSourceMode
}

@Injectable({
  providedIn: 'root',
})
@@ -299,7 +338,7 @@ export class DocumentService extends AbstractPaperlessService<Document> {
    return this.http.get<DocumentMetadata>(url.toString())
  }

  bulkEdit(ids: number[], method: string, args: any) {
  bulkEdit(ids: number[], method: DocumentBulkEditMethod, args: any) {
    return this.http.post(this.getResourceUrl(null, 'bulk_edit'), {
      documents: ids,
      method: method,
@@ -307,6 +346,54 @@
    })
  }

  deleteDocuments(ids: number[]) {
    return this.http.post(this.getResourceUrl(null, 'delete'), {
      documents: ids,
    })
  }

  reprocessDocuments(ids: number[]) {
    return this.http.post(this.getResourceUrl(null, 'reprocess'), {
      documents: ids,
    })
  }

  rotateDocuments(
    ids: number[],
    degrees: number,
    sourceMode: BulkEditSourceMode = BulkEditSourceMode.LATEST_VERSION
  ) {
    return this.http.post(this.getResourceUrl(null, 'rotate'), {
      documents: ids,
      degrees,
      source_mode: sourceMode,
    })
  }

  mergeDocuments(ids: number[], request: MergeDocumentsRequest = {}) {
    return this.http.post(this.getResourceUrl(null, 'merge'), {
      documents: ids,
      ...request,
    })
  }

  editPdfDocuments(ids: number[], request: EditPdfDocumentsRequest) {
    return this.http.post(this.getResourceUrl(null, 'edit_pdf'), {
      documents: ids,
      ...request,
    })
  }

  removePasswordDocuments(
    ids: number[],
    request: RemovePasswordDocumentsRequest
  ) {
    return this.http.post(this.getResourceUrl(null, 'remove_password'), {
      documents: ids,
      ...request,
    })
  }

  getSelectionData(ids: number[]): Observable<SelectionData> {
    return this.http.post<SelectionData>(
      this.getResourceUrl(null, 'selection_data'),
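
A hedged usage sketch of the new typed surface (an injected DocumentService instance is assumed as `docs`): the DocumentBulkEditMethod union and the per-operation request interfaces move misspelled methods and payload keys from runtime failures to compile-time errors.

docs.rotateDocuments([1, 2, 3], 90).subscribe() // body includes source_mode: 'latest_version'
docs.mergeDocuments([4, 5], { metadata_document_id: 4, delete_originals: true }).subscribe()
docs.removePasswordDocuments([6], { password: 'secret', update_document: true }).subscribe()

// docs.bulkEdit([1], 'set_corespondent', {})
//   ^ typo no longer compiles: not assignable to DocumentBulkEditMethod
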
@@ -166,6 +166,23 @@ describe('SettingsService', () => {
    expect(settingsService.get(SETTINGS_KEYS.THEME_COLOR)).toEqual('#9fbf2f')
  })

  it('ignores unsafe top-level keys from loaded settings', () => {
    const req = httpTestingController.expectOne(
      `${environment.apiBaseUrl}ui_settings/`
    )
    const payload = JSON.parse(
      JSON.stringify(ui_settings).replace(
        '"settings":{',
        '"settings":{"__proto__":{"polluted":"yes"},'
      )
    )
    payload.settings.app_title = 'Safe Title'
    req.flush(payload)

    expect(settingsService.get(SETTINGS_KEYS.APP_TITLE)).toEqual('Safe Title')
    expect(({} as any).polluted).toBeUndefined()
  })

  it('correctly allows updating settings of various types', () => {
    const req = httpTestingController.expectOne(
      `${environment.apiBaseUrl}ui_settings/`
@@ -276,6 +276,8 @@ const ISO_LANGUAGE_OPTION: LanguageOption = {
  dateInputFormat: 'yyyy-mm-dd',
}

const UNSAFE_OBJECT_KEYS = new Set(['__proto__', 'prototype', 'constructor'])

@Injectable({
  providedIn: 'root',
})
@@ -291,7 +293,7 @@ export class SettingsService {

  protected baseUrl: string = environment.apiBaseUrl + 'ui_settings/'

  private settings: Object = {}
  private settings: Record<string, any> = {}
  currentUser: User

  public settingsSaved: EventEmitter<any> = new EventEmitter()
@@ -320,6 +322,21 @@ export class SettingsService {
    this._renderer = rendererFactory.createRenderer(null, null)
  }

  private isSafeObjectKey(key: string): boolean {
    return !UNSAFE_OBJECT_KEYS.has(key)
  }

  private assignSafeSettings(source: Record<string, any>) {
    if (!source || typeof source !== 'object' || Array.isArray(source)) {
      return
    }

    for (const key of Object.keys(source)) {
      if (!this.isSafeObjectKey(key)) continue
      this.settings[key] = source[key]
    }
  }

  // this is called by the app initializer in app.module
  public initializeSettings(): Observable<UiSettings> {
    return this.http.get<UiSettings>(this.baseUrl).pipe(
@@ -338,7 +355,7 @@
        })
      }),
      tap((uisettings) => {
        Object.assign(this.settings, uisettings.settings)
        this.assignSafeSettings(uisettings.settings)
        if (this.get(SETTINGS_KEYS.APP_TITLE)?.length) {
          environment.appTitle = this.get(SETTINGS_KEYS.APP_TITLE)
        }
@@ -533,7 +550,11 @@
    let settingObj = this.settings
    keys.forEach((keyPart, index) => {
      keyPart = keyPart.replace(/-/g, '_')
      if (!settingObj.hasOwnProperty(keyPart)) return
      if (
        !this.isSafeObjectKey(keyPart) ||
        !Object.prototype.hasOwnProperty.call(settingObj, keyPart)
      )
        return
      if (index == keys.length - 1) value = settingObj[keyPart]
      else settingObj = settingObj[keyPart]
    })
@@ -579,7 +600,9 @@
    const keys = key.replace('general-settings:', '').split(':')
    keys.forEach((keyPart, index) => {
      keyPart = keyPart.replace(/-/g, '_')
      if (!settingObj.hasOwnProperty(keyPart)) settingObj[keyPart] = {}
      if (!this.isSafeObjectKey(keyPart)) return
      if (!Object.prototype.hasOwnProperty.call(settingObj, keyPart))
        settingObj[keyPart] = {}
      if (index == keys.length - 1) settingObj[keyPart] = value
      else settingObj = settingObj[keyPart]
    })
@@ -602,7 +625,10 @@

  maybeMigrateSettings() {
    if (
      !this.settings.hasOwnProperty('documentListSize') &&
      !Object.prototype.hasOwnProperty.call(
        this.settings,
        'documentListSize'
      ) &&
      localStorage.getItem(SETTINGS_KEYS.DOCUMENT_LIST_SIZE)
    ) {
      // lets migrate
@@ -610,8 +636,7 @@
      const errorMessage = $localize`Unable to migrate settings to the database, please try saving manually.`

      try {
        for (const setting in SETTINGS_KEYS) {
          const key = SETTINGS_KEYS[setting]
        for (const key of Object.values(SETTINGS_KEYS)) {
          const value = localStorage.getItem(key)
          this.set(key, value)
        }
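
For context, why the guard is needed at all, as a self-contained sketch: JSON.parse creates "__proto__" as an ordinary own property, but a later bracket assignment of that key invokes the Object.prototype setter and pollutes every object. Filtering the three dangerous keys closes the hole:

const UNSAFE_KEYS = new Set(['__proto__', 'prototype', 'constructor'])

function safeAssign(
  target: Record<string, unknown>,
  source: Record<string, unknown>
): void {
  for (const key of Object.keys(source)) {
    if (UNSAFE_KEYS.has(key)) continue // drop pollution vectors
    target[key] = source[key]
  }
}

const payload = JSON.parse('{"app_title":"Safe","__proto__":{"polluted":"yes"}}')
const settings: Record<string, unknown> = {}
safeAssign(settings, payload)
console.log(settings['app_title']) // 'Safe'
console.log(({} as any).polluted)  // undefined: Object.prototype untouched
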
@@ -62,7 +62,7 @@ export function hslToRgb(h, s, l) {
 * @return Array The HSL representation
 */
export function rgbToHsl(r, g, b) {
  ;(r /= 255), (g /= 255), (b /= 255)
  ;((r /= 255), (g /= 255), (b /= 255))
  var max = Math.max(r, g, b),
    min = Math.min(r, g, b)
  var h,
@@ -6,7 +6,7 @@ export const environment = {
  apiVersion: '10', // match src/paperless/settings.py
  appTitle: 'Paperless-ngx',
  tag: 'prod',
  version: '2.20.10',
  version: '2.20.13',
  webSocketHost: window.location.host,
  webSocketProtocol: window.location.protocol == 'https:' ? 'wss:' : 'ws:',
  webSocketBaseUrl: base_url.pathname + 'ws/',

@@ -5,7 +5,7 @@
export const environment = {
  production: false,
  apiBaseUrl: 'http://localhost:8000/api/',
  apiVersion: '9',
  apiVersion: '10',
  appTitle: 'Paperless-ngx',
  tag: 'dev',
  version: 'DEVELOPMENT',
@@ -154,6 +154,7 @@ import { DirtyDocGuard } from './app/guards/dirty-doc.guard'
import { DirtySavedViewGuard } from './app/guards/dirty-saved-view.guard'
import { PermissionsGuard } from './app/guards/permissions.guard'
import { withApiVersionInterceptor } from './app/interceptors/api-version.interceptor'
import { withAuthExpiryInterceptor } from './app/interceptors/auth-expiry.interceptor'
import { withCsrfInterceptor } from './app/interceptors/csrf.interceptor'
import { DocumentTitlePipe } from './app/pipes/document-title.pipe'
import { FilterPipe } from './app/pipes/filter.pipe'
@@ -399,7 +400,11 @@ bootstrapApplication(AppComponent, {
    StoragePathNamePipe,
    provideHttpClient(
      withInterceptorsFromDi(),
      withInterceptors([withCsrfInterceptor, withApiVersionInterceptor]),
      withInterceptors([
        withCsrfInterceptor,
        withApiVersionInterceptor,
        withAuthExpiryInterceptor,
      ]),
      withFetch()
    ),
    provideUiTour({
@@ -150,6 +150,15 @@ $form-check-radio-checked-bg-image-dark: url("data:image/svg+xml,<svg xmlns='htt
  background-color: var(--pngx-body-color-accent);
}

.list-group-item-action:not(.active):active {
  --bs-list-group-action-active-color: var(--bs-body-color);
  --bs-list-group-action-active-bg: var(--pngx-bg-darker);
}

.form-control:hover::file-selector-button {
  background-color:var(--pngx-bg-dark) !important
}

.search-container {
  input, input:focus, i-bs[name="search"] , ::placeholder {
    color: var(--pngx-primary-text-contrast) !important;
@@ -576,8 +576,8 @@ def merge(
    except Exception:
        restore_archive_serial_numbers(backup)
        raise
    else:
        consume_task.delay()
    else:
        consume_task.delay()

    return "OK"

@@ -3,25 +3,20 @@ from django.core.checks import Error
from django.core.checks import Warning
from django.core.checks import register

from documents.signals import document_consumer_declaration
from documents.templating.utils import convert_format_str_to_template_format
from paperless.parsers.registry import get_parser_registry


@register()
def parser_check(app_configs, **kwargs):
    parsers = []
    for response in document_consumer_declaration.send(None):
        parsers.append(response[1])

    if len(parsers) == 0:
    if not get_parser_registry().all_parsers():
        return [
            Error(
                "No parsers found. This is a bug. The consumer won't be "
                "able to consume any documents without parsers.",
            ),
        ]
    else:
        return []
    return []


@register()
@@ -9,6 +9,7 @@ from pathlib import Path
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from collections.abc import Callable
    from collections.abc import Iterator
    from datetime import datetime

@@ -191,7 +192,12 @@ class DocumentClassifier:

        target_file_temp.rename(target_file)

    def train(self) -> bool:
    def train(
        self,
        status_callback: Callable[[str], None] | None = None,
    ) -> bool:
        notify = status_callback if status_callback is not None else lambda _: None

        # Get non-inbox documents
        docs_queryset = (
            Document.objects.exclude(
@@ -213,6 +219,7 @@ class DocumentClassifier:

        # Step 1: Extract and preprocess training data from the database.
        logger.debug("Gathering data from database...")
        notify(f"Gathering data from {docs_queryset.count()} document(s)...")
        hasher = sha256()
        for doc in docs_queryset:
            y = -1
@@ -290,6 +297,7 @@

        # Step 2: vectorize data
        logger.debug("Vectorizing data...")
        notify("Vectorizing document content...")

        def content_generator() -> Iterator[str]:
            """
@@ -316,6 +324,7 @@
        # Step 3: train the classifiers
        if num_tags > 0:
            logger.debug("Training tags classifier...")
            notify(f"Training tags classifier ({num_tags} tag(s))...")

            if num_tags == 1:
                # Special case where only one tag has auto:
@@ -339,6 +348,9 @@

        if num_correspondents > 0:
            logger.debug("Training correspondent classifier...")
            notify(
                f"Training correspondent classifier ({num_correspondents} correspondent(s))...",
            )
            self.correspondent_classifier = MLPClassifier(tol=0.01)
            self.correspondent_classifier.fit(data_vectorized, labels_correspondent)
        else:
@@ -349,6 +361,9 @@

        if num_document_types > 0:
            logger.debug("Training document type classifier...")
            notify(
                f"Training document type classifier ({num_document_types} type(s))...",
            )
            self.document_type_classifier = MLPClassifier(tol=0.01)
            self.document_type_classifier.fit(data_vectorized, labels_document_type)
        else:
@@ -361,6 +376,7 @@
            logger.debug(
                "Training storage paths classifier...",
            )
            notify(f"Training storage path classifier ({num_storage_paths} path(s))...")
            self.storage_path_classifier = MLPClassifier(tol=0.01)
            self.storage_path_classifier.fit(
                data_vectorized,
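
The optional status callback defaulting to a no-op is a small reusable pattern; the same idea in a minimal TypeScript sketch (names hypothetical, not the project's frontend code):

type StatusCallback = (message: string) => void

function train(statusCallback?: StatusCallback): boolean {
  // Default to a no-op so callers that don't care never branch on undefined.
  const notify: StatusCallback = statusCallback ?? (() => {})

  notify('Gathering data...')
  // ...real training steps would interleave notify(...) with the work...
  notify('Vectorizing document content...')
  return true
}

train()                          // silent, e.g. from a scheduled task
train((msg) => console.log(msg)) // verbose, e.g. from a management command
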
@@ -1,6 +1,6 @@
import datetime
import hashlib
import os
import shutil
import tempfile
from enum import StrEnum
from pathlib import Path
@@ -32,9 +32,7 @@ from documents.models import DocumentType
from documents.models import StoragePath
from documents.models import Tag
from documents.models import WorkflowTrigger
from documents.parsers import DocumentParser
from documents.parsers import ParseError
from documents.parsers import get_parser_class_for_mime_type
from documents.permissions import set_permissions_for_object
from documents.plugins.base import AlwaysRunPluginMixin
from documents.plugins.base import ConsumeTaskPlugin
@@ -48,10 +46,13 @@ from documents.signals import document_consumption_started
from documents.signals import document_updated
from documents.signals.handlers import run_workflows
from documents.templating.workflows import parse_w_workflow_placeholders
from documents.utils import compute_checksum
from documents.utils import copy_basic_file_stats
from documents.utils import copy_file_with_basic_stats
from documents.utils import run_subprocess
from paperless_mail.parsers import MailDocumentParser
from paperless.parsers import ParserContext
from paperless.parsers import ParserProtocol
from paperless.parsers.registry import get_parser_registry

LOGGING_NAME: Final[str] = "paperless.consumer"

@@ -196,9 +197,7 @@ class ConsumerPlugin(
        version_doc = Document(
            root_document=root_doc_frozen,
            version_index=next_version_index + 1,
            checksum=hashlib.md5(
                file_for_checksum.read_bytes(),
            ).hexdigest(),
            checksum=compute_checksum(file_for_checksum),
            content=text or "",
            page_count=page_count,
            mime_type=mime_type,
@@ -338,18 +337,15 @@ class ConsumerPlugin(
        Return the document object if it was successfully created.
        """

        tempdir = None
        # Preflight has already run including progress update to 0%
        self.log.info(f"Consuming {self.filename}")

        try:
            # Preflight has already run including progress update to 0%
            self.log.info(f"Consuming {self.filename}")

            # For the actual work, copy the file into a tempdir
            tempdir = tempfile.TemporaryDirectory(
                prefix="paperless-ngx",
                dir=settings.SCRATCH_DIR,
            )
            self.working_copy = Path(tempdir.name) / Path(self.filename)
        # For the actual work, copy the file into a tempdir
        with tempfile.TemporaryDirectory(
            prefix="paperless-ngx",
            dir=settings.SCRATCH_DIR,
        ) as tmpdir:
            self.working_copy = Path(tmpdir) / Path(self.filename)
            copy_file_with_basic_stats(self.input_doc.original_file, self.working_copy)
            self.unmodified_original = None

@@ -381,7 +377,7 @@ class ConsumerPlugin(
            self.log.debug(f"Detected mime type after qpdf: {mime_type}")
            # Save the original file for later
            self.unmodified_original = (
                Path(tempdir.name) / Path("uo") / Path(self.filename)
                Path(tmpdir) / Path("uo") / Path(self.filename)
            )
            self.unmodified_original.parent.mkdir(exist_ok=True)
            copy_file_with_basic_stats(
@@ -392,11 +388,14 @@ class ConsumerPlugin(
            self.log.error(f"Error attempting to clean PDF: {e}")

        # Based on the mime type, get the parser for that type
        parser_class: type[DocumentParser] | None = get_parser_class_for_mime_type(
            mime_type,
        parser_class: type[ParserProtocol] | None = (
            get_parser_registry().get_parser_for_file(
                mime_type,
                self.filename,
                self.working_copy,
            )
        )
        if not parser_class:
            tempdir.cleanup()
            self._fail(
                ConsumerStatusShortMessage.UNSUPPORTED_TYPE,
                f"Unsupported mime type {mime_type}",
@@ -411,300 +410,275 @@ class ConsumerPlugin(
            )

            self.run_pre_consume_script()
        except:
            if tempdir:
                tempdir.cleanup()
            raise

        def progress_callback(
            current_progress,
            max_progress,
        ) -> None:  # pragma: no cover
            # recalculate progress to be within 20 and 80
            p = int((current_progress / max_progress) * 50 + 20)
            self._send_progress(p, 100, ProgressStatusOptions.WORKING)

        # This doesn't parse the document yet, but gives us a parser.

        document_parser: DocumentParser = parser_class(
            self.logging_group,
            progress_callback=progress_callback,
        )

        self.log.debug(f"Parser: {type(document_parser).__name__}")

        # Parse the document. This may take some time.

        text = None
        date = None
        thumbnail = None
        archive_path = None
        page_count = None

        try:
            self._send_progress(
                20,
                100,
                ProgressStatusOptions.WORKING,
                ConsumerStatusShortMessage.PARSING_DOCUMENT,
            )
            self.log.debug(f"Parsing {self.filename}...")
            if (
                isinstance(document_parser, MailDocumentParser)
                and self.input_doc.mailrule_id
            ):
                document_parser.parse(
                    self.working_copy,
                    mime_type,
                    self.filename,
                    self.input_doc.mailrule_id,
            # This doesn't parse the document yet, but gives us a parser.
            with parser_class() as document_parser:
                document_parser.configure(
                    ParserContext(mailrule_id=self.input_doc.mailrule_id),
                )
            else:
                document_parser.parse(self.working_copy, mime_type, self.filename)

            self.log.debug(f"Generating thumbnail for {self.filename}...")
            self._send_progress(
                70,
                100,
                ProgressStatusOptions.WORKING,
                ConsumerStatusShortMessage.GENERATING_THUMBNAIL,
            )
            thumbnail = document_parser.get_thumbnail(
                self.working_copy,
                mime_type,
                self.filename,
            )
                self.log.debug(
                    f"Parser: {document_parser.name} v{document_parser.version}",
                )

                # Parse the document. This may take some time.

                text = None
                date = None
                thumbnail = None
                archive_path = None
                page_count = None

                try:
                    self._send_progress(
                        20,
                        100,
                        ProgressStatusOptions.WORKING,
                        ConsumerStatusShortMessage.PARSING_DOCUMENT,
                    )
                    self.log.debug(f"Parsing {self.filename}...")

                    document_parser.parse(self.working_copy, mime_type)

                    self.log.debug(f"Generating thumbnail for {self.filename}...")
                    self._send_progress(
                        70,
                        100,
                        ProgressStatusOptions.WORKING,
                        ConsumerStatusShortMessage.GENERATING_THUMBNAIL,
                    )
                    thumbnail = document_parser.get_thumbnail(
                        self.working_copy,
                        mime_type,
                    )

                    text = document_parser.get_text()
                    date = document_parser.get_date()
                    if date is None:
                        self._send_progress(
                            90,
                            100,
                            ProgressStatusOptions.WORKING,
                            ConsumerStatusShortMessage.PARSE_DATE,
                        )
                        with get_date_parser() as date_parser:
                            date = next(date_parser.parse(self.filename, text), None)
                    archive_path = document_parser.get_archive_path()
                    page_count = document_parser.get_page_count(
                        self.working_copy,
                        mime_type,
                    )

                except ParseError as e:
                    self._fail(
                        str(e),
                        f"Error occurred while consuming document {self.filename}: {e}",
                        exc_info=True,
                        exception=e,
                    )
                except Exception as e:
                    self._fail(
                        str(e),
                        f"Unexpected error while consuming document {self.filename}: {e}",
                        exc_info=True,
                        exception=e,
                    )

                # Prepare the document classifier.

                # TODO: I don't really like to do this here, but this way we avoid
                # reloading the classifier multiple times, since there are multiple
                # post-consume hooks that all require the classifier.

                classifier = load_classifier()

            text = document_parser.get_text()
            date = document_parser.get_date()
            if date is None:
                self._send_progress(
                    90,
                    95,
                    100,
                    ProgressStatusOptions.WORKING,
                    ConsumerStatusShortMessage.PARSE_DATE,
                    ConsumerStatusShortMessage.SAVE_DOCUMENT,
                )
                with get_date_parser() as date_parser:
                    date = next(date_parser.parse(self.filename, text), None)
            archive_path = document_parser.get_archive_path()
            page_count = document_parser.get_page_count(self.working_copy, mime_type)

        except ParseError as e:
            document_parser.cleanup()
            if tempdir:
                tempdir.cleanup()
            self._fail(
                str(e),
                f"Error occurred while consuming document {self.filename}: {e}",
                exc_info=True,
                exception=e,
            )
        except Exception as e:
            document_parser.cleanup()
            if tempdir:
                tempdir.cleanup()
            self._fail(
                str(e),
                f"Unexpected error while consuming document {self.filename}: {e}",
                exc_info=True,
                exception=e,
            )

        # Prepare the document classifier.

        # TODO: I don't really like to do this here, but this way we avoid
        # reloading the classifier multiple times, since there are multiple
        # post-consume hooks that all require the classifier.

        classifier = load_classifier()

        self._send_progress(
            95,
            100,
            ProgressStatusOptions.WORKING,
            ConsumerStatusShortMessage.SAVE_DOCUMENT,
        )
        # now that everything is done, we can start to store the document
        # in the system. This will be a transaction and reasonably fast.
        try:
            with transaction.atomic():
                # store the document.
                if self.input_doc.root_document_id:
                    # If this is a new version of an existing document, we need
                    # to make sure we're not creating a new document, but updating
                    # the existing one.
                    root_doc = Document.objects.get(
                        pk=self.input_doc.root_document_id,
                    )
                    original_document = self._create_version_from_root(
                        root_doc,
                        text=text,
                        page_count=page_count,
                        mime_type=mime_type,
                    )
                    actor = None

                    # Save the new version, potentially creating an audit log entry for the version addition if enabled.
                    if (
                        settings.AUDIT_LOG_ENABLED
                        and self.metadata.actor_id is not None
                    ):
                        actor = User.objects.filter(pk=self.metadata.actor_id).first()
                        if actor is not None:
                            from auditlog.context import (  # type: ignore[import-untyped]
                                set_actor,
                # now that everything is done, we can start to store the document
                # in the system. This will be a transaction and reasonably fast.
                try:
                    with transaction.atomic():
                        # store the document.
                        if self.input_doc.root_document_id:
                            # If this is a new version of an existing document, we need
                            # to make sure we're not creating a new document, but updating
                            # the existing one.
                            root_doc = Document.objects.get(
                                pk=self.input_doc.root_document_id,
                            )
                            original_document = self._create_version_from_root(
                                root_doc,
                                text=text,
                                page_count=page_count,
                                mime_type=mime_type,
                            )
                            actor = None

                            with set_actor(actor):
                            # Save the new version, potentially creating an audit log entry for the version addition if enabled.
                            if (
                                settings.AUDIT_LOG_ENABLED
                                and self.metadata.actor_id is not None
                            ):
                                actor = User.objects.filter(
                                    pk=self.metadata.actor_id,
                                ).first()
                                if actor is not None:
                                    from auditlog.context import (  # type: ignore[import-untyped]
                                        set_actor,
                                    )

                                    with set_actor(actor):
                                        original_document.save()
                                else:
                                    original_document.save()
                            else:
                                original_document.save()

                            # Create a log entry for the version addition, if enabled
                            if settings.AUDIT_LOG_ENABLED:
                                from auditlog.models import (  # type: ignore[import-untyped]
                                    LogEntry,
                                )

                                LogEntry.objects.log_create(
                                    instance=root_doc,
                                    changes={
                                        "Version Added": ["None", original_document.id],
                                    },
                                    action=LogEntry.Action.UPDATE,
                                    actor=actor,
                                    additional_data={
                                        "reason": "Version added",
                                        "version_id": original_document.id,
                                    },
                                )
                            document = original_document
                        else:
                            original_document.save()
                    else:
                        original_document.save()

                    # Create a log entry for the version addition, if enabled
                    if settings.AUDIT_LOG_ENABLED:
                        from auditlog.models import (  # type: ignore[import-untyped]
                            LogEntry,
                        )

                        LogEntry.objects.log_create(
                            instance=root_doc,
                            changes={
                                "Version Added": ["None", original_document.id],
                            },
                            action=LogEntry.Action.UPDATE,
                            actor=actor,
                            additional_data={
                                "reason": "Version added",
                                "version_id": original_document.id,
                            },
                        )
                    document = original_document
                else:
                    document = self._store(
                        text=text,
                        date=date,
                        page_count=page_count,
                        mime_type=mime_type,
                    )

                # If we get here, it was successful. Proceed with post-consume
                # hooks. If they fail, nothing will get changed.

                document_consumption_finished.send(
                    sender=self.__class__,
                    document=document,
                    logging_group=self.logging_group,
                    classifier=classifier,
                    original_file=self.unmodified_original
                    if self.unmodified_original
                    else self.working_copy,
                )

                # After everything is in the database, copy the files into
                # place. If this fails, we'll also rollback the transaction.
                with FileLock(settings.MEDIA_LOCK):
                    generated_filename = generate_unique_filename(document)
                    if (
                        len(str(generated_filename))
                        > Document.MAX_STORED_FILENAME_LENGTH
                    ):
                        self.log.warning(
                            "Generated source filename exceeds db path limit, falling back to default naming",
                        )
                        generated_filename = generate_filename(
                            document,
                            use_format=False,
                        )
                    document.filename = generated_filename
                    create_source_path_directory(document.source_path)

                    self._write(
                        self.unmodified_original
                        if self.unmodified_original is not None
                        else self.working_copy,
                        document.source_path,
                    )

                    self._write(
                        thumbnail,
                        document.thumbnail_path,
                    )

                    if archive_path and Path(archive_path).is_file():
                        generated_archive_filename = generate_unique_filename(
                            document,
                            archive_filename=True,
                        )
                        if (
                            len(str(generated_archive_filename))
                            > Document.MAX_STORED_FILENAME_LENGTH
                        ):
                            self.log.warning(
                                "Generated archive filename exceeds db path limit, falling back to default naming",
                            document = self._store(
                                text=text,
                                date=date,
                                page_count=page_count,
                                mime_type=mime_type,
                            )
                            generated_archive_filename = generate_filename(
                                document,
                                archive_filename=True,
                                use_format=False,
                            )
                        document.archive_filename = generated_archive_filename
                        create_source_path_directory(document.archive_path)
                        self._write(
                            archive_path,
                            document.archive_path,

                        # If we get here, it was successful. Proceed with post-consume
                        # hooks. If they fail, nothing will get changed.

                        document_consumption_finished.send(
                            sender=self.__class__,
                            document=document,
                            logging_group=self.logging_group,
                            classifier=classifier,
                            original_file=self.unmodified_original
                            if self.unmodified_original
                            else self.working_copy,
                        )

                        with Path(archive_path).open("rb") as f:
                            document.archive_checksum = hashlib.md5(
                                f.read(),
                            ).hexdigest()
                        # After everything is in the database, copy the files into
                        # place. If this fails, we'll also rollback the transaction.
                        with FileLock(settings.MEDIA_LOCK):
                            generated_filename = generate_unique_filename(document)
                            if (
                                len(str(generated_filename))
                                > Document.MAX_STORED_FILENAME_LENGTH
                            ):
                                self.log.warning(
                                    "Generated source filename exceeds db path limit, falling back to default naming",
                                )
                                generated_filename = generate_filename(
                                    document,
                                    use_format=False,
                                )
                            document.filename = generated_filename
                            create_source_path_directory(document.source_path)

                    # Don't save with the lock active. Saving will cause the file
                    # renaming logic to acquire the lock as well.
                    # This triggers things like file renaming
                    document.save()
                            self._write(
                                self.unmodified_original
                                if self.unmodified_original is not None
                                else self.working_copy,
                                document.source_path,
                            )

                    if document.root_document_id:
                        document_updated.send(
                            sender=self.__class__,
                            document=document.root_document,
                            self._write(
                                thumbnail,
                                document.thumbnail_path,
                            )

                            if archive_path and Path(archive_path).is_file():
                                generated_archive_filename = generate_unique_filename(
                                    document,
                                    archive_filename=True,
                                )
                                if (
                                    len(str(generated_archive_filename))
                                    > Document.MAX_STORED_FILENAME_LENGTH
                                ):
                                    self.log.warning(
                                        "Generated archive filename exceeds db path limit, falling back to default naming",
                                    )
                                    generated_archive_filename = generate_filename(
                                        document,
                                        archive_filename=True,
                                        use_format=False,
                                    )
                                document.archive_filename = generated_archive_filename
                                create_source_path_directory(document.archive_path)
                                self._write(
                                    archive_path,
                                    document.archive_path,
                                )

                                document.archive_checksum = compute_checksum(
                                    document.archive_path,
                                )

                            # Don't save with the lock active. Saving will cause the file
                            # renaming logic to acquire the lock as well.
                            # This triggers things like file renaming
                            document.save()

                            if document.root_document_id:
                                document_updated.send(
                                    sender=self.__class__,
                                    document=document.root_document,
                                )

                        # Delete the file only if it was successfully consumed
                        self.log.debug(
                            f"Deleting original file {self.input_doc.original_file}",
                        )
                        self.input_doc.original_file.unlink()
                        self.log.debug(f"Deleting working copy {self.working_copy}")
                        self.working_copy.unlink()
                        if self.unmodified_original is not None:  # pragma: no cover
                            self.log.debug(
                                f"Deleting unmodified original file {self.unmodified_original}",
                            )
                            self.unmodified_original.unlink()

                        # https://github.com/jonaswinkler/paperless-ng/discussions/1037
                        shadow_file = (
                            Path(self.input_doc.original_file).parent
                            / f"._{Path(self.input_doc.original_file).name}"
                        )

                        if Path(shadow_file).is_file():
                            self.log.debug(f"Deleting shadow file {shadow_file}")
                            Path(shadow_file).unlink()

                    except Exception as e:
                        self._fail(
                            str(e),
                            f"The following error occurred while storing document "
                            f"{self.filename} after parsing: {e}",
                            exc_info=True,
                            exception=e,
                        )

        # Delete the file only if it was successfully consumed
        self.log.debug(f"Deleting original file {self.input_doc.original_file}")
        self.input_doc.original_file.unlink()
        self.log.debug(f"Deleting working copy {self.working_copy}")
        self.working_copy.unlink()
        if self.unmodified_original is not None:  # pragma: no cover
            self.log.debug(
                f"Deleting unmodified original file {self.unmodified_original}",
            )
            self.unmodified_original.unlink()

        # https://github.com/jonaswinkler/paperless-ng/discussions/1037
        shadow_file = (
            Path(self.input_doc.original_file).parent
            / f"._{Path(self.input_doc.original_file).name}"
        )

        if Path(shadow_file).is_file():
            self.log.debug(f"Deleting shadow file {shadow_file}")
            Path(shadow_file).unlink()

        except Exception as e:
            self._fail(
                str(e),
                f"The following error occurred while storing document "
                f"{self.filename} after parsing: {e}",
                exc_info=True,
                exception=e,
            )
        finally:
            document_parser.cleanup()
            tempdir.cleanup()

        self.run_post_consume_script(document)

        self.log.info(f"Document {document} consumption finished")
@@ -800,7 +774,7 @@ class ConsumerPlugin(
            title=title[:127],
            content=text,
            mime_type=mime_type,
            checksum=hashlib.md5(file_for_checksum.read_bytes()).hexdigest(),
            checksum=compute_checksum(file_for_checksum),
            created=create_date,
            modified=create_date,
            page_count=page_count,
@@ -848,7 +822,7 @@ class ConsumerPlugin(
            self.metadata.view_users is not None
            or self.metadata.view_groups is not None
            or self.metadata.change_users is not None
            or self.metadata.change_users is not None
            or self.metadata.change_groups is not None
        ):
            permissions = {
                "view": {
@@ -881,7 +855,7 @@ class ConsumerPlugin(
            Path(source).open("rb") as read_file,
            Path(target).open("wb") as write_file,
        ):
            write_file.write(read_file.read())
            shutil.copyfileobj(read_file, write_file)

        # Attempt to copy file's original stats, but it's ok if we can't
        try:
@@ -917,10 +891,9 @@ class ConsumerPreflightPlugin(

    def pre_check_duplicate(self) -> None:
        """
        Using the MD5 of the file, check this exact file doesn't already exist
        Using the SHA256 of the file, check this exact file doesn't already exist
        """
        with Path(self.input_doc.original_file).open("rb") as f:
            checksum = hashlib.md5(f.read()).hexdigest()
        checksum = compute_checksum(Path(self.input_doc.original_file))
        existing_doc = Document.global_objects.filter(
            Q(checksum=checksum) | Q(archive_checksum=checksum),
        )
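
Not the project's code, but the same duplicate-detection idea sketched in TypeScript on Node (file path and storage lookup hypothetical): hash the full file bytes with SHA-256 and look the digest up before ingesting.

import { createHash } from 'node:crypto'
import { readFileSync } from 'node:fs'

// Content-addressed duplicate check: identical bytes yield identical digests.
function computeChecksum(path: string): string {
  return createHash('sha256').update(readFileSync(path)).digest('hex')
}

const checksum = computeChecksum('/tmp/scan.pdf')
// A real implementation would then query storage, e.g.:
// SELECT id FROM documents WHERE checksum = ? OR archive_checksum = ?
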
@@ -477,7 +477,14 @@ class DelayedFullTextQuery(DelayedQuery):
        try:
            corrected = self.searcher.correct_query(q, q_str)
            if corrected.string != q_str:
                suggested_correction = corrected.string
                corrected_results = self.searcher.search(
                    corrected.query,
                    limit=1,
                    filter=MappedDocIdSet(self.filter_queryset, self.searcher.ixreader),
                    scored=False,
                )
                if len(corrected_results) > 0:
                    suggested_correction = corrected.string
        except Exception as e:
            logger.info(
                "Error while correcting query %s: %s",
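
The behavioral change above is easy to miss: a spelling suggestion is now surfaced only when the corrected query actually returns hits. Reduced to a minimal TypeScript sketch (hypothetical search signature):

// Suggest a correction only if it differs AND would match something.
function suggestCorrection(
  original: string,
  corrected: string,
  hitCount: (query: string) => number
): string | null {
  if (corrected === original) return null
  return hitCount(corrected) > 0 ? corrected : null
}
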
@@ -1,13 +1,32 @@
from django.core.management.base import BaseCommand
from __future__ import annotations

import time

from documents.management.commands.base import PaperlessCommand
from documents.tasks import train_classifier


class Command(BaseCommand):
class Command(PaperlessCommand):
    help = (
        "Trains the classifier on your data and saves the resulting models to a "
        "file. The document consumer will then automatically use this new model."
    )
    supports_progress_bar = False
    supports_multiprocessing = False

    def handle(self, *args, **options):
        train_classifier(scheduled=False)
    def handle(self, *args, **options) -> None:
        start = time.monotonic()

        with (
            self.buffered_logging("paperless.tasks"),
            self.buffered_logging("paperless.classifier"),
        ):
            train_classifier(
                scheduled=False,
                status_callback=lambda msg: self.console.print(f" {msg}"),
            )

        elapsed = time.monotonic() - start
        self.console.print(
            f"[green]✓[/green] Classifier training complete ({elapsed:.1f}s)",
        )
@@ -56,6 +56,7 @@ from documents.models import WorkflowTrigger
from documents.settings import EXPORTER_ARCHIVE_NAME
from documents.settings import EXPORTER_FILE_NAME
from documents.settings import EXPORTER_THUMBNAIL_NAME
from documents.utils import compute_checksum
from documents.utils import copy_file_with_basic_stats
from paperless import version
from paperless.models import ApplicationConfiguration
@@ -693,7 +694,7 @@ class Command(CryptMixin, PaperlessCommand):
        source_stat = source.stat()
        target_stat = target.stat()
        if self.compare_checksums and source_checksum:
            target_checksum = hashlib.md5(target.read_bytes()).hexdigest()
            target_checksum = compute_checksum(target)
            perform_copy = target_checksum != source_checksum
        elif (
            source_stat.st_mtime != target_stat.st_mtime
@@ -8,6 +8,7 @@ from pathlib import Path
|
||||
from zipfile import ZipFile
|
||||
from zipfile import is_zipfile
|
||||
|
||||
import ijson
|
||||
from django.conf import settings
|
||||
from django.contrib.auth.models import Permission
|
||||
from django.contrib.auth.models import User
|
||||
@@ -32,7 +33,6 @@ from documents.models import Document
|
||||
from documents.models import DocumentType
|
||||
from documents.models import Note
|
||||
from documents.models import Tag
|
||||
from documents.parsers import run_convert
|
||||
from documents.settings import EXPORTER_ARCHIVE_NAME
|
||||
from documents.settings import EXPORTER_CRYPTO_SETTINGS_NAME
|
||||
from documents.settings import EXPORTER_FILE_NAME
|
||||
@@ -46,6 +46,15 @@ if settings.AUDIT_LOG_ENABLED:
|
||||
from auditlog.registry import auditlog
|
||||
|
||||
|
||||
def iter_manifest_records(path: Path) -> Generator[dict, None, None]:
|
||||
"""Yield records one at a time from a manifest JSON array via ijson."""
|
||||
try:
|
||||
with path.open("rb") as f:
|
||||
yield from ijson.items(f, "item")
|
||||
except ijson.JSONError as e:
|
||||
raise CommandError(f"Failed to parse manifest file {path}: {e}") from e
|
||||
|
||||
|
||||
@contextmanager
|
||||
def disable_signal(sig, receiver, sender, *, weak: bool | None = None) -> Generator:
|
||||
try:
|
||||
@@ -143,14 +152,9 @@ class Command(CryptMixin, PaperlessCommand):
|
||||
Loads manifest data from the various JSON files for parsing and loading the database
|
||||
"""
|
||||
main_manifest_path: Path = self.source / "manifest.json"
|
||||
|
||||
with main_manifest_path.open() as infile:
|
||||
self.manifest = json.load(infile)
|
||||
self.manifest_paths.append(main_manifest_path)
|
||||
|
||||
for file in Path(self.source).glob("**/*-manifest.json"):
|
||||
with file.open() as infile:
|
||||
self.manifest += json.load(infile)
|
||||
self.manifest_paths.append(file)
|
||||
|
||||
def load_metadata(self) -> None:
|
||||
@@ -201,7 +205,7 @@ class Command(CryptMixin, PaperlessCommand):
|
||||
ContentType.objects.all().delete()
|
||||
Permission.objects.all().delete()
|
||||
for manifest_path in self.manifest_paths:
|
||||
call_command("loaddata", manifest_path)
|
||||
call_command("loaddata", manifest_path, skip_checks=True)
|
||||
except (FieldDoesNotExist, DeserializationError, IntegrityError) as e:
|
||||
self.stdout.write(self.style.ERROR("Database import failed"))
|
||||
if (
|
||||
@@ -231,7 +235,6 @@ class Command(CryptMixin, PaperlessCommand):
|
||||
self.version: str | None = None
|
||||
self.salt: str | None = None
|
||||
self.manifest_paths = []
|
||||
self.manifest = []
|
||||
|
||||
# Create a temporary directory for extracting a zip file into it, even if supplied source is no zip file to keep code cleaner.
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
@@ -291,6 +294,9 @@ class Command(CryptMixin, PaperlessCommand):
|
||||
else:
|
||||
self.stdout.write(self.style.NOTICE("Data only import completed"))
|
||||
|
||||
for tmp in getattr(self, "_decrypted_tmp_paths", []):
|
||||
tmp.unlink(missing_ok=True)
|
||||
|
||||
self.stdout.write("Updating search index...")
|
||||
call_command(
|
||||
"document_index",
|
||||
@@ -343,11 +349,12 @@ class Command(CryptMixin, PaperlessCommand):
|
||||
) from e
|
||||
|
||||
self.stdout.write("Checking the manifest")
|
||||
for record in self.manifest:
|
||||
# Only check if the document files exist if this is not data only
|
||||
# We don't care about documents for a data only import
|
||||
if not self.data_only and record["model"] == "documents.document":
|
||||
check_document_validity(record)
|
||||
for manifest_path in self.manifest_paths:
|
||||
for record in iter_manifest_records(manifest_path):
|
||||
# Only check if the document files exist if this is not data only
|
||||
# We don't care about documents for a data only import
|
||||
if not self.data_only and record["model"] == "documents.document":
|
||||
check_document_validity(record)
|
||||
|
||||
def _import_files_from_manifest(self) -> None:
|
||||
settings.ORIGINALS_DIR.mkdir(parents=True, exist_ok=True)
|
||||
@@ -356,23 +363,31 @@ class Command(CryptMixin, PaperlessCommand):
|
||||
|
||||
self.stdout.write("Copy files into paperless...")
|
||||
|
||||
manifest_documents = list(
|
||||
filter(lambda r: r["model"] == "documents.document", self.manifest),
|
||||
)
|
||||
document_records = [
|
||||
{
|
||||
"pk": record["pk"],
|
||||
EXPORTER_FILE_NAME: record[EXPORTER_FILE_NAME],
|
||||
EXPORTER_THUMBNAIL_NAME: record.get(EXPORTER_THUMBNAIL_NAME),
|
||||
EXPORTER_ARCHIVE_NAME: record.get(EXPORTER_ARCHIVE_NAME),
|
||||
}
|
||||
for manifest_path in self.manifest_paths
|
||||
for record in iter_manifest_records(manifest_path)
|
||||
if record["model"] == "documents.document"
|
||||
]
|
||||
|
||||
for record in self.track(manifest_documents, description="Copying files..."):
|
||||
for record in self.track(document_records, description="Copying files..."):
|
||||
document = Document.objects.get(pk=record["pk"])
|
||||
|
||||
doc_file = record[EXPORTER_FILE_NAME]
|
||||
document_path = self.source / doc_file
|
||||
|
||||
if EXPORTER_THUMBNAIL_NAME in record:
|
||||
if record[EXPORTER_THUMBNAIL_NAME]:
|
||||
thumb_file = record[EXPORTER_THUMBNAIL_NAME]
|
||||
            thumbnail_path = (self.source / thumb_file).resolve()
        else:
            thumbnail_path = None

        if EXPORTER_ARCHIVE_NAME in record:
            if record[EXPORTER_ARCHIVE_NAME]:
                archive_file = record[EXPORTER_ARCHIVE_NAME]
                archive_path = self.source / archive_file
            else:
@@ -387,22 +402,10 @@ class Command(CryptMixin, PaperlessCommand):
        copy_file_with_basic_stats(document_path, document.source_path)

        if thumbnail_path:
            if thumbnail_path.suffix in {".png", ".PNG"}:
                run_convert(
                    density=300,
                    scale="500x5000>",
                    alpha="remove",
                    strip=True,
                    trim=False,
                    auto_orient=True,
                    input_file=f"{thumbnail_path}[0]",
                    output_file=str(document.thumbnail_path),
                )
            else:
                copy_file_with_basic_stats(
                    thumbnail_path,
                    document.thumbnail_path,
                )
            copy_file_with_basic_stats(
                thumbnail_path,
                document.thumbnail_path,
            )

        if archive_path:
            create_source_path_directory(document.archive_path)
@@ -413,33 +416,43 @@ class Command(CryptMixin, PaperlessCommand):

        document.save()

    def _decrypt_record_if_needed(self, record: dict) -> dict:
        fields = self.CRYPT_FIELDS_BY_MODEL.get(record.get("model", ""))
        if fields:
            for field in fields:
                if record["fields"].get(field):
                    record["fields"][field] = self.decrypt_string(
                        value=record["fields"][field],
                    )
        return record

    def decrypt_secret_fields(self) -> None:
        """
        The converse decryption of some fields out of the export before importing to database
        The converse decryption of some fields out of the export before importing to database.
        Streams records from each manifest path and writes decrypted content to a temp file.
        """
        if self.passphrase:
            # Salt has been loaded from metadata.json at this point, so it cannot be None
            self.setup_crypto(passphrase=self.passphrase, salt=self.salt)

            had_at_least_one_record = False

            for crypt_config in self.CRYPT_FIELDS:
                importer_model: str = crypt_config["model_name"]
                crypt_fields: str = crypt_config["fields"]
                for record in filter(
                    lambda x: x["model"] == importer_model,
                    self.manifest,
                ):
                    had_at_least_one_record = True
                    for field in crypt_fields:
                        if record["fields"][field]:
                            record["fields"][field] = self.decrypt_string(
                                value=record["fields"][field],
                            )

            if had_at_least_one_record:
                # It's annoying, but the DB is loaded from the JSON directly
                # Maybe could change that in the future?
                (self.source / "manifest.json").write_text(
                    json.dumps(self.manifest, indent=2, ensure_ascii=False),
                )
        if not self.passphrase:
            return
        # Salt has been loaded from metadata.json at this point, so it cannot be None
        self.setup_crypto(passphrase=self.passphrase, salt=self.salt)
        self._decrypted_tmp_paths: list[Path] = []
        new_paths: list[Path] = []
        for manifest_path in self.manifest_paths:
            tmp = manifest_path.with_name(manifest_path.stem + ".decrypted.json")
            with tmp.open("w", encoding="utf-8") as out:
                out.write("[\n")
                first = True
                for record in iter_manifest_records(manifest_path):
                    if not first:
                        out.write(",\n")
                    json.dump(
                        self._decrypt_record_if_needed(record),
                        out,
                        indent=2,
                        ensure_ascii=False,
                    )
                    first = False
                out.write("\n]\n")
            self._decrypted_tmp_paths.append(tmp)
            new_paths.append(tmp)
        self.manifest_paths = new_paths
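setup_crypto and decrypt_string come from CryptMixin and are not part of this diff. A minimal sketch of the kind of passphrase-plus-salt scheme these calls imply, using PBKDF2 and Fernet from the cryptography package (the primitives and iteration count are assumptions, not the mixin's actual implementation):

import base64

from cryptography.fernet import Fernet
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC


def setup_crypto(passphrase: str, salt: bytes) -> Fernet:
    # Derive a stable symmetric key from the passphrase and the exported salt.
    kdf = PBKDF2HMAC(
        algorithm=hashes.SHA256(),
        length=32,
        salt=salt,
        iterations=600_000,
    )
    key = base64.urlsafe_b64encode(kdf.derive(passphrase.encode()))
    return Fernet(key)


def decrypt_string(fernet: Fernet, value: str) -> str:
    return fernet.decrypt(value.encode()).decode()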
@@ -3,14 +3,18 @@ import shutil

from documents.management.commands.base import PaperlessCommand
from documents.models import Document
from documents.parsers import get_parser_class_for_mime_type
from paperless.parsers.registry import get_parser_registry

logger = logging.getLogger("paperless.management.thumbnails")


def _process_document(doc_id: int) -> None:
    document: Document = Document.objects.get(id=doc_id)
    parser_class = get_parser_class_for_mime_type(document.mime_type)
    parser_class = get_parser_registry().get_parser_for_file(
        document.mime_type,
        document.original_filename or "",
        document.source_path,
    )

    if parser_class is None:
        logger.warning(
@@ -20,17 +24,9 @@ def _process_document(doc_id: int) -> None:
        )
        return

    parser = parser_class(logging_group=None)

    try:
        thumb = parser.get_thumbnail(
            document.source_path,
            document.mime_type,
            document.get_public_filename(),
        )
    with parser_class() as parser:
        thumb = parser.get_thumbnail(document.source_path, document.mime_type)
        shutil.move(thumb, document.thumbnail_path)
    finally:
        parser.cleanup()


class Command(PaperlessCommand):
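The with parser_class() as parser form implies the parser base class is now a context manager that releases its scratch files on exit, replacing the explicit try/finally cleanup. A minimal sketch of that protocol (the class body is illustrative, not the actual DocumentParser):

import shutil
import tempfile


class ManagedParser:
    """Sketch of a parser that cleans up its scratch dir on context exit."""

    def __enter__(self):
        self._tempdir = tempfile.mkdtemp(prefix="paperless-parser-")
        return self

    def __exit__(self, exc_type, exc, tb):
        # Equivalent of the old "finally: parser.cleanup()".
        shutil.rmtree(self._tempdir, ignore_errors=True)
        return False  # never swallow exceptions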
@@ -1,22 +0,0 @@
import sys

from django.core.management.commands.loaddata import Command as LoadDataCommand


# This class is used to migrate data between databases
# That's difficult to test
class Command(LoadDataCommand):  # pragma: no cover
    """
    Allow the loading of data from standard in. Sourced originally from:
    https://gist.github.com/bmispelon/ad5a2c333443b3a1d051 (MIT licensed)
    """

    def parse_name(self, fixture_name):
        self.compression_formats["stdin"] = (lambda x, y: sys.stdin, None)
        if fixture_name == "-":
            return "-", "json", "stdin"

    def find_fixtures(self, fixture_label):
        if fixture_label == "-":
            return [("-", None, "-")]
        return super().find_fixtures(fixture_label)

@@ -169,7 +169,7 @@ def match_storage_paths(document: Document, classifier: DocumentClassifier, user
def matches(matching_model: MatchingModel, document: Document):
    search_flags = 0

    document_content = document.content
    document_content = document.get_effective_content() or ""

    # Check that match is not empty
    if not matching_model.match.strip():

@@ -5,7 +5,7 @@ from django.db import migrations

class Migration(migrations.Migration):
    dependencies = [
        ("documents", "0003_workflowaction_order"),
        ("documents", "0002_squashed"),
    ]

    operations = [
@@ -1,18 +0,0 @@
# Generated by Django 5.2.9 on 2026-01-20 20:06

from django.db import migrations
from django.db import models


class Migration(migrations.Migration):
    dependencies = [
        ("documents", "0002_squashed"),
    ]

    operations = [
        migrations.AddField(
            model_name="workflowaction",
            name="order",
            field=models.PositiveIntegerField(default=0, verbose_name="order"),
        ),
    ]
@@ -6,7 +6,7 @@ from django.db import models

class Migration(migrations.Migration):
    dependencies = [
        ("documents", "0004_remove_document_storage_type"),
        ("documents", "0003_remove_document_storage_type"),
    ]

    operations = [
@@ -6,7 +6,7 @@ from django.db import models

class Migration(migrations.Migration):
    dependencies = [
        ("documents", "0005_workflowtrigger_filter_has_any_correspondents_and_more"),
        ("documents", "0004_workflowtrigger_filter_has_any_correspondents_and_more"),
    ]

    operations = [
@@ -7,7 +7,7 @@ from django.db import models

class Migration(migrations.Migration):
    dependencies = [
        ("documents", "0006_alter_document_checksum_unique"),
        ("documents", "0005_alter_document_checksum_unique"),
    ]

    operations = [
@@ -46,7 +46,7 @@ def revoke_share_link_bundle_permissions(apps, schema_editor):
class Migration(migrations.Migration):
    dependencies = [
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
        ("documents", "0007_document_content_length"),
        ("documents", "0006_document_content_length"),
    ]

    operations = [
@@ -6,7 +6,7 @@ from django.db import models

class Migration(migrations.Migration):
    dependencies = [
        ("documents", "0008_sharelinkbundle"),
        ("documents", "0007_sharelinkbundle"),
    ]

    operations = [
@@ -7,7 +7,7 @@ from django.db import models

class Migration(migrations.Migration):
    dependencies = [
        ("documents", "0009_workflowaction_passwords_alter_workflowaction_type"),
        ("documents", "0008_workflowaction_passwords_alter_workflowaction_type"),
    ]

    operations = [
@@ -7,7 +7,7 @@ from django.db import models

class Migration(migrations.Migration):
    dependencies = [
        ("documents", "0010_alter_document_content_length"),
        ("documents", "0009_alter_document_content_length"),
    ]

    operations = [
@@ -6,7 +6,7 @@ from django.db import models

class Migration(migrations.Migration):
    dependencies = [
        ("documents", "0011_optimize_integer_field_sizes"),
        ("documents", "0010_optimize_integer_field_sizes"),
    ]

    operations = [
@@ -7,7 +7,7 @@ from django.db import models

class Migration(migrations.Migration):
    dependencies = [
        ("documents", "0012_alter_workflowaction_type"),
        ("documents", "0011_alter_workflowaction_type"),
    ]

    operations = [
@@ -6,7 +6,7 @@ from django.db import models

class Migration(migrations.Migration):
    dependencies = [
        ("documents", "0013_document_root_document"),
        ("documents", "0012_document_root_document"),
    ]

    operations = [
@@ -124,7 +124,7 @@ def _restore_visibility_fields(apps, schema_editor):

class Migration(migrations.Migration):
    dependencies = [
        ("documents", "0014_alter_paperlesstask_task_name"),
        ("documents", "0013_alter_paperlesstask_task_name"),
    ]

    operations = [
@@ -7,7 +7,7 @@ from django.db import models

class Migration(migrations.Migration):
    dependencies = [
        ("documents", "0015_savedview_visibility_to_ui_settings"),
        ("documents", "0014_savedview_visibility_to_ui_settings"),
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
    ]
src/documents/migrations/0016_sha256_checksums.py (new file, 130 lines)
@@ -0,0 +1,130 @@
import hashlib
import logging
from pathlib import Path

from django.conf import settings
from django.db import migrations
from django.db import models

logger = logging.getLogger("paperless.migrations")

_CHUNK_SIZE = 65536  # 64 KiB — avoids loading entire files into memory
_BATCH_SIZE = 500  # documents per bulk_update call
_PROGRESS_INTERVAL = 500  # log a progress line every N documents


def _sha256(path: Path) -> str:
    h = hashlib.sha256()
    with path.open("rb") as fh:
        while chunk := fh.read(_CHUNK_SIZE):
            h.update(chunk)
    return h.hexdigest()


def recompute_checksums(apps, schema_editor):
    """Recompute all document checksums from MD5 to SHA256."""
    Document = apps.get_model("documents", "Document")

    total = Document.objects.count()
    if total == 0:
        return

    logger.info("Recomputing SHA-256 checksums for %d document(s)...", total)

    batch: list = []
    processed = 0

    for doc in Document.objects.only(
        "pk",
        "filename",
        "checksum",
        "archive_filename",
        "archive_checksum",
    ).iterator(chunk_size=_BATCH_SIZE):
        updated_fields: list[str] = []

        # Reconstruct source path the same way Document.source_path does
        fname = str(doc.filename) if doc.filename else f"{doc.pk:07}.pdf"
        source_path = (settings.ORIGINALS_DIR / Path(fname)).resolve()

        if source_path.exists():
            doc.checksum = _sha256(source_path)
            updated_fields.append("checksum")
        else:
            logger.warning(
                "Document %s: original file %s not found, checksum not updated.",
                doc.pk,
                source_path,
            )

        # Mirror Document.has_archive_version: archive_filename is not None
        if doc.archive_filename is not None:
            archive_path = (
                settings.ARCHIVE_DIR / Path(str(doc.archive_filename))
            ).resolve()
            if archive_path.exists():
                doc.archive_checksum = _sha256(archive_path)
                updated_fields.append("archive_checksum")
            else:
                logger.warning(
                    "Document %s: archive file %s not found, checksum not updated.",
                    doc.pk,
                    archive_path,
                )

        if updated_fields:
            batch.append(doc)

        processed += 1

        if len(batch) >= _BATCH_SIZE:
            Document.objects.bulk_update(batch, ["checksum", "archive_checksum"])
            batch.clear()

        if processed % _PROGRESS_INTERVAL == 0:
            logger.info(
                "SHA-256 checksum progress: %d/%d (%d%%)",
                processed,
                total,
                processed * 100 // total,
            )

    if batch:
        Document.objects.bulk_update(batch, ["checksum", "archive_checksum"])

    logger.info(
        "SHA-256 checksum recomputation complete: %d document(s) processed.",
        total,
    )


class Migration(migrations.Migration):
    dependencies = [
        ("documents", "0015_document_version_index_and_more"),
    ]

    operations = [
        migrations.AlterField(
            model_name="document",
            name="checksum",
            field=models.CharField(
                editable=False,
                help_text="The checksum of the original document.",
                max_length=64,
                verbose_name="checksum",
            ),
        ),
        migrations.AlterField(
            model_name="document",
            name="archive_checksum",
            field=models.CharField(
                blank=True,
                editable=False,
                help_text="The checksum of the archived document.",
                max_length=64,
                null=True,
                verbose_name="archive checksum",
            ),
        ),
        migrations.RunPython(recompute_checksums, migrations.RunPython.noop),
    ]
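As an aside, on Python 3.11+ the chunked read loop in _sha256 could be replaced by the standard library's hashlib.file_digest, which performs the same constant-memory streaming hash. A minimal sketch (the helper name is hypothetical):

import hashlib
from pathlib import Path


def sha256_digest(path: Path) -> str:
    # hashlib.file_digest (Python 3.11+) reads the file in chunks internally,
    # so whole files are never loaded into memory.
    with path.open("rb") as fh:
        return hashlib.file_digest(fh, "sha256").hexdigest()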
@@ -216,14 +216,14 @@ class Document(SoftDeleteModel, ModelWithOwner):  # type: ignore[django-manager-

    checksum = models.CharField(
        _("checksum"),
        max_length=32,
        max_length=64,
        editable=False,
        help_text=_("The checksum of the original document."),
    )

    archive_checksum = models.CharField(
        _("archive checksum"),
        max_length=32,
        max_length=64,
        editable=False,
        blank=True,
        null=True,
@@ -361,6 +361,42 @@ class Document(SoftDeleteModel, ModelWithOwner):  # type: ignore[django-manager-
        res += f" {self.title}"
        return res

    def get_effective_content(self) -> str | None:
        """
        Returns the effective content for the document.

        For root documents, this is the latest version's content when available.
        For version documents, this is always the document's own content.
        If the queryset already annotated ``effective_content``, that value is used.
        """
        if hasattr(self, "effective_content"):
            return getattr(self, "effective_content")

        if self.root_document_id is not None or self.pk is None:
            return self.content

        prefetched_cache = getattr(self, "_prefetched_objects_cache", None)
        prefetched_versions = (
            prefetched_cache.get("versions")
            if isinstance(prefetched_cache, dict)
            else None
        )
        if prefetched_versions:
            latest_prefetched = max(prefetched_versions, key=lambda doc: doc.id)
            return latest_prefetched.content

        latest_version_content = (
            Document.objects.filter(root_document=self)
            .order_by("-id")
            .values_list("content", flat=True)
            .first()
        )
        return (
            latest_version_content
            if latest_version_content is not None
            else self.content
        )

    @property
    def suggestion_content(self):
        """
@@ -373,15 +409,21 @@ class Document(SoftDeleteModel, ModelWithOwner):  # type: ignore[django-manager-
        This improves processing speed for large documents while keeping
        enough context for accurate suggestions.
        """
        if not self.content or len(self.content) <= 1200000:
            return self.content
        effective_content = self.get_effective_content()
        if not effective_content or len(effective_content) <= 1200000:
            return effective_content
        else:
            # Use 80% from the start and 20% from the end
            # to preserve both opening and closing context.
            head_len = 800000
            tail_len = 200000

            return " ".join((self.content[:head_len], self.content[-tail_len:]))
            return " ".join(
                (
                    effective_content[:head_len],
                    effective_content[-tail_len:],
                ),
            )

    @property
    def source_path(self) -> Path:
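get_effective_content first honors an effective_content annotation, so list views can resolve the latest version's text in a single query instead of one extra query per root document. A minimal sketch of such an annotation, assuming the root_document relation shown above (the queryset names are illustrative):

from django.db.models import OuterRef, Subquery
from django.db.models.functions import Coalesce

from documents.models import Document

# Latest version's content for each root document, or NULL when none exist.
latest_version = (
    Document.objects.filter(root_document=OuterRef("pk"))
    .order_by("-id")
    .values("content")[:1]
)

docs = Document.objects.annotate(
    effective_content=Coalesce(Subquery(latest_version), "content"),
)
# get_effective_content() now returns the annotated value without extra queries.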
@@ -3,84 +3,47 @@ from __future__ import annotations
import logging
import mimetypes
import os
import re
import shutil
import subprocess
import tempfile
from functools import lru_cache
from pathlib import Path
from typing import TYPE_CHECKING

from django.conf import settings

from documents.loggers import LoggingMixin
from documents.signals import document_consumer_declaration
from documents.utils import copy_file_with_basic_stats
from documents.utils import run_subprocess
from paperless.parsers.registry import get_parser_registry

if TYPE_CHECKING:
    import datetime

# This regular expression will try to find dates in the document at
# hand and will match the following formats:
# - XX.YY.ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
# - XX/YY/ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
# - XX-YY-ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
# - ZZZZ.XX.YY with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
# - ZZZZ/XX/YY with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
# - ZZZZ-XX-YY with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
# - XX. MONTH ZZZZ with XX being 1 or 2 and ZZZZ being 2 or 4 digits
# - MONTH ZZZZ, with ZZZZ being 4 digits
# - MONTH XX, ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits
# - XX MON ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits. MONTH is 3 letters
# - XXPP MONTH ZZZZ with XX being 1 or 2 and PP being 2 letters and ZZZZ being 4 digits

# TODO: isn't there a date parsing library for this?

DATE_REGEX = re.compile(
    r"(\b|(?!=([_-])))(\d{1,2})[\.\/-](\d{1,2})[\.\/-](\d{4}|\d{2})(\b|(?=([_-])))|"
    r"(\b|(?!=([_-])))(\d{4}|\d{2})[\.\/-](\d{1,2})[\.\/-](\d{1,2})(\b|(?=([_-])))|"
    r"(\b|(?!=([_-])))(\d{1,2}[\. ]+[a-zéûäëčžúřěáíóńźçŞğü]{3,9} \d{4}|[a-zéûäëčžúřěáíóńźçŞğü]{3,9} \d{1,2}, \d{4})(\b|(?=([_-])))|"
    r"(\b|(?!=([_-])))([^\W\d_]{3,9} \d{1,2}, (\d{4}))(\b|(?=([_-])))|"
    r"(\b|(?!=([_-])))([^\W\d_]{3,9} \d{4})(\b|(?=([_-])))|"
    r"(\b|(?!=([_-])))(\d{1,2}[^ 0-9]{2}[\. ]+[^ ]{3,9}[ \.\/-]\d{4})(\b|(?=([_-])))|"
    r"(\b|(?!=([_-])))(\b\d{1,2}[ \.\/-][a-zéûäëčžúřěáíóńźçŞğü]{3}[ \.\/-]\d{4})(\b|(?=([_-])))",
    re.IGNORECASE,
)


logger = logging.getLogger("paperless.parsing")


@lru_cache(maxsize=8)
def is_mime_type_supported(mime_type: str) -> bool:
    """
    Returns True if the mime type is supported, False otherwise
    """
    return get_parser_class_for_mime_type(mime_type) is not None
    return get_parser_registry().get_parser_for_file(mime_type, "") is not None


@lru_cache(maxsize=8)
def get_default_file_extension(mime_type: str) -> str:
    """
    Returns the default file extension for a mimetype, or
    an empty string if it could not be determined
    """
    for response in document_consumer_declaration.send(None):
        parser_declaration = response[1]
        supported_mime_types = parser_declaration["mime_types"]

        if mime_type in supported_mime_types:
            return supported_mime_types[mime_type]
    parser_class = get_parser_registry().get_parser_for_file(mime_type, "")
    if parser_class is not None:
        supported = parser_class.supported_mime_types()
        if mime_type in supported:
            return supported[mime_type]

    ext = mimetypes.guess_extension(mime_type)
    if ext:
        return ext
    else:
        return ""
    return ext if ext else ""


@lru_cache(maxsize=8)
def is_file_ext_supported(ext: str) -> bool:
    """
    Returns True if the file extension is supported, False otherwise
@@ -94,44 +57,17 @@ def is_file_ext_supported(ext: str) -> bool:

def get_supported_file_extensions() -> set[str]:
    extensions = set()
    for response in document_consumer_declaration.send(None):
        parser_declaration = response[1]
        supported_mime_types = parser_declaration["mime_types"]

        for mime_type in supported_mime_types:
    for parser_class in get_parser_registry().all_parsers():
        for mime_type, ext in parser_class.supported_mime_types().items():
            extensions.update(mimetypes.guess_all_extensions(mime_type))
            # Python's stdlib might be behind, so also add what the parser
            # says is the default extension
            # This makes image/webp supported on Python < 3.11
            extensions.add(supported_mime_types[mime_type])
            extensions.add(ext)

    return extensions


def get_parser_class_for_mime_type(mime_type: str) -> type[DocumentParser] | None:
    """
    Returns the best parser (by weight) for the given mimetype or
    None if no parser exists
    """

    options = []

    for response in document_consumer_declaration.send(None):
        parser_declaration = response[1]
        supported_mime_types = parser_declaration["mime_types"]

        if mime_type in supported_mime_types:
            options.append(parser_declaration)

    if not options:
        return None

    best_parser = sorted(options, key=lambda _: _["weight"], reverse=True)[0]

    # Return the parser with the highest weight.
    return best_parser["parser"]


def run_convert(
    input_file,
    output_file,
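The registry calls above (get_parser_registry(), get_parser_for_file, all_parsers, supported_mime_types) replace the old document_consumer_declaration signal. The actual registry lives in paperless.parsers.registry and is not shown in this diff; what follows is only a sketch of the shape these call sites assume:

from pathlib import Path


class ParserRegistry:
    """Hypothetical sketch: holds parser classes and picks one by MIME type."""

    def __init__(self) -> None:
        self._parsers: list[type] = []

    def register(self, parser_class: type) -> None:
        self._parsers.append(parser_class)

    def all_parsers(self) -> list[type]:
        return list(self._parsers)

    def get_parser_for_file(
        self,
        mime_type: str,
        filename: str,
        path: Path | None = None,
    ) -> type | None:
        # supported_mime_types() maps mime type -> default extension,
        # matching how the call sites above consume it.
        candidates = [
            p for p in self._parsers if mime_type in p.supported_mime_types()
        ]
        return candidates[0] if candidates else None


_registry = ParserRegistry()


def get_parser_registry() -> ParserRegistry:
    return _registry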
@@ -11,7 +11,6 @@ is an identity function that adds no overhead.

from __future__ import annotations

import hashlib
import logging
import uuid
from collections import defaultdict
@@ -30,6 +29,7 @@ from django.utils import timezone

from documents.models import Document
from documents.models import PaperlessTask
from documents.utils import compute_checksum
from paperless.config import GeneralConfig

logger = logging.getLogger("paperless.sanity_checker")
@@ -218,7 +218,7 @@ def _check_original(

    present_files.discard(source_path)
    try:
        checksum = hashlib.md5(source_path.read_bytes()).hexdigest()
        checksum = compute_checksum(source_path)
    except OSError as e:
        messages.error(doc.pk, f"Cannot read original file of document: {e}")
    else:
@@ -255,7 +255,7 @@ def _check_archive(

    present_files.discard(archive_path)
    try:
        checksum = hashlib.md5(archive_path.read_bytes()).hexdigest()
        checksum = compute_checksum(archive_path)
    except OSError as e:
        messages.error(
            doc.pk,
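compute_checksum comes from documents.utils and is not shown in this diff. Given the migration above moves checksums from MD5 to SHA-256, a plausible sketch of the helper (the chunk size is an assumption mirroring the migration):

import hashlib
from pathlib import Path

_CHUNK_SIZE = 65536  # assumption: same 64 KiB chunking as the migration


def compute_checksum(path: Path) -> str:
    # Streams the file so large originals/archives are never read into memory,
    # unlike the old hashlib.md5(path.read_bytes()) call sites above.
    digest = hashlib.sha256()
    with path.open("rb") as fh:
        while chunk := fh.read(_CHUNK_SIZE):
            digest.update(chunk)
    return digest.hexdigest()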
@@ -703,15 +703,6 @@ class StoragePathField(serializers.PrimaryKeyRelatedField):


class CustomFieldSerializer(serializers.ModelSerializer):
    def __init__(self, *args, **kwargs):
        context = kwargs.get("context")
        self.api_version = int(
            context.get("request").version
            if context and context.get("request")
            else settings.REST_FRAMEWORK["DEFAULT_VERSION"],
        )
        super().__init__(*args, **kwargs)

    data_type = serializers.ChoiceField(
        choices=CustomField.FieldDataType,
        read_only=False,
@@ -791,38 +782,6 @@ class CustomFieldSerializer(serializers.ModelSerializer):
        )
        return super().validate(attrs)

    def to_internal_value(self, data):
        ret = super().to_internal_value(data)

        if (
            self.api_version < 7
            and ret.get("data_type", "") == CustomField.FieldDataType.SELECT
            and isinstance(ret.get("extra_data", {}).get("select_options"), list)
        ):
            ret["extra_data"]["select_options"] = [
                {
                    "label": option,
                    "id": get_random_string(length=16),
                }
                for option in ret["extra_data"]["select_options"]
            ]

        return ret

    def to_representation(self, instance):
        ret = super().to_representation(instance)

        if (
            self.api_version < 7
            and instance.data_type == CustomField.FieldDataType.SELECT
        ):
            # Convert the select options with ids to a list of strings
            ret["extra_data"]["select_options"] = [
                option["label"] for option in ret["extra_data"]["select_options"]
            ]

        return ret


class ReadWriteSerializerMethodField(serializers.SerializerMethodField):
    """
@@ -838,6 +797,25 @@ class ReadWriteSerializerMethodField(serializers.SerializerMethodField):
        return {self.field_name: data}


def validate_documentlink_targets(user, doc_ids):
    if Document.objects.filter(id__in=doc_ids).count() != len(doc_ids):
        raise serializers.ValidationError(
            "Some documents in value don't exist or were specified twice.",
        )

    if user is None:
        return

    target_documents = Document.objects.filter(id__in=doc_ids).select_related("owner")
    if not all(
        has_perms_owner_aware(user, "change_document", document)
        for document in target_documents
    ):
        raise PermissionDenied(
            _("Insufficient permissions."),
        )


class CustomFieldInstanceSerializer(serializers.ModelSerializer):
    field = serializers.PrimaryKeyRelatedField(queryset=CustomField.objects.all())
    value = ReadWriteSerializerMethodField(allow_null=True)
@@ -928,59 +906,14 @@ class CustomFieldInstanceSerializer(serializers.ModelSerializer):
                "Value must be a list",
            )
        doc_ids = data["value"]
        if Document.objects.filter(id__in=doc_ids).count() != len(
            data["value"],
        ):
            raise serializers.ValidationError(
                "Some documents in value don't exist or were specified twice.",
            )
        request = self.context.get("request")
        validate_documentlink_targets(
            getattr(request, "user", None) if request is not None else None,
            doc_ids,
        )

        return data

    def get_api_version(self):
        return int(
            self.context.get("request").version
            if self.context.get("request")
            else settings.REST_FRAMEWORK["DEFAULT_VERSION"],
        )

    def to_internal_value(self, data):
        ret = super().to_internal_value(data)

        if (
            self.get_api_version() < 7
            and ret.get("field").data_type == CustomField.FieldDataType.SELECT
            and ret.get("value") is not None
        ):
            # Convert the index of the option in the field.extra_data["select_options"]
            # list to the options unique id
            ret["value"] = ret.get("field").extra_data["select_options"][ret["value"]][
                "id"
            ]

        return ret

    def to_representation(self, instance):
        ret = super().to_representation(instance)

        if (
            self.get_api_version() < 7
            and instance.field.data_type == CustomField.FieldDataType.SELECT
        ):
            # return the index of the option in the field.extra_data["select_options"] list
            ret["value"] = next(
                (
                    idx
                    for idx, option in enumerate(
                        instance.field.extra_data["select_options"],
                    )
                    if option["id"] == instance.value
                ),
                None,
            )

        return ret

    class Meta:
        model = CustomFieldInstance
        fields = [
@@ -1004,20 +937,6 @@ class NotesSerializer(serializers.ModelSerializer):
        fields = ["id", "note", "created", "user"]
        ordering = ["-created"]

    def to_representation(self, instance):
        ret = super().to_representation(instance)

        request = self.context.get("request")
        api_version = int(
            request.version if request else settings.REST_FRAMEWORK["DEFAULT_VERSION"],
        )

        if api_version < 8 and "user" in ret:
            user_id = ret["user"]["id"]
            ret["user"] = user_id

        return ret


def _get_viewable_duplicates(
    document: Document,
@@ -1172,22 +1091,6 @@ class DocumentSerializer(
            doc["content"] = getattr(instance, "effective_content") or ""
        if self.truncate_content and "content" in self.fields:
            doc["content"] = doc.get("content")[0:550]

        request = self.context.get("request")
        api_version = int(
            request.version if request else settings.REST_FRAMEWORK["DEFAULT_VERSION"],
        )

        if api_version < 9 and "created" in self.fields:
            # provide created as a datetime for backwards compatibility
            from django.utils import timezone

            doc["created"] = timezone.make_aware(
                datetime.combine(
                    instance.created,
                    datetime.min.time(),
                ),
            ).isoformat()
        return doc

    def to_internal_value(self, data):
@@ -1655,11 +1558,124 @@ class DocumentListSerializer(serializers.Serializer):
        return documents


class SourceModeValidationMixin:
    def validate_source_mode(self, source_mode: str) -> str:
        if source_mode not in bulk_edit.SourceModeChoices.__dict__.values():
            raise serializers.ValidationError("Invalid source_mode")
        return source_mode

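A note on the membership test: SourceModeChoices.__dict__.values() also contains dunder attributes and methods, so the check relies on those never colliding with a client-supplied string. If SourceModeChoices is a Django TextChoices (an assumption, its definition is not shown in this diff), the enum's own values list would be a tighter check:

from django.db import models


class SourceModeChoices(models.TextChoices):
    # Hypothetical members; only LATEST_VERSION appears in this diff.
    LATEST_VERSION = "latest_version", "Latest version"
    ORIGINAL = "original", "Original"


def validate_source_mode(source_mode: str) -> str:
    # .values enumerates exactly the declared choice values, nothing else.
    if source_mode not in SourceModeChoices.values:
        raise ValueError("Invalid source_mode")
    return source_mode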
class RotateDocumentsSerializer(DocumentListSerializer, SourceModeValidationMixin):
    degrees = serializers.IntegerField(required=True)
    source_mode = serializers.CharField(
        required=False,
        default=bulk_edit.SourceModeChoices.LATEST_VERSION,
    )


class MergeDocumentsSerializer(DocumentListSerializer, SourceModeValidationMixin):
    metadata_document_id = serializers.IntegerField(
        required=False,
        allow_null=True,
    )
    delete_originals = serializers.BooleanField(required=False, default=False)
    archive_fallback = serializers.BooleanField(required=False, default=False)
    source_mode = serializers.CharField(
        required=False,
        default=bulk_edit.SourceModeChoices.LATEST_VERSION,
    )


class EditPdfDocumentsSerializer(DocumentListSerializer, SourceModeValidationMixin):
    operations = serializers.ListField(required=True)
    delete_original = serializers.BooleanField(required=False, default=False)
    update_document = serializers.BooleanField(required=False, default=False)
    include_metadata = serializers.BooleanField(required=False, default=True)
    source_mode = serializers.CharField(
        required=False,
        default=bulk_edit.SourceModeChoices.LATEST_VERSION,
    )

    def validate(self, attrs):
        documents = attrs["documents"]
        if len(documents) > 1:
            raise serializers.ValidationError(
                "Edit PDF method only supports one document",
            )

        operations = attrs["operations"]
        if not isinstance(operations, list):
            raise serializers.ValidationError("operations must be a list")

        for op in operations:
            if not isinstance(op, dict):
                raise serializers.ValidationError("invalid operation entry")
            if "page" not in op or not isinstance(op["page"], int):
                raise serializers.ValidationError("page must be an integer")
            if "rotate" in op and not isinstance(op["rotate"], int):
                raise serializers.ValidationError("rotate must be an integer")
            if "doc" in op and not isinstance(op["doc"], int):
                raise serializers.ValidationError("doc must be an integer")

        if attrs["update_document"]:
            max_idx = max(op.get("doc", 0) for op in operations)
            if max_idx > 0:
                raise serializers.ValidationError(
                    "update_document only allowed with a single output document",
                )

        doc = Document.objects.get(id=documents[0])
        if doc.page_count:
            for op in operations:
                if op["page"] < 1 or op["page"] > doc.page_count:
                    raise serializers.ValidationError(
                        f"Page {op['page']} is out of bounds for document with {doc.page_count} pages.",
                    )
        return attrs


class RemovePasswordDocumentsSerializer(
    DocumentListSerializer,
    SourceModeValidationMixin,
):
    password = serializers.CharField(required=True)
    update_document = serializers.BooleanField(required=False, default=False)
    delete_original = serializers.BooleanField(required=False, default=False)
    include_metadata = serializers.BooleanField(required=False, default=True)
    source_mode = serializers.CharField(
        required=False,
        default=bulk_edit.SourceModeChoices.LATEST_VERSION,
    )


class DeleteDocumentsSerializer(DocumentListSerializer):
    pass


class ReprocessDocumentsSerializer(DocumentListSerializer):
    pass


class BulkEditSerializer(
    SerializerWithPerms,
    DocumentListSerializer,
    SetPermissionsMixin,
    SourceModeValidationMixin,
):
    # TODO: remove this and related backwards compatibility code when API v9 is dropped
    # split, delete_pages can be removed entirely
    MOVED_DOCUMENT_ACTION_ENDPOINTS = {
        "delete": "/api/documents/delete/",
        "reprocess": "/api/documents/reprocess/",
        "rotate": "/api/documents/rotate/",
        "merge": "/api/documents/merge/",
        "edit_pdf": "/api/documents/edit_pdf/",
        "remove_password": "/api/documents/remove_password/",
        "split": "/api/documents/edit_pdf/",
        "delete_pages": "/api/documents/edit_pdf/",
    }
    LEGACY_DOCUMENT_ACTION_METHODS = tuple(MOVED_DOCUMENT_ACTION_ENDPOINTS.keys())

    method = serializers.ChoiceField(
        choices=[
            "set_correspondent",
@@ -1669,15 +1685,8 @@ class BulkEditSerializer(
            "remove_tag",
            "modify_tags",
            "modify_custom_fields",
            "delete",
            "reprocess",
            "set_permissions",
            "rotate",
            "merge",
            "split",
            "delete_pages",
            "edit_pdf",
            "remove_password",
            *LEGACY_DOCUMENT_ACTION_METHODS,
        ],
        label="Method",
        write_only=True,
@@ -1722,6 +1731,19 @@ class BulkEditSerializer(
            f"Some custom fields in {name} don't exist or were specified twice.",
        )

        if isinstance(custom_fields, dict):
            custom_field_map = CustomField.objects.in_bulk(ids)
            for raw_field_id, value in custom_fields.items():
                field = custom_field_map.get(int(raw_field_id))
                if (
                    field is not None
                    and field.data_type == CustomField.FieldDataType.DOCUMENTLINK
                    and value is not None
                ):
                    if not isinstance(value, list):
                        raise serializers.ValidationError("Value must be a list")
                    validate_documentlink_targets(self.user, value)

    def validate_method(self, method):
        if method == "set_correspondent":
            return bulk_edit.set_correspondent
@@ -1755,8 +1777,7 @@ class BulkEditSerializer(
            return bulk_edit.edit_pdf
        elif method == "remove_password":
            return bulk_edit.remove_password
        else:  # pragma: no cover
            # This will never happen as it is handled by the ChoiceField
        else:
            raise serializers.ValidationError("Unsupported method.")

    def _validate_parameters_tags(self, parameters) -> None:
@@ -1866,9 +1887,7 @@ class BulkEditSerializer(
            "source_mode",
            bulk_edit.SourceModeChoices.LATEST_VERSION,
        )
        if source_mode not in bulk_edit.SourceModeChoices.__dict__.values():
            raise serializers.ValidationError("Invalid source_mode")
        parameters["source_mode"] = source_mode
        parameters["source_mode"] = self.validate_source_mode(source_mode)

    def _validate_parameters_split(self, parameters) -> None:
        if "pages" not in parameters:

@@ -2,5 +2,4 @@ from django.dispatch import Signal

document_consumption_started = Signal()
document_consumption_finished = Signal()
document_consumer_declaration = Signal()
document_updated = Signal()

@@ -1,5 +1,6 @@
from __future__ import annotations

import hashlib
import logging
import shutil
from pathlib import Path
@@ -403,6 +404,14 @@ class CannotMoveFilesException(Exception):
    pass


def _path_matches_checksum(path: Path, checksum: str | None) -> bool:
    if checksum is None or not path.is_file():
        return False

    with path.open("rb") as f:
        return hashlib.md5(f.read()).hexdigest() == checksum


def _filename_template_uses_custom_fields(doc: Document) -> bool:
    template = None
    if doc.storage_path is not None:
@@ -473,10 +482,12 @@ def update_filename_and_move_files(
    old_filename = instance.filename
    old_source_path = instance.source_path
    move_original = False
    original_already_moved = False

    old_archive_filename = instance.archive_filename
    old_archive_path = instance.archive_path
    move_archive = False
    archive_already_moved = False

    candidate_filename = generate_filename(instance)
    if len(str(candidate_filename)) > Document.MAX_STORED_FILENAME_LENGTH:
@@ -497,14 +508,23 @@ def update_filename_and_move_files(
        candidate_source_path.exists()
        and candidate_source_path != old_source_path
    ):
        # Only fall back to unique search when there is an actual conflict
        new_filename = generate_unique_filename(instance)
        if not old_source_path.is_file() and _path_matches_checksum(
            candidate_source_path,
            instance.checksum,
        ):
            new_filename = candidate_filename
            original_already_moved = True
        else:
            # Only fall back to unique search when there is an actual conflict
            new_filename = generate_unique_filename(instance)
    else:
        new_filename = candidate_filename

    # Need to convert to string to be able to save it to the db
    instance.filename = str(new_filename)
    move_original = old_filename != instance.filename
    move_original = (
        old_filename != instance.filename and not original_already_moved
    )

    if instance.has_archive_version:
        archive_candidate = generate_filename(instance, archive_filename=True)
@@ -525,24 +545,38 @@ def update_filename_and_move_files(
            archive_candidate_path.exists()
            and archive_candidate_path != old_archive_path
        ):
            new_archive_filename = generate_unique_filename(
                instance,
                archive_filename=True,
            )
            if not old_archive_path.is_file() and _path_matches_checksum(
                archive_candidate_path,
                instance.archive_checksum,
            ):
                new_archive_filename = archive_candidate
                archive_already_moved = True
            else:
                new_archive_filename = generate_unique_filename(
                    instance,
                    archive_filename=True,
                )
        else:
            new_archive_filename = archive_candidate

        instance.archive_filename = str(new_archive_filename)

        move_archive = old_archive_filename != instance.archive_filename
        move_archive = (
            old_archive_filename != instance.archive_filename
            and not archive_already_moved
        )
    else:
        move_archive = False

    if not move_original and not move_archive:
        # Just update modified. Also, don't save() here to prevent infinite recursion.
        Document.objects.filter(pk=instance.pk).update(
            modified=timezone.now(),
        )
        updates = {"modified": timezone.now()}
        if old_filename != instance.filename:
            updates["filename"] = instance.filename
        if old_archive_filename != instance.archive_filename:
            updates["archive_filename"] = instance.archive_filename

        # Don't save() here to prevent infinite recursion.
        Document.objects.filter(pk=instance.pk).update(**updates)
        return

    if move_original:
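The already-moved checks above make the rename idempotent after a crash: if the old path is gone but a file with the document's checksum already sits at the candidate path, an earlier run evidently completed the move, so only the DB row needs fixing. A standalone demonstration of the predicate (paths and data are illustrative):

import hashlib
import tempfile
from pathlib import Path


def path_matches_checksum(path: Path, checksum: str | None) -> bool:
    if checksum is None or not path.is_file():
        return False
    return hashlib.md5(path.read_bytes()).hexdigest() == checksum


with tempfile.TemporaryDirectory() as tmp:
    old = Path(tmp) / "old.pdf"
    new = Path(tmp) / "new.pdf"
    new.write_bytes(b"document body")  # simulate a completed move
    checksum = hashlib.md5(b"document body").hexdigest()

    # Old path missing + candidate matches the checksum -> already moved.
    assert not old.is_file()
    assert path_matches_checksum(new, checksum)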
@@ -932,8 +966,25 @@ def run_workflows(
    if not use_overrides:
        # limit title to 128 characters
        document.title = document.title[:128]
        # save first before setting tags
        document.save()
        # Save only the fields that workflow actions can set directly.
        # Deliberately excludes filename and archive_filename — those are
        # managed exclusively by update_filename_and_move_files via the
        # post_save signal. Writing stale in-memory values here would revert
        # a concurrent update_filename_and_move_files DB write, leaving the
        # DB pointing at the old path while the file is already at the new
        # one (see: https://github.com/paperless-ngx/paperless-ngx/issues/12386).
        # modified has auto_now=True but is not auto-added when update_fields
        # is specified, so it must be listed explicitly.
        document.save(
            update_fields=[
                "title",
                "correspondent",
                "document_type",
                "storage_path",
                "owner",
                "modified",
            ],
        )
        document.tags.set(doc_tag_ids)

        WorkflowRun.objects.create(
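The explicit "modified" entry matters because Django only writes the columns named in update_fields; an auto_now field still receives a fresh in-memory value in pre_save, but it never reaches the database unless it is listed. A small illustration (the model and field names are hypothetical):

# Assuming a model with: modified = models.DateTimeField(auto_now=True)
doc.title = "New title"

doc.save(update_fields=["title"])
# UPDATE ... SET title = ...                     ("modified" column unchanged)

doc.save(update_fields=["title", "modified"])
# UPDATE ... SET title = ..., modified = now()   (auto_now actually persisted)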
@@ -1,5 +1,4 @@
import datetime
import hashlib
import logging
import shutil
import uuid
@@ -52,19 +51,20 @@ from documents.models import StoragePath
from documents.models import Tag
from documents.models import WorkflowRun
from documents.models import WorkflowTrigger
from documents.parsers import DocumentParser
from documents.parsers import get_parser_class_for_mime_type
from documents.plugins.base import ConsumeTaskPlugin
from documents.plugins.base import ProgressManager
from documents.plugins.base import StopConsumeTaskError
from documents.plugins.helpers import ProgressManager
from documents.plugins.helpers import ProgressStatusOptions
from documents.sanity_checker import SanityCheckFailedException
from documents.signals import document_updated
from documents.signals.handlers import cleanup_document_deletion
from documents.signals.handlers import run_workflows
from documents.signals.handlers import send_websocket_document_updated
from documents.utils import compute_checksum
from documents.workflows.utils import get_workflows_for_trigger
from paperless.config import AIConfig
from paperless.parsers import ParserContext
from paperless.parsers.registry import get_parser_registry
from paperless_ai.indexing import llm_index_add_or_update_document
from paperless_ai.indexing import llm_index_remove_document
from paperless_ai.indexing import update_llm_index
@@ -100,7 +100,11 @@ def index_reindex(*, iter_wrapper: IterWrapper[Document] = _identity) -> None:


@shared_task
def train_classifier(*, scheduled=True) -> None:
def train_classifier(
    *,
    scheduled=True,
    status_callback: Callable[[str], None] | None = None,
) -> None:
    task = PaperlessTask.objects.create(
        type=PaperlessTask.TaskType.SCHEDULED_TASK
        if scheduled
@@ -136,7 +140,7 @@ def train_classifier(*, scheduled=True) -> None:
    classifier = DocumentClassifier()

    try:
        if classifier.train():
        if classifier.train(status_callback=status_callback):
            logger.info(
                f"Saving updated classifier model to {settings.MODEL_FILE}...",
            )
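The new status_callback lets callers observe training progress; per the signature above it is a Callable[[str], None], so the classifier presumably invokes it with human-readable messages. A minimal sketch of a caller-side callback (the logger name and task wiring are illustrative):

import logging
from collections.abc import Callable

logger = logging.getLogger("paperless.classifier.progress")


def make_status_callback(task_id: int) -> Callable[[str], None]:
    def status_callback(message: str) -> None:
        # Could instead persist progress to the PaperlessTask row for the UI.
        logger.info("[task %s] %s", task_id, message)

    return status_callback


# train_classifier(scheduled=False, status_callback=make_status_callback(42))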
@@ -300,7 +304,11 @@ def update_document_content_maybe_archive_file(document_id) -> None:

    mime_type = document.mime_type

    parser_class: type[DocumentParser] = get_parser_class_for_mime_type(mime_type)
    parser_class = get_parser_registry().get_parser_for_file(
        mime_type,
        document.original_filename or "",
        document.source_path,
    )

    if not parser_class:
        logger.error(
@@ -309,97 +317,91 @@ def update_document_content_maybe_archive_file(document_id) -> None:
        )
        return

    parser: DocumentParser = parser_class(logging_group=uuid.uuid4())
    with parser_class() as parser:
        parser.configure(ParserContext())

    try:
        parser.parse(document.source_path, mime_type, document.get_public_filename())
        try:
            parser.parse(document.source_path, mime_type)

            thumbnail = parser.get_thumbnail(
                document.source_path,
                mime_type,
                document.get_public_filename(),
            )
            thumbnail = parser.get_thumbnail(document.source_path, mime_type)

            with transaction.atomic():
                oldDocument = Document.objects.get(pk=document.pk)
                if parser.get_archive_path():
                    with Path(parser.get_archive_path()).open("rb") as f:
                        checksum = hashlib.md5(f.read()).hexdigest()
                    # I'm going to save first so that in case the file move
                    # fails, the database is rolled back.
                    # We also don't use save() since that triggers the filehandling
                    # logic, and we don't want that yet (file not yet in place)
                    document.archive_filename = generate_unique_filename(
                        document,
                        archive_filename=True,
                    )
                    Document.objects.filter(pk=document.pk).update(
                        archive_checksum=checksum,
                        content=parser.get_text(),
                        archive_filename=document.archive_filename,
                    )
                    newDocument = Document.objects.get(pk=document.pk)
                    if settings.AUDIT_LOG_ENABLED:
                        LogEntry.objects.log_create(
                            instance=oldDocument,
                            changes={
                                "content": [oldDocument.content, newDocument.content],
                                "archive_checksum": [
                                    oldDocument.archive_checksum,
                                    newDocument.archive_checksum,
                                ],
                                "archive_filename": [
                                    oldDocument.archive_filename,
                                    newDocument.archive_filename,
                                ],
                            },
                            additional_data={
                                "reason": "Update document content",
                            },
                            action=LogEntry.Action.UPDATE,
                        )
                else:
                    Document.objects.filter(pk=document.pk).update(
                        content=parser.get_text(),
                    )

                    if settings.AUDIT_LOG_ENABLED:
                        LogEntry.objects.log_create(
                            instance=oldDocument,
                            changes={
                                "content": [oldDocument.content, parser.get_text()],
                            },
                            additional_data={
                                "reason": "Update document content",
                            },
                            action=LogEntry.Action.UPDATE,
                        )

        with FileLock(settings.MEDIA_LOCK):
            with transaction.atomic():
                oldDocument = Document.objects.get(pk=document.pk)
                if parser.get_archive_path():
                    create_source_path_directory(document.archive_path)
                    shutil.move(parser.get_archive_path(), document.archive_path)
                    shutil.move(thumbnail, document.thumbnail_path)
                    checksum = compute_checksum(parser.get_archive_path())
                    # I'm going to save first so that in case the file move
                    # fails, the database is rolled back.
                    # We also don't use save() since that triggers the filehandling
                    # logic, and we don't want that yet (file not yet in place)
                    document.archive_filename = generate_unique_filename(
                        document,
                        archive_filename=True,
                    )
                    Document.objects.filter(pk=document.pk).update(
                        archive_checksum=checksum,
                        content=parser.get_text(),
                        archive_filename=document.archive_filename,
                    )
                    newDocument = Document.objects.get(pk=document.pk)
                    if settings.AUDIT_LOG_ENABLED:
                        LogEntry.objects.log_create(
                            instance=oldDocument,
                            changes={
                                "content": [oldDocument.content, newDocument.content],
                                "archive_checksum": [
                                    oldDocument.archive_checksum,
                                    newDocument.archive_checksum,
                                ],
                                "archive_filename": [
                                    oldDocument.archive_filename,
                                    newDocument.archive_filename,
                                ],
                            },
                            additional_data={
                                "reason": "Update document content",
                            },
                            action=LogEntry.Action.UPDATE,
                        )
                else:
                    Document.objects.filter(pk=document.pk).update(
                        content=parser.get_text(),
                    )

        document.refresh_from_db()
        logger.info(
            f"Updating index for document {document_id} ({document.archive_checksum})",
        )
        with index.open_index_writer() as writer:
            index.update_document(writer, document)
                    if settings.AUDIT_LOG_ENABLED:
                        LogEntry.objects.log_create(
                            instance=oldDocument,
                            changes={
                                "content": [oldDocument.content, parser.get_text()],
                            },
                            additional_data={
                                "reason": "Update document content",
                            },
                            action=LogEntry.Action.UPDATE,
                        )

        ai_config = AIConfig()
        if ai_config.llm_index_enabled:
            llm_index_add_or_update_document(document)
            with FileLock(settings.MEDIA_LOCK):
                if parser.get_archive_path():
                    create_source_path_directory(document.archive_path)
                    shutil.move(parser.get_archive_path(), document.archive_path)
                shutil.move(thumbnail, document.thumbnail_path)

        clear_document_caches(document.pk)
            document.refresh_from_db()
            logger.info(
                f"Updating index for document {document_id} ({document.archive_checksum})",
            )
            with index.open_index_writer() as writer:
                index.update_document(writer, document)

    except Exception:
        logger.exception(
            f"Error while parsing document {document} (ID: {document_id})",
        )
    finally:
        parser.cleanup()
            ai_config = AIConfig()
            if ai_config.llm_index_enabled:
                llm_index_add_or_update_document(document)

            clear_document_caches(document.pk)

        except Exception:
            logger.exception(
                f"Error while parsing document {document} (ID: {document_id})",
            )


@shared_task
@@ -530,13 +532,13 @@ def check_scheduled_workflows() -> None:
        id__in=matched_ids,
    )

    if documents.count() > 0:
    if documents.exists():
        documents = prefilter_documents_by_workflowtrigger(
            documents,
            trigger,
        )

        if documents.count() > 0:
        if documents.exists():
            logger.debug(
                f"Found {documents.count()} documents for trigger {trigger}",
            )
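Swapping count() > 0 for exists() is a small but real query optimization: exists() compiles to a SELECT ... LIMIT 1 that stops at the first match, while count() forces the database to scan and aggregate every matching row before the comparison. Roughly:

from documents.models import Document

qs = Document.objects.filter(title__icontains="invoice")

# SELECT COUNT(*) ... : counts all matching rows, then compares to zero.
slow = qs.count() > 0

# SELECT 1 ... LIMIT 1 : returns as soon as one row is found.
fast = qs.exists()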