mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-03-10 19:21:24 +00:00
Compare commits
37 Commits
feature-se
...
feature-pa
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f2eef8a6d1 | ||
|
|
c8e5e6c4e2 | ||
|
|
0b00c66b96 | ||
|
|
e4cadff749 | ||
|
|
d4b6075a2a | ||
|
|
130a73ec71 | ||
|
|
8ebc24bcfa | ||
|
|
d7052b8dee | ||
|
|
c96e9f5dc7 | ||
|
|
f7f162424b | ||
|
|
cdeabaf75d | ||
|
|
1221e7f21c | ||
|
|
3e32e90355 | ||
|
|
63cb75564e | ||
|
|
404ef6b40d | ||
|
|
8c40491034 | ||
|
|
0f6bdaf5de | ||
|
|
6955d6c07f | ||
|
|
d85ee29976 | ||
|
|
0c7d56c5e7 | ||
|
|
0bcf904e3a | ||
|
|
bcc2f11152 | ||
|
|
e18b1fd99d | ||
|
|
e30676f889 | ||
|
|
2a28549c5a | ||
|
|
4badf0e7c2 | ||
|
|
bc26d94593 | ||
|
|
93cbbf34b7 | ||
|
|
1e8622494d | ||
|
|
0c3298f030 | ||
|
|
2b288c094d | ||
|
|
2cdb1424ef | ||
|
|
f5c0c21922 | ||
|
|
91ddda9256 | ||
|
|
9d5e618de8 | ||
|
|
50ae49c7da | ||
|
|
ba023ef332 |
86
.github/workflows/ci-backend.yml
vendored
86
.github/workflows/ci-backend.yml
vendored
@@ -3,21 +3,9 @@ on:
|
|||||||
push:
|
push:
|
||||||
branches-ignore:
|
branches-ignore:
|
||||||
- 'translations**'
|
- 'translations**'
|
||||||
paths:
|
|
||||||
- 'src/**'
|
|
||||||
- 'pyproject.toml'
|
|
||||||
- 'uv.lock'
|
|
||||||
- 'docker/compose/docker-compose.ci-test.yml'
|
|
||||||
- '.github/workflows/ci-backend.yml'
|
|
||||||
pull_request:
|
pull_request:
|
||||||
branches-ignore:
|
branches-ignore:
|
||||||
- 'translations**'
|
- 'translations**'
|
||||||
paths:
|
|
||||||
- 'src/**'
|
|
||||||
- 'pyproject.toml'
|
|
||||||
- 'uv.lock'
|
|
||||||
- 'docker/compose/docker-compose.ci-test.yml'
|
|
||||||
- '.github/workflows/ci-backend.yml'
|
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
concurrency:
|
concurrency:
|
||||||
group: backend-${{ github.event.pull_request.number || github.ref }}
|
group: backend-${{ github.event.pull_request.number || github.ref }}
|
||||||
@@ -26,7 +14,55 @@ env:
|
|||||||
DEFAULT_UV_VERSION: "0.10.x"
|
DEFAULT_UV_VERSION: "0.10.x"
|
||||||
NLTK_DATA: "/usr/share/nltk_data"
|
NLTK_DATA: "/usr/share/nltk_data"
|
||||||
jobs:
|
jobs:
|
||||||
|
changes:
|
||||||
|
name: Detect Backend Changes
|
||||||
|
runs-on: ubuntu-slim
|
||||||
|
outputs:
|
||||||
|
backend_changed: ${{ steps.force.outputs.run_all == 'true' || steps.filter.outputs.backend == 'true' }}
|
||||||
|
steps:
|
||||||
|
- name: Checkout
|
||||||
|
uses: actions/checkout@v6.0.2
|
||||||
|
with:
|
||||||
|
fetch-depth: 0
|
||||||
|
- name: Decide run mode
|
||||||
|
id: force
|
||||||
|
run: |
|
||||||
|
if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
|
||||||
|
echo "run_all=true" >> "$GITHUB_OUTPUT"
|
||||||
|
elif [[ "${{ github.event_name }}" == "push" && ( "${{ github.ref_name }}" == "main" || "${{ github.ref_name }}" == "dev" ) ]]; then
|
||||||
|
echo "run_all=true" >> "$GITHUB_OUTPUT"
|
||||||
|
else
|
||||||
|
echo "run_all=false" >> "$GITHUB_OUTPUT"
|
||||||
|
fi
|
||||||
|
- name: Set diff range
|
||||||
|
id: range
|
||||||
|
if: steps.force.outputs.run_all != 'true'
|
||||||
|
run: |
|
||||||
|
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
|
||||||
|
echo "base=${{ github.event.pull_request.base.sha }}" >> "$GITHUB_OUTPUT"
|
||||||
|
elif [[ "${{ github.event.created }}" == "true" ]]; then
|
||||||
|
echo "base=${{ github.event.repository.default_branch }}" >> "$GITHUB_OUTPUT"
|
||||||
|
else
|
||||||
|
echo "base=${{ github.event.before }}" >> "$GITHUB_OUTPUT"
|
||||||
|
fi
|
||||||
|
echo "ref=${{ github.sha }}" >> "$GITHUB_OUTPUT"
|
||||||
|
- name: Detect changes
|
||||||
|
id: filter
|
||||||
|
if: steps.force.outputs.run_all != 'true'
|
||||||
|
uses: dorny/paths-filter@v3.0.2
|
||||||
|
with:
|
||||||
|
base: ${{ steps.range.outputs.base }}
|
||||||
|
ref: ${{ steps.range.outputs.ref }}
|
||||||
|
filters: |
|
||||||
|
backend:
|
||||||
|
- 'src/**'
|
||||||
|
- 'pyproject.toml'
|
||||||
|
- 'uv.lock'
|
||||||
|
- 'docker/compose/docker-compose.ci-test.yml'
|
||||||
|
- '.github/workflows/ci-backend.yml'
|
||||||
test:
|
test:
|
||||||
|
needs: changes
|
||||||
|
if: needs.changes.outputs.backend_changed == 'true'
|
||||||
name: "Python ${{ matrix.python-version }}"
|
name: "Python ${{ matrix.python-version }}"
|
||||||
runs-on: ubuntu-24.04
|
runs-on: ubuntu-24.04
|
||||||
strategy:
|
strategy:
|
||||||
@@ -100,6 +136,8 @@ jobs:
|
|||||||
docker compose --file docker/compose/docker-compose.ci-test.yml logs
|
docker compose --file docker/compose/docker-compose.ci-test.yml logs
|
||||||
docker compose --file docker/compose/docker-compose.ci-test.yml down
|
docker compose --file docker/compose/docker-compose.ci-test.yml down
|
||||||
typing:
|
typing:
|
||||||
|
needs: changes
|
||||||
|
if: needs.changes.outputs.backend_changed == 'true'
|
||||||
name: Check project typing
|
name: Check project typing
|
||||||
runs-on: ubuntu-24.04
|
runs-on: ubuntu-24.04
|
||||||
env:
|
env:
|
||||||
@@ -150,3 +188,27 @@ jobs:
|
|||||||
--show-error-codes \
|
--show-error-codes \
|
||||||
--warn-unused-configs \
|
--warn-unused-configs \
|
||||||
src/ | uv run mypy-baseline filter
|
src/ | uv run mypy-baseline filter
|
||||||
|
gate:
|
||||||
|
name: Backend CI Gate
|
||||||
|
needs: [changes, test, typing]
|
||||||
|
if: always()
|
||||||
|
runs-on: ubuntu-slim
|
||||||
|
steps:
|
||||||
|
- name: Check gate
|
||||||
|
run: |
|
||||||
|
if [[ "${{ needs.changes.outputs.backend_changed }}" != "true" ]]; then
|
||||||
|
echo "No backend-relevant changes detected."
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [[ "${{ needs.test.result }}" != "success" ]]; then
|
||||||
|
echo "::error::Backend test job result: ${{ needs.test.result }}"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [[ "${{ needs.typing.result }}" != "success" ]]; then
|
||||||
|
echo "::error::Backend typing job result: ${{ needs.typing.result }}"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "Backend checks passed."
|
||||||
|
|||||||
12
.github/workflows/ci-docker.yml
vendored
12
.github/workflows/ci-docker.yml
vendored
@@ -149,15 +149,16 @@ jobs:
|
|||||||
mkdir -p /tmp/digests
|
mkdir -p /tmp/digests
|
||||||
digest="${{ steps.build.outputs.digest }}"
|
digest="${{ steps.build.outputs.digest }}"
|
||||||
echo "digest=${digest}"
|
echo "digest=${digest}"
|
||||||
touch "/tmp/digests/${digest#sha256:}"
|
echo "${digest}" > "/tmp/digests/digest-${{ matrix.arch }}.txt"
|
||||||
- name: Upload digest
|
- name: Upload digest
|
||||||
if: steps.check-push.outputs.should-push == 'true'
|
if: steps.check-push.outputs.should-push == 'true'
|
||||||
uses: actions/upload-artifact@v7.0.0
|
uses: actions/upload-artifact@v7.0.0
|
||||||
with:
|
with:
|
||||||
name: digests-${{ matrix.arch }}
|
name: digests-${{ matrix.arch }}
|
||||||
path: /tmp/digests/*
|
path: /tmp/digests/digest-${{ matrix.arch }}.txt
|
||||||
if-no-files-found: error
|
if-no-files-found: error
|
||||||
retention-days: 1
|
retention-days: 1
|
||||||
|
archive: false
|
||||||
merge-and-push:
|
merge-and-push:
|
||||||
name: Merge and Push Manifest
|
name: Merge and Push Manifest
|
||||||
runs-on: ubuntu-24.04
|
runs-on: ubuntu-24.04
|
||||||
@@ -171,7 +172,7 @@ jobs:
|
|||||||
uses: actions/download-artifact@v8.0.0
|
uses: actions/download-artifact@v8.0.0
|
||||||
with:
|
with:
|
||||||
path: /tmp/digests
|
path: /tmp/digests
|
||||||
pattern: digests-*
|
pattern: digest-*.txt
|
||||||
merge-multiple: true
|
merge-multiple: true
|
||||||
- name: List digests
|
- name: List digests
|
||||||
run: |
|
run: |
|
||||||
@@ -217,8 +218,9 @@ jobs:
|
|||||||
tags=$(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "${DOCKER_METADATA_OUTPUT_JSON}")
|
tags=$(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "${DOCKER_METADATA_OUTPUT_JSON}")
|
||||||
|
|
||||||
digests=""
|
digests=""
|
||||||
for digest in *; do
|
for digest_file in digest-*.txt; do
|
||||||
digests+="${{ env.REGISTRY }}/${REPOSITORY}@sha256:${digest} "
|
digest=$(cat "${digest_file}")
|
||||||
|
digests+="${{ env.REGISTRY }}/${REPOSITORY}@${digest} "
|
||||||
done
|
done
|
||||||
|
|
||||||
echo "Creating manifest with tags: ${tags}"
|
echo "Creating manifest with tags: ${tags}"
|
||||||
|
|||||||
88
.github/workflows/ci-docs.yml
vendored
88
.github/workflows/ci-docs.yml
vendored
@@ -1,22 +1,9 @@
|
|||||||
name: Documentation
|
name: Documentation
|
||||||
on:
|
on:
|
||||||
push:
|
push:
|
||||||
branches:
|
branches-ignore:
|
||||||
- main
|
- 'translations**'
|
||||||
- dev
|
|
||||||
paths:
|
|
||||||
- 'docs/**'
|
|
||||||
- 'zensical.toml'
|
|
||||||
- 'pyproject.toml'
|
|
||||||
- 'uv.lock'
|
|
||||||
- '.github/workflows/ci-docs.yml'
|
|
||||||
pull_request:
|
pull_request:
|
||||||
paths:
|
|
||||||
- 'docs/**'
|
|
||||||
- 'zensical.toml'
|
|
||||||
- 'pyproject.toml'
|
|
||||||
- 'uv.lock'
|
|
||||||
- '.github/workflows/ci-docs.yml'
|
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
concurrency:
|
concurrency:
|
||||||
group: docs-${{ github.event.pull_request.number || github.ref }}
|
group: docs-${{ github.event.pull_request.number || github.ref }}
|
||||||
@@ -29,7 +16,55 @@ env:
|
|||||||
DEFAULT_UV_VERSION: "0.10.x"
|
DEFAULT_UV_VERSION: "0.10.x"
|
||||||
DEFAULT_PYTHON_VERSION: "3.12"
|
DEFAULT_PYTHON_VERSION: "3.12"
|
||||||
jobs:
|
jobs:
|
||||||
|
changes:
|
||||||
|
name: Detect Docs Changes
|
||||||
|
runs-on: ubuntu-slim
|
||||||
|
outputs:
|
||||||
|
docs_changed: ${{ steps.force.outputs.run_all == 'true' || steps.filter.outputs.docs == 'true' }}
|
||||||
|
steps:
|
||||||
|
- name: Checkout
|
||||||
|
uses: actions/checkout@v6.0.2
|
||||||
|
with:
|
||||||
|
fetch-depth: 0
|
||||||
|
- name: Decide run mode
|
||||||
|
id: force
|
||||||
|
run: |
|
||||||
|
if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
|
||||||
|
echo "run_all=true" >> "$GITHUB_OUTPUT"
|
||||||
|
elif [[ "${{ github.event_name }}" == "push" && ( "${{ github.ref_name }}" == "main" || "${{ github.ref_name }}" == "dev" ) ]]; then
|
||||||
|
echo "run_all=true" >> "$GITHUB_OUTPUT"
|
||||||
|
else
|
||||||
|
echo "run_all=false" >> "$GITHUB_OUTPUT"
|
||||||
|
fi
|
||||||
|
- name: Set diff range
|
||||||
|
id: range
|
||||||
|
if: steps.force.outputs.run_all != 'true'
|
||||||
|
run: |
|
||||||
|
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
|
||||||
|
echo "base=${{ github.event.pull_request.base.sha }}" >> "$GITHUB_OUTPUT"
|
||||||
|
elif [[ "${{ github.event.created }}" == "true" ]]; then
|
||||||
|
echo "base=${{ github.event.repository.default_branch }}" >> "$GITHUB_OUTPUT"
|
||||||
|
else
|
||||||
|
echo "base=${{ github.event.before }}" >> "$GITHUB_OUTPUT"
|
||||||
|
fi
|
||||||
|
echo "ref=${{ github.sha }}" >> "$GITHUB_OUTPUT"
|
||||||
|
- name: Detect changes
|
||||||
|
id: filter
|
||||||
|
if: steps.force.outputs.run_all != 'true'
|
||||||
|
uses: dorny/paths-filter@v3.0.2
|
||||||
|
with:
|
||||||
|
base: ${{ steps.range.outputs.base }}
|
||||||
|
ref: ${{ steps.range.outputs.ref }}
|
||||||
|
filters: |
|
||||||
|
docs:
|
||||||
|
- 'docs/**'
|
||||||
|
- 'zensical.toml'
|
||||||
|
- 'pyproject.toml'
|
||||||
|
- 'uv.lock'
|
||||||
|
- '.github/workflows/ci-docs.yml'
|
||||||
build:
|
build:
|
||||||
|
needs: changes
|
||||||
|
if: needs.changes.outputs.docs_changed == 'true'
|
||||||
name: Build Documentation
|
name: Build Documentation
|
||||||
runs-on: ubuntu-24.04
|
runs-on: ubuntu-24.04
|
||||||
steps:
|
steps:
|
||||||
@@ -64,8 +99,8 @@ jobs:
|
|||||||
name: github-pages-${{ github.run_id }}-${{ github.run_attempt }}
|
name: github-pages-${{ github.run_id }}-${{ github.run_attempt }}
|
||||||
deploy:
|
deploy:
|
||||||
name: Deploy Documentation
|
name: Deploy Documentation
|
||||||
needs: build
|
needs: [changes, build]
|
||||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
|
if: github.event_name == 'push' && github.ref == 'refs/heads/main' && needs.changes.outputs.docs_changed == 'true'
|
||||||
runs-on: ubuntu-24.04
|
runs-on: ubuntu-24.04
|
||||||
environment:
|
environment:
|
||||||
name: github-pages
|
name: github-pages
|
||||||
@@ -76,3 +111,22 @@ jobs:
|
|||||||
id: deployment
|
id: deployment
|
||||||
with:
|
with:
|
||||||
artifact_name: github-pages-${{ github.run_id }}-${{ github.run_attempt }}
|
artifact_name: github-pages-${{ github.run_id }}-${{ github.run_attempt }}
|
||||||
|
gate:
|
||||||
|
name: Docs CI Gate
|
||||||
|
needs: [changes, build]
|
||||||
|
if: always()
|
||||||
|
runs-on: ubuntu-slim
|
||||||
|
steps:
|
||||||
|
- name: Check gate
|
||||||
|
run: |
|
||||||
|
if [[ "${{ needs.changes.outputs.docs_changed }}" != "true" ]]; then
|
||||||
|
echo "No docs-relevant changes detected."
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [[ "${{ needs.build.result }}" != "success" ]]; then
|
||||||
|
echo "::error::Docs build job result: ${{ needs.build.result }}"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "Docs checks passed."
|
||||||
|
|||||||
102
.github/workflows/ci-frontend.yml
vendored
102
.github/workflows/ci-frontend.yml
vendored
@@ -3,21 +3,60 @@ on:
|
|||||||
push:
|
push:
|
||||||
branches-ignore:
|
branches-ignore:
|
||||||
- 'translations**'
|
- 'translations**'
|
||||||
paths:
|
|
||||||
- 'src-ui/**'
|
|
||||||
- '.github/workflows/ci-frontend.yml'
|
|
||||||
pull_request:
|
pull_request:
|
||||||
branches-ignore:
|
branches-ignore:
|
||||||
- 'translations**'
|
- 'translations**'
|
||||||
paths:
|
|
||||||
- 'src-ui/**'
|
|
||||||
- '.github/workflows/ci-frontend.yml'
|
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
concurrency:
|
concurrency:
|
||||||
group: frontend-${{ github.event.pull_request.number || github.ref }}
|
group: frontend-${{ github.event.pull_request.number || github.ref }}
|
||||||
cancel-in-progress: true
|
cancel-in-progress: true
|
||||||
jobs:
|
jobs:
|
||||||
|
changes:
|
||||||
|
name: Detect Frontend Changes
|
||||||
|
runs-on: ubuntu-slim
|
||||||
|
outputs:
|
||||||
|
frontend_changed: ${{ steps.force.outputs.run_all == 'true' || steps.filter.outputs.frontend == 'true' }}
|
||||||
|
steps:
|
||||||
|
- name: Checkout
|
||||||
|
uses: actions/checkout@v6.0.2
|
||||||
|
with:
|
||||||
|
fetch-depth: 0
|
||||||
|
- name: Decide run mode
|
||||||
|
id: force
|
||||||
|
run: |
|
||||||
|
if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
|
||||||
|
echo "run_all=true" >> "$GITHUB_OUTPUT"
|
||||||
|
elif [[ "${{ github.event_name }}" == "push" && ( "${{ github.ref_name }}" == "main" || "${{ github.ref_name }}" == "dev" ) ]]; then
|
||||||
|
echo "run_all=true" >> "$GITHUB_OUTPUT"
|
||||||
|
else
|
||||||
|
echo "run_all=false" >> "$GITHUB_OUTPUT"
|
||||||
|
fi
|
||||||
|
- name: Set diff range
|
||||||
|
id: range
|
||||||
|
if: steps.force.outputs.run_all != 'true'
|
||||||
|
run: |
|
||||||
|
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
|
||||||
|
echo "base=${{ github.event.pull_request.base.sha }}" >> "$GITHUB_OUTPUT"
|
||||||
|
elif [[ "${{ github.event.created }}" == "true" ]]; then
|
||||||
|
echo "base=${{ github.event.repository.default_branch }}" >> "$GITHUB_OUTPUT"
|
||||||
|
else
|
||||||
|
echo "base=${{ github.event.before }}" >> "$GITHUB_OUTPUT"
|
||||||
|
fi
|
||||||
|
echo "ref=${{ github.sha }}" >> "$GITHUB_OUTPUT"
|
||||||
|
- name: Detect changes
|
||||||
|
id: filter
|
||||||
|
if: steps.force.outputs.run_all != 'true'
|
||||||
|
uses: dorny/paths-filter@v3.0.2
|
||||||
|
with:
|
||||||
|
base: ${{ steps.range.outputs.base }}
|
||||||
|
ref: ${{ steps.range.outputs.ref }}
|
||||||
|
filters: |
|
||||||
|
frontend:
|
||||||
|
- 'src-ui/**'
|
||||||
|
- '.github/workflows/ci-frontend.yml'
|
||||||
install-dependencies:
|
install-dependencies:
|
||||||
|
needs: changes
|
||||||
|
if: needs.changes.outputs.frontend_changed == 'true'
|
||||||
name: Install Dependencies
|
name: Install Dependencies
|
||||||
runs-on: ubuntu-24.04
|
runs-on: ubuntu-24.04
|
||||||
steps:
|
steps:
|
||||||
@@ -45,7 +84,8 @@ jobs:
|
|||||||
run: cd src-ui && pnpm install
|
run: cd src-ui && pnpm install
|
||||||
lint:
|
lint:
|
||||||
name: Lint
|
name: Lint
|
||||||
needs: install-dependencies
|
needs: [changes, install-dependencies]
|
||||||
|
if: needs.changes.outputs.frontend_changed == 'true'
|
||||||
runs-on: ubuntu-24.04
|
runs-on: ubuntu-24.04
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
@@ -73,7 +113,8 @@ jobs:
|
|||||||
run: cd src-ui && pnpm run lint
|
run: cd src-ui && pnpm run lint
|
||||||
unit-tests:
|
unit-tests:
|
||||||
name: "Unit Tests (${{ matrix.shard-index }}/${{ matrix.shard-count }})"
|
name: "Unit Tests (${{ matrix.shard-index }}/${{ matrix.shard-count }})"
|
||||||
needs: install-dependencies
|
needs: [changes, install-dependencies]
|
||||||
|
if: needs.changes.outputs.frontend_changed == 'true'
|
||||||
runs-on: ubuntu-24.04
|
runs-on: ubuntu-24.04
|
||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
@@ -119,7 +160,8 @@ jobs:
|
|||||||
directory: src-ui/coverage/
|
directory: src-ui/coverage/
|
||||||
e2e-tests:
|
e2e-tests:
|
||||||
name: "E2E Tests (${{ matrix.shard-index }}/${{ matrix.shard-count }})"
|
name: "E2E Tests (${{ matrix.shard-index }}/${{ matrix.shard-count }})"
|
||||||
needs: install-dependencies
|
needs: [changes, install-dependencies]
|
||||||
|
if: needs.changes.outputs.frontend_changed == 'true'
|
||||||
runs-on: ubuntu-24.04
|
runs-on: ubuntu-24.04
|
||||||
container: mcr.microsoft.com/playwright:v1.58.2-noble
|
container: mcr.microsoft.com/playwright:v1.58.2-noble
|
||||||
env:
|
env:
|
||||||
@@ -159,7 +201,8 @@ jobs:
|
|||||||
run: cd src-ui && pnpm exec playwright test --shard ${{ matrix.shard-index }}/${{ matrix.shard-count }}
|
run: cd src-ui && pnpm exec playwright test --shard ${{ matrix.shard-index }}/${{ matrix.shard-count }}
|
||||||
bundle-analysis:
|
bundle-analysis:
|
||||||
name: Bundle Analysis
|
name: Bundle Analysis
|
||||||
needs: [unit-tests, e2e-tests]
|
needs: [changes, unit-tests, e2e-tests]
|
||||||
|
if: needs.changes.outputs.frontend_changed == 'true'
|
||||||
runs-on: ubuntu-24.04
|
runs-on: ubuntu-24.04
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
@@ -189,3 +232,42 @@ jobs:
|
|||||||
env:
|
env:
|
||||||
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
|
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
|
||||||
run: cd src-ui && pnpm run build --configuration=production
|
run: cd src-ui && pnpm run build --configuration=production
|
||||||
|
gate:
|
||||||
|
name: Frontend CI Gate
|
||||||
|
needs: [changes, install-dependencies, lint, unit-tests, e2e-tests, bundle-analysis]
|
||||||
|
if: always()
|
||||||
|
runs-on: ubuntu-slim
|
||||||
|
steps:
|
||||||
|
- name: Check gate
|
||||||
|
run: |
|
||||||
|
if [[ "${{ needs.changes.outputs.frontend_changed }}" != "true" ]]; then
|
||||||
|
echo "No frontend-relevant changes detected."
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [[ "${{ needs['install-dependencies'].result }}" != "success" ]]; then
|
||||||
|
echo "::error::Frontend install job result: ${{ needs['install-dependencies'].result }}"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [[ "${{ needs.lint.result }}" != "success" ]]; then
|
||||||
|
echo "::error::Frontend lint job result: ${{ needs.lint.result }}"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [[ "${{ needs['unit-tests'].result }}" != "success" ]]; then
|
||||||
|
echo "::error::Frontend unit-tests job result: ${{ needs['unit-tests'].result }}"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [[ "${{ needs['e2e-tests'].result }}" != "success" ]]; then
|
||||||
|
echo "::error::Frontend e2e-tests job result: ${{ needs['e2e-tests'].result }}"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [[ "${{ needs['bundle-analysis'].result }}" != "success" ]]; then
|
||||||
|
echo "::error::Frontend bundle-analysis job result: ${{ needs['bundle-analysis'].result }}"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "Frontend checks passed."
|
||||||
|
|||||||
17
.github/workflows/pr-bot.yml
vendored
17
.github/workflows/pr-bot.yml
vendored
@@ -2,13 +2,24 @@ name: PR Bot
|
|||||||
on:
|
on:
|
||||||
pull_request_target:
|
pull_request_target:
|
||||||
types: [opened]
|
types: [opened]
|
||||||
permissions:
|
|
||||||
contents: read
|
|
||||||
pull-requests: write
|
|
||||||
jobs:
|
jobs:
|
||||||
|
anti-slop:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
permissions:
|
||||||
|
contents: read
|
||||||
|
issues: read
|
||||||
|
pull-requests: write
|
||||||
|
steps:
|
||||||
|
- uses: peakoss/anti-slop@v0.2.1
|
||||||
|
with:
|
||||||
|
max-failures: 4
|
||||||
|
failure-add-pr-labels: 'ai'
|
||||||
pr-bot:
|
pr-bot:
|
||||||
name: Automated PR Bot
|
name: Automated PR Bot
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
|
permissions:
|
||||||
|
contents: read
|
||||||
|
pull-requests: write
|
||||||
steps:
|
steps:
|
||||||
- name: Label PR by file path or branch name
|
- name: Label PR by file path or branch name
|
||||||
# see .github/labeler.yml for the labeler config
|
# see .github/labeler.yml for the labeler config
|
||||||
|
|||||||
@@ -50,7 +50,7 @@ repos:
|
|||||||
- 'prettier-plugin-organize-imports@4.1.0'
|
- 'prettier-plugin-organize-imports@4.1.0'
|
||||||
# Python hooks
|
# Python hooks
|
||||||
- repo: https://github.com/astral-sh/ruff-pre-commit
|
- repo: https://github.com/astral-sh/ruff-pre-commit
|
||||||
rev: v0.15.0
|
rev: v0.15.5
|
||||||
hooks:
|
hooks:
|
||||||
- id: ruff-check
|
- id: ruff-check
|
||||||
- id: ruff-format
|
- id: ruff-format
|
||||||
|
|||||||
29
docs/api.md
29
docs/api.md
@@ -369,41 +369,38 @@ operations, using the endpoint: `/api/bulk_edit_objects/`, which requires a json
|
|||||||
|
|
||||||
## API Versioning
|
## API Versioning
|
||||||
|
|
||||||
The REST API is versioned since Paperless-ngx 1.3.0.
|
The REST API is versioned.
|
||||||
|
|
||||||
- Versioning ensures that changes to the API don't break older
|
- Versioning ensures that changes to the API don't break older
|
||||||
clients.
|
clients.
|
||||||
- Clients specify the specific version of the API they wish to use
|
- Clients specify the specific version of the API they wish to use
|
||||||
with every request and Paperless will handle the request using the
|
with every request and Paperless will handle the request using the
|
||||||
specified API version.
|
specified API version.
|
||||||
- Even if the underlying data model changes, older API versions will
|
- Even if the underlying data model changes, supported older API
|
||||||
always serve compatible data.
|
versions continue to serve compatible data.
|
||||||
- If no version is specified, Paperless will serve version 1 to ensure
|
- If no version is specified, Paperless serves the configured default
|
||||||
compatibility with older clients that do not request a specific API
|
API version (currently `10`).
|
||||||
version.
|
- Supported API versions are currently `9` and `10`.
|
||||||
|
|
||||||
API versions are specified by submitting an additional HTTP `Accept`
|
API versions are specified by submitting an additional HTTP `Accept`
|
||||||
header with every request:
|
header with every request:
|
||||||
|
|
||||||
```
|
```
|
||||||
Accept: application/json; version=6
|
Accept: application/json; version=10
|
||||||
```
|
```
|
||||||
|
|
||||||
If an invalid version is specified, Paperless 1.3.0 will respond with
|
If an invalid version is specified, Paperless responds with
|
||||||
"406 Not Acceptable" and an error message in the body. Earlier
|
`406 Not Acceptable` and an error message in the body.
|
||||||
versions of Paperless will serve API version 1 regardless of whether a
|
|
||||||
version is specified via the `Accept` header.
|
|
||||||
|
|
||||||
If a client wishes to verify whether it is compatible with any given
|
If a client wishes to verify whether it is compatible with any given
|
||||||
server, the following procedure should be performed:
|
server, the following procedure should be performed:
|
||||||
|
|
||||||
1. Perform an _authenticated_ request against any API endpoint. If the
|
1. Perform an _authenticated_ request against any API endpoint. The
|
||||||
server is on version 1.3.0 or newer, the server will add two custom
|
server will add two custom headers to the response:
|
||||||
headers to the response:
|
|
||||||
|
|
||||||
```
|
```
|
||||||
X-Api-Version: 2
|
X-Api-Version: 10
|
||||||
X-Version: 1.3.0
|
X-Version: <server-version>
|
||||||
```
|
```
|
||||||
|
|
||||||
2. Determine whether the client is compatible with this server based on
|
2. Determine whether the client is compatible with this server based on
|
||||||
|
|||||||
@@ -75,13 +75,13 @@ first-time setup.
|
|||||||
4. Install the Python dependencies:
|
4. Install the Python dependencies:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
$ uv sync --group dev
|
uv sync --group dev
|
||||||
```
|
```
|
||||||
|
|
||||||
5. Install pre-commit hooks:
|
5. Install pre-commit hooks:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
$ uv run prek install
|
uv run prek install
|
||||||
```
|
```
|
||||||
|
|
||||||
6. Apply migrations and create a superuser (also can be done via the web UI) for your development instance:
|
6. Apply migrations and create a superuser (also can be done via the web UI) for your development instance:
|
||||||
@@ -89,8 +89,8 @@ first-time setup.
|
|||||||
```bash
|
```bash
|
||||||
# src/
|
# src/
|
||||||
|
|
||||||
$ uv run manage.py migrate
|
uv run manage.py migrate
|
||||||
$ uv run manage.py createsuperuser
|
uv run manage.py createsuperuser
|
||||||
```
|
```
|
||||||
|
|
||||||
7. You can now either ...
|
7. You can now either ...
|
||||||
@@ -103,7 +103,7 @@ first-time setup.
|
|||||||
|
|
||||||
- spin up a bare Redis container
|
- spin up a bare Redis container
|
||||||
|
|
||||||
```
|
```bash
|
||||||
docker run -d -p 6379:6379 --restart unless-stopped redis:latest
|
docker run -d -p 6379:6379 --restart unless-stopped redis:latest
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -118,18 +118,18 @@ work well for development, but you can use whatever you want.
|
|||||||
Configure the IDE to use the `src/`-folder as the base source folder.
|
Configure the IDE to use the `src/`-folder as the base source folder.
|
||||||
Configure the following launch configurations in your IDE:
|
Configure the following launch configurations in your IDE:
|
||||||
|
|
||||||
- `python3 manage.py runserver`
|
- `uv run manage.py runserver`
|
||||||
- `python3 manage.py document_consumer`
|
- `uv run manage.py document_consumer`
|
||||||
- `celery --app paperless worker -l DEBUG` (or any other log level)
|
- `uv run celery --app paperless worker -l DEBUG` (or any other log level)
|
||||||
|
|
||||||
To start them all:
|
To start them all:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# src/
|
# src/
|
||||||
|
|
||||||
$ python3 manage.py runserver & \
|
uv run manage.py runserver & \
|
||||||
python3 manage.py document_consumer & \
|
uv run manage.py document_consumer & \
|
||||||
celery --app paperless worker -l DEBUG
|
uv run celery --app paperless worker -l DEBUG
|
||||||
```
|
```
|
||||||
|
|
||||||
You might need the front end to test your back end code.
|
You might need the front end to test your back end code.
|
||||||
@@ -140,8 +140,8 @@ To build the front end once use this command:
|
|||||||
```bash
|
```bash
|
||||||
# src-ui/
|
# src-ui/
|
||||||
|
|
||||||
$ pnpm install
|
pnpm install
|
||||||
$ ng build --configuration production
|
pnpm ng build --configuration production
|
||||||
```
|
```
|
||||||
|
|
||||||
### Testing
|
### Testing
|
||||||
@@ -199,7 +199,7 @@ The front end is built using AngularJS. In order to get started, you need Node.j
|
|||||||
4. You can launch a development server by running:
|
4. You can launch a development server by running:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
ng serve
|
pnpm ng serve
|
||||||
```
|
```
|
||||||
|
|
||||||
This will automatically update whenever you save. However, in-place
|
This will automatically update whenever you save. However, in-place
|
||||||
@@ -217,21 +217,21 @@ commit. See [above](#code-formatting-with-pre-commit-hooks) for installation ins
|
|||||||
command such as
|
command such as
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
$ git ls-files -- '*.ts' | xargs prek run prettier --files
|
git ls-files -- '*.ts' | xargs uv run prek run prettier --files
|
||||||
```
|
```
|
||||||
|
|
||||||
Front end testing uses Jest and Playwright. Unit tests and e2e tests,
|
Front end testing uses Jest and Playwright. Unit tests and e2e tests,
|
||||||
respectively, can be run non-interactively with:
|
respectively, can be run non-interactively with:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
$ ng test
|
pnpm ng test
|
||||||
$ npx playwright test
|
pnpm playwright test
|
||||||
```
|
```
|
||||||
|
|
||||||
Playwright also includes a UI which can be run with:
|
Playwright also includes a UI which can be run with:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
$ npx playwright test --ui
|
pnpm playwright test --ui
|
||||||
```
|
```
|
||||||
|
|
||||||
### Building the frontend
|
### Building the frontend
|
||||||
@@ -239,7 +239,7 @@ $ npx playwright test --ui
|
|||||||
In order to build the front end and serve it as part of Django, execute:
|
In order to build the front end and serve it as part of Django, execute:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
$ ng build --configuration production
|
pnpm ng build --configuration production
|
||||||
```
|
```
|
||||||
|
|
||||||
This will build the front end and put it in a location from which the
|
This will build the front end and put it in a location from which the
|
||||||
@@ -312,10 +312,10 @@ end (such as error messages).
|
|||||||
- The source language of the project is "en_US".
|
- The source language of the project is "en_US".
|
||||||
- Localization files end up in the folder `src/locale/`.
|
- Localization files end up in the folder `src/locale/`.
|
||||||
- In order to extract strings from the application, call
|
- In order to extract strings from the application, call
|
||||||
`python3 manage.py makemessages -l en_US`. This is important after
|
`uv run manage.py makemessages -l en_US`. This is important after
|
||||||
making changes to translatable strings.
|
making changes to translatable strings.
|
||||||
- The message files need to be compiled for them to show up in the
|
- The message files need to be compiled for them to show up in the
|
||||||
application. Call `python3 manage.py compilemessages` to do this.
|
application. Call `uv run manage.py compilemessages` to do this.
|
||||||
The generated files don't get committed into git, since these are
|
The generated files don't get committed into git, since these are
|
||||||
derived artifacts. The build pipeline takes care of executing this
|
derived artifacts. The build pipeline takes care of executing this
|
||||||
command.
|
command.
|
||||||
|
|||||||
@@ -49,6 +49,7 @@ dependencies = [
|
|||||||
"flower~=2.0.1",
|
"flower~=2.0.1",
|
||||||
"gotenberg-client~=0.13.1",
|
"gotenberg-client~=0.13.1",
|
||||||
"httpx-oauth~=0.16",
|
"httpx-oauth~=0.16",
|
||||||
|
"ijson>=3.2",
|
||||||
"imap-tools~=1.11.0",
|
"imap-tools~=1.11.0",
|
||||||
"jinja2~=3.1.5",
|
"jinja2~=3.1.5",
|
||||||
"langdetect~=1.0.9",
|
"langdetect~=1.0.9",
|
||||||
|
|||||||
@@ -19,6 +19,4 @@ following additional information about it:
|
|||||||
* Correspondent: ${DOCUMENT_CORRESPONDENT}
|
* Correspondent: ${DOCUMENT_CORRESPONDENT}
|
||||||
* Tags: ${DOCUMENT_TAGS}
|
* Tags: ${DOCUMENT_TAGS}
|
||||||
|
|
||||||
It was consumed with the passphrase ${PASSPHRASE}
|
|
||||||
|
|
||||||
"
|
"
|
||||||
|
|||||||
@@ -3434,39 +3434,46 @@
|
|||||||
<context context-type="linenumber">9</context>
|
<context context-type="linenumber">9</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
|
<trans-unit id="6705735915615634619" datatype="html">
|
||||||
|
<source>{VAR_PLURAL, plural, =1 {One page} other {<x id="INTERPOLATION"/> pages}}</source>
|
||||||
|
<context-group purpose="location">
|
||||||
|
<context context-type="sourcefile">src/app/components/common/confirm-dialog/merge-confirm-dialog/merge-confirm-dialog.component.html</context>
|
||||||
|
<context context-type="linenumber">25</context>
|
||||||
|
</context-group>
|
||||||
|
</trans-unit>
|
||||||
<trans-unit id="7508164375697837821" datatype="html">
|
<trans-unit id="7508164375697837821" datatype="html">
|
||||||
<source>Use metadata from:</source>
|
<source>Use metadata from:</source>
|
||||||
<context-group purpose="location">
|
<context-group purpose="location">
|
||||||
<context context-type="sourcefile">src/app/components/common/confirm-dialog/merge-confirm-dialog/merge-confirm-dialog.component.html</context>
|
<context context-type="sourcefile">src/app/components/common/confirm-dialog/merge-confirm-dialog/merge-confirm-dialog.component.html</context>
|
||||||
<context context-type="linenumber">22</context>
|
<context context-type="linenumber">34</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
<trans-unit id="2020403212524346652" datatype="html">
|
<trans-unit id="2020403212524346652" datatype="html">
|
||||||
<source>Regenerate all metadata</source>
|
<source>Regenerate all metadata</source>
|
||||||
<context-group purpose="location">
|
<context-group purpose="location">
|
||||||
<context context-type="sourcefile">src/app/components/common/confirm-dialog/merge-confirm-dialog/merge-confirm-dialog.component.html</context>
|
<context context-type="sourcefile">src/app/components/common/confirm-dialog/merge-confirm-dialog/merge-confirm-dialog.component.html</context>
|
||||||
<context context-type="linenumber">24</context>
|
<context context-type="linenumber">36</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
<trans-unit id="2710430925353472741" datatype="html">
|
<trans-unit id="2710430925353472741" datatype="html">
|
||||||
<source>Try to include archive version in merge for non-PDF files</source>
|
<source>Try to include archive version in merge for non-PDF files</source>
|
||||||
<context-group purpose="location">
|
<context-group purpose="location">
|
||||||
<context context-type="sourcefile">src/app/components/common/confirm-dialog/merge-confirm-dialog/merge-confirm-dialog.component.html</context>
|
<context context-type="sourcefile">src/app/components/common/confirm-dialog/merge-confirm-dialog/merge-confirm-dialog.component.html</context>
|
||||||
<context context-type="linenumber">32</context>
|
<context context-type="linenumber">44</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
<trans-unit id="5612366187076076264" datatype="html">
|
<trans-unit id="5612366187076076264" datatype="html">
|
||||||
<source>Delete original documents after successful merge</source>
|
<source>Delete original documents after successful merge</source>
|
||||||
<context-group purpose="location">
|
<context-group purpose="location">
|
||||||
<context context-type="sourcefile">src/app/components/common/confirm-dialog/merge-confirm-dialog/merge-confirm-dialog.component.html</context>
|
<context context-type="sourcefile">src/app/components/common/confirm-dialog/merge-confirm-dialog/merge-confirm-dialog.component.html</context>
|
||||||
<context context-type="linenumber">36</context>
|
<context context-type="linenumber">48</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
<trans-unit id="5138283234724909648" datatype="html">
|
<trans-unit id="5138283234724909648" datatype="html">
|
||||||
<source>Note that only PDFs will be included.</source>
|
<source>Note that only PDFs will be included.</source>
|
||||||
<context-group purpose="location">
|
<context-group purpose="location">
|
||||||
<context context-type="sourcefile">src/app/components/common/confirm-dialog/merge-confirm-dialog/merge-confirm-dialog.component.html</context>
|
<context context-type="sourcefile">src/app/components/common/confirm-dialog/merge-confirm-dialog/merge-confirm-dialog.component.html</context>
|
||||||
<context context-type="linenumber">39</context>
|
<context context-type="linenumber">51</context>
|
||||||
</context-group>
|
</context-group>
|
||||||
</trans-unit>
|
</trans-unit>
|
||||||
<trans-unit id="1309641780471803652" datatype="html">
|
<trans-unit id="1309641780471803652" datatype="html">
|
||||||
|
|||||||
@@ -10,10 +10,22 @@
|
|||||||
<ul class="list-group"
|
<ul class="list-group"
|
||||||
cdkDropList
|
cdkDropList
|
||||||
(cdkDropListDropped)="onDrop($event)">
|
(cdkDropListDropped)="onDrop($event)">
|
||||||
@for (documentID of documentIDs; track documentID) {
|
@for (document of documents; track document.id) {
|
||||||
<li class="list-group-item" cdkDrag>
|
<li class="list-group-item d-flex align-items-center" cdkDrag>
|
||||||
<i-bs name="grip-vertical" class="me-2"></i-bs>
|
<i-bs name="grip-vertical" class="me-2"></i-bs>
|
||||||
{{getDocument(documentID)?.title}}
|
<div class="d-flex flex-column">
|
||||||
|
<div>
|
||||||
|
@if (document.correspondent) {
|
||||||
|
<b>{{document.correspondent | correspondentName | async}}: </b>
|
||||||
|
}{{document.title}}
|
||||||
|
</div>
|
||||||
|
<small class="text-muted">
|
||||||
|
{{document.created | customDate:'mediumDate'}}
|
||||||
|
@if (document.page_count) {
|
||||||
|
| {document.page_count, plural, =1 {One page} other {{{document.page_count}} pages}}
|
||||||
|
}
|
||||||
|
</small>
|
||||||
|
</div>
|
||||||
</li>
|
</li>
|
||||||
}
|
}
|
||||||
</ul>
|
</ul>
|
||||||
|
|||||||
@@ -3,11 +3,14 @@ import {
|
|||||||
DragDropModule,
|
DragDropModule,
|
||||||
moveItemInArray,
|
moveItemInArray,
|
||||||
} from '@angular/cdk/drag-drop'
|
} from '@angular/cdk/drag-drop'
|
||||||
|
import { AsyncPipe } from '@angular/common'
|
||||||
import { Component, OnInit, inject } from '@angular/core'
|
import { Component, OnInit, inject } from '@angular/core'
|
||||||
import { FormsModule, ReactiveFormsModule } from '@angular/forms'
|
import { FormsModule, ReactiveFormsModule } from '@angular/forms'
|
||||||
import { NgxBootstrapIconsModule } from 'ngx-bootstrap-icons'
|
import { NgxBootstrapIconsModule } from 'ngx-bootstrap-icons'
|
||||||
import { takeUntil } from 'rxjs'
|
import { takeUntil } from 'rxjs'
|
||||||
import { Document } from 'src/app/data/document'
|
import { Document } from 'src/app/data/document'
|
||||||
|
import { CorrespondentNamePipe } from 'src/app/pipes/correspondent-name.pipe'
|
||||||
|
import { CustomDatePipe } from 'src/app/pipes/custom-date.pipe'
|
||||||
import { PermissionsService } from 'src/app/services/permissions.service'
|
import { PermissionsService } from 'src/app/services/permissions.service'
|
||||||
import { DocumentService } from 'src/app/services/rest/document.service'
|
import { DocumentService } from 'src/app/services/rest/document.service'
|
||||||
import { ConfirmDialogComponent } from '../confirm-dialog.component'
|
import { ConfirmDialogComponent } from '../confirm-dialog.component'
|
||||||
@@ -17,6 +20,9 @@ import { ConfirmDialogComponent } from '../confirm-dialog.component'
|
|||||||
templateUrl: './merge-confirm-dialog.component.html',
|
templateUrl: './merge-confirm-dialog.component.html',
|
||||||
styleUrl: './merge-confirm-dialog.component.scss',
|
styleUrl: './merge-confirm-dialog.component.scss',
|
||||||
imports: [
|
imports: [
|
||||||
|
AsyncPipe,
|
||||||
|
CorrespondentNamePipe,
|
||||||
|
CustomDatePipe,
|
||||||
DragDropModule,
|
DragDropModule,
|
||||||
FormsModule,
|
FormsModule,
|
||||||
ReactiveFormsModule,
|
ReactiveFormsModule,
|
||||||
|
|||||||
@@ -51,11 +51,28 @@ from documents.templating.workflows import parse_w_workflow_placeholders
|
|||||||
from documents.utils import copy_basic_file_stats
|
from documents.utils import copy_basic_file_stats
|
||||||
from documents.utils import copy_file_with_basic_stats
|
from documents.utils import copy_file_with_basic_stats
|
||||||
from documents.utils import run_subprocess
|
from documents.utils import run_subprocess
|
||||||
|
from paperless.parsers.text import TextDocumentParser
|
||||||
from paperless_mail.parsers import MailDocumentParser
|
from paperless_mail.parsers import MailDocumentParser
|
||||||
|
|
||||||
LOGGING_NAME: Final[str] = "paperless.consumer"
|
LOGGING_NAME: Final[str] = "paperless.consumer"
|
||||||
|
|
||||||
|
|
||||||
|
def _parser_cleanup(parser: DocumentParser) -> None:
    """
    Tear down a parser regardless of which parser style it implements.

    Old-style parsers expose a cleanup() method. New-style parsers
    (e.g. TextDocumentParser) are context managers and are torn down through
    __exit__ instead. This shim bridges the two until every parser has moved
    to the new style and this consumer has been updated to use it.

    TODO(stumpylog): Remove me in the future
    """
    # Old-style parsers are the common case: call cleanup() and finish.
    if not isinstance(parser, TextDocumentParser):
        parser.cleanup()
        return

    # New-style parser: emulate leaving a ``with`` block without an exception.
    parser.__exit__(None, None, None)
|
||||||
|
|
||||||
|
|
||||||
class WorkflowTriggerPlugin(
|
class WorkflowTriggerPlugin(
|
||||||
NoCleanupPluginMixin,
|
NoCleanupPluginMixin,
|
||||||
NoSetupPluginMixin,
|
NoSetupPluginMixin,
|
||||||
@@ -459,6 +476,9 @@ class ConsumerPlugin(
|
|||||||
self.filename,
|
self.filename,
|
||||||
self.input_doc.mailrule_id,
|
self.input_doc.mailrule_id,
|
||||||
)
|
)
|
||||||
|
elif isinstance(document_parser, TextDocumentParser):
|
||||||
|
# TODO(stumpylog): Remove me in the future
|
||||||
|
document_parser.parse(self.working_copy, mime_type)
|
||||||
else:
|
else:
|
||||||
document_parser.parse(self.working_copy, mime_type, self.filename)
|
document_parser.parse(self.working_copy, mime_type, self.filename)
|
||||||
|
|
||||||
@@ -469,11 +489,15 @@ class ConsumerPlugin(
|
|||||||
ProgressStatusOptions.WORKING,
|
ProgressStatusOptions.WORKING,
|
||||||
ConsumerStatusShortMessage.GENERATING_THUMBNAIL,
|
ConsumerStatusShortMessage.GENERATING_THUMBNAIL,
|
||||||
)
|
)
|
||||||
thumbnail = document_parser.get_thumbnail(
|
if isinstance(document_parser, TextDocumentParser):
|
||||||
self.working_copy,
|
# TODO(stumpylog): Remove me in the future
|
||||||
mime_type,
|
thumbnail = document_parser.get_thumbnail(self.working_copy, mime_type)
|
||||||
self.filename,
|
else:
|
||||||
)
|
thumbnail = document_parser.get_thumbnail(
|
||||||
|
self.working_copy,
|
||||||
|
mime_type,
|
||||||
|
self.filename,
|
||||||
|
)
|
||||||
|
|
||||||
text = document_parser.get_text()
|
text = document_parser.get_text()
|
||||||
date = document_parser.get_date()
|
date = document_parser.get_date()
|
||||||
@@ -490,7 +514,7 @@ class ConsumerPlugin(
|
|||||||
page_count = document_parser.get_page_count(self.working_copy, mime_type)
|
page_count = document_parser.get_page_count(self.working_copy, mime_type)
|
||||||
|
|
||||||
except ParseError as e:
|
except ParseError as e:
|
||||||
document_parser.cleanup()
|
_parser_cleanup(document_parser)
|
||||||
if tempdir:
|
if tempdir:
|
||||||
tempdir.cleanup()
|
tempdir.cleanup()
|
||||||
self._fail(
|
self._fail(
|
||||||
@@ -500,7 +524,7 @@ class ConsumerPlugin(
|
|||||||
exception=e,
|
exception=e,
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
document_parser.cleanup()
|
_parser_cleanup(document_parser)
|
||||||
if tempdir:
|
if tempdir:
|
||||||
tempdir.cleanup()
|
tempdir.cleanup()
|
||||||
self._fail(
|
self._fail(
|
||||||
@@ -702,7 +726,7 @@ class ConsumerPlugin(
|
|||||||
exception=e,
|
exception=e,
|
||||||
)
|
)
|
||||||
finally:
|
finally:
|
||||||
document_parser.cleanup()
|
_parser_cleanup(document_parser)
|
||||||
tempdir.cleanup()
|
tempdir.cleanup()
|
||||||
|
|
||||||
self.run_post_consume_script(document)
|
self.run_post_consume_script(document)
|
||||||
|
|||||||
@@ -304,7 +304,7 @@ class PaperlessCommand(RichCommand):
|
|||||||
|
|
||||||
Progress output is directed to stderr to match the convention that
|
Progress output is directed to stderr to match the convention that
|
||||||
progress bars are transient UI feedback, not command output. This
|
progress bars are transient UI feedback, not command output. This
|
||||||
mirrors tqdm's default behavior and prevents progress bar rendering
|
keeps stdout reserved for command output and prevents progress bar rendering
|
||||||
from interfering with stdout-based assertions in tests or piped
|
from interfering with stdout-based assertions in tests or piped
|
||||||
command output.
|
command output.
|
||||||
|
|
||||||
|
|||||||
@@ -17,6 +17,7 @@ class Command(PaperlessCommand):
|
|||||||
"modified) after their initial import."
|
"modified) after their initial import."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
supports_progress_bar = True
|
||||||
supports_multiprocessing = True
|
supports_multiprocessing = True
|
||||||
|
|
||||||
def add_arguments(self, parser):
|
def add_arguments(self, parser):
|
||||||
|
|||||||
@@ -3,12 +3,10 @@ import json
|
|||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
import tempfile
|
import tempfile
|
||||||
from itertools import chain
|
|
||||||
from itertools import islice
|
from itertools import islice
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
import tqdm
|
|
||||||
from allauth.mfa.models import Authenticator
|
from allauth.mfa.models import Authenticator
|
||||||
from allauth.socialaccount.models import SocialAccount
|
from allauth.socialaccount.models import SocialAccount
|
||||||
from allauth.socialaccount.models import SocialApp
|
from allauth.socialaccount.models import SocialApp
|
||||||
@@ -19,7 +17,6 @@ from django.contrib.auth.models import Permission
|
|||||||
from django.contrib.auth.models import User
|
from django.contrib.auth.models import User
|
||||||
from django.contrib.contenttypes.models import ContentType
|
from django.contrib.contenttypes.models import ContentType
|
||||||
from django.core import serializers
|
from django.core import serializers
|
||||||
from django.core.management.base import BaseCommand
|
|
||||||
from django.core.management.base import CommandError
|
from django.core.management.base import CommandError
|
||||||
from django.core.serializers.json import DjangoJSONEncoder
|
from django.core.serializers.json import DjangoJSONEncoder
|
||||||
from django.db import transaction
|
from django.db import transaction
|
||||||
@@ -38,6 +35,7 @@ if settings.AUDIT_LOG_ENABLED:
|
|||||||
|
|
||||||
from documents.file_handling import delete_empty_directories
|
from documents.file_handling import delete_empty_directories
|
||||||
from documents.file_handling import generate_filename
|
from documents.file_handling import generate_filename
|
||||||
|
from documents.management.commands.base import PaperlessCommand
|
||||||
from documents.management.commands.mixins import CryptMixin
|
from documents.management.commands.mixins import CryptMixin
|
||||||
from documents.models import Correspondent
|
from documents.models import Correspondent
|
||||||
from documents.models import CustomField
|
from documents.models import CustomField
|
||||||
@@ -81,14 +79,99 @@ def serialize_queryset_batched(
|
|||||||
yield serializers.serialize("python", chunk)
|
yield serializers.serialize("python", chunk)
|
||||||
|
|
||||||
|
|
||||||
class Command(CryptMixin, BaseCommand):
|
class StreamingManifestWriter:
    """Incrementally write a JSON array to a file, one record at a time.

    Records are streamed to ``<target>.tmp``. On ``close()`` the temporary
    file either replaces the target or, when ``compare_json`` is set and an
    existing target tracked in ``files_in_export_dir`` has an identical
    BLAKE2b digest, is deleted so the existing file is left untouched.
    ``discard()`` deletes the temporary file and leaves the target intact.

    When used as a context manager the writer is closed on a clean exit and
    discarded if the ``with`` body raises.
    """

    # Read size used while hashing files for the --compare-json check, so the
    # manifests are never loaded into memory in one piece.
    _HASH_CHUNK_SIZE = 1024 * 1024

    def __init__(
        self,
        path: Path,
        *,
        compare_json: bool = False,
        files_in_export_dir: "set[Path] | None" = None,
    ) -> None:
        """
        Args:
            path: Final location of the manifest; it is resolved immediately.
            compare_json: When True, an existing tracked manifest with
                identical content is kept instead of being overwritten.
            files_in_export_dir: Set of paths tracked by the caller. The
                target path is removed from this set (in place) on close.
        """
        self._path = path.resolve()
        self._tmp_path = self._path.with_suffix(self._path.suffix + ".tmp")
        self._compare_json = compare_json
        # The caller's set is used as-is (not copied) so removals are visible
        # to the caller.
        self._files_in_export_dir: set[Path] = (
            files_in_export_dir if files_in_export_dir is not None else set()
        )
        self._file = None
        self._first = True

    def open(self) -> None:
        """Create the parent directory and start the JSON array in the tmp file."""
        self._path.parent.mkdir(parents=True, exist_ok=True)
        self._file = self._tmp_path.open("w", encoding="utf-8")
        self._file.write("[")
        self._first = True

    def write_record(self, record: dict) -> None:
        """Append one record to the array; ``open()`` must have been called."""
        if not self._first:
            # Every record after the first is preceded by a separator.
            self._file.write(",\n")
        else:
            self._first = False
        self._file.write(
            json.dumps(record, cls=DjangoJSONEncoder, indent=2, ensure_ascii=False),
        )

    def write_batch(self, records: list[dict]) -> None:
        """Append every record of ``records`` in order."""
        for record in records:
            self.write_record(record)

    def close(self) -> None:
        """Terminate the array, close the tmp file and finalize the target.

        Does nothing if the writer is not open. If the terminator cannot be
        written, the tmp file is closed and removed before the error is
        re-raised, so no handle or partial file is left behind.
        """
        if self._file is None:
            return
        handle, self._file = self._file, None
        try:
            # The with-block guarantees the handle is closed on both paths.
            with handle:
                handle.write("\n]")
        except BaseException:
            self._tmp_path.unlink(missing_ok=True)
            raise
        self._finalize()

    def discard(self) -> None:
        """Close the tmp file if open and delete it, leaving the target untouched."""
        if self._file is not None:
            self._file.close()
            self._file = None
        self._tmp_path.unlink(missing_ok=True)

    @classmethod
    def _digest(cls, path: Path) -> str:
        """Return the BLAKE2b hex digest of ``path``, read in fixed-size chunks."""
        hasher = hashlib.blake2b()
        with path.open("rb") as handle:
            while chunk := handle.read(cls._HASH_CHUNK_SIZE):
                hasher.update(chunk)
        return hasher.hexdigest()

    def _finalize(self) -> None:
        """Compare with existing file (if --compare-json) then replace or discard tmp."""
        if self._path in self._files_in_export_dir:
            self._files_in_export_dir.remove(self._path)
            if self._compare_json and self._digest(self._path) == self._digest(
                self._tmp_path,
            ):
                # Identical content: keep the existing file as it is.
                self._tmp_path.unlink()
                return
        # Path.replace overwrites an existing target on every platform, whereas
        # Path.rename raises FileExistsError on Windows when the target exists.
        self._tmp_path.replace(self._path)

    def __enter__(self) -> "StreamingManifestWriter":
        self.open()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        # An exception in the with-body must not clobber the existing manifest.
        if exc_type is not None:
            self.discard()
        else:
            self.close()
|
||||||
|
|
||||||
|
|
||||||
|
class Command(CryptMixin, PaperlessCommand):
|
||||||
help = (
|
help = (
|
||||||
"Decrypt and rename all files in our collection into a given target "
|
"Decrypt and rename all files in our collection into a given target "
|
||||||
"directory. And include a manifest file containing document data for "
|
"directory. And include a manifest file containing document data for "
|
||||||
"easy import."
|
"easy import."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
supports_progress_bar = True
|
||||||
|
supports_multiprocessing = False
|
||||||
|
|
||||||
def add_arguments(self, parser) -> None:
|
def add_arguments(self, parser) -> None:
|
||||||
|
super().add_arguments(parser)
|
||||||
parser.add_argument("target")
|
parser.add_argument("target")
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
@@ -195,13 +278,6 @@ class Command(CryptMixin, BaseCommand):
|
|||||||
help="If set, only the database will be imported, not files",
|
help="If set, only the database will be imported, not files",
|
||||||
)
|
)
|
||||||
|
|
||||||
parser.add_argument(
|
|
||||||
"--no-progress-bar",
|
|
||||||
default=False,
|
|
||||||
action="store_true",
|
|
||||||
help="If set, the progress bar will not be shown",
|
|
||||||
)
|
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--passphrase",
|
"--passphrase",
|
||||||
help="If provided, is used to encrypt sensitive data in the export",
|
help="If provided, is used to encrypt sensitive data in the export",
|
||||||
@@ -230,7 +306,6 @@ class Command(CryptMixin, BaseCommand):
|
|||||||
self.no_thumbnail: bool = options["no_thumbnail"]
|
self.no_thumbnail: bool = options["no_thumbnail"]
|
||||||
self.zip_export: bool = options["zip"]
|
self.zip_export: bool = options["zip"]
|
||||||
self.data_only: bool = options["data_only"]
|
self.data_only: bool = options["data_only"]
|
||||||
self.no_progress_bar: bool = options["no_progress_bar"]
|
|
||||||
self.passphrase: str | None = options.get("passphrase")
|
self.passphrase: str | None = options.get("passphrase")
|
||||||
self.batch_size: int = options["batch_size"]
|
self.batch_size: int = options["batch_size"]
|
||||||
|
|
||||||
@@ -322,95 +397,85 @@ class Command(CryptMixin, BaseCommand):
|
|||||||
if settings.AUDIT_LOG_ENABLED:
|
if settings.AUDIT_LOG_ENABLED:
|
||||||
manifest_key_to_object_query["log_entries"] = LogEntry.objects.all()
|
manifest_key_to_object_query["log_entries"] = LogEntry.objects.all()
|
||||||
|
|
||||||
with transaction.atomic():
|
# Crypto setup before streaming begins
|
||||||
manifest_dict = {}
|
if self.passphrase:
|
||||||
|
self.setup_crypto(passphrase=self.passphrase)
|
||||||
# Build an overall manifest
|
elif MailAccount.objects.count() > 0 or SocialToken.objects.count() > 0:
|
||||||
for key, object_query in manifest_key_to_object_query.items():
|
self.stdout.write(
|
||||||
manifest_dict[key] = list(
|
self.style.NOTICE(
|
||||||
chain.from_iterable(
|
"No passphrase was given, sensitive fields will be in plaintext",
|
||||||
serialize_queryset_batched(
|
),
|
||||||
object_query,
|
|
||||||
batch_size=self.batch_size,
|
|
||||||
),
|
|
||||||
),
|
|
||||||
)
|
|
||||||
|
|
||||||
self.encrypt_secret_fields(manifest_dict)
|
|
||||||
|
|
||||||
# These are treated specially and included in the per-document manifest
|
|
||||||
# if that setting is enabled. Otherwise, they are just exported to the bulk
|
|
||||||
# manifest
|
|
||||||
document_map: dict[int, Document] = {
|
|
||||||
d.pk: d for d in manifest_key_to_object_query["documents"]
|
|
||||||
}
|
|
||||||
document_manifest = manifest_dict["documents"]
|
|
||||||
|
|
||||||
# 3. Export files from each document
|
|
||||||
for index, document_dict in tqdm.tqdm(
|
|
||||||
enumerate(document_manifest),
|
|
||||||
total=len(document_manifest),
|
|
||||||
disable=self.no_progress_bar,
|
|
||||||
):
|
|
||||||
document = document_map[document_dict["pk"]]
|
|
||||||
|
|
||||||
# 3.1. generate a unique filename
|
|
||||||
base_name = self.generate_base_name(document)
|
|
||||||
|
|
||||||
# 3.2. write filenames into manifest
|
|
||||||
original_target, thumbnail_target, archive_target = (
|
|
||||||
self.generate_document_targets(document, base_name, document_dict)
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# 3.3. write files to target folder
|
document_manifest: list[dict] = []
|
||||||
if not self.data_only:
|
|
||||||
self.copy_document_files(
|
|
||||||
document,
|
|
||||||
original_target,
|
|
||||||
thumbnail_target,
|
|
||||||
archive_target,
|
|
||||||
)
|
|
||||||
|
|
||||||
if self.split_manifest:
|
|
||||||
manifest_name = base_name.with_name(f"{base_name.stem}-manifest.json")
|
|
||||||
if self.use_folder_prefix:
|
|
||||||
manifest_name = Path("json") / manifest_name
|
|
||||||
manifest_name = (self.target / manifest_name).resolve()
|
|
||||||
manifest_name.parent.mkdir(parents=True, exist_ok=True)
|
|
||||||
content = [document_manifest[index]]
|
|
||||||
content += list(
|
|
||||||
filter(
|
|
||||||
lambda d: d["fields"]["document"] == document_dict["pk"],
|
|
||||||
manifest_dict["notes"],
|
|
||||||
),
|
|
||||||
)
|
|
||||||
content += list(
|
|
||||||
filter(
|
|
||||||
lambda d: d["fields"]["document"] == document_dict["pk"],
|
|
||||||
manifest_dict["custom_field_instances"],
|
|
||||||
),
|
|
||||||
)
|
|
||||||
|
|
||||||
self.check_and_write_json(
|
|
||||||
content,
|
|
||||||
manifest_name,
|
|
||||||
)
|
|
||||||
|
|
||||||
# These were exported already
|
|
||||||
if self.split_manifest:
|
|
||||||
del manifest_dict["documents"]
|
|
||||||
del manifest_dict["notes"]
|
|
||||||
del manifest_dict["custom_field_instances"]
|
|
||||||
|
|
||||||
# 4.1 write primary manifest to target folder
|
|
||||||
manifest = []
|
|
||||||
for key, item in manifest_dict.items():
|
|
||||||
manifest.extend(item)
|
|
||||||
manifest_path = (self.target / "manifest.json").resolve()
|
manifest_path = (self.target / "manifest.json").resolve()
|
||||||
self.check_and_write_json(
|
|
||||||
manifest,
|
with StreamingManifestWriter(
|
||||||
manifest_path,
|
manifest_path,
|
||||||
)
|
compare_json=self.compare_json,
|
||||||
|
files_in_export_dir=self.files_in_export_dir,
|
||||||
|
) as writer:
|
||||||
|
with transaction.atomic():
|
||||||
|
for key, qs in manifest_key_to_object_query.items():
|
||||||
|
if key == "documents":
|
||||||
|
# Accumulate for file-copy loop; written to manifest after
|
||||||
|
for batch in serialize_queryset_batched(
|
||||||
|
qs,
|
||||||
|
batch_size=self.batch_size,
|
||||||
|
):
|
||||||
|
for record in batch:
|
||||||
|
self._encrypt_record_inline(record)
|
||||||
|
document_manifest.extend(batch)
|
||||||
|
elif self.split_manifest and key in (
|
||||||
|
"notes",
|
||||||
|
"custom_field_instances",
|
||||||
|
):
|
||||||
|
# Written per-document in _write_split_manifest
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
for batch in serialize_queryset_batched(
|
||||||
|
qs,
|
||||||
|
batch_size=self.batch_size,
|
||||||
|
):
|
||||||
|
for record in batch:
|
||||||
|
self._encrypt_record_inline(record)
|
||||||
|
writer.write_batch(batch)
|
||||||
|
|
||||||
|
document_map: dict[int, Document] = {
|
||||||
|
d.pk: d for d in Document.objects.order_by("id")
|
||||||
|
}
|
||||||
|
|
||||||
|
# 3. Export files from each document
|
||||||
|
for index, document_dict in enumerate(
|
||||||
|
self.track(
|
||||||
|
document_manifest,
|
||||||
|
description="Exporting documents...",
|
||||||
|
total=len(document_manifest),
|
||||||
|
),
|
||||||
|
):
|
||||||
|
document = document_map[document_dict["pk"]]
|
||||||
|
|
||||||
|
# 3.1. generate a unique filename
|
||||||
|
base_name = self.generate_base_name(document)
|
||||||
|
|
||||||
|
# 3.2. write filenames into manifest
|
||||||
|
original_target, thumbnail_target, archive_target = (
|
||||||
|
self.generate_document_targets(document, base_name, document_dict)
|
||||||
|
)
|
||||||
|
|
||||||
|
# 3.3. write files to target folder
|
||||||
|
if not self.data_only:
|
||||||
|
self.copy_document_files(
|
||||||
|
document,
|
||||||
|
original_target,
|
||||||
|
thumbnail_target,
|
||||||
|
archive_target,
|
||||||
|
)
|
||||||
|
|
||||||
|
if self.split_manifest:
|
||||||
|
self._write_split_manifest(document_dict, document, base_name)
|
||||||
|
else:
|
||||||
|
writer.write_record(document_dict)
|
||||||
|
|
||||||
# 4.2 write version information to target folder
|
# 4.2 write version information to target folder
|
||||||
extra_metadata_path = (self.target / "metadata.json").resolve()
|
extra_metadata_path = (self.target / "metadata.json").resolve()
|
||||||
@@ -532,6 +597,42 @@ class Command(CryptMixin, BaseCommand):
|
|||||||
archive_target,
|
archive_target,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def _encrypt_record_inline(self, record: dict) -> None:
|
||||||
|
"""Encrypt sensitive fields in a single record, if passphrase is set."""
|
||||||
|
if not self.passphrase:
|
||||||
|
return
|
||||||
|
fields = self.CRYPT_FIELDS_BY_MODEL.get(record.get("model", ""))
|
||||||
|
if fields:
|
||||||
|
for field in fields:
|
||||||
|
if record["fields"].get(field):
|
||||||
|
record["fields"][field] = self.encrypt_string(
|
||||||
|
value=record["fields"][field],
|
||||||
|
)
|
||||||
|
|
||||||
|
def _write_split_manifest(
    self,
    document_dict: dict,
    document: Document,
    base_name: Path,
) -> None:
    """Write the per-document manifest used by --split-manifest mode.

    The manifest holds the document's own record followed by its serialized
    notes and custom field instances. It is written next to the exported
    file as ``<stem>-manifest.json`` (below ``json/`` when the folder prefix
    is enabled) through check_and_write_json.
    """
    # The document record comes first, then its notes, then its custom fields.
    related_querysets = (
        Note.objects.filter(document=document),
        CustomFieldInstance.objects.filter(document=document),
    )
    content = [document_dict]
    for queryset in related_querysets:
        content += serializers.serialize("python", queryset)

    relative_path = base_name.with_name(f"{base_name.stem}-manifest.json")
    if self.use_folder_prefix:
        relative_path = Path("json") / relative_path

    destination = (self.target / relative_path).resolve()
    destination.parent.mkdir(parents=True, exist_ok=True)
    self.check_and_write_json(content, destination)
|
||||||
|
|
||||||
def check_and_write_json(
|
def check_and_write_json(
|
||||||
self,
|
self,
|
||||||
content: list[dict] | dict,
|
content: list[dict] | dict,
|
||||||
@@ -549,14 +650,14 @@ class Command(CryptMixin, BaseCommand):
|
|||||||
if target in self.files_in_export_dir:
|
if target in self.files_in_export_dir:
|
||||||
self.files_in_export_dir.remove(target)
|
self.files_in_export_dir.remove(target)
|
||||||
if self.compare_json:
|
if self.compare_json:
|
||||||
target_checksum = hashlib.md5(target.read_bytes()).hexdigest()
|
target_checksum = hashlib.blake2b(target.read_bytes()).hexdigest()
|
||||||
src_str = json.dumps(
|
src_str = json.dumps(
|
||||||
content,
|
content,
|
||||||
cls=DjangoJSONEncoder,
|
cls=DjangoJSONEncoder,
|
||||||
indent=2,
|
indent=2,
|
||||||
ensure_ascii=False,
|
ensure_ascii=False,
|
||||||
)
|
)
|
||||||
src_checksum = hashlib.md5(src_str.encode("utf-8")).hexdigest()
|
src_checksum = hashlib.blake2b(src_str.encode("utf-8")).hexdigest()
|
||||||
if src_checksum == target_checksum:
|
if src_checksum == target_checksum:
|
||||||
perform_write = False
|
perform_write = False
|
||||||
|
|
||||||
@@ -606,28 +707,3 @@ class Command(CryptMixin, BaseCommand):
|
|||||||
if perform_copy:
|
if perform_copy:
|
||||||
target.parent.mkdir(parents=True, exist_ok=True)
|
target.parent.mkdir(parents=True, exist_ok=True)
|
||||||
copy_file_with_basic_stats(source, target)
|
copy_file_with_basic_stats(source, target)
|
||||||
|
|
||||||
def encrypt_secret_fields(self, manifest: dict) -> None:
|
|
||||||
"""
|
|
||||||
Encrypts certain fields in the export. Currently limited to the mail account password
|
|
||||||
"""
|
|
||||||
|
|
||||||
if self.passphrase:
|
|
||||||
self.setup_crypto(passphrase=self.passphrase)
|
|
||||||
|
|
||||||
for crypt_config in self.CRYPT_FIELDS:
|
|
||||||
exporter_key = crypt_config["exporter_key"]
|
|
||||||
crypt_fields = crypt_config["fields"]
|
|
||||||
for manifest_record in manifest[exporter_key]:
|
|
||||||
for field in crypt_fields:
|
|
||||||
if manifest_record["fields"][field]:
|
|
||||||
manifest_record["fields"][field] = self.encrypt_string(
|
|
||||||
value=manifest_record["fields"][field],
|
|
||||||
)
|
|
||||||
|
|
||||||
elif MailAccount.objects.count() > 0 or SocialToken.objects.count() > 0:
|
|
||||||
self.stdout.write(
|
|
||||||
self.style.NOTICE(
|
|
||||||
"No passphrase was given, sensitive fields will be in plaintext",
|
|
||||||
),
|
|
||||||
)
|
|
||||||
|
|||||||
@@ -40,6 +40,7 @@ def _process_and_match(work: _WorkPackage) -> _WorkResult:
|
|||||||
class Command(PaperlessCommand):
|
class Command(PaperlessCommand):
|
||||||
help = "Searches for documents where the content almost matches"
|
help = "Searches for documents where the content almost matches"
|
||||||
|
|
||||||
|
supports_progress_bar = True
|
||||||
supports_multiprocessing = True
|
supports_multiprocessing = True
|
||||||
|
|
||||||
def add_arguments(self, parser):
|
def add_arguments(self, parser):
|
||||||
|
|||||||
@@ -8,14 +8,13 @@ from pathlib import Path
|
|||||||
from zipfile import ZipFile
|
from zipfile import ZipFile
|
||||||
from zipfile import is_zipfile
|
from zipfile import is_zipfile
|
||||||
|
|
||||||
import tqdm
|
import ijson
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django.contrib.auth.models import Permission
|
from django.contrib.auth.models import Permission
|
||||||
from django.contrib.auth.models import User
|
from django.contrib.auth.models import User
|
||||||
from django.contrib.contenttypes.models import ContentType
|
from django.contrib.contenttypes.models import ContentType
|
||||||
from django.core.exceptions import FieldDoesNotExist
|
from django.core.exceptions import FieldDoesNotExist
|
||||||
from django.core.management import call_command
|
from django.core.management import call_command
|
||||||
from django.core.management.base import BaseCommand
|
|
||||||
from django.core.management.base import CommandError
|
from django.core.management.base import CommandError
|
||||||
from django.core.serializers.base import DeserializationError
|
from django.core.serializers.base import DeserializationError
|
||||||
from django.db import IntegrityError
|
from django.db import IntegrityError
|
||||||
@@ -25,6 +24,7 @@ from django.db.models.signals import post_save
|
|||||||
from filelock import FileLock
|
from filelock import FileLock
|
||||||
|
|
||||||
from documents.file_handling import create_source_path_directory
|
from documents.file_handling import create_source_path_directory
|
||||||
|
from documents.management.commands.base import PaperlessCommand
|
||||||
from documents.management.commands.mixins import CryptMixin
|
from documents.management.commands.mixins import CryptMixin
|
||||||
from documents.models import Correspondent
|
from documents.models import Correspondent
|
||||||
from documents.models import CustomField
|
from documents.models import CustomField
|
||||||
@@ -33,7 +33,6 @@ from documents.models import Document
|
|||||||
from documents.models import DocumentType
|
from documents.models import DocumentType
|
||||||
from documents.models import Note
|
from documents.models import Note
|
||||||
from documents.models import Tag
|
from documents.models import Tag
|
||||||
from documents.parsers import run_convert
|
|
||||||
from documents.settings import EXPORTER_ARCHIVE_NAME
|
from documents.settings import EXPORTER_ARCHIVE_NAME
|
||||||
from documents.settings import EXPORTER_CRYPTO_SETTINGS_NAME
|
from documents.settings import EXPORTER_CRYPTO_SETTINGS_NAME
|
||||||
from documents.settings import EXPORTER_FILE_NAME
|
from documents.settings import EXPORTER_FILE_NAME
|
||||||
@@ -47,6 +46,15 @@ if settings.AUDIT_LOG_ENABLED:
|
|||||||
from auditlog.registry import auditlog
|
from auditlog.registry import auditlog
|
||||||
|
|
||||||
|
|
||||||
|
def iter_manifest_records(path: Path) -> Generator[dict, None, None]:
|
||||||
|
"""Yield records one at a time from a manifest JSON array via ijson."""
|
||||||
|
try:
|
||||||
|
with path.open("rb") as f:
|
||||||
|
yield from ijson.items(f, "item")
|
||||||
|
except ijson.JSONError as e:
|
||||||
|
raise CommandError(f"Failed to parse manifest file {path}: {e}") from e
|
||||||
|
|
||||||
|
|
||||||
@contextmanager
|
@contextmanager
|
||||||
def disable_signal(sig, receiver, sender, *, weak: bool | None = None) -> Generator:
|
def disable_signal(sig, receiver, sender, *, weak: bool | None = None) -> Generator:
|
||||||
try:
|
try:
|
||||||
@@ -57,21 +65,18 @@ def disable_signal(sig, receiver, sender, *, weak: bool | None = None) -> Genera
|
|||||||
sig.connect(receiver=receiver, sender=sender, **kwargs)
|
sig.connect(receiver=receiver, sender=sender, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
class Command(CryptMixin, BaseCommand):
|
class Command(CryptMixin, PaperlessCommand):
|
||||||
help = (
|
help = (
|
||||||
"Using a manifest.json file, load the data from there, and import the "
|
"Using a manifest.json file, load the data from there, and import the "
|
||||||
"documents it refers to."
|
"documents it refers to."
|
||||||
)
|
)
|
||||||
|
|
||||||
def add_arguments(self, parser) -> None:
|
supports_progress_bar = True
|
||||||
parser.add_argument("source")
|
supports_multiprocessing = False
|
||||||
|
|
||||||
parser.add_argument(
|
def add_arguments(self, parser) -> None:
|
||||||
"--no-progress-bar",
|
super().add_arguments(parser)
|
||||||
default=False,
|
parser.add_argument("source")
|
||||||
action="store_true",
|
|
||||||
help="If set, the progress bar will not be shown",
|
|
||||||
)
|
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--data-only",
|
"--data-only",
|
||||||
@@ -147,14 +152,9 @@ class Command(CryptMixin, BaseCommand):
|
|||||||
Loads manifest data from the various JSON files for parsing and loading the database
|
Loads manifest data from the various JSON files for parsing and loading the database
|
||||||
"""
|
"""
|
||||||
main_manifest_path: Path = self.source / "manifest.json"
|
main_manifest_path: Path = self.source / "manifest.json"
|
||||||
|
|
||||||
with main_manifest_path.open() as infile:
|
|
||||||
self.manifest = json.load(infile)
|
|
||||||
self.manifest_paths.append(main_manifest_path)
|
self.manifest_paths.append(main_manifest_path)
|
||||||
|
|
||||||
for file in Path(self.source).glob("**/*-manifest.json"):
|
for file in Path(self.source).glob("**/*-manifest.json"):
|
||||||
with file.open() as infile:
|
|
||||||
self.manifest += json.load(infile)
|
|
||||||
self.manifest_paths.append(file)
|
self.manifest_paths.append(file)
|
||||||
|
|
||||||
def load_metadata(self) -> None:
|
def load_metadata(self) -> None:
|
||||||
@@ -231,12 +231,10 @@ class Command(CryptMixin, BaseCommand):
|
|||||||
|
|
||||||
self.source = Path(options["source"]).resolve()
|
self.source = Path(options["source"]).resolve()
|
||||||
self.data_only: bool = options["data_only"]
|
self.data_only: bool = options["data_only"]
|
||||||
self.no_progress_bar: bool = options["no_progress_bar"]
|
|
||||||
self.passphrase: str | None = options.get("passphrase")
|
self.passphrase: str | None = options.get("passphrase")
|
||||||
self.version: str | None = None
|
self.version: str | None = None
|
||||||
self.salt: str | None = None
|
self.salt: str | None = None
|
||||||
self.manifest_paths = []
|
self.manifest_paths = []
|
||||||
self.manifest = []
|
|
||||||
|
|
||||||
# Create a temporary directory for extracting a zip file into it, even if supplied source is no zip file to keep code cleaner.
|
# Create a temporary directory for extracting a zip file into it, even if supplied source is no zip file to keep code cleaner.
|
||||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||||
@@ -296,6 +294,9 @@ class Command(CryptMixin, BaseCommand):
|
|||||||
else:
|
else:
|
||||||
self.stdout.write(self.style.NOTICE("Data only import completed"))
|
self.stdout.write(self.style.NOTICE("Data only import completed"))
|
||||||
|
|
||||||
|
for tmp in getattr(self, "_decrypted_tmp_paths", []):
|
||||||
|
tmp.unlink(missing_ok=True)
|
||||||
|
|
||||||
self.stdout.write("Updating search index...")
|
self.stdout.write("Updating search index...")
|
||||||
call_command(
|
call_command(
|
||||||
"document_index",
|
"document_index",
|
||||||
@@ -348,11 +349,12 @@ class Command(CryptMixin, BaseCommand):
|
|||||||
) from e
|
) from e
|
||||||
|
|
||||||
self.stdout.write("Checking the manifest")
|
self.stdout.write("Checking the manifest")
|
||||||
for record in self.manifest:
|
for manifest_path in self.manifest_paths:
|
||||||
# Only check if the document files exist if this is not data only
|
for record in iter_manifest_records(manifest_path):
|
||||||
# We don't care about documents for a data only import
|
# Only check if the document files exist if this is not data only
|
||||||
if not self.data_only and record["model"] == "documents.document":
|
# We don't care about documents for a data only import
|
||||||
check_document_validity(record)
|
if not self.data_only and record["model"] == "documents.document":
|
||||||
|
check_document_validity(record)
|
||||||
|
|
||||||
def _import_files_from_manifest(self) -> None:
|
def _import_files_from_manifest(self) -> None:
|
||||||
settings.ORIGINALS_DIR.mkdir(parents=True, exist_ok=True)
|
settings.ORIGINALS_DIR.mkdir(parents=True, exist_ok=True)
|
||||||
@@ -361,23 +363,31 @@ class Command(CryptMixin, BaseCommand):
|
|||||||
|
|
||||||
self.stdout.write("Copy files into paperless...")
|
self.stdout.write("Copy files into paperless...")
|
||||||
|
|
||||||
manifest_documents = list(
|
document_records = [
|
||||||
filter(lambda r: r["model"] == "documents.document", self.manifest),
|
{
|
||||||
)
|
"pk": record["pk"],
|
||||||
|
EXPORTER_FILE_NAME: record[EXPORTER_FILE_NAME],
|
||||||
|
EXPORTER_THUMBNAIL_NAME: record.get(EXPORTER_THUMBNAIL_NAME),
|
||||||
|
EXPORTER_ARCHIVE_NAME: record.get(EXPORTER_ARCHIVE_NAME),
|
||||||
|
}
|
||||||
|
for manifest_path in self.manifest_paths
|
||||||
|
for record in iter_manifest_records(manifest_path)
|
||||||
|
if record["model"] == "documents.document"
|
||||||
|
]
|
||||||
|
|
||||||
for record in tqdm.tqdm(manifest_documents, disable=self.no_progress_bar):
|
for record in self.track(document_records, description="Copying files..."):
|
||||||
document = Document.objects.get(pk=record["pk"])
|
document = Document.objects.get(pk=record["pk"])
|
||||||
|
|
||||||
doc_file = record[EXPORTER_FILE_NAME]
|
doc_file = record[EXPORTER_FILE_NAME]
|
||||||
document_path = self.source / doc_file
|
document_path = self.source / doc_file
|
||||||
|
|
||||||
if EXPORTER_THUMBNAIL_NAME in record:
|
if record[EXPORTER_THUMBNAIL_NAME]:
|
||||||
thumb_file = record[EXPORTER_THUMBNAIL_NAME]
|
thumb_file = record[EXPORTER_THUMBNAIL_NAME]
|
||||||
thumbnail_path = (self.source / thumb_file).resolve()
|
thumbnail_path = (self.source / thumb_file).resolve()
|
||||||
else:
|
else:
|
||||||
thumbnail_path = None
|
thumbnail_path = None
|
||||||
|
|
||||||
if EXPORTER_ARCHIVE_NAME in record:
|
if record[EXPORTER_ARCHIVE_NAME]:
|
||||||
archive_file = record[EXPORTER_ARCHIVE_NAME]
|
archive_file = record[EXPORTER_ARCHIVE_NAME]
|
||||||
archive_path = self.source / archive_file
|
archive_path = self.source / archive_file
|
||||||
else:
|
else:
|
||||||
@@ -392,22 +402,10 @@ class Command(CryptMixin, BaseCommand):
|
|||||||
copy_file_with_basic_stats(document_path, document.source_path)
|
copy_file_with_basic_stats(document_path, document.source_path)
|
||||||
|
|
||||||
if thumbnail_path:
|
if thumbnail_path:
|
||||||
if thumbnail_path.suffix in {".png", ".PNG"}:
|
copy_file_with_basic_stats(
|
||||||
run_convert(
|
thumbnail_path,
|
||||||
density=300,
|
document.thumbnail_path,
|
||||||
scale="500x5000>",
|
)
|
||||||
alpha="remove",
|
|
||||||
strip=True,
|
|
||||||
trim=False,
|
|
||||||
auto_orient=True,
|
|
||||||
input_file=f"{thumbnail_path}[0]",
|
|
||||||
output_file=str(document.thumbnail_path),
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
copy_file_with_basic_stats(
|
|
||||||
thumbnail_path,
|
|
||||||
document.thumbnail_path,
|
|
||||||
)
|
|
||||||
|
|
||||||
if archive_path:
|
if archive_path:
|
||||||
create_source_path_directory(document.archive_path)
|
create_source_path_directory(document.archive_path)
|
||||||
@@ -418,33 +416,43 @@ class Command(CryptMixin, BaseCommand):
|
|||||||
|
|
||||||
document.save()
|
document.save()
|
||||||
|
|
||||||
|
def _decrypt_record_if_needed(self, record: dict) -> dict:
|
||||||
|
fields = self.CRYPT_FIELDS_BY_MODEL.get(record.get("model", ""))
|
||||||
|
if fields:
|
||||||
|
for field in fields:
|
||||||
|
if record["fields"].get(field):
|
||||||
|
record["fields"][field] = self.decrypt_string(
|
||||||
|
value=record["fields"][field],
|
||||||
|
)
|
||||||
|
return record
|
||||||
|
|
||||||
def decrypt_secret_fields(self) -> None:
|
def decrypt_secret_fields(self) -> None:
|
||||||
"""
|
"""
|
||||||
The converse decryption of some fields out of the export before importing to database
|
The converse decryption of some fields out of the export before importing to database.
|
||||||
|
Streams records from each manifest path and writes decrypted content to a temp file.
|
||||||
"""
|
"""
|
||||||
if self.passphrase:
|
if not self.passphrase:
|
||||||
# Salt has been loaded from metadata.json at this point, so it cannot be None
|
return
|
||||||
self.setup_crypto(passphrase=self.passphrase, salt=self.salt)
|
# Salt has been loaded from metadata.json at this point, so it cannot be None
|
||||||
|
self.setup_crypto(passphrase=self.passphrase, salt=self.salt)
|
||||||
had_at_least_one_record = False
|
self._decrypted_tmp_paths: list[Path] = []
|
||||||
|
new_paths: list[Path] = []
|
||||||
for crypt_config in self.CRYPT_FIELDS:
|
for manifest_path in self.manifest_paths:
|
||||||
importer_model: str = crypt_config["model_name"]
|
tmp = manifest_path.with_name(manifest_path.stem + ".decrypted.json")
|
||||||
crypt_fields: str = crypt_config["fields"]
|
with tmp.open("w", encoding="utf-8") as out:
|
||||||
for record in filter(
|
out.write("[\n")
|
||||||
lambda x: x["model"] == importer_model,
|
first = True
|
||||||
self.manifest,
|
for record in iter_manifest_records(manifest_path):
|
||||||
):
|
if not first:
|
||||||
had_at_least_one_record = True
|
out.write(",\n")
|
||||||
for field in crypt_fields:
|
json.dump(
|
||||||
if record["fields"][field]:
|
self._decrypt_record_if_needed(record),
|
||||||
record["fields"][field] = self.decrypt_string(
|
out,
|
||||||
value=record["fields"][field],
|
indent=2,
|
||||||
)
|
ensure_ascii=False,
|
||||||
|
)
|
||||||
if had_at_least_one_record:
|
first = False
|
||||||
# It's annoying, but the DB is loaded from the JSON directly
|
out.write("\n]\n")
|
||||||
# Maybe could change that in the future?
|
self._decrypted_tmp_paths.append(tmp)
|
||||||
(self.source / "manifest.json").write_text(
|
new_paths.append(tmp)
|
||||||
json.dumps(self.manifest, indent=2, ensure_ascii=False),
|
self.manifest_paths = new_paths
|
||||||
)
|
|
||||||
|
|||||||
@@ -8,6 +8,9 @@ from documents.tasks import index_reindex
|
|||||||
class Command(PaperlessCommand):
|
class Command(PaperlessCommand):
|
||||||
help = "Manages the document index."
|
help = "Manages the document index."
|
||||||
|
|
||||||
|
supports_progress_bar = True
|
||||||
|
supports_multiprocessing = False
|
||||||
|
|
||||||
def add_arguments(self, parser):
|
def add_arguments(self, parser):
|
||||||
super().add_arguments(parser)
|
super().add_arguments(parser)
|
||||||
parser.add_argument("command", choices=["reindex", "optimize"])
|
parser.add_argument("command", choices=["reindex", "optimize"])
|
||||||
|
|||||||
@@ -7,6 +7,9 @@ from documents.tasks import llmindex_index
|
|||||||
class Command(PaperlessCommand):
|
class Command(PaperlessCommand):
|
||||||
help = "Manages the LLM-based vector index for Paperless."
|
help = "Manages the LLM-based vector index for Paperless."
|
||||||
|
|
||||||
|
supports_progress_bar = True
|
||||||
|
supports_multiprocessing = False
|
||||||
|
|
||||||
def add_arguments(self, parser: Any) -> None:
|
def add_arguments(self, parser: Any) -> None:
|
||||||
super().add_arguments(parser)
|
super().add_arguments(parser)
|
||||||
parser.add_argument("command", choices=["rebuild", "update"])
|
parser.add_argument("command", choices=["rebuild", "update"])
|
||||||
|
|||||||
@@ -7,6 +7,9 @@ from documents.models import Document
|
|||||||
class Command(PaperlessCommand):
|
class Command(PaperlessCommand):
|
||||||
help = "Rename all documents"
|
help = "Rename all documents"
|
||||||
|
|
||||||
|
supports_progress_bar = True
|
||||||
|
supports_multiprocessing = False
|
||||||
|
|
||||||
def handle(self, *args, **options):
|
def handle(self, *args, **options):
|
||||||
for document in self.track(Document.objects.all(), description="Renaming..."):
|
for document in self.track(Document.objects.all(), description="Renaming..."):
|
||||||
post_save.send(Document, instance=document, created=False)
|
post_save.send(Document, instance=document, created=False)
|
||||||
|
|||||||
@@ -180,6 +180,9 @@ class Command(PaperlessCommand):
|
|||||||
"modified) after their initial import."
|
"modified) after their initial import."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
supports_progress_bar = True
|
||||||
|
supports_multiprocessing = False
|
||||||
|
|
||||||
def add_arguments(self, parser) -> None:
|
def add_arguments(self, parser) -> None:
|
||||||
super().add_arguments(parser)
|
super().add_arguments(parser)
|
||||||
parser.add_argument("-c", "--correspondent", default=False, action="store_true")
|
parser.add_argument("-c", "--correspondent", default=False, action="store_true")
|
||||||
|
|||||||
@@ -24,6 +24,9 @@ _LEVEL_STYLE: dict[int, tuple[str, str]] = {
|
|||||||
class Command(PaperlessCommand):
|
class Command(PaperlessCommand):
|
||||||
help = "This command checks your document archive for issues."
|
help = "This command checks your document archive for issues."
|
||||||
|
|
||||||
|
supports_progress_bar = True
|
||||||
|
supports_multiprocessing = False
|
||||||
|
|
||||||
def _render_results(self, messages: SanityCheckMessages) -> None:
|
def _render_results(self, messages: SanityCheckMessages) -> None:
|
||||||
"""Render sanity check results as a Rich table."""
|
"""Render sanity check results as a Rich table."""
|
||||||
|
|
||||||
|
|||||||
@@ -30,12 +30,14 @@ def _process_document(doc_id: int) -> None:
|
|||||||
)
|
)
|
||||||
shutil.move(thumb, document.thumbnail_path)
|
shutil.move(thumb, document.thumbnail_path)
|
||||||
finally:
|
finally:
|
||||||
|
# TODO(stumpylog): Cleanup once all parsers are handled
|
||||||
parser.cleanup()
|
parser.cleanup()
|
||||||
|
|
||||||
|
|
||||||
class Command(PaperlessCommand):
|
class Command(PaperlessCommand):
|
||||||
help = "This will regenerate the thumbnails for all documents."
|
help = "This will regenerate the thumbnails for all documents."
|
||||||
|
|
||||||
|
supports_progress_bar = True
|
||||||
supports_multiprocessing = True
|
supports_multiprocessing = True
|
||||||
|
|
||||||
def add_arguments(self, parser) -> None:
|
def add_arguments(self, parser) -> None:
|
||||||
|
|||||||
@@ -1,22 +0,0 @@
|
|||||||
import sys
|
|
||||||
|
|
||||||
from django.core.management.commands.loaddata import Command as LoadDataCommand
|
|
||||||
|
|
||||||
|
|
||||||
# This class is used to migrate data between databases
|
|
||||||
# That's difficult to test
|
|
||||||
class Command(LoadDataCommand): # pragma: no cover
|
|
||||||
"""
|
|
||||||
Allow the loading of data from standard in. Sourced originally from:
|
|
||||||
https://gist.github.com/bmispelon/ad5a2c333443b3a1d051 (MIT licensed)
|
|
||||||
"""
|
|
||||||
|
|
||||||
def parse_name(self, fixture_name):
|
|
||||||
self.compression_formats["stdin"] = (lambda x, y: sys.stdin, None)
|
|
||||||
if fixture_name == "-":
|
|
||||||
return "-", "json", "stdin"
|
|
||||||
|
|
||||||
def find_fixtures(self, fixture_label):
|
|
||||||
if fixture_label == "-":
|
|
||||||
return [("-", None, "-")]
|
|
||||||
return super().find_fixtures(fixture_label)
|
|
||||||
@@ -1,6 +1,5 @@
|
|||||||
import base64
|
import base64
|
||||||
import os
|
import os
|
||||||
from argparse import ArgumentParser
|
|
||||||
from typing import TypedDict
|
from typing import TypedDict
|
||||||
|
|
||||||
from cryptography.fernet import Fernet
|
from cryptography.fernet import Fernet
|
||||||
@@ -21,25 +20,6 @@ class CryptFields(TypedDict):
|
|||||||
fields: list[str]
|
fields: list[str]
|
||||||
|
|
||||||
|
|
||||||
class ProgressBarMixin:
|
|
||||||
"""
|
|
||||||
Many commands use a progress bar, which can be disabled
|
|
||||||
via this class
|
|
||||||
"""
|
|
||||||
|
|
||||||
def add_argument_progress_bar_mixin(self, parser: ArgumentParser) -> None:
|
|
||||||
parser.add_argument(
|
|
||||||
"--no-progress-bar",
|
|
||||||
default=False,
|
|
||||||
action="store_true",
|
|
||||||
help="If set, the progress bar will not be shown",
|
|
||||||
)
|
|
||||||
|
|
||||||
def handle_progress_bar_mixin(self, *args, **options) -> None:
|
|
||||||
self.no_progress_bar = options["no_progress_bar"]
|
|
||||||
self.use_progress_bar = not self.no_progress_bar
|
|
||||||
|
|
||||||
|
|
||||||
class CryptMixin:
|
class CryptMixin:
|
||||||
"""
|
"""
|
||||||
Fully based on:
|
Fully based on:
|
||||||
@@ -71,7 +51,7 @@ class CryptMixin:
|
|||||||
key_size = 32
|
key_size = 32
|
||||||
kdf_algorithm = "pbkdf2_sha256"
|
kdf_algorithm = "pbkdf2_sha256"
|
||||||
|
|
||||||
CRYPT_FIELDS: CryptFields = [
|
CRYPT_FIELDS: list[CryptFields] = [
|
||||||
{
|
{
|
||||||
"exporter_key": "mail_accounts",
|
"exporter_key": "mail_accounts",
|
||||||
"model_name": "paperless_mail.mailaccount",
|
"model_name": "paperless_mail.mailaccount",
|
||||||
@@ -89,6 +69,10 @@ class CryptMixin:
|
|||||||
],
|
],
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
# O(1) lookup for per-record encryption; derived from CRYPT_FIELDS at class definition time
|
||||||
|
CRYPT_FIELDS_BY_MODEL: dict[str, list[str]] = {
|
||||||
|
cfg["model_name"]: cfg["fields"] for cfg in CRYPT_FIELDS
|
||||||
|
}
|
||||||
|
|
||||||
def get_crypt_params(self) -> dict[str, dict[str, str | int]]:
|
def get_crypt_params(self) -> dict[str, dict[str, str | int]]:
|
||||||
return {
|
return {
|
||||||
|
|||||||
@@ -9,6 +9,9 @@ class Command(PaperlessCommand):
|
|||||||
|
|
||||||
help = "Prunes the audit logs of objects that no longer exist."
|
help = "Prunes the audit logs of objects that no longer exist."
|
||||||
|
|
||||||
|
supports_progress_bar = True
|
||||||
|
supports_multiprocessing = False
|
||||||
|
|
||||||
def handle(self, *args, **options):
|
def handle(self, *args, **options):
|
||||||
with transaction.atomic():
|
with transaction.atomic():
|
||||||
for log_entry in self.track(
|
for log_entry in self.track(
|
||||||
|
|||||||
@@ -703,15 +703,6 @@ class StoragePathField(serializers.PrimaryKeyRelatedField):
|
|||||||
|
|
||||||
|
|
||||||
class CustomFieldSerializer(serializers.ModelSerializer):
|
class CustomFieldSerializer(serializers.ModelSerializer):
|
||||||
def __init__(self, *args, **kwargs):
|
|
||||||
context = kwargs.get("context")
|
|
||||||
self.api_version = int(
|
|
||||||
context.get("request").version
|
|
||||||
if context and context.get("request")
|
|
||||||
else settings.REST_FRAMEWORK["DEFAULT_VERSION"],
|
|
||||||
)
|
|
||||||
super().__init__(*args, **kwargs)
|
|
||||||
|
|
||||||
data_type = serializers.ChoiceField(
|
data_type = serializers.ChoiceField(
|
||||||
choices=CustomField.FieldDataType,
|
choices=CustomField.FieldDataType,
|
||||||
read_only=False,
|
read_only=False,
|
||||||
@@ -791,38 +782,6 @@ class CustomFieldSerializer(serializers.ModelSerializer):
|
|||||||
)
|
)
|
||||||
return super().validate(attrs)
|
return super().validate(attrs)
|
||||||
|
|
||||||
def to_internal_value(self, data):
|
|
||||||
ret = super().to_internal_value(data)
|
|
||||||
|
|
||||||
if (
|
|
||||||
self.api_version < 7
|
|
||||||
and ret.get("data_type", "") == CustomField.FieldDataType.SELECT
|
|
||||||
and isinstance(ret.get("extra_data", {}).get("select_options"), list)
|
|
||||||
):
|
|
||||||
ret["extra_data"]["select_options"] = [
|
|
||||||
{
|
|
||||||
"label": option,
|
|
||||||
"id": get_random_string(length=16),
|
|
||||||
}
|
|
||||||
for option in ret["extra_data"]["select_options"]
|
|
||||||
]
|
|
||||||
|
|
||||||
return ret
|
|
||||||
|
|
||||||
def to_representation(self, instance):
|
|
||||||
ret = super().to_representation(instance)
|
|
||||||
|
|
||||||
if (
|
|
||||||
self.api_version < 7
|
|
||||||
and instance.data_type == CustomField.FieldDataType.SELECT
|
|
||||||
):
|
|
||||||
# Convert the select options with ids to a list of strings
|
|
||||||
ret["extra_data"]["select_options"] = [
|
|
||||||
option["label"] for option in ret["extra_data"]["select_options"]
|
|
||||||
]
|
|
||||||
|
|
||||||
return ret
|
|
||||||
|
|
||||||
|
|
||||||
class ReadWriteSerializerMethodField(serializers.SerializerMethodField):
|
class ReadWriteSerializerMethodField(serializers.SerializerMethodField):
|
||||||
"""
|
"""
|
||||||
@@ -937,50 +896,6 @@ class CustomFieldInstanceSerializer(serializers.ModelSerializer):
|
|||||||
|
|
||||||
return data
|
return data
|
||||||
|
|
||||||
def get_api_version(self):
|
|
||||||
return int(
|
|
||||||
self.context.get("request").version
|
|
||||||
if self.context.get("request")
|
|
||||||
else settings.REST_FRAMEWORK["DEFAULT_VERSION"],
|
|
||||||
)
|
|
||||||
|
|
||||||
def to_internal_value(self, data):
|
|
||||||
ret = super().to_internal_value(data)
|
|
||||||
|
|
||||||
if (
|
|
||||||
self.get_api_version() < 7
|
|
||||||
and ret.get("field").data_type == CustomField.FieldDataType.SELECT
|
|
||||||
and ret.get("value") is not None
|
|
||||||
):
|
|
||||||
# Convert the index of the option in the field.extra_data["select_options"]
|
|
||||||
# list to the options unique id
|
|
||||||
ret["value"] = ret.get("field").extra_data["select_options"][ret["value"]][
|
|
||||||
"id"
|
|
||||||
]
|
|
||||||
|
|
||||||
return ret
|
|
||||||
|
|
||||||
def to_representation(self, instance):
|
|
||||||
ret = super().to_representation(instance)
|
|
||||||
|
|
||||||
if (
|
|
||||||
self.get_api_version() < 7
|
|
||||||
and instance.field.data_type == CustomField.FieldDataType.SELECT
|
|
||||||
):
|
|
||||||
# return the index of the option in the field.extra_data["select_options"] list
|
|
||||||
ret["value"] = next(
|
|
||||||
(
|
|
||||||
idx
|
|
||||||
for idx, option in enumerate(
|
|
||||||
instance.field.extra_data["select_options"],
|
|
||||||
)
|
|
||||||
if option["id"] == instance.value
|
|
||||||
),
|
|
||||||
None,
|
|
||||||
)
|
|
||||||
|
|
||||||
return ret
|
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
model = CustomFieldInstance
|
model = CustomFieldInstance
|
||||||
fields = [
|
fields = [
|
||||||
@@ -1004,20 +919,6 @@ class NotesSerializer(serializers.ModelSerializer):
|
|||||||
fields = ["id", "note", "created", "user"]
|
fields = ["id", "note", "created", "user"]
|
||||||
ordering = ["-created"]
|
ordering = ["-created"]
|
||||||
|
|
||||||
def to_representation(self, instance):
|
|
||||||
ret = super().to_representation(instance)
|
|
||||||
|
|
||||||
request = self.context.get("request")
|
|
||||||
api_version = int(
|
|
||||||
request.version if request else settings.REST_FRAMEWORK["DEFAULT_VERSION"],
|
|
||||||
)
|
|
||||||
|
|
||||||
if api_version < 8 and "user" in ret:
|
|
||||||
user_id = ret["user"]["id"]
|
|
||||||
ret["user"] = user_id
|
|
||||||
|
|
||||||
return ret
|
|
||||||
|
|
||||||
|
|
||||||
def _get_viewable_duplicates(
|
def _get_viewable_duplicates(
|
||||||
document: Document,
|
document: Document,
|
||||||
@@ -1172,22 +1073,6 @@ class DocumentSerializer(
|
|||||||
doc["content"] = getattr(instance, "effective_content") or ""
|
doc["content"] = getattr(instance, "effective_content") or ""
|
||||||
if self.truncate_content and "content" in self.fields:
|
if self.truncate_content and "content" in self.fields:
|
||||||
doc["content"] = doc.get("content")[0:550]
|
doc["content"] = doc.get("content")[0:550]
|
||||||
|
|
||||||
request = self.context.get("request")
|
|
||||||
api_version = int(
|
|
||||||
request.version if request else settings.REST_FRAMEWORK["DEFAULT_VERSION"],
|
|
||||||
)
|
|
||||||
|
|
||||||
if api_version < 9 and "created" in self.fields:
|
|
||||||
# provide created as a datetime for backwards compatibility
|
|
||||||
from django.utils import timezone
|
|
||||||
|
|
||||||
doc["created"] = timezone.make_aware(
|
|
||||||
datetime.combine(
|
|
||||||
instance.created,
|
|
||||||
datetime.min.time(),
|
|
||||||
),
|
|
||||||
).isoformat()
|
|
||||||
return doc
|
return doc
|
||||||
|
|
||||||
def to_internal_value(self, data):
|
def to_internal_value(self, data):
|
||||||
@@ -1440,6 +1325,124 @@ class SavedViewSerializer(OwnedObjectSerializer):
|
|||||||
"set_permissions",
|
"set_permissions",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
def _get_api_version(self) -> int:
|
||||||
|
request = self.context.get("request")
|
||||||
|
return int(
|
||||||
|
request.version if request else settings.REST_FRAMEWORK["DEFAULT_VERSION"],
|
||||||
|
)
|
||||||
|
|
||||||
|
def _update_legacy_visibility_preferences(
|
||||||
|
self,
|
||||||
|
saved_view_id: int,
|
||||||
|
*,
|
||||||
|
show_on_dashboard: bool | None,
|
||||||
|
show_in_sidebar: bool | None,
|
||||||
|
) -> UiSettings | None:
|
||||||
|
if show_on_dashboard is None and show_in_sidebar is None:
|
||||||
|
return None
|
||||||
|
|
||||||
|
request = self.context.get("request")
|
||||||
|
user = request.user if request else self.user
|
||||||
|
if user is None:
|
||||||
|
return None
|
||||||
|
|
||||||
|
ui_settings, _ = UiSettings.objects.get_or_create(
|
||||||
|
user=user,
|
||||||
|
defaults={"settings": {}},
|
||||||
|
)
|
||||||
|
current_settings = (
|
||||||
|
ui_settings.settings if isinstance(ui_settings.settings, dict) else {}
|
||||||
|
)
|
||||||
|
current_settings = dict(current_settings)
|
||||||
|
|
||||||
|
saved_views_settings = current_settings.get("saved_views")
|
||||||
|
if isinstance(saved_views_settings, dict):
|
||||||
|
saved_views_settings = dict(saved_views_settings)
|
||||||
|
else:
|
||||||
|
saved_views_settings = {}
|
||||||
|
|
||||||
|
dashboard_ids = {
|
||||||
|
int(raw_id)
|
||||||
|
for raw_id in saved_views_settings.get("dashboard_views_visible_ids", [])
|
||||||
|
if str(raw_id).isdigit()
|
||||||
|
}
|
||||||
|
sidebar_ids = {
|
||||||
|
int(raw_id)
|
||||||
|
for raw_id in saved_views_settings.get("sidebar_views_visible_ids", [])
|
||||||
|
if str(raw_id).isdigit()
|
||||||
|
}
|
||||||
|
|
||||||
|
if show_on_dashboard is not None:
|
||||||
|
if show_on_dashboard:
|
||||||
|
dashboard_ids.add(saved_view_id)
|
||||||
|
else:
|
||||||
|
dashboard_ids.discard(saved_view_id)
|
||||||
|
if show_in_sidebar is not None:
|
||||||
|
if show_in_sidebar:
|
||||||
|
sidebar_ids.add(saved_view_id)
|
||||||
|
else:
|
||||||
|
sidebar_ids.discard(saved_view_id)
|
||||||
|
|
||||||
|
saved_views_settings["dashboard_views_visible_ids"] = sorted(dashboard_ids)
|
||||||
|
saved_views_settings["sidebar_views_visible_ids"] = sorted(sidebar_ids)
|
||||||
|
current_settings["saved_views"] = saved_views_settings
|
||||||
|
ui_settings.settings = current_settings
|
||||||
|
ui_settings.save(update_fields=["settings"])
|
||||||
|
return ui_settings
|
||||||
|
|
||||||
|
def to_representation(self, instance):
|
||||||
|
# TODO: remove this and related backwards compatibility code when API v9 is dropped
|
||||||
|
ret = super().to_representation(instance)
|
||||||
|
request = self.context.get("request")
|
||||||
|
api_version = self._get_api_version()
|
||||||
|
|
||||||
|
if api_version < 10:
|
||||||
|
dashboard_ids = set()
|
||||||
|
sidebar_ids = set()
|
||||||
|
user = request.user if request else None
|
||||||
|
if user is not None and hasattr(user, "ui_settings"):
|
||||||
|
ui_settings = user.ui_settings.settings or None
|
||||||
|
saved_views = None
|
||||||
|
if isinstance(ui_settings, dict):
|
||||||
|
saved_views = ui_settings.get("saved_views", {})
|
||||||
|
if isinstance(saved_views, dict):
|
||||||
|
dashboard_ids = set(
|
||||||
|
saved_views.get("dashboard_views_visible_ids", []),
|
||||||
|
)
|
||||||
|
sidebar_ids = set(
|
||||||
|
saved_views.get("sidebar_views_visible_ids", []),
|
||||||
|
)
|
||||||
|
ret["show_on_dashboard"] = instance.id in dashboard_ids
|
||||||
|
ret["show_in_sidebar"] = instance.id in sidebar_ids
|
||||||
|
|
||||||
|
return ret
|
||||||
|
|
||||||
|
def to_internal_value(self, data):
|
||||||
|
# TODO: remove this and related backwards compatibility code when API v9 is dropped
|
||||||
|
api_version = self._get_api_version()
|
||||||
|
if api_version >= 10:
|
||||||
|
return super().to_internal_value(data)
|
||||||
|
|
||||||
|
normalized_data = data.copy()
|
||||||
|
legacy_visibility_fields = {}
|
||||||
|
boolean_field = serializers.BooleanField()
|
||||||
|
|
||||||
|
for field_name in ("show_on_dashboard", "show_in_sidebar"):
|
||||||
|
if field_name in normalized_data:
|
||||||
|
try:
|
||||||
|
legacy_visibility_fields[field_name] = (
|
||||||
|
boolean_field.to_internal_value(
|
||||||
|
normalized_data.get(field_name),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
except serializers.ValidationError as exc:
|
||||||
|
raise serializers.ValidationError({field_name: exc.detail})
|
||||||
|
del normalized_data[field_name]
|
||||||
|
|
||||||
|
ret = super().to_internal_value(normalized_data)
|
||||||
|
ret.update(legacy_visibility_fields)
|
||||||
|
return ret
|
||||||
|
|
||||||
def validate(self, attrs):
|
def validate(self, attrs):
|
||||||
attrs = super().validate(attrs)
|
attrs = super().validate(attrs)
|
||||||
if "display_fields" in attrs and attrs["display_fields"] is not None:
|
if "display_fields" in attrs and attrs["display_fields"] is not None:
|
||||||
@@ -1459,6 +1462,9 @@ class SavedViewSerializer(OwnedObjectSerializer):
|
|||||||
return attrs
|
return attrs
|
||||||
|
|
||||||
def update(self, instance, validated_data):
|
def update(self, instance, validated_data):
|
||||||
|
request = self.context.get("request")
|
||||||
|
show_on_dashboard = validated_data.pop("show_on_dashboard", None)
|
||||||
|
show_in_sidebar = validated_data.pop("show_in_sidebar", None)
|
||||||
if "filter_rules" in validated_data:
|
if "filter_rules" in validated_data:
|
||||||
rules_data = validated_data.pop("filter_rules")
|
rules_data = validated_data.pop("filter_rules")
|
||||||
else:
|
else:
|
||||||
@@ -1480,9 +1486,19 @@ class SavedViewSerializer(OwnedObjectSerializer):
|
|||||||
SavedViewFilterRule.objects.filter(saved_view=instance).delete()
|
SavedViewFilterRule.objects.filter(saved_view=instance).delete()
|
||||||
for rule_data in rules_data:
|
for rule_data in rules_data:
|
||||||
SavedViewFilterRule.objects.create(saved_view=instance, **rule_data)
|
SavedViewFilterRule.objects.create(saved_view=instance, **rule_data)
|
||||||
|
ui_settings = self._update_legacy_visibility_preferences(
|
||||||
|
instance.id,
|
||||||
|
show_on_dashboard=show_on_dashboard,
|
||||||
|
show_in_sidebar=show_in_sidebar,
|
||||||
|
)
|
||||||
|
if request is not None and ui_settings is not None:
|
||||||
|
request.user.ui_settings = ui_settings
|
||||||
return instance
|
return instance
|
||||||
|
|
||||||
def create(self, validated_data):
|
def create(self, validated_data):
|
||||||
|
request = self.context.get("request")
|
||||||
|
show_on_dashboard = validated_data.pop("show_on_dashboard", None)
|
||||||
|
show_in_sidebar = validated_data.pop("show_in_sidebar", None)
|
||||||
rules_data = validated_data.pop("filter_rules")
|
rules_data = validated_data.pop("filter_rules")
|
||||||
if "user" in validated_data:
|
if "user" in validated_data:
|
||||||
# backwards compatibility
|
# backwards compatibility
|
||||||
@@ -1490,6 +1506,13 @@ class SavedViewSerializer(OwnedObjectSerializer):
|
|||||||
saved_view = super().create(validated_data)
|
saved_view = super().create(validated_data)
|
||||||
for rule_data in rules_data:
|
for rule_data in rules_data:
|
||||||
SavedViewFilterRule.objects.create(saved_view=saved_view, **rule_data)
|
SavedViewFilterRule.objects.create(saved_view=saved_view, **rule_data)
|
||||||
|
ui_settings = self._update_legacy_visibility_preferences(
|
||||||
|
saved_view.id,
|
||||||
|
show_on_dashboard=show_on_dashboard,
|
||||||
|
show_in_sidebar=show_in_sidebar,
|
||||||
|
)
|
||||||
|
if request is not None and ui_settings is not None:
|
||||||
|
request.user.ui_settings = ui_settings
|
||||||
return saved_view
|
return saved_view
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -399,6 +399,7 @@ def update_document_content_maybe_archive_file(document_id) -> None:
|
|||||||
f"Error while parsing document {document} (ID: {document_id})",
|
f"Error while parsing document {document} (ID: {document_id})",
|
||||||
)
|
)
|
||||||
finally:
|
finally:
|
||||||
|
# TODO(stumpylog): Cleanup once all parsers are handled
|
||||||
parser.cleanup()
|
parser.cleanup()
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -323,113 +323,6 @@ class TestCustomFieldsAPI(DirectoriesMixin, APITestCase):
|
|||||||
|
|
||||||
mock_delay.assert_called_once_with(cf_select)
|
mock_delay.assert_called_once_with(cf_select)
|
||||||
|
|
||||||
def test_custom_field_select_old_version(self) -> None:
|
|
||||||
"""
|
|
||||||
GIVEN:
|
|
||||||
- Nothing
|
|
||||||
WHEN:
|
|
||||||
- API post request is made for custom fields with api version header < 7
|
|
||||||
- API get request is made for custom fields with api version header < 7
|
|
||||||
THEN:
|
|
||||||
- The select options are created with unique ids
|
|
||||||
- The select options are returned in the old format
|
|
||||||
"""
|
|
||||||
resp = self.client.post(
|
|
||||||
self.ENDPOINT,
|
|
||||||
headers={"Accept": "application/json; version=6"},
|
|
||||||
data=json.dumps(
|
|
||||||
{
|
|
||||||
"data_type": "select",
|
|
||||||
"name": "Select Field",
|
|
||||||
"extra_data": {
|
|
||||||
"select_options": [
|
|
||||||
"Option 1",
|
|
||||||
"Option 2",
|
|
||||||
],
|
|
||||||
},
|
|
||||||
},
|
|
||||||
),
|
|
||||||
content_type="application/json",
|
|
||||||
)
|
|
||||||
self.assertEqual(resp.status_code, status.HTTP_201_CREATED)
|
|
||||||
|
|
||||||
field = CustomField.objects.get(name="Select Field")
|
|
||||||
self.assertEqual(
|
|
||||||
field.extra_data["select_options"],
|
|
||||||
[
|
|
||||||
{"label": "Option 1", "id": ANY},
|
|
||||||
{"label": "Option 2", "id": ANY},
|
|
||||||
],
|
|
||||||
)
|
|
||||||
|
|
||||||
resp = self.client.get(
|
|
||||||
f"{self.ENDPOINT}{field.id}/",
|
|
||||||
headers={"Accept": "application/json; version=6"},
|
|
||||||
)
|
|
||||||
self.assertEqual(resp.status_code, status.HTTP_200_OK)
|
|
||||||
|
|
||||||
data = resp.json()
|
|
||||||
self.assertEqual(
|
|
||||||
data["extra_data"]["select_options"],
|
|
||||||
[
|
|
||||||
"Option 1",
|
|
||||||
"Option 2",
|
|
||||||
],
|
|
||||||
)
|
|
||||||
|
|
||||||
def test_custom_field_select_value_old_version(self) -> None:
|
|
||||||
"""
|
|
||||||
GIVEN:
|
|
||||||
- Existing document with custom field select
|
|
||||||
WHEN:
|
|
||||||
- API post request is made to add the field for document with api version header < 7
|
|
||||||
- API get request is made for document with api version header < 7
|
|
||||||
THEN:
|
|
||||||
- The select value is returned in the old format, the index of the option
|
|
||||||
"""
|
|
||||||
custom_field_select = CustomField.objects.create(
|
|
||||||
name="Select Field",
|
|
||||||
data_type=CustomField.FieldDataType.SELECT,
|
|
||||||
extra_data={
|
|
||||||
"select_options": [
|
|
||||||
{"label": "Option 1", "id": "abc-123"},
|
|
||||||
{"label": "Option 2", "id": "def-456"},
|
|
||||||
],
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
doc = Document.objects.create(
|
|
||||||
title="WOW",
|
|
||||||
content="the content",
|
|
||||||
checksum="123",
|
|
||||||
mime_type="application/pdf",
|
|
||||||
)
|
|
||||||
|
|
||||||
resp = self.client.patch(
|
|
||||||
f"/api/documents/{doc.id}/",
|
|
||||||
headers={"Accept": "application/json; version=6"},
|
|
||||||
data=json.dumps(
|
|
||||||
{
|
|
||||||
"custom_fields": [
|
|
||||||
{"field": custom_field_select.id, "value": 1},
|
|
||||||
],
|
|
||||||
},
|
|
||||||
),
|
|
||||||
content_type="application/json",
|
|
||||||
)
|
|
||||||
self.assertEqual(resp.status_code, status.HTTP_200_OK)
|
|
||||||
doc.refresh_from_db()
|
|
||||||
self.assertEqual(doc.custom_fields.first().value, "def-456")
|
|
||||||
|
|
||||||
resp = self.client.get(
|
|
||||||
f"/api/documents/{doc.id}/",
|
|
||||||
headers={"Accept": "application/json; version=6"},
|
|
||||||
)
|
|
||||||
self.assertEqual(resp.status_code, status.HTTP_200_OK)
|
|
||||||
|
|
||||||
data = resp.json()
|
|
||||||
self.assertEqual(data["custom_fields"][0]["value"], 1)
|
|
||||||
|
|
||||||
def test_create_custom_field_monetary_validation(self) -> None:
|
def test_create_custom_field_monetary_validation(self) -> None:
|
||||||
"""
|
"""
|
||||||
GIVEN:
|
GIVEN:
|
||||||
|
|||||||
@@ -41,6 +41,7 @@ from documents.models import SavedView
|
|||||||
from documents.models import ShareLink
|
from documents.models import ShareLink
|
||||||
from documents.models import StoragePath
|
from documents.models import StoragePath
|
||||||
from documents.models import Tag
|
from documents.models import Tag
|
||||||
|
from documents.models import UiSettings
|
||||||
from documents.models import Workflow
|
from documents.models import Workflow
|
||||||
from documents.models import WorkflowAction
|
from documents.models import WorkflowAction
|
||||||
from documents.models import WorkflowTrigger
|
from documents.models import WorkflowTrigger
|
||||||
@@ -176,7 +177,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
|
|||||||
results = response.data["results"]
|
results = response.data["results"]
|
||||||
self.assertEqual(len(results[0]), 0)
|
self.assertEqual(len(results[0]), 0)
|
||||||
|
|
||||||
def test_document_fields_api_version_8_respects_created(self) -> None:
|
def test_document_fields_respects_created(self) -> None:
|
||||||
Document.objects.create(
|
Document.objects.create(
|
||||||
title="legacy",
|
title="legacy",
|
||||||
checksum="123",
|
checksum="123",
|
||||||
@@ -186,7 +187,6 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
|
|||||||
|
|
||||||
response = self.client.get(
|
response = self.client.get(
|
||||||
"/api/documents/?fields=id",
|
"/api/documents/?fields=id",
|
||||||
headers={"Accept": "application/json; version=8"},
|
|
||||||
format="json",
|
format="json",
|
||||||
)
|
)
|
||||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||||
@@ -196,25 +196,22 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
|
|||||||
|
|
||||||
response = self.client.get(
|
response = self.client.get(
|
||||||
"/api/documents/?fields=id,created",
|
"/api/documents/?fields=id,created",
|
||||||
headers={"Accept": "application/json; version=8"},
|
|
||||||
format="json",
|
format="json",
|
||||||
)
|
)
|
||||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||||
results = response.data["results"]
|
results = response.data["results"]
|
||||||
self.assertIn("id", results[0])
|
self.assertIn("id", results[0])
|
||||||
self.assertIn("created", results[0])
|
self.assertIn("created", results[0])
|
||||||
self.assertRegex(results[0]["created"], r"^2024-01-15T00:00:00.*$")
|
self.assertEqual(results[0]["created"], "2024-01-15")
|
||||||
|
|
||||||
def test_document_legacy_created_format(self) -> None:
|
def test_document_created_format(self) -> None:
|
||||||
"""
|
"""
|
||||||
GIVEN:
|
GIVEN:
|
||||||
- Existing document
|
- Existing document
|
||||||
WHEN:
|
WHEN:
|
||||||
- Document is requested with api version ≥ 9
|
- Document is requested
|
||||||
- Document is requested with api version < 9
|
|
||||||
THEN:
|
THEN:
|
||||||
- Document created field is returned as date
|
- Document created field is returned as date
|
||||||
- Document created field is returned as datetime
|
|
||||||
"""
|
"""
|
||||||
doc = Document.objects.create(
|
doc = Document.objects.create(
|
||||||
title="none",
|
title="none",
|
||||||
@@ -225,14 +222,6 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
|
|||||||
|
|
||||||
response = self.client.get(
|
response = self.client.get(
|
||||||
f"/api/documents/{doc.pk}/",
|
f"/api/documents/{doc.pk}/",
|
||||||
headers={"Accept": "application/json; version=8"},
|
|
||||||
)
|
|
||||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
|
||||||
self.assertRegex(response.data["created"], r"^2023-01-01T00:00:00.*$")
|
|
||||||
|
|
||||||
response = self.client.get(
|
|
||||||
f"/api/documents/{doc.pk}/",
|
|
||||||
headers={"Accept": "application/json; version=9"},
|
|
||||||
)
|
)
|
||||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||||
self.assertEqual(response.data["created"], "2023-01-01")
|
self.assertEqual(response.data["created"], "2023-01-01")
|
||||||
@@ -2200,6 +2189,205 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
|
|||||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||||
self.assertEqual(response.data["count"], 0)
|
self.assertEqual(response.data["count"], 0)
|
||||||
|
|
||||||
|
def test_saved_view_api_version_backward_compatibility(self) -> None:
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- Saved views and UiSettings with visibility preferences
|
||||||
|
WHEN:
|
||||||
|
- API request with version=9 (legacy)
|
||||||
|
- API request with version=10 (current)
|
||||||
|
THEN:
|
||||||
|
- Version 9 returns show_on_dashboard and show_in_sidebar from UiSettings
|
||||||
|
- Version 10 omits these fields (moved to UiSettings)
|
||||||
|
"""
|
||||||
|
v1 = SavedView.objects.create(
|
||||||
|
owner=self.user,
|
||||||
|
name="dashboard_view",
|
||||||
|
sort_field="created",
|
||||||
|
)
|
||||||
|
v2 = SavedView.objects.create(
|
||||||
|
owner=self.user,
|
||||||
|
name="sidebar_view",
|
||||||
|
sort_field="created",
|
||||||
|
)
|
||||||
|
v3 = SavedView.objects.create(
|
||||||
|
owner=self.user,
|
||||||
|
name="hidden_view",
|
||||||
|
sort_field="created",
|
||||||
|
)
|
||||||
|
|
||||||
|
UiSettings.objects.update_or_create(
|
||||||
|
user=self.user,
|
||||||
|
defaults={
|
||||||
|
"settings": {
|
||||||
|
"saved_views": {
|
||||||
|
"dashboard_views_visible_ids": [v1.id],
|
||||||
|
"sidebar_views_visible_ids": [v2.id],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
response_v9 = self.client.get(
|
||||||
|
"/api/saved_views/",
|
||||||
|
headers={"Accept": "application/json; version=9"},
|
||||||
|
format="json",
|
||||||
|
)
|
||||||
|
self.assertEqual(response_v9.status_code, status.HTTP_200_OK)
|
||||||
|
results_v9 = {r["id"]: r for r in response_v9.data["results"]}
|
||||||
|
self.assertIn("show_on_dashboard", results_v9[v1.id])
|
||||||
|
self.assertIn("show_in_sidebar", results_v9[v1.id])
|
||||||
|
self.assertTrue(results_v9[v1.id]["show_on_dashboard"])
|
||||||
|
self.assertFalse(results_v9[v1.id]["show_in_sidebar"])
|
||||||
|
self.assertTrue(results_v9[v2.id]["show_in_sidebar"])
|
||||||
|
self.assertFalse(results_v9[v2.id]["show_on_dashboard"])
|
||||||
|
self.assertFalse(results_v9[v3.id]["show_on_dashboard"])
|
||||||
|
self.assertFalse(results_v9[v3.id]["show_in_sidebar"])
|
||||||
|
|
||||||
|
response_v10 = self.client.get(
|
||||||
|
"/api/saved_views/",
|
||||||
|
headers={"Accept": "application/json; version=10"},
|
||||||
|
format="json",
|
||||||
|
)
|
||||||
|
self.assertEqual(response_v10.status_code, status.HTTP_200_OK)
|
||||||
|
results_v10 = {r["id"]: r for r in response_v10.data["results"]}
|
||||||
|
self.assertNotIn("show_on_dashboard", results_v10[v1.id])
|
||||||
|
self.assertNotIn("show_in_sidebar", results_v10[v1.id])
|
||||||
|
|
||||||
|
def test_saved_view_api_version_9_user_without_ui_settings(self) -> None:
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- User with no UiSettings and a saved view
|
||||||
|
WHEN:
|
||||||
|
- API request with version=9
|
||||||
|
THEN:
|
||||||
|
- show_on_dashboard and show_in_sidebar are False (default)
|
||||||
|
"""
|
||||||
|
SavedView.objects.create(
|
||||||
|
owner=self.user,
|
||||||
|
name="test_view",
|
||||||
|
sort_field="created",
|
||||||
|
)
|
||||||
|
UiSettings.objects.filter(user=self.user).delete()
|
||||||
|
|
||||||
|
response = self.client.get(
|
||||||
|
"/api/saved_views/",
|
||||||
|
headers={"Accept": "application/json; version=9"},
|
||||||
|
format="json",
|
||||||
|
)
|
||||||
|
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||||
|
result = response.data["results"][0]
|
||||||
|
self.assertFalse(result["show_on_dashboard"])
|
||||||
|
self.assertFalse(result["show_in_sidebar"])
|
||||||
|
|
||||||
|
def test_saved_view_api_version_9_create_writes_visibility_to_ui_settings(
|
||||||
|
self,
|
||||||
|
) -> None:
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- No UiSettings for the current user
|
||||||
|
WHEN:
|
||||||
|
- A saved view is created through API version 9 with visibility flags
|
||||||
|
THEN:
|
||||||
|
- Visibility is persisted in UiSettings.saved_views
|
||||||
|
"""
|
||||||
|
UiSettings.objects.filter(user=self.user).delete()
|
||||||
|
|
||||||
|
response = self.client.post(
|
||||||
|
"/api/saved_views/",
|
||||||
|
{
|
||||||
|
"name": "legacy-v9-create",
|
||||||
|
"sort_field": "created",
|
||||||
|
"filter_rules": [],
|
||||||
|
"show_on_dashboard": True,
|
||||||
|
"show_in_sidebar": False,
|
||||||
|
},
|
||||||
|
headers={"Accept": "application/json; version=9"},
|
||||||
|
format="json",
|
||||||
|
)
|
||||||
|
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
|
||||||
|
self.assertTrue(response.data["show_on_dashboard"])
|
||||||
|
self.assertFalse(response.data["show_in_sidebar"])
|
||||||
|
|
||||||
|
self.user.refresh_from_db()
|
||||||
|
self.assertTrue(hasattr(self.user, "ui_settings"))
|
||||||
|
saved_view_settings = self.user.ui_settings.settings["saved_views"]
|
||||||
|
self.assertListEqual(
|
||||||
|
saved_view_settings["dashboard_views_visible_ids"],
|
||||||
|
[response.data["id"]],
|
||||||
|
)
|
||||||
|
self.assertListEqual(saved_view_settings["sidebar_views_visible_ids"], [])
|
||||||
|
|
||||||
|
def test_saved_view_api_version_9_patch_writes_visibility_to_ui_settings(
|
||||||
|
self,
|
||||||
|
) -> None:
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- Existing saved views and UiSettings visibility ids
|
||||||
|
WHEN:
|
||||||
|
- A saved view is updated through API version 9 visibility flags
|
||||||
|
THEN:
|
||||||
|
- The per-user UiSettings visibility ids are updated
|
||||||
|
"""
|
||||||
|
v1 = SavedView.objects.create(
|
||||||
|
owner=self.user,
|
||||||
|
name="legacy-v9-patch-1",
|
||||||
|
sort_field="created",
|
||||||
|
)
|
||||||
|
v2 = SavedView.objects.create(
|
||||||
|
owner=self.user,
|
||||||
|
name="legacy-v9-patch-2",
|
||||||
|
sort_field="created",
|
||||||
|
)
|
||||||
|
UiSettings.objects.update_or_create(
|
||||||
|
user=self.user,
|
||||||
|
defaults={
|
||||||
|
"settings": {
|
||||||
|
"saved_views": {
|
||||||
|
"dashboard_views_visible_ids": [v1.id],
|
||||||
|
"sidebar_views_visible_ids": [v1.id, v2.id],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
response = self.client.patch(
|
||||||
|
f"/api/saved_views/{v1.id}/",
|
||||||
|
{
|
||||||
|
"show_on_dashboard": False,
|
||||||
|
},
|
||||||
|
headers={"Accept": "application/json; version=9"},
|
||||||
|
format="json",
|
||||||
|
)
|
||||||
|
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||||
|
self.assertFalse(response.data["show_on_dashboard"])
|
||||||
|
self.assertTrue(response.data["show_in_sidebar"])
|
||||||
|
|
||||||
|
self.user.refresh_from_db()
|
||||||
|
saved_view_settings = self.user.ui_settings.settings["saved_views"]
|
||||||
|
self.assertListEqual(saved_view_settings["dashboard_views_visible_ids"], [])
|
||||||
|
self.assertListEqual(
|
||||||
|
saved_view_settings["sidebar_views_visible_ids"],
|
||||||
|
[v1.id, v2.id],
|
||||||
|
)
|
||||||
|
|
||||||
|
response = self.client.patch(
|
||||||
|
f"/api/saved_views/{v1.id}/",
|
||||||
|
{
|
||||||
|
"show_in_sidebar": False,
|
||||||
|
},
|
||||||
|
headers={"Accept": "application/json; version=9"},
|
||||||
|
format="json",
|
||||||
|
)
|
||||||
|
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||||
|
self.assertFalse(response.data["show_on_dashboard"])
|
||||||
|
self.assertFalse(response.data["show_in_sidebar"])
|
||||||
|
|
||||||
|
self.user.refresh_from_db()
|
||||||
|
saved_view_settings = self.user.ui_settings.settings["saved_views"]
|
||||||
|
self.assertListEqual(saved_view_settings["dashboard_views_visible_ids"], [])
|
||||||
|
self.assertListEqual(saved_view_settings["sidebar_views_visible_ids"], [v2.id])
|
||||||
|
|
||||||
def test_saved_view_create_update_patch(self) -> None:
|
def test_saved_view_create_update_patch(self) -> None:
|
||||||
User.objects.create_user("user1")
|
User.objects.create_user("user1")
|
||||||
|
|
||||||
@@ -2603,26 +2791,6 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
|
|||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_docnote_serializer_v7(self) -> None:
|
|
||||||
doc = Document.objects.create(
|
|
||||||
title="test",
|
|
||||||
mime_type="application/pdf",
|
|
||||||
content="this is a document which will have notes!",
|
|
||||||
)
|
|
||||||
Note.objects.create(
|
|
||||||
note="This is a note.",
|
|
||||||
document=doc,
|
|
||||||
user=self.user,
|
|
||||||
)
|
|
||||||
self.assertEqual(
|
|
||||||
self.client.get(
|
|
||||||
f"/api/documents/{doc.pk}/",
|
|
||||||
headers={"Accept": "application/json; version=7"},
|
|
||||||
format="json",
|
|
||||||
).data["notes"][0]["user"],
|
|
||||||
self.user.id,
|
|
||||||
)
|
|
||||||
|
|
||||||
def test_create_note(self) -> None:
|
def test_create_note(self) -> None:
|
||||||
"""
|
"""
|
||||||
GIVEN:
|
GIVEN:
|
||||||
@@ -3391,14 +3559,13 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestDocumentApiV2(DirectoriesMixin, APITestCase):
|
class TestDocumentApiTagColors(DirectoriesMixin, APITestCase):
|
||||||
def setUp(self) -> None:
|
def setUp(self) -> None:
|
||||||
super().setUp()
|
super().setUp()
|
||||||
|
|
||||||
self.user = User.objects.create_superuser(username="temp_admin")
|
self.user = User.objects.create_superuser(username="temp_admin")
|
||||||
|
|
||||||
self.client.force_authenticate(user=self.user)
|
self.client.force_authenticate(user=self.user)
|
||||||
self.client.defaults["HTTP_ACCEPT"] = "application/json; version=2"
|
|
||||||
|
|
||||||
def test_tag_validate_color(self) -> None:
|
def test_tag_validate_color(self) -> None:
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
|
|||||||
@@ -152,7 +152,7 @@ class TestCustomFieldsSearch(DirectoriesMixin, APITestCase):
|
|||||||
context={
|
context={
|
||||||
"request": types.SimpleNamespace(
|
"request": types.SimpleNamespace(
|
||||||
method="GET",
|
method="GET",
|
||||||
version="7",
|
version="9",
|
||||||
),
|
),
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -147,7 +147,6 @@ class TestExportImport(
|
|||||||
else:
|
else:
|
||||||
raise ValueError(f"document with id {id} does not exist in manifest")
|
raise ValueError(f"document with id {id} does not exist in manifest")
|
||||||
|
|
||||||
@override_settings(PASSPHRASE="test")
|
|
||||||
def _do_export(
|
def _do_export(
|
||||||
self,
|
self,
|
||||||
*,
|
*,
|
||||||
@@ -441,7 +440,6 @@ class TestExportImport(
|
|||||||
)
|
)
|
||||||
self.assertRaises(FileNotFoundError, call_command, "document_exporter", target)
|
self.assertRaises(FileNotFoundError, call_command, "document_exporter", target)
|
||||||
|
|
||||||
@override_settings(PASSPHRASE="test")
|
|
||||||
def test_export_zipped(self) -> None:
|
def test_export_zipped(self) -> None:
|
||||||
"""
|
"""
|
||||||
GIVEN:
|
GIVEN:
|
||||||
@@ -473,7 +471,6 @@ class TestExportImport(
|
|||||||
self.assertIn("manifest.json", zip.namelist())
|
self.assertIn("manifest.json", zip.namelist())
|
||||||
self.assertIn("metadata.json", zip.namelist())
|
self.assertIn("metadata.json", zip.namelist())
|
||||||
|
|
||||||
@override_settings(PASSPHRASE="test")
|
|
||||||
def test_export_zipped_format(self) -> None:
|
def test_export_zipped_format(self) -> None:
|
||||||
"""
|
"""
|
||||||
GIVEN:
|
GIVEN:
|
||||||
@@ -510,7 +507,6 @@ class TestExportImport(
|
|||||||
self.assertIn("manifest.json", zip.namelist())
|
self.assertIn("manifest.json", zip.namelist())
|
||||||
self.assertIn("metadata.json", zip.namelist())
|
self.assertIn("metadata.json", zip.namelist())
|
||||||
|
|
||||||
@override_settings(PASSPHRASE="test")
|
|
||||||
def test_export_zipped_with_delete(self) -> None:
|
def test_export_zipped_with_delete(self) -> None:
|
||||||
"""
|
"""
|
||||||
GIVEN:
|
GIVEN:
|
||||||
@@ -753,6 +749,31 @@ class TestExportImport(
|
|||||||
call_command("document_importer", "--no-progress-bar", self.target)
|
call_command("document_importer", "--no-progress-bar", self.target)
|
||||||
self.assertEqual(Document.objects.count(), 4)
|
self.assertEqual(Document.objects.count(), 4)
|
||||||
|
|
||||||
|
def test_folder_prefix_with_split(self) -> None:
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- Request to export documents to directory
|
||||||
|
WHEN:
|
||||||
|
- Option use_folder_prefix is used
|
||||||
|
- Option split manifest is used
|
||||||
|
THEN:
|
||||||
|
- Documents can be imported again
|
||||||
|
"""
|
||||||
|
shutil.rmtree(Path(self.dirs.media_dir) / "documents")
|
||||||
|
shutil.copytree(
|
||||||
|
Path(__file__).parent / "samples" / "documents",
|
||||||
|
Path(self.dirs.media_dir) / "documents",
|
||||||
|
)
|
||||||
|
|
||||||
|
self._do_export(use_folder_prefix=True, split_manifest=True)
|
||||||
|
|
||||||
|
with paperless_environment():
|
||||||
|
self.assertEqual(Document.objects.count(), 4)
|
||||||
|
Document.objects.all().delete()
|
||||||
|
self.assertEqual(Document.objects.count(), 0)
|
||||||
|
call_command("document_importer", "--no-progress-bar", self.target)
|
||||||
|
self.assertEqual(Document.objects.count(), 4)
|
||||||
|
|
||||||
def test_import_db_transaction_failed(self) -> None:
|
def test_import_db_transaction_failed(self) -> None:
|
||||||
"""
|
"""
|
||||||
GIVEN:
|
GIVEN:
|
||||||
|
|||||||
@@ -119,15 +119,22 @@ class TestCommandImport(
|
|||||||
# No read permissions
|
# No read permissions
|
||||||
original_path.chmod(0o222)
|
original_path.chmod(0o222)
|
||||||
|
|
||||||
|
manifest_path = Path(temp_dir) / "manifest.json"
|
||||||
|
manifest_path.write_text(
|
||||||
|
json.dumps(
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"model": "documents.document",
|
||||||
|
EXPORTER_FILE_NAME: "original.pdf",
|
||||||
|
EXPORTER_ARCHIVE_NAME: "archive.pdf",
|
||||||
|
},
|
||||||
|
],
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
cmd = Command()
|
cmd = Command()
|
||||||
cmd.source = Path(temp_dir)
|
cmd.source = Path(temp_dir)
|
||||||
cmd.manifest = [
|
cmd.manifest_paths = [manifest_path]
|
||||||
{
|
|
||||||
"model": "documents.document",
|
|
||||||
EXPORTER_FILE_NAME: "original.pdf",
|
|
||||||
EXPORTER_ARCHIVE_NAME: "archive.pdf",
|
|
||||||
},
|
|
||||||
]
|
|
||||||
cmd.data_only = False
|
cmd.data_only = False
|
||||||
with self.assertRaises(CommandError) as cm:
|
with self.assertRaises(CommandError) as cm:
|
||||||
cmd.check_manifest_validity()
|
cmd.check_manifest_validity()
|
||||||
@@ -296,7 +303,7 @@ class TestCommandImport(
|
|||||||
(self.dirs.scratch_dir / "manifest.json").touch()
|
(self.dirs.scratch_dir / "manifest.json").touch()
|
||||||
|
|
||||||
# We're not building a manifest, so it fails, but this test doesn't care
|
# We're not building a manifest, so it fails, but this test doesn't care
|
||||||
with self.assertRaises(json.decoder.JSONDecodeError):
|
with self.assertRaises(CommandError):
|
||||||
call_command(
|
call_command(
|
||||||
"document_importer",
|
"document_importer",
|
||||||
"--no-progress-bar",
|
"--no-progress-bar",
|
||||||
@@ -325,7 +332,7 @@ class TestCommandImport(
|
|||||||
)
|
)
|
||||||
|
|
||||||
# We're not building a manifest, so it fails, but this test doesn't care
|
# We're not building a manifest, so it fails, but this test doesn't care
|
||||||
with self.assertRaises(json.decoder.JSONDecodeError):
|
with self.assertRaises(CommandError):
|
||||||
call_command(
|
call_command(
|
||||||
"document_importer",
|
"document_importer",
|
||||||
"--no-progress-bar",
|
"--no-progress-bar",
|
||||||
|
|||||||
@@ -9,8 +9,8 @@ from documents.parsers import get_default_file_extension
|
|||||||
from documents.parsers import get_parser_class_for_mime_type
|
from documents.parsers import get_parser_class_for_mime_type
|
||||||
from documents.parsers import get_supported_file_extensions
|
from documents.parsers import get_supported_file_extensions
|
||||||
from documents.parsers import is_file_ext_supported
|
from documents.parsers import is_file_ext_supported
|
||||||
|
from paperless.parsers.text import TextDocumentParser
|
||||||
from paperless_tesseract.parsers import RasterisedDocumentParser
|
from paperless_tesseract.parsers import RasterisedDocumentParser
|
||||||
from paperless_text.parsers import TextDocumentParser
|
|
||||||
from paperless_tika.parsers import TikaDocumentParser
|
from paperless_tika.parsers import TikaDocumentParser
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ msgid ""
|
|||||||
msgstr ""
|
msgstr ""
|
||||||
"Project-Id-Version: paperless-ngx\n"
|
"Project-Id-Version: paperless-ngx\n"
|
||||||
"Report-Msgid-Bugs-To: \n"
|
"Report-Msgid-Bugs-To: \n"
|
||||||
"POT-Creation-Date: 2026-03-06 20:00+0000\n"
|
"POT-Creation-Date: 2026-03-09 22:37+0000\n"
|
||||||
"PO-Revision-Date: 2022-02-17 04:17\n"
|
"PO-Revision-Date: 2022-02-17 04:17\n"
|
||||||
"Last-Translator: \n"
|
"Last-Translator: \n"
|
||||||
"Language-Team: English\n"
|
"Language-Team: English\n"
|
||||||
@@ -1299,7 +1299,7 @@ msgstr ""
|
|||||||
msgid "workflow runs"
|
msgid "workflow runs"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: documents/serialisers.py:463 documents/serialisers.py:2344
|
#: documents/serialisers.py:463 documents/serialisers.py:2367
|
||||||
msgid "Insufficient permissions."
|
msgid "Insufficient permissions."
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
@@ -1307,39 +1307,39 @@ msgstr ""
|
|||||||
msgid "Invalid color."
|
msgid "Invalid color."
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: documents/serialisers.py:1967
|
#: documents/serialisers.py:1990
|
||||||
#, python-format
|
#, python-format
|
||||||
msgid "File type %(type)s not supported"
|
msgid "File type %(type)s not supported"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: documents/serialisers.py:2011
|
#: documents/serialisers.py:2034
|
||||||
#, python-format
|
#, python-format
|
||||||
msgid "Custom field id must be an integer: %(id)s"
|
msgid "Custom field id must be an integer: %(id)s"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: documents/serialisers.py:2018
|
#: documents/serialisers.py:2041
|
||||||
#, python-format
|
#, python-format
|
||||||
msgid "Custom field with id %(id)s does not exist"
|
msgid "Custom field with id %(id)s does not exist"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: documents/serialisers.py:2035 documents/serialisers.py:2045
|
#: documents/serialisers.py:2058 documents/serialisers.py:2068
|
||||||
msgid ""
|
msgid ""
|
||||||
"Custom fields must be a list of integers or an object mapping ids to values."
|
"Custom fields must be a list of integers or an object mapping ids to values."
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: documents/serialisers.py:2040
|
#: documents/serialisers.py:2063
|
||||||
msgid "Some custom fields don't exist or were specified twice."
|
msgid "Some custom fields don't exist or were specified twice."
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: documents/serialisers.py:2187
|
#: documents/serialisers.py:2210
|
||||||
msgid "Invalid variable detected."
|
msgid "Invalid variable detected."
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: documents/serialisers.py:2400
|
#: documents/serialisers.py:2423
|
||||||
msgid "Duplicate document identifiers are not allowed."
|
msgid "Duplicate document identifiers are not allowed."
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: documents/serialisers.py:2430 documents/views.py:3328
|
#: documents/serialisers.py:2453 documents/views.py:3328
|
||||||
#, python-format
|
#, python-format
|
||||||
msgid "Documents not found: %(ids)s"
|
msgid "Documents not found: %(ids)s"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
@@ -1856,151 +1856,151 @@ msgstr ""
|
|||||||
msgid "paperless application settings"
|
msgid "paperless application settings"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:752
|
#: paperless/settings/__init__.py:521
|
||||||
msgid "English (US)"
|
msgid "English (US)"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:753
|
#: paperless/settings/__init__.py:522
|
||||||
msgid "Arabic"
|
msgid "Arabic"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:754
|
#: paperless/settings/__init__.py:523
|
||||||
msgid "Afrikaans"
|
msgid "Afrikaans"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:755
|
#: paperless/settings/__init__.py:524
|
||||||
msgid "Belarusian"
|
msgid "Belarusian"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:756
|
#: paperless/settings/__init__.py:525
|
||||||
msgid "Bulgarian"
|
msgid "Bulgarian"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:757
|
#: paperless/settings/__init__.py:526
|
||||||
msgid "Catalan"
|
msgid "Catalan"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:758
|
#: paperless/settings/__init__.py:527
|
||||||
msgid "Czech"
|
msgid "Czech"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:759
|
#: paperless/settings/__init__.py:528
|
||||||
msgid "Danish"
|
msgid "Danish"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:760
|
#: paperless/settings/__init__.py:529
|
||||||
msgid "German"
|
msgid "German"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:761
|
#: paperless/settings/__init__.py:530
|
||||||
msgid "Greek"
|
msgid "Greek"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:762
|
#: paperless/settings/__init__.py:531
|
||||||
msgid "English (GB)"
|
msgid "English (GB)"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:763
|
#: paperless/settings/__init__.py:532
|
||||||
msgid "Spanish"
|
msgid "Spanish"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:764
|
#: paperless/settings/__init__.py:533
|
||||||
msgid "Persian"
|
msgid "Persian"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:765
|
#: paperless/settings/__init__.py:534
|
||||||
msgid "Finnish"
|
msgid "Finnish"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:766
|
#: paperless/settings/__init__.py:535
|
||||||
msgid "French"
|
msgid "French"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:767
|
#: paperless/settings/__init__.py:536
|
||||||
msgid "Hungarian"
|
msgid "Hungarian"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:768
|
#: paperless/settings/__init__.py:537
|
||||||
msgid "Indonesian"
|
msgid "Indonesian"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:769
|
#: paperless/settings/__init__.py:538
|
||||||
msgid "Italian"
|
msgid "Italian"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:770
|
#: paperless/settings/__init__.py:539
|
||||||
msgid "Japanese"
|
msgid "Japanese"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:771
|
#: paperless/settings/__init__.py:540
|
||||||
msgid "Korean"
|
msgid "Korean"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:772
|
#: paperless/settings/__init__.py:541
|
||||||
msgid "Luxembourgish"
|
msgid "Luxembourgish"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:773
|
#: paperless/settings/__init__.py:542
|
||||||
msgid "Norwegian"
|
msgid "Norwegian"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:774
|
#: paperless/settings/__init__.py:543
|
||||||
msgid "Dutch"
|
msgid "Dutch"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:775
|
#: paperless/settings/__init__.py:544
|
||||||
msgid "Polish"
|
msgid "Polish"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:776
|
#: paperless/settings/__init__.py:545
|
||||||
msgid "Portuguese (Brazil)"
|
msgid "Portuguese (Brazil)"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:777
|
#: paperless/settings/__init__.py:546
|
||||||
msgid "Portuguese"
|
msgid "Portuguese"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:778
|
#: paperless/settings/__init__.py:547
|
||||||
msgid "Romanian"
|
msgid "Romanian"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:779
|
#: paperless/settings/__init__.py:548
|
||||||
msgid "Russian"
|
msgid "Russian"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:780
|
#: paperless/settings/__init__.py:549
|
||||||
msgid "Slovak"
|
msgid "Slovak"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:781
|
#: paperless/settings/__init__.py:550
|
||||||
msgid "Slovenian"
|
msgid "Slovenian"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:782
|
#: paperless/settings/__init__.py:551
|
||||||
msgid "Serbian"
|
msgid "Serbian"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:783
|
#: paperless/settings/__init__.py:552
|
||||||
msgid "Swedish"
|
msgid "Swedish"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:784
|
#: paperless/settings/__init__.py:553
|
||||||
msgid "Turkish"
|
msgid "Turkish"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:785
|
#: paperless/settings/__init__.py:554
|
||||||
msgid "Ukrainian"
|
msgid "Ukrainian"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:786
|
#: paperless/settings/__init__.py:555
|
||||||
msgid "Vietnamese"
|
msgid "Vietnamese"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:787
|
#: paperless/settings/__init__.py:556
|
||||||
msgid "Chinese Simplified"
|
msgid "Chinese Simplified"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
#: paperless/settings/__init__.py:788
|
#: paperless/settings/__init__.py:557
|
||||||
msgid "Chinese Traditional"
|
msgid "Chinese Traditional"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
import os
|
import os
|
||||||
|
|
||||||
from celery import Celery
|
from celery import Celery
|
||||||
|
from celery.signals import worker_process_init
|
||||||
|
|
||||||
# Set the default Django settings module for the 'celery' program.
|
# Set the default Django settings module for the 'celery' program.
|
||||||
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "paperless.settings")
|
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "paperless.settings")
|
||||||
@@ -15,3 +16,19 @@ app.config_from_object("django.conf:settings", namespace="CELERY")
|
|||||||
|
|
||||||
# Load task modules from all registered Django apps.
|
# Load task modules from all registered Django apps.
|
||||||
app.autodiscover_tasks()
|
app.autodiscover_tasks()
|
||||||
|
|
||||||
|
|
||||||
|
@worker_process_init.connect
|
||||||
|
def on_worker_process_init(**kwargs) -> None: # pragma: no cover
|
||||||
|
"""
|
||||||
|
Register built-in parsers eagerly in each Celery worker process.
|
||||||
|
|
||||||
|
This registers only the built-in parsers (no entrypoint discovery) so
|
||||||
|
that workers can begin consuming documents immediately. Entrypoint
|
||||||
|
discovery for third-party parsers is deferred to the first call of
|
||||||
|
get_parser_registry() inside a task, keeping worker_process_init
|
||||||
|
well within its 4-second timeout budget.
|
||||||
|
"""
|
||||||
|
from paperless.parsers.registry import init_builtin_parsers
|
||||||
|
|
||||||
|
init_builtin_parsers()
|
||||||
|
|||||||
379
src/paperless/parsers/__init__.py
Normal file
379
src/paperless/parsers/__init__.py
Normal file
@@ -0,0 +1,379 @@
|
|||||||
|
"""
|
||||||
|
Public interface for the Paperless-ngx parser plugin system.
|
||||||
|
|
||||||
|
This module defines ParserProtocol — the structural contract that every
|
||||||
|
document parser must satisfy, whether it is a built-in parser shipped with
|
||||||
|
Paperless-ngx or a third-party parser installed via a Python entrypoint.
|
||||||
|
|
||||||
|
Phase 1/2 scope: only the Protocol is defined here. The transitional
|
||||||
|
DocumentParser ABC (Phase 3) and concrete built-in parsers (Phase 3+) will
|
||||||
|
be added in later phases, so there are intentionally no imports of parser
|
||||||
|
implementations here.
|
||||||
|
|
||||||
|
Usage example (third-party parser)::
|
||||||
|
|
||||||
|
from paperless.parsers import ParserProtocol
|
||||||
|
|
||||||
|
class MyParser:
|
||||||
|
name = "my-parser"
|
||||||
|
version = "1.0.0"
|
||||||
|
author = "Acme Corp"
|
||||||
|
url = "https://example.com/my-parser"
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def supported_mime_types(cls) -> dict[str, str]:
|
||||||
|
return {"application/x-my-format": ".myf"}
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def score(cls, mime_type, filename, path=None):
|
||||||
|
return 10
|
||||||
|
|
||||||
|
# … implement remaining protocol methods …
|
||||||
|
|
||||||
|
assert isinstance(MyParser(), ParserProtocol)
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
from typing import Protocol
|
||||||
|
from typing import Self
|
||||||
|
from typing import TypedDict
|
||||||
|
from typing import runtime_checkable
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
import datetime
|
||||||
|
from pathlib import Path
|
||||||
|
from types import TracebackType
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"MetadataEntry",
|
||||||
|
"ParserProtocol",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
class MetadataEntry(TypedDict):
|
||||||
|
"""A single metadata field extracted from a document.
|
||||||
|
|
||||||
|
All four keys are required. Values are always serialised to strings —
|
||||||
|
type-specific conversion (dates, integers, lists) is the responsibility
|
||||||
|
of the parser before returning.
|
||||||
|
"""
|
||||||
|
|
||||||
|
namespace: str
|
||||||
|
"""URI of the metadata namespace (e.g. 'http://ns.adobe.com/pdf/1.3/')."""
|
||||||
|
|
||||||
|
prefix: str
|
||||||
|
"""Conventional namespace prefix (e.g. 'pdf', 'xmp', 'dc')."""
|
||||||
|
|
||||||
|
key: str
|
||||||
|
"""Field name within the namespace (e.g. 'Author', 'CreateDate')."""
|
||||||
|
|
||||||
|
value: str
|
||||||
|
"""String representation of the field value."""
|
||||||
|
|
||||||
|
|
||||||
|
@runtime_checkable
|
||||||
|
class ParserProtocol(Protocol):
|
||||||
|
"""Structural contract for all Paperless-ngx document parsers.
|
||||||
|
|
||||||
|
Both built-in parsers and third-party plugins (discovered via the
|
||||||
|
"paperless_ngx.parsers" entrypoint group) must satisfy this Protocol.
|
||||||
|
Because it is decorated with runtime_checkable, isinstance(obj,
|
||||||
|
ParserProtocol) works at runtime based on method presence, which is
|
||||||
|
useful for validation in ParserRegistry.discover.
|
||||||
|
|
||||||
|
Parsers must expose four string attributes at the class level so the
|
||||||
|
registry can log attribution information without instantiating the parser:
|
||||||
|
|
||||||
|
name : str
|
||||||
|
Human-readable parser name (e.g. "Tesseract OCR").
|
||||||
|
version : str
|
||||||
|
Semantic version string (e.g. "1.2.3").
|
||||||
|
author : str
|
||||||
|
Author or organisation name.
|
||||||
|
url : str
|
||||||
|
URL for documentation, source code, or issue tracker.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Class-level identity (checked by the registry, not Protocol methods)
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
name: str
|
||||||
|
version: str
|
||||||
|
author: str
|
||||||
|
url: str
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Class methods
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def supported_mime_types(cls) -> dict[str, str]:
|
||||||
|
"""Return a mapping of supported MIME types to preferred file extensions.
|
||||||
|
|
||||||
|
The keys are MIME type strings (e.g. "application/pdf"), and the
|
||||||
|
values are the preferred file extension including the leading dot
|
||||||
|
(e.g. ".pdf"). The registry uses this mapping both to decide whether
|
||||||
|
a parser is a candidate for a given file and to determine the default
|
||||||
|
extension when creating archive copies.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
dict[str, str]
|
||||||
|
{mime_type: extension} mapping — may be empty if the parser
|
||||||
|
has been temporarily disabled.
|
||||||
|
"""
|
||||||
|
...
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def score(
|
||||||
|
cls,
|
||||||
|
mime_type: str,
|
||||||
|
filename: str,
|
||||||
|
path: Path | None = None,
|
||||||
|
) -> int | None:
|
||||||
|
"""Return a priority score for handling this file, or None to decline.
|
||||||
|
|
||||||
|
The registry calls this after confirming that the MIME type is in
|
||||||
|
supported_mime_types. Parsers may inspect filename and optionally
|
||||||
|
the file at path to refine their confidence level.
|
||||||
|
|
||||||
|
A higher score wins. Return None to explicitly decline handling a file
|
||||||
|
even though the MIME type is listed as supported (e.g. when a feature
|
||||||
|
flag is disabled, or a required service is not configured).
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
mime_type:
|
||||||
|
The detected MIME type of the file to be parsed.
|
||||||
|
filename:
|
||||||
|
The original filename, including extension.
|
||||||
|
path:
|
||||||
|
Optional filesystem path to the file. Parsers that need to
|
||||||
|
inspect file content (e.g. magic-byte sniffing) may use this.
|
||||||
|
May be None when scoring happens before the file is available locally.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
int | None
|
||||||
|
Priority score (higher wins), or None to decline.
|
||||||
|
"""
|
||||||
|
...
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Properties
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
@property
|
||||||
|
def can_produce_archive(self) -> bool:
|
||||||
|
"""Whether this parser can produce a searchable PDF archive copy.
|
||||||
|
|
||||||
|
If True, the consumption pipeline may request an archive version when
|
||||||
|
processing the document, subject to the ARCHIVE_FILE_GENERATION
|
||||||
|
setting. If False, only thumbnail and text extraction are performed.
|
||||||
|
"""
|
||||||
|
...
|
||||||
|
|
||||||
|
@property
|
||||||
|
def requires_pdf_rendition(self) -> bool:
|
||||||
|
"""Whether the parser must produce a PDF for the frontend to display.
|
||||||
|
|
||||||
|
True for formats the browser cannot display natively (e.g. DOCX, ODT).
|
||||||
|
When True, the pipeline always stores the PDF output regardless of the
|
||||||
|
ARCHIVE_FILE_GENERATION setting, since the original format cannot be
|
||||||
|
shown to the user.
|
||||||
|
"""
|
||||||
|
...
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Core parsing interface
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def parse(
|
||||||
|
self,
|
||||||
|
document_path: Path,
|
||||||
|
mime_type: str,
|
||||||
|
*,
|
||||||
|
produce_archive: bool = True,
|
||||||
|
) -> None:
|
||||||
|
"""Parse document_path and populate internal state.
|
||||||
|
|
||||||
|
After a successful call, callers retrieve results via get_text,
|
||||||
|
get_date, and get_archive_path.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
document_path:
|
||||||
|
Absolute path to the document file to parse.
|
||||||
|
mime_type:
|
||||||
|
Detected MIME type of the document.
|
||||||
|
produce_archive:
|
||||||
|
When True (the default) and can_produce_archive is also True,
|
||||||
|
the parser should produce a searchable PDF at the path returned
|
||||||
|
by get_archive_path. Pass False when only text extraction and
|
||||||
|
thumbnail generation are required and disk I/O should be minimised.
|
||||||
|
|
||||||
|
Raises
|
||||||
|
------
|
||||||
|
documents.parsers.ParseError
|
||||||
|
If parsing fails for any reason.
|
||||||
|
"""
|
||||||
|
...
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Result accessors
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def get_text(self) -> str | None:
|
||||||
|
"""Return the plain-text content extracted during parse.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
str | None
|
||||||
|
Extracted text, or None if no text could be found.
|
||||||
|
"""
|
||||||
|
...
|
||||||
|
|
||||||
|
def get_date(self) -> datetime.datetime | None:
|
||||||
|
"""Return the document date detected during parse.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
datetime.datetime | None
|
||||||
|
Detected document date, or None if no date was found.
|
||||||
|
"""
|
||||||
|
...
|
||||||
|
|
||||||
|
def get_archive_path(self) -> Path | None:
|
||||||
|
"""Return the path to the generated archive PDF, or None.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
Path | None
|
||||||
|
Path to the searchable PDF archive, or None if no archive was
|
||||||
|
produced (e.g. because produce_archive=False or the parser does
|
||||||
|
not support archive generation).
|
||||||
|
"""
|
||||||
|
...
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Thumbnail and metadata
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def get_thumbnail(self, document_path: Path, mime_type: str) -> Path:
|
||||||
|
"""Generate and return the path to a thumbnail image for the document.
|
||||||
|
|
||||||
|
May be called independently of parse. The returned path must point to
|
||||||
|
an existing WebP image file inside the parser's temporary working
|
||||||
|
directory.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
document_path:
|
||||||
|
Absolute path to the source document.
|
||||||
|
mime_type:
|
||||||
|
Detected MIME type of the document.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
Path
|
||||||
|
Path to the generated thumbnail image (WebP format preferred).
|
||||||
|
"""
|
||||||
|
...
|
||||||
|
|
||||||
|
def get_page_count(
|
||||||
|
self,
|
||||||
|
document_path: Path,
|
||||||
|
mime_type: str,
|
||||||
|
) -> int | None:
|
||||||
|
"""Return the number of pages in the document, if determinable.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
document_path:
|
||||||
|
Absolute path to the source document.
|
||||||
|
mime_type:
|
||||||
|
Detected MIME type of the document.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
int | None
|
||||||
|
Page count, or None if the parser cannot determine it.
|
||||||
|
"""
|
||||||
|
...
|
||||||
|
|
||||||
|
def extract_metadata(
|
||||||
|
self,
|
||||||
|
document_path: Path,
|
||||||
|
mime_type: str,
|
||||||
|
) -> list[MetadataEntry]:
|
||||||
|
"""Extract format-specific metadata from the document.
|
||||||
|
|
||||||
|
Called by the API view layer on demand — not during the consumption
|
||||||
|
pipeline. Results are returned to the frontend for per-file display.
|
||||||
|
|
||||||
|
For documents with an archive version, this method is called twice:
|
||||||
|
once for the original file (with its native MIME type) and once for
|
||||||
|
the archive file (with ``"application/pdf"``). Parsers that produce
|
||||||
|
archives should handle both cases.
|
||||||
|
|
||||||
|
Implementations must not raise. A failure to read metadata is not
|
||||||
|
fatal — log a warning and return whatever partial results were
|
||||||
|
collected, or ``[]`` if none.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
document_path:
|
||||||
|
Absolute path to the file to extract metadata from.
|
||||||
|
mime_type:
|
||||||
|
MIME type of the file at ``document_path``. May be
|
||||||
|
``"application/pdf"`` when called for the archive version.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
list[MetadataEntry]
|
||||||
|
Zero or more metadata entries. Returns ``[]`` if no metadata
|
||||||
|
could be extracted or the format does not support it.
|
||||||
|
"""
|
||||||
|
...
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Context manager
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def __enter__(self) -> Self:
|
||||||
|
"""Enter the parser context, returning the parser instance.
|
||||||
|
|
||||||
|
Implementations should perform any resource allocation here if not
|
||||||
|
done in __init__ (e.g. creating API clients or temp directories).
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
Self
|
||||||
|
The parser instance itself.
|
||||||
|
"""
|
||||||
|
...
|
||||||
|
|
||||||
|
def __exit__(
|
||||||
|
self,
|
||||||
|
exc_type: type[BaseException] | None,
|
||||||
|
exc_val: BaseException | None,
|
||||||
|
exc_tb: TracebackType | None,
|
||||||
|
) -> None:
|
||||||
|
"""Exit the parser context and release all resources.
|
||||||
|
|
||||||
|
Implementations must clean up all temporary files and other resources
|
||||||
|
regardless of whether an exception occurred.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
exc_type:
|
||||||
|
The exception class, or None if no exception was raised.
|
||||||
|
exc_val:
|
||||||
|
The exception instance, or None.
|
||||||
|
exc_tb:
|
||||||
|
The traceback, or None.
|
||||||
|
"""
|
||||||
|
...
|
||||||
364
src/paperless/parsers/registry.py
Normal file
364
src/paperless/parsers/registry.py
Normal file
@@ -0,0 +1,364 @@
|
|||||||
|
"""
|
||||||
|
Singleton registry that tracks all document parsers available to
|
||||||
|
Paperless-ngx — both built-ins shipped with the application and third-party
|
||||||
|
plugins installed via Python entrypoints.
|
||||||
|
|
||||||
|
Public surface
|
||||||
|
--------------
|
||||||
|
get_parser_registry
|
||||||
|
Lazy-initialise and return the shared ParserRegistry. This is the primary
|
||||||
|
entry point for production code.
|
||||||
|
|
||||||
|
init_builtin_parsers
|
||||||
|
Register built-in parsers only, without entrypoint discovery. Safe to
|
||||||
|
call from Celery worker_process_init where importing all entrypoints
|
||||||
|
would be wasteful or cause side effects.
|
||||||
|
|
||||||
|
reset_parser_registry
|
||||||
|
Reset module-level state. For tests only.
|
||||||
|
|
||||||
|
Entrypoint group
|
||||||
|
----------------
|
||||||
|
Third-party parsers must advertise themselves under the
|
||||||
|
"paperless_ngx.parsers" entrypoint group in their pyproject.toml::
|
||||||
|
|
||||||
|
[project.entry-points."paperless_ngx.parsers"]
|
||||||
|
my_parser = "my_package.parsers:MyParser"
|
||||||
|
|
||||||
|
The loaded class must expose the following attributes at the class level
|
||||||
|
(not just on instances) for the registry to accept it:
|
||||||
|
name, version, author, url, supported_mime_types (callable), score (callable).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from importlib.metadata import entry_points
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from paperless.parsers import ParserProtocol
|
||||||
|
|
||||||
|
logger = logging.getLogger("paperless.parsers.registry")
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Module-level singleton state
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
_registry: ParserRegistry | None = None
|
||||||
|
_discovery_complete: bool = False
|
||||||
|
|
||||||
|
# Attribute names that every registered external parser class must expose.
|
||||||
|
_REQUIRED_ATTRS: tuple[str, ...] = (
|
||||||
|
"name",
|
||||||
|
"version",
|
||||||
|
"author",
|
||||||
|
"url",
|
||||||
|
"supported_mime_types",
|
||||||
|
"score",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Module-level accessor functions
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def get_parser_registry() -> ParserRegistry:
|
||||||
|
"""Return the shared ParserRegistry instance.
|
||||||
|
|
||||||
|
On the first call this function:
|
||||||
|
|
||||||
|
1. Creates a new ParserRegistry.
|
||||||
|
2. Calls register_defaults to install built-in parsers.
|
||||||
|
3. Calls discover to load third-party plugins via importlib.metadata entrypoints.
|
||||||
|
4. Calls log_summary to emit a startup summary.
|
||||||
|
|
||||||
|
Subsequent calls return the same instance immediately.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
ParserRegistry
|
||||||
|
The shared registry singleton.
|
||||||
|
"""
|
||||||
|
global _registry, _discovery_complete
|
||||||
|
|
||||||
|
if _registry is None:
|
||||||
|
_registry = ParserRegistry()
|
||||||
|
_registry.register_defaults()
|
||||||
|
|
||||||
|
if not _discovery_complete:
|
||||||
|
_registry.discover()
|
||||||
|
_registry.log_summary()
|
||||||
|
_discovery_complete = True
|
||||||
|
|
||||||
|
return _registry
|
||||||
|
|
||||||
|
|
||||||
|
def init_builtin_parsers() -> None:
|
||||||
|
"""Register built-in parsers without performing entrypoint discovery.
|
||||||
|
|
||||||
|
Intended for use in Celery worker_process_init handlers where importing
|
||||||
|
all installed entrypoints would be wasteful, slow, or could produce
|
||||||
|
undesirable side effects. Entrypoint discovery (third-party plugins) is
|
||||||
|
deliberately not performed.
|
||||||
|
|
||||||
|
Safe to call multiple times — subsequent calls are no-ops.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
None
|
||||||
|
"""
|
||||||
|
global _registry
|
||||||
|
|
||||||
|
if _registry is None:
|
||||||
|
_registry = ParserRegistry()
|
||||||
|
_registry.register_defaults()
|
||||||
|
|
||||||
|
|
||||||
|
def reset_parser_registry() -> None:
|
||||||
|
"""Reset the module-level registry state to its initial values.
|
||||||
|
|
||||||
|
Resets _registry and _discovery_complete so the next call to
|
||||||
|
get_parser_registry will re-initialise everything from scratch.
|
||||||
|
|
||||||
|
FOR TESTS ONLY. Do not call this in production code — resetting the
|
||||||
|
registry mid-request causes all subsequent parser lookups to go through
|
||||||
|
discovery again, which is expensive and may have unexpected side effects
|
||||||
|
in multi-threaded environments.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
None
|
||||||
|
"""
|
||||||
|
global _registry, _discovery_complete
|
||||||
|
|
||||||
|
_registry = None
|
||||||
|
_discovery_complete = False
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Registry class
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class ParserRegistry:
|
||||||
|
"""Registry that maps MIME types to the best available parser class.
|
||||||
|
|
||||||
|
Parsers are partitioned into two lists:
|
||||||
|
|
||||||
|
_builtins
|
||||||
|
Parser classes registered via register_builtin (populated by
|
||||||
|
register_defaults in Phase 3+).
|
||||||
|
|
||||||
|
_external
|
||||||
|
Parser classes loaded from installed Python entrypoints via discover.
|
||||||
|
|
||||||
|
When resolving a parser for a file, external parsers are evaluated
|
||||||
|
alongside built-in parsers using a uniform scoring mechanism. Both lists
|
||||||
|
are iterated together; the class with the highest score wins. If an
|
||||||
|
external parser wins, its attribution details are logged so users can
|
||||||
|
identify which third-party package handled their document.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self) -> None:
|
||||||
|
self._external: list[type[ParserProtocol]] = []
|
||||||
|
self._builtins: list[type[ParserProtocol]] = []
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Registration
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def register_builtin(self, parser_class: type[ParserProtocol]) -> None:
|
||||||
|
"""Register a built-in parser class.
|
||||||
|
|
||||||
|
Built-in parsers are shipped with Paperless-ngx and are appended to
|
||||||
|
the _builtins list. They are never overridden by external parsers;
|
||||||
|
instead, scoring determines which parser wins for any given file.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
parser_class:
|
||||||
|
The parser class to register. Must satisfy ParserProtocol.
|
||||||
|
"""
|
||||||
|
self._builtins.append(parser_class)
|
||||||
|
|
||||||
|
def register_defaults(self) -> None:
|
||||||
|
"""Register the built-in parsers that ship with Paperless-ngx.
|
||||||
|
|
||||||
|
Each parser that has been migrated to the new ParserProtocol interface
|
||||||
|
is registered here. Parsers are added in ascending weight order so
|
||||||
|
that log output is predictable; scoring determines which parser wins
|
||||||
|
at runtime regardless of registration order.
|
||||||
|
"""
|
||||||
|
from paperless.parsers.text import TextDocumentParser
|
||||||
|
|
||||||
|
self.register_builtin(TextDocumentParser)
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Discovery
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def discover(self) -> None:
    """Collect third-party parsers advertised under "paperless_ngx.parsers".

    Every entrypoint in that group is handled independently:

    * ``load()`` is called to import the advertised class. Any exception
      is logged with its traceback and the entrypoint is skipped.
    * The loaded class is checked for each name in ``_REQUIRED_ATTRS``.
      If any are absent a warning listing them is logged and the
      entrypoint is skipped.
    * Otherwise the class is appended to ``_external`` and an info line
      naming the parser, its version, author and entrypoint is logged.

    A failure for one entrypoint never stops the remaining ones from
    being processed.

    Returns
    -------
    None
    """
    for entrypoint in entry_points(group="paperless_ngx.parsers"):
        try:
            loaded_class = entrypoint.load()
        except Exception:
            logger.exception(
                "Failed to load parser entrypoint '%s' — skipping.",
                entrypoint.name,
            )
            continue

        # Collect, in declaration order, every required attribute that the
        # loaded class does not expose.
        absent_attrs = []
        for required_attr in _REQUIRED_ATTRS:
            if not hasattr(loaded_class, required_attr):
                absent_attrs.append(required_attr)

        if absent_attrs:
            logger.warning(
                "Parser loaded from entrypoint '%s' is missing required "
                "attributes %r — skipping.",
                entrypoint.name,
                absent_attrs,
            )
            continue

        self._external.append(loaded_class)
        logger.info(
            "Loaded third-party parser '%s' v%s by %s (entrypoint: '%s').",
            loaded_class.name,
            loaded_class.version,
            loaded_class.author,
            entrypoint.name,
        )
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Summary logging
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def log_summary(self) -> None:
    """Write an INFO-level overview of the registered parsers.

    The built-in parsers are reported first: a header line with their
    count, then one line per class. External parsers follow in the same
    shape. When the external list is empty, a single "none discovered"
    message is logged in place of an empty section.

    Attributes missing from a class are replaced by a placeholder
    (``repr(cls)`` for the name, a fixed string for the others) so that
    logging never raises.

    Returns
    -------
    None
    """
    builtin_classes = self._builtins
    logger.info("Built-in parsers (%d):", len(builtin_classes))
    for builtin_cls in builtin_classes:
        logger.info(
            " [built-in] %s v%s — %s",
            getattr(builtin_cls, "name", repr(builtin_cls)),
            getattr(builtin_cls, "version", "unknown"),
            getattr(builtin_cls, "url", "built-in"),
        )

    external_classes = self._external
    if external_classes:
        logger.info("Third-party parsers (%d):", len(external_classes))
        for external_cls in external_classes:
            logger.info(
                " [external] %s v%s by %s — report issues at %s",
                getattr(external_cls, "name", repr(external_cls)),
                getattr(external_cls, "version", "unknown"),
                getattr(external_cls, "author", "unknown"),
                getattr(external_cls, "url", "unknown"),
            )
    else:
        logger.info("No third-party parsers discovered.")
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Parser resolution
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def get_parser_for_file(
    self,
    mime_type: str,
    filename: str,
    path: Path | None = None,
) -> type[ParserProtocol] | None:
    """Pick the highest-scoring parser class for a file.

    Candidates are visited with external parsers first and built-ins
    second. A candidate takes part only when ``mime_type`` is a key of
    the mapping returned by its ``supported_mime_types`` classmethod and
    its ``score`` classmethod returns something other than None.

    The candidate with the strictly highest score is returned. Because
    a later candidate must beat (not merely equal) the current best, a
    tie is won by whichever candidate was visited first, so an external
    parser beats a built-in at equal score.

    If the winner is one of the external parsers, its name, version and
    URL are logged at INFO level so operators can see which package
    handled the document.

    Parameters
    ----------
    mime_type:
        The detected MIME type of the file.
    filename:
        The original filename, including extension.
    path:
        Optional filesystem path to the file; passed through unchanged
        to each candidate's ``score`` method.

    Returns
    -------
    type[ParserProtocol] | None
        The selected parser class, or None when no candidate is eligible.
    """
    winner: type[ParserProtocol] | None = None
    top_score: int | None = None

    # Order matters: external classes come first so they win ties.
    candidates = [*self._external, *self._builtins]
    for candidate in candidates:
        if mime_type in candidate.supported_mime_types():
            candidate_score = candidate.score(mime_type, filename, path)
            if candidate_score is not None and (
                top_score is None or candidate_score > top_score
            ):
                top_score, winner = candidate_score, candidate

    if winner is None or winner not in self._external:
        return winner

    logger.info(
        "Document handled by third-party parser '%s' v%s — %s",
        getattr(winner, "name", repr(winner)),
        getattr(winner, "version", "unknown"),
        getattr(winner, "url", "unknown"),
    )
    return winner
|
||||||
320
src/paperless/parsers/text.py
Normal file
320
src/paperless/parsers/text.py
Normal file
@@ -0,0 +1,320 @@
|
|||||||
|
"""
|
||||||
|
Built-in plain-text document parser.
|
||||||
|
|
||||||
|
Handles text/plain, text/csv, and application/csv MIME types by reading the
|
||||||
|
file content directly. Thumbnails are generated by rendering a page-sized
|
||||||
|
WebP image from the first 100,000 characters using Pillow.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import shutil
|
||||||
|
import tempfile
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
from typing import Self
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
from PIL import Image
|
||||||
|
from PIL import ImageDraw
|
||||||
|
from PIL import ImageFont
|
||||||
|
|
||||||
|
from paperless.version import __full_version_str__
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
import datetime
|
||||||
|
from types import TracebackType
|
||||||
|
|
||||||
|
from paperless.parsers import MetadataEntry
|
||||||
|
|
||||||
|
# Logger shared by everything in this module.
logger = logging.getLogger("paperless.parsing.text")

# MIME types handled by the text parser, each mapped to the file extension
# preferred for it. Both CSV MIME types map to the same extension.
_SUPPORTED_MIME_TYPES: dict[str, str] = dict(
    (
        ("text/plain", ".txt"),
        ("text/csv", ".csv"),
        ("application/csv", ".csv"),
    ),
)
|
||||||
|
|
||||||
|
|
||||||
|
class TextDocumentParser:
    """Parse plain-text documents (txt, csv) for Paperless-ngx.

    The file content is read directly as UTF-8 text and a simple thumbnail
    is rendered with Pillow. No OCR is performed and no searchable PDF
    archive copy is produced.

    Each instance owns a private temporary directory (created in
    ``__init__`` and removed in ``__exit__``), so instances are meant to be
    used as context managers.

    Class attributes
    ----------------
    name : str
        Human-readable parser name.
    version : str
        Version string, taken from the Paperless-ngx version module.
    author : str
        Maintainer name.
    url : str
        Issue tracker / source URL.
    """

    name: str = "Paperless-ngx Text Parser"
    version: str = __full_version_str__
    author: str = "Paperless-ngx Contributors"
    url: str = "https://github.com/paperless-ngx/paperless-ngx"

    # ------------------------------------------------------------------
    # Class methods
    # ------------------------------------------------------------------

    @classmethod
    def supported_mime_types(cls) -> dict[str, str]:
        """Return the MIME types this parser handles.

        Returns
        -------
        dict[str, str]
            Mapping of MIME type to preferred file extension. This is the
            module-level mapping itself, not a copy.
        """
        return _SUPPORTED_MIME_TYPES

    @classmethod
    def score(
        cls,
        mime_type: str,
        filename: str,
        path: Path | None = None,
    ) -> int | None:
        """Return the priority score for handling this file.

        Parameters
        ----------
        mime_type:
            Detected MIME type of the file.
        filename:
            Original filename including extension. Not inspected.
        path:
            Optional filesystem path. Not inspected.

        Returns
        -------
        int | None
            10 if the MIME type is supported, otherwise None.
        """
        if mime_type in _SUPPORTED_MIME_TYPES:
            return 10
        return None

    # ------------------------------------------------------------------
    # Properties
    # ------------------------------------------------------------------

    @property
    def can_produce_archive(self) -> bool:
        """Whether this parser can produce a searchable PDF archive copy.

        Returns
        -------
        bool
            Always False — the text parser does not produce a PDF archive.
        """
        return False

    @property
    def requires_pdf_rendition(self) -> bool:
        """Whether the parser must produce a PDF for the frontend to display.

        Returns
        -------
        bool
            Always False — plain text files are displayable as-is.
        """
        return False

    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------

    def __init__(self, logging_group: object = None) -> None:
        """Create the parser and its private scratch directory.

        Parameters
        ----------
        logging_group:
            Accepted for interface compatibility; not used by this parser.
        """
        # The scratch root may not exist yet, so create it before asking
        # tempfile for a unique sub-directory inside it.
        settings.SCRATCH_DIR.mkdir(parents=True, exist_ok=True)
        self._tempdir = Path(
            tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR),
        )
        # Populated by parse(); None until then.
        self._text: str | None = None

    def __enter__(self) -> Self:
        """Return the parser itself for use in a ``with`` block."""
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> None:
        """Remove the temporary directory; exceptions are not suppressed."""
        logger.debug("Cleaning up temporary directory %s", self._tempdir)
        # Best-effort removal: cleanup problems must not mask the original
        # exception (if any) leaving the ``with`` block.
        shutil.rmtree(self._tempdir, ignore_errors=True)

    # ------------------------------------------------------------------
    # Core parsing interface
    # ------------------------------------------------------------------

    def parse(
        self,
        document_path: Path,
        mime_type: str,
        *,
        produce_archive: bool = True,
    ) -> None:
        """Read the document and store its text content.

        Parameters
        ----------
        document_path:
            Absolute path to the text file.
        mime_type:
            Detected MIME type of the document. Not inspected.
        produce_archive:
            Ignored — this parser never produces a PDF archive.

        Raises
        ------
        OSError
            If the file cannot be opened or read. The error is propagated
            unchanged; this class does not translate it into another
            exception type.
        """
        self._text = self._read_text(document_path)

    # ------------------------------------------------------------------
    # Result accessors
    # ------------------------------------------------------------------

    def get_text(self) -> str | None:
        """Return the plain-text content extracted during parse.

        Returns
        -------
        str | None
            Extracted text, or None if parse has not been called yet.
        """
        return self._text

    def get_date(self) -> datetime.datetime | None:
        """Return the document date detected during parse.

        Returns
        -------
        datetime.datetime | None
            Always None — the text parser does not detect dates.
        """
        return None

    def get_archive_path(self) -> Path | None:
        """Return the path to a generated archive PDF, or None.

        Returns
        -------
        Path | None
            Always None — the text parser does not produce a PDF archive.
        """
        return None

    # ------------------------------------------------------------------
    # Thumbnail and metadata
    # ------------------------------------------------------------------

    def get_thumbnail(self, document_path: Path, mime_type: str) -> Path:
        """Render the first portion of the document as a WebP thumbnail.

        At most the first 100,000 characters are read. Files larger than
        50 MiB are not opened at all; a placeholder message is rendered
        instead.

        Parameters
        ----------
        document_path:
            Absolute path to the source document.
        mime_type:
            Detected MIME type of the document. Not inspected.

        Returns
        -------
        Path
            Path to the generated WebP thumbnail inside the temporary directory.
        """
        max_chars = 100_000
        file_size_limit = 50 * 1024 * 1024

        # Coerce once so that both the size check and the read accept any
        # path-like value, not only the read.
        document_path = Path(document_path)

        if document_path.stat().st_size > file_size_limit:
            text = "[File too large to preview]"
        else:
            # Undecodable bytes are replaced rather than raising, so a
            # thumbnail is produced even for files with invalid UTF-8.
            with document_path.open("r", encoding="utf-8", errors="replace") as f:
                text = f.read(max_chars)

        img = Image.new("RGB", (500, 700), color="white")
        draw = ImageDraw.Draw(img)
        font = ImageFont.truetype(
            font=settings.THUMBNAIL_FONT_NAME,
            size=20,
            layout_engine=ImageFont.Layout.BASIC,
        )
        draw.multiline_text((5, 5), text, font=font, fill="black", spacing=4)

        out_path = self._tempdir / "thumb.webp"
        img.save(out_path, format="WEBP")

        return out_path

    def get_page_count(
        self,
        document_path: Path,
        mime_type: str,
    ) -> int | None:
        """Return the number of pages in the document.

        Parameters
        ----------
        document_path:
            Absolute path to the source document. Not inspected.
        mime_type:
            Detected MIME type of the document. Not inspected.

        Returns
        -------
        int | None
            Always None — page count is not meaningful for plain text.
        """
        return None

    def extract_metadata(
        self,
        document_path: Path,
        mime_type: str,
    ) -> list[MetadataEntry]:
        """Extract format-specific metadata from the document.

        Parameters
        ----------
        document_path:
            Absolute path to the source document. Not inspected.
        mime_type:
            Detected MIME type of the document. Not inspected.

        Returns
        -------
        list[MetadataEntry]
            Always ``[]`` — plain text files carry no structured metadata.
        """
        return []

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _read_text(self, filepath: Path) -> str:
        """Read file content, replacing invalid UTF-8 bytes rather than failing.

        A strict UTF-8 read is attempted first. If it fails with a decoding
        error, a warning is logged and the file is read again with invalid
        bytes replaced. Both attempts use text mode, so line endings are
        normalised the same way whether or not the file was valid UTF-8.

        Parameters
        ----------
        filepath:
            Path to the file to read.

        Returns
        -------
        str
            File content as a string.

        Raises
        ------
        OSError
            If the file cannot be opened or read.
        """
        try:
            return filepath.read_text(encoding="utf-8")
        except UnicodeDecodeError as exc:
            logger.warning(
                "Unicode error reading %s, replacing bad bytes: %s",
                filepath,
                exc,
            )
            # Stay in text mode for the fallback: decoding raw bytes would
            # skip newline translation and leave "\r" characters that the
            # strict path above would have removed.
            return filepath.read_text(encoding="utf-8", errors="replace")
|
||||||
@@ -6,18 +6,25 @@ import math
|
|||||||
import multiprocessing
|
import multiprocessing
|
||||||
import os
|
import os
|
||||||
import tempfile
|
import tempfile
|
||||||
from os import PathLike
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Final
|
from typing import Final
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
from celery.schedules import crontab
|
|
||||||
from compression_middleware.middleware import CompressionMiddleware
|
from compression_middleware.middleware import CompressionMiddleware
|
||||||
from dateparser.languages.loader import LocaleDataLoader
|
|
||||||
from django.utils.translation import gettext_lazy as _
|
from django.utils.translation import gettext_lazy as _
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
from paperless.settings.custom import parse_beat_schedule
|
||||||
|
from paperless.settings.custom import parse_dateparser_languages
|
||||||
from paperless.settings.custom import parse_db_settings
|
from paperless.settings.custom import parse_db_settings
|
||||||
|
from paperless.settings.custom import parse_hosting_settings
|
||||||
|
from paperless.settings.custom import parse_ignore_dates
|
||||||
|
from paperless.settings.custom import parse_redis_url
|
||||||
|
from paperless.settings.parsers import get_bool_from_env
|
||||||
|
from paperless.settings.parsers import get_float_from_env
|
||||||
|
from paperless.settings.parsers import get_int_from_env
|
||||||
|
from paperless.settings.parsers import get_list_from_env
|
||||||
|
from paperless.settings.parsers import get_path_from_env
|
||||||
|
|
||||||
logger = logging.getLogger("paperless.settings")
|
logger = logging.getLogger("paperless.settings")
|
||||||
|
|
||||||
@@ -45,239 +52,8 @@ for path in [
|
|||||||
os.environ["OMP_THREAD_LIMIT"] = "1"
|
os.environ["OMP_THREAD_LIMIT"] = "1"
|
||||||
|
|
||||||
|
|
||||||
def __get_boolean(key: str, default: str = "NO") -> bool:
|
|
||||||
"""
|
|
||||||
Return a boolean value based on whatever the user has supplied in the
|
|
||||||
environment based on whether the value "looks like" it's True or not.
|
|
||||||
"""
|
|
||||||
return bool(os.getenv(key, default).lower() in ("yes", "y", "1", "t", "true"))
|
|
||||||
|
|
||||||
|
|
||||||
def __get_int(key: str, default: int) -> int:
|
|
||||||
"""
|
|
||||||
Return an integer value based on the environment variable or a default
|
|
||||||
"""
|
|
||||||
return int(os.getenv(key, default))
|
|
||||||
|
|
||||||
|
|
||||||
def __get_optional_int(key: str) -> int | None:
|
|
||||||
"""
|
|
||||||
Returns None if the environment key is not present, otherwise an integer
|
|
||||||
"""
|
|
||||||
if key in os.environ:
|
|
||||||
return __get_int(key, -1) # pragma: no cover
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def __get_float(key: str, default: float) -> float:
|
|
||||||
"""
|
|
||||||
Return an integer value based on the environment variable or a default
|
|
||||||
"""
|
|
||||||
return float(os.getenv(key, default))
|
|
||||||
|
|
||||||
|
|
||||||
def __get_path(
|
|
||||||
key: str,
|
|
||||||
default: PathLike | str,
|
|
||||||
) -> Path:
|
|
||||||
"""
|
|
||||||
Return a normalized, absolute path based on the environment variable or a default,
|
|
||||||
if provided
|
|
||||||
"""
|
|
||||||
if key in os.environ:
|
|
||||||
return Path(os.environ[key]).resolve()
|
|
||||||
return Path(default).resolve()
|
|
||||||
|
|
||||||
|
|
||||||
def __get_optional_path(key: str) -> Path | None:
|
|
||||||
"""
|
|
||||||
Returns None if the environment key is not present, otherwise a fully resolved Path
|
|
||||||
"""
|
|
||||||
if key in os.environ:
|
|
||||||
return __get_path(key, "")
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def __get_list(
|
|
||||||
key: str,
|
|
||||||
default: list[str] | None = None,
|
|
||||||
sep: str = ",",
|
|
||||||
) -> list[str]:
|
|
||||||
"""
|
|
||||||
Return a list of elements from the environment, as separated by the given
|
|
||||||
string, or the default if the key does not exist
|
|
||||||
"""
|
|
||||||
if key in os.environ:
|
|
||||||
return list(filter(None, os.environ[key].split(sep)))
|
|
||||||
elif default is not None:
|
|
||||||
return default
|
|
||||||
else:
|
|
||||||
return []
|
|
||||||
|
|
||||||
|
|
||||||
def _parse_redis_url(env_redis: str | None) -> tuple[str, str]:
|
|
||||||
"""
|
|
||||||
Gets the Redis information from the environment or a default and handles
|
|
||||||
converting from incompatible django_channels and celery formats.
|
|
||||||
|
|
||||||
Returns a tuple of (celery_url, channels_url)
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Not set, return a compatible default
|
|
||||||
if env_redis is None:
|
|
||||||
return ("redis://localhost:6379", "redis://localhost:6379")
|
|
||||||
|
|
||||||
if "unix" in env_redis.lower():
|
|
||||||
# channels_redis socket format, looks like:
|
|
||||||
# "unix:///path/to/redis.sock"
|
|
||||||
_, path = env_redis.split(":", 1)
|
|
||||||
# Optionally setting a db number
|
|
||||||
if "?db=" in env_redis:
|
|
||||||
path, number = path.split("?db=")
|
|
||||||
return (f"redis+socket:{path}?virtual_host={number}", env_redis)
|
|
||||||
else:
|
|
||||||
return (f"redis+socket:{path}", env_redis)
|
|
||||||
|
|
||||||
elif "+socket" in env_redis.lower():
|
|
||||||
# celery socket style, looks like:
|
|
||||||
# "redis+socket:///path/to/redis.sock"
|
|
||||||
_, path = env_redis.split(":", 1)
|
|
||||||
if "?virtual_host=" in env_redis:
|
|
||||||
# Virtual host (aka db number)
|
|
||||||
path, number = path.split("?virtual_host=")
|
|
||||||
return (env_redis, f"unix:{path}?db={number}")
|
|
||||||
else:
|
|
||||||
return (env_redis, f"unix:{path}")
|
|
||||||
|
|
||||||
# Not a socket
|
|
||||||
return (env_redis, env_redis)
|
|
||||||
|
|
||||||
|
|
||||||
def _parse_beat_schedule() -> dict:
|
|
||||||
"""
|
|
||||||
Configures the scheduled tasks, according to default or
|
|
||||||
environment variables. Task expiration is configured so the task will
|
|
||||||
expire (and not run), shortly before the default frequency will put another
|
|
||||||
of the same task into the queue
|
|
||||||
|
|
||||||
|
|
||||||
https://docs.celeryq.dev/en/stable/userguide/periodic-tasks.html#beat-entries
|
|
||||||
https://docs.celeryq.dev/en/latest/userguide/calling.html#expiration
|
|
||||||
"""
|
|
||||||
schedule = {}
|
|
||||||
tasks = [
|
|
||||||
{
|
|
||||||
"name": "Check all e-mail accounts",
|
|
||||||
"env_key": "PAPERLESS_EMAIL_TASK_CRON",
|
|
||||||
# Default every ten minutes
|
|
||||||
"env_default": "*/10 * * * *",
|
|
||||||
"task": "paperless_mail.tasks.process_mail_accounts",
|
|
||||||
"options": {
|
|
||||||
# 1 minute before default schedule sends again
|
|
||||||
"expires": 9.0 * 60.0,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Train the classifier",
|
|
||||||
"env_key": "PAPERLESS_TRAIN_TASK_CRON",
|
|
||||||
# Default hourly at 5 minutes past the hour
|
|
||||||
"env_default": "5 */1 * * *",
|
|
||||||
"task": "documents.tasks.train_classifier",
|
|
||||||
"options": {
|
|
||||||
# 1 minute before default schedule sends again
|
|
||||||
"expires": 59.0 * 60.0,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Optimize the index",
|
|
||||||
"env_key": "PAPERLESS_INDEX_TASK_CRON",
|
|
||||||
# Default daily at midnight
|
|
||||||
"env_default": "0 0 * * *",
|
|
||||||
"task": "documents.tasks.index_optimize",
|
|
||||||
"options": {
|
|
||||||
# 1 hour before default schedule sends again
|
|
||||||
"expires": 23.0 * 60.0 * 60.0,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Perform sanity check",
|
|
||||||
"env_key": "PAPERLESS_SANITY_TASK_CRON",
|
|
||||||
# Default Sunday at 00:30
|
|
||||||
"env_default": "30 0 * * sun",
|
|
||||||
"task": "documents.tasks.sanity_check",
|
|
||||||
"options": {
|
|
||||||
# 1 hour before default schedule sends again
|
|
||||||
"expires": ((7.0 * 24.0) - 1.0) * 60.0 * 60.0,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Empty trash",
|
|
||||||
"env_key": "PAPERLESS_EMPTY_TRASH_TASK_CRON",
|
|
||||||
# Default daily at 01:00
|
|
||||||
"env_default": "0 1 * * *",
|
|
||||||
"task": "documents.tasks.empty_trash",
|
|
||||||
"options": {
|
|
||||||
# 1 hour before default schedule sends again
|
|
||||||
"expires": 23.0 * 60.0 * 60.0,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Check and run scheduled workflows",
|
|
||||||
"env_key": "PAPERLESS_WORKFLOW_SCHEDULED_TASK_CRON",
|
|
||||||
# Default hourly at 5 minutes past the hour
|
|
||||||
"env_default": "5 */1 * * *",
|
|
||||||
"task": "documents.tasks.check_scheduled_workflows",
|
|
||||||
"options": {
|
|
||||||
# 1 minute before default schedule sends again
|
|
||||||
"expires": 59.0 * 60.0,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Rebuild LLM index",
|
|
||||||
"env_key": "PAPERLESS_LLM_INDEX_TASK_CRON",
|
|
||||||
# Default daily at 02:10
|
|
||||||
"env_default": "10 2 * * *",
|
|
||||||
"task": "documents.tasks.llmindex_index",
|
|
||||||
"options": {
|
|
||||||
# 1 hour before default schedule sends again
|
|
||||||
"expires": 23.0 * 60.0 * 60.0,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Cleanup expired share link bundles",
|
|
||||||
"env_key": "PAPERLESS_SHARE_LINK_BUNDLE_CLEANUP_CRON",
|
|
||||||
# Default daily at 02:00
|
|
||||||
"env_default": "0 2 * * *",
|
|
||||||
"task": "documents.tasks.cleanup_expired_share_link_bundles",
|
|
||||||
"options": {
|
|
||||||
# 1 hour before default schedule sends again
|
|
||||||
"expires": 23.0 * 60.0 * 60.0,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
]
|
|
||||||
for task in tasks:
|
|
||||||
# Either get the environment setting or use the default
|
|
||||||
value = os.getenv(task["env_key"], task["env_default"])
|
|
||||||
# Don't add disabled tasks to the schedule
|
|
||||||
if value == "disable":
|
|
||||||
continue
|
|
||||||
# I find https://crontab.guru/ super helpful
|
|
||||||
# crontab(5) format
|
|
||||||
# - five time-and-date fields
|
|
||||||
# - separated by at least one blank
|
|
||||||
minute, hour, day_month, month, day_week = value.split(" ")
|
|
||||||
|
|
||||||
schedule[task["name"]] = {
|
|
||||||
"task": task["task"],
|
|
||||||
"schedule": crontab(minute, hour, day_week, day_month, month),
|
|
||||||
"options": task["options"],
|
|
||||||
}
|
|
||||||
|
|
||||||
return schedule
|
|
||||||
|
|
||||||
|
|
||||||
# NEVER RUN WITH DEBUG IN PRODUCTION.
|
# NEVER RUN WITH DEBUG IN PRODUCTION.
|
||||||
DEBUG = __get_boolean("PAPERLESS_DEBUG", "NO")
|
DEBUG = get_bool_from_env("PAPERLESS_DEBUG", "NO")
|
||||||
|
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
@@ -286,21 +62,21 @@ DEBUG = __get_boolean("PAPERLESS_DEBUG", "NO")
|
|||||||
|
|
||||||
BASE_DIR: Path = Path(__file__).resolve().parent.parent.parent
|
BASE_DIR: Path = Path(__file__).resolve().parent.parent.parent
|
||||||
|
|
||||||
STATIC_ROOT = __get_path("PAPERLESS_STATICDIR", BASE_DIR.parent / "static")
|
STATIC_ROOT = get_path_from_env("PAPERLESS_STATICDIR", BASE_DIR.parent / "static")
|
||||||
|
|
||||||
MEDIA_ROOT = __get_path("PAPERLESS_MEDIA_ROOT", BASE_DIR.parent / "media")
|
MEDIA_ROOT = get_path_from_env("PAPERLESS_MEDIA_ROOT", BASE_DIR.parent / "media")
|
||||||
ORIGINALS_DIR = MEDIA_ROOT / "documents" / "originals"
|
ORIGINALS_DIR = MEDIA_ROOT / "documents" / "originals"
|
||||||
ARCHIVE_DIR = MEDIA_ROOT / "documents" / "archive"
|
ARCHIVE_DIR = MEDIA_ROOT / "documents" / "archive"
|
||||||
THUMBNAIL_DIR = MEDIA_ROOT / "documents" / "thumbnails"
|
THUMBNAIL_DIR = MEDIA_ROOT / "documents" / "thumbnails"
|
||||||
SHARE_LINK_BUNDLE_DIR = MEDIA_ROOT / "documents" / "share_link_bundles"
|
SHARE_LINK_BUNDLE_DIR = MEDIA_ROOT / "documents" / "share_link_bundles"
|
||||||
|
|
||||||
DATA_DIR = __get_path("PAPERLESS_DATA_DIR", BASE_DIR.parent / "data")
|
DATA_DIR = get_path_from_env("PAPERLESS_DATA_DIR", BASE_DIR.parent / "data")
|
||||||
|
|
||||||
NLTK_DIR = __get_path("PAPERLESS_NLTK_DIR", "/usr/share/nltk_data")
|
NLTK_DIR = get_path_from_env("PAPERLESS_NLTK_DIR", "/usr/share/nltk_data")
|
||||||
|
|
||||||
# Check deprecated setting first
|
# Check deprecated setting first
|
||||||
EMPTY_TRASH_DIR = (
|
EMPTY_TRASH_DIR = (
|
||||||
__get_path("PAPERLESS_TRASH_DIR", os.getenv("PAPERLESS_EMPTY_TRASH_DIR"))
|
get_path_from_env("PAPERLESS_TRASH_DIR", os.getenv("PAPERLESS_EMPTY_TRASH_DIR"))
|
||||||
if os.getenv("PAPERLESS_TRASH_DIR") or os.getenv("PAPERLESS_EMPTY_TRASH_DIR")
|
if os.getenv("PAPERLESS_TRASH_DIR") or os.getenv("PAPERLESS_EMPTY_TRASH_DIR")
|
||||||
else None
|
else None
|
||||||
)
|
)
|
||||||
@@ -309,21 +85,21 @@ EMPTY_TRASH_DIR = (
|
|||||||
# threads.
|
# threads.
|
||||||
MEDIA_LOCK = MEDIA_ROOT / "media.lock"
|
MEDIA_LOCK = MEDIA_ROOT / "media.lock"
|
||||||
INDEX_DIR = DATA_DIR / "index"
|
INDEX_DIR = DATA_DIR / "index"
|
||||||
MODEL_FILE = __get_path(
|
MODEL_FILE = get_path_from_env(
|
||||||
"PAPERLESS_MODEL_FILE",
|
"PAPERLESS_MODEL_FILE",
|
||||||
DATA_DIR / "classification_model.pickle",
|
DATA_DIR / "classification_model.pickle",
|
||||||
)
|
)
|
||||||
LLM_INDEX_DIR = DATA_DIR / "llm_index"
|
LLM_INDEX_DIR = DATA_DIR / "llm_index"
|
||||||
|
|
||||||
LOGGING_DIR = __get_path("PAPERLESS_LOGGING_DIR", DATA_DIR / "log")
|
LOGGING_DIR = get_path_from_env("PAPERLESS_LOGGING_DIR", DATA_DIR / "log")
|
||||||
|
|
||||||
CONSUMPTION_DIR = __get_path(
|
CONSUMPTION_DIR = get_path_from_env(
|
||||||
"PAPERLESS_CONSUMPTION_DIR",
|
"PAPERLESS_CONSUMPTION_DIR",
|
||||||
BASE_DIR.parent / "consume",
|
BASE_DIR.parent / "consume",
|
||||||
)
|
)
|
||||||
|
|
||||||
# This will be created if it doesn't exist
|
# This will be created if it doesn't exist
|
||||||
SCRATCH_DIR = __get_path(
|
SCRATCH_DIR = get_path_from_env(
|
||||||
"PAPERLESS_SCRATCH_DIR",
|
"PAPERLESS_SCRATCH_DIR",
|
||||||
Path(tempfile.gettempdir()) / "paperless",
|
Path(tempfile.gettempdir()) / "paperless",
|
||||||
)
|
)
|
||||||
@@ -332,7 +108,7 @@ SCRATCH_DIR = __get_path(
|
|||||||
# Application Definition #
|
# Application Definition #
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
|
||||||
env_apps = __get_list("PAPERLESS_APPS")
|
env_apps = get_list_from_env("PAPERLESS_APPS")
|
||||||
|
|
||||||
INSTALLED_APPS = [
|
INSTALLED_APPS = [
|
||||||
"whitenoise.runserver_nostatic",
|
"whitenoise.runserver_nostatic",
|
||||||
@@ -379,7 +155,7 @@ REST_FRAMEWORK = {
|
|||||||
"DEFAULT_VERSION": "10", # match src-ui/src/environments/environment.prod.ts
|
"DEFAULT_VERSION": "10", # match src-ui/src/environments/environment.prod.ts
|
||||||
# Make sure these are ordered and that the most recent version appears
|
# Make sure these are ordered and that the most recent version appears
|
||||||
# last. See api.md#api-versioning when adding new versions.
|
# last. See api.md#api-versioning when adding new versions.
|
||||||
"ALLOWED_VERSIONS": ["2", "3", "4", "5", "6", "7", "8", "9", "10"],
|
"ALLOWED_VERSIONS": ["9", "10"],
|
||||||
# DRF Spectacular default schema
|
# DRF Spectacular default schema
|
||||||
"DEFAULT_SCHEMA_CLASS": "drf_spectacular.openapi.AutoSchema",
|
"DEFAULT_SCHEMA_CLASS": "drf_spectacular.openapi.AutoSchema",
|
||||||
}
|
}
|
||||||
@@ -405,7 +181,7 @@ MIDDLEWARE = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
# Optional to enable compression
|
# Optional to enable compression
|
||||||
if __get_boolean("PAPERLESS_ENABLE_COMPRESSION", "yes"): # pragma: no cover
|
if get_bool_from_env("PAPERLESS_ENABLE_COMPRESSION", "yes"): # pragma: no cover
|
||||||
MIDDLEWARE.insert(0, "compression_middleware.middleware.CompressionMiddleware")
|
MIDDLEWARE.insert(0, "compression_middleware.middleware.CompressionMiddleware")
|
||||||
|
|
||||||
# Workaround to not compress streaming responses (e.g. chat).
|
# Workaround to not compress streaming responses (e.g. chat).
|
||||||
@@ -424,20 +200,8 @@ CompressionMiddleware.process_response = patched_process_response
|
|||||||
ROOT_URLCONF = "paperless.urls"
|
ROOT_URLCONF = "paperless.urls"
|
||||||
|
|
||||||
|
|
||||||
def _parse_base_paths() -> tuple[str, str, str, str, str]:
|
|
||||||
script_name = os.getenv("PAPERLESS_FORCE_SCRIPT_NAME")
|
|
||||||
base_url = (script_name or "") + "/"
|
|
||||||
login_url = base_url + "accounts/login/"
|
|
||||||
login_redirect_url = base_url + "dashboard"
|
|
||||||
logout_redirect_url = os.getenv(
|
|
||||||
"PAPERLESS_LOGOUT_REDIRECT_URL",
|
|
||||||
login_url + "?loggedout=1",
|
|
||||||
)
|
|
||||||
return script_name, base_url, login_url, login_redirect_url, logout_redirect_url
|
|
||||||
|
|
||||||
|
|
||||||
FORCE_SCRIPT_NAME, BASE_URL, LOGIN_URL, LOGIN_REDIRECT_URL, LOGOUT_REDIRECT_URL = (
|
FORCE_SCRIPT_NAME, BASE_URL, LOGIN_URL, LOGIN_REDIRECT_URL, LOGOUT_REDIRECT_URL = (
|
||||||
_parse_base_paths()
|
parse_hosting_settings()
|
||||||
)
|
)
|
||||||
|
|
||||||
# DRF Spectacular settings
|
# DRF Spectacular settings
|
||||||
@@ -471,7 +235,7 @@ STORAGES = {
|
|||||||
"default": {"BACKEND": "django.core.files.storage.FileSystemStorage"},
|
"default": {"BACKEND": "django.core.files.storage.FileSystemStorage"},
|
||||||
}
|
}
|
||||||
|
|
||||||
_CELERY_REDIS_URL, _CHANNELS_REDIS_URL = _parse_redis_url(
|
_CELERY_REDIS_URL, _CHANNELS_REDIS_URL = parse_redis_url(
|
||||||
os.getenv("PAPERLESS_REDIS", None),
|
os.getenv("PAPERLESS_REDIS", None),
|
||||||
)
|
)
|
||||||
_REDIS_KEY_PREFIX = os.getenv("PAPERLESS_REDIS_PREFIX", "")
|
_REDIS_KEY_PREFIX = os.getenv("PAPERLESS_REDIS_PREFIX", "")
|
||||||
@@ -520,8 +284,8 @@ EMAIL_PORT: Final[int] = int(os.getenv("PAPERLESS_EMAIL_PORT", 25))
|
|||||||
EMAIL_HOST_USER: Final[str] = os.getenv("PAPERLESS_EMAIL_HOST_USER", "")
|
EMAIL_HOST_USER: Final[str] = os.getenv("PAPERLESS_EMAIL_HOST_USER", "")
|
||||||
EMAIL_HOST_PASSWORD: Final[str] = os.getenv("PAPERLESS_EMAIL_HOST_PASSWORD", "")
|
EMAIL_HOST_PASSWORD: Final[str] = os.getenv("PAPERLESS_EMAIL_HOST_PASSWORD", "")
|
||||||
DEFAULT_FROM_EMAIL: Final[str] = os.getenv("PAPERLESS_EMAIL_FROM", EMAIL_HOST_USER)
|
DEFAULT_FROM_EMAIL: Final[str] = os.getenv("PAPERLESS_EMAIL_FROM", EMAIL_HOST_USER)
|
||||||
EMAIL_USE_TLS: Final[bool] = __get_boolean("PAPERLESS_EMAIL_USE_TLS")
|
EMAIL_USE_TLS: Final[bool] = get_bool_from_env("PAPERLESS_EMAIL_USE_TLS")
|
||||||
EMAIL_USE_SSL: Final[bool] = __get_boolean("PAPERLESS_EMAIL_USE_SSL")
|
EMAIL_USE_SSL: Final[bool] = get_bool_from_env("PAPERLESS_EMAIL_USE_SSL")
|
||||||
EMAIL_SUBJECT_PREFIX: Final[str] = "[Paperless-ngx] "
|
EMAIL_SUBJECT_PREFIX: Final[str] = "[Paperless-ngx] "
|
||||||
EMAIL_TIMEOUT = 30.0
|
EMAIL_TIMEOUT = 30.0
|
||||||
EMAIL_ENABLED = EMAIL_HOST != "localhost" or EMAIL_HOST_USER != ""
|
EMAIL_ENABLED = EMAIL_HOST != "localhost" or EMAIL_HOST_USER != ""
|
||||||
@@ -546,20 +310,22 @@ ACCOUNT_DEFAULT_HTTP_PROTOCOL = os.getenv(
|
|||||||
)
|
)
|
||||||
|
|
||||||
ACCOUNT_ADAPTER = "paperless.adapter.CustomAccountAdapter"
|
ACCOUNT_ADAPTER = "paperless.adapter.CustomAccountAdapter"
|
||||||
ACCOUNT_ALLOW_SIGNUPS = __get_boolean("PAPERLESS_ACCOUNT_ALLOW_SIGNUPS")
|
ACCOUNT_ALLOW_SIGNUPS = get_bool_from_env("PAPERLESS_ACCOUNT_ALLOW_SIGNUPS")
|
||||||
ACCOUNT_DEFAULT_GROUPS = __get_list("PAPERLESS_ACCOUNT_DEFAULT_GROUPS")
|
ACCOUNT_DEFAULT_GROUPS = get_list_from_env("PAPERLESS_ACCOUNT_DEFAULT_GROUPS")
|
||||||
|
|
||||||
SOCIALACCOUNT_ADAPTER = "paperless.adapter.CustomSocialAccountAdapter"
|
SOCIALACCOUNT_ADAPTER = "paperless.adapter.CustomSocialAccountAdapter"
|
||||||
SOCIALACCOUNT_ALLOW_SIGNUPS = __get_boolean(
|
SOCIALACCOUNT_ALLOW_SIGNUPS = get_bool_from_env(
|
||||||
"PAPERLESS_SOCIALACCOUNT_ALLOW_SIGNUPS",
|
"PAPERLESS_SOCIALACCOUNT_ALLOW_SIGNUPS",
|
||||||
"yes",
|
"yes",
|
||||||
)
|
)
|
||||||
SOCIALACCOUNT_AUTO_SIGNUP = __get_boolean("PAPERLESS_SOCIAL_AUTO_SIGNUP")
|
SOCIALACCOUNT_AUTO_SIGNUP = get_bool_from_env("PAPERLESS_SOCIAL_AUTO_SIGNUP")
|
||||||
SOCIALACCOUNT_PROVIDERS = json.loads(
|
SOCIALACCOUNT_PROVIDERS = json.loads(
|
||||||
os.getenv("PAPERLESS_SOCIALACCOUNT_PROVIDERS", "{}"),
|
os.getenv("PAPERLESS_SOCIALACCOUNT_PROVIDERS", "{}"),
|
||||||
)
|
)
|
||||||
SOCIAL_ACCOUNT_DEFAULT_GROUPS = __get_list("PAPERLESS_SOCIAL_ACCOUNT_DEFAULT_GROUPS")
|
SOCIAL_ACCOUNT_DEFAULT_GROUPS = get_list_from_env(
|
||||||
SOCIAL_ACCOUNT_SYNC_GROUPS = __get_boolean("PAPERLESS_SOCIAL_ACCOUNT_SYNC_GROUPS")
|
"PAPERLESS_SOCIAL_ACCOUNT_DEFAULT_GROUPS",
|
||||||
|
)
|
||||||
|
SOCIAL_ACCOUNT_SYNC_GROUPS = get_bool_from_env("PAPERLESS_SOCIAL_ACCOUNT_SYNC_GROUPS")
|
||||||
SOCIAL_ACCOUNT_SYNC_GROUPS_CLAIM: Final[str] = os.getenv(
|
SOCIAL_ACCOUNT_SYNC_GROUPS_CLAIM: Final[str] = os.getenv(
|
||||||
"PAPERLESS_SOCIAL_ACCOUNT_SYNC_GROUPS_CLAIM",
|
"PAPERLESS_SOCIAL_ACCOUNT_SYNC_GROUPS_CLAIM",
|
||||||
"groups",
|
"groups",
|
||||||
@@ -571,8 +337,8 @@ MFA_TOTP_ISSUER = "Paperless-ngx"
|
|||||||
|
|
||||||
ACCOUNT_EMAIL_SUBJECT_PREFIX = "[Paperless-ngx] "
|
ACCOUNT_EMAIL_SUBJECT_PREFIX = "[Paperless-ngx] "
|
||||||
|
|
||||||
DISABLE_REGULAR_LOGIN = __get_boolean("PAPERLESS_DISABLE_REGULAR_LOGIN")
|
DISABLE_REGULAR_LOGIN = get_bool_from_env("PAPERLESS_DISABLE_REGULAR_LOGIN")
|
||||||
REDIRECT_LOGIN_TO_SSO = __get_boolean("PAPERLESS_REDIRECT_LOGIN_TO_SSO")
|
REDIRECT_LOGIN_TO_SSO = get_bool_from_env("PAPERLESS_REDIRECT_LOGIN_TO_SSO")
|
||||||
|
|
||||||
AUTO_LOGIN_USERNAME = os.getenv("PAPERLESS_AUTO_LOGIN_USERNAME")
|
AUTO_LOGIN_USERNAME = os.getenv("PAPERLESS_AUTO_LOGIN_USERNAME")
|
||||||
|
|
||||||
@@ -585,12 +351,15 @@ ACCOUNT_EMAIL_VERIFICATION = (
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
ACCOUNT_EMAIL_UNKNOWN_ACCOUNTS = __get_boolean(
|
ACCOUNT_EMAIL_UNKNOWN_ACCOUNTS = get_bool_from_env(
|
||||||
"PAPERLESS_ACCOUNT_EMAIL_UNKNOWN_ACCOUNTS",
|
"PAPERLESS_ACCOUNT_EMAIL_UNKNOWN_ACCOUNTS",
|
||||||
"True",
|
"True",
|
||||||
)
|
)
|
||||||
|
|
||||||
ACCOUNT_SESSION_REMEMBER = __get_boolean("PAPERLESS_ACCOUNT_SESSION_REMEMBER", "True")
|
ACCOUNT_SESSION_REMEMBER = get_bool_from_env(
|
||||||
|
"PAPERLESS_ACCOUNT_SESSION_REMEMBER",
|
||||||
|
"True",
|
||||||
|
)
|
||||||
SESSION_EXPIRE_AT_BROWSER_CLOSE = not ACCOUNT_SESSION_REMEMBER
|
SESSION_EXPIRE_AT_BROWSER_CLOSE = not ACCOUNT_SESSION_REMEMBER
|
||||||
SESSION_COOKIE_AGE = int(
|
SESSION_COOKIE_AGE = int(
|
||||||
os.getenv("PAPERLESS_SESSION_COOKIE_AGE", 60 * 60 * 24 * 7 * 3),
|
os.getenv("PAPERLESS_SESSION_COOKIE_AGE", 60 * 60 * 24 * 7 * 3),
|
||||||
@@ -607,8 +376,8 @@ if AUTO_LOGIN_USERNAME:
|
|||||||
|
|
||||||
def _parse_remote_user_settings() -> str:
|
def _parse_remote_user_settings() -> str:
|
||||||
global MIDDLEWARE, AUTHENTICATION_BACKENDS, REST_FRAMEWORK
|
global MIDDLEWARE, AUTHENTICATION_BACKENDS, REST_FRAMEWORK
|
||||||
enable = __get_boolean("PAPERLESS_ENABLE_HTTP_REMOTE_USER")
|
enable = get_bool_from_env("PAPERLESS_ENABLE_HTTP_REMOTE_USER")
|
||||||
enable_api = __get_boolean("PAPERLESS_ENABLE_HTTP_REMOTE_USER_API")
|
enable_api = get_bool_from_env("PAPERLESS_ENABLE_HTTP_REMOTE_USER_API")
|
||||||
if enable or enable_api:
|
if enable or enable_api:
|
||||||
MIDDLEWARE.append("paperless.auth.HttpRemoteUserMiddleware")
|
MIDDLEWARE.append("paperless.auth.HttpRemoteUserMiddleware")
|
||||||
AUTHENTICATION_BACKENDS.insert(
|
AUTHENTICATION_BACKENDS.insert(
|
||||||
@@ -636,16 +405,16 @@ HTTP_REMOTE_USER_HEADER_NAME = _parse_remote_user_settings()
|
|||||||
X_FRAME_OPTIONS = "SAMEORIGIN"
|
X_FRAME_OPTIONS = "SAMEORIGIN"
|
||||||
|
|
||||||
# The next 3 settings can also be set using just PAPERLESS_URL
|
# The next 3 settings can also be set using just PAPERLESS_URL
|
||||||
CSRF_TRUSTED_ORIGINS = __get_list("PAPERLESS_CSRF_TRUSTED_ORIGINS")
|
CSRF_TRUSTED_ORIGINS = get_list_from_env("PAPERLESS_CSRF_TRUSTED_ORIGINS")
|
||||||
|
|
||||||
if DEBUG:
|
if DEBUG:
|
||||||
# Allow access from the angular development server during debugging
|
# Allow access from the angular development server during debugging
|
||||||
CSRF_TRUSTED_ORIGINS.append("http://localhost:4200")
|
CSRF_TRUSTED_ORIGINS.append("http://localhost:4200")
|
||||||
|
|
||||||
# We allow CORS from localhost:8000
|
# We allow CORS from localhost:8000
|
||||||
CORS_ALLOWED_ORIGINS = __get_list(
|
CORS_ALLOWED_ORIGINS = get_list_from_env(
|
||||||
"PAPERLESS_CORS_ALLOWED_HOSTS",
|
"PAPERLESS_CORS_ALLOWED_HOSTS",
|
||||||
["http://localhost:8000"],
|
default=["http://localhost:8000"],
|
||||||
)
|
)
|
||||||
|
|
||||||
if DEBUG:
|
if DEBUG:
|
||||||
@@ -658,7 +427,7 @@ CORS_EXPOSE_HEADERS = [
|
|||||||
"Content-Disposition",
|
"Content-Disposition",
|
||||||
]
|
]
|
||||||
|
|
||||||
ALLOWED_HOSTS = __get_list("PAPERLESS_ALLOWED_HOSTS", ["*"])
|
ALLOWED_HOSTS = get_list_from_env("PAPERLESS_ALLOWED_HOSTS", default=["*"])
|
||||||
if ALLOWED_HOSTS != ["*"]:
|
if ALLOWED_HOSTS != ["*"]:
|
||||||
# always allow localhost. Necessary e.g. for healthcheck in docker.
|
# always allow localhost. Necessary e.g. for healthcheck in docker.
|
||||||
ALLOWED_HOSTS.append("localhost")
|
ALLOWED_HOSTS.append("localhost")
|
||||||
@@ -678,10 +447,10 @@ def _parse_paperless_url():
|
|||||||
PAPERLESS_URL = _parse_paperless_url()
|
PAPERLESS_URL = _parse_paperless_url()
|
||||||
|
|
||||||
# For use with trusted proxies
|
# For use with trusted proxies
|
||||||
TRUSTED_PROXIES = __get_list("PAPERLESS_TRUSTED_PROXIES")
|
TRUSTED_PROXIES = get_list_from_env("PAPERLESS_TRUSTED_PROXIES")
|
||||||
|
|
||||||
USE_X_FORWARDED_HOST = __get_boolean("PAPERLESS_USE_X_FORWARD_HOST", "false")
|
USE_X_FORWARDED_HOST = get_bool_from_env("PAPERLESS_USE_X_FORWARD_HOST", "false")
|
||||||
USE_X_FORWARDED_PORT = __get_boolean("PAPERLESS_USE_X_FORWARD_PORT", "false")
|
USE_X_FORWARDED_PORT = get_bool_from_env("PAPERLESS_USE_X_FORWARD_PORT", "false")
|
||||||
SECURE_PROXY_SSL_HEADER = (
|
SECURE_PROXY_SSL_HEADER = (
|
||||||
tuple(json.loads(os.environ["PAPERLESS_PROXY_SSL_HEADER"]))
|
tuple(json.loads(os.environ["PAPERLESS_PROXY_SSL_HEADER"]))
|
||||||
if "PAPERLESS_PROXY_SSL_HEADER" in os.environ
|
if "PAPERLESS_PROXY_SSL_HEADER" in os.environ
|
||||||
@@ -724,7 +493,7 @@ CSRF_COOKIE_NAME = f"{COOKIE_PREFIX}csrftoken"
|
|||||||
SESSION_COOKIE_NAME = f"{COOKIE_PREFIX}sessionid"
|
SESSION_COOKIE_NAME = f"{COOKIE_PREFIX}sessionid"
|
||||||
LANGUAGE_COOKIE_NAME = f"{COOKIE_PREFIX}django_language"
|
LANGUAGE_COOKIE_NAME = f"{COOKIE_PREFIX}django_language"
|
||||||
|
|
||||||
EMAIL_CERTIFICATE_FILE = __get_optional_path("PAPERLESS_EMAIL_CERTIFICATE_LOCATION")
|
EMAIL_CERTIFICATE_FILE = get_path_from_env("PAPERLESS_EMAIL_CERTIFICATE_LOCATION")
|
||||||
|
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
@@ -875,7 +644,7 @@ CELERY_BROKER_URL = _CELERY_REDIS_URL
|
|||||||
CELERY_TIMEZONE = TIME_ZONE
|
CELERY_TIMEZONE = TIME_ZONE
|
||||||
|
|
||||||
CELERY_WORKER_HIJACK_ROOT_LOGGER = False
|
CELERY_WORKER_HIJACK_ROOT_LOGGER = False
|
||||||
CELERY_WORKER_CONCURRENCY: Final[int] = __get_int("PAPERLESS_TASK_WORKERS", 1)
|
CELERY_WORKER_CONCURRENCY: Final[int] = get_int_from_env("PAPERLESS_TASK_WORKERS", 1)
|
||||||
TASK_WORKERS = CELERY_WORKER_CONCURRENCY
|
TASK_WORKERS = CELERY_WORKER_CONCURRENCY
|
||||||
CELERY_WORKER_MAX_TASKS_PER_CHILD = 1
|
CELERY_WORKER_MAX_TASKS_PER_CHILD = 1
|
||||||
CELERY_WORKER_SEND_TASK_EVENTS = True
|
CELERY_WORKER_SEND_TASK_EVENTS = True
|
||||||
@@ -888,7 +657,7 @@ CELERY_BROKER_TRANSPORT_OPTIONS = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
CELERY_TASK_TRACK_STARTED = True
|
CELERY_TASK_TRACK_STARTED = True
|
||||||
CELERY_TASK_TIME_LIMIT: Final[int] = __get_int("PAPERLESS_WORKER_TIMEOUT", 1800)
|
CELERY_TASK_TIME_LIMIT: Final[int] = get_int_from_env("PAPERLESS_WORKER_TIMEOUT", 1800)
|
||||||
|
|
||||||
CELERY_RESULT_EXTENDED = True
|
CELERY_RESULT_EXTENDED = True
|
||||||
CELERY_RESULT_BACKEND = "django-db"
|
CELERY_RESULT_BACKEND = "django-db"
|
||||||
@@ -900,7 +669,7 @@ CELERY_TASK_SERIALIZER = "pickle"
|
|||||||
CELERY_ACCEPT_CONTENT = ["application/json", "application/x-python-serialize"]
|
CELERY_ACCEPT_CONTENT = ["application/json", "application/x-python-serialize"]
|
||||||
|
|
||||||
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#beat-schedule
|
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#beat-schedule
|
||||||
CELERY_BEAT_SCHEDULE = _parse_beat_schedule()
|
CELERY_BEAT_SCHEDULE = parse_beat_schedule()
|
||||||
|
|
||||||
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#beat-schedule-filename
|
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#beat-schedule-filename
|
||||||
CELERY_BEAT_SCHEDULE_FILENAME = str(DATA_DIR / "celerybeat-schedule.db")
|
CELERY_BEAT_SCHEDULE_FILENAME = str(DATA_DIR / "celerybeat-schedule.db")
|
||||||
@@ -908,14 +677,14 @@ CELERY_BEAT_SCHEDULE_FILENAME = str(DATA_DIR / "celerybeat-schedule.db")
|
|||||||
|
|
||||||
# Cachalot: Database read cache.
|
# Cachalot: Database read cache.
|
||||||
def _parse_cachalot_settings():
|
def _parse_cachalot_settings():
|
||||||
ttl = __get_int("PAPERLESS_READ_CACHE_TTL", 3600)
|
ttl = get_int_from_env("PAPERLESS_READ_CACHE_TTL", 3600)
|
||||||
ttl = min(ttl, 31536000) if ttl > 0 else 3600
|
ttl = min(ttl, 31536000) if ttl > 0 else 3600
|
||||||
_, redis_url = _parse_redis_url(
|
_, redis_url = parse_redis_url(
|
||||||
os.getenv("PAPERLESS_READ_CACHE_REDIS_URL", _CHANNELS_REDIS_URL),
|
os.getenv("PAPERLESS_READ_CACHE_REDIS_URL", _CHANNELS_REDIS_URL),
|
||||||
)
|
)
|
||||||
result = {
|
result = {
|
||||||
"CACHALOT_CACHE": "read-cache",
|
"CACHALOT_CACHE": "read-cache",
|
||||||
"CACHALOT_ENABLED": __get_boolean(
|
"CACHALOT_ENABLED": get_bool_from_env(
|
||||||
"PAPERLESS_DB_READ_CACHE_ENABLED",
|
"PAPERLESS_DB_READ_CACHE_ENABLED",
|
||||||
default="no",
|
default="no",
|
||||||
),
|
),
|
||||||
@@ -1000,9 +769,9 @@ CONSUMER_POLLING_INTERVAL = float(os.getenv("PAPERLESS_CONSUMER_POLLING_INTERVAL
|
|||||||
|
|
||||||
CONSUMER_STABILITY_DELAY = float(os.getenv("PAPERLESS_CONSUMER_STABILITY_DELAY", 5))
|
CONSUMER_STABILITY_DELAY = float(os.getenv("PAPERLESS_CONSUMER_STABILITY_DELAY", 5))
|
||||||
|
|
||||||
CONSUMER_DELETE_DUPLICATES = __get_boolean("PAPERLESS_CONSUMER_DELETE_DUPLICATES")
|
CONSUMER_DELETE_DUPLICATES = get_bool_from_env("PAPERLESS_CONSUMER_DELETE_DUPLICATES")
|
||||||
|
|
||||||
CONSUMER_RECURSIVE = __get_boolean("PAPERLESS_CONSUMER_RECURSIVE")
|
CONSUMER_RECURSIVE = get_bool_from_env("PAPERLESS_CONSUMER_RECURSIVE")
|
||||||
|
|
||||||
# Ignore regex patterns, matched against filename only
|
# Ignore regex patterns, matched against filename only
|
||||||
CONSUMER_IGNORE_PATTERNS = list(
|
CONSUMER_IGNORE_PATTERNS = list(
|
||||||
@@ -1024,13 +793,13 @@ CONSUMER_IGNORE_DIRS = list(
|
|||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
CONSUMER_SUBDIRS_AS_TAGS = __get_boolean("PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS")
|
CONSUMER_SUBDIRS_AS_TAGS = get_bool_from_env("PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS")
|
||||||
|
|
||||||
CONSUMER_ENABLE_BARCODES: Final[bool] = __get_boolean(
|
CONSUMER_ENABLE_BARCODES: Final[bool] = get_bool_from_env(
|
||||||
"PAPERLESS_CONSUMER_ENABLE_BARCODES",
|
"PAPERLESS_CONSUMER_ENABLE_BARCODES",
|
||||||
)
|
)
|
||||||
|
|
||||||
CONSUMER_BARCODE_TIFF_SUPPORT: Final[bool] = __get_boolean(
|
CONSUMER_BARCODE_TIFF_SUPPORT: Final[bool] = get_bool_from_env(
|
||||||
"PAPERLESS_CONSUMER_BARCODE_TIFF_SUPPORT",
|
"PAPERLESS_CONSUMER_BARCODE_TIFF_SUPPORT",
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -1039,7 +808,7 @@ CONSUMER_BARCODE_STRING: Final[str] = os.getenv(
|
|||||||
"PATCHT",
|
"PATCHT",
|
||||||
)
|
)
|
||||||
|
|
||||||
CONSUMER_ENABLE_ASN_BARCODE: Final[bool] = __get_boolean(
|
CONSUMER_ENABLE_ASN_BARCODE: Final[bool] = get_bool_from_env(
|
||||||
"PAPERLESS_CONSUMER_ENABLE_ASN_BARCODE",
|
"PAPERLESS_CONSUMER_ENABLE_ASN_BARCODE",
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -1048,23 +817,26 @@ CONSUMER_ASN_BARCODE_PREFIX: Final[str] = os.getenv(
|
|||||||
"ASN",
|
"ASN",
|
||||||
)
|
)
|
||||||
|
|
||||||
CONSUMER_BARCODE_UPSCALE: Final[float] = __get_float(
|
CONSUMER_BARCODE_UPSCALE: Final[float] = get_float_from_env(
|
||||||
"PAPERLESS_CONSUMER_BARCODE_UPSCALE",
|
"PAPERLESS_CONSUMER_BARCODE_UPSCALE",
|
||||||
0.0,
|
0.0,
|
||||||
)
|
)
|
||||||
|
|
||||||
CONSUMER_BARCODE_DPI: Final[int] = __get_int("PAPERLESS_CONSUMER_BARCODE_DPI", 300)
|
CONSUMER_BARCODE_DPI: Final[int] = get_int_from_env(
|
||||||
|
"PAPERLESS_CONSUMER_BARCODE_DPI",
|
||||||
|
300,
|
||||||
|
)
|
||||||
|
|
||||||
CONSUMER_BARCODE_MAX_PAGES: Final[int] = __get_int(
|
CONSUMER_BARCODE_MAX_PAGES: Final[int] = get_int_from_env(
|
||||||
"PAPERLESS_CONSUMER_BARCODE_MAX_PAGES",
|
"PAPERLESS_CONSUMER_BARCODE_MAX_PAGES",
|
||||||
0,
|
0,
|
||||||
)
|
)
|
||||||
|
|
||||||
CONSUMER_BARCODE_RETAIN_SPLIT_PAGES = __get_boolean(
|
CONSUMER_BARCODE_RETAIN_SPLIT_PAGES = get_bool_from_env(
|
||||||
"PAPERLESS_CONSUMER_BARCODE_RETAIN_SPLIT_PAGES",
|
"PAPERLESS_CONSUMER_BARCODE_RETAIN_SPLIT_PAGES",
|
||||||
)
|
)
|
||||||
|
|
||||||
CONSUMER_ENABLE_TAG_BARCODE: Final[bool] = __get_boolean(
|
CONSUMER_ENABLE_TAG_BARCODE: Final[bool] = get_bool_from_env(
|
||||||
"PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE",
|
"PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE",
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -1077,11 +849,11 @@ CONSUMER_TAG_BARCODE_MAPPING = dict(
|
|||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
CONSUMER_TAG_BARCODE_SPLIT: Final[bool] = __get_boolean(
|
CONSUMER_TAG_BARCODE_SPLIT: Final[bool] = get_bool_from_env(
|
||||||
"PAPERLESS_CONSUMER_TAG_BARCODE_SPLIT",
|
"PAPERLESS_CONSUMER_TAG_BARCODE_SPLIT",
|
||||||
)
|
)
|
||||||
|
|
||||||
CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED: Final[bool] = __get_boolean(
|
CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED: Final[bool] = get_bool_from_env(
|
||||||
"PAPERLESS_CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED",
|
"PAPERLESS_CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED",
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -1090,13 +862,13 @@ CONSUMER_COLLATE_DOUBLE_SIDED_SUBDIR_NAME: Final[str] = os.getenv(
|
|||||||
"double-sided",
|
"double-sided",
|
||||||
)
|
)
|
||||||
|
|
||||||
CONSUMER_COLLATE_DOUBLE_SIDED_TIFF_SUPPORT: Final[bool] = __get_boolean(
|
CONSUMER_COLLATE_DOUBLE_SIDED_TIFF_SUPPORT: Final[bool] = get_bool_from_env(
|
||||||
"PAPERLESS_CONSUMER_COLLATE_DOUBLE_SIDED_TIFF_SUPPORT",
|
"PAPERLESS_CONSUMER_COLLATE_DOUBLE_SIDED_TIFF_SUPPORT",
|
||||||
)
|
)
|
||||||
|
|
||||||
CONSUMER_PDF_RECOVERABLE_MIME_TYPES = ("application/octet-stream",)
|
CONSUMER_PDF_RECOVERABLE_MIME_TYPES = ("application/octet-stream",)
|
||||||
|
|
||||||
OCR_PAGES = __get_optional_int("PAPERLESS_OCR_PAGES")
|
OCR_PAGES = get_int_from_env("PAPERLESS_OCR_PAGES")
|
||||||
|
|
||||||
# The default language that tesseract will attempt to use when parsing
|
# The default language that tesseract will attempt to use when parsing
|
||||||
# documents. It should be a 3-letter language code consistent with ISO 639.
|
# documents. It should be a 3-letter language code consistent with ISO 639.
|
||||||
@@ -1110,20 +882,20 @@ OCR_MODE = os.getenv("PAPERLESS_OCR_MODE", "skip")
|
|||||||
|
|
||||||
OCR_SKIP_ARCHIVE_FILE = os.getenv("PAPERLESS_OCR_SKIP_ARCHIVE_FILE", "never")
|
OCR_SKIP_ARCHIVE_FILE = os.getenv("PAPERLESS_OCR_SKIP_ARCHIVE_FILE", "never")
|
||||||
|
|
||||||
OCR_IMAGE_DPI = __get_optional_int("PAPERLESS_OCR_IMAGE_DPI")
|
OCR_IMAGE_DPI = get_int_from_env("PAPERLESS_OCR_IMAGE_DPI")
|
||||||
|
|
||||||
OCR_CLEAN = os.getenv("PAPERLESS_OCR_CLEAN", "clean")
|
OCR_CLEAN = os.getenv("PAPERLESS_OCR_CLEAN", "clean")
|
||||||
|
|
||||||
OCR_DESKEW: Final[bool] = __get_boolean("PAPERLESS_OCR_DESKEW", "true")
|
OCR_DESKEW: Final[bool] = get_bool_from_env("PAPERLESS_OCR_DESKEW", "true")
|
||||||
|
|
||||||
OCR_ROTATE_PAGES: Final[bool] = __get_boolean("PAPERLESS_OCR_ROTATE_PAGES", "true")
|
OCR_ROTATE_PAGES: Final[bool] = get_bool_from_env("PAPERLESS_OCR_ROTATE_PAGES", "true")
|
||||||
|
|
||||||
OCR_ROTATE_PAGES_THRESHOLD: Final[float] = __get_float(
|
OCR_ROTATE_PAGES_THRESHOLD: Final[float] = get_float_from_env(
|
||||||
"PAPERLESS_OCR_ROTATE_PAGES_THRESHOLD",
|
"PAPERLESS_OCR_ROTATE_PAGES_THRESHOLD",
|
||||||
12.0,
|
12.0,
|
||||||
)
|
)
|
||||||
|
|
||||||
OCR_MAX_IMAGE_PIXELS: Final[int | None] = __get_optional_int(
|
OCR_MAX_IMAGE_PIXELS: Final[int | None] = get_int_from_env(
|
||||||
"PAPERLESS_OCR_MAX_IMAGE_PIXELS",
|
"PAPERLESS_OCR_MAX_IMAGE_PIXELS",
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -1134,7 +906,7 @@ OCR_COLOR_CONVERSION_STRATEGY = os.getenv(
|
|||||||
|
|
||||||
OCR_USER_ARGS = os.getenv("PAPERLESS_OCR_USER_ARGS")
|
OCR_USER_ARGS = os.getenv("PAPERLESS_OCR_USER_ARGS")
|
||||||
|
|
||||||
MAX_IMAGE_PIXELS: Final[int | None] = __get_optional_int(
|
MAX_IMAGE_PIXELS: Final[int | None] = get_int_from_env(
|
||||||
"PAPERLESS_MAX_IMAGE_PIXELS",
|
"PAPERLESS_MAX_IMAGE_PIXELS",
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -1149,7 +921,7 @@ CONVERT_MEMORY_LIMIT = os.getenv("PAPERLESS_CONVERT_MEMORY_LIMIT")
|
|||||||
GS_BINARY = os.getenv("PAPERLESS_GS_BINARY", "gs")
|
GS_BINARY = os.getenv("PAPERLESS_GS_BINARY", "gs")
|
||||||
|
|
||||||
# Fallback layout for .eml consumption
|
# Fallback layout for .eml consumption
|
||||||
EMAIL_PARSE_DEFAULT_LAYOUT = __get_int(
|
EMAIL_PARSE_DEFAULT_LAYOUT = get_int_from_env(
|
||||||
"PAPERLESS_EMAIL_PARSE_DEFAULT_LAYOUT",
|
"PAPERLESS_EMAIL_PARSE_DEFAULT_LAYOUT",
|
||||||
1, # MailRule.PdfLayout.TEXT_HTML but that can't be imported here
|
1, # MailRule.PdfLayout.TEXT_HTML but that can't be imported here
|
||||||
)
|
)
|
||||||
@@ -1163,23 +935,9 @@ DATE_ORDER = os.getenv("PAPERLESS_DATE_ORDER", "DMY")
|
|||||||
FILENAME_DATE_ORDER = os.getenv("PAPERLESS_FILENAME_DATE_ORDER")
|
FILENAME_DATE_ORDER = os.getenv("PAPERLESS_FILENAME_DATE_ORDER")
|
||||||
|
|
||||||
|
|
||||||
def _parse_dateparser_languages(languages: str | None):
|
|
||||||
language_list = languages.split("+") if languages else []
|
|
||||||
# There is an unfixed issue in zh-Hant and zh-Hans locales in the dateparser lib.
|
|
||||||
# See: https://github.com/scrapinghub/dateparser/issues/875
|
|
||||||
for index, language in enumerate(language_list):
|
|
||||||
if language.startswith("zh-") and "zh" not in language_list:
|
|
||||||
logger.warning(
|
|
||||||
f'Chinese locale detected: {language}. dateparser might fail to parse some dates with this locale, so Chinese ("zh") will be used as a fallback.',
|
|
||||||
)
|
|
||||||
language_list.append("zh")
|
|
||||||
|
|
||||||
return list(LocaleDataLoader().get_locale_map(locales=language_list))
|
|
||||||
|
|
||||||
|
|
||||||
# If not set, we will infer it at runtime
|
# If not set, we will infer it at runtime
|
||||||
DATE_PARSER_LANGUAGES = (
|
DATE_PARSER_LANGUAGES = (
|
||||||
_parse_dateparser_languages(
|
parse_dateparser_languages(
|
||||||
os.getenv("PAPERLESS_DATE_PARSER_LANGUAGES"),
|
os.getenv("PAPERLESS_DATE_PARSER_LANGUAGES"),
|
||||||
)
|
)
|
||||||
if os.getenv("PAPERLESS_DATE_PARSER_LANGUAGES")
|
if os.getenv("PAPERLESS_DATE_PARSER_LANGUAGES")
|
||||||
@@ -1190,7 +948,7 @@ DATE_PARSER_LANGUAGES = (
|
|||||||
# Maximum number of dates taken from document start to end to show as suggestions for
|
# Maximum number of dates taken from document start to end to show as suggestions for
|
||||||
# `created` date in the frontend. Duplicates are removed, which can result in
|
# `created` date in the frontend. Duplicates are removed, which can result in
|
||||||
# fewer dates shown.
|
# fewer dates shown.
|
||||||
NUMBER_OF_SUGGESTED_DATES = __get_int("PAPERLESS_NUMBER_OF_SUGGESTED_DATES", 3)
|
NUMBER_OF_SUGGESTED_DATES = get_int_from_env("PAPERLESS_NUMBER_OF_SUGGESTED_DATES", 3)
|
||||||
|
|
||||||
# Specify the filename format for out files
|
# Specify the filename format for out files
|
||||||
FILENAME_FORMAT = os.getenv("PAPERLESS_FILENAME_FORMAT")
|
FILENAME_FORMAT = os.getenv("PAPERLESS_FILENAME_FORMAT")
|
||||||
@@ -1198,7 +956,7 @@ FILENAME_FORMAT = os.getenv("PAPERLESS_FILENAME_FORMAT")
|
|||||||
# If this is enabled, variables in filename format will resolve to
|
# If this is enabled, variables in filename format will resolve to
|
||||||
# empty-string instead of 'none'.
|
# empty-string instead of 'none'.
|
||||||
# Directories with 'empty names' are omitted, too.
|
# Directories with 'empty names' are omitted, too.
|
||||||
FILENAME_FORMAT_REMOVE_NONE = __get_boolean(
|
FILENAME_FORMAT_REMOVE_NONE = get_bool_from_env(
|
||||||
"PAPERLESS_FILENAME_FORMAT_REMOVE_NONE",
|
"PAPERLESS_FILENAME_FORMAT_REMOVE_NONE",
|
||||||
"NO",
|
"NO",
|
||||||
)
|
)
|
||||||
@@ -1209,7 +967,7 @@ THUMBNAIL_FONT_NAME = os.getenv(
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Tika settings
|
# Tika settings
|
||||||
TIKA_ENABLED = __get_boolean("PAPERLESS_TIKA_ENABLED", "NO")
|
TIKA_ENABLED = get_bool_from_env("PAPERLESS_TIKA_ENABLED", "NO")
|
||||||
TIKA_ENDPOINT = os.getenv("PAPERLESS_TIKA_ENDPOINT", "http://localhost:9998")
|
TIKA_ENDPOINT = os.getenv("PAPERLESS_TIKA_ENDPOINT", "http://localhost:9998")
|
||||||
TIKA_GOTENBERG_ENDPOINT = os.getenv(
|
TIKA_GOTENBERG_ENDPOINT = os.getenv(
|
||||||
"PAPERLESS_TIKA_GOTENBERG_ENDPOINT",
|
"PAPERLESS_TIKA_GOTENBERG_ENDPOINT",
|
||||||
@@ -1219,52 +977,21 @@ TIKA_GOTENBERG_ENDPOINT = os.getenv(
|
|||||||
if TIKA_ENABLED:
|
if TIKA_ENABLED:
|
||||||
INSTALLED_APPS.append("paperless_tika.apps.PaperlessTikaConfig")
|
INSTALLED_APPS.append("paperless_tika.apps.PaperlessTikaConfig")
|
||||||
|
|
||||||
AUDIT_LOG_ENABLED = __get_boolean("PAPERLESS_AUDIT_LOG_ENABLED", "true")
|
AUDIT_LOG_ENABLED = get_bool_from_env("PAPERLESS_AUDIT_LOG_ENABLED", "true")
|
||||||
if AUDIT_LOG_ENABLED:
|
if AUDIT_LOG_ENABLED:
|
||||||
INSTALLED_APPS.append("auditlog")
|
INSTALLED_APPS.append("auditlog")
|
||||||
MIDDLEWARE.append("auditlog.middleware.AuditlogMiddleware")
|
MIDDLEWARE.append("auditlog.middleware.AuditlogMiddleware")
|
||||||
|
|
||||||
|
|
||||||
def _parse_ignore_dates(
|
|
||||||
env_ignore: str,
|
|
||||||
date_order: str = DATE_ORDER,
|
|
||||||
) -> set[datetime.datetime]:
|
|
||||||
"""
|
|
||||||
If the PAPERLESS_IGNORE_DATES environment variable is set, parse the
|
|
||||||
user provided string(s) into dates
|
|
||||||
|
|
||||||
Args:
|
|
||||||
env_ignore (str): The value of the environment variable, comma separated dates
|
|
||||||
date_order (str, optional): The format of the date strings.
|
|
||||||
Defaults to DATE_ORDER.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Set[datetime.datetime]: The set of parsed date objects
|
|
||||||
"""
|
|
||||||
import dateparser
|
|
||||||
|
|
||||||
ignored_dates = set()
|
|
||||||
for s in env_ignore.split(","):
|
|
||||||
d = dateparser.parse(
|
|
||||||
s,
|
|
||||||
settings={
|
|
||||||
"DATE_ORDER": date_order,
|
|
||||||
},
|
|
||||||
)
|
|
||||||
if d:
|
|
||||||
ignored_dates.add(d.date())
|
|
||||||
return ignored_dates
|
|
||||||
|
|
||||||
|
|
||||||
# List dates that should be ignored when trying to parse date from document text
|
# List dates that should be ignored when trying to parse date from document text
|
||||||
IGNORE_DATES: set[datetime.date] = set()
|
IGNORE_DATES: set[datetime.date] = set()
|
||||||
|
|
||||||
if os.getenv("PAPERLESS_IGNORE_DATES") is not None:
|
if os.getenv("PAPERLESS_IGNORE_DATES") is not None:
|
||||||
IGNORE_DATES = _parse_ignore_dates(os.getenv("PAPERLESS_IGNORE_DATES"))
|
IGNORE_DATES = parse_ignore_dates(os.getenv("PAPERLESS_IGNORE_DATES"), DATE_ORDER)
|
||||||
|
|
||||||
ENABLE_UPDATE_CHECK = os.getenv("PAPERLESS_ENABLE_UPDATE_CHECK", "default")
|
ENABLE_UPDATE_CHECK = os.getenv("PAPERLESS_ENABLE_UPDATE_CHECK", "default")
|
||||||
if ENABLE_UPDATE_CHECK != "default":
|
if ENABLE_UPDATE_CHECK != "default":
|
||||||
ENABLE_UPDATE_CHECK = __get_boolean("PAPERLESS_ENABLE_UPDATE_CHECK")
|
ENABLE_UPDATE_CHECK = get_bool_from_env("PAPERLESS_ENABLE_UPDATE_CHECK")
|
||||||
|
|
||||||
APP_TITLE = os.getenv("PAPERLESS_APP_TITLE", None)
|
APP_TITLE = os.getenv("PAPERLESS_APP_TITLE", None)
|
||||||
APP_LOGO = os.getenv("PAPERLESS_APP_LOGO", None)
|
APP_LOGO = os.getenv("PAPERLESS_APP_LOGO", None)
|
||||||
@@ -1309,7 +1036,7 @@ def _get_nltk_language_setting(ocr_lang: str) -> str | None:
|
|||||||
return iso_code_to_nltk.get(ocr_lang)
|
return iso_code_to_nltk.get(ocr_lang)
|
||||||
|
|
||||||
|
|
||||||
NLTK_ENABLED: Final[bool] = __get_boolean("PAPERLESS_ENABLE_NLTK", "yes")
|
NLTK_ENABLED: Final[bool] = get_bool_from_env("PAPERLESS_ENABLE_NLTK", "yes")
|
||||||
|
|
||||||
NLTK_LANGUAGE: str | None = _get_nltk_language_setting(OCR_LANGUAGE)
|
NLTK_LANGUAGE: str | None = _get_nltk_language_setting(OCR_LANGUAGE)
|
||||||
|
|
||||||
@@ -1318,7 +1045,7 @@ NLTK_LANGUAGE: str | None = _get_nltk_language_setting(OCR_LANGUAGE)
|
|||||||
###############################################################################
|
###############################################################################
|
||||||
|
|
||||||
EMAIL_GNUPG_HOME: Final[str | None] = os.getenv("PAPERLESS_EMAIL_GNUPG_HOME")
|
EMAIL_GNUPG_HOME: Final[str | None] = os.getenv("PAPERLESS_EMAIL_GNUPG_HOME")
|
||||||
EMAIL_ENABLE_GPG_DECRYPTOR: Final[bool] = __get_boolean(
|
EMAIL_ENABLE_GPG_DECRYPTOR: Final[bool] = get_bool_from_env(
|
||||||
"PAPERLESS_ENABLE_GPG_DECRYPTOR",
|
"PAPERLESS_ENABLE_GPG_DECRYPTOR",
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -1326,7 +1053,7 @@ EMAIL_ENABLE_GPG_DECRYPTOR: Final[bool] = __get_boolean(
|
|||||||
###############################################################################
|
###############################################################################
|
||||||
# Soft Delete #
|
# Soft Delete #
|
||||||
###############################################################################
|
###############################################################################
|
||||||
EMPTY_TRASH_DELAY = max(__get_int("PAPERLESS_EMPTY_TRASH_DELAY", 30), 1)
|
EMPTY_TRASH_DELAY = max(get_int_from_env("PAPERLESS_EMPTY_TRASH_DELAY", 30), 1)
|
||||||
|
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
@@ -1351,21 +1078,17 @@ OUTLOOK_OAUTH_ENABLED = bool(
|
|||||||
###############################################################################
|
###############################################################################
|
||||||
# Webhooks
|
# Webhooks
|
||||||
###############################################################################
|
###############################################################################
|
||||||
WEBHOOKS_ALLOWED_SCHEMES = set(
|
WEBHOOKS_ALLOWED_SCHEMES = {
|
||||||
s.lower()
|
s.lower()
|
||||||
for s in __get_list(
|
for s in get_list_from_env(
|
||||||
"PAPERLESS_WEBHOOKS_ALLOWED_SCHEMES",
|
"PAPERLESS_WEBHOOKS_ALLOWED_SCHEMES",
|
||||||
["http", "https"],
|
default=["http", "https"],
|
||||||
)
|
)
|
||||||
)
|
}
|
||||||
WEBHOOKS_ALLOWED_PORTS = set(
|
WEBHOOKS_ALLOWED_PORTS = {
|
||||||
int(p)
|
int(p) for p in get_list_from_env("PAPERLESS_WEBHOOKS_ALLOWED_PORTS", default=[])
|
||||||
for p in __get_list(
|
}
|
||||||
"PAPERLESS_WEBHOOKS_ALLOWED_PORTS",
|
WEBHOOKS_ALLOW_INTERNAL_REQUESTS = get_bool_from_env(
|
||||||
[],
|
|
||||||
)
|
|
||||||
)
|
|
||||||
WEBHOOKS_ALLOW_INTERNAL_REQUESTS = __get_boolean(
|
|
||||||
"PAPERLESS_WEBHOOKS_ALLOW_INTERNAL_REQUESTS",
|
"PAPERLESS_WEBHOOKS_ALLOW_INTERNAL_REQUESTS",
|
||||||
"true",
|
"true",
|
||||||
)
|
)
|
||||||
@@ -1380,7 +1103,7 @@ REMOTE_OCR_ENDPOINT = os.getenv("PAPERLESS_REMOTE_OCR_ENDPOINT")
|
|||||||
################################################################################
|
################################################################################
|
||||||
# AI Settings #
|
# AI Settings #
|
||||||
################################################################################
|
################################################################################
|
||||||
AI_ENABLED = __get_boolean("PAPERLESS_AI_ENABLED", "NO")
|
AI_ENABLED = get_bool_from_env("PAPERLESS_AI_ENABLED", "NO")
|
||||||
LLM_EMBEDDING_BACKEND = os.getenv(
|
LLM_EMBEDDING_BACKEND = os.getenv(
|
||||||
"PAPERLESS_AI_LLM_EMBEDDING_BACKEND",
|
"PAPERLESS_AI_LLM_EMBEDDING_BACKEND",
|
||||||
) # "huggingface" or "openai"
|
) # "huggingface" or "openai"
|
||||||
|
|||||||
@@ -1,11 +1,191 @@
|
|||||||
|
import datetime
|
||||||
|
import logging
|
||||||
import os
|
import os
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
|
from celery.schedules import crontab
|
||||||
|
from dateparser.languages.loader import LocaleDataLoader
|
||||||
|
|
||||||
from paperless.settings.parsers import get_choice_from_env
|
from paperless.settings.parsers import get_choice_from_env
|
||||||
from paperless.settings.parsers import get_int_from_env
|
from paperless.settings.parsers import get_int_from_env
|
||||||
from paperless.settings.parsers import parse_dict_from_str
|
from paperless.settings.parsers import parse_dict_from_str
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_hosting_settings() -> tuple[str | None, str, str, str, str]:
|
||||||
|
script_name = os.getenv("PAPERLESS_FORCE_SCRIPT_NAME")
|
||||||
|
base_url = (script_name or "") + "/"
|
||||||
|
login_url = base_url + "accounts/login/"
|
||||||
|
login_redirect_url = base_url + "dashboard"
|
||||||
|
logout_redirect_url = os.getenv(
|
||||||
|
"PAPERLESS_LOGOUT_REDIRECT_URL",
|
||||||
|
login_url + "?loggedout=1",
|
||||||
|
)
|
||||||
|
return script_name, base_url, login_url, login_redirect_url, logout_redirect_url
|
||||||
|
|
||||||
|
|
||||||
|
def parse_redis_url(env_redis: str | None) -> tuple[str, str]:
|
||||||
|
"""
|
||||||
|
Gets the Redis information from the environment or a default and handles
|
||||||
|
converting from incompatible django_channels and celery formats.
|
||||||
|
|
||||||
|
Returns a tuple of (celery_url, channels_url)
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Not set, return a compatible default
|
||||||
|
if env_redis is None:
|
||||||
|
return ("redis://localhost:6379", "redis://localhost:6379")
|
||||||
|
|
||||||
|
if "unix" in env_redis.lower():
|
||||||
|
# channels_redis socket format, looks like:
|
||||||
|
# "unix:///path/to/redis.sock"
|
||||||
|
_, path = env_redis.split(":", maxsplit=1)
|
||||||
|
# Optionally setting a db number
|
||||||
|
if "?db=" in env_redis:
|
||||||
|
path, number = path.split("?db=")
|
||||||
|
return (f"redis+socket:{path}?virtual_host={number}", env_redis)
|
||||||
|
else:
|
||||||
|
return (f"redis+socket:{path}", env_redis)
|
||||||
|
|
||||||
|
elif "+socket" in env_redis.lower():
|
||||||
|
# celery socket style, looks like:
|
||||||
|
# "redis+socket:///path/to/redis.sock"
|
||||||
|
_, path = env_redis.split(":", maxsplit=1)
|
||||||
|
if "?virtual_host=" in env_redis:
|
||||||
|
# Virtual host (aka db number)
|
||||||
|
path, number = path.split("?virtual_host=")
|
||||||
|
return (env_redis, f"unix:{path}?db={number}")
|
||||||
|
else:
|
||||||
|
return (env_redis, f"unix:{path}")
|
||||||
|
|
||||||
|
# Not a socket
|
||||||
|
return (env_redis, env_redis)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_beat_schedule() -> dict:
|
||||||
|
"""
|
||||||
|
Configures the scheduled tasks, according to default or
|
||||||
|
environment variables. Task expiration is configured so the task will
|
||||||
|
expire (and not run), shortly before the default frequency will put another
|
||||||
|
of the same task into the queue
|
||||||
|
|
||||||
|
|
||||||
|
https://docs.celeryq.dev/en/stable/userguide/periodic-tasks.html#beat-entries
|
||||||
|
https://docs.celeryq.dev/en/latest/userguide/calling.html#expiration
|
||||||
|
"""
|
||||||
|
schedule = {}
|
||||||
|
tasks = [
|
||||||
|
{
|
||||||
|
"name": "Check all e-mail accounts",
|
||||||
|
"env_key": "PAPERLESS_EMAIL_TASK_CRON",
|
||||||
|
# Default every ten minutes
|
||||||
|
"env_default": "*/10 * * * *",
|
||||||
|
"task": "paperless_mail.tasks.process_mail_accounts",
|
||||||
|
"options": {
|
||||||
|
# 1 minute before default schedule sends again
|
||||||
|
"expires": 9.0 * 60.0,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Train the classifier",
|
||||||
|
"env_key": "PAPERLESS_TRAIN_TASK_CRON",
|
||||||
|
# Default hourly at 5 minutes past the hour
|
||||||
|
"env_default": "5 */1 * * *",
|
||||||
|
"task": "documents.tasks.train_classifier",
|
||||||
|
"options": {
|
||||||
|
# 1 minute before default schedule sends again
|
||||||
|
"expires": 59.0 * 60.0,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Optimize the index",
|
||||||
|
"env_key": "PAPERLESS_INDEX_TASK_CRON",
|
||||||
|
# Default daily at midnight
|
||||||
|
"env_default": "0 0 * * *",
|
||||||
|
"task": "documents.tasks.index_optimize",
|
||||||
|
"options": {
|
||||||
|
# 1 hour before default schedule sends again
|
||||||
|
"expires": 23.0 * 60.0 * 60.0,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Perform sanity check",
|
||||||
|
"env_key": "PAPERLESS_SANITY_TASK_CRON",
|
||||||
|
# Default Sunday at 00:30
|
||||||
|
"env_default": "30 0 * * sun",
|
||||||
|
"task": "documents.tasks.sanity_check",
|
||||||
|
"options": {
|
||||||
|
# 1 hour before default schedule sends again
|
||||||
|
"expires": ((7.0 * 24.0) - 1.0) * 60.0 * 60.0,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Empty trash",
|
||||||
|
"env_key": "PAPERLESS_EMPTY_TRASH_TASK_CRON",
|
||||||
|
# Default daily at 01:00
|
||||||
|
"env_default": "0 1 * * *",
|
||||||
|
"task": "documents.tasks.empty_trash",
|
||||||
|
"options": {
|
||||||
|
# 1 hour before default schedule sends again
|
||||||
|
"expires": 23.0 * 60.0 * 60.0,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Check and run scheduled workflows",
|
||||||
|
"env_key": "PAPERLESS_WORKFLOW_SCHEDULED_TASK_CRON",
|
||||||
|
# Default hourly at 5 minutes past the hour
|
||||||
|
"env_default": "5 */1 * * *",
|
||||||
|
"task": "documents.tasks.check_scheduled_workflows",
|
||||||
|
"options": {
|
||||||
|
# 1 minute before default schedule sends again
|
||||||
|
"expires": 59.0 * 60.0,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Rebuild LLM index",
|
||||||
|
"env_key": "PAPERLESS_LLM_INDEX_TASK_CRON",
|
||||||
|
# Default daily at 02:10
|
||||||
|
"env_default": "10 2 * * *",
|
||||||
|
"task": "documents.tasks.llmindex_index",
|
||||||
|
"options": {
|
||||||
|
# 1 hour before default schedule sends again
|
||||||
|
"expires": 23.0 * 60.0 * 60.0,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Cleanup expired share link bundles",
|
||||||
|
"env_key": "PAPERLESS_SHARE_LINK_BUNDLE_CLEANUP_CRON",
|
||||||
|
# Default daily at 02:00
|
||||||
|
"env_default": "0 2 * * *",
|
||||||
|
"task": "documents.tasks.cleanup_expired_share_link_bundles",
|
||||||
|
"options": {
|
||||||
|
# 1 hour before default schedule sends again
|
||||||
|
"expires": 23.0 * 60.0 * 60.0,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
]
|
||||||
|
for task in tasks:
|
||||||
|
# Either get the environment setting or use the default
|
||||||
|
value = os.getenv(task["env_key"], task["env_default"])
|
||||||
|
# Don't add disabled tasks to the schedule
|
||||||
|
if value == "disable":
|
||||||
|
continue
|
||||||
|
# I find https://crontab.guru/ super helpful
|
||||||
|
# crontab(5) format
|
||||||
|
# - five time-and-date fields
|
||||||
|
# - separated by at least one blank
|
||||||
|
minute, hour, day_month, month, day_week = value.split(" ")
|
||||||
|
|
||||||
|
schedule[task["name"]] = {
|
||||||
|
"task": task["task"],
|
||||||
|
"schedule": crontab(minute, hour, day_week, day_month, month),
|
||||||
|
"options": task["options"],
|
||||||
|
}
|
||||||
|
|
||||||
|
return schedule
|
||||||
|
|
||||||
|
|
||||||
def parse_db_settings(data_dir: Path) -> dict[str, dict[str, Any]]:
|
def parse_db_settings(data_dir: Path) -> dict[str, dict[str, Any]]:
|
||||||
"""Parse database settings from environment variables.
|
"""Parse database settings from environment variables.
|
||||||
@@ -120,3 +300,48 @@ def parse_db_settings(data_dir: Path) -> dict[str, dict[str, Any]]:
|
|||||||
)
|
)
|
||||||
|
|
||||||
return {"default": db_config}
|
return {"default": db_config}
|
||||||
|
|
||||||
|
|
||||||
|
def parse_dateparser_languages(languages: str | None) -> list[str]:
|
||||||
|
language_list = languages.split("+") if languages else []
|
||||||
|
# There is an unfixed issue in zh-Hant and zh-Hans locales in the dateparser lib.
|
||||||
|
# See: https://github.com/scrapinghub/dateparser/issues/875
|
||||||
|
for index, language in enumerate(language_list):
|
||||||
|
if language.startswith("zh-") and "zh" not in language_list:
|
||||||
|
logger.warning(
|
||||||
|
f"Chinese locale detected: {language}. dateparser might fail to parse"
|
||||||
|
f' some dates with this locale, so Chinese ("zh") will be used as a fallback.',
|
||||||
|
)
|
||||||
|
language_list.append("zh")
|
||||||
|
|
||||||
|
return list(LocaleDataLoader().get_locale_map(locales=language_list))
|
||||||
|
|
||||||
|
|
||||||
|
def parse_ignore_dates(
|
||||||
|
env_ignore: str,
|
||||||
|
date_order: str,
|
||||||
|
) -> set[datetime.date]:
|
||||||
|
"""
|
||||||
|
If the PAPERLESS_IGNORE_DATES environment variable is set, parse the
|
||||||
|
user provided string(s) into dates
|
||||||
|
|
||||||
|
Args:
|
||||||
|
env_ignore (str): The value of the environment variable, comma separated dates
|
||||||
|
date_order (str): The format of the date strings.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
set[datetime.date]: The set of parsed date objects
|
||||||
|
"""
|
||||||
|
import dateparser
|
||||||
|
|
||||||
|
ignored_dates = set()
|
||||||
|
for s in env_ignore.split(","):
|
||||||
|
d = dateparser.parse(
|
||||||
|
s,
|
||||||
|
settings={
|
||||||
|
"DATE_ORDER": date_order,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
if d:
|
||||||
|
ignored_dates.add(d.date())
|
||||||
|
return ignored_dates
|
||||||
|
|||||||
@@ -156,6 +156,108 @@ def parse_dict_from_str(
|
|||||||
return settings
|
return settings
|
||||||
|
|
||||||
|
|
||||||
|
def get_bool_from_env(key: str, default: str = "NO") -> bool:
|
||||||
|
"""
|
||||||
|
Return a boolean value based on whatever the user has supplied in the
|
||||||
|
environment based on whether the value "looks like" it's True or not.
|
||||||
|
"""
|
||||||
|
return str_to_bool(os.getenv(key, default))
|
||||||
|
|
||||||
|
|
||||||
|
@overload
|
||||||
|
def get_float_from_env(key: str) -> float | None: ...
|
||||||
|
|
||||||
|
|
||||||
|
@overload
|
||||||
|
def get_float_from_env(key: str, default: None) -> float | None: ...
|
||||||
|
|
||||||
|
|
||||||
|
@overload
|
||||||
|
def get_float_from_env(key: str, default: float) -> float: ...
|
||||||
|
|
||||||
|
|
||||||
|
def get_float_from_env(key: str, default: float | None = None) -> float | None:
|
||||||
|
"""
|
||||||
|
Return a float value based on the environment variable.
|
||||||
|
If default is provided, returns that value when key is missing.
|
||||||
|
If default is None, returns None when key is missing.
|
||||||
|
"""
|
||||||
|
if key not in os.environ:
|
||||||
|
return default
|
||||||
|
|
||||||
|
return float(os.environ[key])
|
||||||
|
|
||||||
|
|
||||||
|
@overload
|
||||||
|
def get_path_from_env(key: str) -> Path | None: ...
|
||||||
|
|
||||||
|
|
||||||
|
@overload
|
||||||
|
def get_path_from_env(key: str, default: None) -> Path | None: ...
|
||||||
|
|
||||||
|
|
||||||
|
@overload
|
||||||
|
def get_path_from_env(key: str, default: Path | str) -> Path: ...
|
||||||
|
|
||||||
|
|
||||||
|
def get_path_from_env(key: str, default: Path | str | None = None) -> Path | None:
|
||||||
|
"""
|
||||||
|
Return a Path object based on the environment variable.
|
||||||
|
If default is provided, returns that value when key is missing.
|
||||||
|
If default is None, returns None when key is missing.
|
||||||
|
"""
|
||||||
|
if key not in os.environ:
|
||||||
|
return default if default is None else Path(default).resolve()
|
||||||
|
|
||||||
|
return Path(os.environ[key]).resolve()
|
||||||
|
|
||||||
|
|
||||||
|
def get_list_from_env(
|
||||||
|
key: str,
|
||||||
|
separator: str = ",",
|
||||||
|
default: list[T] | None = None,
|
||||||
|
*,
|
||||||
|
strip_whitespace: bool = True,
|
||||||
|
remove_empty: bool = True,
|
||||||
|
required: bool = False,
|
||||||
|
) -> list[str] | list[T]:
|
||||||
|
"""
|
||||||
|
Get and parse a list from an environment variable or return a default.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
key: Environment variable name
|
||||||
|
separator: Character(s) to split on (default: ',')
|
||||||
|
default: Default value to return if env var is not set or empty
|
||||||
|
strip_whitespace: Whether to strip whitespace from each element
|
||||||
|
remove_empty: Whether to remove empty strings from the result
|
||||||
|
required: If True, raise an error when the env var is missing and no default provided
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of strings or list of type-cast values, or default if env var is empty/None
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
ValueError: If required=True and env var is missing and there is no default
|
||||||
|
"""
|
||||||
|
# Get the environment variable value
|
||||||
|
env_value = os.environ.get(key)
|
||||||
|
|
||||||
|
# Handle required environment variables
|
||||||
|
if required and env_value is None and default is None:
|
||||||
|
raise ValueError(f"Required environment variable '{key}' is not set")
|
||||||
|
|
||||||
|
if env_value:
|
||||||
|
items = env_value.split(separator)
|
||||||
|
if strip_whitespace:
|
||||||
|
items = [item.strip() for item in items]
|
||||||
|
if remove_empty:
|
||||||
|
items = [item for item in items if item]
|
||||||
|
return items
|
||||||
|
elif default is not None:
|
||||||
|
return default
|
||||||
|
else:
|
||||||
|
return []
|
||||||
|
|
||||||
|
|
||||||
def get_choice_from_env(
|
def get_choice_from_env(
|
||||||
env_key: str,
|
env_key: str,
|
||||||
choices: set[str],
|
choices: set[str],
|
||||||
|
|||||||
48
src/paperless/tests/conftest.py
Normal file
48
src/paperless/tests/conftest.py
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
"""
|
||||||
|
Fixtures defined here are available to every test module under
|
||||||
|
src/paperless/tests/ (including sub-packages such as parsers/).
|
||||||
|
|
||||||
|
Session-scoped fixtures for the shared samples directory live here so
|
||||||
|
sub-package conftest files can reference them without duplicating path logic.
|
||||||
|
Parser-specific fixtures (concrete parser instances, format-specific sample
|
||||||
|
files) live in paperless/tests/parsers/conftest.py.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from paperless.parsers.registry import reset_parser_registry
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from collections.abc import Generator
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
|
||||||
|
def samples_dir() -> Path:
|
||||||
|
"""Absolute path to the shared parser sample files directory.
|
||||||
|
|
||||||
|
Sub-package conftest files derive format-specific paths from this root,
|
||||||
|
e.g. ``samples_dir / "text" / "test.txt"``.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
Path
|
||||||
|
Directory containing all sample documents used by parser tests.
|
||||||
|
"""
|
||||||
|
return (Path(__file__).parent / "samples").resolve()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
|
||||||
|
def clean_registry() -> Generator[None, None, None]:
|
||||||
|
"""Reset the parser registry before and after every test.
|
||||||
|
|
||||||
|
This prevents registry state from leaking between tests that call
|
||||||
|
get_parser_registry() or init_builtin_parsers().
|
||||||
|
"""
|
||||||
|
reset_parser_registry()
|
||||||
|
yield
|
||||||
|
reset_parser_registry()
|
||||||
0
src/paperless/tests/parsers/__init__.py
Normal file
0
src/paperless/tests/parsers/__init__.py
Normal file
76
src/paperless/tests/parsers/conftest.py
Normal file
76
src/paperless/tests/parsers/conftest.py
Normal file
@@ -0,0 +1,76 @@
|
|||||||
|
"""
|
||||||
|
Parser fixtures that are used across multiple test modules in this package
|
||||||
|
are defined here. Format-specific sample-file fixtures are grouped by parser
|
||||||
|
so it is easy to see which files belong to which test module.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from paperless.parsers.text import TextDocumentParser
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from collections.abc import Generator
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Text parser sample files
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
|
||||||
|
def text_samples_dir(samples_dir: Path) -> Path:
|
||||||
|
"""Absolute path to the text parser sample files directory.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
Path
|
||||||
|
``<samples_dir>/text/``
|
||||||
|
"""
|
||||||
|
return samples_dir / "text"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
|
||||||
|
def sample_txt_file(text_samples_dir: Path) -> Path:
|
||||||
|
"""Path to a valid UTF-8 plain-text sample file.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
Path
|
||||||
|
Absolute path to ``text/test.txt``.
|
||||||
|
"""
|
||||||
|
return text_samples_dir / "test.txt"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
|
||||||
|
def malformed_txt_file(text_samples_dir: Path) -> Path:
|
||||||
|
"""Path to a text file containing invalid UTF-8 bytes.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
Path
|
||||||
|
Absolute path to ``text/decode_error.txt``.
|
||||||
|
"""
|
||||||
|
return text_samples_dir / "decode_error.txt"
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Text parser instance
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
|
||||||
|
def text_parser() -> Generator[TextDocumentParser, None, None]:
|
||||||
|
"""Yield a TextDocumentParser and clean up its temporary directory afterwards.
|
||||||
|
|
||||||
|
Yields
|
||||||
|
------
|
||||||
|
TextDocumentParser
|
||||||
|
A ready-to-use parser instance.
|
||||||
|
"""
|
||||||
|
with TextDocumentParser() as parser:
|
||||||
|
yield parser
|
||||||
256
src/paperless/tests/parsers/test_text_parser.py
Normal file
256
src/paperless/tests/parsers/test_text_parser.py
Normal file
@@ -0,0 +1,256 @@
|
|||||||
|
"""
|
||||||
|
Tests for paperless.parsers.text.TextDocumentParser.
|
||||||
|
|
||||||
|
All tests use the context-manager protocol for parser lifecycle. Sample
|
||||||
|
files are provided by session-scoped fixtures defined in conftest.py.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import tempfile
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from paperless.parsers import ParserProtocol
|
||||||
|
from paperless.parsers.text import TextDocumentParser
|
||||||
|
|
||||||
|
|
||||||
|
class TestTextParserProtocol:
|
||||||
|
"""Verify that TextDocumentParser satisfies the ParserProtocol contract."""
|
||||||
|
|
||||||
|
def test_isinstance_satisfies_protocol(
|
||||||
|
self,
|
||||||
|
text_parser: TextDocumentParser,
|
||||||
|
) -> None:
|
||||||
|
assert isinstance(text_parser, ParserProtocol)
|
||||||
|
|
||||||
|
def test_class_attributes_present(self) -> None:
|
||||||
|
assert isinstance(TextDocumentParser.name, str) and TextDocumentParser.name
|
||||||
|
assert (
|
||||||
|
isinstance(TextDocumentParser.version, str) and TextDocumentParser.version
|
||||||
|
)
|
||||||
|
assert isinstance(TextDocumentParser.author, str) and TextDocumentParser.author
|
||||||
|
assert isinstance(TextDocumentParser.url, str) and TextDocumentParser.url
|
||||||
|
|
||||||
|
def test_supported_mime_types_returns_dict(self) -> None:
|
||||||
|
mime_types = TextDocumentParser.supported_mime_types()
|
||||||
|
assert isinstance(mime_types, dict)
|
||||||
|
assert "text/plain" in mime_types
|
||||||
|
assert "text/csv" in mime_types
|
||||||
|
assert "application/csv" in mime_types
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
("mime_type", "expected"),
|
||||||
|
[
|
||||||
|
("text/plain", 10),
|
||||||
|
("text/csv", 10),
|
||||||
|
("application/csv", 10),
|
||||||
|
("application/pdf", None),
|
||||||
|
("image/png", None),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_score(self, mime_type: str, expected: int | None) -> None:
|
||||||
|
assert TextDocumentParser.score(mime_type, "file.txt") == expected
|
||||||
|
|
||||||
|
def test_can_produce_archive_is_false(
|
||||||
|
self,
|
||||||
|
text_parser: TextDocumentParser,
|
||||||
|
) -> None:
|
||||||
|
assert text_parser.can_produce_archive is False
|
||||||
|
|
||||||
|
def test_requires_pdf_rendition_is_false(
|
||||||
|
self,
|
||||||
|
text_parser: TextDocumentParser,
|
||||||
|
) -> None:
|
||||||
|
assert text_parser.requires_pdf_rendition is False
|
||||||
|
|
||||||
|
|
||||||
|
class TestTextParserLifecycle:
|
||||||
|
"""Verify context-manager behaviour and temporary directory cleanup."""
|
||||||
|
|
||||||
|
def test_context_manager_cleans_up_tempdir(self) -> None:
|
||||||
|
with TextDocumentParser() as parser:
|
||||||
|
tempdir = parser._tempdir
|
||||||
|
assert tempdir.exists()
|
||||||
|
assert not tempdir.exists()
|
||||||
|
|
||||||
|
def test_context_manager_cleans_up_after_exception(self) -> None:
|
||||||
|
tempdir: Path | None = None
|
||||||
|
with pytest.raises(RuntimeError):
|
||||||
|
with TextDocumentParser() as parser:
|
||||||
|
tempdir = parser._tempdir
|
||||||
|
raise RuntimeError("boom")
|
||||||
|
assert tempdir is not None
|
||||||
|
assert not tempdir.exists()
|
||||||
|
|
||||||
|
|
||||||
|
class TestTextParserParse:
|
||||||
|
"""Verify parse() and the result accessors."""
|
||||||
|
|
||||||
|
def test_parse_valid_utf8(
|
||||||
|
self,
|
||||||
|
text_parser: TextDocumentParser,
|
||||||
|
sample_txt_file: Path,
|
||||||
|
) -> None:
|
||||||
|
text_parser.parse(sample_txt_file, "text/plain")
|
||||||
|
|
||||||
|
assert text_parser.get_text() == "This is a test file.\n"
|
||||||
|
|
||||||
|
def test_parse_returns_none_for_archive_path(
|
||||||
|
self,
|
||||||
|
text_parser: TextDocumentParser,
|
||||||
|
sample_txt_file: Path,
|
||||||
|
) -> None:
|
||||||
|
text_parser.parse(sample_txt_file, "text/plain")
|
||||||
|
|
||||||
|
assert text_parser.get_archive_path() is None
|
||||||
|
|
||||||
|
def test_parse_returns_none_for_date(
|
||||||
|
self,
|
||||||
|
text_parser: TextDocumentParser,
|
||||||
|
sample_txt_file: Path,
|
||||||
|
) -> None:
|
||||||
|
text_parser.parse(sample_txt_file, "text/plain")
|
||||||
|
|
||||||
|
assert text_parser.get_date() is None
|
||||||
|
|
||||||
|
def test_parse_invalid_utf8_bytes_replaced(
|
||||||
|
self,
|
||||||
|
text_parser: TextDocumentParser,
|
||||||
|
malformed_txt_file: Path,
|
||||||
|
) -> None:
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- A text file containing invalid UTF-8 byte sequences
|
||||||
|
WHEN:
|
||||||
|
- The file is parsed
|
||||||
|
THEN:
|
||||||
|
- Parsing succeeds
|
||||||
|
- Invalid bytes are replaced with the Unicode replacement character
|
||||||
|
"""
|
||||||
|
text_parser.parse(malformed_txt_file, "text/plain")
|
||||||
|
|
||||||
|
assert text_parser.get_text() == "Pantothens\ufffdure\n"
|
||||||
|
|
||||||
|
def test_get_text_none_before_parse(
|
||||||
|
self,
|
||||||
|
text_parser: TextDocumentParser,
|
||||||
|
) -> None:
|
||||||
|
assert text_parser.get_text() is None
|
||||||
|
|
||||||
|
|
||||||
|
class TestTextParserThumbnail:
|
||||||
|
"""Verify thumbnail generation."""
|
||||||
|
|
||||||
|
def test_thumbnail_exists_and_is_file(
|
||||||
|
self,
|
||||||
|
text_parser: TextDocumentParser,
|
||||||
|
sample_txt_file: Path,
|
||||||
|
) -> None:
|
||||||
|
thumb = text_parser.get_thumbnail(sample_txt_file, "text/plain")
|
||||||
|
|
||||||
|
assert thumb.exists()
|
||||||
|
assert thumb.is_file()
|
||||||
|
|
||||||
|
def test_thumbnail_large_file_does_not_read_all(
|
||||||
|
self,
|
||||||
|
text_parser: TextDocumentParser,
|
||||||
|
) -> None:
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- A text file larger than 50 MB
|
||||||
|
WHEN:
|
||||||
|
- A thumbnail is requested
|
||||||
|
THEN:
|
||||||
|
- The thumbnail is generated without loading the full file
|
||||||
|
"""
|
||||||
|
with tempfile.NamedTemporaryFile(
|
||||||
|
delete=False,
|
||||||
|
mode="w",
|
||||||
|
encoding="utf-8",
|
||||||
|
suffix=".txt",
|
||||||
|
) as tmp:
|
||||||
|
tmp.write("A" * (51 * 1024 * 1024))
|
||||||
|
large_file = Path(tmp.name)
|
||||||
|
|
||||||
|
try:
|
||||||
|
thumb = text_parser.get_thumbnail(large_file, "text/plain")
|
||||||
|
assert thumb.exists()
|
||||||
|
assert thumb.is_file()
|
||||||
|
finally:
|
||||||
|
large_file.unlink(missing_ok=True)
|
||||||
|
|
||||||
|
def test_get_page_count_returns_none(
|
||||||
|
self,
|
||||||
|
text_parser: TextDocumentParser,
|
||||||
|
sample_txt_file: Path,
|
||||||
|
) -> None:
|
||||||
|
assert text_parser.get_page_count(sample_txt_file, "text/plain") is None
|
||||||
|
|
||||||
|
|
||||||
|
class TestTextParserMetadata:
|
||||||
|
"""Verify extract_metadata behaviour."""
|
||||||
|
|
||||||
|
def test_extract_metadata_returns_empty_list(
|
||||||
|
self,
|
||||||
|
text_parser: TextDocumentParser,
|
||||||
|
sample_txt_file: Path,
|
||||||
|
) -> None:
|
||||||
|
result = text_parser.extract_metadata(sample_txt_file, "text/plain")
|
||||||
|
|
||||||
|
assert result == []
|
||||||
|
|
||||||
|
def test_extract_metadata_returns_list_type(
|
||||||
|
self,
|
||||||
|
text_parser: TextDocumentParser,
|
||||||
|
sample_txt_file: Path,
|
||||||
|
) -> None:
|
||||||
|
result = text_parser.extract_metadata(sample_txt_file, "text/plain")
|
||||||
|
|
||||||
|
assert isinstance(result, list)
|
||||||
|
|
||||||
|
def test_extract_metadata_ignores_mime_type(
|
||||||
|
self,
|
||||||
|
text_parser: TextDocumentParser,
|
||||||
|
sample_txt_file: Path,
|
||||||
|
) -> None:
|
||||||
|
"""extract_metadata returns [] regardless of the mime_type argument."""
|
||||||
|
assert text_parser.extract_metadata(sample_txt_file, "application/pdf") == []
|
||||||
|
assert text_parser.extract_metadata(sample_txt_file, "text/csv") == []
|
||||||
|
|
||||||
|
|
||||||
|
class TestTextParserRegistry:
|
||||||
|
"""Verify that TextDocumentParser is registered by default."""
|
||||||
|
|
||||||
|
def test_registered_in_defaults(self) -> None:
|
||||||
|
from paperless.parsers.registry import ParserRegistry
|
||||||
|
|
||||||
|
registry = ParserRegistry()
|
||||||
|
registry.register_defaults()
|
||||||
|
|
||||||
|
assert TextDocumentParser in registry._builtins
|
||||||
|
|
||||||
|
def test_get_parser_for_text_plain(self) -> None:
|
||||||
|
from paperless.parsers.registry import get_parser_registry
|
||||||
|
|
||||||
|
registry = get_parser_registry()
|
||||||
|
parser_cls = registry.get_parser_for_file("text/plain", "doc.txt")
|
||||||
|
|
||||||
|
assert parser_cls is TextDocumentParser
|
||||||
|
|
||||||
|
def test_get_parser_for_text_csv(self) -> None:
|
||||||
|
from paperless.parsers.registry import get_parser_registry
|
||||||
|
|
||||||
|
registry = get_parser_registry()
|
||||||
|
parser_cls = registry.get_parser_for_file("text/csv", "data.csv")
|
||||||
|
|
||||||
|
assert parser_cls is TextDocumentParser
|
||||||
|
|
||||||
|
def test_get_parser_for_unknown_type_returns_none(self) -> None:
|
||||||
|
from paperless.parsers.registry import get_parser_registry
|
||||||
|
|
||||||
|
registry = get_parser_registry()
|
||||||
|
parser_cls = registry.get_parser_for_file("application/pdf", "doc.pdf")
|
||||||
|
|
||||||
|
assert parser_cls is None
|
||||||
@@ -1,10 +1,279 @@
|
|||||||
|
import datetime
|
||||||
import os
|
import os
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
from celery.schedules import crontab
|
||||||
from pytest_mock import MockerFixture
|
from pytest_mock import MockerFixture
|
||||||
|
|
||||||
|
from paperless.settings.custom import parse_beat_schedule
|
||||||
|
from paperless.settings.custom import parse_dateparser_languages
|
||||||
from paperless.settings.custom import parse_db_settings
|
from paperless.settings.custom import parse_db_settings
|
||||||
|
from paperless.settings.custom import parse_hosting_settings
|
||||||
|
from paperless.settings.custom import parse_ignore_dates
|
||||||
|
from paperless.settings.custom import parse_redis_url
|
||||||
|
|
||||||
|
|
||||||
|
class TestRedisSocketConversion:
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
("input_url", "expected"),
|
||||||
|
[
|
||||||
|
pytest.param(
|
||||||
|
None,
|
||||||
|
("redis://localhost:6379", "redis://localhost:6379"),
|
||||||
|
id="none_uses_default",
|
||||||
|
),
|
||||||
|
pytest.param(
|
||||||
|
"redis+socket:///run/redis/redis.sock",
|
||||||
|
(
|
||||||
|
"redis+socket:///run/redis/redis.sock",
|
||||||
|
"unix:///run/redis/redis.sock",
|
||||||
|
),
|
||||||
|
id="celery_style_socket",
|
||||||
|
),
|
||||||
|
pytest.param(
|
||||||
|
"unix:///run/redis/redis.sock",
|
||||||
|
(
|
||||||
|
"redis+socket:///run/redis/redis.sock",
|
||||||
|
"unix:///run/redis/redis.sock",
|
||||||
|
),
|
||||||
|
id="redis_py_style_socket",
|
||||||
|
),
|
||||||
|
pytest.param(
|
||||||
|
"redis+socket:///run/redis/redis.sock?virtual_host=5",
|
||||||
|
(
|
||||||
|
"redis+socket:///run/redis/redis.sock?virtual_host=5",
|
||||||
|
"unix:///run/redis/redis.sock?db=5",
|
||||||
|
),
|
||||||
|
id="celery_style_socket_with_db",
|
||||||
|
),
|
||||||
|
pytest.param(
|
||||||
|
"unix:///run/redis/redis.sock?db=10",
|
||||||
|
(
|
||||||
|
"redis+socket:///run/redis/redis.sock?virtual_host=10",
|
||||||
|
"unix:///run/redis/redis.sock?db=10",
|
||||||
|
),
|
||||||
|
id="redis_py_style_socket_with_db",
|
||||||
|
),
|
||||||
|
pytest.param(
|
||||||
|
"redis://myredishost:6379",
|
||||||
|
("redis://myredishost:6379", "redis://myredishost:6379"),
|
||||||
|
id="host_with_port_unchanged",
|
||||||
|
),
|
||||||
|
# Credentials in unix:// URL contain multiple colons (user:password@)
|
||||||
|
# Regression test for https://github.com/paperless-ngx/paperless-ngx/pull/12239
|
||||||
|
pytest.param(
|
||||||
|
"unix://user:password@/run/redis/redis.sock",
|
||||||
|
(
|
||||||
|
"redis+socket://user:password@/run/redis/redis.sock",
|
||||||
|
"unix://user:password@/run/redis/redis.sock",
|
||||||
|
),
|
||||||
|
id="redis_py_style_socket_with_credentials",
|
||||||
|
),
|
||||||
|
pytest.param(
|
||||||
|
"redis+socket://user:password@/run/redis/redis.sock",
|
||||||
|
(
|
||||||
|
"redis+socket://user:password@/run/redis/redis.sock",
|
||||||
|
"unix://user:password@/run/redis/redis.sock",
|
||||||
|
),
|
||||||
|
id="celery_style_socket_with_credentials",
|
||||||
|
),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_redis_socket_parsing(
|
||||||
|
self,
|
||||||
|
input_url: str | None,
|
||||||
|
expected: tuple[str, str],
|
||||||
|
) -> None:
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- Various Redis connection URI formats
|
||||||
|
WHEN:
|
||||||
|
- The URI is parsed
|
||||||
|
THEN:
|
||||||
|
- Socket based URIs are translated
|
||||||
|
- Non-socket URIs are unchanged
|
||||||
|
- None provided uses default
|
||||||
|
"""
|
||||||
|
result = parse_redis_url(input_url)
|
||||||
|
assert expected == result
|
||||||
|
|
||||||
|
|
||||||
|
class TestParseHostingSettings:
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
("env", "expected"),
|
||||||
|
[
|
||||||
|
pytest.param(
|
||||||
|
{},
|
||||||
|
(
|
||||||
|
None,
|
||||||
|
"/",
|
||||||
|
"/accounts/login/",
|
||||||
|
"/dashboard",
|
||||||
|
"/accounts/login/?loggedout=1",
|
||||||
|
),
|
||||||
|
id="no_env_vars",
|
||||||
|
),
|
||||||
|
pytest.param(
|
||||||
|
{"PAPERLESS_FORCE_SCRIPT_NAME": "/paperless"},
|
||||||
|
(
|
||||||
|
"/paperless",
|
||||||
|
"/paperless/",
|
||||||
|
"/paperless/accounts/login/",
|
||||||
|
"/paperless/dashboard",
|
||||||
|
"/paperless/accounts/login/?loggedout=1",
|
||||||
|
),
|
||||||
|
id="force_script_name_only",
|
||||||
|
),
|
||||||
|
pytest.param(
|
||||||
|
{
|
||||||
|
"PAPERLESS_FORCE_SCRIPT_NAME": "/docs",
|
||||||
|
"PAPERLESS_LOGOUT_REDIRECT_URL": "/custom/logout",
|
||||||
|
},
|
||||||
|
(
|
||||||
|
"/docs",
|
||||||
|
"/docs/",
|
||||||
|
"/docs/accounts/login/",
|
||||||
|
"/docs/dashboard",
|
||||||
|
"/custom/logout",
|
||||||
|
),
|
||||||
|
id="force_script_name_and_logout_redirect",
|
||||||
|
),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_parse_hosting_settings(
|
||||||
|
self,
|
||||||
|
mocker: MockerFixture,
|
||||||
|
env: dict[str, str],
|
||||||
|
expected: tuple[str | None, str, str, str, str],
|
||||||
|
) -> None:
|
||||||
|
"""Test parse_hosting_settings with various env configurations."""
|
||||||
|
mocker.patch.dict(os.environ, env, clear=True)
|
||||||
|
|
||||||
|
result = parse_hosting_settings()
|
||||||
|
|
||||||
|
assert result == expected
|
||||||
|
|
||||||
|
|
||||||
|
def make_expected_schedule(
|
||||||
|
overrides: dict[str, dict[str, Any]] | None = None,
|
||||||
|
disabled: set[str] | None = None,
|
||||||
|
) -> dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Build the expected schedule with optional overrides and disabled tasks.
|
||||||
|
"""
|
||||||
|
|
||||||
|
mail_expire = 9.0 * 60.0
|
||||||
|
classifier_expire = 59.0 * 60.0
|
||||||
|
index_expire = 23.0 * 60.0 * 60.0
|
||||||
|
sanity_expire = ((7.0 * 24.0) - 1.0) * 60.0 * 60.0
|
||||||
|
empty_trash_expire = 23.0 * 60.0 * 60.0
|
||||||
|
workflow_expire = 59.0 * 60.0
|
||||||
|
llm_index_expire = 23.0 * 60.0 * 60.0
|
||||||
|
share_link_cleanup_expire = 23.0 * 60.0 * 60.0
|
||||||
|
|
||||||
|
schedule: dict[str, Any] = {
|
||||||
|
"Check all e-mail accounts": {
|
||||||
|
"task": "paperless_mail.tasks.process_mail_accounts",
|
||||||
|
"schedule": crontab(minute="*/10"),
|
||||||
|
"options": {"expires": mail_expire},
|
||||||
|
},
|
||||||
|
"Train the classifier": {
|
||||||
|
"task": "documents.tasks.train_classifier",
|
||||||
|
"schedule": crontab(minute="5", hour="*/1"),
|
||||||
|
"options": {"expires": classifier_expire},
|
||||||
|
},
|
||||||
|
"Optimize the index": {
|
||||||
|
"task": "documents.tasks.index_optimize",
|
||||||
|
"schedule": crontab(minute=0, hour=0),
|
||||||
|
"options": {"expires": index_expire},
|
||||||
|
},
|
||||||
|
"Perform sanity check": {
|
||||||
|
"task": "documents.tasks.sanity_check",
|
||||||
|
"schedule": crontab(minute=30, hour=0, day_of_week="sun"),
|
||||||
|
"options": {"expires": sanity_expire},
|
||||||
|
},
|
||||||
|
"Empty trash": {
|
||||||
|
"task": "documents.tasks.empty_trash",
|
||||||
|
"schedule": crontab(minute=0, hour="1"),
|
||||||
|
"options": {"expires": empty_trash_expire},
|
||||||
|
},
|
||||||
|
"Check and run scheduled workflows": {
|
||||||
|
"task": "documents.tasks.check_scheduled_workflows",
|
||||||
|
"schedule": crontab(minute="5", hour="*/1"),
|
||||||
|
"options": {"expires": workflow_expire},
|
||||||
|
},
|
||||||
|
"Rebuild LLM index": {
|
||||||
|
"task": "documents.tasks.llmindex_index",
|
||||||
|
"schedule": crontab(minute="10", hour="2"),
|
||||||
|
"options": {"expires": llm_index_expire},
|
||||||
|
},
|
||||||
|
"Cleanup expired share link bundles": {
|
||||||
|
"task": "documents.tasks.cleanup_expired_share_link_bundles",
|
||||||
|
"schedule": crontab(minute=0, hour="2"),
|
||||||
|
"options": {"expires": share_link_cleanup_expire},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
overrides = overrides or {}
|
||||||
|
disabled = disabled or set()
|
||||||
|
|
||||||
|
for key, val in overrides.items():
|
||||||
|
schedule[key] = {**schedule.get(key, {}), **val}
|
||||||
|
|
||||||
|
for key in disabled:
|
||||||
|
schedule.pop(key, None)
|
||||||
|
|
||||||
|
return schedule
|
||||||
|
|
||||||
|
|
||||||
|
class TestParseBeatSchedule:
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
("env", "expected"),
|
||||||
|
[
|
||||||
|
pytest.param({}, make_expected_schedule(), id="defaults"),
|
||||||
|
pytest.param(
|
||||||
|
{"PAPERLESS_EMAIL_TASK_CRON": "*/50 * * * mon"},
|
||||||
|
make_expected_schedule(
|
||||||
|
overrides={
|
||||||
|
"Check all e-mail accounts": {
|
||||||
|
"schedule": crontab(minute="*/50", day_of_week="mon"),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
),
|
||||||
|
id="email-changed",
|
||||||
|
),
|
||||||
|
pytest.param(
|
||||||
|
{"PAPERLESS_INDEX_TASK_CRON": "disable"},
|
||||||
|
make_expected_schedule(disabled={"Optimize the index"}),
|
||||||
|
id="index-disabled",
|
||||||
|
),
|
||||||
|
pytest.param(
|
||||||
|
{
|
||||||
|
"PAPERLESS_EMAIL_TASK_CRON": "disable",
|
||||||
|
"PAPERLESS_TRAIN_TASK_CRON": "disable",
|
||||||
|
"PAPERLESS_SANITY_TASK_CRON": "disable",
|
||||||
|
"PAPERLESS_INDEX_TASK_CRON": "disable",
|
||||||
|
"PAPERLESS_EMPTY_TRASH_TASK_CRON": "disable",
|
||||||
|
"PAPERLESS_WORKFLOW_SCHEDULED_TASK_CRON": "disable",
|
||||||
|
"PAPERLESS_LLM_INDEX_TASK_CRON": "disable",
|
||||||
|
"PAPERLESS_SHARE_LINK_BUNDLE_CLEANUP_CRON": "disable",
|
||||||
|
},
|
||||||
|
{},
|
||||||
|
id="all-disabled",
|
||||||
|
),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_parse_beat_schedule(
|
||||||
|
self,
|
||||||
|
env: dict[str, str],
|
||||||
|
expected: dict[str, Any],
|
||||||
|
mocker: MockerFixture,
|
||||||
|
) -> None:
|
||||||
|
mocker.patch.dict(os.environ, env, clear=False)
|
||||||
|
schedule = parse_beat_schedule()
|
||||||
|
assert schedule == expected
|
||||||
|
|
||||||
|
|
||||||
class TestParseDbSettings:
|
class TestParseDbSettings:
|
||||||
@@ -264,3 +533,85 @@ class TestParseDbSettings:
|
|||||||
settings = parse_db_settings(tmp_path)
|
settings = parse_db_settings(tmp_path)
|
||||||
|
|
||||||
assert settings == expected_database_settings
|
assert settings == expected_database_settings
|
||||||
|
|
||||||
|
|
||||||
|
class TestParseIgnoreDates:
|
||||||
|
"""Tests the parsing of the PAPERLESS_IGNORE_DATES setting value."""
|
||||||
|
|
||||||
|
def test_no_ignore_dates_set(self) -> None:
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- No ignore dates are set
|
||||||
|
THEN:
|
||||||
|
- No ignore dates are parsed
|
||||||
|
"""
|
||||||
|
assert parse_ignore_dates("", "YMD") == set()
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
("env_str", "date_format", "expected"),
|
||||||
|
[
|
||||||
|
pytest.param(
|
||||||
|
"1985-05-01",
|
||||||
|
"YMD",
|
||||||
|
{datetime.date(1985, 5, 1)},
|
||||||
|
id="single-ymd",
|
||||||
|
),
|
||||||
|
pytest.param(
|
||||||
|
"1985-05-01,1991-12-05",
|
||||||
|
"YMD",
|
||||||
|
{datetime.date(1985, 5, 1), datetime.date(1991, 12, 5)},
|
||||||
|
id="multiple-ymd",
|
||||||
|
),
|
||||||
|
pytest.param(
|
||||||
|
"2010-12-13",
|
||||||
|
"YMD",
|
||||||
|
{datetime.date(2010, 12, 13)},
|
||||||
|
id="single-ymd-2",
|
||||||
|
),
|
||||||
|
pytest.param(
|
||||||
|
"11.01.10",
|
||||||
|
"DMY",
|
||||||
|
{datetime.date(2010, 1, 11)},
|
||||||
|
id="single-dmy",
|
||||||
|
),
|
||||||
|
pytest.param(
|
||||||
|
"11.01.2001,15-06-1996",
|
||||||
|
"DMY",
|
||||||
|
{datetime.date(2001, 1, 11), datetime.date(1996, 6, 15)},
|
||||||
|
id="multiple-dmy",
|
||||||
|
),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_ignore_dates_parsed(
|
||||||
|
self,
|
||||||
|
env_str: str,
|
||||||
|
date_format: str,
|
||||||
|
expected: set[datetime.date],
|
||||||
|
) -> None:
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- Ignore dates are set per certain inputs
|
||||||
|
THEN:
|
||||||
|
- All ignore dates are parsed
|
||||||
|
"""
|
||||||
|
assert parse_ignore_dates(env_str, date_format) == expected
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
("languages", "expected"),
|
||||||
|
[
|
||||||
|
("de", ["de"]),
|
||||||
|
("zh", ["zh"]),
|
||||||
|
("fr+en", ["fr", "en"]),
|
||||||
|
# Locales must be supported
|
||||||
|
("en-001+fr-CA", ["en-001", "fr-CA"]),
|
||||||
|
("en-001+fr", ["en-001", "fr"]),
|
||||||
|
# Special case for Chinese: variants seem to miss some dates,
|
||||||
|
# so we always add "zh" as a fallback.
|
||||||
|
("en+zh-Hans-HK", ["en", "zh-Hans-HK", "zh"]),
|
||||||
|
("en+zh-Hans", ["en", "zh-Hans", "zh"]),
|
||||||
|
("en+zh-Hans+zh-Hant", ["en", "zh-Hans", "zh-Hant", "zh"]),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_parse_dateparser_languages(languages: str, expected: list[str]) -> None:
|
||||||
|
assert sorted(parse_dateparser_languages(languages)) == sorted(expected)
|
||||||
|
|||||||
@@ -4,8 +4,12 @@ from pathlib import Path
|
|||||||
import pytest
|
import pytest
|
||||||
from pytest_mock import MockerFixture
|
from pytest_mock import MockerFixture
|
||||||
|
|
||||||
|
from paperless.settings.parsers import get_bool_from_env
|
||||||
from paperless.settings.parsers import get_choice_from_env
|
from paperless.settings.parsers import get_choice_from_env
|
||||||
|
from paperless.settings.parsers import get_float_from_env
|
||||||
from paperless.settings.parsers import get_int_from_env
|
from paperless.settings.parsers import get_int_from_env
|
||||||
|
from paperless.settings.parsers import get_list_from_env
|
||||||
|
from paperless.settings.parsers import get_path_from_env
|
||||||
from paperless.settings.parsers import parse_dict_from_str
|
from paperless.settings.parsers import parse_dict_from_str
|
||||||
from paperless.settings.parsers import str_to_bool
|
from paperless.settings.parsers import str_to_bool
|
||||||
|
|
||||||
@@ -205,6 +209,29 @@ class TestParseDictFromString:
|
|||||||
assert isinstance(result["database"]["port"], int)
|
assert isinstance(result["database"]["port"], int)
|
||||||
|
|
||||||
|
|
||||||
|
class TestGetBoolFromEnv:
|
||||||
|
def test_existing_env_var(self, mocker):
|
||||||
|
"""Test that an existing environment variable is read and converted."""
|
||||||
|
mocker.patch.dict(os.environ, {"TEST_VAR": "true"})
|
||||||
|
assert get_bool_from_env("TEST_VAR") is True
|
||||||
|
|
||||||
|
def test_missing_env_var_uses_default_no(self, mocker):
|
||||||
|
"""Test that a missing environment variable uses default 'NO' and returns False."""
|
||||||
|
mocker.patch.dict(os.environ, {}, clear=True)
|
||||||
|
assert get_bool_from_env("MISSING_VAR") is False
|
||||||
|
|
||||||
|
def test_missing_env_var_with_explicit_default(self, mocker):
|
||||||
|
"""Test that a missing environment variable uses the provided default."""
|
||||||
|
mocker.patch.dict(os.environ, {}, clear=True)
|
||||||
|
assert get_bool_from_env("MISSING_VAR", default="yes") is True
|
||||||
|
|
||||||
|
def test_invalid_value_raises_error(self, mocker):
|
||||||
|
"""Test that an invalid value raises ValueError (delegates to str_to_bool)."""
|
||||||
|
mocker.patch.dict(os.environ, {"INVALID_VAR": "maybe"})
|
||||||
|
with pytest.raises(ValueError):
|
||||||
|
get_bool_from_env("INVALID_VAR")
|
||||||
|
|
||||||
|
|
||||||
class TestGetIntFromEnv:
|
class TestGetIntFromEnv:
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
("env_value", "expected"),
|
("env_value", "expected"),
|
||||||
@@ -259,6 +286,199 @@ class TestGetIntFromEnv:
|
|||||||
get_int_from_env("INVALID_INT")
|
get_int_from_env("INVALID_INT")
|
||||||
|
|
||||||
|
|
||||||
|
class TestGetFloatFromEnv:
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
("env_value", "expected"),
|
||||||
|
[
|
||||||
|
pytest.param("3.14", 3.14, id="pi"),
|
||||||
|
pytest.param("42", 42.0, id="int_as_float"),
|
||||||
|
pytest.param("-2.5", -2.5, id="negative"),
|
||||||
|
pytest.param("0.0", 0.0, id="zero_float"),
|
||||||
|
pytest.param("0", 0.0, id="zero_int"),
|
||||||
|
pytest.param("1.5e2", 150.0, id="sci_positive"),
|
||||||
|
pytest.param("1e-3", 0.001, id="sci_negative"),
|
||||||
|
pytest.param("-1.23e4", -12300.0, id="sci_large"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_existing_env_var_valid_floats(self, mocker, env_value, expected):
|
||||||
|
"""Test that existing environment variables with valid floats return correct values."""
|
||||||
|
mocker.patch.dict(os.environ, {"FLOAT_VAR": env_value})
|
||||||
|
assert get_float_from_env("FLOAT_VAR") == expected
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
("default", "expected"),
|
||||||
|
[
|
||||||
|
pytest.param(3.14, 3.14, id="pi_default"),
|
||||||
|
pytest.param(0.0, 0.0, id="zero_default"),
|
||||||
|
pytest.param(-2.5, -2.5, id="negative_default"),
|
||||||
|
pytest.param(None, None, id="none_default"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_missing_env_var_with_defaults(self, mocker, default, expected):
|
||||||
|
"""Test that missing environment variables return provided defaults."""
|
||||||
|
mocker.patch.dict(os.environ, {}, clear=True)
|
||||||
|
assert get_float_from_env("MISSING_VAR", default=default) == expected
|
||||||
|
|
||||||
|
def test_missing_env_var_no_default(self, mocker):
|
||||||
|
"""Test that missing environment variable with no default returns None."""
|
||||||
|
mocker.patch.dict(os.environ, {}, clear=True)
|
||||||
|
assert get_float_from_env("MISSING_VAR") is None
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"invalid_value",
|
||||||
|
[
|
||||||
|
pytest.param("not_a_number", id="text"),
|
||||||
|
pytest.param("42.5.0", id="double_decimal"),
|
||||||
|
pytest.param("42a", id="alpha_suffix"),
|
||||||
|
pytest.param("", id="empty"),
|
||||||
|
pytest.param(" ", id="whitespace"),
|
||||||
|
pytest.param("true", id="boolean"),
|
||||||
|
pytest.param("1.2.3", id="triple_decimal"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_invalid_float_values_raise_error(self, mocker, invalid_value):
|
||||||
|
"""Test that invalid float values raise ValueError."""
|
||||||
|
mocker.patch.dict(os.environ, {"INVALID_FLOAT": invalid_value})
|
||||||
|
with pytest.raises(ValueError):
|
||||||
|
get_float_from_env("INVALID_FLOAT")
|
||||||
|
|
||||||
|
|
||||||
|
class TestGetPathFromEnv:
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"env_value",
|
||||||
|
[
|
||||||
|
pytest.param("/tmp/test", id="absolute"),
|
||||||
|
pytest.param("relative/path", id="relative"),
|
||||||
|
pytest.param("/path/with spaces/file.txt", id="spaces"),
|
||||||
|
pytest.param(".", id="current_dir"),
|
||||||
|
pytest.param("..", id="parent_dir"),
|
||||||
|
pytest.param("/", id="root"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_existing_env_var_paths(self, mocker, env_value):
|
||||||
|
"""Test that existing environment variables with paths return resolved Path objects."""
|
||||||
|
mocker.patch.dict(os.environ, {"PATH_VAR": env_value})
|
||||||
|
result = get_path_from_env("PATH_VAR")
|
||||||
|
assert isinstance(result, Path)
|
||||||
|
assert result == Path(env_value).resolve()
|
||||||
|
|
||||||
|
def test_missing_env_var_no_default(self, mocker):
|
||||||
|
"""Test that missing environment variable with no default returns None."""
|
||||||
|
mocker.patch.dict(os.environ, {}, clear=True)
|
||||||
|
assert get_path_from_env("MISSING_VAR") is None
|
||||||
|
|
||||||
|
def test_missing_env_var_with_none_default(self, mocker):
|
||||||
|
"""Test that missing environment variable with None default returns None."""
|
||||||
|
mocker.patch.dict(os.environ, {}, clear=True)
|
||||||
|
assert get_path_from_env("MISSING_VAR", default=None) is None
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"default_path_str",
|
||||||
|
[
|
||||||
|
pytest.param("/default/path", id="absolute_default"),
|
||||||
|
pytest.param("relative/default", id="relative_default"),
|
||||||
|
pytest.param(".", id="current_default"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_missing_env_var_with_path_defaults(self, mocker, default_path_str):
|
||||||
|
"""Test that missing environment variables return resolved default Path objects."""
|
||||||
|
mocker.patch.dict(os.environ, {}, clear=True)
|
||||||
|
default_path = Path(default_path_str)
|
||||||
|
result = get_path_from_env("MISSING_VAR", default=default_path)
|
||||||
|
assert isinstance(result, Path)
|
||||||
|
assert result == default_path.resolve()
|
||||||
|
|
||||||
|
def test_relative_paths_are_resolved(self, mocker):
|
||||||
|
"""Test that relative paths are properly resolved to absolute paths."""
|
||||||
|
mocker.patch.dict(os.environ, {"REL_PATH": "relative/path"})
|
||||||
|
result = get_path_from_env("REL_PATH")
|
||||||
|
assert result is not None
|
||||||
|
assert result.is_absolute()
|
||||||
|
|
||||||
|
|
||||||
|
class TestGetListFromEnv:
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
("env_value", "expected"),
|
||||||
|
[
|
||||||
|
pytest.param("a,b,c", ["a", "b", "c"], id="basic_comma_separated"),
|
||||||
|
pytest.param("single", ["single"], id="single_element"),
|
||||||
|
pytest.param("", [], id="empty_string"),
|
||||||
|
pytest.param("a, b , c", ["a", "b", "c"], id="whitespace_trimmed"),
|
||||||
|
pytest.param("a,,b,c", ["a", "b", "c"], id="empty_elements_removed"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_existing_env_var_basic_parsing(self, mocker, env_value, expected):
|
||||||
|
"""Test that existing environment variables are parsed correctly."""
|
||||||
|
mocker.patch.dict(os.environ, {"LIST_VAR": env_value})
|
||||||
|
result = get_list_from_env("LIST_VAR")
|
||||||
|
assert result == expected
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
("separator", "env_value", "expected"),
|
||||||
|
[
|
||||||
|
pytest.param("|", "a|b|c", ["a", "b", "c"], id="pipe_separator"),
|
||||||
|
pytest.param(":", "a:b:c", ["a", "b", "c"], id="colon_separator"),
|
||||||
|
pytest.param(";", "a;b;c", ["a", "b", "c"], id="semicolon_separator"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_custom_separators(self, mocker, separator, env_value, expected):
|
||||||
|
"""Test that custom separators work correctly."""
|
||||||
|
mocker.patch.dict(os.environ, {"LIST_VAR": env_value})
|
||||||
|
result = get_list_from_env("LIST_VAR", separator=separator)
|
||||||
|
assert result == expected
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
("default", "expected"),
|
||||||
|
[
|
||||||
|
pytest.param(
|
||||||
|
["default1", "default2"],
|
||||||
|
["default1", "default2"],
|
||||||
|
id="string_list_default",
|
||||||
|
),
|
||||||
|
pytest.param([1, 2, 3], [1, 2, 3], id="int_list_default"),
|
||||||
|
pytest.param(None, [], id="none_default_returns_empty_list"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_missing_env_var_with_defaults(self, mocker, default, expected):
|
||||||
|
"""Test that missing environment variables return provided defaults."""
|
||||||
|
mocker.patch.dict(os.environ, {}, clear=True)
|
||||||
|
result = get_list_from_env("MISSING_VAR", default=default)
|
||||||
|
assert result == expected
|
||||||
|
|
||||||
|
def test_missing_env_var_no_default(self, mocker):
|
||||||
|
"""Test that missing environment variable with no default returns empty list."""
|
||||||
|
mocker.patch.dict(os.environ, {}, clear=True)
|
||||||
|
result = get_list_from_env("MISSING_VAR")
|
||||||
|
assert result == []
|
||||||
|
|
||||||
|
def test_required_env_var_missing_raises_error(self, mocker):
|
||||||
|
"""Test that missing required environment variable raises ValueError."""
|
||||||
|
mocker.patch.dict(os.environ, {}, clear=True)
|
||||||
|
with pytest.raises(
|
||||||
|
ValueError,
|
||||||
|
match="Required environment variable 'REQUIRED_VAR' is not set",
|
||||||
|
):
|
||||||
|
get_list_from_env("REQUIRED_VAR", required=True)
|
||||||
|
|
||||||
|
def test_required_env_var_with_default_does_not_raise(self, mocker):
|
||||||
|
"""Test that required environment variable with default does not raise error."""
|
||||||
|
mocker.patch.dict(os.environ, {}, clear=True)
|
||||||
|
result = get_list_from_env("REQUIRED_VAR", default=["default"], required=True)
|
||||||
|
assert result == ["default"]
|
||||||
|
|
||||||
|
def test_strip_whitespace_false(self, mocker):
|
||||||
|
"""Test that whitespace is preserved when strip_whitespace=False."""
|
||||||
|
mocker.patch.dict(os.environ, {"LIST_VAR": " a , b , c "})
|
||||||
|
result = get_list_from_env("LIST_VAR", strip_whitespace=False)
|
||||||
|
assert result == [" a ", " b ", " c "]
|
||||||
|
|
||||||
|
def test_remove_empty_false(self, mocker):
|
||||||
|
"""Test that empty elements are preserved when remove_empty=False."""
|
||||||
|
mocker.patch.dict(os.environ, {"LIST_VAR": "a,,b,,c"})
|
||||||
|
result = get_list_from_env("LIST_VAR", remove_empty=False)
|
||||||
|
assert result == ["a", "", "b", "", "c"]
|
||||||
|
|
||||||
|
|
||||||
class TestGetEnvChoice:
|
class TestGetEnvChoice:
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def valid_choices(self) -> set[str]:
|
def valid_choices(self) -> set[str]:
|
||||||
@@ -394,21 +614,3 @@ class TestGetEnvChoice:
|
|||||||
result = get_choice_from_env("TEST_ENV", large_choices)
|
result = get_choice_from_env("TEST_ENV", large_choices)
|
||||||
|
|
||||||
assert result == "option_50"
|
assert result == "option_50"
|
||||||
|
|
||||||
def test_different_env_keys(
|
|
||||||
self,
|
|
||||||
mocker: MockerFixture,
|
|
||||||
valid_choices: set[str],
|
|
||||||
) -> None:
|
|
||||||
"""Test function works with different environment variable keys."""
|
|
||||||
test_cases = [
|
|
||||||
("DJANGO_ENV", "development"),
|
|
||||||
("DATABASE_BACKEND", "staging"),
|
|
||||||
("LOG_LEVEL", "production"),
|
|
||||||
("APP_MODE", "development"),
|
|
||||||
]
|
|
||||||
|
|
||||||
for env_key, env_value in test_cases:
|
|
||||||
mocker.patch.dict("os.environ", {env_key: env_value})
|
|
||||||
result = get_choice_from_env(env_key, valid_choices)
|
|
||||||
assert result == env_value
|
|
||||||
|
|||||||
56
src/paperless/tests/settings/test_settings.py
Normal file
56
src/paperless/tests/settings/test_settings.py
Normal file
@@ -0,0 +1,56 @@
|
|||||||
|
import os
|
||||||
|
from unittest import TestCase
|
||||||
|
from unittest import mock
|
||||||
|
|
||||||
|
from paperless.settings import _parse_paperless_url
|
||||||
|
from paperless.settings import default_threads_per_worker
|
||||||
|
|
||||||
|
|
||||||
|
class TestThreadCalculation(TestCase):
|
||||||
|
def test_workers_threads(self) -> None:
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- Certain CPU counts
|
||||||
|
WHEN:
|
||||||
|
- Threads per worker is calculated
|
||||||
|
THEN:
|
||||||
|
- Threads per worker less than or equal to CPU count
|
||||||
|
- At least 1 thread per worker
|
||||||
|
"""
|
||||||
|
default_workers = 1
|
||||||
|
|
||||||
|
for i in range(1, 64):
|
||||||
|
with mock.patch(
|
||||||
|
"paperless.settings.multiprocessing.cpu_count",
|
||||||
|
) as cpu_count:
|
||||||
|
cpu_count.return_value = i
|
||||||
|
|
||||||
|
default_threads = default_threads_per_worker(default_workers)
|
||||||
|
|
||||||
|
self.assertGreaterEqual(default_threads, 1)
|
||||||
|
|
||||||
|
self.assertLessEqual(default_workers * default_threads, i)
|
||||||
|
|
||||||
|
|
||||||
|
class TestPaperlessURLSettings(TestCase):
|
||||||
|
def test_paperless_url(self) -> None:
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- PAPERLESS_URL is set
|
||||||
|
WHEN:
|
||||||
|
- The URL is parsed
|
||||||
|
THEN:
|
||||||
|
- The URL is returned and present in related settings
|
||||||
|
"""
|
||||||
|
with mock.patch.dict(
|
||||||
|
os.environ,
|
||||||
|
{
|
||||||
|
"PAPERLESS_URL": "https://example.com",
|
||||||
|
},
|
||||||
|
):
|
||||||
|
url = _parse_paperless_url()
|
||||||
|
self.assertEqual("https://example.com", url)
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
self.assertIn(url, settings.CSRF_TRUSTED_ORIGINS)
|
||||||
|
self.assertIn(url, settings.CORS_ALLOWED_ORIGINS)
|
||||||
@@ -1,107 +1,100 @@
|
|||||||
from unittest import mock
|
import logging
|
||||||
|
|
||||||
|
import pytest
|
||||||
from allauth.account.adapter import get_adapter
|
from allauth.account.adapter import get_adapter
|
||||||
from allauth.core import context
|
from allauth.core import context
|
||||||
from allauth.socialaccount.adapter import get_adapter as get_social_adapter
|
from allauth.socialaccount.adapter import get_adapter as get_social_adapter
|
||||||
from django.conf import settings
|
|
||||||
from django.contrib.auth.models import AnonymousUser
|
from django.contrib.auth.models import AnonymousUser
|
||||||
from django.contrib.auth.models import Group
|
from django.contrib.auth.models import Group
|
||||||
from django.contrib.auth.models import User
|
from django.contrib.auth.models import User
|
||||||
from django.forms import ValidationError
|
from django.forms import ValidationError
|
||||||
from django.http import HttpRequest
|
from django.http import HttpRequest
|
||||||
from django.test import TestCase
|
|
||||||
from django.test import override_settings
|
|
||||||
from django.urls import reverse
|
from django.urls import reverse
|
||||||
|
from pytest_django.fixtures import SettingsWrapper
|
||||||
|
from pytest_mock import MockerFixture
|
||||||
from rest_framework.authtoken.models import Token
|
from rest_framework.authtoken.models import Token
|
||||||
|
|
||||||
from paperless.adapter import DrfTokenStrategy
|
from paperless.adapter import DrfTokenStrategy
|
||||||
|
|
||||||
|
|
||||||
class TestCustomAccountAdapter(TestCase):
|
@pytest.mark.django_db
|
||||||
def test_is_open_for_signup(self) -> None:
|
class TestCustomAccountAdapter:
|
||||||
|
def test_is_open_for_signup(self, settings: SettingsWrapper) -> None:
|
||||||
adapter = get_adapter()
|
adapter = get_adapter()
|
||||||
|
|
||||||
# With no accounts, signups should be allowed
|
# With no accounts, signups should be allowed
|
||||||
self.assertTrue(adapter.is_open_for_signup(None))
|
assert adapter.is_open_for_signup(None)
|
||||||
|
|
||||||
User.objects.create_user("testuser")
|
User.objects.create_user("testuser")
|
||||||
|
|
||||||
# Test when ACCOUNT_ALLOW_SIGNUPS is True
|
|
||||||
settings.ACCOUNT_ALLOW_SIGNUPS = True
|
settings.ACCOUNT_ALLOW_SIGNUPS = True
|
||||||
self.assertTrue(adapter.is_open_for_signup(None))
|
assert adapter.is_open_for_signup(None)
|
||||||
|
|
||||||
# Test when ACCOUNT_ALLOW_SIGNUPS is False
|
|
||||||
settings.ACCOUNT_ALLOW_SIGNUPS = False
|
settings.ACCOUNT_ALLOW_SIGNUPS = False
|
||||||
self.assertFalse(adapter.is_open_for_signup(None))
|
assert not adapter.is_open_for_signup(None)
|
||||||
|
|
||||||
def test_is_safe_url(self) -> None:
|
def test_is_safe_url(self, settings: SettingsWrapper) -> None:
|
||||||
request = HttpRequest()
|
request = HttpRequest()
|
||||||
request.get_host = mock.Mock(return_value="example.com")
|
request.get_host = lambda: "example.com"
|
||||||
with context.request_context(request):
|
with context.request_context(request):
|
||||||
adapter = get_adapter()
|
adapter = get_adapter()
|
||||||
with override_settings(ALLOWED_HOSTS=["*"]):
|
|
||||||
# True because request host is same
|
|
||||||
url = "https://example.com"
|
|
||||||
self.assertTrue(adapter.is_safe_url(url))
|
|
||||||
|
|
||||||
url = "https://evil.com"
|
settings.ALLOWED_HOSTS = ["*"]
|
||||||
|
# True because request host is same
|
||||||
|
assert adapter.is_safe_url("https://example.com")
|
||||||
# False despite wildcard because request host is different
|
# False despite wildcard because request host is different
|
||||||
self.assertFalse(adapter.is_safe_url(url))
|
assert not adapter.is_safe_url("https://evil.com")
|
||||||
|
|
||||||
settings.ALLOWED_HOSTS = ["example.com"]
|
settings.ALLOWED_HOSTS = ["example.com"]
|
||||||
url = "https://example.com"
|
|
||||||
# True because request host is same
|
# True because request host is same
|
||||||
self.assertTrue(adapter.is_safe_url(url))
|
assert adapter.is_safe_url("https://example.com")
|
||||||
|
|
||||||
settings.ALLOWED_HOSTS = ["*", "example.com"]
|
settings.ALLOWED_HOSTS = ["*", "example.com"]
|
||||||
url = "//evil.com"
|
|
||||||
# False because request host is not in allowed hosts
|
# False because request host is not in allowed hosts
|
||||||
self.assertFalse(adapter.is_safe_url(url))
|
assert not adapter.is_safe_url("//evil.com")
|
||||||
|
|
||||||
@mock.patch("allauth.core.internal.ratelimit.consume", return_value=True)
|
def test_pre_authenticate(
|
||||||
def test_pre_authenticate(self, mock_consume) -> None:
|
self,
|
||||||
|
settings: SettingsWrapper,
|
||||||
|
mocker: MockerFixture,
|
||||||
|
) -> None:
|
||||||
|
mocker.patch("allauth.core.internal.ratelimit.consume", return_value=True)
|
||||||
adapter = get_adapter()
|
adapter = get_adapter()
|
||||||
request = HttpRequest()
|
request = HttpRequest()
|
||||||
request.get_host = mock.Mock(return_value="example.com")
|
request.get_host = lambda: "example.com"
|
||||||
|
|
||||||
settings.DISABLE_REGULAR_LOGIN = False
|
settings.DISABLE_REGULAR_LOGIN = False
|
||||||
adapter.pre_authenticate(request)
|
adapter.pre_authenticate(request)
|
||||||
|
|
||||||
settings.DISABLE_REGULAR_LOGIN = True
|
settings.DISABLE_REGULAR_LOGIN = True
|
||||||
with self.assertRaises(ValidationError):
|
with pytest.raises(ValidationError):
|
||||||
adapter.pre_authenticate(request)
|
adapter.pre_authenticate(request)
|
||||||
|
|
||||||
def test_get_reset_password_from_key_url(self) -> None:
|
def test_get_reset_password_from_key_url(self, settings: SettingsWrapper) -> None:
|
||||||
request = HttpRequest()
|
request = HttpRequest()
|
||||||
request.get_host = mock.Mock(return_value="foo.org")
|
request.get_host = lambda: "foo.org"
|
||||||
with context.request_context(request):
|
with context.request_context(request):
|
||||||
adapter = get_adapter()
|
adapter = get_adapter()
|
||||||
|
|
||||||
# Test when PAPERLESS_URL is None
|
settings.PAPERLESS_URL = None
|
||||||
with override_settings(
|
settings.ACCOUNT_DEFAULT_HTTP_PROTOCOL = "https"
|
||||||
PAPERLESS_URL=None,
|
expected_url = f"https://foo.org{reverse('account_reset_password_from_key', kwargs={'uidb36': 'UID', 'key': 'KEY'})}"
|
||||||
ACCOUNT_DEFAULT_HTTP_PROTOCOL="https",
|
assert adapter.get_reset_password_from_key_url("UID-KEY") == expected_url
|
||||||
):
|
|
||||||
expected_url = f"https://foo.org{reverse('account_reset_password_from_key', kwargs={'uidb36': 'UID', 'key': 'KEY'})}"
|
|
||||||
self.assertEqual(
|
|
||||||
adapter.get_reset_password_from_key_url("UID-KEY"),
|
|
||||||
expected_url,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Test when PAPERLESS_URL is not None
|
settings.PAPERLESS_URL = "https://bar.com"
|
||||||
with override_settings(PAPERLESS_URL="https://bar.com"):
|
expected_url = f"https://bar.com{reverse('account_reset_password_from_key', kwargs={'uidb36': 'UID', 'key': 'KEY'})}"
|
||||||
expected_url = f"https://bar.com{reverse('account_reset_password_from_key', kwargs={'uidb36': 'UID', 'key': 'KEY'})}"
|
assert adapter.get_reset_password_from_key_url("UID-KEY") == expected_url
|
||||||
self.assertEqual(
|
|
||||||
adapter.get_reset_password_from_key_url("UID-KEY"),
|
|
||||||
expected_url,
|
|
||||||
)
|
|
||||||
|
|
||||||
@override_settings(ACCOUNT_DEFAULT_GROUPS=["group1", "group2"])
|
def test_save_user_adds_groups(
|
||||||
def test_save_user_adds_groups(self) -> None:
|
self,
|
||||||
|
settings: SettingsWrapper,
|
||||||
|
mocker: MockerFixture,
|
||||||
|
) -> None:
|
||||||
|
settings.ACCOUNT_DEFAULT_GROUPS = ["group1", "group2"]
|
||||||
Group.objects.create(name="group1")
|
Group.objects.create(name="group1")
|
||||||
user = User.objects.create_user("testuser")
|
user = User.objects.create_user("testuser")
|
||||||
adapter = get_adapter()
|
adapter = get_adapter()
|
||||||
form = mock.Mock(
|
form = mocker.MagicMock(
|
||||||
cleaned_data={
|
cleaned_data={
|
||||||
"username": "testuser",
|
"username": "testuser",
|
||||||
"email": "user@example.com",
|
"email": "user@example.com",
|
||||||
@@ -110,88 +103,81 @@ class TestCustomAccountAdapter(TestCase):
|
|||||||
|
|
||||||
user = adapter.save_user(HttpRequest(), user, form, commit=True)
|
user = adapter.save_user(HttpRequest(), user, form, commit=True)
|
||||||
|
|
||||||
self.assertEqual(user.groups.count(), 1)
|
assert user.groups.count() == 1
|
||||||
self.assertTrue(user.groups.filter(name="group1").exists())
|
assert user.groups.filter(name="group1").exists()
|
||||||
self.assertFalse(user.groups.filter(name="group2").exists())
|
assert not user.groups.filter(name="group2").exists()
|
||||||
|
|
||||||
def test_fresh_install_save_creates_superuser(self) -> None:
|
def test_fresh_install_save_creates_superuser(self, mocker: MockerFixture) -> None:
|
||||||
adapter = get_adapter()
|
adapter = get_adapter()
|
||||||
form = mock.Mock(
|
form = mocker.MagicMock(
|
||||||
cleaned_data={
|
cleaned_data={
|
||||||
"username": "testuser",
|
"username": "testuser",
|
||||||
"email": "user@paperless-ngx.com",
|
"email": "user@paperless-ngx.com",
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
user = adapter.save_user(HttpRequest(), User(), form, commit=True)
|
user = adapter.save_user(HttpRequest(), User(), form, commit=True)
|
||||||
self.assertTrue(user.is_superuser)
|
assert user.is_superuser
|
||||||
|
|
||||||
# Next time, it should not create a superuser
|
form = mocker.MagicMock(
|
||||||
form = mock.Mock(
|
|
||||||
cleaned_data={
|
cleaned_data={
|
||||||
"username": "testuser2",
|
"username": "testuser2",
|
||||||
"email": "user2@paperless-ngx.com",
|
"email": "user2@paperless-ngx.com",
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
user2 = adapter.save_user(HttpRequest(), User(), form, commit=True)
|
user2 = adapter.save_user(HttpRequest(), User(), form, commit=True)
|
||||||
self.assertFalse(user2.is_superuser)
|
assert not user2.is_superuser
|
||||||
|
|
||||||
|
|
||||||
class TestCustomSocialAccountAdapter(TestCase):
|
class TestCustomSocialAccountAdapter:
|
||||||
def test_is_open_for_signup(self) -> None:
|
@pytest.mark.django_db
|
||||||
|
def test_is_open_for_signup(self, settings: SettingsWrapper) -> None:
|
||||||
adapter = get_social_adapter()
|
adapter = get_social_adapter()
|
||||||
|
|
||||||
# Test when SOCIALACCOUNT_ALLOW_SIGNUPS is True
|
|
||||||
settings.SOCIALACCOUNT_ALLOW_SIGNUPS = True
|
settings.SOCIALACCOUNT_ALLOW_SIGNUPS = True
|
||||||
self.assertTrue(adapter.is_open_for_signup(None, None))
|
assert adapter.is_open_for_signup(None, None)
|
||||||
|
|
||||||
# Test when SOCIALACCOUNT_ALLOW_SIGNUPS is False
|
|
||||||
settings.SOCIALACCOUNT_ALLOW_SIGNUPS = False
|
settings.SOCIALACCOUNT_ALLOW_SIGNUPS = False
|
||||||
self.assertFalse(adapter.is_open_for_signup(None, None))
|
assert not adapter.is_open_for_signup(None, None)
|
||||||
|
|
||||||
def test_get_connect_redirect_url(self) -> None:
|
def test_get_connect_redirect_url(self) -> None:
|
||||||
adapter = get_social_adapter()
|
adapter = get_social_adapter()
|
||||||
request = None
|
assert adapter.get_connect_redirect_url(None, None) == reverse("base")
|
||||||
socialaccount = None
|
|
||||||
|
|
||||||
# Test the default URL
|
@pytest.mark.django_db
|
||||||
expected_url = reverse("base")
|
def test_save_user_adds_groups(
|
||||||
self.assertEqual(
|
self,
|
||||||
adapter.get_connect_redirect_url(request, socialaccount),
|
settings: SettingsWrapper,
|
||||||
expected_url,
|
mocker: MockerFixture,
|
||||||
)
|
) -> None:
|
||||||
|
settings.SOCIAL_ACCOUNT_DEFAULT_GROUPS = ["group1", "group2"]
|
||||||
@override_settings(SOCIAL_ACCOUNT_DEFAULT_GROUPS=["group1", "group2"])
|
|
||||||
def test_save_user_adds_groups(self) -> None:
|
|
||||||
Group.objects.create(name="group1")
|
Group.objects.create(name="group1")
|
||||||
adapter = get_social_adapter()
|
adapter = get_social_adapter()
|
||||||
request = HttpRequest()
|
|
||||||
user = User.objects.create_user("testuser")
|
user = User.objects.create_user("testuser")
|
||||||
sociallogin = mock.Mock(
|
sociallogin = mocker.MagicMock(user=user)
|
||||||
user=user,
|
|
||||||
)
|
|
||||||
|
|
||||||
user = adapter.save_user(request, sociallogin, None)
|
user = adapter.save_user(HttpRequest(), sociallogin, None)
|
||||||
|
|
||||||
self.assertEqual(user.groups.count(), 1)
|
assert user.groups.count() == 1
|
||||||
self.assertTrue(user.groups.filter(name="group1").exists())
|
assert user.groups.filter(name="group1").exists()
|
||||||
self.assertFalse(user.groups.filter(name="group2").exists())
|
assert not user.groups.filter(name="group2").exists()
|
||||||
|
|
||||||
def test_error_logged_on_authentication_error(self) -> None:
|
def test_error_logged_on_authentication_error(
|
||||||
|
self,
|
||||||
|
caplog: pytest.LogCaptureFixture,
|
||||||
|
) -> None:
|
||||||
adapter = get_social_adapter()
|
adapter = get_social_adapter()
|
||||||
request = HttpRequest()
|
with caplog.at_level(logging.INFO, logger="paperless.auth"):
|
||||||
with self.assertLogs("paperless.auth", level="INFO") as log_cm:
|
|
||||||
adapter.on_authentication_error(
|
adapter.on_authentication_error(
|
||||||
request,
|
HttpRequest(),
|
||||||
provider="test-provider",
|
provider="test-provider",
|
||||||
error="Error",
|
error="Error",
|
||||||
exception="Test authentication error",
|
exception="Test authentication error",
|
||||||
)
|
)
|
||||||
self.assertTrue(
|
assert any("Test authentication error" in msg for msg in caplog.messages)
|
||||||
any("Test authentication error" in message for message in log_cm.output),
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class TestDrfTokenStrategy(TestCase):
|
@pytest.mark.django_db
|
||||||
|
class TestDrfTokenStrategy:
|
||||||
def test_create_access_token_creates_new_token(self) -> None:
|
def test_create_access_token_creates_new_token(self) -> None:
|
||||||
"""
|
"""
|
||||||
GIVEN:
|
GIVEN:
|
||||||
@@ -201,7 +187,6 @@ class TestDrfTokenStrategy(TestCase):
|
|||||||
THEN:
|
THEN:
|
||||||
- A new token is created and its key is returned
|
- A new token is created and its key is returned
|
||||||
"""
|
"""
|
||||||
|
|
||||||
user = User.objects.create_user("testuser")
|
user = User.objects.create_user("testuser")
|
||||||
request = HttpRequest()
|
request = HttpRequest()
|
||||||
request.user = user
|
request.user = user
|
||||||
@@ -209,13 +194,9 @@ class TestDrfTokenStrategy(TestCase):
|
|||||||
strategy = DrfTokenStrategy()
|
strategy = DrfTokenStrategy()
|
||||||
token_key = strategy.create_access_token(request)
|
token_key = strategy.create_access_token(request)
|
||||||
|
|
||||||
# Verify a token was created
|
assert token_key is not None
|
||||||
self.assertIsNotNone(token_key)
|
assert Token.objects.filter(user=user).exists()
|
||||||
self.assertTrue(Token.objects.filter(user=user).exists())
|
assert token_key == Token.objects.get(user=user).key
|
||||||
|
|
||||||
# Verify the returned key matches the created token
|
|
||||||
token = Token.objects.get(user=user)
|
|
||||||
self.assertEqual(token_key, token.key)
|
|
||||||
|
|
||||||
def test_create_access_token_returns_existing_token(self) -> None:
|
def test_create_access_token_returns_existing_token(self) -> None:
|
||||||
"""
|
"""
|
||||||
@@ -226,7 +207,6 @@ class TestDrfTokenStrategy(TestCase):
|
|||||||
THEN:
|
THEN:
|
||||||
- The same token key is returned (no new token created)
|
- The same token key is returned (no new token created)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
user = User.objects.create_user("testuser")
|
user = User.objects.create_user("testuser")
|
||||||
existing_token = Token.objects.create(user=user)
|
existing_token = Token.objects.create(user=user)
|
||||||
|
|
||||||
@@ -236,11 +216,8 @@ class TestDrfTokenStrategy(TestCase):
|
|||||||
strategy = DrfTokenStrategy()
|
strategy = DrfTokenStrategy()
|
||||||
token_key = strategy.create_access_token(request)
|
token_key = strategy.create_access_token(request)
|
||||||
|
|
||||||
# Verify the existing token key is returned
|
assert token_key == existing_token.key
|
||||||
self.assertEqual(token_key, existing_token.key)
|
assert Token.objects.filter(user=user).count() == 1
|
||||||
|
|
||||||
# Verify only one token exists (no duplicate created)
|
|
||||||
self.assertEqual(Token.objects.filter(user=user).count(), 1)
|
|
||||||
|
|
||||||
def test_create_access_token_returns_none_for_unauthenticated_user(self) -> None:
|
def test_create_access_token_returns_none_for_unauthenticated_user(self) -> None:
|
||||||
"""
|
"""
|
||||||
@@ -251,12 +228,11 @@ class TestDrfTokenStrategy(TestCase):
|
|||||||
THEN:
|
THEN:
|
||||||
- None is returned and no token is created
|
- None is returned and no token is created
|
||||||
"""
|
"""
|
||||||
|
|
||||||
request = HttpRequest()
|
request = HttpRequest()
|
||||||
request.user = AnonymousUser()
|
request.user = AnonymousUser()
|
||||||
|
|
||||||
strategy = DrfTokenStrategy()
|
strategy = DrfTokenStrategy()
|
||||||
token_key = strategy.create_access_token(request)
|
token_key = strategy.create_access_token(request)
|
||||||
|
|
||||||
self.assertIsNone(token_key)
|
assert token_key is None
|
||||||
self.assertEqual(Token.objects.count(), 0)
|
assert Token.objects.count() == 0
|
||||||
|
|||||||
@@ -1,16 +1,15 @@
|
|||||||
import os
|
import os
|
||||||
|
from collections.abc import Callable
|
||||||
|
from dataclasses import dataclass
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from unittest import mock
|
from unittest import mock
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from django.core.checks import Error
|
from django.core.checks import Error
|
||||||
from django.core.checks import Warning
|
from django.core.checks import Warning
|
||||||
from django.test import TestCase
|
from pytest_django.fixtures import SettingsWrapper
|
||||||
from django.test import override_settings
|
|
||||||
from pytest_mock import MockerFixture
|
from pytest_mock import MockerFixture
|
||||||
|
|
||||||
from documents.tests.utils import DirectoriesMixin
|
|
||||||
from documents.tests.utils import FileSystemAssertsMixin
|
|
||||||
from paperless.checks import audit_log_check
|
from paperless.checks import audit_log_check
|
||||||
from paperless.checks import binaries_check
|
from paperless.checks import binaries_check
|
||||||
from paperless.checks import check_deprecated_db_settings
|
from paperless.checks import check_deprecated_db_settings
|
||||||
@@ -20,54 +19,84 @@ from paperless.checks import paths_check
|
|||||||
from paperless.checks import settings_values_check
|
from paperless.checks import settings_values_check
|
||||||
|
|
||||||
|
|
||||||
class TestChecks(DirectoriesMixin, TestCase):
|
@dataclass(frozen=True, slots=True)
|
||||||
def test_binaries(self) -> None:
|
class PaperlessTestDirs:
|
||||||
self.assertEqual(binaries_check(None), [])
|
data_dir: Path
|
||||||
|
media_dir: Path
|
||||||
|
consumption_dir: Path
|
||||||
|
|
||||||
@override_settings(CONVERT_BINARY="uuuhh")
|
|
||||||
def test_binaries_fail(self) -> None:
|
|
||||||
self.assertEqual(len(binaries_check(None)), 1)
|
|
||||||
|
|
||||||
def test_paths_check(self) -> None:
|
# TODO: consolidate with documents/tests/conftest.py PaperlessDirs/paperless_dirs
|
||||||
self.assertEqual(paths_check(None), [])
|
# once the paperless and documents test suites are ready to share fixtures.
|
||||||
|
@pytest.fixture()
|
||||||
|
def directories(tmp_path: Path, settings: SettingsWrapper) -> PaperlessTestDirs:
|
||||||
|
data_dir = tmp_path / "data"
|
||||||
|
media_dir = tmp_path / "media"
|
||||||
|
consumption_dir = tmp_path / "consumption"
|
||||||
|
|
||||||
@override_settings(
|
for d in (data_dir, media_dir, consumption_dir):
|
||||||
MEDIA_ROOT=Path("uuh"),
|
d.mkdir()
|
||||||
DATA_DIR=Path("whatever"),
|
|
||||||
CONSUMPTION_DIR=Path("idontcare"),
|
settings.DATA_DIR = data_dir
|
||||||
|
settings.MEDIA_ROOT = media_dir
|
||||||
|
settings.CONSUMPTION_DIR = consumption_dir
|
||||||
|
|
||||||
|
return PaperlessTestDirs(
|
||||||
|
data_dir=data_dir,
|
||||||
|
media_dir=media_dir,
|
||||||
|
consumption_dir=consumption_dir,
|
||||||
)
|
)
|
||||||
def test_paths_check_dont_exist(self) -> None:
|
|
||||||
msgs = paths_check(None)
|
|
||||||
self.assertEqual(len(msgs), 3, str(msgs))
|
|
||||||
|
|
||||||
for msg in msgs:
|
|
||||||
self.assertTrue(msg.msg.endswith("is set but doesn't exist."))
|
|
||||||
|
|
||||||
def test_paths_check_no_access(self) -> None:
|
class TestChecks:
|
||||||
Path(self.dirs.data_dir).chmod(0o000)
|
def test_binaries(self) -> None:
|
||||||
Path(self.dirs.media_dir).chmod(0o000)
|
assert binaries_check(None) == []
|
||||||
Path(self.dirs.consumption_dir).chmod(0o000)
|
|
||||||
|
|
||||||
self.addCleanup(os.chmod, self.dirs.data_dir, 0o777)
|
def test_binaries_fail(self, settings: SettingsWrapper) -> None:
|
||||||
self.addCleanup(os.chmod, self.dirs.media_dir, 0o777)
|
settings.CONVERT_BINARY = "uuuhh"
|
||||||
self.addCleanup(os.chmod, self.dirs.consumption_dir, 0o777)
|
assert len(binaries_check(None)) == 1
|
||||||
|
|
||||||
|
@pytest.mark.usefixtures("directories")
|
||||||
|
def test_paths_check(self) -> None:
|
||||||
|
assert paths_check(None) == []
|
||||||
|
|
||||||
|
def test_paths_check_dont_exist(self, settings: SettingsWrapper) -> None:
|
||||||
|
settings.MEDIA_ROOT = Path("uuh")
|
||||||
|
settings.DATA_DIR = Path("whatever")
|
||||||
|
settings.CONSUMPTION_DIR = Path("idontcare")
|
||||||
|
|
||||||
msgs = paths_check(None)
|
msgs = paths_check(None)
|
||||||
self.assertEqual(len(msgs), 3)
|
|
||||||
|
|
||||||
|
assert len(msgs) == 3, str(msgs)
|
||||||
for msg in msgs:
|
for msg in msgs:
|
||||||
self.assertTrue(msg.msg.endswith("is not writeable"))
|
assert msg.msg.endswith("is set but doesn't exist.")
|
||||||
|
|
||||||
@override_settings(DEBUG=False)
|
def test_paths_check_no_access(self, directories: PaperlessTestDirs) -> None:
|
||||||
def test_debug_disabled(self) -> None:
|
directories.data_dir.chmod(0o000)
|
||||||
self.assertEqual(debug_mode_check(None), [])
|
directories.media_dir.chmod(0o000)
|
||||||
|
directories.consumption_dir.chmod(0o000)
|
||||||
|
|
||||||
@override_settings(DEBUG=True)
|
try:
|
||||||
def test_debug_enabled(self) -> None:
|
msgs = paths_check(None)
|
||||||
self.assertEqual(len(debug_mode_check(None)), 1)
|
finally:
|
||||||
|
directories.data_dir.chmod(0o777)
|
||||||
|
directories.media_dir.chmod(0o777)
|
||||||
|
directories.consumption_dir.chmod(0o777)
|
||||||
|
|
||||||
|
assert len(msgs) == 3
|
||||||
|
for msg in msgs:
|
||||||
|
assert msg.msg.endswith("is not writeable")
|
||||||
|
|
||||||
|
def test_debug_disabled(self, settings: SettingsWrapper) -> None:
|
||||||
|
settings.DEBUG = False
|
||||||
|
assert debug_mode_check(None) == []
|
||||||
|
|
||||||
|
def test_debug_enabled(self, settings: SettingsWrapper) -> None:
|
||||||
|
settings.DEBUG = True
|
||||||
|
assert len(debug_mode_check(None)) == 1
|
||||||
|
|
||||||
|
|
||||||
class TestSettingsChecksAgainstDefaults(DirectoriesMixin, TestCase):
|
class TestSettingsChecksAgainstDefaults:
|
||||||
def test_all_valid(self) -> None:
|
def test_all_valid(self) -> None:
|
||||||
"""
|
"""
|
||||||
GIVEN:
|
GIVEN:
|
||||||
@@ -78,104 +107,71 @@ class TestSettingsChecksAgainstDefaults(DirectoriesMixin, TestCase):
|
|||||||
- No system check errors reported
|
- No system check errors reported
|
||||||
"""
|
"""
|
||||||
msgs = settings_values_check(None)
|
msgs = settings_values_check(None)
|
||||||
self.assertEqual(len(msgs), 0)
|
assert len(msgs) == 0
|
||||||
|
|
||||||
|
|
||||||
class TestOcrSettingsChecks(DirectoriesMixin, TestCase):
|
class TestOcrSettingsChecks:
|
||||||
@override_settings(OCR_OUTPUT_TYPE="notapdf")
|
@pytest.mark.parametrize(
|
||||||
def test_invalid_output_type(self) -> None:
|
("setting", "value", "expected_msg"),
|
||||||
|
[
|
||||||
|
pytest.param(
|
||||||
|
"OCR_OUTPUT_TYPE",
|
||||||
|
"notapdf",
|
||||||
|
'OCR output type "notapdf"',
|
||||||
|
id="invalid-output-type",
|
||||||
|
),
|
||||||
|
pytest.param(
|
||||||
|
"OCR_MODE",
|
||||||
|
"makeitso",
|
||||||
|
'OCR output mode "makeitso"',
|
||||||
|
id="invalid-mode",
|
||||||
|
),
|
||||||
|
pytest.param(
|
||||||
|
"OCR_MODE",
|
||||||
|
"skip_noarchive",
|
||||||
|
"deprecated",
|
||||||
|
id="deprecated-mode",
|
||||||
|
),
|
||||||
|
pytest.param(
|
||||||
|
"OCR_SKIP_ARCHIVE_FILE",
|
||||||
|
"invalid",
|
||||||
|
'OCR_SKIP_ARCHIVE_FILE setting "invalid"',
|
||||||
|
id="invalid-skip-archive-file",
|
||||||
|
),
|
||||||
|
pytest.param(
|
||||||
|
"OCR_CLEAN",
|
||||||
|
"cleanme",
|
||||||
|
'OCR clean mode "cleanme"',
|
||||||
|
id="invalid-clean",
|
||||||
|
),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_invalid_setting_produces_one_error(
|
||||||
|
self,
|
||||||
|
settings: SettingsWrapper,
|
||||||
|
setting: str,
|
||||||
|
value: str,
|
||||||
|
expected_msg: str,
|
||||||
|
) -> None:
|
||||||
"""
|
"""
|
||||||
GIVEN:
|
GIVEN:
|
||||||
- Default settings
|
- Default settings
|
||||||
- OCR output type is invalid
|
- One OCR setting is set to an invalid value
|
||||||
WHEN:
|
WHEN:
|
||||||
- Settings are validated
|
- Settings are validated
|
||||||
THEN:
|
THEN:
|
||||||
- system check error reported for OCR output type
|
- Exactly one system check error is reported containing the expected message
|
||||||
"""
|
"""
|
||||||
|
setattr(settings, setting, value)
|
||||||
|
|
||||||
msgs = settings_values_check(None)
|
msgs = settings_values_check(None)
|
||||||
self.assertEqual(len(msgs), 1)
|
|
||||||
|
|
||||||
msg = msgs[0]
|
assert len(msgs) == 1
|
||||||
|
assert expected_msg in msgs[0].msg
|
||||||
self.assertIn('OCR output type "notapdf"', msg.msg)
|
|
||||||
|
|
||||||
@override_settings(OCR_MODE="makeitso")
|
|
||||||
def test_invalid_ocr_type(self) -> None:
|
|
||||||
"""
|
|
||||||
GIVEN:
|
|
||||||
- Default settings
|
|
||||||
- OCR type is invalid
|
|
||||||
WHEN:
|
|
||||||
- Settings are validated
|
|
||||||
THEN:
|
|
||||||
- system check error reported for OCR type
|
|
||||||
"""
|
|
||||||
msgs = settings_values_check(None)
|
|
||||||
self.assertEqual(len(msgs), 1)
|
|
||||||
|
|
||||||
msg = msgs[0]
|
|
||||||
|
|
||||||
self.assertIn('OCR output mode "makeitso"', msg.msg)
|
|
||||||
|
|
||||||
@override_settings(OCR_MODE="skip_noarchive")
|
|
||||||
def test_deprecated_ocr_type(self) -> None:
|
|
||||||
"""
|
|
||||||
GIVEN:
|
|
||||||
- Default settings
|
|
||||||
- OCR type is deprecated
|
|
||||||
WHEN:
|
|
||||||
- Settings are validated
|
|
||||||
THEN:
|
|
||||||
- deprecation warning reported for OCR type
|
|
||||||
"""
|
|
||||||
msgs = settings_values_check(None)
|
|
||||||
self.assertEqual(len(msgs), 1)
|
|
||||||
|
|
||||||
msg = msgs[0]
|
|
||||||
|
|
||||||
self.assertIn("deprecated", msg.msg)
|
|
||||||
|
|
||||||
@override_settings(OCR_SKIP_ARCHIVE_FILE="invalid")
|
|
||||||
def test_invalid_ocr_skip_archive_file(self) -> None:
|
|
||||||
"""
|
|
||||||
GIVEN:
|
|
||||||
- Default settings
|
|
||||||
- OCR_SKIP_ARCHIVE_FILE is invalid
|
|
||||||
WHEN:
|
|
||||||
- Settings are validated
|
|
||||||
THEN:
|
|
||||||
- system check error reported for OCR_SKIP_ARCHIVE_FILE
|
|
||||||
"""
|
|
||||||
msgs = settings_values_check(None)
|
|
||||||
self.assertEqual(len(msgs), 1)
|
|
||||||
|
|
||||||
msg = msgs[0]
|
|
||||||
|
|
||||||
self.assertIn('OCR_SKIP_ARCHIVE_FILE setting "invalid"', msg.msg)
|
|
||||||
|
|
||||||
@override_settings(OCR_CLEAN="cleanme")
|
|
||||||
def test_invalid_ocr_clean(self) -> None:
|
|
||||||
"""
|
|
||||||
GIVEN:
|
|
||||||
- Default settings
|
|
||||||
- OCR cleaning type is invalid
|
|
||||||
WHEN:
|
|
||||||
- Settings are validated
|
|
||||||
THEN:
|
|
||||||
- system check error reported for OCR cleaning type
|
|
||||||
"""
|
|
||||||
msgs = settings_values_check(None)
|
|
||||||
self.assertEqual(len(msgs), 1)
|
|
||||||
|
|
||||||
msg = msgs[0]
|
|
||||||
|
|
||||||
self.assertIn('OCR clean mode "cleanme"', msg.msg)
|
|
||||||
|
|
||||||
|
|
||||||
class TestTimezoneSettingsChecks(DirectoriesMixin, TestCase):
|
class TestTimezoneSettingsChecks:
|
||||||
@override_settings(TIME_ZONE="TheMoon\\MyCrater")
|
def test_invalid_timezone(self, settings: SettingsWrapper) -> None:
|
||||||
def test_invalid_timezone(self) -> None:
|
|
||||||
"""
|
"""
|
||||||
GIVEN:
|
GIVEN:
|
||||||
- Default settings
|
- Default settings
|
||||||
@@ -185,17 +181,16 @@ class TestTimezoneSettingsChecks(DirectoriesMixin, TestCase):
|
|||||||
THEN:
|
THEN:
|
||||||
- system check error reported for timezone
|
- system check error reported for timezone
|
||||||
"""
|
"""
|
||||||
|
settings.TIME_ZONE = "TheMoon\\MyCrater"
|
||||||
|
|
||||||
msgs = settings_values_check(None)
|
msgs = settings_values_check(None)
|
||||||
self.assertEqual(len(msgs), 1)
|
|
||||||
|
|
||||||
msg = msgs[0]
|
assert len(msgs) == 1
|
||||||
|
assert 'Timezone "TheMoon\\MyCrater"' in msgs[0].msg
|
||||||
self.assertIn('Timezone "TheMoon\\MyCrater"', msg.msg)
|
|
||||||
|
|
||||||
|
|
||||||
class TestEmailCertSettingsChecks(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
class TestEmailCertSettingsChecks:
|
||||||
@override_settings(EMAIL_CERTIFICATE_FILE=Path("/tmp/not_actually_here.pem"))
|
def test_not_valid_file(self, settings: SettingsWrapper) -> None:
|
||||||
def test_not_valid_file(self) -> None:
|
|
||||||
"""
|
"""
|
||||||
GIVEN:
|
GIVEN:
|
||||||
- Default settings
|
- Default settings
|
||||||
@@ -205,19 +200,22 @@ class TestEmailCertSettingsChecks(DirectoriesMixin, FileSystemAssertsMixin, Test
|
|||||||
THEN:
|
THEN:
|
||||||
- system check error reported for email certificate
|
- system check error reported for email certificate
|
||||||
"""
|
"""
|
||||||
self.assertIsNotFile("/tmp/not_actually_here.pem")
|
cert_path = Path("/tmp/not_actually_here.pem")
|
||||||
|
assert not cert_path.is_file()
|
||||||
|
settings.EMAIL_CERTIFICATE_FILE = cert_path
|
||||||
|
|
||||||
msgs = settings_values_check(None)
|
msgs = settings_values_check(None)
|
||||||
|
|
||||||
self.assertEqual(len(msgs), 1)
|
assert len(msgs) == 1
|
||||||
|
assert "Email cert /tmp/not_actually_here.pem is not a file" in msgs[0].msg
|
||||||
msg = msgs[0]
|
|
||||||
|
|
||||||
self.assertIn("Email cert /tmp/not_actually_here.pem is not a file", msg.msg)
|
|
||||||
|
|
||||||
|
|
||||||
class TestAuditLogChecks(TestCase):
|
class TestAuditLogChecks:
|
||||||
def test_was_enabled_once(self) -> None:
|
def test_was_enabled_once(
|
||||||
|
self,
|
||||||
|
settings: SettingsWrapper,
|
||||||
|
mocker: MockerFixture,
|
||||||
|
) -> None:
|
||||||
"""
|
"""
|
||||||
GIVEN:
|
GIVEN:
|
||||||
- Audit log is not enabled
|
- Audit log is not enabled
|
||||||
@@ -226,23 +224,18 @@ class TestAuditLogChecks(TestCase):
|
|||||||
THEN:
|
THEN:
|
||||||
- system check error reported for disabling audit log
|
- system check error reported for disabling audit log
|
||||||
"""
|
"""
|
||||||
introspect_mock = mock.MagicMock()
|
settings.AUDIT_LOG_ENABLED = False
|
||||||
|
introspect_mock = mocker.MagicMock()
|
||||||
introspect_mock.introspection.table_names.return_value = ["auditlog_logentry"]
|
introspect_mock.introspection.table_names.return_value = ["auditlog_logentry"]
|
||||||
with override_settings(AUDIT_LOG_ENABLED=False):
|
mocker.patch.dict(
|
||||||
with mock.patch.dict(
|
"paperless.checks.connections",
|
||||||
"paperless.checks.connections",
|
{"default": introspect_mock},
|
||||||
{"default": introspect_mock},
|
)
|
||||||
):
|
|
||||||
msgs = audit_log_check(None)
|
|
||||||
|
|
||||||
self.assertEqual(len(msgs), 1)
|
msgs = audit_log_check(None)
|
||||||
|
|
||||||
msg = msgs[0]
|
assert len(msgs) == 1
|
||||||
|
assert "auditlog table was found but audit log is disabled." in msgs[0].msg
|
||||||
self.assertIn(
|
|
||||||
("auditlog table was found but audit log is disabled."),
|
|
||||||
msg.msg,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
DEPRECATED_VARS: dict[str, str] = {
|
DEPRECATED_VARS: dict[str, str] = {
|
||||||
@@ -271,20 +264,16 @@ class TestDeprecatedDbSettings:
|
|||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
("env_var", "db_option_key"),
|
("env_var", "db_option_key"),
|
||||||
[
|
[
|
||||||
("PAPERLESS_DB_TIMEOUT", "timeout"),
|
pytest.param("PAPERLESS_DB_TIMEOUT", "timeout", id="db-timeout"),
|
||||||
("PAPERLESS_DB_POOLSIZE", "pool.min_size / pool.max_size"),
|
pytest.param(
|
||||||
("PAPERLESS_DBSSLMODE", "sslmode"),
|
"PAPERLESS_DB_POOLSIZE",
|
||||||
("PAPERLESS_DBSSLROOTCERT", "sslrootcert"),
|
"pool.min_size / pool.max_size",
|
||||||
("PAPERLESS_DBSSLCERT", "sslcert"),
|
id="db-poolsize",
|
||||||
("PAPERLESS_DBSSLKEY", "sslkey"),
|
),
|
||||||
],
|
pytest.param("PAPERLESS_DBSSLMODE", "sslmode", id="ssl-mode"),
|
||||||
ids=[
|
pytest.param("PAPERLESS_DBSSLROOTCERT", "sslrootcert", id="ssl-rootcert"),
|
||||||
"db-timeout",
|
pytest.param("PAPERLESS_DBSSLCERT", "sslcert", id="ssl-cert"),
|
||||||
"db-poolsize",
|
pytest.param("PAPERLESS_DBSSLKEY", "sslkey", id="ssl-key"),
|
||||||
"ssl-mode",
|
|
||||||
"ssl-rootcert",
|
|
||||||
"ssl-cert",
|
|
||||||
"ssl-key",
|
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
def test_single_deprecated_var_produces_one_warning(
|
def test_single_deprecated_var_produces_one_warning(
|
||||||
@@ -403,7 +392,10 @@ class TestV3MinimumUpgradeVersionCheck:
|
|||||||
"""Test suite for check_v3_minimum_upgrade_version system check."""
|
"""Test suite for check_v3_minimum_upgrade_version system check."""
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def build_conn_mock(self, mocker: MockerFixture):
|
def build_conn_mock(
|
||||||
|
self,
|
||||||
|
mocker: MockerFixture,
|
||||||
|
) -> Callable[[list[str], list[str]], mock.MagicMock]:
|
||||||
"""Factory fixture that builds a connections['default'] mock.
|
"""Factory fixture that builds a connections['default'] mock.
|
||||||
|
|
||||||
Usage::
|
Usage::
|
||||||
@@ -423,7 +415,7 @@ class TestV3MinimumUpgradeVersionCheck:
|
|||||||
def test_no_migrations_table_fresh_install(
|
def test_no_migrations_table_fresh_install(
|
||||||
self,
|
self,
|
||||||
mocker: MockerFixture,
|
mocker: MockerFixture,
|
||||||
build_conn_mock,
|
build_conn_mock: Callable[[list[str], list[str]], mock.MagicMock],
|
||||||
) -> None:
|
) -> None:
|
||||||
"""
|
"""
|
||||||
GIVEN:
|
GIVEN:
|
||||||
@@ -442,7 +434,7 @@ class TestV3MinimumUpgradeVersionCheck:
|
|||||||
def test_no_documents_migrations_fresh_install(
|
def test_no_documents_migrations_fresh_install(
|
||||||
self,
|
self,
|
||||||
mocker: MockerFixture,
|
mocker: MockerFixture,
|
||||||
build_conn_mock,
|
build_conn_mock: Callable[[list[str], list[str]], mock.MagicMock],
|
||||||
) -> None:
|
) -> None:
|
||||||
"""
|
"""
|
||||||
GIVEN:
|
GIVEN:
|
||||||
@@ -461,7 +453,7 @@ class TestV3MinimumUpgradeVersionCheck:
|
|||||||
def test_v3_state_with_0001_squashed(
|
def test_v3_state_with_0001_squashed(
|
||||||
self,
|
self,
|
||||||
mocker: MockerFixture,
|
mocker: MockerFixture,
|
||||||
build_conn_mock,
|
build_conn_mock: Callable[[list[str], list[str]], mock.MagicMock],
|
||||||
) -> None:
|
) -> None:
|
||||||
"""
|
"""
|
||||||
GIVEN:
|
GIVEN:
|
||||||
@@ -485,7 +477,7 @@ class TestV3MinimumUpgradeVersionCheck:
|
|||||||
def test_v3_state_with_0002_squashed_only(
|
def test_v3_state_with_0002_squashed_only(
|
||||||
self,
|
self,
|
||||||
mocker: MockerFixture,
|
mocker: MockerFixture,
|
||||||
build_conn_mock,
|
build_conn_mock: Callable[[list[str], list[str]], mock.MagicMock],
|
||||||
) -> None:
|
) -> None:
|
||||||
"""
|
"""
|
||||||
GIVEN:
|
GIVEN:
|
||||||
@@ -504,7 +496,7 @@ class TestV3MinimumUpgradeVersionCheck:
|
|||||||
def test_v2_20_9_state_ready_to_upgrade(
|
def test_v2_20_9_state_ready_to_upgrade(
|
||||||
self,
|
self,
|
||||||
mocker: MockerFixture,
|
mocker: MockerFixture,
|
||||||
build_conn_mock,
|
build_conn_mock: Callable[[list[str], list[str]], mock.MagicMock],
|
||||||
) -> None:
|
) -> None:
|
||||||
"""
|
"""
|
||||||
GIVEN:
|
GIVEN:
|
||||||
@@ -531,7 +523,7 @@ class TestV3MinimumUpgradeVersionCheck:
|
|||||||
def test_v2_20_8_raises_error(
|
def test_v2_20_8_raises_error(
|
||||||
self,
|
self,
|
||||||
mocker: MockerFixture,
|
mocker: MockerFixture,
|
||||||
build_conn_mock,
|
build_conn_mock: Callable[[list[str], list[str]], mock.MagicMock],
|
||||||
) -> None:
|
) -> None:
|
||||||
"""
|
"""
|
||||||
GIVEN:
|
GIVEN:
|
||||||
@@ -558,7 +550,7 @@ class TestV3MinimumUpgradeVersionCheck:
|
|||||||
def test_very_old_version_raises_error(
|
def test_very_old_version_raises_error(
|
||||||
self,
|
self,
|
||||||
mocker: MockerFixture,
|
mocker: MockerFixture,
|
||||||
build_conn_mock,
|
build_conn_mock: Callable[[list[str], list[str]], mock.MagicMock],
|
||||||
) -> None:
|
) -> None:
|
||||||
"""
|
"""
|
||||||
GIVEN:
|
GIVEN:
|
||||||
@@ -585,7 +577,7 @@ class TestV3MinimumUpgradeVersionCheck:
|
|||||||
def test_error_hint_mentions_v2_20_9(
|
def test_error_hint_mentions_v2_20_9(
|
||||||
self,
|
self,
|
||||||
mocker: MockerFixture,
|
mocker: MockerFixture,
|
||||||
build_conn_mock,
|
build_conn_mock: Callable[[list[str], list[str]], mock.MagicMock],
|
||||||
) -> None:
|
) -> None:
|
||||||
"""
|
"""
|
||||||
GIVEN:
|
GIVEN:
|
||||||
|
|||||||
714
src/paperless/tests/test_registry.py
Normal file
714
src/paperless/tests/test_registry.py
Normal file
@@ -0,0 +1,714 @@
|
|||||||
|
"""
|
||||||
|
Tests for :mod:`paperless.parsers` (ParserProtocol) and
|
||||||
|
:mod:`paperless.parsers.registry` (ParserRegistry + module-level helpers).
|
||||||
|
|
||||||
|
All tests use pytest-style functions/classes — no unittest.TestCase.
|
||||||
|
The ``clean_registry`` fixture ensures complete isolation between tests by
|
||||||
|
resetting the module-level singleton before and after every test.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from importlib.metadata import EntryPoint
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Self
|
||||||
|
from unittest.mock import MagicMock
|
||||||
|
from unittest.mock import patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from paperless.parsers import ParserProtocol
|
||||||
|
from paperless.parsers.registry import ParserRegistry
|
||||||
|
from paperless.parsers.registry import get_parser_registry
|
||||||
|
from paperless.parsers.registry import init_builtin_parsers
|
||||||
|
from paperless.parsers.registry import reset_parser_registry
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
def dummy_parser_cls() -> type:
    """Provide a minimal class that implements all of :class:`ParserProtocol`.

    GIVEN: Registry and Protocol tests that need a trivial but complete
           parser implementation.
    WHEN: A test requests this fixture.
    THEN: The class itself (not an instance) is returned, carrying every
          attribute and method the tests rely on.
    """

    class DummyParser:
        # Identification metadata.
        url = "https://example.com/dummy-parser"
        author = "Test Author"
        version = "0.1.0"
        name = "dummy-parser"

        # -- Class-level selection hooks -------------------------------

        @classmethod
        def score(
            cls,
            mime_type: str,
            filename: str,
            path: Path | None = None,
        ) -> int | None:
            # Constant score, independent of the arguments.
            return 10

        @classmethod
        def supported_mime_types(cls) -> dict[str, str]:
            return {"text/plain": ".txt"}

        # -- Capability flags ------------------------------------------

        @property
        def requires_pdf_rendition(self) -> bool:
            return False

        @property
        def can_produce_archive(self) -> bool:
            return False

        # -- Context-manager support -----------------------------------

        def __enter__(self) -> Self:
            return self

        def __exit__(self, exc_type, exc_val, exc_tb) -> None:
            # Must exist, but has nothing to clean up.
            return None

        # -- Parsing and result accessors ------------------------------

        def parse(
            self,
            document_path: Path,
            mime_type: str,
            *,
            produce_archive: bool = True,
        ) -> None:
            # Must exist, but intentionally does no work.
            return None

        def get_text(self) -> str | None:
            return None

        def get_date(self) -> None:
            return None

        def get_archive_path(self) -> Path | None:
            return None

        def get_thumbnail(self, document_path: Path, mime_type: str) -> Path:
            return Path("/tmp/thumbnail.webp")

        def get_page_count(self, document_path: Path, mime_type: str) -> int | None:
            return None

        def extract_metadata(self, document_path: Path, mime_type: str) -> list:
            return []

    return DummyParser
|
||||||
|
|
||||||
|
|
||||||
|
class TestParserProtocol:
    """Runtime ``isinstance()`` behaviour of :class:`ParserProtocol`."""

    def test_compliant_class_instance_passes_isinstance(
        self,
        dummy_parser_cls: type,
    ) -> None:
        """
        GIVEN: A class implementing every member required by ParserProtocol.
        WHEN: An instance is checked with isinstance() against the Protocol.
        THEN: The check succeeds.
        """
        assert isinstance(dummy_parser_cls(), ParserProtocol)

    def test_non_compliant_class_instance_fails_isinstance(self) -> None:
        """
        GIVEN: An empty class with none of the parser methods.
        WHEN: An instance is checked with isinstance() against ParserProtocol.
        THEN: The check is rejected.
        """

        class Unrelated:
            """Deliberately empty."""

        candidate = Unrelated()
        assert not isinstance(candidate, ParserProtocol)

    @pytest.mark.parametrize(
        "missing_method",
        [
            pytest.param(method_name, id=f"missing-{method_name}")
            for method_name in (
                "parse",
                "get_text",
                "get_thumbnail",
                "__enter__",
                "__exit__",
            )
        ],
    )
    def test_partial_compliant_fails_isinstance(
        self,
        dummy_parser_cls: type,
        missing_method: str,
    ) -> None:
        """
        GIVEN: A class satisfying ParserProtocol except for a single method.
        WHEN: An instance is checked with isinstance() against ParserProtocol.
        THEN: The check fails because the Protocol is not fully satisfied.
        """
        # Subclass the compliant parser and shadow one method with None, so
        # the attribute still exists but is no longer callable.
        overrides = {missing_method: None}
        partial_cls = type("PartialParser", (dummy_parser_cls,), overrides)

        broken_instance = partial_cls()
        assert not isinstance(broken_instance, ParserProtocol)
|
||||||
|
|
||||||
|
|
||||||
|
class TestRegistrySingleton:
    """Lifecycle of the module-level registry singleton helpers."""

    def test_get_parser_registry_returns_instance(self) -> None:
        """
        GIVEN: No registry has been created yet.
        WHEN: get_parser_registry() is called.
        THEN: The returned object is a ParserRegistry.
        """
        assert isinstance(get_parser_registry(), ParserRegistry)

    def test_get_parser_registry_same_instance_on_repeated_calls(self) -> None:
        """
        GIVEN: A registry was created by an earlier call.
        WHEN: get_parser_registry() is called again.
        THEN: Both calls hand back the very same object.
        """
        assert get_parser_registry() is get_parser_registry()

    def test_reset_parser_registry_gives_fresh_instance(self) -> None:
        """
        GIVEN: A registry instance already exists.
        WHEN: reset_parser_registry() runs and get_parser_registry() is
              called afterwards.
        THEN: The second registry is a different object from the first.
        """
        before_reset = get_parser_registry()
        reset_parser_registry()
        assert get_parser_registry() is not before_reset

    def test_init_builtin_parsers_does_not_run_discover(
        self,
        monkeypatch: pytest.MonkeyPatch,
    ) -> None:
        """
        GIVEN: discover() is replaced with a function that always raises.
        WHEN: init_builtin_parsers() is called.
        THEN: Nothing is raised, which shows discover() was never invoked.
        """

        def fail_if_called(self) -> None:
            message = "discover() must not be called from init_builtin_parsers"
            raise RuntimeError(message)

        monkeypatch.setattr(ParserRegistry, "discover", fail_if_called)

        # Completing without an exception is the assertion.
        init_builtin_parsers()

    def test_init_builtin_parsers_idempotent(self) -> None:
        """
        GIVEN: init_builtin_parsers() has already run once.
        WHEN: It is run a second time.
        THEN: No error occurs and the module still holds the same registry.
        """
        import paperless.parsers.registry as registry_module

        init_builtin_parsers()
        # Remember the registry produced by the first call.
        registry_after_first_call = registry_module._registry

        init_builtin_parsers()

        assert registry_module._registry is registry_after_first_call
|
||||||
|
|
||||||
|
|
||||||
|
class TestParserRegistryGetParserForFile:
    """Parser selection rules implemented by get_parser_for_file()."""

    @staticmethod
    def _stub_parser(
        parser_name: str,
        fixed_score: int | None,
        *,
        parser_version: str = "1.0",
        parser_author: str = "A",
        parser_url: str = "https://example.com",
    ) -> type:
        """Build a 'text/plain' parser class whose score() is a constant."""

        class StubParser:
            name = parser_name
            version = parser_version
            author = parser_author
            url = parser_url

            @classmethod
            def supported_mime_types(cls):
                return {"text/plain": ".txt"}

            @classmethod
            def score(cls, mime_type, filename, path=None):
                return fixed_score

        return StubParser

    def test_returns_none_when_no_parsers_registered(self) -> None:
        """
        GIVEN: An empty registry.
        WHEN: get_parser_for_file() is called for any MIME type.
        THEN: None is returned.
        """
        empty_registry = ParserRegistry()
        assert empty_registry.get_parser_for_file("text/plain", "doc.txt") is None

    def test_returns_none_for_unsupported_mime_type(
        self,
        dummy_parser_cls: type,
    ) -> None:
        """
        GIVEN: A registry whose only parser supports 'text/plain'.
        WHEN: get_parser_for_file() is called with 'application/pdf'.
        THEN: None is returned.
        """
        registry = ParserRegistry()
        registry.register_builtin(dummy_parser_cls)

        chosen = registry.get_parser_for_file("application/pdf", "file.pdf")

        assert chosen is None

    def test_returns_parser_for_supported_mime_type(
        self,
        dummy_parser_cls: type,
    ) -> None:
        """
        GIVEN: A registry with a parser registered for 'text/plain'.
        WHEN: get_parser_for_file() is called with 'text/plain'.
        THEN: The registered parser class is returned.
        """
        registry = ParserRegistry()
        registry.register_builtin(dummy_parser_cls)

        chosen = registry.get_parser_for_file("text/plain", "readme.txt")

        assert chosen is dummy_parser_cls

    def test_highest_score_wins(self) -> None:
        """
        GIVEN: Two 'text/plain' parsers scoring 5 and 20.
        WHEN: get_parser_for_file() is called for 'text/plain'.
        THEN: The parser scoring 20 is returned.
        """
        low_scorer = self._stub_parser(
            "low",
            5,
            parser_author="A",
            parser_url="https://example.com/low",
        )
        high_scorer = self._stub_parser(
            "high",
            20,
            parser_author="B",
            parser_url="https://example.com/high",
        )

        registry = ParserRegistry()
        for parser_cls in (low_scorer, high_scorer):
            registry.register_builtin(parser_cls)

        assert registry.get_parser_for_file("text/plain", "readme.txt") is high_scorer

    def test_parser_returning_none_score_is_skipped(self) -> None:
        """
        GIVEN: A parser whose score() returns None for the file.
        WHEN: get_parser_for_file() is called.
        THEN: The parser is skipped and, with no other candidate, None is
              returned.
        """
        # A score of None means the parser explicitly declines the file.
        declining = self._stub_parser("declining", None)

        registry = ParserRegistry()
        registry.register_builtin(declining)

        assert registry.get_parser_for_file("text/plain", "readme.txt") is None

    def test_all_parsers_decline_returns_none(self) -> None:
        """
        GIVEN: A declining parser present both as built-in and as external,
               so every candidate returns None from score().
        WHEN: get_parser_for_file() is called.
        THEN: None is returned.
        """
        always_declines = self._stub_parser("declines", None)

        registry = ParserRegistry()
        registry.register_builtin(always_declines)
        registry._external.append(always_declines)

        assert registry.get_parser_for_file("text/plain", "file.txt") is None

    def test_external_parser_beats_builtin_same_score(self) -> None:
        """
        GIVEN: An external and a built-in parser that both score 10.
        WHEN: get_parser_for_file() is called.
        THEN: The external parser wins, because externals are evaluated
              first and the first-seen-wins policy applies at equal scores.
        """
        builtin_parser = self._stub_parser(
            "builtin",
            10,
            parser_version="1.0",
            parser_author="Core",
            parser_url="https://example.com/builtin",
        )
        external_parser = self._stub_parser(
            "external",
            10,
            parser_version="2.0",
            parser_author="Third Party",
            parser_url="https://example.com/external",
        )

        registry = ParserRegistry()
        registry.register_builtin(builtin_parser)
        registry._external.append(external_parser)

        assert registry.get_parser_for_file("text/plain", "file.txt") is external_parser

    def test_builtin_wins_when_external_declines(self) -> None:
        """
        GIVEN: An external parser that declines (score None) and a built-in
               parser that scores 5.
        WHEN: get_parser_for_file() is called.
        THEN: The built-in parser is returned.
        """
        declining_external = self._stub_parser(
            "declining-external",
            None,
            parser_author="Third Party",
            parser_url="https://example.com/declining",
        )
        accepting_builtin = self._stub_parser(
            "accepting-builtin",
            5,
            parser_author="Core",
            parser_url="https://example.com/accepting",
        )

        registry = ParserRegistry()
        registry.register_builtin(accepting_builtin)
        registry._external.append(declining_external)

        assert registry.get_parser_for_file("text/plain", "file.txt") is accepting_builtin
|
||||||
|
|
||||||
|
|
||||||
|
class TestDiscover:
    """Verify entrypoint discovery in ParserRegistry.discover()."""

    @staticmethod
    def _mock_entry_point(name: str) -> MagicMock:
        """Return an EntryPoint-shaped mock carrying the given name."""
        entry_point = MagicMock(spec=EntryPoint)
        entry_point.name = name
        return entry_point

    @staticmethod
    def _discover(available_entry_points: list) -> ParserRegistry:
        """Run discover() on a fresh registry with entry_points() patched.

        The registry module's ``entry_points`` lookup is replaced so that it
        yields exactly ``available_entry_points``; the populated registry is
        returned for inspection.
        """
        registry = ParserRegistry()
        with patch(
            "paperless.parsers.registry.entry_points",
            return_value=available_entry_points,
        ):
            registry.discover()
        return registry

    def test_discover_with_no_entrypoints(self) -> None:
        """
        GIVEN: No entrypoints are registered under 'paperless_ngx.parsers'.
        WHEN: discover() is called.
        THEN: _external remains empty and no errors are raised.
        """
        registry = self._discover([])

        assert registry._external == []

    def test_discover_adds_valid_external_parser(self) -> None:
        """
        GIVEN: One valid entrypoint whose loaded class has all required attrs.
        WHEN: discover() is called.
        THEN: The class is appended to _external.
        """

        class ValidExternal:
            name = "valid-external"
            version = "3.0.0"
            author = "Someone"
            url = "https://example.com/valid"

            @classmethod
            def supported_mime_types(cls):
                return {"application/pdf": ".pdf"}

            @classmethod
            def score(cls, mime_type, filename, path=None):
                return 5

        entry_point = self._mock_entry_point("valid_external")
        entry_point.load.return_value = ValidExternal

        registry = self._discover([entry_point])

        assert ValidExternal in registry._external

    def test_discover_skips_entrypoint_with_load_error(
        self,
        caplog: pytest.LogCaptureFixture,
    ) -> None:
        """
        GIVEN: An entrypoint whose load() method raises ImportError.
        WHEN: discover() is called.
        THEN: The entrypoint is skipped, an error is logged, and _external
              remains empty.
        """
        entry_point = self._mock_entry_point("broken_ep")
        entry_point.load.side_effect = ImportError("missing dependency")

        with caplog.at_level(logging.ERROR, logger="paperless.parsers.registry"):
            registry = self._discover([entry_point])

        assert registry._external == []
        assert any(
            "broken_ep" in record.getMessage()
            for record in caplog.records
            if record.levelno >= logging.ERROR
        )

    def test_discover_skips_entrypoint_with_missing_attrs(
        self,
        caplog: pytest.LogCaptureFixture,
    ) -> None:
        """
        GIVEN: A class loaded from an entrypoint that is missing the 'score'
               attribute.
        WHEN: discover() is called.
        THEN: The entrypoint is skipped, a warning is logged, and _external
              remains empty.
        """

        class MissingScore:
            name = "missing-score"
            version = "1.0"
            author = "Someone"
            url = "https://example.com"

            # 'score' classmethod is intentionally absent.

            @classmethod
            def supported_mime_types(cls):
                return {"text/plain": ".txt"}

        entry_point = self._mock_entry_point("missing_score_ep")
        entry_point.load.return_value = MissingScore

        with caplog.at_level(logging.WARNING, logger="paperless.parsers.registry"):
            registry = self._discover([entry_point])

        assert registry._external == []
        assert any(
            "missing_score_ep" in record.getMessage()
            for record in caplog.records
            if record.levelno >= logging.WARNING
        )

    def test_discover_logs_loaded_parser_info(
        self,
        caplog: pytest.LogCaptureFixture,
    ) -> None:
        """
        GIVEN: A valid entrypoint that loads successfully.
        WHEN: discover() is called.
        THEN: An INFO log message is emitted containing the parser name,
              version, author, and entrypoint name.
        """

        class LoggableParser:
            # The parser name must NOT be a substring of the entrypoint name
            # ("loggable_ep"); otherwise the name assertion below would pass
            # even if only the entrypoint name were logged.
            name = "loggable-parser"
            version = "4.2.0"
            author = "Log Tester"
            url = "https://example.com/loggable"

            @classmethod
            def supported_mime_types(cls):
                return {"image/png": ".png"}

            @classmethod
            def score(cls, mime_type, filename, path=None):
                return 1

        entry_point = self._mock_entry_point("loggable_ep")
        entry_point.load.return_value = LoggableParser

        with caplog.at_level(logging.INFO, logger="paperless.parsers.registry"):
            self._discover([entry_point])

        info_messages = " ".join(
            record.getMessage()
            for record in caplog.records
            if record.levelno == logging.INFO
        )
        assert "loggable-parser" in info_messages
        assert "4.2.0" in info_messages
        assert "Log Tester" in info_messages
        assert "loggable_ep" in info_messages
|
||||||
|
|
||||||
|
|
||||||
|
class TestLogSummary:
    """Log output produced by ParserRegistry.log_summary()."""

    @staticmethod
    def _summary_text(
        registry: ParserRegistry,
        caplog: pytest.LogCaptureFixture,
    ) -> str:
        """Call log_summary() at INFO level and join all captured messages."""
        with caplog.at_level(logging.INFO, logger="paperless.parsers.registry"):
            registry.log_summary()
        return " ".join(record.getMessage() for record in caplog.records)

    def test_log_summary_with_no_external_parsers(
        self,
        dummy_parser_cls: type,
        caplog: pytest.LogCaptureFixture,
    ) -> None:
        """
        GIVEN: A registry holding one built-in parser and no externals.
        WHEN: log_summary() is called.
        THEN: The built-in parser's name shows up in the logs.
        """
        registry = ParserRegistry()
        registry.register_builtin(dummy_parser_cls)

        logged = self._summary_text(registry, caplog)

        assert dummy_parser_cls.name in logged

    def test_log_summary_with_external_parsers(
        self,
        caplog: pytest.LogCaptureFixture,
    ) -> None:
        """
        GIVEN: A registry holding one external parser.
        WHEN: log_summary() is called.
        THEN: The external parser's name, version, author, and url all
              appear in the log output.
        """

        class ExtParser:
            name = "ext-parser"
            version = "9.9.9"
            author = "Ext Corp"
            url = "https://ext.example.com"

            @classmethod
            def supported_mime_types(cls):
                return {}

            @classmethod
            def score(cls, mime_type, filename, path=None):
                return None

        registry = ParserRegistry()
        registry._external.append(ExtParser)

        logged = self._summary_text(registry, caplog)

        for expected_fragment in (
            "ext-parser",
            "9.9.9",
            "Ext Corp",
            "https://ext.example.com",
        ):
            assert expected_fragment in logged

    def test_log_summary_logs_no_third_party_message_when_none(
        self,
        caplog: pytest.LogCaptureFixture,
    ) -> None:
        """
        GIVEN: A registry without any external parsers.
        WHEN: log_summary() is called.
        THEN: A message containing 'No third-party parsers discovered.' is
              logged.
        """
        logged = self._summary_text(ParserRegistry(), caplog)

        assert "No third-party parsers discovered." in logged
|
||||||
@@ -1,482 +0,0 @@
|
|||||||
import datetime
|
|
||||||
import os
|
|
||||||
from unittest import TestCase
|
|
||||||
from unittest import mock
|
|
||||||
|
|
||||||
import pytest
|
|
||||||
from celery.schedules import crontab
|
|
||||||
|
|
||||||
from paperless.settings import _parse_base_paths
|
|
||||||
from paperless.settings import _parse_beat_schedule
|
|
||||||
from paperless.settings import _parse_dateparser_languages
|
|
||||||
from paperless.settings import _parse_ignore_dates
|
|
||||||
from paperless.settings import _parse_paperless_url
|
|
||||||
from paperless.settings import _parse_redis_url
|
|
||||||
from paperless.settings import default_threads_per_worker
|
|
||||||
|
|
||||||
|
|
||||||
class TestIgnoreDateParsing(TestCase):
    """
    Tests the parsing of the PAPERLESS_IGNORE_DATES setting value
    """

    def _parse_checker(self, test_cases) -> None:
        """
        Helper function to check ignore date parsing

        Args:
            test_cases: iterable of ``(env_str, date_format,
                expected_date_set)`` tuples. Each ``env_str`` is parsed with
                ``date_format`` and the result must equal
                ``expected_date_set``.
        """
        for env_str, date_format, expected_date_set in test_cases:
            # subTest reports which input failed and lets the remaining
            # cases run instead of stopping at the first mismatch.
            with self.subTest(env_str=env_str, date_format=date_format):
                self.assertSetEqual(
                    _parse_ignore_dates(env_str, date_format),
                    expected_date_set,
                )

    def test_no_ignore_dates_set(self) -> None:
        """
        GIVEN:
            - No ignore dates are set
        THEN:
            - No ignore dates are parsed
        """
        self.assertSetEqual(_parse_ignore_dates(""), set())

    def test_single_ignore_dates_set(self) -> None:
        """
        GIVEN:
            - Ignore dates are set per certain inputs
        THEN:
            - All ignore dates are parsed
        """
        test_cases = [
            ("1985-05-01", "YMD", {datetime.date(1985, 5, 1)}),
            (
                "1985-05-01,1991-12-05",
                "YMD",
                {datetime.date(1985, 5, 1), datetime.date(1991, 12, 5)},
            ),
            ("2010-12-13", "YMD", {datetime.date(2010, 12, 13)}),
            ("11.01.10", "DMY", {datetime.date(2010, 1, 11)}),
            (
                "11.01.2001,15-06-1996",
                "DMY",
                {datetime.date(2001, 1, 11), datetime.date(1996, 6, 15)},
            ),
        ]

        self._parse_checker(test_cases)
|
|
||||||
|
|
||||||
|
|
||||||
class TestThreadCalculation(TestCase):
    def test_workers_threads(self) -> None:
        """
        GIVEN:
            - CPU counts from 1 through 63
        WHEN:
            - The default threads per worker is computed
        THEN:
            - workers * threads never exceeds the CPU count
            - Every worker gets at least 1 thread
        """
        worker_count = 1

        for cpus in range(1, 64):
            # Make the settings module see `cpus` CPUs for this iteration.
            with mock.patch(
                "paperless.settings.multiprocessing.cpu_count",
                return_value=cpus,
            ):
                threads = default_threads_per_worker(worker_count)

                self.assertGreaterEqual(threads, 1)
                self.assertLessEqual(worker_count * threads, cpus)
|
|
||||||
|
|
||||||
|
|
||||||
class TestRedisSocketConversion(TestCase):
    def test_redis_socket_parsing(self) -> None:
        """
        GIVEN:
            - Various Redis connection URI formats
        WHEN:
            - The URI is parsed
        THEN:
            - Socket based URIs are translated
            - Non-socket URIs are unchanged
            - None provided uses default
        """
        # Each case is (configured URL, expected 2-tuple from _parse_redis_url).
        cases = [
            # Nothing is set
            (None, ("redis://localhost:6379", "redis://localhost:6379")),
            # celery style
            (
                "redis+socket:///run/redis/redis.sock",
                (
                    "redis+socket:///run/redis/redis.sock",
                    "unix:///run/redis/redis.sock",
                ),
            ),
            # redis-py / channels-redis style
            (
                "unix:///run/redis/redis.sock",
                (
                    "redis+socket:///run/redis/redis.sock",
                    "unix:///run/redis/redis.sock",
                ),
            ),
            # celery style with db
            (
                "redis+socket:///run/redis/redis.sock?virtual_host=5",
                (
                    "redis+socket:///run/redis/redis.sock?virtual_host=5",
                    "unix:///run/redis/redis.sock?db=5",
                ),
            ),
            # redis-py / channels-redis style with db
            (
                "unix:///run/redis/redis.sock?db=10",
                (
                    "redis+socket:///run/redis/redis.sock?virtual_host=10",
                    "unix:///run/redis/redis.sock?db=10",
                ),
            ),
            # Just a host with a port
            (
                "redis://myredishost:6379",
                ("redis://myredishost:6379", "redis://myredishost:6379"),
            ),
        ]

        # `redis_url` avoids shadowing the builtin `input`; subTest names the
        # failing URL and keeps the remaining cases running.
        for redis_url, expected in cases:
            with self.subTest(redis_url=redis_url):
                result = _parse_redis_url(redis_url)
                self.assertTupleEqual(expected, result)
|
|
||||||
|
|
||||||
|
|
||||||
class TestCeleryScheduleParsing(TestCase):
|
|
||||||
MAIL_EXPIRE_TIME = 9.0 * 60.0
|
|
||||||
CLASSIFIER_EXPIRE_TIME = 59.0 * 60.0
|
|
||||||
INDEX_EXPIRE_TIME = 23.0 * 60.0 * 60.0
|
|
||||||
SANITY_EXPIRE_TIME = ((7.0 * 24.0) - 1.0) * 60.0 * 60.0
|
|
||||||
EMPTY_TRASH_EXPIRE_TIME = 23.0 * 60.0 * 60.0
|
|
||||||
RUN_SCHEDULED_WORKFLOWS_EXPIRE_TIME = 59.0 * 60.0
|
|
||||||
LLM_INDEX_EXPIRE_TIME = 23.0 * 60.0 * 60.0
|
|
||||||
CLEANUP_EXPIRED_SHARE_BUNDLES_EXPIRE_TIME = 23.0 * 60.0 * 60.0
|
|
||||||
|
|
||||||
def test_schedule_configuration_default(self) -> None:
|
|
||||||
"""
|
|
||||||
GIVEN:
|
|
||||||
- No configured task schedules
|
|
||||||
WHEN:
|
|
||||||
- The celery beat schedule is built
|
|
||||||
THEN:
|
|
||||||
- The default schedule is returned
|
|
||||||
"""
|
|
||||||
schedule = _parse_beat_schedule()
|
|
||||||
|
|
||||||
self.assertDictEqual(
|
|
||||||
{
|
|
||||||
"Check all e-mail accounts": {
|
|
||||||
"task": "paperless_mail.tasks.process_mail_accounts",
|
|
||||||
"schedule": crontab(minute="*/10"),
|
|
||||||
"options": {"expires": self.MAIL_EXPIRE_TIME},
|
|
||||||
},
|
|
||||||
"Train the classifier": {
|
|
||||||
"task": "documents.tasks.train_classifier",
|
|
||||||
"schedule": crontab(minute="5", hour="*/1"),
|
|
||||||
"options": {"expires": self.CLASSIFIER_EXPIRE_TIME},
|
|
||||||
},
|
|
||||||
"Optimize the index": {
|
|
||||||
"task": "documents.tasks.index_optimize",
|
|
||||||
"schedule": crontab(minute=0, hour=0),
|
|
||||||
"options": {"expires": self.INDEX_EXPIRE_TIME},
|
|
||||||
},
|
|
||||||
"Perform sanity check": {
|
|
||||||
"task": "documents.tasks.sanity_check",
|
|
||||||
"schedule": crontab(minute=30, hour=0, day_of_week="sun"),
|
|
||||||
"options": {"expires": self.SANITY_EXPIRE_TIME},
|
|
||||||
},
|
|
||||||
"Empty trash": {
|
|
||||||
"task": "documents.tasks.empty_trash",
|
|
||||||
"schedule": crontab(minute=0, hour="1"),
|
|
||||||
"options": {"expires": self.EMPTY_TRASH_EXPIRE_TIME},
|
|
||||||
},
|
|
||||||
"Check and run scheduled workflows": {
|
|
||||||
"task": "documents.tasks.check_scheduled_workflows",
|
|
||||||
"schedule": crontab(minute="5", hour="*/1"),
|
|
||||||
"options": {"expires": self.RUN_SCHEDULED_WORKFLOWS_EXPIRE_TIME},
|
|
||||||
},
|
|
||||||
"Rebuild LLM index": {
|
|
||||||
"task": "documents.tasks.llmindex_index",
|
|
||||||
"schedule": crontab(minute=10, hour=2),
|
|
||||||
"options": {
|
|
||||||
"expires": self.LLM_INDEX_EXPIRE_TIME,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
"Cleanup expired share link bundles": {
|
|
||||||
"task": "documents.tasks.cleanup_expired_share_link_bundles",
|
|
||||||
"schedule": crontab(minute=0, hour=2),
|
|
||||||
"options": {
|
|
||||||
"expires": self.CLEANUP_EXPIRED_SHARE_BUNDLES_EXPIRE_TIME,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
schedule,
|
|
||||||
)
|
|
||||||
|
|
||||||
def test_schedule_configuration_changed(self) -> None:
    """
    GIVEN:
        - The e-mail task cron expression is overridden via the environment
    WHEN:
        - The celery beat schedule is built
    THEN:
        - The e-mail task follows the overridden cron expression
        - Every other task keeps its default schedule
    """
    env_override = {"PAPERLESS_EMAIL_TASK_CRON": "*/50 * * * mon"}
    with mock.patch.dict(os.environ, env_override):
        schedule = _parse_beat_schedule()

    # Only the first entry differs from the defaults.
    expected_schedule = {
        "Check all e-mail accounts": {
            "task": "paperless_mail.tasks.process_mail_accounts",
            "schedule": crontab(minute="*/50", day_of_week="mon"),
            "options": {"expires": self.MAIL_EXPIRE_TIME},
        },
        "Train the classifier": {
            "task": "documents.tasks.train_classifier",
            "schedule": crontab(minute="5", hour="*/1"),
            "options": {"expires": self.CLASSIFIER_EXPIRE_TIME},
        },
        "Optimize the index": {
            "task": "documents.tasks.index_optimize",
            "schedule": crontab(minute=0, hour=0),
            "options": {"expires": self.INDEX_EXPIRE_TIME},
        },
        "Perform sanity check": {
            "task": "documents.tasks.sanity_check",
            "schedule": crontab(minute=30, hour=0, day_of_week="sun"),
            "options": {"expires": self.SANITY_EXPIRE_TIME},
        },
        "Empty trash": {
            "task": "documents.tasks.empty_trash",
            "schedule": crontab(minute=0, hour="1"),
            "options": {"expires": self.EMPTY_TRASH_EXPIRE_TIME},
        },
        "Check and run scheduled workflows": {
            "task": "documents.tasks.check_scheduled_workflows",
            "schedule": crontab(minute="5", hour="*/1"),
            "options": {"expires": self.RUN_SCHEDULED_WORKFLOWS_EXPIRE_TIME},
        },
        "Rebuild LLM index": {
            "task": "documents.tasks.llmindex_index",
            "schedule": crontab(minute=10, hour=2),
            "options": {"expires": self.LLM_INDEX_EXPIRE_TIME},
        },
        "Cleanup expired share link bundles": {
            "task": "documents.tasks.cleanup_expired_share_link_bundles",
            "schedule": crontab(minute=0, hour=2),
            "options": {
                "expires": self.CLEANUP_EXPIRED_SHARE_BUNDLES_EXPIRE_TIME,
            },
        },
    }
    self.assertDictEqual(expected_schedule, schedule)
|
|
||||||
|
|
||||||
def test_schedule_configuration_disabled(self) -> None:
    """
    GIVEN:
        - The search index optimization task is disabled via the environment
    WHEN:
        - The celery beat schedule is built
    THEN:
        - The schedule has no entry for the search index task
        - Every other task keeps its default schedule
    """
    env_override = {"PAPERLESS_INDEX_TASK_CRON": "disable"}
    with mock.patch.dict(os.environ, env_override):
        schedule = _parse_beat_schedule()

    # "Optimize the index" is intentionally absent from the expectation.
    expected_schedule = {
        "Check all e-mail accounts": {
            "task": "paperless_mail.tasks.process_mail_accounts",
            "schedule": crontab(minute="*/10"),
            "options": {"expires": self.MAIL_EXPIRE_TIME},
        },
        "Train the classifier": {
            "task": "documents.tasks.train_classifier",
            "schedule": crontab(minute="5", hour="*/1"),
            "options": {"expires": self.CLASSIFIER_EXPIRE_TIME},
        },
        "Perform sanity check": {
            "task": "documents.tasks.sanity_check",
            "schedule": crontab(minute=30, hour=0, day_of_week="sun"),
            "options": {"expires": self.SANITY_EXPIRE_TIME},
        },
        "Empty trash": {
            "task": "documents.tasks.empty_trash",
            "schedule": crontab(minute=0, hour="1"),
            "options": {"expires": self.EMPTY_TRASH_EXPIRE_TIME},
        },
        "Check and run scheduled workflows": {
            "task": "documents.tasks.check_scheduled_workflows",
            "schedule": crontab(minute="5", hour="*/1"),
            "options": {"expires": self.RUN_SCHEDULED_WORKFLOWS_EXPIRE_TIME},
        },
        "Rebuild LLM index": {
            "task": "documents.tasks.llmindex_index",
            "schedule": crontab(minute=10, hour=2),
            "options": {"expires": self.LLM_INDEX_EXPIRE_TIME},
        },
        "Cleanup expired share link bundles": {
            "task": "documents.tasks.cleanup_expired_share_link_bundles",
            "schedule": crontab(minute=0, hour=2),
            "options": {
                "expires": self.CLEANUP_EXPIRED_SHARE_BUNDLES_EXPIRE_TIME,
            },
        },
    }
    self.assertDictEqual(expected_schedule, schedule)
|
|
||||||
|
|
||||||
def test_schedule_configuration_disabled_all(self) -> None:
    """
    GIVEN:
        - Every periodic task is disabled via the environment
    WHEN:
        - The celery beat schedule is built
    THEN:
        - The resulting schedule is empty
    """
    cron_settings = (
        "PAPERLESS_EMAIL_TASK_CRON",
        "PAPERLESS_TRAIN_TASK_CRON",
        "PAPERLESS_SANITY_TASK_CRON",
        "PAPERLESS_INDEX_TASK_CRON",
        "PAPERLESS_EMPTY_TRASH_TASK_CRON",
        "PAPERLESS_WORKFLOW_SCHEDULED_TASK_CRON",
        "PAPERLESS_LLM_INDEX_TASK_CRON",
        "PAPERLESS_SHARE_LINK_BUNDLE_CLEANUP_CRON",
    )
    # Map every cron setting to the same "disable" marker.
    disabled_env = dict.fromkeys(cron_settings, "disable")

    with mock.patch.dict(os.environ, disabled_env):
        schedule = _parse_beat_schedule()

    self.assertDictEqual({}, schedule)
|
|
||||||
|
|
||||||
|
|
||||||
class TestPaperlessURLSettings(TestCase):
    """Checks for parsing of the PAPERLESS_URL setting."""

    def test_paperless_url(self) -> None:
        """
        GIVEN:
            - PAPERLESS_URL is present in the environment
        WHEN:
            - The URL is parsed
        THEN:
            - The parsed URL equals the configured value
            - The URL is listed in the CSRF and CORS origin settings
        """
        env_override = {"PAPERLESS_URL": "https://example.com"}
        with mock.patch.dict(os.environ, env_override):
            url = _parse_paperless_url()
            self.assertEqual("https://example.com", url)

            # Imported inside the patched block, as in the original test.
            from django.conf import settings

            for origins in (
                settings.CSRF_TRUSTED_ORIGINS,
                settings.CORS_ALLOWED_ORIGINS,
            ):
                self.assertIn(url, origins)
|
|
||||||
|
|
||||||
|
|
||||||
class TestPathSettings(TestCase):
    """
    Checks for the values returned by _parse_base_paths().

    The tests read the result by index, in this order: FORCE_SCRIPT_NAME,
    BASE_URL, LOGIN_URL, LOGIN_REDIRECT_URL, LOGOUT_REDIRECT_URL.
    """

    def test_default_paths(self) -> None:
        """
        GIVEN:
            - PAPERLESS_FORCE_SCRIPT_NAME is not set
        WHEN:
            - Settings are parsed
        THEN:
            - All paths are rooted at "/"
        """
        base_paths = _parse_base_paths()

        script_name = base_paths[0]
        self.assertEqual(None, script_name)

        base_url = base_paths[1]
        self.assertEqual("/", base_url)

        login_url = base_paths[2]
        self.assertEqual("/accounts/login/", login_url)

        login_redirect_url = base_paths[3]
        self.assertEqual("/dashboard", login_redirect_url)

        logout_redirect_url = base_paths[4]
        self.assertEqual("/accounts/login/?loggedout=1", logout_redirect_url)

    @mock.patch("os.environ", {"PAPERLESS_FORCE_SCRIPT_NAME": "/paperless"})
    def test_subpath(self) -> None:
        """
        GIVEN:
            - PAPERLESS_FORCE_SCRIPT_NAME is set
        WHEN:
            - Settings are parsed
        THEN:
            - Every path carries the script name as its prefix
        """
        base_paths = _parse_base_paths()

        script_name = base_paths[0]
        self.assertEqual("/paperless", script_name)

        base_url = base_paths[1]
        self.assertEqual("/paperless/", base_url)

        login_url = base_paths[2]
        self.assertEqual("/paperless/accounts/login/", login_url)

        login_redirect_url = base_paths[3]
        self.assertEqual("/paperless/dashboard", login_redirect_url)

        logout_redirect_url = base_paths[4]
        self.assertEqual(
            "/paperless/accounts/login/?loggedout=1",
            logout_redirect_url,
        )

    @mock.patch(
        "os.environ",
        {
            "PAPERLESS_FORCE_SCRIPT_NAME": "/paperless",
            "PAPERLESS_LOGOUT_REDIRECT_URL": "/foobar/",
        },
    )
    def test_subpath_with_explicit_logout_url(self) -> None:
        """
        GIVEN:
            - Both PAPERLESS_FORCE_SCRIPT_NAME and
              PAPERLESS_LOGOUT_REDIRECT_URL are set
        WHEN:
            - Settings are parsed
        THEN:
            - The explicitly configured logout redirect URL is used as-is
        """
        base_paths = _parse_base_paths()

        base_url = base_paths[1]
        self.assertEqual("/paperless/", base_url)

        logout_redirect_url = base_paths[4]
        self.assertEqual("/foobar/", logout_redirect_url)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
    ("languages", "expected"),
    [
        ("de", ["de"]),
        ("zh", ["zh"]),
        ("fr+en", ["fr", "en"]),
        # Locale-qualified codes have to be accepted as well
        ("en-001+fr-CA", ["en-001", "fr-CA"]),
        ("en-001+fr", ["en-001", "fr"]),
        # Chinese variants appear to miss some dates, so plain "zh" is
        # always expected alongside them as a fallback.
        ("en+zh-Hans-HK", ["en", "zh-Hans-HK", "zh"]),
        ("en+zh-Hans", ["en", "zh-Hans", "zh"]),
        ("en+zh-Hans+zh-Hant", ["en", "zh-Hans", "zh-Hant", "zh"]),
    ],
)
def test_parser_date_parser_languages(languages, expected) -> None:
    """
    Parse a "+"-separated language setting and compare the result with the
    expected language codes, ignoring order.
    """
    parsed_languages = _parse_dateparser_languages(languages)
    assert sorted(parsed_languages) == sorted(expected)
|
|
||||||
@@ -9,35 +9,50 @@ from paperless.utils import ocr_to_dateparser_languages
|
|||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
("ocr_language", "expected"),
|
("ocr_language", "expected"),
|
||||||
[
|
[
|
||||||
# One language
|
pytest.param("eng", ["en"], id="single-language"),
|
||||||
("eng", ["en"]),
|
pytest.param("fra+ita+lao", ["fr", "it", "lo"], id="multiple-languages"),
|
||||||
# Multiple languages
|
pytest.param("fil", ["fil"], id="no-two-letter-equivalent"),
|
||||||
("fra+ita+lao", ["fr", "it", "lo"]),
|
pytest.param(
|
||||||
# Languages that don't have a two-letter equivalent
|
"aze_cyrl+srp_latn",
|
||||||
("fil", ["fil"]),
|
["az-Cyrl", "sr-Latn"],
|
||||||
# Languages with a script part supported by dateparser
|
id="script-supported-by-dateparser",
|
||||||
("aze_cyrl+srp_latn", ["az-Cyrl", "sr-Latn"]),
|
),
|
||||||
# Languages with a script part not supported by dateparser
|
pytest.param(
|
||||||
# In this case, default to the language without script
|
"deu_frak",
|
||||||
("deu_frak", ["de"]),
|
["de"],
|
||||||
# Traditional and simplified chinese don't have the same name in dateparser,
|
id="script-not-supported-falls-back-to-language",
|
||||||
# so they're converted to the general chinese language
|
),
|
||||||
("chi_tra+chi_sim", ["zh"]),
|
pytest.param(
|
||||||
# If a language is not supported by dateparser, fallback to the supported ones
|
"chi_tra+chi_sim",
|
||||||
("eng+unsupported_language+por", ["en", "pt"]),
|
["zh"],
|
||||||
# If no language is supported, fallback to default
|
id="chinese-variants-collapse-to-general",
|
||||||
("unsupported1+unsupported2", []),
|
),
|
||||||
# Duplicate languages, should not duplicate in result
|
pytest.param(
|
||||||
("eng+eng", ["en"]),
|
"eng+unsupported_language+por",
|
||||||
# Language with script, but script is not mapped
|
["en", "pt"],
|
||||||
("ita_unknownscript", ["it"]),
|
id="unsupported-language-skipped",
|
||||||
|
),
|
||||||
|
pytest.param(
|
||||||
|
"unsupported1+unsupported2",
|
||||||
|
[],
|
||||||
|
id="all-unsupported-returns-empty",
|
||||||
|
),
|
||||||
|
pytest.param("eng+eng", ["en"], id="duplicates-deduplicated"),
|
||||||
|
pytest.param(
|
||||||
|
"ita_unknownscript",
|
||||||
|
["it"],
|
||||||
|
id="unknown-script-falls-back-to-language",
|
||||||
|
),
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
def test_ocr_to_dateparser_languages(ocr_language, expected):
|
def test_ocr_to_dateparser_languages(ocr_language: str, expected: list[str]) -> None:
|
||||||
assert sorted(ocr_to_dateparser_languages(ocr_language)) == sorted(expected)
|
assert sorted(ocr_to_dateparser_languages(ocr_language)) == sorted(expected)
|
||||||
|
|
||||||
|
|
||||||
def test_ocr_to_dateparser_languages_exception(monkeypatch, caplog):
|
def test_ocr_to_dateparser_languages_exception(
|
||||||
|
monkeypatch: pytest.MonkeyPatch,
|
||||||
|
caplog: pytest.LogCaptureFixture,
|
||||||
|
) -> None:
|
||||||
# Patch LocaleDataLoader.get_locale_map to raise an exception
|
# Patch LocaleDataLoader.get_locale_map to raise an exception
|
||||||
class DummyLoader:
|
class DummyLoader:
|
||||||
def get_locale_map(self, locales=None):
|
def get_locale_map(self, locales=None):
|
||||||
|
|||||||
@@ -1,24 +1,31 @@
|
|||||||
import tempfile
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from django.test import override_settings
|
from django.test import Client
|
||||||
|
from pytest_django.fixtures import SettingsWrapper
|
||||||
|
|
||||||
|
|
||||||
def test_favicon_view(client):
|
def test_favicon_view(
|
||||||
with tempfile.TemporaryDirectory() as tmpdir:
|
client: Client,
|
||||||
static_dir = Path(tmpdir)
|
tmp_path: Path,
|
||||||
favicon_path = static_dir / "paperless" / "img" / "favicon.ico"
|
settings: SettingsWrapper,
|
||||||
favicon_path.parent.mkdir(parents=True, exist_ok=True)
|
) -> None:
|
||||||
favicon_path.write_bytes(b"FAKE ICON DATA")
|
favicon_path = tmp_path / "paperless" / "img" / "favicon.ico"
|
||||||
|
favicon_path.parent.mkdir(parents=True)
|
||||||
|
favicon_path.write_bytes(b"FAKE ICON DATA")
|
||||||
|
|
||||||
with override_settings(STATIC_ROOT=static_dir):
|
settings.STATIC_ROOT = tmp_path
|
||||||
response = client.get("/favicon.ico")
|
|
||||||
assert response.status_code == 200
|
response = client.get("/favicon.ico")
|
||||||
assert response["Content-Type"] == "image/x-icon"
|
assert response.status_code == 200
|
||||||
assert b"".join(response.streaming_content) == b"FAKE ICON DATA"
|
assert response["Content-Type"] == "image/x-icon"
|
||||||
|
assert b"".join(response.streaming_content) == b"FAKE ICON DATA"
|
||||||
|
|
||||||
|
|
||||||
def test_favicon_view_missing_file(client):
|
def test_favicon_view_missing_file(
|
||||||
with override_settings(STATIC_ROOT=Path(tempfile.mkdtemp())):
|
client: Client,
|
||||||
response = client.get("/favicon.ico")
|
tmp_path: Path,
|
||||||
assert response.status_code == 404
|
settings: SettingsWrapper,
|
||||||
|
) -> None:
|
||||||
|
settings.STATIC_ROOT = tmp_path
|
||||||
|
response = client.get("/favicon.ico")
|
||||||
|
assert response.status_code == 404
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
from llama_index.core.bridge.pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
|
||||||
class DocumentClassifierSchema(BaseModel):
|
class DocumentClassifierSchema(BaseModel):
|
||||||
|
|||||||
@@ -1,10 +1,6 @@
|
|||||||
import logging
|
import logging
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
from llama_index.core import VectorStoreIndex
|
|
||||||
from llama_index.core.prompts import PromptTemplate
|
|
||||||
from llama_index.core.query_engine import RetrieverQueryEngine
|
|
||||||
|
|
||||||
from documents.models import Document
|
from documents.models import Document
|
||||||
from paperless_ai.client import AIClient
|
from paperless_ai.client import AIClient
|
||||||
from paperless_ai.indexing import load_or_build_index
|
from paperless_ai.indexing import load_or_build_index
|
||||||
@@ -14,15 +10,13 @@ logger = logging.getLogger("paperless_ai.chat")
|
|||||||
MAX_SINGLE_DOC_CONTEXT_CHARS = 15000
|
MAX_SINGLE_DOC_CONTEXT_CHARS = 15000
|
||||||
SINGLE_DOC_SNIPPET_CHARS = 800
|
SINGLE_DOC_SNIPPET_CHARS = 800
|
||||||
|
|
||||||
CHAT_PROMPT_TMPL = PromptTemplate(
|
CHAT_PROMPT_TMPL = """Context information is below.
|
||||||
template="""Context information is below.
|
|
||||||
---------------------
|
---------------------
|
||||||
{context_str}
|
{context_str}
|
||||||
---------------------
|
---------------------
|
||||||
Given the context information and not prior knowledge, answer the query.
|
Given the context information and not prior knowledge, answer the query.
|
||||||
Query: {query_str}
|
Query: {query_str}
|
||||||
Answer:""",
|
Answer:"""
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def stream_chat_with_documents(query_str: str, documents: list[Document]):
|
def stream_chat_with_documents(query_str: str, documents: list[Document]):
|
||||||
@@ -43,6 +37,10 @@ def stream_chat_with_documents(query_str: str, documents: list[Document]):
|
|||||||
yield "Sorry, I couldn't find any content to answer your question."
|
yield "Sorry, I couldn't find any content to answer your question."
|
||||||
return
|
return
|
||||||
|
|
||||||
|
from llama_index.core import VectorStoreIndex
|
||||||
|
from llama_index.core.prompts import PromptTemplate
|
||||||
|
from llama_index.core.query_engine import RetrieverQueryEngine
|
||||||
|
|
||||||
local_index = VectorStoreIndex(nodes=nodes)
|
local_index = VectorStoreIndex(nodes=nodes)
|
||||||
retriever = local_index.as_retriever(
|
retriever = local_index.as_retriever(
|
||||||
similarity_top_k=3 if len(documents) == 1 else 5,
|
similarity_top_k=3 if len(documents) == 1 else 5,
|
||||||
@@ -85,7 +83,8 @@ def stream_chat_with_documents(query_str: str, documents: list[Document]):
|
|||||||
for node in top_nodes
|
for node in top_nodes
|
||||||
)
|
)
|
||||||
|
|
||||||
prompt = CHAT_PROMPT_TMPL.partial_format(
|
prompt_template = PromptTemplate(template=CHAT_PROMPT_TMPL)
|
||||||
|
prompt = prompt_template.partial_format(
|
||||||
context_str=context,
|
context_str=context,
|
||||||
query_str=query_str,
|
query_str=query_str,
|
||||||
).format(llm=client.llm)
|
).format(llm=client.llm)
|
||||||
|
|||||||
@@ -1,9 +1,10 @@
|
|||||||
import logging
|
import logging
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
from llama_index.core.llms import ChatMessage
|
if TYPE_CHECKING:
|
||||||
from llama_index.core.program.function_program import get_function_tool
|
from llama_index.core.llms import ChatMessage
|
||||||
from llama_index.llms.ollama import Ollama
|
from llama_index.llms.ollama import Ollama
|
||||||
from llama_index.llms.openai import OpenAI
|
from llama_index.llms.openai import OpenAI
|
||||||
|
|
||||||
from paperless.config import AIConfig
|
from paperless.config import AIConfig
|
||||||
from paperless_ai.base_model import DocumentClassifierSchema
|
from paperless_ai.base_model import DocumentClassifierSchema
|
||||||
@@ -20,14 +21,18 @@ class AIClient:
|
|||||||
self.settings = AIConfig()
|
self.settings = AIConfig()
|
||||||
self.llm = self.get_llm()
|
self.llm = self.get_llm()
|
||||||
|
|
||||||
def get_llm(self) -> Ollama | OpenAI:
|
def get_llm(self) -> "Ollama | OpenAI":
|
||||||
if self.settings.llm_backend == "ollama":
|
if self.settings.llm_backend == "ollama":
|
||||||
|
from llama_index.llms.ollama import Ollama
|
||||||
|
|
||||||
return Ollama(
|
return Ollama(
|
||||||
model=self.settings.llm_model or "llama3.1",
|
model=self.settings.llm_model or "llama3.1",
|
||||||
base_url=self.settings.llm_endpoint or "http://localhost:11434",
|
base_url=self.settings.llm_endpoint or "http://localhost:11434",
|
||||||
request_timeout=120,
|
request_timeout=120,
|
||||||
)
|
)
|
||||||
elif self.settings.llm_backend == "openai":
|
elif self.settings.llm_backend == "openai":
|
||||||
|
from llama_index.llms.openai import OpenAI
|
||||||
|
|
||||||
return OpenAI(
|
return OpenAI(
|
||||||
model=self.settings.llm_model or "gpt-3.5-turbo",
|
model=self.settings.llm_model or "gpt-3.5-turbo",
|
||||||
api_base=self.settings.llm_endpoint or None,
|
api_base=self.settings.llm_endpoint or None,
|
||||||
@@ -43,6 +48,9 @@ class AIClient:
|
|||||||
self.settings.llm_model,
|
self.settings.llm_model,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
from llama_index.core.llms import ChatMessage
|
||||||
|
from llama_index.core.program.function_program import get_function_tool
|
||||||
|
|
||||||
user_msg = ChatMessage(role="user", content=prompt)
|
user_msg = ChatMessage(role="user", content=prompt)
|
||||||
tool = get_function_tool(DocumentClassifierSchema)
|
tool = get_function_tool(DocumentClassifierSchema)
|
||||||
result = self.llm.chat_with_tools(
|
result = self.llm.chat_with_tools(
|
||||||
@@ -58,7 +66,7 @@ class AIClient:
|
|||||||
parsed = DocumentClassifierSchema(**tool_calls[0].tool_kwargs)
|
parsed = DocumentClassifierSchema(**tool_calls[0].tool_kwargs)
|
||||||
return parsed.model_dump()
|
return parsed.model_dump()
|
||||||
|
|
||||||
def run_chat(self, messages: list[ChatMessage]) -> str:
|
def run_chat(self, messages: list["ChatMessage"]) -> str:
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"Running chat query against %s with model %s",
|
"Running chat query against %s with model %s",
|
||||||
self.settings.llm_backend,
|
self.settings.llm_backend,
|
||||||
|
|||||||
@@ -1,13 +1,12 @@
|
|||||||
import json
|
import json
|
||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from django.conf import settings
|
from llama_index.core.base.embeddings.base import BaseEmbedding
|
||||||
from llama_index.core.base.embeddings.base import BaseEmbedding
|
|
||||||
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
|
||||||
from llama_index.embeddings.openai import OpenAIEmbedding
|
|
||||||
|
|
||||||
from documents.models import Document
|
from documents.models import Document
|
||||||
from documents.models import Note
|
from documents.models import Note
|
||||||
@@ -15,17 +14,21 @@ from paperless.config import AIConfig
|
|||||||
from paperless.models import LLMEmbeddingBackend
|
from paperless.models import LLMEmbeddingBackend
|
||||||
|
|
||||||
|
|
||||||
def get_embedding_model() -> BaseEmbedding:
|
def get_embedding_model() -> "BaseEmbedding":
|
||||||
config = AIConfig()
|
config = AIConfig()
|
||||||
|
|
||||||
match config.llm_embedding_backend:
|
match config.llm_embedding_backend:
|
||||||
case LLMEmbeddingBackend.OPENAI:
|
case LLMEmbeddingBackend.OPENAI:
|
||||||
|
from llama_index.embeddings.openai import OpenAIEmbedding
|
||||||
|
|
||||||
return OpenAIEmbedding(
|
return OpenAIEmbedding(
|
||||||
model=config.llm_embedding_model or "text-embedding-3-small",
|
model=config.llm_embedding_model or "text-embedding-3-small",
|
||||||
api_key=config.llm_api_key,
|
api_key=config.llm_api_key,
|
||||||
api_base=config.llm_endpoint or None,
|
api_base=config.llm_endpoint or None,
|
||||||
)
|
)
|
||||||
case LLMEmbeddingBackend.HUGGINGFACE:
|
case LLMEmbeddingBackend.HUGGINGFACE:
|
||||||
|
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
||||||
|
|
||||||
return HuggingFaceEmbedding(
|
return HuggingFaceEmbedding(
|
||||||
model_name=config.llm_embedding_model
|
model_name=config.llm_embedding_model
|
||||||
or "sentence-transformers/all-MiniLM-L6-v2",
|
or "sentence-transformers/all-MiniLM-L6-v2",
|
||||||
|
|||||||
@@ -4,26 +4,12 @@ from collections.abc import Callable
|
|||||||
from collections.abc import Iterable
|
from collections.abc import Iterable
|
||||||
from datetime import timedelta
|
from datetime import timedelta
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
from typing import TypeVar
|
from typing import TypeVar
|
||||||
|
|
||||||
import faiss
|
|
||||||
import llama_index.core.settings as llama_settings
|
|
||||||
from celery import states
|
from celery import states
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django.utils import timezone
|
from django.utils import timezone
|
||||||
from llama_index.core import Document as LlamaDocument
|
|
||||||
from llama_index.core import StorageContext
|
|
||||||
from llama_index.core import VectorStoreIndex
|
|
||||||
from llama_index.core import load_index_from_storage
|
|
||||||
from llama_index.core.indices.prompt_helper import PromptHelper
|
|
||||||
from llama_index.core.node_parser import SimpleNodeParser
|
|
||||||
from llama_index.core.prompts import PromptTemplate
|
|
||||||
from llama_index.core.retrievers import VectorIndexRetriever
|
|
||||||
from llama_index.core.schema import BaseNode
|
|
||||||
from llama_index.core.storage.docstore import SimpleDocumentStore
|
|
||||||
from llama_index.core.storage.index_store import SimpleIndexStore
|
|
||||||
from llama_index.core.text_splitter import TokenTextSplitter
|
|
||||||
from llama_index.vector_stores.faiss import FaissVectorStore
|
|
||||||
|
|
||||||
from documents.models import Document
|
from documents.models import Document
|
||||||
from documents.models import PaperlessTask
|
from documents.models import PaperlessTask
|
||||||
@@ -34,6 +20,10 @@ from paperless_ai.embedding import get_embedding_model
|
|||||||
_T = TypeVar("_T")
|
_T = TypeVar("_T")
|
||||||
IterWrapper = Callable[[Iterable[_T]], Iterable[_T]]
|
IterWrapper = Callable[[Iterable[_T]], Iterable[_T]]
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from llama_index.core import VectorStoreIndex
|
||||||
|
from llama_index.core.schema import BaseNode
|
||||||
|
|
||||||
|
|
||||||
def _identity(iterable: Iterable[_T]) -> Iterable[_T]:
|
def _identity(iterable: Iterable[_T]) -> Iterable[_T]:
|
||||||
return iterable
|
return iterable
|
||||||
@@ -75,12 +65,23 @@ def get_or_create_storage_context(*, rebuild=False):
|
|||||||
settings.LLM_INDEX_DIR.mkdir(parents=True, exist_ok=True)
|
settings.LLM_INDEX_DIR.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
if rebuild or not settings.LLM_INDEX_DIR.exists():
|
if rebuild or not settings.LLM_INDEX_DIR.exists():
|
||||||
|
import faiss
|
||||||
|
from llama_index.core import StorageContext
|
||||||
|
from llama_index.core.storage.docstore import SimpleDocumentStore
|
||||||
|
from llama_index.core.storage.index_store import SimpleIndexStore
|
||||||
|
from llama_index.vector_stores.faiss import FaissVectorStore
|
||||||
|
|
||||||
embedding_dim = get_embedding_dim()
|
embedding_dim = get_embedding_dim()
|
||||||
faiss_index = faiss.IndexFlatL2(embedding_dim)
|
faiss_index = faiss.IndexFlatL2(embedding_dim)
|
||||||
vector_store = FaissVectorStore(faiss_index=faiss_index)
|
vector_store = FaissVectorStore(faiss_index=faiss_index)
|
||||||
docstore = SimpleDocumentStore()
|
docstore = SimpleDocumentStore()
|
||||||
index_store = SimpleIndexStore()
|
index_store = SimpleIndexStore()
|
||||||
else:
|
else:
|
||||||
|
from llama_index.core import StorageContext
|
||||||
|
from llama_index.core.storage.docstore import SimpleDocumentStore
|
||||||
|
from llama_index.core.storage.index_store import SimpleIndexStore
|
||||||
|
from llama_index.vector_stores.faiss import FaissVectorStore
|
||||||
|
|
||||||
vector_store = FaissVectorStore.from_persist_dir(settings.LLM_INDEX_DIR)
|
vector_store = FaissVectorStore.from_persist_dir(settings.LLM_INDEX_DIR)
|
||||||
docstore = SimpleDocumentStore.from_persist_dir(settings.LLM_INDEX_DIR)
|
docstore = SimpleDocumentStore.from_persist_dir(settings.LLM_INDEX_DIR)
|
||||||
index_store = SimpleIndexStore.from_persist_dir(settings.LLM_INDEX_DIR)
|
index_store = SimpleIndexStore.from_persist_dir(settings.LLM_INDEX_DIR)
|
||||||
@@ -93,7 +94,7 @@ def get_or_create_storage_context(*, rebuild=False):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def build_document_node(document: Document) -> list[BaseNode]:
|
def build_document_node(document: Document) -> list["BaseNode"]:
|
||||||
"""
|
"""
|
||||||
Given a Document, returns parsed Nodes ready for indexing.
|
Given a Document, returns parsed Nodes ready for indexing.
|
||||||
"""
|
"""
|
||||||
@@ -112,6 +113,9 @@ def build_document_node(document: Document) -> list[BaseNode]:
|
|||||||
"added": document.added.isoformat() if document.added else None,
|
"added": document.added.isoformat() if document.added else None,
|
||||||
"modified": document.modified.isoformat(),
|
"modified": document.modified.isoformat(),
|
||||||
}
|
}
|
||||||
|
from llama_index.core import Document as LlamaDocument
|
||||||
|
from llama_index.core.node_parser import SimpleNodeParser
|
||||||
|
|
||||||
doc = LlamaDocument(text=text, metadata=metadata)
|
doc = LlamaDocument(text=text, metadata=metadata)
|
||||||
parser = SimpleNodeParser()
|
parser = SimpleNodeParser()
|
||||||
return parser.get_nodes_from_documents([doc])
|
return parser.get_nodes_from_documents([doc])
|
||||||
@@ -122,6 +126,10 @@ def load_or_build_index(nodes=None):
|
|||||||
Load an existing VectorStoreIndex if present,
|
Load an existing VectorStoreIndex if present,
|
||||||
or build a new one using provided nodes if storage is empty.
|
or build a new one using provided nodes if storage is empty.
|
||||||
"""
|
"""
|
||||||
|
import llama_index.core.settings as llama_settings
|
||||||
|
from llama_index.core import VectorStoreIndex
|
||||||
|
from llama_index.core import load_index_from_storage
|
||||||
|
|
||||||
embed_model = get_embedding_model()
|
embed_model = get_embedding_model()
|
||||||
llama_settings.Settings.embed_model = embed_model
|
llama_settings.Settings.embed_model = embed_model
|
||||||
storage_context = get_or_create_storage_context()
|
storage_context = get_or_create_storage_context()
|
||||||
@@ -143,7 +151,7 @@ def load_or_build_index(nodes=None):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def remove_document_docstore_nodes(document: Document, index: VectorStoreIndex):
|
def remove_document_docstore_nodes(document: Document, index: "VectorStoreIndex"):
|
||||||
"""
|
"""
|
||||||
Removes existing documents from docstore for a given document from the index.
|
Removes existing documents from docstore for a given document from the index.
|
||||||
This is necessary because FAISS IndexFlatL2 is append-only.
|
This is necessary because FAISS IndexFlatL2 is append-only.
|
||||||
@@ -174,6 +182,8 @@ def update_llm_index(
|
|||||||
"""
|
"""
|
||||||
Rebuild or update the LLM index.
|
Rebuild or update the LLM index.
|
||||||
"""
|
"""
|
||||||
|
from llama_index.core import VectorStoreIndex
|
||||||
|
|
||||||
nodes = []
|
nodes = []
|
||||||
|
|
||||||
documents = Document.objects.all()
|
documents = Document.objects.all()
|
||||||
@@ -187,6 +197,8 @@ def update_llm_index(
|
|||||||
(settings.LLM_INDEX_DIR / "meta.json").unlink(missing_ok=True)
|
(settings.LLM_INDEX_DIR / "meta.json").unlink(missing_ok=True)
|
||||||
# Rebuild index from scratch
|
# Rebuild index from scratch
|
||||||
logger.info("Rebuilding LLM index.")
|
logger.info("Rebuilding LLM index.")
|
||||||
|
import llama_index.core.settings as llama_settings
|
||||||
|
|
||||||
embed_model = get_embedding_model()
|
embed_model = get_embedding_model()
|
||||||
llama_settings.Settings.embed_model = embed_model
|
llama_settings.Settings.embed_model = embed_model
|
||||||
storage_context = get_or_create_storage_context(rebuild=True)
|
storage_context = get_or_create_storage_context(rebuild=True)
|
||||||
@@ -271,6 +283,10 @@ def llm_index_remove_document(document: Document):
|
|||||||
|
|
||||||
|
|
||||||
def truncate_content(content: str) -> str:
|
def truncate_content(content: str) -> str:
|
||||||
|
from llama_index.core.indices.prompt_helper import PromptHelper
|
||||||
|
from llama_index.core.prompts import PromptTemplate
|
||||||
|
from llama_index.core.text_splitter import TokenTextSplitter
|
||||||
|
|
||||||
prompt_helper = PromptHelper(
|
prompt_helper = PromptHelper(
|
||||||
context_window=8192,
|
context_window=8192,
|
||||||
num_output=512,
|
num_output=512,
|
||||||
@@ -315,6 +331,8 @@ def query_similar_documents(
|
|||||||
else None
|
else None
|
||||||
)
|
)
|
||||||
|
|
||||||
|
from llama_index.core.retrievers import VectorIndexRetriever
|
||||||
|
|
||||||
retriever = VectorIndexRetriever(
|
retriever = VectorIndexRetriever(
|
||||||
index=index,
|
index=index,
|
||||||
similarity_top_k=top_k,
|
similarity_top_k=top_k,
|
||||||
|
|||||||
@@ -181,11 +181,11 @@ def test_load_or_build_index_builds_when_nodes_given(
|
|||||||
) -> None:
|
) -> None:
|
||||||
with (
|
with (
|
||||||
patch(
|
patch(
|
||||||
"paperless_ai.indexing.load_index_from_storage",
|
"llama_index.core.load_index_from_storage",
|
||||||
side_effect=ValueError("Index not found"),
|
side_effect=ValueError("Index not found"),
|
||||||
),
|
),
|
||||||
patch(
|
patch(
|
||||||
"paperless_ai.indexing.VectorStoreIndex",
|
"llama_index.core.VectorStoreIndex",
|
||||||
return_value=MagicMock(),
|
return_value=MagicMock(),
|
||||||
) as mock_index_cls,
|
) as mock_index_cls,
|
||||||
patch(
|
patch(
|
||||||
@@ -206,7 +206,7 @@ def test_load_or_build_index_raises_exception_when_no_nodes(
|
|||||||
) -> None:
|
) -> None:
|
||||||
with (
|
with (
|
||||||
patch(
|
patch(
|
||||||
"paperless_ai.indexing.load_index_from_storage",
|
"llama_index.core.load_index_from_storage",
|
||||||
side_effect=ValueError("Index not found"),
|
side_effect=ValueError("Index not found"),
|
||||||
),
|
),
|
||||||
patch(
|
patch(
|
||||||
@@ -225,11 +225,11 @@ def test_load_or_build_index_succeeds_when_nodes_given(
|
|||||||
) -> None:
|
) -> None:
|
||||||
with (
|
with (
|
||||||
patch(
|
patch(
|
||||||
"paperless_ai.indexing.load_index_from_storage",
|
"llama_index.core.load_index_from_storage",
|
||||||
side_effect=ValueError("Index not found"),
|
side_effect=ValueError("Index not found"),
|
||||||
),
|
),
|
||||||
patch(
|
patch(
|
||||||
"paperless_ai.indexing.VectorStoreIndex",
|
"llama_index.core.VectorStoreIndex",
|
||||||
return_value=MagicMock(),
|
return_value=MagicMock(),
|
||||||
) as mock_index_cls,
|
) as mock_index_cls,
|
||||||
patch(
|
patch(
|
||||||
@@ -334,7 +334,7 @@ def test_query_similar_documents(
|
|||||||
patch(
|
patch(
|
||||||
"paperless_ai.indexing.vector_store_file_exists",
|
"paperless_ai.indexing.vector_store_file_exists",
|
||||||
) as mock_vector_store_exists,
|
) as mock_vector_store_exists,
|
||||||
patch("paperless_ai.indexing.VectorIndexRetriever") as mock_retriever_cls,
|
patch("llama_index.core.retrievers.VectorIndexRetriever") as mock_retriever_cls,
|
||||||
patch("paperless_ai.indexing.Document.objects.filter") as mock_filter,
|
patch("paperless_ai.indexing.Document.objects.filter") as mock_filter,
|
||||||
):
|
):
|
||||||
mock_storage.return_value = MagicMock()
|
mock_storage.return_value = MagicMock()
|
||||||
|
|||||||
@@ -45,7 +45,7 @@ def test_stream_chat_with_one_document_full_content(mock_document) -> None:
|
|||||||
patch("paperless_ai.chat.AIClient") as mock_client_cls,
|
patch("paperless_ai.chat.AIClient") as mock_client_cls,
|
||||||
patch("paperless_ai.chat.load_or_build_index") as mock_load_index,
|
patch("paperless_ai.chat.load_or_build_index") as mock_load_index,
|
||||||
patch(
|
patch(
|
||||||
"paperless_ai.chat.RetrieverQueryEngine.from_args",
|
"llama_index.core.query_engine.RetrieverQueryEngine.from_args",
|
||||||
) as mock_query_engine_cls,
|
) as mock_query_engine_cls,
|
||||||
):
|
):
|
||||||
mock_client = MagicMock()
|
mock_client = MagicMock()
|
||||||
@@ -76,7 +76,7 @@ def test_stream_chat_with_multiple_documents_retrieval(patch_embed_nodes) -> Non
|
|||||||
patch("paperless_ai.chat.AIClient") as mock_client_cls,
|
patch("paperless_ai.chat.AIClient") as mock_client_cls,
|
||||||
patch("paperless_ai.chat.load_or_build_index") as mock_load_index,
|
patch("paperless_ai.chat.load_or_build_index") as mock_load_index,
|
||||||
patch(
|
patch(
|
||||||
"paperless_ai.chat.RetrieverQueryEngine.from_args",
|
"llama_index.core.query_engine.RetrieverQueryEngine.from_args",
|
||||||
) as mock_query_engine_cls,
|
) as mock_query_engine_cls,
|
||||||
patch.object(VectorStoreIndex, "as_retriever") as mock_as_retriever,
|
patch.object(VectorStoreIndex, "as_retriever") as mock_as_retriever,
|
||||||
):
|
):
|
||||||
|
|||||||
@@ -18,13 +18,13 @@ def mock_ai_config():
|
|||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def mock_ollama_llm():
|
def mock_ollama_llm():
|
||||||
with patch("paperless_ai.client.Ollama") as MockOllama:
|
with patch("llama_index.llms.ollama.Ollama") as MockOllama:
|
||||||
yield MockOllama
|
yield MockOllama
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def mock_openai_llm():
|
def mock_openai_llm():
|
||||||
with patch("paperless_ai.client.OpenAI") as MockOpenAI:
|
with patch("llama_index.llms.openai.OpenAI") as MockOpenAI:
|
||||||
yield MockOpenAI
|
yield MockOpenAI
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -67,7 +67,7 @@ def test_get_embedding_model_openai(mock_ai_config):
|
|||||||
mock_ai_config.return_value.llm_api_key = "test_api_key"
|
mock_ai_config.return_value.llm_api_key = "test_api_key"
|
||||||
mock_ai_config.return_value.llm_endpoint = "http://test-url"
|
mock_ai_config.return_value.llm_endpoint = "http://test-url"
|
||||||
|
|
||||||
with patch("paperless_ai.embedding.OpenAIEmbedding") as MockOpenAIEmbedding:
|
with patch("llama_index.embeddings.openai.OpenAIEmbedding") as MockOpenAIEmbedding:
|
||||||
model = get_embedding_model()
|
model = get_embedding_model()
|
||||||
MockOpenAIEmbedding.assert_called_once_with(
|
MockOpenAIEmbedding.assert_called_once_with(
|
||||||
model="text-embedding-3-small",
|
model="text-embedding-3-small",
|
||||||
@@ -84,7 +84,7 @@ def test_get_embedding_model_huggingface(mock_ai_config):
|
|||||||
)
|
)
|
||||||
|
|
||||||
with patch(
|
with patch(
|
||||||
"paperless_ai.embedding.HuggingFaceEmbedding",
|
"llama_index.embeddings.huggingface.HuggingFaceEmbedding",
|
||||||
) as MockHuggingFaceEmbedding:
|
) as MockHuggingFaceEmbedding:
|
||||||
model = get_embedding_model()
|
model = get_embedding_model()
|
||||||
MockHuggingFaceEmbedding.assert_called_once_with(
|
MockHuggingFaceEmbedding.assert_called_once_with(
|
||||||
|
|||||||
@@ -1,50 +0,0 @@
|
|||||||
from pathlib import Path
|
|
||||||
|
|
||||||
from django.conf import settings
|
|
||||||
from PIL import Image
|
|
||||||
from PIL import ImageDraw
|
|
||||||
from PIL import ImageFont
|
|
||||||
|
|
||||||
from documents.parsers import DocumentParser
|
|
||||||
|
|
||||||
|
|
||||||
class TextDocumentParser(DocumentParser):
|
|
||||||
"""
|
|
||||||
This parser directly parses a text document (.txt, .md, or .csv)
|
|
||||||
"""
|
|
||||||
|
|
||||||
logging_name = "paperless.parsing.text"
|
|
||||||
|
|
||||||
def get_thumbnail(self, document_path: Path, mime_type, file_name=None) -> Path:
|
|
||||||
# Avoid reading entire file into memory
|
|
||||||
max_chars = 100_000
|
|
||||||
file_size_limit = 50 * 1024 * 1024
|
|
||||||
|
|
||||||
if document_path.stat().st_size > file_size_limit:
|
|
||||||
text = "[File too large to preview]"
|
|
||||||
else:
|
|
||||||
with Path(document_path).open("r", encoding="utf-8", errors="replace") as f:
|
|
||||||
text = f.read(max_chars)
|
|
||||||
|
|
||||||
img = Image.new("RGB", (500, 700), color="white")
|
|
||||||
draw = ImageDraw.Draw(img)
|
|
||||||
font = ImageFont.truetype(
|
|
||||||
font=settings.THUMBNAIL_FONT_NAME,
|
|
||||||
size=20,
|
|
||||||
layout_engine=ImageFont.Layout.BASIC,
|
|
||||||
)
|
|
||||||
draw.multiline_text((5, 5), text, font=font, fill="black", spacing=4)
|
|
||||||
|
|
||||||
out_path = self.tempdir / "thumb.webp"
|
|
||||||
img.save(out_path, format="WEBP")
|
|
||||||
|
|
||||||
return out_path
|
|
||||||
|
|
||||||
def parse(self, document_path, mime_type, file_name=None) -> None:
|
|
||||||
self.text = self.read_file_handle_unicode_errors(document_path)
|
|
||||||
|
|
||||||
def get_settings(self) -> None:
|
|
||||||
"""
|
|
||||||
This parser does not implement additional settings yet
|
|
||||||
"""
|
|
||||||
return None
|
|
||||||
@@ -1,7 +1,13 @@
|
|||||||
def get_parser(*args, **kwargs):
|
def get_parser(*args, **kwargs):
|
||||||
from paperless_text.parsers import TextDocumentParser
|
from paperless.parsers.text import TextDocumentParser
|
||||||
|
|
||||||
return TextDocumentParser(*args, **kwargs)
|
# The new TextDocumentParser does not accept the legacy logging_group /
|
||||||
|
# progress_callback kwargs injected by the old signal-based consumer.
|
||||||
|
# These are dropped here; Phase 4 will replace this signal path with the
|
||||||
|
# new ParserRegistry so the shim can be removed at that point.
|
||||||
|
kwargs.pop("logging_group", None)
|
||||||
|
kwargs.pop("progress_callback", None)
|
||||||
|
return TextDocumentParser()
|
||||||
|
|
||||||
|
|
||||||
def text_consumer_declaration(sender, **kwargs):
|
def text_consumer_declaration(sender, **kwargs):
|
||||||
|
|||||||
@@ -1,30 +0,0 @@
|
|||||||
from collections.abc import Generator
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
from paperless_text.parsers import TextDocumentParser
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="session")
|
|
||||||
def sample_dir() -> Path:
|
|
||||||
return (Path(__file__).parent / Path("samples")).resolve()
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture()
|
|
||||||
def text_parser() -> Generator[TextDocumentParser, None, None]:
|
|
||||||
try:
|
|
||||||
parser = TextDocumentParser(logging_group=None)
|
|
||||||
yield parser
|
|
||||||
finally:
|
|
||||||
parser.cleanup()
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="session")
|
|
||||||
def sample_txt_file(sample_dir: Path) -> Path:
|
|
||||||
return sample_dir / "test.txt"
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="session")
|
|
||||||
def malformed_txt_file(sample_dir: Path) -> Path:
|
|
||||||
return sample_dir / "decode_error.txt"
|
|
||||||
@@ -1,69 +0,0 @@
|
|||||||
import tempfile
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
from paperless_text.parsers import TextDocumentParser
|
|
||||||
|
|
||||||
|
|
||||||
class TestTextParser:
|
|
||||||
def test_thumbnail(
|
|
||||||
self,
|
|
||||||
text_parser: TextDocumentParser,
|
|
||||||
sample_txt_file: Path,
|
|
||||||
) -> None:
|
|
||||||
# just make sure that it does not crash
|
|
||||||
f = text_parser.get_thumbnail(sample_txt_file, "text/plain")
|
|
||||||
assert f.exists()
|
|
||||||
assert f.is_file()
|
|
||||||
|
|
||||||
def test_parse(
|
|
||||||
self,
|
|
||||||
text_parser: TextDocumentParser,
|
|
||||||
sample_txt_file: Path,
|
|
||||||
) -> None:
|
|
||||||
text_parser.parse(sample_txt_file, "text/plain")
|
|
||||||
|
|
||||||
assert text_parser.get_text() == "This is a test file.\n"
|
|
||||||
assert text_parser.get_archive_path() is None
|
|
||||||
|
|
||||||
def test_parse_invalid_bytes(
|
|
||||||
self,
|
|
||||||
text_parser: TextDocumentParser,
|
|
||||||
malformed_txt_file: Path,
|
|
||||||
) -> None:
|
|
||||||
"""
|
|
||||||
GIVEN:
|
|
||||||
- Text file which contains invalid UTF bytes
|
|
||||||
WHEN:
|
|
||||||
- The file is parsed
|
|
||||||
THEN:
|
|
||||||
- Parsing continues
|
|
||||||
- Invalid bytes are removed
|
|
||||||
"""
|
|
||||||
|
|
||||||
text_parser.parse(malformed_txt_file, "text/plain")
|
|
||||||
|
|
||||||
assert text_parser.get_text() == "Pantothens<EFBFBD>ure\n"
|
|
||||||
assert text_parser.get_archive_path() is None
|
|
||||||
|
|
||||||
def test_thumbnail_large_file(self, text_parser: TextDocumentParser) -> None:
|
|
||||||
"""
|
|
||||||
GIVEN:
|
|
||||||
- A very large text file (>50MB)
|
|
||||||
WHEN:
|
|
||||||
- A thumbnail is requested
|
|
||||||
THEN:
|
|
||||||
- A thumbnail is created without reading the entire file into memory
|
|
||||||
"""
|
|
||||||
with tempfile.NamedTemporaryFile(
|
|
||||||
delete=False,
|
|
||||||
mode="w",
|
|
||||||
encoding="utf-8",
|
|
||||||
suffix=".txt",
|
|
||||||
) as tmp:
|
|
||||||
tmp.write("A" * (51 * 1024 * 1024)) # 51 MB of 'A'
|
|
||||||
large_file = Path(tmp.name)
|
|
||||||
|
|
||||||
thumb = text_parser.get_thumbnail(large_file, "text/plain")
|
|
||||||
assert thumb.exists()
|
|
||||||
assert thumb.is_file()
|
|
||||||
large_file.unlink()
|
|
||||||
@@ -12,6 +12,7 @@ def tika_parser() -> Generator[TikaDocumentParser, None, None]:
|
|||||||
parser = TikaDocumentParser(logging_group=None)
|
parser = TikaDocumentParser(logging_group=None)
|
||||||
yield parser
|
yield parser
|
||||||
finally:
|
finally:
|
||||||
|
# TODO(stumpylog): Cleanup once all parsers are handled
|
||||||
parser.cleanup()
|
parser.cleanup()
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
309
uv.lock
generated
309
uv.lock
generated
@@ -1748,6 +1748,73 @@ wheels = [
|
|||||||
{ url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" },
|
{ url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ijson"
|
||||||
|
version = "3.5.0"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/f4/57/60d1a6a512f2f0508d0bc8b4f1cc5616fd3196619b66bd6a01f9155a1292/ijson-3.5.0.tar.gz", hash = "sha256:94688760720e3f5212731b3cb8d30267f9a045fb38fb3870254e7b9504246f31", size = 68658, upload-time = "2026-02-24T03:58:30.974Z" }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/65/da/644343198abca5e0f6e2486063f8d8f3c443ca0ef5e5c890e51ef6032e33/ijson-3.5.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5616311404b858d32740b7ad8b9a799c62165f5ecb85d0a8ed16c21665a90533", size = 88964, upload-time = "2026-02-24T03:56:53.099Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/5b/63/8621190aa2baf96156dfd4c632b6aa9f1464411e50b98750c09acc0505ea/ijson-3.5.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e9733f94029dd41702d573ef64752e2556e72aea14623d6dbb7a44ca1ccf30fd", size = 60582, upload-time = "2026-02-24T03:56:54.261Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/20/31/6a3f041fdd17dacff33b7d7d3ba3df6dca48740108340c6042f974b2ad20/ijson-3.5.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:db8398c6721b98412a4f618da8022550c8b9c5d9214040646071b5deb4d4a393", size = 60632, upload-time = "2026-02-24T03:56:55.159Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/e4/68/474541998abbdecfd46a744536878335de89aceb9f085bff1aaf35575ceb/ijson-3.5.0-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:c061314845c08163b1784b6076ea5f075372461a32e6916f4e5f211fd4130b64", size = 131988, upload-time = "2026-02-24T03:56:56.35Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/cd/32/e05ff8b72a44fe9d192f41c5dcbc35cfa87efc280cdbfe539ffaf4a7535e/ijson-3.5.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1111a1c5ac79119c5d6e836f900c1a53844b50a18af38311baa6bb61e2645aca", size = 138669, upload-time = "2026-02-24T03:56:57.555Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/49/b5/955a83b031102c7a602e2c06d03aff0a0e584212f09edb94ccc754d203ac/ijson-3.5.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1e74aff8c681c24002b61b1822f9511d4c384f324f7dbc08c78538e01fdc9fcb", size = 135093, upload-time = "2026-02-24T03:56:59.267Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/e8/f2/30250cfcb4d2766669b31f6732689aab2bb91de426a15a3ebe482df7ee48/ijson-3.5.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:739a7229b1b0cc5f7e2785a6e7a5fc915e850d3fed9588d0e89a09f88a417253", size = 138715, upload-time = "2026-02-24T03:57:00.491Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/a2/05/785a145d7e75e04e04480d59b6323cd4b1d9013a6cd8643fa635fbc93490/ijson-3.5.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:ef88712160360cab3ca6471a4e5418243f8b267cf1fe1620879d1b5558babc71", size = 133194, upload-time = "2026-02-24T03:57:01.759Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/14/eb/80d6f8a748dead4034cea0939494a67d10ccf88d6413bf6e860393139676/ijson-3.5.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6ca0d1b6b5f8166a6248f4309497585fb8553b04bc8179a0260fad636cfdb798", size = 135588, upload-time = "2026-02-24T03:57:03.131Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/aa/17/9c63c7688025f3a8c47ea717b8306649c8c7244e49e20a2be4e3515dc75c/ijson-3.5.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:1ebefbe149a6106cc848a3eaf536af51a9b5ccc9082de801389f152dba6ab755", size = 88536, upload-time = "2026-02-24T03:57:06.809Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/6f/dd/e15c2400244c117b06585452ebc63ae254f5a6964f712306afd1422daae0/ijson-3.5.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:19e30d9f00f82e64de689c0b8651b9cfed879c184b139d7e1ea5030cec401c21", size = 60499, upload-time = "2026-02-24T03:57:09.155Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/77/a9/bf4fe3538a0c965f16b406f180a06105b875da83f0743e36246be64ef550/ijson-3.5.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a04a33ee78a6f27b9b8528c1ca3c207b1df3b8b867a4cf2fcc4109986f35c227", size = 60330, upload-time = "2026-02-24T03:57:10.574Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/31/76/6f91bdb019dd978fce1bc5ea1cd620cfc096d258126c91db2c03a20a7f34/ijson-3.5.0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:7d48dc2984af02eb3c56edfb3f13b3f62f2f3e4fe36f058c8cfc75d93adf4fed", size = 138977, upload-time = "2026-02-24T03:57:11.932Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/11/be/bbc983059e48a54b0121ee60042979faed7674490bbe7b2c41560db3f436/ijson-3.5.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f1e73a44844d9adbca9cf2c4132cd875933e83f3d4b23881fcaf82be83644c7d", size = 149785, upload-time = "2026-02-24T03:57:13.255Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/6d/81/2fee58f9024a3449aee83edfa7167fb5ccd7e1af2557300e28531bb68e16/ijson-3.5.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7389a56b8562a19948bdf1d7bae3a2edc8c7f86fb59834dcb1c4c722818e645a", size = 149729, upload-time = "2026-02-24T03:57:14.191Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/c7/56/f1706761fcc096c9d414b3dcd000b1e6e5c24364c21cfba429837f98ee8d/ijson-3.5.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3176f23f8ebec83f374ed0c3b4e5a0c4db7ede54c005864efebbed46da123608", size = 150697, upload-time = "2026-02-24T03:57:15.855Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/d9/6e/ee0d9c875a0193b632b3e9ccd1b22a50685fb510256ad57ba483b6529f77/ijson-3.5.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:6babd88e508630c6ef86c9bebaaf13bb2fb8ec1d8f8868773a03c20253f599bc", size = 142873, upload-time = "2026-02-24T03:57:16.831Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/d2/bf/f9d4399d0e6e3fd615035290a71e97c843f17f329b43638c0a01cf112d73/ijson-3.5.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:dc1b3836b174b6db2fa8319f1926fb5445abd195dc963368092103f8579cb8ed", size = 151583, upload-time = "2026-02-24T03:57:17.757Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/a2/71/d67e764a712c3590627480643a3b51efcc3afa4ef3cb54ee4c989073c97e/ijson-3.5.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e9cedc10e40dd6023c351ed8bfc7dcfce58204f15c321c3c1546b9c7b12562a4", size = 88544, upload-time = "2026-02-24T03:57:21.293Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/1a/39/f1c299371686153fa3cf5c0736b96247a87a1bee1b7145e6d21f359c505a/ijson-3.5.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:3647649f782ee06c97490b43680371186651f3f69bebe64c6083ee7615d185e5", size = 60495, upload-time = "2026-02-24T03:57:22.501Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/16/94/b1438e204d75e01541bebe3e668fe3e68612d210e9931ae1611062dd0a56/ijson-3.5.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:90e74be1dce05fce73451c62d1118671f78f47c9f6be3991c82b91063bf01fc9", size = 60325, upload-time = "2026-02-24T03:57:23.332Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/30/e2/4aa9c116fa86cc8b0f574f3c3a47409edc1cd4face05d0e589a5a176b05d/ijson-3.5.0-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:78e9ad73e7be2dd80627504bd5cbf512348c55ce2c06e362ed7683b5220e8568", size = 138774, upload-time = "2026-02-24T03:57:24.683Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/d2/d2/738b88752a70c3be1505faa4dcd7110668c2712e582a6a36488ed1e295d4/ijson-3.5.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9577449313cc94be89a4fe4b3e716c65f09cc19636d5a6b2861c4e80dddebd58", size = 149820, upload-time = "2026-02-24T03:57:26.062Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/ed/df/0b3ab9f393ca8f72ea03bc896ba9fdc987e90ae08cdb51c32a4ee0c14d5e/ijson-3.5.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3e4c1178fb50aff5f5701a30a5152ead82a14e189ce0f6102fa1b5f10b2f54ff", size = 149747, upload-time = "2026-02-24T03:57:27.308Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/cc/a3/b0037119f75131b78cb00acc2657b1a9d0435475f1f2c5f8f5a170b66b9c/ijson-3.5.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0eb402ab026ffb37a918d75af2b7260fe6cfbce13232cc83728a714dd30bd81d", size = 151027, upload-time = "2026-02-24T03:57:28.522Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/22/a0/cb344de1862bf09d8f769c9d25c944078c87dd59a1b496feec5ad96309a4/ijson-3.5.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:5b08ee08355f9f729612a8eb9bf69cc14f9310c3b2a487c6f1c3c65d85216ec4", size = 142996, upload-time = "2026-02-24T03:57:29.774Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/ca/32/a8ffd67182e02ea61f70f62daf43ded4fa8a830a2520a851d2782460aba8/ijson-3.5.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:bda62b6d48442903e7bf56152108afb7f0f1293c2b9bef2f2c369defea76ab18", size = 152068, upload-time = "2026-02-24T03:57:30.969Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/42/65/13e2492d17e19a2084523e18716dc2809159f2287fd2700c735f311e76c4/ijson-3.5.0-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:4d4b0cd676b8c842f7648c1a783448fac5cd3b98289abd83711b3e275e143524", size = 93019, upload-time = "2026-02-24T03:57:33.976Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/33/92/483fc97ece0c3f1cecabf48f6a7a36e89d19369eec462faaeaa34c788992/ijson-3.5.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:252dec3680a48bb82d475e36b4ae1b3a9d7eb690b951bb98a76c5fe519e30188", size = 62714, upload-time = "2026-02-24T03:57:34.819Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/4b/88/793fe020a0fe9d9eed4c285cf4a5cfdb0a935708b3bde0d72f35c794b513/ijson-3.5.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:aa1b5dca97d323931fde2501172337384c958914d81a9dac7f00f0d4bfc76bc7", size = 62460, upload-time = "2026-02-24T03:57:35.874Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/51/69/f1a2690aa8d4df1f4e262b385e65a933ffdc250b091531bac9a449c19e16/ijson-3.5.0-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:7a5ec7fd86d606094bba6f6f8f87494897102fa4584ef653f3005c51a784c320", size = 199273, upload-time = "2026-02-24T03:57:37.07Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/ea/a2/f1346d5299e79b988ab472dc773d5381ec2d57c23cb2f1af3ede4a810e62/ijson-3.5.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:009f41443e1521847701c6d87fa3923c0b1961be3c7e7de90947c8cb92ea7c44", size = 216884, upload-time = "2026-02-24T03:57:38.346Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/28/3c/8b637e869be87799e6c2c3c275a30a546f086b1aed77e2b7f11512168c5a/ijson-3.5.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e4c3651d1f9fe2839a93fdf8fd1d5ca3a54975349894249f3b1b572bcc4bd577", size = 207306, upload-time = "2026-02-24T03:57:39.718Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/7f/7c/18b1c1df6951ca056782d7580ec40cea4ff9a27a0947d92640d1cc8c4ae3/ijson-3.5.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:945b7abcfcfeae2cde17d8d900870f03536494245dda7ad4f8d056faa303256c", size = 211364, upload-time = "2026-02-24T03:57:40.953Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/f3/55/e795812e82851574a9dba8a53fde045378f531ef14110c6fb55dbd23b443/ijson-3.5.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:0574b0a841ff97495c13e9d7260fbf3d85358b061f540c52a123db9dbbaa2ed6", size = 200608, upload-time = "2026-02-24T03:57:42.272Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/5c/cd/013c85b4749b57a4cb4c2670014d1b32b8db4ab1a7be92ea7aeb5d7fe7b5/ijson-3.5.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f969ffb2b89c5cdf686652d7fb66252bc72126fa54d416317411497276056a18", size = 205127, upload-time = "2026-02-24T03:57:43.286Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/7a/93/0868efe753dc1df80cc405cf0c1f2527a6991643607c741bff8dcb899b3b/ijson-3.5.0-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:25a5a6b2045c90bb83061df27cfa43572afa43ba9408611d7bfe237c20a731a9", size = 89094, upload-time = "2026-02-24T03:57:46.115Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/24/94/fd5a832a0df52ef5e4e740f14ac8640725d61034a1b0c561e8b5fb424706/ijson-3.5.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:8976c54c0b864bc82b951bae06567566ac77ef63b90a773a69cd73aab47f4f4f", size = 60715, upload-time = "2026-02-24T03:57:47.552Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/70/79/1b9a90af5732491f9eec751ee211b86b11011e1158c555c06576d52c3919/ijson-3.5.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:859eb2038f7f1b0664df4241957694cc35e6295992d71c98659b22c69b3cbc10", size = 60638, upload-time = "2026-02-24T03:57:48.428Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/23/6f/2c551ea980fe56f68710a8d5389cfbd015fc45aaafd17c3c52c346db6aa1/ijson-3.5.0-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:c911aa02991c7c0d3639b6619b93a93210ff1e7f58bf7225d613abea10adc78e", size = 140667, upload-time = "2026-02-24T03:57:49.314Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/25/0e/27b887879ba6a5bc29766e3c5af4942638c952220fd63e1e442674f7883a/ijson-3.5.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:903cbdc350173605220edc19796fbea9b2203c8b3951fb7335abfa8ed37afda8", size = 149850, upload-time = "2026-02-24T03:57:50.329Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/da/1e/23e10e1bc04bf31193b21e2960dce14b17dbd5d0c62204e8401c59d62c08/ijson-3.5.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a4549d96ded5b8efa71639b2160235415f6bdb8c83367615e2dbabcb72755c33", size = 149206, upload-time = "2026-02-24T03:57:51.261Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/8e/90/e552f6495063b235cf7fa2c592f6597c057077195e517b842a0374fd470c/ijson-3.5.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:6b2dcf6349e6042d83f3f8c39ce84823cf7577eba25bac5aae5e39bbbbbe9c1c", size = 150438, upload-time = "2026-02-24T03:57:52.198Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/5c/18/45bf8f297c41b42a1c231d261141097babd953d2c28a07be57ae4c3a1a02/ijson-3.5.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:e44af39e6f8a17e5627dcd89715d8279bf3474153ff99aae031a936e5c5572e5", size = 144369, upload-time = "2026-02-24T03:57:53.22Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/9b/3a/deb9772bb2c0cead7ad64f00c3598eec9072bdf511818e70e2c512eeabbe/ijson-3.5.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:9260332304b7e7828db56d43f08fc970a3ab741bf84ff10189361ea1b60c395b", size = 151352, upload-time = "2026-02-24T03:57:54.375Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/9f/d9/86f7fac35e0835faa188085ae0579e813493d5261ce056484015ad533445/ijson-3.5.0-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:2ea4b676ec98e374c1df400a47929859e4fa1239274339024df4716e802aa7e4", size = 93069, upload-time = "2026-02-24T03:57:57.849Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/33/d2/e7366ed9c6e60228d35baf4404bac01a126e7775ea8ce57f560125ed190a/ijson-3.5.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:014586eec043e23c80be9a923c56c3a0920a0f1f7d17478ce7bc20ba443968ef", size = 62767, upload-time = "2026-02-24T03:57:58.758Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/35/8b/3e703e8cc4b3ada79f13b28070b51d9550c578f76d1968657905857b2ddd/ijson-3.5.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:d5b8b886b0248652d437f66e7c5ac318bbdcb2c7137a7e5327a68ca00b286f5f", size = 62467, upload-time = "2026-02-24T03:58:00.261Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/21/42/0c91af32c1ee8a957fdac2e051b5780756d05fd34e4b60d94a08d51bac1d/ijson-3.5.0-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:498fd46ae2349297e43acf97cdc421e711dbd7198418677259393d2acdc62d78", size = 200447, upload-time = "2026-02-24T03:58:01.591Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/f9/80/796ea0e391b7e2d45c5b1b451734bba03f81c2984cf955ea5eaa6c4920ad/ijson-3.5.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:22a51b4f9b81f12793731cf226266d1de2112c3c04ba4a04117ad4e466897e05", size = 217820, upload-time = "2026-02-24T03:58:02.598Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/38/14/52b6613fdda4078c62eb5b4fe3efc724ddc55a4ad524c93de51830107aa3/ijson-3.5.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9636c710dc4ac4a281baa266a64f323b4cc165cec26836af702c44328b59a515", size = 208310, upload-time = "2026-02-24T03:58:04.759Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/6a/ad/8b3105a78774fd4a65e534a21d975ef3a77e189489fe3029ebcaeba5e243/ijson-3.5.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:f7168a39e8211107666d71b25693fd1b2bac0b33735ef744114c403c6cac21e1", size = 211843, upload-time = "2026-02-24T03:58:05.836Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/36/ab/a2739f6072d6e1160581bc3ed32da614c8cced023dcd519d9c5fa66e0425/ijson-3.5.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:8696454245415bc617ab03b0dc3ae4c86987df5dc6a90bad378fe72c5409d89e", size = 200906, upload-time = "2026-02-24T03:58:07.788Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/6d/5e/e06c2de3c3d4a9cfb655c1ad08a68fb72838d271072cdd3196576ac4431a/ijson-3.5.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:c21bfb61f71f191565885bf1bc29e0a186292d866b4880637b833848360bdc1b", size = 205495, upload-time = "2026-02-24T03:58:09.163Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/d9/3b/d31ecfa63a218978617446159f3d77aab2417a5bd2885c425b176353ff78/ijson-3.5.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:d64c624da0e9d692d6eb0ff63a79656b59d76bf80773a17c5b0f835e4e8ef627", size = 57715, upload-time = "2026-02-24T03:58:24.545Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/30/51/b170e646d378e8cccf9637c05edb5419b00c2c4df64b0258c3af5355608e/ijson-3.5.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:876f7df73b7e0d6474f9caa729b9cdbfc8e76de9075a4887dfd689e29e85c4ca", size = 57205, upload-time = "2026-02-24T03:58:25.681Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/ef/83/44dbd0231b0a8c6c14d27473d10c4e27dfbce7d5d9a833c79e3e6c33eb40/ijson-3.5.0-pp311-pypy311_pp73-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:e7dbff2c8d9027809b0cde663df44f3210da10ea377121d42896fb6ee405dd31", size = 71229, upload-time = "2026-02-24T03:58:27.103Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/c8/98/cf84048b7c6cec888826e696a31f45bee7ebcac15e532b6be1fc4c2c9608/ijson-3.5.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4217a1edc278660679e1197c83a1a2a2d367792bfbb2a3279577f4b59b93730d", size = 71217, upload-time = "2026-02-24T03:58:28.021Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/3c/0a/e34c729a87ff67dc6540f6bcc896626158e691d433ab57db0086d73decd2/ijson-3.5.0-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:04f0fc740311388ee745ba55a12292b722d6f52000b11acbb913982ba5fbdf87", size = 68618, upload-time = "2026-02-24T03:58:28.918Z" },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "imagehash"
|
name = "imagehash"
|
||||||
version = "4.3.2"
|
version = "4.3.2"
|
||||||
@@ -2072,15 +2139,15 @@ wheels = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "llama-index-embeddings-openai"
|
name = "llama-index-embeddings-openai"
|
||||||
version = "0.5.1"
|
version = "0.5.2"
|
||||||
source = { registry = "https://pypi.org/simple" }
|
source = { registry = "https://pypi.org/simple" }
|
||||||
dependencies = [
|
dependencies = [
|
||||||
{ name = "llama-index-core", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
{ name = "llama-index-core", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||||
{ name = "openai", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
{ name = "openai", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||||
]
|
]
|
||||||
sdist = { url = "https://files.pythonhosted.org/packages/10/36/90336d054a5061a3f5bc17ac2c18ef63d9d84c55c14d557de484e811ea4d/llama_index_embeddings_openai-0.5.1.tar.gz", hash = "sha256:1c89867a48b0d0daa3d2d44f5e76b394b2b2ef9935932daf921b9e77939ccda8", size = 7020, upload-time = "2025-09-08T20:17:44.681Z" }
|
sdist = { url = "https://files.pythonhosted.org/packages/ea/a1/d238dfa453ba8ebc4f6261d6384b663f50b8dba6f4b22d8be800b305863d/llama_index_embeddings_openai-0.5.2.tar.gz", hash = "sha256:091bd0c3e9182748e8827de7d79713a219d5f5e0dc97d1bb7b271cf524520e4b", size = 7630, upload-time = "2026-03-03T11:27:38.127Z" }
|
||||||
wheels = [
|
wheels = [
|
||||||
{ url = "https://files.pythonhosted.org/packages/23/4a/8ab11026cf8deff8f555aa73919be0bac48332683111e5fc4290f352dc50/llama_index_embeddings_openai-0.5.1-py3-none-any.whl", hash = "sha256:a2fcda3398bbd987b5ce3f02367caee8e84a56b930fdf43cc1d059aa9fd20ca5", size = 7011, upload-time = "2025-09-08T20:17:44.015Z" },
|
{ url = "https://files.pythonhosted.org/packages/1f/5e/da156f9c77443d22287eeaea341fe35fdcc25e59a9250e4cb10d4d5a066a/llama_index_embeddings_openai-0.5.2-py3-none-any.whl", hash = "sha256:37e7967de05b05f16c9b171091110bb1c6e5a0720198ea306d57cd3920cb81b7", size = 7667, upload-time = "2026-03-03T11:27:37.394Z" },
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -2111,15 +2178,15 @@ wheels = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "llama-index-llms-openai"
|
name = "llama-index-llms-openai"
|
||||||
version = "0.6.21"
|
version = "0.6.26"
|
||||||
source = { registry = "https://pypi.org/simple" }
|
source = { registry = "https://pypi.org/simple" }
|
||||||
dependencies = [
|
dependencies = [
|
||||||
{ name = "llama-index-core", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
{ name = "llama-index-core", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||||
{ name = "openai", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
{ name = "openai", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||||
]
|
]
|
||||||
sdist = { url = "https://files.pythonhosted.org/packages/d8/5b/775289b3064302966cc839bbccfdbe314f706eaf58ad4233b86e5d53343d/llama_index_llms_openai-0.6.21.tar.gz", hash = "sha256:0b92dcfb01cbc7752f5b8bdf6d93430643d295210cf9392b45291d6fdd81e0ee", size = 25961, upload-time = "2026-02-26T04:19:33.604Z" }
|
sdist = { url = "https://files.pythonhosted.org/packages/4a/5e/a7a47d46dc2eb30953d83654112c8af6f61821ca78ef3ea22e30729aac3a/llama_index_llms_openai-0.6.26.tar.gz", hash = "sha256:3474602ecbc30c88a8b585cfd5737891d45da78251a5e067c4dbc2d3cc3d08db", size = 27262, upload-time = "2026-03-05T02:53:50.581Z" }
|
||||||
wheels = [
|
wheels = [
|
||||||
{ url = "https://files.pythonhosted.org/packages/e3/d7/5b513acbf0bfc2b6ef281b6bbca764062facc431e8f13763c16005fbd34b/llama_index_llms_openai-0.6.21-py3-none-any.whl", hash = "sha256:ef8c048849f844c7db9ff4208cca9878a799bc5fcdd72954197ea11e64b37c97", size = 26965, upload-time = "2026-02-26T04:19:34.561Z" },
|
{ url = "https://files.pythonhosted.org/packages/2e/8a/f46f59279c078b001374813f69987b43b7c3bd9df01981af545cf2d954d7/llama_index_llms_openai-0.6.26-py3-none-any.whl", hash = "sha256:2062ef505676d0a1c7c116c138c2f890aa7653619fc3ca697e47df7bd2ef8b3f", size = 28330, upload-time = "2026-03-05T02:53:40.421Z" },
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -2751,6 +2818,7 @@ dependencies = [
|
|||||||
{ name = "flower", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
{ name = "flower", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||||
{ name = "gotenberg-client", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
{ name = "gotenberg-client", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||||
{ name = "httpx-oauth", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
{ name = "httpx-oauth", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||||
|
{ name = "ijson", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||||
{ name = "imap-tools", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
{ name = "imap-tools", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||||
{ name = "jinja2", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
{ name = "jinja2", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||||
{ name = "langdetect", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
{ name = "langdetect", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||||
@@ -2898,6 +2966,7 @@ requires-dist = [
|
|||||||
{ name = "gotenberg-client", specifier = "~=0.13.1" },
|
{ name = "gotenberg-client", specifier = "~=0.13.1" },
|
||||||
{ name = "granian", extras = ["uvloop"], marker = "extra == 'webserver'", specifier = "~=2.7.0" },
|
{ name = "granian", extras = ["uvloop"], marker = "extra == 'webserver'", specifier = "~=2.7.0" },
|
||||||
{ name = "httpx-oauth", specifier = "~=0.16" },
|
{ name = "httpx-oauth", specifier = "~=0.16" },
|
||||||
|
{ name = "ijson", specifier = ">=3.2" },
|
||||||
{ name = "imap-tools", specifier = "~=1.11.0" },
|
{ name = "imap-tools", specifier = "~=1.11.0" },
|
||||||
{ name = "jinja2", specifier = "~=3.1.5" },
|
{ name = "jinja2", specifier = "~=3.1.5" },
|
||||||
{ name = "langdetect", specifier = "~=1.0.9" },
|
{ name = "langdetect", specifier = "~=1.0.9" },
|
||||||
@@ -3216,23 +3285,23 @@ wheels = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "prek"
|
name = "prek"
|
||||||
version = "0.3.3"
|
version = "0.3.5"
|
||||||
source = { registry = "https://pypi.org/simple" }
|
source = { registry = "https://pypi.org/simple" }
|
||||||
sdist = { url = "https://files.pythonhosted.org/packages/bf/f1/7613dc8347a33e40fc5b79eec6bc7d458d8bbc339782333d8433b665f86f/prek-0.3.3.tar.gz", hash = "sha256:117bd46ebeb39def24298ce021ccc73edcf697b81856fcff36d762dd56093f6f", size = 343697, upload-time = "2026-02-15T13:33:28.723Z" }
|
sdist = { url = "https://files.pythonhosted.org/packages/46/d6/277e002e56eeab3a9d48f1ca4cc067d249d6326fc1783b770d70ad5ae2be/prek-0.3.5.tar.gz", hash = "sha256:ca40b6685a4192256bc807f32237af94bf9b8799c0d708b98735738250685642", size = 374806, upload-time = "2026-03-09T10:35:18.842Z" }
|
||||||
wheels = [
|
wheels = [
|
||||||
{ url = "https://files.pythonhosted.org/packages/2d/8b/dce13d2a3065fd1e8ffce593a0e51c4a79c3cde9c9a15dc0acc8d9d1573d/prek-0.3.3-py3-none-linux_armv6l.whl", hash = "sha256:e8629cac4bdb131be8dc6e5a337f0f76073ad34a8305f3fe2bc1ab6201ede0a4", size = 4644636, upload-time = "2026-02-15T13:33:43.609Z" },
|
{ url = "https://files.pythonhosted.org/packages/8f/a9/16dd8d3a50362ebccffe58518af1f1f571c96f0695d7fcd8bbd386585f58/prek-0.3.5-py3-none-linux_armv6l.whl", hash = "sha256:44b3e12791805804f286d103682b42a84e0f98a2687faa37045e9d3375d3d73d", size = 5105604, upload-time = "2026-03-09T10:35:00.332Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/01/30/06ab4dbe7ce02a8ce833e92deb1d9a8e85ae9d40e33d1959a2070b7494c6/prek-0.3.3-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:4b9e819b9e4118e1e785047b1c8bd9aec7e4d836ed034cb58b7db5bcaaf49437", size = 4651410, upload-time = "2026-02-15T13:33:34.277Z" },
|
{ url = "https://files.pythonhosted.org/packages/e4/74/bc6036f5bf03860cda66ab040b32737e54802b71a81ec381839deb25df9e/prek-0.3.5-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:e3cb451cc51ac068974557491beb4c7d2d41dfde29ed559c1694c8ce23bf53e8", size = 5506155, upload-time = "2026-03-09T10:35:17.64Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/d4/fc/da3bc5cb38471e7192eda06b7a26b7c24ef83e82da2c1dbc145f2bf33640/prek-0.3.3-py3-none-macosx_11_0_arm64.whl", hash = "sha256:bf29db3b5657c083eb8444c25aadeeec5167dc492e9019e188f87932f01ea50a", size = 4273163, upload-time = "2026-02-15T13:33:42.106Z" },
|
{ url = "https://files.pythonhosted.org/packages/02/d9/a3745c2a10509c63b6a118ada766614dd705efefd08f275804d5c807aa4a/prek-0.3.5-py3-none-macosx_11_0_arm64.whl", hash = "sha256:ad8f5f0d8da53dc94d00b76979af312b3dacccc9dcbc6417756c5dca3633c052", size = 5100383, upload-time = "2026-03-09T10:35:13.302Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/b4/74/47839395091e2937beced81a5dd2f8ea9c8239c853da8611aaf78ee21a8b/prek-0.3.3-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.musllinux_1_1_aarch64.whl", hash = "sha256:ae09736149815b26e64a9d350ca05692bab32c2afdf2939114d3211aaad68a3e", size = 4631808, upload-time = "2026-02-15T13:33:20.076Z" },
|
{ url = "https://files.pythonhosted.org/packages/43/8e/de965fc515d39309a332789cd3778161f7bc80cde15070bedf17f9f8cb93/prek-0.3.5-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.musllinux_1_1_aarch64.whl", hash = "sha256:4511e15d34072851ac88e4b2006868fbe13655059ad941d7a0ff9ee17138fd9f", size = 5334913, upload-time = "2026-03-09T10:35:14.813Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/e2/89/3f5ef6f7c928c017cb63b029349d6bc03598ab7f6979d4a770ce02575f82/prek-0.3.3-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:856c2b55c51703c366bb4ce81c6a91102b70573a9fc8637db2ac61c66e4565f9", size = 4548959, upload-time = "2026-02-15T13:33:36.325Z" },
|
{ url = "https://files.pythonhosted.org/packages/3f/8c/44f07e8940256059cfd82520e3cbe0764ab06ddb4aa43148465db00b39ad/prek-0.3.5-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fcc0b63b8337e2046f51267facaac63ba755bc14aad53991840a5eccba3e5c28", size = 5033825, upload-time = "2026-03-09T10:35:06.976Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/b2/18/80002c4c4475f90ca025f27739a016927a0e5d905c60612fc95da1c56ab7/prek-0.3.3-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3acdf13a018f685beaff0a71d4b0d2ccbab4eaa1aced6d08fd471c1a654183eb", size = 4862256, upload-time = "2026-02-15T13:33:37.754Z" },
|
{ url = "https://files.pythonhosted.org/packages/94/85/3ff0f96881ff2360c212d310ff23c3cf5a15b223d34fcfa8cdcef203be69/prek-0.3.5-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f5fc0d78c3896a674aeb8247a83bbda7efec85274dbdfbc978ceff8d37e4ed20", size = 5438586, upload-time = "2026-03-09T10:34:58.779Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/c5/25/648bf084c2468fa7cfcdbbe9e59956bbb31b81f36e113bc9107d80af26a7/prek-0.3.3-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0f035667a8bd0a77b2bfa2b2e125da8cb1793949e9eeef0d8daab7f8ac8b57fe", size = 5404486, upload-time = "2026-02-15T13:33:39.239Z" },
|
{ url = "https://files.pythonhosted.org/packages/79/a5/c6d08d31293400fcb5d427f8e7e6bacfc959988e868ad3a9d97b4d87c4b7/prek-0.3.5-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:64cad21cb9072d985179495b77b312f6b81e7b45357d0c68dc1de66e0408eabc", size = 6359714, upload-time = "2026-03-09T10:34:57.454Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/8b/43/261fb60a11712a327da345912bd8b338dc5a050199de800faafa278a6133/prek-0.3.3-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d09b2ad14332eede441d977de08eb57fb3f61226ed5fd2ceb7aadf5afcdb6794", size = 4887513, upload-time = "2026-02-15T13:33:40.702Z" },
|
{ url = "https://files.pythonhosted.org/packages/ba/18/321dcff9ece8065d42c8c1c7a53a23b45d2b4330aa70993be75dc5f2822f/prek-0.3.5-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:45ee84199bb48e013bdfde0c84352c17a44cc42d5792681b86d94e9474aab6f8", size = 5717632, upload-time = "2026-03-09T10:35:08.634Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/c7/2c/581e757ee57ec6046b32e0ee25660fc734bc2622c319f57119c49c0cab58/prek-0.3.3-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:c0c3ffac16e37a9daba43a7e8316778f5809b70254be138761a8b5b9ef0df28e", size = 4632336, upload-time = "2026-02-15T13:33:25.867Z" },
|
{ url = "https://files.pythonhosted.org/packages/a3/7f/1288226aa381d0cea403157f4e6b64b356e1a745f2441c31dd9d8a1d63da/prek-0.3.5-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:f43275e5d564e18e52133129ebeb5cb071af7ce4a547766c7f025aa0955dfbb6", size = 5339040, upload-time = "2026-03-09T10:35:03.665Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/d5/d8/aa276ce5d11b77882da4102ca0cb7161095831105043ae7979bbfdcc3dc4/prek-0.3.3-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:a3dc7720b580c07c0386e17af2486a5b4bc2f6cc57034a288a614dcbc4abe555", size = 4679370, upload-time = "2026-02-15T13:33:22.247Z" },
|
{ url = "https://files.pythonhosted.org/packages/22/94/cfec83df9c2b8e7ed1608087bcf9538a6a77b4c2e7365123e9e0a3162cd1/prek-0.3.5-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:abcee520d31522bcbad9311f21326b447694cd5edba33618c25fd023fc9865ec", size = 5162586, upload-time = "2026-03-09T10:35:11.564Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/70/19/9d4fa7bde428e58d9f48a74290c08736d42aeb5690dcdccc7a713e34a449/prek-0.3.3-py3-none-musllinux_1_1_armv7l.whl", hash = "sha256:60e0fa15da5020a03df2ee40268145ec5b88267ec2141a205317ad4df8c992d6", size = 4540316, upload-time = "2026-02-15T13:33:24.088Z" },
|
{ url = "https://files.pythonhosted.org/packages/13/b7/741d62132f37a5f7cc0fad1168bd31f20dea9628f482f077f569547e0436/prek-0.3.5-py3-none-musllinux_1_1_armv7l.whl", hash = "sha256:499c56a94a155790c75a973d351a33f8065579d9094c93f6d451ada5d1e469be", size = 5002933, upload-time = "2026-03-09T10:35:16.347Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/25/b5/973cce29257e0b47b16cc9b4c162772ea01dbb7c080791ea0c068e106e05/prek-0.3.3-py3-none-musllinux_1_1_i686.whl", hash = "sha256:553515da9586d9624dc42db32b744fdb91cf62b053753037a0cadb3c2d8d82a2", size = 4724566, upload-time = "2026-02-15T13:33:29.832Z" },
|
{ url = "https://files.pythonhosted.org/packages/6f/83/630a5671df6550fcfa67c54955e8a8174eb9b4d97ac38fb05a362029245b/prek-0.3.5-py3-none-musllinux_1_1_i686.whl", hash = "sha256:de1065b59f194624adc9dea269d4ff6b50e98a1b5bb662374a9adaa496b3c1eb", size = 5304934, upload-time = "2026-03-09T10:35:09.975Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/d6/8b/ad8b2658895a8ed2b0bc630bf38686fe38b7ff2c619c58953a80e4de3048/prek-0.3.3-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:9512cf370e0d1496503463a4a65621480efb41b487841a9e9ff1661edf14b238", size = 4995072, upload-time = "2026-02-15T13:33:27.417Z" },
|
{ url = "https://files.pythonhosted.org/packages/de/79/67a7afd0c0b6c436630b7dba6e586a42d21d5d6e5778fbd9eba7bbd3dd26/prek-0.3.5-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:a1c4869e45ee341735d07179da3a79fa2afb5959cef8b3c8a71906eb52dc6933", size = 5829914, upload-time = "2026-03-09T10:35:05.39Z" },
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -3731,11 +3800,11 @@ wheels = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "python-dotenv"
|
name = "python-dotenv"
|
||||||
version = "1.2.1"
|
version = "1.2.2"
|
||||||
source = { registry = "https://pypi.org/simple" }
|
source = { registry = "https://pypi.org/simple" }
|
||||||
sdist = { url = "https://files.pythonhosted.org/packages/f0/26/19cadc79a718c5edbec86fd4919a6b6d3f681039a2f6d66d14be94e75fb9/python_dotenv-1.2.1.tar.gz", hash = "sha256:42667e897e16ab0d66954af0e60a9caa94f0fd4ecf3aaf6d2d260eec1aa36ad6", size = 44221, upload-time = "2025-10-26T15:12:10.434Z" }
|
sdist = { url = "https://files.pythonhosted.org/packages/82/ed/0301aeeac3e5353ef3d94b6ec08bbcabd04a72018415dcb29e588514bba8/python_dotenv-1.2.2.tar.gz", hash = "sha256:2c371a91fbd7ba082c2c1dc1f8bf89ca22564a087c2c287cd9b662adde799cf3", size = 50135, upload-time = "2026-03-01T16:00:26.196Z" }
|
||||||
wheels = [
|
wheels = [
|
||||||
{ url = "https://files.pythonhosted.org/packages/14/1b/a298b06749107c305e1fe0f814c6c74aea7b2f1e10989cb30f544a1b3253/python_dotenv-1.2.1-py3-none-any.whl", hash = "sha256:b81ee9561e9ca4004139c6cbba3a238c32b03e4894671e181b671e8cb8425d61", size = 21230, upload-time = "2025-10-26T15:12:09.109Z" },
|
{ url = "https://files.pythonhosted.org/packages/0b/d7/1959b9648791274998a9c3526f6d0ec8fd2233e4d4acce81bbae76b44b2a/python_dotenv-1.2.2-py3-none-any.whl", hash = "sha256:1d8214789a24de455a8b8bd8ae6fe3c6b69a5e3d64aa8a8e5d68e694bbcb285a", size = 22101, upload-time = "2026-03-01T16:00:25.09Z" },
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -3966,88 +4035,88 @@ wheels = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "regex"
|
name = "regex"
|
||||||
version = "2026.2.19"
|
version = "2026.2.28"
|
||||||
source = { registry = "https://pypi.org/simple" }
|
source = { registry = "https://pypi.org/simple" }
|
||||||
sdist = { url = "https://files.pythonhosted.org/packages/ff/c0/d8079d4f6342e4cec5c3e7d7415b5cd3e633d5f4124f7a4626908dbe84c7/regex-2026.2.19.tar.gz", hash = "sha256:6fb8cb09b10e38f3ae17cc6dc04a1df77762bd0351b6ba9041438e7cc85ec310", size = 414973, upload-time = "2026-02-19T19:03:47.899Z" }
|
sdist = { url = "https://files.pythonhosted.org/packages/8b/71/41455aa99a5a5ac1eaf311f5d8efd9ce6433c03ac1e0962de163350d0d97/regex-2026.2.28.tar.gz", hash = "sha256:a729e47d418ea11d03469f321aaf67cdee8954cde3ff2cf8403ab87951ad10f2", size = 415184, upload-time = "2026-02-28T02:19:42.792Z" }
|
||||||
wheels = [
|
wheels = [
|
||||||
{ url = "https://files.pythonhosted.org/packages/6f/93/43f405a98f54cc59c786efb4fc0b644615ed2392fc89d57d30da11f35b5b/regex-2026.2.19-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:93b16a18cadb938f0f2306267161d57eb33081a861cee9ffcd71e60941eb5dfc", size = 488365, upload-time = "2026-02-19T19:00:17.857Z" },
|
{ url = "https://files.pythonhosted.org/packages/04/db/8cbfd0ba3f302f2d09dd0019a9fcab74b63fee77a76c937d0e33161fb8c1/regex-2026.2.28-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:e621fb7c8dc147419b28e1702f58a0177ff8308a76fa295c71f3e7827849f5d9", size = 488462, upload-time = "2026-02-28T02:16:22.616Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/66/46/da0efce22cd8f5ae28eeb25ac69703f49edcad3331ac22440776f4ea0867/regex-2026.2.19-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:78af1e499cab704131f6f4e2f155b7f54ce396ca2acb6ef21a49507e4752e0be", size = 290737, upload-time = "2026-02-19T19:00:19.869Z" },
|
{ url = "https://files.pythonhosted.org/packages/5d/10/ccc22c52802223f2368731964ddd117799e1390ffc39dbb31634a83022ee/regex-2026.2.28-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0d5bef2031cbf38757a0b0bc4298bb4824b6332d28edc16b39247228fbdbad97", size = 290774, upload-time = "2026-02-28T02:16:23.993Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/fb/19/f735078448132c1c974974d30d5306337bc297fe6b6f126164bff72c1019/regex-2026.2.19-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:eb20c11aa4c3793c9ad04c19a972078cdadb261b8429380364be28e867a843f2", size = 288654, upload-time = "2026-02-19T19:00:21.307Z" },
|
{ url = "https://files.pythonhosted.org/packages/62/b9/6796b3bf3101e64117201aaa3a5a030ec677ecf34b3cd6141b5d5c6c67d5/regex-2026.2.28-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bcb399ed84eabf4282587ba151f2732ad8168e66f1d3f85b1d038868fe547703", size = 288724, upload-time = "2026-02-28T02:16:25.403Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/e2/3e/6d7c24a2f423c03ad03e3fbddefa431057186ac1c4cb4fa98b03c7f39808/regex-2026.2.19-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:db5fd91eec71e7b08de10011a2223d0faa20448d4e1380b9daa179fa7bf58906", size = 793785, upload-time = "2026-02-19T19:00:22.926Z" },
|
{ url = "https://files.pythonhosted.org/packages/9c/02/291c0ae3f3a10cea941d0f5366da1843d8d1fa8a25b0671e20a0e454bb38/regex-2026.2.28-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7c1b34dfa72f826f535b20712afa9bb3ba580020e834f3c69866c5bddbf10098", size = 791924, upload-time = "2026-02-28T02:16:26.863Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/67/32/fdb8107504b3122a79bde6705ac1f9d495ed1fe35b87d7cfc1864471999a/regex-2026.2.19-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:fdbade8acba71bb45057c2b72f477f0b527c4895f9c83e6cfc30d4a006c21726", size = 860731, upload-time = "2026-02-19T19:00:25.196Z" },
|
{ url = "https://files.pythonhosted.org/packages/0f/57/f0235cc520d9672742196c5c15098f8f703f2758d48d5a7465a56333e496/regex-2026.2.28-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:851fa70df44325e1e4cdb79c5e676e91a78147b1b543db2aec8734d2add30ec2", size = 860095, upload-time = "2026-02-28T02:16:28.772Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/9a/fd/cc8c6f05868defd840be6e75919b1c3f462357969ac2c2a0958363b4dc23/regex-2026.2.19-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:31a5f561eb111d6aae14202e7043fb0b406d3c8dddbbb9e60851725c9b38ab1d", size = 907350, upload-time = "2026-02-19T19:00:27.093Z" },
|
{ url = "https://files.pythonhosted.org/packages/b3/7c/393c94cbedda79a0f5f2435ebd01644aba0b338d327eb24b4aa5b8d6c07f/regex-2026.2.28-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:516604edd17b1c2c3e579cf4e9b25a53bf8fa6e7cedddf1127804d3e0140ca64", size = 906583, upload-time = "2026-02-28T02:16:30.977Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/b5/1b/4590db9caa8db3d5a3fe31197c4e42c15aab3643b549ef6a454525fa3a61/regex-2026.2.19-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4584a3ee5f257b71e4b693cc9be3a5104249399f4116fe518c3f79b0c6fc7083", size = 800628, upload-time = "2026-02-19T19:00:29.392Z" },
|
{ url = "https://files.pythonhosted.org/packages/2c/73/a72820f47ca5abf2b5d911d0407ba5178fc52cf9780191ed3a54f5f419a2/regex-2026.2.28-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e7ce83654d1ab701cb619285a18a8e5a889c1216d746ddc710c914ca5fd71022", size = 800234, upload-time = "2026-02-28T02:16:32.55Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/76/05/513eaa5b96fa579fd0b813e19ec047baaaf573d7374ff010fa139b384bf7/regex-2026.2.19-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:196553ba2a2f47904e5dc272d948a746352e2644005627467e055be19d73b39e", size = 773711, upload-time = "2026-02-19T19:00:30.996Z" },
|
{ url = "https://files.pythonhosted.org/packages/34/b3/6e6a4b7b31fa998c4cf159a12cbeaf356386fbd1a8be743b1e80a3da51e4/regex-2026.2.28-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f2791948f7c70bb9335a9102df45e93d428f4b8128020d85920223925d73b9e1", size = 772803, upload-time = "2026-02-28T02:16:34.029Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/95/65/5aed06d8c54563d37fea496cf888be504879a3981a7c8e12c24b2c92c209/regex-2026.2.19-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0c10869d18abb759a3317c757746cc913d6324ce128b8bcec99350df10419f18", size = 783186, upload-time = "2026-02-19T19:00:34.598Z" },
|
{ url = "https://files.pythonhosted.org/packages/10/e7/5da0280c765d5a92af5e1cd324b3fe8464303189cbaa449de9a71910e273/regex-2026.2.28-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:03a83cc26aa2acda6b8b9dfe748cf9e84cbd390c424a1de34fdcef58961a297a", size = 781117, upload-time = "2026-02-28T02:16:36.253Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/2c/57/79a633ad90f2371b4ef9cd72ba3a69a1a67d0cfaab4fe6fa8586d46044ef/regex-2026.2.19-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:e689fed279cbe797a6b570bd18ff535b284d057202692c73420cb93cca41aa32", size = 854854, upload-time = "2026-02-19T19:00:37.306Z" },
|
{ url = "https://files.pythonhosted.org/packages/76/39/0b8d7efb256ae34e1b8157acc1afd8758048a1cf0196e1aec2e71fd99f4b/regex-2026.2.28-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:ec6f5674c5dc836994f50f1186dd1fafde4be0666aae201ae2fcc3d29d8adf27", size = 854224, upload-time = "2026-02-28T02:16:38.119Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/eb/2d/0f113d477d9e91ec4545ec36c82e58be25038d06788229c91ad52da2b7f5/regex-2026.2.19-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:0782bd983f19ac7594039c9277cd6f75c89598c1d72f417e4d30d874105eb0c7", size = 762279, upload-time = "2026-02-19T19:00:39.793Z" },
|
{ url = "https://files.pythonhosted.org/packages/21/ff/a96d483ebe8fe6d1c67907729202313895d8de8495569ec319c6f29d0438/regex-2026.2.28-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:50c2fc924749543e0eacc93ada6aeeb3ea5f6715825624baa0dccaec771668ae", size = 761898, upload-time = "2026-02-28T02:16:40.333Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/39/cb/237e9fa4f61469fd4f037164dbe8e675a376c88cf73aaaa0aedfd305601c/regex-2026.2.19-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:dbb240c81cfed5d4a67cb86d7676d9f7ec9c3f186310bec37d8a1415210e111e", size = 846172, upload-time = "2026-02-19T19:00:42.134Z" },
|
{ url = "https://files.pythonhosted.org/packages/89/bd/d4f2e75cb4a54b484e796017e37c0d09d8a0a837de43d17e238adf163f4e/regex-2026.2.28-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:ba55c50f408fb5c346a3a02d2ce0ebc839784e24f7c9684fde328ff063c3cdea", size = 844832, upload-time = "2026-02-28T02:16:41.875Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/ac/7c/104779c5915cc4eb557a33590f8a3f68089269c64287dd769afd76c7ce61/regex-2026.2.19-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:80d31c3f1fe7e4c6cd1831cd4478a0609903044dfcdc4660abfe6fb307add7f0", size = 789078, upload-time = "2026-02-19T19:00:43.908Z" },
|
{ url = "https://files.pythonhosted.org/packages/8a/a7/428a135cf5e15e4e11d1e696eb2bf968362f8ea8a5f237122e96bc2ae950/regex-2026.2.28-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:edb1b1b3a5576c56f08ac46f108c40333f222ebfd5cf63afdfa3aab0791ebe5b", size = 788347, upload-time = "2026-02-28T02:16:43.472Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/b3/73/13b39c7c9356f333e564ab4790b6cb0df125b8e64e8d6474e73da49b1955/regex-2026.2.19-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:c1665138776e4ac1aa75146669236f7a8a696433ec4e525abf092ca9189247cc", size = 489541, upload-time = "2026-02-19T19:00:52.728Z" },
|
{ url = "https://files.pythonhosted.org/packages/07/42/9061b03cf0fc4b5fa2c3984cbbaed54324377e440a5c5a29d29a72518d62/regex-2026.2.28-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:fcf26c3c6d0da98fada8ae4ef0aa1c3405a431c0a77eb17306d38a89b02adcd7", size = 489574, upload-time = "2026-02-28T02:16:50.455Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/15/77/fcc7bd9a67000d07fbcc11ed226077287a40d5c84544e62171d29d3ef59c/regex-2026.2.19-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d792b84709021945597e05656aac059526df4e0c9ef60a0eaebb306f8fafcaa8", size = 291414, upload-time = "2026-02-19T19:00:54.51Z" },
|
{ url = "https://files.pythonhosted.org/packages/77/83/0c8a5623a233015595e3da499c5a1c13720ac63c107897a6037bb97af248/regex-2026.2.28-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:02473c954af35dd2defeb07e44182f5705b30ea3f351a7cbffa9177beb14da5d", size = 291426, upload-time = "2026-02-28T02:16:52.52Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/f9/87/3997fc72dc59233426ef2e18dfdd105bb123812fff740ee9cc348f1a3243/regex-2026.2.19-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:db970bcce4d63b37b3f9eb8c893f0db980bbf1d404a1d8d2b17aa8189de92c53", size = 289140, upload-time = "2026-02-19T19:00:56.841Z" },
|
{ url = "https://files.pythonhosted.org/packages/9e/06/3ef1ac6910dc3295ebd71b1f9bfa737e82cfead211a18b319d45f85ddd09/regex-2026.2.28-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9b65d33a17101569f86d9c5966a8b1d7fbf8afdda5a8aa219301b0a80f58cf7d", size = 289200, upload-time = "2026-02-28T02:16:54.08Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/f3/d0/b7dd3883ed1cff8ee0c0c9462d828aaf12be63bf5dc55453cbf423523b13/regex-2026.2.19-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:03d706fbe7dfec503c8c3cb76f9352b3e3b53b623672aa49f18a251a6c71b8e6", size = 798767, upload-time = "2026-02-19T19:00:59.014Z" },
|
{ url = "https://files.pythonhosted.org/packages/dd/c9/8cc8d850b35ab5650ff6756a1cb85286e2000b66c97520b29c1587455344/regex-2026.2.28-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e71dcecaa113eebcc96622c17692672c2d104b1d71ddf7adeda90da7ddeb26fc", size = 796765, upload-time = "2026-02-28T02:16:55.905Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/4a/7e/8e2d09103832891b2b735a2515abf377db21144c6dd5ede1fb03c619bf09/regex-2026.2.19-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8dbff048c042beef60aa1848961384572c5afb9e8b290b0f1203a5c42cf5af65", size = 864436, upload-time = "2026-02-19T19:01:00.772Z" },
|
{ url = "https://files.pythonhosted.org/packages/e9/5d/57702597627fc23278ebf36fbb497ac91c0ce7fec89ac6c81e420ca3e38c/regex-2026.2.28-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:481df4623fa4969c8b11f3433ed7d5e3dc9cec0f008356c3212b3933fb77e3d8", size = 863093, upload-time = "2026-02-28T02:16:58.094Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/8a/2e/afea8d23a6db1f67f45e3a0da3057104ce32e154f57dd0c8997274d45fcd/regex-2026.2.19-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ccaaf9b907ea6b4223d5cbf5fa5dff5f33dc66f4907a25b967b8a81339a6e332", size = 912391, upload-time = "2026-02-19T19:01:02.865Z" },
|
{ url = "https://files.pythonhosted.org/packages/02/6d/f3ecad537ca2811b4d26b54ca848cf70e04fcfc138667c146a9f3157779c/regex-2026.2.28-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:64e7c6ad614573e0640f271e811a408d79a9e1fe62a46adb602f598df42a818d", size = 909455, upload-time = "2026-02-28T02:17:00.918Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/59/3c/ea5a4687adaba5e125b9bd6190153d0037325a0ba3757cc1537cc2c8dd90/regex-2026.2.19-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:75472631eee7898e16a8a20998d15106cb31cfde21cdf96ab40b432a7082af06", size = 803702, upload-time = "2026-02-19T19:01:05.298Z" },
|
{ url = "https://files.pythonhosted.org/packages/9e/40/bb226f203caa22c1043c1ca79b36340156eca0f6a6742b46c3bb222a3a57/regex-2026.2.28-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d6b08a06976ff4fb0d83077022fde3eca06c55432bb997d8c0495b9a4e9872f4", size = 802037, upload-time = "2026-02-28T02:17:02.842Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/dc/c5/624a0705e8473a26488ec1a3a4e0b8763ecfc682a185c302dfec71daea35/regex-2026.2.19-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:d89f85a5ccc0cec125c24be75610d433d65295827ebaf0d884cbe56df82d4774", size = 775980, upload-time = "2026-02-19T19:01:07.047Z" },
|
{ url = "https://files.pythonhosted.org/packages/44/7c/c6d91d8911ac6803b45ca968e8e500c46934e58c0903cbc6d760ee817a0a/regex-2026.2.28-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:864cdd1a2ef5716b0ab468af40139e62ede1b3a53386b375ec0786bb6783fc05", size = 775113, upload-time = "2026-02-28T02:17:04.506Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/4d/4b/ed776642533232b5599b7c1f9d817fe11faf597e8a92b7a44b841daaae76/regex-2026.2.19-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0d9f81806abdca3234c3dd582b8a97492e93de3602c8772013cb4affa12d1668", size = 788122, upload-time = "2026-02-19T19:01:08.744Z" },
|
{ url = "https://files.pythonhosted.org/packages/dc/8d/4a9368d168d47abd4158580b8c848709667b1cd293ff0c0c277279543bd0/regex-2026.2.28-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:511f7419f7afab475fd4d639d4aedfc54205bcb0800066753ef68a59f0f330b5", size = 784194, upload-time = "2026-02-28T02:17:06.888Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/8c/58/e93e093921d13b9784b4f69896b6e2a9e09580a265c59d9eb95e87d288f2/regex-2026.2.19-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:9dadc10d1c2bbb1326e572a226d2ec56474ab8aab26fdb8cf19419b372c349a9", size = 858910, upload-time = "2026-02-19T19:01:10.488Z" },
|
{ url = "https://files.pythonhosted.org/packages/cc/bf/2c72ab5d8b7be462cb1651b5cc333da1d0068740342f350fcca3bca31947/regex-2026.2.28-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:b42f7466e32bf15a961cf09f35fa6323cc72e64d3d2c990b10de1274a5da0a59", size = 856846, upload-time = "2026-02-28T02:17:09.11Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/85/77/ff1d25a0c56cd546e0455cbc93235beb33474899690e6a361fa6b52d265b/regex-2026.2.19-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:6bc25d7e15f80c9dc7853cbb490b91c1ec7310808b09d56bd278fe03d776f4f6", size = 764153, upload-time = "2026-02-19T19:01:12.156Z" },
|
{ url = "https://files.pythonhosted.org/packages/7c/f4/6b65c979bb6d09f51bb2d2a7bc85de73c01ec73335d7ddd202dcb8cd1c8f/regex-2026.2.28-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:8710d61737b0c0ce6836b1da7109f20d495e49b3809f30e27e9560be67a257bf", size = 763516, upload-time = "2026-02-28T02:17:11.004Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/cd/ef/8ec58df26d52d04443b1dc56f9be4b409f43ed5ae6c0248a287f52311fc4/regex-2026.2.19-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:965d59792f5037d9138da6fed50ba943162160443b43d4895b182551805aff9c", size = 850348, upload-time = "2026-02-19T19:01:14.147Z" },
|
{ url = "https://files.pythonhosted.org/packages/8e/32/29ea5e27400ee86d2cc2b4e80aa059df04eaf78b4f0c18576ae077aeff68/regex-2026.2.28-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:4390c365fd2d45278f45afd4673cb90f7285f5701607e3ad4274df08e36140ae", size = 849278, upload-time = "2026-02-28T02:17:12.693Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/f5/b3/c42fd5ed91639ce5a4225b9df909180fc95586db071f2bf7c68d2ccbfbe6/regex-2026.2.19-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:38d88c6ed4a09ed61403dbdf515d969ccba34669af3961ceb7311ecd0cef504a", size = 789977, upload-time = "2026-02-19T19:01:15.838Z" },
|
{ url = "https://files.pythonhosted.org/packages/1d/91/3233d03b5f865111cd517e1c95ee8b43e8b428d61fa73764a80c9bb6f537/regex-2026.2.28-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:cb3b1db8ff6c7b8bf838ab05583ea15230cb2f678e569ab0e3a24d1e8320940b", size = 790068, upload-time = "2026-02-28T02:17:14.9Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/d2/2d/a849835e76ac88fcf9e8784e642d3ea635d183c4112150ca91499d6703af/regex-2026.2.19-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:8df08decd339e8b3f6a2eb5c05c687fe9d963ae91f352bc57beb05f5b2ac6879", size = 489329, upload-time = "2026-02-19T19:01:23.841Z" },
|
{ url = "https://files.pythonhosted.org/packages/87/f6/dc9ef48c61b79c8201585bf37fa70cd781977da86e466cd94e8e95d2443b/regex-2026.2.28-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6d63a07e5ec8ce7184452cb00c41c37b49e67dc4f73b2955b5b8e782ea970784", size = 489311, upload-time = "2026-02-28T02:17:22.591Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/da/aa/78ff4666d3855490bae87845a5983485e765e1f970da20adffa2937b241d/regex-2026.2.19-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:3aa0944f1dc6e92f91f3b306ba7f851e1009398c84bfd370633182ee4fc26a64", size = 291308, upload-time = "2026-02-19T19:01:25.605Z" },
|
{ url = "https://files.pythonhosted.org/packages/95/c8/c20390f2232d3f7956f420f4ef1852608ad57aa26c3dd78516cb9f3dc913/regex-2026.2.28-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e59bc8f30414d283ae8ee1617b13d8112e7135cb92830f0ec3688cb29152585a", size = 291285, upload-time = "2026-02-28T02:17:24.355Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/cd/58/714384efcc07ae6beba528a541f6e99188c5cc1bc0295337f4e8a868296d/regex-2026.2.19-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c13228fbecb03eadbfd8f521732c5fda09ef761af02e920a3148e18ad0e09968", size = 289033, upload-time = "2026-02-19T19:01:27.243Z" },
|
{ url = "https://files.pythonhosted.org/packages/d2/a6/ba1068a631ebd71a230e7d8013fcd284b7c89c35f46f34a7da02082141b1/regex-2026.2.28-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:de0cf053139f96219ccfabb4a8dd2d217c8c82cb206c91d9f109f3f552d6b43d", size = 289051, upload-time = "2026-02-28T02:17:26.722Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/75/ec/6438a9344d2869cf5265236a06af1ca6d885e5848b6561e10629bc8e5a11/regex-2026.2.19-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0d0e72703c60d68b18b27cde7cdb65ed2570ae29fb37231aa3076bfb6b1d1c13", size = 798798, upload-time = "2026-02-19T19:01:28.877Z" },
|
{ url = "https://files.pythonhosted.org/packages/1d/1b/7cc3b7af4c244c204b7a80924bd3d85aecd9ba5bc82b485c5806ee8cda9e/regex-2026.2.28-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fb4db2f17e6484904f986c5a657cec85574c76b5c5e61c7aae9ffa1bc6224f95", size = 796842, upload-time = "2026-02-28T02:17:29.064Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/c2/be/b1ce2d395e3fd2ce5f2fde2522f76cade4297cfe84cd61990ff48308749c/regex-2026.2.19-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:46e69a4bf552e30e74a8aa73f473c87efcb7f6e8c8ece60d9fd7bf13d5c86f02", size = 864444, upload-time = "2026-02-19T19:01:30.933Z" },
|
{ url = "https://files.pythonhosted.org/packages/24/87/26bd03efc60e0d772ac1e7b60a2e6325af98d974e2358f659c507d3c76db/regex-2026.2.28-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:52b017b35ac2214d0db5f4f90e303634dc44e4aba4bd6235a27f97ecbe5b0472", size = 863083, upload-time = "2026-02-28T02:17:31.363Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/d5/97/a3406460c504f7136f140d9461960c25f058b0240e4424d6fb73c7a067ab/regex-2026.2.19-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8edda06079bd770f7f0cf7f3bba1a0b447b96b4a543c91fe0c142d034c166161", size = 912633, upload-time = "2026-02-19T19:01:32.744Z" },
|
{ url = "https://files.pythonhosted.org/packages/ae/54/aeaf4afb1aa0a65e40de52a61dc2ac5b00a83c6cb081c8a1d0dda74f3010/regex-2026.2.28-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:69fc560ccbf08a09dc9b52ab69cacfae51e0ed80dc5693078bdc97db2f91ae96", size = 909412, upload-time = "2026-02-28T02:17:33.248Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/8b/d9/e5dbef95008d84e9af1dc0faabbc34a7fbc8daa05bc5807c5cf86c2bec49/regex-2026.2.19-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9cbc69eae834afbf634f7c902fc72ff3e993f1c699156dd1af1adab5d06b7fe7", size = 803718, upload-time = "2026-02-19T19:01:34.61Z" },
|
{ url = "https://files.pythonhosted.org/packages/12/2f/049901def913954e640d199bbc6a7ca2902b6aeda0e5da9d17f114100ec2/regex-2026.2.28-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e61eea47230eba62a31f3e8a0e3164d0f37ef9f40529fb2c79361bc6b53d2a92", size = 802101, upload-time = "2026-02-28T02:17:35.053Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/2f/e5/61d80132690a1ef8dc48e0f44248036877aebf94235d43f63a20d1598888/regex-2026.2.19-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bcf57d30659996ee5c7937999874504c11b5a068edc9515e6a59221cc2744dd1", size = 775975, upload-time = "2026-02-19T19:01:36.525Z" },
|
{ url = "https://files.pythonhosted.org/packages/7d/a5/512fb9ff7f5b15ea204bb1967ebb649059446decacccb201381f9fa6aad4/regex-2026.2.28-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:4f5c0b182ad4269e7381b7c27fdb0408399881f7a92a4624fd5487f2971dfc11", size = 775260, upload-time = "2026-02-28T02:17:37.692Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/05/32/ae828b3b312c972cf228b634447de27237d593d61505e6ad84723f8eabba/regex-2026.2.19-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:8e6e77cd92216eb489e21e5652a11b186afe9bdefca8a2db739fd6b205a9e0a4", size = 788129, upload-time = "2026-02-19T19:01:38.498Z" },
|
{ url = "https://files.pythonhosted.org/packages/d1/a8/9a92935878aba19bd72706b9db5646a6f993d99b3f6ed42c02ec8beb1d61/regex-2026.2.28-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:96f6269a2882fbb0ee76967116b83679dc628e68eaea44e90884b8d53d833881", size = 784311, upload-time = "2026-02-28T02:17:39.855Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/cb/25/d74f34676f22bec401eddf0e5e457296941e10cbb2a49a571ca7a2c16e5a/regex-2026.2.19-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:b9ab8dec42afefa6314ea9b31b188259ffdd93f433d77cad454cd0b8d235ce1c", size = 858818, upload-time = "2026-02-19T19:01:40.409Z" },
|
{ url = "https://files.pythonhosted.org/packages/09/d3/fc51a8a738a49a6b6499626580554c9466d3ea561f2b72cfdc72e4149773/regex-2026.2.28-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:b5acd4b6a95f37c3c3828e5d053a7d4edaedb85de551db0153754924cb7c83e3", size = 856876, upload-time = "2026-02-28T02:17:42.317Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/1e/eb/0bc2b01a6b0b264e1406e5ef11cae3f634c3bd1a6e61206fd3227ce8e89c/regex-2026.2.19-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:294c0fb2e87c6bcc5f577c8f609210f5700b993151913352ed6c6af42f30f95f", size = 764186, upload-time = "2026-02-19T19:01:43.009Z" },
|
{ url = "https://files.pythonhosted.org/packages/08/b7/2e641f3d084b120ca4c52e8c762a78da0b32bf03ef546330db3e2635dc5f/regex-2026.2.28-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:2234059cfe33d9813a3677ef7667999caea9eeaa83fef98eb6ce15c6cf9e0215", size = 763632, upload-time = "2026-02-28T02:17:45.073Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/eb/37/5fe5a630d0d99ecf0c3570f8905dafbc160443a2d80181607770086c9812/regex-2026.2.19-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:c0924c64b082d4512b923ac016d6e1dcf647a3560b8a4c7e55cbbd13656cb4ed", size = 850363, upload-time = "2026-02-19T19:01:45.015Z" },
|
{ url = "https://files.pythonhosted.org/packages/fe/6d/0009021d97e79ee99f3d8641f0a8d001eed23479ade4c3125a5480bf3e2d/regex-2026.2.28-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:c15af43c72a7fb0c97cbc66fa36a43546eddc5c06a662b64a0cbf30d6ac40944", size = 849320, upload-time = "2026-02-28T02:17:47.192Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/c3/45/ef68d805294b01ec030cfd388724ba76a5a21a67f32af05b17924520cb0b/regex-2026.2.19-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:790dbf87b0361606cb0d79b393c3e8f4436a14ee56568a7463014565d97da02a", size = 790026, upload-time = "2026-02-19T19:01:47.51Z" },
|
{ url = "https://files.pythonhosted.org/packages/05/7a/51cfbad5758f8edae430cb21961a9c8d04bce1dae4d2d18d4186eec7cfa1/regex-2026.2.28-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9185cc63359862a6e80fe97f696e04b0ad9a11c4ac0a4a927f979f611bfe3768", size = 790152, upload-time = "2026-02-28T02:17:49.067Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/a9/a2/e0b4575b93bc84db3b1fab24183e008691cd2db5c0ef14ed52681fbd94dd/regex-2026.2.19-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:93d881cab5afdc41a005dba1524a40947d6f7a525057aa64aaf16065cf62faa9", size = 492202, upload-time = "2026-02-19T19:01:54.816Z" },
|
{ url = "https://files.pythonhosted.org/packages/24/07/6c7e4cec1e585959e96cbc24299d97e4437a81173217af54f1804994e911/regex-2026.2.28-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:97054c55db06ab020342cc0d35d6f62a465fa7662871190175f1ad6c655c028f", size = 492541, upload-time = "2026-02-28T02:17:56.813Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/24/b5/b84fec8cbb5f92a7eed2b6b5353a6a9eed9670fee31817c2da9eb85dc797/regex-2026.2.19-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:80caaa1ddcc942ec7be18427354f9d58a79cee82dea2a6b3d4fd83302e1240d7", size = 292884, upload-time = "2026-02-19T19:01:58.254Z" },
|
{ url = "https://files.pythonhosted.org/packages/7c/13/55eb22ada7f43d4f4bb3815b6132183ebc331c81bd496e2d1f3b8d862e0d/regex-2026.2.28-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0d25a10811de831c2baa6aef3c0be91622f44dd8d31dd12e69f6398efb15e48b", size = 292984, upload-time = "2026-02-28T02:17:58.538Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/70/0c/fe89966dfae43da46f475362401f03e4d7dc3a3c955b54f632abc52669e0/regex-2026.2.19-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:d793c5b4d2b4c668524cd1651404cfc798d40694c759aec997e196fe9729ec60", size = 291236, upload-time = "2026-02-19T19:01:59.966Z" },
|
{ url = "https://files.pythonhosted.org/packages/5b/11/c301f8cb29ce9644a5ef85104c59244e6e7e90994a0f458da4d39baa8e17/regex-2026.2.28-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:d6cfe798d8da41bb1862ed6e0cba14003d387c3c0c4a5d45591076ae9f0ce2f8", size = 291509, upload-time = "2026-02-28T02:18:00.208Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/f2/f7/bda2695134f3e63eb5cccbbf608c2a12aab93d261ff4e2fe49b47fabc948/regex-2026.2.19-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b5100acb20648d9efd3f4e7e91f51187f95f22a741dcd719548a6cf4e1b34b3f", size = 807660, upload-time = "2026-02-19T19:02:01.632Z" },
|
{ url = "https://files.pythonhosted.org/packages/b5/43/aabe384ec1994b91796e903582427bc2ffaed9c4103819ed3c16d8e749f3/regex-2026.2.28-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fd0ce43e71d825b7c0661f9c54d4d74bd97c56c3fd102a8985bcfea48236bacb", size = 809429, upload-time = "2026-02-28T02:18:02.328Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/11/56/6e3a4bf5e60d17326b7003d91bbde8938e439256dec211d835597a44972d/regex-2026.2.19-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:5e3a31e94d10e52a896adaa3adf3621bd526ad2b45b8c2d23d1bbe74c7423007", size = 873585, upload-time = "2026-02-19T19:02:03.522Z" },
|
{ url = "https://files.pythonhosted.org/packages/04/b8/8d2d987a816720c4f3109cee7c06a4b24ad0e02d4fc74919ab619e543737/regex-2026.2.28-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:00945d007fd74a9084d2ab79b695b595c6b7ba3698972fadd43e23230c6979c1", size = 869422, upload-time = "2026-02-28T02:18:04.23Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/35/5e/c90c6aa4d1317cc11839359479cfdd2662608f339e84e81ba751c8a4e461/regex-2026.2.19-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8497421099b981f67c99eba4154cf0dfd8e47159431427a11cfb6487f7791d9e", size = 915243, upload-time = "2026-02-19T19:02:05.608Z" },
|
{ url = "https://files.pythonhosted.org/packages/fc/ad/2c004509e763c0c3719f97c03eca26473bffb3868d54c5f280b8cd4f9e3d/regex-2026.2.28-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:bec23c11cbbf09a4df32fe50d57cbdd777bc442269b6e39a1775654f1c95dee2", size = 915175, upload-time = "2026-02-28T02:18:06.791Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/90/7c/981ea0694116793001496aaf9524e5c99e122ec3952d9e7f1878af3a6bf1/regex-2026.2.19-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1e7a08622f7d51d7a068f7e4052a38739c412a3e74f55817073d2e2418149619", size = 812922, upload-time = "2026-02-19T19:02:08.115Z" },
|
{ url = "https://files.pythonhosted.org/packages/55/c2/fd429066da487ef555a9da73bf214894aec77fc8c66a261ee355a69871a8/regex-2026.2.28-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5cdcc17d935c8f9d3f4db5c2ebe2640c332e3822ad5d23c2f8e0228e6947943a", size = 812044, upload-time = "2026-02-28T02:18:08.736Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/2d/be/9eda82afa425370ffdb3fa9f3ea42450b9ae4da3ff0a4ec20466f69e371b/regex-2026.2.19-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:8abe671cf0f15c26b1ad389bf4043b068ce7d3b1c5d9313e12895f57d6738555", size = 781318, upload-time = "2026-02-19T19:02:10.072Z" },
|
{ url = "https://files.pythonhosted.org/packages/5b/ca/feedb7055c62a3f7f659971bf45f0e0a87544b6b0cf462884761453f97c5/regex-2026.2.28-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a448af01e3d8031c89c5d902040b124a5e921a25c4e5e07a861ca591ce429341", size = 782056, upload-time = "2026-02-28T02:18:10.777Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/c6/d5/50f0bbe56a8199f60a7b6c714e06e54b76b33d31806a69d0703b23ce2a9e/regex-2026.2.19-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:5a8f28dd32a4ce9c41758d43b5b9115c1c497b4b1f50c457602c1d571fa98ce1", size = 795649, upload-time = "2026-02-19T19:02:11.96Z" },
|
{ url = "https://files.pythonhosted.org/packages/95/30/1aa959ed0d25c1dd7dd5047ea8ba482ceaef38ce363c401fd32a6b923e60/regex-2026.2.28-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:10d28e19bd4888e4abf43bd3925f3c134c52fdf7259219003588a42e24c2aa25", size = 798743, upload-time = "2026-02-28T02:18:13.025Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/c5/09/d039f081e44a8b0134d0bb2dd805b0ddf390b69d0b58297ae098847c572f/regex-2026.2.19-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:654dc41a5ba9b8cc8432b3f1aa8906d8b45f3e9502442a07c2f27f6c63f85db5", size = 868844, upload-time = "2026-02-19T19:02:14.043Z" },
|
{ url = "https://files.pythonhosted.org/packages/3b/1f/dadb9cf359004784051c897dcf4d5d79895f73a1bbb7b827abaa4814ae80/regex-2026.2.28-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:99985a2c277dcb9ccb63f937451af5d65177af1efdeb8173ac55b61095a0a05c", size = 864633, upload-time = "2026-02-28T02:18:16.84Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/ef/53/e2903b79a19ec8557fe7cd21cd093956ff2dbc2e0e33969e3adbe5b184dd/regex-2026.2.19-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:4a02faea614e7fdd6ba8b3bec6c8e79529d356b100381cec76e638f45d12ca04", size = 770113, upload-time = "2026-02-19T19:02:16.161Z" },
|
{ url = "https://files.pythonhosted.org/packages/a7/f1/b9a25eb24e1cf79890f09e6ec971ee5b511519f1851de3453bc04f6c902b/regex-2026.2.28-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:e1e7b24cb3ae9953a560c563045d1ba56ee4749fbd05cf21ba571069bd7be81b", size = 770862, upload-time = "2026-02-28T02:18:18.892Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/8f/e2/784667767b55714ebb4e59bf106362327476b882c0b2f93c25e84cc99b1a/regex-2026.2.19-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:d96162140bb819814428800934c7b71b7bffe81fb6da2d6abc1dcca31741eca3", size = 854922, upload-time = "2026-02-19T19:02:18.155Z" },
|
{ url = "https://files.pythonhosted.org/packages/02/9a/c5cb10b7aa6f182f9247a30cc9527e326601f46f4df864ac6db588d11fcd/regex-2026.2.28-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:d8511a01d0e4ee1992eb3ba19e09bc1866fe03f05129c3aec3fdc4cbc77aad3f", size = 854788, upload-time = "2026-02-28T02:18:21.475Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/59/78/9ef4356bd4aed752775bd18071034979b85f035fec51f3a4f9dea497a254/regex-2026.2.19-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:c227f2922153ee42bbeb355fd6d009f8c81d9d7bdd666e2276ce41f53ed9a743", size = 799636, upload-time = "2026-02-19T19:02:20.04Z" },
|
{ url = "https://files.pythonhosted.org/packages/0a/50/414ba0731c4bd40b011fa4703b2cc86879ec060c64f2a906e65a56452589/regex-2026.2.28-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:aaffaecffcd2479ce87aa1e74076c221700b7c804e48e98e62500ee748f0f550", size = 800184, upload-time = "2026-02-28T02:18:23.492Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/2d/e2/7ad4e76a6dddefc0d64dbe12a4d3ca3947a19ddc501f864a5df2a8222ddd/regex-2026.2.19-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:03d191a9bcf94d31af56d2575210cb0d0c6a054dbcad2ea9e00aa4c42903b919", size = 489306, upload-time = "2026-02-19T19:02:29.058Z" },
|
{ url = "https://files.pythonhosted.org/packages/cf/03/691015f7a7cb1ed6dacb2ea5de5682e4858e05a4c5506b2839cd533bbcd6/regex-2026.2.28-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:78454178c7df31372ea737996fb7f36b3c2c92cccc641d251e072478afb4babc", size = 489497, upload-time = "2026-02-28T02:18:30.889Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/14/95/ee1736135733afbcf1846c58671046f99c4d5170102a150ebb3dd8d701d9/regex-2026.2.19-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:516ee067c6c721d0d0bfb80a2004edbd060fffd07e456d4e1669e38fe82f922e", size = 291218, upload-time = "2026-02-19T19:02:31.083Z" },
|
{ url = "https://files.pythonhosted.org/packages/c6/ba/8db8fd19afcbfa0e1036eaa70c05f20ca8405817d4ad7a38a6b4c2f031ac/regex-2026.2.28-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:5d10303dd18cedfd4d095543998404df656088240bcfd3cd20a8f95b861f74bd", size = 291295, upload-time = "2026-02-28T02:18:33.426Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/ef/08/180d1826c3d7065200a5168c6b993a44947395c7bb6e04b2c2a219c34225/regex-2026.2.19-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:997862c619994c4a356cb7c3592502cbd50c2ab98da5f61c5c871f10f22de7e5", size = 289097, upload-time = "2026-02-19T19:02:33.485Z" },
|
{ url = "https://files.pythonhosted.org/packages/5a/79/9aa0caf089e8defef9b857b52fc53801f62ff868e19e5c83d4a96612eba1/regex-2026.2.28-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:19a9c9e0a8f24f39d575a6a854d516b48ffe4cbdcb9de55cb0570a032556ecff", size = 289275, upload-time = "2026-02-28T02:18:35.247Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/28/93/0651924c390c5740f5f896723f8ddd946a6c63083a7d8647231c343912ff/regex-2026.2.19-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:02b9e1b8a7ebe2807cd7bbdf662510c8e43053a23262b9f46ad4fc2dfc9d204e", size = 799147, upload-time = "2026-02-19T19:02:35.669Z" },
|
{ url = "https://files.pythonhosted.org/packages/eb/26/ee53117066a30ef9c883bf1127eece08308ccf8ccd45c45a966e7a665385/regex-2026.2.28-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:09500be324f49b470d907b3ef8af9afe857f5cca486f853853f7945ddbf75911", size = 797176, upload-time = "2026-02-28T02:18:37.15Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/a7/00/2078bd8bcd37d58a756989adbfd9f1d0151b7ca4085a9c2a07e917fbac61/regex-2026.2.19-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:6c8fb3b19652e425ff24169dad3ee07f99afa7996caa9dfbb3a9106cd726f49a", size = 865239, upload-time = "2026-02-19T19:02:38.012Z" },
|
{ url = "https://files.pythonhosted.org/packages/05/1b/67fb0495a97259925f343ae78b5d24d4a6624356ae138b57f18bd43006e4/regex-2026.2.28-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:fb1c4ff62277d87a7335f2c1ea4e0387b8f2b3ad88a64efd9943906aafad4f33", size = 863813, upload-time = "2026-02-28T02:18:39.478Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/2a/13/75195161ec16936b35a365fa8c1dd2ab29fd910dd2587765062b174d8cfc/regex-2026.2.19-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:50f1ee9488dd7a9fda850ec7c68cad7a32fa49fd19733f5403a3f92b451dcf73", size = 911904, upload-time = "2026-02-19T19:02:40.737Z" },
|
{ url = "https://files.pythonhosted.org/packages/a0/1d/93ac9bbafc53618091c685c7ed40239a90bf9f2a82c983f0baa97cb7ae07/regex-2026.2.28-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b8b3f1be1738feadc69f62daa250c933e85c6f34fa378f54a7ff43807c1b9117", size = 908678, upload-time = "2026-02-28T02:18:41.619Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/96/72/ac42f6012179343d1c4bd0ffee8c948d841cb32ea188d37e96d80527fcc9/regex-2026.2.19-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ab780092b1424d13200aa5a62996e95f65ee3db8509be366437439cdc0af1a9f", size = 803518, upload-time = "2026-02-19T19:02:42.923Z" },
|
{ url = "https://files.pythonhosted.org/packages/c7/7a/a8f5e0561702b25239846a16349feece59712ae20598ebb205580332a471/regex-2026.2.28-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dc8ed8c3f41c27acb83f7b6a9eb727a73fc6663441890c5cb3426a5f6a91ce7d", size = 801528, upload-time = "2026-02-28T02:18:43.624Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/bc/d1/75a08e2269b007b9783f0f86aa64488e023141219cb5f14dc1e69cda56c6/regex-2026.2.19-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:17648e1a88e72d88641b12635e70e6c71c5136ba14edba29bf8fc6834005a265", size = 775866, upload-time = "2026-02-19T19:02:45.189Z" },
|
{ url = "https://files.pythonhosted.org/packages/96/5d/ed6d4cbde80309854b1b9f42d9062fee38ade15f7eb4909f6ef2440403b5/regex-2026.2.28-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:fa539be029844c0ce1114762d2952ab6cfdd7c7c9bd72e0db26b94c3c36dcc5a", size = 775373, upload-time = "2026-02-28T02:18:46.102Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/92/41/70e7d05faf6994c2ca7a9fcaa536da8f8e4031d45b0ec04b57040ede201f/regex-2026.2.19-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:2f914ae8c804c8a8a562fe216100bc156bfb51338c1f8d55fe32cf407774359a", size = 788224, upload-time = "2026-02-19T19:02:47.804Z" },
|
{ url = "https://files.pythonhosted.org/packages/6a/e9/6e53c34e8068b9deec3e87210086ecb5b9efebdefca6b0d3fa43d66dcecb/regex-2026.2.28-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7900157786428a79615a8264dac1f12c9b02957c473c8110c6b1f972dcecaddf", size = 784859, upload-time = "2026-02-28T02:18:48.269Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/c8/83/34a2dd601f9deb13c20545c674a55f4a05c90869ab73d985b74d639bac43/regex-2026.2.19-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:c7e121a918bbee3f12ac300ce0a0d2f2c979cf208fb071ed8df5a6323281915c", size = 859682, upload-time = "2026-02-19T19:02:50.583Z" },
|
{ url = "https://files.pythonhosted.org/packages/48/3c/736e1c7ca7f0dcd2ae33819888fdc69058a349b7e5e84bc3e2f296bbf794/regex-2026.2.28-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:0b1d2b07614d95fa2bf8a63fd1e98bd8fa2b4848dc91b1efbc8ba219fdd73952", size = 857813, upload-time = "2026-02-28T02:18:50.576Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/8e/30/136db9a09a7f222d6e48b806f3730e7af6499a8cad9c72ac0d49d52c746e/regex-2026.2.19-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:2fedd459c791da24914ecc474feecd94cf7845efb262ac3134fe27cbd7eda799", size = 764223, upload-time = "2026-02-19T19:02:52.777Z" },
|
{ url = "https://files.pythonhosted.org/packages/6e/7c/48c4659ad9da61f58e79dbe8c05223e0006696b603c16eb6b5cbfbb52c27/regex-2026.2.28-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:b389c61aa28a79c2e0527ac36da579869c2e235a5b208a12c5b5318cda2501d8", size = 763705, upload-time = "2026-02-28T02:18:52.59Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/9e/ea/bb947743c78a16df481fa0635c50aa1a439bb80b0e6dc24cd4e49c716679/regex-2026.2.19-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:ea8dfc99689240e61fb21b5fc2828f68b90abf7777d057b62d3166b7c1543c4c", size = 850101, upload-time = "2026-02-19T19:02:55.87Z" },
|
{ url = "https://files.pythonhosted.org/packages/cf/a1/bc1c261789283128165f71b71b4b221dd1b79c77023752a6074c102f18d8/regex-2026.2.28-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:f467cb602f03fbd1ab1908f68b53c649ce393fde056628dc8c7e634dab6bfc07", size = 848734, upload-time = "2026-02-28T02:18:54.595Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/25/27/e3bfe6e97a99f7393665926be02fef772da7f8aa59e50bc3134e4262a032/regex-2026.2.19-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:9fff45852160960f29e184ec8a5be5ab4063cfd0b168d439d1fc4ac3744bf29e", size = 789904, upload-time = "2026-02-19T19:02:58.523Z" },
|
{ url = "https://files.pythonhosted.org/packages/10/d8/979407faf1397036e25a5ae778157366a911c0f382c62501009f4957cf86/regex-2026.2.28-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e8c8cb2deba42f5ec1ede46374e990f8adc5e6456a57ac1a261b19be6f28e4e6", size = 789871, upload-time = "2026-02-28T02:18:57.34Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/49/0b/f901cfeb4efd83e4f5c3e9f91a6de77e8e5ceb18555698aca3a27e215ed3/regex-2026.2.19-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:5ec1d7c080832fdd4e150c6f5621fe674c70c63b3ae5a4454cebd7796263b175", size = 492196, upload-time = "2026-02-19T19:03:08.188Z" },
|
{ url = "https://files.pythonhosted.org/packages/d3/eb/8389f9e940ac89bcf58d185e230a677b4fd07c5f9b917603ad5c0f8fa8fe/regex-2026.2.28-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:a5dac14d0872eeb35260a8e30bac07ddf22adc1e3a0635b52b02e180d17c9c7e", size = 492546, upload-time = "2026-02-28T02:19:05.378Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/94/0a/349b959e3da874e15eda853755567b4cde7e5309dbb1e07bfe910cfde452/regex-2026.2.19-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:8457c1bc10ee9b29cdfd897ccda41dce6bde0e9abd514bcfef7bcd05e254d411", size = 292878, upload-time = "2026-02-19T19:03:10.272Z" },
|
{ url = "https://files.pythonhosted.org/packages/7b/c7/09441d27ce2a6fa6a61ea3150ea4639c1dcda9b31b2ea07b80d6937b24dd/regex-2026.2.28-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:ec0c608b7a7465ffadb344ed7c987ff2f11ee03f6a130b569aa74d8a70e8333c", size = 292986, upload-time = "2026-02-28T02:19:07.24Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/98/b0/9d81b3c2c5ddff428f8c506713737278979a2c476f6e3675a9c51da0c389/regex-2026.2.19-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:cce8027010d1ffa3eb89a0b19621cdc78ae548ea2b49fea1f7bfb3ea77064c2b", size = 291235, upload-time = "2026-02-19T19:03:12.5Z" },
|
{ url = "https://files.pythonhosted.org/packages/fb/69/4144b60ed7760a6bd235e4087041f487aa4aa62b45618ce018b0c14833ea/regex-2026.2.28-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c7815afb0ca45456613fdaf60ea9c993715511c8d53a83bc468305cbc0ee23c7", size = 291518, upload-time = "2026-02-28T02:19:09.698Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/04/e7/be7818df8691dbe9508c381ea2cc4c1153e4fdb1c4b06388abeaa93bd712/regex-2026.2.19-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:11c138febb40546ff9e026dbbc41dc9fb8b29e61013fa5848ccfe045f5b23b83", size = 807893, upload-time = "2026-02-19T19:03:15.064Z" },
|
{ url = "https://files.pythonhosted.org/packages/2d/be/77e5426cf5948c82f98c53582009ca9e94938c71f73a8918474f2e2990bb/regex-2026.2.28-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b059e71ec363968671693a78c5053bd9cb2fe410f9b8e4657e88377ebd603a2e", size = 809464, upload-time = "2026-02-28T02:19:12.494Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/0c/b6/b898a8b983190cfa0276031c17beb73cfd1db07c03c8c37f606d80b655e2/regex-2026.2.19-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:74ff212aa61532246bb3036b3dfea62233414b0154b8bc3676975da78383cac3", size = 873696, upload-time = "2026-02-19T19:03:17.848Z" },
|
{ url = "https://files.pythonhosted.org/packages/45/99/2c8c5ac90dc7d05c6e7d8e72c6a3599dc08cd577ac476898e91ca787d7f1/regex-2026.2.28-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b8cf76f1a29f0e99dcfd7aef1551a9827588aae5a737fe31442021165f1920dc", size = 869553, upload-time = "2026-02-28T02:19:15.151Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/1a/98/126ba671d54f19080ec87cad228fb4f3cc387fff8c4a01cb4e93f4ff9d94/regex-2026.2.19-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d00c95a2b6bfeb3ea1cb68d1751b1dfce2b05adc2a72c488d77a780db06ab867", size = 915493, upload-time = "2026-02-19T19:03:20.343Z" },
|
{ url = "https://files.pythonhosted.org/packages/53/34/daa66a342f0271e7737003abf6c3097aa0498d58c668dbd88362ef94eb5d/regex-2026.2.28-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:180e08a435a0319e6a4821c3468da18dc7001987e1c17ae1335488dfe7518dd8", size = 915289, upload-time = "2026-02-28T02:19:17.331Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/b2/10/550c84a1a1a7371867fe8be2bea7df55e797cbca4709974811410e195c5d/regex-2026.2.19-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:311fcccb76af31be4c588d5a17f8f1a059ae8f4b097192896ebffc95612f223a", size = 813094, upload-time = "2026-02-19T19:03:23.287Z" },
|
{ url = "https://files.pythonhosted.org/packages/c5/c7/e22c2aaf0a12e7e22ab19b004bb78d32ca1ecc7ef245949935463c5567de/regex-2026.2.28-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1e496956106fd59ba6322a8ea17141a27c5040e5ee8f9433ae92d4e5204462a0", size = 812156, upload-time = "2026-02-28T02:19:20.011Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/29/fb/ba221d2fc76a27b6b7d7a60f73a7a6a7bac21c6ba95616a08be2bcb434b0/regex-2026.2.19-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:77cfd6b5e7c4e8bf7a39d243ea05882acf5e3c7002b0ef4756de6606893b0ecd", size = 781583, upload-time = "2026-02-19T19:03:26.872Z" },
|
{ url = "https://files.pythonhosted.org/packages/7f/bb/2dc18c1efd9051cf389cd0d7a3a4d90f6804b9fff3a51b5dc3c85b935f71/regex-2026.2.28-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bba2b18d70eeb7b79950f12f633beeecd923f7c9ad6f6bae28e59b4cb3ab046b", size = 782215, upload-time = "2026-02-28T02:19:22.047Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/26/f1/af79231301297c9e962679efc04a31361b58dc62dec1fc0cb4b8dd95956a/regex-2026.2.19-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:6380f29ff212ec922b6efb56100c089251940e0526a0d05aa7c2d9b571ddf2fe", size = 795875, upload-time = "2026-02-19T19:03:29.223Z" },
|
{ url = "https://files.pythonhosted.org/packages/17/1e/9e4ec9b9013931faa32226ec4aa3c71fe664a6d8a2b91ac56442128b332f/regex-2026.2.28-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:6db7bfae0f8a2793ff1f7021468ea55e2699d0790eb58ee6ab36ae43aa00bc5b", size = 798925, upload-time = "2026-02-28T02:19:24.173Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/a0/90/1e1d76cb0a2d0a4f38a039993e1c5cd971ae50435d751c5bae4f10e1c302/regex-2026.2.19-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:655f553a1fa3ab8a7fd570eca793408b8d26a80bfd89ed24d116baaf13a38969", size = 868916, upload-time = "2026-02-19T19:03:31.415Z" },
|
{ url = "https://files.pythonhosted.org/packages/71/57/a505927e449a9ccb41e2cc8d735e2abe3444b0213d1cf9cb364a8c1f2524/regex-2026.2.28-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:d0b02e8b7e5874b48ae0f077ecca61c1a6a9f9895e9c6dfb191b55b242862033", size = 864701, upload-time = "2026-02-28T02:19:26.376Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/9a/67/a1c01da76dbcfed690855a284c665cc0a370e7d02d1bd635cf9ff7dd74b8/regex-2026.2.19-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:015088b8558502f1f0bccd58754835aa154a7a5b0bd9d4c9b7b96ff4ae9ba876", size = 770386, upload-time = "2026-02-19T19:03:33.972Z" },
|
{ url = "https://files.pythonhosted.org/packages/a6/ad/c62cb60cdd93e13eac5b3d9d6bd5d284225ed0e3329426f94d2552dd7cca/regex-2026.2.28-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:25b6eb660c5cf4b8c3407a1ed462abba26a926cc9965e164268a3267bcc06a43", size = 770899, upload-time = "2026-02-28T02:19:29.38Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/49/6f/94842bf294f432ff3836bfd91032e2ecabea6d284227f12d1f935318c9c4/regex-2026.2.19-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:9e6693b8567a59459b5dda19104c4a4dbbd4a1c78833eacc758796f2cfef1854", size = 855007, upload-time = "2026-02-19T19:03:36.238Z" },
|
{ url = "https://files.pythonhosted.org/packages/3c/5a/874f861f5c3d5ab99633e8030dee1bc113db8e0be299d1f4b07f5b5ec349/regex-2026.2.28-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:5a932ea8ad5d0430351ff9c76c8db34db0d9f53c1d78f06022a21f4e290c5c18", size = 854727, upload-time = "2026-02-28T02:19:31.494Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/ff/93/393cd203ca0d1d368f05ce12d2c7e91a324bc93c240db2e6d5ada05835f4/regex-2026.2.19-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:4071209fd4376ab5ceec72ad3507e9d3517c59e38a889079b98916477a871868", size = 799863, upload-time = "2026-02-19T19:03:38.497Z" },
|
{ url = "https://files.pythonhosted.org/packages/6b/ca/d2c03b0efde47e13db895b975b2be6a73ed90b8ba963677927283d43bf74/regex-2026.2.28-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:1c2c95e1a2b0f89d01e821ff4de1be4b5d73d1f4b0bf679fa27c1ad8d2327f1a", size = 800366, upload-time = "2026-02-28T02:19:34.248Z" },
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -4172,24 +4241,24 @@ wheels = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "ruff"
|
name = "ruff"
|
||||||
version = "0.15.4"
|
version = "0.15.5"
|
||||||
source = { registry = "https://pypi.org/simple" }
|
source = { registry = "https://pypi.org/simple" }
|
||||||
sdist = { url = "https://files.pythonhosted.org/packages/da/31/d6e536cdebb6568ae75a7f00e4b4819ae0ad2640c3604c305a0428680b0c/ruff-0.15.4.tar.gz", hash = "sha256:3412195319e42d634470cc97aa9803d07e9d5c9223b99bcb1518f0c725f26ae1", size = 4569550, upload-time = "2026-02-26T20:04:14.959Z" }
|
sdist = { url = "https://files.pythonhosted.org/packages/77/9b/840e0039e65fcf12758adf684d2289024d6140cde9268cc59887dc55189c/ruff-0.15.5.tar.gz", hash = "sha256:7c3601d3b6d76dce18c5c824fc8d06f4eef33d6df0c21ec7799510cde0f159a2", size = 4574214, upload-time = "2026-03-05T20:06:34.946Z" }
|
||||||
wheels = [
|
wheels = [
|
||||||
{ url = "https://files.pythonhosted.org/packages/f2/82/c11a03cfec3a4d26a0ea1e571f0f44be5993b923f905eeddfc397c13d360/ruff-0.15.4-py3-none-linux_armv6l.whl", hash = "sha256:a1810931c41606c686bae8b5b9a8072adac2f611bb433c0ba476acba17a332e0", size = 10453333, upload-time = "2026-02-26T20:04:20.093Z" },
|
{ url = "https://files.pythonhosted.org/packages/47/20/5369c3ce21588c708bcbe517a8fbe1a8dfdb5dfd5137e14790b1da71612c/ruff-0.15.5-py3-none-linux_armv6l.whl", hash = "sha256:4ae44c42281f42e3b06b988e442d344a5b9b72450ff3c892e30d11b29a96a57c", size = 10478185, upload-time = "2026-03-05T20:06:29.093Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/ce/5d/6a1f271f6e31dffb31855996493641edc3eef8077b883eaf007a2f1c2976/ruff-0.15.4-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:5a1632c66672b8b4d3e1d1782859e98d6e0b4e70829530666644286600a33992", size = 10853356, upload-time = "2026-02-26T20:04:05.808Z" },
|
{ url = "https://files.pythonhosted.org/packages/44/ed/e81dd668547da281e5dce710cf0bc60193f8d3d43833e8241d006720e42b/ruff-0.15.5-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6edd3792d408ebcf61adabc01822da687579a1a023f297618ac27a5b51ef0080", size = 10859201, upload-time = "2026-03-05T20:06:32.632Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/b1/d8/0fab9f8842b83b1a9c2bf81b85063f65e93fb512e60effa95b0be49bfc54/ruff-0.15.4-py3-none-macosx_11_0_arm64.whl", hash = "sha256:a4386ba2cd6c0f4ff75252845906acc7c7c8e1ac567b7bc3d373686ac8c222ba", size = 10187434, upload-time = "2026-02-26T20:03:54.656Z" },
|
{ url = "https://files.pythonhosted.org/packages/c4/8f/533075f00aaf19b07c5cd6aa6e5d89424b06b3b3f4583bfa9c640a079059/ruff-0.15.5-py3-none-macosx_11_0_arm64.whl", hash = "sha256:89f463f7c8205a9f8dea9d658d59eff49db05f88f89cc3047fb1a02d9f344010", size = 10184752, upload-time = "2026-03-05T20:06:40.312Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/85/cc/cc220fd9394eff5db8d94dec199eec56dd6c9f3651d8869d024867a91030/ruff-0.15.4-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b2496488bdfd3732747558b6f95ae427ff066d1fcd054daf75f5a50674411e75", size = 10535456, upload-time = "2026-02-26T20:03:52.738Z" },
|
{ url = "https://files.pythonhosted.org/packages/66/0e/ba49e2c3fa0395b3152bad634c7432f7edfc509c133b8f4529053ff024fb/ruff-0.15.5-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ba786a8295c6574c1116704cf0b9e6563de3432ac888d8f83685654fe528fd65", size = 10534857, upload-time = "2026-03-05T20:06:19.581Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/fa/0f/bced38fa5cf24373ec767713c8e4cadc90247f3863605fb030e597878661/ruff-0.15.4-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3f1c4893841ff2d54cbda1b2860fa3260173df5ddd7b95d370186f8a5e66a4ac", size = 10287772, upload-time = "2026-02-26T20:04:08.138Z" },
|
{ url = "https://files.pythonhosted.org/packages/59/71/39234440f27a226475a0659561adb0d784b4d247dfe7f43ffc12dd02e288/ruff-0.15.5-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fd4b801e57955fe9f02b31d20375ab3a5c4415f2e5105b79fb94cf2642c91440", size = 10309120, upload-time = "2026-03-05T20:06:00.435Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/2b/90/58a1802d84fed15f8f281925b21ab3cecd813bde52a8ca033a4de8ab0e7a/ruff-0.15.4-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:820b8766bd65503b6c30aaa6331e8ef3a6e564f7999c844e9a547c40179e440a", size = 11049051, upload-time = "2026-02-26T20:04:03.53Z" },
|
{ url = "https://files.pythonhosted.org/packages/f5/87/4140aa86a93df032156982b726f4952aaec4a883bb98cb6ef73c347da253/ruff-0.15.5-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:391f7c73388f3d8c11b794dbbc2959a5b5afe66642c142a6effa90b45f6f5204", size = 11047428, upload-time = "2026-03-05T20:05:51.867Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/d2/ac/b7ad36703c35f3866584564dc15f12f91cb1a26a897dc2fd13d7cb3ae1af/ruff-0.15.4-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c9fb74bab47139c1751f900f857fa503987253c3ef89129b24ed375e72873e85", size = 11890494, upload-time = "2026-02-26T20:04:10.497Z" },
|
{ url = "https://files.pythonhosted.org/packages/5a/f7/4953e7e3287676f78fbe85e3a0ca414c5ca81237b7575bdadc00229ac240/ruff-0.15.5-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8dc18f30302e379fe1e998548b0f5e9f4dff907f52f73ad6da419ea9c19d66c8", size = 11914251, upload-time = "2026-03-05T20:06:22.887Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/93/3d/3eb2f47a39a8b0da99faf9c54d3eb24720add1e886a5309d4d1be73a6380/ruff-0.15.4-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f80c98765949c518142b3a50a5db89343aa90f2c2bf7799de9986498ae6176db", size = 11326221, upload-time = "2026-02-26T20:04:12.84Z" },
|
{ url = "https://files.pythonhosted.org/packages/77/46/0f7c865c10cf896ccf5a939c3e84e1cfaeed608ff5249584799a74d33835/ruff-0.15.5-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1cc6e7f90087e2d27f98dc34ed1b3ab7c8f0d273cc5431415454e22c0bd2a681", size = 11333801, upload-time = "2026-03-05T20:05:57.168Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/ff/90/bf134f4c1e5243e62690e09d63c55df948a74084c8ac3e48a88468314da6/ruff-0.15.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:451a2e224151729b3b6c9ffb36aed9091b2996fe4bdbd11f47e27d8f2e8888ec", size = 11168459, upload-time = "2026-02-26T20:04:00.969Z" },
|
{ url = "https://files.pythonhosted.org/packages/d3/01/a10fe54b653061585e655f5286c2662ebddb68831ed3eaebfb0eb08c0a16/ruff-0.15.5-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c1cb7169f53c1ddb06e71a9aebd7e98fc0fea936b39afb36d8e86d36ecc2636a", size = 11206821, upload-time = "2026-03-05T20:06:03.441Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/b5/e5/a64d27688789b06b5d55162aafc32059bb8c989c61a5139a36e1368285eb/ruff-0.15.4-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:a8f157f2e583c513c4f5f896163a93198297371f34c04220daf40d133fdd4f7f", size = 11104366, upload-time = "2026-02-26T20:03:48.099Z" },
|
{ url = "https://files.pythonhosted.org/packages/7a/0d/2132ceaf20c5e8699aa83da2706ecb5c5dcdf78b453f77edca7fb70f8a93/ruff-0.15.5-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:9b037924500a31ee17389b5c8c4d88874cc6ea8e42f12e9c61a3d754ff72f1ca", size = 11133326, upload-time = "2026-03-05T20:06:25.655Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/f1/f6/32d1dcb66a2559763fc3027bdd65836cad9eb09d90f2ed6a63d8e9252b02/ruff-0.15.4-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:917cc68503357021f541e69b35361c99387cdbbf99bd0ea4aa6f28ca99ff5338", size = 10510887, upload-time = "2026-02-26T20:03:45.771Z" },
|
{ url = "https://files.pythonhosted.org/packages/72/cb/2e5259a7eb2a0f87c08c0fe5bf5825a1e4b90883a52685524596bfc93072/ruff-0.15.5-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:65bb414e5b4eadd95a8c1e4804f6772bbe8995889f203a01f77ddf2d790929dd", size = 10510820, upload-time = "2026-03-05T20:06:37.79Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/ff/92/22d1ced50971c5b6433aed166fcef8c9343f567a94cf2b9d9089f6aa80fe/ruff-0.15.4-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:e9737c8161da79fd7cfec19f1e35620375bd8b2a50c3e77fa3d2c16f574105cc", size = 10285939, upload-time = "2026-02-26T20:04:22.42Z" },
|
{ url = "https://files.pythonhosted.org/packages/ff/20/b67ce78f9e6c59ffbdb5b4503d0090e749b5f2d31b599b554698a80d861c/ruff-0.15.5-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:d20aa469ae3b57033519c559e9bc9cd9e782842e39be05b50e852c7c981fa01d", size = 10302395, upload-time = "2026-03-05T20:05:54.504Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/e6/f4/7c20aec3143837641a02509a4668fb146a642fd1211846634edc17eb5563/ruff-0.15.4-py3-none-musllinux_1_2_i686.whl", hash = "sha256:291258c917539e18f6ba40482fe31d6f5ac023994ee11d7bdafd716f2aab8a68", size = 10765471, upload-time = "2026-02-26T20:03:58.924Z" },
|
{ url = "https://files.pythonhosted.org/packages/5f/e5/719f1acccd31b720d477751558ed74e9c88134adcc377e5e886af89d3072/ruff-0.15.5-py3-none-musllinux_1_2_i686.whl", hash = "sha256:15388dd28c9161cdb8eda68993533acc870aa4e646a0a277aa166de9ad5a8752", size = 10754069, upload-time = "2026-03-05T20:06:06.422Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/d0/09/6d2f7586f09a16120aebdff8f64d962d7c4348313c77ebb29c566cefc357/ruff-0.15.4-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:3f83c45911da6f2cd5936c436cf86b9f09f09165f033a99dcf7477e34041cbc3", size = 11263382, upload-time = "2026-02-26T20:04:24.424Z" },
|
{ url = "https://files.pythonhosted.org/packages/c3/9c/d1db14469e32d98f3ca27079dbd30b7b44dbb5317d06ab36718dee3baf03/ruff-0.15.5-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:b30da330cbd03bed0c21420b6b953158f60c74c54c5f4c1dabbdf3a57bf355d2", size = 11304315, upload-time = "2026-03-05T20:06:10.867Z" },
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
|||||||
Reference in New Issue
Block a user