Compare commits

..

3 Commits

Author SHA1 Message Date
Trenton H
3ae0b8e219 Fixes logging so I can see it 2026-03-06 12:04:54 -08:00
Trenton H
4a6fd02492 Batch based iteration and bulk updates, with chunked file reading 2026-03-06 11:44:41 -08:00
Trenton H
76fb8f3770 Transitions to SHA256 based checksums 2026-03-06 11:33:33 -08:00
64 changed files with 1549 additions and 2790 deletions

View File

@@ -3,9 +3,21 @@ on:
push:
branches-ignore:
- 'translations**'
paths:
- 'src/**'
- 'pyproject.toml'
- 'uv.lock'
- 'docker/compose/docker-compose.ci-test.yml'
- '.github/workflows/ci-backend.yml'
pull_request:
branches-ignore:
- 'translations**'
paths:
- 'src/**'
- 'pyproject.toml'
- 'uv.lock'
- 'docker/compose/docker-compose.ci-test.yml'
- '.github/workflows/ci-backend.yml'
workflow_dispatch:
concurrency:
group: backend-${{ github.event.pull_request.number || github.ref }}
@@ -14,55 +26,7 @@ env:
DEFAULT_UV_VERSION: "0.10.x"
NLTK_DATA: "/usr/share/nltk_data"
jobs:
changes:
name: Detect Backend Changes
runs-on: ubuntu-slim
outputs:
backend_changed: ${{ steps.force.outputs.run_all == 'true' || steps.filter.outputs.backend == 'true' }}
steps:
- name: Checkout
uses: actions/checkout@v6.0.2
with:
fetch-depth: 0
- name: Decide run mode
id: force
run: |
if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
echo "run_all=true" >> "$GITHUB_OUTPUT"
elif [[ "${{ github.event_name }}" == "push" && ( "${{ github.ref_name }}" == "main" || "${{ github.ref_name }}" == "dev" ) ]]; then
echo "run_all=true" >> "$GITHUB_OUTPUT"
else
echo "run_all=false" >> "$GITHUB_OUTPUT"
fi
- name: Set diff range
id: range
if: steps.force.outputs.run_all != 'true'
run: |
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
echo "base=${{ github.event.pull_request.base.sha }}" >> "$GITHUB_OUTPUT"
elif [[ "${{ github.event.created }}" == "true" ]]; then
echo "base=origin/${{ github.event.repository.default_branch }}" >> "$GITHUB_OUTPUT"
else
echo "base=${{ github.event.before }}" >> "$GITHUB_OUTPUT"
fi
echo "ref=${{ github.sha }}" >> "$GITHUB_OUTPUT"
- name: Detect changes
id: filter
if: steps.force.outputs.run_all != 'true'
uses: dorny/paths-filter@v3.0.2
with:
base: ${{ steps.range.outputs.base }}
ref: ${{ steps.range.outputs.ref }}
filters: |
backend:
- 'src/**'
- 'pyproject.toml'
- 'uv.lock'
- 'docker/compose/docker-compose.ci-test.yml'
- '.github/workflows/ci-backend.yml'
test:
needs: changes
if: needs.changes.outputs.backend_changed == 'true'
name: "Python ${{ matrix.python-version }}"
runs-on: ubuntu-24.04
strategy:
@@ -136,8 +100,6 @@ jobs:
docker compose --file docker/compose/docker-compose.ci-test.yml logs
docker compose --file docker/compose/docker-compose.ci-test.yml down
typing:
needs: changes
if: needs.changes.outputs.backend_changed == 'true'
name: Check project typing
runs-on: ubuntu-24.04
env:
@@ -188,27 +150,3 @@ jobs:
--show-error-codes \
--warn-unused-configs \
src/ | uv run mypy-baseline filter
gate:
name: Backend CI Gate
needs: [changes, test, typing]
if: always()
runs-on: ubuntu-slim
steps:
- name: Check gate
run: |
if [[ "${{ needs.changes.outputs.backend_changed }}" != "true" ]]; then
echo "No backend-relevant changes detected."
exit 0
fi
if [[ "${{ needs.test.result }}" != "success" ]]; then
echo "::error::Backend test job result: ${{ needs.test.result }}"
exit 1
fi
if [[ "${{ needs.typing.result }}" != "success" ]]; then
echo "::error::Backend typing job result: ${{ needs.typing.result }}"
exit 1
fi
echo "Backend checks passed."

View File

@@ -149,16 +149,15 @@ jobs:
mkdir -p /tmp/digests
digest="${{ steps.build.outputs.digest }}"
echo "digest=${digest}"
echo "${digest}" > "/tmp/digests/digest-${{ matrix.arch }}.txt"
touch "/tmp/digests/${digest#sha256:}"
- name: Upload digest
if: steps.check-push.outputs.should-push == 'true'
uses: actions/upload-artifact@v7.0.0
with:
name: digests-${{ matrix.arch }}
path: /tmp/digests/digest-${{ matrix.arch }}.txt
path: /tmp/digests/*
if-no-files-found: error
retention-days: 1
archive: false
merge-and-push:
name: Merge and Push Manifest
runs-on: ubuntu-24.04
@@ -172,7 +171,7 @@ jobs:
uses: actions/download-artifact@v8.0.0
with:
path: /tmp/digests
pattern: digest-*.txt
pattern: digests-*
merge-multiple: true
- name: List digests
run: |
@@ -218,9 +217,8 @@ jobs:
tags=$(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "${DOCKER_METADATA_OUTPUT_JSON}")
digests=""
for digest_file in digest-*.txt; do
digest=$(cat "${digest_file}")
digests+="${{ env.REGISTRY }}/${REPOSITORY}@${digest} "
for digest in *; do
digests+="${{ env.REGISTRY }}/${REPOSITORY}@sha256:${digest} "
done
echo "Creating manifest with tags: ${tags}"

View File

@@ -1,9 +1,22 @@
name: Documentation
on:
push:
branches-ignore:
- 'translations**'
branches:
- main
- dev
paths:
- 'docs/**'
- 'zensical.toml'
- 'pyproject.toml'
- 'uv.lock'
- '.github/workflows/ci-docs.yml'
pull_request:
paths:
- 'docs/**'
- 'zensical.toml'
- 'pyproject.toml'
- 'uv.lock'
- '.github/workflows/ci-docs.yml'
workflow_dispatch:
concurrency:
group: docs-${{ github.event.pull_request.number || github.ref }}
@@ -16,55 +29,7 @@ env:
DEFAULT_UV_VERSION: "0.10.x"
DEFAULT_PYTHON_VERSION: "3.12"
jobs:
changes:
name: Detect Docs Changes
runs-on: ubuntu-slim
outputs:
docs_changed: ${{ steps.force.outputs.run_all == 'true' || steps.filter.outputs.docs == 'true' }}
steps:
- name: Checkout
uses: actions/checkout@v6.0.2
with:
fetch-depth: 0
- name: Decide run mode
id: force
run: |
if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
echo "run_all=true" >> "$GITHUB_OUTPUT"
elif [[ "${{ github.event_name }}" == "push" && ( "${{ github.ref_name }}" == "main" || "${{ github.ref_name }}" == "dev" ) ]]; then
echo "run_all=true" >> "$GITHUB_OUTPUT"
else
echo "run_all=false" >> "$GITHUB_OUTPUT"
fi
- name: Set diff range
id: range
if: steps.force.outputs.run_all != 'true'
run: |
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
echo "base=${{ github.event.pull_request.base.sha }}" >> "$GITHUB_OUTPUT"
elif [[ "${{ github.event.created }}" == "true" ]]; then
echo "base=origin/${{ github.event.repository.default_branch }}" >> "$GITHUB_OUTPUT"
else
echo "base=${{ github.event.before }}" >> "$GITHUB_OUTPUT"
fi
echo "ref=${{ github.sha }}" >> "$GITHUB_OUTPUT"
- name: Detect changes
id: filter
if: steps.force.outputs.run_all != 'true'
uses: dorny/paths-filter@v3.0.2
with:
base: ${{ steps.range.outputs.base }}
ref: ${{ steps.range.outputs.ref }}
filters: |
docs:
- 'docs/**'
- 'zensical.toml'
- 'pyproject.toml'
- 'uv.lock'
- '.github/workflows/ci-docs.yml'
build:
needs: changes
if: needs.changes.outputs.docs_changed == 'true'
name: Build Documentation
runs-on: ubuntu-24.04
steps:
@@ -99,8 +64,8 @@ jobs:
name: github-pages-${{ github.run_id }}-${{ github.run_attempt }}
deploy:
name: Deploy Documentation
needs: [changes, build]
if: github.event_name == 'push' && github.ref == 'refs/heads/main' && needs.changes.outputs.docs_changed == 'true'
needs: build
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
runs-on: ubuntu-24.04
environment:
name: github-pages
@@ -111,22 +76,3 @@ jobs:
id: deployment
with:
artifact_name: github-pages-${{ github.run_id }}-${{ github.run_attempt }}
gate:
name: Docs CI Gate
needs: [changes, build]
if: always()
runs-on: ubuntu-slim
steps:
- name: Check gate
run: |
if [[ "${{ needs.changes.outputs.docs_changed }}" != "true" ]]; then
echo "No docs-relevant changes detected."
exit 0
fi
if [[ "${{ needs.build.result }}" != "success" ]]; then
echo "::error::Docs build job result: ${{ needs.build.result }}"
exit 1
fi
echo "Docs checks passed."

View File

@@ -3,60 +3,21 @@ on:
push:
branches-ignore:
- 'translations**'
paths:
- 'src-ui/**'
- '.github/workflows/ci-frontend.yml'
pull_request:
branches-ignore:
- 'translations**'
paths:
- 'src-ui/**'
- '.github/workflows/ci-frontend.yml'
workflow_dispatch:
concurrency:
group: frontend-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
changes:
name: Detect Frontend Changes
runs-on: ubuntu-slim
outputs:
frontend_changed: ${{ steps.force.outputs.run_all == 'true' || steps.filter.outputs.frontend == 'true' }}
steps:
- name: Checkout
uses: actions/checkout@v6.0.2
with:
fetch-depth: 0
- name: Decide run mode
id: force
run: |
if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
echo "run_all=true" >> "$GITHUB_OUTPUT"
elif [[ "${{ github.event_name }}" == "push" && ( "${{ github.ref_name }}" == "main" || "${{ github.ref_name }}" == "dev" ) ]]; then
echo "run_all=true" >> "$GITHUB_OUTPUT"
else
echo "run_all=false" >> "$GITHUB_OUTPUT"
fi
- name: Set diff range
id: range
if: steps.force.outputs.run_all != 'true'
run: |
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
echo "base=${{ github.event.pull_request.base.sha }}" >> "$GITHUB_OUTPUT"
elif [[ "${{ github.event.created }}" == "true" ]]; then
echo "base=origin/${{ github.event.repository.default_branch }}" >> "$GITHUB_OUTPUT"
else
echo "base=${{ github.event.before }}" >> "$GITHUB_OUTPUT"
fi
echo "ref=${{ github.sha }}" >> "$GITHUB_OUTPUT"
- name: Detect changes
id: filter
if: steps.force.outputs.run_all != 'true'
uses: dorny/paths-filter@v3.0.2
with:
base: ${{ steps.range.outputs.base }}
ref: ${{ steps.range.outputs.ref }}
filters: |
frontend:
- 'src-ui/**'
- '.github/workflows/ci-frontend.yml'
install-dependencies:
needs: changes
if: needs.changes.outputs.frontend_changed == 'true'
name: Install Dependencies
runs-on: ubuntu-24.04
steps:
@@ -84,8 +45,7 @@ jobs:
run: cd src-ui && pnpm install
lint:
name: Lint
needs: [changes, install-dependencies]
if: needs.changes.outputs.frontend_changed == 'true'
needs: install-dependencies
runs-on: ubuntu-24.04
steps:
- name: Checkout
@@ -113,8 +73,7 @@ jobs:
run: cd src-ui && pnpm run lint
unit-tests:
name: "Unit Tests (${{ matrix.shard-index }}/${{ matrix.shard-count }})"
needs: [changes, install-dependencies]
if: needs.changes.outputs.frontend_changed == 'true'
needs: install-dependencies
runs-on: ubuntu-24.04
strategy:
fail-fast: false
@@ -160,8 +119,7 @@ jobs:
directory: src-ui/coverage/
e2e-tests:
name: "E2E Tests (${{ matrix.shard-index }}/${{ matrix.shard-count }})"
needs: [changes, install-dependencies]
if: needs.changes.outputs.frontend_changed == 'true'
needs: install-dependencies
runs-on: ubuntu-24.04
container: mcr.microsoft.com/playwright:v1.58.2-noble
env:
@@ -201,8 +159,7 @@ jobs:
run: cd src-ui && pnpm exec playwright test --shard ${{ matrix.shard-index }}/${{ matrix.shard-count }}
bundle-analysis:
name: Bundle Analysis
needs: [changes, unit-tests, e2e-tests]
if: needs.changes.outputs.frontend_changed == 'true'
needs: [unit-tests, e2e-tests]
runs-on: ubuntu-24.04
steps:
- name: Checkout
@@ -232,42 +189,3 @@ jobs:
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
run: cd src-ui && pnpm run build --configuration=production
gate:
name: Frontend CI Gate
needs: [changes, install-dependencies, lint, unit-tests, e2e-tests, bundle-analysis]
if: always()
runs-on: ubuntu-slim
steps:
- name: Check gate
run: |
if [[ "${{ needs.changes.outputs.frontend_changed }}" != "true" ]]; then
echo "No frontend-relevant changes detected."
exit 0
fi
if [[ "${{ needs['install-dependencies'].result }}" != "success" ]]; then
echo "::error::Frontend install job result: ${{ needs['install-dependencies'].result }}"
exit 1
fi
if [[ "${{ needs.lint.result }}" != "success" ]]; then
echo "::error::Frontend lint job result: ${{ needs.lint.result }}"
exit 1
fi
if [[ "${{ needs['unit-tests'].result }}" != "success" ]]; then
echo "::error::Frontend unit-tests job result: ${{ needs['unit-tests'].result }}"
exit 1
fi
if [[ "${{ needs['e2e-tests'].result }}" != "success" ]]; then
echo "::error::Frontend e2e-tests job result: ${{ needs['e2e-tests'].result }}"
exit 1
fi
if [[ "${{ needs['bundle-analysis'].result }}" != "success" ]]; then
echo "::error::Frontend bundle-analysis job result: ${{ needs['bundle-analysis'].result }}"
exit 1
fi
echo "Frontend checks passed."

View File

@@ -2,24 +2,13 @@ name: PR Bot
on:
pull_request_target:
types: [opened]
permissions:
contents: read
pull-requests: write
jobs:
anti-slop:
runs-on: ubuntu-latest
permissions:
contents: read
issues: read
pull-requests: write
steps:
- uses: peakoss/anti-slop@v0.2.1
with:
max-failures: 4
failure-add-pr-labels: 'ai'
pr-bot:
name: Automated PR Bot
runs-on: ubuntu-latest
permissions:
contents: read
pull-requests: write
steps:
- name: Label PR by file path or branch name
# see .github/labeler.yml for the labeler config

View File

@@ -305,16 +305,52 @@ The following methods are supported:
- `"merge": true or false` (defaults to false)
- The `merge` flag determines if the supplied permissions will overwrite all existing permissions (including
removing them) or be merged with existing permissions.
- `edit_pdf`
- Requires `parameters`:
- `"doc_ids": [DOCUMENT_ID]` A list containing a single document ID to edit.
- `"operations": [OPERATION, ...]` A list of operations to perform on the documents. Each operation is a dictionary
with the following keys:
- `"page": PAGE_NUMBER` The page number to edit (1-based).
- `"rotate": DEGREES` Optional rotation in degrees (90, 180, 270).
- `"doc": OUTPUT_DOCUMENT_INDEX` Optional index of the output document for split operations.
- Optional `parameters`:
- `"delete_original": true` to delete the original documents after editing.
- `"update_document": true` to add the edited PDF as a new version of the root document.
- `"include_metadata": true` to copy metadata from the original document to the edited document.
- `remove_password`
- Requires `parameters`:
- `"password": "PASSWORD_STRING"` The password to remove from the PDF documents.
- Optional `parameters`:
- `"update_document": true` to add the password-less PDF as a new version of the root document.
- `"delete_original": true` to delete the original document after editing.
- `"include_metadata": true` to copy metadata from the original document to the new password-less document.
- `merge`
- No additional `parameters` required.
- The ordering of the merged document is determined by the list of IDs.
- Optional `parameters`:
- `"metadata_document_id": DOC_ID` apply metadata (tags, correspondent, etc.) from this document to the merged document.
- `"delete_originals": true` to delete the original documents. This requires the calling user to be the owner of
all documents that are merged.
- `split`
- Requires `parameters`:
- `"pages": [..]` The list should contain pages and/or ranges, separated by commas, e.g. `"[1,2-3,4,5-7]"`
- Optional `parameters`:
- `"delete_originals": true` to delete the original document after consumption. This requires the calling user to be the owner of
the document.
- The split operation only accepts a single document.
- `rotate`
- Requires `parameters`:
- `"degrees": DEGREES`. Must be an integer i.e. 90, 180, 270
- `delete_pages`
- Requires `parameters`:
- `"pages": [..]` The list should be a list of integers e.g. `"[2,3,4]"`
- The delete_pages operation only accepts a single document.
- `modify_custom_fields`
- Requires `parameters`:
- `"add_custom_fields": { CUSTOM_FIELD_ID: VALUE }`: JSON object consisting of custom field id:value pairs to add to the document. This can also be a list of custom field IDs
to add with empty values.
- `"remove_custom_fields": [CUSTOM_FIELD_ID]`: custom field ids to remove from the document.
#### Document-editing operations
Beginning with version 10, the API supports individual endpoints for document-editing operations (`merge`, `rotate`, `edit_pdf`, etc.), so their documentation can be found in the API spec / viewer. Legacy document-editing methods via `/api/documents/bulk_edit/` are still supported for compatibility but are deprecated; clients should migrate to the individual endpoints before the legacy methods are removed in a future version.
### Objects
Bulk editing for objects (tags, document types etc.) currently supports set permissions or delete
@@ -434,9 +470,4 @@ Initial API version.
#### Version 10
- The `show_on_dashboard` and `show_in_sidebar` fields of saved views have been
removed. Relevant settings are now stored in the UISettings model. Compatibility is maintained
for versions < 10 until support for API v9 is dropped.
- Document-editing operations such as `merge`, `rotate`, and `edit_pdf` have been
moved from the bulk edit endpoint to their own individual endpoints. Using these methods via
the bulk edit endpoint is still supported for compatibility with versions < 10 until support
for API v9 is dropped.
removed. Relevant settings are now stored in the UISettings model.

View File

@@ -75,13 +75,13 @@ first-time setup.
4. Install the Python dependencies:
```bash
uv sync --group dev
$ uv sync --group dev
```
5. Install pre-commit hooks:
```bash
uv run prek install
$ uv run prek install
```
6. Apply migrations and create a superuser (this can also be done via the web UI) for your development instance:
@@ -89,8 +89,8 @@ first-time setup.
```bash
# src/
uv run manage.py migrate
uv run manage.py createsuperuser
$ uv run manage.py migrate
$ uv run manage.py createsuperuser
```
7. You can now either ...
@@ -103,7 +103,7 @@ first-time setup.
- spin up a bare Redis container
```bash
```
docker run -d -p 6379:6379 --restart unless-stopped redis:latest
```
@@ -118,18 +118,18 @@ work well for development, but you can use whatever you want.
Configure the IDE to use the `src/`-folder as the base source folder.
Configure the following launch configurations in your IDE:
- `uv run manage.py runserver`
- `uv run manage.py document_consumer`
- `uv run celery --app paperless worker -l DEBUG` (or any other log level)
- `python3 manage.py runserver`
- `python3 manage.py document_consumer`
- `celery --app paperless worker -l DEBUG` (or any other log level)
To start them all:
```bash
# src/
uv run manage.py runserver & \
uv run manage.py document_consumer & \
uv run celery --app paperless worker -l DEBUG
$ python3 manage.py runserver & \
python3 manage.py document_consumer & \
celery --app paperless worker -l DEBUG
```
You might need the front end to test your back end code.
@@ -140,8 +140,8 @@ To build the front end once use this command:
```bash
# src-ui/
pnpm install
pnpm ng build --configuration production
$ pnpm install
$ ng build --configuration production
```
### Testing
@@ -199,7 +199,7 @@ The front end is built using AngularJS. In order to get started, you need Node.j
4. You can launch a development server by running:
```bash
pnpm ng serve
ng serve
```
This will automatically update whenever you save. However, in-place
@@ -217,21 +217,21 @@ commit. See [above](#code-formatting-with-pre-commit-hooks) for installation ins
command such as
```bash
git ls-files -- '*.ts' | xargs uv run prek run prettier --files
$ git ls-files -- '*.ts' | xargs prek run prettier --files
```
Front end testing uses Jest and Playwright. Unit tests and e2e tests,
respectively, can be run non-interactively with:
```bash
pnpm ng test
pnpm playwright test
$ ng test
$ npx playwright test
```
Playwright also includes a UI which can be run with:
```bash
pnpm playwright test --ui
$ npx playwright test --ui
```
### Building the frontend
@@ -239,7 +239,7 @@ pnpm playwright test --ui
In order to build the front end and serve it as part of Django, execute:
```bash
pnpm ng build --configuration production
$ ng build --configuration production
```
This will build the front end and put it in a location from which the
@@ -312,10 +312,10 @@ end (such as error messages).
- The source language of the project is "en_US".
- Localization files end up in the folder `src/locale/`.
- In order to extract strings from the application, call
`uv run manage.py makemessages -l en_US`. This is important after
`python3 manage.py makemessages -l en_US`. This is important after
making changes to translatable strings.
- The message files need to be compiled for them to show up in the
application. Call `uv run manage.py compilemessages` to do this.
application. Call `python3 manage.py compilemessages` to do this.
The generated files don't get committed into git, since these are
derived artifacts. The build pipeline takes care of executing this
command.

View File

@@ -1217,7 +1217,7 @@
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1760</context>
<context context-type="linenumber">1756</context>
</context-group>
</trans-unit>
<trans-unit id="1577733187050997705" datatype="html">
@@ -2090,7 +2090,7 @@
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">637</context>
<context context-type="linenumber">634</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-version-dropdown/document-version-dropdown.component.html</context>
@@ -2798,11 +2798,11 @@
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1379</context>
<context context-type="linenumber">1376</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1761</context>
<context context-type="linenumber">1757</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
@@ -3400,7 +3400,7 @@
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1332</context>
<context context-type="linenumber">1329</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
@@ -3434,46 +3434,39 @@
<context context-type="linenumber">9</context>
</context-group>
</trans-unit>
<trans-unit id="6705735915615634619" datatype="html">
<source>{VAR_PLURAL, plural, =1 {One page} other {<x id="INTERPOLATION"/> pages}}</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/confirm-dialog/merge-confirm-dialog/merge-confirm-dialog.component.html</context>
<context context-type="linenumber">25</context>
</context-group>
</trans-unit>
<trans-unit id="7508164375697837821" datatype="html">
<source>Use metadata from:</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/confirm-dialog/merge-confirm-dialog/merge-confirm-dialog.component.html</context>
<context context-type="linenumber">34</context>
<context context-type="linenumber">22</context>
</context-group>
</trans-unit>
<trans-unit id="2020403212524346652" datatype="html">
<source>Regenerate all metadata</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/confirm-dialog/merge-confirm-dialog/merge-confirm-dialog.component.html</context>
<context context-type="linenumber">36</context>
<context context-type="linenumber">24</context>
</context-group>
</trans-unit>
<trans-unit id="2710430925353472741" datatype="html">
<source>Try to include archive version in merge for non-PDF files</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/confirm-dialog/merge-confirm-dialog/merge-confirm-dialog.component.html</context>
<context context-type="linenumber">44</context>
<context context-type="linenumber">32</context>
</context-group>
</trans-unit>
<trans-unit id="5612366187076076264" datatype="html">
<source>Delete original documents after successful merge</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/confirm-dialog/merge-confirm-dialog/merge-confirm-dialog.component.html</context>
<context context-type="linenumber">48</context>
<context context-type="linenumber">36</context>
</context-group>
</trans-unit>
<trans-unit id="5138283234724909648" datatype="html">
<source>Note that only PDFs will be included.</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/common/confirm-dialog/merge-confirm-dialog/merge-confirm-dialog.component.html</context>
<context context-type="linenumber">51</context>
<context context-type="linenumber">39</context>
</context-group>
</trans-unit>
<trans-unit id="1309641780471803652" datatype="html">
@@ -3512,7 +3505,7 @@
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1814</context>
<context context-type="linenumber">1808</context>
</context-group>
</trans-unit>
<trans-unit id="6661109599266152398" datatype="html">
@@ -3523,7 +3516,7 @@
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1815</context>
<context context-type="linenumber">1809</context>
</context-group>
</trans-unit>
<trans-unit id="5162686434580248853" datatype="html">
@@ -3534,7 +3527,7 @@
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1816</context>
<context context-type="linenumber">1810</context>
</context-group>
</trans-unit>
<trans-unit id="8157388568390631653" datatype="html">
@@ -5495,7 +5488,7 @@
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1336</context>
<context context-type="linenumber">1333</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
@@ -7702,81 +7695,81 @@
<source>Error retrieving metadata</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">411</context>
<context context-type="linenumber">408</context>
</context-group>
</trans-unit>
<trans-unit id="2218903673684131427" datatype="html">
<source>An error occurred loading content: <x id="PH" equiv-text="err.message ?? err.toString()"/></source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">512,514</context>
<context context-type="linenumber">509,511</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">959,961</context>
<context context-type="linenumber">956,958</context>
</context-group>
</trans-unit>
<trans-unit id="6357361810318120957" datatype="html">
<source>Document was updated</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">632</context>
<context context-type="linenumber">629</context>
</context-group>
</trans-unit>
<trans-unit id="5154064822428631306" datatype="html">
<source>Document was updated at <x id="PH" equiv-text="formattedModified"/>.</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">633</context>
<context context-type="linenumber">630</context>
</context-group>
</trans-unit>
<trans-unit id="8462497568316256794" datatype="html">
<source>Reload to discard your local unsaved edits and load the latest remote version.</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">634</context>
<context context-type="linenumber">631</context>
</context-group>
</trans-unit>
<trans-unit id="7967484035994732534" datatype="html">
<source>Reload</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">636</context>
<context context-type="linenumber">633</context>
</context-group>
</trans-unit>
<trans-unit id="2907037627372942104" datatype="html">
<source>Document reloaded with latest changes.</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">692</context>
<context context-type="linenumber">689</context>
</context-group>
</trans-unit>
<trans-unit id="6435639868943916539" datatype="html">
<source>Document reloaded.</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">703</context>
<context context-type="linenumber">700</context>
</context-group>
</trans-unit>
<trans-unit id="6142395741265832184" datatype="html">
<source>Next document</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">805</context>
<context context-type="linenumber">802</context>
</context-group>
</trans-unit>
<trans-unit id="651985345816518480" datatype="html">
<source>Previous document</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">815</context>
<context context-type="linenumber">812</context>
</context-group>
</trans-unit>
<trans-unit id="2885986061416655600" datatype="html">
<source>Close document</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">823</context>
<context context-type="linenumber">820</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/services/open-documents.service.ts</context>
@@ -7787,67 +7780,67 @@
<source>Save document</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">830</context>
<context context-type="linenumber">827</context>
</context-group>
</trans-unit>
<trans-unit id="1784543155727940353" datatype="html">
<source>Save and close / next</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">839</context>
<context context-type="linenumber">836</context>
</context-group>
</trans-unit>
<trans-unit id="7427704425579737895" datatype="html">
<source>Error retrieving version content</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">943</context>
<context context-type="linenumber">940</context>
</context-group>
</trans-unit>
<trans-unit id="3456881259945295697" datatype="html">
<source>Error retrieving suggestions.</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1000</context>
<context context-type="linenumber">997</context>
</context-group>
</trans-unit>
<trans-unit id="2194092841814123758" datatype="html">
<source>Document &quot;<x id="PH" equiv-text="newValues.title"/>&quot; saved successfully.</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1212</context>
<context context-type="linenumber">1209</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1239</context>
<context context-type="linenumber">1236</context>
</context-group>
</trans-unit>
<trans-unit id="6626387786259219838" datatype="html">
<source>Error saving document &quot;<x id="PH" equiv-text="this.document.title"/>&quot;</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1245</context>
<context context-type="linenumber">1242</context>
</context-group>
</trans-unit>
<trans-unit id="448882439049417053" datatype="html">
<source>Error saving document</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1300</context>
<context context-type="linenumber">1297</context>
</context-group>
</trans-unit>
<trans-unit id="8410796510716511826" datatype="html">
<source>Do you really want to move the document &quot;<x id="PH" equiv-text="this.document.title"/>&quot; to the trash?</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1333</context>
<context context-type="linenumber">1330</context>
</context-group>
</trans-unit>
<trans-unit id="282586936710748252" datatype="html">
<source>Documents can be restored prior to permanent deletion.</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1334</context>
<context context-type="linenumber">1331</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
@@ -7858,14 +7851,14 @@
<source>Error deleting document</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1355</context>
<context context-type="linenumber">1352</context>
</context-group>
</trans-unit>
<trans-unit id="619486176823357521" datatype="html">
<source>Reprocess confirm</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1375</context>
<context context-type="linenumber">1372</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
@@ -7876,102 +7869,102 @@
<source>This operation will permanently recreate the archive file for this document.</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1376</context>
<context context-type="linenumber">1373</context>
</context-group>
</trans-unit>
<trans-unit id="302054111564709516" datatype="html">
<source>The archive file will be re-generated with the current settings.</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1377</context>
<context context-type="linenumber">1374</context>
</context-group>
</trans-unit>
<trans-unit id="4700389117298802932" datatype="html">
<source>Reprocess operation for &quot;<x id="PH" equiv-text="this.document.title"/>&quot; will begin in the background.</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1387</context>
<context context-type="linenumber">1384</context>
</context-group>
</trans-unit>
<trans-unit id="4409560272830824468" datatype="html">
<source>Error executing operation</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1398</context>
<context context-type="linenumber">1395</context>
</context-group>
</trans-unit>
<trans-unit id="6030453331794586802" datatype="html">
<source>Error downloading document</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1461</context>
<context context-type="linenumber">1458</context>
</context-group>
</trans-unit>
<trans-unit id="4458954481601077369" datatype="html">
<source>Page Fit</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1541</context>
<context context-type="linenumber">1538</context>
</context-group>
</trans-unit>
<trans-unit id="4663705961777238777" datatype="html">
<source>PDF edit operation for &quot;<x id="PH" equiv-text="this.document.title"/>&quot; will begin in the background.</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1781</context>
<context context-type="linenumber">1775</context>
</context-group>
</trans-unit>
<trans-unit id="9043972994040261999" datatype="html">
<source>Error executing PDF edit operation</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1793</context>
<context context-type="linenumber">1787</context>
</context-group>
</trans-unit>
<trans-unit id="6172690334763056188" datatype="html">
<source>Please enter the current password before attempting to remove it.</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1804</context>
<context context-type="linenumber">1798</context>
</context-group>
</trans-unit>
<trans-unit id="968660764814228922" datatype="html">
<source>Password removal operation for &quot;<x id="PH" equiv-text="this.document.title"/>&quot; will begin in the background.</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1838</context>
<context context-type="linenumber">1830</context>
</context-group>
</trans-unit>
<trans-unit id="2282118435712883014" datatype="html">
<source>Error executing password removal operation</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1852</context>
<context context-type="linenumber">1844</context>
</context-group>
</trans-unit>
<trans-unit id="3740891324955700797" datatype="html">
<source>Print failed.</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1891</context>
<context context-type="linenumber">1883</context>
</context-group>
</trans-unit>
<trans-unit id="6457245677384603573" datatype="html">
<source>Error loading document for printing.</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1903</context>
<context context-type="linenumber">1895</context>
</context-group>
</trans-unit>
<trans-unit id="6085793215710522488" datatype="html">
<source>An error occurred loading tiff: <x id="PH" equiv-text="err.toString()"/></source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1968</context>
<context context-type="linenumber">1960</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1972</context>
<context context-type="linenumber">1964</context>
</context-group>
</trans-unit>
<trans-unit id="4958946940233632319" datatype="html">

View File

@@ -10,22 +10,10 @@
<ul class="list-group"
cdkDropList
(cdkDropListDropped)="onDrop($event)">
@for (document of documents; track document.id) {
<li class="list-group-item d-flex align-items-center" cdkDrag>
@for (documentID of documentIDs; track documentID) {
<li class="list-group-item" cdkDrag>
<i-bs name="grip-vertical" class="me-2"></i-bs>
<div class="d-flex flex-column">
<div>
@if (document.correspondent) {
<b>{{document.correspondent | correspondentName | async}}: </b>
}{{document.title}}
</div>
<small class="text-muted">
{{document.created | customDate:'mediumDate'}}
@if (document.page_count) {
| {document.page_count, plural, =1 {One page} other {{{document.page_count}} pages}}
}
</small>
</div>
{{getDocument(documentID)?.title}}
</li>
}
</ul>

View File

@@ -3,14 +3,11 @@ import {
DragDropModule,
moveItemInArray,
} from '@angular/cdk/drag-drop'
import { AsyncPipe } from '@angular/common'
import { Component, OnInit, inject } from '@angular/core'
import { FormsModule, ReactiveFormsModule } from '@angular/forms'
import { NgxBootstrapIconsModule } from 'ngx-bootstrap-icons'
import { takeUntil } from 'rxjs'
import { Document } from 'src/app/data/document'
import { CorrespondentNamePipe } from 'src/app/pipes/correspondent-name.pipe'
import { CustomDatePipe } from 'src/app/pipes/custom-date.pipe'
import { PermissionsService } from 'src/app/services/permissions.service'
import { DocumentService } from 'src/app/services/rest/document.service'
import { ConfirmDialogComponent } from '../confirm-dialog.component'
@@ -20,9 +17,6 @@ import { ConfirmDialogComponent } from '../confirm-dialog.component'
templateUrl: './merge-confirm-dialog.component.html',
styleUrl: './merge-confirm-dialog.component.scss',
imports: [
AsyncPipe,
CorrespondentNamePipe,
CustomDatePipe,
DragDropModule,
FormsModule,
ReactiveFormsModule,

View File

@@ -3,7 +3,6 @@ import { provideHttpClientTesting } from '@angular/common/http/testing'
import { ComponentFixture, TestBed } from '@angular/core/testing'
import { NgbActiveModal } from '@ng-bootstrap/ng-bootstrap'
import { NgxBootstrapIconsModule, allIcons } from 'ngx-bootstrap-icons'
import { DocumentService } from 'src/app/services/rest/document.service'
import { PDFEditorComponent } from './pdf-editor.component'
describe('PDFEditorComponent', () => {
@@ -140,16 +139,4 @@ describe('PDFEditorComponent', () => {
expect(component.pages[1].page).toBe(2)
expect(component.pages[2].page).toBe(3)
})
it('should include selected version in preview source when provided', () => {
const documentService = TestBed.inject(DocumentService)
const previewSpy = jest
.spyOn(documentService, 'getPreviewUrl')
.mockReturnValue('preview-version')
component.documentID = 3
component.versionID = 10
expect(component.pdfSrc).toBe('preview-version')
expect(previewSpy).toHaveBeenCalledWith(3, false, 10)
})
})

View File

@@ -46,7 +46,6 @@ export class PDFEditorComponent extends ConfirmDialogComponent {
activeModal: NgbActiveModal = inject(NgbActiveModal)
documentID: number
versionID?: number
pages: PageOperation[] = []
totalPages = 0
editMode: PdfEditorEditMode = this.settingsService.get(
@@ -56,11 +55,7 @@ export class PDFEditorComponent extends ConfirmDialogComponent {
includeMetadata: boolean = true
get pdfSrc(): string {
return this.documentService.getPreviewUrl(
this.documentID,
false,
this.versionID
)
return this.documentService.getPreviewUrl(this.documentID)
}
pdfLoaded(pdf: PngxPdfDocumentProxy) {

View File

@@ -950,8 +950,8 @@ describe('DocumentDetailComponent', () => {
it('should support reprocess, confirm and close modal after started', () => {
initNormally()
const reprocessSpy = jest.spyOn(documentService, 'reprocessDocuments')
reprocessSpy.mockReturnValue(of(true))
const bulkEditSpy = jest.spyOn(documentService, 'bulkEdit')
bulkEditSpy.mockReturnValue(of(true))
let openModal: NgbModalRef
modalService.activeInstances.subscribe((modal) => (openModal = modal[0]))
const modalSpy = jest.spyOn(modalService, 'open')
@@ -959,7 +959,7 @@ describe('DocumentDetailComponent', () => {
component.reprocess()
const modalCloseSpy = jest.spyOn(openModal, 'close')
openModal.componentInstance.confirmClicked.next()
expect(reprocessSpy).toHaveBeenCalledWith([doc.id])
expect(bulkEditSpy).toHaveBeenCalledWith([doc.id], 'reprocess', {})
expect(modalSpy).toHaveBeenCalled()
expect(toastSpy).toHaveBeenCalled()
expect(modalCloseSpy).toHaveBeenCalled()
@@ -967,13 +967,13 @@ describe('DocumentDetailComponent', () => {
it('should show error if redo ocr call fails', () => {
initNormally()
const reprocessSpy = jest.spyOn(documentService, 'reprocessDocuments')
const bulkEditSpy = jest.spyOn(documentService, 'bulkEdit')
let openModal: NgbModalRef
modalService.activeInstances.subscribe((modal) => (openModal = modal[0]))
const toastSpy = jest.spyOn(toastService, 'showError')
component.reprocess()
const modalCloseSpy = jest.spyOn(openModal, 'close')
reprocessSpy.mockReturnValue(throwError(() => new Error('error occurred')))
bulkEditSpy.mockReturnValue(throwError(() => new Error('error occurred')))
openModal.componentInstance.confirmClicked.next()
expect(toastSpy).toHaveBeenCalled()
expect(modalCloseSpy).not.toHaveBeenCalled()
@@ -1661,23 +1661,23 @@ describe('DocumentDetailComponent', () => {
const closeSpy = jest.spyOn(openDocumentsService, 'closeDocument')
const errorSpy = jest.spyOn(toastService, 'showError')
initNormally()
component.selectedVersionId = 10
component.editPdf()
expect(modal).not.toBeUndefined()
modal.componentInstance.documentID = doc.id
expect(modal.componentInstance.versionID).toBe(10)
modal.componentInstance.pages = [{ page: 1, rotate: 0, splitAfter: false }]
modal.componentInstance.confirm()
let req = httpTestingController.expectOne(
`${environment.apiBaseUrl}documents/edit_pdf/`
`${environment.apiBaseUrl}documents/bulk_edit/`
)
expect(req.request.body).toEqual({
documents: [10],
operations: [{ page: 1, rotate: 0, doc: 0 }],
delete_original: false,
update_document: false,
include_metadata: true,
source_mode: 'explicit_selection',
documents: [doc.id],
method: 'edit_pdf',
parameters: {
operations: [{ page: 1, rotate: 0, doc: 0 }],
delete_original: false,
update_document: false,
include_metadata: true,
},
})
req.error(new ErrorEvent('failed'))
expect(errorSpy).toHaveBeenCalled()
@@ -1688,7 +1688,7 @@ describe('DocumentDetailComponent', () => {
modal.componentInstance.deleteOriginal = true
modal.componentInstance.confirm()
req = httpTestingController.expectOne(
`${environment.apiBaseUrl}documents/edit_pdf/`
`${environment.apiBaseUrl}documents/bulk_edit/`
)
req.flush(true)
expect(closeSpy).toHaveBeenCalled()
@@ -1698,7 +1698,6 @@ describe('DocumentDetailComponent', () => {
let modal: NgbModalRef
modalService.activeInstances.subscribe((m) => (modal = m[0]))
initNormally()
component.selectedVersionId = 10
component.password = 'secret'
component.removePassword()
const dialog =
@@ -1708,15 +1707,17 @@ describe('DocumentDetailComponent', () => {
dialog.deleteOriginal = true
dialog.confirm()
const req = httpTestingController.expectOne(
`${environment.apiBaseUrl}documents/remove_password/`
`${environment.apiBaseUrl}documents/bulk_edit/`
)
expect(req.request.body).toEqual({
documents: [10],
password: 'secret',
update_document: false,
include_metadata: false,
delete_original: true,
source_mode: 'explicit_selection',
documents: [doc.id],
method: 'remove_password',
parameters: {
password: 'secret',
update_document: false,
include_metadata: false,
delete_original: true,
},
})
req.flush(true)
})
@@ -1731,7 +1732,7 @@ describe('DocumentDetailComponent', () => {
expect(errorSpy).toHaveBeenCalled()
httpTestingController.expectNone(
`${environment.apiBaseUrl}documents/remove_password/`
`${environment.apiBaseUrl}documents/bulk_edit/`
)
})
@@ -1747,7 +1748,7 @@ describe('DocumentDetailComponent', () => {
modal.componentInstance as PasswordRemovalConfirmDialogComponent
dialog.confirm()
const req = httpTestingController.expectOne(
`${environment.apiBaseUrl}documents/remove_password/`
`${environment.apiBaseUrl}documents/bulk_edit/`
)
req.error(new ErrorEvent('failed'))
@@ -1768,7 +1769,7 @@ describe('DocumentDetailComponent', () => {
modal.componentInstance as PasswordRemovalConfirmDialogComponent
dialog.confirm()
const req = httpTestingController.expectOne(
`${environment.apiBaseUrl}documents/remove_password/`
`${environment.apiBaseUrl}documents/bulk_edit/`
)
req.flush(true)

View File

@@ -74,10 +74,7 @@ import {
import { CorrespondentService } from 'src/app/services/rest/correspondent.service'
import { CustomFieldsService } from 'src/app/services/rest/custom-fields.service'
import { DocumentTypeService } from 'src/app/services/rest/document-type.service'
import {
BulkEditSourceMode,
DocumentService,
} from 'src/app/services/rest/document.service'
import { DocumentService } from 'src/app/services/rest/document.service'
import { SavedViewService } from 'src/app/services/rest/saved-view.service'
import { StoragePathService } from 'src/app/services/rest/storage-path.service'
import { TagService } from 'src/app/services/rest/tag.service'
@@ -1379,25 +1376,27 @@ export class DocumentDetailComponent
modal.componentInstance.btnCaption = $localize`Proceed`
modal.componentInstance.confirmClicked.subscribe(() => {
modal.componentInstance.buttonsEnabled = false
this.documentsService.reprocessDocuments([this.document.id]).subscribe({
next: () => {
this.toastService.showInfo(
$localize`Reprocess operation for "${this.document.title}" will begin in the background.`
)
if (modal) {
modal.close()
}
},
error: (error) => {
if (modal) {
modal.componentInstance.buttonsEnabled = true
}
this.toastService.showError(
$localize`Error executing operation`,
error
)
},
})
this.documentsService
.bulkEdit([this.document.id], 'reprocess', {})
.subscribe({
next: () => {
this.toastService.showInfo(
$localize`Reprocess operation for "${this.document.title}" will begin in the background.`
)
if (modal) {
modal.close()
}
},
error: (error) => {
if (modal) {
modal.componentInstance.buttonsEnabled = true
}
this.toastService.showError(
$localize`Error executing operation`,
error
)
},
})
})
}
@@ -1754,23 +1753,20 @@ export class DocumentDetailComponent
size: 'xl',
scrollable: true,
})
const sourceDocumentId = this.selectedVersionId ?? this.document.id
modal.componentInstance.title = $localize`PDF Editor`
modal.componentInstance.btnCaption = $localize`Proceed`
modal.componentInstance.documentID = this.document.id
modal.componentInstance.versionID = sourceDocumentId
modal.componentInstance.confirmClicked
.pipe(takeUntil(this.unsubscribeNotifier))
.subscribe(() => {
modal.componentInstance.buttonsEnabled = false
this.documentsService
.editPdfDocuments([sourceDocumentId], {
.bulkEdit([this.document.id], 'edit_pdf', {
operations: modal.componentInstance.getOperations(),
delete_original: modal.componentInstance.deleteOriginal,
update_document:
modal.componentInstance.editMode == PdfEditorEditMode.Update,
include_metadata: modal.componentInstance.includeMetadata,
source_mode: BulkEditSourceMode.EXPLICIT_SELECTION,
})
.pipe(first(), takeUntil(this.unsubscribeNotifier))
.subscribe({
@@ -1816,18 +1812,16 @@ export class DocumentDetailComponent
modal.componentInstance.confirmClicked
.pipe(takeUntil(this.unsubscribeNotifier))
.subscribe(() => {
const sourceDocumentId = this.selectedVersionId ?? this.document.id
const dialog =
modal.componentInstance as PasswordRemovalConfirmDialogComponent
dialog.buttonsEnabled = false
this.networkActive = true
this.documentsService
.removePasswordDocuments([sourceDocumentId], {
.bulkEdit([this.document.id], 'remove_password', {
password: this.password,
update_document: dialog.updateDocument,
include_metadata: dialog.includeMetadata,
delete_original: dialog.deleteOriginal,
source_mode: BulkEditSourceMode.EXPLICIT_SELECTION,
})
.pipe(first(), takeUntil(this.unsubscribeNotifier))
.subscribe({

View File

@@ -849,11 +849,13 @@ describe('BulkEditorComponent', () => {
expect(modal).not.toBeUndefined()
modal.componentInstance.confirm()
let req = httpTestingController.expectOne(
`${environment.apiBaseUrl}documents/delete/`
`${environment.apiBaseUrl}documents/bulk_edit/`
)
req.flush(true)
expect(req.request.body).toEqual({
documents: [3, 4],
method: 'delete',
parameters: {},
})
httpTestingController.match(
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
@@ -866,7 +868,7 @@ describe('BulkEditorComponent', () => {
fixture.detectChanges()
component.applyDelete()
req = httpTestingController.expectOne(
`${environment.apiBaseUrl}documents/delete/`
`${environment.apiBaseUrl}documents/bulk_edit/`
)
})
@@ -942,11 +944,13 @@ describe('BulkEditorComponent', () => {
expect(modal).not.toBeUndefined()
modal.componentInstance.confirm()
let req = httpTestingController.expectOne(
`${environment.apiBaseUrl}documents/reprocess/`
`${environment.apiBaseUrl}documents/bulk_edit/`
)
req.flush(true)
expect(req.request.body).toEqual({
documents: [3, 4],
method: 'reprocess',
parameters: {},
})
httpTestingController.match(
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
@@ -975,13 +979,13 @@ describe('BulkEditorComponent', () => {
modal.componentInstance.rotate()
modal.componentInstance.confirm()
let req = httpTestingController.expectOne(
`${environment.apiBaseUrl}documents/rotate/`
`${environment.apiBaseUrl}documents/bulk_edit/`
)
req.flush(true)
expect(req.request.body).toEqual({
documents: [3, 4],
degrees: 90,
source_mode: 'latest_version',
method: 'rotate',
parameters: { degrees: 90 },
})
httpTestingController.match(
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
@@ -1017,12 +1021,13 @@ describe('BulkEditorComponent', () => {
modal.componentInstance.metadataDocumentID = 3
modal.componentInstance.confirm()
let req = httpTestingController.expectOne(
`${environment.apiBaseUrl}documents/merge/`
`${environment.apiBaseUrl}documents/bulk_edit/`
)
req.flush(true)
expect(req.request.body).toEqual({
documents: [3, 4],
metadata_document_id: 3,
method: 'merge',
parameters: { metadata_document_id: 3 },
})
httpTestingController.match(
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
@@ -1035,13 +1040,13 @@ describe('BulkEditorComponent', () => {
modal.componentInstance.deleteOriginals = true
modal.componentInstance.confirm()
req = httpTestingController.expectOne(
`${environment.apiBaseUrl}documents/merge/`
`${environment.apiBaseUrl}documents/bulk_edit/`
)
req.flush(true)
expect(req.request.body).toEqual({
documents: [3, 4],
metadata_document_id: 3,
delete_originals: true,
method: 'merge',
parameters: { metadata_document_id: 3, delete_originals: true },
})
httpTestingController.match(
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
@@ -1056,13 +1061,13 @@ describe('BulkEditorComponent', () => {
modal.componentInstance.archiveFallback = true
modal.componentInstance.confirm()
req = httpTestingController.expectOne(
`${environment.apiBaseUrl}documents/merge/`
`${environment.apiBaseUrl}documents/bulk_edit/`
)
req.flush(true)
expect(req.request.body).toEqual({
documents: [3, 4],
metadata_document_id: 3,
archive_fallback: true,
method: 'merge',
parameters: { metadata_document_id: 3, archive_fallback: true },
})
httpTestingController.match(
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`

View File

@@ -12,7 +12,7 @@ import {
} from '@ng-bootstrap/ng-bootstrap'
import { saveAs } from 'file-saver'
import { NgxBootstrapIconsModule } from 'ngx-bootstrap-icons'
import { first, map, Observable, Subject, switchMap, takeUntil } from 'rxjs'
import { first, map, Subject, switchMap, takeUntil } from 'rxjs'
import { ConfirmDialogComponent } from 'src/app/components/common/confirm-dialog/confirm-dialog.component'
import { CustomField } from 'src/app/data/custom-field'
import { MatchingModel } from 'src/app/data/matching-model'
@@ -29,9 +29,7 @@ import { CorrespondentService } from 'src/app/services/rest/correspondent.servic
import { CustomFieldsService } from 'src/app/services/rest/custom-fields.service'
import { DocumentTypeService } from 'src/app/services/rest/document-type.service'
import {
DocumentBulkEditMethod,
DocumentService,
MergeDocumentsRequest,
SelectionDataItem,
} from 'src/app/services/rest/document.service'
import { SavedViewService } from 'src/app/services/rest/saved-view.service'
@@ -257,9 +255,9 @@ export class BulkEditorComponent
this.unsubscribeNotifier.complete()
}
private executeBulkEditMethod(
private executeBulkOperation(
modal: NgbModalRef,
method: DocumentBulkEditMethod,
method: string,
args: any,
overrideDocumentIDs?: number[]
) {
@@ -274,55 +272,32 @@ export class BulkEditorComponent
)
.pipe(first())
.subscribe({
next: () => this.handleOperationSuccess(modal),
error: (error) => this.handleOperationError(modal, error),
next: () => {
if (args['delete_originals']) {
this.list.selected.clear()
}
this.list.reload()
this.list.reduceSelectionToFilter()
this.list.selected.forEach((id) => {
this.openDocumentService.refreshDocument(id)
})
this.savedViewService.maybeRefreshDocumentCounts()
if (modal) {
modal.close()
}
},
error: (error) => {
if (modal) {
modal.componentInstance.buttonsEnabled = true
}
this.toastService.showError(
$localize`Error executing bulk operation`,
error
)
},
})
}
private executeDocumentAction(
modal: NgbModalRef,
request: Observable<any>,
options: { deleteOriginals?: boolean } = {}
) {
if (modal) {
modal.componentInstance.buttonsEnabled = false
}
request.pipe(first()).subscribe({
next: () => {
this.handleOperationSuccess(modal, options.deleteOriginals ?? false)
},
error: (error) => this.handleOperationError(modal, error),
})
}
private handleOperationSuccess(
modal: NgbModalRef,
clearSelection: boolean = false
) {
if (clearSelection) {
this.list.selected.clear()
}
this.list.reload()
this.list.reduceSelectionToFilter()
this.list.selected.forEach((id) => {
this.openDocumentService.refreshDocument(id)
})
this.savedViewService.maybeRefreshDocumentCounts()
if (modal) {
modal.close()
}
}
private handleOperationError(modal: NgbModalRef, error: any) {
if (modal) {
modal.componentInstance.buttonsEnabled = true
}
this.toastService.showError(
$localize`Error executing bulk operation`,
error
)
}
private applySelectionData(
items: SelectionDataItem[],
selectionModel: FilterableDropdownSelectionModel
@@ -471,13 +446,13 @@ export class BulkEditorComponent
modal.componentInstance.confirmClicked
.pipe(takeUntil(this.unsubscribeNotifier))
.subscribe(() => {
this.executeBulkEditMethod(modal, 'modify_tags', {
this.executeBulkOperation(modal, 'modify_tags', {
add_tags: changedTags.itemsToAdd.map((t) => t.id),
remove_tags: changedTags.itemsToRemove.map((t) => t.id),
})
})
} else {
this.executeBulkEditMethod(null, 'modify_tags', {
this.executeBulkOperation(null, 'modify_tags', {
add_tags: changedTags.itemsToAdd.map((t) => t.id),
remove_tags: changedTags.itemsToRemove.map((t) => t.id),
})
@@ -511,12 +486,12 @@ export class BulkEditorComponent
modal.componentInstance.confirmClicked
.pipe(takeUntil(this.unsubscribeNotifier))
.subscribe(() => {
this.executeBulkEditMethod(modal, 'set_correspondent', {
this.executeBulkOperation(modal, 'set_correspondent', {
correspondent: correspondent ? correspondent.id : null,
})
})
} else {
this.executeBulkEditMethod(null, 'set_correspondent', {
this.executeBulkOperation(null, 'set_correspondent', {
correspondent: correspondent ? correspondent.id : null,
})
}
@@ -549,12 +524,12 @@ export class BulkEditorComponent
modal.componentInstance.confirmClicked
.pipe(takeUntil(this.unsubscribeNotifier))
.subscribe(() => {
this.executeBulkEditMethod(modal, 'set_document_type', {
this.executeBulkOperation(modal, 'set_document_type', {
document_type: documentType ? documentType.id : null,
})
})
} else {
this.executeBulkEditMethod(null, 'set_document_type', {
this.executeBulkOperation(null, 'set_document_type', {
document_type: documentType ? documentType.id : null,
})
}
@@ -587,12 +562,12 @@ export class BulkEditorComponent
modal.componentInstance.confirmClicked
.pipe(takeUntil(this.unsubscribeNotifier))
.subscribe(() => {
this.executeBulkEditMethod(modal, 'set_storage_path', {
this.executeBulkOperation(modal, 'set_storage_path', {
storage_path: storagePath ? storagePath.id : null,
})
})
} else {
this.executeBulkEditMethod(null, 'set_storage_path', {
this.executeBulkOperation(null, 'set_storage_path', {
storage_path: storagePath ? storagePath.id : null,
})
}
@@ -649,7 +624,7 @@ export class BulkEditorComponent
modal.componentInstance.confirmClicked
.pipe(takeUntil(this.unsubscribeNotifier))
.subscribe(() => {
this.executeBulkEditMethod(modal, 'modify_custom_fields', {
this.executeBulkOperation(modal, 'modify_custom_fields', {
add_custom_fields: changedCustomFields.itemsToAdd.map((f) => f.id),
remove_custom_fields: changedCustomFields.itemsToRemove.map(
(f) => f.id
@@ -657,7 +632,7 @@ export class BulkEditorComponent
})
})
} else {
this.executeBulkEditMethod(null, 'modify_custom_fields', {
this.executeBulkOperation(null, 'modify_custom_fields', {
add_custom_fields: changedCustomFields.itemsToAdd.map((f) => f.id),
remove_custom_fields: changedCustomFields.itemsToRemove.map(
(f) => f.id
@@ -787,16 +762,10 @@ export class BulkEditorComponent
.pipe(takeUntil(this.unsubscribeNotifier))
.subscribe(() => {
modal.componentInstance.buttonsEnabled = false
this.executeDocumentAction(
modal,
this.documentService.deleteDocuments(Array.from(this.list.selected))
)
this.executeBulkOperation(modal, 'delete', {})
})
} else {
this.executeDocumentAction(
null,
this.documentService.deleteDocuments(Array.from(this.list.selected))
)
this.executeBulkOperation(null, 'delete', {})
}
}
@@ -835,12 +804,7 @@ export class BulkEditorComponent
.pipe(takeUntil(this.unsubscribeNotifier))
.subscribe(() => {
modal.componentInstance.buttonsEnabled = false
this.executeDocumentAction(
modal,
this.documentService.reprocessDocuments(
Array.from(this.list.selected)
)
)
this.executeBulkOperation(modal, 'reprocess', {})
})
}
@@ -851,7 +815,7 @@ export class BulkEditorComponent
modal.componentInstance.confirmClicked.subscribe(
({ permissions, merge }) => {
modal.componentInstance.buttonsEnabled = false
this.executeBulkEditMethod(modal, 'set_permissions', {
this.executeBulkOperation(modal, 'set_permissions', {
...permissions,
merge,
})
@@ -874,13 +838,9 @@ export class BulkEditorComponent
.pipe(takeUntil(this.unsubscribeNotifier))
.subscribe(() => {
rotateDialog.buttonsEnabled = false
this.executeDocumentAction(
modal,
this.documentService.rotateDocuments(
Array.from(this.list.selected),
rotateDialog.degrees
)
)
this.executeBulkOperation(modal, 'rotate', {
degrees: rotateDialog.degrees,
})
})
}
@@ -896,22 +856,18 @@ export class BulkEditorComponent
mergeDialog.confirmClicked
.pipe(takeUntil(this.unsubscribeNotifier))
.subscribe(() => {
const args: MergeDocumentsRequest = {}
const args = {}
if (mergeDialog.metadataDocumentID > -1) {
args.metadata_document_id = mergeDialog.metadataDocumentID
args['metadata_document_id'] = mergeDialog.metadataDocumentID
}
if (mergeDialog.deleteOriginals) {
args.delete_originals = true
args['delete_originals'] = true
}
if (mergeDialog.archiveFallback) {
args.archive_fallback = true
args['archive_fallback'] = true
}
mergeDialog.buttonsEnabled = false
this.executeDocumentAction(
modal,
this.documentService.mergeDocuments(mergeDialog.documentIDs, args),
{ deleteOriginals: !!args.delete_originals }
)
this.executeBulkOperation(modal, 'merge', args, mergeDialog.documentIDs)
this.toastService.showInfo(
$localize`Merged document will be queued for consumption.`
)

View File

@@ -230,88 +230,6 @@ describe(`DocumentService`, () => {
})
})
// Shared assertion for the specs below: exactly one request must be pending
// for `<apiBaseUrl><endpoint>/<action>/`, it must be a POST, and its body must
// equal `expectedBody`.
const expectPostedTo = (action: string, expectedBody: object) => {
  const pending = httpTestingController.expectOne(
    `${environment.apiBaseUrl}${endpoint}/${action}/`
  )
  expect(pending.request.method).toEqual('POST')
  expect(pending.request.body).toEqual(expectedBody)
}

it('should call appropriate api endpoint for delete documents', () => {
  const selected = [1, 2, 3]
  subscription = service.deleteDocuments(selected).subscribe()
  expectPostedTo('delete', {
    documents: selected,
  })
})

it('should call appropriate api endpoint for reprocess documents', () => {
  const selected = [1, 2, 3]
  subscription = service.reprocessDocuments(selected).subscribe()
  expectPostedTo('reprocess', {
    documents: selected,
  })
})

it('should call appropriate api endpoint for rotate documents', () => {
  const selected = [1, 2, 3]
  subscription = service.rotateDocuments(selected, 90).subscribe()
  // source_mode is not passed above, so the default value is expected here.
  expectPostedTo('rotate', {
    documents: selected,
    degrees: 90,
    source_mode: 'latest_version',
  })
})

it('should call appropriate api endpoint for merge documents', () => {
  const selected = [1, 2, 3]
  const mergeArgs = { metadata_document_id: 1, delete_originals: true }
  subscription = service.mergeDocuments(selected, mergeArgs).subscribe()
  expectPostedTo('merge', {
    documents: selected,
    metadata_document_id: 1,
    delete_originals: true,
  })
})

it('should call appropriate api endpoint for edit pdf', () => {
  const selected = [1]
  const editArgs = { operations: [{ page: 1, rotate: 90, doc: 0 }] }
  subscription = service.editPdfDocuments(selected, editArgs).subscribe()
  expectPostedTo('edit_pdf', {
    documents: selected,
    operations: [{ page: 1, rotate: 90, doc: 0 }],
  })
})

it('should call appropriate api endpoint for remove password', () => {
  const selected = [1]
  const passwordArgs = { password: 'secret', update_document: true }
  subscription = service
    .removePasswordDocuments(selected, passwordArgs)
    .subscribe()
  expectPostedTo('remove_password', {
    documents: selected,
    password: 'secret',
    update_document: true,
  })
})
it('should return the correct preview URL for a single document', () => {
let url = service.getPreviewUrl(documents[0].id)
expect(url).toEqual(

View File

@@ -37,50 +37,6 @@ export interface SelectionData {
selected_custom_fields: SelectionDataItem[]
}
/**
 * String values sent as the `source_mode` field of bulk edit requests.
 *
 * NOTE(review): presumably selects whether the backend operates on the latest
 * version of a document or on exactly the selected ids — confirm against the
 * API.
 */
export enum BulkEditSourceMode {
LATEST_VERSION = 'latest_version',
EXPLICIT_SELECTION = 'explicit_selection',
}
/**
 * Closed set of method names usable as the `method` argument of a bulk edit
 * request. Restricting the argument to this union makes a misspelled method
 * name a compile-time error.
 */
export type DocumentBulkEditMethod =
| 'set_correspondent'
| 'set_document_type'
| 'set_storage_path'
| 'add_tag'
| 'remove_tag'
| 'modify_tags'
| 'modify_custom_fields'
| 'set_permissions'
/**
 * Optional arguments of a merge request. Every field may be omitted; the
 * field names are snake_case because they are sent as-is in the request body.
 */
export interface MergeDocumentsRequest {
// NOTE(review): presumably the id of the document whose metadata the merged
// document inherits — confirm against the API.
metadata_document_id?: number
delete_originals?: boolean
archive_fallback?: boolean
source_mode?: BulkEditSourceMode
}
/**
 * A single entry of `EditPdfDocumentsRequest.operations`.
 *
 * NOTE(review): field meanings are not visible here. Presumably `page` is a
 * page number, `rotate` an angle in degrees and `doc` the index of the output
 * document the page goes to — confirm against the API.
 */
export interface EditPdfOperation {
page: number
rotate?: number
doc?: number
}
/**
 * Arguments of an edit-PDF request. `operations` is the only required field;
 * the field names are snake_case because they are sent as-is in the request
 * body.
 */
export interface EditPdfDocumentsRequest {
operations: EditPdfOperation[]
delete_original?: boolean
update_document?: boolean
include_metadata?: boolean
source_mode?: BulkEditSourceMode
}
/**
 * Arguments of a remove-password request. `password` is the only required
 * field; the field names are snake_case because they are sent as-is in the
 * request body.
 */
export interface RemovePasswordDocumentsRequest {
// Sent to the server in the request body; see removePasswordDocuments.
password: string
update_document?: boolean
delete_original?: boolean
include_metadata?: boolean
source_mode?: BulkEditSourceMode
}
@Injectable({
providedIn: 'root',
})
@@ -338,7 +294,7 @@ export class DocumentService extends AbstractPaperlessService<Document> {
return this.http.get<DocumentMetadata>(url.toString())
}
bulkEdit(ids: number[], method: DocumentBulkEditMethod, args: any) {
bulkEdit(ids: number[], method: string, args: any) {
return this.http.post(this.getResourceUrl(null, 'bulk_edit'), {
documents: ids,
method: method,
@@ -346,54 +302,6 @@ export class DocumentService extends AbstractPaperlessService<Document> {
})
}
/**
 * POSTs the given document ids to the `delete` resource URL.
 *
 * @param ids ids sent as the `documents` field of the request body
 * @returns whatever `this.http.post` returns for that request
 */
deleteDocuments(ids: number[]) {
  const url = this.getResourceUrl(null, 'delete')
  const payload = { documents: ids }
  return this.http.post(url, payload)
}
/**
 * POSTs the given document ids to the `reprocess` resource URL.
 *
 * @param ids ids sent as the `documents` field of the request body
 * @returns whatever `this.http.post` returns for that request
 */
reprocessDocuments(ids: number[]) {
  const url = this.getResourceUrl(null, 'reprocess')
  const payload = { documents: ids }
  return this.http.post(url, payload)
}
/**
 * POSTs a rotation request for the given document ids to the `rotate`
 * resource URL.
 *
 * @param ids ids sent as the `documents` field of the request body
 * @param degrees sent unchanged as the `degrees` field
 * @param sourceMode sent as the `source_mode` field; defaults to
 *   `BulkEditSourceMode.LATEST_VERSION`
 * @returns whatever `this.http.post` returns for that request
 */
rotateDocuments(
  ids: number[],
  degrees: number,
  sourceMode: BulkEditSourceMode = BulkEditSourceMode.LATEST_VERSION
) {
  const payload = {
    documents: ids,
    degrees: degrees,
    source_mode: sourceMode,
  }
  return this.http.post(this.getResourceUrl(null, 'rotate'), payload)
}
/**
 * POSTs a merge request for the given document ids to the `merge` resource
 * URL.
 *
 * @param ids ids sent as the `documents` field of the request body
 * @param request optional merge arguments, spread into the body after
 *   `documents`; defaults to an empty object
 * @returns whatever `this.http.post` returns for that request
 */
mergeDocuments(ids: number[], request: MergeDocumentsRequest = {}) {
  const payload = { documents: ids, ...request }
  return this.http.post(this.getResourceUrl(null, 'merge'), payload)
}
/**
 * POSTs an edit-PDF request for the given document ids to the `edit_pdf`
 * resource URL.
 *
 * @param ids ids sent as the `documents` field of the request body
 * @param request edit arguments, spread into the body after `documents`
 * @returns whatever `this.http.post` returns for that request
 */
editPdfDocuments(ids: number[], request: EditPdfDocumentsRequest) {
  const payload = { documents: ids, ...request }
  return this.http.post(this.getResourceUrl(null, 'edit_pdf'), payload)
}
/**
 * POSTs a remove-password request for the given document ids to the
 * `remove_password` resource URL.
 *
 * @param ids ids sent as the `documents` field of the request body
 * @param request password and options, spread into the body after
 *   `documents`
 * @returns whatever `this.http.post` returns for that request
 */
removePasswordDocuments(
  ids: number[],
  request: RemovePasswordDocumentsRequest
) {
  const payload = { documents: ids, ...request }
  return this.http.post(this.getResourceUrl(null, 'remove_password'), payload)
}
getSelectionData(ids: number[]): Observable<SelectionData> {
return this.http.post<SelectionData>(
this.getResourceUrl(null, 'selection_data'),

View File

@@ -29,21 +29,12 @@ from documents.plugins.helpers import DocumentsStatusManager
from documents.tasks import bulk_update_documents
from documents.tasks import consume_file
from documents.tasks import update_document_content_maybe_archive_file
from documents.versioning import get_latest_version_for_root
from documents.versioning import get_root_document
if TYPE_CHECKING:
from django.contrib.auth.models import User
logger: logging.Logger = logging.getLogger("paperless.bulk_edit")
SourceMode = Literal["latest_version", "explicit_selection"]
class SourceModeChoices:
LATEST_VERSION: SourceMode = "latest_version"
EXPLICIT_SELECTION: SourceMode = "explicit_selection"
@shared_task(bind=True)
def restore_archive_serial_numbers_task(
@@ -81,21 +72,46 @@ def restore_archive_serial_numbers(backup: dict[int, int | None]) -> None:
logger.info(f"Restored archive serial numbers for documents {list(backup.keys())}")
def _resolve_root_and_source_doc(
doc: Document,
*,
source_mode: SourceMode = SourceModeChoices.LATEST_VERSION,
) -> tuple[Document, Document]:
root_doc = get_root_document(doc)
def _get_root_ids_by_doc_id(doc_ids: list[int]) -> dict[int, int]:
"""
Resolve each provided document id to its root document id.
if source_mode == SourceModeChoices.EXPLICIT_SELECTION:
return root_doc, doc
- If the id is already a root document: root id is itself.
- If the id is a version document: root id is its `root_document_id`.
"""
qs = Document.objects.filter(id__in=doc_ids).only("id", "root_document_id")
return {doc.id: doc.root_document_id or doc.id for doc in qs}
# Version IDs are explicit by default, only a selected root resolves to latest
if doc.root_document_id is not None:
return root_doc, doc
return root_doc, get_latest_version_for_root(root_doc)
def _get_root_and_current_docs_by_root_id(
    root_ids: set[int],
) -> tuple[dict[int, Document], dict[int, Document]]:
    """
    Load the root documents for ``root_ids`` and pick each root's current version.

    Returns a pair of dicts, both keyed by root document id:
    - root_docs: the root Document itself (fetched with ``owner`` joined in)
    - current_docs: the version Document with the highest id for that root,
      or the root itself when no version references it

    Ids in ``root_ids`` that match no Document are absent from both dicts.
    """
    roots_qs = Document.objects.filter(id__in=root_ids).select_related("owner")
    root_docs = {root.id: root for root in roots_qs}

    # Rows arrive grouped by root with the highest id first, so the first row
    # seen for each root is the one to keep.
    versions_qs = Document.objects.filter(root_document_id__in=root_ids).order_by(
        "root_document_id",
        "-id",
    )
    newest_by_root: dict[int, Document] = {}
    for candidate in versions_qs:
        owning_root_id = candidate.root_document_id
        if owning_root_id is not None and owning_root_id not in newest_by_root:
            newest_by_root[owning_root_id] = candidate

    current_docs: dict[int, Document] = {}
    for root_id, root in root_docs.items():
        current_docs[root_id] = newest_by_root.get(root_id, root)
    return root_docs, current_docs
def set_correspondent(
@@ -405,32 +421,21 @@ def rotate(
doc_ids: list[int],
degrees: int,
*,
source_mode: SourceMode = SourceModeChoices.LATEST_VERSION,
user: User | None = None,
) -> Literal["OK"]:
logger.info(
f"Attempting to rotate {len(doc_ids)} documents by {degrees} degrees.",
)
docs_by_id = {
doc.id: doc
for doc in Document.objects.select_related("root_document").filter(
id__in=doc_ids,
)
}
docs_by_root_id: dict[int, tuple[Document, Document]] = {}
for doc_id in doc_ids:
doc = docs_by_id.get(doc_id)
if doc is None:
continue
root_doc, source_doc = _resolve_root_and_source_doc(
doc,
source_mode=source_mode,
)
docs_by_root_id.setdefault(root_doc.id, (root_doc, source_doc))
doc_to_root_id = _get_root_ids_by_doc_id(doc_ids)
root_ids = set(doc_to_root_id.values())
root_docs_by_id, current_docs_by_root_id = _get_root_and_current_docs_by_root_id(
root_ids,
)
import pikepdf
for root_doc, source_doc in docs_by_root_id.values():
for root_id in root_ids:
root_doc = root_docs_by_id[root_id]
source_doc = current_docs_by_root_id[root_id]
if source_doc.mime_type != "application/pdf":
logger.warning(
f"Document {root_doc.id} is not a PDF, skipping rotation.",
@@ -476,14 +481,12 @@ def merge(
metadata_document_id: int | None = None,
delete_originals: bool = False,
archive_fallback: bool = False,
source_mode: SourceMode = SourceModeChoices.LATEST_VERSION,
user: User | None = None,
) -> Literal["OK"]:
logger.info(
f"Attempting to merge {len(doc_ids)} documents into a single document.",
)
qs = Document.objects.select_related("root_document").filter(id__in=doc_ids)
docs_by_id = {doc.id: doc for doc in qs}
qs = Document.objects.filter(id__in=doc_ids)
affected_docs: list[int] = []
import pikepdf
@@ -492,20 +495,14 @@ def merge(
handoff_asn: int | None = None
# use doc_ids to preserve order
for doc_id in doc_ids:
doc = docs_by_id.get(doc_id)
if doc is None:
continue
_, source_doc = _resolve_root_and_source_doc(
doc,
source_mode=source_mode,
)
doc = qs.get(id=doc_id)
try:
doc_path = (
source_doc.archive_path
doc.archive_path
if archive_fallback
and source_doc.mime_type != "application/pdf"
and source_doc.has_archive_version
else source_doc.source_path
and doc.mime_type != "application/pdf"
and doc.has_archive_version
else doc.source_path
)
with pikepdf.open(str(doc_path)) as pdf:
version = max(version, pdf.pdf_version)
@@ -587,23 +584,18 @@ def split(
pages: list[list[int]],
*,
delete_originals: bool = False,
source_mode: SourceMode = SourceModeChoices.LATEST_VERSION,
user: User | None = None,
) -> Literal["OK"]:
logger.info(
f"Attempting to split document {doc_ids[0]} into {len(pages)} documents",
)
doc = Document.objects.select_related("root_document").get(id=doc_ids[0])
_, source_doc = _resolve_root_and_source_doc(
doc,
source_mode=source_mode,
)
doc = Document.objects.get(id=doc_ids[0])
import pikepdf
consume_tasks = []
try:
with pikepdf.open(source_doc.source_path) as pdf:
with pikepdf.open(doc.source_path) as pdf:
for idx, split_doc in enumerate(pages):
dst: pikepdf.Pdf = pikepdf.new()
for page in split_doc:
@@ -667,17 +659,25 @@ def delete_pages(
doc_ids: list[int],
pages: list[int],
*,
source_mode: SourceMode = SourceModeChoices.LATEST_VERSION,
user: User | None = None,
) -> Literal["OK"]:
logger.info(
f"Attempting to delete pages {pages} from {len(doc_ids)} documents",
)
doc = Document.objects.select_related("root_document").get(id=doc_ids[0])
root_doc, source_doc = _resolve_root_and_source_doc(
doc,
source_mode=source_mode,
root_doc: Document
if doc.root_document_id is None or doc.root_document is None:
root_doc = doc
else:
root_doc = doc.root_document
source_doc = (
Document.objects.filter(Q(id=root_doc.id) | Q(root_document=root_doc))
.order_by("-id")
.first()
)
if source_doc is None:
source_doc = root_doc
pages = sorted(pages) # sort pages to avoid index issues
import pikepdf
@@ -722,7 +722,6 @@ def edit_pdf(
delete_original: bool = False,
update_document: bool = False,
include_metadata: bool = True,
source_mode: SourceMode = SourceModeChoices.LATEST_VERSION,
user: User | None = None,
) -> Literal["OK"]:
"""
@@ -737,10 +736,19 @@ def edit_pdf(
f"Editing PDF of document {doc_ids[0]} with {len(operations)} operations",
)
doc = Document.objects.select_related("root_document").get(id=doc_ids[0])
root_doc, source_doc = _resolve_root_and_source_doc(
doc,
source_mode=source_mode,
root_doc: Document
if doc.root_document_id is None or doc.root_document is None:
root_doc = doc
else:
root_doc = doc.root_document
source_doc = (
Document.objects.filter(Q(id=root_doc.id) | Q(root_document=root_doc))
.order_by("-id")
.first()
)
if source_doc is None:
source_doc = root_doc
import pikepdf
pdf_docs: list[pikepdf.Pdf] = []
@@ -851,7 +859,6 @@ def remove_password(
update_document: bool = False,
delete_original: bool = False,
include_metadata: bool = True,
source_mode: SourceMode = SourceModeChoices.LATEST_VERSION,
user: User | None = None,
) -> Literal["OK"]:
"""
@@ -861,10 +868,19 @@ def remove_password(
for doc_id in doc_ids:
doc = Document.objects.select_related("root_document").get(id=doc_id)
root_doc, source_doc = _resolve_root_and_source_doc(
doc,
source_mode=source_mode,
root_doc: Document
if doc.root_document_id is None or doc.root_document is None:
root_doc = doc
else:
root_doc = doc.root_document
source_doc = (
Document.objects.filter(Q(id=root_doc.id) | Q(root_document=root_doc))
.order_by("-id")
.first()
)
if source_doc is None:
source_doc = root_doc
try:
logger.info(
f"Attempting password removal from document {doc_ids[0]}",

View File

@@ -1,5 +1,4 @@
import datetime
import hashlib
import os
import tempfile
from enum import StrEnum
@@ -48,6 +47,7 @@ from documents.signals import document_consumption_started
from documents.signals import document_updated
from documents.signals.handlers import run_workflows
from documents.templating.workflows import parse_w_workflow_placeholders
from documents.utils import compute_checksum
from documents.utils import copy_basic_file_stats
from documents.utils import copy_file_with_basic_stats
from documents.utils import run_subprocess
@@ -196,9 +196,7 @@ class ConsumerPlugin(
version_doc = Document(
root_document=root_doc_frozen,
version_index=next_version_index + 1,
checksum=hashlib.md5(
file_for_checksum.read_bytes(),
).hexdigest(),
checksum=compute_checksum(file_for_checksum),
content=text or "",
page_count=page_count,
mime_type=mime_type,
@@ -656,10 +654,9 @@ class ConsumerPlugin(
document.archive_path,
)
with Path(archive_path).open("rb") as f:
document.archive_checksum = hashlib.md5(
f.read(),
).hexdigest()
document.archive_checksum = compute_checksum(
Path(archive_path),
)
# Don't save with the lock active. Saving will cause the file
# renaming logic to acquire the lock as well.
@@ -800,7 +797,7 @@ class ConsumerPlugin(
title=title[:127],
content=text,
mime_type=mime_type,
checksum=hashlib.md5(file_for_checksum.read_bytes()).hexdigest(),
checksum=compute_checksum(file_for_checksum),
created=create_date,
modified=create_date,
page_count=page_count,
@@ -917,10 +914,9 @@ class ConsumerPreflightPlugin(
def pre_check_duplicate(self) -> None:
"""
Using the MD5 of the file, check this exact file doesn't already exist
Using the SHA256 of the file, check this exact file doesn't already exist
"""
with Path(self.input_doc.original_file).open("rb") as f:
checksum = hashlib.md5(f.read()).hexdigest()
checksum = compute_checksum(Path(self.input_doc.original_file))
existing_doc = Document.global_objects.filter(
Q(checksum=checksum) | Q(archive_checksum=checksum),
)

View File

@@ -304,7 +304,7 @@ class PaperlessCommand(RichCommand):
Progress output is directed to stderr to match the convention that
progress bars are transient UI feedback, not command output. This
mirrors the convention that progress bars are transient UI feedback and prevents progress bar rendering
mirrors tqdm's default behavior and prevents progress bar rendering
from interfering with stdout-based assertions in tests or piped
command output.

View File

@@ -17,7 +17,6 @@ class Command(PaperlessCommand):
"modified) after their initial import."
)
supports_progress_bar = True
supports_multiprocessing = True
def add_arguments(self, parser):

View File

@@ -3,10 +3,12 @@ import json
import os
import shutil
import tempfile
from itertools import chain
from itertools import islice
from pathlib import Path
from typing import TYPE_CHECKING
import tqdm
from allauth.mfa.models import Authenticator
from allauth.socialaccount.models import SocialAccount
from allauth.socialaccount.models import SocialApp
@@ -17,6 +19,7 @@ from django.contrib.auth.models import Permission
from django.contrib.auth.models import User
from django.contrib.contenttypes.models import ContentType
from django.core import serializers
from django.core.management.base import BaseCommand
from django.core.management.base import CommandError
from django.core.serializers.json import DjangoJSONEncoder
from django.db import transaction
@@ -35,7 +38,6 @@ if settings.AUDIT_LOG_ENABLED:
from documents.file_handling import delete_empty_directories
from documents.file_handling import generate_filename
from documents.management.commands.base import PaperlessCommand
from documents.management.commands.mixins import CryptMixin
from documents.models import Correspondent
from documents.models import CustomField
@@ -56,6 +58,7 @@ from documents.models import WorkflowTrigger
from documents.settings import EXPORTER_ARCHIVE_NAME
from documents.settings import EXPORTER_FILE_NAME
from documents.settings import EXPORTER_THUMBNAIL_NAME
from documents.utils import compute_checksum
from documents.utils import copy_file_with_basic_stats
from paperless import version
from paperless.models import ApplicationConfiguration
@@ -79,99 +82,14 @@ def serialize_queryset_batched(
yield serializers.serialize("python", chunk)
class StreamingManifestWriter:
    """Incrementally writes a JSON array to a file, one record at a time.

    Records are written to ``<target>.tmp`` first. On close(), the tmp file
    either replaces the target or, when ``compare_json`` is set and the target
    was listed in ``files_in_export_dir`` with identical BLAKE2b content, is
    deleted so the existing file stays untouched.

    On exception inside the ``with`` body, or while closing, discard() deletes
    the tmp file and leaves the original intact.
    """

    # Read size used when hashing, so large manifests are never held in memory
    # in full just to be compared.
    _HASH_CHUNK_SIZE = 1024 * 1024

    def __init__(
        self,
        path: Path,
        *,
        compare_json: bool = False,
        files_in_export_dir: "set[Path] | None" = None,
    ) -> None:
        """
        Args:
            path: final location of the manifest; resolved immediately.
            compare_json: skip the replace when the new content hashes the
                same as the existing file.
            files_in_export_dir: set of paths the caller tracks; ``path`` is
                removed from it when the writer finalizes. The set is mutated
                in place.
        """
        self._path = path.resolve()
        self._tmp_path = self._path.with_suffix(self._path.suffix + ".tmp")
        self._compare_json = compare_json
        self._files_in_export_dir: set[Path] = (
            files_in_export_dir if files_in_export_dir is not None else set()
        )
        self._file = None
        self._first = True

    def open(self) -> None:
        """Create the parent directory and start the JSON array in the tmp file."""
        self._path.parent.mkdir(parents=True, exist_ok=True)
        self._file = self._tmp_path.open("w", encoding="utf-8")
        self._file.write("[")
        self._first = True

    def write_record(self, record: dict) -> None:
        """Append one record, preceded by a separator unless it is the first."""
        if not self._first:
            self._file.write(",\n")
        else:
            self._first = False
        self._file.write(
            json.dumps(record, cls=DjangoJSONEncoder, indent=2, ensure_ascii=False),
        )

    def write_batch(self, records: list[dict]) -> None:
        """Append every record of ``records`` in order."""
        for record in records:
            self.write_record(record)

    def close(self) -> None:
        """Terminate the array, close the tmp file and finalize it.

        Does nothing when the writer is not open.
        """
        if self._file is None:
            return
        handle, self._file = self._file, None
        try:
            handle.write("\n]")
        finally:
            # Release the handle even when the final write fails.
            handle.close()
        self._finalize()

    def discard(self) -> None:
        """Close the tmp file if it is open and delete it if it exists."""
        if self._file is not None:
            self._file.close()
            self._file = None
        if self._tmp_path.exists():
            self._tmp_path.unlink()

    @classmethod
    def _blake2b_of(cls, path: Path) -> str:
        """Return the BLAKE2b hex digest of ``path``, read in fixed-size chunks."""
        digest = hashlib.blake2b()
        with path.open("rb") as handle:
            for chunk in iter(lambda: handle.read(cls._HASH_CHUNK_SIZE), b""):
                digest.update(chunk)
        return digest.hexdigest()

    def _finalize(self) -> None:
        """Compare with existing file (if --compare-json) then replace or discard tmp."""
        if self._path in self._files_in_export_dir:
            self._files_in_export_dir.remove(self._path)
            if self._compare_json and self._blake2b_of(
                self._path,
            ) == self._blake2b_of(self._tmp_path):
                self._tmp_path.unlink()
                return
        # replace() overwrites an existing target on every platform, whereas
        # rename() raises on Windows when the target already exists.
        self._tmp_path.replace(self._path)

    def __enter__(self) -> "StreamingManifestWriter":
        self.open()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        if exc_type is not None:
            self.discard()
            return
        try:
            self.close()
        except BaseException:
            # A failure while finishing must not leave a stray tmp file behind.
            self.discard()
            raise
class Command(CryptMixin, PaperlessCommand):
class Command(CryptMixin, BaseCommand):
help = (
"Decrypt and rename all files in our collection into a given target "
"directory. And include a manifest file containing document data for "
"easy import."
)
supports_progress_bar = True
supports_multiprocessing = False
def add_arguments(self, parser) -> None:
super().add_arguments(parser)
parser.add_argument("target")
parser.add_argument(
@@ -278,6 +196,13 @@ class Command(CryptMixin, PaperlessCommand):
help="If set, only the database will be imported, not files",
)
parser.add_argument(
"--no-progress-bar",
default=False,
action="store_true",
help="If set, the progress bar will not be shown",
)
parser.add_argument(
"--passphrase",
help="If provided, is used to encrypt sensitive data in the export",
@@ -306,6 +231,7 @@ class Command(CryptMixin, PaperlessCommand):
self.no_thumbnail: bool = options["no_thumbnail"]
self.zip_export: bool = options["zip"]
self.data_only: bool = options["data_only"]
self.no_progress_bar: bool = options["no_progress_bar"]
self.passphrase: str | None = options.get("passphrase")
self.batch_size: int = options["batch_size"]
@@ -397,85 +323,95 @@ class Command(CryptMixin, PaperlessCommand):
if settings.AUDIT_LOG_ENABLED:
manifest_key_to_object_query["log_entries"] = LogEntry.objects.all()
# Crypto setup before streaming begins
if self.passphrase:
self.setup_crypto(passphrase=self.passphrase)
elif MailAccount.objects.count() > 0 or SocialToken.objects.count() > 0:
self.stdout.write(
self.style.NOTICE(
"No passphrase was given, sensitive fields will be in plaintext",
),
)
with transaction.atomic():
manifest_dict = {}
document_manifest: list[dict] = []
manifest_path = (self.target / "manifest.json").resolve()
with StreamingManifestWriter(
manifest_path,
compare_json=self.compare_json,
files_in_export_dir=self.files_in_export_dir,
) as writer:
with transaction.atomic():
for key, qs in manifest_key_to_object_query.items():
if key == "documents":
# Accumulate for file-copy loop; written to manifest after
for batch in serialize_queryset_batched(
qs,
# Build an overall manifest
for key, object_query in manifest_key_to_object_query.items():
manifest_dict[key] = list(
chain.from_iterable(
serialize_queryset_batched(
object_query,
batch_size=self.batch_size,
):
for record in batch:
self._encrypt_record_inline(record)
document_manifest.extend(batch)
elif self.split_manifest and key in (
"notes",
"custom_field_instances",
):
# Written per-document in _write_split_manifest
pass
else:
for batch in serialize_queryset_batched(
qs,
batch_size=self.batch_size,
):
for record in batch:
self._encrypt_record_inline(record)
writer.write_batch(batch)
document_map: dict[int, Document] = {
d.pk: d for d in Document.objects.order_by("id")
}
# 3. Export files from each document
for index, document_dict in enumerate(
self.track(
document_manifest,
description="Exporting documents...",
total=len(document_manifest),
),
):
document = document_map[document_dict["pk"]]
# 3.1. generate a unique filename
base_name = self.generate_base_name(document)
# 3.2. write filenames into manifest
original_target, thumbnail_target, archive_target = (
self.generate_document_targets(document, base_name, document_dict)
),
),
)
# 3.3. write files to target folder
if not self.data_only:
self.copy_document_files(
document,
original_target,
thumbnail_target,
archive_target,
)
self.encrypt_secret_fields(manifest_dict)
if self.split_manifest:
self._write_split_manifest(document_dict, document, base_name)
else:
writer.write_record(document_dict)
# These are treated specially and included in the per-document manifest
# if that setting is enabled. Otherwise, they are just exported to the bulk
# manifest
document_map: dict[int, Document] = {
d.pk: d for d in manifest_key_to_object_query["documents"]
}
document_manifest = manifest_dict["documents"]
# 3. Export files from each document
for index, document_dict in tqdm.tqdm(
enumerate(document_manifest),
total=len(document_manifest),
disable=self.no_progress_bar,
):
document = document_map[document_dict["pk"]]
# 3.1. generate a unique filename
base_name = self.generate_base_name(document)
# 3.2. write filenames into manifest
original_target, thumbnail_target, archive_target = (
self.generate_document_targets(document, base_name, document_dict)
)
# 3.3. write files to target folder
if not self.data_only:
self.copy_document_files(
document,
original_target,
thumbnail_target,
archive_target,
)
if self.split_manifest:
manifest_name = base_name.with_name(f"{base_name.stem}-manifest.json")
if self.use_folder_prefix:
manifest_name = Path("json") / manifest_name
manifest_name = (self.target / manifest_name).resolve()
manifest_name.parent.mkdir(parents=True, exist_ok=True)
content = [document_manifest[index]]
content += list(
filter(
lambda d: d["fields"]["document"] == document_dict["pk"],
manifest_dict["notes"],
),
)
content += list(
filter(
lambda d: d["fields"]["document"] == document_dict["pk"],
manifest_dict["custom_field_instances"],
),
)
self.check_and_write_json(
content,
manifest_name,
)
# These were exported already
if self.split_manifest:
del manifest_dict["documents"]
del manifest_dict["notes"]
del manifest_dict["custom_field_instances"]
# 4.1 write primary manifest to target folder
manifest = []
for key, item in manifest_dict.items():
manifest.extend(item)
manifest_path = (self.target / "manifest.json").resolve()
self.check_and_write_json(
manifest,
manifest_path,
)
# 4.2 write version information to target folder
extra_metadata_path = (self.target / "metadata.json").resolve()
@@ -597,42 +533,6 @@ class Command(CryptMixin, PaperlessCommand):
archive_target,
)
def _encrypt_record_inline(self, record: dict) -> None:
"""Encrypt sensitive fields in a single record, if passphrase is set."""
if not self.passphrase:
return
fields = self.CRYPT_FIELDS_BY_MODEL.get(record.get("model", ""))
if fields:
for field in fields:
if record["fields"].get(field):
record["fields"][field] = self.encrypt_string(
value=record["fields"][field],
)
def _write_split_manifest(
    self,
    document_dict: dict,
    document: Document,
    base_name: Path,
) -> None:
    """
    Write the per-document manifest file used by --split-manifest mode.

    The file holds the document's own record followed by its serialized notes
    and then its serialized custom field instances. It is named
    "<base_name stem>-manifest.json", placed under "json/" when the folder
    prefix option is on, and written through check_and_write_json.
    """
    note_records = serializers.serialize(
        "python",
        Note.objects.filter(document=document),
    )
    custom_field_records = serializers.serialize(
        "python",
        CustomFieldInstance.objects.filter(document=document),
    )
    content = [document_dict, *note_records, *custom_field_records]

    relative_manifest = base_name.with_name(f"{base_name.stem}-manifest.json")
    if self.use_folder_prefix:
        relative_manifest = Path("json") / relative_manifest
    manifest_path = (self.target / relative_manifest).resolve()
    manifest_path.parent.mkdir(parents=True, exist_ok=True)
    self.check_and_write_json(content, manifest_path)
def check_and_write_json(
self,
content: list[dict] | dict,
@@ -650,14 +550,14 @@ class Command(CryptMixin, PaperlessCommand):
if target in self.files_in_export_dir:
self.files_in_export_dir.remove(target)
if self.compare_json:
target_checksum = hashlib.blake2b(target.read_bytes()).hexdigest()
target_checksum = compute_checksum(target)
src_str = json.dumps(
content,
cls=DjangoJSONEncoder,
indent=2,
ensure_ascii=False,
)
src_checksum = hashlib.blake2b(src_str.encode("utf-8")).hexdigest()
src_checksum = hashlib.sha256(src_str.encode("utf-8")).hexdigest()
if src_checksum == target_checksum:
perform_write = False
@@ -693,7 +593,7 @@ class Command(CryptMixin, PaperlessCommand):
source_stat = source.stat()
target_stat = target.stat()
if self.compare_checksums and source_checksum:
target_checksum = hashlib.md5(target.read_bytes()).hexdigest()
target_checksum = compute_checksum(target)
perform_copy = target_checksum != source_checksum
elif (
source_stat.st_mtime != target_stat.st_mtime
@@ -707,3 +607,28 @@ class Command(CryptMixin, PaperlessCommand):
if perform_copy:
target.parent.mkdir(parents=True, exist_ok=True)
copy_file_with_basic_stats(source, target)
def encrypt_secret_fields(self, manifest: dict) -> None:
    """
    Encrypt, in place, the configured secret fields of the export manifest.

    With a passphrase set, crypto is initialised and, for every entry of
    CRYPT_FIELDS, each truthy listed field of each record under that entry's
    exporter key is replaced by its encrypted value.

    Without a passphrase nothing is modified; a notice is written to stdout
    when at least one MailAccount or SocialToken exists.
    """
    if not self.passphrase:
        if MailAccount.objects.count() > 0 or SocialToken.objects.count() > 0:
            self.stdout.write(
                self.style.NOTICE(
                    "No passphrase was given, sensitive fields will be in plaintext",
                ),
            )
        return

    self.setup_crypto(passphrase=self.passphrase)

    for crypt_config in self.CRYPT_FIELDS:
        exporter_key = crypt_config["exporter_key"]
        secret_names = crypt_config["fields"]
        for entry in manifest[exporter_key]:
            for name in secret_names:
                if entry["fields"][name]:
                    entry["fields"][name] = self.encrypt_string(
                        value=entry["fields"][name],
                    )

View File

@@ -40,7 +40,6 @@ def _process_and_match(work: _WorkPackage) -> _WorkResult:
class Command(PaperlessCommand):
help = "Searches for documents where the content almost matches"
supports_progress_bar = True
supports_multiprocessing = True
def add_arguments(self, parser):

View File

@@ -8,12 +8,14 @@ from pathlib import Path
from zipfile import ZipFile
from zipfile import is_zipfile
import tqdm
from django.conf import settings
from django.contrib.auth.models import Permission
from django.contrib.auth.models import User
from django.contrib.contenttypes.models import ContentType
from django.core.exceptions import FieldDoesNotExist
from django.core.management import call_command
from django.core.management.base import BaseCommand
from django.core.management.base import CommandError
from django.core.serializers.base import DeserializationError
from django.db import IntegrityError
@@ -23,7 +25,6 @@ from django.db.models.signals import post_save
from filelock import FileLock
from documents.file_handling import create_source_path_directory
from documents.management.commands.base import PaperlessCommand
from documents.management.commands.mixins import CryptMixin
from documents.models import Correspondent
from documents.models import CustomField
@@ -56,19 +57,22 @@ def disable_signal(sig, receiver, sender, *, weak: bool | None = None) -> Genera
sig.connect(receiver=receiver, sender=sender, **kwargs)
class Command(CryptMixin, PaperlessCommand):
class Command(CryptMixin, BaseCommand):
help = (
"Using a manifest.json file, load the data from there, and import the "
"documents it refers to."
)
supports_progress_bar = True
supports_multiprocessing = False
def add_arguments(self, parser) -> None:
super().add_arguments(parser)
parser.add_argument("source")
parser.add_argument(
"--no-progress-bar",
default=False,
action="store_true",
help="If set, the progress bar will not be shown",
)
parser.add_argument(
"--data-only",
default=False,
@@ -227,6 +231,7 @@ class Command(CryptMixin, PaperlessCommand):
self.source = Path(options["source"]).resolve()
self.data_only: bool = options["data_only"]
self.no_progress_bar: bool = options["no_progress_bar"]
self.passphrase: str | None = options.get("passphrase")
self.version: str | None = None
self.salt: str | None = None
@@ -360,7 +365,7 @@ class Command(CryptMixin, PaperlessCommand):
filter(lambda r: r["model"] == "documents.document", self.manifest),
)
for record in self.track(manifest_documents, description="Copying files..."):
for record in tqdm.tqdm(manifest_documents, disable=self.no_progress_bar):
document = Document.objects.get(pk=record["pk"])
doc_file = record[EXPORTER_FILE_NAME]

View File

@@ -8,9 +8,6 @@ from documents.tasks import index_reindex
class Command(PaperlessCommand):
help = "Manages the document index."
supports_progress_bar = True
supports_multiprocessing = False
def add_arguments(self, parser):
super().add_arguments(parser)
parser.add_argument("command", choices=["reindex", "optimize"])

View File

@@ -7,9 +7,6 @@ from documents.tasks import llmindex_index
class Command(PaperlessCommand):
help = "Manages the LLM-based vector index for Paperless."
supports_progress_bar = True
supports_multiprocessing = False
def add_arguments(self, parser: Any) -> None:
super().add_arguments(parser)
parser.add_argument("command", choices=["rebuild", "update"])

View File

@@ -7,9 +7,6 @@ from documents.models import Document
class Command(PaperlessCommand):
help = "Rename all documents"
supports_progress_bar = True
supports_multiprocessing = False
def handle(self, *args, **options):
for document in self.track(Document.objects.all(), description="Renaming..."):
post_save.send(Document, instance=document, created=False)

View File

@@ -180,9 +180,6 @@ class Command(PaperlessCommand):
"modified) after their initial import."
)
supports_progress_bar = True
supports_multiprocessing = False
def add_arguments(self, parser) -> None:
super().add_arguments(parser)
parser.add_argument("-c", "--correspondent", default=False, action="store_true")

View File

@@ -24,9 +24,6 @@ _LEVEL_STYLE: dict[int, tuple[str, str]] = {
class Command(PaperlessCommand):
help = "This command checks your document archive for issues."
supports_progress_bar = True
supports_multiprocessing = False
def _render_results(self, messages: SanityCheckMessages) -> None:
"""Render sanity check results as a Rich table."""

View File

@@ -36,7 +36,6 @@ def _process_document(doc_id: int) -> None:
class Command(PaperlessCommand):
help = "This will regenerate the thumbnails for all documents."
supports_progress_bar = True
supports_multiprocessing = True
def add_arguments(self, parser) -> None:

View File

@@ -1,5 +1,6 @@
import base64
import os
from argparse import ArgumentParser
from typing import TypedDict
from cryptography.fernet import Fernet
@@ -20,6 +21,25 @@ class CryptFields(TypedDict):
fields: list[str]
class ProgressBarMixin:
    """
    Mixin for commands that show a progress bar and let the user
    switch it off with ``--no-progress-bar``.
    """

    def add_argument_progress_bar_mixin(self, parser: ArgumentParser) -> None:
        """Register the ``--no-progress-bar`` switch on ``parser``."""
        flag_options = {
            "default": False,
            "action": "store_true",
            "help": "If set, the progress bar will not be shown",
        }
        parser.add_argument("--no-progress-bar", **flag_options)

    def handle_progress_bar_mixin(self, *args, **options) -> None:
        """Store the parsed flag and its negation on the instance."""
        disabled = options["no_progress_bar"]
        self.no_progress_bar = disabled
        self.use_progress_bar = not disabled
class CryptMixin:
"""
Fully based on:
@@ -51,7 +71,7 @@ class CryptMixin:
key_size = 32
kdf_algorithm = "pbkdf2_sha256"
CRYPT_FIELDS: list[CryptFields] = [
CRYPT_FIELDS: CryptFields = [
{
"exporter_key": "mail_accounts",
"model_name": "paperless_mail.mailaccount",
@@ -69,10 +89,6 @@ class CryptMixin:
],
},
]
# O(1) lookup for per-record encryption; derived from CRYPT_FIELDS at class definition time
CRYPT_FIELDS_BY_MODEL: dict[str, list[str]] = {
cfg["model_name"]: cfg["fields"] for cfg in CRYPT_FIELDS
}
def get_crypt_params(self) -> dict[str, dict[str, str | int]]:
return {

View File

@@ -9,9 +9,6 @@ class Command(PaperlessCommand):
help = "Prunes the audit logs of objects that no longer exist."
supports_progress_bar = True
supports_multiprocessing = False
def handle(self, *args, **options):
with transaction.atomic():
for log_entry in self.track(

View File

@@ -0,0 +1,130 @@
import hashlib
import logging
from pathlib import Path
from django.conf import settings
from django.db import migrations
from django.db import models
logger = logging.getLogger("paperless.migrations")
_CHUNK_SIZE = 65536 # 64 KiB — avoids loading entire files into memory
_BATCH_SIZE = 500 # documents per bulk_update call
_PROGRESS_INTERVAL = 500 # log a progress line every N documents
def _sha256(path: Path) -> str:
    """Return the hex SHA-256 digest of the file at ``path``.

    The file is consumed in ``_CHUNK_SIZE`` byte pieces so that it is never
    held in memory as a whole.
    """
    digest = hashlib.sha256()
    with path.open("rb") as stream:
        # read() yields b"" at end of file, which ends the iteration
        for block in iter(lambda: stream.read(_CHUNK_SIZE), b""):
            digest.update(block)
    return digest.hexdigest()
def recompute_checksums(apps, schema_editor):
    """Recompute all document checksums from MD5 to SHA256.

    Every ``Document`` row is visited in primary-key order. The original file
    is re-hashed with SHA-256, and so is the archive file when the row has an
    ``archive_filename``. New digests are written back in batches with
    ``bulk_update``. A file that is not present on disk is logged as a warning
    and its stored checksum is left unchanged.

    Args:
        apps: Historical app registry passed in by ``RunPython``.
        schema_editor: Unused; part of the ``RunPython`` callable signature.
    """
    Document = apps.get_model("documents", "Document")

    total = Document.objects.count()
    if total == 0:
        return

    logger.info("Recomputing SHA-256 checksums for %d document(s)...", total)

    base_queryset = Document.objects.only(
        "pk",
        "filename",
        "checksum",
        "archive_filename",
        "archive_checksum",
    ).order_by("pk")

    processed = 0
    last_pk = None

    # Keyset pagination: each page is fully fetched into a list before any
    # write happens, so the table is never modified while a read cursor over
    # it is still open (which SQLite does not isolate).
    while True:
        page = (
            base_queryset
            if last_pk is None
            else base_queryset.filter(pk__gt=last_pk)
        )
        docs = list(page[:_BATCH_SIZE])
        if not docs:
            break
        last_pk = docs[-1].pk

        batch: list = []

        for doc in docs:
            changed = False

            # Reconstruct source path the same way Document.source_path does
            fname = str(doc.filename) if doc.filename else f"{doc.pk:07}.pdf"
            source_path = (settings.ORIGINALS_DIR / Path(fname)).resolve()

            # is_file() rather than exists(): a directory cannot be hashed
            if source_path.is_file():
                doc.checksum = _sha256(source_path)
                changed = True
            else:
                logger.warning(
                    "Document %s: original file %s not found, checksum not updated.",
                    doc.pk,
                    source_path,
                )

            # Mirror Document.has_archive_version: archive_filename is not None
            if doc.archive_filename is not None:
                archive_path = (
                    settings.ARCHIVE_DIR / Path(str(doc.archive_filename))
                ).resolve()
                if archive_path.is_file():
                    doc.archive_checksum = _sha256(archive_path)
                    changed = True
                else:
                    logger.warning(
                        "Document %s: archive file %s not found, checksum not updated.",
                        doc.pk,
                        archive_path,
                    )

            if changed:
                batch.append(doc)

            processed += 1

            if processed % _PROGRESS_INTERVAL == 0:
                logger.info(
                    "SHA-256 checksum progress: %d/%d (%d%%)",
                    processed,
                    total,
                    processed * 100 // total,
                )

        if batch:
            Document.objects.bulk_update(batch, ["checksum", "archive_checksum"])

    # Report the number of rows actually visited, not the initial count
    logger.info(
        "SHA-256 checksum recomputation complete: %d document(s) processed.",
        processed,
    )
# Schema + data migration for the switch to SHA-256 checksums: both checksum
# columns are declared with max_length=64 (the length of a hex SHA-256 digest)
# and the stored values are then recomputed from the files on disk.
class Migration(migrations.Migration):
dependencies = [
("documents", "0016_document_version_index_and_more"),
]
# The two AlterField operations are listed before RunPython, so the columns
# are altered to 64 characters before recompute_checksums writes new digests.
operations = [
migrations.AlterField(
model_name="document",
name="checksum",
field=models.CharField(
editable=False,
help_text="The checksum of the original document.",
max_length=64,
verbose_name="checksum",
),
),
migrations.AlterField(
model_name="document",
name="archive_checksum",
field=models.CharField(
blank=True,
editable=False,
help_text="The checksum of the archived document.",
max_length=64,
null=True,
verbose_name="archive checksum",
),
),
# The reverse operation is a no-op: un-applying this migration does not
# restore the previous checksum values.
# NOTE(review): reversing the AlterField operations with 64-character values
# already stored may fail or truncate depending on the database - confirm.
migrations.RunPython(recompute_checksums, migrations.RunPython.noop),
]

View File

@@ -216,14 +216,14 @@ class Document(SoftDeleteModel, ModelWithOwner): # type: ignore[django-manager-
checksum = models.CharField(
_("checksum"),
max_length=32,
max_length=64,
editable=False,
help_text=_("The checksum of the original document."),
)
archive_checksum = models.CharField(
_("archive checksum"),
max_length=32,
max_length=64,
editable=False,
blank=True,
null=True,

View File

@@ -11,7 +11,6 @@ is an identity function that adds no overhead.
from __future__ import annotations
import hashlib
import logging
import uuid
from collections import defaultdict
@@ -30,6 +29,7 @@ from django.utils import timezone
from documents.models import Document
from documents.models import PaperlessTask
from documents.utils import compute_checksum
from paperless.config import GeneralConfig
logger = logging.getLogger("paperless.sanity_checker")
@@ -218,7 +218,7 @@ def _check_original(
present_files.discard(source_path)
try:
checksum = hashlib.md5(source_path.read_bytes()).hexdigest()
checksum = compute_checksum(source_path)
except OSError as e:
messages.error(doc.pk, f"Cannot read original file of document: {e}")
else:
@@ -255,7 +255,7 @@ def _check_archive(
present_files.discard(archive_path)
try:
checksum = hashlib.md5(archive_path.read_bytes()).hexdigest()
checksum = compute_checksum(archive_path)
except OSError as e:
messages.error(
doc.pk,

View File

@@ -1440,124 +1440,6 @@ class SavedViewSerializer(OwnedObjectSerializer):
"set_permissions",
]
def _get_api_version(self) -> int:
request = self.context.get("request")
return int(
request.version if request else settings.REST_FRAMEWORK["DEFAULT_VERSION"],
)
def _update_legacy_visibility_preferences(
self,
saved_view_id: int,
*,
show_on_dashboard: bool | None,
show_in_sidebar: bool | None,
) -> UiSettings | None:
if show_on_dashboard is None and show_in_sidebar is None:
return None
request = self.context.get("request")
user = request.user if request else self.user
if user is None:
return None
ui_settings, _ = UiSettings.objects.get_or_create(
user=user,
defaults={"settings": {}},
)
current_settings = (
ui_settings.settings if isinstance(ui_settings.settings, dict) else {}
)
current_settings = dict(current_settings)
saved_views_settings = current_settings.get("saved_views")
if isinstance(saved_views_settings, dict):
saved_views_settings = dict(saved_views_settings)
else:
saved_views_settings = {}
dashboard_ids = {
int(raw_id)
for raw_id in saved_views_settings.get("dashboard_views_visible_ids", [])
if str(raw_id).isdigit()
}
sidebar_ids = {
int(raw_id)
for raw_id in saved_views_settings.get("sidebar_views_visible_ids", [])
if str(raw_id).isdigit()
}
if show_on_dashboard is not None:
if show_on_dashboard:
dashboard_ids.add(saved_view_id)
else:
dashboard_ids.discard(saved_view_id)
if show_in_sidebar is not None:
if show_in_sidebar:
sidebar_ids.add(saved_view_id)
else:
sidebar_ids.discard(saved_view_id)
saved_views_settings["dashboard_views_visible_ids"] = sorted(dashboard_ids)
saved_views_settings["sidebar_views_visible_ids"] = sorted(sidebar_ids)
current_settings["saved_views"] = saved_views_settings
ui_settings.settings = current_settings
ui_settings.save(update_fields=["settings"])
return ui_settings
def to_representation(self, instance):
# TODO: remove this and related backwards compatibility code when API v9 is dropped
ret = super().to_representation(instance)
request = self.context.get("request")
api_version = self._get_api_version()
if api_version < 10:
dashboard_ids = set()
sidebar_ids = set()
user = request.user if request else None
if user is not None and hasattr(user, "ui_settings"):
ui_settings = user.ui_settings.settings or None
saved_views = None
if isinstance(ui_settings, dict):
saved_views = ui_settings.get("saved_views", {})
if isinstance(saved_views, dict):
dashboard_ids = set(
saved_views.get("dashboard_views_visible_ids", []),
)
sidebar_ids = set(
saved_views.get("sidebar_views_visible_ids", []),
)
ret["show_on_dashboard"] = instance.id in dashboard_ids
ret["show_in_sidebar"] = instance.id in sidebar_ids
return ret
def to_internal_value(self, data):
# TODO: remove this and related backwards compatibility code when API v9 is dropped
api_version = self._get_api_version()
if api_version >= 10:
return super().to_internal_value(data)
normalized_data = data.copy()
legacy_visibility_fields = {}
boolean_field = serializers.BooleanField()
for field_name in ("show_on_dashboard", "show_in_sidebar"):
if field_name in normalized_data:
try:
legacy_visibility_fields[field_name] = (
boolean_field.to_internal_value(
normalized_data.get(field_name),
)
)
except serializers.ValidationError as exc:
raise serializers.ValidationError({field_name: exc.detail})
del normalized_data[field_name]
ret = super().to_internal_value(normalized_data)
ret.update(legacy_visibility_fields)
return ret
def validate(self, attrs):
attrs = super().validate(attrs)
if "display_fields" in attrs and attrs["display_fields"] is not None:
@@ -1577,9 +1459,6 @@ class SavedViewSerializer(OwnedObjectSerializer):
return attrs
def update(self, instance, validated_data):
request = self.context.get("request")
show_on_dashboard = validated_data.pop("show_on_dashboard", None)
show_in_sidebar = validated_data.pop("show_in_sidebar", None)
if "filter_rules" in validated_data:
rules_data = validated_data.pop("filter_rules")
else:
@@ -1601,19 +1480,9 @@ class SavedViewSerializer(OwnedObjectSerializer):
SavedViewFilterRule.objects.filter(saved_view=instance).delete()
for rule_data in rules_data:
SavedViewFilterRule.objects.create(saved_view=instance, **rule_data)
ui_settings = self._update_legacy_visibility_preferences(
instance.id,
show_on_dashboard=show_on_dashboard,
show_in_sidebar=show_in_sidebar,
)
if request is not None and ui_settings is not None:
request.user.ui_settings = ui_settings
return instance
def create(self, validated_data):
request = self.context.get("request")
show_on_dashboard = validated_data.pop("show_on_dashboard", None)
show_in_sidebar = validated_data.pop("show_in_sidebar", None)
rules_data = validated_data.pop("filter_rules")
if "user" in validated_data:
# backwards compatibility
@@ -1621,13 +1490,6 @@ class SavedViewSerializer(OwnedObjectSerializer):
saved_view = super().create(validated_data)
for rule_data in rules_data:
SavedViewFilterRule.objects.create(saved_view=saved_view, **rule_data)
ui_settings = self._update_legacy_visibility_preferences(
saved_view.id,
show_on_dashboard=show_on_dashboard,
show_in_sidebar=show_in_sidebar,
)
if request is not None and ui_settings is not None:
request.user.ui_settings = ui_settings
return saved_view
@@ -1655,124 +1517,11 @@ class DocumentListSerializer(serializers.Serializer):
return documents
class SourceModeValidationMixin:
def validate_source_mode(self, source_mode: str) -> str:
if source_mode not in bulk_edit.SourceModeChoices.__dict__.values():
raise serializers.ValidationError("Invalid source_mode")
return source_mode
class RotateDocumentsSerializer(DocumentListSerializer, SourceModeValidationMixin):
degrees = serializers.IntegerField(required=True)
source_mode = serializers.CharField(
required=False,
default=bulk_edit.SourceModeChoices.LATEST_VERSION,
)
class MergeDocumentsSerializer(DocumentListSerializer, SourceModeValidationMixin):
metadata_document_id = serializers.IntegerField(
required=False,
allow_null=True,
)
delete_originals = serializers.BooleanField(required=False, default=False)
archive_fallback = serializers.BooleanField(required=False, default=False)
source_mode = serializers.CharField(
required=False,
default=bulk_edit.SourceModeChoices.LATEST_VERSION,
)
class EditPdfDocumentsSerializer(DocumentListSerializer, SourceModeValidationMixin):
operations = serializers.ListField(required=True)
delete_original = serializers.BooleanField(required=False, default=False)
update_document = serializers.BooleanField(required=False, default=False)
include_metadata = serializers.BooleanField(required=False, default=True)
source_mode = serializers.CharField(
required=False,
default=bulk_edit.SourceModeChoices.LATEST_VERSION,
)
def validate(self, attrs):
documents = attrs["documents"]
if len(documents) > 1:
raise serializers.ValidationError(
"Edit PDF method only supports one document",
)
operations = attrs["operations"]
if not isinstance(operations, list):
raise serializers.ValidationError("operations must be a list")
for op in operations:
if not isinstance(op, dict):
raise serializers.ValidationError("invalid operation entry")
if "page" not in op or not isinstance(op["page"], int):
raise serializers.ValidationError("page must be an integer")
if "rotate" in op and not isinstance(op["rotate"], int):
raise serializers.ValidationError("rotate must be an integer")
if "doc" in op and not isinstance(op["doc"], int):
raise serializers.ValidationError("doc must be an integer")
if attrs["update_document"]:
max_idx = max(op.get("doc", 0) for op in operations)
if max_idx > 0:
raise serializers.ValidationError(
"update_document only allowed with a single output document",
)
doc = Document.objects.get(id=documents[0])
if doc.page_count:
for op in operations:
if op["page"] < 1 or op["page"] > doc.page_count:
raise serializers.ValidationError(
f"Page {op['page']} is out of bounds for document with {doc.page_count} pages.",
)
return attrs
class RemovePasswordDocumentsSerializer(
DocumentListSerializer,
SourceModeValidationMixin,
):
password = serializers.CharField(required=True)
update_document = serializers.BooleanField(required=False, default=False)
delete_original = serializers.BooleanField(required=False, default=False)
include_metadata = serializers.BooleanField(required=False, default=True)
source_mode = serializers.CharField(
required=False,
default=bulk_edit.SourceModeChoices.LATEST_VERSION,
)
class DeleteDocumentsSerializer(DocumentListSerializer):
pass
class ReprocessDocumentsSerializer(DocumentListSerializer):
pass
class BulkEditSerializer(
SerializerWithPerms,
DocumentListSerializer,
SetPermissionsMixin,
SourceModeValidationMixin,
):
# TODO: remove this and related backwards compatibility code when API v9 is dropped
# split, delete_pages can be removed entirely
MOVED_DOCUMENT_ACTION_ENDPOINTS = {
"delete": "/api/documents/delete/",
"reprocess": "/api/documents/reprocess/",
"rotate": "/api/documents/rotate/",
"merge": "/api/documents/merge/",
"edit_pdf": "/api/documents/edit_pdf/",
"remove_password": "/api/documents/remove_password/",
"split": "/api/documents/edit_pdf/",
"delete_pages": "/api/documents/edit_pdf/",
}
LEGACY_DOCUMENT_ACTION_METHODS = tuple(MOVED_DOCUMENT_ACTION_ENDPOINTS.keys())
method = serializers.ChoiceField(
choices=[
"set_correspondent",
@@ -1782,8 +1531,15 @@ class BulkEditSerializer(
"remove_tag",
"modify_tags",
"modify_custom_fields",
"delete",
"reprocess",
"set_permissions",
*LEGACY_DOCUMENT_ACTION_METHODS,
"rotate",
"merge",
"split",
"delete_pages",
"edit_pdf",
"remove_password",
],
label="Method",
write_only=True,
@@ -1861,7 +1617,8 @@ class BulkEditSerializer(
return bulk_edit.edit_pdf
elif method == "remove_password":
return bulk_edit.remove_password
else:
else: # pragma: no cover
# This will never happen as it is handled by the ChoiceField
raise serializers.ValidationError("Unsupported method.")
def _validate_parameters_tags(self, parameters) -> None:
@@ -1966,13 +1723,6 @@ class BulkEditSerializer(
except ValueError:
raise serializers.ValidationError("invalid rotation degrees")
def _validate_source_mode(self, parameters) -> None:
source_mode = parameters.get(
"source_mode",
bulk_edit.SourceModeChoices.LATEST_VERSION,
)
parameters["source_mode"] = self.validate_source_mode(source_mode)
def _validate_parameters_split(self, parameters) -> None:
if "pages" not in parameters:
raise serializers.ValidationError("pages not specified")
@@ -2073,9 +1823,6 @@ class BulkEditSerializer(
method = attrs["method"]
parameters = attrs["parameters"]
if "source_mode" in parameters:
self._validate_source_mode(parameters)
if method == bulk_edit.set_correspondent:
self._validate_parameters_correspondent(parameters)
elif method == bulk_edit.set_document_type:

View File

@@ -1,5 +1,4 @@
import datetime
import hashlib
import logging
import shutil
import uuid
@@ -63,6 +62,7 @@ from documents.signals import document_updated
from documents.signals.handlers import cleanup_document_deletion
from documents.signals.handlers import run_workflows
from documents.signals.handlers import send_websocket_document_updated
from documents.utils import compute_checksum
from documents.workflows.utils import get_workflows_for_trigger
from paperless.config import AIConfig
from paperless_ai.indexing import llm_index_add_or_update_document
@@ -323,8 +323,7 @@ def update_document_content_maybe_archive_file(document_id) -> None:
with transaction.atomic():
oldDocument = Document.objects.get(pk=document.pk)
if parser.get_archive_path():
with Path(parser.get_archive_path()).open("rb") as f:
checksum = hashlib.md5(f.read()).hexdigest()
checksum = compute_checksum(Path(parser.get_archive_path()))
# I'm going to save first so that in case the file move
# fails, the database is rolled back.
# We also don't use save() since that triggers the filehandling

View File

@@ -82,8 +82,8 @@ def sample_doc(
return DocumentFactory(
title="test",
checksum="42995833e01aea9b3edee44bbfdd7ce1",
archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b",
checksum="1093cf6e32adbd16b06969df09215d42c4a3a8938cc18b39455953f08d1ff2ab",
archive_checksum="706124ecde3c31616992fa979caed17a726b1c9ccdba70e82a4ff796cea97ccf",
content="test content",
pk=1,
filename="0000001.pdf",

View File

@@ -60,7 +60,7 @@ class DocumentFactory(DjangoModelFactory):
model = Document
title = factory.Faker("sentence", nb_words=4)
checksum = factory.Faker("md5")
checksum = factory.Faker("sha256")
content = factory.Faker("paragraph")
correspondent = None
document_type = None

View File

@@ -422,34 +422,6 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
self.assertEqual(args[0], [self.doc1.id])
self.assertEqual(len(kwargs), 0)
@mock.patch("documents.views.bulk_edit.delete")
def test_delete_documents_endpoint(self, m) -> None:
self.setup_mock(m, "delete")
response = self.client.post(
"/api/documents/delete/",
json.dumps({"documents": [self.doc1.id]}),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_200_OK)
m.assert_called_once()
args, kwargs = m.call_args
self.assertEqual(args[0], [self.doc1.id])
self.assertEqual(len(kwargs), 0)
@mock.patch("documents.views.bulk_edit.reprocess")
def test_reprocess_documents_endpoint(self, m) -> None:
self.setup_mock(m, "reprocess")
response = self.client.post(
"/api/documents/reprocess/",
json.dumps({"documents": [self.doc1.id]}),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_200_OK)
m.assert_called_once()
args, kwargs = m.call_args
self.assertEqual(args[0], [self.doc1.id])
self.assertEqual(len(kwargs), 0)
@mock.patch("documents.serialisers.bulk_edit.set_storage_path")
def test_api_set_storage_path(self, m) -> None:
"""
@@ -905,7 +877,7 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
self.assertEqual(kwargs["merge"], True)
@mock.patch("documents.serialisers.bulk_edit.set_storage_path")
@mock.patch("documents.views.bulk_edit.merge")
@mock.patch("documents.serialisers.bulk_edit.merge")
def test_insufficient_global_perms(self, mock_merge, mock_set_storage) -> None:
"""
GIVEN:
@@ -940,11 +912,12 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
mock_set_storage.assert_not_called()
response = self.client.post(
"/api/documents/merge/",
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc1.id],
"metadata_document_id": self.doc1.id,
"method": "merge",
"parameters": {"metadata_document_id": self.doc1.id},
},
),
content_type="application/json",
@@ -954,12 +927,15 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
mock_merge.assert_not_called()
response = self.client.post(
"/api/documents/merge/",
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc1.id],
"metadata_document_id": self.doc1.id,
"delete_originals": True,
"method": "merge",
"parameters": {
"metadata_document_id": self.doc1.id,
"delete_originals": True,
},
},
),
content_type="application/json",
@@ -1076,117 +1052,84 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
m.assert_called_once()
@mock.patch("documents.views.bulk_edit.rotate")
@mock.patch("documents.serialisers.bulk_edit.rotate")
def test_rotate(self, m) -> None:
self.setup_mock(m, "rotate")
response = self.client.post(
"/api/documents/rotate/",
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id, self.doc3.id],
"degrees": 90,
"method": "rotate",
"parameters": {"degrees": 90},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_200_OK)
m.assert_called_once()
args, kwargs = m.call_args
self.assertCountEqual(args[0], [self.doc2.id, self.doc3.id])
self.assertEqual(kwargs["degrees"], 90)
self.assertEqual(kwargs["source_mode"], "latest_version")
self.assertEqual(kwargs["user"], self.user)
@mock.patch("documents.views.bulk_edit.rotate")
@mock.patch("documents.serialisers.bulk_edit.rotate")
def test_rotate_invalid_params(self, m) -> None:
response = self.client.post(
"/api/documents/rotate/",
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id, self.doc3.id],
"degrees": "foo",
"method": "rotate",
"parameters": {"degrees": "foo"},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
response = self.client.post(
"/api/documents/rotate/",
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id, self.doc3.id],
"degrees": 90.5,
"method": "rotate",
"parameters": {"degrees": 90.5},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
m.assert_not_called()
@mock.patch("documents.views.bulk_edit.rotate")
def test_rotate_insufficient_permissions(self, m) -> None:
self.doc1.owner = User.objects.get(username="temp_admin")
self.doc1.save()
user1 = User.objects.create(username="user1")
user1.user_permissions.add(*Permission.objects.all())
user1.save()
self.client.force_authenticate(user=user1)
self.setup_mock(m, "rotate")
response = self.client.post(
"/api/documents/rotate/",
json.dumps(
{
"documents": [self.doc1.id, self.doc2.id],
"degrees": 90,
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
m.assert_not_called()
self.assertEqual(response.content, b"Insufficient permissions")
response = self.client.post(
"/api/documents/rotate/",
json.dumps(
{
"documents": [self.doc2.id, self.doc3.id],
"degrees": 90,
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_200_OK)
m.assert_called_once()
@mock.patch("documents.views.bulk_edit.merge")
@mock.patch("documents.serialisers.bulk_edit.merge")
def test_merge(self, m) -> None:
self.setup_mock(m, "merge")
response = self.client.post(
"/api/documents/merge/",
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id, self.doc3.id],
"metadata_document_id": self.doc3.id,
"method": "merge",
"parameters": {"metadata_document_id": self.doc3.id},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_200_OK)
m.assert_called_once()
args, kwargs = m.call_args
self.assertCountEqual(args[0], [self.doc2.id, self.doc3.id])
self.assertEqual(kwargs["metadata_document_id"], self.doc3.id)
self.assertEqual(kwargs["source_mode"], "latest_version")
self.assertEqual(kwargs["user"], self.user)
@mock.patch("documents.views.bulk_edit.merge")
@mock.patch("documents.serialisers.bulk_edit.merge")
def test_merge_and_delete_insufficient_permissions(self, m) -> None:
self.doc1.owner = User.objects.get(username="temp_admin")
self.doc1.save()
@@ -1197,12 +1140,15 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
self.setup_mock(m, "merge")
response = self.client.post(
"/api/documents/merge/",
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc1.id, self.doc2.id],
"metadata_document_id": self.doc2.id,
"delete_originals": True,
"method": "merge",
"parameters": {
"metadata_document_id": self.doc2.id,
"delete_originals": True,
},
},
),
content_type="application/json",
@@ -1213,12 +1159,15 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
self.assertEqual(response.content, b"Insufficient permissions")
response = self.client.post(
"/api/documents/merge/",
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id, self.doc3.id],
"metadata_document_id": self.doc2.id,
"delete_originals": True,
"method": "merge",
"parameters": {
"metadata_document_id": self.doc2.id,
"delete_originals": True,
},
},
),
content_type="application/json",
@@ -1227,15 +1176,27 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
self.assertEqual(response.status_code, status.HTTP_200_OK)
m.assert_called_once()
@mock.patch("documents.views.bulk_edit.merge")
@mock.patch("documents.serialisers.bulk_edit.merge")
def test_merge_invalid_parameters(self, m) -> None:
"""
GIVEN:
- API data for merging documents is called
- The parameters are invalid
WHEN:
- API is called
THEN:
- The API fails with a correct error code
"""
self.setup_mock(m, "merge")
response = self.client.post(
"/api/documents/merge/",
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc1.id, self.doc2.id],
"delete_originals": "not_boolean",
"method": "merge",
"parameters": {
"delete_originals": "not_boolean",
},
},
),
content_type="application/json",
@@ -1244,81 +1205,219 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
m.assert_not_called()
def test_bulk_edit_allows_legacy_file_methods_with_warning(self) -> None:
method_payloads = {
"delete": {},
"reprocess": {},
"rotate": {"degrees": 90},
"merge": {"metadata_document_id": self.doc2.id},
"edit_pdf": {"operations": [{"page": 1}]},
"remove_password": {"password": "secret"},
"split": {"pages": "1,2-4"},
"delete_pages": {"pages": [1, 2]},
}
for version in (9, 10):
for method, parameters in method_payloads.items():
with self.subTest(method=method, version=version):
with mock.patch(
f"documents.views.bulk_edit.{method}",
) as mocked_method:
self.setup_mock(mocked_method, method)
with self.assertLogs("paperless.api", level="WARNING") as logs:
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"method": method,
"parameters": parameters,
},
),
content_type="application/json",
headers={
"Accept": f"application/json; version={version}",
},
)
self.assertEqual(response.status_code, status.HTTP_200_OK)
mocked_method.assert_called_once()
self.assertTrue(
any(
"Deprecated bulk_edit method" in entry
and f"'{method}'" in entry
for entry in logs.output
),
)
@mock.patch("documents.views.bulk_edit.edit_pdf")
def test_edit_pdf(self, m) -> None:
self.setup_mock(m, "edit_pdf")
@mock.patch("documents.serialisers.bulk_edit.split")
def test_split(self, m) -> None:
self.setup_mock(m, "split")
response = self.client.post(
"/api/documents/edit_pdf/",
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"operations": [{"page": 1}],
"source_mode": "explicit_selection",
"method": "split",
"parameters": {"pages": "1,2-4,5-6,7"},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_200_OK)
m.assert_called_once()
args, kwargs = m.call_args
self.assertCountEqual(args[0], [self.doc2.id])
self.assertEqual(kwargs["pages"], [[1], [2, 3, 4], [5, 6], [7]])
self.assertEqual(kwargs["user"], self.user)
def test_split_invalid_params(self) -> None:
    """
    GIVEN:
        - Several malformed "split" requests for the bulk_edit endpoint
    WHEN:
        - Each request is posted in turn
    THEN:
        - Every response is 400 and contains the matching error message
    """

    def post_split(document_ids, parameters):
        return self.client.post(
            "/api/documents/bulk_edit/",
            json.dumps(
                {
                    "documents": document_ids,
                    "method": "split",
                    "parameters": parameters,
                },
            ),
            content_type="application/json",
        )

    # (documents, parameters, message expected in the response body)
    invalid_requests = [
        # pages not specified
        ([self.doc2.id], {}, b"pages not specified"),
        # wrong page range format
        ([self.doc2.id], {"pages": "1:7"}, b"invalid pages specified"),
        # only one document supported
        (
            [self.doc1.id, self.doc2.id],
            {"pages": "1-2,3-7"},
            b"Split method only supports one document",
        ),
        # delete_originals is not a bool
        (
            [self.doc2.id],
            {"pages": "1", "delete_originals": "notabool"},
            b"delete_originals must be a boolean",
        ),
    ]

    for document_ids, parameters, expected_message in invalid_requests:
        response = post_split(document_ids, parameters)
        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
        self.assertIn(expected_message, response.content)
@mock.patch("documents.serialisers.bulk_edit.delete_pages")
def test_delete_pages(self, m) -> None:
    """
    GIVEN:
        - bulk_edit.delete_pages is patched
    WHEN:
        - A "delete_pages" request for doc2 with pages 1-4 is posted to
          the bulk_edit endpoint
    THEN:
        - The response is 200
        - delete_pages is called once with doc2's id and the page list
    """
    self.setup_mock(m, "delete_pages")

    request_body = json.dumps(
        {
            "documents": [self.doc2.id],
            "method": "delete_pages",
            "parameters": {"pages": [1, 2, 3, 4]},
        },
    )
    response = self.client.post(
        "/api/documents/bulk_edit/",
        request_body,
        content_type="application/json",
    )

    self.assertEqual(response.status_code, status.HTTP_200_OK)
    m.assert_called_once()

    positional, keyword = m.call_args
    self.assertCountEqual(positional[0], [self.doc2.id])
    self.assertEqual(keyword["pages"], [1, 2, 3, 4])
def test_delete_pages_invalid_params(self) -> None:
    """
    GIVEN:
        - Several malformed "delete_pages" requests for the bulk_edit
          endpoint
    WHEN:
        - Each request is posted in turn
    THEN:
        - Every response is 400 and contains the matching error message
    """

    def post_delete_pages(document_ids, parameters):
        return self.client.post(
            "/api/documents/bulk_edit/",
            json.dumps(
                {
                    "documents": document_ids,
                    "method": "delete_pages",
                    "parameters": parameters,
                },
            ),
            content_type="application/json",
        )

    # (documents, parameters, message expected in the response body)
    invalid_requests = [
        # only one document supported
        (
            [self.doc1.id, self.doc2.id],
            {"pages": [1, 2, 3, 4]},
            b"Delete pages method only supports one document",
        ),
        # pages not specified
        ([self.doc2.id], {}, b"pages not specified"),
        # not a list
        ([self.doc2.id], {"pages": "1-3"}, b"pages must be a list"),
        # not ints
        (
            [self.doc2.id],
            {"pages": ["1-3"]},
            b"pages must be a list of integers",
        ),
    ]

    for document_ids, parameters, expected_message in invalid_requests:
        response = post_delete_pages(document_ids, parameters)
        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
        self.assertIn(expected_message, response.content)
@mock.patch("documents.serialisers.bulk_edit.edit_pdf")
def test_edit_pdf(self, m) -> None:
    """
    GIVEN:
        - bulk_edit.edit_pdf is patched
    WHEN:
        - An "edit_pdf" request for doc2 with a single page operation is
          posted to the bulk_edit endpoint
    THEN:
        - The response is 200
        - edit_pdf is called once with doc2's id, the operations, the
          "explicit_selection" source mode and the requesting user
    """
    self.setup_mock(m, "edit_pdf")

    request_body = json.dumps(
        {
            "documents": [self.doc2.id],
            "method": "edit_pdf",
            "parameters": {"operations": [{"page": 1}]},
        },
    )
    response = self.client.post(
        "/api/documents/bulk_edit/",
        request_body,
        content_type="application/json",
    )

    self.assertEqual(response.status_code, status.HTTP_200_OK)
    m.assert_called_once()

    positional, keyword = m.call_args
    self.assertCountEqual(positional[0], [self.doc2.id])
    self.assertEqual(keyword["operations"], [{"page": 1}])
    self.assertEqual(keyword["source_mode"], "explicit_selection")
    self.assertEqual(keyword["user"], self.user)
def test_edit_pdf_invalid_params(self) -> None:
# multiple documents
response = self.client.post(
"/api/documents/edit_pdf/",
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id, self.doc3.id],
"operations": [{"page": 1}],
"method": "edit_pdf",
"parameters": {"operations": [{"page": 1}]},
},
),
content_type="application/json",
@@ -1326,25 +1425,44 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"Edit PDF method only supports one document", response.content)
# no operations specified
response = self.client.post(
"/api/documents/edit_pdf/",
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"operations": "not_a_list",
"method": "edit_pdf",
"parameters": {},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"Expected a list of items", response.content)
self.assertIn(b"operations not specified", response.content)
# operations not a list
response = self.client.post(
"/api/documents/edit_pdf/",
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"operations": ["invalid_operation"],
"method": "edit_pdf",
"parameters": {"operations": "not_a_list"},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"operations must be a list", response.content)
# invalid operation
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"method": "edit_pdf",
"parameters": {"operations": ["invalid_operation"]},
},
),
content_type="application/json",
@@ -1352,12 +1470,14 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"invalid operation entry", response.content)
# page not an int
response = self.client.post(
"/api/documents/edit_pdf/",
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"operations": [{"page": "not_an_int"}],
"method": "edit_pdf",
"parameters": {"operations": [{"page": "not_an_int"}]},
},
),
content_type="application/json",
@@ -1365,12 +1485,14 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"page must be an integer", response.content)
# rotate not an int
response = self.client.post(
"/api/documents/edit_pdf/",
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"operations": [{"page": 1, "rotate": "not_an_int"}],
"method": "edit_pdf",
"parameters": {"operations": [{"page": 1, "rotate": "not_an_int"}]},
},
),
content_type="application/json",
@@ -1378,12 +1500,14 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"rotate must be an integer", response.content)
# doc not an int
response = self.client.post(
"/api/documents/edit_pdf/",
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"operations": [{"page": 1, "doc": "not_an_int"}],
"method": "edit_pdf",
"parameters": {"operations": [{"page": 1, "doc": "not_an_int"}]},
},
),
content_type="application/json",
@@ -1391,13 +1515,53 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"doc must be an integer", response.content)
# update_document not a boolean
response = self.client.post(
"/api/documents/edit_pdf/",
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"update_document": True,
"operations": [{"page": 1, "doc": 1}, {"page": 2, "doc": 2}],
"method": "edit_pdf",
"parameters": {
"update_document": "not_a_bool",
"operations": [{"page": 1}],
},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"update_document must be a boolean", response.content)
# include_metadata not a boolean
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"method": "edit_pdf",
"parameters": {
"include_metadata": "not_a_bool",
"operations": [{"page": 1}],
},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"include_metadata must be a boolean", response.content)
# update_document True but output would be multiple documents
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"method": "edit_pdf",
"parameters": {
"update_document": True,
"operations": [{"page": 1, "doc": 1}, {"page": 2, "doc": 2}],
},
},
),
content_type="application/json",
@@ -1408,84 +1572,42 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
response.content,
)
response = self.client.post(
"/api/documents/edit_pdf/",
json.dumps(
{
"documents": [self.doc2.id],
"operations": [{"page": 1}],
"source_mode": "not_a_mode",
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"Invalid source_mode", response.content)
@mock.patch("documents.views.bulk_edit.edit_pdf")
@mock.patch("documents.serialisers.bulk_edit.edit_pdf")
def test_edit_pdf_page_out_of_bounds(self, m) -> None:
"""
GIVEN:
- API data for editing PDF is called
- The page number is out of bounds
WHEN:
- API is called
THEN:
- The API fails with a correct error code
"""
self.setup_mock(m, "edit_pdf")
response = self.client.post(
"/api/documents/edit_pdf/",
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"operations": [{"page": 99}],
"method": "edit_pdf",
"parameters": {"operations": [{"page": 99}]},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"out of bounds", response.content)
m.assert_not_called()
@mock.patch("documents.views.bulk_edit.edit_pdf")
def test_edit_pdf_insufficient_permissions(self, m) -> None:
    """
    GIVEN:
        - doc1 is owned by "temp_admin"
        - A newly created user holding every Permission is authenticated
    WHEN:
        - That user posts to the edit_pdf endpoint for doc1, then doc2
    THEN:
        - The doc1 request is answered with 403 "Insufficient permissions"
          and edit_pdf is not called
        - The doc2 request is answered with 200 and edit_pdf is called once
    """
    self.doc1.owner = User.objects.get(username="temp_admin")
    self.doc1.save()

    other_user = User.objects.create(username="user1")
    other_user.user_permissions.add(*Permission.objects.all())
    other_user.save()
    self.client.force_authenticate(user=other_user)

    self.setup_mock(m, "edit_pdf")

    def post_edit_pdf(document_id):
        return self.client.post(
            "/api/documents/edit_pdf/",
            json.dumps(
                {
                    "documents": [document_id],
                    "operations": [{"page": 1}],
                },
            ),
            content_type="application/json",
        )

    denied = post_edit_pdf(self.doc1.id)
    self.assertEqual(denied.status_code, status.HTTP_403_FORBIDDEN)
    m.assert_not_called()
    self.assertEqual(denied.content, b"Insufficient permissions")

    allowed = post_edit_pdf(self.doc2.id)
    self.assertEqual(allowed.status_code, status.HTTP_200_OK)
    m.assert_called_once()
@mock.patch("documents.views.bulk_edit.remove_password")
@mock.patch("documents.serialisers.bulk_edit.remove_password")
def test_remove_password(self, m) -> None:
self.setup_mock(m, "remove_password")
response = self.client.post(
"/api/documents/remove_password/",
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"password": "secret",
"update_document": True,
"method": "remove_password",
"parameters": {"password": "secret", "update_document": True},
},
),
content_type="application/json",
@@ -1497,69 +1619,36 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
self.assertCountEqual(args[0], [self.doc2.id])
self.assertEqual(kwargs["password"], "secret")
self.assertTrue(kwargs["update_document"])
self.assertEqual(kwargs["source_mode"], "latest_version")
self.assertEqual(kwargs["user"], self.user)
def test_remove_password_invalid_params(self) -> None:
response = self.client.post(
"/api/documents/remove_password/",
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"method": "remove_password",
"parameters": {},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"password not specified", response.content)
response = self.client.post(
"/api/documents/remove_password/",
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"password": 123,
"method": "remove_password",
"parameters": {"password": 123},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
@mock.patch("documents.views.bulk_edit.remove_password")
def test_remove_password_insufficient_permissions(self, m) -> None:
    """
    GIVEN:
        - doc1 is owned by "temp_admin"
        - A newly created user holding every Permission is authenticated
    WHEN:
        - That user posts to the remove_password endpoint for doc1,
          then doc2
    THEN:
        - The doc1 request is answered with 403 "Insufficient permissions"
          and remove_password is not called
        - The doc2 request is answered with 200 and remove_password is
          called once
    """
    self.doc1.owner = User.objects.get(username="temp_admin")
    self.doc1.save()

    other_user = User.objects.create(username="user1")
    other_user.user_permissions.add(*Permission.objects.all())
    other_user.save()
    self.client.force_authenticate(user=other_user)

    self.setup_mock(m, "remove_password")

    def post_remove_password(document_id):
        return self.client.post(
            "/api/documents/remove_password/",
            json.dumps(
                {
                    "documents": [document_id],
                    "password": "secret",
                },
            ),
            content_type="application/json",
        )

    denied = post_remove_password(self.doc1.id)
    self.assertEqual(denied.status_code, status.HTTP_403_FORBIDDEN)
    m.assert_not_called()
    self.assertEqual(denied.content, b"Insufficient permissions")

    allowed = post_remove_password(self.doc2.id)
    self.assertEqual(allowed.status_code, status.HTTP_200_OK)
    m.assert_called_once()
self.assertIn(b"password must be a string", response.content)
@override_settings(AUDIT_LOG_ENABLED=True)
def test_bulk_edit_audit_log_enabled_simple_field(self) -> None:

View File

@@ -41,7 +41,6 @@ from documents.models import SavedView
from documents.models import ShareLink
from documents.models import StoragePath
from documents.models import Tag
from documents.models import UiSettings
from documents.models import Workflow
from documents.models import WorkflowAction
from documents.models import WorkflowTrigger
@@ -2201,205 +2200,6 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
self.assertEqual(response.status_code, status.HTTP_200_OK)
self.assertEqual(response.data["count"], 0)
def test_saved_view_api_version_backward_compatibility(self) -> None:
    """
    GIVEN:
        - Three saved views and UiSettings listing one as visible on the
          dashboard and another as visible in the sidebar
    WHEN:
        - The saved view list is requested with API version 9
        - The saved view list is requested with API version 10
    THEN:
        - Version 9 reports show_on_dashboard / show_in_sidebar according
          to the UiSettings id lists
        - Version 10 does not include either field
    """
    dashboard_view, sidebar_view, hidden_view = (
        SavedView.objects.create(
            owner=self.user,
            name=view_name,
            sort_field="created",
        )
        for view_name in ("dashboard_view", "sidebar_view", "hidden_view")
    )

    UiSettings.objects.update_or_create(
        user=self.user,
        defaults={
            "settings": {
                "saved_views": {
                    "dashboard_views_visible_ids": [dashboard_view.id],
                    "sidebar_views_visible_ids": [sidebar_view.id],
                },
            },
        },
    )

    def list_views_by_id(api_version):
        # Fetch the list under the given API version and index it by id.
        response = self.client.get(
            "/api/saved_views/",
            headers={"Accept": f"application/json; version={api_version}"},
            format="json",
        )
        self.assertEqual(response.status_code, status.HTTP_200_OK)
        return {entry["id"]: entry for entry in response.data["results"]}

    legacy = list_views_by_id(9)
    legacy_dashboard = legacy[dashboard_view.id]
    legacy_sidebar = legacy[sidebar_view.id]
    legacy_hidden = legacy[hidden_view.id]

    self.assertIn("show_on_dashboard", legacy_dashboard)
    self.assertIn("show_in_sidebar", legacy_dashboard)
    self.assertTrue(legacy_dashboard["show_on_dashboard"])
    self.assertFalse(legacy_dashboard["show_in_sidebar"])
    self.assertTrue(legacy_sidebar["show_in_sidebar"])
    self.assertFalse(legacy_sidebar["show_on_dashboard"])
    self.assertFalse(legacy_hidden["show_on_dashboard"])
    self.assertFalse(legacy_hidden["show_in_sidebar"])

    current = list_views_by_id(10)
    for removed_field in ("show_on_dashboard", "show_in_sidebar"):
        self.assertNotIn(removed_field, current[dashboard_view.id])
def test_saved_view_api_version_9_user_without_ui_settings(self) -> None:
    """
    GIVEN:
        - A saved view owned by the user
        - No UiSettings row for that user
    WHEN:
        - The saved view list is requested with API version 9
    THEN:
        - show_on_dashboard and show_in_sidebar are both falsy
    """
    SavedView.objects.create(
        owner=self.user,
        name="test_view",
        sort_field="created",
    )
    UiSettings.objects.filter(user=self.user).delete()

    legacy_headers = {"Accept": "application/json; version=9"}
    response = self.client.get(
        "/api/saved_views/",
        headers=legacy_headers,
        format="json",
    )

    self.assertEqual(response.status_code, status.HTTP_200_OK)
    first_view = response.data["results"][0]
    for visibility_field in ("show_on_dashboard", "show_in_sidebar"):
        self.assertFalse(first_view[visibility_field])
def test_saved_view_api_version_9_create_writes_visibility_to_ui_settings(
    self,
) -> None:
    """
    GIVEN:
        - The current user has no UiSettings row
    WHEN:
        - A saved view is created via API version 9 with
          show_on_dashboard=True and show_in_sidebar=False
    THEN:
        - The response echoes both flags
        - UiSettings "saved_views" lists the new view id under the
          dashboard ids and nothing under the sidebar ids
    """
    UiSettings.objects.filter(user=self.user).delete()

    new_view = {
        "name": "legacy-v9-create",
        "sort_field": "created",
        "filter_rules": [],
        "show_on_dashboard": True,
        "show_in_sidebar": False,
    }
    response = self.client.post(
        "/api/saved_views/",
        new_view,
        headers={"Accept": "application/json; version=9"},
        format="json",
    )

    self.assertEqual(response.status_code, status.HTTP_201_CREATED)
    self.assertTrue(response.data["show_on_dashboard"])
    self.assertFalse(response.data["show_in_sidebar"])

    self.user.refresh_from_db()
    self.assertTrue(hasattr(self.user, "ui_settings"))

    stored = self.user.ui_settings.settings["saved_views"]
    created_id = response.data["id"]
    self.assertListEqual(stored["dashboard_views_visible_ids"], [created_id])
    self.assertListEqual(stored["sidebar_views_visible_ids"], [])
def test_saved_view_api_version_9_patch_writes_visibility_to_ui_settings(
    self,
) -> None:
    """
    GIVEN:
        - Two saved views
        - UiSettings listing the first on the dashboard and both in the
          sidebar
    WHEN:
        - The first view is patched via API version 9 with
          show_on_dashboard=False, then with show_in_sidebar=False
    THEN:
        - After each patch the response flags and the stored UiSettings
          id lists reflect the change
    """
    first_view, second_view = (
        SavedView.objects.create(
            owner=self.user,
            name=view_name,
            sort_field="created",
        )
        for view_name in ("legacy-v9-patch-1", "legacy-v9-patch-2")
    )

    UiSettings.objects.update_or_create(
        user=self.user,
        defaults={
            "settings": {
                "saved_views": {
                    "dashboard_views_visible_ids": [first_view.id],
                    "sidebar_views_visible_ids": [first_view.id, second_view.id],
                },
            },
        },
    )

    def patch_first_view(changes):
        return self.client.patch(
            f"/api/saved_views/{first_view.id}/",
            changes,
            headers={"Accept": "application/json; version=9"},
            format="json",
        )

    def stored_visibility():
        # Re-read the user so the related UiSettings are fresh.
        self.user.refresh_from_db()
        return self.user.ui_settings.settings["saved_views"]

    # Step 1: remove the view from the dashboard only.
    response = patch_first_view({"show_on_dashboard": False})
    self.assertEqual(response.status_code, status.HTTP_200_OK)
    self.assertFalse(response.data["show_on_dashboard"])
    self.assertTrue(response.data["show_in_sidebar"])

    visibility = stored_visibility()
    self.assertListEqual(visibility["dashboard_views_visible_ids"], [])
    self.assertListEqual(
        visibility["sidebar_views_visible_ids"],
        [first_view.id, second_view.id],
    )

    # Step 2: remove the view from the sidebar as well.
    response = patch_first_view({"show_in_sidebar": False})
    self.assertEqual(response.status_code, status.HTTP_200_OK)
    self.assertFalse(response.data["show_on_dashboard"])
    self.assertFalse(response.data["show_in_sidebar"])

    visibility = stored_visibility()
    self.assertListEqual(visibility["dashboard_views_visible_ids"], [])
    self.assertListEqual(
        visibility["sidebar_views_visible_ids"],
        [second_view.id],
    )
def test_saved_view_create_update_patch(self) -> None:
User.objects.create_user("user1")

View File

@@ -25,39 +25,3 @@ class TestApiSchema(APITestCase):
ui_response = self.client.get(self.ENDPOINT + "view/")
self.assertEqual(ui_response.status_code, status.HTTP_200_OK)
def test_schema_includes_dedicated_document_edit_endpoints(self) -> None:
    """
    GIVEN:
        - The API schema endpoint
    WHEN:
        - The schema is requested
    THEN:
        - Its "paths" contain each dedicated document action endpoint
    """
    schema_response = self.client.get(self.ENDPOINT)
    self.assertEqual(schema_response.status_code, status.HTTP_200_OK)

    documented_paths = schema_response.data["paths"]
    for action in (
        "delete",
        "reprocess",
        "rotate",
        "merge",
        "edit_pdf",
        "remove_password",
    ):
        self.assertIn(f"/api/documents/{action}/", documented_paths)
def test_schema_bulk_edit_advertises_legacy_document_action_methods(self) -> None:
    """
    GIVEN:
        - The API schema endpoint
    WHEN:
        - The schema is requested
    THEN:
        - The enum behind BulkEditRequest.method lists every legacy
          document action method
    """
    schema_response = self.client.get(self.ENDPOINT)
    self.assertEqual(schema_response.status_code, status.HTTP_200_OK)

    components = schema_response.data["components"]["schemas"]
    method_property = components["BulkEditRequest"]["properties"]["method"]

    # drf-spectacular emits the enum as a referenced schema for this field,
    # so resolve the last path segment of the $ref to find it.
    enum_reference = method_property["allOf"][0]["$ref"]
    enum_name = enum_reference.split("/")[-1]
    advertised_methods = components[enum_name]["enum"]

    legacy_methods = (
        "delete",
        "reprocess",
        "rotate",
        "merge",
        "edit_pdf",
        "remove_password",
        "split",
        "delete_pages",
    )
    for action_method in legacy_methods:
        self.assertIn(action_method, advertised_methods)

View File

@@ -405,9 +405,7 @@ class TestBulkEdit(DirectoriesMixin, TestCase):
self.assertTrue(Document.objects.filter(id=self.doc1.id).exists())
self.assertFalse(Document.objects.filter(id=version.id).exists())
def test_resolve_root_and_source_doc_latest_version_prefers_newest_version(
self,
) -> None:
def test_get_root_and_current_doc_mapping(self) -> None:
version1 = Document.objects.create(
checksum="B-v1",
title="B version 1",
@@ -419,14 +417,18 @@ class TestBulkEdit(DirectoriesMixin, TestCase):
root_document=self.doc2,
)
root_doc, source_doc = bulk_edit._resolve_root_and_source_doc(
self.doc2,
source_mode="latest_version",
root_ids_by_doc_id = bulk_edit._get_root_ids_by_doc_id(
[self.doc2.id, version1.id, version2.id],
)
self.assertEqual(root_ids_by_doc_id[self.doc2.id], self.doc2.id)
self.assertEqual(root_ids_by_doc_id[version1.id], self.doc2.id)
self.assertEqual(root_ids_by_doc_id[version2.id], self.doc2.id)
self.assertEqual(root_doc.id, self.doc2.id)
self.assertEqual(source_doc.id, version2.id)
self.assertNotEqual(source_doc.id, version1.id)
root_docs, current_docs = bulk_edit._get_root_and_current_docs_by_root_id(
{self.doc2.id},
)
self.assertEqual(root_docs[self.doc2.id].id, self.doc2.id)
self.assertEqual(current_docs[self.doc2.id].id, version2.id)
@mock.patch("documents.tasks.bulk_update_documents.delay")
def test_set_permissions(self, m) -> None:
@@ -660,33 +662,6 @@ class TestPDFActions(DirectoriesMixin, TestCase):
self.assertEqual(result, "OK")
@mock.patch("pikepdf.open")
@mock.patch("documents.tasks.consume_file.s")
def test_merge_uses_latest_version_source_for_root_selection(
    self,
    mock_consume_file,
    mock_open_pdf,
) -> None:
    """
    GIVEN:
        - doc2 has a version document backed by its own copy of the file
        - pikepdf.open and consume_file.s are patched
    WHEN:
        - merge is called with only the root document id
    THEN:
        - The result is "OK"
        - pikepdf.open is called once, with the version's source path
        - consume_file.s is not called
    """
    version_file = self.dirs.scratch_dir / "sample2_version_merge.pdf"
    shutil.copy(self.doc2.source_path, version_file)
    version = Document.objects.create(
        checksum="B-v1",
        title="B version 1",
        root_document=self.doc2,
        filename=version_file,
        mime_type="application/pdf",
    )

    # Stand-in for the opened PDF: a single page at PDF version 1.7.
    opened_pdf = mock.MagicMock()
    opened_pdf.pages = [mock.Mock()]
    opened_pdf.pdf_version = "1.7"
    mock_open_pdf.return_value.__enter__.return_value = opened_pdf

    outcome = bulk_edit.merge([self.doc2.id])

    self.assertEqual(outcome, "OK")
    mock_open_pdf.assert_called_once_with(str(version.source_path))
    mock_consume_file.assert_not_called()
@mock.patch("documents.bulk_edit.delete.si")
@mock.patch("documents.tasks.consume_file.s")
def test_merge_and_delete_originals(
@@ -895,36 +870,6 @@ class TestPDFActions(DirectoriesMixin, TestCase):
self.assertEqual(result, "OK")
@mock.patch("documents.bulk_edit.group")
@mock.patch("pikepdf.open")
@mock.patch("documents.tasks.consume_file.s")
def test_split_uses_latest_version_source_for_root_selection(
    self,
    mock_consume_file,
    mock_open_pdf,
    mock_group,
) -> None:
    """
    GIVEN:
        - doc2 has a version document backed by its own copy of the file
        - pikepdf.open, consume_file.s and bulk_edit.group are patched
    WHEN:
        - split is called with only the root document id
    THEN:
        - The result is "OK"
        - pikepdf.open is called once, with the version's source path
        - Neither consume_file.s nor the group's delay is called
    """
    version_file = self.dirs.scratch_dir / "sample2_version_split.pdf"
    shutil.copy(self.doc2.source_path, version_file)
    version = Document.objects.create(
        checksum="B-v1",
        title="B version 1",
        root_document=self.doc2,
        filename=version_file,
        mime_type="application/pdf",
    )

    # Stand-in for the opened PDF: two pages.
    opened_pdf = mock.MagicMock()
    opened_pdf.pages = [mock.Mock(), mock.Mock()]
    mock_open_pdf.return_value.__enter__.return_value = opened_pdf
    mock_group.return_value.delay.return_value = None

    outcome = bulk_edit.split([self.doc2.id], [[1], [2]])

    self.assertEqual(outcome, "OK")
    mock_open_pdf.assert_called_once_with(version.source_path)
    mock_consume_file.assert_not_called()
    mock_group.return_value.delay.assert_not_called()
@mock.patch("documents.bulk_edit.delete.si")
@mock.patch("documents.tasks.consume_file.s")
@mock.patch("documents.bulk_edit.chord")
@@ -1096,34 +1041,6 @@ class TestPDFActions(DirectoriesMixin, TestCase):
self.assertIsNotNone(overrides)
self.assertEqual(result, "OK")
@mock.patch("documents.data_models.magic.from_file", return_value="application/pdf")
@mock.patch("documents.tasks.consume_file.delay")
@mock.patch("pikepdf.open")
def test_rotate_explicit_selection_uses_root_source_when_root_selected(
    self,
    mock_open,
    mock_consume_delay,
    mock_magic,
):
    """
    GIVEN:
        - doc2 has a version document
        - pikepdf.open, consume_file.delay and magic.from_file are patched
    WHEN:
        - rotate is called for the root id with
          source_mode="explicit_selection"
    THEN:
        - The result is "OK"
        - pikepdf.open is called once, with the root's source path
        - consume_file.delay is called once
    """
    Document.objects.create(
        checksum="B-v1",
        title="B version 1",
        root_document=self.doc2,
    )

    # Stand-in for the opened PDF: a single page.
    opened_pdf = mock.MagicMock()
    opened_pdf.pages = [mock.Mock()]
    mock_open.return_value.__enter__.return_value = opened_pdf

    outcome = bulk_edit.rotate(
        [self.doc2.id],
        90,
        source_mode="explicit_selection",
    )

    self.assertEqual(outcome, "OK")
    mock_open.assert_called_once_with(self.doc2.source_path)
    mock_consume_delay.assert_called_once()
@mock.patch("documents.tasks.consume_file.delay")
@mock.patch("pikepdf.Pdf.save")
@mock.patch("documents.data_models.magic.from_file", return_value="application/pdf")
@@ -1148,34 +1065,6 @@ class TestPDFActions(DirectoriesMixin, TestCase):
self.assertIsNotNone(overrides)
self.assertEqual(result, "OK")
@mock.patch("documents.data_models.magic.from_file", return_value="application/pdf")
@mock.patch("documents.tasks.consume_file.delay")
@mock.patch("pikepdf.open")
def test_delete_pages_explicit_selection_uses_root_source_when_root_selected(
    self,
    mock_open,
    mock_consume_delay,
    mock_magic,
):
    """
    GIVEN:
        - doc2 has a version document
        - pikepdf.open, consume_file.delay and magic.from_file are patched
    WHEN:
        - delete_pages is called for the root id and page 1 with
          source_mode="explicit_selection"
    THEN:
        - The result is "OK"
        - pikepdf.open is called once, with the root's source path
        - consume_file.delay is called once
    """
    Document.objects.create(
        checksum="B-v1",
        title="B version 1",
        root_document=self.doc2,
    )

    # Stand-in for the opened PDF: two pages.
    opened_pdf = mock.MagicMock()
    opened_pdf.pages = [mock.Mock(), mock.Mock()]
    mock_open.return_value.__enter__.return_value = opened_pdf

    outcome = bulk_edit.delete_pages(
        [self.doc2.id],
        [1],
        source_mode="explicit_selection",
    )

    self.assertEqual(outcome, "OK")
    mock_open.assert_called_once_with(self.doc2.source_path)
    mock_consume_delay.assert_called_once()
@mock.patch("documents.tasks.consume_file.delay")
@mock.patch("pikepdf.Pdf.save")
def test_delete_pages_with_error(self, mock_pdf_save, mock_consume_delay):
@@ -1324,40 +1213,6 @@ class TestPDFActions(DirectoriesMixin, TestCase):
self.assertTrue(str(consumable.original_file).endswith("_edited.pdf"))
self.assertIsNotNone(overrides)
@mock.patch("documents.data_models.magic.from_file", return_value="application/pdf")
@mock.patch("documents.tasks.consume_file.delay")
@mock.patch("pikepdf.new")
@mock.patch("pikepdf.open")
def test_edit_pdf_explicit_selection_uses_root_source_when_root_selected(
    self,
    mock_open,
    mock_new,
    mock_consume_delay,
    mock_magic,
):
    """
    GIVEN:
        - doc2 has a version document
        - pikepdf.open, pikepdf.new, consume_file.delay and
          magic.from_file are patched
    WHEN:
        - edit_pdf is called for the root id with update_document=True
          and source_mode="explicit_selection"
    THEN:
        - The result is "OK"
        - pikepdf.open is called once, with the root's source path
        - consume_file.delay is called once
    """
    Document.objects.create(
        checksum="B-v1",
        title="B version 1",
        root_document=self.doc2,
    )

    # Stand-in for the opened source PDF: a single page.
    source_pdf = mock.MagicMock()
    source_pdf.pages = [mock.Mock()]
    mock_open.return_value.__enter__.return_value = source_pdf

    # Stand-in for the newly created output PDF: starts with no pages.
    result_pdf = mock.MagicMock()
    result_pdf.pages = []
    mock_new.return_value = result_pdf

    outcome = bulk_edit.edit_pdf(
        [self.doc2.id],
        operations=[{"page": 1}],
        update_document=True,
        source_mode="explicit_selection",
    )

    self.assertEqual(outcome, "OK")
    mock_open.assert_called_once_with(self.doc2.source_path)
    mock_consume_delay.assert_called_once()
@mock.patch("documents.bulk_edit.group")
@mock.patch("documents.tasks.consume_file.s")
def test_edit_pdf_without_metadata(
@@ -1478,34 +1333,6 @@ class TestPDFActions(DirectoriesMixin, TestCase):
self.assertEqual(consumable.root_document_id, doc.id)
self.assertIsNotNone(overrides)
@mock.patch("documents.data_models.magic.from_file", return_value="application/pdf")
@mock.patch("documents.tasks.consume_file.delay")
@mock.patch("pikepdf.open")
def test_remove_password_explicit_selection_uses_root_source_when_root_selected(
    self,
    mock_open,
    mock_consume_delay,
    mock_magic,
) -> None:
    """
    GIVEN:
        - doc1 has a version document
        - pikepdf.open, consume_file.delay and magic.from_file are patched
    WHEN:
        - remove_password is called for the root id with
          update_document=True and source_mode="explicit_selection"
    THEN:
        - The result is "OK"
        - pikepdf.open is called once, with the root's source path and
          the supplied password
        - consume_file.delay is called once
    """
    Document.objects.create(
        checksum="A-v1",
        title="A version 1",
        root_document=self.doc1,
    )

    opened_pdf = mock.MagicMock()
    mock_open.return_value.__enter__.return_value = opened_pdf

    outcome = bulk_edit.remove_password(
        [self.doc1.id],
        password="secret",
        update_document=True,
        source_mode="explicit_selection",
    )

    self.assertEqual(outcome, "OK")
    mock_open.assert_called_once_with(self.doc1.source_path, password="secret")
    mock_consume_delay.assert_called_once()
@mock.patch("documents.bulk_edit.chord")
@mock.patch("documents.bulk_edit.group")
@mock.patch("documents.tasks.consume_file.s")

View File

@@ -245,8 +245,14 @@ class TestConsumer(
self.assertIsFile(document.archive_path)
self.assertEqual(document.checksum, "42995833e01aea9b3edee44bbfdd7ce1")
self.assertEqual(document.archive_checksum, "62acb0bcbfbcaa62ca6ad3668e4e404b")
self.assertEqual(
document.checksum,
"1093cf6e32adbd16b06969df09215d42c4a3a8938cc18b39455953f08d1ff2ab",
)
self.assertEqual(
document.archive_checksum,
"706124ecde3c31616992fa979caed17a726b1c9ccdba70e82a4ff796cea97ccf",
)
self.assertIsNotFile(filename)

View File

@@ -63,8 +63,8 @@ class TestExportImport(
self.d1 = Document.objects.create(
content="Content",
checksum="42995833e01aea9b3edee44bbfdd7ce1",
archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b",
checksum="1093cf6e32adbd16b06969df09215d42c4a3a8938cc18b39455953f08d1ff2ab",
archive_checksum="706124ecde3c31616992fa979caed17a726b1c9ccdba70e82a4ff796cea97ccf",
title="wow1",
filename="0000001.pdf",
mime_type="application/pdf",
@@ -72,21 +72,21 @@ class TestExportImport(
)
self.d2 = Document.objects.create(
content="Content",
checksum="9c9691e51741c1f4f41a20896af31770",
checksum="550d1bae0f746d4f7c6be07054eb20cc2f11988a58ef64ceae45e98f85e92a5b",
title="wow2",
filename="0000002.pdf",
mime_type="application/pdf",
)
self.d3 = Document.objects.create(
content="Content",
checksum="d38d7ed02e988e072caf924e0f3fcb76",
checksum="f1ba6b7ff8548214a75adec228f5468a14fe187f445bc0b9485cbf1c35b15915",
title="wow2",
filename="0000003.pdf",
mime_type="application/pdf",
)
self.d4 = Document.objects.create(
content="Content",
checksum="82186aaa94f0b98697d704b90fd1c072",
checksum="a81b16b6b313cfd7e60eb7b12598d1343b58622b4030cfa19a2724a02e98db1b",
title="wow_dec",
filename="0000004.pdf",
mime_type="application/pdf",
@@ -240,7 +240,7 @@ class TestExportImport(
)
with Path(fname).open("rb") as f:
checksum = hashlib.md5(f.read()).hexdigest()
checksum = hashlib.sha256(f.read()).hexdigest()
self.assertEqual(checksum, element["fields"]["checksum"])
# Generated field "content_length" should not be exported,
@@ -254,7 +254,7 @@ class TestExportImport(
self.assertIsFile(fname)
with Path(fname).open("rb") as f:
checksum = hashlib.md5(f.read()).hexdigest()
checksum = hashlib.sha256(f.read()).hexdigest()
self.assertEqual(checksum, element["fields"]["archive_checksum"])
elif element["model"] == "documents.note":
@@ -753,31 +753,6 @@ class TestExportImport(
call_command("document_importer", "--no-progress-bar", self.target)
self.assertEqual(Document.objects.count(), 4)
def test_folder_prefix_with_split(self) -> None:
    """
    GIVEN:
        - The media "documents" directory is replaced by the sample
          documents
    WHEN:
        - An export is run with use_folder_prefix and split_manifest
        - All documents are deleted and the export is imported again
    THEN:
        - Four documents exist before the delete and after the import
    """
    media_documents = Path(self.dirs.media_dir) / "documents"
    sample_documents = Path(__file__).parent / "samples" / "documents"

    # Start from a clean copy of the sample documents.
    shutil.rmtree(media_documents)
    shutil.copytree(sample_documents, media_documents)

    self._do_export(use_folder_prefix=True, split_manifest=True)

    with paperless_environment():
        self.assertEqual(Document.objects.count(), 4)

        Document.objects.all().delete()
        self.assertEqual(Document.objects.count(), 0)

        call_command("document_importer", "--no-progress-bar", self.target)
        self.assertEqual(Document.objects.count(), 4)
def test_import_db_transaction_failed(self) -> None:
"""
GIVEN:

View File

@@ -260,8 +260,8 @@ class TestCommandImport(
Document.objects.create(
content="Content",
checksum="42995833e01aea9b3edee44bbfdd7ce1",
archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b",
checksum="1093cf6e32adbd16b06969df09215d42c4a3a8938cc18b39455953f08d1ff2ab",
archive_checksum="706124ecde3c31616992fa979caed17a726b1c9ccdba70e82a4ff796cea97ccf",
title="wow1",
filename="0000001.pdf",
mime_type="application/pdf",

View File

@@ -1,3 +1,4 @@
import hashlib
import logging
import shutil
from os import utime
@@ -128,3 +129,15 @@ def get_boolean(boolstr: str) -> bool:
Return a boolean value from a string representation.
"""
return bool(boolstr.lower() in ("yes", "y", "1", "t", "true"))
def compute_checksum(path: Path, chunk_size: int = 65536) -> str:
    """
    Return the SHA256 hex digest of the file at *path*, reading in chunks
    of *chunk_size* bytes to avoid loading the entire file into memory.

    Raises:
        ValueError: if *chunk_size* is not a positive number of bytes.
    """
    # A size of 0 makes read() return b"" immediately, which would end the
    # loop before any data is hashed and yield the digest of an empty input
    # for every file. Reject it (and negative sizes) up front instead.
    if chunk_size <= 0:
        raise ValueError(
            f"chunk_size must be a positive integer, got {chunk_size!r}",
        )

    h = hashlib.sha256()
    with path.open("rb") as f:
        # read() returns empty bytes at end of file, which ends the loop
        while chunk := f.read(chunk_size):
            h.update(chunk)
    return h.hexdigest()

View File

@@ -176,20 +176,14 @@ from documents.serialisers import BulkEditObjectsSerializer
from documents.serialisers import BulkEditSerializer
from documents.serialisers import CorrespondentSerializer
from documents.serialisers import CustomFieldSerializer
from documents.serialisers import DeleteDocumentsSerializer
from documents.serialisers import DocumentListSerializer
from documents.serialisers import DocumentSerializer
from documents.serialisers import DocumentTypeSerializer
from documents.serialisers import DocumentVersionLabelSerializer
from documents.serialisers import DocumentVersionSerializer
from documents.serialisers import EditPdfDocumentsSerializer
from documents.serialisers import EmailSerializer
from documents.serialisers import MergeDocumentsSerializer
from documents.serialisers import NotesSerializer
from documents.serialisers import PostDocumentSerializer
from documents.serialisers import RemovePasswordDocumentsSerializer
from documents.serialisers import ReprocessDocumentsSerializer
from documents.serialisers import RotateDocumentsSerializer
from documents.serialisers import RunTaskViewSerializer
from documents.serialisers import SavedViewSerializer
from documents.serialisers import SearchResultSerializer
@@ -2120,125 +2114,6 @@ class SavedViewViewSet(BulkPermissionMixin, PassUserMixin, ModelViewSet):
ordering_fields = ("name",)
class DocumentOperationPermissionMixin(PassUserMixin):
    """
    Shared permission check and dispatch for views that run a ``bulk_edit``
    function against a list of document ids.
    """

    permission_classes = (IsAuthenticated,)
    parser_classes = (parsers.JSONParser,)

    # ``__name__`` of the bulk_edit functions that are additionally passed
    # the requesting user as a ``user`` keyword argument.
    METHOD_NAMES_REQUIRING_USER = {
        "split",
        "merge",
        "rotate",
        "delete_pages",
        "edit_pdf",
        "remove_password",
    }

    def _has_document_permissions(
        self,
        *,
        user: User,
        documents: list[int],
        method,
        parameters: dict[str, Any],
    ) -> bool:
        """
        Decide whether *user* may run *method* on the given documents.

        Args:
            user: the requesting user; superusers are always allowed.
            documents: primary keys of the documents to operate on.
            method: the ``bulk_edit`` function about to be called.
            parameters: keyword arguments for *method*; the
                ``delete_originals`` and ``update_document`` entries
                influence which permissions are required.

        Returns:
            True if every applicable check passes, otherwise False.
        """
        if user.is_superuser:
            return True

        document_objs = Document.objects.select_related("owner").filter(
            pk__in=documents,
        )
        user_is_owner_of_all_documents = all(
            (doc.owner == user or doc.owner is None) for doc in document_objs
        )

        # check global and object permissions for all documents
        has_perms = user.has_perm("documents.change_document") and all(
            has_perms_owner_aware(user, "change_document", doc) for doc in document_objs
        )

        # check ownership for methods that change original document
        changes_original_document = (
            method
            in [
                bulk_edit.set_permissions,
                bulk_edit.delete,
                bulk_edit.rotate,
                bulk_edit.delete_pages,
                bulk_edit.edit_pdf,
                bulk_edit.remove_password,
            ]
            or (
                method in [bulk_edit.merge, bulk_edit.split]
                and parameters.get("delete_originals")
            )
            or (method == bulk_edit.edit_pdf and parameters.get("update_document"))
        )
        # The ownership result may only narrow an already granted permission.
        # ``has_perms`` therefore guards the whole condition; if it guarded
        # just the first alternative, a failed change-permission check could
        # be flipped back to True for delete_originals / update_document.
        if has_perms and changes_original_document:
            has_perms = user_is_owner_of_all_documents

        # check global add permissions for methods that create documents
        if (
            has_perms
            and (
                method in [bulk_edit.split, bulk_edit.merge]
                or (
                    method in [bulk_edit.edit_pdf, bulk_edit.remove_password]
                    and not parameters.get("update_document")
                )
            )
            and not user.has_perm("documents.add_document")
        ):
            has_perms = False

        # check global delete permissions for methods that delete documents
        if (
            has_perms
            and (
                method == bulk_edit.delete
                or (
                    method in [bulk_edit.merge, bulk_edit.split]
                    and parameters.get("delete_originals")
                )
            )
            and not user.has_perm("documents.delete_document")
        ):
            has_perms = False

        return has_perms

    def _execute_document_action(
        self,
        *,
        method,
        validated_data: dict[str, Any],
        operation_label: str,
    ):
        """
        Run *method* on the validated request data after a permission check.

        Args:
            method: the ``bulk_edit`` function to call.
            validated_data: serializer output; ``documents`` holds the
                document ids, every other key is forwarded to *method* as a
                keyword argument.
            operation_label: human readable name of the operation, used in
                the log and error messages.

        Returns:
            ``Response({"result": ...})`` on success,
            ``HttpResponseForbidden`` when the permission check fails, or
            ``HttpResponseBadRequest`` when *method* raises.
        """
        documents = validated_data["documents"]
        parameters = {k: v for k, v in validated_data.items() if k != "documents"}
        user = self.request.user

        if method.__name__ in self.METHOD_NAMES_REQUIRING_USER:
            parameters["user"] = user

        if not self._has_document_permissions(
            user=user,
            documents=documents,
            method=method,
            parameters=parameters,
        ):
            return HttpResponseForbidden("Insufficient permissions")

        try:
            result = method(documents, **parameters)
            return Response({"result": result})
        except Exception as e:
            # Details go to the log only; the client gets a generic message.
            logger.warning(f"An error occurred performing {operation_label}: {e!s}")
            return HttpResponseBadRequest(
                f"Error performing {operation_label}, check logs for more detail.",
            )
@extend_schema_view(
post=extend_schema(
operation_id="bulk_edit",
@@ -2257,7 +2132,7 @@ class DocumentOperationPermissionMixin(PassUserMixin):
},
),
)
class BulkEditView(DocumentOperationPermissionMixin):
class BulkEditView(PassUserMixin):
MODIFIED_FIELD_BY_METHOD = {
"set_correspondent": "correspondent",
"set_document_type": "document_type",
@@ -2279,24 +2154,11 @@ class BulkEditView(DocumentOperationPermissionMixin):
"remove_password": None,
}
permission_classes = (IsAuthenticated,)
serializer_class = BulkEditSerializer
parser_classes = (parsers.JSONParser,)
def post(self, request, *args, **kwargs):
request_method = request.data.get("method")
api_version = int(request.version or settings.REST_FRAMEWORK["DEFAULT_VERSION"])
# TODO: remove this and related backwards compatibility code when API v9 is dropped
if request_method in BulkEditSerializer.LEGACY_DOCUMENT_ACTION_METHODS:
endpoint = BulkEditSerializer.MOVED_DOCUMENT_ACTION_ENDPOINTS[
request_method
]
logger.warning(
"Deprecated bulk_edit method '%s' requested on API version %s. "
"Use '%s' instead.",
request_method,
api_version,
endpoint,
)
serializer = self.get_serializer(data=request.data)
serializer.is_valid(raise_exception=True)
@@ -2304,15 +2166,82 @@ class BulkEditView(DocumentOperationPermissionMixin):
method = serializer.validated_data.get("method")
parameters = serializer.validated_data.get("parameters")
documents = serializer.validated_data.get("documents")
if method.__name__ in self.METHOD_NAMES_REQUIRING_USER:
if method in [
bulk_edit.split,
bulk_edit.merge,
bulk_edit.rotate,
bulk_edit.delete_pages,
bulk_edit.edit_pdf,
bulk_edit.remove_password,
]:
parameters["user"] = user
if not self._has_document_permissions(
user=user,
documents=documents,
method=method,
parameters=parameters,
):
return HttpResponseForbidden("Insufficient permissions")
if not user.is_superuser:
document_objs = Document.objects.select_related("owner").filter(
pk__in=documents,
)
user_is_owner_of_all_documents = all(
(doc.owner == user or doc.owner is None) for doc in document_objs
)
# check global and object permissions for all documents
has_perms = user.has_perm("documents.change_document") and all(
has_perms_owner_aware(user, "change_document", doc)
for doc in document_objs
)
# check ownership for methods that change original document
if (
(
has_perms
and method
in [
bulk_edit.set_permissions,
bulk_edit.delete,
bulk_edit.rotate,
bulk_edit.delete_pages,
bulk_edit.edit_pdf,
bulk_edit.remove_password,
]
)
or (
method in [bulk_edit.merge, bulk_edit.split]
and parameters["delete_originals"]
)
or (method == bulk_edit.edit_pdf and parameters["update_document"])
):
has_perms = user_is_owner_of_all_documents
# check global add permissions for methods that create documents
if (
has_perms
and (
method in [bulk_edit.split, bulk_edit.merge]
or (
method in [bulk_edit.edit_pdf, bulk_edit.remove_password]
and not parameters["update_document"]
)
)
and not user.has_perm("documents.add_document")
):
has_perms = False
# check global delete permissions for methods that delete documents
if (
has_perms
and (
method == bulk_edit.delete
or (
method in [bulk_edit.merge, bulk_edit.split]
and parameters["delete_originals"]
)
)
and not user.has_perm("documents.delete_document")
):
has_perms = False
if not has_perms:
return HttpResponseForbidden("Insufficient permissions")
try:
modified_field = self.MODIFIED_FIELD_BY_METHOD.get(method.__name__, None)
@@ -2369,168 +2298,6 @@ class BulkEditView(DocumentOperationPermissionMixin):
)
@extend_schema_view(
    post=extend_schema(
        operation_id="documents_rotate",
        description="Rotate one or more documents",
        responses={
            200: inline_serializer(
                name="RotateDocumentsResult",
                fields={"result": serializers.CharField()},
            ),
        },
    ),
)
class RotateDocumentsView(DocumentOperationPermissionMixin):
    # POST endpoint: validates the body with RotateDocumentsSerializer and
    # hands the validated data to bulk_edit.rotate via the shared mixin helper.
    serializer_class = RotateDocumentsSerializer

    def post(self, request, *args, **kwargs):
        # Invalid input raises here instead of reaching the operation.
        request_serializer = self.get_serializer(data=request.data)
        request_serializer.is_valid(raise_exception=True)
        payload = request_serializer.validated_data
        return self._execute_document_action(
            method=bulk_edit.rotate,
            validated_data=payload,
            operation_label="document rotate",
        )
@extend_schema_view(
    post=extend_schema(
        operation_id="documents_merge",
        description="Merge selected documents into a new document",
        responses={
            200: inline_serializer(
                name="MergeDocumentsResult",
                fields={"result": serializers.CharField()},
            ),
        },
    ),
)
class MergeDocumentsView(DocumentOperationPermissionMixin):
    # POST endpoint: validates the body with MergeDocumentsSerializer and
    # hands the validated data to bulk_edit.merge via the shared mixin helper.
    serializer_class = MergeDocumentsSerializer

    def post(self, request, *args, **kwargs):
        # Invalid input raises here instead of reaching the operation.
        request_serializer = self.get_serializer(data=request.data)
        request_serializer.is_valid(raise_exception=True)
        payload = request_serializer.validated_data
        return self._execute_document_action(
            method=bulk_edit.merge,
            validated_data=payload,
            operation_label="document merge",
        )
@extend_schema_view(
    post=extend_schema(
        operation_id="documents_delete",
        description="Move selected documents to trash",
        responses={
            200: inline_serializer(
                name="DeleteDocumentsResult",
                fields={"result": serializers.CharField()},
            ),
        },
    ),
)
class DeleteDocumentsView(DocumentOperationPermissionMixin):
    # POST endpoint: validates the body with DeleteDocumentsSerializer and
    # hands the validated data to bulk_edit.delete via the shared mixin helper.
    serializer_class = DeleteDocumentsSerializer

    def post(self, request, *args, **kwargs):
        # Invalid input raises here instead of reaching the operation.
        request_serializer = self.get_serializer(data=request.data)
        request_serializer.is_valid(raise_exception=True)
        payload = request_serializer.validated_data
        return self._execute_document_action(
            method=bulk_edit.delete,
            validated_data=payload,
            operation_label="document delete",
        )
@extend_schema_view(
    post=extend_schema(
        operation_id="documents_reprocess",
        description="Reprocess selected documents",
        responses={
            200: inline_serializer(
                name="ReprocessDocumentsResult",
                fields={"result": serializers.CharField()},
            ),
        },
    ),
)
class ReprocessDocumentsView(DocumentOperationPermissionMixin):
    # POST endpoint: validates the body with ReprocessDocumentsSerializer and
    # hands the validated data to bulk_edit.reprocess via the shared mixin
    # helper.
    serializer_class = ReprocessDocumentsSerializer

    def post(self, request, *args, **kwargs):
        # Invalid input raises here instead of reaching the operation.
        request_serializer = self.get_serializer(data=request.data)
        request_serializer.is_valid(raise_exception=True)
        payload = request_serializer.validated_data
        return self._execute_document_action(
            method=bulk_edit.reprocess,
            validated_data=payload,
            operation_label="document reprocess",
        )
@extend_schema_view(
    post=extend_schema(
        operation_id="documents_edit_pdf",
        description="Perform PDF edit operations on a selected document",
        responses={
            200: inline_serializer(
                name="EditPdfDocumentsResult",
                fields={"result": serializers.CharField()},
            ),
        },
    ),
)
class EditPdfDocumentsView(DocumentOperationPermissionMixin):
    # POST endpoint: validates the body with EditPdfDocumentsSerializer and
    # hands the validated data to bulk_edit.edit_pdf via the shared mixin
    # helper.
    serializer_class = EditPdfDocumentsSerializer

    def post(self, request, *args, **kwargs):
        # Invalid input raises here instead of reaching the operation.
        request_serializer = self.get_serializer(data=request.data)
        request_serializer.is_valid(raise_exception=True)
        payload = request_serializer.validated_data
        return self._execute_document_action(
            method=bulk_edit.edit_pdf,
            validated_data=payload,
            operation_label="PDF edit",
        )
@extend_schema_view(
    post=extend_schema(
        operation_id="documents_remove_password",
        description="Remove password protection from selected PDFs",
        responses={
            200: inline_serializer(
                name="RemovePasswordDocumentsResult",
                fields={"result": serializers.CharField()},
            ),
        },
    ),
)
class RemovePasswordDocumentsView(DocumentOperationPermissionMixin):
    # POST endpoint: validates the body with RemovePasswordDocumentsSerializer
    # and hands the validated data to bulk_edit.remove_password via the shared
    # mixin helper.
    serializer_class = RemovePasswordDocumentsSerializer

    def post(self, request, *args, **kwargs):
        # Invalid input raises here instead of reaching the operation.
        request_serializer = self.get_serializer(data=request.data)
        request_serializer.is_valid(raise_exception=True)
        payload = request_serializer.validated_data
        return self._execute_document_action(
            method=bulk_edit.remove_password,
            validated_data=payload,
            operation_label="password removal",
        )
@extend_schema_view(
post=extend_schema(
description="Upload a document via the API",

View File

@@ -2,7 +2,7 @@ msgid ""
msgstr ""
"Project-Id-Version: paperless-ngx\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2026-03-09 01:51+0000\n"
"POT-Creation-Date: 2026-03-04 23:29+0000\n"
"PO-Revision-Date: 2022-02-17 04:17\n"
"Last-Translator: \n"
"Language-Team: English\n"
@@ -1299,7 +1299,7 @@ msgstr ""
msgid "workflow runs"
msgstr ""
#: documents/serialisers.py:463 documents/serialisers.py:2482
#: documents/serialisers.py:463 documents/serialisers.py:2332
msgid "Insufficient permissions."
msgstr ""
@@ -1307,39 +1307,39 @@ msgstr ""
msgid "Invalid color."
msgstr ""
#: documents/serialisers.py:2105
#: documents/serialisers.py:1955
#, python-format
msgid "File type %(type)s not supported"
msgstr ""
#: documents/serialisers.py:2149
#: documents/serialisers.py:1999
#, python-format
msgid "Custom field id must be an integer: %(id)s"
msgstr ""
#: documents/serialisers.py:2156
#: documents/serialisers.py:2006
#, python-format
msgid "Custom field with id %(id)s does not exist"
msgstr ""
#: documents/serialisers.py:2173 documents/serialisers.py:2183
#: documents/serialisers.py:2023 documents/serialisers.py:2033
msgid ""
"Custom fields must be a list of integers or an object mapping ids to values."
msgstr ""
#: documents/serialisers.py:2178
#: documents/serialisers.py:2028
msgid "Some custom fields don't exist or were specified twice."
msgstr ""
#: documents/serialisers.py:2325
#: documents/serialisers.py:2175
msgid "Invalid variable detected."
msgstr ""
#: documents/serialisers.py:2538
#: documents/serialisers.py:2388
msgid "Duplicate document identifiers are not allowed."
msgstr ""
#: documents/serialisers.py:2568 documents/views.py:3328
#: documents/serialisers.py:2418 documents/views.py:3328
#, python-format
msgid "Documents not found: %(ids)s"
msgstr ""

View File

@@ -1,100 +1,107 @@
import logging
from unittest import mock
import pytest
from allauth.account.adapter import get_adapter
from allauth.core import context
from allauth.socialaccount.adapter import get_adapter as get_social_adapter
from django.conf import settings
from django.contrib.auth.models import AnonymousUser
from django.contrib.auth.models import Group
from django.contrib.auth.models import User
from django.forms import ValidationError
from django.http import HttpRequest
from django.test import TestCase
from django.test import override_settings
from django.urls import reverse
from pytest_django.fixtures import SettingsWrapper
from pytest_mock import MockerFixture
from rest_framework.authtoken.models import Token
from paperless.adapter import DrfTokenStrategy
@pytest.mark.django_db
class TestCustomAccountAdapter:
def test_is_open_for_signup(self, settings: SettingsWrapper) -> None:
class TestCustomAccountAdapter(TestCase):
def test_is_open_for_signup(self) -> None:
adapter = get_adapter()
# With no accounts, signups should be allowed
assert adapter.is_open_for_signup(None)
self.assertTrue(adapter.is_open_for_signup(None))
User.objects.create_user("testuser")
# Test when ACCOUNT_ALLOW_SIGNUPS is True
settings.ACCOUNT_ALLOW_SIGNUPS = True
assert adapter.is_open_for_signup(None)
self.assertTrue(adapter.is_open_for_signup(None))
# Test when ACCOUNT_ALLOW_SIGNUPS is False
settings.ACCOUNT_ALLOW_SIGNUPS = False
assert not adapter.is_open_for_signup(None)
self.assertFalse(adapter.is_open_for_signup(None))
def test_is_safe_url(self, settings: SettingsWrapper) -> None:
def test_is_safe_url(self) -> None:
request = HttpRequest()
request.get_host = lambda: "example.com"
request.get_host = mock.Mock(return_value="example.com")
with context.request_context(request):
adapter = get_adapter()
with override_settings(ALLOWED_HOSTS=["*"]):
# True because request host is same
url = "https://example.com"
self.assertTrue(adapter.is_safe_url(url))
settings.ALLOWED_HOSTS = ["*"]
# True because request host is same
assert adapter.is_safe_url("https://example.com")
url = "https://evil.com"
# False despite wildcard because request host is different
assert not adapter.is_safe_url("https://evil.com")
self.assertFalse(adapter.is_safe_url(url))
settings.ALLOWED_HOSTS = ["example.com"]
url = "https://example.com"
# True because request host is same
assert adapter.is_safe_url("https://example.com")
self.assertTrue(adapter.is_safe_url(url))
settings.ALLOWED_HOSTS = ["*", "example.com"]
url = "//evil.com"
# False because request host is not in allowed hosts
assert not adapter.is_safe_url("//evil.com")
self.assertFalse(adapter.is_safe_url(url))
def test_pre_authenticate(
self,
settings: SettingsWrapper,
mocker: MockerFixture,
) -> None:
mocker.patch("allauth.core.internal.ratelimit.consume", return_value=True)
@mock.patch("allauth.core.internal.ratelimit.consume", return_value=True)
def test_pre_authenticate(self, mock_consume) -> None:
adapter = get_adapter()
request = HttpRequest()
request.get_host = lambda: "example.com"
request.get_host = mock.Mock(return_value="example.com")
settings.DISABLE_REGULAR_LOGIN = False
adapter.pre_authenticate(request)
settings.DISABLE_REGULAR_LOGIN = True
with pytest.raises(ValidationError):
with self.assertRaises(ValidationError):
adapter.pre_authenticate(request)
def test_get_reset_password_from_key_url(self, settings: SettingsWrapper) -> None:
def test_get_reset_password_from_key_url(self) -> None:
request = HttpRequest()
request.get_host = lambda: "foo.org"
request.get_host = mock.Mock(return_value="foo.org")
with context.request_context(request):
adapter = get_adapter()
settings.PAPERLESS_URL = None
settings.ACCOUNT_DEFAULT_HTTP_PROTOCOL = "https"
expected_url = f"https://foo.org{reverse('account_reset_password_from_key', kwargs={'uidb36': 'UID', 'key': 'KEY'})}"
assert adapter.get_reset_password_from_key_url("UID-KEY") == expected_url
# Test when PAPERLESS_URL is None
with override_settings(
PAPERLESS_URL=None,
ACCOUNT_DEFAULT_HTTP_PROTOCOL="https",
):
expected_url = f"https://foo.org{reverse('account_reset_password_from_key', kwargs={'uidb36': 'UID', 'key': 'KEY'})}"
self.assertEqual(
adapter.get_reset_password_from_key_url("UID-KEY"),
expected_url,
)
settings.PAPERLESS_URL = "https://bar.com"
expected_url = f"https://bar.com{reverse('account_reset_password_from_key', kwargs={'uidb36': 'UID', 'key': 'KEY'})}"
assert adapter.get_reset_password_from_key_url("UID-KEY") == expected_url
# Test when PAPERLESS_URL is not None
with override_settings(PAPERLESS_URL="https://bar.com"):
expected_url = f"https://bar.com{reverse('account_reset_password_from_key', kwargs={'uidb36': 'UID', 'key': 'KEY'})}"
self.assertEqual(
adapter.get_reset_password_from_key_url("UID-KEY"),
expected_url,
)
def test_save_user_adds_groups(
self,
settings: SettingsWrapper,
mocker: MockerFixture,
) -> None:
settings.ACCOUNT_DEFAULT_GROUPS = ["group1", "group2"]
@override_settings(ACCOUNT_DEFAULT_GROUPS=["group1", "group2"])
def test_save_user_adds_groups(self) -> None:
Group.objects.create(name="group1")
user = User.objects.create_user("testuser")
adapter = get_adapter()
form = mocker.MagicMock(
form = mock.Mock(
cleaned_data={
"username": "testuser",
"email": "user@example.com",
@@ -103,81 +110,88 @@ class TestCustomAccountAdapter:
user = adapter.save_user(HttpRequest(), user, form, commit=True)
assert user.groups.count() == 1
assert user.groups.filter(name="group1").exists()
assert not user.groups.filter(name="group2").exists()
self.assertEqual(user.groups.count(), 1)
self.assertTrue(user.groups.filter(name="group1").exists())
self.assertFalse(user.groups.filter(name="group2").exists())
def test_fresh_install_save_creates_superuser(self, mocker: MockerFixture) -> None:
def test_fresh_install_save_creates_superuser(self) -> None:
adapter = get_adapter()
form = mocker.MagicMock(
form = mock.Mock(
cleaned_data={
"username": "testuser",
"email": "user@paperless-ngx.com",
},
)
user = adapter.save_user(HttpRequest(), User(), form, commit=True)
assert user.is_superuser
self.assertTrue(user.is_superuser)
form = mocker.MagicMock(
# Next time, it should not create a superuser
form = mock.Mock(
cleaned_data={
"username": "testuser2",
"email": "user2@paperless-ngx.com",
},
)
user2 = adapter.save_user(HttpRequest(), User(), form, commit=True)
assert not user2.is_superuser
self.assertFalse(user2.is_superuser)
class TestCustomSocialAccountAdapter:
@pytest.mark.django_db
def test_is_open_for_signup(self, settings: SettingsWrapper) -> None:
class TestCustomSocialAccountAdapter(TestCase):
def test_is_open_for_signup(self) -> None:
adapter = get_social_adapter()
# Test when SOCIALACCOUNT_ALLOW_SIGNUPS is True
settings.SOCIALACCOUNT_ALLOW_SIGNUPS = True
assert adapter.is_open_for_signup(None, None)
self.assertTrue(adapter.is_open_for_signup(None, None))
# Test when SOCIALACCOUNT_ALLOW_SIGNUPS is False
settings.SOCIALACCOUNT_ALLOW_SIGNUPS = False
assert not adapter.is_open_for_signup(None, None)
self.assertFalse(adapter.is_open_for_signup(None, None))
def test_get_connect_redirect_url(self) -> None:
adapter = get_social_adapter()
assert adapter.get_connect_redirect_url(None, None) == reverse("base")
request = None
socialaccount = None
@pytest.mark.django_db
def test_save_user_adds_groups(
self,
settings: SettingsWrapper,
mocker: MockerFixture,
) -> None:
settings.SOCIAL_ACCOUNT_DEFAULT_GROUPS = ["group1", "group2"]
# Test the default URL
expected_url = reverse("base")
self.assertEqual(
adapter.get_connect_redirect_url(request, socialaccount),
expected_url,
)
@override_settings(SOCIAL_ACCOUNT_DEFAULT_GROUPS=["group1", "group2"])
def test_save_user_adds_groups(self) -> None:
Group.objects.create(name="group1")
adapter = get_social_adapter()
request = HttpRequest()
user = User.objects.create_user("testuser")
sociallogin = mocker.MagicMock(user=user)
sociallogin = mock.Mock(
user=user,
)
user = adapter.save_user(HttpRequest(), sociallogin, None)
user = adapter.save_user(request, sociallogin, None)
assert user.groups.count() == 1
assert user.groups.filter(name="group1").exists()
assert not user.groups.filter(name="group2").exists()
self.assertEqual(user.groups.count(), 1)
self.assertTrue(user.groups.filter(name="group1").exists())
self.assertFalse(user.groups.filter(name="group2").exists())
def test_error_logged_on_authentication_error(
self,
caplog: pytest.LogCaptureFixture,
) -> None:
def test_error_logged_on_authentication_error(self) -> None:
adapter = get_social_adapter()
with caplog.at_level(logging.INFO, logger="paperless.auth"):
request = HttpRequest()
with self.assertLogs("paperless.auth", level="INFO") as log_cm:
adapter.on_authentication_error(
HttpRequest(),
request,
provider="test-provider",
error="Error",
exception="Test authentication error",
)
assert any("Test authentication error" in msg for msg in caplog.messages)
self.assertTrue(
any("Test authentication error" in message for message in log_cm.output),
)
@pytest.mark.django_db
class TestDrfTokenStrategy:
class TestDrfTokenStrategy(TestCase):
def test_create_access_token_creates_new_token(self) -> None:
"""
GIVEN:
@@ -187,6 +201,7 @@ class TestDrfTokenStrategy:
THEN:
- A new token is created and its key is returned
"""
user = User.objects.create_user("testuser")
request = HttpRequest()
request.user = user
@@ -194,9 +209,13 @@ class TestDrfTokenStrategy:
strategy = DrfTokenStrategy()
token_key = strategy.create_access_token(request)
assert token_key is not None
assert Token.objects.filter(user=user).exists()
assert token_key == Token.objects.get(user=user).key
# Verify a token was created
self.assertIsNotNone(token_key)
self.assertTrue(Token.objects.filter(user=user).exists())
# Verify the returned key matches the created token
token = Token.objects.get(user=user)
self.assertEqual(token_key, token.key)
def test_create_access_token_returns_existing_token(self) -> None:
"""
@@ -207,6 +226,7 @@ class TestDrfTokenStrategy:
THEN:
- The same token key is returned (no new token created)
"""
user = User.objects.create_user("testuser")
existing_token = Token.objects.create(user=user)
@@ -216,8 +236,11 @@ class TestDrfTokenStrategy:
strategy = DrfTokenStrategy()
token_key = strategy.create_access_token(request)
assert token_key == existing_token.key
assert Token.objects.filter(user=user).count() == 1
# Verify the existing token key is returned
self.assertEqual(token_key, existing_token.key)
# Verify only one token exists (no duplicate created)
self.assertEqual(Token.objects.filter(user=user).count(), 1)
def test_create_access_token_returns_none_for_unauthenticated_user(self) -> None:
"""
@@ -228,11 +251,12 @@ class TestDrfTokenStrategy:
THEN:
- None is returned and no token is created
"""
request = HttpRequest()
request.user = AnonymousUser()
strategy = DrfTokenStrategy()
token_key = strategy.create_access_token(request)
assert token_key is None
assert Token.objects.count() == 0
self.assertIsNone(token_key)
self.assertEqual(Token.objects.count(), 0)

View File

@@ -1,15 +1,16 @@
import os
from collections.abc import Callable
from dataclasses import dataclass
from pathlib import Path
from unittest import mock
import pytest
from django.core.checks import Error
from django.core.checks import Warning
from pytest_django.fixtures import SettingsWrapper
from django.test import TestCase
from django.test import override_settings
from pytest_mock import MockerFixture
from documents.tests.utils import DirectoriesMixin
from documents.tests.utils import FileSystemAssertsMixin
from paperless.checks import audit_log_check
from paperless.checks import binaries_check
from paperless.checks import check_deprecated_db_settings
@@ -19,84 +20,54 @@ from paperless.checks import paths_check
from paperless.checks import settings_values_check
@dataclass(frozen=True, slots=True)
class PaperlessTestDirs:
data_dir: Path
media_dir: Path
consumption_dir: Path
# TODO: consolidate with documents/tests/conftest.py PaperlessDirs/paperless_dirs
# once the paperless and documents test suites are ready to share fixtures.
@pytest.fixture()
def directories(tmp_path: Path, settings: SettingsWrapper) -> PaperlessTestDirs:
data_dir = tmp_path / "data"
media_dir = tmp_path / "media"
consumption_dir = tmp_path / "consumption"
for d in (data_dir, media_dir, consumption_dir):
d.mkdir()
settings.DATA_DIR = data_dir
settings.MEDIA_ROOT = media_dir
settings.CONSUMPTION_DIR = consumption_dir
return PaperlessTestDirs(
data_dir=data_dir,
media_dir=media_dir,
consumption_dir=consumption_dir,
)
class TestChecks:
class TestChecks(DirectoriesMixin, TestCase):
def test_binaries(self) -> None:
assert binaries_check(None) == []
self.assertEqual(binaries_check(None), [])
def test_binaries_fail(self, settings: SettingsWrapper) -> None:
settings.CONVERT_BINARY = "uuuhh"
assert len(binaries_check(None)) == 1
@override_settings(CONVERT_BINARY="uuuhh")
def test_binaries_fail(self) -> None:
self.assertEqual(len(binaries_check(None)), 1)
@pytest.mark.usefixtures("directories")
def test_paths_check(self) -> None:
assert paths_check(None) == []
self.assertEqual(paths_check(None), [])
def test_paths_check_dont_exist(self, settings: SettingsWrapper) -> None:
settings.MEDIA_ROOT = Path("uuh")
settings.DATA_DIR = Path("whatever")
settings.CONSUMPTION_DIR = Path("idontcare")
@override_settings(
MEDIA_ROOT=Path("uuh"),
DATA_DIR=Path("whatever"),
CONSUMPTION_DIR=Path("idontcare"),
)
def test_paths_check_dont_exist(self) -> None:
msgs = paths_check(None)
self.assertEqual(len(msgs), 3, str(msgs))
for msg in msgs:
self.assertTrue(msg.msg.endswith("is set but doesn't exist."))
def test_paths_check_no_access(self) -> None:
Path(self.dirs.data_dir).chmod(0o000)
Path(self.dirs.media_dir).chmod(0o000)
Path(self.dirs.consumption_dir).chmod(0o000)
self.addCleanup(os.chmod, self.dirs.data_dir, 0o777)
self.addCleanup(os.chmod, self.dirs.media_dir, 0o777)
self.addCleanup(os.chmod, self.dirs.consumption_dir, 0o777)
msgs = paths_check(None)
self.assertEqual(len(msgs), 3)
assert len(msgs) == 3, str(msgs)
for msg in msgs:
assert msg.msg.endswith("is set but doesn't exist.")
self.assertTrue(msg.msg.endswith("is not writeable"))
def test_paths_check_no_access(self, directories: PaperlessTestDirs) -> None:
directories.data_dir.chmod(0o000)
directories.media_dir.chmod(0o000)
directories.consumption_dir.chmod(0o000)
@override_settings(DEBUG=False)
def test_debug_disabled(self) -> None:
self.assertEqual(debug_mode_check(None), [])
try:
msgs = paths_check(None)
finally:
directories.data_dir.chmod(0o777)
directories.media_dir.chmod(0o777)
directories.consumption_dir.chmod(0o777)
assert len(msgs) == 3
for msg in msgs:
assert msg.msg.endswith("is not writeable")
def test_debug_disabled(self, settings: SettingsWrapper) -> None:
settings.DEBUG = False
assert debug_mode_check(None) == []
def test_debug_enabled(self, settings: SettingsWrapper) -> None:
settings.DEBUG = True
assert len(debug_mode_check(None)) == 1
@override_settings(DEBUG=True)
def test_debug_enabled(self) -> None:
self.assertEqual(len(debug_mode_check(None)), 1)
class TestSettingsChecksAgainstDefaults:
class TestSettingsChecksAgainstDefaults(DirectoriesMixin, TestCase):
def test_all_valid(self) -> None:
"""
GIVEN:
@@ -107,71 +78,104 @@ class TestSettingsChecksAgainstDefaults:
- No system check errors reported
"""
msgs = settings_values_check(None)
assert len(msgs) == 0
self.assertEqual(len(msgs), 0)
class TestOcrSettingsChecks:
@pytest.mark.parametrize(
("setting", "value", "expected_msg"),
[
pytest.param(
"OCR_OUTPUT_TYPE",
"notapdf",
'OCR output type "notapdf"',
id="invalid-output-type",
),
pytest.param(
"OCR_MODE",
"makeitso",
'OCR output mode "makeitso"',
id="invalid-mode",
),
pytest.param(
"OCR_MODE",
"skip_noarchive",
"deprecated",
id="deprecated-mode",
),
pytest.param(
"OCR_SKIP_ARCHIVE_FILE",
"invalid",
'OCR_SKIP_ARCHIVE_FILE setting "invalid"',
id="invalid-skip-archive-file",
),
pytest.param(
"OCR_CLEAN",
"cleanme",
'OCR clean mode "cleanme"',
id="invalid-clean",
),
],
)
def test_invalid_setting_produces_one_error(
self,
settings: SettingsWrapper,
setting: str,
value: str,
expected_msg: str,
) -> None:
class TestOcrSettingsChecks(DirectoriesMixin, TestCase):
@override_settings(OCR_OUTPUT_TYPE="notapdf")
def test_invalid_output_type(self) -> None:
"""
GIVEN:
- Default settings
- One OCR setting is set to an invalid value
- OCR output type is invalid
WHEN:
- Settings are validated
THEN:
- Exactly one system check error is reported containing the expected message
- system check error reported for OCR output type
"""
setattr(settings, setting, value)
msgs = settings_values_check(None)
self.assertEqual(len(msgs), 1)
assert len(msgs) == 1
assert expected_msg in msgs[0].msg
msg = msgs[0]
self.assertIn('OCR output type "notapdf"', msg.msg)
@override_settings(OCR_MODE="makeitso")
def test_invalid_ocr_type(self) -> None:
"""
GIVEN:
- Default settings
- OCR type is invalid
WHEN:
- Settings are validated
THEN:
- system check error reported for OCR type
"""
msgs = settings_values_check(None)
self.assertEqual(len(msgs), 1)
msg = msgs[0]
self.assertIn('OCR output mode "makeitso"', msg.msg)
@override_settings(OCR_MODE="skip_noarchive")
def test_deprecated_ocr_type(self) -> None:
"""
GIVEN:
- Default settings
- OCR type is deprecated
WHEN:
- Settings are validated
THEN:
- deprecation warning reported for OCR type
"""
msgs = settings_values_check(None)
self.assertEqual(len(msgs), 1)
msg = msgs[0]
self.assertIn("deprecated", msg.msg)
@override_settings(OCR_SKIP_ARCHIVE_FILE="invalid")
def test_invalid_ocr_skip_archive_file(self) -> None:
"""
GIVEN:
- Default settings
- OCR_SKIP_ARCHIVE_FILE is invalid
WHEN:
- Settings are validated
THEN:
- system check error reported for OCR_SKIP_ARCHIVE_FILE
"""
msgs = settings_values_check(None)
self.assertEqual(len(msgs), 1)
msg = msgs[0]
self.assertIn('OCR_SKIP_ARCHIVE_FILE setting "invalid"', msg.msg)
@override_settings(OCR_CLEAN="cleanme")
def test_invalid_ocr_clean(self) -> None:
"""
GIVEN:
- Default settings
- OCR cleaning type is invalid
WHEN:
- Settings are validated
THEN:
- system check error reported for OCR cleaning type
"""
msgs = settings_values_check(None)
self.assertEqual(len(msgs), 1)
msg = msgs[0]
self.assertIn('OCR clean mode "cleanme"', msg.msg)
class TestTimezoneSettingsChecks:
def test_invalid_timezone(self, settings: SettingsWrapper) -> None:
class TestTimezoneSettingsChecks(DirectoriesMixin, TestCase):
@override_settings(TIME_ZONE="TheMoon\\MyCrater")
def test_invalid_timezone(self) -> None:
"""
GIVEN:
- Default settings
@@ -181,16 +185,17 @@ class TestTimezoneSettingsChecks:
THEN:
- system check error reported for timezone
"""
settings.TIME_ZONE = "TheMoon\\MyCrater"
msgs = settings_values_check(None)
self.assertEqual(len(msgs), 1)
assert len(msgs) == 1
assert 'Timezone "TheMoon\\MyCrater"' in msgs[0].msg
msg = msgs[0]
self.assertIn('Timezone "TheMoon\\MyCrater"', msg.msg)
class TestEmailCertSettingsChecks:
def test_not_valid_file(self, settings: SettingsWrapper) -> None:
class TestEmailCertSettingsChecks(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
@override_settings(EMAIL_CERTIFICATE_FILE=Path("/tmp/not_actually_here.pem"))
def test_not_valid_file(self) -> None:
"""
GIVEN:
- Default settings
@@ -200,22 +205,19 @@ class TestEmailCertSettingsChecks:
THEN:
- system check error reported for email certificate
"""
cert_path = Path("/tmp/not_actually_here.pem")
assert not cert_path.is_file()
settings.EMAIL_CERTIFICATE_FILE = cert_path
self.assertIsNotFile("/tmp/not_actually_here.pem")
msgs = settings_values_check(None)
assert len(msgs) == 1
assert "Email cert /tmp/not_actually_here.pem is not a file" in msgs[0].msg
self.assertEqual(len(msgs), 1)
msg = msgs[0]
self.assertIn("Email cert /tmp/not_actually_here.pem is not a file", msg.msg)
class TestAuditLogChecks:
def test_was_enabled_once(
self,
settings: SettingsWrapper,
mocker: MockerFixture,
) -> None:
class TestAuditLogChecks(TestCase):
def test_was_enabled_once(self) -> None:
"""
GIVEN:
- Audit log is not enabled
@@ -224,18 +226,23 @@ class TestAuditLogChecks:
THEN:
- system check error reported for disabling audit log
"""
settings.AUDIT_LOG_ENABLED = False
introspect_mock = mocker.MagicMock()
introspect_mock = mock.MagicMock()
introspect_mock.introspection.table_names.return_value = ["auditlog_logentry"]
mocker.patch.dict(
"paperless.checks.connections",
{"default": introspect_mock},
)
with override_settings(AUDIT_LOG_ENABLED=False):
with mock.patch.dict(
"paperless.checks.connections",
{"default": introspect_mock},
):
msgs = audit_log_check(None)
msgs = audit_log_check(None)
self.assertEqual(len(msgs), 1)
assert len(msgs) == 1
assert "auditlog table was found but audit log is disabled." in msgs[0].msg
msg = msgs[0]
self.assertIn(
("auditlog table was found but audit log is disabled."),
msg.msg,
)
DEPRECATED_VARS: dict[str, str] = {
@@ -264,16 +271,20 @@ class TestDeprecatedDbSettings:
@pytest.mark.parametrize(
("env_var", "db_option_key"),
[
pytest.param("PAPERLESS_DB_TIMEOUT", "timeout", id="db-timeout"),
pytest.param(
"PAPERLESS_DB_POOLSIZE",
"pool.min_size / pool.max_size",
id="db-poolsize",
),
pytest.param("PAPERLESS_DBSSLMODE", "sslmode", id="ssl-mode"),
pytest.param("PAPERLESS_DBSSLROOTCERT", "sslrootcert", id="ssl-rootcert"),
pytest.param("PAPERLESS_DBSSLCERT", "sslcert", id="ssl-cert"),
pytest.param("PAPERLESS_DBSSLKEY", "sslkey", id="ssl-key"),
("PAPERLESS_DB_TIMEOUT", "timeout"),
("PAPERLESS_DB_POOLSIZE", "pool.min_size / pool.max_size"),
("PAPERLESS_DBSSLMODE", "sslmode"),
("PAPERLESS_DBSSLROOTCERT", "sslrootcert"),
("PAPERLESS_DBSSLCERT", "sslcert"),
("PAPERLESS_DBSSLKEY", "sslkey"),
],
ids=[
"db-timeout",
"db-poolsize",
"ssl-mode",
"ssl-rootcert",
"ssl-cert",
"ssl-key",
],
)
def test_single_deprecated_var_produces_one_warning(
@@ -392,10 +403,7 @@ class TestV3MinimumUpgradeVersionCheck:
"""Test suite for check_v3_minimum_upgrade_version system check."""
@pytest.fixture
def build_conn_mock(
self,
mocker: MockerFixture,
) -> Callable[[list[str], list[str]], mock.MagicMock]:
def build_conn_mock(self, mocker: MockerFixture):
"""Factory fixture that builds a connections['default'] mock.
Usage::
@@ -415,7 +423,7 @@ class TestV3MinimumUpgradeVersionCheck:
def test_no_migrations_table_fresh_install(
self,
mocker: MockerFixture,
build_conn_mock: Callable[[list[str], list[str]], mock.MagicMock],
build_conn_mock,
) -> None:
"""
GIVEN:
@@ -434,7 +442,7 @@ class TestV3MinimumUpgradeVersionCheck:
def test_no_documents_migrations_fresh_install(
self,
mocker: MockerFixture,
build_conn_mock: Callable[[list[str], list[str]], mock.MagicMock],
build_conn_mock,
) -> None:
"""
GIVEN:
@@ -453,7 +461,7 @@ class TestV3MinimumUpgradeVersionCheck:
def test_v3_state_with_0001_squashed(
self,
mocker: MockerFixture,
build_conn_mock: Callable[[list[str], list[str]], mock.MagicMock],
build_conn_mock,
) -> None:
"""
GIVEN:
@@ -477,7 +485,7 @@ class TestV3MinimumUpgradeVersionCheck:
def test_v3_state_with_0002_squashed_only(
self,
mocker: MockerFixture,
build_conn_mock: Callable[[list[str], list[str]], mock.MagicMock],
build_conn_mock,
) -> None:
"""
GIVEN:
@@ -496,7 +504,7 @@ class TestV3MinimumUpgradeVersionCheck:
def test_v2_20_9_state_ready_to_upgrade(
self,
mocker: MockerFixture,
build_conn_mock: Callable[[list[str], list[str]], mock.MagicMock],
build_conn_mock,
) -> None:
"""
GIVEN:
@@ -523,7 +531,7 @@ class TestV3MinimumUpgradeVersionCheck:
def test_v2_20_8_raises_error(
self,
mocker: MockerFixture,
build_conn_mock: Callable[[list[str], list[str]], mock.MagicMock],
build_conn_mock,
) -> None:
"""
GIVEN:
@@ -550,7 +558,7 @@ class TestV3MinimumUpgradeVersionCheck:
def test_very_old_version_raises_error(
self,
mocker: MockerFixture,
build_conn_mock: Callable[[list[str], list[str]], mock.MagicMock],
build_conn_mock,
) -> None:
"""
GIVEN:
@@ -577,7 +585,7 @@ class TestV3MinimumUpgradeVersionCheck:
def test_error_hint_mentions_v2_20_9(
self,
mocker: MockerFixture,
build_conn_mock: Callable[[list[str], list[str]], mock.MagicMock],
build_conn_mock,
) -> None:
"""
GIVEN:

View File

@@ -9,50 +9,35 @@ from paperless.utils import ocr_to_dateparser_languages
@pytest.mark.parametrize(
("ocr_language", "expected"),
[
pytest.param("eng", ["en"], id="single-language"),
pytest.param("fra+ita+lao", ["fr", "it", "lo"], id="multiple-languages"),
pytest.param("fil", ["fil"], id="no-two-letter-equivalent"),
pytest.param(
"aze_cyrl+srp_latn",
["az-Cyrl", "sr-Latn"],
id="script-supported-by-dateparser",
),
pytest.param(
"deu_frak",
["de"],
id="script-not-supported-falls-back-to-language",
),
pytest.param(
"chi_tra+chi_sim",
["zh"],
id="chinese-variants-collapse-to-general",
),
pytest.param(
"eng+unsupported_language+por",
["en", "pt"],
id="unsupported-language-skipped",
),
pytest.param(
"unsupported1+unsupported2",
[],
id="all-unsupported-returns-empty",
),
pytest.param("eng+eng", ["en"], id="duplicates-deduplicated"),
pytest.param(
"ita_unknownscript",
["it"],
id="unknown-script-falls-back-to-language",
),
# One language
("eng", ["en"]),
# Multiple languages
("fra+ita+lao", ["fr", "it", "lo"]),
# Languages that don't have a two-letter equivalent
("fil", ["fil"]),
# Languages with a script part supported by dateparser
("aze_cyrl+srp_latn", ["az-Cyrl", "sr-Latn"]),
# Languages with a script part not supported by dateparser
# In this case, default to the language without script
("deu_frak", ["de"]),
# Traditional and simplified chinese don't have the same name in dateparser,
# so they're converted to the general chinese language
("chi_tra+chi_sim", ["zh"]),
# If a language is not supported by dateparser, fallback to the supported ones
("eng+unsupported_language+por", ["en", "pt"]),
# If no language is supported, fallback to default
("unsupported1+unsupported2", []),
# Duplicate languages, should not duplicate in result
("eng+eng", ["en"]),
# Language with script, but script is not mapped
("ita_unknownscript", ["it"]),
],
)
def test_ocr_to_dateparser_languages(ocr_language: str, expected: list[str]) -> None:
def test_ocr_to_dateparser_languages(ocr_language, expected):
assert sorted(ocr_to_dateparser_languages(ocr_language)) == sorted(expected)
def test_ocr_to_dateparser_languages_exception(
monkeypatch: pytest.MonkeyPatch,
caplog: pytest.LogCaptureFixture,
) -> None:
def test_ocr_to_dateparser_languages_exception(monkeypatch, caplog):
# Patch LocaleDataLoader.get_locale_map to raise an exception
class DummyLoader:
def get_locale_map(self, locales=None):

View File

@@ -1,31 +1,24 @@
import tempfile
from pathlib import Path
from django.test import Client
from pytest_django.fixtures import SettingsWrapper
from django.test import override_settings
def test_favicon_view(
client: Client,
tmp_path: Path,
settings: SettingsWrapper,
) -> None:
favicon_path = tmp_path / "paperless" / "img" / "favicon.ico"
favicon_path.parent.mkdir(parents=True)
favicon_path.write_bytes(b"FAKE ICON DATA")
def test_favicon_view(client):
with tempfile.TemporaryDirectory() as tmpdir:
static_dir = Path(tmpdir)
favicon_path = static_dir / "paperless" / "img" / "favicon.ico"
favicon_path.parent.mkdir(parents=True, exist_ok=True)
favicon_path.write_bytes(b"FAKE ICON DATA")
settings.STATIC_ROOT = tmp_path
response = client.get("/favicon.ico")
assert response.status_code == 200
assert response["Content-Type"] == "image/x-icon"
assert b"".join(response.streaming_content) == b"FAKE ICON DATA"
with override_settings(STATIC_ROOT=static_dir):
response = client.get("/favicon.ico")
assert response.status_code == 200
assert response["Content-Type"] == "image/x-icon"
assert b"".join(response.streaming_content) == b"FAKE ICON DATA"
def test_favicon_view_missing_file(
client: Client,
tmp_path: Path,
settings: SettingsWrapper,
) -> None:
settings.STATIC_ROOT = tmp_path
response = client.get("/favicon.ico")
assert response.status_code == 404
def test_favicon_view_missing_file(client):
with override_settings(STATIC_ROOT=Path(tempfile.mkdtemp())):
response = client.get("/favicon.ico")
assert response.status_code == 404

View File

@@ -21,18 +21,12 @@ from documents.views import BulkEditView
from documents.views import ChatStreamingView
from documents.views import CorrespondentViewSet
from documents.views import CustomFieldViewSet
from documents.views import DeleteDocumentsView
from documents.views import DocumentTypeViewSet
from documents.views import EditPdfDocumentsView
from documents.views import GlobalSearchView
from documents.views import IndexView
from documents.views import LogViewSet
from documents.views import MergeDocumentsView
from documents.views import PostDocumentView
from documents.views import RemoteVersionView
from documents.views import RemovePasswordDocumentsView
from documents.views import ReprocessDocumentsView
from documents.views import RotateDocumentsView
from documents.views import SavedViewViewSet
from documents.views import SearchAutoCompleteView
from documents.views import SelectionDataView
@@ -138,36 +132,6 @@ urlpatterns = [
BulkEditView.as_view(),
name="bulk_edit",
),
re_path(
"^delete/",
DeleteDocumentsView.as_view(),
name="delete_documents",
),
re_path(
"^reprocess/",
ReprocessDocumentsView.as_view(),
name="reprocess_documents",
),
re_path(
"^rotate/",
RotateDocumentsView.as_view(),
name="rotate_documents",
),
re_path(
"^merge/",
MergeDocumentsView.as_view(),
name="merge_documents",
),
re_path(
"^edit_pdf/",
EditPdfDocumentsView.as_view(),
name="edit_pdf_documents",
),
re_path(
"^remove_password/",
RemovePasswordDocumentsView.as_view(),
name="remove_password_documents",
),
re_path(
"^bulk_download/",
BulkDownloadView.as_view(),

View File

@@ -1,4 +1,4 @@
from pydantic import BaseModel
from llama_index.core.bridge.pydantic import BaseModel
class DocumentClassifierSchema(BaseModel):

View File

@@ -1,6 +1,10 @@
import logging
import sys
from llama_index.core import VectorStoreIndex
from llama_index.core.prompts import PromptTemplate
from llama_index.core.query_engine import RetrieverQueryEngine
from documents.models import Document
from paperless_ai.client import AIClient
from paperless_ai.indexing import load_or_build_index
@@ -10,13 +14,15 @@ logger = logging.getLogger("paperless_ai.chat")
MAX_SINGLE_DOC_CONTEXT_CHARS = 15000
SINGLE_DOC_SNIPPET_CHARS = 800
CHAT_PROMPT_TMPL = """Context information is below.
CHAT_PROMPT_TMPL = PromptTemplate(
template="""Context information is below.
---------------------
{context_str}
---------------------
Given the context information and not prior knowledge, answer the query.
Query: {query_str}
Answer:"""
Answer:""",
)
def stream_chat_with_documents(query_str: str, documents: list[Document]):
@@ -37,10 +43,6 @@ def stream_chat_with_documents(query_str: str, documents: list[Document]):
yield "Sorry, I couldn't find any content to answer your question."
return
from llama_index.core import VectorStoreIndex
from llama_index.core.prompts import PromptTemplate
from llama_index.core.query_engine import RetrieverQueryEngine
local_index = VectorStoreIndex(nodes=nodes)
retriever = local_index.as_retriever(
similarity_top_k=3 if len(documents) == 1 else 5,
@@ -83,8 +85,7 @@ def stream_chat_with_documents(query_str: str, documents: list[Document]):
for node in top_nodes
)
prompt_template = PromptTemplate(template=CHAT_PROMPT_TMPL)
prompt = prompt_template.partial_format(
prompt = CHAT_PROMPT_TMPL.partial_format(
context_str=context,
query_str=query_str,
).format(llm=client.llm)

View File

@@ -1,10 +1,9 @@
import logging
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from llama_index.core.llms import ChatMessage
from llama_index.llms.ollama import Ollama
from llama_index.llms.openai import OpenAI
from llama_index.core.llms import ChatMessage
from llama_index.core.program.function_program import get_function_tool
from llama_index.llms.ollama import Ollama
from llama_index.llms.openai import OpenAI
from paperless.config import AIConfig
from paperless_ai.base_model import DocumentClassifierSchema
@@ -21,18 +20,14 @@ class AIClient:
self.settings = AIConfig()
self.llm = self.get_llm()
def get_llm(self) -> "Ollama | OpenAI":
def get_llm(self) -> Ollama | OpenAI:
if self.settings.llm_backend == "ollama":
from llama_index.llms.ollama import Ollama
return Ollama(
model=self.settings.llm_model or "llama3.1",
base_url=self.settings.llm_endpoint or "http://localhost:11434",
request_timeout=120,
)
elif self.settings.llm_backend == "openai":
from llama_index.llms.openai import OpenAI
return OpenAI(
model=self.settings.llm_model or "gpt-3.5-turbo",
api_base=self.settings.llm_endpoint or None,
@@ -48,9 +43,6 @@ class AIClient:
self.settings.llm_model,
)
from llama_index.core.llms import ChatMessage
from llama_index.core.program.function_program import get_function_tool
user_msg = ChatMessage(role="user", content=prompt)
tool = get_function_tool(DocumentClassifierSchema)
result = self.llm.chat_with_tools(
@@ -66,7 +58,7 @@ class AIClient:
parsed = DocumentClassifierSchema(**tool_calls[0].tool_kwargs)
return parsed.model_dump()
def run_chat(self, messages: list["ChatMessage"]) -> str:
def run_chat(self, messages: list[ChatMessage]) -> str:
logger.debug(
"Running chat query against %s with model %s",
self.settings.llm_backend,

View File

@@ -1,12 +1,13 @@
import json
from typing import TYPE_CHECKING
from django.conf import settings
if TYPE_CHECKING:
from pathlib import Path
from llama_index.core.base.embeddings.base import BaseEmbedding
from django.conf import settings
from llama_index.core.base.embeddings.base import BaseEmbedding
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.embeddings.openai import OpenAIEmbedding
from documents.models import Document
from documents.models import Note
@@ -14,21 +15,17 @@ from paperless.config import AIConfig
from paperless.models import LLMEmbeddingBackend
def get_embedding_model() -> "BaseEmbedding":
def get_embedding_model() -> BaseEmbedding:
config = AIConfig()
match config.llm_embedding_backend:
case LLMEmbeddingBackend.OPENAI:
from llama_index.embeddings.openai import OpenAIEmbedding
return OpenAIEmbedding(
model=config.llm_embedding_model or "text-embedding-3-small",
api_key=config.llm_api_key,
api_base=config.llm_endpoint or None,
)
case LLMEmbeddingBackend.HUGGINGFACE:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
return HuggingFaceEmbedding(
model_name=config.llm_embedding_model
or "sentence-transformers/all-MiniLM-L6-v2",

View File

@@ -4,12 +4,26 @@ from collections.abc import Callable
from collections.abc import Iterable
from datetime import timedelta
from pathlib import Path
from typing import TYPE_CHECKING
from typing import TypeVar
import faiss
import llama_index.core.settings as llama_settings
from celery import states
from django.conf import settings
from django.utils import timezone
from llama_index.core import Document as LlamaDocument
from llama_index.core import StorageContext
from llama_index.core import VectorStoreIndex
from llama_index.core import load_index_from_storage
from llama_index.core.indices.prompt_helper import PromptHelper
from llama_index.core.node_parser import SimpleNodeParser
from llama_index.core.prompts import PromptTemplate
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.schema import BaseNode
from llama_index.core.storage.docstore import SimpleDocumentStore
from llama_index.core.storage.index_store import SimpleIndexStore
from llama_index.core.text_splitter import TokenTextSplitter
from llama_index.vector_stores.faiss import FaissVectorStore
from documents.models import Document
from documents.models import PaperlessTask
@@ -20,10 +34,6 @@ from paperless_ai.embedding import get_embedding_model
_T = TypeVar("_T")
IterWrapper = Callable[[Iterable[_T]], Iterable[_T]]
if TYPE_CHECKING:
from llama_index.core import VectorStoreIndex
from llama_index.core.schema import BaseNode
def _identity(iterable: Iterable[_T]) -> Iterable[_T]:
return iterable
@@ -65,23 +75,12 @@ def get_or_create_storage_context(*, rebuild=False):
settings.LLM_INDEX_DIR.mkdir(parents=True, exist_ok=True)
if rebuild or not settings.LLM_INDEX_DIR.exists():
import faiss
from llama_index.core import StorageContext
from llama_index.core.storage.docstore import SimpleDocumentStore
from llama_index.core.storage.index_store import SimpleIndexStore
from llama_index.vector_stores.faiss import FaissVectorStore
embedding_dim = get_embedding_dim()
faiss_index = faiss.IndexFlatL2(embedding_dim)
vector_store = FaissVectorStore(faiss_index=faiss_index)
docstore = SimpleDocumentStore()
index_store = SimpleIndexStore()
else:
from llama_index.core import StorageContext
from llama_index.core.storage.docstore import SimpleDocumentStore
from llama_index.core.storage.index_store import SimpleIndexStore
from llama_index.vector_stores.faiss import FaissVectorStore
vector_store = FaissVectorStore.from_persist_dir(settings.LLM_INDEX_DIR)
docstore = SimpleDocumentStore.from_persist_dir(settings.LLM_INDEX_DIR)
index_store = SimpleIndexStore.from_persist_dir(settings.LLM_INDEX_DIR)
@@ -94,7 +93,7 @@ def get_or_create_storage_context(*, rebuild=False):
)
def build_document_node(document: Document) -> list["BaseNode"]:
def build_document_node(document: Document) -> list[BaseNode]:
"""
Given a Document, returns parsed Nodes ready for indexing.
"""
@@ -113,9 +112,6 @@ def build_document_node(document: Document) -> list["BaseNode"]:
"added": document.added.isoformat() if document.added else None,
"modified": document.modified.isoformat(),
}
from llama_index.core import Document as LlamaDocument
from llama_index.core.node_parser import SimpleNodeParser
doc = LlamaDocument(text=text, metadata=metadata)
parser = SimpleNodeParser()
return parser.get_nodes_from_documents([doc])
@@ -126,10 +122,6 @@ def load_or_build_index(nodes=None):
Load an existing VectorStoreIndex if present,
or build a new one using provided nodes if storage is empty.
"""
import llama_index.core.settings as llama_settings
from llama_index.core import VectorStoreIndex
from llama_index.core import load_index_from_storage
embed_model = get_embedding_model()
llama_settings.Settings.embed_model = embed_model
storage_context = get_or_create_storage_context()
@@ -151,7 +143,7 @@ def load_or_build_index(nodes=None):
)
def remove_document_docstore_nodes(document: Document, index: "VectorStoreIndex"):
def remove_document_docstore_nodes(document: Document, index: VectorStoreIndex):
"""
Removes existing documents from docstore for a given document from the index.
This is necessary because FAISS IndexFlatL2 is append-only.
@@ -182,8 +174,6 @@ def update_llm_index(
"""
Rebuild or update the LLM index.
"""
from llama_index.core import VectorStoreIndex
nodes = []
documents = Document.objects.all()
@@ -197,8 +187,6 @@ def update_llm_index(
(settings.LLM_INDEX_DIR / "meta.json").unlink(missing_ok=True)
# Rebuild index from scratch
logger.info("Rebuilding LLM index.")
import llama_index.core.settings as llama_settings
embed_model = get_embedding_model()
llama_settings.Settings.embed_model = embed_model
storage_context = get_or_create_storage_context(rebuild=True)
@@ -283,10 +271,6 @@ def llm_index_remove_document(document: Document):
def truncate_content(content: str) -> str:
from llama_index.core.indices.prompt_helper import PromptHelper
from llama_index.core.prompts import PromptTemplate
from llama_index.core.text_splitter import TokenTextSplitter
prompt_helper = PromptHelper(
context_window=8192,
num_output=512,
@@ -331,8 +315,6 @@ def query_similar_documents(
else None
)
from llama_index.core.retrievers import VectorIndexRetriever
retriever = VectorIndexRetriever(
index=index,
similarity_top_k=top_k,

View File

@@ -181,11 +181,11 @@ def test_load_or_build_index_builds_when_nodes_given(
) -> None:
with (
patch(
"llama_index.core.load_index_from_storage",
"paperless_ai.indexing.load_index_from_storage",
side_effect=ValueError("Index not found"),
),
patch(
"llama_index.core.VectorStoreIndex",
"paperless_ai.indexing.VectorStoreIndex",
return_value=MagicMock(),
) as mock_index_cls,
patch(
@@ -206,7 +206,7 @@ def test_load_or_build_index_raises_exception_when_no_nodes(
) -> None:
with (
patch(
"llama_index.core.load_index_from_storage",
"paperless_ai.indexing.load_index_from_storage",
side_effect=ValueError("Index not found"),
),
patch(
@@ -225,11 +225,11 @@ def test_load_or_build_index_succeeds_when_nodes_given(
) -> None:
with (
patch(
"llama_index.core.load_index_from_storage",
"paperless_ai.indexing.load_index_from_storage",
side_effect=ValueError("Index not found"),
),
patch(
"llama_index.core.VectorStoreIndex",
"paperless_ai.indexing.VectorStoreIndex",
return_value=MagicMock(),
) as mock_index_cls,
patch(
@@ -334,7 +334,7 @@ def test_query_similar_documents(
patch(
"paperless_ai.indexing.vector_store_file_exists",
) as mock_vector_store_exists,
patch("llama_index.core.retrievers.VectorIndexRetriever") as mock_retriever_cls,
patch("paperless_ai.indexing.VectorIndexRetriever") as mock_retriever_cls,
patch("paperless_ai.indexing.Document.objects.filter") as mock_filter,
):
mock_storage.return_value = MagicMock()

View File

@@ -45,7 +45,7 @@ def test_stream_chat_with_one_document_full_content(mock_document) -> None:
patch("paperless_ai.chat.AIClient") as mock_client_cls,
patch("paperless_ai.chat.load_or_build_index") as mock_load_index,
patch(
"llama_index.core.query_engine.RetrieverQueryEngine.from_args",
"paperless_ai.chat.RetrieverQueryEngine.from_args",
) as mock_query_engine_cls,
):
mock_client = MagicMock()
@@ -76,7 +76,7 @@ def test_stream_chat_with_multiple_documents_retrieval(patch_embed_nodes) -> Non
patch("paperless_ai.chat.AIClient") as mock_client_cls,
patch("paperless_ai.chat.load_or_build_index") as mock_load_index,
patch(
"llama_index.core.query_engine.RetrieverQueryEngine.from_args",
"paperless_ai.chat.RetrieverQueryEngine.from_args",
) as mock_query_engine_cls,
patch.object(VectorStoreIndex, "as_retriever") as mock_as_retriever,
):

View File

@@ -18,13 +18,13 @@ def mock_ai_config():
@pytest.fixture
def mock_ollama_llm():
with patch("llama_index.llms.ollama.Ollama") as MockOllama:
with patch("paperless_ai.client.Ollama") as MockOllama:
yield MockOllama
@pytest.fixture
def mock_openai_llm():
with patch("llama_index.llms.openai.OpenAI") as MockOpenAI:
with patch("paperless_ai.client.OpenAI") as MockOpenAI:
yield MockOpenAI

View File

@@ -67,7 +67,7 @@ def test_get_embedding_model_openai(mock_ai_config):
mock_ai_config.return_value.llm_api_key = "test_api_key"
mock_ai_config.return_value.llm_endpoint = "http://test-url"
with patch("llama_index.embeddings.openai.OpenAIEmbedding") as MockOpenAIEmbedding:
with patch("paperless_ai.embedding.OpenAIEmbedding") as MockOpenAIEmbedding:
model = get_embedding_model()
MockOpenAIEmbedding.assert_called_once_with(
model="text-embedding-3-small",
@@ -84,7 +84,7 @@ def test_get_embedding_model_huggingface(mock_ai_config):
)
with patch(
"llama_index.embeddings.huggingface.HuggingFaceEmbedding",
"paperless_ai.embedding.HuggingFaceEmbedding",
) as MockHuggingFaceEmbedding:
model = get_embedding_model()
MockHuggingFaceEmbedding.assert_called_once_with(