mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-03-10 19:21:24 +00:00
Compare commits
4 Commits
feature-pa
...
fix-downgr
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b7112ab0cf | ||
|
|
2b4ea570ef | ||
|
|
86573fc1a0 | ||
|
|
3856ec19c0 |
@@ -30,7 +30,7 @@ RUN set -eux \
|
||||
# Purpose: Installs s6-overlay and rootfs
|
||||
# Comments:
|
||||
# - Don't leave anything extra in here either
|
||||
FROM ghcr.io/astral-sh/uv:0.10.7-python3.12-trixie-slim AS s6-overlay-base
|
||||
FROM ghcr.io/astral-sh/uv:0.10.9-python3.12-trixie-slim AS s6-overlay-base
|
||||
|
||||
WORKDIR /usr/src/s6
|
||||
|
||||
|
||||
51
docs/api.md
51
docs/api.md
@@ -305,52 +305,16 @@ The following methods are supported:
|
||||
- `"merge": true or false` (defaults to false)
|
||||
- The `merge` flag determines if the supplied permissions will overwrite all existing permissions (including
|
||||
removing them) or be merged with existing permissions.
|
||||
- `edit_pdf`
|
||||
- Requires `parameters`:
|
||||
- `"doc_ids": [DOCUMENT_ID]` A list of a single document ID to edit.
|
||||
- `"operations": [OPERATION, ...]` A list of operations to perform on the documents. Each operation is a dictionary
|
||||
with the following keys:
|
||||
- `"page": PAGE_NUMBER` The page number to edit (1-based).
|
||||
- `"rotate": DEGREES` Optional rotation in degrees (90, 180, 270).
|
||||
- `"doc": OUTPUT_DOCUMENT_INDEX` Optional index of the output document for split operations.
|
||||
- Optional `parameters`:
|
||||
- `"delete_original": true` to delete the original documents after editing.
|
||||
- `"update_document": true` to add the edited PDF as a new version of the root document.
|
||||
- `"include_metadata": true` to copy metadata from the original document to the edited document.
|
||||
- `remove_password`
|
||||
- Requires `parameters`:
|
||||
- `"password": "PASSWORD_STRING"` The password to remove from the PDF documents.
|
||||
- Optional `parameters`:
|
||||
- `"update_document": true` to add the password-less PDF as a new version of the root document.
|
||||
- `"delete_original": true` to delete the original document after editing.
|
||||
- `"include_metadata": true` to copy metadata from the original document to the new password-less document.
|
||||
- `merge`
|
||||
- No additional `parameters` required.
|
||||
- The ordering of the merged document is determined by the list of IDs.
|
||||
- Optional `parameters`:
|
||||
- `"metadata_document_id": DOC_ID` apply metadata (tags, correspondent, etc.) from this document to the merged document.
|
||||
- `"delete_originals": true` to delete the original documents. This requires the calling user being the owner of
|
||||
all documents that are merged.
|
||||
- `split`
|
||||
- Requires `parameters`:
|
||||
- `"pages": [..]` The list should be a list of pages and/or ranges, separated by commas e.g. `"[1,2-3,4,5-7]"`
|
||||
- Optional `parameters`:
|
||||
- `"delete_originals": true` to delete the original document after consumption. This requires the calling user being the owner of
|
||||
the document.
|
||||
- The split operation only accepts a single document.
|
||||
- `rotate`
|
||||
- Requires `parameters`:
|
||||
- `"degrees": DEGREES`. Must be an integer i.e. 90, 180, 270
|
||||
- `delete_pages`
|
||||
- Requires `parameters`:
|
||||
- `"pages": [..]` The list should be a list of integers e.g. `"[2,3,4]"`
|
||||
- The delete_pages operation only accepts a single document.
|
||||
- `modify_custom_fields`
|
||||
- Requires `parameters`:
|
||||
- `"add_custom_fields": { CUSTOM_FIELD_ID: VALUE }`: JSON object consisting of custom field id:value pairs to add to the document, can also be a list of custom field IDs
|
||||
to add with empty values.
|
||||
- `"remove_custom_fields": [CUSTOM_FIELD_ID]`: custom field ids to remove from the document.
|
||||
|
||||
#### Document-editing operations
|
||||
|
||||
Beginning with API version 10, the API supports individual endpoints for document-editing operations (`merge`, `rotate`, `edit_pdf`, etc.); their documentation can be found in the API spec / viewer. Legacy document-editing methods via `/api/documents/bulk_edit/` are still supported for compatibility but are deprecated, and clients should migrate to the individual endpoints before the legacy methods are removed in a future version.
|
||||
|
||||
### Objects
|
||||
|
||||
Bulk editing for objects (tags, document types etc.) currently supports set permissions or delete
|
||||
@@ -467,4 +431,9 @@ Initial API version.
|
||||
#### Version 10
|
||||
|
||||
- The `show_on_dashboard` and `show_in_sidebar` fields of saved views have been
|
||||
removed. Relevant settings are now stored in the UISettings model.
|
||||
removed. Relevant settings are now stored in the UISettings model. Compatibility is maintained
|
||||
for versions < 10 until support for API v9 is dropped.
|
||||
- Document-editing operations such as `merge`, `rotate`, and `edit_pdf` have been
|
||||
moved from the bulk edit endpoint to their own individual endpoints. Using these methods via
|
||||
the bulk edit endpoint is still supported for compatibility with versions < 10 until support
|
||||
for API v9 is dropped.
|
||||
|
||||
@@ -45,7 +45,7 @@ dependencies = [
|
||||
"drf-spectacular-sidecar~=2026.1.1",
|
||||
"drf-writable-nested~=0.7.1",
|
||||
"faiss-cpu>=1.10",
|
||||
"filelock~=3.24.3",
|
||||
"filelock~=3.20.3",
|
||||
"flower~=2.0.1",
|
||||
"gotenberg-client~=0.13.1",
|
||||
"httpx-oauth~=0.16",
|
||||
|
||||
@@ -1217,7 +1217,7 @@
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
|
||||
<context context-type="linenumber">1760</context>
|
||||
<context context-type="linenumber">1758</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="1577733187050997705" datatype="html">
|
||||
@@ -2802,19 +2802,19 @@
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
|
||||
<context context-type="linenumber">1761</context>
|
||||
<context context-type="linenumber">1759</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">802</context>
|
||||
<context context-type="linenumber">833</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">835</context>
|
||||
<context context-type="linenumber">871</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">854</context>
|
||||
<context context-type="linenumber">894</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/manage/document-attributes/custom-fields/custom-fields.component.ts</context>
|
||||
@@ -3404,27 +3404,27 @@
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">445</context>
|
||||
<context context-type="linenumber">470</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">485</context>
|
||||
<context context-type="linenumber">510</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">523</context>
|
||||
<context context-type="linenumber">548</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">561</context>
|
||||
<context context-type="linenumber">586</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">623</context>
|
||||
<context context-type="linenumber">648</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">756</context>
|
||||
<context context-type="linenumber">781</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="994016933065248559" datatype="html">
|
||||
@@ -3512,7 +3512,7 @@
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
|
||||
<context context-type="linenumber">1814</context>
|
||||
<context context-type="linenumber">1812</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6661109599266152398" datatype="html">
|
||||
@@ -3523,7 +3523,7 @@
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
|
||||
<context context-type="linenumber">1815</context>
|
||||
<context context-type="linenumber">1813</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="5162686434580248853" datatype="html">
|
||||
@@ -3534,7 +3534,7 @@
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
|
||||
<context context-type="linenumber">1816</context>
|
||||
<context context-type="linenumber">1814</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="8157388568390631653" datatype="html">
|
||||
@@ -5499,7 +5499,7 @@
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">760</context>
|
||||
<context context-type="linenumber">785</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="4522609911791833187" datatype="html">
|
||||
@@ -7327,7 +7327,7 @@
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">390</context>
|
||||
<context context-type="linenumber">415</context>
|
||||
</context-group>
|
||||
<note priority="1" from="description">this string is used to separate processing, failed and added on the file upload widget</note>
|
||||
</trans-unit>
|
||||
@@ -7851,7 +7851,7 @@
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">758</context>
|
||||
<context context-type="linenumber">783</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="7295637485862454066" datatype="html">
|
||||
@@ -7869,7 +7869,7 @@
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">798</context>
|
||||
<context context-type="linenumber">829</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="2951161989614003846" datatype="html">
|
||||
@@ -7890,88 +7890,88 @@
|
||||
<source>Reprocess operation for "<x id="PH" equiv-text="this.document.title"/>" will begin in the background.</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
|
||||
<context context-type="linenumber">1387</context>
|
||||
<context context-type="linenumber">1385</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="4409560272830824468" datatype="html">
|
||||
<source>Error executing operation</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
|
||||
<context context-type="linenumber">1398</context>
|
||||
<context context-type="linenumber">1396</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6030453331794586802" datatype="html">
|
||||
<source>Error downloading document</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
|
||||
<context context-type="linenumber">1461</context>
|
||||
<context context-type="linenumber">1459</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="4458954481601077369" datatype="html">
|
||||
<source>Page Fit</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
|
||||
<context context-type="linenumber">1541</context>
|
||||
<context context-type="linenumber">1539</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="4663705961777238777" datatype="html">
|
||||
<source>PDF edit operation for "<x id="PH" equiv-text="this.document.title"/>" will begin in the background.</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
|
||||
<context context-type="linenumber">1781</context>
|
||||
<context context-type="linenumber">1779</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="9043972994040261999" datatype="html">
|
||||
<source>Error executing PDF edit operation</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
|
||||
<context context-type="linenumber">1793</context>
|
||||
<context context-type="linenumber">1791</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6172690334763056188" datatype="html">
|
||||
<source>Please enter the current password before attempting to remove it.</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
|
||||
<context context-type="linenumber">1804</context>
|
||||
<context context-type="linenumber">1802</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="968660764814228922" datatype="html">
|
||||
<source>Password removal operation for "<x id="PH" equiv-text="this.document.title"/>" will begin in the background.</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
|
||||
<context context-type="linenumber">1838</context>
|
||||
<context context-type="linenumber">1836</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="2282118435712883014" datatype="html">
|
||||
<source>Error executing password removal operation</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
|
||||
<context context-type="linenumber">1852</context>
|
||||
<context context-type="linenumber">1850</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="3740891324955700797" datatype="html">
|
||||
<source>Print failed.</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
|
||||
<context context-type="linenumber">1891</context>
|
||||
<context context-type="linenumber">1889</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6457245677384603573" datatype="html">
|
||||
<source>Error loading document for printing.</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
|
||||
<context context-type="linenumber">1903</context>
|
||||
<context context-type="linenumber">1901</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6085793215710522488" datatype="html">
|
||||
<source>An error occurred loading tiff: <x id="PH" equiv-text="err.toString()"/></source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
|
||||
<context context-type="linenumber">1968</context>
|
||||
<context context-type="linenumber">1966</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
|
||||
<context context-type="linenumber">1972</context>
|
||||
<context context-type="linenumber">1970</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="4958946940233632319" datatype="html">
|
||||
@@ -8215,25 +8215,25 @@
|
||||
<source>Error executing bulk operation</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">294</context>
|
||||
<context context-type="linenumber">321</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="7894972847287473517" datatype="html">
|
||||
<source>"<x id="PH" equiv-text="items[0].name"/>"</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">382</context>
|
||||
<context context-type="linenumber">407</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">388</context>
|
||||
<context context-type="linenumber">413</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="8639884465898458690" datatype="html">
|
||||
<source>"<x id="PH" equiv-text="items[0].name"/>" and "<x id="PH_1" equiv-text="items[1].name"/>"</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">384</context>
|
||||
<context context-type="linenumber">409</context>
|
||||
</context-group>
|
||||
<note priority="1" from="description">This is for messages like 'modify "tag1" and "tag2"'</note>
|
||||
</trans-unit>
|
||||
@@ -8241,7 +8241,7 @@
|
||||
<source><x id="PH" equiv-text="list"/> and "<x id="PH_1" equiv-text="items[items.length - 1].name"/>"</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">392,394</context>
|
||||
<context context-type="linenumber">417,419</context>
|
||||
</context-group>
|
||||
<note priority="1" from="description">this is for messages like 'modify "tag1", "tag2" and "tag3"'</note>
|
||||
</trans-unit>
|
||||
@@ -8249,14 +8249,14 @@
|
||||
<source>Confirm tags assignment</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">409</context>
|
||||
<context context-type="linenumber">434</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6619516195038467207" datatype="html">
|
||||
<source>This operation will add the tag "<x id="PH" equiv-text="tag.name"/>" to <x id="PH_1" equiv-text="this.list.selected.size"/> selected document(s).</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">415</context>
|
||||
<context context-type="linenumber">440</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="1894412783609570695" datatype="html">
|
||||
@@ -8265,14 +8265,14 @@
|
||||
)"/> to <x id="PH_1" equiv-text="this.list.selected.size"/> selected document(s).</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">420,422</context>
|
||||
<context context-type="linenumber">445,447</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="7181166515756808573" datatype="html">
|
||||
<source>This operation will remove the tag "<x id="PH" equiv-text="tag.name"/>" from <x id="PH_1" equiv-text="this.list.selected.size"/> selected document(s).</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">428</context>
|
||||
<context context-type="linenumber">453</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="3819792277998068944" datatype="html">
|
||||
@@ -8281,7 +8281,7 @@
|
||||
)"/> from <x id="PH_1" equiv-text="this.list.selected.size"/> selected document(s).</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">433,435</context>
|
||||
<context context-type="linenumber">458,460</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="2739066218579571288" datatype="html">
|
||||
@@ -8292,84 +8292,84 @@
|
||||
)"/> on <x id="PH_2" equiv-text="this.list.selected.size"/> selected document(s).</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">437,441</context>
|
||||
<context context-type="linenumber">462,466</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="2996713129519325161" datatype="html">
|
||||
<source>Confirm correspondent assignment</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">478</context>
|
||||
<context context-type="linenumber">503</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6900893559485781849" datatype="html">
|
||||
<source>This operation will assign the correspondent "<x id="PH" equiv-text="correspondent.name"/>" to <x id="PH_1" equiv-text="this.list.selected.size"/> selected document(s).</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">480</context>
|
||||
<context context-type="linenumber">505</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="1257522660364398440" datatype="html">
|
||||
<source>This operation will remove the correspondent from <x id="PH" equiv-text="this.list.selected.size"/> selected document(s).</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">482</context>
|
||||
<context context-type="linenumber">507</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="5393409374423140648" datatype="html">
|
||||
<source>Confirm document type assignment</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">516</context>
|
||||
<context context-type="linenumber">541</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="332180123895325027" datatype="html">
|
||||
<source>This operation will assign the document type "<x id="PH" equiv-text="documentType.name"/>" to <x id="PH_1" equiv-text="this.list.selected.size"/> selected document(s).</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">518</context>
|
||||
<context context-type="linenumber">543</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="2236642492594872779" datatype="html">
|
||||
<source>This operation will remove the document type from <x id="PH" equiv-text="this.list.selected.size"/> selected document(s).</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">520</context>
|
||||
<context context-type="linenumber">545</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6386555513013840736" datatype="html">
|
||||
<source>Confirm storage path assignment</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">554</context>
|
||||
<context context-type="linenumber">579</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="8750527458618415924" datatype="html">
|
||||
<source>This operation will assign the storage path "<x id="PH" equiv-text="storagePath.name"/>" to <x id="PH_1" equiv-text="this.list.selected.size"/> selected document(s).</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">556</context>
|
||||
<context context-type="linenumber">581</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="60728365335056946" datatype="html">
|
||||
<source>This operation will remove the storage path from <x id="PH" equiv-text="this.list.selected.size"/> selected document(s).</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">558</context>
|
||||
<context context-type="linenumber">583</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="4187352575310415704" datatype="html">
|
||||
<source>Confirm custom field assignment</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">587</context>
|
||||
<context context-type="linenumber">612</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="7966494636326273856" datatype="html">
|
||||
<source>This operation will assign the custom field "<x id="PH" equiv-text="customField.name"/>" to <x id="PH_1" equiv-text="this.list.selected.size"/> selected document(s).</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">593</context>
|
||||
<context context-type="linenumber">618</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="5789455969634598553" datatype="html">
|
||||
@@ -8378,14 +8378,14 @@
|
||||
)"/> to <x id="PH_1" equiv-text="this.list.selected.size"/> selected document(s).</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">598,600</context>
|
||||
<context context-type="linenumber">623,625</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="5648572354333199245" datatype="html">
|
||||
<source>This operation will remove the custom field "<x id="PH" equiv-text="customField.name"/>" from <x id="PH_1" equiv-text="this.list.selected.size"/> selected document(s).</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">606</context>
|
||||
<context context-type="linenumber">631</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6666899594015948817" datatype="html">
|
||||
@@ -8394,7 +8394,7 @@
|
||||
)"/> from <x id="PH_1" equiv-text="this.list.selected.size"/> selected document(s).</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">611,613</context>
|
||||
<context context-type="linenumber">636,638</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="8050047262594964176" datatype="html">
|
||||
@@ -8405,91 +8405,91 @@
|
||||
)"/> on <x id="PH_2" equiv-text="this.list.selected.size"/> selected document(s).</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">615,619</context>
|
||||
<context context-type="linenumber">640,644</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="8615059324209654051" datatype="html">
|
||||
<source>Move <x id="PH" equiv-text="this.list.selected.size"/> selected document(s) to the trash?</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">757</context>
|
||||
<context context-type="linenumber">782</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="8585195717323764335" datatype="html">
|
||||
<source>This operation will permanently recreate the archive files for <x id="PH" equiv-text="this.list.selected.size"/> selected document(s).</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">799</context>
|
||||
<context context-type="linenumber">830</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="7366623494074776040" datatype="html">
|
||||
<source>The archive files will be re-generated with the current settings.</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">800</context>
|
||||
<context context-type="linenumber">831</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6555329262222566158" datatype="html">
|
||||
<source>Rotate confirm</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">832</context>
|
||||
<context context-type="linenumber">868</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="5203024009814367559" datatype="html">
|
||||
<source>This operation will add rotated versions of the <x id="PH" equiv-text="this.list.selected.size"/> document(s).</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">833</context>
|
||||
<context context-type="linenumber">869</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="7910756456450124185" datatype="html">
|
||||
<source>Merge confirm</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">852</context>
|
||||
<context context-type="linenumber">892</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="7643543647233874431" datatype="html">
|
||||
<source>This operation will merge <x id="PH" equiv-text="this.list.selected.size"/> selected documents into a new document.</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">853</context>
|
||||
<context context-type="linenumber">893</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="7869008840945899895" datatype="html">
|
||||
<source>Merged document will be queued for consumption.</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">872</context>
|
||||
<context context-type="linenumber">916</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="476913782630693351" datatype="html">
|
||||
<source>Custom fields updated.</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">896</context>
|
||||
<context context-type="linenumber">940</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="3873496751167944011" datatype="html">
|
||||
<source>Error updating custom fields.</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">905</context>
|
||||
<context context-type="linenumber">949</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6144801143088984138" datatype="html">
|
||||
<source>Share link bundle creation requested.</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">945</context>
|
||||
<context context-type="linenumber">989</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="46019676931295023" datatype="html">
|
||||
<source>Share link bundle creation is not available yet.</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">952</context>
|
||||
<context context-type="linenumber">996</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6307402210351946694" datatype="html">
|
||||
|
||||
@@ -950,8 +950,8 @@ describe('DocumentDetailComponent', () => {
|
||||
|
||||
it('should support reprocess, confirm and close modal after started', () => {
|
||||
initNormally()
|
||||
const bulkEditSpy = jest.spyOn(documentService, 'bulkEdit')
|
||||
bulkEditSpy.mockReturnValue(of(true))
|
||||
const reprocessSpy = jest.spyOn(documentService, 'reprocessDocuments')
|
||||
reprocessSpy.mockReturnValue(of(true))
|
||||
let openModal: NgbModalRef
|
||||
modalService.activeInstances.subscribe((modal) => (openModal = modal[0]))
|
||||
const modalSpy = jest.spyOn(modalService, 'open')
|
||||
@@ -959,7 +959,7 @@ describe('DocumentDetailComponent', () => {
|
||||
component.reprocess()
|
||||
const modalCloseSpy = jest.spyOn(openModal, 'close')
|
||||
openModal.componentInstance.confirmClicked.next()
|
||||
expect(bulkEditSpy).toHaveBeenCalledWith([doc.id], 'reprocess', {})
|
||||
expect(reprocessSpy).toHaveBeenCalledWith([doc.id])
|
||||
expect(modalSpy).toHaveBeenCalled()
|
||||
expect(toastSpy).toHaveBeenCalled()
|
||||
expect(modalCloseSpy).toHaveBeenCalled()
|
||||
@@ -967,13 +967,13 @@ describe('DocumentDetailComponent', () => {
|
||||
|
||||
it('should show error if redo ocr call fails', () => {
|
||||
initNormally()
|
||||
const bulkEditSpy = jest.spyOn(documentService, 'bulkEdit')
|
||||
const reprocessSpy = jest.spyOn(documentService, 'reprocessDocuments')
|
||||
let openModal: NgbModalRef
|
||||
modalService.activeInstances.subscribe((modal) => (openModal = modal[0]))
|
||||
const toastSpy = jest.spyOn(toastService, 'showError')
|
||||
component.reprocess()
|
||||
const modalCloseSpy = jest.spyOn(openModal, 'close')
|
||||
bulkEditSpy.mockReturnValue(throwError(() => new Error('error occurred')))
|
||||
reprocessSpy.mockReturnValue(throwError(() => new Error('error occurred')))
|
||||
openModal.componentInstance.confirmClicked.next()
|
||||
expect(toastSpy).toHaveBeenCalled()
|
||||
expect(modalCloseSpy).not.toHaveBeenCalled()
|
||||
@@ -1669,18 +1669,15 @@ describe('DocumentDetailComponent', () => {
|
||||
modal.componentInstance.pages = [{ page: 1, rotate: 0, splitAfter: false }]
|
||||
modal.componentInstance.confirm()
|
||||
let req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/bulk_edit/`
|
||||
`${environment.apiBaseUrl}documents/edit_pdf/`
|
||||
)
|
||||
expect(req.request.body).toEqual({
|
||||
documents: [10],
|
||||
method: 'edit_pdf',
|
||||
parameters: {
|
||||
operations: [{ page: 1, rotate: 0, doc: 0 }],
|
||||
delete_original: false,
|
||||
update_document: false,
|
||||
include_metadata: true,
|
||||
source_mode: 'explicit_selection',
|
||||
},
|
||||
operations: [{ page: 1, rotate: 0, doc: 0 }],
|
||||
delete_original: false,
|
||||
update_document: false,
|
||||
include_metadata: true,
|
||||
source_mode: 'explicit_selection',
|
||||
})
|
||||
req.error(new ErrorEvent('failed'))
|
||||
expect(errorSpy).toHaveBeenCalled()
|
||||
@@ -1691,7 +1688,7 @@ describe('DocumentDetailComponent', () => {
|
||||
modal.componentInstance.deleteOriginal = true
|
||||
modal.componentInstance.confirm()
|
||||
req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/bulk_edit/`
|
||||
`${environment.apiBaseUrl}documents/edit_pdf/`
|
||||
)
|
||||
req.flush(true)
|
||||
expect(closeSpy).toHaveBeenCalled()
|
||||
@@ -1711,18 +1708,15 @@ describe('DocumentDetailComponent', () => {
|
||||
dialog.deleteOriginal = true
|
||||
dialog.confirm()
|
||||
const req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/bulk_edit/`
|
||||
`${environment.apiBaseUrl}documents/remove_password/`
|
||||
)
|
||||
expect(req.request.body).toEqual({
|
||||
documents: [10],
|
||||
method: 'remove_password',
|
||||
parameters: {
|
||||
password: 'secret',
|
||||
update_document: false,
|
||||
include_metadata: false,
|
||||
delete_original: true,
|
||||
source_mode: 'explicit_selection',
|
||||
},
|
||||
password: 'secret',
|
||||
update_document: false,
|
||||
include_metadata: false,
|
||||
delete_original: true,
|
||||
source_mode: 'explicit_selection',
|
||||
})
|
||||
req.flush(true)
|
||||
})
|
||||
@@ -1737,7 +1731,7 @@ describe('DocumentDetailComponent', () => {
|
||||
|
||||
expect(errorSpy).toHaveBeenCalled()
|
||||
httpTestingController.expectNone(
|
||||
`${environment.apiBaseUrl}documents/bulk_edit/`
|
||||
`${environment.apiBaseUrl}documents/remove_password/`
|
||||
)
|
||||
})
|
||||
|
||||
@@ -1753,7 +1747,7 @@ describe('DocumentDetailComponent', () => {
|
||||
modal.componentInstance as PasswordRemovalConfirmDialogComponent
|
||||
dialog.confirm()
|
||||
const req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/bulk_edit/`
|
||||
`${environment.apiBaseUrl}documents/remove_password/`
|
||||
)
|
||||
req.error(new ErrorEvent('failed'))
|
||||
|
||||
@@ -1774,7 +1768,7 @@ describe('DocumentDetailComponent', () => {
|
||||
modal.componentInstance as PasswordRemovalConfirmDialogComponent
|
||||
dialog.confirm()
|
||||
const req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/bulk_edit/`
|
||||
`${environment.apiBaseUrl}documents/remove_password/`
|
||||
)
|
||||
req.flush(true)
|
||||
|
||||
|
||||
@@ -1379,27 +1379,25 @@ export class DocumentDetailComponent
|
||||
modal.componentInstance.btnCaption = $localize`Proceed`
|
||||
modal.componentInstance.confirmClicked.subscribe(() => {
|
||||
modal.componentInstance.buttonsEnabled = false
|
||||
this.documentsService
|
||||
.bulkEdit([this.document.id], 'reprocess', {})
|
||||
.subscribe({
|
||||
next: () => {
|
||||
this.toastService.showInfo(
|
||||
$localize`Reprocess operation for "${this.document.title}" will begin in the background.`
|
||||
)
|
||||
if (modal) {
|
||||
modal.close()
|
||||
}
|
||||
},
|
||||
error: (error) => {
|
||||
if (modal) {
|
||||
modal.componentInstance.buttonsEnabled = true
|
||||
}
|
||||
this.toastService.showError(
|
||||
$localize`Error executing operation`,
|
||||
error
|
||||
)
|
||||
},
|
||||
})
|
||||
this.documentsService.reprocessDocuments([this.document.id]).subscribe({
|
||||
next: () => {
|
||||
this.toastService.showInfo(
|
||||
$localize`Reprocess operation for "${this.document.title}" will begin in the background.`
|
||||
)
|
||||
if (modal) {
|
||||
modal.close()
|
||||
}
|
||||
},
|
||||
error: (error) => {
|
||||
if (modal) {
|
||||
modal.componentInstance.buttonsEnabled = true
|
||||
}
|
||||
this.toastService.showError(
|
||||
$localize`Error executing operation`,
|
||||
error
|
||||
)
|
||||
},
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
@@ -1766,7 +1764,7 @@ export class DocumentDetailComponent
|
||||
.subscribe(() => {
|
||||
modal.componentInstance.buttonsEnabled = false
|
||||
this.documentsService
|
||||
.bulkEdit([sourceDocumentId], 'edit_pdf', {
|
||||
.editPdfDocuments([sourceDocumentId], {
|
||||
operations: modal.componentInstance.getOperations(),
|
||||
delete_original: modal.componentInstance.deleteOriginal,
|
||||
update_document:
|
||||
@@ -1824,7 +1822,7 @@ export class DocumentDetailComponent
|
||||
dialog.buttonsEnabled = false
|
||||
this.networkActive = true
|
||||
this.documentsService
|
||||
.bulkEdit([sourceDocumentId], 'remove_password', {
|
||||
.removePasswordDocuments([sourceDocumentId], {
|
||||
password: this.password,
|
||||
update_document: dialog.updateDocument,
|
||||
include_metadata: dialog.includeMetadata,
|
||||
|
||||
@@ -849,13 +849,11 @@ describe('BulkEditorComponent', () => {
|
||||
expect(modal).not.toBeUndefined()
|
||||
modal.componentInstance.confirm()
|
||||
let req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/bulk_edit/`
|
||||
`${environment.apiBaseUrl}documents/delete/`
|
||||
)
|
||||
req.flush(true)
|
||||
expect(req.request.body).toEqual({
|
||||
documents: [3, 4],
|
||||
method: 'delete',
|
||||
parameters: {},
|
||||
})
|
||||
httpTestingController.match(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||
@@ -868,7 +866,7 @@ describe('BulkEditorComponent', () => {
|
||||
fixture.detectChanges()
|
||||
component.applyDelete()
|
||||
req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/bulk_edit/`
|
||||
`${environment.apiBaseUrl}documents/delete/`
|
||||
)
|
||||
})
|
||||
|
||||
@@ -944,13 +942,11 @@ describe('BulkEditorComponent', () => {
|
||||
expect(modal).not.toBeUndefined()
|
||||
modal.componentInstance.confirm()
|
||||
let req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/bulk_edit/`
|
||||
`${environment.apiBaseUrl}documents/reprocess/`
|
||||
)
|
||||
req.flush(true)
|
||||
expect(req.request.body).toEqual({
|
||||
documents: [3, 4],
|
||||
method: 'reprocess',
|
||||
parameters: {},
|
||||
})
|
||||
httpTestingController.match(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||
@@ -979,13 +975,13 @@ describe('BulkEditorComponent', () => {
|
||||
modal.componentInstance.rotate()
|
||||
modal.componentInstance.confirm()
|
||||
let req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/bulk_edit/`
|
||||
`${environment.apiBaseUrl}documents/rotate/`
|
||||
)
|
||||
req.flush(true)
|
||||
expect(req.request.body).toEqual({
|
||||
documents: [3, 4],
|
||||
method: 'rotate',
|
||||
parameters: { degrees: 90 },
|
||||
degrees: 90,
|
||||
source_mode: 'latest_version',
|
||||
})
|
||||
httpTestingController.match(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||
@@ -1021,13 +1017,12 @@ describe('BulkEditorComponent', () => {
|
||||
modal.componentInstance.metadataDocumentID = 3
|
||||
modal.componentInstance.confirm()
|
||||
let req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/bulk_edit/`
|
||||
`${environment.apiBaseUrl}documents/merge/`
|
||||
)
|
||||
req.flush(true)
|
||||
expect(req.request.body).toEqual({
|
||||
documents: [3, 4],
|
||||
method: 'merge',
|
||||
parameters: { metadata_document_id: 3 },
|
||||
metadata_document_id: 3,
|
||||
})
|
||||
httpTestingController.match(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||
@@ -1040,13 +1035,13 @@ describe('BulkEditorComponent', () => {
|
||||
modal.componentInstance.deleteOriginals = true
|
||||
modal.componentInstance.confirm()
|
||||
req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/bulk_edit/`
|
||||
`${environment.apiBaseUrl}documents/merge/`
|
||||
)
|
||||
req.flush(true)
|
||||
expect(req.request.body).toEqual({
|
||||
documents: [3, 4],
|
||||
method: 'merge',
|
||||
parameters: { metadata_document_id: 3, delete_originals: true },
|
||||
metadata_document_id: 3,
|
||||
delete_originals: true,
|
||||
})
|
||||
httpTestingController.match(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||
@@ -1061,13 +1056,13 @@ describe('BulkEditorComponent', () => {
|
||||
modal.componentInstance.archiveFallback = true
|
||||
modal.componentInstance.confirm()
|
||||
req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/bulk_edit/`
|
||||
`${environment.apiBaseUrl}documents/merge/`
|
||||
)
|
||||
req.flush(true)
|
||||
expect(req.request.body).toEqual({
|
||||
documents: [3, 4],
|
||||
method: 'merge',
|
||||
parameters: { metadata_document_id: 3, archive_fallback: true },
|
||||
metadata_document_id: 3,
|
||||
archive_fallback: true,
|
||||
})
|
||||
httpTestingController.match(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||
|
||||
@@ -12,7 +12,7 @@ import {
|
||||
} from '@ng-bootstrap/ng-bootstrap'
|
||||
import { saveAs } from 'file-saver'
|
||||
import { NgxBootstrapIconsModule } from 'ngx-bootstrap-icons'
|
||||
import { first, map, Subject, switchMap, takeUntil } from 'rxjs'
|
||||
import { first, map, Observable, Subject, switchMap, takeUntil } from 'rxjs'
|
||||
import { ConfirmDialogComponent } from 'src/app/components/common/confirm-dialog/confirm-dialog.component'
|
||||
import { CustomField } from 'src/app/data/custom-field'
|
||||
import { MatchingModel } from 'src/app/data/matching-model'
|
||||
@@ -29,7 +29,9 @@ import { CorrespondentService } from 'src/app/services/rest/correspondent.servic
|
||||
import { CustomFieldsService } from 'src/app/services/rest/custom-fields.service'
|
||||
import { DocumentTypeService } from 'src/app/services/rest/document-type.service'
|
||||
import {
|
||||
DocumentBulkEditMethod,
|
||||
DocumentService,
|
||||
MergeDocumentsRequest,
|
||||
SelectionDataItem,
|
||||
} from 'src/app/services/rest/document.service'
|
||||
import { SavedViewService } from 'src/app/services/rest/saved-view.service'
|
||||
@@ -255,9 +257,9 @@ export class BulkEditorComponent
|
||||
this.unsubscribeNotifier.complete()
|
||||
}
|
||||
|
||||
private executeBulkOperation(
|
||||
private executeBulkEditMethod(
|
||||
modal: NgbModalRef,
|
||||
method: string,
|
||||
method: DocumentBulkEditMethod,
|
||||
args: any,
|
||||
overrideDocumentIDs?: number[]
|
||||
) {
|
||||
@@ -272,32 +274,55 @@ export class BulkEditorComponent
|
||||
)
|
||||
.pipe(first())
|
||||
.subscribe({
|
||||
next: () => {
|
||||
if (args['delete_originals']) {
|
||||
this.list.selected.clear()
|
||||
}
|
||||
this.list.reload()
|
||||
this.list.reduceSelectionToFilter()
|
||||
this.list.selected.forEach((id) => {
|
||||
this.openDocumentService.refreshDocument(id)
|
||||
})
|
||||
this.savedViewService.maybeRefreshDocumentCounts()
|
||||
if (modal) {
|
||||
modal.close()
|
||||
}
|
||||
},
|
||||
error: (error) => {
|
||||
if (modal) {
|
||||
modal.componentInstance.buttonsEnabled = true
|
||||
}
|
||||
this.toastService.showError(
|
||||
$localize`Error executing bulk operation`,
|
||||
error
|
||||
)
|
||||
},
|
||||
next: () => this.handleOperationSuccess(modal),
|
||||
error: (error) => this.handleOperationError(modal, error),
|
||||
})
|
||||
}
|
||||
|
||||
private executeDocumentAction(
|
||||
modal: NgbModalRef,
|
||||
request: Observable<any>,
|
||||
options: { deleteOriginals?: boolean } = {}
|
||||
) {
|
||||
if (modal) {
|
||||
modal.componentInstance.buttonsEnabled = false
|
||||
}
|
||||
request.pipe(first()).subscribe({
|
||||
next: () => {
|
||||
this.handleOperationSuccess(modal, options.deleteOriginals ?? false)
|
||||
},
|
||||
error: (error) => this.handleOperationError(modal, error),
|
||||
})
|
||||
}
|
||||
|
||||
private handleOperationSuccess(
|
||||
modal: NgbModalRef,
|
||||
clearSelection: boolean = false
|
||||
) {
|
||||
if (clearSelection) {
|
||||
this.list.selected.clear()
|
||||
}
|
||||
this.list.reload()
|
||||
this.list.reduceSelectionToFilter()
|
||||
this.list.selected.forEach((id) => {
|
||||
this.openDocumentService.refreshDocument(id)
|
||||
})
|
||||
this.savedViewService.maybeRefreshDocumentCounts()
|
||||
if (modal) {
|
||||
modal.close()
|
||||
}
|
||||
}
|
||||
|
||||
private handleOperationError(modal: NgbModalRef, error: any) {
|
||||
if (modal) {
|
||||
modal.componentInstance.buttonsEnabled = true
|
||||
}
|
||||
this.toastService.showError(
|
||||
$localize`Error executing bulk operation`,
|
||||
error
|
||||
)
|
||||
}
|
||||
|
||||
private applySelectionData(
|
||||
items: SelectionDataItem[],
|
||||
selectionModel: FilterableDropdownSelectionModel
|
||||
@@ -446,13 +471,13 @@ export class BulkEditorComponent
|
||||
modal.componentInstance.confirmClicked
|
||||
.pipe(takeUntil(this.unsubscribeNotifier))
|
||||
.subscribe(() => {
|
||||
this.executeBulkOperation(modal, 'modify_tags', {
|
||||
this.executeBulkEditMethod(modal, 'modify_tags', {
|
||||
add_tags: changedTags.itemsToAdd.map((t) => t.id),
|
||||
remove_tags: changedTags.itemsToRemove.map((t) => t.id),
|
||||
})
|
||||
})
|
||||
} else {
|
||||
this.executeBulkOperation(null, 'modify_tags', {
|
||||
this.executeBulkEditMethod(null, 'modify_tags', {
|
||||
add_tags: changedTags.itemsToAdd.map((t) => t.id),
|
||||
remove_tags: changedTags.itemsToRemove.map((t) => t.id),
|
||||
})
|
||||
@@ -486,12 +511,12 @@ export class BulkEditorComponent
|
||||
modal.componentInstance.confirmClicked
|
||||
.pipe(takeUntil(this.unsubscribeNotifier))
|
||||
.subscribe(() => {
|
||||
this.executeBulkOperation(modal, 'set_correspondent', {
|
||||
this.executeBulkEditMethod(modal, 'set_correspondent', {
|
||||
correspondent: correspondent ? correspondent.id : null,
|
||||
})
|
||||
})
|
||||
} else {
|
||||
this.executeBulkOperation(null, 'set_correspondent', {
|
||||
this.executeBulkEditMethod(null, 'set_correspondent', {
|
||||
correspondent: correspondent ? correspondent.id : null,
|
||||
})
|
||||
}
|
||||
@@ -524,12 +549,12 @@ export class BulkEditorComponent
|
||||
modal.componentInstance.confirmClicked
|
||||
.pipe(takeUntil(this.unsubscribeNotifier))
|
||||
.subscribe(() => {
|
||||
this.executeBulkOperation(modal, 'set_document_type', {
|
||||
this.executeBulkEditMethod(modal, 'set_document_type', {
|
||||
document_type: documentType ? documentType.id : null,
|
||||
})
|
||||
})
|
||||
} else {
|
||||
this.executeBulkOperation(null, 'set_document_type', {
|
||||
this.executeBulkEditMethod(null, 'set_document_type', {
|
||||
document_type: documentType ? documentType.id : null,
|
||||
})
|
||||
}
|
||||
@@ -562,12 +587,12 @@ export class BulkEditorComponent
|
||||
modal.componentInstance.confirmClicked
|
||||
.pipe(takeUntil(this.unsubscribeNotifier))
|
||||
.subscribe(() => {
|
||||
this.executeBulkOperation(modal, 'set_storage_path', {
|
||||
this.executeBulkEditMethod(modal, 'set_storage_path', {
|
||||
storage_path: storagePath ? storagePath.id : null,
|
||||
})
|
||||
})
|
||||
} else {
|
||||
this.executeBulkOperation(null, 'set_storage_path', {
|
||||
this.executeBulkEditMethod(null, 'set_storage_path', {
|
||||
storage_path: storagePath ? storagePath.id : null,
|
||||
})
|
||||
}
|
||||
@@ -624,7 +649,7 @@ export class BulkEditorComponent
|
||||
modal.componentInstance.confirmClicked
|
||||
.pipe(takeUntil(this.unsubscribeNotifier))
|
||||
.subscribe(() => {
|
||||
this.executeBulkOperation(modal, 'modify_custom_fields', {
|
||||
this.executeBulkEditMethod(modal, 'modify_custom_fields', {
|
||||
add_custom_fields: changedCustomFields.itemsToAdd.map((f) => f.id),
|
||||
remove_custom_fields: changedCustomFields.itemsToRemove.map(
|
||||
(f) => f.id
|
||||
@@ -632,7 +657,7 @@ export class BulkEditorComponent
|
||||
})
|
||||
})
|
||||
} else {
|
||||
this.executeBulkOperation(null, 'modify_custom_fields', {
|
||||
this.executeBulkEditMethod(null, 'modify_custom_fields', {
|
||||
add_custom_fields: changedCustomFields.itemsToAdd.map((f) => f.id),
|
||||
remove_custom_fields: changedCustomFields.itemsToRemove.map(
|
||||
(f) => f.id
|
||||
@@ -762,10 +787,16 @@ export class BulkEditorComponent
|
||||
.pipe(takeUntil(this.unsubscribeNotifier))
|
||||
.subscribe(() => {
|
||||
modal.componentInstance.buttonsEnabled = false
|
||||
this.executeBulkOperation(modal, 'delete', {})
|
||||
this.executeDocumentAction(
|
||||
modal,
|
||||
this.documentService.deleteDocuments(Array.from(this.list.selected))
|
||||
)
|
||||
})
|
||||
} else {
|
||||
this.executeBulkOperation(null, 'delete', {})
|
||||
this.executeDocumentAction(
|
||||
null,
|
||||
this.documentService.deleteDocuments(Array.from(this.list.selected))
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -804,7 +835,12 @@ export class BulkEditorComponent
|
||||
.pipe(takeUntil(this.unsubscribeNotifier))
|
||||
.subscribe(() => {
|
||||
modal.componentInstance.buttonsEnabled = false
|
||||
this.executeBulkOperation(modal, 'reprocess', {})
|
||||
this.executeDocumentAction(
|
||||
modal,
|
||||
this.documentService.reprocessDocuments(
|
||||
Array.from(this.list.selected)
|
||||
)
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
@@ -815,7 +851,7 @@ export class BulkEditorComponent
|
||||
modal.componentInstance.confirmClicked.subscribe(
|
||||
({ permissions, merge }) => {
|
||||
modal.componentInstance.buttonsEnabled = false
|
||||
this.executeBulkOperation(modal, 'set_permissions', {
|
||||
this.executeBulkEditMethod(modal, 'set_permissions', {
|
||||
...permissions,
|
||||
merge,
|
||||
})
|
||||
@@ -838,9 +874,13 @@ export class BulkEditorComponent
|
||||
.pipe(takeUntil(this.unsubscribeNotifier))
|
||||
.subscribe(() => {
|
||||
rotateDialog.buttonsEnabled = false
|
||||
this.executeBulkOperation(modal, 'rotate', {
|
||||
degrees: rotateDialog.degrees,
|
||||
})
|
||||
this.executeDocumentAction(
|
||||
modal,
|
||||
this.documentService.rotateDocuments(
|
||||
Array.from(this.list.selected),
|
||||
rotateDialog.degrees
|
||||
)
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
@@ -856,18 +896,22 @@ export class BulkEditorComponent
|
||||
mergeDialog.confirmClicked
|
||||
.pipe(takeUntil(this.unsubscribeNotifier))
|
||||
.subscribe(() => {
|
||||
const args = {}
|
||||
const args: MergeDocumentsRequest = {}
|
||||
if (mergeDialog.metadataDocumentID > -1) {
|
||||
args['metadata_document_id'] = mergeDialog.metadataDocumentID
|
||||
args.metadata_document_id = mergeDialog.metadataDocumentID
|
||||
}
|
||||
if (mergeDialog.deleteOriginals) {
|
||||
args['delete_originals'] = true
|
||||
args.delete_originals = true
|
||||
}
|
||||
if (mergeDialog.archiveFallback) {
|
||||
args['archive_fallback'] = true
|
||||
args.archive_fallback = true
|
||||
}
|
||||
mergeDialog.buttonsEnabled = false
|
||||
this.executeBulkOperation(modal, 'merge', args, mergeDialog.documentIDs)
|
||||
this.executeDocumentAction(
|
||||
modal,
|
||||
this.documentService.mergeDocuments(mergeDialog.documentIDs, args),
|
||||
{ deleteOriginals: !!args.delete_originals }
|
||||
)
|
||||
this.toastService.showInfo(
|
||||
$localize`Merged document will be queued for consumption.`
|
||||
)
|
||||
|
||||
@@ -230,6 +230,88 @@ describe(`DocumentService`, () => {
|
||||
})
|
||||
})
|
||||
|
||||
it('should call appropriate api endpoint for delete documents', () => {
|
||||
const ids = [1, 2, 3]
|
||||
subscription = service.deleteDocuments(ids).subscribe()
|
||||
const req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}${endpoint}/delete/`
|
||||
)
|
||||
expect(req.request.method).toEqual('POST')
|
||||
expect(req.request.body).toEqual({
|
||||
documents: ids,
|
||||
})
|
||||
})
|
||||
|
||||
it('should call appropriate api endpoint for reprocess documents', () => {
|
||||
const ids = [1, 2, 3]
|
||||
subscription = service.reprocessDocuments(ids).subscribe()
|
||||
const req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}${endpoint}/reprocess/`
|
||||
)
|
||||
expect(req.request.method).toEqual('POST')
|
||||
expect(req.request.body).toEqual({
|
||||
documents: ids,
|
||||
})
|
||||
})
|
||||
|
||||
it('should call appropriate api endpoint for rotate documents', () => {
|
||||
const ids = [1, 2, 3]
|
||||
subscription = service.rotateDocuments(ids, 90).subscribe()
|
||||
const req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}${endpoint}/rotate/`
|
||||
)
|
||||
expect(req.request.method).toEqual('POST')
|
||||
expect(req.request.body).toEqual({
|
||||
documents: ids,
|
||||
degrees: 90,
|
||||
source_mode: 'latest_version',
|
||||
})
|
||||
})
|
||||
|
||||
it('should call appropriate api endpoint for merge documents', () => {
|
||||
const ids = [1, 2, 3]
|
||||
const args = { metadata_document_id: 1, delete_originals: true }
|
||||
subscription = service.mergeDocuments(ids, args).subscribe()
|
||||
const req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}${endpoint}/merge/`
|
||||
)
|
||||
expect(req.request.method).toEqual('POST')
|
||||
expect(req.request.body).toEqual({
|
||||
documents: ids,
|
||||
metadata_document_id: 1,
|
||||
delete_originals: true,
|
||||
})
|
||||
})
|
||||
|
||||
it('should call appropriate api endpoint for edit pdf', () => {
|
||||
const ids = [1]
|
||||
const args = { operations: [{ page: 1, rotate: 90, doc: 0 }] }
|
||||
subscription = service.editPdfDocuments(ids, args).subscribe()
|
||||
const req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}${endpoint}/edit_pdf/`
|
||||
)
|
||||
expect(req.request.method).toEqual('POST')
|
||||
expect(req.request.body).toEqual({
|
||||
documents: ids,
|
||||
operations: [{ page: 1, rotate: 90, doc: 0 }],
|
||||
})
|
||||
})
|
||||
|
||||
it('should call appropriate api endpoint for remove password', () => {
|
||||
const ids = [1]
|
||||
const args = { password: 'secret', update_document: true }
|
||||
subscription = service.removePasswordDocuments(ids, args).subscribe()
|
||||
const req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}${endpoint}/remove_password/`
|
||||
)
|
||||
expect(req.request.method).toEqual('POST')
|
||||
expect(req.request.body).toEqual({
|
||||
documents: ids,
|
||||
password: 'secret',
|
||||
update_document: true,
|
||||
})
|
||||
})
|
||||
|
||||
it('should return the correct preview URL for a single document', () => {
|
||||
let url = service.getPreviewUrl(documents[0].id)
|
||||
expect(url).toEqual(
|
||||
|
||||
@@ -42,6 +42,45 @@ export enum BulkEditSourceMode {
|
||||
EXPLICIT_SELECTION = 'explicit_selection',
|
||||
}
|
||||
|
||||
export type DocumentBulkEditMethod =
|
||||
| 'set_correspondent'
|
||||
| 'set_document_type'
|
||||
| 'set_storage_path'
|
||||
| 'add_tag'
|
||||
| 'remove_tag'
|
||||
| 'modify_tags'
|
||||
| 'modify_custom_fields'
|
||||
| 'set_permissions'
|
||||
|
||||
export interface MergeDocumentsRequest {
|
||||
metadata_document_id?: number
|
||||
delete_originals?: boolean
|
||||
archive_fallback?: boolean
|
||||
source_mode?: BulkEditSourceMode
|
||||
}
|
||||
|
||||
/** One per-page instruction inside an edit-PDF request. */
export interface EditPdfOperation {
  /** Page number the operation applies to (required). */
  page: number
  /** Optional rotation in degrees. */
  rotate?: number
  /** Optional index of the output document the page goes to. */
  doc?: number
}
|
||||
|
||||
/**
 * Body fields (besides the document ids) for the edit-PDF call.
 * Only `operations` is mandatory.
 */
export interface EditPdfDocumentsRequest {
  /** Page operations to perform. */
  operations: EditPdfOperation[]
  /** Sent as `delete_original`. */
  delete_original?: boolean
  /** Sent as `update_document`. */
  update_document?: boolean
  /** Sent as `include_metadata`. */
  include_metadata?: boolean
  /** Which document version the operation works on. */
  source_mode?: BulkEditSourceMode
}
|
||||
|
||||
/**
 * Body fields (besides the document ids) for the remove-password call.
 * Only `password` is mandatory.
 */
export interface RemovePasswordDocumentsRequest {
  /** Password sent with the request. */
  password: string
  /** Sent as `update_document`. */
  update_document?: boolean
  /** Sent as `delete_original`. */
  delete_original?: boolean
  /** Sent as `include_metadata`. */
  include_metadata?: boolean
  /** Which document version the operation works on. */
  source_mode?: BulkEditSourceMode
}
|
||||
|
||||
@Injectable({
|
||||
providedIn: 'root',
|
||||
})
|
||||
@@ -299,7 +338,7 @@ export class DocumentService extends AbstractPaperlessService<Document> {
|
||||
return this.http.get<DocumentMetadata>(url.toString())
|
||||
}
|
||||
|
||||
bulkEdit(ids: number[], method: string, args: any) {
|
||||
bulkEdit(ids: number[], method: DocumentBulkEditMethod, args: any) {
|
||||
return this.http.post(this.getResourceUrl(null, 'bulk_edit'), {
|
||||
documents: ids,
|
||||
method: method,
|
||||
@@ -307,6 +346,54 @@ export class DocumentService extends AbstractPaperlessService<Document> {
|
||||
})
|
||||
}
|
||||
|
||||
/** POSTs the given document ids to the `delete` resource URL. */
deleteDocuments(ids: number[]) {
  const url = this.getResourceUrl(null, 'delete')
  return this.http.post(url, { documents: ids })
}
|
||||
|
||||
/** POSTs the given document ids to the `reprocess` resource URL. */
reprocessDocuments(ids: number[]) {
  const url = this.getResourceUrl(null, 'reprocess')
  return this.http.post(url, { documents: ids })
}
|
||||
|
||||
/**
 * POSTs a rotation request for the given documents to the `rotate`
 * resource URL.
 *
 * @param ids ids of the documents to rotate
 * @param degrees rotation value, sent as `degrees`
 * @param sourceMode sent as `source_mode`; defaults to LATEST_VERSION
 */
rotateDocuments(
  ids: number[],
  degrees: number,
  sourceMode: BulkEditSourceMode = BulkEditSourceMode.LATEST_VERSION
) {
  const url = this.getResourceUrl(null, 'rotate')
  const payload = {
    documents: ids,
    degrees,
    source_mode: sourceMode,
  }
  return this.http.post(url, payload)
}
|
||||
|
||||
/**
 * POSTs a merge request to the `merge` resource URL. The request fields
 * are spread after `documents` in the body.
 */
mergeDocuments(ids: number[], request: MergeDocumentsRequest = {}) {
  const url = this.getResourceUrl(null, 'merge')
  const payload = { documents: ids, ...request }
  return this.http.post(url, payload)
}
|
||||
|
||||
/**
 * POSTs an edit-PDF request to the `edit_pdf` resource URL. The request
 * fields are spread after `documents` in the body.
 */
editPdfDocuments(ids: number[], request: EditPdfDocumentsRequest) {
  const url = this.getResourceUrl(null, 'edit_pdf')
  const payload = { documents: ids, ...request }
  return this.http.post(url, payload)
}
|
||||
|
||||
/**
 * POSTs a remove-password request to the `remove_password` resource URL.
 * The request fields are spread after `documents` in the body.
 */
removePasswordDocuments(
  ids: number[],
  request: RemovePasswordDocumentsRequest
) {
  const url = this.getResourceUrl(null, 'remove_password')
  const payload = { documents: ids, ...request }
  return this.http.post(url, payload)
}
|
||||
|
||||
getSelectionData(ids: number[]): Observable<SelectionData> {
|
||||
return this.http.post<SelectionData>(
|
||||
this.getResourceUrl(null, 'selection_data'),
|
||||
|
||||
@@ -51,28 +51,11 @@ from documents.templating.workflows import parse_w_workflow_placeholders
|
||||
from documents.utils import copy_basic_file_stats
|
||||
from documents.utils import copy_file_with_basic_stats
|
||||
from documents.utils import run_subprocess
|
||||
from paperless.parsers.text import TextDocumentParser
|
||||
from paperless_mail.parsers import MailDocumentParser
|
||||
|
||||
LOGGING_NAME: Final[str] = "paperless.consumer"
|
||||
|
||||
|
||||
def _parser_cleanup(parser: DocumentParser) -> None:
    """
    Tear down a parser regardless of which parser style it follows.

    Old-style parsers expose cleanup(); TextDocumentParser is torn down
    through its context-manager __exit__ instead. This shim is meant to
    go away once every parser uses the new style and the consumer is
    updated accordingly.

    TODO(stumpylog): Remove me in the future
    """
    if not isinstance(parser, TextDocumentParser):
        parser.cleanup()
        return
    # New-style parser: emulate leaving a `with` block without an exception.
    parser.__exit__(None, None, None)
|
||||
|
||||
|
||||
class WorkflowTriggerPlugin(
|
||||
NoCleanupPluginMixin,
|
||||
NoSetupPluginMixin,
|
||||
@@ -476,9 +459,6 @@ class ConsumerPlugin(
|
||||
self.filename,
|
||||
self.input_doc.mailrule_id,
|
||||
)
|
||||
elif isinstance(document_parser, TextDocumentParser):
|
||||
# TODO(stumpylog): Remove me in the future
|
||||
document_parser.parse(self.working_copy, mime_type)
|
||||
else:
|
||||
document_parser.parse(self.working_copy, mime_type, self.filename)
|
||||
|
||||
@@ -489,15 +469,11 @@ class ConsumerPlugin(
|
||||
ProgressStatusOptions.WORKING,
|
||||
ConsumerStatusShortMessage.GENERATING_THUMBNAIL,
|
||||
)
|
||||
if isinstance(document_parser, TextDocumentParser):
|
||||
# TODO(stumpylog): Remove me in the future
|
||||
thumbnail = document_parser.get_thumbnail(self.working_copy, mime_type)
|
||||
else:
|
||||
thumbnail = document_parser.get_thumbnail(
|
||||
self.working_copy,
|
||||
mime_type,
|
||||
self.filename,
|
||||
)
|
||||
thumbnail = document_parser.get_thumbnail(
|
||||
self.working_copy,
|
||||
mime_type,
|
||||
self.filename,
|
||||
)
|
||||
|
||||
text = document_parser.get_text()
|
||||
date = document_parser.get_date()
|
||||
@@ -514,7 +490,7 @@ class ConsumerPlugin(
|
||||
page_count = document_parser.get_page_count(self.working_copy, mime_type)
|
||||
|
||||
except ParseError as e:
|
||||
_parser_cleanup(document_parser)
|
||||
document_parser.cleanup()
|
||||
if tempdir:
|
||||
tempdir.cleanup()
|
||||
self._fail(
|
||||
@@ -524,7 +500,7 @@ class ConsumerPlugin(
|
||||
exception=e,
|
||||
)
|
||||
except Exception as e:
|
||||
_parser_cleanup(document_parser)
|
||||
document_parser.cleanup()
|
||||
if tempdir:
|
||||
tempdir.cleanup()
|
||||
self._fail(
|
||||
@@ -726,7 +702,7 @@ class ConsumerPlugin(
|
||||
exception=e,
|
||||
)
|
||||
finally:
|
||||
_parser_cleanup(document_parser)
|
||||
document_parser.cleanup()
|
||||
tempdir.cleanup()
|
||||
|
||||
self.run_post_consume_script(document)
|
||||
|
||||
@@ -30,7 +30,6 @@ def _process_document(doc_id: int) -> None:
|
||||
)
|
||||
shutil.move(thumb, document.thumbnail_path)
|
||||
finally:
|
||||
# TODO(stumpylog): Cleanup once all parsers are handled
|
||||
parser.cleanup()
|
||||
|
||||
|
||||
|
||||
@@ -1540,11 +1540,124 @@ class DocumentListSerializer(serializers.Serializer):
|
||||
return documents
|
||||
|
||||
|
||||
# Mixin providing the field-level validator for ``source_mode``; the
# validator can also be called directly with a raw value.
class SourceModeValidationMixin:
    def validate_source_mode(self, source_mode: str) -> str:
        """
        Ensure ``source_mode`` is one of the values declared on
        ``bulk_edit.SourceModeChoices``.

        Only public, string-valued class attributes count as choices.
        Looking at every value in the class ``__dict__`` would also accept
        bookkeeping entries such as the ``__module__`` string or the
        ``None`` stored under ``__doc__``.

        Raises:
            serializers.ValidationError: if the value is not a known choice.
        """
        allowed = [
            value
            for name, value in vars(bulk_edit.SourceModeChoices).items()
            if not name.startswith("_") and isinstance(value, str)
        ]
        # The isinstance guard rejects None / non-string JSON values up front.
        if not isinstance(source_mode, str) or source_mode not in allowed:
            raise serializers.ValidationError("Invalid source_mode")
        return source_mode
|
||||
|
||||
|
||||
# Request body for the rotate endpoint: document ids (inherited),
# a mandatory integer ``degrees`` and an optional ``source_mode``.
class RotateDocumentsSerializer(DocumentListSerializer, SourceModeValidationMixin):
    degrees = serializers.IntegerField(required=True)
    # Falls back to the latest version when the client omits it.
    source_mode = serializers.CharField(
        default=bulk_edit.SourceModeChoices.LATEST_VERSION,
        required=False,
    )
|
||||
|
||||
|
||||
# Request body for the merge endpoint: document ids (inherited) plus
# optional merge settings, each with a default.
class MergeDocumentsSerializer(DocumentListSerializer, SourceModeValidationMixin):
    # May be omitted or sent as null.
    metadata_document_id = serializers.IntegerField(allow_null=True, required=False)
    delete_originals = serializers.BooleanField(default=False, required=False)
    archive_fallback = serializers.BooleanField(default=False, required=False)
    # Falls back to the latest version when the client omits it.
    source_mode = serializers.CharField(
        default=bulk_edit.SourceModeChoices.LATEST_VERSION,
        required=False,
    )
|
||||
|
||||
|
||||
# Request body for the edit_pdf endpoint: exactly one document id plus a
# list of per-page operations and optional flags.
class EditPdfDocumentsSerializer(DocumentListSerializer, SourceModeValidationMixin):
    operations = serializers.ListField(required=True)
    delete_original = serializers.BooleanField(required=False, default=False)
    update_document = serializers.BooleanField(required=False, default=False)
    include_metadata = serializers.BooleanField(required=False, default=True)
    source_mode = serializers.CharField(
        required=False,
        default=bulk_edit.SourceModeChoices.LATEST_VERSION,
    )

    @staticmethod
    def _is_int(value) -> bool:
        """Return True for real integers only; bool is an int subclass and is rejected."""
        return isinstance(value, int) and not isinstance(value, bool)

    def validate(self, attrs):
        """
        Cross-field validation of an edit-PDF request.

        Checks, in order:
        - exactly one document id was supplied,
        - ``operations`` is a list of dicts with an integer ``page`` and,
          when present, integer ``rotate`` / ``doc`` values,
        - ``update_document`` is only combined with a single output document,
        - every ``page`` lies within the document's page count when known.

        Raises:
            serializers.ValidationError: on the first failed check.
        """
        documents = attrs["documents"]
        # An empty list must be rejected too: documents[0] is read below.
        if len(documents) != 1:
            raise serializers.ValidationError(
                "Edit PDF method only supports one document",
            )

        operations = attrs["operations"]
        if not isinstance(operations, list):
            raise serializers.ValidationError("operations must be a list")

        for op in operations:
            if not isinstance(op, dict):
                raise serializers.ValidationError("invalid operation entry")
            if "page" not in op or not self._is_int(op["page"]):
                raise serializers.ValidationError("page must be an integer")
            if "rotate" in op and not self._is_int(op["rotate"]):
                raise serializers.ValidationError("rotate must be an integer")
            if "doc" in op and not self._is_int(op["doc"]):
                raise serializers.ValidationError("doc must be an integer")

        if attrs["update_document"]:
            # default=0 keeps an empty operations list from raising ValueError.
            max_idx = max((op.get("doc", 0) for op in operations), default=0)
            if max_idx > 0:
                raise serializers.ValidationError(
                    "update_document only allowed with a single output document",
                )

        doc = Document.objects.get(id=documents[0])
        # The bounds check is skipped when page_count is falsy.
        if doc.page_count:
            for op in operations:
                if op["page"] < 1 or op["page"] > doc.page_count:
                    raise serializers.ValidationError(
                        f"Page {op['page']} is out of bounds for document with {doc.page_count} pages.",
                    )
        return attrs
|
||||
|
||||
|
||||
# Request body for the remove_password endpoint: document ids (inherited),
# a mandatory ``password`` and optional flags with defaults.
class RemovePasswordDocumentsSerializer(DocumentListSerializer, SourceModeValidationMixin):
    password = serializers.CharField(required=True)
    update_document = serializers.BooleanField(default=False, required=False)
    delete_original = serializers.BooleanField(default=False, required=False)
    include_metadata = serializers.BooleanField(default=True, required=False)
    # Falls back to the latest version when the client omits it.
    source_mode = serializers.CharField(
        default=bulk_edit.SourceModeChoices.LATEST_VERSION,
        required=False,
    )
|
||||
|
||||
|
||||
# Request body for the delete endpoint; adds nothing to the inherited
# document id list.
class DeleteDocumentsSerializer(DocumentListSerializer): ...
|
||||
|
||||
|
||||
# Request body for the reprocess endpoint; adds nothing to the inherited
# document id list.
class ReprocessDocumentsSerializer(DocumentListSerializer): ...
|
||||
|
||||
|
||||
class BulkEditSerializer(
|
||||
SerializerWithPerms,
|
||||
DocumentListSerializer,
|
||||
SetPermissionsMixin,
|
||||
SourceModeValidationMixin,
|
||||
):
|
||||
# TODO: remove this and related backwards compatibility code when API v9 is dropped
|
||||
# split, delete_pages can be removed entirely
|
||||
MOVED_DOCUMENT_ACTION_ENDPOINTS = {
|
||||
"delete": "/api/documents/delete/",
|
||||
"reprocess": "/api/documents/reprocess/",
|
||||
"rotate": "/api/documents/rotate/",
|
||||
"merge": "/api/documents/merge/",
|
||||
"edit_pdf": "/api/documents/edit_pdf/",
|
||||
"remove_password": "/api/documents/remove_password/",
|
||||
"split": "/api/documents/edit_pdf/",
|
||||
"delete_pages": "/api/documents/edit_pdf/",
|
||||
}
|
||||
LEGACY_DOCUMENT_ACTION_METHODS = tuple(MOVED_DOCUMENT_ACTION_ENDPOINTS.keys())
|
||||
|
||||
method = serializers.ChoiceField(
|
||||
choices=[
|
||||
"set_correspondent",
|
||||
@@ -1554,15 +1667,8 @@ class BulkEditSerializer(
|
||||
"remove_tag",
|
||||
"modify_tags",
|
||||
"modify_custom_fields",
|
||||
"delete",
|
||||
"reprocess",
|
||||
"set_permissions",
|
||||
"rotate",
|
||||
"merge",
|
||||
"split",
|
||||
"delete_pages",
|
||||
"edit_pdf",
|
||||
"remove_password",
|
||||
*LEGACY_DOCUMENT_ACTION_METHODS,
|
||||
],
|
||||
label="Method",
|
||||
write_only=True,
|
||||
@@ -1640,8 +1746,7 @@ class BulkEditSerializer(
|
||||
return bulk_edit.edit_pdf
|
||||
elif method == "remove_password":
|
||||
return bulk_edit.remove_password
|
||||
else: # pragma: no cover
|
||||
# This will never happen as it is handled by the ChoiceField
|
||||
else:
|
||||
raise serializers.ValidationError("Unsupported method.")
|
||||
|
||||
def _validate_parameters_tags(self, parameters) -> None:
|
||||
@@ -1751,9 +1856,7 @@ class BulkEditSerializer(
|
||||
"source_mode",
|
||||
bulk_edit.SourceModeChoices.LATEST_VERSION,
|
||||
)
|
||||
if source_mode not in bulk_edit.SourceModeChoices.__dict__.values():
|
||||
raise serializers.ValidationError("Invalid source_mode")
|
||||
parameters["source_mode"] = source_mode
|
||||
parameters["source_mode"] = self.validate_source_mode(source_mode)
|
||||
|
||||
def _validate_parameters_split(self, parameters) -> None:
|
||||
if "pages" not in parameters:
|
||||
|
||||
@@ -399,7 +399,6 @@ def update_document_content_maybe_archive_file(document_id) -> None:
|
||||
f"Error while parsing document {document} (ID: {document_id})",
|
||||
)
|
||||
finally:
|
||||
# TODO(stumpylog): Cleanup once all parsers are handled
|
||||
parser.cleanup()
|
||||
|
||||
|
||||
|
||||
@@ -422,6 +422,34 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
|
||||
self.assertEqual(args[0], [self.doc1.id])
|
||||
self.assertEqual(len(kwargs), 0)
|
||||
|
||||
@mock.patch("documents.views.bulk_edit.delete")
def test_delete_documents_endpoint(self, m) -> None:
    """
    GIVEN:
        - A single existing document
    WHEN:
        - The dedicated delete endpoint is called with its id
    THEN:
        - The request succeeds and bulk_edit.delete receives only the id list
    """
    self.setup_mock(m, "delete")
    payload = json.dumps({"documents": [self.doc1.id]})

    response = self.client.post(
        "/api/documents/delete/",
        payload,
        content_type="application/json",
    )

    self.assertEqual(response.status_code, status.HTTP_200_OK)
    m.assert_called_once()
    call_args, call_kwargs = m.call_args
    self.assertEqual(call_args[0], [self.doc1.id])
    self.assertEqual(len(call_kwargs), 0)
|
||||
|
||||
@mock.patch("documents.views.bulk_edit.reprocess")
def test_reprocess_documents_endpoint(self, m) -> None:
    """
    GIVEN:
        - A single existing document
    WHEN:
        - The dedicated reprocess endpoint is called with its id
    THEN:
        - The request succeeds and bulk_edit.reprocess receives only the id list
    """
    self.setup_mock(m, "reprocess")
    payload = json.dumps({"documents": [self.doc1.id]})

    response = self.client.post(
        "/api/documents/reprocess/",
        payload,
        content_type="application/json",
    )

    self.assertEqual(response.status_code, status.HTTP_200_OK)
    m.assert_called_once()
    call_args, call_kwargs = m.call_args
    self.assertEqual(call_args[0], [self.doc1.id])
    self.assertEqual(len(call_kwargs), 0)
|
||||
|
||||
@mock.patch("documents.serialisers.bulk_edit.set_storage_path")
|
||||
def test_api_set_storage_path(self, m) -> None:
|
||||
"""
|
||||
@@ -877,7 +905,7 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
|
||||
self.assertEqual(kwargs["merge"], True)
|
||||
|
||||
@mock.patch("documents.serialisers.bulk_edit.set_storage_path")
|
||||
@mock.patch("documents.serialisers.bulk_edit.merge")
|
||||
@mock.patch("documents.views.bulk_edit.merge")
|
||||
def test_insufficient_global_perms(self, mock_merge, mock_set_storage) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
@@ -912,12 +940,11 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
|
||||
mock_set_storage.assert_not_called()
|
||||
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
"/api/documents/merge/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc1.id],
|
||||
"method": "merge",
|
||||
"parameters": {"metadata_document_id": self.doc1.id},
|
||||
"metadata_document_id": self.doc1.id,
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
@@ -927,15 +954,12 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
|
||||
mock_merge.assert_not_called()
|
||||
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
"/api/documents/merge/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc1.id],
|
||||
"method": "merge",
|
||||
"parameters": {
|
||||
"metadata_document_id": self.doc1.id,
|
||||
"delete_originals": True,
|
||||
},
|
||||
"metadata_document_id": self.doc1.id,
|
||||
"delete_originals": True,
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
@@ -1052,85 +1076,57 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
|
||||
|
||||
m.assert_called_once()
|
||||
|
||||
@mock.patch("documents.serialisers.bulk_edit.rotate")
|
||||
@mock.patch("documents.views.bulk_edit.rotate")
|
||||
def test_rotate(self, m) -> None:
|
||||
self.setup_mock(m, "rotate")
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
"/api/documents/rotate/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id, self.doc3.id],
|
||||
"method": "rotate",
|
||||
"parameters": {"degrees": 90},
|
||||
"degrees": 90,
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
|
||||
m.assert_called_once()
|
||||
args, kwargs = m.call_args
|
||||
self.assertCountEqual(args[0], [self.doc2.id, self.doc3.id])
|
||||
self.assertEqual(kwargs["degrees"], 90)
|
||||
|
||||
@mock.patch("documents.serialisers.bulk_edit.rotate")
|
||||
def test_rotate_invalid_params(self, m) -> None:
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id, self.doc3.id],
|
||||
"method": "rotate",
|
||||
"parameters": {"degrees": "foo"},
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id, self.doc3.id],
|
||||
"method": "rotate",
|
||||
"parameters": {"degrees": 90.5},
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
|
||||
m.assert_not_called()
|
||||
|
||||
@mock.patch("documents.serialisers.bulk_edit.merge")
|
||||
def test_merge(self, m) -> None:
|
||||
self.setup_mock(m, "merge")
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id, self.doc3.id],
|
||||
"method": "merge",
|
||||
"parameters": {"metadata_document_id": self.doc3.id},
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
|
||||
m.assert_called_once()
|
||||
args, kwargs = m.call_args
|
||||
self.assertCountEqual(args[0], [self.doc2.id, self.doc3.id])
|
||||
self.assertEqual(kwargs["metadata_document_id"], self.doc3.id)
|
||||
self.assertEqual(kwargs["source_mode"], "latest_version")
|
||||
self.assertEqual(kwargs["user"], self.user)
|
||||
|
||||
@mock.patch("documents.serialisers.bulk_edit.merge")
|
||||
def test_merge_and_delete_insufficient_permissions(self, m) -> None:
|
||||
@mock.patch("documents.views.bulk_edit.rotate")
def test_rotate_invalid_params(self, m) -> None:
    """
    GIVEN:
        - Rotate requests whose degrees value is not an integer
    WHEN:
        - The rotate endpoint is called
    THEN:
        - Each request is answered with HTTP 400
        - The patched rotate function is never called
    """
    # A non-numeric string first, then a non-integral number.
    for bad_degrees in ("foo", 90.5):
        response = self.client.post(
            "/api/documents/rotate/",
            json.dumps(
                {
                    "documents": [self.doc2.id, self.doc3.id],
                    "degrees": bad_degrees,
                },
            ),
            content_type="application/json",
        )
        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)

    m.assert_not_called()
|
||||
|
||||
@mock.patch("documents.views.bulk_edit.rotate")
|
||||
def test_rotate_insufficient_permissions(self, m) -> None:
|
||||
self.doc1.owner = User.objects.get(username="temp_admin")
|
||||
self.doc1.save()
|
||||
user1 = User.objects.create(username="user1")
|
||||
@@ -1138,17 +1134,13 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
|
||||
user1.save()
|
||||
self.client.force_authenticate(user=user1)
|
||||
|
||||
self.setup_mock(m, "merge")
|
||||
self.setup_mock(m, "rotate")
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
"/api/documents/rotate/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc1.id, self.doc2.id],
|
||||
"method": "merge",
|
||||
"parameters": {
|
||||
"metadata_document_id": self.doc2.id,
|
||||
"delete_originals": True,
|
||||
},
|
||||
"degrees": 90,
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
@@ -1159,15 +1151,11 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
|
||||
self.assertEqual(response.content, b"Insufficient permissions")
|
||||
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
"/api/documents/rotate/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id, self.doc3.id],
|
||||
"method": "merge",
|
||||
"parameters": {
|
||||
"metadata_document_id": self.doc2.id,
|
||||
"delete_originals": True,
|
||||
},
|
||||
"degrees": 90,
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
@@ -1176,27 +1164,78 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
m.assert_called_once()
|
||||
|
||||
@mock.patch("documents.serialisers.bulk_edit.merge")
|
||||
def test_merge_invalid_parameters(self, m) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- API data for merging documents is called
|
||||
- The parameters are invalid
|
||||
WHEN:
|
||||
- API is called
|
||||
THEN:
|
||||
- The API fails with a correct error code
|
||||
"""
|
||||
@mock.patch("documents.views.bulk_edit.merge")
|
||||
def test_merge(self, m) -> None:
|
||||
self.setup_mock(m, "merge")
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
"/api/documents/merge/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id, self.doc3.id],
|
||||
"metadata_document_id": self.doc3.id,
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
m.assert_called_once()
|
||||
args, kwargs = m.call_args
|
||||
self.assertCountEqual(args[0], [self.doc2.id, self.doc3.id])
|
||||
self.assertEqual(kwargs["metadata_document_id"], self.doc3.id)
|
||||
self.assertEqual(kwargs["source_mode"], "latest_version")
|
||||
self.assertEqual(kwargs["user"], self.user)
|
||||
|
||||
@mock.patch("documents.views.bulk_edit.merge")
|
||||
def test_merge_and_delete_insufficient_permissions(self, m) -> None:
|
||||
self.doc1.owner = User.objects.get(username="temp_admin")
|
||||
self.doc1.save()
|
||||
user1 = User.objects.create(username="user1")
|
||||
user1.user_permissions.add(*Permission.objects.all())
|
||||
user1.save()
|
||||
self.client.force_authenticate(user=user1)
|
||||
|
||||
self.setup_mock(m, "merge")
|
||||
response = self.client.post(
|
||||
"/api/documents/merge/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc1.id, self.doc2.id],
|
||||
"method": "merge",
|
||||
"parameters": {
|
||||
"delete_originals": "not_boolean",
|
||||
},
|
||||
"metadata_document_id": self.doc2.id,
|
||||
"delete_originals": True,
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
|
||||
m.assert_not_called()
|
||||
self.assertEqual(response.content, b"Insufficient permissions")
|
||||
|
||||
response = self.client.post(
|
||||
"/api/documents/merge/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id, self.doc3.id],
|
||||
"metadata_document_id": self.doc2.id,
|
||||
"delete_originals": True,
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
m.assert_called_once()
|
||||
|
||||
@mock.patch("documents.views.bulk_edit.merge")
|
||||
def test_merge_invalid_parameters(self, m) -> None:
|
||||
self.setup_mock(m, "merge")
|
||||
response = self.client.post(
|
||||
"/api/documents/merge/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc1.id, self.doc2.id],
|
||||
"delete_originals": "not_boolean",
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
@@ -1205,207 +1244,67 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
m.assert_not_called()
|
||||
|
||||
@mock.patch("documents.serialisers.bulk_edit.split")
|
||||
def test_split(self, m) -> None:
|
||||
self.setup_mock(m, "split")
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"method": "split",
|
||||
"parameters": {"pages": "1,2-4,5-6,7"},
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
def test_bulk_edit_allows_legacy_file_methods_with_warning(self) -> None:
|
||||
method_payloads = {
|
||||
"delete": {},
|
||||
"reprocess": {},
|
||||
"rotate": {"degrees": 90},
|
||||
"merge": {"metadata_document_id": self.doc2.id},
|
||||
"edit_pdf": {"operations": [{"page": 1}]},
|
||||
"remove_password": {"password": "secret"},
|
||||
"split": {"pages": "1,2-4"},
|
||||
"delete_pages": {"pages": [1, 2]},
|
||||
}
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
for version in (9, 10):
|
||||
for method, parameters in method_payloads.items():
|
||||
with self.subTest(method=method, version=version):
|
||||
with mock.patch(
|
||||
f"documents.views.bulk_edit.{method}",
|
||||
) as mocked_method:
|
||||
self.setup_mock(mocked_method, method)
|
||||
with self.assertLogs("paperless.api", level="WARNING") as logs:
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"method": method,
|
||||
"parameters": parameters,
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
headers={
|
||||
"Accept": f"application/json; version={version}",
|
||||
},
|
||||
)
|
||||
|
||||
m.assert_called_once()
|
||||
args, kwargs = m.call_args
|
||||
self.assertCountEqual(args[0], [self.doc2.id])
|
||||
self.assertEqual(kwargs["pages"], [[1], [2, 3, 4], [5, 6], [7]])
|
||||
self.assertEqual(kwargs["user"], self.user)
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
mocked_method.assert_called_once()
|
||||
self.assertTrue(
|
||||
any(
|
||||
"Deprecated bulk_edit method" in entry
|
||||
and f"'{method}'" in entry
|
||||
for entry in logs.output
|
||||
),
|
||||
)
|
||||
|
||||
def test_split_invalid_params(self) -> None:
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"method": "split",
|
||||
"parameters": {}, # pages not specified
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn(b"pages not specified", response.content)
|
||||
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"method": "split",
|
||||
"parameters": {"pages": "1:7"}, # wrong format
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn(b"invalid pages specified", response.content)
|
||||
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [
|
||||
self.doc1.id,
|
||||
self.doc2.id,
|
||||
], # only one document supported
|
||||
"method": "split",
|
||||
"parameters": {"pages": "1-2,3-7"}, # wrong format
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn(b"Split method only supports one document", response.content)
|
||||
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"method": "split",
|
||||
"parameters": {
|
||||
"pages": "1",
|
||||
"delete_originals": "notabool",
|
||||
}, # not a bool
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn(b"delete_originals must be a boolean", response.content)
|
||||
|
||||
@mock.patch("documents.serialisers.bulk_edit.delete_pages")
|
||||
def test_delete_pages(self, m) -> None:
|
||||
self.setup_mock(m, "delete_pages")
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"method": "delete_pages",
|
||||
"parameters": {"pages": [1, 2, 3, 4]},
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
|
||||
m.assert_called_once()
|
||||
args, kwargs = m.call_args
|
||||
self.assertCountEqual(args[0], [self.doc2.id])
|
||||
self.assertEqual(kwargs["pages"], [1, 2, 3, 4])
|
||||
|
||||
def test_delete_pages_invalid_params(self) -> None:
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [
|
||||
self.doc1.id,
|
||||
self.doc2.id,
|
||||
], # only one document supported
|
||||
"method": "delete_pages",
|
||||
"parameters": {
|
||||
"pages": [1, 2, 3, 4],
|
||||
},
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn(
|
||||
b"Delete pages method only supports one document",
|
||||
response.content,
|
||||
)
|
||||
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"method": "delete_pages",
|
||||
"parameters": {}, # pages not specified
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn(b"pages not specified", response.content)
|
||||
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"method": "delete_pages",
|
||||
"parameters": {"pages": "1-3"}, # not a list
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn(b"pages must be a list", response.content)
|
||||
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"method": "delete_pages",
|
||||
"parameters": {"pages": ["1-3"]}, # not ints
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn(b"pages must be a list of integers", response.content)
|
||||
|
||||
@mock.patch("documents.serialisers.bulk_edit.edit_pdf")
|
||||
@mock.patch("documents.views.bulk_edit.edit_pdf")
|
||||
def test_edit_pdf(self, m) -> None:
|
||||
self.setup_mock(m, "edit_pdf")
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
"/api/documents/edit_pdf/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"method": "edit_pdf",
|
||||
"parameters": {
|
||||
"operations": [{"page": 1}],
|
||||
"source_mode": "explicit_selection",
|
||||
},
|
||||
"operations": [{"page": 1}],
|
||||
"source_mode": "explicit_selection",
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
|
||||
m.assert_called_once()
|
||||
args, kwargs = m.call_args
|
||||
self.assertCountEqual(args[0], [self.doc2.id])
|
||||
@@ -1414,14 +1313,12 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
|
||||
self.assertEqual(kwargs["user"], self.user)
|
||||
|
||||
def test_edit_pdf_invalid_params(self) -> None:
|
||||
# multiple documents
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
"/api/documents/edit_pdf/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id, self.doc3.id],
|
||||
"method": "edit_pdf",
|
||||
"parameters": {"operations": [{"page": 1}]},
|
||||
"operations": [{"page": 1}],
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
@@ -1429,44 +1326,25 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn(b"Edit PDF method only supports one document", response.content)
|
||||
|
||||
# no operations specified
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
"/api/documents/edit_pdf/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"method": "edit_pdf",
|
||||
"parameters": {},
|
||||
"operations": "not_a_list",
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn(b"operations not specified", response.content)
|
||||
self.assertIn(b"Expected a list of items", response.content)
|
||||
|
||||
# operations not a list
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
"/api/documents/edit_pdf/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"method": "edit_pdf",
|
||||
"parameters": {"operations": "not_a_list"},
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn(b"operations must be a list", response.content)
|
||||
|
||||
# invalid operation
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"method": "edit_pdf",
|
||||
"parameters": {"operations": ["invalid_operation"]},
|
||||
"operations": ["invalid_operation"],
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
@@ -1474,14 +1352,12 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn(b"invalid operation entry", response.content)
|
||||
|
||||
# page not an int
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
"/api/documents/edit_pdf/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"method": "edit_pdf",
|
||||
"parameters": {"operations": [{"page": "not_an_int"}]},
|
||||
"operations": [{"page": "not_an_int"}],
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
@@ -1489,14 +1365,12 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn(b"page must be an integer", response.content)
|
||||
|
||||
# rotate not an int
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
"/api/documents/edit_pdf/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"method": "edit_pdf",
|
||||
"parameters": {"operations": [{"page": 1, "rotate": "not_an_int"}]},
|
||||
"operations": [{"page": 1, "rotate": "not_an_int"}],
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
@@ -1504,14 +1378,12 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn(b"rotate must be an integer", response.content)
|
||||
|
||||
# doc not an int
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
"/api/documents/edit_pdf/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"method": "edit_pdf",
|
||||
"parameters": {"operations": [{"page": 1, "doc": "not_an_int"}]},
|
||||
"operations": [{"page": 1, "doc": "not_an_int"}],
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
@@ -1519,53 +1391,13 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn(b"doc must be an integer", response.content)
|
||||
|
||||
# update_document not a boolean
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
"/api/documents/edit_pdf/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"method": "edit_pdf",
|
||||
"parameters": {
|
||||
"update_document": "not_a_bool",
|
||||
"operations": [{"page": 1}],
|
||||
},
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn(b"update_document must be a boolean", response.content)
|
||||
|
||||
# include_metadata not a boolean
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"method": "edit_pdf",
|
||||
"parameters": {
|
||||
"include_metadata": "not_a_bool",
|
||||
"operations": [{"page": 1}],
|
||||
},
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn(b"include_metadata must be a boolean", response.content)
|
||||
|
||||
# update_document True but output would be multiple documents
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"method": "edit_pdf",
|
||||
"parameters": {
|
||||
"update_document": True,
|
||||
"operations": [{"page": 1, "doc": 1}, {"page": 2, "doc": 2}],
|
||||
},
|
||||
"update_document": True,
|
||||
"operations": [{"page": 1, "doc": 1}, {"page": 2, "doc": 2}],
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
@@ -1576,17 +1408,13 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
|
||||
response.content,
|
||||
)
|
||||
|
||||
# invalid source mode
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
"/api/documents/edit_pdf/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"method": "edit_pdf",
|
||||
"parameters": {
|
||||
"operations": [{"page": 1}],
|
||||
"source_mode": "not_a_mode",
|
||||
},
|
||||
"operations": [{"page": 1}],
|
||||
"source_mode": "not_a_mode",
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
@@ -1594,42 +1422,70 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn(b"Invalid source_mode", response.content)
|
||||
|
||||
@mock.patch("documents.serialisers.bulk_edit.edit_pdf")
|
||||
@mock.patch("documents.views.bulk_edit.edit_pdf")
|
||||
def test_edit_pdf_page_out_of_bounds(self, m) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- API data for editing PDF is called
|
||||
- The page number is out of bounds
|
||||
WHEN:
|
||||
- API is called
|
||||
THEN:
|
||||
- The API fails with a correct error code
|
||||
"""
|
||||
self.setup_mock(m, "edit_pdf")
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
"/api/documents/edit_pdf/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"method": "edit_pdf",
|
||||
"parameters": {"operations": [{"page": 99}]},
|
||||
"operations": [{"page": 99}],
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn(b"out of bounds", response.content)
|
||||
m.assert_not_called()
|
||||
|
||||
@mock.patch("documents.serialisers.bulk_edit.remove_password")
|
||||
def test_remove_password(self, m) -> None:
|
||||
self.setup_mock(m, "remove_password")
|
||||
@mock.patch("documents.views.bulk_edit.edit_pdf")
|
||||
def test_edit_pdf_insufficient_permissions(self, m) -> None:
|
||||
self.doc1.owner = User.objects.get(username="temp_admin")
|
||||
self.doc1.save()
|
||||
user1 = User.objects.create(username="user1")
|
||||
user1.user_permissions.add(*Permission.objects.all())
|
||||
user1.save()
|
||||
self.client.force_authenticate(user=user1)
|
||||
|
||||
self.setup_mock(m, "edit_pdf")
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
"/api/documents/edit_pdf/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc1.id],
|
||||
"operations": [{"page": 1}],
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
|
||||
m.assert_not_called()
|
||||
self.assertEqual(response.content, b"Insufficient permissions")
|
||||
|
||||
response = self.client.post(
|
||||
"/api/documents/edit_pdf/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"method": "remove_password",
|
||||
"parameters": {"password": "secret", "update_document": True},
|
||||
"operations": [{"page": 1}],
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
m.assert_called_once()
|
||||
|
||||
@mock.patch("documents.views.bulk_edit.remove_password")
|
||||
def test_remove_password(self, m) -> None:
|
||||
self.setup_mock(m, "remove_password")
|
||||
response = self.client.post(
|
||||
"/api/documents/remove_password/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"password": "secret",
|
||||
"update_document": True,
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
@@ -1641,36 +1497,69 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
|
||||
self.assertCountEqual(args[0], [self.doc2.id])
|
||||
self.assertEqual(kwargs["password"], "secret")
|
||||
self.assertTrue(kwargs["update_document"])
|
||||
self.assertEqual(kwargs["source_mode"], "latest_version")
|
||||
self.assertEqual(kwargs["user"], self.user)
|
||||
|
||||
def test_remove_password_invalid_params(self) -> None:
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
"/api/documents/remove_password/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"method": "remove_password",
|
||||
"parameters": {},
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn(b"password not specified", response.content)
|
||||
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
"/api/documents/remove_password/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"method": "remove_password",
|
||||
"parameters": {"password": 123},
|
||||
"password": 123,
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn(b"password must be a string", response.content)
|
||||
|
||||
@mock.patch("documents.views.bulk_edit.remove_password")
|
||||
def test_remove_password_insufficient_permissions(self, m) -> None:
|
||||
self.doc1.owner = User.objects.get(username="temp_admin")
|
||||
self.doc1.save()
|
||||
user1 = User.objects.create(username="user1")
|
||||
user1.user_permissions.add(*Permission.objects.all())
|
||||
user1.save()
|
||||
self.client.force_authenticate(user=user1)
|
||||
|
||||
self.setup_mock(m, "remove_password")
|
||||
response = self.client.post(
|
||||
"/api/documents/remove_password/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc1.id],
|
||||
"password": "secret",
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
|
||||
m.assert_not_called()
|
||||
self.assertEqual(response.content, b"Insufficient permissions")
|
||||
|
||||
response = self.client.post(
|
||||
"/api/documents/remove_password/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"password": "secret",
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
m.assert_called_once()
|
||||
|
||||
@override_settings(AUDIT_LOG_ENABLED=True)
|
||||
def test_bulk_edit_audit_log_enabled_simple_field(self) -> None:
|
||||
|
||||
@@ -25,3 +25,39 @@ class TestApiSchema(APITestCase):
|
||||
|
||||
ui_response = self.client.get(self.ENDPOINT + "view/")
|
||||
self.assertEqual(ui_response.status_code, status.HTTP_200_OK)
|
||||
|
||||
def test_schema_includes_dedicated_document_edit_endpoints(self) -> None:
|
||||
schema_response = self.client.get(self.ENDPOINT)
|
||||
self.assertEqual(schema_response.status_code, status.HTTP_200_OK)
|
||||
|
||||
paths = schema_response.data["paths"]
|
||||
self.assertIn("/api/documents/delete/", paths)
|
||||
self.assertIn("/api/documents/reprocess/", paths)
|
||||
self.assertIn("/api/documents/rotate/", paths)
|
||||
self.assertIn("/api/documents/merge/", paths)
|
||||
self.assertIn("/api/documents/edit_pdf/", paths)
|
||||
self.assertIn("/api/documents/remove_password/", paths)
|
||||
|
||||
def test_schema_bulk_edit_advertises_legacy_document_action_methods(self) -> None:
|
||||
schema_response = self.client.get(self.ENDPOINT)
|
||||
self.assertEqual(schema_response.status_code, status.HTTP_200_OK)
|
||||
|
||||
schema = schema_response.data["components"]["schemas"]
|
||||
bulk_schema = schema["BulkEditRequest"]
|
||||
method_schema = bulk_schema["properties"]["method"]
|
||||
|
||||
# drf-spectacular emits the enum as a referenced schema for this field
|
||||
enum_ref = method_schema["allOf"][0]["$ref"].split("/")[-1]
|
||||
advertised_methods = schema[enum_ref]["enum"]
|
||||
|
||||
for action_method in [
|
||||
"delete",
|
||||
"reprocess",
|
||||
"rotate",
|
||||
"merge",
|
||||
"edit_pdf",
|
||||
"remove_password",
|
||||
"split",
|
||||
"delete_pages",
|
||||
]:
|
||||
self.assertIn(action_method, advertised_methods)
|
||||
|
||||
@@ -9,8 +9,8 @@ from documents.parsers import get_default_file_extension
|
||||
from documents.parsers import get_parser_class_for_mime_type
|
||||
from documents.parsers import get_supported_file_extensions
|
||||
from documents.parsers import is_file_ext_supported
|
||||
from paperless.parsers.text import TextDocumentParser
|
||||
from paperless_tesseract.parsers import RasterisedDocumentParser
|
||||
from paperless_text.parsers import TextDocumentParser
|
||||
from paperless_tika.parsers import TikaDocumentParser
|
||||
|
||||
|
||||
|
||||
@@ -176,14 +176,20 @@ from documents.serialisers import BulkEditObjectsSerializer
|
||||
from documents.serialisers import BulkEditSerializer
|
||||
from documents.serialisers import CorrespondentSerializer
|
||||
from documents.serialisers import CustomFieldSerializer
|
||||
from documents.serialisers import DeleteDocumentsSerializer
|
||||
from documents.serialisers import DocumentListSerializer
|
||||
from documents.serialisers import DocumentSerializer
|
||||
from documents.serialisers import DocumentTypeSerializer
|
||||
from documents.serialisers import DocumentVersionLabelSerializer
|
||||
from documents.serialisers import DocumentVersionSerializer
|
||||
from documents.serialisers import EditPdfDocumentsSerializer
|
||||
from documents.serialisers import EmailSerializer
|
||||
from documents.serialisers import MergeDocumentsSerializer
|
||||
from documents.serialisers import NotesSerializer
|
||||
from documents.serialisers import PostDocumentSerializer
|
||||
from documents.serialisers import RemovePasswordDocumentsSerializer
|
||||
from documents.serialisers import ReprocessDocumentsSerializer
|
||||
from documents.serialisers import RotateDocumentsSerializer
|
||||
from documents.serialisers import RunTaskViewSerializer
|
||||
from documents.serialisers import SavedViewSerializer
|
||||
from documents.serialisers import SearchResultSerializer
|
||||
@@ -2114,6 +2120,125 @@ class SavedViewViewSet(BulkPermissionMixin, PassUserMixin, ModelViewSet):
|
||||
ordering_fields = ("name",)
|
||||
|
||||
|
||||
class DocumentOperationPermissionMixin(PassUserMixin):
|
||||
permission_classes = (IsAuthenticated,)
|
||||
parser_classes = (parsers.JSONParser,)
|
||||
METHOD_NAMES_REQUIRING_USER = {
|
||||
"split",
|
||||
"merge",
|
||||
"rotate",
|
||||
"delete_pages",
|
||||
"edit_pdf",
|
||||
"remove_password",
|
||||
}
|
||||
|
||||
def _has_document_permissions(
|
||||
self,
|
||||
*,
|
||||
user: User,
|
||||
documents: list[int],
|
||||
method,
|
||||
parameters: dict[str, Any],
|
||||
) -> bool:
|
||||
if user.is_superuser:
|
||||
return True
|
||||
|
||||
document_objs = Document.objects.select_related("owner").filter(
|
||||
pk__in=documents,
|
||||
)
|
||||
user_is_owner_of_all_documents = all(
|
||||
(doc.owner == user or doc.owner is None) for doc in document_objs
|
||||
)
|
||||
|
||||
# check global and object permissions for all documents
|
||||
has_perms = user.has_perm("documents.change_document") and all(
|
||||
has_perms_owner_aware(user, "change_document", doc) for doc in document_objs
|
||||
)
|
||||
|
||||
# check ownership for methods that change original document
|
||||
if (
|
||||
(
|
||||
has_perms
|
||||
and method
|
||||
in [
|
||||
bulk_edit.set_permissions,
|
||||
bulk_edit.delete,
|
||||
bulk_edit.rotate,
|
||||
bulk_edit.delete_pages,
|
||||
bulk_edit.edit_pdf,
|
||||
bulk_edit.remove_password,
|
||||
]
|
||||
)
|
||||
or (
|
||||
method in [bulk_edit.merge, bulk_edit.split]
|
||||
and parameters.get("delete_originals")
|
||||
)
|
||||
or (method == bulk_edit.edit_pdf and parameters.get("update_document"))
|
||||
):
|
||||
has_perms = user_is_owner_of_all_documents
|
||||
|
||||
# check global add permissions for methods that create documents
|
||||
if (
|
||||
has_perms
|
||||
and (
|
||||
method in [bulk_edit.split, bulk_edit.merge]
|
||||
or (
|
||||
method in [bulk_edit.edit_pdf, bulk_edit.remove_password]
|
||||
and not parameters.get("update_document")
|
||||
)
|
||||
)
|
||||
and not user.has_perm("documents.add_document")
|
||||
):
|
||||
has_perms = False
|
||||
|
||||
# check global delete permissions for methods that delete documents
|
||||
if (
|
||||
has_perms
|
||||
and (
|
||||
method == bulk_edit.delete
|
||||
or (
|
||||
method in [bulk_edit.merge, bulk_edit.split]
|
||||
and parameters.get("delete_originals")
|
||||
)
|
||||
)
|
||||
and not user.has_perm("documents.delete_document")
|
||||
):
|
||||
has_perms = False
|
||||
|
||||
return has_perms
|
||||
|
||||
def _execute_document_action(
|
||||
self,
|
||||
*,
|
||||
method,
|
||||
validated_data: dict[str, Any],
|
||||
operation_label: str,
|
||||
):
|
||||
documents = validated_data["documents"]
|
||||
parameters = {k: v for k, v in validated_data.items() if k != "documents"}
|
||||
user = self.request.user
|
||||
|
||||
if method.__name__ in self.METHOD_NAMES_REQUIRING_USER:
|
||||
parameters["user"] = user
|
||||
|
||||
if not self._has_document_permissions(
|
||||
user=user,
|
||||
documents=documents,
|
||||
method=method,
|
||||
parameters=parameters,
|
||||
):
|
||||
return HttpResponseForbidden("Insufficient permissions")
|
||||
|
||||
try:
|
||||
result = method(documents, **parameters)
|
||||
return Response({"result": result})
|
||||
except Exception as e:
|
||||
logger.warning(f"An error occurred performing {operation_label}: {e!s}")
|
||||
return HttpResponseBadRequest(
|
||||
f"Error performing {operation_label}, check logs for more detail.",
|
||||
)
|
||||
|
||||
|
||||
@extend_schema_view(
|
||||
post=extend_schema(
|
||||
operation_id="bulk_edit",
|
||||
@@ -2132,7 +2257,7 @@ class SavedViewViewSet(BulkPermissionMixin, PassUserMixin, ModelViewSet):
|
||||
},
|
||||
),
|
||||
)
|
||||
class BulkEditView(PassUserMixin):
|
||||
class BulkEditView(DocumentOperationPermissionMixin):
|
||||
MODIFIED_FIELD_BY_METHOD = {
|
||||
"set_correspondent": "correspondent",
|
||||
"set_document_type": "document_type",
|
||||
@@ -2154,11 +2279,24 @@ class BulkEditView(PassUserMixin):
|
||||
"remove_password": None,
|
||||
}
|
||||
|
||||
permission_classes = (IsAuthenticated,)
|
||||
serializer_class = BulkEditSerializer
|
||||
parser_classes = (parsers.JSONParser,)
|
||||
|
||||
def post(self, request, *args, **kwargs):
|
||||
request_method = request.data.get("method")
|
||||
api_version = int(request.version or settings.REST_FRAMEWORK["DEFAULT_VERSION"])
|
||||
# TODO: remove this and related backwards compatibility code when API v9 is dropped
|
||||
if request_method in BulkEditSerializer.LEGACY_DOCUMENT_ACTION_METHODS:
|
||||
endpoint = BulkEditSerializer.MOVED_DOCUMENT_ACTION_ENDPOINTS[
|
||||
request_method
|
||||
]
|
||||
logger.warning(
|
||||
"Deprecated bulk_edit method '%s' requested on API version %s. "
|
||||
"Use '%s' instead.",
|
||||
request_method,
|
||||
api_version,
|
||||
endpoint,
|
||||
)
|
||||
|
||||
serializer = self.get_serializer(data=request.data)
|
||||
serializer.is_valid(raise_exception=True)
|
||||
|
||||
@@ -2166,82 +2304,15 @@ class BulkEditView(PassUserMixin):
|
||||
method = serializer.validated_data.get("method")
|
||||
parameters = serializer.validated_data.get("parameters")
|
||||
documents = serializer.validated_data.get("documents")
|
||||
if method in [
|
||||
bulk_edit.split,
|
||||
bulk_edit.merge,
|
||||
bulk_edit.rotate,
|
||||
bulk_edit.delete_pages,
|
||||
bulk_edit.edit_pdf,
|
||||
bulk_edit.remove_password,
|
||||
]:
|
||||
if method.__name__ in self.METHOD_NAMES_REQUIRING_USER:
|
||||
parameters["user"] = user
|
||||
|
||||
if not user.is_superuser:
|
||||
document_objs = Document.objects.select_related("owner").filter(
|
||||
pk__in=documents,
|
||||
)
|
||||
user_is_owner_of_all_documents = all(
|
||||
(doc.owner == user or doc.owner is None) for doc in document_objs
|
||||
)
|
||||
|
||||
# check global and object permissions for all documents
|
||||
has_perms = user.has_perm("documents.change_document") and all(
|
||||
has_perms_owner_aware(user, "change_document", doc)
|
||||
for doc in document_objs
|
||||
)
|
||||
|
||||
# check ownership for methods that change original document
|
||||
if (
|
||||
(
|
||||
has_perms
|
||||
and method
|
||||
in [
|
||||
bulk_edit.set_permissions,
|
||||
bulk_edit.delete,
|
||||
bulk_edit.rotate,
|
||||
bulk_edit.delete_pages,
|
||||
bulk_edit.edit_pdf,
|
||||
bulk_edit.remove_password,
|
||||
]
|
||||
)
|
||||
or (
|
||||
method in [bulk_edit.merge, bulk_edit.split]
|
||||
and parameters["delete_originals"]
|
||||
)
|
||||
or (method == bulk_edit.edit_pdf and parameters["update_document"])
|
||||
):
|
||||
has_perms = user_is_owner_of_all_documents
|
||||
|
||||
# check global add permissions for methods that create documents
|
||||
if (
|
||||
has_perms
|
||||
and (
|
||||
method in [bulk_edit.split, bulk_edit.merge]
|
||||
or (
|
||||
method in [bulk_edit.edit_pdf, bulk_edit.remove_password]
|
||||
and not parameters["update_document"]
|
||||
)
|
||||
)
|
||||
and not user.has_perm("documents.add_document")
|
||||
):
|
||||
has_perms = False
|
||||
|
||||
# check global delete permissions for methods that delete documents
|
||||
if (
|
||||
has_perms
|
||||
and (
|
||||
method == bulk_edit.delete
|
||||
or (
|
||||
method in [bulk_edit.merge, bulk_edit.split]
|
||||
and parameters["delete_originals"]
|
||||
)
|
||||
)
|
||||
and not user.has_perm("documents.delete_document")
|
||||
):
|
||||
has_perms = False
|
||||
|
||||
if not has_perms:
|
||||
return HttpResponseForbidden("Insufficient permissions")
|
||||
if not self._has_document_permissions(
|
||||
user=user,
|
||||
documents=documents,
|
||||
method=method,
|
||||
parameters=parameters,
|
||||
):
|
||||
return HttpResponseForbidden("Insufficient permissions")
|
||||
|
||||
try:
|
||||
modified_field = self.MODIFIED_FIELD_BY_METHOD.get(method.__name__, None)
|
||||
@@ -2298,6 +2369,168 @@ class BulkEditView(PassUserMixin):
|
||||
)
|
||||
|
||||
|
||||
@extend_schema_view(
|
||||
post=extend_schema(
|
||||
operation_id="documents_rotate",
|
||||
description="Rotate one or more documents",
|
||||
responses={
|
||||
200: inline_serializer(
|
||||
name="RotateDocumentsResult",
|
||||
fields={
|
||||
"result": serializers.CharField(),
|
||||
},
|
||||
),
|
||||
},
|
||||
),
|
||||
)
|
||||
class RotateDocumentsView(DocumentOperationPermissionMixin):
|
||||
serializer_class = RotateDocumentsSerializer
|
||||
|
||||
def post(self, request, *args, **kwargs):
|
||||
serializer = self.get_serializer(data=request.data)
|
||||
serializer.is_valid(raise_exception=True)
|
||||
return self._execute_document_action(
|
||||
method=bulk_edit.rotate,
|
||||
validated_data=serializer.validated_data,
|
||||
operation_label="document rotate",
|
||||
)
|
||||
|
||||
|
||||
@extend_schema_view(
|
||||
post=extend_schema(
|
||||
operation_id="documents_merge",
|
||||
description="Merge selected documents into a new document",
|
||||
responses={
|
||||
200: inline_serializer(
|
||||
name="MergeDocumentsResult",
|
||||
fields={
|
||||
"result": serializers.CharField(),
|
||||
},
|
||||
),
|
||||
},
|
||||
),
|
||||
)
|
||||
class MergeDocumentsView(DocumentOperationPermissionMixin):
|
||||
serializer_class = MergeDocumentsSerializer
|
||||
|
||||
def post(self, request, *args, **kwargs):
|
||||
serializer = self.get_serializer(data=request.data)
|
||||
serializer.is_valid(raise_exception=True)
|
||||
return self._execute_document_action(
|
||||
method=bulk_edit.merge,
|
||||
validated_data=serializer.validated_data,
|
||||
operation_label="document merge",
|
||||
)
|
||||
|
||||
|
||||
@extend_schema_view(
|
||||
post=extend_schema(
|
||||
operation_id="documents_delete",
|
||||
description="Move selected documents to trash",
|
||||
responses={
|
||||
200: inline_serializer(
|
||||
name="DeleteDocumentsResult",
|
||||
fields={
|
||||
"result": serializers.CharField(),
|
||||
},
|
||||
),
|
||||
},
|
||||
),
|
||||
)
|
||||
class DeleteDocumentsView(DocumentOperationPermissionMixin):
|
||||
serializer_class = DeleteDocumentsSerializer
|
||||
|
||||
def post(self, request, *args, **kwargs):
|
||||
serializer = self.get_serializer(data=request.data)
|
||||
serializer.is_valid(raise_exception=True)
|
||||
return self._execute_document_action(
|
||||
method=bulk_edit.delete,
|
||||
validated_data=serializer.validated_data,
|
||||
operation_label="document delete",
|
||||
)
|
||||
|
||||
|
||||
@extend_schema_view(
|
||||
post=extend_schema(
|
||||
operation_id="documents_reprocess",
|
||||
description="Reprocess selected documents",
|
||||
responses={
|
||||
200: inline_serializer(
|
||||
name="ReprocessDocumentsResult",
|
||||
fields={
|
||||
"result": serializers.CharField(),
|
||||
},
|
||||
),
|
||||
},
|
||||
),
|
||||
)
|
||||
class ReprocessDocumentsView(DocumentOperationPermissionMixin):
|
||||
serializer_class = ReprocessDocumentsSerializer
|
||||
|
||||
def post(self, request, *args, **kwargs):
|
||||
serializer = self.get_serializer(data=request.data)
|
||||
serializer.is_valid(raise_exception=True)
|
||||
return self._execute_document_action(
|
||||
method=bulk_edit.reprocess,
|
||||
validated_data=serializer.validated_data,
|
||||
operation_label="document reprocess",
|
||||
)
|
||||
|
||||
|
||||
@extend_schema_view(
|
||||
post=extend_schema(
|
||||
operation_id="documents_edit_pdf",
|
||||
description="Perform PDF edit operations on a selected document",
|
||||
responses={
|
||||
200: inline_serializer(
|
||||
name="EditPdfDocumentsResult",
|
||||
fields={
|
||||
"result": serializers.CharField(),
|
||||
},
|
||||
),
|
||||
},
|
||||
),
|
||||
)
|
||||
class EditPdfDocumentsView(DocumentOperationPermissionMixin):
|
||||
serializer_class = EditPdfDocumentsSerializer
|
||||
|
||||
def post(self, request, *args, **kwargs):
|
||||
serializer = self.get_serializer(data=request.data)
|
||||
serializer.is_valid(raise_exception=True)
|
||||
return self._execute_document_action(
|
||||
method=bulk_edit.edit_pdf,
|
||||
validated_data=serializer.validated_data,
|
||||
operation_label="PDF edit",
|
||||
)
|
||||
|
||||
|
||||
@extend_schema_view(
|
||||
post=extend_schema(
|
||||
operation_id="documents_remove_password",
|
||||
description="Remove password protection from selected PDFs",
|
||||
responses={
|
||||
200: inline_serializer(
|
||||
name="RemovePasswordDocumentsResult",
|
||||
fields={
|
||||
"result": serializers.CharField(),
|
||||
},
|
||||
),
|
||||
},
|
||||
),
|
||||
)
|
||||
class RemovePasswordDocumentsView(DocumentOperationPermissionMixin):
|
||||
serializer_class = RemovePasswordDocumentsSerializer
|
||||
|
||||
def post(self, request, *args, **kwargs):
|
||||
serializer = self.get_serializer(data=request.data)
|
||||
serializer.is_valid(raise_exception=True)
|
||||
return self._execute_document_action(
|
||||
method=bulk_edit.remove_password,
|
||||
validated_data=serializer.validated_data,
|
||||
operation_label="password removal",
|
||||
)
|
||||
|
||||
|
||||
@extend_schema_view(
|
||||
post=extend_schema(
|
||||
description="Upload a document via the API",
|
||||
|
||||
@@ -2,7 +2,7 @@ msgid ""
|
||||
msgstr ""
|
||||
"Project-Id-Version: paperless-ngx\n"
|
||||
"Report-Msgid-Bugs-To: \n"
|
||||
"POT-Creation-Date: 2026-03-09 22:37+0000\n"
|
||||
"POT-Creation-Date: 2026-03-10 18:57+0000\n"
|
||||
"PO-Revision-Date: 2022-02-17 04:17\n"
|
||||
"Last-Translator: \n"
|
||||
"Language-Team: English\n"
|
||||
@@ -1299,7 +1299,7 @@ msgstr ""
|
||||
msgid "workflow runs"
|
||||
msgstr ""
|
||||
|
||||
#: documents/serialisers.py:463 documents/serialisers.py:2367
|
||||
#: documents/serialisers.py:463 documents/serialisers.py:2470
|
||||
msgid "Insufficient permissions."
|
||||
msgstr ""
|
||||
|
||||
@@ -1307,39 +1307,39 @@ msgstr ""
|
||||
msgid "Invalid color."
|
||||
msgstr ""
|
||||
|
||||
#: documents/serialisers.py:1990
|
||||
#: documents/serialisers.py:2093
|
||||
#, python-format
|
||||
msgid "File type %(type)s not supported"
|
||||
msgstr ""
|
||||
|
||||
#: documents/serialisers.py:2034
|
||||
#: documents/serialisers.py:2137
|
||||
#, python-format
|
||||
msgid "Custom field id must be an integer: %(id)s"
|
||||
msgstr ""
|
||||
|
||||
#: documents/serialisers.py:2041
|
||||
#: documents/serialisers.py:2144
|
||||
#, python-format
|
||||
msgid "Custom field with id %(id)s does not exist"
|
||||
msgstr ""
|
||||
|
||||
#: documents/serialisers.py:2058 documents/serialisers.py:2068
|
||||
#: documents/serialisers.py:2161 documents/serialisers.py:2171
|
||||
msgid ""
|
||||
"Custom fields must be a list of integers or an object mapping ids to values."
|
||||
msgstr ""
|
||||
|
||||
#: documents/serialisers.py:2063
|
||||
#: documents/serialisers.py:2166
|
||||
msgid "Some custom fields don't exist or were specified twice."
|
||||
msgstr ""
|
||||
|
||||
#: documents/serialisers.py:2210
|
||||
#: documents/serialisers.py:2313
|
||||
msgid "Invalid variable detected."
|
||||
msgstr ""
|
||||
|
||||
#: documents/serialisers.py:2423
|
||||
#: documents/serialisers.py:2526
|
||||
msgid "Duplicate document identifiers are not allowed."
|
||||
msgstr ""
|
||||
|
||||
#: documents/serialisers.py:2453 documents/views.py:3328
|
||||
#: documents/serialisers.py:2556 documents/views.py:3561
|
||||
#, python-format
|
||||
msgid "Documents not found: %(ids)s"
|
||||
msgstr ""
|
||||
@@ -1603,20 +1603,20 @@ msgstr ""
|
||||
msgid "Unable to parse URI {value}"
|
||||
msgstr ""
|
||||
|
||||
#: documents/views.py:3340
|
||||
#: documents/views.py:3573
|
||||
#, python-format
|
||||
msgid "Insufficient permissions to share document %(id)s."
|
||||
msgstr ""
|
||||
|
||||
#: documents/views.py:3383
|
||||
#: documents/views.py:3616
|
||||
msgid "Bundle is already being processed."
|
||||
msgstr ""
|
||||
|
||||
#: documents/views.py:3440
|
||||
#: documents/views.py:3673
|
||||
msgid "The share link bundle is still being prepared. Please try again later."
|
||||
msgstr ""
|
||||
|
||||
#: documents/views.py:3450
|
||||
#: documents/views.py:3683
|
||||
msgid "The share link bundle is unavailable."
|
||||
msgstr ""
|
||||
|
||||
@@ -2004,7 +2004,7 @@ msgstr ""
|
||||
msgid "Chinese Traditional"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/urls.py:379
|
||||
#: paperless/urls.py:415
|
||||
msgid "Paperless-ngx administration"
|
||||
msgstr ""
|
||||
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
import os
|
||||
|
||||
from celery import Celery
|
||||
from celery.signals import worker_process_init
|
||||
|
||||
# Set the default Django settings module for the 'celery' program.
|
||||
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "paperless.settings")
|
||||
@@ -16,19 +15,3 @@ app.config_from_object("django.conf:settings", namespace="CELERY")
|
||||
|
||||
# Load task modules from all registered Django apps.
|
||||
app.autodiscover_tasks()
|
||||
|
||||
|
||||
@worker_process_init.connect
def on_worker_process_init(**kwargs) -> None:  # pragma: no cover
    """
    Celery ``worker_process_init`` hook: pre-register the built-in parsers.

    Only the parsers that ship with the application are registered here;
    no entrypoint discovery takes place.  Third-party parsers are picked up
    later, the first time a task calls get_parser_registry().  Keeping the
    hook this small lets workers start consuming documents right away and
    keeps worker_process_init well inside its 4-second timeout budget.

    Any keyword arguments supplied by the signal are accepted and ignored.
    """
    # Imported inside the hook rather than at module import time.
    from paperless.parsers.registry import (
        init_builtin_parsers as register_builtin_parsers,
    )

    register_builtin_parsers()
|
||||
|
||||
@@ -1,379 +0,0 @@
|
||||
"""
|
||||
Public interface for the Paperless-ngx parser plugin system.
|
||||
|
||||
This module defines ParserProtocol — the structural contract that every
|
||||
document parser must satisfy, whether it is a built-in parser shipped with
|
||||
Paperless-ngx or a third-party parser installed via a Python entrypoint.
|
||||
|
||||
Phase 1/2 scope: only the Protocol is defined here. The transitional
|
||||
DocumentParser ABC (Phase 3) and concrete built-in parsers (Phase 3+) will
|
||||
be added in later phases, so there are intentionally no imports of parser
|
||||
implementations here.
|
||||
|
||||
Usage example (third-party parser)::
|
||||
|
||||
from paperless.parsers import ParserProtocol
|
||||
|
||||
class MyParser:
|
||||
name = "my-parser"
|
||||
version = "1.0.0"
|
||||
author = "Acme Corp"
|
||||
url = "https://example.com/my-parser"
|
||||
|
||||
@classmethod
|
||||
def supported_mime_types(cls) -> dict[str, str]:
|
||||
return {"application/x-my-format": ".myf"}
|
||||
|
||||
@classmethod
|
||||
def score(cls, mime_type, filename, path=None):
|
||||
return 10
|
||||
|
||||
# … implement remaining protocol methods …
|
||||
|
||||
assert isinstance(MyParser(), ParserProtocol)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
from typing import Protocol
|
||||
from typing import Self
|
||||
from typing import TypedDict
|
||||
from typing import runtime_checkable
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import datetime
|
||||
from pathlib import Path
|
||||
from types import TracebackType
|
||||
|
||||
__all__ = [
|
||||
"MetadataEntry",
|
||||
"ParserProtocol",
|
||||
]
|
||||
|
||||
|
||||
class MetadataEntry(TypedDict):
    """One metadata field pulled out of a document.

    Every key is mandatory.  Values are always strings: a parser is
    responsible for converting dates, integers, lists and other typed data
    to their string form before returning an entry.
    """

    # URI of the metadata namespace (e.g. 'http://ns.adobe.com/pdf/1.3/').
    namespace: str

    # Conventional namespace prefix (e.g. 'pdf', 'xmp', 'dc').
    prefix: str

    # Field name within the namespace (e.g. 'Author', 'CreateDate').
    key: str

    # String representation of the field value.
    value: str
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
class ParserProtocol(Protocol):
|
||||
"""Structural contract for all Paperless-ngx document parsers.
|
||||
|
||||
Both built-in parsers and third-party plugins (discovered via the
|
||||
"paperless_ngx.parsers" entrypoint group) must satisfy this Protocol.
|
||||
Because it is decorated with runtime_checkable, isinstance(obj,
|
||||
ParserProtocol) works at runtime based on method presence, which is
|
||||
useful for validation in ParserRegistry.discover.
|
||||
|
||||
Parsers must expose four string attributes at the class level so the
|
||||
registry can log attribution information without instantiating the parser:
|
||||
|
||||
name : str
|
||||
Human-readable parser name (e.g. "Tesseract OCR").
|
||||
version : str
|
||||
Semantic version string (e.g. "1.2.3").
|
||||
author : str
|
||||
Author or organisation name.
|
||||
url : str
|
||||
URL for documentation, source code, or issue tracker.
|
||||
"""
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Class-level identity (checked by the registry, not Protocol methods)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
name: str
|
||||
version: str
|
||||
author: str
|
||||
url: str
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Class methods
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@classmethod
|
||||
def supported_mime_types(cls) -> dict[str, str]:
|
||||
"""Return a mapping of supported MIME types to preferred file extensions.
|
||||
|
||||
The keys are MIME type strings (e.g. "application/pdf"), and the
|
||||
values are the preferred file extension including the leading dot
|
||||
(e.g. ".pdf"). The registry uses this mapping both to decide whether
|
||||
a parser is a candidate for a given file and to determine the default
|
||||
extension when creating archive copies.
|
||||
|
||||
Returns
|
||||
-------
|
||||
dict[str, str]
|
||||
{mime_type: extension} mapping — may be empty if the parser
|
||||
has been temporarily disabled.
|
||||
"""
|
||||
...
|
||||
|
||||
@classmethod
|
||||
def score(
|
||||
cls,
|
||||
mime_type: str,
|
||||
filename: str,
|
||||
path: Path | None = None,
|
||||
) -> int | None:
|
||||
"""Return a priority score for handling this file, or None to decline.
|
||||
|
||||
The registry calls this after confirming that the MIME type is in
|
||||
supported_mime_types. Parsers may inspect filename and optionally
|
||||
the file at path to refine their confidence level.
|
||||
|
||||
A higher score wins. Return None to explicitly decline handling a file
|
||||
even though the MIME type is listed as supported (e.g. when a feature
|
||||
flag is disabled, or a required service is not configured).
|
||||
|
||||
Parameters
|
||||
----------
|
||||
mime_type:
|
||||
The detected MIME type of the file to be parsed.
|
||||
filename:
|
||||
The original filename, including extension.
|
||||
path:
|
||||
Optional filesystem path to the file. Parsers that need to
|
||||
inspect file content (e.g. magic-byte sniffing) may use this.
|
||||
May be None when scoring happens before the file is available locally.
|
||||
|
||||
Returns
|
||||
-------
|
||||
int | None
|
||||
Priority score (higher wins), or None to decline.
|
||||
"""
|
||||
...
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Properties
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@property
|
||||
def can_produce_archive(self) -> bool:
|
||||
"""Whether this parser can produce a searchable PDF archive copy.
|
||||
|
||||
If True, the consumption pipeline may request an archive version when
|
||||
processing the document, subject to the ARCHIVE_FILE_GENERATION
|
||||
setting. If False, only thumbnail and text extraction are performed.
|
||||
"""
|
||||
...
|
||||
|
||||
@property
|
||||
def requires_pdf_rendition(self) -> bool:
|
||||
"""Whether the parser must produce a PDF for the frontend to display.
|
||||
|
||||
True for formats the browser cannot display natively (e.g. DOCX, ODT).
|
||||
When True, the pipeline always stores the PDF output regardless of the
|
||||
ARCHIVE_FILE_GENERATION setting, since the original format cannot be
|
||||
shown to the user.
|
||||
"""
|
||||
...
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Core parsing interface
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def parse(
|
||||
self,
|
||||
document_path: Path,
|
||||
mime_type: str,
|
||||
*,
|
||||
produce_archive: bool = True,
|
||||
) -> None:
|
||||
"""Parse document_path and populate internal state.
|
||||
|
||||
After a successful call, callers retrieve results via get_text,
|
||||
get_date, and get_archive_path.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
document_path:
|
||||
Absolute path to the document file to parse.
|
||||
mime_type:
|
||||
Detected MIME type of the document.
|
||||
produce_archive:
|
||||
When True (the default) and can_produce_archive is also True,
|
||||
the parser should produce a searchable PDF at the path returned
|
||||
by get_archive_path. Pass False when only text extraction and
|
||||
thumbnail generation are required and disk I/O should be minimised.
|
||||
|
||||
Raises
|
||||
------
|
||||
documents.parsers.ParseError
|
||||
If parsing fails for any reason.
|
||||
"""
|
||||
...
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Result accessors
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def get_text(self) -> str | None:
|
||||
"""Return the plain-text content extracted during parse.
|
||||
|
||||
Returns
|
||||
-------
|
||||
str | None
|
||||
Extracted text, or None if no text could be found.
|
||||
"""
|
||||
...
|
||||
|
||||
def get_date(self) -> datetime.datetime | None:
|
||||
"""Return the document date detected during parse.
|
||||
|
||||
Returns
|
||||
-------
|
||||
datetime.datetime | None
|
||||
Detected document date, or None if no date was found.
|
||||
"""
|
||||
...
|
||||
|
||||
def get_archive_path(self) -> Path | None:
|
||||
"""Return the path to the generated archive PDF, or None.
|
||||
|
||||
Returns
|
||||
-------
|
||||
Path | None
|
||||
Path to the searchable PDF archive, or None if no archive was
|
||||
produced (e.g. because produce_archive=False or the parser does
|
||||
not support archive generation).
|
||||
"""
|
||||
...
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Thumbnail and metadata
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def get_thumbnail(self, document_path: Path, mime_type: str) -> Path:
|
||||
"""Generate and return the path to a thumbnail image for the document.
|
||||
|
||||
May be called independently of parse. The returned path must point to
|
||||
an existing WebP image file inside the parser's temporary working
|
||||
directory.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
document_path:
|
||||
Absolute path to the source document.
|
||||
mime_type:
|
||||
Detected MIME type of the document.
|
||||
|
||||
Returns
|
||||
-------
|
||||
Path
|
||||
Path to the generated thumbnail image (WebP format preferred).
|
||||
"""
|
||||
...
|
||||
|
||||
def get_page_count(
|
||||
self,
|
||||
document_path: Path,
|
||||
mime_type: str,
|
||||
) -> int | None:
|
||||
"""Return the number of pages in the document, if determinable.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
document_path:
|
||||
Absolute path to the source document.
|
||||
mime_type:
|
||||
Detected MIME type of the document.
|
||||
|
||||
Returns
|
||||
-------
|
||||
int | None
|
||||
Page count, or None if the parser cannot determine it.
|
||||
"""
|
||||
...
|
||||
|
||||
def extract_metadata(
|
||||
self,
|
||||
document_path: Path,
|
||||
mime_type: str,
|
||||
) -> list[MetadataEntry]:
|
||||
"""Extract format-specific metadata from the document.
|
||||
|
||||
Called by the API view layer on demand — not during the consumption
|
||||
pipeline. Results are returned to the frontend for per-file display.
|
||||
|
||||
For documents with an archive version, this method is called twice:
|
||||
once for the original file (with its native MIME type) and once for
|
||||
the archive file (with ``"application/pdf"``). Parsers that produce
|
||||
archives should handle both cases.
|
||||
|
||||
Implementations must not raise. A failure to read metadata is not
|
||||
fatal — log a warning and return whatever partial results were
|
||||
collected, or ``[]`` if none.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
document_path:
|
||||
Absolute path to the file to extract metadata from.
|
||||
mime_type:
|
||||
MIME type of the file at ``document_path``. May be
|
||||
``"application/pdf"`` when called for the archive version.
|
||||
|
||||
Returns
|
||||
-------
|
||||
list[MetadataEntry]
|
||||
Zero or more metadata entries. Returns ``[]`` if no metadata
|
||||
could be extracted or the format does not support it.
|
||||
"""
|
||||
...
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Context manager
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def __enter__(self) -> Self:
|
||||
"""Enter the parser context, returning the parser instance.
|
||||
|
||||
Implementations should perform any resource allocation here if not
|
||||
done in __init__ (e.g. creating API clients or temp directories).
|
||||
|
||||
Returns
|
||||
-------
|
||||
Self
|
||||
The parser instance itself.
|
||||
"""
|
||||
...
|
||||
|
||||
def __exit__(
|
||||
self,
|
||||
exc_type: type[BaseException] | None,
|
||||
exc_val: BaseException | None,
|
||||
exc_tb: TracebackType | None,
|
||||
) -> None:
|
||||
"""Exit the parser context and release all resources.
|
||||
|
||||
Implementations must clean up all temporary files and other resources
|
||||
regardless of whether an exception occurred.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
exc_type:
|
||||
The exception class, or None if no exception was raised.
|
||||
exc_val:
|
||||
The exception instance, or None.
|
||||
exc_tb:
|
||||
The traceback, or None.
|
||||
"""
|
||||
...
|
||||
@@ -1,364 +0,0 @@
|
||||
"""
|
||||
Singleton registry that tracks all document parsers available to
|
||||
Paperless-ngx — both built-ins shipped with the application and third-party
|
||||
plugins installed via Python entrypoints.
|
||||
|
||||
Public surface
|
||||
--------------
|
||||
get_parser_registry
|
||||
Lazy-initialise and return the shared ParserRegistry. This is the primary
|
||||
entry point for production code.
|
||||
|
||||
init_builtin_parsers
|
||||
Register built-in parsers only, without entrypoint discovery. Safe to
|
||||
call from Celery worker_process_init where importing all entrypoints
|
||||
would be wasteful or cause side effects.
|
||||
|
||||
reset_parser_registry
|
||||
Reset module-level state. For tests only.
|
||||
|
||||
Entrypoint group
|
||||
----------------
|
||||
Third-party parsers must advertise themselves under the
|
||||
"paperless_ngx.parsers" entrypoint group in their pyproject.toml::
|
||||
|
||||
[project.entry-points."paperless_ngx.parsers"]
|
||||
my_parser = "my_package.parsers:MyParser"
|
||||
|
||||
The loaded class must expose the following attributes at the class level
|
||||
(not just on instances) for the registry to accept it:
|
||||
name, version, author, url, supported_mime_types (callable), score (callable).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from importlib.metadata import entry_points
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pathlib import Path
|
||||
|
||||
from paperless.parsers import ParserProtocol
|
||||
|
||||
logger = logging.getLogger("paperless.parsers.registry")
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Module-level singleton state
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_registry: ParserRegistry | None = None
|
||||
_discovery_complete: bool = False
|
||||
|
||||
# Attribute names that every registered external parser class must expose.
|
||||
_REQUIRED_ATTRS: tuple[str, ...] = (
|
||||
"name",
|
||||
"version",
|
||||
"author",
|
||||
"url",
|
||||
"supported_mime_types",
|
||||
"score",
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Module-level accessor functions
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def get_parser_registry() -> ParserRegistry:
    """Return the shared ParserRegistry instance, building it on first use.

    On the first call this function:

    1. Creates a new ParserRegistry.
    2. Calls register_defaults to install built-in parsers.
    3. Calls discover to load third-party plugins via importlib.metadata entrypoints.
    4. Calls log_summary to emit a startup summary.

    If init_builtin_parsers already created the registry, steps 1 and 2 are
    skipped and only discovery and the summary are performed.  Subsequent
    calls return the same instance immediately.

    The new registry is only stored in the module-level ``_registry`` after
    register_defaults has returned.  Should registering the built-in parsers
    raise, no half-initialised (empty) registry is left behind, and the next
    call starts again from scratch.

    Returns
    -------
    ParserRegistry
        The shared registry singleton.
    """
    global _registry, _discovery_complete

    registry = _registry
    if registry is None:
        registry = ParserRegistry()
        registry.register_defaults()
        # Publish only a registry whose built-in parsers are in place.
        _registry = registry

    if not _discovery_complete:
        registry.discover()
        registry.log_summary()
        _discovery_complete = True

    return registry
|
||||
|
||||
|
||||
def init_builtin_parsers() -> None:
    """Register built-in parsers without performing entrypoint discovery.

    Intended for use in Celery worker_process_init handlers where importing
    all installed entrypoints would be wasteful, slow, or could produce
    undesirable side effects.  Entrypoint discovery (third-party plugins) is
    deliberately not performed.

    Safe to call multiple times — once a registry exists, subsequent calls
    are no-ops.

    The new registry is only stored in the module-level ``_registry`` after
    register_defaults has returned.  If registering the built-in parsers
    raises, no empty registry is left behind, so a later call (or
    get_parser_registry) retries the registration instead of silently
    running without built-in parsers.

    Returns
    -------
    None
    """
    global _registry

    if _registry is not None:
        return

    registry = ParserRegistry()
    registry.register_defaults()
    # Publish only a registry whose built-in parsers are in place.
    _registry = registry
|
||||
|
||||
|
||||
def reset_parser_registry() -> None:
    """Put the module-level registry state back to its initial values.

    Both _registry and _discovery_complete are cleared, so the next call to
    get_parser_registry builds and populates a registry from scratch.

    FOR TESTS ONLY.  Production code must not call this: a reset in the
    middle of a request sends every later parser lookup through discovery
    again, which is expensive and may have unexpected side effects in
    multi-threaded environments.

    Returns
    -------
    None
    """
    global _registry, _discovery_complete

    _registry, _discovery_complete = None, False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Registry class
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class ParserRegistry:
    """Registry that maps MIME types to the best available parser class.

    Parsers are partitioned into two lists:

    _builtins
        Parser classes registered via register_builtin (populated by
        register_defaults).

    _external
        Parser classes loaded from installed Python entrypoints via discover.

    When resolving a parser for a file, external parsers are evaluated
    alongside built-in parsers using a uniform scoring mechanism.  Both lists
    are iterated together; the class with the highest score wins.  If an
    external parser wins, its attribution details are logged so users can
    identify which third-party package handled their document.
    """

    # Required attributes that the registry calls on the parser class during
    # resolution.  They must be callable, not merely present; otherwise every
    # later get_parser_for_file call would fail with a TypeError.
    _REQUIRED_CALLABLES: tuple[str, ...] = ("supported_mime_types", "score")

    def __init__(self) -> None:
        self._external: list[type[ParserProtocol]] = []
        self._builtins: list[type[ParserProtocol]] = []

    # ------------------------------------------------------------------
    # Registration
    # ------------------------------------------------------------------

    def register_builtin(self, parser_class: type[ParserProtocol]) -> None:
        """Register a built-in parser class.

        Built-in parsers are shipped with Paperless-ngx and are appended to
        the _builtins list.  They are never overridden by external parsers;
        instead, scoring determines which parser wins for any given file.

        Parameters
        ----------
        parser_class:
            The parser class to register.  Must satisfy ParserProtocol.
        """
        self._builtins.append(parser_class)

    def register_defaults(self) -> None:
        """Register the built-in parsers that ship with Paperless-ngx.

        Each parser that has been migrated to the new ParserProtocol interface
        is registered here.  Parsers are added in ascending weight order so
        that log output is predictable; scoring determines which parser wins
        at runtime regardless of registration order.
        """
        # Imported lazily, inside the method, rather than at module import.
        from paperless.parsers.text import TextDocumentParser

        self.register_builtin(TextDocumentParser)

    # ------------------------------------------------------------------
    # Discovery
    # ------------------------------------------------------------------

    def discover(self) -> None:
        """Load third-party parsers from the "paperless_ngx.parsers" entrypoint group.

        For each advertised entrypoint the method:

        1. Calls ep.load() to import the class.
        2. Validates that the class exposes all required attributes.
        3. Validates that supported_mime_types and score are callable.
        4. On success, appends the class to _external and logs an info message.
        5. On failure (import error, missing or non-callable attributes), logs
           an appropriate warning/error and continues to the next entrypoint.

        Errors during discovery of a single parser do not prevent other parsers
        from being loaded.

        Returns
        -------
        None
        """
        for ep in entry_points(group="paperless_ngx.parsers"):
            try:
                parser_class = ep.load()
            except Exception:
                logger.exception(
                    "Failed to load parser entrypoint '%s' — skipping.",
                    ep.name,
                )
                continue

            missing = [
                attr for attr in _REQUIRED_ATTRS if not hasattr(parser_class, attr)
            ]
            if missing:
                logger.warning(
                    "Parser loaded from entrypoint '%s' is missing required "
                    "attributes %r — skipping.",
                    ep.name,
                    missing,
                )
                continue

            # Presence alone is not enough: get_parser_for_file calls these
            # members on the class, so e.g. ``score = None`` must be rejected
            # here instead of breaking parser resolution later on.
            not_callable = [
                attr
                for attr in self._REQUIRED_CALLABLES
                if not callable(getattr(parser_class, attr))
            ]
            if not_callable:
                logger.warning(
                    "Parser loaded from entrypoint '%s' has non-callable "
                    "required attributes %r — skipping.",
                    ep.name,
                    not_callable,
                )
                continue

            self._external.append(parser_class)
            logger.info(
                "Loaded third-party parser '%s' v%s by %s (entrypoint: '%s').",
                parser_class.name,
                parser_class.version,
                parser_class.author,
                ep.name,
            )

    # ------------------------------------------------------------------
    # Summary logging
    # ------------------------------------------------------------------

    def log_summary(self) -> None:
        """Log a startup summary of all registered parsers.

        Built-in parsers are listed first, followed by any external parsers
        discovered from entrypoints.  If no external parsers were found a
        short informational message is logged instead of an empty list.

        Returns
        -------
        None
        """
        logger.info(
            "Built-in parsers (%d):",
            len(self._builtins),
        )
        for cls in self._builtins:
            logger.info(
                "  [built-in] %s v%s — %s",
                getattr(cls, "name", repr(cls)),
                getattr(cls, "version", "unknown"),
                getattr(cls, "url", "built-in"),
            )

        if not self._external:
            logger.info("No third-party parsers discovered.")
            return

        logger.info(
            "Third-party parsers (%d):",
            len(self._external),
        )
        for cls in self._external:
            logger.info(
                "  [external] %s v%s by %s — report issues at %s",
                getattr(cls, "name", repr(cls)),
                getattr(cls, "version", "unknown"),
                getattr(cls, "author", "unknown"),
                getattr(cls, "url", "unknown"),
            )

    # ------------------------------------------------------------------
    # Parser resolution
    # ------------------------------------------------------------------

    def get_parser_for_file(
        self,
        mime_type: str,
        filename: str,
        path: Path | None = None,
    ) -> type[ParserProtocol] | None:
        """Return the best parser class for the given file, or None.

        All registered parsers (external first, then built-ins) are evaluated
        against the file.  A parser is eligible if mime_type appears in the dict
        returned by its supported_mime_types classmethod, and its score
        classmethod returns a non-None integer.

        The parser with the highest score wins.  When two parsers return the
        same score, the one that appears earlier in the evaluation order wins
        (external parsers are evaluated before built-ins, giving third-party
        packages a chance to override defaults at equal priority).

        When an external parser is selected, its identity is logged at INFO
        level so operators can trace which package handled a document.

        Parameters
        ----------
        mime_type:
            The detected MIME type of the file.
        filename:
            The original filename, including extension.
        path:
            Optional filesystem path to the file.  Forwarded to each
            parser's score method.

        Returns
        -------
        type[ParserProtocol] | None
            The winning parser class, or None if no parser can handle the file.
        """
        best_score: int | None = None
        best_parser: type[ParserProtocol] | None = None

        # External parsers are placed first so that, at equal scores, an
        # external parser wins over a built-in (first-seen policy).
        for parser_class in (*self._external, *self._builtins):
            if mime_type not in parser_class.supported_mime_types():
                continue

            score = parser_class.score(mime_type, filename, path)
            if score is None:
                continue

            # Strict comparison keeps the first-seen parser on a tie.
            if best_score is None or score > best_score:
                best_score = score
                best_parser = parser_class

        if best_parser is not None and best_parser in self._external:
            logger.info(
                "Document handled by third-party parser '%s' v%s — %s",
                getattr(best_parser, "name", repr(best_parser)),
                getattr(best_parser, "version", "unknown"),
                getattr(best_parser, "url", "unknown"),
            )

        return best_parser
|
||||
@@ -1,320 +0,0 @@
|
||||
"""
|
||||
Built-in plain-text document parser.
|
||||
|
||||
Handles text/plain, text/csv, and application/csv MIME types by reading the
|
||||
file content directly. Thumbnails are generated by rendering a page-sized
|
||||
WebP image from the first 100,000 characters using Pillow.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import shutil
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
from typing import Self
|
||||
|
||||
from django.conf import settings
|
||||
from PIL import Image
|
||||
from PIL import ImageDraw
|
||||
from PIL import ImageFont
|
||||
|
||||
from paperless.version import __full_version_str__
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import datetime
|
||||
from types import TracebackType
|
||||
|
||||
from paperless.parsers import MetadataEntry
|
||||
|
||||
logger = logging.getLogger("paperless.parsing.text")
|
||||
|
||||
_SUPPORTED_MIME_TYPES: dict[str, str] = {
|
||||
"text/plain": ".txt",
|
||||
"text/csv": ".csv",
|
||||
"application/csv": ".csv",
|
||||
}
|
||||
|
||||
|
||||
class TextDocumentParser:
|
||||
"""Parse plain-text documents (txt, csv) for Paperless-ngx.
|
||||
|
||||
This parser reads the file content directly as UTF-8 text and renders a
|
||||
simple thumbnail using Pillow. It does not perform OCR and does not
|
||||
produce a searchable PDF archive copy.
|
||||
|
||||
Class attributes
|
||||
----------------
|
||||
name : str
|
||||
Human-readable parser name.
|
||||
version : str
|
||||
Semantic version string, kept in sync with Paperless-ngx releases.
|
||||
author : str
|
||||
Maintainer name.
|
||||
url : str
|
||||
Issue tracker / source URL.
|
||||
"""
|
||||
|
||||
name: str = "Paperless-ngx Text Parser"
|
||||
version: str = __full_version_str__
|
||||
author: str = "Paperless-ngx Contributors"
|
||||
url: str = "https://github.com/paperless-ngx/paperless-ngx"
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Class methods
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@classmethod
|
||||
def supported_mime_types(cls) -> dict[str, str]:
|
||||
"""Return the MIME types this parser handles.
|
||||
|
||||
Returns
|
||||
-------
|
||||
dict[str, str]
|
||||
Mapping of MIME type to preferred file extension.
|
||||
"""
|
||||
return _SUPPORTED_MIME_TYPES
|
||||
|
||||
@classmethod
|
||||
def score(
|
||||
cls,
|
||||
mime_type: str,
|
||||
filename: str,
|
||||
path: Path | None = None,
|
||||
) -> int | None:
|
||||
"""Return the priority score for handling this file.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
mime_type:
|
||||
Detected MIME type of the file.
|
||||
filename:
|
||||
Original filename including extension.
|
||||
path:
|
||||
Optional filesystem path. Not inspected by this parser.
|
||||
|
||||
Returns
|
||||
-------
|
||||
int | None
|
||||
10 if the MIME type is supported, otherwise None.
|
||||
"""
|
||||
if mime_type in _SUPPORTED_MIME_TYPES:
|
||||
return 10
|
||||
return None
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Properties
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@property
|
||||
def can_produce_archive(self) -> bool:
|
||||
"""Whether this parser can produce a searchable PDF archive copy.
|
||||
|
||||
Returns
|
||||
-------
|
||||
bool
|
||||
Always False — the text parser does not produce a PDF archive.
|
||||
"""
|
||||
return False
|
||||
|
||||
@property
|
||||
def requires_pdf_rendition(self) -> bool:
|
||||
"""Whether the parser must produce a PDF for the frontend to display.
|
||||
|
||||
Returns
|
||||
-------
|
||||
bool
|
||||
Always False — plain text files are displayable as-is.
|
||||
"""
|
||||
return False
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Lifecycle
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def __init__(self, logging_group: object = None) -> None:
|
||||
settings.SCRATCH_DIR.mkdir(parents=True, exist_ok=True)
|
||||
self._tempdir = Path(
|
||||
tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR),
|
||||
)
|
||||
self._text: str | None = None
|
||||
|
||||
def __enter__(self) -> Self:
|
||||
return self
|
||||
|
||||
def __exit__(
|
||||
self,
|
||||
exc_type: type[BaseException] | None,
|
||||
exc_val: BaseException | None,
|
||||
exc_tb: TracebackType | None,
|
||||
) -> None:
|
||||
logger.debug("Cleaning up temporary directory %s", self._tempdir)
|
||||
shutil.rmtree(self._tempdir, ignore_errors=True)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Core parsing interface
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def parse(
    self,
    document_path: Path,
    mime_type: str,
    *,
    produce_archive: bool = True,
) -> None:
    """Load the document's text and keep it for later retrieval.

    Parameters
    ----------
    document_path:
        Path of the text file to read.
    mime_type:
        Detected MIME type of the document; not consulted here.
    produce_archive:
        Not consulted here; no archive is produced by this method.

    Notes
    -----
    Any exception raised by ``_read_text`` propagates unchanged.
    """
    content = self._read_text(document_path)
    self._text = content
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Result accessors
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def get_text(self) -> str | None:
    """Hand back the text captured by the most recent ``parse`` call.

    Returns
    -------
    str | None
        The stored text, or ``None`` when nothing has been parsed yet.
    """
    stored_text = self._text
    return stored_text
|
||||
|
||||
def get_date(self) -> datetime.datetime | None:
    """Hand back the document date found while parsing.

    Returns
    -------
    datetime.datetime | None
        ``None`` unconditionally; no date detection is performed.
    """
    detected_date = None
    return detected_date
|
||||
|
||||
def get_archive_path(self) -> Path | None:
    """Hand back the location of a generated archive PDF, if there is one.

    Returns
    -------
    Path | None
        ``None`` unconditionally; this parser never writes an archive PDF.
    """
    archive_location = None
    return archive_location
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Thumbnail and metadata
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def get_thumbnail(self, document_path: Path, mime_type: str) -> Path:
    """Draw the beginning of the document onto a WebP thumbnail image.

    Parameters
    ----------
    document_path:
        Path of the source document to preview.
    mime_type:
        Detected MIME type of the document; not consulted here.

    Returns
    -------
    Path
        Location of ``thumb.webp`` inside the parser's temporary directory.
    """
    preview_char_limit = 100_000
    size_cutoff_bytes = 50 * 1024 * 1024

    oversized = document_path.stat().st_size > size_cutoff_bytes
    if not oversized:
        # Undecodable bytes are substituted instead of aborting the preview.
        with Path(document_path).open(
            "r",
            encoding="utf-8",
            errors="replace",
        ) as handle:
            preview = handle.read(preview_char_limit)
    else:
        # Files over the cutoff are never opened; a placeholder is drawn.
        preview = "[File too large to preview]"

    thumb_font = ImageFont.truetype(
        font=settings.THUMBNAIL_FONT_NAME,
        size=20,
        layout_engine=ImageFont.Layout.BASIC,
    )
    canvas = Image.new("RGB", (500, 700), color="white")
    ImageDraw.Draw(canvas).multiline_text(
        (5, 5),
        preview,
        font=thumb_font,
        fill="black",
        spacing=4,
    )

    thumb_path = self._tempdir / "thumb.webp"
    canvas.save(thumb_path, format="WEBP")
    return thumb_path
|
||||
|
||||
def get_page_count(
    self,
    document_path: Path,
    mime_type: str,
) -> int | None:
    """Report how many pages the document has.

    Parameters
    ----------
    document_path:
        Path of the source document; not consulted here.
    mime_type:
        Detected MIME type of the document; not consulted here.

    Returns
    -------
    int | None
        ``None`` unconditionally; no page count is computed.
    """
    page_total = None
    return page_total
|
||||
|
||||
def extract_metadata(
    self,
    document_path: Path,
    mime_type: str,
) -> list[MetadataEntry]:
    """Collect format-specific metadata for the document.

    Parameters
    ----------
    document_path:
        Path of the source document; not consulted here.
    mime_type:
        Detected MIME type of the document; not consulted here.

    Returns
    -------
    list[MetadataEntry]
        A new empty list on every call; no metadata is extracted.
    """
    entries: list[MetadataEntry] = []
    return entries
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Private helpers
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _read_text(self, filepath: Path) -> str:
|
||||
"""Read file content, replacing invalid UTF-8 bytes rather than failing.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
filepath:
|
||||
Path to the file to read.
|
||||
|
||||
Returns
|
||||
-------
|
||||
str
|
||||
File content as a string.
|
||||
"""
|
||||
try:
|
||||
return filepath.read_text(encoding="utf-8")
|
||||
except UnicodeDecodeError as exc:
|
||||
logger.warning(
|
||||
"Unicode error reading %s, replacing bad bytes: %s",
|
||||
filepath,
|
||||
exc,
|
||||
)
|
||||
return filepath.read_bytes().decode("utf-8", errors="replace")
|
||||
@@ -1,48 +0,0 @@
|
||||
"""
|
||||
Fixtures defined here are available to every test module under
|
||||
src/paperless/tests/ (including sub-packages such as parsers/).
|
||||
|
||||
Session-scoped fixtures for the shared samples directory live here so
|
||||
sub-package conftest files can reference them without duplicating path logic.
|
||||
Parser-specific fixtures (concrete parser instances, format-specific sample
|
||||
files) live in paperless/tests/parsers/conftest.py.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import pytest
|
||||
|
||||
from paperless.parsers.registry import reset_parser_registry
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Generator
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def samples_dir() -> Path:
    """Resolved path of the ``samples`` directory next to this conftest file.

    Sub-package conftest files build format-specific paths on top of this
    root, e.g. ``samples_dir / "text" / "test.txt"``.

    Returns
    -------
    Path
        Absolute, resolved path of the shared sample documents directory.
    """
    conftest_dir = Path(__file__).parent
    return conftest_dir.joinpath("samples").resolve()
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def clean_registry() -> Generator[None, None, None]:
    """Wrap every test with a parser-registry reset on both sides.

    Resetting before and after keeps registry state created by one test
    (for example through get_parser_registry() or init_builtin_parsers())
    from being visible to the next one.
    """
    # Setup: start each test without a previously created registry.
    reset_parser_registry()
    yield
    # Teardown: discard whatever registry the test created.
    reset_parser_registry()
|
||||
@@ -1,76 +0,0 @@
|
||||
"""
|
||||
Parser fixtures that are used across multiple test modules in this package
|
||||
are defined here. Format-specific sample-file fixtures are grouped by parser
|
||||
so it is easy to see which files belong to which test module.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import pytest
|
||||
|
||||
from paperless.parsers.text import TextDocumentParser
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Generator
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Text parser sample files
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def text_samples_dir(samples_dir: Path) -> Path:
    """Directory that holds the sample files for the text parser.

    Returns
    -------
    Path
        ``<samples_dir>/text/``
    """
    return samples_dir.joinpath("text")
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def sample_txt_file(text_samples_dir: Path) -> Path:
    """Location of the well-formed UTF-8 plain-text sample.

    Returns
    -------
    Path
        Path to ``text/test.txt`` under the samples directory.
    """
    return text_samples_dir.joinpath("test.txt")
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def malformed_txt_file(text_samples_dir: Path) -> Path:
    """Location of the sample text file that contains invalid UTF-8 bytes.

    Returns
    -------
    Path
        Path to ``text/decode_error.txt`` under the samples directory.
    """
    return text_samples_dir.joinpath("decode_error.txt")
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Text parser instance
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture()
def text_parser() -> Generator[TextDocumentParser, None, None]:
    """Provide a TextDocumentParser that is torn down once the test finishes.

    Yields
    ------
    TextDocumentParser
        A parser instance held open by its own context manager.
    """
    parser_context = TextDocumentParser()
    # The context manager's exit runs after the test has used the parser.
    with parser_context as active_parser:
        yield active_parser
|
||||
@@ -1,256 +0,0 @@
|
||||
"""
|
||||
Tests for paperless.parsers.text.TextDocumentParser.
|
||||
|
||||
All tests use the context-manager protocol for parser lifecycle. Sample
|
||||
files are provided by session-scoped fixtures defined in conftest.py.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from paperless.parsers import ParserProtocol
|
||||
from paperless.parsers.text import TextDocumentParser
|
||||
|
||||
|
||||
class TestTextParserProtocol:
    """Check TextDocumentParser against the ParserProtocol contract."""

    def test_isinstance_satisfies_protocol(
        self,
        text_parser: TextDocumentParser,
    ) -> None:
        satisfies = isinstance(text_parser, ParserProtocol)
        assert satisfies

    def test_class_attributes_present(self) -> None:
        # Each identifying attribute must be a non-empty string.
        for attribute in ("name", "version", "author", "url"):
            value = getattr(TextDocumentParser, attribute)
            assert isinstance(value, str) and value

    def test_supported_mime_types_returns_dict(self) -> None:
        supported = TextDocumentParser.supported_mime_types()
        assert isinstance(supported, dict)
        for expected_type in ("text/plain", "text/csv", "application/csv"):
            assert expected_type in supported

    @pytest.mark.parametrize(
        ("mime_type", "expected"),
        [
            ("text/plain", 10),
            ("text/csv", 10),
            ("application/csv", 10),
            ("application/pdf", None),
            ("image/png", None),
        ],
    )
    def test_score(self, mime_type: str, expected: int | None) -> None:
        actual = TextDocumentParser.score(mime_type, "file.txt")
        assert actual == expected

    def test_can_produce_archive_is_false(
        self,
        text_parser: TextDocumentParser,
    ) -> None:
        flag = text_parser.can_produce_archive
        assert flag is False

    def test_requires_pdf_rendition_is_false(
        self,
        text_parser: TextDocumentParser,
    ) -> None:
        flag = text_parser.requires_pdf_rendition
        assert flag is False
|
||||
|
||||
|
||||
class TestTextParserLifecycle:
    """Check context-manager handling and removal of the scratch directory."""

    def test_context_manager_cleans_up_tempdir(self) -> None:
        with TextDocumentParser() as parser:
            scratch = parser._tempdir
            assert scratch.exists()
        # Leaving the block must have removed the directory.
        assert not scratch.exists()

    def test_context_manager_cleans_up_after_exception(self) -> None:
        scratch: Path | None = None
        with pytest.raises(RuntimeError), TextDocumentParser() as parser:
            scratch = parser._tempdir
            raise RuntimeError("boom")
        # The directory was recorded before the error, and is gone after it.
        assert scratch is not None
        assert not scratch.exists()
|
||||
|
||||
|
||||
class TestTextParserParse:
    """Check parse() together with the result accessors."""

    def test_parse_valid_utf8(
        self,
        text_parser: TextDocumentParser,
        sample_txt_file: Path,
    ) -> None:
        text_parser.parse(sample_txt_file, "text/plain")

        extracted = text_parser.get_text()
        assert extracted == "This is a test file.\n"

    def test_parse_returns_none_for_archive_path(
        self,
        text_parser: TextDocumentParser,
        sample_txt_file: Path,
    ) -> None:
        text_parser.parse(sample_txt_file, "text/plain")

        archive = text_parser.get_archive_path()
        assert archive is None

    def test_parse_returns_none_for_date(
        self,
        text_parser: TextDocumentParser,
        sample_txt_file: Path,
    ) -> None:
        text_parser.parse(sample_txt_file, "text/plain")

        detected = text_parser.get_date()
        assert detected is None

    def test_parse_invalid_utf8_bytes_replaced(
        self,
        text_parser: TextDocumentParser,
        malformed_txt_file: Path,
    ) -> None:
        """
        GIVEN:
            - A text file containing invalid UTF-8 byte sequences
        WHEN:
            - The file is parsed
        THEN:
            - Parsing succeeds
            - Invalid bytes are replaced with the Unicode replacement character
        """
        text_parser.parse(malformed_txt_file, "text/plain")

        extracted = text_parser.get_text()
        assert extracted == "Pantothens\ufffdure\n"

    def test_get_text_none_before_parse(
        self,
        text_parser: TextDocumentParser,
    ) -> None:
        # No parse() call has happened on this fresh parser.
        extracted = text_parser.get_text()
        assert extracted is None
|
||||
|
||||
|
||||
class TestTextParserThumbnail:
    """Verify thumbnail generation."""

    def test_thumbnail_exists_and_is_file(
        self,
        text_parser: TextDocumentParser,
        sample_txt_file: Path,
    ) -> None:
        thumb = text_parser.get_thumbnail(sample_txt_file, "text/plain")

        assert thumb.exists()
        assert thumb.is_file()

    def test_thumbnail_large_file_does_not_read_all(
        self,
        text_parser: TextDocumentParser,
    ) -> None:
        """
        GIVEN:
            - A text file larger than 50 MB
        WHEN:
            - A thumbnail is requested
        THEN:
            - The thumbnail is generated without loading the full file
        """
        oversize_bytes = 51 * 1024 * 1024

        # The directory context manager removes the file even when creating
        # it fails part-way, so nothing is left behind on any error path.
        with tempfile.TemporaryDirectory() as scratch_dir:
            large_file = Path(scratch_dir) / "large.txt"
            # Only the reported size matters here, so extend the file with
            # truncate() instead of building and writing 51 MiB of text.
            with large_file.open("wb") as handle:
                handle.truncate(oversize_bytes)

            thumb = text_parser.get_thumbnail(large_file, "text/plain")

            assert thumb.exists()
            assert thumb.is_file()

    def test_get_page_count_returns_none(
        self,
        text_parser: TextDocumentParser,
        sample_txt_file: Path,
    ) -> None:
        assert text_parser.get_page_count(sample_txt_file, "text/plain") is None
|
||||
|
||||
|
||||
class TestTextParserMetadata:
    """Check what extract_metadata hands back."""

    def test_extract_metadata_returns_empty_list(
        self,
        text_parser: TextDocumentParser,
        sample_txt_file: Path,
    ) -> None:
        metadata = text_parser.extract_metadata(sample_txt_file, "text/plain")

        assert metadata == []

    def test_extract_metadata_returns_list_type(
        self,
        text_parser: TextDocumentParser,
        sample_txt_file: Path,
    ) -> None:
        metadata = text_parser.extract_metadata(sample_txt_file, "text/plain")

        assert isinstance(metadata, list)

    def test_extract_metadata_ignores_mime_type(
        self,
        text_parser: TextDocumentParser,
        sample_txt_file: Path,
    ) -> None:
        """extract_metadata returns [] regardless of the mime_type argument."""
        as_pdf = text_parser.extract_metadata(sample_txt_file, "application/pdf")
        assert as_pdf == []
        as_csv = text_parser.extract_metadata(sample_txt_file, "text/csv")
        assert as_csv == []
|
||||
|
||||
|
||||
class TestTextParserRegistry:
    """Check that TextDocumentParser is part of the default registrations."""

    def test_registered_in_defaults(self) -> None:
        from paperless.parsers.registry import ParserRegistry

        fresh_registry = ParserRegistry()
        fresh_registry.register_defaults()

        assert TextDocumentParser in fresh_registry._builtins

    def test_get_parser_for_text_plain(self) -> None:
        from paperless.parsers.registry import get_parser_registry

        chosen = get_parser_registry().get_parser_for_file("text/plain", "doc.txt")

        assert chosen is TextDocumentParser

    def test_get_parser_for_text_csv(self) -> None:
        from paperless.parsers.registry import get_parser_registry

        chosen = get_parser_registry().get_parser_for_file("text/csv", "data.csv")

        assert chosen is TextDocumentParser

    def test_get_parser_for_unknown_type_returns_none(self) -> None:
        from paperless.parsers.registry import get_parser_registry

        chosen = get_parser_registry().get_parser_for_file(
            "application/pdf",
            "doc.pdf",
        )

        assert chosen is None
|
||||
@@ -1,714 +0,0 @@
|
||||
"""
|
||||
Tests for :mod:`paperless.parsers` (ParserProtocol) and
|
||||
:mod:`paperless.parsers.registry` (ParserRegistry + module-level helpers).
|
||||
|
||||
All tests use pytest-style functions/classes — no unittest.TestCase.
|
||||
The ``clean_registry`` fixture ensures complete isolation between tests by
|
||||
resetting the module-level singleton before and after every test.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from importlib.metadata import EntryPoint
|
||||
from pathlib import Path
|
||||
from typing import Self
|
||||
from unittest.mock import MagicMock
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from paperless.parsers import ParserProtocol
|
||||
from paperless.parsers.registry import ParserRegistry
|
||||
from paperless.parsers.registry import get_parser_registry
|
||||
from paperless.parsers.registry import init_builtin_parsers
|
||||
from paperless.parsers.registry import reset_parser_registry
|
||||
|
||||
|
||||
@pytest.fixture()
def dummy_parser_cls() -> type:
    """Provide a minimal class that fully satisfies :class:`ParserProtocol`.

    GIVEN: Registry and Protocol logic need a small but complete parser.
    WHEN: A test requests this fixture.
    THEN: A class carrying every required attribute and method is returned.
    """

    class DummyParser:
        # --- identification -------------------------------------------
        name = "dummy-parser"
        version = "0.1.0"
        author = "Test Author"
        url = "https://example.com/dummy-parser"

        # --- class-level capability queries ---------------------------
        @classmethod
        def supported_mime_types(cls) -> dict[str, str]:
            return {"text/plain": ".txt"}

        @classmethod
        def score(
            cls,
            mime_type: str,
            filename: str,
            path: Path | None = None,
        ) -> int | None:
            return 10

        # --- properties -----------------------------------------------
        @property
        def can_produce_archive(self) -> bool:
            return False

        @property
        def requires_pdf_rendition(self) -> bool:
            return False

        # --- context manager ------------------------------------------
        def __enter__(self) -> Self:
            return self

        def __exit__(self, exc_type, exc_val, exc_tb) -> None:
            """Present only because it is required; does nothing."""
            return None

        # --- parsing and accessors ------------------------------------
        def parse(
            self,
            document_path: Path,
            mime_type: str,
            *,
            produce_archive: bool = True,
        ) -> None:
            """Present only because it is required; does nothing."""
            return None

        def get_text(self) -> str | None:
            return None

        def get_date(self) -> None:
            return None

        def get_archive_path(self) -> Path | None:
            return None

        def get_thumbnail(
            self,
            document_path: Path,
            mime_type: str,
        ) -> Path:
            return Path("/tmp/thumbnail.webp")

        def get_page_count(
            self,
            document_path: Path,
            mime_type: str,
        ) -> int | None:
            return None

        def extract_metadata(
            self,
            document_path: Path,
            mime_type: str,
        ) -> list:
            return []

    return DummyParser
|
||||
|
||||
|
||||
class TestParserProtocol:
    """Check runtime isinstance() behaviour against ParserProtocol."""

    def test_compliant_class_instance_passes_isinstance(
        self,
        dummy_parser_cls: type,
    ) -> None:
        """
        GIVEN: A class that implements every method required by ParserProtocol.
        WHEN: isinstance() is called with the Protocol.
        THEN: The check passes (returns True).
        """
        compliant = dummy_parser_cls()
        assert isinstance(compliant, ParserProtocol)

    def test_non_compliant_class_instance_fails_isinstance(self) -> None:
        """
        GIVEN: A plain class with no parser-related methods.
        WHEN: isinstance() is called with ParserProtocol.
        THEN: The check fails (returns False).
        """

        class Unrelated:
            pass

        outsider = Unrelated()
        assert not isinstance(outsider, ParserProtocol)

    @pytest.mark.parametrize(
        "missing_method",
        [
            pytest.param("parse", id="missing-parse"),
            pytest.param("get_text", id="missing-get_text"),
            pytest.param("get_thumbnail", id="missing-get_thumbnail"),
            pytest.param("__enter__", id="missing-__enter__"),
            pytest.param("__exit__", id="missing-__exit__"),
        ],
    )
    def test_partial_compliant_fails_isinstance(
        self,
        dummy_parser_cls: type,
        missing_method: str,
    ) -> None:
        """
        GIVEN: A class that satisfies ParserProtocol except for one method.
        WHEN: isinstance() is called with ParserProtocol.
        THEN: The check fails because the Protocol is not fully satisfied.
        """
        # Build a subclass in which the chosen method is overridden with
        # None, so that member is no longer callable.
        overrides = {missing_method: None}
        partial_cls = type("PartialParser", (dummy_parser_cls,), overrides)
        broken = partial_cls()
        assert not isinstance(broken, ParserProtocol)
|
||||
|
||||
|
||||
class TestRegistrySingleton:
    """Check the lifecycle functions of the module-level singleton."""

    def test_get_parser_registry_returns_instance(self) -> None:
        """
        GIVEN: No registry has been created yet.
        WHEN: get_parser_registry() is called.
        THEN: A ParserRegistry instance is returned.
        """
        created = get_parser_registry()
        assert isinstance(created, ParserRegistry)

    def test_get_parser_registry_same_instance_on_repeated_calls(self) -> None:
        """
        GIVEN: A registry instance was created by a prior call.
        WHEN: get_parser_registry() is called a second time.
        THEN: The exact same object (identity) is returned.
        """
        earlier, later = get_parser_registry(), get_parser_registry()
        assert earlier is later

    def test_reset_parser_registry_gives_fresh_instance(self) -> None:
        """
        GIVEN: A registry instance already exists.
        WHEN: reset_parser_registry() is called and then get_parser_registry()
              is called again.
        THEN: A new, distinct registry instance is returned.
        """
        before_reset = get_parser_registry()
        reset_parser_registry()
        after_reset = get_parser_registry()
        assert before_reset is not after_reset

    def test_init_builtin_parsers_does_not_run_discover(
        self,
        monkeypatch: pytest.MonkeyPatch,
    ) -> None:
        """
        GIVEN: discover() would raise an exception if called.
        WHEN: init_builtin_parsers() is called.
        THEN: No exception is raised, confirming discover() was not invoked.
        """

        def exploding_discover(self) -> None:
            raise RuntimeError(
                "discover() must not be called from init_builtin_parsers",
            )

        monkeypatch.setattr(ParserRegistry, "discover", exploding_discover)

        # Reaching the end of this call without an error is the assertion.
        init_builtin_parsers()

    def test_init_builtin_parsers_idempotent(self) -> None:
        """
        GIVEN: init_builtin_parsers() has already been called once.
        WHEN: init_builtin_parsers() is called a second time.
        THEN: No error is raised and the same registry instance is reused.
        """
        init_builtin_parsers()
        # Look at the module attribute directly to see which registry the
        # first call left in place.
        import paperless.parsers.registry as reg_module

        registry_after_first_call = reg_module._registry

        init_builtin_parsers()

        assert reg_module._registry is registry_after_first_call
|
||||
|
||||
|
||||
class TestParserRegistryGetParserForFile:
    """Check how get_parser_for_file() picks a parser."""

    @staticmethod
    def _make_parser(
        class_name: str,
        *,
        name: str,
        author: str,
        url: str,
        score: int | None,
        version: str = "1.0",
    ) -> type:
        """Build a stub parser class for 'text/plain' with a fixed score."""

        def supported_mime_types(cls):
            return {"text/plain": ".txt"}

        def fixed_score(cls, mime_type, filename, path=None):
            return score

        namespace = {
            "name": name,
            "version": version,
            "author": author,
            "url": url,
            "supported_mime_types": classmethod(supported_mime_types),
            "score": classmethod(fixed_score),
        }
        return type(class_name, (), namespace)

    def test_returns_none_when_no_parsers_registered(self) -> None:
        """
        GIVEN: A registry with no parsers registered.
        WHEN: get_parser_for_file() is called for any MIME type.
        THEN: None is returned.
        """
        empty_registry = ParserRegistry()
        chosen = empty_registry.get_parser_for_file("text/plain", "doc.txt")
        assert chosen is None

    def test_returns_none_for_unsupported_mime_type(
        self,
        dummy_parser_cls: type,
    ) -> None:
        """
        GIVEN: A registry with a parser that supports only 'text/plain'.
        WHEN: get_parser_for_file() is called with 'application/pdf'.
        THEN: None is returned.
        """
        registry = ParserRegistry()
        registry.register_builtin(dummy_parser_cls)
        chosen = registry.get_parser_for_file("application/pdf", "file.pdf")
        assert chosen is None

    def test_returns_parser_for_supported_mime_type(
        self,
        dummy_parser_cls: type,
    ) -> None:
        """
        GIVEN: A registry with a parser registered for 'text/plain'.
        WHEN: get_parser_for_file() is called with 'text/plain'.
        THEN: The registered parser class is returned.
        """
        registry = ParserRegistry()
        registry.register_builtin(dummy_parser_cls)
        chosen = registry.get_parser_for_file("text/plain", "readme.txt")
        assert chosen is dummy_parser_cls

    def test_highest_score_wins(self) -> None:
        """
        GIVEN: Two parsers both supporting 'text/plain' with scores 5 and 20.
        WHEN: get_parser_for_file() is called for 'text/plain'.
        THEN: The parser with score 20 is returned.
        """
        low_scorer = self._make_parser(
            "LowScoreParser",
            name="low",
            author="A",
            url="https://example.com/low",
            score=5,
        )
        high_scorer = self._make_parser(
            "HighScoreParser",
            name="high",
            author="B",
            url="https://example.com/high",
            score=20,
        )

        registry = ParserRegistry()
        registry.register_builtin(low_scorer)
        registry.register_builtin(high_scorer)
        chosen = registry.get_parser_for_file("text/plain", "readme.txt")
        assert chosen is high_scorer

    def test_parser_returning_none_score_is_skipped(self) -> None:
        """
        GIVEN: A parser that returns None from score() for the given file.
        WHEN: get_parser_for_file() is called.
        THEN: That parser is skipped and None is returned (no other candidates).
        """
        # A score of None means the parser explicitly declines the file.
        decliner = self._make_parser(
            "DecliningParser",
            name="declining",
            author="A",
            url="https://example.com",
            score=None,
        )

        registry = ParserRegistry()
        registry.register_builtin(decliner)
        chosen = registry.get_parser_for_file("text/plain", "readme.txt")
        assert chosen is None

    def test_all_parsers_decline_returns_none(self) -> None:
        """
        GIVEN: Multiple parsers that all return None from score().
        WHEN: get_parser_for_file() is called.
        THEN: None is returned.
        """
        decliner = self._make_parser(
            "AlwaysDeclines",
            name="declines",
            author="A",
            url="https://example.com",
            score=None,
        )

        registry = ParserRegistry()
        # The same declining class sits in both the built-in and the
        # external list.
        registry.register_builtin(decliner)
        registry._external.append(decliner)
        chosen = registry.get_parser_for_file("text/plain", "file.txt")
        assert chosen is None

    def test_external_parser_beats_builtin_same_score(self) -> None:
        """
        GIVEN: An external and a built-in parser both returning score 10.
        WHEN: get_parser_for_file() is called.
        THEN: The external parser wins because externals are evaluated first
              and the first-seen-wins policy applies at equal scores.
        """
        builtin = self._make_parser(
            "BuiltinParser",
            name="builtin",
            author="Core",
            url="https://example.com/builtin",
            score=10,
        )
        external = self._make_parser(
            "ExternalParser",
            name="external",
            version="2.0",
            author="Third Party",
            url="https://example.com/external",
            score=10,
        )

        registry = ParserRegistry()
        registry.register_builtin(builtin)
        registry._external.append(external)
        chosen = registry.get_parser_for_file("text/plain", "file.txt")
        assert chosen is external

    def test_builtin_wins_when_external_declines(self) -> None:
        """
        GIVEN: An external parser that declines (score None) and a built-in
               that returns score 5.
        WHEN: get_parser_for_file() is called.
        THEN: The built-in parser is returned.
        """
        declining_external = self._make_parser(
            "DecliningExternal",
            name="declining-external",
            author="Third Party",
            url="https://example.com/declining",
            score=None,
        )
        accepting_builtin = self._make_parser(
            "AcceptingBuiltin",
            name="accepting-builtin",
            author="Core",
            url="https://example.com/accepting",
            score=5,
        )

        registry = ParserRegistry()
        registry.register_builtin(accepting_builtin)
        registry._external.append(declining_external)
        chosen = registry.get_parser_for_file("text/plain", "file.txt")
        assert chosen is accepting_builtin
|
||||
|
||||
|
||||
class TestDiscover:
    """Verify entrypoint discovery in ParserRegistry.discover()."""

    @staticmethod
    def _discover_with(entrypoints: list) -> "ParserRegistry":
        """
        Build a fresh ParserRegistry and run discover() while
        ``paperless.parsers.registry.entry_points`` is patched to return
        ``entrypoints``. Returns the registry for inspection.
        """
        registry = ParserRegistry()
        with patch(
            "paperless.parsers.registry.entry_points",
            return_value=entrypoints,
        ):
            registry.discover()
        return registry

    def test_discover_with_no_entrypoints(self) -> None:
        """
        GIVEN: No entrypoints are registered under 'paperless_ngx.parsers'.
        WHEN: discover() is called.
        THEN: _external remains empty and no errors are raised.
        """
        registry = self._discover_with([])

        assert registry._external == []

    def test_discover_adds_valid_external_parser(self) -> None:
        """
        GIVEN: One valid entrypoint whose loaded class has all required attrs.
        WHEN: discover() is called.
        THEN: The class is appended to _external.
        """

        class ValidExternal:
            name = "valid-external"
            version = "3.0.0"
            author = "Someone"
            url = "https://example.com/valid"

            @classmethod
            def supported_mime_types(cls):
                return {"application/pdf": ".pdf"}

            @classmethod
            def score(cls, mime_type, filename, path=None):
                return 5

        mock_ep = MagicMock(spec=EntryPoint)
        mock_ep.name = "valid_external"
        mock_ep.load.return_value = ValidExternal

        registry = self._discover_with([mock_ep])

        assert ValidExternal in registry._external

    def test_discover_skips_entrypoint_with_load_error(
        self,
        caplog: pytest.LogCaptureFixture,
    ) -> None:
        """
        GIVEN: An entrypoint whose load() method raises ImportError.
        WHEN: discover() is called.
        THEN: The entrypoint is skipped, an error is logged, and _external
            remains empty.
        """
        mock_ep = MagicMock(spec=EntryPoint)
        mock_ep.name = "broken_ep"
        mock_ep.load.side_effect = ImportError("missing dependency")

        with caplog.at_level(logging.ERROR, logger="paperless.parsers.registry"):
            registry = self._discover_with([mock_ep])

        assert registry._external == []
        assert any(
            "broken_ep" in record.message
            for record in caplog.records
            if record.levelno >= logging.ERROR
        )

    def test_discover_skips_entrypoint_with_missing_attrs(
        self,
        caplog: pytest.LogCaptureFixture,
    ) -> None:
        """
        GIVEN: A class loaded from an entrypoint that is missing the 'score'
            attribute.
        WHEN: discover() is called.
        THEN: The entrypoint is skipped, a warning is logged, and _external
            remains empty.
        """

        class MissingScore:
            name = "missing-score"
            version = "1.0"
            author = "Someone"
            url = "https://example.com"

            # 'score' classmethod is intentionally absent.

            @classmethod
            def supported_mime_types(cls):
                return {"text/plain": ".txt"}

        mock_ep = MagicMock(spec=EntryPoint)
        mock_ep.name = "missing_score_ep"
        mock_ep.load.return_value = MissingScore

        with caplog.at_level(logging.WARNING, logger="paperless.parsers.registry"):
            registry = self._discover_with([mock_ep])

        assert registry._external == []
        assert any(
            "missing_score_ep" in record.message
            for record in caplog.records
            if record.levelno >= logging.WARNING
        )

    def test_discover_logs_loaded_parser_info(
        self,
        caplog: pytest.LogCaptureFixture,
    ) -> None:
        """
        GIVEN: A valid entrypoint that loads successfully.
        WHEN: discover() is called.
        THEN: An INFO log message is emitted containing the parser name,
            version, author, and entrypoint name.
        """

        class LoggableParser:
            # The name, url and entrypoint name below are chosen so that none
            # is a substring of another; otherwise `"loggable" in ...` would
            # pass on the entrypoint name or URL alone and prove nothing
            # about the parser name being logged.
            name = "loggable"
            version = "4.2.0"
            author = "Log Tester"
            url = "https://example.com/parser-home"

            @classmethod
            def supported_mime_types(cls):
                return {"image/png": ".png"}

            @classmethod
            def score(cls, mime_type, filename, path=None):
                return 1

        mock_ep = MagicMock(spec=EntryPoint)
        mock_ep.name = "ep_under_test"
        mock_ep.load.return_value = LoggableParser

        with caplog.at_level(logging.INFO, logger="paperless.parsers.registry"):
            self._discover_with([mock_ep])

        info_messages = " ".join(
            r.message for r in caplog.records if r.levelno == logging.INFO
        )
        assert "loggable" in info_messages
        assert "4.2.0" in info_messages
        assert "Log Tester" in info_messages
        assert "ep_under_test" in info_messages
|
||||
|
||||
|
||||
class TestLogSummary:
    """Verify log output from ParserRegistry.log_summary()."""

    @staticmethod
    def _summary_output(registry, caplog) -> str:
        """
        Call log_summary() on ``registry`` while capturing INFO and above
        from the registry logger, then return every captured message joined
        by single spaces.
        """
        with caplog.at_level(logging.INFO, logger="paperless.parsers.registry"):
            registry.log_summary()
        return " ".join(record.message for record in caplog.records)

    def test_log_summary_with_no_external_parsers(
        self,
        dummy_parser_cls: type,
        caplog: pytest.LogCaptureFixture,
    ) -> None:
        """
        GIVEN: A registry with one built-in parser and no external parsers.
        WHEN: log_summary() is called.
        THEN: The built-in parser name appears in the logs.
        """
        builtin_only = ParserRegistry()
        builtin_only.register_builtin(dummy_parser_cls)

        logged = self._summary_output(builtin_only, caplog)

        assert dummy_parser_cls.name in logged

    def test_log_summary_with_external_parsers(
        self,
        caplog: pytest.LogCaptureFixture,
    ) -> None:
        """
        GIVEN: A registry with one external parser registered.
        WHEN: log_summary() is called.
        THEN: The external parser name, version, author, and url appear in
            the log output.
        """

        class ExtParser:
            name = "ext-parser"
            version = "9.9.9"
            author = "Ext Corp"
            url = "https://ext.example.com"

            @classmethod
            def supported_mime_types(cls):
                return {}

            @classmethod
            def score(cls, mime_type, filename, path=None):
                return None

        with_external = ParserRegistry()
        # Inject directly; discovery is covered by its own tests.
        with_external._external.append(ExtParser)

        logged = self._summary_output(with_external, caplog)

        for expected in (
            "ext-parser",
            "9.9.9",
            "Ext Corp",
            "https://ext.example.com",
        ):
            assert expected in logged

    def test_log_summary_logs_no_third_party_message_when_none(
        self,
        caplog: pytest.LogCaptureFixture,
    ) -> None:
        """
        GIVEN: A registry with no external parsers.
        WHEN: log_summary() is called.
        THEN: A message containing 'No third-party parsers discovered.' is
            logged.
        """
        logged = self._summary_output(ParserRegistry(), caplog)

        assert "No third-party parsers discovered." in logged
|
||||
@@ -21,12 +21,18 @@ from documents.views import BulkEditView
|
||||
from documents.views import ChatStreamingView
|
||||
from documents.views import CorrespondentViewSet
|
||||
from documents.views import CustomFieldViewSet
|
||||
from documents.views import DeleteDocumentsView
|
||||
from documents.views import DocumentTypeViewSet
|
||||
from documents.views import EditPdfDocumentsView
|
||||
from documents.views import GlobalSearchView
|
||||
from documents.views import IndexView
|
||||
from documents.views import LogViewSet
|
||||
from documents.views import MergeDocumentsView
|
||||
from documents.views import PostDocumentView
|
||||
from documents.views import RemoteVersionView
|
||||
from documents.views import RemovePasswordDocumentsView
|
||||
from documents.views import ReprocessDocumentsView
|
||||
from documents.views import RotateDocumentsView
|
||||
from documents.views import SavedViewViewSet
|
||||
from documents.views import SearchAutoCompleteView
|
||||
from documents.views import SelectionDataView
|
||||
@@ -132,6 +138,36 @@ urlpatterns = [
|
||||
BulkEditView.as_view(),
|
||||
name="bulk_edit",
|
||||
),
|
||||
re_path(
|
||||
"^delete/",
|
||||
DeleteDocumentsView.as_view(),
|
||||
name="delete_documents",
|
||||
),
|
||||
re_path(
|
||||
"^reprocess/",
|
||||
ReprocessDocumentsView.as_view(),
|
||||
name="reprocess_documents",
|
||||
),
|
||||
re_path(
|
||||
"^rotate/",
|
||||
RotateDocumentsView.as_view(),
|
||||
name="rotate_documents",
|
||||
),
|
||||
re_path(
|
||||
"^merge/",
|
||||
MergeDocumentsView.as_view(),
|
||||
name="merge_documents",
|
||||
),
|
||||
re_path(
|
||||
"^edit_pdf/",
|
||||
EditPdfDocumentsView.as_view(),
|
||||
name="edit_pdf_documents",
|
||||
),
|
||||
re_path(
|
||||
"^remove_password/",
|
||||
RemovePasswordDocumentsView.as_view(),
|
||||
name="remove_password_documents",
|
||||
),
|
||||
re_path(
|
||||
"^bulk_download/",
|
||||
BulkDownloadView.as_view(),
|
||||
|
||||
50
src/paperless_text/parsers.py
Normal file
50
src/paperless_text/parsers.py
Normal file
@@ -0,0 +1,50 @@
|
||||
from pathlib import Path
|
||||
|
||||
from django.conf import settings
|
||||
from PIL import Image
|
||||
from PIL import ImageDraw
|
||||
from PIL import ImageFont
|
||||
|
||||
from documents.parsers import DocumentParser
|
||||
|
||||
|
||||
class TextDocumentParser(DocumentParser):
    """
    This parser directly parses a text document (.txt, .md, or .csv)
    """

    logging_name = "paperless.parsing.text"

    def get_thumbnail(self, document_path: Path, mime_type, file_name=None) -> Path:
        """
        Render the beginning of a text file onto a 500x700 white WebP image.

        Args:
            document_path: Location of the text file; a plain string is
                accepted as well and is converted to a Path.
            mime_type: Unused here; part of the parser interface.
            file_name: Unused here; part of the parser interface.

        Returns:
            Path of the generated ``thumb.webp`` inside ``self.tempdir``.
        """
        # Avoid reading entire file into memory
        max_chars = 100_000
        file_size_limit = 50 * 1024 * 1024

        # Normalise once so that the size check and the read below both work
        # for str and Path inputs (previously only the open() call did).
        document_path = Path(document_path)

        if document_path.stat().st_size > file_size_limit:
            text = "[File too large to preview]"
        else:
            # Undecodable bytes become U+FFFD rather than raising.
            with document_path.open("r", encoding="utf-8", errors="replace") as f:
                text = f.read(max_chars)

        img = Image.new("RGB", (500, 700), color="white")
        draw = ImageDraw.Draw(img)
        font = ImageFont.truetype(
            font=settings.THUMBNAIL_FONT_NAME,
            size=20,
            layout_engine=ImageFont.Layout.BASIC,
        )
        draw.multiline_text((5, 5), text, font=font, fill="black", spacing=4)

        # NOTE(review): self.tempdir is presumably set up by DocumentParser;
        # it is not defined in this class.
        out_path = self.tempdir / "thumb.webp"
        img.save(out_path, format="WEBP")

        return out_path

    def parse(self, document_path, mime_type, file_name=None) -> None:
        """
        Store the document's text content on ``self.text``.

        The read is delegated to ``read_file_handle_unicode_errors``, which is
        not defined in this class (presumably inherited from DocumentParser).
        """
        self.text = self.read_file_handle_unicode_errors(document_path)

    def get_settings(self) -> None:
        """
        This parser does not implement additional settings yet
        """
        return None
|
||||
@@ -1,13 +1,7 @@
|
||||
def get_parser(*args, **kwargs):
|
||||
from paperless.parsers.text import TextDocumentParser
|
||||
from paperless_text.parsers import TextDocumentParser
|
||||
|
||||
# The new TextDocumentParser does not accept the legacy logging_group /
|
||||
# progress_callback kwargs injected by the old signal-based consumer.
|
||||
# These are dropped here; Phase 4 will replace this signal path with the
|
||||
# new ParserRegistry so the shim can be removed at that point.
|
||||
kwargs.pop("logging_group", None)
|
||||
kwargs.pop("progress_callback", None)
|
||||
return TextDocumentParser()
|
||||
return TextDocumentParser(*args, **kwargs)
|
||||
|
||||
|
||||
def text_consumer_declaration(sender, **kwargs):
|
||||
|
||||
30
src/paperless_text/tests/conftest.py
Normal file
30
src/paperless_text/tests/conftest.py
Normal file
@@ -0,0 +1,30 @@
|
||||
from collections.abc import Generator
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from paperless_text.parsers import TextDocumentParser
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def sample_dir() -> Path:
    """Resolved path of the ``samples`` directory next to this conftest file."""
    tests_dir = Path(__file__).parent
    return (tests_dir / "samples").resolve()
|
||||
|
||||
|
||||
@pytest.fixture()
def text_parser() -> Generator[TextDocumentParser, None, None]:
    """
    Yield a TextDocumentParser and call its cleanup() once the test is done,
    whether the test passed or failed.
    """
    # Construct outside the try block: if the constructor itself raises there
    # is nothing to clean up, and referencing `parser` in `finally` would
    # raise UnboundLocalError on top of the real error.
    parser = TextDocumentParser(logging_group=None)
    try:
        yield parser
    finally:
        parser.cleanup()
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def sample_txt_file(sample_dir: Path) -> Path:
    """Path to the ``test.txt`` sample inside the samples directory."""
    return sample_dir.joinpath("test.txt")
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def malformed_txt_file(sample_dir: Path) -> Path:
    """Path to the ``decode_error.txt`` sample inside the samples directory."""
    return sample_dir.joinpath("decode_error.txt")
|
||||
69
src/paperless_text/tests/test_parser.py
Normal file
69
src/paperless_text/tests/test_parser.py
Normal file
@@ -0,0 +1,69 @@
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
from paperless_text.parsers import TextDocumentParser
|
||||
|
||||
|
||||
class TestTextParser:
    """Tests for TextDocumentParser: thumbnail generation and text parsing."""

    def test_thumbnail(
        self,
        text_parser: TextDocumentParser,
        sample_txt_file: Path,
    ) -> None:
        # just make sure that it does not crash
        f = text_parser.get_thumbnail(sample_txt_file, "text/plain")
        assert f.exists()
        assert f.is_file()

    def test_parse(
        self,
        text_parser: TextDocumentParser,
        sample_txt_file: Path,
    ) -> None:
        text_parser.parse(sample_txt_file, "text/plain")

        assert text_parser.get_text() == "This is a test file.\n"
        assert text_parser.get_archive_path() is None

    def test_parse_invalid_bytes(
        self,
        text_parser: TextDocumentParser,
        malformed_txt_file: Path,
    ) -> None:
        """
        GIVEN:
            - Text file which contains invalid UTF bytes
        WHEN:
            - The file is parsed
        THEN:
            - Parsing continues
            - Invalid bytes are replaced with U+FFFD
        """

        text_parser.parse(malformed_txt_file, "text/plain")

        # The replacement character is written as an escape so it survives
        # editors and renderers that mangle the raw U+FFFD code point.
        assert text_parser.get_text() == "Pantothens\ufffdure\n"
        assert text_parser.get_archive_path() is None

    def test_thumbnail_large_file(self, text_parser: TextDocumentParser) -> None:
        """
        GIVEN:
            - A very large text file (>50MB)
        WHEN:
            - A thumbnail is requested
        THEN:
            - A thumbnail is created without reading the entire file into memory
        """
        tmp = tempfile.NamedTemporaryFile(
            delete=False,
            mode="w",
            encoding="utf-8",
            suffix=".txt",
        )
        large_file = Path(tmp.name)
        try:
            with tmp:
                tmp.write("A" * (51 * 1024 * 1024))  # 51 MB of 'A'

            thumb = text_parser.get_thumbnail(large_file, "text/plain")
            assert thumb.exists()
            assert thumb.is_file()
        finally:
            # Always remove the 51 MB file, including when the thumbnail call
            # or an assertion above fails.
            large_file.unlink(missing_ok=True)
|
||||
@@ -12,7 +12,6 @@ def tika_parser() -> Generator[TikaDocumentParser, None, None]:
|
||||
parser = TikaDocumentParser(logging_group=None)
|
||||
yield parser
|
||||
finally:
|
||||
# TODO(stumpylog): Cleanup once all parsers are handled
|
||||
parser.cleanup()
|
||||
|
||||
|
||||
|
||||
8
uv.lock
generated
8
uv.lock
generated
@@ -1251,11 +1251,11 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "filelock"
|
||||
version = "3.24.3"
|
||||
version = "3.20.3"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/73/92/a8e2479937ff39185d20dd6a851c1a63e55849e447a55e798cc2e1f49c65/filelock-3.24.3.tar.gz", hash = "sha256:011a5644dc937c22699943ebbfc46e969cdde3e171470a6e40b9533e5a72affa", size = 37935, upload-time = "2026-02-19T00:48:20.543Z" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/1d/65/ce7f1b70157833bf3cb851b556a37d4547ceafc158aa9b34b36782f23696/filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1", size = 19485, upload-time = "2026-01-09T17:55:05.421Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/9c/0f/5d0c71a1aefeb08efff26272149e07ab922b64f46c63363756224bd6872e/filelock-3.24.3-py3-none-any.whl", hash = "sha256:426e9a4660391f7f8a810d71b0555bce9008b0a1cc342ab1f6947d37639e002d", size = 24331, upload-time = "2026-02-19T00:48:18.465Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/b5/36/7fb70f04bf00bc646cd5bb45aa9eddb15e19437a28b8fb2b4a5249fac770/filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1", size = 16701, upload-time = "2026-01-09T17:55:04.334Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2961,7 +2961,7 @@ requires-dist = [
|
||||
{ name = "drf-spectacular-sidecar", specifier = "~=2026.1.1" },
|
||||
{ name = "drf-writable-nested", specifier = "~=0.7.1" },
|
||||
{ name = "faiss-cpu", specifier = ">=1.10" },
|
||||
{ name = "filelock", specifier = "~=3.24.3" },
|
||||
{ name = "filelock", specifier = "~=3.20.3" },
|
||||
{ name = "flower", specifier = "~=2.0.1" },
|
||||
{ name = "gotenberg-client", specifier = "~=0.13.1" },
|
||||
{ name = "granian", extras = ["uvloop"], marker = "extra == 'webserver'", specifier = "~=2.7.0" },
|
||||
|
||||
Reference in New Issue
Block a user