mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-03-13 04:31:23 +00:00
Compare commits
20 Commits
fix-drop-s
...
feature-ti
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c7e1981064 | ||
|
|
d6edb6c9f7 | ||
|
|
29ccac98e0 | ||
|
|
8c432afb82 | ||
|
|
a0d0ea28cf | ||
|
|
da76c16274 | ||
|
|
f2eef8a6d1 | ||
|
|
c8e5e6c4e2 | ||
|
|
0b00c66b96 | ||
|
|
e4cadff749 | ||
|
|
d4b6075a2a | ||
|
|
130a73ec71 | ||
|
|
8ebc24bcfa | ||
|
|
d7052b8dee | ||
|
|
c96e9f5dc7 | ||
|
|
f7f162424b | ||
|
|
cdeabaf75d | ||
|
|
404ef6b40d | ||
|
|
8c40491034 | ||
|
|
0f6bdaf5de |
@@ -30,7 +30,7 @@ RUN set -eux \
|
||||
# Purpose: Installs s6-overlay and rootfs
|
||||
# Comments:
|
||||
# - Don't leave anything extra in here either
|
||||
FROM ghcr.io/astral-sh/uv:0.10.9-python3.12-trixie-slim AS s6-overlay-base
|
||||
FROM ghcr.io/astral-sh/uv:0.10.7-python3.12-trixie-slim AS s6-overlay-base
|
||||
|
||||
WORKDIR /usr/src/s6
|
||||
|
||||
|
||||
51
docs/api.md
51
docs/api.md
@@ -305,16 +305,52 @@ The following methods are supported:
|
||||
- `"merge": true or false` (defaults to false)
|
||||
- The `merge` flag determines if the supplied permissions will overwrite all existing permissions (including
|
||||
removing them) or be merged with existing permissions.
|
||||
- `edit_pdf`
|
||||
- Requires `parameters`:
|
||||
- `"doc_ids": [DOCUMENT_ID]` A list of a single document ID to edit.
|
||||
- `"operations": [OPERATION, ...]` A list of operations to perform on the documents. Each operation is a dictionary
|
||||
with the following keys:
|
||||
- `"page": PAGE_NUMBER` The page number to edit (1-based).
|
||||
- `"rotate": DEGREES` Optional rotation in degrees (90, 180, 270).
|
||||
- `"doc": OUTPUT_DOCUMENT_INDEX` Optional index of the output document for split operations.
|
||||
- Optional `parameters`:
|
||||
- `"delete_original": true` to delete the original documents after editing.
|
||||
- `"update_document": true` to add the edited PDF as a new version of the root document.
|
||||
- `"include_metadata": true` to copy metadata from the original document to the edited document.
|
||||
- `remove_password`
|
||||
- Requires `parameters`:
|
||||
- `"password": "PASSWORD_STRING"` The password to remove from the PDF documents.
|
||||
- Optional `parameters`:
|
||||
- `"update_document": true` to add the password-less PDF as a new version of the root document.
|
||||
- `"delete_original": true` to delete the original document after editing.
|
||||
- `"include_metadata": true` to copy metadata from the original document to the new password-less document.
|
||||
- `merge`
|
||||
- No additional `parameters` required.
|
||||
- The ordering of the merged document is determined by the list of IDs.
|
||||
- Optional `parameters`:
|
||||
- `"metadata_document_id": DOC_ID` apply metadata (tags, correspondent, etc.) from this document to the merged document.
|
||||
- `"delete_originals": true` to delete the original documents. This requires the calling user being the owner of
|
||||
all documents that are merged.
|
||||
- `split`
|
||||
- Requires `parameters`:
|
||||
- `"pages": [..]` The list should be a list of pages and/or a ranges, separated by commas e.g. `"[1,2-3,4,5-7]"`
|
||||
- Optional `parameters`:
|
||||
- `"delete_originals": true` to delete the original document after consumption. This requires the calling user being the owner of
|
||||
the document.
|
||||
- The split operation only accepts a single document.
|
||||
- `rotate`
|
||||
- Requires `parameters`:
|
||||
- `"degrees": DEGREES`. Must be an integer i.e. 90, 180, 270
|
||||
- `delete_pages`
|
||||
- Requires `parameters`:
|
||||
- `"pages": [..]` The list should be a list of integers e.g. `"[2,3,4]"`
|
||||
- The delete_pages operation only accepts a single document.
|
||||
- `modify_custom_fields`
|
||||
- Requires `parameters`:
|
||||
- `"add_custom_fields": { CUSTOM_FIELD_ID: VALUE }`: JSON object consisting of custom field id:value pairs to add to the document, can also be a list of custom field IDs
|
||||
to add with empty values.
|
||||
- `"remove_custom_fields": [CUSTOM_FIELD_ID]`: custom field ids to remove from the document.
|
||||
|
||||
#### Document-editing operations
|
||||
|
||||
Beginning with version 10+, the API supports individual endpoints for document-editing operations (`merge`, `rotate`, `edit_pdf`, etc), thus their documentation can be found in the API spec / viewer. Legacy document-editing methods via `/api/documents/bulk_edit/` are still supported for compatibility, are deprecated and clients should migrate to the individual endpoints before they are removed in a future version.
|
||||
|
||||
### Objects
|
||||
|
||||
Bulk editing for objects (tags, document types etc.) currently supports set permissions or delete
|
||||
@@ -431,9 +467,4 @@ Initial API version.
|
||||
#### Version 10
|
||||
|
||||
- The `show_on_dashboard` and `show_in_sidebar` fields of saved views have been
|
||||
removed. Relevant settings are now stored in the UISettings model. Compatibility is maintained
|
||||
for versions < 10 until support for API v9 is dropped.
|
||||
- Document-editing operations such as `merge`, `rotate`, and `edit_pdf` have been
|
||||
moved from the bulk edit endpoint to their own individual endpoints. Using these methods via
|
||||
the bulk edit endpoint is still supported for compatibility with versions < 10 until support
|
||||
for API v9 is dropped.
|
||||
removed. Relevant settings are now stored in the UISettings model.
|
||||
|
||||
@@ -45,7 +45,7 @@ dependencies = [
|
||||
"drf-spectacular-sidecar~=2026.1.1",
|
||||
"drf-writable-nested~=0.7.1",
|
||||
"faiss-cpu>=1.10",
|
||||
"filelock~=3.20.3",
|
||||
"filelock~=3.24.3",
|
||||
"flower~=2.0.1",
|
||||
"gotenberg-client~=0.13.1",
|
||||
"httpx-oauth~=0.16",
|
||||
|
||||
@@ -468,7 +468,7 @@
|
||||
"time": 0.951,
|
||||
"request": {
|
||||
"method": "GET",
|
||||
"url": "http://localhost:8000/api/documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true&tags__id__in=9",
|
||||
"url": "http://localhost:8000/api/documents/?page=1&page_size=50&ordering=-created&truncate_content=true&tags__id__in=9",
|
||||
"httpVersion": "HTTP/1.1",
|
||||
"cookies": [],
|
||||
"headers": [
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -534,7 +534,7 @@
|
||||
"time": 0.653,
|
||||
"request": {
|
||||
"method": "GET",
|
||||
"url": "http://localhost:8000/api/documents/?page=1&page_size=10&ordering=-created&truncate_content=true&include_selection_data=true&tags__id__all=9",
|
||||
"url": "http://localhost:8000/api/documents/?page=1&page_size=50&ordering=-created&truncate_content=true&tags__id__all=9",
|
||||
"httpVersion": "HTTP/1.1",
|
||||
"cookies": [],
|
||||
"headers": [
|
||||
|
||||
@@ -883,7 +883,7 @@
|
||||
"time": 0.93,
|
||||
"request": {
|
||||
"method": "GET",
|
||||
"url": "http://localhost:8000/api/documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true&tags__id__all=4",
|
||||
"url": "http://localhost:8000/api/documents/?page=1&page_size=50&ordering=-created&truncate_content=true&tags__id__all=4",
|
||||
"httpVersion": "HTTP/1.1",
|
||||
"cookies": [],
|
||||
"headers": [
|
||||
@@ -961,7 +961,7 @@
|
||||
"time": -1,
|
||||
"request": {
|
||||
"method": "GET",
|
||||
"url": "http://localhost:8000/api/documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true&tags__id__all=4",
|
||||
"url": "http://localhost:8000/api/documents/?page=1&page_size=50&ordering=-created&truncate_content=true&tags__id__all=4",
|
||||
"httpVersion": "HTTP/1.1",
|
||||
"cookies": [],
|
||||
"headers": [
|
||||
|
||||
@@ -16,7 +16,7 @@ test('basic filtering', async ({ page }) => {
|
||||
await expect(page).toHaveURL(/tags__id__all=9/)
|
||||
await expect(page.locator('pngx-document-list')).toHaveText(/8 documents/)
|
||||
await page.getByRole('button', { name: 'Document type' }).click()
|
||||
await page.getByRole('menuitem', { name: /^Invoice Test/ }).click()
|
||||
await page.getByRole('menuitem', { name: 'Invoice Test 3' }).click()
|
||||
await expect(page).toHaveURL(/document_type__id__in=1/)
|
||||
await expect(page.locator('pngx-document-list')).toHaveText(/3 documents/)
|
||||
await page.getByRole('button', { name: 'Reset filters' }).first().click()
|
||||
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -1217,7 +1217,7 @@
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
|
||||
<context context-type="linenumber">1758</context>
|
||||
<context context-type="linenumber">1760</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="1577733187050997705" datatype="html">
|
||||
@@ -2802,19 +2802,19 @@
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
|
||||
<context context-type="linenumber">1759</context>
|
||||
<context context-type="linenumber">1761</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">833</context>
|
||||
<context context-type="linenumber">802</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">871</context>
|
||||
<context context-type="linenumber">835</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">894</context>
|
||||
<context context-type="linenumber">854</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/manage/document-attributes/custom-fields/custom-fields.component.ts</context>
|
||||
@@ -3404,27 +3404,27 @@
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">470</context>
|
||||
<context context-type="linenumber">445</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">510</context>
|
||||
<context context-type="linenumber">485</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">548</context>
|
||||
<context context-type="linenumber">523</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">586</context>
|
||||
<context context-type="linenumber">561</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">648</context>
|
||||
<context context-type="linenumber">623</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">781</context>
|
||||
<context context-type="linenumber">756</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="994016933065248559" datatype="html">
|
||||
@@ -3512,7 +3512,7 @@
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
|
||||
<context context-type="linenumber">1812</context>
|
||||
<context context-type="linenumber">1814</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6661109599266152398" datatype="html">
|
||||
@@ -3523,7 +3523,7 @@
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
|
||||
<context context-type="linenumber">1813</context>
|
||||
<context context-type="linenumber">1815</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="5162686434580248853" datatype="html">
|
||||
@@ -3534,7 +3534,7 @@
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
|
||||
<context context-type="linenumber">1814</context>
|
||||
<context context-type="linenumber">1816</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="8157388568390631653" datatype="html">
|
||||
@@ -5499,7 +5499,7 @@
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">785</context>
|
||||
<context context-type="linenumber">760</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="4522609911791833187" datatype="html">
|
||||
@@ -7327,7 +7327,7 @@
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">415</context>
|
||||
<context context-type="linenumber">390</context>
|
||||
</context-group>
|
||||
<note priority="1" from="description">this string is used to separate processing, failed and added on the file upload widget</note>
|
||||
</trans-unit>
|
||||
@@ -7851,7 +7851,7 @@
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">783</context>
|
||||
<context context-type="linenumber">758</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="7295637485862454066" datatype="html">
|
||||
@@ -7869,7 +7869,7 @@
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">829</context>
|
||||
<context context-type="linenumber">798</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="2951161989614003846" datatype="html">
|
||||
@@ -7890,88 +7890,88 @@
|
||||
<source>Reprocess operation for "<x id="PH" equiv-text="this.document.title"/>" will begin in the background.</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
|
||||
<context context-type="linenumber">1385</context>
|
||||
<context context-type="linenumber">1387</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="4409560272830824468" datatype="html">
|
||||
<source>Error executing operation</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
|
||||
<context context-type="linenumber">1396</context>
|
||||
<context context-type="linenumber">1398</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6030453331794586802" datatype="html">
|
||||
<source>Error downloading document</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
|
||||
<context context-type="linenumber">1459</context>
|
||||
<context context-type="linenumber">1461</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="4458954481601077369" datatype="html">
|
||||
<source>Page Fit</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
|
||||
<context context-type="linenumber">1539</context>
|
||||
<context context-type="linenumber">1541</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="4663705961777238777" datatype="html">
|
||||
<source>PDF edit operation for "<x id="PH" equiv-text="this.document.title"/>" will begin in the background.</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
|
||||
<context context-type="linenumber">1779</context>
|
||||
<context context-type="linenumber">1781</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="9043972994040261999" datatype="html">
|
||||
<source>Error executing PDF edit operation</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
|
||||
<context context-type="linenumber">1791</context>
|
||||
<context context-type="linenumber">1793</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6172690334763056188" datatype="html">
|
||||
<source>Please enter the current password before attempting to remove it.</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
|
||||
<context context-type="linenumber">1802</context>
|
||||
<context context-type="linenumber">1804</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="968660764814228922" datatype="html">
|
||||
<source>Password removal operation for "<x id="PH" equiv-text="this.document.title"/>" will begin in the background.</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
|
||||
<context context-type="linenumber">1836</context>
|
||||
<context context-type="linenumber">1838</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="2282118435712883014" datatype="html">
|
||||
<source>Error executing password removal operation</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
|
||||
<context context-type="linenumber">1850</context>
|
||||
<context context-type="linenumber">1852</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="3740891324955700797" datatype="html">
|
||||
<source>Print failed.</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
|
||||
<context context-type="linenumber">1889</context>
|
||||
<context context-type="linenumber">1891</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6457245677384603573" datatype="html">
|
||||
<source>Error loading document for printing.</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
|
||||
<context context-type="linenumber">1901</context>
|
||||
<context context-type="linenumber">1903</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6085793215710522488" datatype="html">
|
||||
<source>An error occurred loading tiff: <x id="PH" equiv-text="err.toString()"/></source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
|
||||
<context context-type="linenumber">1966</context>
|
||||
<context context-type="linenumber">1968</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
|
||||
<context context-type="linenumber">1970</context>
|
||||
<context context-type="linenumber">1972</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="4958946940233632319" datatype="html">
|
||||
@@ -8215,25 +8215,25 @@
|
||||
<source>Error executing bulk operation</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">321</context>
|
||||
<context context-type="linenumber">294</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="7894972847287473517" datatype="html">
|
||||
<source>"<x id="PH" equiv-text="items[0].name"/>"</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">407</context>
|
||||
<context context-type="linenumber">382</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">413</context>
|
||||
<context context-type="linenumber">388</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="8639884465898458690" datatype="html">
|
||||
<source>"<x id="PH" equiv-text="items[0].name"/>" and "<x id="PH_1" equiv-text="items[1].name"/>"</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">409</context>
|
||||
<context context-type="linenumber">384</context>
|
||||
</context-group>
|
||||
<note priority="1" from="description">This is for messages like 'modify "tag1" and "tag2"'</note>
|
||||
</trans-unit>
|
||||
@@ -8241,7 +8241,7 @@
|
||||
<source><x id="PH" equiv-text="list"/> and "<x id="PH_1" equiv-text="items[items.length - 1].name"/>"</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">417,419</context>
|
||||
<context context-type="linenumber">392,394</context>
|
||||
</context-group>
|
||||
<note priority="1" from="description">this is for messages like 'modify "tag1", "tag2" and "tag3"'</note>
|
||||
</trans-unit>
|
||||
@@ -8249,14 +8249,14 @@
|
||||
<source>Confirm tags assignment</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">434</context>
|
||||
<context context-type="linenumber">409</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6619516195038467207" datatype="html">
|
||||
<source>This operation will add the tag "<x id="PH" equiv-text="tag.name"/>" to <x id="PH_1" equiv-text="this.list.selected.size"/> selected document(s).</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">440</context>
|
||||
<context context-type="linenumber">415</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="1894412783609570695" datatype="html">
|
||||
@@ -8265,14 +8265,14 @@
|
||||
)"/> to <x id="PH_1" equiv-text="this.list.selected.size"/> selected document(s).</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">445,447</context>
|
||||
<context context-type="linenumber">420,422</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="7181166515756808573" datatype="html">
|
||||
<source>This operation will remove the tag "<x id="PH" equiv-text="tag.name"/>" from <x id="PH_1" equiv-text="this.list.selected.size"/> selected document(s).</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">453</context>
|
||||
<context context-type="linenumber">428</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="3819792277998068944" datatype="html">
|
||||
@@ -8281,7 +8281,7 @@
|
||||
)"/> from <x id="PH_1" equiv-text="this.list.selected.size"/> selected document(s).</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">458,460</context>
|
||||
<context context-type="linenumber">433,435</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="2739066218579571288" datatype="html">
|
||||
@@ -8292,84 +8292,84 @@
|
||||
)"/> on <x id="PH_2" equiv-text="this.list.selected.size"/> selected document(s).</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">462,466</context>
|
||||
<context context-type="linenumber">437,441</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="2996713129519325161" datatype="html">
|
||||
<source>Confirm correspondent assignment</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">503</context>
|
||||
<context context-type="linenumber">478</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6900893559485781849" datatype="html">
|
||||
<source>This operation will assign the correspondent "<x id="PH" equiv-text="correspondent.name"/>" to <x id="PH_1" equiv-text="this.list.selected.size"/> selected document(s).</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">505</context>
|
||||
<context context-type="linenumber">480</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="1257522660364398440" datatype="html">
|
||||
<source>This operation will remove the correspondent from <x id="PH" equiv-text="this.list.selected.size"/> selected document(s).</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">507</context>
|
||||
<context context-type="linenumber">482</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="5393409374423140648" datatype="html">
|
||||
<source>Confirm document type assignment</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">541</context>
|
||||
<context context-type="linenumber">516</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="332180123895325027" datatype="html">
|
||||
<source>This operation will assign the document type "<x id="PH" equiv-text="documentType.name"/>" to <x id="PH_1" equiv-text="this.list.selected.size"/> selected document(s).</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">543</context>
|
||||
<context context-type="linenumber">518</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="2236642492594872779" datatype="html">
|
||||
<source>This operation will remove the document type from <x id="PH" equiv-text="this.list.selected.size"/> selected document(s).</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">545</context>
|
||||
<context context-type="linenumber">520</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6386555513013840736" datatype="html">
|
||||
<source>Confirm storage path assignment</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">579</context>
|
||||
<context context-type="linenumber">554</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="8750527458618415924" datatype="html">
|
||||
<source>This operation will assign the storage path "<x id="PH" equiv-text="storagePath.name"/>" to <x id="PH_1" equiv-text="this.list.selected.size"/> selected document(s).</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">581</context>
|
||||
<context context-type="linenumber">556</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="60728365335056946" datatype="html">
|
||||
<source>This operation will remove the storage path from <x id="PH" equiv-text="this.list.selected.size"/> selected document(s).</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">583</context>
|
||||
<context context-type="linenumber">558</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="4187352575310415704" datatype="html">
|
||||
<source>Confirm custom field assignment</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">612</context>
|
||||
<context context-type="linenumber">587</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="7966494636326273856" datatype="html">
|
||||
<source>This operation will assign the custom field "<x id="PH" equiv-text="customField.name"/>" to <x id="PH_1" equiv-text="this.list.selected.size"/> selected document(s).</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">618</context>
|
||||
<context context-type="linenumber">593</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="5789455969634598553" datatype="html">
|
||||
@@ -8378,14 +8378,14 @@
|
||||
)"/> to <x id="PH_1" equiv-text="this.list.selected.size"/> selected document(s).</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">623,625</context>
|
||||
<context context-type="linenumber">598,600</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="5648572354333199245" datatype="html">
|
||||
<source>This operation will remove the custom field "<x id="PH" equiv-text="customField.name"/>" from <x id="PH_1" equiv-text="this.list.selected.size"/> selected document(s).</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">631</context>
|
||||
<context context-type="linenumber">606</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6666899594015948817" datatype="html">
|
||||
@@ -8394,7 +8394,7 @@
|
||||
)"/> from <x id="PH_1" equiv-text="this.list.selected.size"/> selected document(s).</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">636,638</context>
|
||||
<context context-type="linenumber">611,613</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="8050047262594964176" datatype="html">
|
||||
@@ -8405,91 +8405,91 @@
|
||||
)"/> on <x id="PH_2" equiv-text="this.list.selected.size"/> selected document(s).</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">640,644</context>
|
||||
<context context-type="linenumber">615,619</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="8615059324209654051" datatype="html">
|
||||
<source>Move <x id="PH" equiv-text="this.list.selected.size"/> selected document(s) to the trash?</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">782</context>
|
||||
<context context-type="linenumber">757</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="8585195717323764335" datatype="html">
|
||||
<source>This operation will permanently recreate the archive files for <x id="PH" equiv-text="this.list.selected.size"/> selected document(s).</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">830</context>
|
||||
<context context-type="linenumber">799</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="7366623494074776040" datatype="html">
|
||||
<source>The archive files will be re-generated with the current settings.</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">831</context>
|
||||
<context context-type="linenumber">800</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6555329262222566158" datatype="html">
|
||||
<source>Rotate confirm</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">868</context>
|
||||
<context context-type="linenumber">832</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="5203024009814367559" datatype="html">
|
||||
<source>This operation will add rotated versions of the <x id="PH" equiv-text="this.list.selected.size"/> document(s).</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">869</context>
|
||||
<context context-type="linenumber">833</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="7910756456450124185" datatype="html">
|
||||
<source>Merge confirm</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">892</context>
|
||||
<context context-type="linenumber">852</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="7643543647233874431" datatype="html">
|
||||
<source>This operation will merge <x id="PH" equiv-text="this.list.selected.size"/> selected documents into a new document.</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">893</context>
|
||||
<context context-type="linenumber">853</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="7869008840945899895" datatype="html">
|
||||
<source>Merged document will be queued for consumption.</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">916</context>
|
||||
<context context-type="linenumber">872</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="476913782630693351" datatype="html">
|
||||
<source>Custom fields updated.</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">940</context>
|
||||
<context context-type="linenumber">896</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="3873496751167944011" datatype="html">
|
||||
<source>Error updating custom fields.</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">949</context>
|
||||
<context context-type="linenumber">905</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6144801143088984138" datatype="html">
|
||||
<source>Share link bundle creation requested.</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">989</context>
|
||||
<context context-type="linenumber">945</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="46019676931295023" datatype="html">
|
||||
<source>Share link bundle creation is not available yet.</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">996</context>
|
||||
<context context-type="linenumber">952</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6307402210351946694" datatype="html">
|
||||
|
||||
@@ -20,9 +20,9 @@ import { Subject, filter, takeUntil } from 'rxjs'
|
||||
import { NEGATIVE_NULL_FILTER_VALUE } from 'src/app/data/filter-rule-type'
|
||||
import { MatchingModel } from 'src/app/data/matching-model'
|
||||
import { ObjectWithPermissions } from 'src/app/data/object-with-permissions'
|
||||
import { SelectionDataItem } from 'src/app/data/results'
|
||||
import { FilterPipe } from 'src/app/pipes/filter.pipe'
|
||||
import { HotKeyService } from 'src/app/services/hot-key.service'
|
||||
import { SelectionDataItem } from 'src/app/services/rest/document.service'
|
||||
import { pngxPopperOptions } from 'src/app/utils/popper-options'
|
||||
import { LoadingComponentWithPermissions } from '../../loading-component/loading.component'
|
||||
import { ClearableBadgeComponent } from '../clearable-badge/clearable-badge.component'
|
||||
|
||||
@@ -950,8 +950,8 @@ describe('DocumentDetailComponent', () => {
|
||||
|
||||
it('should support reprocess, confirm and close modal after started', () => {
|
||||
initNormally()
|
||||
const reprocessSpy = jest.spyOn(documentService, 'reprocessDocuments')
|
||||
reprocessSpy.mockReturnValue(of(true))
|
||||
const bulkEditSpy = jest.spyOn(documentService, 'bulkEdit')
|
||||
bulkEditSpy.mockReturnValue(of(true))
|
||||
let openModal: NgbModalRef
|
||||
modalService.activeInstances.subscribe((modal) => (openModal = modal[0]))
|
||||
const modalSpy = jest.spyOn(modalService, 'open')
|
||||
@@ -959,7 +959,7 @@ describe('DocumentDetailComponent', () => {
|
||||
component.reprocess()
|
||||
const modalCloseSpy = jest.spyOn(openModal, 'close')
|
||||
openModal.componentInstance.confirmClicked.next()
|
||||
expect(reprocessSpy).toHaveBeenCalledWith([doc.id])
|
||||
expect(bulkEditSpy).toHaveBeenCalledWith([doc.id], 'reprocess', {})
|
||||
expect(modalSpy).toHaveBeenCalled()
|
||||
expect(toastSpy).toHaveBeenCalled()
|
||||
expect(modalCloseSpy).toHaveBeenCalled()
|
||||
@@ -967,13 +967,13 @@ describe('DocumentDetailComponent', () => {
|
||||
|
||||
it('should show error if redo ocr call fails', () => {
|
||||
initNormally()
|
||||
const reprocessSpy = jest.spyOn(documentService, 'reprocessDocuments')
|
||||
const bulkEditSpy = jest.spyOn(documentService, 'bulkEdit')
|
||||
let openModal: NgbModalRef
|
||||
modalService.activeInstances.subscribe((modal) => (openModal = modal[0]))
|
||||
const toastSpy = jest.spyOn(toastService, 'showError')
|
||||
component.reprocess()
|
||||
const modalCloseSpy = jest.spyOn(openModal, 'close')
|
||||
reprocessSpy.mockReturnValue(throwError(() => new Error('error occurred')))
|
||||
bulkEditSpy.mockReturnValue(throwError(() => new Error('error occurred')))
|
||||
openModal.componentInstance.confirmClicked.next()
|
||||
expect(toastSpy).toHaveBeenCalled()
|
||||
expect(modalCloseSpy).not.toHaveBeenCalled()
|
||||
@@ -1669,15 +1669,18 @@ describe('DocumentDetailComponent', () => {
|
||||
modal.componentInstance.pages = [{ page: 1, rotate: 0, splitAfter: false }]
|
||||
modal.componentInstance.confirm()
|
||||
let req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/edit_pdf/`
|
||||
`${environment.apiBaseUrl}documents/bulk_edit/`
|
||||
)
|
||||
expect(req.request.body).toEqual({
|
||||
documents: [10],
|
||||
operations: [{ page: 1, rotate: 0, doc: 0 }],
|
||||
delete_original: false,
|
||||
update_document: false,
|
||||
include_metadata: true,
|
||||
source_mode: 'explicit_selection',
|
||||
method: 'edit_pdf',
|
||||
parameters: {
|
||||
operations: [{ page: 1, rotate: 0, doc: 0 }],
|
||||
delete_original: false,
|
||||
update_document: false,
|
||||
include_metadata: true,
|
||||
source_mode: 'explicit_selection',
|
||||
},
|
||||
})
|
||||
req.error(new ErrorEvent('failed'))
|
||||
expect(errorSpy).toHaveBeenCalled()
|
||||
@@ -1688,7 +1691,7 @@ describe('DocumentDetailComponent', () => {
|
||||
modal.componentInstance.deleteOriginal = true
|
||||
modal.componentInstance.confirm()
|
||||
req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/edit_pdf/`
|
||||
`${environment.apiBaseUrl}documents/bulk_edit/`
|
||||
)
|
||||
req.flush(true)
|
||||
expect(closeSpy).toHaveBeenCalled()
|
||||
@@ -1708,15 +1711,18 @@ describe('DocumentDetailComponent', () => {
|
||||
dialog.deleteOriginal = true
|
||||
dialog.confirm()
|
||||
const req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/remove_password/`
|
||||
`${environment.apiBaseUrl}documents/bulk_edit/`
|
||||
)
|
||||
expect(req.request.body).toEqual({
|
||||
documents: [10],
|
||||
password: 'secret',
|
||||
update_document: false,
|
||||
include_metadata: false,
|
||||
delete_original: true,
|
||||
source_mode: 'explicit_selection',
|
||||
method: 'remove_password',
|
||||
parameters: {
|
||||
password: 'secret',
|
||||
update_document: false,
|
||||
include_metadata: false,
|
||||
delete_original: true,
|
||||
source_mode: 'explicit_selection',
|
||||
},
|
||||
})
|
||||
req.flush(true)
|
||||
})
|
||||
@@ -1731,7 +1737,7 @@ describe('DocumentDetailComponent', () => {
|
||||
|
||||
expect(errorSpy).toHaveBeenCalled()
|
||||
httpTestingController.expectNone(
|
||||
`${environment.apiBaseUrl}documents/remove_password/`
|
||||
`${environment.apiBaseUrl}documents/bulk_edit/`
|
||||
)
|
||||
})
|
||||
|
||||
@@ -1747,7 +1753,7 @@ describe('DocumentDetailComponent', () => {
|
||||
modal.componentInstance as PasswordRemovalConfirmDialogComponent
|
||||
dialog.confirm()
|
||||
const req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/remove_password/`
|
||||
`${environment.apiBaseUrl}documents/bulk_edit/`
|
||||
)
|
||||
req.error(new ErrorEvent('failed'))
|
||||
|
||||
@@ -1768,7 +1774,7 @@ describe('DocumentDetailComponent', () => {
|
||||
modal.componentInstance as PasswordRemovalConfirmDialogComponent
|
||||
dialog.confirm()
|
||||
const req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/remove_password/`
|
||||
`${environment.apiBaseUrl}documents/bulk_edit/`
|
||||
)
|
||||
req.flush(true)
|
||||
|
||||
|
||||
@@ -1379,25 +1379,27 @@ export class DocumentDetailComponent
|
||||
modal.componentInstance.btnCaption = $localize`Proceed`
|
||||
modal.componentInstance.confirmClicked.subscribe(() => {
|
||||
modal.componentInstance.buttonsEnabled = false
|
||||
this.documentsService.reprocessDocuments([this.document.id]).subscribe({
|
||||
next: () => {
|
||||
this.toastService.showInfo(
|
||||
$localize`Reprocess operation for "${this.document.title}" will begin in the background.`
|
||||
)
|
||||
if (modal) {
|
||||
modal.close()
|
||||
}
|
||||
},
|
||||
error: (error) => {
|
||||
if (modal) {
|
||||
modal.componentInstance.buttonsEnabled = true
|
||||
}
|
||||
this.toastService.showError(
|
||||
$localize`Error executing operation`,
|
||||
error
|
||||
)
|
||||
},
|
||||
})
|
||||
this.documentsService
|
||||
.bulkEdit([this.document.id], 'reprocess', {})
|
||||
.subscribe({
|
||||
next: () => {
|
||||
this.toastService.showInfo(
|
||||
$localize`Reprocess operation for "${this.document.title}" will begin in the background.`
|
||||
)
|
||||
if (modal) {
|
||||
modal.close()
|
||||
}
|
||||
},
|
||||
error: (error) => {
|
||||
if (modal) {
|
||||
modal.componentInstance.buttonsEnabled = true
|
||||
}
|
||||
this.toastService.showError(
|
||||
$localize`Error executing operation`,
|
||||
error
|
||||
)
|
||||
},
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
@@ -1764,7 +1766,7 @@ export class DocumentDetailComponent
|
||||
.subscribe(() => {
|
||||
modal.componentInstance.buttonsEnabled = false
|
||||
this.documentsService
|
||||
.editPdfDocuments([sourceDocumentId], {
|
||||
.bulkEdit([sourceDocumentId], 'edit_pdf', {
|
||||
operations: modal.componentInstance.getOperations(),
|
||||
delete_original: modal.componentInstance.deleteOriginal,
|
||||
update_document:
|
||||
@@ -1822,7 +1824,7 @@ export class DocumentDetailComponent
|
||||
dialog.buttonsEnabled = false
|
||||
this.networkActive = true
|
||||
this.documentsService
|
||||
.removePasswordDocuments([sourceDocumentId], {
|
||||
.bulkEdit([sourceDocumentId], 'remove_password', {
|
||||
password: this.password,
|
||||
update_document: dialog.updateDocument,
|
||||
include_metadata: dialog.includeMetadata,
|
||||
|
||||
@@ -298,7 +298,7 @@ describe('BulkEditorComponent', () => {
|
||||
parameters: { add_tags: [101], remove_tags: [] },
|
||||
})
|
||||
httpTestingController.match(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||
) // list reload
|
||||
httpTestingController.match(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
||||
@@ -330,7 +330,7 @@ describe('BulkEditorComponent', () => {
|
||||
.expectOne(`${environment.apiBaseUrl}documents/bulk_edit/`)
|
||||
.flush(true)
|
||||
httpTestingController.match(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||
) // list reload
|
||||
httpTestingController.match(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
||||
@@ -421,7 +421,7 @@ describe('BulkEditorComponent', () => {
|
||||
parameters: { correspondent: 101 },
|
||||
})
|
||||
httpTestingController.match(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||
) // list reload
|
||||
httpTestingController.match(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
||||
@@ -453,7 +453,7 @@ describe('BulkEditorComponent', () => {
|
||||
.expectOne(`${environment.apiBaseUrl}documents/bulk_edit/`)
|
||||
.flush(true)
|
||||
httpTestingController.match(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||
) // list reload
|
||||
httpTestingController.match(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
||||
@@ -519,7 +519,7 @@ describe('BulkEditorComponent', () => {
|
||||
parameters: { document_type: 101 },
|
||||
})
|
||||
httpTestingController.match(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||
) // list reload
|
||||
httpTestingController.match(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
||||
@@ -551,7 +551,7 @@ describe('BulkEditorComponent', () => {
|
||||
.expectOne(`${environment.apiBaseUrl}documents/bulk_edit/`)
|
||||
.flush(true)
|
||||
httpTestingController.match(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||
) // list reload
|
||||
httpTestingController.match(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
||||
@@ -617,7 +617,7 @@ describe('BulkEditorComponent', () => {
|
||||
parameters: { storage_path: 101 },
|
||||
})
|
||||
httpTestingController.match(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||
) // list reload
|
||||
httpTestingController.match(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
||||
@@ -649,7 +649,7 @@ describe('BulkEditorComponent', () => {
|
||||
.expectOne(`${environment.apiBaseUrl}documents/bulk_edit/`)
|
||||
.flush(true)
|
||||
httpTestingController.match(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||
) // list reload
|
||||
httpTestingController.match(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
||||
@@ -715,7 +715,7 @@ describe('BulkEditorComponent', () => {
|
||||
parameters: { add_custom_fields: [101], remove_custom_fields: [102] },
|
||||
})
|
||||
httpTestingController.match(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||
) // list reload
|
||||
httpTestingController.match(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
||||
@@ -747,7 +747,7 @@ describe('BulkEditorComponent', () => {
|
||||
.expectOne(`${environment.apiBaseUrl}documents/bulk_edit/`)
|
||||
.flush(true)
|
||||
httpTestingController.match(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||
) // list reload
|
||||
httpTestingController.match(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
||||
@@ -849,14 +849,16 @@ describe('BulkEditorComponent', () => {
|
||||
expect(modal).not.toBeUndefined()
|
||||
modal.componentInstance.confirm()
|
||||
let req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/delete/`
|
||||
`${environment.apiBaseUrl}documents/bulk_edit/`
|
||||
)
|
||||
req.flush(true)
|
||||
expect(req.request.body).toEqual({
|
||||
documents: [3, 4],
|
||||
method: 'delete',
|
||||
parameters: {},
|
||||
})
|
||||
httpTestingController.match(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||
) // list reload
|
||||
httpTestingController.match(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
||||
@@ -866,7 +868,7 @@ describe('BulkEditorComponent', () => {
|
||||
fixture.detectChanges()
|
||||
component.applyDelete()
|
||||
req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/delete/`
|
||||
`${environment.apiBaseUrl}documents/bulk_edit/`
|
||||
)
|
||||
})
|
||||
|
||||
@@ -942,14 +944,16 @@ describe('BulkEditorComponent', () => {
|
||||
expect(modal).not.toBeUndefined()
|
||||
modal.componentInstance.confirm()
|
||||
let req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/reprocess/`
|
||||
`${environment.apiBaseUrl}documents/bulk_edit/`
|
||||
)
|
||||
req.flush(true)
|
||||
expect(req.request.body).toEqual({
|
||||
documents: [3, 4],
|
||||
method: 'reprocess',
|
||||
parameters: {},
|
||||
})
|
||||
httpTestingController.match(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||
) // list reload
|
||||
httpTestingController.match(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
||||
@@ -975,16 +979,16 @@ describe('BulkEditorComponent', () => {
|
||||
modal.componentInstance.rotate()
|
||||
modal.componentInstance.confirm()
|
||||
let req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/rotate/`
|
||||
`${environment.apiBaseUrl}documents/bulk_edit/`
|
||||
)
|
||||
req.flush(true)
|
||||
expect(req.request.body).toEqual({
|
||||
documents: [3, 4],
|
||||
degrees: 90,
|
||||
source_mode: 'latest_version',
|
||||
method: 'rotate',
|
||||
parameters: { degrees: 90 },
|
||||
})
|
||||
httpTestingController.match(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||
) // list reload
|
||||
httpTestingController.match(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
||||
@@ -1017,15 +1021,16 @@ describe('BulkEditorComponent', () => {
|
||||
modal.componentInstance.metadataDocumentID = 3
|
||||
modal.componentInstance.confirm()
|
||||
let req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/merge/`
|
||||
`${environment.apiBaseUrl}documents/bulk_edit/`
|
||||
)
|
||||
req.flush(true)
|
||||
expect(req.request.body).toEqual({
|
||||
documents: [3, 4],
|
||||
metadata_document_id: 3,
|
||||
method: 'merge',
|
||||
parameters: { metadata_document_id: 3 },
|
||||
})
|
||||
httpTestingController.match(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||
) // list reload
|
||||
httpTestingController.match(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
||||
@@ -1035,16 +1040,16 @@ describe('BulkEditorComponent', () => {
|
||||
modal.componentInstance.deleteOriginals = true
|
||||
modal.componentInstance.confirm()
|
||||
req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/merge/`
|
||||
`${environment.apiBaseUrl}documents/bulk_edit/`
|
||||
)
|
||||
req.flush(true)
|
||||
expect(req.request.body).toEqual({
|
||||
documents: [3, 4],
|
||||
metadata_document_id: 3,
|
||||
delete_originals: true,
|
||||
method: 'merge',
|
||||
parameters: { metadata_document_id: 3, delete_originals: true },
|
||||
})
|
||||
httpTestingController.match(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||
) // list reload
|
||||
httpTestingController.match(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
||||
@@ -1056,16 +1061,16 @@ describe('BulkEditorComponent', () => {
|
||||
modal.componentInstance.archiveFallback = true
|
||||
modal.componentInstance.confirm()
|
||||
req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/merge/`
|
||||
`${environment.apiBaseUrl}documents/bulk_edit/`
|
||||
)
|
||||
req.flush(true)
|
||||
expect(req.request.body).toEqual({
|
||||
documents: [3, 4],
|
||||
metadata_document_id: 3,
|
||||
archive_fallback: true,
|
||||
method: 'merge',
|
||||
parameters: { metadata_document_id: 3, archive_fallback: true },
|
||||
})
|
||||
httpTestingController.match(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||
) // list reload
|
||||
httpTestingController.match(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
||||
@@ -1151,7 +1156,7 @@ describe('BulkEditorComponent', () => {
|
||||
},
|
||||
})
|
||||
httpTestingController.match(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||
) // list reload
|
||||
httpTestingController.match(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
||||
@@ -1458,7 +1463,7 @@ describe('BulkEditorComponent', () => {
|
||||
expect(toastServiceShowInfoSpy).toHaveBeenCalled()
|
||||
expect(listReloadSpy).toHaveBeenCalled()
|
||||
httpTestingController.match(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||
) // list reload
|
||||
httpTestingController.match(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
||||
|
||||
@@ -12,11 +12,10 @@ import {
|
||||
} from '@ng-bootstrap/ng-bootstrap'
|
||||
import { saveAs } from 'file-saver'
|
||||
import { NgxBootstrapIconsModule } from 'ngx-bootstrap-icons'
|
||||
import { first, map, Observable, Subject, switchMap, takeUntil } from 'rxjs'
|
||||
import { first, map, Subject, switchMap, takeUntil } from 'rxjs'
|
||||
import { ConfirmDialogComponent } from 'src/app/components/common/confirm-dialog/confirm-dialog.component'
|
||||
import { CustomField } from 'src/app/data/custom-field'
|
||||
import { MatchingModel } from 'src/app/data/matching-model'
|
||||
import { SelectionDataItem } from 'src/app/data/results'
|
||||
import { SETTINGS_KEYS } from 'src/app/data/ui-settings'
|
||||
import { IfPermissionsDirective } from 'src/app/directives/if-permissions.directive'
|
||||
import { DocumentListViewService } from 'src/app/services/document-list-view.service'
|
||||
@@ -30,9 +29,8 @@ import { CorrespondentService } from 'src/app/services/rest/correspondent.servic
|
||||
import { CustomFieldsService } from 'src/app/services/rest/custom-fields.service'
|
||||
import { DocumentTypeService } from 'src/app/services/rest/document-type.service'
|
||||
import {
|
||||
DocumentBulkEditMethod,
|
||||
DocumentService,
|
||||
MergeDocumentsRequest,
|
||||
SelectionDataItem,
|
||||
} from 'src/app/services/rest/document.service'
|
||||
import { SavedViewService } from 'src/app/services/rest/saved-view.service'
|
||||
import { ShareLinkBundleService } from 'src/app/services/rest/share-link-bundle.service'
|
||||
@@ -257,9 +255,9 @@ export class BulkEditorComponent
|
||||
this.unsubscribeNotifier.complete()
|
||||
}
|
||||
|
||||
private executeBulkEditMethod(
|
||||
private executeBulkOperation(
|
||||
modal: NgbModalRef,
|
||||
method: DocumentBulkEditMethod,
|
||||
method: string,
|
||||
args: any,
|
||||
overrideDocumentIDs?: number[]
|
||||
) {
|
||||
@@ -274,55 +272,32 @@ export class BulkEditorComponent
|
||||
)
|
||||
.pipe(first())
|
||||
.subscribe({
|
||||
next: () => this.handleOperationSuccess(modal),
|
||||
error: (error) => this.handleOperationError(modal, error),
|
||||
next: () => {
|
||||
if (args['delete_originals']) {
|
||||
this.list.selected.clear()
|
||||
}
|
||||
this.list.reload()
|
||||
this.list.reduceSelectionToFilter()
|
||||
this.list.selected.forEach((id) => {
|
||||
this.openDocumentService.refreshDocument(id)
|
||||
})
|
||||
this.savedViewService.maybeRefreshDocumentCounts()
|
||||
if (modal) {
|
||||
modal.close()
|
||||
}
|
||||
},
|
||||
error: (error) => {
|
||||
if (modal) {
|
||||
modal.componentInstance.buttonsEnabled = true
|
||||
}
|
||||
this.toastService.showError(
|
||||
$localize`Error executing bulk operation`,
|
||||
error
|
||||
)
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
private executeDocumentAction(
|
||||
modal: NgbModalRef,
|
||||
request: Observable<any>,
|
||||
options: { deleteOriginals?: boolean } = {}
|
||||
) {
|
||||
if (modal) {
|
||||
modal.componentInstance.buttonsEnabled = false
|
||||
}
|
||||
request.pipe(first()).subscribe({
|
||||
next: () => {
|
||||
this.handleOperationSuccess(modal, options.deleteOriginals ?? false)
|
||||
},
|
||||
error: (error) => this.handleOperationError(modal, error),
|
||||
})
|
||||
}
|
||||
|
||||
private handleOperationSuccess(
|
||||
modal: NgbModalRef,
|
||||
clearSelection: boolean = false
|
||||
) {
|
||||
if (clearSelection) {
|
||||
this.list.selected.clear()
|
||||
}
|
||||
this.list.reload()
|
||||
this.list.reduceSelectionToFilter()
|
||||
this.list.selected.forEach((id) => {
|
||||
this.openDocumentService.refreshDocument(id)
|
||||
})
|
||||
this.savedViewService.maybeRefreshDocumentCounts()
|
||||
if (modal) {
|
||||
modal.close()
|
||||
}
|
||||
}
|
||||
|
||||
private handleOperationError(modal: NgbModalRef, error: any) {
|
||||
if (modal) {
|
||||
modal.componentInstance.buttonsEnabled = true
|
||||
}
|
||||
this.toastService.showError(
|
||||
$localize`Error executing bulk operation`,
|
||||
error
|
||||
)
|
||||
}
|
||||
|
||||
private applySelectionData(
|
||||
items: SelectionDataItem[],
|
||||
selectionModel: FilterableDropdownSelectionModel
|
||||
@@ -471,13 +446,13 @@ export class BulkEditorComponent
|
||||
modal.componentInstance.confirmClicked
|
||||
.pipe(takeUntil(this.unsubscribeNotifier))
|
||||
.subscribe(() => {
|
||||
this.executeBulkEditMethod(modal, 'modify_tags', {
|
||||
this.executeBulkOperation(modal, 'modify_tags', {
|
||||
add_tags: changedTags.itemsToAdd.map((t) => t.id),
|
||||
remove_tags: changedTags.itemsToRemove.map((t) => t.id),
|
||||
})
|
||||
})
|
||||
} else {
|
||||
this.executeBulkEditMethod(null, 'modify_tags', {
|
||||
this.executeBulkOperation(null, 'modify_tags', {
|
||||
add_tags: changedTags.itemsToAdd.map((t) => t.id),
|
||||
remove_tags: changedTags.itemsToRemove.map((t) => t.id),
|
||||
})
|
||||
@@ -511,12 +486,12 @@ export class BulkEditorComponent
|
||||
modal.componentInstance.confirmClicked
|
||||
.pipe(takeUntil(this.unsubscribeNotifier))
|
||||
.subscribe(() => {
|
||||
this.executeBulkEditMethod(modal, 'set_correspondent', {
|
||||
this.executeBulkOperation(modal, 'set_correspondent', {
|
||||
correspondent: correspondent ? correspondent.id : null,
|
||||
})
|
||||
})
|
||||
} else {
|
||||
this.executeBulkEditMethod(null, 'set_correspondent', {
|
||||
this.executeBulkOperation(null, 'set_correspondent', {
|
||||
correspondent: correspondent ? correspondent.id : null,
|
||||
})
|
||||
}
|
||||
@@ -549,12 +524,12 @@ export class BulkEditorComponent
|
||||
modal.componentInstance.confirmClicked
|
||||
.pipe(takeUntil(this.unsubscribeNotifier))
|
||||
.subscribe(() => {
|
||||
this.executeBulkEditMethod(modal, 'set_document_type', {
|
||||
this.executeBulkOperation(modal, 'set_document_type', {
|
||||
document_type: documentType ? documentType.id : null,
|
||||
})
|
||||
})
|
||||
} else {
|
||||
this.executeBulkEditMethod(null, 'set_document_type', {
|
||||
this.executeBulkOperation(null, 'set_document_type', {
|
||||
document_type: documentType ? documentType.id : null,
|
||||
})
|
||||
}
|
||||
@@ -587,12 +562,12 @@ export class BulkEditorComponent
|
||||
modal.componentInstance.confirmClicked
|
||||
.pipe(takeUntil(this.unsubscribeNotifier))
|
||||
.subscribe(() => {
|
||||
this.executeBulkEditMethod(modal, 'set_storage_path', {
|
||||
this.executeBulkOperation(modal, 'set_storage_path', {
|
||||
storage_path: storagePath ? storagePath.id : null,
|
||||
})
|
||||
})
|
||||
} else {
|
||||
this.executeBulkEditMethod(null, 'set_storage_path', {
|
||||
this.executeBulkOperation(null, 'set_storage_path', {
|
||||
storage_path: storagePath ? storagePath.id : null,
|
||||
})
|
||||
}
|
||||
@@ -649,7 +624,7 @@ export class BulkEditorComponent
|
||||
modal.componentInstance.confirmClicked
|
||||
.pipe(takeUntil(this.unsubscribeNotifier))
|
||||
.subscribe(() => {
|
||||
this.executeBulkEditMethod(modal, 'modify_custom_fields', {
|
||||
this.executeBulkOperation(modal, 'modify_custom_fields', {
|
||||
add_custom_fields: changedCustomFields.itemsToAdd.map((f) => f.id),
|
||||
remove_custom_fields: changedCustomFields.itemsToRemove.map(
|
||||
(f) => f.id
|
||||
@@ -657,7 +632,7 @@ export class BulkEditorComponent
|
||||
})
|
||||
})
|
||||
} else {
|
||||
this.executeBulkEditMethod(null, 'modify_custom_fields', {
|
||||
this.executeBulkOperation(null, 'modify_custom_fields', {
|
||||
add_custom_fields: changedCustomFields.itemsToAdd.map((f) => f.id),
|
||||
remove_custom_fields: changedCustomFields.itemsToRemove.map(
|
||||
(f) => f.id
|
||||
@@ -787,16 +762,10 @@ export class BulkEditorComponent
|
||||
.pipe(takeUntil(this.unsubscribeNotifier))
|
||||
.subscribe(() => {
|
||||
modal.componentInstance.buttonsEnabled = false
|
||||
this.executeDocumentAction(
|
||||
modal,
|
||||
this.documentService.deleteDocuments(Array.from(this.list.selected))
|
||||
)
|
||||
this.executeBulkOperation(modal, 'delete', {})
|
||||
})
|
||||
} else {
|
||||
this.executeDocumentAction(
|
||||
null,
|
||||
this.documentService.deleteDocuments(Array.from(this.list.selected))
|
||||
)
|
||||
this.executeBulkOperation(null, 'delete', {})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -835,12 +804,7 @@ export class BulkEditorComponent
|
||||
.pipe(takeUntil(this.unsubscribeNotifier))
|
||||
.subscribe(() => {
|
||||
modal.componentInstance.buttonsEnabled = false
|
||||
this.executeDocumentAction(
|
||||
modal,
|
||||
this.documentService.reprocessDocuments(
|
||||
Array.from(this.list.selected)
|
||||
)
|
||||
)
|
||||
this.executeBulkOperation(modal, 'reprocess', {})
|
||||
})
|
||||
}
|
||||
|
||||
@@ -851,7 +815,7 @@ export class BulkEditorComponent
|
||||
modal.componentInstance.confirmClicked.subscribe(
|
||||
({ permissions, merge }) => {
|
||||
modal.componentInstance.buttonsEnabled = false
|
||||
this.executeBulkEditMethod(modal, 'set_permissions', {
|
||||
this.executeBulkOperation(modal, 'set_permissions', {
|
||||
...permissions,
|
||||
merge,
|
||||
})
|
||||
@@ -874,13 +838,9 @@ export class BulkEditorComponent
|
||||
.pipe(takeUntil(this.unsubscribeNotifier))
|
||||
.subscribe(() => {
|
||||
rotateDialog.buttonsEnabled = false
|
||||
this.executeDocumentAction(
|
||||
modal,
|
||||
this.documentService.rotateDocuments(
|
||||
Array.from(this.list.selected),
|
||||
rotateDialog.degrees
|
||||
)
|
||||
)
|
||||
this.executeBulkOperation(modal, 'rotate', {
|
||||
degrees: rotateDialog.degrees,
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
@@ -896,22 +856,18 @@ export class BulkEditorComponent
|
||||
mergeDialog.confirmClicked
|
||||
.pipe(takeUntil(this.unsubscribeNotifier))
|
||||
.subscribe(() => {
|
||||
const args: MergeDocumentsRequest = {}
|
||||
const args = {}
|
||||
if (mergeDialog.metadataDocumentID > -1) {
|
||||
args.metadata_document_id = mergeDialog.metadataDocumentID
|
||||
args['metadata_document_id'] = mergeDialog.metadataDocumentID
|
||||
}
|
||||
if (mergeDialog.deleteOriginals) {
|
||||
args.delete_originals = true
|
||||
args['delete_originals'] = true
|
||||
}
|
||||
if (mergeDialog.archiveFallback) {
|
||||
args.archive_fallback = true
|
||||
args['archive_fallback'] = true
|
||||
}
|
||||
mergeDialog.buttonsEnabled = false
|
||||
this.executeDocumentAction(
|
||||
modal,
|
||||
this.documentService.mergeDocuments(mergeDialog.documentIDs, args),
|
||||
{ deleteOriginals: !!args.delete_originals }
|
||||
)
|
||||
this.executeBulkOperation(modal, 'merge', args, mergeDialog.documentIDs)
|
||||
this.toastService.showInfo(
|
||||
$localize`Merged document will be queued for consumption.`
|
||||
)
|
||||
|
||||
@@ -76,7 +76,6 @@ import {
|
||||
FILTER_TITLE_CONTENT,
|
||||
NEGATIVE_NULL_FILTER_VALUE,
|
||||
} from 'src/app/data/filter-rule-type'
|
||||
import { SelectionData, SelectionDataItem } from 'src/app/data/results'
|
||||
import {
|
||||
PermissionAction,
|
||||
PermissionType,
|
||||
@@ -85,7 +84,11 @@ import {
|
||||
import { CorrespondentService } from 'src/app/services/rest/correspondent.service'
|
||||
import { CustomFieldsService } from 'src/app/services/rest/custom-fields.service'
|
||||
import { DocumentTypeService } from 'src/app/services/rest/document-type.service'
|
||||
import { DocumentService } from 'src/app/services/rest/document.service'
|
||||
import {
|
||||
DocumentService,
|
||||
SelectionData,
|
||||
SelectionDataItem,
|
||||
} from 'src/app/services/rest/document.service'
|
||||
import { SearchService } from 'src/app/services/rest/search.service'
|
||||
import { StoragePathService } from 'src/app/services/rest/storage-path.service'
|
||||
import { TagService } from 'src/app/services/rest/tag.service'
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
import { Document } from './document'
|
||||
|
||||
export interface Results<T> {
|
||||
count: number
|
||||
|
||||
@@ -7,20 +5,3 @@ export interface Results<T> {
|
||||
|
||||
all: number[]
|
||||
}
|
||||
|
||||
export interface SelectionDataItem {
|
||||
id: number
|
||||
document_count: number
|
||||
}
|
||||
|
||||
export interface SelectionData {
|
||||
selected_storage_paths: SelectionDataItem[]
|
||||
selected_correspondents: SelectionDataItem[]
|
||||
selected_tags: SelectionDataItem[]
|
||||
selected_document_types: SelectionDataItem[]
|
||||
selected_custom_fields: SelectionDataItem[]
|
||||
}
|
||||
|
||||
export interface DocumentResults extends Results<Document> {
|
||||
selection_data?: SelectionData
|
||||
}
|
||||
|
||||
@@ -126,10 +126,13 @@ describe('DocumentListViewService', () => {
|
||||
expect(documentListViewService.currentPage).toEqual(1)
|
||||
documentListViewService.reload()
|
||||
const req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||
)
|
||||
expect(req.request.method).toEqual('GET')
|
||||
req.flush(full_results)
|
||||
httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/selection_data/`
|
||||
)
|
||||
expect(req.request.method).toEqual('GET')
|
||||
expect(documentListViewService.isReloading).toBeFalsy()
|
||||
expect(documentListViewService.activeSavedViewId).toBeNull()
|
||||
@@ -141,12 +144,12 @@ describe('DocumentListViewService', () => {
|
||||
it('should handle error on page request out of range', () => {
|
||||
documentListViewService.currentPage = 50
|
||||
let req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/?page=50&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
||||
`${environment.apiBaseUrl}documents/?page=50&page_size=50&ordering=-created&truncate_content=true`
|
||||
)
|
||||
expect(req.request.method).toEqual('GET')
|
||||
req.flush([], { status: 404, statusText: 'Unexpected error' })
|
||||
req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||
)
|
||||
expect(req.request.method).toEqual('GET')
|
||||
expect(documentListViewService.currentPage).toEqual(1)
|
||||
@@ -163,7 +166,7 @@ describe('DocumentListViewService', () => {
|
||||
]
|
||||
documentListViewService.setFilterRules(filterRulesAny)
|
||||
let req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true&tags__id__in=${tags__id__in}`
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&tags__id__in=${tags__id__in}`
|
||||
)
|
||||
expect(req.request.method).toEqual('GET')
|
||||
req.flush(
|
||||
@@ -171,13 +174,13 @@ describe('DocumentListViewService', () => {
|
||||
{ status: 404, statusText: 'Unexpected error' }
|
||||
)
|
||||
req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||
)
|
||||
expect(req.request.method).toEqual('GET')
|
||||
// reset the list
|
||||
documentListViewService.setFilterRules([])
|
||||
req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||
)
|
||||
})
|
||||
|
||||
@@ -185,7 +188,7 @@ describe('DocumentListViewService', () => {
|
||||
documentListViewService.currentPage = 1
|
||||
documentListViewService.sortField = 'custom_field_999'
|
||||
let req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-custom_field_999&truncate_content=true&include_selection_data=true`
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-custom_field_999&truncate_content=true`
|
||||
)
|
||||
expect(req.request.method).toEqual('GET')
|
||||
req.flush(
|
||||
@@ -194,7 +197,7 @@ describe('DocumentListViewService', () => {
|
||||
)
|
||||
// resets itself
|
||||
req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||
)
|
||||
})
|
||||
|
||||
@@ -209,7 +212,7 @@ describe('DocumentListViewService', () => {
|
||||
]
|
||||
documentListViewService.setFilterRules(filterRulesAny)
|
||||
let req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true&tags__id__in=${tags__id__in}`
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&tags__id__in=${tags__id__in}`
|
||||
)
|
||||
expect(req.request.method).toEqual('GET')
|
||||
req.flush('Generic error', { status: 404, statusText: 'Unexpected error' })
|
||||
@@ -217,7 +220,7 @@ describe('DocumentListViewService', () => {
|
||||
// reset the list
|
||||
documentListViewService.setFilterRules([])
|
||||
req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||
)
|
||||
})
|
||||
|
||||
@@ -226,7 +229,7 @@ describe('DocumentListViewService', () => {
|
||||
expect(documentListViewService.sortReverse).toBeTruthy()
|
||||
documentListViewService.setSort('added', false)
|
||||
let req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=added&truncate_content=true&include_selection_data=true`
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=added&truncate_content=true`
|
||||
)
|
||||
expect(req.request.method).toEqual('GET')
|
||||
expect(documentListViewService.sortField).toEqual('added')
|
||||
@@ -234,12 +237,12 @@ describe('DocumentListViewService', () => {
|
||||
|
||||
documentListViewService.sortField = 'created'
|
||||
req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=created&truncate_content=true&include_selection_data=true`
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=created&truncate_content=true`
|
||||
)
|
||||
expect(documentListViewService.sortField).toEqual('created')
|
||||
documentListViewService.sortReverse = true
|
||||
req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||
)
|
||||
expect(req.request.method).toEqual('GET')
|
||||
expect(documentListViewService.sortReverse).toBeTruthy()
|
||||
@@ -259,7 +262,7 @@ describe('DocumentListViewService', () => {
|
||||
const req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/?page=${page}&page_size=${
|
||||
documentListViewService.pageSize
|
||||
}&ordering=${reverse ? '-' : ''}${sort}&truncate_content=true&include_selection_data=true`
|
||||
}&ordering=${reverse ? '-' : ''}${sort}&truncate_content=true`
|
||||
)
|
||||
expect(req.request.method).toEqual('GET')
|
||||
expect(documentListViewService.currentPage).toEqual(page)
|
||||
@@ -276,7 +279,7 @@ describe('DocumentListViewService', () => {
|
||||
}
|
||||
documentListViewService.loadFromQueryParams(convertToParamMap(params))
|
||||
let req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/?page=${documentListViewService.currentPage}&page_size=${documentListViewService.pageSize}&ordering=-added&truncate_content=true&include_selection_data=true&tags__id__all=${tags__id__all}`
|
||||
`${environment.apiBaseUrl}documents/?page=${documentListViewService.currentPage}&page_size=${documentListViewService.pageSize}&ordering=-added&truncate_content=true&tags__id__all=${tags__id__all}`
|
||||
)
|
||||
expect(req.request.method).toEqual('GET')
|
||||
expect(documentListViewService.filterRules).toEqual([
|
||||
@@ -286,12 +289,15 @@ describe('DocumentListViewService', () => {
|
||||
},
|
||||
])
|
||||
req.flush(full_results)
|
||||
httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/selection_data/`
|
||||
)
|
||||
})
|
||||
|
||||
it('should use filter rules to update query params', () => {
|
||||
documentListViewService.setFilterRules(filterRules)
|
||||
const req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/?page=${documentListViewService.currentPage}&page_size=${documentListViewService.pageSize}&ordering=-created&truncate_content=true&include_selection_data=true&tags__id__all=${tags__id__all}`
|
||||
`${environment.apiBaseUrl}documents/?page=${documentListViewService.currentPage}&page_size=${documentListViewService.pageSize}&ordering=-created&truncate_content=true&tags__id__all=${tags__id__all}`
|
||||
)
|
||||
expect(req.request.method).toEqual('GET')
|
||||
})
|
||||
@@ -300,26 +306,34 @@ describe('DocumentListViewService', () => {
|
||||
documentListViewService.currentPage = 2
|
||||
let req = httpTestingController.expectOne((request) =>
|
||||
request.urlWithParams.startsWith(
|
||||
`${environment.apiBaseUrl}documents/?page=2&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
||||
`${environment.apiBaseUrl}documents/?page=2&page_size=50&ordering=-created&truncate_content=true`
|
||||
)
|
||||
)
|
||||
expect(req.request.method).toEqual('GET')
|
||||
req.flush(full_results)
|
||||
req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/selection_data/`
|
||||
)
|
||||
req.flush([])
|
||||
|
||||
documentListViewService.setFilterRules(filterRules, true)
|
||||
|
||||
const filteredReqs = httpTestingController.match(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true&tags__id__all=${tags__id__all}`
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&tags__id__all=${tags__id__all}`
|
||||
)
|
||||
expect(filteredReqs).toHaveLength(1)
|
||||
filteredReqs[0].flush(full_results)
|
||||
req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/selection_data/`
|
||||
)
|
||||
req.flush([])
|
||||
expect(documentListViewService.currentPage).toEqual(1)
|
||||
})
|
||||
|
||||
it('should support quick filter', () => {
|
||||
documentListViewService.quickFilter(filterRules)
|
||||
const req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/?page=${documentListViewService.currentPage}&page_size=${documentListViewService.pageSize}&ordering=-created&truncate_content=true&include_selection_data=true&tags__id__all=${tags__id__all}`
|
||||
`${environment.apiBaseUrl}documents/?page=${documentListViewService.currentPage}&page_size=${documentListViewService.pageSize}&ordering=-created&truncate_content=true&tags__id__all=${tags__id__all}`
|
||||
)
|
||||
expect(req.request.method).toEqual('GET')
|
||||
})
|
||||
@@ -342,21 +356,21 @@ describe('DocumentListViewService', () => {
|
||||
convertToParamMap(params)
|
||||
)
|
||||
let req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/?page=${page}&page_size=${documentListViewService.pageSize}&ordering=-added&truncate_content=true&include_selection_data=true&tags__id__all=${tags__id__all}`
|
||||
`${environment.apiBaseUrl}documents/?page=${page}&page_size=${documentListViewService.pageSize}&ordering=-added&truncate_content=true&tags__id__all=${tags__id__all}`
|
||||
)
|
||||
expect(req.request.method).toEqual('GET')
|
||||
// reset the list
|
||||
documentListViewService.currentPage = 1
|
||||
req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-added&truncate_content=true&include_selection_data=true&tags__id__all=9`
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-added&truncate_content=true&tags__id__all=9`
|
||||
)
|
||||
documentListViewService.setFilterRules([])
|
||||
req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-added&truncate_content=true&include_selection_data=true`
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-added&truncate_content=true`
|
||||
)
|
||||
documentListViewService.sortField = 'created'
|
||||
req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||
)
|
||||
documentListViewService.activateSavedView(null)
|
||||
})
|
||||
@@ -364,18 +378,21 @@ describe('DocumentListViewService', () => {
|
||||
it('should support navigating next / previous', () => {
|
||||
documentListViewService.setFilterRules([])
|
||||
let req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||
)
|
||||
expect(documentListViewService.currentPage).toEqual(1)
|
||||
documentListViewService.pageSize = 3
|
||||
req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=3&ordering=-created&truncate_content=true&include_selection_data=true`
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=3&ordering=-created&truncate_content=true`
|
||||
)
|
||||
expect(req.request.method).toEqual('GET')
|
||||
req.flush({
|
||||
count: 3,
|
||||
results: documents.slice(0, 3),
|
||||
})
|
||||
httpTestingController
|
||||
.expectOne(`${environment.apiBaseUrl}documents/selection_data/`)
|
||||
.flush([])
|
||||
expect(documentListViewService.hasNext(documents[0].id)).toBeTruthy()
|
||||
expect(documentListViewService.hasPrevious(documents[0].id)).toBeFalsy()
|
||||
documentListViewService.getNext(documents[0].id).subscribe((docId) => {
|
||||
@@ -422,7 +439,7 @@ describe('DocumentListViewService', () => {
|
||||
expect(documentListViewService.currentPage).toEqual(1)
|
||||
documentListViewService.pageSize = 3
|
||||
httpTestingController.match(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=3&ordering=-created&truncate_content=true&include_selection_data=true`
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=3&ordering=-created&truncate_content=true`
|
||||
)
|
||||
jest
|
||||
.spyOn(documentListViewService, 'getLastPage')
|
||||
@@ -437,7 +454,7 @@ describe('DocumentListViewService', () => {
|
||||
expect(reloadSpy).toHaveBeenCalled()
|
||||
expect(documentListViewService.currentPage).toEqual(2)
|
||||
const reqs = httpTestingController.match(
|
||||
`${environment.apiBaseUrl}documents/?page=2&page_size=3&ordering=-created&truncate_content=true&include_selection_data=true`
|
||||
`${environment.apiBaseUrl}documents/?page=2&page_size=3&ordering=-created&truncate_content=true`
|
||||
)
|
||||
expect(reqs.length).toBeGreaterThan(0)
|
||||
})
|
||||
@@ -472,11 +489,11 @@ describe('DocumentListViewService', () => {
|
||||
.mockReturnValue(documents)
|
||||
documentListViewService.currentPage = 2
|
||||
httpTestingController.match(
|
||||
`${environment.apiBaseUrl}documents/?page=2&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
||||
`${environment.apiBaseUrl}documents/?page=2&page_size=50&ordering=-created&truncate_content=true`
|
||||
)
|
||||
documentListViewService.pageSize = 3
|
||||
httpTestingController.match(
|
||||
`${environment.apiBaseUrl}documents/?page=2&page_size=3&ordering=-created&truncate_content=true&include_selection_data=true`
|
||||
`${environment.apiBaseUrl}documents/?page=2&page_size=3&ordering=-created&truncate_content=true`
|
||||
)
|
||||
const reloadSpy = jest.spyOn(documentListViewService, 'reload')
|
||||
documentListViewService.getPrevious(1).subscribe({
|
||||
@@ -486,7 +503,7 @@ describe('DocumentListViewService', () => {
|
||||
expect(reloadSpy).toHaveBeenCalled()
|
||||
expect(documentListViewService.currentPage).toEqual(1)
|
||||
const reqs = httpTestingController.match(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=3&ordering=-created&truncate_content=true&include_selection_data=true`
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=3&ordering=-created&truncate_content=true`
|
||||
)
|
||||
expect(reqs.length).toBeGreaterThan(0)
|
||||
})
|
||||
@@ -499,10 +516,13 @@ describe('DocumentListViewService', () => {
|
||||
it('should support select a document', () => {
|
||||
documentListViewService.reload()
|
||||
const req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||
)
|
||||
expect(req.request.method).toEqual('GET')
|
||||
req.flush(full_results)
|
||||
httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/selection_data/`
|
||||
)
|
||||
documentListViewService.toggleSelected(documents[0])
|
||||
expect(documentListViewService.isSelected(documents[0])).toBeTruthy()
|
||||
documentListViewService.toggleSelected(documents[0])
|
||||
@@ -524,13 +544,16 @@ describe('DocumentListViewService', () => {
|
||||
it('should support select page', () => {
|
||||
documentListViewService.pageSize = 3
|
||||
const req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=3&ordering=-created&truncate_content=true&include_selection_data=true`
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=3&ordering=-created&truncate_content=true`
|
||||
)
|
||||
expect(req.request.method).toEqual('GET')
|
||||
req.flush({
|
||||
count: 3,
|
||||
results: documents.slice(0, 3),
|
||||
})
|
||||
httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/selection_data/`
|
||||
)
|
||||
documentListViewService.selectPage()
|
||||
expect(documentListViewService.selected.size).toEqual(3)
|
||||
expect(documentListViewService.isSelected(documents[5])).toBeFalsy()
|
||||
@@ -539,10 +562,13 @@ describe('DocumentListViewService', () => {
|
||||
it('should support select range', () => {
|
||||
documentListViewService.reload()
|
||||
const req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||
)
|
||||
expect(req.request.method).toEqual('GET')
|
||||
req.flush(full_results)
|
||||
httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/selection_data/`
|
||||
)
|
||||
documentListViewService.toggleSelected(documents[0])
|
||||
expect(documentListViewService.isSelected(documents[0])).toBeTruthy()
|
||||
documentListViewService.selectRangeTo(documents[2])
|
||||
@@ -562,7 +588,7 @@ describe('DocumentListViewService', () => {
|
||||
|
||||
documentListViewService.setFilterRules(filterRules)
|
||||
httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true&tags__id__all=9`
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&tags__id__all=9`
|
||||
)
|
||||
const reqs = httpTestingController.match(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id&tags__id__all=9`
|
||||
@@ -578,7 +604,7 @@ describe('DocumentListViewService', () => {
|
||||
const cancelSpy = jest.spyOn(documentListViewService, 'cancelPending')
|
||||
documentListViewService.reload()
|
||||
httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true&tags__id__all=9`
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&tags__id__all=9`
|
||||
)
|
||||
expect(cancelSpy).toHaveBeenCalled()
|
||||
})
|
||||
@@ -597,7 +623,7 @@ describe('DocumentListViewService', () => {
|
||||
documentListViewService.setFilterRules([])
|
||||
expect(documentListViewService.sortField).toEqual('created')
|
||||
httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||
)
|
||||
})
|
||||
|
||||
@@ -624,11 +650,11 @@ describe('DocumentListViewService', () => {
|
||||
expect(localStorageSpy).toHaveBeenCalled()
|
||||
// reload triggered
|
||||
httpTestingController.match(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||
)
|
||||
documentListViewService.displayFields = null
|
||||
httpTestingController.match(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||
)
|
||||
expect(documentListViewService.displayFields).toEqual(
|
||||
DEFAULT_DISPLAY_FIELDS.filter((f) => f.id !== DisplayField.ADDED).map(
|
||||
@@ -668,7 +694,7 @@ describe('DocumentListViewService', () => {
|
||||
it('should generate quick filter URL preserving default state', () => {
|
||||
documentListViewService.reload()
|
||||
httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||
)
|
||||
const urlTree = documentListViewService.getQuickFilterUrl(filterRules)
|
||||
expect(urlTree).toBeDefined()
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import { Injectable, inject } from '@angular/core'
|
||||
import { ParamMap, Router, UrlTree } from '@angular/router'
|
||||
import { Observable, Subject, takeUntil } from 'rxjs'
|
||||
import { Observable, Subject, first, takeUntil } from 'rxjs'
|
||||
import {
|
||||
DEFAULT_DISPLAY_FIELDS,
|
||||
DisplayField,
|
||||
@@ -8,7 +8,6 @@ import {
|
||||
Document,
|
||||
} from '../data/document'
|
||||
import { FilterRule } from '../data/filter-rule'
|
||||
import { DocumentResults, SelectionData } from '../data/results'
|
||||
import { SavedView } from '../data/saved-view'
|
||||
import { DOCUMENT_LIST_SERVICE } from '../data/storage-keys'
|
||||
import { SETTINGS_KEYS } from '../data/ui-settings'
|
||||
@@ -18,7 +17,7 @@ import {
|
||||
isFullTextFilterRule,
|
||||
} from '../utils/filter-rules'
|
||||
import { paramsFromViewState, paramsToViewState } from '../utils/query-params'
|
||||
import { DocumentService } from './rest/document.service'
|
||||
import { DocumentService, SelectionData } from './rest/document.service'
|
||||
import { SettingsService } from './settings.service'
|
||||
|
||||
const LIST_DEFAULT_DISPLAY_FIELDS: DisplayField[] = DEFAULT_DISPLAY_FIELDS.map(
|
||||
@@ -261,17 +260,27 @@ export class DocumentListViewService {
|
||||
activeListViewState.sortField,
|
||||
activeListViewState.sortReverse,
|
||||
activeListViewState.filterRules,
|
||||
{ truncate_content: true, include_selection_data: true }
|
||||
{ truncate_content: true }
|
||||
)
|
||||
.pipe(takeUntil(this.unsubscribeNotifier))
|
||||
.subscribe({
|
||||
next: (result) => {
|
||||
const resultWithSelectionData = result as DocumentResults
|
||||
this.initialized = true
|
||||
this.isReloading = false
|
||||
activeListViewState.collectionSize = result.count
|
||||
activeListViewState.documents = result.results
|
||||
this.selectionData = resultWithSelectionData.selection_data ?? null
|
||||
|
||||
this.documentService
|
||||
.getSelectionData(result.all)
|
||||
.pipe(first())
|
||||
.subscribe({
|
||||
next: (selectionData) => {
|
||||
this.selectionData = selectionData
|
||||
},
|
||||
error: () => {
|
||||
this.selectionData = null
|
||||
},
|
||||
})
|
||||
|
||||
if (updateQueryParams && !this._activeSavedViewId) {
|
||||
let base = ['/documents']
|
||||
|
||||
@@ -230,88 +230,6 @@ describe(`DocumentService`, () => {
|
||||
})
|
||||
})
|
||||
|
||||
it('should call appropriate api endpoint for delete documents', () => {
|
||||
const ids = [1, 2, 3]
|
||||
subscription = service.deleteDocuments(ids).subscribe()
|
||||
const req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}${endpoint}/delete/`
|
||||
)
|
||||
expect(req.request.method).toEqual('POST')
|
||||
expect(req.request.body).toEqual({
|
||||
documents: ids,
|
||||
})
|
||||
})
|
||||
|
||||
it('should call appropriate api endpoint for reprocess documents', () => {
|
||||
const ids = [1, 2, 3]
|
||||
subscription = service.reprocessDocuments(ids).subscribe()
|
||||
const req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}${endpoint}/reprocess/`
|
||||
)
|
||||
expect(req.request.method).toEqual('POST')
|
||||
expect(req.request.body).toEqual({
|
||||
documents: ids,
|
||||
})
|
||||
})
|
||||
|
||||
it('should call appropriate api endpoint for rotate documents', () => {
|
||||
const ids = [1, 2, 3]
|
||||
subscription = service.rotateDocuments(ids, 90).subscribe()
|
||||
const req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}${endpoint}/rotate/`
|
||||
)
|
||||
expect(req.request.method).toEqual('POST')
|
||||
expect(req.request.body).toEqual({
|
||||
documents: ids,
|
||||
degrees: 90,
|
||||
source_mode: 'latest_version',
|
||||
})
|
||||
})
|
||||
|
||||
it('should call appropriate api endpoint for merge documents', () => {
|
||||
const ids = [1, 2, 3]
|
||||
const args = { metadata_document_id: 1, delete_originals: true }
|
||||
subscription = service.mergeDocuments(ids, args).subscribe()
|
||||
const req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}${endpoint}/merge/`
|
||||
)
|
||||
expect(req.request.method).toEqual('POST')
|
||||
expect(req.request.body).toEqual({
|
||||
documents: ids,
|
||||
metadata_document_id: 1,
|
||||
delete_originals: true,
|
||||
})
|
||||
})
|
||||
|
||||
it('should call appropriate api endpoint for edit pdf', () => {
|
||||
const ids = [1]
|
||||
const args = { operations: [{ page: 1, rotate: 90, doc: 0 }] }
|
||||
subscription = service.editPdfDocuments(ids, args).subscribe()
|
||||
const req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}${endpoint}/edit_pdf/`
|
||||
)
|
||||
expect(req.request.method).toEqual('POST')
|
||||
expect(req.request.body).toEqual({
|
||||
documents: ids,
|
||||
operations: [{ page: 1, rotate: 90, doc: 0 }],
|
||||
})
|
||||
})
|
||||
|
||||
it('should call appropriate api endpoint for remove password', () => {
|
||||
const ids = [1]
|
||||
const args = { password: 'secret', update_document: true }
|
||||
subscription = service.removePasswordDocuments(ids, args).subscribe()
|
||||
const req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}${endpoint}/remove_password/`
|
||||
)
|
||||
expect(req.request.method).toEqual('POST')
|
||||
expect(req.request.body).toEqual({
|
||||
documents: ids,
|
||||
password: 'secret',
|
||||
update_document: true,
|
||||
})
|
||||
})
|
||||
|
||||
it('should return the correct preview URL for a single document', () => {
|
||||
let url = service.getPreviewUrl(documents[0].id)
|
||||
expect(url).toEqual(
|
||||
|
||||
@@ -12,7 +12,7 @@ import {
|
||||
import { DocumentMetadata } from 'src/app/data/document-metadata'
|
||||
import { DocumentSuggestions } from 'src/app/data/document-suggestions'
|
||||
import { FilterRule } from 'src/app/data/filter-rule'
|
||||
import { Results, SelectionData } from 'src/app/data/results'
|
||||
import { Results } from 'src/app/data/results'
|
||||
import { SETTINGS_KEYS } from 'src/app/data/ui-settings'
|
||||
import { queryParamsFromFilterRules } from '../../utils/query-params'
|
||||
import {
|
||||
@@ -24,50 +24,24 @@ import { SettingsService } from '../settings.service'
|
||||
import { AbstractPaperlessService } from './abstract-paperless-service'
|
||||
import { CustomFieldsService } from './custom-fields.service'
|
||||
|
||||
export interface SelectionDataItem {
|
||||
id: number
|
||||
document_count: number
|
||||
}
|
||||
|
||||
export interface SelectionData {
|
||||
selected_storage_paths: SelectionDataItem[]
|
||||
selected_correspondents: SelectionDataItem[]
|
||||
selected_tags: SelectionDataItem[]
|
||||
selected_document_types: SelectionDataItem[]
|
||||
selected_custom_fields: SelectionDataItem[]
|
||||
}
|
||||
|
||||
export enum BulkEditSourceMode {
|
||||
LATEST_VERSION = 'latest_version',
|
||||
EXPLICIT_SELECTION = 'explicit_selection',
|
||||
}
|
||||
|
||||
export type DocumentBulkEditMethod =
|
||||
| 'set_correspondent'
|
||||
| 'set_document_type'
|
||||
| 'set_storage_path'
|
||||
| 'add_tag'
|
||||
| 'remove_tag'
|
||||
| 'modify_tags'
|
||||
| 'modify_custom_fields'
|
||||
| 'set_permissions'
|
||||
|
||||
export interface MergeDocumentsRequest {
|
||||
metadata_document_id?: number
|
||||
delete_originals?: boolean
|
||||
archive_fallback?: boolean
|
||||
source_mode?: BulkEditSourceMode
|
||||
}
|
||||
|
||||
export interface EditPdfOperation {
|
||||
page: number
|
||||
rotate?: number
|
||||
doc?: number
|
||||
}
|
||||
|
||||
export interface EditPdfDocumentsRequest {
|
||||
operations: EditPdfOperation[]
|
||||
delete_original?: boolean
|
||||
update_document?: boolean
|
||||
include_metadata?: boolean
|
||||
source_mode?: BulkEditSourceMode
|
||||
}
|
||||
|
||||
export interface RemovePasswordDocumentsRequest {
|
||||
password: string
|
||||
update_document?: boolean
|
||||
delete_original?: boolean
|
||||
include_metadata?: boolean
|
||||
source_mode?: BulkEditSourceMode
|
||||
}
|
||||
|
||||
@Injectable({
|
||||
providedIn: 'root',
|
||||
})
|
||||
@@ -325,7 +299,7 @@ export class DocumentService extends AbstractPaperlessService<Document> {
|
||||
return this.http.get<DocumentMetadata>(url.toString())
|
||||
}
|
||||
|
||||
bulkEdit(ids: number[], method: DocumentBulkEditMethod, args: any) {
|
||||
bulkEdit(ids: number[], method: string, args: any) {
|
||||
return this.http.post(this.getResourceUrl(null, 'bulk_edit'), {
|
||||
documents: ids,
|
||||
method: method,
|
||||
@@ -333,54 +307,6 @@ export class DocumentService extends AbstractPaperlessService<Document> {
|
||||
})
|
||||
}
|
||||
|
||||
deleteDocuments(ids: number[]) {
|
||||
return this.http.post(this.getResourceUrl(null, 'delete'), {
|
||||
documents: ids,
|
||||
})
|
||||
}
|
||||
|
||||
reprocessDocuments(ids: number[]) {
|
||||
return this.http.post(this.getResourceUrl(null, 'reprocess'), {
|
||||
documents: ids,
|
||||
})
|
||||
}
|
||||
|
||||
rotateDocuments(
|
||||
ids: number[],
|
||||
degrees: number,
|
||||
sourceMode: BulkEditSourceMode = BulkEditSourceMode.LATEST_VERSION
|
||||
) {
|
||||
return this.http.post(this.getResourceUrl(null, 'rotate'), {
|
||||
documents: ids,
|
||||
degrees,
|
||||
source_mode: sourceMode,
|
||||
})
|
||||
}
|
||||
|
||||
mergeDocuments(ids: number[], request: MergeDocumentsRequest = {}) {
|
||||
return this.http.post(this.getResourceUrl(null, 'merge'), {
|
||||
documents: ids,
|
||||
...request,
|
||||
})
|
||||
}
|
||||
|
||||
editPdfDocuments(ids: number[], request: EditPdfDocumentsRequest) {
|
||||
return this.http.post(this.getResourceUrl(null, 'edit_pdf'), {
|
||||
documents: ids,
|
||||
...request,
|
||||
})
|
||||
}
|
||||
|
||||
removePasswordDocuments(
|
||||
ids: number[],
|
||||
request: RemovePasswordDocumentsRequest
|
||||
) {
|
||||
return this.http.post(this.getResourceUrl(null, 'remove_password'), {
|
||||
documents: ids,
|
||||
...request,
|
||||
})
|
||||
}
|
||||
|
||||
getSelectionData(ids: number[]): Observable<SelectionData> {
|
||||
return this.http.post<SelectionData>(
|
||||
this.getResourceUrl(null, 'selection_data'),
|
||||
|
||||
@@ -51,11 +51,29 @@ from documents.templating.workflows import parse_w_workflow_placeholders
|
||||
from documents.utils import copy_basic_file_stats
|
||||
from documents.utils import copy_file_with_basic_stats
|
||||
from documents.utils import run_subprocess
|
||||
from paperless.parsers.text import TextDocumentParser
|
||||
from paperless.parsers.tika import TikaDocumentParser
|
||||
from paperless_mail.parsers import MailDocumentParser
|
||||
|
||||
LOGGING_NAME: Final[str] = "paperless.consumer"
|
||||
|
||||
|
||||
def _parser_cleanup(parser: DocumentParser) -> None:
|
||||
"""
|
||||
Call cleanup on a parser, handling the new-style context-manager parsers.
|
||||
|
||||
New-style parsers (e.g. TextDocumentParser) use __exit__ for teardown
|
||||
instead of a cleanup() method. This shim will be removed once all existing parsers
|
||||
have switched to the new style and this consumer is updated to use it
|
||||
|
||||
TODO(stumpylog): Remove me in the future
|
||||
"""
|
||||
if isinstance(parser, (TextDocumentParser, TikaDocumentParser)):
|
||||
parser.__exit__(None, None, None)
|
||||
else:
|
||||
parser.cleanup()
|
||||
|
||||
|
||||
class WorkflowTriggerPlugin(
|
||||
NoCleanupPluginMixin,
|
||||
NoSetupPluginMixin,
|
||||
@@ -431,6 +449,12 @@ class ConsumerPlugin(
|
||||
progress_callback=progress_callback,
|
||||
)
|
||||
|
||||
# New-style parsers use __enter__/__exit__ for resource management.
|
||||
# _parser_cleanup (below) handles __exit__; call __enter__ here.
|
||||
# TODO(stumpylog): Remove me in the future
|
||||
if isinstance(document_parser, (TextDocumentParser, TikaDocumentParser)):
|
||||
document_parser.__enter__()
|
||||
|
||||
self.log.debug(f"Parser: {type(document_parser).__name__}")
|
||||
|
||||
# Parse the document. This may take some time.
|
||||
@@ -459,6 +483,9 @@ class ConsumerPlugin(
|
||||
self.filename,
|
||||
self.input_doc.mailrule_id,
|
||||
)
|
||||
elif isinstance(document_parser, (TextDocumentParser, TikaDocumentParser)):
|
||||
# TODO(stumpylog): Remove me in the future
|
||||
document_parser.parse(self.working_copy, mime_type)
|
||||
else:
|
||||
document_parser.parse(self.working_copy, mime_type, self.filename)
|
||||
|
||||
@@ -469,11 +496,15 @@ class ConsumerPlugin(
|
||||
ProgressStatusOptions.WORKING,
|
||||
ConsumerStatusShortMessage.GENERATING_THUMBNAIL,
|
||||
)
|
||||
thumbnail = document_parser.get_thumbnail(
|
||||
self.working_copy,
|
||||
mime_type,
|
||||
self.filename,
|
||||
)
|
||||
if isinstance(document_parser, (TextDocumentParser, TikaDocumentParser)):
|
||||
# TODO(stumpylog): Remove me in the future
|
||||
thumbnail = document_parser.get_thumbnail(self.working_copy, mime_type)
|
||||
else:
|
||||
thumbnail = document_parser.get_thumbnail(
|
||||
self.working_copy,
|
||||
mime_type,
|
||||
self.filename,
|
||||
)
|
||||
|
||||
text = document_parser.get_text()
|
||||
date = document_parser.get_date()
|
||||
@@ -490,7 +521,7 @@ class ConsumerPlugin(
|
||||
page_count = document_parser.get_page_count(self.working_copy, mime_type)
|
||||
|
||||
except ParseError as e:
|
||||
document_parser.cleanup()
|
||||
_parser_cleanup(document_parser)
|
||||
if tempdir:
|
||||
tempdir.cleanup()
|
||||
self._fail(
|
||||
@@ -500,7 +531,7 @@ class ConsumerPlugin(
|
||||
exception=e,
|
||||
)
|
||||
except Exception as e:
|
||||
document_parser.cleanup()
|
||||
_parser_cleanup(document_parser)
|
||||
if tempdir:
|
||||
tempdir.cleanup()
|
||||
self._fail(
|
||||
@@ -702,7 +733,7 @@ class ConsumerPlugin(
|
||||
exception=e,
|
||||
)
|
||||
finally:
|
||||
document_parser.cleanup()
|
||||
_parser_cleanup(document_parser)
|
||||
tempdir.cleanup()
|
||||
|
||||
self.run_post_consume_script(document)
|
||||
|
||||
@@ -30,6 +30,7 @@ def _process_document(doc_id: int) -> None:
|
||||
)
|
||||
shutil.move(thumb, document.thumbnail_path)
|
||||
finally:
|
||||
# TODO(stumpylog): Cleanup once all parsers are handled
|
||||
parser.cleanup()
|
||||
|
||||
|
||||
|
||||
@@ -169,7 +169,7 @@ def match_storage_paths(document: Document, classifier: DocumentClassifier, user
|
||||
def matches(matching_model: MatchingModel, document: Document):
|
||||
search_flags = 0
|
||||
|
||||
document_content = document.get_effective_content() or ""
|
||||
document_content = document.content
|
||||
|
||||
# Check that match is not empty
|
||||
if not matching_model.match.strip():
|
||||
|
||||
@@ -361,42 +361,6 @@ class Document(SoftDeleteModel, ModelWithOwner): # type: ignore[django-manager-
|
||||
res += f" {self.title}"
|
||||
return res
|
||||
|
||||
def get_effective_content(self) -> str | None:
|
||||
"""
|
||||
Returns the effective content for the document.
|
||||
|
||||
For root documents, this is the latest version's content when available.
|
||||
For version documents, this is always the document's own content.
|
||||
If the queryset already annotated ``effective_content``, that value is used.
|
||||
"""
|
||||
if hasattr(self, "effective_content"):
|
||||
return getattr(self, "effective_content")
|
||||
|
||||
if self.root_document_id is not None or self.pk is None:
|
||||
return self.content
|
||||
|
||||
prefetched_cache = getattr(self, "_prefetched_objects_cache", None)
|
||||
prefetched_versions = (
|
||||
prefetched_cache.get("versions")
|
||||
if isinstance(prefetched_cache, dict)
|
||||
else None
|
||||
)
|
||||
if prefetched_versions:
|
||||
latest_prefetched = max(prefetched_versions, key=lambda doc: doc.id)
|
||||
return latest_prefetched.content
|
||||
|
||||
latest_version_content = (
|
||||
Document.objects.filter(root_document=self)
|
||||
.order_by("-id")
|
||||
.values_list("content", flat=True)
|
||||
.first()
|
||||
)
|
||||
return (
|
||||
latest_version_content
|
||||
if latest_version_content is not None
|
||||
else self.content
|
||||
)
|
||||
|
||||
@property
|
||||
def suggestion_content(self):
|
||||
"""
|
||||
@@ -409,21 +373,15 @@ class Document(SoftDeleteModel, ModelWithOwner): # type: ignore[django-manager-
|
||||
This improves processing speed for large documents while keeping
|
||||
enough context for accurate suggestions.
|
||||
"""
|
||||
effective_content = self.get_effective_content()
|
||||
if not effective_content or len(effective_content) <= 1200000:
|
||||
return effective_content
|
||||
if not self.content or len(self.content) <= 1200000:
|
||||
return self.content
|
||||
else:
|
||||
# Use 80% from the start and 20% from the end
|
||||
# to preserve both opening and closing context.
|
||||
head_len = 800000
|
||||
tail_len = 200000
|
||||
|
||||
return " ".join(
|
||||
(
|
||||
effective_content[:head_len],
|
||||
effective_content[-tail_len:],
|
||||
),
|
||||
)
|
||||
return " ".join((self.content[:head_len], self.content[-tail_len:]))
|
||||
|
||||
@property
|
||||
def source_path(self) -> Path:
|
||||
|
||||
@@ -1540,124 +1540,11 @@ class DocumentListSerializer(serializers.Serializer):
|
||||
return documents
|
||||
|
||||
|
||||
class SourceModeValidationMixin:
|
||||
def validate_source_mode(self, source_mode: str) -> str:
|
||||
if source_mode not in bulk_edit.SourceModeChoices.__dict__.values():
|
||||
raise serializers.ValidationError("Invalid source_mode")
|
||||
return source_mode
|
||||
|
||||
|
||||
class RotateDocumentsSerializer(DocumentListSerializer, SourceModeValidationMixin):
|
||||
degrees = serializers.IntegerField(required=True)
|
||||
source_mode = serializers.CharField(
|
||||
required=False,
|
||||
default=bulk_edit.SourceModeChoices.LATEST_VERSION,
|
||||
)
|
||||
|
||||
|
||||
class MergeDocumentsSerializer(DocumentListSerializer, SourceModeValidationMixin):
|
||||
metadata_document_id = serializers.IntegerField(
|
||||
required=False,
|
||||
allow_null=True,
|
||||
)
|
||||
delete_originals = serializers.BooleanField(required=False, default=False)
|
||||
archive_fallback = serializers.BooleanField(required=False, default=False)
|
||||
source_mode = serializers.CharField(
|
||||
required=False,
|
||||
default=bulk_edit.SourceModeChoices.LATEST_VERSION,
|
||||
)
|
||||
|
||||
|
||||
class EditPdfDocumentsSerializer(DocumentListSerializer, SourceModeValidationMixin):
|
||||
operations = serializers.ListField(required=True)
|
||||
delete_original = serializers.BooleanField(required=False, default=False)
|
||||
update_document = serializers.BooleanField(required=False, default=False)
|
||||
include_metadata = serializers.BooleanField(required=False, default=True)
|
||||
source_mode = serializers.CharField(
|
||||
required=False,
|
||||
default=bulk_edit.SourceModeChoices.LATEST_VERSION,
|
||||
)
|
||||
|
||||
def validate(self, attrs):
|
||||
documents = attrs["documents"]
|
||||
if len(documents) > 1:
|
||||
raise serializers.ValidationError(
|
||||
"Edit PDF method only supports one document",
|
||||
)
|
||||
|
||||
operations = attrs["operations"]
|
||||
if not isinstance(operations, list):
|
||||
raise serializers.ValidationError("operations must be a list")
|
||||
|
||||
for op in operations:
|
||||
if not isinstance(op, dict):
|
||||
raise serializers.ValidationError("invalid operation entry")
|
||||
if "page" not in op or not isinstance(op["page"], int):
|
||||
raise serializers.ValidationError("page must be an integer")
|
||||
if "rotate" in op and not isinstance(op["rotate"], int):
|
||||
raise serializers.ValidationError("rotate must be an integer")
|
||||
if "doc" in op and not isinstance(op["doc"], int):
|
||||
raise serializers.ValidationError("doc must be an integer")
|
||||
|
||||
if attrs["update_document"]:
|
||||
max_idx = max(op.get("doc", 0) for op in operations)
|
||||
if max_idx > 0:
|
||||
raise serializers.ValidationError(
|
||||
"update_document only allowed with a single output document",
|
||||
)
|
||||
|
||||
doc = Document.objects.get(id=documents[0])
|
||||
if doc.page_count:
|
||||
for op in operations:
|
||||
if op["page"] < 1 or op["page"] > doc.page_count:
|
||||
raise serializers.ValidationError(
|
||||
f"Page {op['page']} is out of bounds for document with {doc.page_count} pages.",
|
||||
)
|
||||
return attrs
|
||||
|
||||
|
||||
class RemovePasswordDocumentsSerializer(
|
||||
DocumentListSerializer,
|
||||
SourceModeValidationMixin,
|
||||
):
|
||||
password = serializers.CharField(required=True)
|
||||
update_document = serializers.BooleanField(required=False, default=False)
|
||||
delete_original = serializers.BooleanField(required=False, default=False)
|
||||
include_metadata = serializers.BooleanField(required=False, default=True)
|
||||
source_mode = serializers.CharField(
|
||||
required=False,
|
||||
default=bulk_edit.SourceModeChoices.LATEST_VERSION,
|
||||
)
|
||||
|
||||
|
||||
class DeleteDocumentsSerializer(DocumentListSerializer):
|
||||
pass
|
||||
|
||||
|
||||
class ReprocessDocumentsSerializer(DocumentListSerializer):
|
||||
pass
|
||||
|
||||
|
||||
class BulkEditSerializer(
|
||||
SerializerWithPerms,
|
||||
DocumentListSerializer,
|
||||
SetPermissionsMixin,
|
||||
SourceModeValidationMixin,
|
||||
):
|
||||
# TODO: remove this and related backwards compatibility code when API v9 is dropped
|
||||
# split, delete_pages can be removed entirely
|
||||
MOVED_DOCUMENT_ACTION_ENDPOINTS = {
|
||||
"delete": "/api/documents/delete/",
|
||||
"reprocess": "/api/documents/reprocess/",
|
||||
"rotate": "/api/documents/rotate/",
|
||||
"merge": "/api/documents/merge/",
|
||||
"edit_pdf": "/api/documents/edit_pdf/",
|
||||
"remove_password": "/api/documents/remove_password/",
|
||||
"split": "/api/documents/edit_pdf/",
|
||||
"delete_pages": "/api/documents/edit_pdf/",
|
||||
}
|
||||
LEGACY_DOCUMENT_ACTION_METHODS = tuple(MOVED_DOCUMENT_ACTION_ENDPOINTS.keys())
|
||||
|
||||
method = serializers.ChoiceField(
|
||||
choices=[
|
||||
"set_correspondent",
|
||||
@@ -1667,8 +1554,15 @@ class BulkEditSerializer(
|
||||
"remove_tag",
|
||||
"modify_tags",
|
||||
"modify_custom_fields",
|
||||
"delete",
|
||||
"reprocess",
|
||||
"set_permissions",
|
||||
*LEGACY_DOCUMENT_ACTION_METHODS,
|
||||
"rotate",
|
||||
"merge",
|
||||
"split",
|
||||
"delete_pages",
|
||||
"edit_pdf",
|
||||
"remove_password",
|
||||
],
|
||||
label="Method",
|
||||
write_only=True,
|
||||
@@ -1746,7 +1640,8 @@ class BulkEditSerializer(
|
||||
return bulk_edit.edit_pdf
|
||||
elif method == "remove_password":
|
||||
return bulk_edit.remove_password
|
||||
else:
|
||||
else: # pragma: no cover
|
||||
# This will never happen as it is handled by the ChoiceField
|
||||
raise serializers.ValidationError("Unsupported method.")
|
||||
|
||||
def _validate_parameters_tags(self, parameters) -> None:
|
||||
@@ -1856,7 +1751,9 @@ class BulkEditSerializer(
|
||||
"source_mode",
|
||||
bulk_edit.SourceModeChoices.LATEST_VERSION,
|
||||
)
|
||||
parameters["source_mode"] = self.validate_source_mode(source_mode)
|
||||
if source_mode not in bulk_edit.SourceModeChoices.__dict__.values():
|
||||
raise serializers.ValidationError("Invalid source_mode")
|
||||
parameters["source_mode"] = source_mode
|
||||
|
||||
def _validate_parameters_split(self, parameters) -> None:
|
||||
if "pages" not in parameters:
|
||||
|
||||
@@ -399,6 +399,7 @@ def update_document_content_maybe_archive_file(document_id) -> None:
|
||||
f"Error while parsing document {document} (ID: {document_id})",
|
||||
)
|
||||
finally:
|
||||
# TODO(stumpylog): Cleanup once all parsers are handled
|
||||
parser.cleanup()
|
||||
|
||||
|
||||
|
||||
@@ -422,34 +422,6 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
|
||||
self.assertEqual(args[0], [self.doc1.id])
|
||||
self.assertEqual(len(kwargs), 0)
|
||||
|
||||
@mock.patch("documents.views.bulk_edit.delete")
|
||||
def test_delete_documents_endpoint(self, m) -> None:
|
||||
self.setup_mock(m, "delete")
|
||||
response = self.client.post(
|
||||
"/api/documents/delete/",
|
||||
json.dumps({"documents": [self.doc1.id]}),
|
||||
content_type="application/json",
|
||||
)
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
m.assert_called_once()
|
||||
args, kwargs = m.call_args
|
||||
self.assertEqual(args[0], [self.doc1.id])
|
||||
self.assertEqual(len(kwargs), 0)
|
||||
|
||||
@mock.patch("documents.views.bulk_edit.reprocess")
|
||||
def test_reprocess_documents_endpoint(self, m) -> None:
|
||||
self.setup_mock(m, "reprocess")
|
||||
response = self.client.post(
|
||||
"/api/documents/reprocess/",
|
||||
json.dumps({"documents": [self.doc1.id]}),
|
||||
content_type="application/json",
|
||||
)
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
m.assert_called_once()
|
||||
args, kwargs = m.call_args
|
||||
self.assertEqual(args[0], [self.doc1.id])
|
||||
self.assertEqual(len(kwargs), 0)
|
||||
|
||||
@mock.patch("documents.serialisers.bulk_edit.set_storage_path")
|
||||
def test_api_set_storage_path(self, m) -> None:
|
||||
"""
|
||||
@@ -905,7 +877,7 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
|
||||
self.assertEqual(kwargs["merge"], True)
|
||||
|
||||
@mock.patch("documents.serialisers.bulk_edit.set_storage_path")
|
||||
@mock.patch("documents.views.bulk_edit.merge")
|
||||
@mock.patch("documents.serialisers.bulk_edit.merge")
|
||||
def test_insufficient_global_perms(self, mock_merge, mock_set_storage) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
@@ -940,11 +912,12 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
|
||||
mock_set_storage.assert_not_called()
|
||||
|
||||
response = self.client.post(
|
||||
"/api/documents/merge/",
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc1.id],
|
||||
"metadata_document_id": self.doc1.id,
|
||||
"method": "merge",
|
||||
"parameters": {"metadata_document_id": self.doc1.id},
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
@@ -954,12 +927,15 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
|
||||
mock_merge.assert_not_called()
|
||||
|
||||
response = self.client.post(
|
||||
"/api/documents/merge/",
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc1.id],
|
||||
"metadata_document_id": self.doc1.id,
|
||||
"delete_originals": True,
|
||||
"method": "merge",
|
||||
"parameters": {
|
||||
"metadata_document_id": self.doc1.id,
|
||||
"delete_originals": True,
|
||||
},
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
@@ -1076,117 +1052,84 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
|
||||
|
||||
m.assert_called_once()
|
||||
|
||||
@mock.patch("documents.views.bulk_edit.rotate")
|
||||
@mock.patch("documents.serialisers.bulk_edit.rotate")
|
||||
def test_rotate(self, m) -> None:
|
||||
self.setup_mock(m, "rotate")
|
||||
response = self.client.post(
|
||||
"/api/documents/rotate/",
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id, self.doc3.id],
|
||||
"degrees": 90,
|
||||
"method": "rotate",
|
||||
"parameters": {"degrees": 90},
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
|
||||
m.assert_called_once()
|
||||
args, kwargs = m.call_args
|
||||
self.assertCountEqual(args[0], [self.doc2.id, self.doc3.id])
|
||||
self.assertEqual(kwargs["degrees"], 90)
|
||||
self.assertEqual(kwargs["source_mode"], "latest_version")
|
||||
self.assertEqual(kwargs["user"], self.user)
|
||||
|
||||
@mock.patch("documents.views.bulk_edit.rotate")
|
||||
@mock.patch("documents.serialisers.bulk_edit.rotate")
|
||||
def test_rotate_invalid_params(self, m) -> None:
|
||||
response = self.client.post(
|
||||
"/api/documents/rotate/",
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id, self.doc3.id],
|
||||
"degrees": "foo",
|
||||
"method": "rotate",
|
||||
"parameters": {"degrees": "foo"},
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
|
||||
response = self.client.post(
|
||||
"/api/documents/rotate/",
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id, self.doc3.id],
|
||||
"degrees": 90.5,
|
||||
"method": "rotate",
|
||||
"parameters": {"degrees": 90.5},
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
|
||||
m.assert_not_called()
|
||||
|
||||
@mock.patch("documents.views.bulk_edit.rotate")
|
||||
def test_rotate_insufficient_permissions(self, m) -> None:
|
||||
self.doc1.owner = User.objects.get(username="temp_admin")
|
||||
self.doc1.save()
|
||||
user1 = User.objects.create(username="user1")
|
||||
user1.user_permissions.add(*Permission.objects.all())
|
||||
user1.save()
|
||||
self.client.force_authenticate(user=user1)
|
||||
|
||||
self.setup_mock(m, "rotate")
|
||||
response = self.client.post(
|
||||
"/api/documents/rotate/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc1.id, self.doc2.id],
|
||||
"degrees": 90,
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
|
||||
m.assert_not_called()
|
||||
self.assertEqual(response.content, b"Insufficient permissions")
|
||||
|
||||
response = self.client.post(
|
||||
"/api/documents/rotate/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id, self.doc3.id],
|
||||
"degrees": 90,
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
m.assert_called_once()
|
||||
|
||||
@mock.patch("documents.views.bulk_edit.merge")
|
||||
@mock.patch("documents.serialisers.bulk_edit.merge")
|
||||
def test_merge(self, m) -> None:
|
||||
self.setup_mock(m, "merge")
|
||||
response = self.client.post(
|
||||
"/api/documents/merge/",
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id, self.doc3.id],
|
||||
"metadata_document_id": self.doc3.id,
|
||||
"method": "merge",
|
||||
"parameters": {"metadata_document_id": self.doc3.id},
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
|
||||
m.assert_called_once()
|
||||
args, kwargs = m.call_args
|
||||
self.assertCountEqual(args[0], [self.doc2.id, self.doc3.id])
|
||||
self.assertEqual(kwargs["metadata_document_id"], self.doc3.id)
|
||||
self.assertEqual(kwargs["source_mode"], "latest_version")
|
||||
self.assertEqual(kwargs["user"], self.user)
|
||||
|
||||
@mock.patch("documents.views.bulk_edit.merge")
|
||||
@mock.patch("documents.serialisers.bulk_edit.merge")
|
||||
def test_merge_and_delete_insufficient_permissions(self, m) -> None:
|
||||
self.doc1.owner = User.objects.get(username="temp_admin")
|
||||
self.doc1.save()
|
||||
@@ -1197,12 +1140,15 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
|
||||
|
||||
self.setup_mock(m, "merge")
|
||||
response = self.client.post(
|
||||
"/api/documents/merge/",
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc1.id, self.doc2.id],
|
||||
"metadata_document_id": self.doc2.id,
|
||||
"delete_originals": True,
|
||||
"method": "merge",
|
||||
"parameters": {
|
||||
"metadata_document_id": self.doc2.id,
|
||||
"delete_originals": True,
|
||||
},
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
@@ -1213,12 +1159,15 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
|
||||
self.assertEqual(response.content, b"Insufficient permissions")
|
||||
|
||||
response = self.client.post(
|
||||
"/api/documents/merge/",
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id, self.doc3.id],
|
||||
"metadata_document_id": self.doc2.id,
|
||||
"delete_originals": True,
|
||||
"method": "merge",
|
||||
"parameters": {
|
||||
"metadata_document_id": self.doc2.id,
|
||||
"delete_originals": True,
|
||||
},
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
@@ -1227,15 +1176,27 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
m.assert_called_once()
|
||||
|
||||
@mock.patch("documents.views.bulk_edit.merge")
|
||||
@mock.patch("documents.serialisers.bulk_edit.merge")
|
||||
def test_merge_invalid_parameters(self, m) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- API data for merging documents is called
|
||||
- The parameters are invalid
|
||||
WHEN:
|
||||
- API is called
|
||||
THEN:
|
||||
- The API fails with a correct error code
|
||||
"""
|
||||
self.setup_mock(m, "merge")
|
||||
response = self.client.post(
|
||||
"/api/documents/merge/",
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc1.id, self.doc2.id],
|
||||
"delete_originals": "not_boolean",
|
||||
"method": "merge",
|
||||
"parameters": {
|
||||
"delete_originals": "not_boolean",
|
||||
},
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
@@ -1244,67 +1205,207 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
m.assert_not_called()
|
||||
|
||||
def test_bulk_edit_allows_legacy_file_methods_with_warning(self) -> None:
|
||||
method_payloads = {
|
||||
"delete": {},
|
||||
"reprocess": {},
|
||||
"rotate": {"degrees": 90},
|
||||
"merge": {"metadata_document_id": self.doc2.id},
|
||||
"edit_pdf": {"operations": [{"page": 1}]},
|
||||
"remove_password": {"password": "secret"},
|
||||
"split": {"pages": "1,2-4"},
|
||||
"delete_pages": {"pages": [1, 2]},
|
||||
}
|
||||
|
||||
for version in (9, 10):
|
||||
for method, parameters in method_payloads.items():
|
||||
with self.subTest(method=method, version=version):
|
||||
with mock.patch(
|
||||
f"documents.views.bulk_edit.{method}",
|
||||
) as mocked_method:
|
||||
self.setup_mock(mocked_method, method)
|
||||
with self.assertLogs("paperless.api", level="WARNING") as logs:
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"method": method,
|
||||
"parameters": parameters,
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
headers={
|
||||
"Accept": f"application/json; version={version}",
|
||||
},
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
mocked_method.assert_called_once()
|
||||
self.assertTrue(
|
||||
any(
|
||||
"Deprecated bulk_edit method" in entry
|
||||
and f"'{method}'" in entry
|
||||
for entry in logs.output
|
||||
),
|
||||
)
|
||||
|
||||
@mock.patch("documents.views.bulk_edit.edit_pdf")
|
||||
def test_edit_pdf(self, m) -> None:
|
||||
self.setup_mock(m, "edit_pdf")
|
||||
@mock.patch("documents.serialisers.bulk_edit.split")
|
||||
def test_split(self, m) -> None:
|
||||
self.setup_mock(m, "split")
|
||||
response = self.client.post(
|
||||
"/api/documents/edit_pdf/",
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"operations": [{"page": 1}],
|
||||
"source_mode": "explicit_selection",
|
||||
"method": "split",
|
||||
"parameters": {"pages": "1,2-4,5-6,7"},
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
|
||||
m.assert_called_once()
|
||||
args, kwargs = m.call_args
|
||||
self.assertCountEqual(args[0], [self.doc2.id])
|
||||
self.assertEqual(kwargs["pages"], [[1], [2, 3, 4], [5, 6], [7]])
|
||||
self.assertEqual(kwargs["user"], self.user)
|
||||
|
||||
def test_split_invalid_params(self) -> None:
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"method": "split",
|
||||
"parameters": {}, # pages not specified
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn(b"pages not specified", response.content)
|
||||
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"method": "split",
|
||||
"parameters": {"pages": "1:7"}, # wrong format
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn(b"invalid pages specified", response.content)
|
||||
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [
|
||||
self.doc1.id,
|
||||
self.doc2.id,
|
||||
], # only one document supported
|
||||
"method": "split",
|
||||
"parameters": {"pages": "1-2,3-7"}, # wrong format
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn(b"Split method only supports one document", response.content)
|
||||
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"method": "split",
|
||||
"parameters": {
|
||||
"pages": "1",
|
||||
"delete_originals": "notabool",
|
||||
}, # not a bool
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn(b"delete_originals must be a boolean", response.content)
|
||||
|
||||
@mock.patch("documents.serialisers.bulk_edit.delete_pages")
|
||||
def test_delete_pages(self, m) -> None:
|
||||
self.setup_mock(m, "delete_pages")
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"method": "delete_pages",
|
||||
"parameters": {"pages": [1, 2, 3, 4]},
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
|
||||
m.assert_called_once()
|
||||
args, kwargs = m.call_args
|
||||
self.assertCountEqual(args[0], [self.doc2.id])
|
||||
self.assertEqual(kwargs["pages"], [1, 2, 3, 4])
|
||||
|
||||
def test_delete_pages_invalid_params(self) -> None:
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [
|
||||
self.doc1.id,
|
||||
self.doc2.id,
|
||||
], # only one document supported
|
||||
"method": "delete_pages",
|
||||
"parameters": {
|
||||
"pages": [1, 2, 3, 4],
|
||||
},
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn(
|
||||
b"Delete pages method only supports one document",
|
||||
response.content,
|
||||
)
|
||||
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"method": "delete_pages",
|
||||
"parameters": {}, # pages not specified
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn(b"pages not specified", response.content)
|
||||
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"method": "delete_pages",
|
||||
"parameters": {"pages": "1-3"}, # not a list
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn(b"pages must be a list", response.content)
|
||||
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"method": "delete_pages",
|
||||
"parameters": {"pages": ["1-3"]}, # not ints
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn(b"pages must be a list of integers", response.content)
|
||||
|
||||
@mock.patch("documents.serialisers.bulk_edit.edit_pdf")
|
||||
def test_edit_pdf(self, m) -> None:
|
||||
self.setup_mock(m, "edit_pdf")
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"method": "edit_pdf",
|
||||
"parameters": {
|
||||
"operations": [{"page": 1}],
|
||||
"source_mode": "explicit_selection",
|
||||
},
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
|
||||
m.assert_called_once()
|
||||
args, kwargs = m.call_args
|
||||
self.assertCountEqual(args[0], [self.doc2.id])
|
||||
@@ -1313,12 +1414,14 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
|
||||
self.assertEqual(kwargs["user"], self.user)
|
||||
|
||||
def test_edit_pdf_invalid_params(self) -> None:
|
||||
# multiple documents
|
||||
response = self.client.post(
|
||||
"/api/documents/edit_pdf/",
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id, self.doc3.id],
|
||||
"operations": [{"page": 1}],
|
||||
"method": "edit_pdf",
|
||||
"parameters": {"operations": [{"page": 1}]},
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
@@ -1326,25 +1429,44 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn(b"Edit PDF method only supports one document", response.content)
|
||||
|
||||
# no operations specified
|
||||
response = self.client.post(
|
||||
"/api/documents/edit_pdf/",
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"operations": "not_a_list",
|
||||
"method": "edit_pdf",
|
||||
"parameters": {},
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn(b"Expected a list of items", response.content)
|
||||
self.assertIn(b"operations not specified", response.content)
|
||||
|
||||
# operations not a list
|
||||
response = self.client.post(
|
||||
"/api/documents/edit_pdf/",
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"operations": ["invalid_operation"],
|
||||
"method": "edit_pdf",
|
||||
"parameters": {"operations": "not_a_list"},
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn(b"operations must be a list", response.content)
|
||||
|
||||
# invalid operation
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"method": "edit_pdf",
|
||||
"parameters": {"operations": ["invalid_operation"]},
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
@@ -1352,12 +1474,14 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn(b"invalid operation entry", response.content)
|
||||
|
||||
# page not an int
|
||||
response = self.client.post(
|
||||
"/api/documents/edit_pdf/",
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"operations": [{"page": "not_an_int"}],
|
||||
"method": "edit_pdf",
|
||||
"parameters": {"operations": [{"page": "not_an_int"}]},
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
@@ -1365,12 +1489,14 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn(b"page must be an integer", response.content)
|
||||
|
||||
# rotate not an int
|
||||
response = self.client.post(
|
||||
"/api/documents/edit_pdf/",
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"operations": [{"page": 1, "rotate": "not_an_int"}],
|
||||
"method": "edit_pdf",
|
||||
"parameters": {"operations": [{"page": 1, "rotate": "not_an_int"}]},
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
@@ -1378,12 +1504,14 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn(b"rotate must be an integer", response.content)
|
||||
|
||||
# doc not an int
|
||||
response = self.client.post(
|
||||
"/api/documents/edit_pdf/",
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"operations": [{"page": 1, "doc": "not_an_int"}],
|
||||
"method": "edit_pdf",
|
||||
"parameters": {"operations": [{"page": 1, "doc": "not_an_int"}]},
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
@@ -1391,13 +1519,53 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn(b"doc must be an integer", response.content)
|
||||
|
||||
# update_document not a boolean
|
||||
response = self.client.post(
|
||||
"/api/documents/edit_pdf/",
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"update_document": True,
|
||||
"operations": [{"page": 1, "doc": 1}, {"page": 2, "doc": 2}],
|
||||
"method": "edit_pdf",
|
||||
"parameters": {
|
||||
"update_document": "not_a_bool",
|
||||
"operations": [{"page": 1}],
|
||||
},
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn(b"update_document must be a boolean", response.content)
|
||||
|
||||
# include_metadata not a boolean
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"method": "edit_pdf",
|
||||
"parameters": {
|
||||
"include_metadata": "not_a_bool",
|
||||
"operations": [{"page": 1}],
|
||||
},
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn(b"include_metadata must be a boolean", response.content)
|
||||
|
||||
# update_document True but output would be multiple documents
|
||||
response = self.client.post(
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"method": "edit_pdf",
|
||||
"parameters": {
|
||||
"update_document": True,
|
||||
"operations": [{"page": 1, "doc": 1}, {"page": 2, "doc": 2}],
|
||||
},
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
@@ -1408,13 +1576,17 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
|
||||
response.content,
|
||||
)
|
||||
|
||||
# invalid source mode
|
||||
response = self.client.post(
|
||||
"/api/documents/edit_pdf/",
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"operations": [{"page": 1}],
|
||||
"source_mode": "not_a_mode",
|
||||
"method": "edit_pdf",
|
||||
"parameters": {
|
||||
"operations": [{"page": 1}],
|
||||
"source_mode": "not_a_mode",
|
||||
},
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
@@ -1422,70 +1594,42 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn(b"Invalid source_mode", response.content)
|
||||
|
||||
@mock.patch("documents.views.bulk_edit.edit_pdf")
|
||||
@mock.patch("documents.serialisers.bulk_edit.edit_pdf")
|
||||
def test_edit_pdf_page_out_of_bounds(self, m) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- API data for editing PDF is called
|
||||
- The page number is out of bounds
|
||||
WHEN:
|
||||
- API is called
|
||||
THEN:
|
||||
- The API fails with a correct error code
|
||||
"""
|
||||
self.setup_mock(m, "edit_pdf")
|
||||
response = self.client.post(
|
||||
"/api/documents/edit_pdf/",
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"operations": [{"page": 99}],
|
||||
"method": "edit_pdf",
|
||||
"parameters": {"operations": [{"page": 99}]},
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn(b"out of bounds", response.content)
|
||||
m.assert_not_called()
|
||||
|
||||
@mock.patch("documents.views.bulk_edit.edit_pdf")
|
||||
def test_edit_pdf_insufficient_permissions(self, m) -> None:
|
||||
self.doc1.owner = User.objects.get(username="temp_admin")
|
||||
self.doc1.save()
|
||||
user1 = User.objects.create(username="user1")
|
||||
user1.user_permissions.add(*Permission.objects.all())
|
||||
user1.save()
|
||||
self.client.force_authenticate(user=user1)
|
||||
|
||||
self.setup_mock(m, "edit_pdf")
|
||||
response = self.client.post(
|
||||
"/api/documents/edit_pdf/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc1.id],
|
||||
"operations": [{"page": 1}],
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
|
||||
m.assert_not_called()
|
||||
self.assertEqual(response.content, b"Insufficient permissions")
|
||||
|
||||
response = self.client.post(
|
||||
"/api/documents/edit_pdf/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"operations": [{"page": 1}],
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
m.assert_called_once()
|
||||
|
||||
@mock.patch("documents.views.bulk_edit.remove_password")
|
||||
@mock.patch("documents.serialisers.bulk_edit.remove_password")
|
||||
def test_remove_password(self, m) -> None:
|
||||
self.setup_mock(m, "remove_password")
|
||||
response = self.client.post(
|
||||
"/api/documents/remove_password/",
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"password": "secret",
|
||||
"update_document": True,
|
||||
"method": "remove_password",
|
||||
"parameters": {"password": "secret", "update_document": True},
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
@@ -1497,69 +1641,36 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
|
||||
self.assertCountEqual(args[0], [self.doc2.id])
|
||||
self.assertEqual(kwargs["password"], "secret")
|
||||
self.assertTrue(kwargs["update_document"])
|
||||
self.assertEqual(kwargs["source_mode"], "latest_version")
|
||||
self.assertEqual(kwargs["user"], self.user)
|
||||
|
||||
def test_remove_password_invalid_params(self) -> None:
|
||||
response = self.client.post(
|
||||
"/api/documents/remove_password/",
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"method": "remove_password",
|
||||
"parameters": {},
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn(b"password not specified", response.content)
|
||||
|
||||
response = self.client.post(
|
||||
"/api/documents/remove_password/",
|
||||
"/api/documents/bulk_edit/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"password": 123,
|
||||
"method": "remove_password",
|
||||
"parameters": {"password": 123},
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
|
||||
@mock.patch("documents.views.bulk_edit.remove_password")
|
||||
def test_remove_password_insufficient_permissions(self, m) -> None:
|
||||
self.doc1.owner = User.objects.get(username="temp_admin")
|
||||
self.doc1.save()
|
||||
user1 = User.objects.create(username="user1")
|
||||
user1.user_permissions.add(*Permission.objects.all())
|
||||
user1.save()
|
||||
self.client.force_authenticate(user=user1)
|
||||
|
||||
self.setup_mock(m, "remove_password")
|
||||
response = self.client.post(
|
||||
"/api/documents/remove_password/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc1.id],
|
||||
"password": "secret",
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
|
||||
m.assert_not_called()
|
||||
self.assertEqual(response.content, b"Insufficient permissions")
|
||||
|
||||
response = self.client.post(
|
||||
"/api/documents/remove_password/",
|
||||
json.dumps(
|
||||
{
|
||||
"documents": [self.doc2.id],
|
||||
"password": "secret",
|
||||
},
|
||||
),
|
||||
content_type="application/json",
|
||||
)
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
m.assert_called_once()
|
||||
self.assertIn(b"password must be a string", response.content)
|
||||
|
||||
@override_settings(AUDIT_LOG_ENABLED=True)
|
||||
def test_bulk_edit_audit_log_enabled_simple_field(self) -> None:
|
||||
|
||||
@@ -1144,56 +1144,6 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
|
||||
self.assertEqual(len(response.data["all"]), 50)
|
||||
self.assertCountEqual(response.data["all"], [d.id for d in docs])
|
||||
|
||||
def test_list_with_include_selection_data(self) -> None:
|
||||
correspondent = Correspondent.objects.create(name="c1")
|
||||
doc_type = DocumentType.objects.create(name="dt1")
|
||||
storage_path = StoragePath.objects.create(name="sp1")
|
||||
tag = Tag.objects.create(name="tag")
|
||||
|
||||
matching_doc = Document.objects.create(
|
||||
checksum="A",
|
||||
correspondent=correspondent,
|
||||
document_type=doc_type,
|
||||
storage_path=storage_path,
|
||||
)
|
||||
matching_doc.tags.add(tag)
|
||||
|
||||
non_matching_doc = Document.objects.create(checksum="B")
|
||||
non_matching_doc.tags.add(Tag.objects.create(name="other"))
|
||||
|
||||
response = self.client.get(
|
||||
f"/api/documents/?tags__id__in={tag.id}&include_selection_data=true",
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
self.assertIn("selection_data", response.data)
|
||||
|
||||
selected_correspondent = next(
|
||||
item
|
||||
for item in response.data["selection_data"]["selected_correspondents"]
|
||||
if item["id"] == correspondent.id
|
||||
)
|
||||
selected_tag = next(
|
||||
item
|
||||
for item in response.data["selection_data"]["selected_tags"]
|
||||
if item["id"] == tag.id
|
||||
)
|
||||
selected_type = next(
|
||||
item
|
||||
for item in response.data["selection_data"]["selected_document_types"]
|
||||
if item["id"] == doc_type.id
|
||||
)
|
||||
selected_storage_path = next(
|
||||
item
|
||||
for item in response.data["selection_data"]["selected_storage_paths"]
|
||||
if item["id"] == storage_path.id
|
||||
)
|
||||
|
||||
self.assertEqual(selected_correspondent["document_count"], 1)
|
||||
self.assertEqual(selected_tag["document_count"], 1)
|
||||
self.assertEqual(selected_type["document_count"], 1)
|
||||
self.assertEqual(selected_storage_path["document_count"], 1)
|
||||
|
||||
def test_statistics(self) -> None:
|
||||
doc1 = Document.objects.create(
|
||||
title="none1",
|
||||
|
||||
@@ -25,39 +25,3 @@ class TestApiSchema(APITestCase):
|
||||
|
||||
ui_response = self.client.get(self.ENDPOINT + "view/")
|
||||
self.assertEqual(ui_response.status_code, status.HTTP_200_OK)
|
||||
|
||||
def test_schema_includes_dedicated_document_edit_endpoints(self) -> None:
|
||||
schema_response = self.client.get(self.ENDPOINT)
|
||||
self.assertEqual(schema_response.status_code, status.HTTP_200_OK)
|
||||
|
||||
paths = schema_response.data["paths"]
|
||||
self.assertIn("/api/documents/delete/", paths)
|
||||
self.assertIn("/api/documents/reprocess/", paths)
|
||||
self.assertIn("/api/documents/rotate/", paths)
|
||||
self.assertIn("/api/documents/merge/", paths)
|
||||
self.assertIn("/api/documents/edit_pdf/", paths)
|
||||
self.assertIn("/api/documents/remove_password/", paths)
|
||||
|
||||
def test_schema_bulk_edit_advertises_legacy_document_action_methods(self) -> None:
|
||||
schema_response = self.client.get(self.ENDPOINT)
|
||||
self.assertEqual(schema_response.status_code, status.HTTP_200_OK)
|
||||
|
||||
schema = schema_response.data["components"]["schemas"]
|
||||
bulk_schema = schema["BulkEditRequest"]
|
||||
method_schema = bulk_schema["properties"]["method"]
|
||||
|
||||
# drf-spectacular emits the enum as a referenced schema for this field
|
||||
enum_ref = method_schema["allOf"][0]["$ref"].split("/")[-1]
|
||||
advertised_methods = schema[enum_ref]["enum"]
|
||||
|
||||
for action_method in [
|
||||
"delete",
|
||||
"reprocess",
|
||||
"rotate",
|
||||
"merge",
|
||||
"edit_pdf",
|
||||
"remove_password",
|
||||
"split",
|
||||
"delete_pages",
|
||||
]:
|
||||
self.assertIn(action_method, advertised_methods)
|
||||
|
||||
@@ -89,46 +89,6 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
self.assertEqual(len(results), 0)
|
||||
self.assertCountEqual(response.data["all"], [])
|
||||
|
||||
def test_search_with_include_selection_data(self) -> None:
|
||||
correspondent = Correspondent.objects.create(name="c1")
|
||||
doc_type = DocumentType.objects.create(name="dt1")
|
||||
storage_path = StoragePath.objects.create(name="sp1")
|
||||
tag = Tag.objects.create(name="tag")
|
||||
|
||||
matching_doc = Document.objects.create(
|
||||
title="bank statement",
|
||||
content="bank content",
|
||||
checksum="A",
|
||||
correspondent=correspondent,
|
||||
document_type=doc_type,
|
||||
storage_path=storage_path,
|
||||
)
|
||||
matching_doc.tags.add(tag)
|
||||
|
||||
with AsyncWriter(index.open_index()) as writer:
|
||||
index.update_document(writer, matching_doc)
|
||||
|
||||
response = self.client.get(
|
||||
"/api/documents/?query=bank&include_selection_data=true",
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
self.assertIn("selection_data", response.data)
|
||||
|
||||
selected_correspondent = next(
|
||||
item
|
||||
for item in response.data["selection_data"]["selected_correspondents"]
|
||||
if item["id"] == correspondent.id
|
||||
)
|
||||
selected_tag = next(
|
||||
item
|
||||
for item in response.data["selection_data"]["selected_tags"]
|
||||
if item["id"] == tag.id
|
||||
)
|
||||
|
||||
self.assertEqual(selected_correspondent["document_count"], 1)
|
||||
self.assertEqual(selected_tag["document_count"], 1)
|
||||
|
||||
def test_search_custom_field_ordering(self) -> None:
|
||||
custom_field = CustomField.objects.create(
|
||||
name="Sortable field",
|
||||
|
||||
@@ -156,46 +156,6 @@ class TestDocument(TestCase):
|
||||
)
|
||||
self.assertEqual(doc.get_public_filename(), "2020-12-25 test")
|
||||
|
||||
def test_suggestion_content_uses_latest_version_content_for_root_documents(
|
||||
self,
|
||||
) -> None:
|
||||
root = Document.objects.create(
|
||||
title="root",
|
||||
checksum="root",
|
||||
mime_type="application/pdf",
|
||||
content="outdated root content",
|
||||
)
|
||||
version = Document.objects.create(
|
||||
title="v1",
|
||||
checksum="v1",
|
||||
mime_type="application/pdf",
|
||||
root_document=root,
|
||||
content="latest version content",
|
||||
)
|
||||
|
||||
self.assertEqual(root.suggestion_content, version.content)
|
||||
|
||||
def test_content_length_is_per_document_row_for_versions(self) -> None:
|
||||
root = Document.objects.create(
|
||||
title="root",
|
||||
checksum="root",
|
||||
mime_type="application/pdf",
|
||||
content="abc",
|
||||
)
|
||||
version = Document.objects.create(
|
||||
title="v1",
|
||||
checksum="v1",
|
||||
mime_type="application/pdf",
|
||||
root_document=root,
|
||||
content="abcdefgh",
|
||||
)
|
||||
|
||||
root.refresh_from_db()
|
||||
version.refresh_from_db()
|
||||
|
||||
self.assertEqual(root.content_length, 3)
|
||||
self.assertEqual(version.content_length, 8)
|
||||
|
||||
|
||||
def test_suggestion_content() -> None:
|
||||
"""
|
||||
|
||||
@@ -48,52 +48,6 @@ class _TestMatchingBase(TestCase):
|
||||
|
||||
|
||||
class TestMatching(_TestMatchingBase):
|
||||
def test_matches_uses_latest_version_content_for_root_documents(self) -> None:
|
||||
root = Document.objects.create(
|
||||
title="root",
|
||||
checksum="root",
|
||||
mime_type="application/pdf",
|
||||
content="root content without token",
|
||||
)
|
||||
Document.objects.create(
|
||||
title="v1",
|
||||
checksum="v1",
|
||||
mime_type="application/pdf",
|
||||
root_document=root,
|
||||
content="latest version contains keyword",
|
||||
)
|
||||
tag = Tag.objects.create(
|
||||
name="tag",
|
||||
match="keyword",
|
||||
matching_algorithm=Tag.MATCH_ANY,
|
||||
)
|
||||
|
||||
self.assertTrue(matching.matches(tag, root))
|
||||
|
||||
def test_matches_does_not_fall_back_to_root_content_when_version_exists(
|
||||
self,
|
||||
) -> None:
|
||||
root = Document.objects.create(
|
||||
title="root",
|
||||
checksum="root",
|
||||
mime_type="application/pdf",
|
||||
content="root contains keyword",
|
||||
)
|
||||
Document.objects.create(
|
||||
title="v1",
|
||||
checksum="v1",
|
||||
mime_type="application/pdf",
|
||||
root_document=root,
|
||||
content="latest version without token",
|
||||
)
|
||||
tag = Tag.objects.create(
|
||||
name="tag",
|
||||
match="keyword",
|
||||
matching_algorithm=Tag.MATCH_ANY,
|
||||
)
|
||||
|
||||
self.assertFalse(matching.matches(tag, root))
|
||||
|
||||
def test_match_none(self) -> None:
|
||||
self._test_matching(
|
||||
"",
|
||||
|
||||
@@ -9,9 +9,9 @@ from documents.parsers import get_default_file_extension
|
||||
from documents.parsers import get_parser_class_for_mime_type
|
||||
from documents.parsers import get_supported_file_extensions
|
||||
from documents.parsers import is_file_ext_supported
|
||||
from paperless.parsers.text import TextDocumentParser
|
||||
from paperless.parsers.tika import TikaDocumentParser
|
||||
from paperless_tesseract.parsers import RasterisedDocumentParser
|
||||
from paperless_text.parsers import TextDocumentParser
|
||||
from paperless_tika.parsers import TikaDocumentParser
|
||||
|
||||
|
||||
class TestParserDiscovery(TestCase):
|
||||
|
||||
@@ -7,6 +7,7 @@ import tempfile
|
||||
import zipfile
|
||||
from collections import defaultdict
|
||||
from collections import deque
|
||||
from contextlib import nullcontext
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from time import mktime
|
||||
@@ -176,20 +177,14 @@ from documents.serialisers import BulkEditObjectsSerializer
|
||||
from documents.serialisers import BulkEditSerializer
|
||||
from documents.serialisers import CorrespondentSerializer
|
||||
from documents.serialisers import CustomFieldSerializer
|
||||
from documents.serialisers import DeleteDocumentsSerializer
|
||||
from documents.serialisers import DocumentListSerializer
|
||||
from documents.serialisers import DocumentSerializer
|
||||
from documents.serialisers import DocumentTypeSerializer
|
||||
from documents.serialisers import DocumentVersionLabelSerializer
|
||||
from documents.serialisers import DocumentVersionSerializer
|
||||
from documents.serialisers import EditPdfDocumentsSerializer
|
||||
from documents.serialisers import EmailSerializer
|
||||
from documents.serialisers import MergeDocumentsSerializer
|
||||
from documents.serialisers import NotesSerializer
|
||||
from documents.serialisers import PostDocumentSerializer
|
||||
from documents.serialisers import RemovePasswordDocumentsSerializer
|
||||
from documents.serialisers import ReprocessDocumentsSerializer
|
||||
from documents.serialisers import RotateDocumentsSerializer
|
||||
from documents.serialisers import RunTaskViewSerializer
|
||||
from documents.serialisers import SavedViewSerializer
|
||||
from documents.serialisers import SearchResultSerializer
|
||||
@@ -225,6 +220,7 @@ from paperless.celery import app as celery_app
|
||||
from paperless.config import AIConfig
|
||||
from paperless.config import GeneralConfig
|
||||
from paperless.models import ApplicationConfiguration
|
||||
from paperless.parsers import ParserProtocol
|
||||
from paperless.serialisers import GroupSerializer
|
||||
from paperless.serialisers import UserSerializer
|
||||
from paperless.views import StandardPagination
|
||||
@@ -835,61 +831,6 @@ class DocumentViewSet(
|
||||
"custom_field_",
|
||||
)
|
||||
|
||||
def _get_selection_data_for_queryset(self, queryset):
|
||||
correspondents = Correspondent.objects.annotate(
|
||||
document_count=Count(
|
||||
"documents",
|
||||
filter=Q(documents__in=queryset),
|
||||
distinct=True,
|
||||
),
|
||||
)
|
||||
tags = Tag.objects.annotate(
|
||||
document_count=Count(
|
||||
"documents",
|
||||
filter=Q(documents__in=queryset),
|
||||
distinct=True,
|
||||
),
|
||||
)
|
||||
document_types = DocumentType.objects.annotate(
|
||||
document_count=Count(
|
||||
"documents",
|
||||
filter=Q(documents__in=queryset),
|
||||
distinct=True,
|
||||
),
|
||||
)
|
||||
storage_paths = StoragePath.objects.annotate(
|
||||
document_count=Count(
|
||||
"documents",
|
||||
filter=Q(documents__in=queryset),
|
||||
distinct=True,
|
||||
),
|
||||
)
|
||||
custom_fields = CustomField.objects.annotate(
|
||||
document_count=Count(
|
||||
"fields__document",
|
||||
filter=Q(fields__document__in=queryset),
|
||||
distinct=True,
|
||||
),
|
||||
)
|
||||
|
||||
return {
|
||||
"selected_correspondents": [
|
||||
{"id": t.id, "document_count": t.document_count} for t in correspondents
|
||||
],
|
||||
"selected_tags": [
|
||||
{"id": t.id, "document_count": t.document_count} for t in tags
|
||||
],
|
||||
"selected_document_types": [
|
||||
{"id": t.id, "document_count": t.document_count} for t in document_types
|
||||
],
|
||||
"selected_storage_paths": [
|
||||
{"id": t.id, "document_count": t.document_count} for t in storage_paths
|
||||
],
|
||||
"selected_custom_fields": [
|
||||
{"id": t.id, "document_count": t.document_count} for t in custom_fields
|
||||
],
|
||||
}
|
||||
|
||||
def get_queryset(self):
|
||||
latest_version_content = Subquery(
|
||||
Document.objects.filter(root_document=OuterRef("pk"))
|
||||
@@ -1037,25 +978,6 @@ class DocumentViewSet(
|
||||
|
||||
return response
|
||||
|
||||
def list(self, request, *args, **kwargs):
|
||||
if not get_boolean(
|
||||
str(request.query_params.get("include_selection_data", "false")),
|
||||
):
|
||||
return super().list(request, *args, **kwargs)
|
||||
|
||||
queryset = self.filter_queryset(self.get_queryset())
|
||||
selection_data = self._get_selection_data_for_queryset(queryset)
|
||||
|
||||
page = self.paginate_queryset(queryset)
|
||||
if page is not None:
|
||||
serializer = self.get_serializer(page, many=True)
|
||||
response = self.get_paginated_response(serializer.data)
|
||||
response.data["selection_data"] = selection_data
|
||||
return response
|
||||
|
||||
serializer = self.get_serializer(queryset, many=True)
|
||||
return Response({"results": serializer.data, "selection_data": selection_data})
|
||||
|
||||
def destroy(self, request, *args, **kwargs):
|
||||
from documents import index
|
||||
|
||||
@@ -1158,9 +1080,11 @@ class DocumentViewSet(
|
||||
parser_class = get_parser_class_for_mime_type(mime_type)
|
||||
if parser_class:
|
||||
parser = parser_class(progress_callback=None, logging_group=None)
|
||||
cm = parser if isinstance(parser, ParserProtocol) else nullcontext(parser)
|
||||
|
||||
try:
|
||||
return parser.extract_metadata(file, mime_type)
|
||||
with cm:
|
||||
return parser.extract_metadata(file, mime_type)
|
||||
except Exception: # pragma: no cover
|
||||
logger.exception(f"Issue getting metadata for {file}")
|
||||
# TODO: cover GPG errors, remove later.
|
||||
@@ -2072,21 +1996,6 @@ class UnifiedSearchViewSet(DocumentViewSet):
|
||||
else None
|
||||
)
|
||||
|
||||
if get_boolean(
|
||||
str(
|
||||
request.query_params.get(
|
||||
"include_selection_data",
|
||||
"false",
|
||||
),
|
||||
),
|
||||
):
|
||||
result_ids = response.data.get("all", [])
|
||||
response.data["selection_data"] = (
|
||||
self._get_selection_data_for_queryset(
|
||||
Document.objects.filter(pk__in=result_ids),
|
||||
)
|
||||
)
|
||||
|
||||
return response
|
||||
except NotFound:
|
||||
raise
|
||||
@@ -2209,125 +2118,6 @@ class SavedViewViewSet(BulkPermissionMixin, PassUserMixin, ModelViewSet):
|
||||
ordering_fields = ("name",)
|
||||
|
||||
|
||||
class DocumentOperationPermissionMixin(PassUserMixin):
|
||||
permission_classes = (IsAuthenticated,)
|
||||
parser_classes = (parsers.JSONParser,)
|
||||
METHOD_NAMES_REQUIRING_USER = {
|
||||
"split",
|
||||
"merge",
|
||||
"rotate",
|
||||
"delete_pages",
|
||||
"edit_pdf",
|
||||
"remove_password",
|
||||
}
|
||||
|
||||
def _has_document_permissions(
|
||||
self,
|
||||
*,
|
||||
user: User,
|
||||
documents: list[int],
|
||||
method,
|
||||
parameters: dict[str, Any],
|
||||
) -> bool:
|
||||
if user.is_superuser:
|
||||
return True
|
||||
|
||||
document_objs = Document.objects.select_related("owner").filter(
|
||||
pk__in=documents,
|
||||
)
|
||||
user_is_owner_of_all_documents = all(
|
||||
(doc.owner == user or doc.owner is None) for doc in document_objs
|
||||
)
|
||||
|
||||
# check global and object permissions for all documents
|
||||
has_perms = user.has_perm("documents.change_document") and all(
|
||||
has_perms_owner_aware(user, "change_document", doc) for doc in document_objs
|
||||
)
|
||||
|
||||
# check ownership for methods that change original document
|
||||
if (
|
||||
(
|
||||
has_perms
|
||||
and method
|
||||
in [
|
||||
bulk_edit.set_permissions,
|
||||
bulk_edit.delete,
|
||||
bulk_edit.rotate,
|
||||
bulk_edit.delete_pages,
|
||||
bulk_edit.edit_pdf,
|
||||
bulk_edit.remove_password,
|
||||
]
|
||||
)
|
||||
or (
|
||||
method in [bulk_edit.merge, bulk_edit.split]
|
||||
and parameters.get("delete_originals")
|
||||
)
|
||||
or (method == bulk_edit.edit_pdf and parameters.get("update_document"))
|
||||
):
|
||||
has_perms = user_is_owner_of_all_documents
|
||||
|
||||
# check global add permissions for methods that create documents
|
||||
if (
|
||||
has_perms
|
||||
and (
|
||||
method in [bulk_edit.split, bulk_edit.merge]
|
||||
or (
|
||||
method in [bulk_edit.edit_pdf, bulk_edit.remove_password]
|
||||
and not parameters.get("update_document")
|
||||
)
|
||||
)
|
||||
and not user.has_perm("documents.add_document")
|
||||
):
|
||||
has_perms = False
|
||||
|
||||
# check global delete permissions for methods that delete documents
|
||||
if (
|
||||
has_perms
|
||||
and (
|
||||
method == bulk_edit.delete
|
||||
or (
|
||||
method in [bulk_edit.merge, bulk_edit.split]
|
||||
and parameters.get("delete_originals")
|
||||
)
|
||||
)
|
||||
and not user.has_perm("documents.delete_document")
|
||||
):
|
||||
has_perms = False
|
||||
|
||||
return has_perms
|
||||
|
||||
def _execute_document_action(
|
||||
self,
|
||||
*,
|
||||
method,
|
||||
validated_data: dict[str, Any],
|
||||
operation_label: str,
|
||||
):
|
||||
documents = validated_data["documents"]
|
||||
parameters = {k: v for k, v in validated_data.items() if k != "documents"}
|
||||
user = self.request.user
|
||||
|
||||
if method.__name__ in self.METHOD_NAMES_REQUIRING_USER:
|
||||
parameters["user"] = user
|
||||
|
||||
if not self._has_document_permissions(
|
||||
user=user,
|
||||
documents=documents,
|
||||
method=method,
|
||||
parameters=parameters,
|
||||
):
|
||||
return HttpResponseForbidden("Insufficient permissions")
|
||||
|
||||
try:
|
||||
result = method(documents, **parameters)
|
||||
return Response({"result": result})
|
||||
except Exception as e:
|
||||
logger.warning(f"An error occurred performing {operation_label}: {e!s}")
|
||||
return HttpResponseBadRequest(
|
||||
f"Error performing {operation_label}, check logs for more detail.",
|
||||
)
|
||||
|
||||
|
||||
@extend_schema_view(
|
||||
post=extend_schema(
|
||||
operation_id="bulk_edit",
|
||||
@@ -2346,7 +2136,7 @@ class DocumentOperationPermissionMixin(PassUserMixin):
|
||||
},
|
||||
),
|
||||
)
|
||||
class BulkEditView(DocumentOperationPermissionMixin):
|
||||
class BulkEditView(PassUserMixin):
|
||||
MODIFIED_FIELD_BY_METHOD = {
|
||||
"set_correspondent": "correspondent",
|
||||
"set_document_type": "document_type",
|
||||
@@ -2368,24 +2158,11 @@ class BulkEditView(DocumentOperationPermissionMixin):
|
||||
"remove_password": None,
|
||||
}
|
||||
|
||||
permission_classes = (IsAuthenticated,)
|
||||
serializer_class = BulkEditSerializer
|
||||
parser_classes = (parsers.JSONParser,)
|
||||
|
||||
def post(self, request, *args, **kwargs):
|
||||
request_method = request.data.get("method")
|
||||
api_version = int(request.version or settings.REST_FRAMEWORK["DEFAULT_VERSION"])
|
||||
# TODO: remove this and related backwards compatibility code when API v9 is dropped
|
||||
if request_method in BulkEditSerializer.LEGACY_DOCUMENT_ACTION_METHODS:
|
||||
endpoint = BulkEditSerializer.MOVED_DOCUMENT_ACTION_ENDPOINTS[
|
||||
request_method
|
||||
]
|
||||
logger.warning(
|
||||
"Deprecated bulk_edit method '%s' requested on API version %s. "
|
||||
"Use '%s' instead.",
|
||||
request_method,
|
||||
api_version,
|
||||
endpoint,
|
||||
)
|
||||
|
||||
serializer = self.get_serializer(data=request.data)
|
||||
serializer.is_valid(raise_exception=True)
|
||||
|
||||
@@ -2393,15 +2170,82 @@ class BulkEditView(DocumentOperationPermissionMixin):
|
||||
method = serializer.validated_data.get("method")
|
||||
parameters = serializer.validated_data.get("parameters")
|
||||
documents = serializer.validated_data.get("documents")
|
||||
if method.__name__ in self.METHOD_NAMES_REQUIRING_USER:
|
||||
if method in [
|
||||
bulk_edit.split,
|
||||
bulk_edit.merge,
|
||||
bulk_edit.rotate,
|
||||
bulk_edit.delete_pages,
|
||||
bulk_edit.edit_pdf,
|
||||
bulk_edit.remove_password,
|
||||
]:
|
||||
parameters["user"] = user
|
||||
if not self._has_document_permissions(
|
||||
user=user,
|
||||
documents=documents,
|
||||
method=method,
|
||||
parameters=parameters,
|
||||
):
|
||||
return HttpResponseForbidden("Insufficient permissions")
|
||||
|
||||
if not user.is_superuser:
|
||||
document_objs = Document.objects.select_related("owner").filter(
|
||||
pk__in=documents,
|
||||
)
|
||||
user_is_owner_of_all_documents = all(
|
||||
(doc.owner == user or doc.owner is None) for doc in document_objs
|
||||
)
|
||||
|
||||
# check global and object permissions for all documents
|
||||
has_perms = user.has_perm("documents.change_document") and all(
|
||||
has_perms_owner_aware(user, "change_document", doc)
|
||||
for doc in document_objs
|
||||
)
|
||||
|
||||
# check ownership for methods that change original document
|
||||
if (
|
||||
(
|
||||
has_perms
|
||||
and method
|
||||
in [
|
||||
bulk_edit.set_permissions,
|
||||
bulk_edit.delete,
|
||||
bulk_edit.rotate,
|
||||
bulk_edit.delete_pages,
|
||||
bulk_edit.edit_pdf,
|
||||
bulk_edit.remove_password,
|
||||
]
|
||||
)
|
||||
or (
|
||||
method in [bulk_edit.merge, bulk_edit.split]
|
||||
and parameters["delete_originals"]
|
||||
)
|
||||
or (method == bulk_edit.edit_pdf and parameters["update_document"])
|
||||
):
|
||||
has_perms = user_is_owner_of_all_documents
|
||||
|
||||
# check global add permissions for methods that create documents
|
||||
if (
|
||||
has_perms
|
||||
and (
|
||||
method in [bulk_edit.split, bulk_edit.merge]
|
||||
or (
|
||||
method in [bulk_edit.edit_pdf, bulk_edit.remove_password]
|
||||
and not parameters["update_document"]
|
||||
)
|
||||
)
|
||||
and not user.has_perm("documents.add_document")
|
||||
):
|
||||
has_perms = False
|
||||
|
||||
# check global delete permissions for methods that delete documents
|
||||
if (
|
||||
has_perms
|
||||
and (
|
||||
method == bulk_edit.delete
|
||||
or (
|
||||
method in [bulk_edit.merge, bulk_edit.split]
|
||||
and parameters["delete_originals"]
|
||||
)
|
||||
)
|
||||
and not user.has_perm("documents.delete_document")
|
||||
):
|
||||
has_perms = False
|
||||
|
||||
if not has_perms:
|
||||
return HttpResponseForbidden("Insufficient permissions")
|
||||
|
||||
try:
|
||||
modified_field = self.MODIFIED_FIELD_BY_METHOD.get(method.__name__, None)
|
||||
@@ -2458,168 +2302,6 @@ class BulkEditView(DocumentOperationPermissionMixin):
|
||||
)
|
||||
|
||||
|
||||
@extend_schema_view(
|
||||
post=extend_schema(
|
||||
operation_id="documents_rotate",
|
||||
description="Rotate one or more documents",
|
||||
responses={
|
||||
200: inline_serializer(
|
||||
name="RotateDocumentsResult",
|
||||
fields={
|
||||
"result": serializers.CharField(),
|
||||
},
|
||||
),
|
||||
},
|
||||
),
|
||||
)
|
||||
class RotateDocumentsView(DocumentOperationPermissionMixin):
|
||||
serializer_class = RotateDocumentsSerializer
|
||||
|
||||
def post(self, request, *args, **kwargs):
|
||||
serializer = self.get_serializer(data=request.data)
|
||||
serializer.is_valid(raise_exception=True)
|
||||
return self._execute_document_action(
|
||||
method=bulk_edit.rotate,
|
||||
validated_data=serializer.validated_data,
|
||||
operation_label="document rotate",
|
||||
)
|
||||
|
||||
|
||||
@extend_schema_view(
|
||||
post=extend_schema(
|
||||
operation_id="documents_merge",
|
||||
description="Merge selected documents into a new document",
|
||||
responses={
|
||||
200: inline_serializer(
|
||||
name="MergeDocumentsResult",
|
||||
fields={
|
||||
"result": serializers.CharField(),
|
||||
},
|
||||
),
|
||||
},
|
||||
),
|
||||
)
|
||||
class MergeDocumentsView(DocumentOperationPermissionMixin):
|
||||
serializer_class = MergeDocumentsSerializer
|
||||
|
||||
def post(self, request, *args, **kwargs):
|
||||
serializer = self.get_serializer(data=request.data)
|
||||
serializer.is_valid(raise_exception=True)
|
||||
return self._execute_document_action(
|
||||
method=bulk_edit.merge,
|
||||
validated_data=serializer.validated_data,
|
||||
operation_label="document merge",
|
||||
)
|
||||
|
||||
|
||||
@extend_schema_view(
|
||||
post=extend_schema(
|
||||
operation_id="documents_delete",
|
||||
description="Move selected documents to trash",
|
||||
responses={
|
||||
200: inline_serializer(
|
||||
name="DeleteDocumentsResult",
|
||||
fields={
|
||||
"result": serializers.CharField(),
|
||||
},
|
||||
),
|
||||
},
|
||||
),
|
||||
)
|
||||
class DeleteDocumentsView(DocumentOperationPermissionMixin):
|
||||
serializer_class = DeleteDocumentsSerializer
|
||||
|
||||
def post(self, request, *args, **kwargs):
|
||||
serializer = self.get_serializer(data=request.data)
|
||||
serializer.is_valid(raise_exception=True)
|
||||
return self._execute_document_action(
|
||||
method=bulk_edit.delete,
|
||||
validated_data=serializer.validated_data,
|
||||
operation_label="document delete",
|
||||
)
|
||||
|
||||
|
||||
@extend_schema_view(
|
||||
post=extend_schema(
|
||||
operation_id="documents_reprocess",
|
||||
description="Reprocess selected documents",
|
||||
responses={
|
||||
200: inline_serializer(
|
||||
name="ReprocessDocumentsResult",
|
||||
fields={
|
||||
"result": serializers.CharField(),
|
||||
},
|
||||
),
|
||||
},
|
||||
),
|
||||
)
|
||||
class ReprocessDocumentsView(DocumentOperationPermissionMixin):
|
||||
serializer_class = ReprocessDocumentsSerializer
|
||||
|
||||
def post(self, request, *args, **kwargs):
|
||||
serializer = self.get_serializer(data=request.data)
|
||||
serializer.is_valid(raise_exception=True)
|
||||
return self._execute_document_action(
|
||||
method=bulk_edit.reprocess,
|
||||
validated_data=serializer.validated_data,
|
||||
operation_label="document reprocess",
|
||||
)
|
||||
|
||||
|
||||
@extend_schema_view(
|
||||
post=extend_schema(
|
||||
operation_id="documents_edit_pdf",
|
||||
description="Perform PDF edit operations on a selected document",
|
||||
responses={
|
||||
200: inline_serializer(
|
||||
name="EditPdfDocumentsResult",
|
||||
fields={
|
||||
"result": serializers.CharField(),
|
||||
},
|
||||
),
|
||||
},
|
||||
),
|
||||
)
|
||||
class EditPdfDocumentsView(DocumentOperationPermissionMixin):
|
||||
serializer_class = EditPdfDocumentsSerializer
|
||||
|
||||
def post(self, request, *args, **kwargs):
|
||||
serializer = self.get_serializer(data=request.data)
|
||||
serializer.is_valid(raise_exception=True)
|
||||
return self._execute_document_action(
|
||||
method=bulk_edit.edit_pdf,
|
||||
validated_data=serializer.validated_data,
|
||||
operation_label="PDF edit",
|
||||
)
|
||||
|
||||
|
||||
@extend_schema_view(
|
||||
post=extend_schema(
|
||||
operation_id="documents_remove_password",
|
||||
description="Remove password protection from selected PDFs",
|
||||
responses={
|
||||
200: inline_serializer(
|
||||
name="RemovePasswordDocumentsResult",
|
||||
fields={
|
||||
"result": serializers.CharField(),
|
||||
},
|
||||
),
|
||||
},
|
||||
),
|
||||
)
|
||||
class RemovePasswordDocumentsView(DocumentOperationPermissionMixin):
|
||||
serializer_class = RemovePasswordDocumentsSerializer
|
||||
|
||||
def post(self, request, *args, **kwargs):
|
||||
serializer = self.get_serializer(data=request.data)
|
||||
serializer.is_valid(raise_exception=True)
|
||||
return self._execute_document_action(
|
||||
method=bulk_edit.remove_password,
|
||||
validated_data=serializer.validated_data,
|
||||
operation_label="password removal",
|
||||
)
|
||||
|
||||
|
||||
@extend_schema_view(
|
||||
post=extend_schema(
|
||||
description="Upload a document via the API",
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,6 +1,7 @@
|
||||
import os
|
||||
|
||||
from celery import Celery
|
||||
from celery.signals import worker_process_init
|
||||
|
||||
# Set the default Django settings module for the 'celery' program.
|
||||
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "paperless.settings")
|
||||
@@ -15,3 +16,19 @@ app.config_from_object("django.conf:settings", namespace="CELERY")
|
||||
|
||||
# Load task modules from all registered Django apps.
|
||||
app.autodiscover_tasks()
|
||||
|
||||
|
||||
@worker_process_init.connect
|
||||
def on_worker_process_init(**kwargs) -> None: # pragma: no cover
|
||||
"""
|
||||
Register built-in parsers eagerly in each Celery worker process.
|
||||
|
||||
This registers only the built-in parsers (no entrypoint discovery) so
|
||||
that workers can begin consuming documents immediately. Entrypoint
|
||||
discovery for third-party parsers is deferred to the first call of
|
||||
get_parser_registry() inside a task, keeping worker_process_init
|
||||
well within its 4-second timeout budget.
|
||||
"""
|
||||
from paperless.parsers.registry import init_builtin_parsers
|
||||
|
||||
init_builtin_parsers()
|
||||
|
||||
379
src/paperless/parsers/__init__.py
Normal file
379
src/paperless/parsers/__init__.py
Normal file
@@ -0,0 +1,379 @@
|
||||
"""
|
||||
Public interface for the Paperless-ngx parser plugin system.
|
||||
|
||||
This module defines ParserProtocol — the structural contract that every
|
||||
document parser must satisfy, whether it is a built-in parser shipped with
|
||||
Paperless-ngx or a third-party parser installed via a Python entrypoint.
|
||||
|
||||
Phase 1/2 scope: only the Protocol is defined here. The transitional
|
||||
DocumentParser ABC (Phase 3) and concrete built-in parsers (Phase 3+) will
|
||||
be added in later phases, so there are intentionally no imports of parser
|
||||
implementations here.
|
||||
|
||||
Usage example (third-party parser)::
|
||||
|
||||
from paperless.parsers import ParserProtocol
|
||||
|
||||
class MyParser:
|
||||
name = "my-parser"
|
||||
version = "1.0.0"
|
||||
author = "Acme Corp"
|
||||
url = "https://example.com/my-parser"
|
||||
|
||||
@classmethod
|
||||
def supported_mime_types(cls) -> dict[str, str]:
|
||||
return {"application/x-my-format": ".myf"}
|
||||
|
||||
@classmethod
|
||||
def score(cls, mime_type, filename, path=None):
|
||||
return 10
|
||||
|
||||
# … implement remaining protocol methods …
|
||||
|
||||
assert isinstance(MyParser(), ParserProtocol)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
from typing import Protocol
|
||||
from typing import Self
|
||||
from typing import TypedDict
|
||||
from typing import runtime_checkable
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import datetime
|
||||
from pathlib import Path
|
||||
from types import TracebackType
|
||||
|
||||
__all__ = [
|
||||
"MetadataEntry",
|
||||
"ParserProtocol",
|
||||
]
|
||||
|
||||
|
||||
class MetadataEntry(TypedDict):
|
||||
"""A single metadata field extracted from a document.
|
||||
|
||||
All four keys are required. Values are always serialised to strings —
|
||||
type-specific conversion (dates, integers, lists) is the responsibility
|
||||
of the parser before returning.
|
||||
"""
|
||||
|
||||
namespace: str
|
||||
"""URI of the metadata namespace (e.g. 'http://ns.adobe.com/pdf/1.3/')."""
|
||||
|
||||
prefix: str
|
||||
"""Conventional namespace prefix (e.g. 'pdf', 'xmp', 'dc')."""
|
||||
|
||||
key: str
|
||||
"""Field name within the namespace (e.g. 'Author', 'CreateDate')."""
|
||||
|
||||
value: str
|
||||
"""String representation of the field value."""
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
class ParserProtocol(Protocol):
|
||||
"""Structural contract for all Paperless-ngx document parsers.
|
||||
|
||||
Both built-in parsers and third-party plugins (discovered via the
|
||||
"paperless_ngx.parsers" entrypoint group) must satisfy this Protocol.
|
||||
Because it is decorated with runtime_checkable, isinstance(obj,
|
||||
ParserProtocol) works at runtime based on method presence, which is
|
||||
useful for validation in ParserRegistry.discover.
|
||||
|
||||
Parsers must expose four string attributes at the class level so the
|
||||
registry can log attribution information without instantiating the parser:
|
||||
|
||||
name : str
|
||||
Human-readable parser name (e.g. "Tesseract OCR").
|
||||
version : str
|
||||
Semantic version string (e.g. "1.2.3").
|
||||
author : str
|
||||
Author or organisation name.
|
||||
url : str
|
||||
URL for documentation, source code, or issue tracker.
|
||||
"""
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Class-level identity (checked by the registry, not Protocol methods)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
name: str
|
||||
version: str
|
||||
author: str
|
||||
url: str
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Class methods
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@classmethod
|
||||
def supported_mime_types(cls) -> dict[str, str]:
|
||||
"""Return a mapping of supported MIME types to preferred file extensions.
|
||||
|
||||
The keys are MIME type strings (e.g. "application/pdf"), and the
|
||||
values are the preferred file extension including the leading dot
|
||||
(e.g. ".pdf"). The registry uses this mapping both to decide whether
|
||||
a parser is a candidate for a given file and to determine the default
|
||||
extension when creating archive copies.
|
||||
|
||||
Returns
|
||||
-------
|
||||
dict[str, str]
|
||||
{mime_type: extension} mapping — may be empty if the parser
|
||||
has been temporarily disabled.
|
||||
"""
|
||||
...
|
||||
|
||||
@classmethod
|
||||
def score(
|
||||
cls,
|
||||
mime_type: str,
|
||||
filename: str,
|
||||
path: Path | None = None,
|
||||
) -> int | None:
|
||||
"""Return a priority score for handling this file, or None to decline.
|
||||
|
||||
The registry calls this after confirming that the MIME type is in
|
||||
supported_mime_types. Parsers may inspect filename and optionally
|
||||
the file at path to refine their confidence level.
|
||||
|
||||
A higher score wins. Return None to explicitly decline handling a file
|
||||
even though the MIME type is listed as supported (e.g. when a feature
|
||||
flag is disabled, or a required service is not configured).
|
||||
|
||||
Parameters
|
||||
----------
|
||||
mime_type:
|
||||
The detected MIME type of the file to be parsed.
|
||||
filename:
|
||||
The original filename, including extension.
|
||||
path:
|
||||
Optional filesystem path to the file. Parsers that need to
|
||||
inspect file content (e.g. magic-byte sniffing) may use this.
|
||||
May be None when scoring happens before the file is available locally.
|
||||
|
||||
Returns
|
||||
-------
|
||||
int | None
|
||||
Priority score (higher wins), or None to decline.
|
||||
"""
|
||||
...
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Properties
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@property
|
||||
def can_produce_archive(self) -> bool:
|
||||
"""Whether this parser can produce a searchable PDF archive copy.
|
||||
|
||||
If True, the consumption pipeline may request an archive version when
|
||||
processing the document, subject to the ARCHIVE_FILE_GENERATION
|
||||
setting. If False, only thumbnail and text extraction are performed.
|
||||
"""
|
||||
...
|
||||
|
||||
@property
|
||||
def requires_pdf_rendition(self) -> bool:
|
||||
"""Whether the parser must produce a PDF for the frontend to display.
|
||||
|
||||
True for formats the browser cannot display natively (e.g. DOCX, ODT).
|
||||
When True, the pipeline always stores the PDF output regardless of the
|
||||
ARCHIVE_FILE_GENERATION setting, since the original format cannot be
|
||||
shown to the user.
|
||||
"""
|
||||
...
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Core parsing interface
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def parse(
|
||||
self,
|
||||
document_path: Path,
|
||||
mime_type: str,
|
||||
*,
|
||||
produce_archive: bool = True,
|
||||
) -> None:
|
||||
"""Parse document_path and populate internal state.
|
||||
|
||||
After a successful call, callers retrieve results via get_text,
|
||||
get_date, and get_archive_path.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
document_path:
|
||||
Absolute path to the document file to parse.
|
||||
mime_type:
|
||||
Detected MIME type of the document.
|
||||
produce_archive:
|
||||
When True (the default) and can_produce_archive is also True,
|
||||
the parser should produce a searchable PDF at the path returned
|
||||
by get_archive_path. Pass False when only text extraction and
|
||||
thumbnail generation are required and disk I/O should be minimised.
|
||||
|
||||
Raises
|
||||
------
|
||||
documents.parsers.ParseError
|
||||
If parsing fails for any reason.
|
||||
"""
|
||||
...
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Result accessors
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def get_text(self) -> str | None:
|
||||
"""Return the plain-text content extracted during parse.
|
||||
|
||||
Returns
|
||||
-------
|
||||
str | None
|
||||
Extracted text, or None if no text could be found.
|
||||
"""
|
||||
...
|
||||
|
||||
def get_date(self) -> datetime.datetime | None:
|
||||
"""Return the document date detected during parse.
|
||||
|
||||
Returns
|
||||
-------
|
||||
datetime.datetime | None
|
||||
Detected document date, or None if no date was found.
|
||||
"""
|
||||
...
|
||||
|
||||
def get_archive_path(self) -> Path | None:
|
||||
"""Return the path to the generated archive PDF, or None.
|
||||
|
||||
Returns
|
||||
-------
|
||||
Path | None
|
||||
Path to the searchable PDF archive, or None if no archive was
|
||||
produced (e.g. because produce_archive=False or the parser does
|
||||
not support archive generation).
|
||||
"""
|
||||
...
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Thumbnail and metadata
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def get_thumbnail(self, document_path: Path, mime_type: str) -> Path:
|
||||
"""Generate and return the path to a thumbnail image for the document.
|
||||
|
||||
May be called independently of parse. The returned path must point to
|
||||
an existing WebP image file inside the parser's temporary working
|
||||
directory.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
document_path:
|
||||
Absolute path to the source document.
|
||||
mime_type:
|
||||
Detected MIME type of the document.
|
||||
|
||||
Returns
|
||||
-------
|
||||
Path
|
||||
Path to the generated thumbnail image (WebP format preferred).
|
||||
"""
|
||||
...
|
||||
|
||||
def get_page_count(
|
||||
self,
|
||||
document_path: Path,
|
||||
mime_type: str,
|
||||
) -> int | None:
|
||||
"""Return the number of pages in the document, if determinable.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
document_path:
|
||||
Absolute path to the source document.
|
||||
mime_type:
|
||||
Detected MIME type of the document.
|
||||
|
||||
Returns
|
||||
-------
|
||||
int | None
|
||||
Page count, or None if the parser cannot determine it.
|
||||
"""
|
||||
...
|
||||
|
||||
def extract_metadata(
|
||||
self,
|
||||
document_path: Path,
|
||||
mime_type: str,
|
||||
) -> list[MetadataEntry]:
|
||||
"""Extract format-specific metadata from the document.
|
||||
|
||||
Called by the API view layer on demand — not during the consumption
|
||||
pipeline. Results are returned to the frontend for per-file display.
|
||||
|
||||
For documents with an archive version, this method is called twice:
|
||||
once for the original file (with its native MIME type) and once for
|
||||
the archive file (with ``"application/pdf"``). Parsers that produce
|
||||
archives should handle both cases.
|
||||
|
||||
Implementations must not raise. A failure to read metadata is not
|
||||
fatal — log a warning and return whatever partial results were
|
||||
collected, or ``[]`` if none.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
document_path:
|
||||
Absolute path to the file to extract metadata from.
|
||||
mime_type:
|
||||
MIME type of the file at ``document_path``. May be
|
||||
``"application/pdf"`` when called for the archive version.
|
||||
|
||||
Returns
|
||||
-------
|
||||
list[MetadataEntry]
|
||||
Zero or more metadata entries. Returns ``[]`` if no metadata
|
||||
could be extracted or the format does not support it.
|
||||
"""
|
||||
...
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Context manager
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def __enter__(self) -> Self:
|
||||
"""Enter the parser context, returning the parser instance.
|
||||
|
||||
Implementations should perform any resource allocation here if not
|
||||
done in __init__ (e.g. creating API clients or temp directories).
|
||||
|
||||
Returns
|
||||
-------
|
||||
Self
|
||||
The parser instance itself.
|
||||
"""
|
||||
...
|
||||
|
||||
def __exit__(
|
||||
self,
|
||||
exc_type: type[BaseException] | None,
|
||||
exc_val: BaseException | None,
|
||||
exc_tb: TracebackType | None,
|
||||
) -> None:
|
||||
"""Exit the parser context and release all resources.
|
||||
|
||||
Implementations must clean up all temporary files and other resources
|
||||
regardless of whether an exception occurred.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
exc_type:
|
||||
The exception class, or None if no exception was raised.
|
||||
exc_val:
|
||||
The exception instance, or None.
|
||||
exc_tb:
|
||||
The traceback, or None.
|
||||
"""
|
||||
...
|
||||
364
src/paperless/parsers/registry.py
Normal file
364
src/paperless/parsers/registry.py
Normal file
@@ -0,0 +1,364 @@
|
||||
"""
|
||||
Singleton registry that tracks all document parsers available to
|
||||
Paperless-ngx — both built-ins shipped with the application and third-party
|
||||
plugins installed via Python entrypoints.
|
||||
|
||||
Public surface
|
||||
--------------
|
||||
get_parser_registry
|
||||
Lazy-initialise and return the shared ParserRegistry. This is the primary
|
||||
entry point for production code.
|
||||
|
||||
init_builtin_parsers
|
||||
Register built-in parsers only, without entrypoint discovery. Safe to
|
||||
call from Celery worker_process_init where importing all entrypoints
|
||||
would be wasteful or cause side effects.
|
||||
|
||||
reset_parser_registry
|
||||
Reset module-level state. For tests only.
|
||||
|
||||
Entrypoint group
|
||||
----------------
|
||||
Third-party parsers must advertise themselves under the
|
||||
"paperless_ngx.parsers" entrypoint group in their pyproject.toml::
|
||||
|
||||
[project.entry-points."paperless_ngx.parsers"]
|
||||
my_parser = "my_package.parsers:MyParser"
|
||||
|
||||
The loaded class must expose the following attributes at the class level
|
||||
(not just on instances) for the registry to accept it:
|
||||
name, version, author, url, supported_mime_types (callable), score (callable).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from importlib.metadata import entry_points
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pathlib import Path
|
||||
|
||||
from paperless.parsers import ParserProtocol
|
||||
|
||||
logger = logging.getLogger("paperless.parsers.registry")
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Module-level singleton state
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_registry: ParserRegistry | None = None
|
||||
_discovery_complete: bool = False
|
||||
|
||||
# Attribute names that every registered external parser class must expose.
|
||||
_REQUIRED_ATTRS: tuple[str, ...] = (
|
||||
"name",
|
||||
"version",
|
||||
"author",
|
||||
"url",
|
||||
"supported_mime_types",
|
||||
"score",
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Module-level accessor functions
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def get_parser_registry() -> ParserRegistry:
|
||||
"""Return the shared ParserRegistry instance.
|
||||
|
||||
On the first call this function:
|
||||
|
||||
1. Creates a new ParserRegistry.
|
||||
2. Calls register_defaults to install built-in parsers.
|
||||
3. Calls discover to load third-party plugins via importlib.metadata entrypoints.
|
||||
4. Calls log_summary to emit a startup summary.
|
||||
|
||||
Subsequent calls return the same instance immediately.
|
||||
|
||||
Returns
|
||||
-------
|
||||
ParserRegistry
|
||||
The shared registry singleton.
|
||||
"""
|
||||
global _registry, _discovery_complete
|
||||
|
||||
if _registry is None:
|
||||
_registry = ParserRegistry()
|
||||
_registry.register_defaults()
|
||||
|
||||
if not _discovery_complete:
|
||||
_registry.discover()
|
||||
_registry.log_summary()
|
||||
_discovery_complete = True
|
||||
|
||||
return _registry
|
||||
|
||||
|
||||
def init_builtin_parsers() -> None:
|
||||
"""Register built-in parsers without performing entrypoint discovery.
|
||||
|
||||
Intended for use in Celery worker_process_init handlers where importing
|
||||
all installed entrypoints would be wasteful, slow, or could produce
|
||||
undesirable side effects. Entrypoint discovery (third-party plugins) is
|
||||
deliberately not performed.
|
||||
|
||||
Safe to call multiple times — subsequent calls are no-ops.
|
||||
|
||||
Returns
|
||||
-------
|
||||
None
|
||||
"""
|
||||
global _registry
|
||||
|
||||
if _registry is None:
|
||||
_registry = ParserRegistry()
|
||||
_registry.register_defaults()
|
||||
|
||||
|
||||
def reset_parser_registry() -> None:
|
||||
"""Reset the module-level registry state to its initial values.
|
||||
|
||||
Resets _registry and _discovery_complete so the next call to
|
||||
get_parser_registry will re-initialise everything from scratch.
|
||||
|
||||
FOR TESTS ONLY. Do not call this in production code — resetting the
|
||||
registry mid-request causes all subsequent parser lookups to go through
|
||||
discovery again, which is expensive and may have unexpected side effects
|
||||
in multi-threaded environments.
|
||||
|
||||
Returns
|
||||
-------
|
||||
None
|
||||
"""
|
||||
global _registry, _discovery_complete
|
||||
|
||||
_registry = None
|
||||
_discovery_complete = False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Registry class
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class ParserRegistry:
|
||||
"""Registry that maps MIME types to the best available parser class.
|
||||
|
||||
Parsers are partitioned into two lists:
|
||||
|
||||
_builtins
|
||||
Parser classes registered via register_builtin (populated by
|
||||
register_defaults in Phase 3+).
|
||||
|
||||
_external
|
||||
Parser classes loaded from installed Python entrypoints via discover.
|
||||
|
||||
When resolving a parser for a file, external parsers are evaluated
|
||||
alongside built-in parsers using a uniform scoring mechanism. Both lists
|
||||
are iterated together; the class with the highest score wins. If an
|
||||
external parser wins, its attribution details are logged so users can
|
||||
identify which third-party package handled their document.
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self._external: list[type[ParserProtocol]] = []
|
||||
self._builtins: list[type[ParserProtocol]] = []
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Registration
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def register_builtin(self, parser_class: type[ParserProtocol]) -> None:
|
||||
"""Register a built-in parser class.
|
||||
|
||||
Built-in parsers are shipped with Paperless-ngx and are appended to
|
||||
the _builtins list. They are never overridden by external parsers;
|
||||
instead, scoring determines which parser wins for any given file.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
parser_class:
|
||||
The parser class to register. Must satisfy ParserProtocol.
|
||||
"""
|
||||
self._builtins.append(parser_class)
|
||||
|
||||
def register_defaults(self) -> None:
|
||||
"""Register the built-in parsers that ship with Paperless-ngx.
|
||||
|
||||
Each parser that has been migrated to the new ParserProtocol interface
|
||||
is registered here. Parsers are added in ascending weight order so
|
||||
that log output is predictable; scoring determines which parser wins
|
||||
at runtime regardless of registration order.
|
||||
"""
|
||||
from paperless.parsers.text import TextDocumentParser
|
||||
|
||||
self.register_builtin(TextDocumentParser)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Discovery
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def discover(self) -> None:
|
||||
"""Load third-party parsers from the "paperless_ngx.parsers" entrypoint group.
|
||||
|
||||
For each advertised entrypoint the method:
|
||||
|
||||
1. Calls ep.load() to import the class.
|
||||
2. Validates that the class exposes all required attributes.
|
||||
3. On success, appends the class to _external and logs an info message.
|
||||
4. On failure (import error or missing attributes), logs an appropriate
|
||||
warning/error and continues to the next entrypoint.
|
||||
|
||||
Errors during discovery of a single parser do not prevent other parsers
|
||||
from being loaded.
|
||||
|
||||
Returns
|
||||
-------
|
||||
None
|
||||
"""
|
||||
eps = entry_points(group="paperless_ngx.parsers")
|
||||
|
||||
for ep in eps:
|
||||
try:
|
||||
parser_class = ep.load()
|
||||
except Exception:
|
||||
logger.exception(
|
||||
"Failed to load parser entrypoint '%s' — skipping.",
|
||||
ep.name,
|
||||
)
|
||||
continue
|
||||
|
||||
missing = [
|
||||
attr for attr in _REQUIRED_ATTRS if not hasattr(parser_class, attr)
|
||||
]
|
||||
if missing:
|
||||
logger.warning(
|
||||
"Parser loaded from entrypoint '%s' is missing required "
|
||||
"attributes %r — skipping.",
|
||||
ep.name,
|
||||
missing,
|
||||
)
|
||||
continue
|
||||
|
||||
self._external.append(parser_class)
|
||||
logger.info(
|
||||
"Loaded third-party parser '%s' v%s by %s (entrypoint: '%s').",
|
||||
parser_class.name,
|
||||
parser_class.version,
|
||||
parser_class.author,
|
||||
ep.name,
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Summary logging
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def log_summary(self) -> None:
|
||||
"""Log a startup summary of all registered parsers.
|
||||
|
||||
Built-in parsers are listed first, followed by any external parsers
|
||||
discovered from entrypoints. If no external parsers were found a
|
||||
short informational message is logged instead of an empty list.
|
||||
|
||||
Returns
|
||||
-------
|
||||
None
|
||||
"""
|
||||
logger.info(
|
||||
"Built-in parsers (%d):",
|
||||
len(self._builtins),
|
||||
)
|
||||
for cls in self._builtins:
|
||||
logger.info(
|
||||
" [built-in] %s v%s — %s",
|
||||
getattr(cls, "name", repr(cls)),
|
||||
getattr(cls, "version", "unknown"),
|
||||
getattr(cls, "url", "built-in"),
|
||||
)
|
||||
|
||||
if not self._external:
|
||||
logger.info("No third-party parsers discovered.")
|
||||
return
|
||||
|
||||
logger.info(
|
||||
"Third-party parsers (%d):",
|
||||
len(self._external),
|
||||
)
|
||||
for cls in self._external:
|
||||
logger.info(
|
||||
" [external] %s v%s by %s — report issues at %s",
|
||||
getattr(cls, "name", repr(cls)),
|
||||
getattr(cls, "version", "unknown"),
|
||||
getattr(cls, "author", "unknown"),
|
||||
getattr(cls, "url", "unknown"),
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Parser resolution
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def get_parser_for_file(
|
||||
self,
|
||||
mime_type: str,
|
||||
filename: str,
|
||||
path: Path | None = None,
|
||||
) -> type[ParserProtocol] | None:
|
||||
"""Return the best parser class for the given file, or None.
|
||||
|
||||
All registered parsers (external first, then built-ins) are evaluated
|
||||
against the file. A parser is eligible if mime_type appears in the dict
|
||||
returned by its supported_mime_types classmethod, and its score
|
||||
classmethod returns a non-None integer.
|
||||
|
||||
The parser with the highest score wins. When two parsers return the
|
||||
same score, the one that appears earlier in the evaluation order wins
|
||||
(external parsers are evaluated before built-ins, giving third-party
|
||||
packages a chance to override defaults at equal priority).
|
||||
|
||||
When an external parser is selected, its identity is logged at INFO
|
||||
level so operators can trace which package handled a document.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
mime_type:
|
||||
The detected MIME type of the file.
|
||||
filename:
|
||||
The original filename, including extension.
|
||||
path:
|
||||
Optional filesystem path to the file. Forwarded to each
|
||||
parser's score method.
|
||||
|
||||
Returns
|
||||
-------
|
||||
type[ParserProtocol] | None
|
||||
The winning parser class, or None if no parser can handle the file.
|
||||
"""
|
||||
best_score: int | None = None
|
||||
best_parser: type[ParserProtocol] | None = None
|
||||
|
||||
# External parsers are placed first so that, at equal scores, an
|
||||
# external parser wins over a built-in (first-seen policy).
|
||||
for parser_class in (*self._external, *self._builtins):
|
||||
if mime_type not in parser_class.supported_mime_types():
|
||||
continue
|
||||
|
||||
score = parser_class.score(mime_type, filename, path)
|
||||
if score is None:
|
||||
continue
|
||||
|
||||
if best_score is None or score > best_score:
|
||||
best_score = score
|
||||
best_parser = parser_class
|
||||
|
||||
if best_parser is not None and best_parser in self._external:
|
||||
logger.info(
|
||||
"Document handled by third-party parser '%s' v%s — %s",
|
||||
getattr(best_parser, "name", repr(best_parser)),
|
||||
getattr(best_parser, "version", "unknown"),
|
||||
getattr(best_parser, "url", "unknown"),
|
||||
)
|
||||
|
||||
return best_parser
|
||||
320
src/paperless/parsers/text.py
Normal file
320
src/paperless/parsers/text.py
Normal file
@@ -0,0 +1,320 @@
|
||||
"""
|
||||
Built-in plain-text document parser.
|
||||
|
||||
Handles text/plain, text/csv, and application/csv MIME types by reading the
|
||||
file content directly. Thumbnails are generated by rendering a page-sized
|
||||
WebP image from the first 100,000 characters using Pillow.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import shutil
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
from typing import Self
|
||||
|
||||
from django.conf import settings
|
||||
from PIL import Image
|
||||
from PIL import ImageDraw
|
||||
from PIL import ImageFont
|
||||
|
||||
from paperless.version import __full_version_str__
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import datetime
|
||||
from types import TracebackType
|
||||
|
||||
from paperless.parsers import MetadataEntry
|
||||
|
||||
logger = logging.getLogger("paperless.parsing.text")
|
||||
|
||||
_SUPPORTED_MIME_TYPES: dict[str, str] = {
|
||||
"text/plain": ".txt",
|
||||
"text/csv": ".csv",
|
||||
"application/csv": ".csv",
|
||||
}
|
||||
|
||||
|
||||
class TextDocumentParser:
|
||||
"""Parse plain-text documents (txt, csv) for Paperless-ngx.
|
||||
|
||||
This parser reads the file content directly as UTF-8 text and renders a
|
||||
simple thumbnail using Pillow. It does not perform OCR and does not
|
||||
produce a searchable PDF archive copy.
|
||||
|
||||
Class attributes
|
||||
----------------
|
||||
name : str
|
||||
Human-readable parser name.
|
||||
version : str
|
||||
Semantic version string, kept in sync with Paperless-ngx releases.
|
||||
author : str
|
||||
Maintainer name.
|
||||
url : str
|
||||
Issue tracker / source URL.
|
||||
"""
|
||||
|
||||
name: str = "Paperless-ngx Text Parser"
|
||||
version: str = __full_version_str__
|
||||
author: str = "Paperless-ngx Contributors"
|
||||
url: str = "https://github.com/paperless-ngx/paperless-ngx"
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Class methods
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@classmethod
|
||||
def supported_mime_types(cls) -> dict[str, str]:
|
||||
"""Return the MIME types this parser handles.
|
||||
|
||||
Returns
|
||||
-------
|
||||
dict[str, str]
|
||||
Mapping of MIME type to preferred file extension.
|
||||
"""
|
||||
return _SUPPORTED_MIME_TYPES
|
||||
|
||||
@classmethod
|
||||
def score(
|
||||
cls,
|
||||
mime_type: str,
|
||||
filename: str,
|
||||
path: Path | None = None,
|
||||
) -> int | None:
|
||||
"""Return the priority score for handling this file.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
mime_type:
|
||||
Detected MIME type of the file.
|
||||
filename:
|
||||
Original filename including extension.
|
||||
path:
|
||||
Optional filesystem path. Not inspected by this parser.
|
||||
|
||||
Returns
|
||||
-------
|
||||
int | None
|
||||
10 if the MIME type is supported, otherwise None.
|
||||
"""
|
||||
if mime_type in _SUPPORTED_MIME_TYPES:
|
||||
return 10
|
||||
return None
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Properties
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@property
|
||||
def can_produce_archive(self) -> bool:
|
||||
"""Whether this parser can produce a searchable PDF archive copy.
|
||||
|
||||
Returns
|
||||
-------
|
||||
bool
|
||||
Always False — the text parser does not produce a PDF archive.
|
||||
"""
|
||||
return False
|
||||
|
||||
@property
|
||||
def requires_pdf_rendition(self) -> bool:
|
||||
"""Whether the parser must produce a PDF for the frontend to display.
|
||||
|
||||
Returns
|
||||
-------
|
||||
bool
|
||||
Always False — plain text files are displayable as-is.
|
||||
"""
|
||||
return False
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Lifecycle
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def __init__(self, logging_group: object = None) -> None:
|
||||
settings.SCRATCH_DIR.mkdir(parents=True, exist_ok=True)
|
||||
self._tempdir = Path(
|
||||
tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR),
|
||||
)
|
||||
self._text: str | None = None
|
||||
|
||||
def __enter__(self) -> Self:
|
||||
return self
|
||||
|
||||
def __exit__(
|
||||
self,
|
||||
exc_type: type[BaseException] | None,
|
||||
exc_val: BaseException | None,
|
||||
exc_tb: TracebackType | None,
|
||||
) -> None:
|
||||
logger.debug("Cleaning up temporary directory %s", self._tempdir)
|
||||
shutil.rmtree(self._tempdir, ignore_errors=True)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Core parsing interface
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def parse(
|
||||
self,
|
||||
document_path: Path,
|
||||
mime_type: str,
|
||||
*,
|
||||
produce_archive: bool = True,
|
||||
) -> None:
|
||||
"""Read the document and store its text content.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
document_path:
|
||||
Absolute path to the text file.
|
||||
mime_type:
|
||||
Detected MIME type of the document.
|
||||
produce_archive:
|
||||
Ignored — this parser never produces a PDF archive.
|
||||
|
||||
Raises
|
||||
------
|
||||
documents.parsers.ParseError
|
||||
If the file cannot be read.
|
||||
"""
|
||||
self._text = self._read_text(document_path)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Result accessors
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def get_text(self) -> str | None:
|
||||
"""Return the plain-text content extracted during parse.
|
||||
|
||||
Returns
|
||||
-------
|
||||
str | None
|
||||
Extracted text, or None if parse has not been called yet.
|
||||
"""
|
||||
return self._text
|
||||
|
||||
def get_date(self) -> datetime.datetime | None:
|
||||
"""Return the document date detected during parse.
|
||||
|
||||
Returns
|
||||
-------
|
||||
datetime.datetime | None
|
||||
Always None — the text parser does not detect dates.
|
||||
"""
|
||||
return None
|
||||
|
||||
def get_archive_path(self) -> Path | None:
|
||||
"""Return the path to a generated archive PDF, or None.
|
||||
|
||||
Returns
|
||||
-------
|
||||
Path | None
|
||||
Always None — the text parser does not produce a PDF archive.
|
||||
"""
|
||||
return None
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Thumbnail and metadata
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def get_thumbnail(self, document_path: Path, mime_type: str) -> Path:
|
||||
"""Render the first portion of the document as a WebP thumbnail.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
document_path:
|
||||
Absolute path to the source document.
|
||||
mime_type:
|
||||
Detected MIME type of the document.
|
||||
|
||||
Returns
|
||||
-------
|
||||
Path
|
||||
Path to the generated WebP thumbnail inside the temporary directory.
|
||||
"""
|
||||
max_chars = 100_000
|
||||
file_size_limit = 50 * 1024 * 1024
|
||||
|
||||
if document_path.stat().st_size > file_size_limit:
|
||||
text = "[File too large to preview]"
|
||||
else:
|
||||
with Path(document_path).open("r", encoding="utf-8", errors="replace") as f:
|
||||
text = f.read(max_chars)
|
||||
|
||||
img = Image.new("RGB", (500, 700), color="white")
|
||||
draw = ImageDraw.Draw(img)
|
||||
font = ImageFont.truetype(
|
||||
font=settings.THUMBNAIL_FONT_NAME,
|
||||
size=20,
|
||||
layout_engine=ImageFont.Layout.BASIC,
|
||||
)
|
||||
draw.multiline_text((5, 5), text, font=font, fill="black", spacing=4)
|
||||
|
||||
out_path = self._tempdir / "thumb.webp"
|
||||
img.save(out_path, format="WEBP")
|
||||
|
||||
return out_path
|
||||
|
||||
def get_page_count(
|
||||
self,
|
||||
document_path: Path,
|
||||
mime_type: str,
|
||||
) -> int | None:
|
||||
"""Return the number of pages in the document.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
document_path:
|
||||
Absolute path to the source document.
|
||||
mime_type:
|
||||
Detected MIME type of the document.
|
||||
|
||||
Returns
|
||||
-------
|
||||
int | None
|
||||
Always None — page count is not meaningful for plain text.
|
||||
"""
|
||||
return None
|
||||
|
||||
def extract_metadata(
|
||||
self,
|
||||
document_path: Path,
|
||||
mime_type: str,
|
||||
) -> list[MetadataEntry]:
|
||||
"""Extract format-specific metadata from the document.
|
||||
|
||||
Returns
|
||||
-------
|
||||
list[MetadataEntry]
|
||||
Always ``[]`` — plain text files carry no structured metadata.
|
||||
"""
|
||||
return []
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Private helpers
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _read_text(self, filepath: Path) -> str:
|
||||
"""Read file content, replacing invalid UTF-8 bytes rather than failing.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
filepath:
|
||||
Path to the file to read.
|
||||
|
||||
Returns
|
||||
-------
|
||||
str
|
||||
File content as a string.
|
||||
"""
|
||||
try:
|
||||
return filepath.read_text(encoding="utf-8")
|
||||
except UnicodeDecodeError as exc:
|
||||
logger.warning(
|
||||
"Unicode error reading %s, replacing bad bytes: %s",
|
||||
filepath,
|
||||
exc,
|
||||
)
|
||||
return filepath.read_bytes().decode("utf-8", errors="replace")
|
||||
440
src/paperless/parsers/tika.py
Normal file
440
src/paperless/parsers/tika.py
Normal file
@@ -0,0 +1,440 @@
|
||||
"""
|
||||
Built-in Tika document parser.
|
||||
|
||||
Handles Office documents (DOCX, ODT, XLS, XLSX, PPT, PPTX, RTF, etc.) by
|
||||
sending them to an Apache Tika server for text extraction and a Gotenberg
|
||||
server for PDF conversion. Because the source formats cannot be rendered by
|
||||
a browser natively, the parser always produces a PDF rendition for display.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import shutil
|
||||
import tempfile
|
||||
from contextlib import ExitStack
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
from typing import Self
|
||||
|
||||
import httpx
|
||||
from django.conf import settings
|
||||
from django.utils import timezone
|
||||
from gotenberg_client import GotenbergClient
|
||||
from gotenberg_client.options import PdfAFormat
|
||||
from tika_client import TikaClient
|
||||
|
||||
from documents.parsers import ParseError
|
||||
from documents.parsers import make_thumbnail_from_pdf
|
||||
from paperless.config import OutputTypeConfig
|
||||
from paperless.models import OutputTypeChoices
|
||||
from paperless.version import __full_version_str__
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import datetime
|
||||
from types import TracebackType
|
||||
|
||||
from paperless.parsers import MetadataEntry
|
||||
|
||||
logger = logging.getLogger("paperless.parsing.tika")
|
||||
|
||||
_SUPPORTED_MIME_TYPES: dict[str, str] = {
|
||||
"application/msword": ".doc",
|
||||
"application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",
|
||||
"application/vnd.ms-excel": ".xls",
|
||||
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
|
||||
"application/vnd.ms-powerpoint": ".ppt",
|
||||
"application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx",
|
||||
"application/vnd.openxmlformats-officedocument.presentationml.slideshow": ".ppsx",
|
||||
"application/vnd.oasis.opendocument.presentation": ".odp",
|
||||
"application/vnd.oasis.opendocument.spreadsheet": ".ods",
|
||||
"application/vnd.oasis.opendocument.text": ".odt",
|
||||
"application/vnd.oasis.opendocument.graphics": ".odg",
|
||||
"text/rtf": ".rtf",
|
||||
}
|
||||
|
||||
|
||||
class TikaDocumentParser:
|
||||
"""Parse Office documents via Apache Tika and Gotenberg for Paperless-ngx.
|
||||
|
||||
Text extraction is handled by the Tika server. PDF conversion for display
|
||||
is handled by Gotenberg (LibreOffice route). Because the source formats
|
||||
cannot be rendered by a browser natively, ``requires_pdf_rendition`` is
|
||||
True and the PDF is always produced regardless of the ``produce_archive``
|
||||
flag passed to ``parse``.
|
||||
|
||||
Both ``TikaClient`` and ``GotenbergClient`` are opened once in
|
||||
``__enter__`` via an ``ExitStack`` and shared across ``parse``,
|
||||
``extract_metadata``, and ``_convert_to_pdf`` calls, then closed via
|
||||
``ExitStack.close()`` in ``__exit__``. The parser must always be used
|
||||
as a context manager.
|
||||
|
||||
Class attributes
|
||||
----------------
|
||||
name : str
|
||||
Human-readable parser name.
|
||||
version : str
|
||||
Semantic version string, kept in sync with Paperless-ngx releases.
|
||||
author : str
|
||||
Maintainer name.
|
||||
url : str
|
||||
Issue tracker / source URL.
|
||||
"""
|
||||
|
||||
name: str = "Paperless-ngx Tika Parser"
|
||||
version: str = __full_version_str__
|
||||
author: str = "Paperless-ngx Contributors"
|
||||
url: str = "https://github.com/paperless-ngx/paperless-ngx"
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Class methods
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@classmethod
|
||||
def supported_mime_types(cls) -> dict[str, str]:
|
||||
"""Return the MIME types this parser handles.
|
||||
|
||||
Returns
|
||||
-------
|
||||
dict[str, str]
|
||||
Mapping of MIME type to preferred file extension.
|
||||
"""
|
||||
return _SUPPORTED_MIME_TYPES
|
||||
|
||||
@classmethod
|
||||
def score(
|
||||
cls,
|
||||
mime_type: str,
|
||||
filename: str,
|
||||
path: Path | None = None,
|
||||
) -> int | None:
|
||||
"""Return the priority score for handling this file.
|
||||
|
||||
Returns ``None`` when Tika integration is disabled so the registry
|
||||
skips this parser entirely.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
mime_type:
|
||||
Detected MIME type of the file.
|
||||
filename:
|
||||
Original filename including extension.
|
||||
path:
|
||||
Optional filesystem path. Not inspected by this parser.
|
||||
|
||||
Returns
|
||||
-------
|
||||
int | None
|
||||
10 if TIKA_ENABLED and the MIME type is supported, otherwise None.
|
||||
"""
|
||||
if not settings.TIKA_ENABLED:
|
||||
return None
|
||||
if mime_type in _SUPPORTED_MIME_TYPES:
|
||||
return 10
|
||||
return None
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Properties
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@property
|
||||
def can_produce_archive(self) -> bool:
|
||||
"""Whether this parser can produce a searchable PDF archive copy.
|
||||
|
||||
Returns
|
||||
-------
|
||||
bool
|
||||
Always False — Tika produces a display PDF, not an OCR archive.
|
||||
"""
|
||||
return False
|
||||
|
||||
@property
|
||||
def requires_pdf_rendition(self) -> bool:
|
||||
"""Whether the parser must produce a PDF for the frontend to display.
|
||||
|
||||
Returns
|
||||
-------
|
||||
bool
|
||||
Always True — Office formats cannot be rendered natively in a
|
||||
browser, so a PDF conversion is always required for display.
|
||||
"""
|
||||
return True
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Lifecycle
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def __init__(self, logging_group: object = None) -> None:
|
||||
settings.SCRATCH_DIR.mkdir(parents=True, exist_ok=True)
|
||||
self._tempdir = Path(
|
||||
tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR),
|
||||
)
|
||||
self._text: str | None = None
|
||||
self._date: datetime.datetime | None = None
|
||||
self._archive_path: Path | None = None
|
||||
self._exit_stack = ExitStack()
|
||||
self._tika_client: TikaClient | None = None
|
||||
self._gotenberg_client: GotenbergClient | None = None
|
||||
|
||||
def __enter__(self) -> Self:
|
||||
self._tika_client = self._exit_stack.enter_context(
|
||||
TikaClient(
|
||||
tika_url=settings.TIKA_ENDPOINT,
|
||||
timeout=settings.CELERY_TASK_TIME_LIMIT,
|
||||
),
|
||||
)
|
||||
self._gotenberg_client = self._exit_stack.enter_context(
|
||||
GotenbergClient(
|
||||
host=settings.TIKA_GOTENBERG_ENDPOINT,
|
||||
timeout=settings.CELERY_TASK_TIME_LIMIT,
|
||||
),
|
||||
)
|
||||
return self
|
||||
|
||||
def __exit__(
|
||||
self,
|
||||
exc_type: type[BaseException] | None,
|
||||
exc_val: BaseException | None,
|
||||
exc_tb: TracebackType | None,
|
||||
) -> None:
|
||||
self._exit_stack.close()
|
||||
logger.debug("Cleaning up temporary directory %s", self._tempdir)
|
||||
shutil.rmtree(self._tempdir, ignore_errors=True)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Core parsing interface
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def parse(
|
||||
self,
|
||||
document_path: Path,
|
||||
mime_type: str,
|
||||
*,
|
||||
produce_archive: bool = True,
|
||||
) -> None:
|
||||
"""Send the document to Tika for text extraction and Gotenberg for PDF.
|
||||
|
||||
Because ``requires_pdf_rendition`` is True the PDF conversion is
|
||||
always performed — the ``produce_archive`` flag is intentionally
|
||||
ignored.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
document_path:
|
||||
Absolute path to the document file to parse.
|
||||
mime_type:
|
||||
Detected MIME type of the document.
|
||||
produce_archive:
|
||||
Accepted for protocol compatibility but ignored; the PDF rendition
|
||||
is always produced since the source format cannot be displayed
|
||||
natively in the browser.
|
||||
|
||||
Raises
|
||||
------
|
||||
documents.parsers.ParseError
|
||||
If Tika or Gotenberg returns an error.
|
||||
"""
|
||||
if TYPE_CHECKING:
|
||||
assert self._tika_client is not None
|
||||
|
||||
logger.info("Sending %s to Tika server", document_path)
|
||||
|
||||
try:
|
||||
try:
|
||||
parsed = self._tika_client.tika.as_text.from_file(
|
||||
document_path,
|
||||
mime_type,
|
||||
)
|
||||
except httpx.HTTPStatusError as err:
|
||||
# Workaround https://issues.apache.org/jira/browse/TIKA-4110
|
||||
# Tika fails with some files as multi-part form data
|
||||
if err.response.status_code == httpx.codes.INTERNAL_SERVER_ERROR:
|
||||
parsed = self._tika_client.tika.as_text.from_buffer(
|
||||
document_path.read_bytes(),
|
||||
mime_type,
|
||||
)
|
||||
else: # pragma: no cover
|
||||
raise
|
||||
except Exception as err:
|
||||
raise ParseError(
|
||||
f"Could not parse {document_path} with tika server at "
|
||||
f"{settings.TIKA_ENDPOINT}: {err}",
|
||||
) from err
|
||||
|
||||
self._text = parsed.content
|
||||
if self._text is not None:
|
||||
self._text = self._text.strip()
|
||||
|
||||
self._date = parsed.created
|
||||
if self._date is not None and timezone.is_naive(self._date):
|
||||
self._date = timezone.make_aware(self._date)
|
||||
|
||||
# Always convert — requires_pdf_rendition=True means the browser
|
||||
# cannot display the source format natively.
|
||||
self._archive_path = self._convert_to_pdf(document_path)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Result accessors
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def get_text(self) -> str | None:
|
||||
"""Return the plain-text content extracted during parse.
|
||||
|
||||
Returns
|
||||
-------
|
||||
str | None
|
||||
Extracted text, or None if parse has not been called yet.
|
||||
"""
|
||||
return self._text
|
||||
|
||||
def get_date(self) -> datetime.datetime | None:
|
||||
"""Return the document date detected during parse.
|
||||
|
||||
Returns
|
||||
-------
|
||||
datetime.datetime | None
|
||||
Creation date from Tika metadata, or None if not detected.
|
||||
"""
|
||||
return self._date
|
||||
|
||||
def get_archive_path(self) -> Path | None:
|
||||
"""Return the path to the generated PDF rendition, or None.
|
||||
|
||||
Returns
|
||||
-------
|
||||
Path | None
|
||||
Path to the PDF produced by Gotenberg, or None if parse has not
|
||||
been called yet.
|
||||
"""
|
||||
return self._archive_path
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Thumbnail and metadata
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def get_thumbnail(self, document_path: Path, mime_type: str) -> Path:
|
||||
"""Generate a thumbnail from the PDF rendition of the document.
|
||||
|
||||
Converts the document to PDF first if not already done.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
document_path:
|
||||
Absolute path to the source document.
|
||||
mime_type:
|
||||
Detected MIME type of the document.
|
||||
|
||||
Returns
|
||||
-------
|
||||
Path
|
||||
Path to the generated WebP thumbnail inside the temporary directory.
|
||||
"""
|
||||
if self._archive_path is None:
|
||||
self._archive_path = self._convert_to_pdf(document_path)
|
||||
return make_thumbnail_from_pdf(self._archive_path, self._tempdir)
|
||||
|
||||
def get_page_count(
|
||||
self,
|
||||
document_path: Path,
|
||||
mime_type: str,
|
||||
) -> int | None:
|
||||
"""Return the number of pages in the document.
|
||||
|
||||
Returns
|
||||
-------
|
||||
int | None
|
||||
Always None — page count is not available from Tika.
|
||||
"""
|
||||
return None
|
||||
|
||||
def extract_metadata(
|
||||
self,
|
||||
document_path: Path,
|
||||
mime_type: str,
|
||||
) -> list[MetadataEntry]:
|
||||
"""Extract format-specific metadata via the Tika metadata endpoint.
|
||||
|
||||
Returns
|
||||
-------
|
||||
list[MetadataEntry]
|
||||
All key/value pairs returned by Tika, or ``[]`` on error.
|
||||
"""
|
||||
if TYPE_CHECKING:
|
||||
assert self._tika_client is not None
|
||||
|
||||
try:
|
||||
parsed = self._tika_client.metadata.from_file(document_path, mime_type)
|
||||
return [
|
||||
{
|
||||
"namespace": "",
|
||||
"prefix": "",
|
||||
"key": key,
|
||||
"value": parsed.data[key],
|
||||
}
|
||||
for key in parsed.data
|
||||
]
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"Error while fetching document metadata for %s: %s",
|
||||
document_path,
|
||||
e,
|
||||
)
|
||||
return []
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Private helpers
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _convert_to_pdf(self, document_path: Path) -> Path:
|
||||
"""Convert the document to PDF using Gotenberg's LibreOffice route.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
document_path:
|
||||
Absolute path to the source document.
|
||||
|
||||
Returns
|
||||
-------
|
||||
Path
|
||||
Path to the generated PDF inside the temporary directory.
|
||||
|
||||
Raises
|
||||
------
|
||||
documents.parsers.ParseError
|
||||
If Gotenberg returns an error.
|
||||
"""
|
||||
if TYPE_CHECKING:
|
||||
assert self._gotenberg_client is not None
|
||||
|
||||
pdf_path = self._tempdir / "convert.pdf"
|
||||
|
||||
logger.info("Converting %s to PDF as %s", document_path, pdf_path)
|
||||
|
||||
with self._gotenberg_client.libre_office.to_pdf() as route:
|
||||
# Set the output format of the resulting PDF.
|
||||
# OutputTypeConfig reads the database-stored ApplicationConfiguration
|
||||
# first, then falls back to the PAPERLESS_OCR_OUTPUT_TYPE env var.
|
||||
output_type = OutputTypeConfig().output_type
|
||||
if output_type in {
|
||||
OutputTypeChoices.PDF_A,
|
||||
OutputTypeChoices.PDF_A2,
|
||||
}:
|
||||
route.pdf_format(PdfAFormat.A2b)
|
||||
elif output_type == OutputTypeChoices.PDF_A1:
|
||||
logger.warning(
|
||||
"Gotenberg does not support PDF/A-1a, choosing PDF/A-2b instead",
|
||||
)
|
||||
route.pdf_format(PdfAFormat.A2b)
|
||||
elif output_type == OutputTypeChoices.PDF_A3:
|
||||
route.pdf_format(PdfAFormat.A3b)
|
||||
|
||||
route.convert(document_path)
|
||||
|
||||
try:
|
||||
response = route.run()
|
||||
pdf_path.write_bytes(response.content)
|
||||
return pdf_path
|
||||
except Exception as err:
|
||||
raise ParseError(
|
||||
f"Error while converting document to PDF: {err}",
|
||||
) from err
|
||||
48
src/paperless/tests/conftest.py
Normal file
48
src/paperless/tests/conftest.py
Normal file
@@ -0,0 +1,48 @@
|
||||
"""
|
||||
Fixtures defined here are available to every test module under
|
||||
src/paperless/tests/ (including sub-packages such as parsers/).
|
||||
|
||||
Session-scoped fixtures for the shared samples directory live here so
|
||||
sub-package conftest files can reference them without duplicating path logic.
|
||||
Parser-specific fixtures (concrete parser instances, format-specific sample
|
||||
files) live in paperless/tests/parsers/conftest.py.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import pytest
|
||||
|
||||
from paperless.parsers.registry import reset_parser_registry
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Generator
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def samples_dir() -> Path:
|
||||
"""Absolute path to the shared parser sample files directory.
|
||||
|
||||
Sub-package conftest files derive format-specific paths from this root,
|
||||
e.g. ``samples_dir / "text" / "test.txt"``.
|
||||
|
||||
Returns
|
||||
-------
|
||||
Path
|
||||
Directory containing all sample documents used by parser tests.
|
||||
"""
|
||||
return (Path(__file__).parent / "samples").resolve()
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def clean_registry() -> Generator[None, None, None]:
|
||||
"""Reset the parser registry before and after every test.
|
||||
|
||||
This prevents registry state from leaking between tests that call
|
||||
get_parser_registry() or init_builtin_parsers().
|
||||
"""
|
||||
reset_parser_registry()
|
||||
yield
|
||||
reset_parser_registry()
|
||||
0
src/paperless/tests/parsers/__init__.py
Normal file
0
src/paperless/tests/parsers/__init__.py
Normal file
160
src/paperless/tests/parsers/conftest.py
Normal file
160
src/paperless/tests/parsers/conftest.py
Normal file
@@ -0,0 +1,160 @@
|
||||
"""
|
||||
Parser fixtures that are used across multiple test modules in this package
|
||||
are defined here. Format-specific sample-file fixtures are grouped by parser
|
||||
so it is easy to see which files belong to which test module.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import pytest
|
||||
|
||||
from paperless.parsers.text import TextDocumentParser
|
||||
from paperless.parsers.tika import TikaDocumentParser
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Generator
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Text parser sample files
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def text_samples_dir(samples_dir: Path) -> Path:
|
||||
"""Absolute path to the text parser sample files directory.
|
||||
|
||||
Returns
|
||||
-------
|
||||
Path
|
||||
``<samples_dir>/text/``
|
||||
"""
|
||||
return samples_dir / "text"
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def sample_txt_file(text_samples_dir: Path) -> Path:
|
||||
"""Path to a valid UTF-8 plain-text sample file.
|
||||
|
||||
Returns
|
||||
-------
|
||||
Path
|
||||
Absolute path to ``text/test.txt``.
|
||||
"""
|
||||
return text_samples_dir / "test.txt"
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def malformed_txt_file(text_samples_dir: Path) -> Path:
|
||||
"""Path to a text file containing invalid UTF-8 bytes.
|
||||
|
||||
Returns
|
||||
-------
|
||||
Path
|
||||
Absolute path to ``text/decode_error.txt``.
|
||||
"""
|
||||
return text_samples_dir / "decode_error.txt"
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Text parser instance
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def text_parser() -> Generator[TextDocumentParser, None, None]:
|
||||
"""Yield a TextDocumentParser and clean up its temporary directory afterwards.
|
||||
|
||||
Yields
|
||||
------
|
||||
TextDocumentParser
|
||||
A ready-to-use parser instance.
|
||||
"""
|
||||
with TextDocumentParser() as parser:
|
||||
yield parser
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Tika parser sample files
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def tika_samples_dir(samples_dir: Path) -> Path:
|
||||
"""Absolute path to the Tika parser sample files directory.
|
||||
|
||||
Returns
|
||||
-------
|
||||
Path
|
||||
``<samples_dir>/tika/``
|
||||
"""
|
||||
return samples_dir / "tika"
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def sample_odt_file(tika_samples_dir: Path) -> Path:
|
||||
"""Path to a sample ODT file.
|
||||
|
||||
Returns
|
||||
-------
|
||||
Path
|
||||
Absolute path to ``tika/sample.odt``.
|
||||
"""
|
||||
return tika_samples_dir / "sample.odt"
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def sample_docx_file(tika_samples_dir: Path) -> Path:
|
||||
"""Path to a sample DOCX file.
|
||||
|
||||
Returns
|
||||
-------
|
||||
Path
|
||||
Absolute path to ``tika/sample.docx``.
|
||||
"""
|
||||
return tika_samples_dir / "sample.docx"
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def sample_doc_file(tika_samples_dir: Path) -> Path:
|
||||
"""Path to a sample DOC file.
|
||||
|
||||
Returns
|
||||
-------
|
||||
Path
|
||||
Absolute path to ``tika/sample.doc``.
|
||||
"""
|
||||
return tika_samples_dir / "sample.doc"
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def sample_broken_odt(tika_samples_dir: Path) -> Path:
|
||||
"""Path to a broken ODT file that triggers the multi-part fallback.
|
||||
|
||||
Returns
|
||||
-------
|
||||
Path
|
||||
Absolute path to ``tika/multi-part-broken.odt``.
|
||||
"""
|
||||
return tika_samples_dir / "multi-part-broken.odt"
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Tika parser instance
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def tika_parser() -> Generator[TikaDocumentParser, None, None]:
|
||||
"""Yield a TikaDocumentParser and clean up its temporary directory afterwards.
|
||||
|
||||
Yields
|
||||
------
|
||||
TikaDocumentParser
|
||||
A ready-to-use parser instance.
|
||||
"""
|
||||
with TikaDocumentParser() as parser:
|
||||
yield parser
|
||||
256
src/paperless/tests/parsers/test_text_parser.py
Normal file
256
src/paperless/tests/parsers/test_text_parser.py
Normal file
@@ -0,0 +1,256 @@
|
||||
"""
|
||||
Tests for paperless.parsers.text.TextDocumentParser.
|
||||
|
||||
All tests use the context-manager protocol for parser lifecycle. Sample
|
||||
files are provided by session-scoped fixtures defined in conftest.py.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from paperless.parsers import ParserProtocol
|
||||
from paperless.parsers.text import TextDocumentParser
|
||||
|
||||
|
||||
class TestTextParserProtocol:
|
||||
"""Verify that TextDocumentParser satisfies the ParserProtocol contract."""
|
||||
|
||||
def test_isinstance_satisfies_protocol(
|
||||
self,
|
||||
text_parser: TextDocumentParser,
|
||||
) -> None:
|
||||
assert isinstance(text_parser, ParserProtocol)
|
||||
|
||||
def test_class_attributes_present(self) -> None:
|
||||
assert isinstance(TextDocumentParser.name, str) and TextDocumentParser.name
|
||||
assert (
|
||||
isinstance(TextDocumentParser.version, str) and TextDocumentParser.version
|
||||
)
|
||||
assert isinstance(TextDocumentParser.author, str) and TextDocumentParser.author
|
||||
assert isinstance(TextDocumentParser.url, str) and TextDocumentParser.url
|
||||
|
||||
def test_supported_mime_types_returns_dict(self) -> None:
|
||||
mime_types = TextDocumentParser.supported_mime_types()
|
||||
assert isinstance(mime_types, dict)
|
||||
assert "text/plain" in mime_types
|
||||
assert "text/csv" in mime_types
|
||||
assert "application/csv" in mime_types
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
("mime_type", "expected"),
|
||||
[
|
||||
("text/plain", 10),
|
||||
("text/csv", 10),
|
||||
("application/csv", 10),
|
||||
("application/pdf", None),
|
||||
("image/png", None),
|
||||
],
|
||||
)
|
||||
def test_score(self, mime_type: str, expected: int | None) -> None:
|
||||
assert TextDocumentParser.score(mime_type, "file.txt") == expected
|
||||
|
||||
def test_can_produce_archive_is_false(
|
||||
self,
|
||||
text_parser: TextDocumentParser,
|
||||
) -> None:
|
||||
assert text_parser.can_produce_archive is False
|
||||
|
||||
def test_requires_pdf_rendition_is_false(
|
||||
self,
|
||||
text_parser: TextDocumentParser,
|
||||
) -> None:
|
||||
assert text_parser.requires_pdf_rendition is False
|
||||
|
||||
|
||||
class TestTextParserLifecycle:
|
||||
"""Verify context-manager behaviour and temporary directory cleanup."""
|
||||
|
||||
def test_context_manager_cleans_up_tempdir(self) -> None:
|
||||
with TextDocumentParser() as parser:
|
||||
tempdir = parser._tempdir
|
||||
assert tempdir.exists()
|
||||
assert not tempdir.exists()
|
||||
|
||||
def test_context_manager_cleans_up_after_exception(self) -> None:
|
||||
tempdir: Path | None = None
|
||||
with pytest.raises(RuntimeError):
|
||||
with TextDocumentParser() as parser:
|
||||
tempdir = parser._tempdir
|
||||
raise RuntimeError("boom")
|
||||
assert tempdir is not None
|
||||
assert not tempdir.exists()
|
||||
|
||||
|
||||
class TestTextParserParse:
|
||||
"""Verify parse() and the result accessors."""
|
||||
|
||||
def test_parse_valid_utf8(
|
||||
self,
|
||||
text_parser: TextDocumentParser,
|
||||
sample_txt_file: Path,
|
||||
) -> None:
|
||||
text_parser.parse(sample_txt_file, "text/plain")
|
||||
|
||||
assert text_parser.get_text() == "This is a test file.\n"
|
||||
|
||||
def test_parse_returns_none_for_archive_path(
|
||||
self,
|
||||
text_parser: TextDocumentParser,
|
||||
sample_txt_file: Path,
|
||||
) -> None:
|
||||
text_parser.parse(sample_txt_file, "text/plain")
|
||||
|
||||
assert text_parser.get_archive_path() is None
|
||||
|
||||
def test_parse_returns_none_for_date(
|
||||
self,
|
||||
text_parser: TextDocumentParser,
|
||||
sample_txt_file: Path,
|
||||
) -> None:
|
||||
text_parser.parse(sample_txt_file, "text/plain")
|
||||
|
||||
assert text_parser.get_date() is None
|
||||
|
||||
def test_parse_invalid_utf8_bytes_replaced(
|
||||
self,
|
||||
text_parser: TextDocumentParser,
|
||||
malformed_txt_file: Path,
|
||||
) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- A text file containing invalid UTF-8 byte sequences
|
||||
WHEN:
|
||||
- The file is parsed
|
||||
THEN:
|
||||
- Parsing succeeds
|
||||
- Invalid bytes are replaced with the Unicode replacement character
|
||||
"""
|
||||
text_parser.parse(malformed_txt_file, "text/plain")
|
||||
|
||||
assert text_parser.get_text() == "Pantothens\ufffdure\n"
|
||||
|
||||
def test_get_text_none_before_parse(
|
||||
self,
|
||||
text_parser: TextDocumentParser,
|
||||
) -> None:
|
||||
assert text_parser.get_text() is None
|
||||
|
||||
|
||||
class TestTextParserThumbnail:
|
||||
"""Verify thumbnail generation."""
|
||||
|
||||
def test_thumbnail_exists_and_is_file(
|
||||
self,
|
||||
text_parser: TextDocumentParser,
|
||||
sample_txt_file: Path,
|
||||
) -> None:
|
||||
thumb = text_parser.get_thumbnail(sample_txt_file, "text/plain")
|
||||
|
||||
assert thumb.exists()
|
||||
assert thumb.is_file()
|
||||
|
||||
def test_thumbnail_large_file_does_not_read_all(
|
||||
self,
|
||||
text_parser: TextDocumentParser,
|
||||
) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- A text file larger than 50 MB
|
||||
WHEN:
|
||||
- A thumbnail is requested
|
||||
THEN:
|
||||
- The thumbnail is generated without loading the full file
|
||||
"""
|
||||
with tempfile.NamedTemporaryFile(
|
||||
delete=False,
|
||||
mode="w",
|
||||
encoding="utf-8",
|
||||
suffix=".txt",
|
||||
) as tmp:
|
||||
tmp.write("A" * (51 * 1024 * 1024))
|
||||
large_file = Path(tmp.name)
|
||||
|
||||
try:
|
||||
thumb = text_parser.get_thumbnail(large_file, "text/plain")
|
||||
assert thumb.exists()
|
||||
assert thumb.is_file()
|
||||
finally:
|
||||
large_file.unlink(missing_ok=True)
|
||||
|
||||
def test_get_page_count_returns_none(
|
||||
self,
|
||||
text_parser: TextDocumentParser,
|
||||
sample_txt_file: Path,
|
||||
) -> None:
|
||||
assert text_parser.get_page_count(sample_txt_file, "text/plain") is None
|
||||
|
||||
|
||||
class TestTextParserMetadata:
|
||||
"""Verify extract_metadata behaviour."""
|
||||
|
||||
def test_extract_metadata_returns_empty_list(
|
||||
self,
|
||||
text_parser: TextDocumentParser,
|
||||
sample_txt_file: Path,
|
||||
) -> None:
|
||||
result = text_parser.extract_metadata(sample_txt_file, "text/plain")
|
||||
|
||||
assert result == []
|
||||
|
||||
def test_extract_metadata_returns_list_type(
|
||||
self,
|
||||
text_parser: TextDocumentParser,
|
||||
sample_txt_file: Path,
|
||||
) -> None:
|
||||
result = text_parser.extract_metadata(sample_txt_file, "text/plain")
|
||||
|
||||
assert isinstance(result, list)
|
||||
|
||||
def test_extract_metadata_ignores_mime_type(
|
||||
self,
|
||||
text_parser: TextDocumentParser,
|
||||
sample_txt_file: Path,
|
||||
) -> None:
|
||||
"""extract_metadata returns [] regardless of the mime_type argument."""
|
||||
assert text_parser.extract_metadata(sample_txt_file, "application/pdf") == []
|
||||
assert text_parser.extract_metadata(sample_txt_file, "text/csv") == []
|
||||
|
||||
|
||||
class TestTextParserRegistry:
|
||||
"""Verify that TextDocumentParser is registered by default."""
|
||||
|
||||
def test_registered_in_defaults(self) -> None:
|
||||
from paperless.parsers.registry import ParserRegistry
|
||||
|
||||
registry = ParserRegistry()
|
||||
registry.register_defaults()
|
||||
|
||||
assert TextDocumentParser in registry._builtins
|
||||
|
||||
def test_get_parser_for_text_plain(self) -> None:
|
||||
from paperless.parsers.registry import get_parser_registry
|
||||
|
||||
registry = get_parser_registry()
|
||||
parser_cls = registry.get_parser_for_file("text/plain", "doc.txt")
|
||||
|
||||
assert parser_cls is TextDocumentParser
|
||||
|
||||
def test_get_parser_for_text_csv(self) -> None:
|
||||
from paperless.parsers.registry import get_parser_registry
|
||||
|
||||
registry = get_parser_registry()
|
||||
parser_cls = registry.get_parser_for_file("text/csv", "data.csv")
|
||||
|
||||
assert parser_cls is TextDocumentParser
|
||||
|
||||
def test_get_parser_for_unknown_type_returns_none(self) -> None:
|
||||
from paperless.parsers.registry import get_parser_registry
|
||||
|
||||
registry = get_parser_registry()
|
||||
parser_cls = registry.get_parser_for_file("application/pdf", "doc.pdf")
|
||||
|
||||
assert parser_cls is None
|
||||
@@ -4,7 +4,7 @@ from pathlib import Path
|
||||
import pytest
|
||||
|
||||
from documents.tests.utils import util_call_with_backoff
|
||||
from paperless_tika.parsers import TikaDocumentParser
|
||||
from paperless.parsers.tika import TikaDocumentParser
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
@@ -42,14 +42,15 @@ class TestTikaParserAgainstServer:
|
||||
)
|
||||
|
||||
assert (
|
||||
tika_parser.text
|
||||
tika_parser.get_text()
|
||||
== "This is an ODT test document, created September 14, 2022"
|
||||
)
|
||||
assert tika_parser.archive_path is not None
|
||||
assert b"PDF-" in tika_parser.archive_path.read_bytes()[:10]
|
||||
archive = tika_parser.get_archive_path()
|
||||
assert archive is not None
|
||||
assert b"PDF-" in archive.read_bytes()[:10]
|
||||
|
||||
# TODO: Unsure what can set the Creation-Date field in a document, enable when possible
|
||||
# self.assertEqual(tika_parser.date, datetime.datetime(2022, 9, 14))
|
||||
# self.assertEqual(tika_parser.get_date(), datetime.datetime(2022, 9, 14))
|
||||
|
||||
def test_basic_parse_docx(
|
||||
self,
|
||||
@@ -74,14 +75,15 @@ class TestTikaParserAgainstServer:
|
||||
)
|
||||
|
||||
assert (
|
||||
tika_parser.text
|
||||
tika_parser.get_text()
|
||||
== "This is an DOCX test document, also made September 14, 2022"
|
||||
)
|
||||
assert tika_parser.archive_path is not None
|
||||
with Path(tika_parser.archive_path).open("rb") as f:
|
||||
archive = tika_parser.get_archive_path()
|
||||
assert archive is not None
|
||||
with archive.open("rb") as f:
|
||||
assert b"PDF-" in f.read()[:10]
|
||||
|
||||
# self.assertEqual(tika_parser.date, datetime.datetime(2022, 9, 14))
|
||||
# self.assertEqual(tika_parser.get_date(), datetime.datetime(2022, 9, 14))
|
||||
|
||||
def test_basic_parse_doc(
|
||||
self,
|
||||
@@ -102,13 +104,12 @@ class TestTikaParserAgainstServer:
|
||||
[sample_doc_file, "application/msword"],
|
||||
)
|
||||
|
||||
assert tika_parser.text is not None
|
||||
assert (
|
||||
"This is a test document, saved in the older .doc format"
|
||||
in tika_parser.text
|
||||
)
|
||||
assert tika_parser.archive_path is not None
|
||||
with Path(tika_parser.archive_path).open("rb") as f:
|
||||
text = tika_parser.get_text()
|
||||
assert text is not None
|
||||
assert "This is a test document, saved in the older .doc format" in text
|
||||
archive = tika_parser.get_archive_path()
|
||||
assert archive is not None
|
||||
with archive.open("rb") as f:
|
||||
assert b"PDF-" in f.read()[:10]
|
||||
|
||||
def test_tika_fails_multi_part(
|
||||
@@ -133,6 +134,7 @@ class TestTikaParserAgainstServer:
|
||||
[sample_broken_odt, "application/vnd.oasis.opendocument.text"],
|
||||
)
|
||||
|
||||
assert tika_parser.archive_path is not None
|
||||
with Path(tika_parser.archive_path).open("rb") as f:
|
||||
archive = tika_parser.get_archive_path()
|
||||
assert archive is not None
|
||||
with archive.open("rb") as f:
|
||||
assert b"PDF-" in f.read()[:10]
|
||||
@@ -9,7 +9,56 @@ from pytest_django.fixtures import SettingsWrapper
|
||||
from pytest_httpx import HTTPXMock
|
||||
|
||||
from documents.parsers import ParseError
|
||||
from paperless_tika.parsers import TikaDocumentParser
|
||||
from paperless.parsers import ParserProtocol
|
||||
from paperless.parsers.tika import TikaDocumentParser
|
||||
|
||||
|
||||
class TestTikaParserRegistryInterface:
|
||||
"""Verify that TikaDocumentParser satisfies the ParserProtocol contract."""
|
||||
|
||||
def test_satisfies_parser_protocol(self) -> None:
|
||||
assert isinstance(TikaDocumentParser(), ParserProtocol)
|
||||
|
||||
def test_supported_mime_types_is_classmethod(self) -> None:
|
||||
mime_types = TikaDocumentParser.supported_mime_types()
|
||||
assert isinstance(mime_types, dict)
|
||||
assert len(mime_types) > 0
|
||||
|
||||
def test_score_returns_none_when_tika_disabled(
|
||||
self,
|
||||
settings: SettingsWrapper,
|
||||
) -> None:
|
||||
settings.TIKA_ENABLED = False
|
||||
result = TikaDocumentParser.score(
|
||||
"application/vnd.oasis.opendocument.text",
|
||||
"sample.odt",
|
||||
)
|
||||
assert result is None
|
||||
|
||||
def test_score_returns_int_when_tika_enabled(
|
||||
self,
|
||||
settings: SettingsWrapper,
|
||||
) -> None:
|
||||
settings.TIKA_ENABLED = True
|
||||
result = TikaDocumentParser.score(
|
||||
"application/vnd.oasis.opendocument.text",
|
||||
"sample.odt",
|
||||
)
|
||||
assert isinstance(result, int)
|
||||
|
||||
def test_score_returns_none_for_unsupported_mime(
|
||||
self,
|
||||
settings: SettingsWrapper,
|
||||
) -> None:
|
||||
settings.TIKA_ENABLED = True
|
||||
result = TikaDocumentParser.score("application/pdf", "doc.pdf")
|
||||
assert result is None
|
||||
|
||||
def test_can_produce_archive_is_false(self) -> None:
|
||||
assert TikaDocumentParser().can_produce_archive is False
|
||||
|
||||
def test_requires_pdf_rendition_is_true(self) -> None:
|
||||
assert TikaDocumentParser().requires_pdf_rendition is True
|
||||
|
||||
|
||||
@pytest.mark.django_db()
|
||||
@@ -36,12 +85,12 @@ class TestTikaParser:
|
||||
|
||||
tika_parser.parse(sample_odt_file, "application/vnd.oasis.opendocument.text")
|
||||
|
||||
assert tika_parser.text == "the content"
|
||||
assert tika_parser.archive_path is not None
|
||||
with Path(tika_parser.archive_path).open("rb") as f:
|
||||
assert tika_parser.get_text() == "the content"
|
||||
assert tika_parser.get_archive_path() is not None
|
||||
with Path(tika_parser.get_archive_path()).open("rb") as f:
|
||||
assert f.read() == b"PDF document"
|
||||
|
||||
assert tika_parser.date == datetime.datetime(
|
||||
assert tika_parser.get_date() == datetime.datetime(
|
||||
2020,
|
||||
11,
|
||||
21,
|
||||
@@ -89,7 +138,7 @@ class TestTikaParser:
|
||||
httpx_mock.add_response(status_code=HTTPStatus.INTERNAL_SERVER_ERROR)
|
||||
|
||||
with pytest.raises(ParseError):
|
||||
tika_parser.convert_to_pdf(sample_odt_file, None)
|
||||
tika_parser._convert_to_pdf(sample_odt_file)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
("setting_value", "expected_form_value"),
|
||||
@@ -106,7 +155,6 @@ class TestTikaParser:
|
||||
expected_form_value: str,
|
||||
httpx_mock: HTTPXMock,
|
||||
settings: SettingsWrapper,
|
||||
tika_parser: TikaDocumentParser,
|
||||
sample_odt_file: Path,
|
||||
) -> None:
|
||||
"""
|
||||
@@ -117,6 +165,8 @@ class TestTikaParser:
|
||||
THEN:
|
||||
- Request to Gotenberg contains the expected PDF/A format string
|
||||
"""
|
||||
# Parser must be created after the setting is changed so that
|
||||
# OutputTypeConfig reads the correct value at __init__ time.
|
||||
settings.OCR_OUTPUT_TYPE = setting_value
|
||||
httpx_mock.add_response(
|
||||
status_code=codes.OK,
|
||||
@@ -124,7 +174,8 @@ class TestTikaParser:
|
||||
method="POST",
|
||||
)
|
||||
|
||||
tika_parser.convert_to_pdf(sample_odt_file, None)
|
||||
with TikaDocumentParser() as parser:
|
||||
parser._convert_to_pdf(sample_odt_file)
|
||||
|
||||
request = httpx_mock.get_request()
|
||||
|
||||
714
src/paperless/tests/test_registry.py
Normal file
714
src/paperless/tests/test_registry.py
Normal file
@@ -0,0 +1,714 @@
|
||||
"""
|
||||
Tests for :mod:`paperless.parsers` (ParserProtocol) and
|
||||
:mod:`paperless.parsers.registry` (ParserRegistry + module-level helpers).
|
||||
|
||||
All tests use pytest-style functions/classes — no unittest.TestCase.
|
||||
The ``clean_registry`` fixture ensures complete isolation between tests by
|
||||
resetting the module-level singleton before and after every test.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from importlib.metadata import EntryPoint
|
||||
from pathlib import Path
|
||||
from typing import Self
|
||||
from unittest.mock import MagicMock
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from paperless.parsers import ParserProtocol
|
||||
from paperless.parsers.registry import ParserRegistry
|
||||
from paperless.parsers.registry import get_parser_registry
|
||||
from paperless.parsers.registry import init_builtin_parsers
|
||||
from paperless.parsers.registry import reset_parser_registry
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def dummy_parser_cls() -> type:
|
||||
"""Return a class that fully satisfies :class:`ParserProtocol`.
|
||||
|
||||
GIVEN: A need to exercise registry and Protocol logic with a minimal
|
||||
but complete parser.
|
||||
WHEN: A test requests this fixture.
|
||||
THEN: A class with all required attributes and methods is returned.
|
||||
"""
|
||||
|
||||
class DummyParser:
|
||||
name = "dummy-parser"
|
||||
version = "0.1.0"
|
||||
author = "Test Author"
|
||||
url = "https://example.com/dummy-parser"
|
||||
|
||||
@classmethod
|
||||
def supported_mime_types(cls) -> dict[str, str]:
|
||||
return {"text/plain": ".txt"}
|
||||
|
||||
@classmethod
|
||||
def score(
|
||||
cls,
|
||||
mime_type: str,
|
||||
filename: str,
|
||||
path: Path | None = None,
|
||||
) -> int | None:
|
||||
return 10
|
||||
|
||||
@property
|
||||
def can_produce_archive(self) -> bool:
|
||||
return False
|
||||
|
||||
@property
|
||||
def requires_pdf_rendition(self) -> bool:
|
||||
return False
|
||||
|
||||
def parse(
|
||||
self,
|
||||
document_path: Path,
|
||||
mime_type: str,
|
||||
*,
|
||||
produce_archive: bool = True,
|
||||
) -> None:
|
||||
"""
|
||||
Required to exist, but doesn't need to do anything
|
||||
"""
|
||||
|
||||
def get_text(self) -> str | None:
|
||||
return None
|
||||
|
||||
def get_date(self) -> None:
|
||||
return None
|
||||
|
||||
def get_archive_path(self) -> Path | None:
|
||||
return None
|
||||
|
||||
def get_thumbnail(
|
||||
self,
|
||||
document_path: Path,
|
||||
mime_type: str,
|
||||
) -> Path:
|
||||
return Path("/tmp/thumbnail.webp")
|
||||
|
||||
def get_page_count(
|
||||
self,
|
||||
document_path: Path,
|
||||
mime_type: str,
|
||||
) -> int | None:
|
||||
return None
|
||||
|
||||
def extract_metadata(
|
||||
self,
|
||||
document_path: Path,
|
||||
mime_type: str,
|
||||
) -> list:
|
||||
return []
|
||||
|
||||
def __enter__(self) -> Self:
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb) -> None:
|
||||
"""
|
||||
Required to exist, but doesn't need to do anything
|
||||
"""
|
||||
|
||||
return DummyParser
|
||||
|
||||
|
||||
class TestParserProtocol:
|
||||
"""Verify runtime isinstance() checks against ParserProtocol."""
|
||||
|
||||
def test_compliant_class_instance_passes_isinstance(
|
||||
self,
|
||||
dummy_parser_cls: type,
|
||||
) -> None:
|
||||
"""
|
||||
GIVEN: A class that implements every method required by ParserProtocol.
|
||||
WHEN: isinstance() is called with the Protocol.
|
||||
THEN: The check passes (returns True).
|
||||
"""
|
||||
instance = dummy_parser_cls()
|
||||
assert isinstance(instance, ParserProtocol)
|
||||
|
||||
def test_non_compliant_class_instance_fails_isinstance(self) -> None:
|
||||
"""
|
||||
GIVEN: A plain class with no parser-related methods.
|
||||
WHEN: isinstance() is called with ParserProtocol.
|
||||
THEN: The check fails (returns False).
|
||||
"""
|
||||
|
||||
class Unrelated:
|
||||
pass
|
||||
|
||||
assert not isinstance(Unrelated(), ParserProtocol)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"missing_method",
|
||||
[
|
||||
pytest.param("parse", id="missing-parse"),
|
||||
pytest.param("get_text", id="missing-get_text"),
|
||||
pytest.param("get_thumbnail", id="missing-get_thumbnail"),
|
||||
pytest.param("__enter__", id="missing-__enter__"),
|
||||
pytest.param("__exit__", id="missing-__exit__"),
|
||||
],
|
||||
)
|
||||
def test_partial_compliant_fails_isinstance(
|
||||
self,
|
||||
dummy_parser_cls: type,
|
||||
missing_method: str,
|
||||
) -> None:
|
||||
"""
|
||||
GIVEN: A class that satisfies ParserProtocol except for one method.
|
||||
WHEN: isinstance() is called with ParserProtocol.
|
||||
THEN: The check fails because the Protocol is not fully satisfied.
|
||||
"""
|
||||
# Create a subclass and delete the specified method to break compliance.
|
||||
partial_cls = type(
|
||||
"PartialParser",
|
||||
(dummy_parser_cls,),
|
||||
{missing_method: None}, # Replace with None — not callable
|
||||
)
|
||||
assert not isinstance(partial_cls(), ParserProtocol)
|
||||
|
||||
|
||||
class TestRegistrySingleton:
|
||||
"""Verify the module-level singleton lifecycle functions."""
|
||||
|
||||
def test_get_parser_registry_returns_instance(self) -> None:
|
||||
"""
|
||||
GIVEN: No registry has been created yet.
|
||||
WHEN: get_parser_registry() is called.
|
||||
THEN: A ParserRegistry instance is returned.
|
||||
"""
|
||||
registry = get_parser_registry()
|
||||
assert isinstance(registry, ParserRegistry)
|
||||
|
||||
def test_get_parser_registry_same_instance_on_repeated_calls(self) -> None:
|
||||
"""
|
||||
GIVEN: A registry instance was created by a prior call.
|
||||
WHEN: get_parser_registry() is called a second time.
|
||||
THEN: The exact same object (identity) is returned.
|
||||
"""
|
||||
first = get_parser_registry()
|
||||
second = get_parser_registry()
|
||||
assert first is second
|
||||
|
||||
def test_reset_parser_registry_gives_fresh_instance(self) -> None:
|
||||
"""
|
||||
GIVEN: A registry instance already exists.
|
||||
WHEN: reset_parser_registry() is called and then get_parser_registry()
|
||||
is called again.
|
||||
THEN: A new, distinct registry instance is returned.
|
||||
"""
|
||||
first = get_parser_registry()
|
||||
reset_parser_registry()
|
||||
second = get_parser_registry()
|
||||
assert first is not second
|
||||
|
||||
def test_init_builtin_parsers_does_not_run_discover(
|
||||
self,
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
) -> None:
|
||||
"""
|
||||
GIVEN: discover() would raise an exception if called.
|
||||
WHEN: init_builtin_parsers() is called.
|
||||
THEN: No exception is raised, confirming discover() was not invoked.
|
||||
"""
|
||||
|
||||
def exploding_discover(self) -> None:
|
||||
raise RuntimeError(
|
||||
"discover() must not be called from init_builtin_parsers",
|
||||
)
|
||||
|
||||
monkeypatch.setattr(ParserRegistry, "discover", exploding_discover)
|
||||
|
||||
# Should complete without raising.
|
||||
init_builtin_parsers()
|
||||
|
||||
def test_init_builtin_parsers_idempotent(self) -> None:
|
||||
"""
|
||||
GIVEN: init_builtin_parsers() has already been called once.
|
||||
WHEN: init_builtin_parsers() is called a second time.
|
||||
THEN: No error is raised and the same registry instance is reused.
|
||||
"""
|
||||
init_builtin_parsers()
|
||||
# Capture the registry created by the first call.
|
||||
import paperless.parsers.registry as reg_module
|
||||
|
||||
first_registry = reg_module._registry
|
||||
|
||||
init_builtin_parsers()
|
||||
|
||||
assert reg_module._registry is first_registry
|
||||
|
||||
|
||||
class TestParserRegistryGetParserForFile:
|
||||
"""Verify parser selection logic in get_parser_for_file()."""
|
||||
|
||||
def test_returns_none_when_no_parsers_registered(self) -> None:
|
||||
"""
|
||||
GIVEN: A registry with no parsers registered.
|
||||
WHEN: get_parser_for_file() is called for any MIME type.
|
||||
THEN: None is returned.
|
||||
"""
|
||||
registry = ParserRegistry()
|
||||
result = registry.get_parser_for_file("text/plain", "doc.txt")
|
||||
assert result is None
|
||||
|
||||
def test_returns_none_for_unsupported_mime_type(
|
||||
self,
|
||||
dummy_parser_cls: type,
|
||||
) -> None:
|
||||
"""
|
||||
GIVEN: A registry with a parser that supports only 'text/plain'.
|
||||
WHEN: get_parser_for_file() is called with 'application/pdf'.
|
||||
THEN: None is returned.
|
||||
"""
|
||||
registry = ParserRegistry()
|
||||
registry.register_builtin(dummy_parser_cls)
|
||||
result = registry.get_parser_for_file("application/pdf", "file.pdf")
|
||||
assert result is None
|
||||
|
||||
def test_returns_parser_for_supported_mime_type(
|
||||
self,
|
||||
dummy_parser_cls: type,
|
||||
) -> None:
|
||||
"""
|
||||
GIVEN: A registry with a parser registered for 'text/plain'.
|
||||
WHEN: get_parser_for_file() is called with 'text/plain'.
|
||||
THEN: The registered parser class is returned.
|
||||
"""
|
||||
registry = ParserRegistry()
|
||||
registry.register_builtin(dummy_parser_cls)
|
||||
result = registry.get_parser_for_file("text/plain", "readme.txt")
|
||||
assert result is dummy_parser_cls
|
||||
|
||||
def test_highest_score_wins(self) -> None:
|
||||
"""
|
||||
GIVEN: Two parsers both supporting 'text/plain' with scores 5 and 20.
|
||||
WHEN: get_parser_for_file() is called for 'text/plain'.
|
||||
THEN: The parser with score 20 is returned.
|
||||
"""
|
||||
|
||||
class LowScoreParser:
|
||||
name = "low"
|
||||
version = "1.0"
|
||||
author = "A"
|
||||
url = "https://example.com/low"
|
||||
|
||||
@classmethod
|
||||
def supported_mime_types(cls):
|
||||
return {"text/plain": ".txt"}
|
||||
|
||||
@classmethod
|
||||
def score(cls, mime_type, filename, path=None):
|
||||
return 5
|
||||
|
||||
class HighScoreParser:
|
||||
name = "high"
|
||||
version = "1.0"
|
||||
author = "B"
|
||||
url = "https://example.com/high"
|
||||
|
||||
@classmethod
|
||||
def supported_mime_types(cls):
|
||||
return {"text/plain": ".txt"}
|
||||
|
||||
@classmethod
|
||||
def score(cls, mime_type, filename, path=None):
|
||||
return 20
|
||||
|
||||
registry = ParserRegistry()
|
||||
registry.register_builtin(LowScoreParser)
|
||||
registry.register_builtin(HighScoreParser)
|
||||
result = registry.get_parser_for_file("text/plain", "readme.txt")
|
||||
assert result is HighScoreParser
|
||||
|
||||
def test_parser_returning_none_score_is_skipped(self) -> None:
|
||||
"""
|
||||
GIVEN: A parser that returns None from score() for the given file.
|
||||
WHEN: get_parser_for_file() is called.
|
||||
THEN: That parser is skipped and None is returned (no other candidates).
|
||||
"""
|
||||
|
||||
class DecliningParser:
|
||||
name = "declining"
|
||||
version = "1.0"
|
||||
author = "A"
|
||||
url = "https://example.com"
|
||||
|
||||
@classmethod
|
||||
def supported_mime_types(cls):
|
||||
return {"text/plain": ".txt"}
|
||||
|
||||
@classmethod
|
||||
def score(cls, mime_type, filename, path=None):
|
||||
return None # Explicitly declines
|
||||
|
||||
registry = ParserRegistry()
|
||||
registry.register_builtin(DecliningParser)
|
||||
result = registry.get_parser_for_file("text/plain", "readme.txt")
|
||||
assert result is None
|
||||
|
||||
def test_all_parsers_decline_returns_none(self) -> None:
|
||||
"""
|
||||
GIVEN: Multiple parsers that all return None from score().
|
||||
WHEN: get_parser_for_file() is called.
|
||||
THEN: None is returned.
|
||||
"""
|
||||
|
||||
class AlwaysDeclines:
|
||||
name = "declines"
|
||||
version = "1.0"
|
||||
author = "A"
|
||||
url = "https://example.com"
|
||||
|
||||
@classmethod
|
||||
def supported_mime_types(cls):
|
||||
return {"text/plain": ".txt"}
|
||||
|
||||
@classmethod
|
||||
def score(cls, mime_type, filename, path=None):
|
||||
return None
|
||||
|
||||
registry = ParserRegistry()
|
||||
registry.register_builtin(AlwaysDeclines)
|
||||
registry._external.append(AlwaysDeclines)
|
||||
result = registry.get_parser_for_file("text/plain", "file.txt")
|
||||
assert result is None
|
||||
|
||||
def test_external_parser_beats_builtin_same_score(self) -> None:
|
||||
"""
|
||||
GIVEN: An external and a built-in parser both returning score 10.
|
||||
WHEN: get_parser_for_file() is called.
|
||||
THEN: The external parser wins because externals are evaluated first
|
||||
and the first-seen-wins policy applies at equal scores.
|
||||
"""
|
||||
|
||||
class BuiltinParser:
|
||||
name = "builtin"
|
||||
version = "1.0"
|
||||
author = "Core"
|
||||
url = "https://example.com/builtin"
|
||||
|
||||
@classmethod
|
||||
def supported_mime_types(cls):
|
||||
return {"text/plain": ".txt"}
|
||||
|
||||
@classmethod
|
||||
def score(cls, mime_type, filename, path=None):
|
||||
return 10
|
||||
|
||||
class ExternalParser:
|
||||
name = "external"
|
||||
version = "2.0"
|
||||
author = "Third Party"
|
||||
url = "https://example.com/external"
|
||||
|
||||
@classmethod
|
||||
def supported_mime_types(cls):
|
||||
return {"text/plain": ".txt"}
|
||||
|
||||
@classmethod
|
||||
def score(cls, mime_type, filename, path=None):
|
||||
return 10
|
||||
|
||||
registry = ParserRegistry()
|
||||
registry.register_builtin(BuiltinParser)
|
||||
registry._external.append(ExternalParser)
|
||||
result = registry.get_parser_for_file("text/plain", "file.txt")
|
||||
assert result is ExternalParser
|
||||
|
||||
def test_builtin_wins_when_external_declines(self) -> None:
|
||||
"""
|
||||
GIVEN: An external parser that declines (score None) and a built-in
|
||||
that returns score 5.
|
||||
WHEN: get_parser_for_file() is called.
|
||||
THEN: The built-in parser is returned.
|
||||
"""
|
||||
|
||||
class DecliningExternal:
|
||||
name = "declining-external"
|
||||
version = "1.0"
|
||||
author = "Third Party"
|
||||
url = "https://example.com/declining"
|
||||
|
||||
@classmethod
|
||||
def supported_mime_types(cls):
|
||||
return {"text/plain": ".txt"}
|
||||
|
||||
@classmethod
|
||||
def score(cls, mime_type, filename, path=None):
|
||||
return None
|
||||
|
||||
class AcceptingBuiltin:
|
||||
name = "accepting-builtin"
|
||||
version = "1.0"
|
||||
author = "Core"
|
||||
url = "https://example.com/accepting"
|
||||
|
||||
@classmethod
|
||||
def supported_mime_types(cls):
|
||||
return {"text/plain": ".txt"}
|
||||
|
||||
@classmethod
|
||||
def score(cls, mime_type, filename, path=None):
|
||||
return 5
|
||||
|
||||
registry = ParserRegistry()
|
||||
registry.register_builtin(AcceptingBuiltin)
|
||||
registry._external.append(DecliningExternal)
|
||||
result = registry.get_parser_for_file("text/plain", "file.txt")
|
||||
assert result is AcceptingBuiltin
|
||||
|
||||
|
||||
class TestDiscover:
|
||||
"""Verify entrypoint discovery in ParserRegistry.discover()."""
|
||||
|
||||
def test_discover_with_no_entrypoints(self) -> None:
|
||||
"""
|
||||
GIVEN: No entrypoints are registered under 'paperless_ngx.parsers'.
|
||||
WHEN: discover() is called.
|
||||
THEN: _external remains empty and no errors are raised.
|
||||
"""
|
||||
registry = ParserRegistry()
|
||||
|
||||
with patch(
|
||||
"paperless.parsers.registry.entry_points",
|
||||
return_value=[],
|
||||
):
|
||||
registry.discover()
|
||||
|
||||
assert registry._external == []
|
||||
|
||||
def test_discover_adds_valid_external_parser(self) -> None:
|
||||
"""
|
||||
GIVEN: One valid entrypoint whose loaded class has all required attrs.
|
||||
WHEN: discover() is called.
|
||||
THEN: The class is appended to _external.
|
||||
"""
|
||||
|
||||
class ValidExternal:
|
||||
name = "valid-external"
|
||||
version = "3.0.0"
|
||||
author = "Someone"
|
||||
url = "https://example.com/valid"
|
||||
|
||||
@classmethod
|
||||
def supported_mime_types(cls):
|
||||
return {"application/pdf": ".pdf"}
|
||||
|
||||
@classmethod
|
||||
def score(cls, mime_type, filename, path=None):
|
||||
return 5
|
||||
|
||||
mock_ep = MagicMock(spec=EntryPoint)
|
||||
mock_ep.name = "valid_external"
|
||||
mock_ep.load.return_value = ValidExternal
|
||||
|
||||
registry = ParserRegistry()
|
||||
|
||||
with patch(
|
||||
"paperless.parsers.registry.entry_points",
|
||||
return_value=[mock_ep],
|
||||
):
|
||||
registry.discover()
|
||||
|
||||
assert ValidExternal in registry._external
|
||||
|
||||
def test_discover_skips_entrypoint_with_load_error(
|
||||
self,
|
||||
caplog: pytest.LogCaptureFixture,
|
||||
) -> None:
|
||||
"""
|
||||
GIVEN: An entrypoint whose load() method raises ImportError.
|
||||
WHEN: discover() is called.
|
||||
THEN: The entrypoint is skipped, an error is logged, and _external
|
||||
remains empty.
|
||||
"""
|
||||
mock_ep = MagicMock(spec=EntryPoint)
|
||||
mock_ep.name = "broken_ep"
|
||||
mock_ep.load.side_effect = ImportError("missing dependency")
|
||||
|
||||
registry = ParserRegistry()
|
||||
|
||||
with caplog.at_level(logging.ERROR, logger="paperless.parsers.registry"):
|
||||
with patch(
|
||||
"paperless.parsers.registry.entry_points",
|
||||
return_value=[mock_ep],
|
||||
):
|
||||
registry.discover()
|
||||
|
||||
assert registry._external == []
|
||||
assert any(
|
||||
"broken_ep" in record.message
|
||||
for record in caplog.records
|
||||
if record.levelno >= logging.ERROR
|
||||
)
|
||||
|
||||
def test_discover_skips_entrypoint_with_missing_attrs(
|
||||
self,
|
||||
caplog: pytest.LogCaptureFixture,
|
||||
) -> None:
|
||||
"""
|
||||
GIVEN: A class loaded from an entrypoint that is missing the 'score'
|
||||
attribute.
|
||||
WHEN: discover() is called.
|
||||
THEN: The entrypoint is skipped, a warning is logged, and _external
|
||||
remains empty.
|
||||
"""
|
||||
|
||||
class MissingScore:
|
||||
name = "missing-score"
|
||||
version = "1.0"
|
||||
author = "Someone"
|
||||
url = "https://example.com"
|
||||
|
||||
# 'score' classmethod is intentionally absent.
|
||||
|
||||
@classmethod
|
||||
def supported_mime_types(cls):
|
||||
return {"text/plain": ".txt"}
|
||||
|
||||
mock_ep = MagicMock(spec=EntryPoint)
|
||||
mock_ep.name = "missing_score_ep"
|
||||
mock_ep.load.return_value = MissingScore
|
||||
|
||||
registry = ParserRegistry()
|
||||
|
||||
with caplog.at_level(logging.WARNING, logger="paperless.parsers.registry"):
|
||||
with patch(
|
||||
"paperless.parsers.registry.entry_points",
|
||||
return_value=[mock_ep],
|
||||
):
|
||||
registry.discover()
|
||||
|
||||
assert registry._external == []
|
||||
assert any(
|
||||
"missing_score_ep" in record.message
|
||||
for record in caplog.records
|
||||
if record.levelno >= logging.WARNING
|
||||
)
|
||||
|
||||
def test_discover_logs_loaded_parser_info(
|
||||
self,
|
||||
caplog: pytest.LogCaptureFixture,
|
||||
) -> None:
|
||||
"""
|
||||
GIVEN: A valid entrypoint that loads successfully.
|
||||
WHEN: discover() is called.
|
||||
THEN: An INFO log message is emitted containing the parser name,
|
||||
version, author, and entrypoint name.
|
||||
"""
|
||||
|
||||
class LoggableParser:
|
||||
name = "loggable"
|
||||
version = "4.2.0"
|
||||
author = "Log Tester"
|
||||
url = "https://example.com/loggable"
|
||||
|
||||
@classmethod
|
||||
def supported_mime_types(cls):
|
||||
return {"image/png": ".png"}
|
||||
|
||||
@classmethod
|
||||
def score(cls, mime_type, filename, path=None):
|
||||
return 1
|
||||
|
||||
mock_ep = MagicMock(spec=EntryPoint)
|
||||
mock_ep.name = "loggable_ep"
|
||||
mock_ep.load.return_value = LoggableParser
|
||||
|
||||
registry = ParserRegistry()
|
||||
|
||||
with caplog.at_level(logging.INFO, logger="paperless.parsers.registry"):
|
||||
with patch(
|
||||
"paperless.parsers.registry.entry_points",
|
||||
return_value=[mock_ep],
|
||||
):
|
||||
registry.discover()
|
||||
|
||||
info_messages = " ".join(
|
||||
r.message for r in caplog.records if r.levelno == logging.INFO
|
||||
)
|
||||
assert "loggable" in info_messages
|
||||
assert "4.2.0" in info_messages
|
||||
assert "Log Tester" in info_messages
|
||||
assert "loggable_ep" in info_messages
|
||||
|
||||
|
||||
class TestLogSummary:
|
||||
"""Verify log output from ParserRegistry.log_summary()."""
|
||||
|
||||
def test_log_summary_with_no_external_parsers(
|
||||
self,
|
||||
dummy_parser_cls: type,
|
||||
caplog: pytest.LogCaptureFixture,
|
||||
) -> None:
|
||||
"""
|
||||
GIVEN: A registry with one built-in parser and no external parsers.
|
||||
WHEN: log_summary() is called.
|
||||
THEN: The built-in parser name appears in the logs.
|
||||
"""
|
||||
registry = ParserRegistry()
|
||||
registry.register_builtin(dummy_parser_cls)
|
||||
|
||||
with caplog.at_level(logging.INFO, logger="paperless.parsers.registry"):
|
||||
registry.log_summary()
|
||||
|
||||
all_messages = " ".join(r.message for r in caplog.records)
|
||||
assert dummy_parser_cls.name in all_messages
|
||||
|
||||
def test_log_summary_with_external_parsers(
|
||||
self,
|
||||
caplog: pytest.LogCaptureFixture,
|
||||
) -> None:
|
||||
"""
|
||||
GIVEN: A registry with one external parser registered.
|
||||
WHEN: log_summary() is called.
|
||||
THEN: The external parser name, version, author, and url appear in
|
||||
the log output.
|
||||
"""
|
||||
|
||||
class ExtParser:
|
||||
name = "ext-parser"
|
||||
version = "9.9.9"
|
||||
author = "Ext Corp"
|
||||
url = "https://ext.example.com"
|
||||
|
||||
@classmethod
|
||||
def supported_mime_types(cls):
|
||||
return {}
|
||||
|
||||
@classmethod
|
||||
def score(cls, mime_type, filename, path=None):
|
||||
return None
|
||||
|
||||
registry = ParserRegistry()
|
||||
registry._external.append(ExtParser)
|
||||
|
||||
with caplog.at_level(logging.INFO, logger="paperless.parsers.registry"):
|
||||
registry.log_summary()
|
||||
|
||||
all_messages = " ".join(r.message for r in caplog.records)
|
||||
assert "ext-parser" in all_messages
|
||||
assert "9.9.9" in all_messages
|
||||
assert "Ext Corp" in all_messages
|
||||
assert "https://ext.example.com" in all_messages
|
||||
|
||||
def test_log_summary_logs_no_third_party_message_when_none(
|
||||
self,
|
||||
caplog: pytest.LogCaptureFixture,
|
||||
) -> None:
|
||||
"""
|
||||
GIVEN: A registry with no external parsers.
|
||||
WHEN: log_summary() is called.
|
||||
THEN: A message containing 'No third-party parsers discovered.' is
|
||||
logged.
|
||||
"""
|
||||
registry = ParserRegistry()
|
||||
|
||||
with caplog.at_level(logging.INFO, logger="paperless.parsers.registry"):
|
||||
registry.log_summary()
|
||||
|
||||
all_messages = " ".join(r.message for r in caplog.records)
|
||||
assert "No third-party parsers discovered." in all_messages
|
||||
@@ -21,18 +21,12 @@ from documents.views import BulkEditView
|
||||
from documents.views import ChatStreamingView
|
||||
from documents.views import CorrespondentViewSet
|
||||
from documents.views import CustomFieldViewSet
|
||||
from documents.views import DeleteDocumentsView
|
||||
from documents.views import DocumentTypeViewSet
|
||||
from documents.views import EditPdfDocumentsView
|
||||
from documents.views import GlobalSearchView
|
||||
from documents.views import IndexView
|
||||
from documents.views import LogViewSet
|
||||
from documents.views import MergeDocumentsView
|
||||
from documents.views import PostDocumentView
|
||||
from documents.views import RemoteVersionView
|
||||
from documents.views import RemovePasswordDocumentsView
|
||||
from documents.views import ReprocessDocumentsView
|
||||
from documents.views import RotateDocumentsView
|
||||
from documents.views import SavedViewViewSet
|
||||
from documents.views import SearchAutoCompleteView
|
||||
from documents.views import SelectionDataView
|
||||
@@ -138,36 +132,6 @@ urlpatterns = [
|
||||
BulkEditView.as_view(),
|
||||
name="bulk_edit",
|
||||
),
|
||||
re_path(
|
||||
"^delete/",
|
||||
DeleteDocumentsView.as_view(),
|
||||
name="delete_documents",
|
||||
),
|
||||
re_path(
|
||||
"^reprocess/",
|
||||
ReprocessDocumentsView.as_view(),
|
||||
name="reprocess_documents",
|
||||
),
|
||||
re_path(
|
||||
"^rotate/",
|
||||
RotateDocumentsView.as_view(),
|
||||
name="rotate_documents",
|
||||
),
|
||||
re_path(
|
||||
"^merge/",
|
||||
MergeDocumentsView.as_view(),
|
||||
name="merge_documents",
|
||||
),
|
||||
re_path(
|
||||
"^edit_pdf/",
|
||||
EditPdfDocumentsView.as_view(),
|
||||
name="edit_pdf_documents",
|
||||
),
|
||||
re_path(
|
||||
"^remove_password/",
|
||||
RemovePasswordDocumentsView.as_view(),
|
||||
name="remove_password_documents",
|
||||
),
|
||||
re_path(
|
||||
"^bulk_download/",
|
||||
BulkDownloadView.as_view(),
|
||||
|
||||
@@ -1,50 +0,0 @@
|
||||
from pathlib import Path
|
||||
|
||||
from django.conf import settings
|
||||
from PIL import Image
|
||||
from PIL import ImageDraw
|
||||
from PIL import ImageFont
|
||||
|
||||
from documents.parsers import DocumentParser
|
||||
|
||||
|
||||
class TextDocumentParser(DocumentParser):
|
||||
"""
|
||||
This parser directly parses a text document (.txt, .md, or .csv)
|
||||
"""
|
||||
|
||||
logging_name = "paperless.parsing.text"
|
||||
|
||||
def get_thumbnail(self, document_path: Path, mime_type, file_name=None) -> Path:
|
||||
# Avoid reading entire file into memory
|
||||
max_chars = 100_000
|
||||
file_size_limit = 50 * 1024 * 1024
|
||||
|
||||
if document_path.stat().st_size > file_size_limit:
|
||||
text = "[File too large to preview]"
|
||||
else:
|
||||
with Path(document_path).open("r", encoding="utf-8", errors="replace") as f:
|
||||
text = f.read(max_chars)
|
||||
|
||||
img = Image.new("RGB", (500, 700), color="white")
|
||||
draw = ImageDraw.Draw(img)
|
||||
font = ImageFont.truetype(
|
||||
font=settings.THUMBNAIL_FONT_NAME,
|
||||
size=20,
|
||||
layout_engine=ImageFont.Layout.BASIC,
|
||||
)
|
||||
draw.multiline_text((5, 5), text, font=font, fill="black", spacing=4)
|
||||
|
||||
out_path = self.tempdir / "thumb.webp"
|
||||
img.save(out_path, format="WEBP")
|
||||
|
||||
return out_path
|
||||
|
||||
def parse(self, document_path, mime_type, file_name=None) -> None:
|
||||
self.text = self.read_file_handle_unicode_errors(document_path)
|
||||
|
||||
def get_settings(self) -> None:
|
||||
"""
|
||||
This parser does not implement additional settings yet
|
||||
"""
|
||||
return None
|
||||
@@ -1,7 +1,13 @@
|
||||
def get_parser(*args, **kwargs):
|
||||
from paperless_text.parsers import TextDocumentParser
|
||||
from paperless.parsers.text import TextDocumentParser
|
||||
|
||||
return TextDocumentParser(*args, **kwargs)
|
||||
# The new TextDocumentParser does not accept the legacy logging_group /
|
||||
# progress_callback kwargs injected by the old signal-based consumer.
|
||||
# These are dropped here; Phase 4 will replace this signal path with the
|
||||
# new ParserRegistry so the shim can be removed at that point.
|
||||
kwargs.pop("logging_group", None)
|
||||
kwargs.pop("progress_callback", None)
|
||||
return TextDocumentParser()
|
||||
|
||||
|
||||
def text_consumer_declaration(sender, **kwargs):
|
||||
|
||||
@@ -1,30 +0,0 @@
|
||||
from collections.abc import Generator
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from paperless_text.parsers import TextDocumentParser
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def sample_dir() -> Path:
|
||||
return (Path(__file__).parent / Path("samples")).resolve()
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def text_parser() -> Generator[TextDocumentParser, None, None]:
|
||||
try:
|
||||
parser = TextDocumentParser(logging_group=None)
|
||||
yield parser
|
||||
finally:
|
||||
parser.cleanup()
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def sample_txt_file(sample_dir: Path) -> Path:
|
||||
return sample_dir / "test.txt"
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def malformed_txt_file(sample_dir: Path) -> Path:
|
||||
return sample_dir / "decode_error.txt"
|
||||
@@ -1,69 +0,0 @@
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
from paperless_text.parsers import TextDocumentParser
|
||||
|
||||
|
||||
class TestTextParser:
|
||||
def test_thumbnail(
|
||||
self,
|
||||
text_parser: TextDocumentParser,
|
||||
sample_txt_file: Path,
|
||||
) -> None:
|
||||
# just make sure that it does not crash
|
||||
f = text_parser.get_thumbnail(sample_txt_file, "text/plain")
|
||||
assert f.exists()
|
||||
assert f.is_file()
|
||||
|
||||
def test_parse(
|
||||
self,
|
||||
text_parser: TextDocumentParser,
|
||||
sample_txt_file: Path,
|
||||
) -> None:
|
||||
text_parser.parse(sample_txt_file, "text/plain")
|
||||
|
||||
assert text_parser.get_text() == "This is a test file.\n"
|
||||
assert text_parser.get_archive_path() is None
|
||||
|
||||
def test_parse_invalid_bytes(
|
||||
self,
|
||||
text_parser: TextDocumentParser,
|
||||
malformed_txt_file: Path,
|
||||
) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- Text file which contains invalid UTF bytes
|
||||
WHEN:
|
||||
- The file is parsed
|
||||
THEN:
|
||||
- Parsing continues
|
||||
- Invalid bytes are removed
|
||||
"""
|
||||
|
||||
text_parser.parse(malformed_txt_file, "text/plain")
|
||||
|
||||
assert text_parser.get_text() == "Pantothens<EFBFBD>ure\n"
|
||||
assert text_parser.get_archive_path() is None
|
||||
|
||||
def test_thumbnail_large_file(self, text_parser: TextDocumentParser) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- A very large text file (>50MB)
|
||||
WHEN:
|
||||
- A thumbnail is requested
|
||||
THEN:
|
||||
- A thumbnail is created without reading the entire file into memory
|
||||
"""
|
||||
with tempfile.NamedTemporaryFile(
|
||||
delete=False,
|
||||
mode="w",
|
||||
encoding="utf-8",
|
||||
suffix=".txt",
|
||||
) as tmp:
|
||||
tmp.write("A" * (51 * 1024 * 1024)) # 51 MB of 'A'
|
||||
large_file = Path(tmp.name)
|
||||
|
||||
thumb = text_parser.get_thumbnail(large_file, "text/plain")
|
||||
assert thumb.exists()
|
||||
assert thumb.is_file()
|
||||
large_file.unlink()
|
||||
@@ -1,136 +0,0 @@
|
||||
from pathlib import Path
|
||||
|
||||
import httpx
|
||||
from django.conf import settings
|
||||
from django.utils import timezone
|
||||
from gotenberg_client import GotenbergClient
|
||||
from gotenberg_client.options import PdfAFormat
|
||||
from tika_client import TikaClient
|
||||
|
||||
from documents.parsers import DocumentParser
|
||||
from documents.parsers import ParseError
|
||||
from documents.parsers import make_thumbnail_from_pdf
|
||||
from paperless.config import OutputTypeConfig
|
||||
from paperless.models import OutputTypeChoices
|
||||
|
||||
|
||||
class TikaDocumentParser(DocumentParser):
|
||||
"""
|
||||
This parser sends documents to a local tika server
|
||||
"""
|
||||
|
||||
logging_name = "paperless.parsing.tika"
|
||||
|
||||
def get_thumbnail(self, document_path, mime_type, file_name=None):
|
||||
if not self.archive_path:
|
||||
self.archive_path = self.convert_to_pdf(document_path, file_name)
|
||||
|
||||
return make_thumbnail_from_pdf(
|
||||
self.archive_path,
|
||||
self.tempdir,
|
||||
self.logging_group,
|
||||
)
|
||||
|
||||
def extract_metadata(self, document_path, mime_type):
|
||||
try:
|
||||
with TikaClient(
|
||||
tika_url=settings.TIKA_ENDPOINT,
|
||||
timeout=settings.CELERY_TASK_TIME_LIMIT,
|
||||
) as client:
|
||||
parsed = client.metadata.from_file(document_path, mime_type)
|
||||
return [
|
||||
{
|
||||
"namespace": "",
|
||||
"prefix": "",
|
||||
"key": key,
|
||||
"value": parsed.data[key],
|
||||
}
|
||||
for key in parsed.data
|
||||
]
|
||||
except Exception as e:
|
||||
self.log.warning(
|
||||
f"Error while fetching document metadata for {document_path}: {e}",
|
||||
)
|
||||
return []
|
||||
|
||||
def parse(self, document_path: Path, mime_type: str, file_name=None) -> None:
|
||||
self.log.info(f"Sending {document_path} to Tika server")
|
||||
|
||||
try:
|
||||
with TikaClient(
|
||||
tika_url=settings.TIKA_ENDPOINT,
|
||||
timeout=settings.CELERY_TASK_TIME_LIMIT,
|
||||
) as client:
|
||||
try:
|
||||
parsed = client.tika.as_text.from_file(document_path, mime_type)
|
||||
except httpx.HTTPStatusError as err:
|
||||
# Workaround https://issues.apache.org/jira/browse/TIKA-4110
|
||||
# Tika fails with some files as multi-part form data
|
||||
if err.response.status_code == httpx.codes.INTERNAL_SERVER_ERROR:
|
||||
parsed = client.tika.as_text.from_buffer(
|
||||
document_path.read_bytes(),
|
||||
mime_type,
|
||||
)
|
||||
else: # pragma: no cover
|
||||
raise
|
||||
except Exception as err:
|
||||
raise ParseError(
|
||||
f"Could not parse {document_path} with tika server at "
|
||||
f"{settings.TIKA_ENDPOINT}: {err}",
|
||||
) from err
|
||||
|
||||
self.text = parsed.content
|
||||
if self.text is not None:
|
||||
self.text = self.text.strip()
|
||||
|
||||
self.date = parsed.created
|
||||
if self.date is not None and timezone.is_naive(self.date):
|
||||
self.date = timezone.make_aware(self.date)
|
||||
|
||||
self.archive_path = self.convert_to_pdf(document_path, file_name)
|
||||
|
||||
def convert_to_pdf(self, document_path: Path, file_name):
|
||||
pdf_path = Path(self.tempdir) / "convert.pdf"
|
||||
|
||||
self.log.info(f"Converting {document_path} to PDF as {pdf_path}")
|
||||
|
||||
with (
|
||||
GotenbergClient(
|
||||
host=settings.TIKA_GOTENBERG_ENDPOINT,
|
||||
timeout=settings.CELERY_TASK_TIME_LIMIT,
|
||||
) as client,
|
||||
client.libre_office.to_pdf() as route,
|
||||
):
|
||||
# Set the output format of the resulting PDF
|
||||
if settings.OCR_OUTPUT_TYPE in {
|
||||
OutputTypeChoices.PDF_A,
|
||||
OutputTypeChoices.PDF_A2,
|
||||
}:
|
||||
route.pdf_format(PdfAFormat.A2b)
|
||||
elif settings.OCR_OUTPUT_TYPE == OutputTypeChoices.PDF_A1:
|
||||
self.log.warning(
|
||||
"Gotenberg does not support PDF/A-1a, choosing PDF/A-2b instead",
|
||||
)
|
||||
route.pdf_format(PdfAFormat.A2b)
|
||||
elif settings.OCR_OUTPUT_TYPE == OutputTypeChoices.PDF_A3:
|
||||
route.pdf_format(PdfAFormat.A3b)
|
||||
|
||||
route.convert(document_path)
|
||||
|
||||
try:
|
||||
response = route.run()
|
||||
|
||||
pdf_path.write_bytes(response.content)
|
||||
|
||||
return pdf_path
|
||||
|
||||
except Exception as err:
|
||||
raise ParseError(
|
||||
f"Error while converting document to PDF: {err}",
|
||||
) from err
|
||||
|
||||
def get_settings(self) -> OutputTypeConfig:
|
||||
"""
|
||||
This parser only uses the PDF output type configuration currently
|
||||
"""
|
||||
return OutputTypeConfig()
|
||||
@@ -1,7 +1,13 @@
|
||||
def get_parser(*args, **kwargs):
|
||||
from paperless_tika.parsers import TikaDocumentParser
|
||||
from paperless.parsers.tika import TikaDocumentParser
|
||||
|
||||
return TikaDocumentParser(*args, **kwargs)
|
||||
# The new TikaDocumentParser does not accept the legacy logging_group /
|
||||
# progress_callback kwargs injected by the old signal-based consumer.
|
||||
# These are dropped here; Phase 4 will replace this signal path with the
|
||||
# new ParserRegistry so the shim can be removed at that point.
|
||||
kwargs.pop("logging_group", None)
|
||||
kwargs.pop("progress_callback", None)
|
||||
return TikaDocumentParser()
|
||||
|
||||
|
||||
def tika_consumer_declaration(sender, **kwargs):
|
||||
|
||||
@@ -1,40 +0,0 @@
|
||||
from collections.abc import Generator
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from paperless_tika.parsers import TikaDocumentParser
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def tika_parser() -> Generator[TikaDocumentParser, None, None]:
|
||||
try:
|
||||
parser = TikaDocumentParser(logging_group=None)
|
||||
yield parser
|
||||
finally:
|
||||
parser.cleanup()
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def sample_dir() -> Path:
|
||||
return (Path(__file__).parent / Path("samples")).resolve()
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def sample_odt_file(sample_dir: Path) -> Path:
|
||||
return sample_dir / "sample.odt"
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def sample_docx_file(sample_dir: Path) -> Path:
|
||||
return sample_dir / "sample.docx"
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def sample_doc_file(sample_dir: Path) -> Path:
|
||||
return sample_dir / "sample.doc"
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def sample_broken_odt(sample_dir: Path) -> Path:
|
||||
return sample_dir / "multi-part-broken.odt"
|
||||
8
uv.lock
generated
8
uv.lock
generated
@@ -1251,11 +1251,11 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "filelock"
|
||||
version = "3.20.3"
|
||||
version = "3.24.3"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/1d/65/ce7f1b70157833bf3cb851b556a37d4547ceafc158aa9b34b36782f23696/filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1", size = 19485, upload-time = "2026-01-09T17:55:05.421Z" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/73/92/a8e2479937ff39185d20dd6a851c1a63e55849e447a55e798cc2e1f49c65/filelock-3.24.3.tar.gz", hash = "sha256:011a5644dc937c22699943ebbfc46e969cdde3e171470a6e40b9533e5a72affa", size = 37935, upload-time = "2026-02-19T00:48:20.543Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/b5/36/7fb70f04bf00bc646cd5bb45aa9eddb15e19437a28b8fb2b4a5249fac770/filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1", size = 16701, upload-time = "2026-01-09T17:55:04.334Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/9c/0f/5d0c71a1aefeb08efff26272149e07ab922b64f46c63363756224bd6872e/filelock-3.24.3-py3-none-any.whl", hash = "sha256:426e9a4660391f7f8a810d71b0555bce9008b0a1cc342ab1f6947d37639e002d", size = 24331, upload-time = "2026-02-19T00:48:18.465Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2961,7 +2961,7 @@ requires-dist = [
|
||||
{ name = "drf-spectacular-sidecar", specifier = "~=2026.1.1" },
|
||||
{ name = "drf-writable-nested", specifier = "~=0.7.1" },
|
||||
{ name = "faiss-cpu", specifier = ">=1.10" },
|
||||
{ name = "filelock", specifier = "~=3.20.3" },
|
||||
{ name = "filelock", specifier = "~=3.24.3" },
|
||||
{ name = "flower", specifier = "~=2.0.1" },
|
||||
{ name = "gotenberg-client", specifier = "~=0.13.1" },
|
||||
{ name = "granian", extras = ["uvloop"], marker = "extra == 'webserver'", specifier = "~=2.7.0" },
|
||||
|
||||
Reference in New Issue
Block a user