mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-03-14 21:21:24 +00:00
Compare commits
12 Commits
fix-drop-s
...
feature-ti
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b8069d24b1 | ||
|
|
da06dd2c09 | ||
|
|
bc01e000ad | ||
|
|
23b051b2ee | ||
|
|
365ff99934 | ||
|
|
644a0f3c6b | ||
|
|
dcf4402b15 | ||
|
|
89d00247f6 | ||
|
|
c16bcb7fef | ||
|
|
d0b95f2cda | ||
|
|
2b33617262 | ||
|
|
0a9c67e9b1 |
3
.github/dependabot.yml
vendored
3
.github/dependabot.yml
vendored
@@ -157,6 +157,9 @@ updates:
|
|||||||
postgres:
|
postgres:
|
||||||
patterns:
|
patterns:
|
||||||
- "docker.io/library/postgres*"
|
- "docker.io/library/postgres*"
|
||||||
|
greenmail:
|
||||||
|
patterns:
|
||||||
|
- "docker.io/greenmail*"
|
||||||
- package-ecosystem: "pre-commit" # See documentation for possible values
|
- package-ecosystem: "pre-commit" # See documentation for possible values
|
||||||
directory: "/" # Location of package manifests
|
directory: "/" # Location of package manifests
|
||||||
schedule:
|
schedule:
|
||||||
|
|||||||
@@ -18,13 +18,13 @@ services:
|
|||||||
- "--log-level=warn"
|
- "--log-level=warn"
|
||||||
- "--log-format=text"
|
- "--log-format=text"
|
||||||
tika:
|
tika:
|
||||||
image: docker.io/apache/tika:latest
|
image: docker.io/apache/tika:3.2.3.0
|
||||||
hostname: tika
|
hostname: tika
|
||||||
container_name: tika
|
container_name: tika
|
||||||
network_mode: host
|
network_mode: host
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
greenmail:
|
greenmail:
|
||||||
image: greenmail/standalone:2.1.8
|
image: docker.io/greenmail/standalone:2.1.8
|
||||||
hostname: greenmail
|
hostname: greenmail
|
||||||
container_name: greenmail
|
container_name: greenmail
|
||||||
environment:
|
environment:
|
||||||
|
|||||||
@@ -60,7 +60,7 @@ dependencies = [
|
|||||||
"llama-index-llms-openai>=0.6.13",
|
"llama-index-llms-openai>=0.6.13",
|
||||||
"llama-index-vector-stores-faiss>=0.5.2",
|
"llama-index-vector-stores-faiss>=0.5.2",
|
||||||
"nltk~=3.9.1",
|
"nltk~=3.9.1",
|
||||||
"ocrmypdf~=16.13.0",
|
"ocrmypdf~=17.3.0",
|
||||||
"openai>=1.76",
|
"openai>=1.76",
|
||||||
"pathvalidate~=3.3.1",
|
"pathvalidate~=3.3.1",
|
||||||
"pdf2image~=1.17.0",
|
"pdf2image~=1.17.0",
|
||||||
|
|||||||
@@ -468,7 +468,7 @@
|
|||||||
"time": 0.951,
|
"time": 0.951,
|
||||||
"request": {
|
"request": {
|
||||||
"method": "GET",
|
"method": "GET",
|
||||||
"url": "http://localhost:8000/api/documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true&tags__id__in=9",
|
"url": "http://localhost:8000/api/documents/?page=1&page_size=50&ordering=-created&truncate_content=true&tags__id__in=9",
|
||||||
"httpVersion": "HTTP/1.1",
|
"httpVersion": "HTTP/1.1",
|
||||||
"cookies": [],
|
"cookies": [],
|
||||||
"headers": [
|
"headers": [
|
||||||
|
|||||||
File diff suppressed because one or more lines are too long
@@ -534,7 +534,7 @@
|
|||||||
"time": 0.653,
|
"time": 0.653,
|
||||||
"request": {
|
"request": {
|
||||||
"method": "GET",
|
"method": "GET",
|
||||||
"url": "http://localhost:8000/api/documents/?page=1&page_size=10&ordering=-created&truncate_content=true&include_selection_data=true&tags__id__all=9",
|
"url": "http://localhost:8000/api/documents/?page=1&page_size=50&ordering=-created&truncate_content=true&tags__id__all=9",
|
||||||
"httpVersion": "HTTP/1.1",
|
"httpVersion": "HTTP/1.1",
|
||||||
"cookies": [],
|
"cookies": [],
|
||||||
"headers": [
|
"headers": [
|
||||||
|
|||||||
@@ -883,7 +883,7 @@
|
|||||||
"time": 0.93,
|
"time": 0.93,
|
||||||
"request": {
|
"request": {
|
||||||
"method": "GET",
|
"method": "GET",
|
||||||
"url": "http://localhost:8000/api/documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true&tags__id__all=4",
|
"url": "http://localhost:8000/api/documents/?page=1&page_size=50&ordering=-created&truncate_content=true&tags__id__all=4",
|
||||||
"httpVersion": "HTTP/1.1",
|
"httpVersion": "HTTP/1.1",
|
||||||
"cookies": [],
|
"cookies": [],
|
||||||
"headers": [
|
"headers": [
|
||||||
@@ -961,7 +961,7 @@
|
|||||||
"time": -1,
|
"time": -1,
|
||||||
"request": {
|
"request": {
|
||||||
"method": "GET",
|
"method": "GET",
|
||||||
"url": "http://localhost:8000/api/documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true&tags__id__all=4",
|
"url": "http://localhost:8000/api/documents/?page=1&page_size=50&ordering=-created&truncate_content=true&tags__id__all=4",
|
||||||
"httpVersion": "HTTP/1.1",
|
"httpVersion": "HTTP/1.1",
|
||||||
"cookies": [],
|
"cookies": [],
|
||||||
"headers": [
|
"headers": [
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ test('basic filtering', async ({ page }) => {
|
|||||||
await expect(page).toHaveURL(/tags__id__all=9/)
|
await expect(page).toHaveURL(/tags__id__all=9/)
|
||||||
await expect(page.locator('pngx-document-list')).toHaveText(/8 documents/)
|
await expect(page.locator('pngx-document-list')).toHaveText(/8 documents/)
|
||||||
await page.getByRole('button', { name: 'Document type' }).click()
|
await page.getByRole('button', { name: 'Document type' }).click()
|
||||||
await page.getByRole('menuitem', { name: /^Invoice Test/ }).click()
|
await page.getByRole('menuitem', { name: 'Invoice Test 3' }).click()
|
||||||
await expect(page).toHaveURL(/document_type__id__in=1/)
|
await expect(page).toHaveURL(/document_type__id__in=1/)
|
||||||
await expect(page.locator('pngx-document-list')).toHaveText(/3 documents/)
|
await expect(page.locator('pngx-document-list')).toHaveText(/3 documents/)
|
||||||
await page.getByRole('button', { name: 'Reset filters' }).first().click()
|
await page.getByRole('button', { name: 'Reset filters' }).first().click()
|
||||||
|
|||||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -20,9 +20,9 @@ import { Subject, filter, takeUntil } from 'rxjs'
|
|||||||
import { NEGATIVE_NULL_FILTER_VALUE } from 'src/app/data/filter-rule-type'
|
import { NEGATIVE_NULL_FILTER_VALUE } from 'src/app/data/filter-rule-type'
|
||||||
import { MatchingModel } from 'src/app/data/matching-model'
|
import { MatchingModel } from 'src/app/data/matching-model'
|
||||||
import { ObjectWithPermissions } from 'src/app/data/object-with-permissions'
|
import { ObjectWithPermissions } from 'src/app/data/object-with-permissions'
|
||||||
import { SelectionDataItem } from 'src/app/data/results'
|
|
||||||
import { FilterPipe } from 'src/app/pipes/filter.pipe'
|
import { FilterPipe } from 'src/app/pipes/filter.pipe'
|
||||||
import { HotKeyService } from 'src/app/services/hot-key.service'
|
import { HotKeyService } from 'src/app/services/hot-key.service'
|
||||||
|
import { SelectionDataItem } from 'src/app/services/rest/document.service'
|
||||||
import { pngxPopperOptions } from 'src/app/utils/popper-options'
|
import { pngxPopperOptions } from 'src/app/utils/popper-options'
|
||||||
import { LoadingComponentWithPermissions } from '../../loading-component/loading.component'
|
import { LoadingComponentWithPermissions } from '../../loading-component/loading.component'
|
||||||
import { ClearableBadgeComponent } from '../clearable-badge/clearable-badge.component'
|
import { ClearableBadgeComponent } from '../clearable-badge/clearable-badge.component'
|
||||||
|
|||||||
@@ -300,7 +300,7 @@ describe('BulkEditorComponent', () => {
|
|||||||
parameters: { add_tags: [101], remove_tags: [] },
|
parameters: { add_tags: [101], remove_tags: [] },
|
||||||
})
|
})
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
) // list reload
|
) // list reload
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
||||||
@@ -332,7 +332,7 @@ describe('BulkEditorComponent', () => {
|
|||||||
.expectOne(`${environment.apiBaseUrl}documents/bulk_edit/`)
|
.expectOne(`${environment.apiBaseUrl}documents/bulk_edit/`)
|
||||||
.flush(true)
|
.flush(true)
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
) // list reload
|
) // list reload
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
||||||
@@ -423,7 +423,7 @@ describe('BulkEditorComponent', () => {
|
|||||||
parameters: { correspondent: 101 },
|
parameters: { correspondent: 101 },
|
||||||
})
|
})
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
) // list reload
|
) // list reload
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
||||||
@@ -455,7 +455,7 @@ describe('BulkEditorComponent', () => {
|
|||||||
.expectOne(`${environment.apiBaseUrl}documents/bulk_edit/`)
|
.expectOne(`${environment.apiBaseUrl}documents/bulk_edit/`)
|
||||||
.flush(true)
|
.flush(true)
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
) // list reload
|
) // list reload
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
||||||
@@ -521,7 +521,7 @@ describe('BulkEditorComponent', () => {
|
|||||||
parameters: { document_type: 101 },
|
parameters: { document_type: 101 },
|
||||||
})
|
})
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
) // list reload
|
) // list reload
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
||||||
@@ -553,7 +553,7 @@ describe('BulkEditorComponent', () => {
|
|||||||
.expectOne(`${environment.apiBaseUrl}documents/bulk_edit/`)
|
.expectOne(`${environment.apiBaseUrl}documents/bulk_edit/`)
|
||||||
.flush(true)
|
.flush(true)
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
) // list reload
|
) // list reload
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
||||||
@@ -619,7 +619,7 @@ describe('BulkEditorComponent', () => {
|
|||||||
parameters: { storage_path: 101 },
|
parameters: { storage_path: 101 },
|
||||||
})
|
})
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
) // list reload
|
) // list reload
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
||||||
@@ -651,7 +651,7 @@ describe('BulkEditorComponent', () => {
|
|||||||
.expectOne(`${environment.apiBaseUrl}documents/bulk_edit/`)
|
.expectOne(`${environment.apiBaseUrl}documents/bulk_edit/`)
|
||||||
.flush(true)
|
.flush(true)
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
) // list reload
|
) // list reload
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
||||||
@@ -717,7 +717,7 @@ describe('BulkEditorComponent', () => {
|
|||||||
parameters: { add_custom_fields: [101], remove_custom_fields: [102] },
|
parameters: { add_custom_fields: [101], remove_custom_fields: [102] },
|
||||||
})
|
})
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
) // list reload
|
) // list reload
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
||||||
@@ -749,7 +749,7 @@ describe('BulkEditorComponent', () => {
|
|||||||
.expectOne(`${environment.apiBaseUrl}documents/bulk_edit/`)
|
.expectOne(`${environment.apiBaseUrl}documents/bulk_edit/`)
|
||||||
.flush(true)
|
.flush(true)
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
) // list reload
|
) // list reload
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
||||||
@@ -858,7 +858,7 @@ describe('BulkEditorComponent', () => {
|
|||||||
documents: [3, 4],
|
documents: [3, 4],
|
||||||
})
|
})
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
) // list reload
|
) // list reload
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
||||||
@@ -951,7 +951,7 @@ describe('BulkEditorComponent', () => {
|
|||||||
documents: [3, 4],
|
documents: [3, 4],
|
||||||
})
|
})
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
) // list reload
|
) // list reload
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
||||||
@@ -986,7 +986,7 @@ describe('BulkEditorComponent', () => {
|
|||||||
source_mode: 'latest_version',
|
source_mode: 'latest_version',
|
||||||
})
|
})
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
) // list reload
|
) // list reload
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
||||||
@@ -1027,7 +1027,7 @@ describe('BulkEditorComponent', () => {
|
|||||||
metadata_document_id: 3,
|
metadata_document_id: 3,
|
||||||
})
|
})
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
) // list reload
|
) // list reload
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
||||||
@@ -1046,7 +1046,7 @@ describe('BulkEditorComponent', () => {
|
|||||||
delete_originals: true,
|
delete_originals: true,
|
||||||
})
|
})
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
) // list reload
|
) // list reload
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
||||||
@@ -1067,7 +1067,7 @@ describe('BulkEditorComponent', () => {
|
|||||||
archive_fallback: true,
|
archive_fallback: true,
|
||||||
})
|
})
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
) // list reload
|
) // list reload
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
||||||
@@ -1153,7 +1153,7 @@ describe('BulkEditorComponent', () => {
|
|||||||
},
|
},
|
||||||
})
|
})
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
) // list reload
|
) // list reload
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
||||||
@@ -1460,7 +1460,7 @@ describe('BulkEditorComponent', () => {
|
|||||||
expect(toastServiceShowInfoSpy).toHaveBeenCalled()
|
expect(toastServiceShowInfoSpy).toHaveBeenCalled()
|
||||||
expect(listReloadSpy).toHaveBeenCalled()
|
expect(listReloadSpy).toHaveBeenCalled()
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
) // list reload
|
) // list reload
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id`
|
||||||
|
|||||||
@@ -16,7 +16,6 @@ import { first, map, Observable, Subject, switchMap, takeUntil } from 'rxjs'
|
|||||||
import { ConfirmDialogComponent } from 'src/app/components/common/confirm-dialog/confirm-dialog.component'
|
import { ConfirmDialogComponent } from 'src/app/components/common/confirm-dialog/confirm-dialog.component'
|
||||||
import { CustomField } from 'src/app/data/custom-field'
|
import { CustomField } from 'src/app/data/custom-field'
|
||||||
import { MatchingModel } from 'src/app/data/matching-model'
|
import { MatchingModel } from 'src/app/data/matching-model'
|
||||||
import { SelectionDataItem } from 'src/app/data/results'
|
|
||||||
import { SETTINGS_KEYS } from 'src/app/data/ui-settings'
|
import { SETTINGS_KEYS } from 'src/app/data/ui-settings'
|
||||||
import { IfPermissionsDirective } from 'src/app/directives/if-permissions.directive'
|
import { IfPermissionsDirective } from 'src/app/directives/if-permissions.directive'
|
||||||
import { DocumentListViewService } from 'src/app/services/document-list-view.service'
|
import { DocumentListViewService } from 'src/app/services/document-list-view.service'
|
||||||
@@ -33,6 +32,7 @@ import {
|
|||||||
DocumentBulkEditMethod,
|
DocumentBulkEditMethod,
|
||||||
DocumentService,
|
DocumentService,
|
||||||
MergeDocumentsRequest,
|
MergeDocumentsRequest,
|
||||||
|
SelectionDataItem,
|
||||||
} from 'src/app/services/rest/document.service'
|
} from 'src/app/services/rest/document.service'
|
||||||
import { SavedViewService } from 'src/app/services/rest/saved-view.service'
|
import { SavedViewService } from 'src/app/services/rest/saved-view.service'
|
||||||
import { ShareLinkBundleService } from 'src/app/services/rest/share-link-bundle.service'
|
import { ShareLinkBundleService } from 'src/app/services/rest/share-link-bundle.service'
|
||||||
|
|||||||
@@ -76,7 +76,6 @@ import {
|
|||||||
FILTER_TITLE_CONTENT,
|
FILTER_TITLE_CONTENT,
|
||||||
NEGATIVE_NULL_FILTER_VALUE,
|
NEGATIVE_NULL_FILTER_VALUE,
|
||||||
} from 'src/app/data/filter-rule-type'
|
} from 'src/app/data/filter-rule-type'
|
||||||
import { SelectionData, SelectionDataItem } from 'src/app/data/results'
|
|
||||||
import {
|
import {
|
||||||
PermissionAction,
|
PermissionAction,
|
||||||
PermissionType,
|
PermissionType,
|
||||||
@@ -85,7 +84,11 @@ import {
|
|||||||
import { CorrespondentService } from 'src/app/services/rest/correspondent.service'
|
import { CorrespondentService } from 'src/app/services/rest/correspondent.service'
|
||||||
import { CustomFieldsService } from 'src/app/services/rest/custom-fields.service'
|
import { CustomFieldsService } from 'src/app/services/rest/custom-fields.service'
|
||||||
import { DocumentTypeService } from 'src/app/services/rest/document-type.service'
|
import { DocumentTypeService } from 'src/app/services/rest/document-type.service'
|
||||||
import { DocumentService } from 'src/app/services/rest/document.service'
|
import {
|
||||||
|
DocumentService,
|
||||||
|
SelectionData,
|
||||||
|
SelectionDataItem,
|
||||||
|
} from 'src/app/services/rest/document.service'
|
||||||
import { SearchService } from 'src/app/services/rest/search.service'
|
import { SearchService } from 'src/app/services/rest/search.service'
|
||||||
import { StoragePathService } from 'src/app/services/rest/storage-path.service'
|
import { StoragePathService } from 'src/app/services/rest/storage-path.service'
|
||||||
import { TagService } from 'src/app/services/rest/tag.service'
|
import { TagService } from 'src/app/services/rest/tag.service'
|
||||||
|
|||||||
@@ -1,5 +1,3 @@
|
|||||||
import { Document } from './document'
|
|
||||||
|
|
||||||
export interface Results<T> {
|
export interface Results<T> {
|
||||||
count: number
|
count: number
|
||||||
|
|
||||||
@@ -7,20 +5,3 @@ export interface Results<T> {
|
|||||||
|
|
||||||
all: number[]
|
all: number[]
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface SelectionDataItem {
|
|
||||||
id: number
|
|
||||||
document_count: number
|
|
||||||
}
|
|
||||||
|
|
||||||
export interface SelectionData {
|
|
||||||
selected_storage_paths: SelectionDataItem[]
|
|
||||||
selected_correspondents: SelectionDataItem[]
|
|
||||||
selected_tags: SelectionDataItem[]
|
|
||||||
selected_document_types: SelectionDataItem[]
|
|
||||||
selected_custom_fields: SelectionDataItem[]
|
|
||||||
}
|
|
||||||
|
|
||||||
export interface DocumentResults extends Results<Document> {
|
|
||||||
selection_data?: SelectionData
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -126,10 +126,13 @@ describe('DocumentListViewService', () => {
|
|||||||
expect(documentListViewService.currentPage).toEqual(1)
|
expect(documentListViewService.currentPage).toEqual(1)
|
||||||
documentListViewService.reload()
|
documentListViewService.reload()
|
||||||
const req = httpTestingController.expectOne(
|
const req = httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
)
|
)
|
||||||
expect(req.request.method).toEqual('GET')
|
expect(req.request.method).toEqual('GET')
|
||||||
req.flush(full_results)
|
req.flush(full_results)
|
||||||
|
httpTestingController.expectOne(
|
||||||
|
`${environment.apiBaseUrl}documents/selection_data/`
|
||||||
|
)
|
||||||
expect(req.request.method).toEqual('GET')
|
expect(req.request.method).toEqual('GET')
|
||||||
expect(documentListViewService.isReloading).toBeFalsy()
|
expect(documentListViewService.isReloading).toBeFalsy()
|
||||||
expect(documentListViewService.activeSavedViewId).toBeNull()
|
expect(documentListViewService.activeSavedViewId).toBeNull()
|
||||||
@@ -141,12 +144,12 @@ describe('DocumentListViewService', () => {
|
|||||||
it('should handle error on page request out of range', () => {
|
it('should handle error on page request out of range', () => {
|
||||||
documentListViewService.currentPage = 50
|
documentListViewService.currentPage = 50
|
||||||
let req = httpTestingController.expectOne(
|
let req = httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=50&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=50&page_size=50&ordering=-created&truncate_content=true`
|
||||||
)
|
)
|
||||||
expect(req.request.method).toEqual('GET')
|
expect(req.request.method).toEqual('GET')
|
||||||
req.flush([], { status: 404, statusText: 'Unexpected error' })
|
req.flush([], { status: 404, statusText: 'Unexpected error' })
|
||||||
req = httpTestingController.expectOne(
|
req = httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
)
|
)
|
||||||
expect(req.request.method).toEqual('GET')
|
expect(req.request.method).toEqual('GET')
|
||||||
expect(documentListViewService.currentPage).toEqual(1)
|
expect(documentListViewService.currentPage).toEqual(1)
|
||||||
@@ -163,7 +166,7 @@ describe('DocumentListViewService', () => {
|
|||||||
]
|
]
|
||||||
documentListViewService.setFilterRules(filterRulesAny)
|
documentListViewService.setFilterRules(filterRulesAny)
|
||||||
let req = httpTestingController.expectOne(
|
let req = httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true&tags__id__in=${tags__id__in}`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&tags__id__in=${tags__id__in}`
|
||||||
)
|
)
|
||||||
expect(req.request.method).toEqual('GET')
|
expect(req.request.method).toEqual('GET')
|
||||||
req.flush(
|
req.flush(
|
||||||
@@ -171,13 +174,13 @@ describe('DocumentListViewService', () => {
|
|||||||
{ status: 404, statusText: 'Unexpected error' }
|
{ status: 404, statusText: 'Unexpected error' }
|
||||||
)
|
)
|
||||||
req = httpTestingController.expectOne(
|
req = httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
)
|
)
|
||||||
expect(req.request.method).toEqual('GET')
|
expect(req.request.method).toEqual('GET')
|
||||||
// reset the list
|
// reset the list
|
||||||
documentListViewService.setFilterRules([])
|
documentListViewService.setFilterRules([])
|
||||||
req = httpTestingController.expectOne(
|
req = httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
|
|
||||||
@@ -185,7 +188,7 @@ describe('DocumentListViewService', () => {
|
|||||||
documentListViewService.currentPage = 1
|
documentListViewService.currentPage = 1
|
||||||
documentListViewService.sortField = 'custom_field_999'
|
documentListViewService.sortField = 'custom_field_999'
|
||||||
let req = httpTestingController.expectOne(
|
let req = httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-custom_field_999&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-custom_field_999&truncate_content=true`
|
||||||
)
|
)
|
||||||
expect(req.request.method).toEqual('GET')
|
expect(req.request.method).toEqual('GET')
|
||||||
req.flush(
|
req.flush(
|
||||||
@@ -194,7 +197,7 @@ describe('DocumentListViewService', () => {
|
|||||||
)
|
)
|
||||||
// resets itself
|
// resets itself
|
||||||
req = httpTestingController.expectOne(
|
req = httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
|
|
||||||
@@ -209,7 +212,7 @@ describe('DocumentListViewService', () => {
|
|||||||
]
|
]
|
||||||
documentListViewService.setFilterRules(filterRulesAny)
|
documentListViewService.setFilterRules(filterRulesAny)
|
||||||
let req = httpTestingController.expectOne(
|
let req = httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true&tags__id__in=${tags__id__in}`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&tags__id__in=${tags__id__in}`
|
||||||
)
|
)
|
||||||
expect(req.request.method).toEqual('GET')
|
expect(req.request.method).toEqual('GET')
|
||||||
req.flush('Generic error', { status: 404, statusText: 'Unexpected error' })
|
req.flush('Generic error', { status: 404, statusText: 'Unexpected error' })
|
||||||
@@ -217,7 +220,7 @@ describe('DocumentListViewService', () => {
|
|||||||
// reset the list
|
// reset the list
|
||||||
documentListViewService.setFilterRules([])
|
documentListViewService.setFilterRules([])
|
||||||
req = httpTestingController.expectOne(
|
req = httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
|
|
||||||
@@ -226,7 +229,7 @@ describe('DocumentListViewService', () => {
|
|||||||
expect(documentListViewService.sortReverse).toBeTruthy()
|
expect(documentListViewService.sortReverse).toBeTruthy()
|
||||||
documentListViewService.setSort('added', false)
|
documentListViewService.setSort('added', false)
|
||||||
let req = httpTestingController.expectOne(
|
let req = httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=added&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=added&truncate_content=true`
|
||||||
)
|
)
|
||||||
expect(req.request.method).toEqual('GET')
|
expect(req.request.method).toEqual('GET')
|
||||||
expect(documentListViewService.sortField).toEqual('added')
|
expect(documentListViewService.sortField).toEqual('added')
|
||||||
@@ -234,12 +237,12 @@ describe('DocumentListViewService', () => {
|
|||||||
|
|
||||||
documentListViewService.sortField = 'created'
|
documentListViewService.sortField = 'created'
|
||||||
req = httpTestingController.expectOne(
|
req = httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=created&truncate_content=true`
|
||||||
)
|
)
|
||||||
expect(documentListViewService.sortField).toEqual('created')
|
expect(documentListViewService.sortField).toEqual('created')
|
||||||
documentListViewService.sortReverse = true
|
documentListViewService.sortReverse = true
|
||||||
req = httpTestingController.expectOne(
|
req = httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
)
|
)
|
||||||
expect(req.request.method).toEqual('GET')
|
expect(req.request.method).toEqual('GET')
|
||||||
expect(documentListViewService.sortReverse).toBeTruthy()
|
expect(documentListViewService.sortReverse).toBeTruthy()
|
||||||
@@ -259,7 +262,7 @@ describe('DocumentListViewService', () => {
|
|||||||
const req = httpTestingController.expectOne(
|
const req = httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=${page}&page_size=${
|
`${environment.apiBaseUrl}documents/?page=${page}&page_size=${
|
||||||
documentListViewService.pageSize
|
documentListViewService.pageSize
|
||||||
}&ordering=${reverse ? '-' : ''}${sort}&truncate_content=true&include_selection_data=true`
|
}&ordering=${reverse ? '-' : ''}${sort}&truncate_content=true`
|
||||||
)
|
)
|
||||||
expect(req.request.method).toEqual('GET')
|
expect(req.request.method).toEqual('GET')
|
||||||
expect(documentListViewService.currentPage).toEqual(page)
|
expect(documentListViewService.currentPage).toEqual(page)
|
||||||
@@ -276,7 +279,7 @@ describe('DocumentListViewService', () => {
|
|||||||
}
|
}
|
||||||
documentListViewService.loadFromQueryParams(convertToParamMap(params))
|
documentListViewService.loadFromQueryParams(convertToParamMap(params))
|
||||||
let req = httpTestingController.expectOne(
|
let req = httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=${documentListViewService.currentPage}&page_size=${documentListViewService.pageSize}&ordering=-added&truncate_content=true&include_selection_data=true&tags__id__all=${tags__id__all}`
|
`${environment.apiBaseUrl}documents/?page=${documentListViewService.currentPage}&page_size=${documentListViewService.pageSize}&ordering=-added&truncate_content=true&tags__id__all=${tags__id__all}`
|
||||||
)
|
)
|
||||||
expect(req.request.method).toEqual('GET')
|
expect(req.request.method).toEqual('GET')
|
||||||
expect(documentListViewService.filterRules).toEqual([
|
expect(documentListViewService.filterRules).toEqual([
|
||||||
@@ -286,12 +289,15 @@ describe('DocumentListViewService', () => {
|
|||||||
},
|
},
|
||||||
])
|
])
|
||||||
req.flush(full_results)
|
req.flush(full_results)
|
||||||
|
httpTestingController.expectOne(
|
||||||
|
`${environment.apiBaseUrl}documents/selection_data/`
|
||||||
|
)
|
||||||
})
|
})
|
||||||
|
|
||||||
it('should use filter rules to update query params', () => {
|
it('should use filter rules to update query params', () => {
|
||||||
documentListViewService.setFilterRules(filterRules)
|
documentListViewService.setFilterRules(filterRules)
|
||||||
const req = httpTestingController.expectOne(
|
const req = httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=${documentListViewService.currentPage}&page_size=${documentListViewService.pageSize}&ordering=-created&truncate_content=true&include_selection_data=true&tags__id__all=${tags__id__all}`
|
`${environment.apiBaseUrl}documents/?page=${documentListViewService.currentPage}&page_size=${documentListViewService.pageSize}&ordering=-created&truncate_content=true&tags__id__all=${tags__id__all}`
|
||||||
)
|
)
|
||||||
expect(req.request.method).toEqual('GET')
|
expect(req.request.method).toEqual('GET')
|
||||||
})
|
})
|
||||||
@@ -300,26 +306,34 @@ describe('DocumentListViewService', () => {
|
|||||||
documentListViewService.currentPage = 2
|
documentListViewService.currentPage = 2
|
||||||
let req = httpTestingController.expectOne((request) =>
|
let req = httpTestingController.expectOne((request) =>
|
||||||
request.urlWithParams.startsWith(
|
request.urlWithParams.startsWith(
|
||||||
`${environment.apiBaseUrl}documents/?page=2&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=2&page_size=50&ordering=-created&truncate_content=true`
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
expect(req.request.method).toEqual('GET')
|
expect(req.request.method).toEqual('GET')
|
||||||
req.flush(full_results)
|
req.flush(full_results)
|
||||||
|
req = httpTestingController.expectOne(
|
||||||
|
`${environment.apiBaseUrl}documents/selection_data/`
|
||||||
|
)
|
||||||
|
req.flush([])
|
||||||
|
|
||||||
documentListViewService.setFilterRules(filterRules, true)
|
documentListViewService.setFilterRules(filterRules, true)
|
||||||
|
|
||||||
const filteredReqs = httpTestingController.match(
|
const filteredReqs = httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true&tags__id__all=${tags__id__all}`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&tags__id__all=${tags__id__all}`
|
||||||
)
|
)
|
||||||
expect(filteredReqs).toHaveLength(1)
|
expect(filteredReqs).toHaveLength(1)
|
||||||
filteredReqs[0].flush(full_results)
|
filteredReqs[0].flush(full_results)
|
||||||
|
req = httpTestingController.expectOne(
|
||||||
|
`${environment.apiBaseUrl}documents/selection_data/`
|
||||||
|
)
|
||||||
|
req.flush([])
|
||||||
expect(documentListViewService.currentPage).toEqual(1)
|
expect(documentListViewService.currentPage).toEqual(1)
|
||||||
})
|
})
|
||||||
|
|
||||||
it('should support quick filter', () => {
|
it('should support quick filter', () => {
|
||||||
documentListViewService.quickFilter(filterRules)
|
documentListViewService.quickFilter(filterRules)
|
||||||
const req = httpTestingController.expectOne(
|
const req = httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=${documentListViewService.currentPage}&page_size=${documentListViewService.pageSize}&ordering=-created&truncate_content=true&include_selection_data=true&tags__id__all=${tags__id__all}`
|
`${environment.apiBaseUrl}documents/?page=${documentListViewService.currentPage}&page_size=${documentListViewService.pageSize}&ordering=-created&truncate_content=true&tags__id__all=${tags__id__all}`
|
||||||
)
|
)
|
||||||
expect(req.request.method).toEqual('GET')
|
expect(req.request.method).toEqual('GET')
|
||||||
})
|
})
|
||||||
@@ -342,21 +356,21 @@ describe('DocumentListViewService', () => {
|
|||||||
convertToParamMap(params)
|
convertToParamMap(params)
|
||||||
)
|
)
|
||||||
let req = httpTestingController.expectOne(
|
let req = httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=${page}&page_size=${documentListViewService.pageSize}&ordering=-added&truncate_content=true&include_selection_data=true&tags__id__all=${tags__id__all}`
|
`${environment.apiBaseUrl}documents/?page=${page}&page_size=${documentListViewService.pageSize}&ordering=-added&truncate_content=true&tags__id__all=${tags__id__all}`
|
||||||
)
|
)
|
||||||
expect(req.request.method).toEqual('GET')
|
expect(req.request.method).toEqual('GET')
|
||||||
// reset the list
|
// reset the list
|
||||||
documentListViewService.currentPage = 1
|
documentListViewService.currentPage = 1
|
||||||
req = httpTestingController.expectOne(
|
req = httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-added&truncate_content=true&include_selection_data=true&tags__id__all=9`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-added&truncate_content=true&tags__id__all=9`
|
||||||
)
|
)
|
||||||
documentListViewService.setFilterRules([])
|
documentListViewService.setFilterRules([])
|
||||||
req = httpTestingController.expectOne(
|
req = httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-added&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-added&truncate_content=true`
|
||||||
)
|
)
|
||||||
documentListViewService.sortField = 'created'
|
documentListViewService.sortField = 'created'
|
||||||
req = httpTestingController.expectOne(
|
req = httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
)
|
)
|
||||||
documentListViewService.activateSavedView(null)
|
documentListViewService.activateSavedView(null)
|
||||||
})
|
})
|
||||||
@@ -364,18 +378,21 @@ describe('DocumentListViewService', () => {
|
|||||||
it('should support navigating next / previous', () => {
|
it('should support navigating next / previous', () => {
|
||||||
documentListViewService.setFilterRules([])
|
documentListViewService.setFilterRules([])
|
||||||
let req = httpTestingController.expectOne(
|
let req = httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
)
|
)
|
||||||
expect(documentListViewService.currentPage).toEqual(1)
|
expect(documentListViewService.currentPage).toEqual(1)
|
||||||
documentListViewService.pageSize = 3
|
documentListViewService.pageSize = 3
|
||||||
req = httpTestingController.expectOne(
|
req = httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=3&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=3&ordering=-created&truncate_content=true`
|
||||||
)
|
)
|
||||||
expect(req.request.method).toEqual('GET')
|
expect(req.request.method).toEqual('GET')
|
||||||
req.flush({
|
req.flush({
|
||||||
count: 3,
|
count: 3,
|
||||||
results: documents.slice(0, 3),
|
results: documents.slice(0, 3),
|
||||||
})
|
})
|
||||||
|
httpTestingController
|
||||||
|
.expectOne(`${environment.apiBaseUrl}documents/selection_data/`)
|
||||||
|
.flush([])
|
||||||
expect(documentListViewService.hasNext(documents[0].id)).toBeTruthy()
|
expect(documentListViewService.hasNext(documents[0].id)).toBeTruthy()
|
||||||
expect(documentListViewService.hasPrevious(documents[0].id)).toBeFalsy()
|
expect(documentListViewService.hasPrevious(documents[0].id)).toBeFalsy()
|
||||||
documentListViewService.getNext(documents[0].id).subscribe((docId) => {
|
documentListViewService.getNext(documents[0].id).subscribe((docId) => {
|
||||||
@@ -422,7 +439,7 @@ describe('DocumentListViewService', () => {
|
|||||||
expect(documentListViewService.currentPage).toEqual(1)
|
expect(documentListViewService.currentPage).toEqual(1)
|
||||||
documentListViewService.pageSize = 3
|
documentListViewService.pageSize = 3
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=3&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=3&ordering=-created&truncate_content=true`
|
||||||
)
|
)
|
||||||
jest
|
jest
|
||||||
.spyOn(documentListViewService, 'getLastPage')
|
.spyOn(documentListViewService, 'getLastPage')
|
||||||
@@ -437,7 +454,7 @@ describe('DocumentListViewService', () => {
|
|||||||
expect(reloadSpy).toHaveBeenCalled()
|
expect(reloadSpy).toHaveBeenCalled()
|
||||||
expect(documentListViewService.currentPage).toEqual(2)
|
expect(documentListViewService.currentPage).toEqual(2)
|
||||||
const reqs = httpTestingController.match(
|
const reqs = httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=2&page_size=3&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=2&page_size=3&ordering=-created&truncate_content=true`
|
||||||
)
|
)
|
||||||
expect(reqs.length).toBeGreaterThan(0)
|
expect(reqs.length).toBeGreaterThan(0)
|
||||||
})
|
})
|
||||||
@@ -472,11 +489,11 @@ describe('DocumentListViewService', () => {
|
|||||||
.mockReturnValue(documents)
|
.mockReturnValue(documents)
|
||||||
documentListViewService.currentPage = 2
|
documentListViewService.currentPage = 2
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=2&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=2&page_size=50&ordering=-created&truncate_content=true`
|
||||||
)
|
)
|
||||||
documentListViewService.pageSize = 3
|
documentListViewService.pageSize = 3
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=2&page_size=3&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=2&page_size=3&ordering=-created&truncate_content=true`
|
||||||
)
|
)
|
||||||
const reloadSpy = jest.spyOn(documentListViewService, 'reload')
|
const reloadSpy = jest.spyOn(documentListViewService, 'reload')
|
||||||
documentListViewService.getPrevious(1).subscribe({
|
documentListViewService.getPrevious(1).subscribe({
|
||||||
@@ -486,7 +503,7 @@ describe('DocumentListViewService', () => {
|
|||||||
expect(reloadSpy).toHaveBeenCalled()
|
expect(reloadSpy).toHaveBeenCalled()
|
||||||
expect(documentListViewService.currentPage).toEqual(1)
|
expect(documentListViewService.currentPage).toEqual(1)
|
||||||
const reqs = httpTestingController.match(
|
const reqs = httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=3&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=3&ordering=-created&truncate_content=true`
|
||||||
)
|
)
|
||||||
expect(reqs.length).toBeGreaterThan(0)
|
expect(reqs.length).toBeGreaterThan(0)
|
||||||
})
|
})
|
||||||
@@ -499,10 +516,13 @@ describe('DocumentListViewService', () => {
|
|||||||
it('should support select a document', () => {
|
it('should support select a document', () => {
|
||||||
documentListViewService.reload()
|
documentListViewService.reload()
|
||||||
const req = httpTestingController.expectOne(
|
const req = httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
)
|
)
|
||||||
expect(req.request.method).toEqual('GET')
|
expect(req.request.method).toEqual('GET')
|
||||||
req.flush(full_results)
|
req.flush(full_results)
|
||||||
|
httpTestingController.expectOne(
|
||||||
|
`${environment.apiBaseUrl}documents/selection_data/`
|
||||||
|
)
|
||||||
documentListViewService.toggleSelected(documents[0])
|
documentListViewService.toggleSelected(documents[0])
|
||||||
expect(documentListViewService.isSelected(documents[0])).toBeTruthy()
|
expect(documentListViewService.isSelected(documents[0])).toBeTruthy()
|
||||||
documentListViewService.toggleSelected(documents[0])
|
documentListViewService.toggleSelected(documents[0])
|
||||||
@@ -524,13 +544,16 @@ describe('DocumentListViewService', () => {
|
|||||||
it('should support select page', () => {
|
it('should support select page', () => {
|
||||||
documentListViewService.pageSize = 3
|
documentListViewService.pageSize = 3
|
||||||
const req = httpTestingController.expectOne(
|
const req = httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=3&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=3&ordering=-created&truncate_content=true`
|
||||||
)
|
)
|
||||||
expect(req.request.method).toEqual('GET')
|
expect(req.request.method).toEqual('GET')
|
||||||
req.flush({
|
req.flush({
|
||||||
count: 3,
|
count: 3,
|
||||||
results: documents.slice(0, 3),
|
results: documents.slice(0, 3),
|
||||||
})
|
})
|
||||||
|
httpTestingController.expectOne(
|
||||||
|
`${environment.apiBaseUrl}documents/selection_data/`
|
||||||
|
)
|
||||||
documentListViewService.selectPage()
|
documentListViewService.selectPage()
|
||||||
expect(documentListViewService.selected.size).toEqual(3)
|
expect(documentListViewService.selected.size).toEqual(3)
|
||||||
expect(documentListViewService.isSelected(documents[5])).toBeFalsy()
|
expect(documentListViewService.isSelected(documents[5])).toBeFalsy()
|
||||||
@@ -539,10 +562,13 @@ describe('DocumentListViewService', () => {
|
|||||||
it('should support select range', () => {
|
it('should support select range', () => {
|
||||||
documentListViewService.reload()
|
documentListViewService.reload()
|
||||||
const req = httpTestingController.expectOne(
|
const req = httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
)
|
)
|
||||||
expect(req.request.method).toEqual('GET')
|
expect(req.request.method).toEqual('GET')
|
||||||
req.flush(full_results)
|
req.flush(full_results)
|
||||||
|
httpTestingController.expectOne(
|
||||||
|
`${environment.apiBaseUrl}documents/selection_data/`
|
||||||
|
)
|
||||||
documentListViewService.toggleSelected(documents[0])
|
documentListViewService.toggleSelected(documents[0])
|
||||||
expect(documentListViewService.isSelected(documents[0])).toBeTruthy()
|
expect(documentListViewService.isSelected(documents[0])).toBeTruthy()
|
||||||
documentListViewService.selectRangeTo(documents[2])
|
documentListViewService.selectRangeTo(documents[2])
|
||||||
@@ -562,7 +588,7 @@ describe('DocumentListViewService', () => {
|
|||||||
|
|
||||||
documentListViewService.setFilterRules(filterRules)
|
documentListViewService.setFilterRules(filterRules)
|
||||||
httpTestingController.expectOne(
|
httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true&tags__id__all=9`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&tags__id__all=9`
|
||||||
)
|
)
|
||||||
const reqs = httpTestingController.match(
|
const reqs = httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id&tags__id__all=9`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=100000&fields=id&tags__id__all=9`
|
||||||
@@ -578,7 +604,7 @@ describe('DocumentListViewService', () => {
|
|||||||
const cancelSpy = jest.spyOn(documentListViewService, 'cancelPending')
|
const cancelSpy = jest.spyOn(documentListViewService, 'cancelPending')
|
||||||
documentListViewService.reload()
|
documentListViewService.reload()
|
||||||
httpTestingController.expectOne(
|
httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true&tags__id__all=9`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&tags__id__all=9`
|
||||||
)
|
)
|
||||||
expect(cancelSpy).toHaveBeenCalled()
|
expect(cancelSpy).toHaveBeenCalled()
|
||||||
})
|
})
|
||||||
@@ -597,7 +623,7 @@ describe('DocumentListViewService', () => {
|
|||||||
documentListViewService.setFilterRules([])
|
documentListViewService.setFilterRules([])
|
||||||
expect(documentListViewService.sortField).toEqual('created')
|
expect(documentListViewService.sortField).toEqual('created')
|
||||||
httpTestingController.expectOne(
|
httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
|
|
||||||
@@ -624,11 +650,11 @@ describe('DocumentListViewService', () => {
|
|||||||
expect(localStorageSpy).toHaveBeenCalled()
|
expect(localStorageSpy).toHaveBeenCalled()
|
||||||
// reload triggered
|
// reload triggered
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
)
|
)
|
||||||
documentListViewService.displayFields = null
|
documentListViewService.displayFields = null
|
||||||
httpTestingController.match(
|
httpTestingController.match(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
)
|
)
|
||||||
expect(documentListViewService.displayFields).toEqual(
|
expect(documentListViewService.displayFields).toEqual(
|
||||||
DEFAULT_DISPLAY_FIELDS.filter((f) => f.id !== DisplayField.ADDED).map(
|
DEFAULT_DISPLAY_FIELDS.filter((f) => f.id !== DisplayField.ADDED).map(
|
||||||
@@ -668,7 +694,7 @@ describe('DocumentListViewService', () => {
|
|||||||
it('should generate quick filter URL preserving default state', () => {
|
it('should generate quick filter URL preserving default state', () => {
|
||||||
documentListViewService.reload()
|
documentListViewService.reload()
|
||||||
httpTestingController.expectOne(
|
httpTestingController.expectOne(
|
||||||
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true`
|
`${environment.apiBaseUrl}documents/?page=1&page_size=50&ordering=-created&truncate_content=true`
|
||||||
)
|
)
|
||||||
const urlTree = documentListViewService.getQuickFilterUrl(filterRules)
|
const urlTree = documentListViewService.getQuickFilterUrl(filterRules)
|
||||||
expect(urlTree).toBeDefined()
|
expect(urlTree).toBeDefined()
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
import { Injectable, inject } from '@angular/core'
|
import { Injectable, inject } from '@angular/core'
|
||||||
import { ParamMap, Router, UrlTree } from '@angular/router'
|
import { ParamMap, Router, UrlTree } from '@angular/router'
|
||||||
import { Observable, Subject, takeUntil } from 'rxjs'
|
import { Observable, Subject, first, takeUntil } from 'rxjs'
|
||||||
import {
|
import {
|
||||||
DEFAULT_DISPLAY_FIELDS,
|
DEFAULT_DISPLAY_FIELDS,
|
||||||
DisplayField,
|
DisplayField,
|
||||||
@@ -8,7 +8,6 @@ import {
|
|||||||
Document,
|
Document,
|
||||||
} from '../data/document'
|
} from '../data/document'
|
||||||
import { FilterRule } from '../data/filter-rule'
|
import { FilterRule } from '../data/filter-rule'
|
||||||
import { DocumentResults, SelectionData } from '../data/results'
|
|
||||||
import { SavedView } from '../data/saved-view'
|
import { SavedView } from '../data/saved-view'
|
||||||
import { DOCUMENT_LIST_SERVICE } from '../data/storage-keys'
|
import { DOCUMENT_LIST_SERVICE } from '../data/storage-keys'
|
||||||
import { SETTINGS_KEYS } from '../data/ui-settings'
|
import { SETTINGS_KEYS } from '../data/ui-settings'
|
||||||
@@ -18,7 +17,7 @@ import {
|
|||||||
isFullTextFilterRule,
|
isFullTextFilterRule,
|
||||||
} from '../utils/filter-rules'
|
} from '../utils/filter-rules'
|
||||||
import { paramsFromViewState, paramsToViewState } from '../utils/query-params'
|
import { paramsFromViewState, paramsToViewState } from '../utils/query-params'
|
||||||
import { DocumentService } from './rest/document.service'
|
import { DocumentService, SelectionData } from './rest/document.service'
|
||||||
import { SettingsService } from './settings.service'
|
import { SettingsService } from './settings.service'
|
||||||
|
|
||||||
const LIST_DEFAULT_DISPLAY_FIELDS: DisplayField[] = DEFAULT_DISPLAY_FIELDS.map(
|
const LIST_DEFAULT_DISPLAY_FIELDS: DisplayField[] = DEFAULT_DISPLAY_FIELDS.map(
|
||||||
@@ -261,17 +260,27 @@ export class DocumentListViewService {
|
|||||||
activeListViewState.sortField,
|
activeListViewState.sortField,
|
||||||
activeListViewState.sortReverse,
|
activeListViewState.sortReverse,
|
||||||
activeListViewState.filterRules,
|
activeListViewState.filterRules,
|
||||||
{ truncate_content: true, include_selection_data: true }
|
{ truncate_content: true }
|
||||||
)
|
)
|
||||||
.pipe(takeUntil(this.unsubscribeNotifier))
|
.pipe(takeUntil(this.unsubscribeNotifier))
|
||||||
.subscribe({
|
.subscribe({
|
||||||
next: (result) => {
|
next: (result) => {
|
||||||
const resultWithSelectionData = result as DocumentResults
|
|
||||||
this.initialized = true
|
this.initialized = true
|
||||||
this.isReloading = false
|
this.isReloading = false
|
||||||
activeListViewState.collectionSize = result.count
|
activeListViewState.collectionSize = result.count
|
||||||
activeListViewState.documents = result.results
|
activeListViewState.documents = result.results
|
||||||
this.selectionData = resultWithSelectionData.selection_data ?? null
|
|
||||||
|
this.documentService
|
||||||
|
.getSelectionData(result.all)
|
||||||
|
.pipe(first())
|
||||||
|
.subscribe({
|
||||||
|
next: (selectionData) => {
|
||||||
|
this.selectionData = selectionData
|
||||||
|
},
|
||||||
|
error: () => {
|
||||||
|
this.selectionData = null
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
if (updateQueryParams && !this._activeSavedViewId) {
|
if (updateQueryParams && !this._activeSavedViewId) {
|
||||||
let base = ['/documents']
|
let base = ['/documents']
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ import {
|
|||||||
import { DocumentMetadata } from 'src/app/data/document-metadata'
|
import { DocumentMetadata } from 'src/app/data/document-metadata'
|
||||||
import { DocumentSuggestions } from 'src/app/data/document-suggestions'
|
import { DocumentSuggestions } from 'src/app/data/document-suggestions'
|
||||||
import { FilterRule } from 'src/app/data/filter-rule'
|
import { FilterRule } from 'src/app/data/filter-rule'
|
||||||
import { Results, SelectionData } from 'src/app/data/results'
|
import { Results } from 'src/app/data/results'
|
||||||
import { SETTINGS_KEYS } from 'src/app/data/ui-settings'
|
import { SETTINGS_KEYS } from 'src/app/data/ui-settings'
|
||||||
import { queryParamsFromFilterRules } from '../../utils/query-params'
|
import { queryParamsFromFilterRules } from '../../utils/query-params'
|
||||||
import {
|
import {
|
||||||
@@ -24,6 +24,19 @@ import { SettingsService } from '../settings.service'
|
|||||||
import { AbstractPaperlessService } from './abstract-paperless-service'
|
import { AbstractPaperlessService } from './abstract-paperless-service'
|
||||||
import { CustomFieldsService } from './custom-fields.service'
|
import { CustomFieldsService } from './custom-fields.service'
|
||||||
|
|
||||||
|
export interface SelectionDataItem {
|
||||||
|
id: number
|
||||||
|
document_count: number
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface SelectionData {
|
||||||
|
selected_storage_paths: SelectionDataItem[]
|
||||||
|
selected_correspondents: SelectionDataItem[]
|
||||||
|
selected_tags: SelectionDataItem[]
|
||||||
|
selected_document_types: SelectionDataItem[]
|
||||||
|
selected_custom_fields: SelectionDataItem[]
|
||||||
|
}
|
||||||
|
|
||||||
export enum BulkEditSourceMode {
|
export enum BulkEditSourceMode {
|
||||||
LATEST_VERSION = 'latest_version',
|
LATEST_VERSION = 'latest_version',
|
||||||
EXPLICIT_SELECTION = 'explicit_selection',
|
EXPLICIT_SELECTION = 'explicit_selection',
|
||||||
|
|||||||
@@ -52,6 +52,7 @@ from documents.utils import copy_basic_file_stats
|
|||||||
from documents.utils import copy_file_with_basic_stats
|
from documents.utils import copy_file_with_basic_stats
|
||||||
from documents.utils import run_subprocess
|
from documents.utils import run_subprocess
|
||||||
from paperless.parsers.text import TextDocumentParser
|
from paperless.parsers.text import TextDocumentParser
|
||||||
|
from paperless.parsers.tika import TikaDocumentParser
|
||||||
from paperless_mail.parsers import MailDocumentParser
|
from paperless_mail.parsers import MailDocumentParser
|
||||||
|
|
||||||
LOGGING_NAME: Final[str] = "paperless.consumer"
|
LOGGING_NAME: Final[str] = "paperless.consumer"
|
||||||
@@ -67,7 +68,7 @@ def _parser_cleanup(parser: DocumentParser) -> None:
|
|||||||
|
|
||||||
TODO(stumpylog): Remove me in the future
|
TODO(stumpylog): Remove me in the future
|
||||||
"""
|
"""
|
||||||
if isinstance(parser, TextDocumentParser):
|
if isinstance(parser, (TextDocumentParser, TikaDocumentParser)):
|
||||||
parser.__exit__(None, None, None)
|
parser.__exit__(None, None, None)
|
||||||
else:
|
else:
|
||||||
parser.cleanup()
|
parser.cleanup()
|
||||||
@@ -448,6 +449,12 @@ class ConsumerPlugin(
|
|||||||
progress_callback=progress_callback,
|
progress_callback=progress_callback,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# New-style parsers use __enter__/__exit__ for resource management.
|
||||||
|
# _parser_cleanup (below) handles __exit__; call __enter__ here.
|
||||||
|
# TODO(stumpylog): Remove me in the future
|
||||||
|
if isinstance(document_parser, (TextDocumentParser, TikaDocumentParser)):
|
||||||
|
document_parser.__enter__()
|
||||||
|
|
||||||
self.log.debug(f"Parser: {type(document_parser).__name__}")
|
self.log.debug(f"Parser: {type(document_parser).__name__}")
|
||||||
|
|
||||||
# Parse the document. This may take some time.
|
# Parse the document. This may take some time.
|
||||||
@@ -476,7 +483,7 @@ class ConsumerPlugin(
|
|||||||
self.filename,
|
self.filename,
|
||||||
self.input_doc.mailrule_id,
|
self.input_doc.mailrule_id,
|
||||||
)
|
)
|
||||||
elif isinstance(document_parser, TextDocumentParser):
|
elif isinstance(document_parser, (TextDocumentParser, TikaDocumentParser)):
|
||||||
# TODO(stumpylog): Remove me in the future
|
# TODO(stumpylog): Remove me in the future
|
||||||
document_parser.parse(self.working_copy, mime_type)
|
document_parser.parse(self.working_copy, mime_type)
|
||||||
else:
|
else:
|
||||||
@@ -489,7 +496,7 @@ class ConsumerPlugin(
|
|||||||
ProgressStatusOptions.WORKING,
|
ProgressStatusOptions.WORKING,
|
||||||
ConsumerStatusShortMessage.GENERATING_THUMBNAIL,
|
ConsumerStatusShortMessage.GENERATING_THUMBNAIL,
|
||||||
)
|
)
|
||||||
if isinstance(document_parser, TextDocumentParser):
|
if isinstance(document_parser, (TextDocumentParser, TikaDocumentParser)):
|
||||||
# TODO(stumpylog): Remove me in the future
|
# TODO(stumpylog): Remove me in the future
|
||||||
thumbnail = document_parser.get_thumbnail(self.working_copy, mime_type)
|
thumbnail = document_parser.get_thumbnail(self.working_copy, mime_type)
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -1144,56 +1144,6 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
|
|||||||
self.assertEqual(len(response.data["all"]), 50)
|
self.assertEqual(len(response.data["all"]), 50)
|
||||||
self.assertCountEqual(response.data["all"], [d.id for d in docs])
|
self.assertCountEqual(response.data["all"], [d.id for d in docs])
|
||||||
|
|
||||||
def test_list_with_include_selection_data(self) -> None:
    """
    GIVEN:
        - A document with a correspondent, document type, storage path and tag
        - A second document that only carries an unrelated tag
    WHEN:
        - The document list is filtered by the first tag and requested with
          include_selection_data=true
    THEN:
        - The response is successful and contains a selection_data entry
        - Each object attached to the first document reports a
          document_count of 1
    """
    correspondent = Correspondent.objects.create(name="c1")
    doc_type = DocumentType.objects.create(name="dt1")
    storage_path = StoragePath.objects.create(name="sp1")
    tag = Tag.objects.create(name="tag")

    included_doc = Document.objects.create(
        checksum="A",
        correspondent=correspondent,
        document_type=doc_type,
        storage_path=storage_path,
    )
    included_doc.tags.add(tag)

    # This document must be excluded by the tag filter
    excluded_doc = Document.objects.create(checksum="B")
    unrelated_tag = Tag.objects.create(name="other")
    excluded_doc.tags.add(unrelated_tag)

    url = f"/api/documents/?tags__id__in={tag.id}&include_selection_data=true"
    response = self.client.get(url)

    self.assertEqual(response.status_code, status.HTTP_200_OK)
    self.assertIn("selection_data", response.data)

    # (selection_data key, id of the object expected to be counted once)
    expectations = (
        ("selected_correspondents", correspondent.id),
        ("selected_tags", tag.id),
        ("selected_document_types", doc_type.id),
        ("selected_storage_paths", storage_path.id),
    )
    for key, object_id in expectations:
        entry = next(
            item
            for item in response.data["selection_data"][key]
            if item["id"] == object_id
        )
        self.assertEqual(entry["document_count"], 1)
|
|
||||||
|
|
||||||
def test_statistics(self) -> None:
|
def test_statistics(self) -> None:
|
||||||
doc1 = Document.objects.create(
|
doc1 = Document.objects.create(
|
||||||
title="none1",
|
title="none1",
|
||||||
|
|||||||
@@ -89,46 +89,6 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
|||||||
self.assertEqual(len(results), 0)
|
self.assertEqual(len(results), 0)
|
||||||
self.assertCountEqual(response.data["all"], [])
|
self.assertCountEqual(response.data["all"], [])
|
||||||
|
|
||||||
def test_search_with_include_selection_data(self) -> None:
    """
    GIVEN:
        - An indexed document with a correspondent, document type, storage
          path and tag
    WHEN:
        - A full text search matching the document is requested with
          include_selection_data=true
    THEN:
        - The response is successful and contains a selection_data entry
        - The document's correspondent and tag each report a
          document_count of 1
    """
    correspondent = Correspondent.objects.create(name="c1")
    doc_type = DocumentType.objects.create(name="dt1")
    storage_path = StoragePath.objects.create(name="sp1")
    tag = Tag.objects.create(name="tag")

    indexed_doc = Document.objects.create(
        title="bank statement",
        content="bank content",
        checksum="A",
        correspondent=correspondent,
        document_type=doc_type,
        storage_path=storage_path,
    )
    indexed_doc.tags.add(tag)

    # The search endpoint only sees documents present in the index
    with AsyncWriter(index.open_index()) as writer:
        index.update_document(writer, indexed_doc)

    url = "/api/documents/?query=bank&include_selection_data=true"
    response = self.client.get(url)

    self.assertEqual(response.status_code, status.HTTP_200_OK)
    self.assertIn("selection_data", response.data)

    # (selection_data key, id of the object expected to be counted once)
    expectations = (
        ("selected_correspondents", correspondent.id),
        ("selected_tags", tag.id),
    )
    for key, object_id in expectations:
        entry = next(
            item
            for item in response.data["selection_data"][key]
            if item["id"] == object_id
        )
        self.assertEqual(entry["document_count"], 1)
|
|
||||||
|
|
||||||
def test_search_custom_field_ordering(self) -> None:
|
def test_search_custom_field_ordering(self) -> None:
|
||||||
custom_field = CustomField.objects.create(
|
custom_field = CustomField.objects.create(
|
||||||
name="Sortable field",
|
name="Sortable field",
|
||||||
|
|||||||
@@ -10,8 +10,8 @@ from documents.parsers import get_parser_class_for_mime_type
|
|||||||
from documents.parsers import get_supported_file_extensions
|
from documents.parsers import get_supported_file_extensions
|
||||||
from documents.parsers import is_file_ext_supported
|
from documents.parsers import is_file_ext_supported
|
||||||
from paperless.parsers.text import TextDocumentParser
|
from paperless.parsers.text import TextDocumentParser
|
||||||
|
from paperless.parsers.tika import TikaDocumentParser
|
||||||
from paperless_tesseract.parsers import RasterisedDocumentParser
|
from paperless_tesseract.parsers import RasterisedDocumentParser
|
||||||
from paperless_tika.parsers import TikaDocumentParser
|
|
||||||
|
|
||||||
|
|
||||||
class TestParserDiscovery(TestCase):
|
class TestParserDiscovery(TestCase):
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ import tempfile
|
|||||||
import zipfile
|
import zipfile
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from collections import deque
|
from collections import deque
|
||||||
|
from contextlib import nullcontext
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from time import mktime
|
from time import mktime
|
||||||
@@ -225,6 +226,7 @@ from paperless.celery import app as celery_app
|
|||||||
from paperless.config import AIConfig
|
from paperless.config import AIConfig
|
||||||
from paperless.config import GeneralConfig
|
from paperless.config import GeneralConfig
|
||||||
from paperless.models import ApplicationConfiguration
|
from paperless.models import ApplicationConfiguration
|
||||||
|
from paperless.parsers import ParserProtocol
|
||||||
from paperless.serialisers import GroupSerializer
|
from paperless.serialisers import GroupSerializer
|
||||||
from paperless.serialisers import UserSerializer
|
from paperless.serialisers import UserSerializer
|
||||||
from paperless.views import StandardPagination
|
from paperless.views import StandardPagination
|
||||||
@@ -835,61 +837,6 @@ class DocumentViewSet(
|
|||||||
"custom_field_",
|
"custom_field_",
|
||||||
)
|
)
|
||||||
|
|
||||||
def _get_selection_data_for_queryset(self, queryset):
    """
    Build the "selection data" payload for a set of documents.

    For every correspondent, tag, document type, storage path and custom
    field, count the distinct documents of ``queryset`` which reference it.
    Objects referenced by no document in ``queryset`` are still listed, with
    a count of 0.

    Returns a dict mapping each ``selected_*`` key to a list of
    ``{"id": ..., "document_count": ...}`` entries.
    """

    def _counts(model, relation):
        # ``relation`` is the lookup path from ``model`` to its documents
        annotated = model.objects.annotate(
            document_count=Count(
                relation,
                filter=Q(**{f"{relation}__in": queryset}),
                distinct=True,
            ),
        )
        return [
            {"id": obj.id, "document_count": obj.document_count}
            for obj in annotated
        ]

    return {
        "selected_correspondents": _counts(Correspondent, "documents"),
        "selected_tags": _counts(Tag, "documents"),
        "selected_document_types": _counts(DocumentType, "documents"),
        "selected_storage_paths": _counts(StoragePath, "documents"),
        "selected_custom_fields": _counts(CustomField, "fields__document"),
    }
|
|
||||||
|
|
||||||
def get_queryset(self):
|
def get_queryset(self):
|
||||||
latest_version_content = Subquery(
|
latest_version_content = Subquery(
|
||||||
Document.objects.filter(root_document=OuterRef("pk"))
|
Document.objects.filter(root_document=OuterRef("pk"))
|
||||||
@@ -1037,25 +984,6 @@ class DocumentViewSet(
|
|||||||
|
|
||||||
return response
|
return response
|
||||||
|
|
||||||
def list(self, request, *args, **kwargs):
    """
    List documents, optionally attaching selection data to the response.

    Without ``include_selection_data`` set to a true value in the query
    string, this defers entirely to the parent implementation. Otherwise the
    filtered queryset is listed (paginated when pagination applies) and a
    ``selection_data`` key computed over the whole filtered queryset is added
    to the response data.
    """
    wants_selection_data = get_boolean(
        str(request.query_params.get("include_selection_data", "false")),
    )
    if not wants_selection_data:
        return super().list(request, *args, **kwargs)

    queryset = self.filter_queryset(self.get_queryset())
    # Computed over the full filtered queryset, not only the current page
    selection_data = self._get_selection_data_for_queryset(queryset)

    page = self.paginate_queryset(queryset)
    if page is None:
        # Pagination disabled: serialise everything in one response
        serializer = self.get_serializer(queryset, many=True)
        return Response(
            {"results": serializer.data, "selection_data": selection_data},
        )

    serializer = self.get_serializer(page, many=True)
    response = self.get_paginated_response(serializer.data)
    response.data["selection_data"] = selection_data
    return response
|
|
||||||
|
|
||||||
def destroy(self, request, *args, **kwargs):
|
def destroy(self, request, *args, **kwargs):
|
||||||
from documents import index
|
from documents import index
|
||||||
|
|
||||||
@@ -1158,9 +1086,11 @@ class DocumentViewSet(
|
|||||||
parser_class = get_parser_class_for_mime_type(mime_type)
|
parser_class = get_parser_class_for_mime_type(mime_type)
|
||||||
if parser_class:
|
if parser_class:
|
||||||
parser = parser_class(progress_callback=None, logging_group=None)
|
parser = parser_class(progress_callback=None, logging_group=None)
|
||||||
|
cm = parser if isinstance(parser, ParserProtocol) else nullcontext(parser)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return parser.extract_metadata(file, mime_type)
|
with cm:
|
||||||
|
return parser.extract_metadata(file, mime_type)
|
||||||
except Exception: # pragma: no cover
|
except Exception: # pragma: no cover
|
||||||
logger.exception(f"Issue getting metadata for {file}")
|
logger.exception(f"Issue getting metadata for {file}")
|
||||||
# TODO: cover GPG errors, remove later.
|
# TODO: cover GPG errors, remove later.
|
||||||
@@ -2076,21 +2006,6 @@ class UnifiedSearchViewSet(DocumentViewSet):
|
|||||||
else None
|
else None
|
||||||
)
|
)
|
||||||
|
|
||||||
if get_boolean(
|
|
||||||
str(
|
|
||||||
request.query_params.get(
|
|
||||||
"include_selection_data",
|
|
||||||
"false",
|
|
||||||
),
|
|
||||||
),
|
|
||||||
):
|
|
||||||
result_ids = response.data.get("all", [])
|
|
||||||
response.data["selection_data"] = (
|
|
||||||
self._get_selection_data_for_queryset(
|
|
||||||
Document.objects.filter(pk__in=result_ids),
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
return response
|
return response
|
||||||
except NotFound:
|
except NotFound:
|
||||||
raise
|
raise
|
||||||
|
|||||||
@@ -194,8 +194,10 @@ class ParserRegistry:
|
|||||||
at runtime regardless of registration order.
|
at runtime regardless of registration order.
|
||||||
"""
|
"""
|
||||||
from paperless.parsers.text import TextDocumentParser
|
from paperless.parsers.text import TextDocumentParser
|
||||||
|
from paperless.parsers.tika import TikaDocumentParser
|
||||||
|
|
||||||
self.register_builtin(TextDocumentParser)
|
self.register_builtin(TextDocumentParser)
|
||||||
|
self.register_builtin(TikaDocumentParser)
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
# Discovery
|
# Discovery
|
||||||
|
|||||||
440
src/paperless/parsers/tika.py
Normal file
440
src/paperless/parsers/tika.py
Normal file
@@ -0,0 +1,440 @@
|
|||||||
|
"""
|
||||||
|
Built-in Tika document parser.
|
||||||
|
|
||||||
|
Handles Office documents (DOCX, ODT, XLS, XLSX, PPT, PPTX, RTF, etc.) by
|
||||||
|
sending them to an Apache Tika server for text extraction and a Gotenberg
|
||||||
|
server for PDF conversion. Because the source formats cannot be rendered by
|
||||||
|
a browser natively, the parser always produces a PDF rendition for display.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import shutil
|
||||||
|
import tempfile
|
||||||
|
from contextlib import ExitStack
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
from typing import Self
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
from django.conf import settings
|
||||||
|
from django.utils import timezone
|
||||||
|
from gotenberg_client import GotenbergClient
|
||||||
|
from gotenberg_client.options import PdfAFormat
|
||||||
|
from tika_client import TikaClient
|
||||||
|
|
||||||
|
from documents.parsers import ParseError
|
||||||
|
from documents.parsers import make_thumbnail_from_pdf
|
||||||
|
from paperless.config import OutputTypeConfig
|
||||||
|
from paperless.models import OutputTypeChoices
|
||||||
|
from paperless.version import __full_version_str__
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
import datetime
|
||||||
|
from types import TracebackType
|
||||||
|
|
||||||
|
from paperless.parsers import MetadataEntry
|
||||||
|
|
||||||
|
logger = logging.getLogger("paperless.parsing.tika")
|
||||||
|
|
||||||
|
# MIME types handled by the Tika parser, mapped to the file extension
# associated with each. Returned as-is by
# ``TikaDocumentParser.supported_mime_types`` and used by
# ``TikaDocumentParser.score`` to decide whether a file is supported.
_SUPPORTED_MIME_TYPES: dict[str, str] = {
    # Microsoft Office (legacy binary and Office Open XML)
    "application/msword": ".doc",
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",
    "application/vnd.ms-excel": ".xls",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
    "application/vnd.ms-powerpoint": ".ppt",
    "application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx",
    "application/vnd.openxmlformats-officedocument.presentationml.slideshow": ".ppsx",
    # OpenDocument
    "application/vnd.oasis.opendocument.presentation": ".odp",
    "application/vnd.oasis.opendocument.spreadsheet": ".ods",
    "application/vnd.oasis.opendocument.text": ".odt",
    "application/vnd.oasis.opendocument.graphics": ".odg",
    # Rich Text Format
    "text/rtf": ".rtf",
}
|
||||||
|
|
||||||
|
|
||||||
|
class TikaDocumentParser:
    """Parse Office documents via Apache Tika and Gotenberg for Paperless-ngx.

    Text extraction is handled by the Tika server.  PDF conversion for display
    is handled by Gotenberg (LibreOffice route).  Because the source formats
    cannot be rendered by a browser natively, ``requires_pdf_rendition`` is
    True and the PDF is always produced regardless of the ``produce_archive``
    flag passed to ``parse``.

    Both ``TikaClient`` and ``GotenbergClient`` are opened once in
    ``__enter__`` via an ``ExitStack`` and shared across ``parse``,
    ``extract_metadata``, and ``_convert_to_pdf`` calls, then closed via
    ``ExitStack.close()`` in ``__exit__``.  The parser must always be used
    as a context manager; ``parse`` and ``_convert_to_pdf`` raise
    ``ParseError`` when the clients have not been opened.

    Class attributes
    ----------------
    name : str
        Human-readable parser name.
    version : str
        Semantic version string, kept in sync with Paperless-ngx releases.
    author : str
        Maintainer name.
    url : str
        Issue tracker / source URL.
    """

    name: str = "Paperless-ngx Tika Parser"
    version: str = __full_version_str__
    author: str = "Paperless-ngx Contributors"
    url: str = "https://github.com/paperless-ngx/paperless-ngx"

    # ------------------------------------------------------------------
    # Class methods
    # ------------------------------------------------------------------

    @classmethod
    def supported_mime_types(cls) -> dict[str, str]:
        """Return the MIME types this parser handles.

        Returns
        -------
        dict[str, str]
            Mapping of MIME type to preferred file extension.
        """
        return _SUPPORTED_MIME_TYPES

    @classmethod
    def score(
        cls,
        mime_type: str,
        filename: str,
        path: Path | None = None,
    ) -> int | None:
        """Return the priority score for handling this file.

        Returns ``None`` when Tika integration is disabled so the registry
        skips this parser entirely.

        Parameters
        ----------
        mime_type:
            Detected MIME type of the file.
        filename:
            Original filename including extension.
        path:
            Optional filesystem path. Not inspected by this parser.

        Returns
        -------
        int | None
            10 if TIKA_ENABLED and the MIME type is supported, otherwise None.
        """
        if not settings.TIKA_ENABLED:
            return None
        if mime_type in _SUPPORTED_MIME_TYPES:
            return 10
        return None

    # ------------------------------------------------------------------
    # Properties
    # ------------------------------------------------------------------

    @property
    def can_produce_archive(self) -> bool:
        """Whether this parser can produce a searchable PDF archive copy.

        Returns
        -------
        bool
            Always False — Tika produces a display PDF, not an OCR archive.
        """
        return False

    @property
    def requires_pdf_rendition(self) -> bool:
        """Whether the parser must produce a PDF for the frontend to display.

        Returns
        -------
        bool
            Always True — Office formats cannot be rendered natively in a
            browser, so a PDF conversion is always required for display.
        """
        return True

    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------

    def __init__(self, logging_group: object = None) -> None:
        """Create the scratch directory and initialise empty parse results.

        Parameters
        ----------
        logging_group:
            Accepted but not used by this parser.
        """
        settings.SCRATCH_DIR.mkdir(parents=True, exist_ok=True)
        self._tempdir = Path(
            tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR),
        )
        self._text: str | None = None
        self._date: datetime.datetime | None = None
        self._archive_path: Path | None = None
        self._exit_stack = ExitStack()
        self._tika_client: TikaClient | None = None
        self._gotenberg_client: GotenbergClient | None = None

    def __enter__(self) -> Self:
        """Open the Tika and Gotenberg clients.

        Returns
        -------
        Self
            This parser, ready for ``parse`` / ``extract_metadata`` calls.
        """
        try:
            self._tika_client = self._exit_stack.enter_context(
                TikaClient(
                    tika_url=settings.TIKA_ENDPOINT,
                    timeout=settings.CELERY_TASK_TIME_LIMIT,
                ),
            )
            self._gotenberg_client = self._exit_stack.enter_context(
                GotenbergClient(
                    host=settings.TIKA_GOTENBERG_ENDPOINT,
                    timeout=settings.CELERY_TASK_TIME_LIMIT,
                ),
            )
        except BaseException:
            # __exit__ is not invoked when __enter__ raises, so release any
            # client which was already opened before propagating the error.
            self._exit_stack.close()
            self._tika_client = None
            self._gotenberg_client = None
            raise
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> None:
        """Close both clients and remove the scratch directory.

        Exceptions raised inside the ``with`` block are never suppressed.
        """
        try:
            self._exit_stack.close()
        finally:
            # Remove the scratch directory even if closing a client failed,
            # and drop the references to the now closed clients.
            self._tika_client = None
            self._gotenberg_client = None
            logger.debug("Cleaning up temporary directory %s", self._tempdir)
            shutil.rmtree(self._tempdir, ignore_errors=True)

    # ------------------------------------------------------------------
    # Core parsing interface
    # ------------------------------------------------------------------

    def parse(
        self,
        document_path: Path,
        mime_type: str,
        *,
        produce_archive: bool = True,
    ) -> None:
        """Send the document to Tika for text extraction and Gotenberg for PDF.

        Because ``requires_pdf_rendition`` is True the PDF conversion is
        always performed — the ``produce_archive`` flag is intentionally
        ignored.

        Parameters
        ----------
        document_path:
            Absolute path to the document file to parse.
        mime_type:
            Detected MIME type of the document.
        produce_archive:
            Accepted for protocol compatibility but ignored; the PDF rendition
            is always produced since the source format cannot be displayed
            natively in the browser.

        Raises
        ------
        documents.parsers.ParseError
            If Tika or Gotenberg returns an error, or if the parser is used
            outside of a ``with`` block.
        """
        if self._tika_client is None:
            raise ParseError(
                "TikaDocumentParser must be used as a context manager: "
                "the Tika client is not open",
            )

        logger.info("Sending %s to Tika server", document_path)

        try:
            try:
                parsed = self._tika_client.tika.as_text.from_file(
                    document_path,
                    mime_type,
                )
            except httpx.HTTPStatusError as err:
                # Workaround https://issues.apache.org/jira/browse/TIKA-4110
                # Tika fails with some files as multi-part form data
                if err.response.status_code == httpx.codes.INTERNAL_SERVER_ERROR:
                    parsed = self._tika_client.tika.as_text.from_buffer(
                        document_path.read_bytes(),
                        mime_type,
                    )
                else:  # pragma: no cover
                    raise
        except Exception as err:
            raise ParseError(
                f"Could not parse {document_path} with tika server at "
                f"{settings.TIKA_ENDPOINT}: {err}",
            ) from err

        self._text = parsed.content
        if self._text is not None:
            self._text = self._text.strip()

        self._date = parsed.created
        if self._date is not None and timezone.is_naive(self._date):
            self._date = timezone.make_aware(self._date)

        # Always convert — requires_pdf_rendition=True means the browser
        # cannot display the source format natively.
        self._archive_path = self._convert_to_pdf(document_path)

    # ------------------------------------------------------------------
    # Result accessors
    # ------------------------------------------------------------------

    def get_text(self) -> str | None:
        """Return the plain-text content extracted during parse.

        Returns
        -------
        str | None
            Extracted text, or None if parse has not been called yet.
        """
        return self._text

    def get_date(self) -> datetime.datetime | None:
        """Return the document date detected during parse.

        Returns
        -------
        datetime.datetime | None
            Creation date from Tika metadata, or None if not detected.
        """
        return self._date

    def get_archive_path(self) -> Path | None:
        """Return the path to the generated PDF rendition, or None.

        Returns
        -------
        Path | None
            Path to the PDF produced by Gotenberg, or None if parse has not
            been called yet.
        """
        return self._archive_path

    # ------------------------------------------------------------------
    # Thumbnail and metadata
    # ------------------------------------------------------------------

    def get_thumbnail(self, document_path: Path, mime_type: str) -> Path:
        """Generate a thumbnail from the PDF rendition of the document.

        Converts the document to PDF first if not already done.

        Parameters
        ----------
        document_path:
            Absolute path to the source document.
        mime_type:
            Detected MIME type of the document.

        Returns
        -------
        Path
            Path to the generated WebP thumbnail inside the temporary directory.

        Raises
        ------
        documents.parsers.ParseError
            If the PDF rendition has to be produced and the conversion fails.
        """
        if self._archive_path is None:
            self._archive_path = self._convert_to_pdf(document_path)
        return make_thumbnail_from_pdf(self._archive_path, self._tempdir)

    def get_page_count(
        self,
        document_path: Path,
        mime_type: str,
    ) -> int | None:
        """Return the number of pages in the document.

        Returns
        -------
        int | None
            Always None — page count is not available from Tika.
        """
        return None

    def extract_metadata(
        self,
        document_path: Path,
        mime_type: str,
    ) -> list[MetadataEntry]:
        """Extract format-specific metadata via the Tika metadata endpoint.

        This is best effort: any failure is logged as a warning and an empty
        list is returned instead of raising.

        Parameters
        ----------
        document_path:
            Absolute path to the source document.
        mime_type:
            Detected MIME type of the document.

        Returns
        -------
        list[MetadataEntry]
            All key/value pairs returned by Tika, or ``[]`` on error.
        """
        if TYPE_CHECKING:
            assert self._tika_client is not None

        try:
            parsed = self._tika_client.metadata.from_file(document_path, mime_type)
            return [
                {
                    "namespace": "",
                    "prefix": "",
                    "key": key,
                    "value": value,
                }
                for key, value in parsed.data.items()
            ]
        except Exception as e:
            logger.warning(
                "Error while fetching document metadata for %s: %s",
                document_path,
                e,
            )
            return []

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _convert_to_pdf(self, document_path: Path) -> Path:
        """Convert the document to PDF using Gotenberg's LibreOffice route.

        Parameters
        ----------
        document_path:
            Absolute path to the source document.

        Returns
        -------
        Path
            Path to the generated PDF inside the temporary directory.

        Raises
        ------
        documents.parsers.ParseError
            If Gotenberg returns an error, or if the parser is used outside
            of a ``with`` block.
        """
        if self._gotenberg_client is None:
            raise ParseError(
                "TikaDocumentParser must be used as a context manager: "
                "the Gotenberg client is not open",
            )

        pdf_path = self._tempdir / "convert.pdf"

        logger.info("Converting %s to PDF as %s", document_path, pdf_path)

        with self._gotenberg_client.libre_office.to_pdf() as route:
            # Set the output format of the resulting PDF.
            # OutputTypeConfig reads the database-stored ApplicationConfiguration
            # first, then falls back to the PAPERLESS_OCR_OUTPUT_TYPE env var.
            output_type = OutputTypeConfig().output_type
            if output_type in {
                OutputTypeChoices.PDF_A,
                OutputTypeChoices.PDF_A2,
            }:
                route.pdf_format(PdfAFormat.A2b)
            elif output_type == OutputTypeChoices.PDF_A1:
                logger.warning(
                    "Gotenberg does not support PDF/A-1a, choosing PDF/A-2b instead",
                )
                route.pdf_format(PdfAFormat.A2b)
            elif output_type == OutputTypeChoices.PDF_A3:
                route.pdf_format(PdfAFormat.A3b)

            route.convert(document_path)

            try:
                response = route.run()
                pdf_path.write_bytes(response.content)
                return pdf_path
            except Exception as err:
                raise ParseError(
                    f"Error while converting document to PDF: {err}",
                ) from err
|
||||||
@@ -11,6 +11,7 @@ from typing import TYPE_CHECKING
|
|||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from paperless.parsers.text import TextDocumentParser
|
from paperless.parsers.text import TextDocumentParser
|
||||||
|
from paperless.parsers.tika import TikaDocumentParser
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from collections.abc import Generator
|
from collections.abc import Generator
|
||||||
@@ -74,3 +75,86 @@ def text_parser() -> Generator[TextDocumentParser, None, None]:
|
|||||||
"""
|
"""
|
||||||
with TextDocumentParser() as parser:
|
with TextDocumentParser() as parser:
|
||||||
yield parser
|
yield parser
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Tika parser sample files
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
def tika_samples_dir(samples_dir: Path) -> Path:
    """Locate the directory holding the Tika parser sample documents.

    Returns
    -------
    Path
        ``<samples_dir>/tika/``
    """
    # The Tika samples live in a "tika" sub-directory of the shared samples dir.
    tika_dir = samples_dir.joinpath("tika")
    return tika_dir
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
def sample_odt_file(tika_samples_dir: Path) -> Path:
    """Locate the OpenDocument Text sample used by the Tika parser tests.

    Returns
    -------
    Path
        ``sample.odt`` inside the Tika samples directory.
    """
    odt_path = tika_samples_dir.joinpath("sample.odt")
    return odt_path
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
def sample_docx_file(tika_samples_dir: Path) -> Path:
    """Locate the DOCX sample used by the Tika parser tests.

    Returns
    -------
    Path
        ``sample.docx`` inside the Tika samples directory.
    """
    docx_path = tika_samples_dir.joinpath("sample.docx")
    return docx_path
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
def sample_doc_file(tika_samples_dir: Path) -> Path:
    """Locate the legacy ``.doc`` sample used by the Tika parser tests.

    Returns
    -------
    Path
        ``sample.doc`` inside the Tika samples directory.
    """
    doc_path = tika_samples_dir.joinpath("sample.doc")
    return doc_path
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
def sample_broken_odt(tika_samples_dir: Path) -> Path:
    """Locate the broken ODT sample used to exercise the multi-part fallback.

    Returns
    -------
    Path
        ``multi-part-broken.odt`` inside the Tika samples directory.
    """
    broken_path = tika_samples_dir.joinpath("multi-part-broken.odt")
    return broken_path
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Tika parser instance
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
def tika_parser() -> Generator[TikaDocumentParser, None, None]:
    """Provide a fresh TikaDocumentParser for a single test.

    The parser is used as a context manager, so leaving the ``with`` block
    after the test finishes cleans up the parser's temporary directory.

    Yields
    ------
    TikaDocumentParser
        A ready-to-use parser instance.
    """
    with TikaDocumentParser() as tika:
        # Control returns here once the requesting test has completed.
        yield tika
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ from pathlib import Path
|
|||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from documents.tests.utils import util_call_with_backoff
|
from documents.tests.utils import util_call_with_backoff
|
||||||
from paperless_tika.parsers import TikaDocumentParser
|
from paperless.parsers.tika import TikaDocumentParser
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.skipif(
|
@pytest.mark.skipif(
|
||||||
@@ -42,14 +42,15 @@ class TestTikaParserAgainstServer:
|
|||||||
)
|
)
|
||||||
|
|
||||||
assert (
|
assert (
|
||||||
tika_parser.text
|
tika_parser.get_text()
|
||||||
== "This is an ODT test document, created September 14, 2022"
|
== "This is an ODT test document, created September 14, 2022"
|
||||||
)
|
)
|
||||||
assert tika_parser.archive_path is not None
|
archive = tika_parser.get_archive_path()
|
||||||
assert b"PDF-" in tika_parser.archive_path.read_bytes()[:10]
|
assert archive is not None
|
||||||
|
assert b"PDF-" in archive.read_bytes()[:10]
|
||||||
|
|
||||||
# TODO: Unsure what can set the Creation-Date field in a document, enable when possible
|
# TODO: Unsure what can set the Creation-Date field in a document, enable when possible
|
||||||
# self.assertEqual(tika_parser.date, datetime.datetime(2022, 9, 14))
|
# self.assertEqual(tika_parser.get_date(), datetime.datetime(2022, 9, 14))
|
||||||
|
|
||||||
def test_basic_parse_docx(
|
def test_basic_parse_docx(
|
||||||
self,
|
self,
|
||||||
@@ -74,14 +75,15 @@ class TestTikaParserAgainstServer:
|
|||||||
)
|
)
|
||||||
|
|
||||||
assert (
|
assert (
|
||||||
tika_parser.text
|
tika_parser.get_text()
|
||||||
== "This is an DOCX test document, also made September 14, 2022"
|
== "This is an DOCX test document, also made September 14, 2022"
|
||||||
)
|
)
|
||||||
assert tika_parser.archive_path is not None
|
archive = tika_parser.get_archive_path()
|
||||||
with Path(tika_parser.archive_path).open("rb") as f:
|
assert archive is not None
|
||||||
|
with archive.open("rb") as f:
|
||||||
assert b"PDF-" in f.read()[:10]
|
assert b"PDF-" in f.read()[:10]
|
||||||
|
|
||||||
# self.assertEqual(tika_parser.date, datetime.datetime(2022, 9, 14))
|
# self.assertEqual(tika_parser.get_date(), datetime.datetime(2022, 9, 14))
|
||||||
|
|
||||||
def test_basic_parse_doc(
|
def test_basic_parse_doc(
|
||||||
self,
|
self,
|
||||||
@@ -102,13 +104,12 @@ class TestTikaParserAgainstServer:
|
|||||||
[sample_doc_file, "application/msword"],
|
[sample_doc_file, "application/msword"],
|
||||||
)
|
)
|
||||||
|
|
||||||
assert tika_parser.text is not None
|
text = tika_parser.get_text()
|
||||||
assert (
|
assert text is not None
|
||||||
"This is a test document, saved in the older .doc format"
|
assert "This is a test document, saved in the older .doc format" in text
|
||||||
in tika_parser.text
|
archive = tika_parser.get_archive_path()
|
||||||
)
|
assert archive is not None
|
||||||
assert tika_parser.archive_path is not None
|
with archive.open("rb") as f:
|
||||||
with Path(tika_parser.archive_path).open("rb") as f:
|
|
||||||
assert b"PDF-" in f.read()[:10]
|
assert b"PDF-" in f.read()[:10]
|
||||||
|
|
||||||
def test_tika_fails_multi_part(
|
def test_tika_fails_multi_part(
|
||||||
@@ -133,6 +134,7 @@ class TestTikaParserAgainstServer:
|
|||||||
[sample_broken_odt, "application/vnd.oasis.opendocument.text"],
|
[sample_broken_odt, "application/vnd.oasis.opendocument.text"],
|
||||||
)
|
)
|
||||||
|
|
||||||
assert tika_parser.archive_path is not None
|
archive = tika_parser.get_archive_path()
|
||||||
with Path(tika_parser.archive_path).open("rb") as f:
|
assert archive is not None
|
||||||
|
with archive.open("rb") as f:
|
||||||
assert b"PDF-" in f.read()[:10]
|
assert b"PDF-" in f.read()[:10]
|
||||||
@@ -9,7 +9,56 @@ from pytest_django.fixtures import SettingsWrapper
|
|||||||
from pytest_httpx import HTTPXMock
|
from pytest_httpx import HTTPXMock
|
||||||
|
|
||||||
from documents.parsers import ParseError
|
from documents.parsers import ParseError
|
||||||
from paperless_tika.parsers import TikaDocumentParser
|
from paperless.parsers import ParserProtocol
|
||||||
|
from paperless.parsers.tika import TikaDocumentParser
|
||||||
|
|
||||||
|
|
||||||
|
class TestTikaParserRegistryInterface:
    """Verify that TikaDocumentParser satisfies the ParserProtocol contract.

    Tests that need a parser *instance* create it inside a ``with`` block,
    matching how the ``tika_parser`` fixture uses the parser, so the instance
    is always closed again instead of being left for garbage collection.
    Tests that only exercise classmethods never instantiate the parser.
    """

    def test_satisfies_parser_protocol(self) -> None:
        """An instance passes the ``isinstance`` check against ParserProtocol."""
        with TikaDocumentParser() as parser:
            assert isinstance(parser, ParserProtocol)

    def test_supported_mime_types_is_classmethod(self) -> None:
        """``supported_mime_types`` is callable on the class and returns a non-empty dict."""
        mime_types = TikaDocumentParser.supported_mime_types()
        assert isinstance(mime_types, dict)
        assert len(mime_types) > 0

    def test_score_returns_none_when_tika_disabled(
        self,
        settings: SettingsWrapper,
    ) -> None:
        """With ``TIKA_ENABLED`` off, ``score`` returns ``None`` even for an ODT file."""
        settings.TIKA_ENABLED = False
        result = TikaDocumentParser.score(
            "application/vnd.oasis.opendocument.text",
            "sample.odt",
        )
        assert result is None

    def test_score_returns_int_when_tika_enabled(
        self,
        settings: SettingsWrapper,
    ) -> None:
        """With ``TIKA_ENABLED`` on, ``score`` returns an integer for an ODT file."""
        settings.TIKA_ENABLED = True
        result = TikaDocumentParser.score(
            "application/vnd.oasis.opendocument.text",
            "sample.odt",
        )
        assert isinstance(result, int)

    def test_score_returns_none_for_unsupported_mime(
        self,
        settings: SettingsWrapper,
    ) -> None:
        """With Tika enabled, ``score`` still returns ``None`` for ``application/pdf``."""
        settings.TIKA_ENABLED = True
        result = TikaDocumentParser.score("application/pdf", "doc.pdf")
        assert result is None

    def test_can_produce_archive_is_false(self) -> None:
        """``can_produce_archive`` is ``False`` on an instance."""
        with TikaDocumentParser() as parser:
            assert parser.can_produce_archive is False

    def test_requires_pdf_rendition_is_true(self) -> None:
        """``requires_pdf_rendition`` is ``True`` on an instance."""
        with TikaDocumentParser() as parser:
            assert parser.requires_pdf_rendition is True
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.django_db()
|
@pytest.mark.django_db()
|
||||||
@@ -36,12 +85,12 @@ class TestTikaParser:
|
|||||||
|
|
||||||
tika_parser.parse(sample_odt_file, "application/vnd.oasis.opendocument.text")
|
tika_parser.parse(sample_odt_file, "application/vnd.oasis.opendocument.text")
|
||||||
|
|
||||||
assert tika_parser.text == "the content"
|
assert tika_parser.get_text() == "the content"
|
||||||
assert tika_parser.archive_path is not None
|
assert tika_parser.get_archive_path() is not None
|
||||||
with Path(tika_parser.archive_path).open("rb") as f:
|
with Path(tika_parser.get_archive_path()).open("rb") as f:
|
||||||
assert f.read() == b"PDF document"
|
assert f.read() == b"PDF document"
|
||||||
|
|
||||||
assert tika_parser.date == datetime.datetime(
|
assert tika_parser.get_date() == datetime.datetime(
|
||||||
2020,
|
2020,
|
||||||
11,
|
11,
|
||||||
21,
|
21,
|
||||||
@@ -89,7 +138,7 @@ class TestTikaParser:
|
|||||||
httpx_mock.add_response(status_code=HTTPStatus.INTERNAL_SERVER_ERROR)
|
httpx_mock.add_response(status_code=HTTPStatus.INTERNAL_SERVER_ERROR)
|
||||||
|
|
||||||
with pytest.raises(ParseError):
|
with pytest.raises(ParseError):
|
||||||
tika_parser.convert_to_pdf(sample_odt_file, None)
|
tika_parser._convert_to_pdf(sample_odt_file)
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
("setting_value", "expected_form_value"),
|
("setting_value", "expected_form_value"),
|
||||||
@@ -106,7 +155,6 @@ class TestTikaParser:
|
|||||||
expected_form_value: str,
|
expected_form_value: str,
|
||||||
httpx_mock: HTTPXMock,
|
httpx_mock: HTTPXMock,
|
||||||
settings: SettingsWrapper,
|
settings: SettingsWrapper,
|
||||||
tika_parser: TikaDocumentParser,
|
|
||||||
sample_odt_file: Path,
|
sample_odt_file: Path,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""
|
"""
|
||||||
@@ -117,6 +165,8 @@ class TestTikaParser:
|
|||||||
THEN:
|
THEN:
|
||||||
- Request to Gotenberg contains the expected PDF/A format string
|
- Request to Gotenberg contains the expected PDF/A format string
|
||||||
"""
|
"""
|
||||||
|
# Parser must be created after the setting is changed so that
|
||||||
|
# OutputTypeConfig reads the correct value at __init__ time.
|
||||||
settings.OCR_OUTPUT_TYPE = setting_value
|
settings.OCR_OUTPUT_TYPE = setting_value
|
||||||
httpx_mock.add_response(
|
httpx_mock.add_response(
|
||||||
status_code=codes.OK,
|
status_code=codes.OK,
|
||||||
@@ -124,7 +174,8 @@ class TestTikaParser:
|
|||||||
method="POST",
|
method="POST",
|
||||||
)
|
)
|
||||||
|
|
||||||
tika_parser.convert_to_pdf(sample_odt_file, None)
|
with TikaDocumentParser() as parser:
|
||||||
|
parser._convert_to_pdf(sample_odt_file)
|
||||||
|
|
||||||
request = httpx_mock.get_request()
|
request = httpx_mock.get_request()
|
||||||
|
|
||||||
@@ -221,7 +221,7 @@ class RasterisedDocumentParser(DocumentParser):
|
|||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
assert isinstance(self.settings, OcrConfig)
|
assert isinstance(self.settings, OcrConfig)
|
||||||
ocrmypdf_args = {
|
ocrmypdf_args = {
|
||||||
"input_file": input_file,
|
"input_file_or_options": input_file,
|
||||||
"output_file": output_file,
|
"output_file": output_file,
|
||||||
# need to use threads, since this will be run in daemonized
|
# need to use threads, since this will be run in daemonized
|
||||||
# processes via the task library.
|
# processes via the task library.
|
||||||
@@ -285,7 +285,7 @@ class RasterisedDocumentParser(DocumentParser):
|
|||||||
"for compatibility with img2pdf",
|
"for compatibility with img2pdf",
|
||||||
)
|
)
|
||||||
# Replace the input file with the non-alpha
|
# Replace the input file with the non-alpha
|
||||||
ocrmypdf_args["input_file"] = self.remove_alpha(input_file)
|
ocrmypdf_args["input_file_or_options"] = self.remove_alpha(input_file)
|
||||||
|
|
||||||
if dpi:
|
if dpi:
|
||||||
self.log.debug(f"Detected DPI for image {input_file}: {dpi}")
|
self.log.debug(f"Detected DPI for image {input_file}: {dpi}")
|
||||||
|
|||||||
@@ -778,7 +778,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
safe_fallback=False,
|
safe_fallback=False,
|
||||||
)
|
)
|
||||||
|
|
||||||
self.assertEqual(params["input_file"], "input.pdf")
|
self.assertEqual(params["input_file_or_options"], "input.pdf")
|
||||||
self.assertEqual(params["output_file"], "output.pdf")
|
self.assertEqual(params["output_file"], "output.pdf")
|
||||||
self.assertEqual(params["sidecar"], "sidecar.txt")
|
self.assertEqual(params["sidecar"], "sidecar.txt")
|
||||||
|
|
||||||
|
|||||||
@@ -1,10 +1,12 @@
|
|||||||
def get_parser(*args, **kwargs):
|
def get_parser(*args, **kwargs):
|
||||||
from paperless.parsers.text import TextDocumentParser
|
from paperless.parsers.text import TextDocumentParser
|
||||||
|
|
||||||
# The new TextDocumentParser does not accept the legacy logging_group /
|
# TextDocumentParser accepts logging_group for constructor compatibility but
|
||||||
# progress_callback kwargs injected by the old signal-based consumer.
|
# does not store or use it (no legacy DocumentParser base class).
|
||||||
# These are dropped here; Phase 4 will replace this signal path with the
|
# progress_callback is also not used. Both may arrive as a positional arg
|
||||||
# new ParserRegistry so the shim can be removed at that point.
|
# (consumer) or a keyword arg (views); *args absorbs the positional form,
|
||||||
|
# kwargs.pop handles the keyword form. Phase 4 will replace this signal
|
||||||
|
# path with the new ParserRegistry so the shim can be removed at that point.
|
||||||
kwargs.pop("logging_group", None)
|
kwargs.pop("logging_group", None)
|
||||||
kwargs.pop("progress_callback", None)
|
kwargs.pop("progress_callback", None)
|
||||||
return TextDocumentParser()
|
return TextDocumentParser()
|
||||||
|
|||||||
@@ -1,136 +0,0 @@
|
|||||||
from pathlib import Path
|
|
||||||
|
|
||||||
import httpx
|
|
||||||
from django.conf import settings
|
|
||||||
from django.utils import timezone
|
|
||||||
from gotenberg_client import GotenbergClient
|
|
||||||
from gotenberg_client.options import PdfAFormat
|
|
||||||
from tika_client import TikaClient
|
|
||||||
|
|
||||||
from documents.parsers import DocumentParser
|
|
||||||
from documents.parsers import ParseError
|
|
||||||
from documents.parsers import make_thumbnail_from_pdf
|
|
||||||
from paperless.config import OutputTypeConfig
|
|
||||||
from paperless.models import OutputTypeChoices
|
|
||||||
|
|
||||||
|
|
||||||
class TikaDocumentParser(DocumentParser):
    """
    This parser sends documents to a local tika server

    Text, creation date and metadata are requested from the Tika server at
    ``settings.TIKA_ENDPOINT``; the PDF rendition (used as the archive file and
    as the thumbnail source) is requested from the Gotenberg server at
    ``settings.TIKA_GOTENBERG_ENDPOINT``.
    """

    logging_name = "paperless.parsing.tika"

    def get_thumbnail(self, document_path, mime_type, file_name=None):
        """
        Build a thumbnail from the PDF rendition of the document.

        The rendition stored in ``self.archive_path`` is reused when present;
        otherwise the document is converted first.
        """
        if not self.archive_path:
            self.archive_path = self.convert_to_pdf(document_path, file_name)

        return make_thumbnail_from_pdf(
            self.archive_path,
            self.tempdir,
            self.logging_group,
        )

    def extract_metadata(self, document_path, mime_type):
        """
        Return the Tika metadata of the document as a list of dicts with the
        keys ``namespace``, ``prefix``, ``key`` and ``value``.

        This is best-effort: any failure is logged as a warning and an empty
        list is returned instead of raising.
        """
        try:
            with TikaClient(
                tika_url=settings.TIKA_ENDPOINT,
                timeout=settings.CELERY_TASK_TIME_LIMIT,
            ) as client:
                parsed = client.metadata.from_file(document_path, mime_type)
                return [
                    {
                        "namespace": "",
                        "prefix": "",
                        "key": key,
                        "value": parsed.data[key],
                    }
                    for key in parsed.data
                ]
        except Exception as e:
            self.log.warning(
                f"Error while fetching document metadata for {document_path}: {e}",
            )
            return []

    def parse(self, document_path: Path, mime_type: str, file_name=None) -> None:
        """
        Extract text and creation date via Tika, then create the PDF rendition.

        Sets ``self.text`` (stripped when not None), ``self.date`` (made
        timezone-aware when naive) and ``self.archive_path``.

        Raises:
            ParseError: if the Tika request fails or the PDF conversion fails.
        """
        self.log.info(f"Sending {document_path} to Tika server")

        try:
            with TikaClient(
                tika_url=settings.TIKA_ENDPOINT,
                timeout=settings.CELERY_TASK_TIME_LIMIT,
            ) as client:
                try:
                    parsed = client.tika.as_text.from_file(document_path, mime_type)
                except httpx.HTTPStatusError as err:
                    # Workaround https://issues.apache.org/jira/browse/TIKA-4110
                    # Tika fails with some files as multi-part form data
                    if err.response.status_code == httpx.codes.INTERNAL_SERVER_ERROR:
                        # Retry by sending the raw bytes instead of the file
                        parsed = client.tika.as_text.from_buffer(
                            document_path.read_bytes(),
                            mime_type,
                        )
                    else:  # pragma: no cover
                        raise
        except Exception as err:
            raise ParseError(
                f"Could not parse {document_path} with tika server at "
                f"{settings.TIKA_ENDPOINT}: {err}",
            ) from err

        self.text = parsed.content
        if self.text is not None:
            self.text = self.text.strip()

        self.date = parsed.created
        if self.date is not None and timezone.is_naive(self.date):
            self.date = timezone.make_aware(self.date)

        self.archive_path = self.convert_to_pdf(document_path, file_name)

    def convert_to_pdf(self, document_path: Path, file_name):
        """
        Convert the document to PDF with Gotenberg's LibreOffice route and
        return the path of the written file (``convert.pdf`` in the tempdir).

        ``file_name`` is accepted for interface compatibility but is not used.

        Raises:
            ParseError: if the Gotenberg request or writing the result fails.
        """
        pdf_path = Path(self.tempdir) / "convert.pdf"

        self.log.info(f"Converting {document_path} to PDF as {pdf_path}")

        # Resolve the output type through the parser's own configuration object
        # rather than reading settings.OCR_OUTPUT_TYPE directly, so the value
        # is whatever OutputTypeConfig resolves instead of only the raw setting.
        output_type = self.get_settings().output_type

        with (
            GotenbergClient(
                host=settings.TIKA_GOTENBERG_ENDPOINT,
                timeout=settings.CELERY_TASK_TIME_LIMIT,
            ) as client,
            client.libre_office.to_pdf() as route,
        ):
            # Set the output format of the resulting PDF
            if output_type in {
                OutputTypeChoices.PDF_A,
                OutputTypeChoices.PDF_A2,
            }:
                route.pdf_format(PdfAFormat.A2b)
            elif output_type == OutputTypeChoices.PDF_A1:
                self.log.warning(
                    "Gotenberg does not support PDF/A-1a, choosing PDF/A-2b instead",
                )
                route.pdf_format(PdfAFormat.A2b)
            elif output_type == OutputTypeChoices.PDF_A3:
                route.pdf_format(PdfAFormat.A3b)
            # Any other output type: no PDF/A format is requested

            route.convert(document_path)

            try:
                response = route.run()

                pdf_path.write_bytes(response.content)

                return pdf_path

            except Exception as err:
                raise ParseError(
                    f"Error while converting document to PDF: {err}",
                ) from err

    def get_settings(self) -> OutputTypeConfig:
        """
        This parser only uses the PDF output type configuration currently
        """
        return OutputTypeConfig()
|
|
||||||
@@ -1,7 +1,15 @@
|
|||||||
def get_parser(*args, **kwargs):
|
def get_parser(*args, **kwargs):
|
||||||
from paperless_tika.parsers import TikaDocumentParser
|
from paperless.parsers.tika import TikaDocumentParser
|
||||||
|
|
||||||
return TikaDocumentParser(*args, **kwargs)
|
# TikaDocumentParser accepts logging_group for constructor compatibility but
|
||||||
|
# does not store or use it (no legacy DocumentParser base class).
|
||||||
|
# progress_callback is also not used. Both may arrive as a positional arg
|
||||||
|
# (consumer) or a keyword arg (views); *args absorbs the positional form,
|
||||||
|
# kwargs.pop handles the keyword form. Phase 4 will replace this signal
|
||||||
|
# path with the new ParserRegistry so the shim can be removed at that point.
|
||||||
|
kwargs.pop("logging_group", None)
|
||||||
|
kwargs.pop("progress_callback", None)
|
||||||
|
return TikaDocumentParser()
|
||||||
|
|
||||||
|
|
||||||
def tika_consumer_declaration(sender, **kwargs):
|
def tika_consumer_declaration(sender, **kwargs):
|
||||||
|
|||||||
@@ -1,41 +0,0 @@
|
|||||||
from collections.abc import Generator
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
from paperless_tika.parsers import TikaDocumentParser
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture()
def tika_parser() -> Generator[TikaDocumentParser, None, None]:
    """Yield a TikaDocumentParser and call its ``cleanup()`` after the test."""
    # Construct outside the try block: if the constructor raises there is
    # nothing to clean up, and the original error must surface instead of an
    # UnboundLocalError from the finally clause.
    parser = TikaDocumentParser(logging_group=None)
    try:
        yield parser
    finally:
        # TODO(stumpylog): Cleanup once all parsers are handled
        parser.cleanup()
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="session")
def sample_dir() -> Path:
    """Resolved path of the ``samples`` directory next to this file."""
    here = Path(__file__).parent
    samples = here.joinpath("samples")
    return samples.resolve()
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="session")
def sample_odt_file(sample_dir: Path) -> Path:
    """Path of ``sample.odt`` inside the samples directory."""
    odt_path = sample_dir.joinpath("sample.odt")
    return odt_path
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="session")
def sample_docx_file(sample_dir: Path) -> Path:
    """Path of ``sample.docx`` inside the samples directory."""
    docx_path = sample_dir.joinpath("sample.docx")
    return docx_path
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="session")
def sample_doc_file(sample_dir: Path) -> Path:
    """Path of ``sample.doc`` inside the samples directory."""
    doc_path = sample_dir.joinpath("sample.doc")
    return doc_path
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="session")
def sample_broken_odt(sample_dir: Path) -> Path:
    """Path of ``multi-part-broken.odt`` inside the samples directory."""
    broken_path = sample_dir.joinpath("multi-part-broken.odt")
    return broken_path
|
|
||||||
115
uv.lock
generated
115
uv.lock
generated
@@ -831,6 +831,15 @@ wheels = [
|
|||||||
{ url = "https://files.pythonhosted.org/packages/2d/82/e5d2c1c67d19841e9edc74954c827444ae826978499bde3dfc1d007c8c11/deepmerge-2.0-py3-none-any.whl", hash = "sha256:6de9ce507115cff0bed95ff0ce9ecc31088ef50cbdf09bc90a09349a318b3d00", size = 13475, upload-time = "2024-08-30T05:31:48.659Z" },
|
{ url = "https://files.pythonhosted.org/packages/2d/82/e5d2c1c67d19841e9edc74954c827444ae826978499bde3dfc1d007c8c11/deepmerge-2.0-py3-none-any.whl", hash = "sha256:6de9ce507115cff0bed95ff0ce9ecc31088ef50cbdf09bc90a09349a318b3d00", size = 13475, upload-time = "2024-08-30T05:31:48.659Z" },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "defusedxml"
|
||||||
|
version = "0.7.1"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/0f/d5/c66da9b79e5bdb124974bfe172b4daf3c984ebd9c2a06e2b8a4dc7331c72/defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69", size = 75520, upload-time = "2021-03-08T10:59:26.269Z" }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/07/6c/aa3f2f849e01cb6a001cd8554a88d4c77c5c1a31c95bdf1cf9301e6d9ef4/defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61", size = 25604, upload-time = "2021-03-08T10:59:24.45Z" },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "deprecated"
|
name = "deprecated"
|
||||||
version = "1.3.1"
|
version = "1.3.1"
|
||||||
@@ -1283,6 +1292,59 @@ wheels = [
|
|||||||
{ url = "https://files.pythonhosted.org/packages/a6/ff/ee2f67c0ff146ec98b5df1df637b2bc2d17beeb05df9f427a67bd7a7d79c/flower-2.0.1-py2.py3-none-any.whl", hash = "sha256:9db2c621eeefbc844c8dd88be64aef61e84e2deb29b271e02ab2b5b9f01068e2", size = 383553, upload-time = "2023-08-13T14:37:41.552Z" },
|
{ url = "https://files.pythonhosted.org/packages/a6/ff/ee2f67c0ff146ec98b5df1df637b2bc2d17beeb05df9f427a67bd7a7d79c/flower-2.0.1-py2.py3-none-any.whl", hash = "sha256:9db2c621eeefbc844c8dd88be64aef61e84e2deb29b271e02ab2b5b9f01068e2", size = 383553, upload-time = "2023-08-13T14:37:41.552Z" },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "fonttools"
|
||||||
|
version = "4.62.0"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/5a/96/686339e0fda8142b7ebed39af53f4a5694602a729662f42a6209e3be91d0/fonttools-4.62.0.tar.gz", hash = "sha256:0dc477c12b8076b4eb9af2e440421b0433ffa9e1dcb39e0640a6c94665ed1098", size = 3579521, upload-time = "2026-03-09T16:50:06.217Z" }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/e4/33/63d79ca41020dd460b51f1e0f58ad1ff0a36b7bcbdf8f3971d52836581e9/fonttools-4.62.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:196cafef9aeec5258425bd31a4e9a414b2ee0d1557bca184d7923d3d3bcd90f9", size = 2870816, upload-time = "2026-03-09T16:48:32.39Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/c0/7a/9aeec114bc9fc00d757a41f092f7107863d372e684a5b5724c043654477c/fonttools-4.62.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:153afc3012ff8761b1733e8fbe5d98623409774c44ffd88fbcb780e240c11d13", size = 2416127, upload-time = "2026-03-09T16:48:34.627Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/5a/71/12cfd8ae0478b7158ffa8850786781f67e73c00fd897ef9d053415c5f88b/fonttools-4.62.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:13b663fb197334de84db790353d59da2a7288fd14e9be329f5debc63ec0500a5", size = 5100678, upload-time = "2026-03-09T16:48:36.454Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/8a/d7/8e4845993ee233c2023d11babe9b3dae7d30333da1d792eeccebcb77baab/fonttools-4.62.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:591220d5333264b1df0d3285adbdfe2af4f6a45bbf9ca2b485f97c9f577c49ff", size = 5070859, upload-time = "2026-03-09T16:48:38.786Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/ae/a0/287ae04cd883a52e7bb1d92dfc4997dcffb54173761c751106845fa9e316/fonttools-4.62.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:579f35c121528a50c96bf6fcb6a393e81e7f896d4326bf40e379f1c971603db9", size = 5076689, upload-time = "2026-03-09T16:48:41.886Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/6d/4e/a2377ad26c36fcd3e671a1c316ea5ed83107de1588e2d897a98349363bc7/fonttools-4.62.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:44956b003151d5a289eba6c71fe590d63509267c37e26de1766ba15d9c589582", size = 5202053, upload-time = "2026-03-09T16:48:43.867Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/ab/9d/7ad1ffc080619f67d0b1e0fa6a0578f0be077404f13fd8e448d1616a94a3/fonttools-4.62.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:22bde4dc12a9e09b5ced77f3b5053d96cf10c4976c6ac0dee293418ef289d221", size = 2870004, upload-time = "2026-03-09T16:48:50.837Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/4d/8b/ba59069a490f61b737e064c3129453dbd28ee38e81d56af0d04d7e6b4de4/fonttools-4.62.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7199c73b326bad892f1cb53ffdd002128bfd58a89b8f662204fbf1daf8d62e85", size = 2414662, upload-time = "2026-03-09T16:48:53.295Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/8c/8c/c52a4310de58deeac7e9ea800892aec09b00bb3eb0c53265b31ec02be115/fonttools-4.62.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d732938633681d6e2324e601b79e93f7f72395ec8681f9cdae5a8c08bc167e72", size = 5032975, upload-time = "2026-03-09T16:48:55.718Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/0b/a1/d16318232964d786907b9b3613b8409f74cf0be2da400854509d3a864e43/fonttools-4.62.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:31a804c16d76038cc4e3826e07678efb0a02dc4f15396ea8e07088adbfb2578e", size = 4988544, upload-time = "2026-03-09T16:48:57.715Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/b2/8d/7e745ca3e65852adc5e52a83dc213fe1b07d61cb5b394970fcd4b1199d1e/fonttools-4.62.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:090e74ac86e68c20150e665ef8e7e0c20cb9f8b395302c9419fa2e4d332c3b51", size = 4971296, upload-time = "2026-03-09T16:48:59.678Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/e6/d4/b717a4874175146029ca1517e85474b1af80c9d9a306fc3161e71485eea5/fonttools-4.62.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:8f086120e8be9e99ca1288aa5ce519833f93fe0ec6ebad2380c1dee18781f0b5", size = 5122503, upload-time = "2026-03-09T16:49:02.464Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/82/c7/985c1670aa6d82ef270f04cde11394c168f2002700353bd2bde405e59b8f/fonttools-4.62.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:274c8b8a87e439faf565d3bcd3f9f9e31bca7740755776a4a90a4bfeaa722efa", size = 2864929, upload-time = "2026-03-09T16:49:09.331Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/c1/dc/c409c8ceec0d3119e9ab0b7b1a2e3c76d1f4d66e4a9db5c59e6b7652e7df/fonttools-4.62.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:93e27131a5a0ae82aaadcffe309b1bae195f6711689722af026862bede05c07c", size = 2412586, upload-time = "2026-03-09T16:49:11.378Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/5f/ac/8e300dbf7b4d135287c261ffd92ede02d9f48f0d2db14665fbc8b059588a/fonttools-4.62.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:83c6524c5b93bad9c2939d88e619fedc62e913c19e673f25d5ab74e7a5d074e5", size = 5013708, upload-time = "2026-03-09T16:49:14.063Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/fb/bc/60d93477b653eeb1ddf5f9ec34be689b79234d82dbdded269ac0252715b8/fonttools-4.62.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:106aec9226f9498fc5345125ff7200842c01eda273ae038f5049b0916907acee", size = 4964355, upload-time = "2026-03-09T16:49:16.515Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/cb/eb/6dc62bcc3c3598c28a3ecb77e69018869c3e109bd83031d4973c059d318b/fonttools-4.62.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:15d86b96c79013320f13bc1b15f94789edb376c0a2d22fb6088f33637e8dfcbc", size = 4953472, upload-time = "2026-03-09T16:49:18.494Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/82/b3/3af7592d9b254b7b7fec018135f8776bfa0d1ad335476c2791b1334dc5e4/fonttools-4.62.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4f16c07e5250d5d71d0f990a59460bc5620c3cc456121f2cfb5b60475699905f", size = 5094701, upload-time = "2026-03-09T16:49:21.67Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/1a/64/61f69298aa6e7c363dcf00dd6371a654676900abe27d1effd1a74b43e5d0/fonttools-4.62.0-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:4fa5a9c716e2f75ef34b5a5c2ca0ee4848d795daa7e6792bf30fd4abf8993449", size = 2864222, upload-time = "2026-03-09T16:49:28.285Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/c6/57/6b08756fe4455336b1fe160ab3c11fccc90768ccb6ee03fb0b45851aace4/fonttools-4.62.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:625f5cbeb0b8f4e42343eaeb4bc2786718ddd84760a2f5e55fdd3db049047c00", size = 2410674, upload-time = "2026-03-09T16:49:30.504Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/6f/86/db65b63bb1b824b63e602e9be21b18741ddc99bcf5a7850f9181159ae107/fonttools-4.62.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6247e58b96b982709cd569a91a2ba935d406dccf17b6aa615afaed37ac3856aa", size = 4999387, upload-time = "2026-03-09T16:49:32.593Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/86/c8/c6669e42d2f4efd60d38a3252cebbb28851f968890efb2b9b15f9d1092b0/fonttools-4.62.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:840632ea9c1eab7b7f01c369e408c0721c287dfd7500ab937398430689852fd1", size = 4912506, upload-time = "2026-03-09T16:49:34.927Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/2e/49/0ae552aa098edd0ec548413fbf818f52ceb70535016215094a5ce9bf8f70/fonttools-4.62.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:28a9ea2a7467a816d1bec22658b0cce4443ac60abac3e293bdee78beb74588f3", size = 4951202, upload-time = "2026-03-09T16:49:37.1Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/71/65/ae38fc8a4cea6f162d74cf11f58e9aeef1baa7d0e3d1376dabd336c129e5/fonttools-4.62.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5ae611294f768d413949fd12693a8cba0e6332fbc1e07aba60121be35eac68d0", size = 5060758, upload-time = "2026-03-09T16:49:39.464Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/f8/65/f47f9b3db1ec156a1f222f1089ba076b2cc9ee1d024a8b0a60c54258517e/fonttools-4.62.0-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:0361a7d41d86937f1f752717c19f719d0fde064d3011038f9f19bdf5fc2f5c95", size = 2947079, upload-time = "2026-03-09T16:49:46.471Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/52/73/bc62e5058a0c22cf02b1e0169ef0c3ca6c3247216d719f95bead3c05a991/fonttools-4.62.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:d4108c12773b3c97aa592311557c405d5b4fc03db2b969ed928fcf68e7b3c887", size = 2448802, upload-time = "2026-03-09T16:49:48.328Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/2b/df/bfaa0e845884935355670e6e68f137185ab87295f8bc838db575e4a66064/fonttools-4.62.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b448075f32708e8fb377fe7687f769a5f51a027172c591ba9a58693631b077a8", size = 5137378, upload-time = "2026-03-09T16:49:50.223Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/32/32/04f616979a18b48b52e634988b93d847b6346260faf85ecccaf7e2e9057f/fonttools-4.62.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e5f1fa8cc9f1a56a3e33ee6b954d6d9235e6b9d11eb7a6c9dfe2c2f829dc24db", size = 4920714, upload-time = "2026-03-09T16:49:53.172Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/3b/2e/274e16689c1dfee5c68302cd7c444213cfddd23cf4620374419625037ec6/fonttools-4.62.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:f8c8ea812f82db1e884b9cdb663080453e28f0f9a1f5027a5adb59c4cc8d38d1", size = 5016012, upload-time = "2026-03-09T16:49:55.762Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/7f/0c/b08117270626e7117ac2f89d732fdd4386ec37d2ab3a944462d29e6f89a1/fonttools-4.62.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:03c6068adfdc67c565d217e92386b1cdd951abd4240d65180cec62fa74ba31b2", size = 5042766, upload-time = "2026-03-09T16:49:57.726Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/9c/57/c2487c281dde03abb2dec244fd67059b8d118bd30a653cbf69e94084cb23/fonttools-4.62.0-py3-none-any.whl", hash = "sha256:75064f19a10c50c74b336aa5ebe7b1f89fd0fb5255807bfd4b0c6317098f4af3", size = 1152427, upload-time = "2026-03-09T16:50:04.074Z" },
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "fpdf2"
|
||||||
|
version = "2.8.7"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
dependencies = [
|
||||||
|
{ name = "defusedxml", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||||
|
{ name = "fonttools", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||||
|
{ name = "pillow", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||||
|
]
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/27/f2/72feae0b2827ed38013e4307b14f95bf0b3d124adfef4d38a7d57533f7be/fpdf2-2.8.7.tar.gz", hash = "sha256:7060ccee5a9c7ab0a271fb765a36a23639f83ef8996c34e3d46af0a17ede57f9", size = 362351, upload-time = "2026-02-28T05:39:16.456Z" }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/66/0a/cf50ecffa1e3747ed9380a3adfc829259f1f86b3fdbd9e505af789003141/fpdf2-2.8.7-py3-none-any.whl", hash = "sha256:d391fc508a3ce02fc43a577c830cda4fe6f37646f2d143d489839940932fbc19", size = 327056, upload-time = "2026-02-28T05:39:14.619Z" },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "frozenlist"
|
name = "frozenlist"
|
||||||
version = "1.8.0"
|
version = "1.8.0"
|
||||||
@@ -2722,10 +2784,11 @@ wheels = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "ocrmypdf"
|
name = "ocrmypdf"
|
||||||
version = "16.13.0"
|
version = "17.3.0"
|
||||||
source = { registry = "https://pypi.org/simple" }
|
source = { registry = "https://pypi.org/simple" }
|
||||||
dependencies = [
|
dependencies = [
|
||||||
{ name = "deprecation", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
{ name = "deprecation", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||||
|
{ name = "fpdf2", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||||
{ name = "img2pdf", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
{ name = "img2pdf", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||||
{ name = "packaging", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
{ name = "packaging", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||||
{ name = "pdfminer-six", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
{ name = "pdfminer-six", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||||
@@ -2733,11 +2796,14 @@ dependencies = [
|
|||||||
{ name = "pikepdf", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
{ name = "pikepdf", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||||
{ name = "pillow", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
{ name = "pillow", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||||
{ name = "pluggy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
{ name = "pluggy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||||
|
{ name = "pydantic", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||||
|
{ name = "pypdfium2", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||||
{ name = "rich", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
{ name = "rich", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||||
|
{ name = "uharfbuzz", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||||
]
|
]
|
||||||
sdist = { url = "https://files.pythonhosted.org/packages/8c/52/be1aaece0703a736757d8957c0d4f19c37561054169b501eb0e7132f15e5/ocrmypdf-16.13.0.tar.gz", hash = "sha256:29d37e915234ce717374863a9cc5dd32d29e063dfe60c51380dda71254c88248", size = 7042247, upload-time = "2025-12-24T07:58:35.86Z" }
|
sdist = { url = "https://files.pythonhosted.org/packages/fa/fe/60bdc79529be1ad8b151d426ed2020d5ac90328c54e9ba92bd808e1535c1/ocrmypdf-17.3.0.tar.gz", hash = "sha256:4022f13aad3f405e330056a07aa8bd63714b48b414693831b56e2cf2c325f52d", size = 7378015, upload-time = "2026-02-21T09:30:07.207Z" }
|
||||||
wheels = [
|
wheels = [
|
||||||
{ url = "https://files.pythonhosted.org/packages/41/b1/e2e7ad98de0d3ee05b44dbc3f78ccb158a620f3add82d00c85490120e7f2/ocrmypdf-16.13.0-py3-none-any.whl", hash = "sha256:fad8a6f7cc52cdc6225095c401a1766c778c47efe9f1e854ae4dc64a550a3d37", size = 165377, upload-time = "2025-12-24T07:58:33.925Z" },
|
{ url = "https://files.pythonhosted.org/packages/3d/b1/b7ae057a1bcb1495067ee3c4d48c1ce5fc66addd9492307c5a0ff799a7f2/ocrmypdf-17.3.0-py3-none-any.whl", hash = "sha256:c8882e7864954d3db6bcee49cc9f261b65bff66b7e5925eb68a1c281f41cad23", size = 488130, upload-time = "2026-02-21T09:30:05.236Z" },
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -2978,7 +3044,7 @@ requires-dist = [
|
|||||||
{ name = "llama-index-vector-stores-faiss", specifier = ">=0.5.2" },
|
{ name = "llama-index-vector-stores-faiss", specifier = ">=0.5.2" },
|
||||||
{ name = "mysqlclient", marker = "extra == 'mariadb'", specifier = "~=2.2.7" },
|
{ name = "mysqlclient", marker = "extra == 'mariadb'", specifier = "~=2.2.7" },
|
||||||
{ name = "nltk", specifier = "~=3.9.1" },
|
{ name = "nltk", specifier = "~=3.9.1" },
|
||||||
{ name = "ocrmypdf", specifier = "~=16.13.0" },
|
{ name = "ocrmypdf", specifier = "~=17.3.0" },
|
||||||
{ name = "openai", specifier = ">=1.76" },
|
{ name = "openai", specifier = ">=1.76" },
|
||||||
{ name = "pathvalidate", specifier = "~=3.3.1" },
|
{ name = "pathvalidate", specifier = "~=3.3.1" },
|
||||||
{ name = "pdf2image", specifier = "~=1.17.0" },
|
{ name = "pdf2image", specifier = "~=1.17.0" },
|
||||||
@@ -3655,6 +3721,30 @@ wheels = [
|
|||||||
{ url = "https://files.pythonhosted.org/packages/d1/81/ef2b1dfd1862567d573a4fdbc9f969067621764fbb74338496840a1d2977/pyopenssl-25.3.0-py3-none-any.whl", hash = "sha256:1fda6fc034d5e3d179d39e59c1895c9faeaf40a79de5fc4cbbfbe0d36f4a77b6", size = 57268, upload-time = "2025-09-17T00:32:19.474Z" },
|
{ url = "https://files.pythonhosted.org/packages/d1/81/ef2b1dfd1862567d573a4fdbc9f969067621764fbb74338496840a1d2977/pyopenssl-25.3.0-py3-none-any.whl", hash = "sha256:1fda6fc034d5e3d179d39e59c1895c9faeaf40a79de5fc4cbbfbe0d36f4a77b6", size = 57268, upload-time = "2025-09-17T00:32:19.474Z" },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pypdfium2"
|
||||||
|
version = "5.6.0"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/3b/01/be763b9081c7eb823196e7d13d9c145bf75ac43f3c1466de81c21c24b381/pypdfium2-5.6.0.tar.gz", hash = "sha256:bcb9368acfe3547054698abbdae68ba0cbd2d3bda8e8ee437e061deef061976d", size = 270714, upload-time = "2026-03-08T01:05:06.5Z" }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/6e/f6/9f9e190fe0e5a6b86b82f83bd8b5d3490348766062381140ca5cad8e00b1/pypdfium2-5.6.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:e468c38997573f0e86f03273c2c1fbdea999de52ba43fee96acaa2f6b2ad35f7", size = 3412541, upload-time = "2026-03-08T01:04:25.45Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/ee/8d/e57492cb2228ba56ed57de1ff044c8ac114b46905f8b1445c33299ba0488/pypdfium2-5.6.0-py3-none-macosx_11_0_x86_64.whl", hash = "sha256:ad3abddc5805424f962e383253ccad6a0d1d2ebd86afa9a9e1b9ca659773cd0d", size = 3592320, upload-time = "2026-03-08T01:04:27.509Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/f9/8a/8ab82e33e9c551494cbe1526ea250ca8cc4e9e98d6a4fc6b6f8d959aa1d1/pypdfium2-5.6.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f6b5eb9eae5c45076395454522ca26add72ba8bd1fe473e1e4721aa58521470c", size = 3596450, upload-time = "2026-03-08T01:04:29.183Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/f5/b5/602a792282312ccb158cc63849528079d94b0a11efdc61f2a359edfb41e9/pypdfium2-5.6.0-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:258624da8ef45cdc426e11b33e9d83f9fb723c1c201c6e0f4ab5a85966c6b876", size = 3325442, upload-time = "2026-03-08T01:04:30.886Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/81/1f/9e48ec05ed8d19d736c2d1f23c1bd0f20673f02ef846a2576c69e237f15d/pypdfium2-5.6.0-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e9367451c8a00931d6612db0822525a18c06f649d562cd323a719e46ac19c9bb", size = 3727434, upload-time = "2026-03-08T01:04:33.619Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/33/90/0efd020928b4edbd65f4f3c2af0c84e20b43a3ada8fa6d04f999a97afe7a/pypdfium2-5.6.0-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a757869f891eac1cc1372e38a4aa01adac8abc8fe2a8a4e2ebf50595e3bf5937", size = 4139029, upload-time = "2026-03-08T01:04:36.08Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/ff/49/a640b288a48dab1752281dd9b72c0679fccea107874e80a65a606b00efa9/pypdfium2-5.6.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:515be355222cc57ae9e62cd5c7c350b8e0c863efc539f80c7d75e2811ba45cb6", size = 3646387, upload-time = "2026-03-08T01:04:38.151Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/b0/3b/a344c19c01021eeb5d830c102e4fc9b1602f19c04aa7d11abbe2d188fd8e/pypdfium2-5.6.0-py3-none-manylinux_2_27_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d1c4753c7caf7d004211d7f57a21f10d127f5e0e5510a14d24bc073e7220a3ea", size = 3097212, upload-time = "2026-03-08T01:04:40.776Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/50/96/e48e13789ace22aeb9b7510904a1b1493ec588196e11bbacc122da330b3d/pypdfium2-5.6.0-py3-none-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:c49729090281fdd85775fb8912c10bd19e99178efaa98f145ab06e7ce68554d2", size = 2965026, upload-time = "2026-03-08T01:04:42.857Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/cb/06/3100e44d4935f73af8f5d633d3bd40f0d36d606027085a0ef1f0566a6320/pypdfium2-5.6.0-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:a4a1749a8d4afd62924a8d95cfa4f2e26fc32957ce34ac3b674be6f127ed252e", size = 4131431, upload-time = "2026-03-08T01:04:44.982Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/64/ef/d8df63569ce9a66c8496057782eb8af78e0d28667922d62ec958434e3d4b/pypdfium2-5.6.0-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:36469ebd0fdffb7130ce45ed9c44f8232d91571c89eb851bd1633c64b6f6114f", size = 3747469, upload-time = "2026-03-08T01:04:46.702Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/a6/47/fd2c6a67a49fade1acd719fbd11f7c375e7219912923ef2de0ea0ac1544e/pypdfium2-5.6.0-py3-none-musllinux_1_2_i686.whl", hash = "sha256:9da900df09be3cf546b637a127a7b6428fb22d705951d731269e25fd3adef457", size = 4337578, upload-time = "2026-03-08T01:04:49.007Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/6b/f5/836c83e54b01e09478c4d6bf4912651d6053c932250fcee953f5c72d8e4a/pypdfium2-5.6.0-py3-none-musllinux_1_2_ppc64le.whl", hash = "sha256:45fccd5622233c5ec91a885770ae7dd4004d4320ac05a4ad8fa03a66dea40244", size = 4376104, upload-time = "2026-03-08T01:04:51.04Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/6e/7f/b940b6a1664daf8f9bad87c6c99b84effa3611615b8708d10392dc33036c/pypdfium2-5.6.0-py3-none-musllinux_1_2_riscv64.whl", hash = "sha256:282dc030e767cd61bd0299f9d581052b91188e2b87561489057a8e7963e7e0cb", size = 3929824, upload-time = "2026-03-08T01:04:53.544Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/88/79/00267d92a6a58c229e364d474f5698efe446e0c7f4f152f58d0138715e99/pypdfium2-5.6.0-py3-none-musllinux_1_2_s390x.whl", hash = "sha256:a1c1dfe950382c76a7bba1ba160ec5e40df8dd26b04a1124ae268fda55bc4cbe", size = 4270201, upload-time = "2026-03-08T01:04:55.81Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/e1/ab/b127f38aba41746bdf9ace15ba08411d7ef6ecba1326d529ba414eb1ed50/pypdfium2-5.6.0-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:43b0341ca6feb6c92e4b7a9eb4813e5466f5f5e8b6baeb14df0a94d5f312c00b", size = 4180793, upload-time = "2026-03-08T01:04:57.961Z" },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "pyrefly"
|
name = "pyrefly"
|
||||||
version = "0.55.0"
|
version = "0.55.0"
|
||||||
@@ -5143,6 +5233,23 @@ wheels = [
|
|||||||
{ url = "https://files.pythonhosted.org/packages/b1/5e/512aeb40fd819f4660d00f96f5c7371ee36fc8c6b605128c5ee59e0b28c6/u_msgpack_python-2.8.0-py2.py3-none-any.whl", hash = "sha256:1d853d33e78b72c4228a2025b4db28cda81214076e5b0422ed0ae1b1b2bb586a", size = 10590, upload-time = "2023-05-18T09:28:10.323Z" },
|
{ url = "https://files.pythonhosted.org/packages/b1/5e/512aeb40fd819f4660d00f96f5c7371ee36fc8c6b605128c5ee59e0b28c6/u_msgpack_python-2.8.0-py2.py3-none-any.whl", hash = "sha256:1d853d33e78b72c4228a2025b4db28cda81214076e5b0422ed0ae1b1b2bb586a", size = 10590, upload-time = "2023-05-18T09:28:10.323Z" },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "uharfbuzz"
|
||||||
|
version = "0.53.3"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/1c/8d/7c82298bfa5c96f018541661bc2ccdf90dfe397bb2724db46725bf495466/uharfbuzz-0.53.3.tar.gz", hash = "sha256:9a87175c14d1361322ce2a3504e63c6b66062934a5edf47266aed5b33416806c", size = 1714488, upload-time = "2026-01-24T13:10:43.693Z" }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/51/88/5df9337adb60d7b1ad150b162bbc5c56d783d15546714085d92b9531f8f3/uharfbuzz-0.53.3-cp310-abi3-macosx_10_9_universal2.whl", hash = "sha256:d977e41a501d9e8af3f2c329d75031037ee79634bc29ca3872e9115c44e67d25", size = 2722639, upload-time = "2026-01-24T13:10:22.436Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/39/c4/8b4b050e77d6cb9a84af509e5796734f0e687bd02ad11757a581bd6f197d/uharfbuzz-0.53.3-cp310-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:21d512c94aa992691aaf5b433deaca7e51f4ea54c68b99f535974073364f806f", size = 1647506, upload-time = "2026-01-24T13:10:24.16Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/30/ff/8e7cf78d525604f3e0a43b9468263fcf2acb5d208a3979c3bfa8dc61112d/uharfbuzz-0.53.3-cp310-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dca9a2e071c0c59ba8f382356f31a2518ac3dc7cc77e4f3519defc454c5b9a97", size = 1706448, upload-time = "2026-01-24T13:10:25.729Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/9b/a0/739471cdd52723ecc9fc80f36fb92c706a87265dc258521c1b14d99414f7/uharfbuzz-0.53.3-cp310-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:1191a74ddcf18ec721161b6b33a8ab31b0c6a2b15c6724a9b663127bf7f07d2e", size = 2664628, upload-time = "2026-01-24T13:10:27.814Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/ae/4a/63a81e9eef922b9f26bd948b518b73704d01a8d8e83324b2f99084ab7af0/uharfbuzz-0.53.3-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:35ec3b600b3f63e7659792f9bd43e1ffb389d3d2aac8285f269d11efbe04787d", size = 2757384, upload-time = "2026-01-24T13:10:29.669Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/e2/d2/27be1201488323d0ff0c99fb966a0522b2736f79bd5a5b7b99526fca3d98/uharfbuzz-0.53.3-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:6f0ad2812303d2c7ccff596fd6c9d5629874f3a83f30255e11639c9b7ba4e89d", size = 1335822, upload-time = "2026-01-24T13:10:34.774Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/70/99/53e39bcd4dec5981eb70a6a76285a862c8a76b80cd52e8f40fe51adab032/uharfbuzz-0.53.3-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:757d9ed1841912e8f229319f335cf7dd25a2fd377e444bda9deb720617192e12", size = 1237560, upload-time = "2026-01-24T13:10:36.971Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/aa/2b/04d8cde466acfe70373d4f489da5c6eab0aba07d50442dd21217cb0fd167/uharfbuzz-0.53.3-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5d3a0b824811bd1be129356818e6cdbf0e4b056bb60aa9a5eb270bff9d21f24c", size = 1497923, upload-time = "2026-01-24T13:10:38.743Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/f3/01/a250521491bc995609275e0062c552b16f437a3ce15de83250176245093e/uharfbuzz-0.53.3-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9211d798b2921a99b8c34e810676137f66372d3b5447765b72d969bdfa6abe6a", size = 1556794, upload-time = "2026-01-24T13:10:40.262Z" },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "ujson"
|
name = "ujson"
|
||||||
version = "5.11.0"
|
version = "5.11.0"
|
||||||
|
|||||||
Reference in New Issue
Block a user