diff --git a/.github/workflows/ci-backend.yml b/.github/workflows/ci-backend.yml
index f92d1fb00..cff139e8c 100644
--- a/.github/workflows/ci-backend.yml
+++ b/.github/workflows/ci-backend.yml
@@ -24,6 +24,7 @@ jobs:
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
fetch-depth: 0
+ persist-credentials: false
- name: Decide run mode
id: force
run: |
@@ -72,6 +73,8 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+ with:
+ persist-credentials: false
- name: Start containers
run: |
docker compose --file docker/compose/docker-compose.ci-test.yml pull --quiet
@@ -145,6 +148,8 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+ with:
+ persist-credentials: false
- name: Set up Python
id: setup-python
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
diff --git a/.github/workflows/ci-docker.yml b/.github/workflows/ci-docker.yml
index 48d258dca..43b79728d 100644
--- a/.github/workflows/ci-docker.yml
+++ b/.github/workflows/ci-docker.yml
@@ -42,6 +42,8 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+ with:
+ persist-credentials: false
- name: Determine ref name
id: ref
run: |
diff --git a/.github/workflows/ci-docs.yml b/.github/workflows/ci-docs.yml
index 68f202264..a598a3c9d 100644
--- a/.github/workflows/ci-docs.yml
+++ b/.github/workflows/ci-docs.yml
@@ -26,6 +26,7 @@ jobs:
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
fetch-depth: 0
+ persist-credentials: false
- name: Decide run mode
id: force
run: |
@@ -71,6 +72,8 @@ jobs:
- uses: actions/configure-pages@45bfe0192ca1faeb007ade9deae92b16b8254a0d # v6.0.0
- name: Checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+ with:
+ persist-credentials: false
- name: Set up Python
id: setup-python
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
diff --git a/.github/workflows/ci-frontend.yml b/.github/workflows/ci-frontend.yml
index dffb54e6b..9d4e23a1a 100644
--- a/.github/workflows/ci-frontend.yml
+++ b/.github/workflows/ci-frontend.yml
@@ -62,6 +62,8 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+ with:
+ persist-credentials: false
- name: Install pnpm
uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0
with:
@@ -90,6 +92,8 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+ with:
+ persist-credentials: false
- name: Install pnpm
uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0
with:
@@ -125,6 +129,8 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+ with:
+ persist-credentials: false
- name: Install pnpm
uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0
with:
@@ -176,6 +182,8 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+ with:
+ persist-credentials: false
- name: Install pnpm
uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0
with:
@@ -209,6 +217,7 @@ jobs:
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
fetch-depth: 2
+ persist-credentials: false
- name: Install pnpm
uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0
with:
diff --git a/.github/workflows/ci-lint.yml b/.github/workflows/ci-lint.yml
index bf1458e1d..314250719 100644
--- a/.github/workflows/ci-lint.yml
+++ b/.github/workflows/ci-lint.yml
@@ -16,6 +16,8 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+ with:
+ persist-credentials: false
- name: Install Python
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
diff --git a/.github/workflows/ci-release.yml b/.github/workflows/ci-release.yml
index b38ecbc40..030e3bcad 100644
--- a/.github/workflows/ci-release.yml
+++ b/.github/workflows/ci-release.yml
@@ -29,6 +29,8 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+ with:
+ persist-credentials: false
# ---- Frontend Build ----
- name: Install pnpm
uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0
@@ -179,6 +181,7 @@ jobs:
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
ref: main
+ persist-credentials: true # for pushing changelog branch
- name: Set up Python
id: setup-python
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
diff --git a/.github/workflows/ci-static-analysis.yml b/.github/workflows/ci-static-analysis.yml
new file mode 100644
index 000000000..99388354a
--- /dev/null
+++ b/.github/workflows/ci-static-analysis.yml
@@ -0,0 +1,42 @@
+name: Static Analysis
+on:
+ push:
+ branches-ignore:
+ - 'translations**'
+ pull_request:
+ branches-ignore:
+ - 'translations**'
+ workflow_dispatch:
+concurrency:
+ group: static-analysis-${{ github.event.pull_request.number || github.ref }}
+ cancel-in-progress: true
+permissions:
+ contents: read
+jobs:
+ zizmor:
+ name: Run zizmor
+ runs-on: ubuntu-24.04
+ permissions:
+ contents: read
+ actions: read
+ security-events: write
+ steps:
+ - name: Checkout
+ uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+ with:
+ persist-credentials: false
+ - name: Run zizmor
+ uses: zizmorcore/zizmor-action@71321a20a9ded102f6e9ce5718a2fcec2c4f70d8 # v0.5.2
+ semgrep:
+ name: Semgrep CE
+ runs-on: ubuntu-24.04
+ container:
+ image: semgrep/semgrep:1.155.0@sha256:cc869c685dcc0fe497c86258da9f205397d8108e56d21a86082ea4886e52784d
+ if: github.actor != 'dependabot[bot]'
+ steps:
+ - name: Checkout
+ uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+ with:
+ persist-credentials: false
+ - name: Run Semgrep
+ run: semgrep scan --config auto
diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index 08c2bc1a2..e295e938d 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -35,6 +35,8 @@ jobs:
steps:
- name: Checkout repository
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+ with:
+ persist-credentials: false
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@c793b717bc78562f491db7b0e93a3a178b099162 # v4.32.5
diff --git a/.github/workflows/crowdin.yml b/.github/workflows/crowdin.yml
index 38e73bbb5..29b4be02f 100644
--- a/.github/workflows/crowdin.yml
+++ b/.github/workflows/crowdin.yml
@@ -16,6 +16,7 @@ jobs:
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
token: ${{ secrets.PNGX_BOT_PAT }}
+ persist-credentials: false
- name: crowdin action
uses: crowdin/github-action@8818ff65bfc4322384f983ea37e3926948c11745 # v2.15.0
with:
diff --git a/.github/workflows/translate-strings.yml b/.github/workflows/translate-strings.yml
index c38886bc2..ad894abe7 100644
--- a/.github/workflows/translate-strings.yml
+++ b/.github/workflows/translate-strings.yml
@@ -17,6 +17,7 @@ jobs:
with:
token: ${{ secrets.PNGX_BOT_PAT }}
ref: ${{ env.GH_REF }}
+ persist-credentials: true # for pushing translation branch
- name: Set up Python
id: setup-python
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
diff --git a/docs/api.md b/docs/api.md
index 2284d9d29..af1190f3d 100644
--- a/docs/api.md
+++ b/docs/api.md
@@ -62,10 +62,14 @@ The REST api provides five different forms of authentication.
## Searching for documents
-Full text searching is available on the `/api/documents/` endpoint. Two
-specific query parameters cause the API to return full text search
+Full text searching is available on the `/api/documents/` endpoint. The
+following query parameters cause the API to return Tantivy-backed search
results:
+- `/api/documents/?text=your%20search%20query`: Search title and content
+ using simple substring-style search.
+- `/api/documents/?title_search=your%20search%20query`: Search title only
+ using simple substring-style search.
- `/api/documents/?query=your%20search%20query`: Search for a document
using a full text query. For details on the syntax, see [Basic Usage - Searching](usage.md#basic-usage_searching).
- `/api/documents/?more_like_id=1234`: Search for documents similar to
@@ -439,3 +443,5 @@ Initial API version.
- The `all` parameter of list endpoints is now deprecated and will be removed in a future version.
- The bulk edit objects endpoint now supports `all` and `filters` parameters to avoid having to send
large lists of object IDs for operations affecting many objects.
+- The legacy `title_content` document search parameter is deprecated and will be removed in a future version.
+ Clients should use `text` for simple title-and-content search and `title_search` for title-only search.
diff --git a/docs/migration-v3.md b/docs/migration-v3.md
index 900bcd5c5..c76996cc0 100644
--- a/docs/migration-v3.md
+++ b/docs/migration-v3.md
@@ -1,5 +1,24 @@
# v3 Migration Guide
+## Secret Key is Now Required
+
+The `PAPERLESS_SECRET_KEY` environment variable is now required. This is a critical security setting used for cryptographic signing and should be set to a long, random value.
+
+### Action Required
+
+If you are upgrading an existing installation, you must now set `PAPERLESS_SECRET_KEY` explicitly.
+
+If your installation was relying on the previous built-in default key, you have two options:
+
+- Set `PAPERLESS_SECRET_KEY` to that previous value to preserve existing sessions and tokens.
+- Set `PAPERLESS_SECRET_KEY` to a new random value to improve security. Note that this will invalidate existing sessions and other signed tokens.
+
+For new installations, or if you choose to rotate the key, you may generate a new secret key with:
+
+```bash
+python3 -c "import secrets; print(secrets.token_urlsafe(64))"
+```
+
## Consumer Settings Changes
The v3 consumer command uses a [different library](https://watchfiles.helpmanual.io/) to unify
diff --git a/src-ui/e2e/document-list/document-list.spec.ts b/src-ui/e2e/document-list/document-list.spec.ts
index 700304186..0cea8effa 100644
--- a/src-ui/e2e/document-list/document-list.spec.ts
+++ b/src-ui/e2e/document-list/document-list.spec.ts
@@ -49,11 +49,11 @@ test('text filtering', async ({ page }) => {
await page.getByRole('main').getByRole('combobox').click()
await page.getByRole('main').getByRole('combobox').fill('test')
await expect(page.locator('pngx-document-list')).toHaveText(/32 documents/)
- await expect(page).toHaveURL(/title_content=test/)
+ await expect(page).toHaveURL(/text=test/)
await page.getByRole('button', { name: 'Title & content' }).click()
await page.getByRole('button', { name: 'Title', exact: true }).click()
await expect(page.locator('pngx-document-list')).toHaveText(/9 documents/)
- await expect(page).toHaveURL(/title__icontains=test/)
+ await expect(page).toHaveURL(/title_search=test/)
await page.getByRole('button', { name: 'Title', exact: true }).click()
await page.getByRole('button', { name: 'Advanced search' }).click()
await expect(page).toHaveURL(/query=test/)
diff --git a/src-ui/e2e/document-list/requests/api-document-list2.har b/src-ui/e2e/document-list/requests/api-document-list2.har
index 3cbc9e8a6..f6a488b26 100644
--- a/src-ui/e2e/document-list/requests/api-document-list2.har
+++ b/src-ui/e2e/document-list/requests/api-document-list2.har
@@ -3545,7 +3545,7 @@
"time": 1.091,
"request": {
"method": "GET",
- "url": "http://localhost:8000/api/documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true&title_content=test",
+ "url": "http://localhost:8000/api/documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true&text=test",
"httpVersion": "HTTP/1.1",
"cookies": [],
"headers": [
@@ -3579,7 +3579,7 @@
"value": "true"
},
{
- "name": "title_content",
+ "name": "text",
"value": "test"
}
],
@@ -4303,7 +4303,7 @@
"time": 0.603,
"request": {
"method": "GET",
- "url": "http://localhost:8000/api/documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true&title__icontains=test",
+ "url": "http://localhost:8000/api/documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true&title_search=test",
"httpVersion": "HTTP/1.1",
"cookies": [],
"headers": [
@@ -4337,7 +4337,7 @@
"value": "true"
},
{
- "name": "title__icontains",
+ "name": "title_search",
"value": "test"
}
],
diff --git a/src-ui/messages.xlf b/src-ui/messages.xlf
index 19b2f7ce2..f30605a4e 100644
--- a/src-ui/messages.xlf
+++ b/src-ui/messages.xlf
@@ -1081,7 +1081,7 @@
src/app/components/document-list/filter-editor/filter-editor.component.ts
- 205
+ 203
@@ -3027,10 +3027,6 @@
src/app/components/document-list/filter-editor/filter-editor.component.html
84
-
- src/app/components/document-list/filter-editor/filter-editor.component.ts
- 200
-
src/app/components/manage/document-attributes/document-attributes.component.ts
129
@@ -7504,7 +7500,7 @@
src/app/components/document-list/filter-editor/filter-editor.component.ts
- 192
+ 194
src/app/data/document.ts
@@ -8817,7 +8813,7 @@
src/app/components/document-list/filter-editor/filter-editor.component.ts
- 197
+ 199
src/app/data/document.ts
@@ -9020,56 +9016,63 @@
Title & content
src/app/components/document-list/filter-editor/filter-editor.component.ts
- 195
+ 197
File type
src/app/components/document-list/filter-editor/filter-editor.component.ts
- 202
+ 200
+
+
+
+ Custom fields (Deprecated)
+
+ src/app/components/document-list/filter-editor/filter-editor.component.ts
+ 210
More like
src/app/components/document-list/filter-editor/filter-editor.component.ts
- 211
+ 215
equals
src/app/components/document-list/filter-editor/filter-editor.component.ts
- 217
+ 221
is empty
src/app/components/document-list/filter-editor/filter-editor.component.ts
- 221
+ 225
is not empty
src/app/components/document-list/filter-editor/filter-editor.component.ts
- 225
+ 229
greater than
src/app/components/document-list/filter-editor/filter-editor.component.ts
- 229
+ 233
less than
src/app/components/document-list/filter-editor/filter-editor.component.ts
- 233
+ 237
@@ -9078,14 +9081,14 @@
)?.name"/>
src/app/components/document-list/filter-editor/filter-editor.component.ts
- 274,278
+ 278,282
Without correspondent
src/app/components/document-list/filter-editor/filter-editor.component.ts
- 280
+ 284
@@ -9094,14 +9097,14 @@
)?.name"/>
src/app/components/document-list/filter-editor/filter-editor.component.ts
- 286,290
+ 290,294
Without document type
src/app/components/document-list/filter-editor/filter-editor.component.ts
- 292
+ 296
@@ -9110,70 +9113,77 @@
)?.name"/>
src/app/components/document-list/filter-editor/filter-editor.component.ts
- 298,302
+ 302,306
Without storage path
src/app/components/document-list/filter-editor/filter-editor.component.ts
- 304
+ 308
Tag:
src/app/components/document-list/filter-editor/filter-editor.component.ts
- 308,310
+ 312,314
Without any tag
src/app/components/document-list/filter-editor/filter-editor.component.ts
- 314
+ 318
Custom fields query
src/app/components/document-list/filter-editor/filter-editor.component.ts
- 318
+ 322
Title:
src/app/components/document-list/filter-editor/filter-editor.component.ts
- 321
+ 326
+
+
+
+ Title & content:
+
+ src/app/components/document-list/filter-editor/filter-editor.component.ts
+ 330
ASN:
src/app/components/document-list/filter-editor/filter-editor.component.ts
- 324
+ 333
Owner:
src/app/components/document-list/filter-editor/filter-editor.component.ts
- 327
+ 336
Owner not in:
src/app/components/document-list/filter-editor/filter-editor.component.ts
- 330
+ 339
Without an owner
src/app/components/document-list/filter-editor/filter-editor.component.ts
- 333
+ 342
diff --git a/src-ui/src/app/components/app-frame/global-search/global-search.component.spec.ts b/src-ui/src/app/components/app-frame/global-search/global-search.component.spec.ts
index eaae4a814..1be801478 100644
--- a/src-ui/src/app/components/app-frame/global-search/global-search.component.spec.ts
+++ b/src-ui/src/app/components/app-frame/global-search/global-search.component.spec.ts
@@ -24,7 +24,7 @@ import {
FILTER_HAS_DOCUMENT_TYPE_ANY,
FILTER_HAS_STORAGE_PATH_ANY,
FILTER_HAS_TAGS_ALL,
- FILTER_TITLE_CONTENT,
+ FILTER_SIMPLE_TEXT,
} from 'src/app/data/filter-rule-type'
import { GlobalSearchType, SETTINGS_KEYS } from 'src/app/data/ui-settings'
import { DocumentListViewService } from 'src/app/services/document-list-view.service'
@@ -545,7 +545,7 @@ describe('GlobalSearchComponent', () => {
component.query = 'test'
component.runFullSearch()
expect(qfSpy).toHaveBeenCalledWith([
- { rule_type: FILTER_TITLE_CONTENT, value: 'test' },
+ { rule_type: FILTER_SIMPLE_TEXT, value: 'test' },
])
settingsService.set(
diff --git a/src-ui/src/app/components/app-frame/global-search/global-search.component.ts b/src-ui/src/app/components/app-frame/global-search/global-search.component.ts
index 4f9a2467c..e95b52cfc 100644
--- a/src-ui/src/app/components/app-frame/global-search/global-search.component.ts
+++ b/src-ui/src/app/components/app-frame/global-search/global-search.component.ts
@@ -25,7 +25,7 @@ import {
FILTER_HAS_DOCUMENT_TYPE_ANY,
FILTER_HAS_STORAGE_PATH_ANY,
FILTER_HAS_TAGS_ALL,
- FILTER_TITLE_CONTENT,
+ FILTER_SIMPLE_TEXT,
} from 'src/app/data/filter-rule-type'
import { ObjectWithId } from 'src/app/data/object-with-id'
import { GlobalSearchType, SETTINGS_KEYS } from 'src/app/data/ui-settings'
@@ -410,7 +410,7 @@ export class GlobalSearchComponent implements OnInit {
public runFullSearch() {
const ruleType = this.useAdvancedForFullSearch
? FILTER_FULLTEXT_QUERY
- : FILTER_TITLE_CONTENT
+ : FILTER_SIMPLE_TEXT
this.documentService.searchQuery = this.useAdvancedForFullSearch
? this.query
: ''
diff --git a/src-ui/src/app/components/common/edit-dialog/storage-path-edit-dialog/storage-path-edit-dialog.component.spec.ts b/src-ui/src/app/components/common/edit-dialog/storage-path-edit-dialog/storage-path-edit-dialog.component.spec.ts
index 89e7b1fee..2466ced73 100644
--- a/src-ui/src/app/components/common/edit-dialog/storage-path-edit-dialog/storage-path-edit-dialog.component.spec.ts
+++ b/src-ui/src/app/components/common/edit-dialog/storage-path-edit-dialog/storage-path-edit-dialog.component.spec.ts
@@ -4,7 +4,7 @@ import { ComponentFixture, TestBed } from '@angular/core/testing'
import { By } from '@angular/platform-browser'
import { NgbAccordionButton, NgbActiveModal } from '@ng-bootstrap/ng-bootstrap'
import { of, throwError } from 'rxjs'
-import { FILTER_TITLE } from 'src/app/data/filter-rule-type'
+import { FILTER_SIMPLE_TITLE } from 'src/app/data/filter-rule-type'
import { DocumentService } from 'src/app/services/rest/document.service'
import { StoragePathService } from 'src/app/services/rest/storage-path.service'
import { SettingsService } from 'src/app/services/settings.service'
@@ -105,7 +105,7 @@ describe('StoragePathEditDialogComponent', () => {
null,
'created',
true,
- [{ rule_type: FILTER_TITLE, value: 'bar' }],
+ [{ rule_type: FILTER_SIMPLE_TITLE, value: 'bar' }],
{ truncate_content: true }
)
listSpy.mockReturnValueOnce(
diff --git a/src-ui/src/app/components/common/edit-dialog/storage-path-edit-dialog/storage-path-edit-dialog.component.ts b/src-ui/src/app/components/common/edit-dialog/storage-path-edit-dialog/storage-path-edit-dialog.component.ts
index f06831588..68ce40f5e 100644
--- a/src-ui/src/app/components/common/edit-dialog/storage-path-edit-dialog/storage-path-edit-dialog.component.ts
+++ b/src-ui/src/app/components/common/edit-dialog/storage-path-edit-dialog/storage-path-edit-dialog.component.ts
@@ -23,7 +23,7 @@ import {
} from 'rxjs'
import { EditDialogComponent } from 'src/app/components/common/edit-dialog/edit-dialog.component'
import { Document } from 'src/app/data/document'
-import { FILTER_TITLE } from 'src/app/data/filter-rule-type'
+import { FILTER_SIMPLE_TITLE } from 'src/app/data/filter-rule-type'
import { DEFAULT_MATCHING_ALGORITHM } from 'src/app/data/matching-model'
import { StoragePath } from 'src/app/data/storage-path'
import { IfOwnerDirective } from 'src/app/directives/if-owner.directive'
@@ -146,7 +146,7 @@ export class StoragePathEditDialogComponent
null,
'created',
true,
- [{ rule_type: FILTER_TITLE, value: title }],
+ [{ rule_type: FILTER_SIMPLE_TITLE, value: title }],
{ truncate_content: true }
)
.pipe(
diff --git a/src-ui/src/app/components/common/input/document-link/document-link.component.spec.ts b/src-ui/src/app/components/common/input/document-link/document-link.component.spec.ts
index 7021012ab..f8a8f3817 100644
--- a/src-ui/src/app/components/common/input/document-link/document-link.component.spec.ts
+++ b/src-ui/src/app/components/common/input/document-link/document-link.component.spec.ts
@@ -3,7 +3,7 @@ import { provideHttpClientTesting } from '@angular/common/http/testing'
import { ComponentFixture, TestBed } from '@angular/core/testing'
import { NG_VALUE_ACCESSOR } from '@angular/forms'
import { of, throwError } from 'rxjs'
-import { FILTER_TITLE } from 'src/app/data/filter-rule-type'
+import { FILTER_SIMPLE_TITLE } from 'src/app/data/filter-rule-type'
import { DocumentService } from 'src/app/services/rest/document.service'
import { DocumentLinkComponent } from './document-link.component'
@@ -99,7 +99,7 @@ describe('DocumentLinkComponent', () => {
null,
'created',
true,
- [{ rule_type: FILTER_TITLE, value: 'bar' }],
+ [{ rule_type: FILTER_SIMPLE_TITLE, value: 'bar' }],
{ truncate_content: true }
)
listSpy.mockReturnValueOnce(throwError(() => new Error()))
diff --git a/src-ui/src/app/components/common/input/document-link/document-link.component.ts b/src-ui/src/app/components/common/input/document-link/document-link.component.ts
index b50f5701d..9bfb60063 100644
--- a/src-ui/src/app/components/common/input/document-link/document-link.component.ts
+++ b/src-ui/src/app/components/common/input/document-link/document-link.component.ts
@@ -28,7 +28,7 @@ import {
tap,
} from 'rxjs'
import { Document } from 'src/app/data/document'
-import { FILTER_TITLE } from 'src/app/data/filter-rule-type'
+import { FILTER_SIMPLE_TITLE } from 'src/app/data/filter-rule-type'
import { CustomDatePipe } from 'src/app/pipes/custom-date.pipe'
import { DocumentService } from 'src/app/services/rest/document.service'
import { AbstractInputComponent } from '../abstract-input'
@@ -121,7 +121,7 @@ export class DocumentLinkComponent
null,
'created',
true,
- [{ rule_type: FILTER_TITLE, value: title }],
+ [{ rule_type: FILTER_SIMPLE_TITLE, value: title }],
{ truncate_content: true }
)
.pipe(
diff --git a/src-ui/src/app/components/document-list/bulk-editor/bulk-editor.component.spec.ts b/src-ui/src/app/components/document-list/bulk-editor/bulk-editor.component.spec.ts
index f283a75f3..8f82be1ab 100644
--- a/src-ui/src/app/components/document-list/bulk-editor/bulk-editor.component.spec.ts
+++ b/src-ui/src/app/components/document-list/bulk-editor/bulk-editor.component.spec.ts
@@ -428,7 +428,7 @@ describe('BulkEditorComponent', () => {
req.flush(true)
expect(req.request.body).toEqual({
all: true,
- filters: { title__icontains: 'apple' },
+ filters: { title_search: 'apple' },
method: 'modify_tags',
parameters: { add_tags: [101], remove_tags: [] },
})
diff --git a/src-ui/src/app/components/document-list/filter-editor/filter-editor.component.spec.ts b/src-ui/src/app/components/document-list/filter-editor/filter-editor.component.spec.ts
index bf5240f1b..d75e38630 100644
--- a/src-ui/src/app/components/document-list/filter-editor/filter-editor.component.spec.ts
+++ b/src-ui/src/app/components/document-list/filter-editor/filter-editor.component.spec.ts
@@ -67,6 +67,8 @@ import {
FILTER_OWNER_DOES_NOT_INCLUDE,
FILTER_OWNER_ISNULL,
FILTER_SHARED_BY_USER,
+ FILTER_SIMPLE_TEXT,
+ FILTER_SIMPLE_TITLE,
FILTER_STORAGE_PATH,
FILTER_TITLE,
FILTER_TITLE_CONTENT,
@@ -312,7 +314,7 @@ describe('FilterEditorComponent', () => {
expect(component.textFilter).toEqual(null)
component.filterRules = [
{
- rule_type: FILTER_TITLE_CONTENT,
+ rule_type: FILTER_SIMPLE_TEXT,
value: 'foo',
},
]
@@ -320,6 +322,18 @@ describe('FilterEditorComponent', () => {
expect(component.textFilterTarget).toEqual('title-content') // TEXT_FILTER_TARGET_TITLE_CONTENT
}))
+ it('should ingest legacy text filter rules for doc title + content', fakeAsync(() => {
+ expect(component.textFilter).toEqual(null)
+ component.filterRules = [
+ {
+ rule_type: FILTER_TITLE_CONTENT,
+ value: 'legacy foo',
+ },
+ ]
+ expect(component.textFilter).toEqual('legacy foo')
+ expect(component.textFilterTarget).toEqual('title-content') // TEXT_FILTER_TARGET_TITLE_CONTENT
+ }))
+
it('should ingest text filter rules for doc asn', fakeAsync(() => {
expect(component.textFilter).toEqual(null)
component.filterRules = [
@@ -1117,7 +1131,7 @@ describe('FilterEditorComponent', () => {
expect(component.textFilter).toEqual('foo')
expect(component.filterRules).toEqual([
{
- rule_type: FILTER_TITLE_CONTENT,
+ rule_type: FILTER_SIMPLE_TEXT,
value: 'foo',
},
])
@@ -1136,7 +1150,7 @@ describe('FilterEditorComponent', () => {
expect(component.textFilterTarget).toEqual('title')
expect(component.filterRules).toEqual([
{
- rule_type: FILTER_TITLE,
+ rule_type: FILTER_SIMPLE_TITLE,
value: 'foo',
},
])
@@ -1250,30 +1264,12 @@ describe('FilterEditorComponent', () => {
])
}))
- it('should convert user input to correct filter rules on custom fields query', fakeAsync(() => {
- component.textFilterInput.nativeElement.value = 'foo'
- component.textFilterInput.nativeElement.dispatchEvent(new Event('input'))
- const textFieldTargetDropdown = fixture.debugElement.queryAll(
- By.directive(NgbDropdownItem)
- )[3]
- textFieldTargetDropdown.triggerEventHandler('click') // TEXT_FILTER_TARGET_CUSTOM_FIELDS
- fixture.detectChanges()
- tick(400)
- expect(component.textFilterTarget).toEqual('custom-fields')
- expect(component.filterRules).toEqual([
- {
- rule_type: FILTER_CUSTOM_FIELDS_TEXT,
- value: 'foo',
- },
- ])
- }))
-
it('should convert user input to correct filter rules on mime type', fakeAsync(() => {
component.textFilterInput.nativeElement.value = 'pdf'
component.textFilterInput.nativeElement.dispatchEvent(new Event('input'))
const textFieldTargetDropdown = fixture.debugElement.queryAll(
By.directive(NgbDropdownItem)
- )[4]
+ )[3]
textFieldTargetDropdown.triggerEventHandler('click') // TEXT_FILTER_TARGET_MIME_TYPE
fixture.detectChanges()
tick(400)
@@ -1291,8 +1287,8 @@ describe('FilterEditorComponent', () => {
component.textFilterInput.nativeElement.dispatchEvent(new Event('input'))
const textFieldTargetDropdown = fixture.debugElement.queryAll(
By.directive(NgbDropdownItem)
- )[5]
- textFieldTargetDropdown.triggerEventHandler('click') // TEXT_FILTER_TARGET_ASN
+ )[4]
+ textFieldTargetDropdown.triggerEventHandler('click') // TEXT_FILTER_TARGET_FULLTEXT_QUERY
fixture.detectChanges()
tick(400)
expect(component.textFilterTarget).toEqual('fulltext-query')
@@ -1696,12 +1692,56 @@ describe('FilterEditorComponent', () => {
])
}))
+ it('should convert legacy title filters into full text query when adding a created relative date', fakeAsync(() => {
+ component.filterRules = [
+ {
+ rule_type: FILTER_TITLE,
+ value: 'foo',
+ },
+ ]
+ const dateCreatedDropdown = fixture.debugElement.queryAll(
+ By.directive(DatesDropdownComponent)
+ )[0]
+ component.dateCreatedRelativeDate = RelativeDate.WITHIN_1_WEEK
+ dateCreatedDropdown.triggerEventHandler('datesSet')
+ fixture.detectChanges()
+ tick(400)
+ expect(component.filterRules).toEqual([
+ {
+ rule_type: FILTER_FULLTEXT_QUERY,
+ value: 'foo,created:[-1 week to now]',
+ },
+ ])
+ }))
+
+ it('should convert simple title filters into full text query when adding a created relative date', fakeAsync(() => {
+ component.filterRules = [
+ {
+ rule_type: FILTER_SIMPLE_TITLE,
+ value: 'foo',
+ },
+ ]
+ const dateCreatedDropdown = fixture.debugElement.queryAll(
+ By.directive(DatesDropdownComponent)
+ )[0]
+ component.dateCreatedRelativeDate = RelativeDate.WITHIN_1_WEEK
+ dateCreatedDropdown.triggerEventHandler('datesSet')
+ fixture.detectChanges()
+ tick(400)
+ expect(component.filterRules).toEqual([
+ {
+ rule_type: FILTER_FULLTEXT_QUERY,
+ value: 'foo,created:[-1 week to now]',
+ },
+ ])
+ }))
+
it('should leave relative dates not in quick list intact', fakeAsync(() => {
component.textFilterInput.nativeElement.value = 'created:[-2 week to now]'
component.textFilterInput.nativeElement.dispatchEvent(new Event('input'))
const textFieldTargetDropdown = fixture.debugElement.queryAll(
By.directive(NgbDropdownItem)
- )[5]
+ )[4]
textFieldTargetDropdown.triggerEventHandler('click')
fixture.detectChanges()
tick(400)
@@ -2031,12 +2071,30 @@ describe('FilterEditorComponent', () => {
component.filterRules = [
{
- rule_type: FILTER_TITLE,
+ rule_type: FILTER_SIMPLE_TITLE,
value: 'foo',
},
]
expect(component.generateFilterName()).toEqual('Title: foo')
+ component.filterRules = [
+ {
+ rule_type: FILTER_TITLE_CONTENT,
+ value: 'legacy foo',
+ },
+ ]
+ expect(component.generateFilterName()).toEqual(
+ 'Title & content: legacy foo'
+ )
+
+ component.filterRules = [
+ {
+ rule_type: FILTER_SIMPLE_TEXT,
+ value: 'foo',
+ },
+ ]
+ expect(component.generateFilterName()).toEqual('Title & content: foo')
+
component.filterRules = [
{
rule_type: FILTER_ASN,
@@ -2156,6 +2214,36 @@ describe('FilterEditorComponent', () => {
})
})
+ it('should hide deprecated custom fields target from default text filter targets', () => {
+ expect(component.textFilterTargets).not.toContainEqual({
+ id: 'custom-fields',
+ name: $localize`Custom fields (Deprecated)`,
+ })
+ })
+
+ it('should keep deprecated custom fields target available for legacy filters', fakeAsync(() => {
+ component.filterRules = [
+ {
+ rule_type: FILTER_CUSTOM_FIELDS_TEXT,
+ value: 'foo',
+ },
+ ]
+ fixture.detectChanges()
+ tick()
+
+ expect(component.textFilterTarget).toEqual('custom-fields')
+ expect(component.textFilterTargets).toContainEqual({
+ id: 'custom-fields',
+ name: $localize`Custom fields (Deprecated)`,
+ })
+ expect(component.filterRules).toEqual([
+ {
+ rule_type: FILTER_CUSTOM_FIELDS_TEXT,
+ value: 'foo',
+ },
+ ])
+ }))
+
it('should call autocomplete endpoint on input', fakeAsync(() => {
component.textFilterTarget = 'fulltext-query' // TEXT_FILTER_TARGET_FULLTEXT_QUERY
const autocompleteSpy = jest.spyOn(searchService, 'autocomplete')
diff --git a/src-ui/src/app/components/document-list/filter-editor/filter-editor.component.ts b/src-ui/src/app/components/document-list/filter-editor/filter-editor.component.ts
index f7b50181b..b4e63317a 100644
--- a/src-ui/src/app/components/document-list/filter-editor/filter-editor.component.ts
+++ b/src-ui/src/app/components/document-list/filter-editor/filter-editor.component.ts
@@ -71,6 +71,8 @@ import {
FILTER_OWNER_DOES_NOT_INCLUDE,
FILTER_OWNER_ISNULL,
FILTER_SHARED_BY_USER,
+ FILTER_SIMPLE_TEXT,
+ FILTER_SIMPLE_TITLE,
FILTER_STORAGE_PATH,
FILTER_TITLE,
FILTER_TITLE_CONTENT,
@@ -195,10 +197,6 @@ const DEFAULT_TEXT_FILTER_TARGET_OPTIONS = [
name: $localize`Title & content`,
},
{ id: TEXT_FILTER_TARGET_ASN, name: $localize`ASN` },
- {
- id: TEXT_FILTER_TARGET_CUSTOM_FIELDS,
- name: $localize`Custom fields`,
- },
{ id: TEXT_FILTER_TARGET_MIME_TYPE, name: $localize`File type` },
{
id: TEXT_FILTER_TARGET_FULLTEXT_QUERY,
@@ -206,6 +204,12 @@ const DEFAULT_TEXT_FILTER_TARGET_OPTIONS = [
},
]
+const DEPRECATED_CUSTOM_FIELDS_TEXT_FILTER_TARGET_OPTION = {
+ // Kept only so that legacy saved views can still render and be edited away from. Remove eventually.
+ id: TEXT_FILTER_TARGET_CUSTOM_FIELDS,
+ name: $localize`Custom fields (Deprecated)`,
+}
+
const TEXT_FILTER_TARGET_MORELIKE_OPTION = {
id: TEXT_FILTER_TARGET_FULLTEXT_MORELIKE,
name: $localize`More like`,
@@ -318,8 +322,13 @@ export class FilterEditorComponent
return $localize`Custom fields query`
case FILTER_TITLE:
+ case FILTER_SIMPLE_TITLE:
return $localize`Title: ${rule.value}`
+ case FILTER_TITLE_CONTENT:
+ case FILTER_SIMPLE_TEXT:
+ return $localize`Title & content: ${rule.value}`
+
case FILTER_ASN:
return $localize`ASN: ${rule.value}`
@@ -353,12 +362,16 @@ export class FilterEditorComponent
_moreLikeDoc: Document
get textFilterTargets() {
+ let targets = DEFAULT_TEXT_FILTER_TARGET_OPTIONS // start from the defaults; concat() below returns a new array, so the shared list is never mutated
if (this.textFilterTarget == TEXT_FILTER_TARGET_FULLTEXT_MORELIKE) {
- return DEFAULT_TEXT_FILTER_TARGET_OPTIONS.concat([
- TEXT_FILTER_TARGET_MORELIKE_OPTION,
+ targets = targets.concat([TEXT_FILTER_TARGET_MORELIKE_OPTION])
+ }
+ if (this.textFilterTarget == TEXT_FILTER_TARGET_CUSTOM_FIELDS) { // the deprecated target is listed only while it is the selected one
+ targets = targets.concat([
+ DEPRECATED_CUSTOM_FIELDS_TEXT_FILTER_TARGET_OPTION,
])
}
- return DEFAULT_TEXT_FILTER_TARGET_OPTIONS
+ return targets
}
textFilterTarget = TEXT_FILTER_TARGET_TITLE_CONTENT
@@ -437,10 +450,12 @@ export class FilterEditorComponent
value.forEach((rule) => {
switch (rule.rule_type) {
case FILTER_TITLE:
+ case FILTER_SIMPLE_TITLE:
this._textFilter = rule.value
this.textFilterTarget = TEXT_FILTER_TARGET_TITLE
break
case FILTER_TITLE_CONTENT:
+ case FILTER_SIMPLE_TEXT:
this._textFilter = rule.value
this.textFilterTarget = TEXT_FILTER_TARGET_TITLE_CONTENT
break
@@ -762,12 +777,15 @@ export class FilterEditorComponent
this.textFilterTarget == TEXT_FILTER_TARGET_TITLE_CONTENT
) {
filterRules.push({
- rule_type: FILTER_TITLE_CONTENT,
+ rule_type: FILTER_SIMPLE_TEXT,
value: this._textFilter.trim(),
})
}
if (this._textFilter && this.textFilterTarget == TEXT_FILTER_TARGET_TITLE) {
- filterRules.push({ rule_type: FILTER_TITLE, value: this._textFilter })
+ filterRules.push({
+ rule_type: FILTER_SIMPLE_TITLE,
+ value: this._textFilter,
+ })
}
if (this.textFilterTarget == TEXT_FILTER_TARGET_ASN) {
if (
@@ -1009,7 +1027,10 @@ export class FilterEditorComponent
) {
existingRule = filterRules.find(
(fr) =>
- fr.rule_type == FILTER_TITLE_CONTENT || fr.rule_type == FILTER_TITLE
+ fr.rule_type == FILTER_TITLE_CONTENT ||
+ fr.rule_type == FILTER_SIMPLE_TEXT ||
+ fr.rule_type == FILTER_TITLE ||
+ fr.rule_type == FILTER_SIMPLE_TITLE
)
existingRule.rule_type = FILTER_FULLTEXT_QUERY
}
diff --git a/src-ui/src/app/data/filter-rule-type.ts b/src-ui/src/app/data/filter-rule-type.ts
index 7f0f0d56d..6330eb44c 100644
--- a/src-ui/src/app/data/filter-rule-type.ts
+++ b/src-ui/src/app/data/filter-rule-type.ts
@@ -3,7 +3,7 @@ import { DataType } from './datatype'
export const NEGATIVE_NULL_FILTER_VALUE = -1
// These correspond to src/documents/models.py and changes here require a DB migration (and vice versa)
-export const FILTER_TITLE = 0
+export const FILTER_TITLE = 0 // Deprecated in favor of Tantivy-backed `title_search`. Keep for now for existing saved views
export const FILTER_CONTENT = 1
export const FILTER_ASN = 2
@@ -46,7 +46,9 @@ export const FILTER_ADDED_FROM = 46
export const FILTER_MODIFIED_BEFORE = 15
export const FILTER_MODIFIED_AFTER = 16
-export const FILTER_TITLE_CONTENT = 19
+export const FILTER_TITLE_CONTENT = 19 // Deprecated in favor of Tantivy-backed `text` filtervar. Keep for now for existing saved views
+export const FILTER_SIMPLE_TITLE = 48
+export const FILTER_SIMPLE_TEXT = 49
export const FILTER_FULLTEXT_QUERY = 20
export const FILTER_FULLTEXT_MORELIKE = 21
@@ -56,7 +58,7 @@ export const FILTER_OWNER_ISNULL = 34
export const FILTER_OWNER_DOES_NOT_INCLUDE = 35
export const FILTER_SHARED_BY_USER = 37
-export const FILTER_CUSTOM_FIELDS_TEXT = 36
+export const FILTER_CUSTOM_FIELDS_TEXT = 36 // Deprecated. UI no longer includes CF text-search mode. Keep for now for existing saved views
export const FILTER_HAS_CUSTOM_FIELDS_ALL = 38
export const FILTER_HAS_CUSTOM_FIELDS_ANY = 39
export const FILTER_DOES_NOT_HAVE_CUSTOM_FIELDS = 40
@@ -66,6 +68,9 @@ export const FILTER_CUSTOM_FIELDS_QUERY = 42
export const FILTER_MIME_TYPE = 47
+export const SIMPLE_TEXT_PARAMETER = 'text'
+export const SIMPLE_TITLE_PARAMETER = 'title_search'
+
export const FILTER_RULE_TYPES: FilterRuleType[] = [
{
id: FILTER_TITLE,
@@ -74,6 +79,13 @@ export const FILTER_RULE_TYPES: FilterRuleType[] = [
multi: false,
default: '',
},
+ {
+ id: FILTER_SIMPLE_TITLE,
+ filtervar: SIMPLE_TITLE_PARAMETER,
+ datatype: 'string',
+ multi: false,
+ default: '',
+ },
{
id: FILTER_CONTENT,
filtervar: 'content__icontains',
@@ -279,6 +291,12 @@ export const FILTER_RULE_TYPES: FilterRuleType[] = [
datatype: 'string',
multi: false,
},
+ {
+ id: FILTER_SIMPLE_TEXT,
+ filtervar: SIMPLE_TEXT_PARAMETER,
+ datatype: 'string',
+ multi: false,
+ },
{
id: FILTER_FULLTEXT_QUERY,
filtervar: 'query',
diff --git a/src-ui/src/app/services/rest/document.service.spec.ts b/src-ui/src/app/services/rest/document.service.spec.ts
index 711aab743..03375e367 100644
--- a/src-ui/src/app/services/rest/document.service.spec.ts
+++ b/src-ui/src/app/services/rest/document.service.spec.ts
@@ -10,7 +10,7 @@ import {
DOCUMENT_SORT_FIELDS,
DOCUMENT_SORT_FIELDS_FULLTEXT,
} from 'src/app/data/document'
-import { FILTER_TITLE } from 'src/app/data/filter-rule-type'
+import { FILTER_SIMPLE_TITLE } from 'src/app/data/filter-rule-type'
import { SETTINGS_KEYS } from 'src/app/data/ui-settings'
import { environment } from 'src/environments/environment'
import { PermissionsService } from '../permissions.service'
@@ -138,13 +138,13 @@ describe(`DocumentService`, () => {
subscription = service
.listAllFilteredIds([
{
- rule_type: FILTER_TITLE,
+ rule_type: FILTER_SIMPLE_TITLE,
value: 'apple',
},
])
.subscribe()
const req = httpTestingController.expectOne(
- `${environment.apiBaseUrl}${endpoint}/?page=1&page_size=100000&fields=id&title__icontains=apple`
+ `${environment.apiBaseUrl}${endpoint}/?page=1&page_size=100000&fields=id&title_search=apple`
)
expect(req.request.method).toEqual('GET')
})
diff --git a/src-ui/src/app/utils/query-params.spec.ts b/src-ui/src/app/utils/query-params.spec.ts
index c22c90d11..7fd8f6808 100644
--- a/src-ui/src/app/utils/query-params.spec.ts
+++ b/src-ui/src/app/utils/query-params.spec.ts
@@ -8,6 +8,10 @@ import {
FILTER_HAS_CUSTOM_FIELDS_ALL,
FILTER_HAS_CUSTOM_FIELDS_ANY,
FILTER_HAS_TAGS_ALL,
+ FILTER_SIMPLE_TEXT,
+ FILTER_SIMPLE_TITLE,
+ FILTER_TITLE,
+ FILTER_TITLE_CONTENT,
NEGATIVE_NULL_FILTER_VALUE,
} from '../data/filter-rule-type'
import {
@@ -128,6 +132,26 @@ describe('QueryParams Utils', () => {
is_tagged: 0,
})
+ params = queryParamsFromFilterRules([
+ {
+ rule_type: FILTER_TITLE_CONTENT,
+ value: 'bank statement',
+ },
+ ])
+ expect(params).toEqual({
+ text: 'bank statement',
+ })
+
+ params = queryParamsFromFilterRules([
+ {
+ rule_type: FILTER_TITLE,
+ value: 'invoice',
+ },
+ ])
+ expect(params).toEqual({
+ title_search: 'invoice',
+ })
+
params = queryParamsFromFilterRules([
{
rule_type: FILTER_HAS_TAGS_ALL,
@@ -148,6 +172,30 @@ describe('QueryParams Utils', () => {
it('should convert filter rules to query params', () => {
let rules = filterRulesFromQueryParams(
+ convertToParamMap({
+ text: 'bank statement',
+ })
+ )
+ expect(rules).toEqual([
+ {
+ rule_type: FILTER_SIMPLE_TEXT,
+ value: 'bank statement',
+ },
+ ])
+
+ rules = filterRulesFromQueryParams(
+ convertToParamMap({
+ title_search: 'invoice',
+ })
+ )
+ expect(rules).toEqual([
+ {
+ rule_type: FILTER_SIMPLE_TITLE,
+ value: 'invoice',
+ },
+ ])
+
+ rules = filterRulesFromQueryParams(
convertToParamMap({
tags__id__all,
})
diff --git a/src-ui/src/app/utils/query-params.ts b/src-ui/src/app/utils/query-params.ts
index 27716cc2d..be33ba724 100644
--- a/src-ui/src/app/utils/query-params.ts
+++ b/src-ui/src/app/utils/query-params.ts
@@ -9,8 +9,14 @@ import {
FILTER_HAS_CUSTOM_FIELDS_ALL,
FILTER_HAS_CUSTOM_FIELDS_ANY,
FILTER_RULE_TYPES,
+ FILTER_SIMPLE_TEXT,
+ FILTER_SIMPLE_TITLE,
+ FILTER_TITLE,
+ FILTER_TITLE_CONTENT,
FilterRuleType,
NEGATIVE_NULL_FILTER_VALUE,
+ SIMPLE_TEXT_PARAMETER,
+ SIMPLE_TITLE_PARAMETER,
} from '../data/filter-rule-type'
import { ListViewState } from '../services/document-list-view.service'
@@ -97,6 +103,8 @@ export function transformLegacyFilterRules(
export function filterRulesFromQueryParams(
queryParams: ParamMap
): FilterRule[] {
+ let filterRulesFromQueryParams: FilterRule[] = []
+
const allFilterRuleQueryParams: string[] = FILTER_RULE_TYPES.map(
(rt) => rt.filtervar
)
@@ -104,7 +112,6 @@ export function filterRulesFromQueryParams(
.filter((rt) => rt !== undefined)
// transform query params to filter rules
- let filterRulesFromQueryParams: FilterRule[] = []
allFilterRuleQueryParams
.filter((frqp) => queryParams.has(frqp))
.forEach((filterQueryParamName) => {
@@ -146,7 +153,17 @@ export function queryParamsFromFilterRules(filterRules: FilterRule[]): Params {
let params = {}
for (let rule of filterRules) {
let ruleType = FILTER_RULE_TYPES.find((t) => t.id == rule.rule_type)
- if (ruleType.isnull_filtervar && rule.value == null) {
+ if (
+ rule.rule_type === FILTER_TITLE_CONTENT ||
+ rule.rule_type === FILTER_SIMPLE_TEXT
+ ) {
+ params[SIMPLE_TEXT_PARAMETER] = rule.value
+ } else if (
+ rule.rule_type === FILTER_TITLE ||
+ rule.rule_type === FILTER_SIMPLE_TITLE
+ ) {
+ params[SIMPLE_TITLE_PARAMETER] = rule.value
+ } else if (ruleType.isnull_filtervar && rule.value == null) {
params[ruleType.isnull_filtervar] = 1
} else if (
ruleType.isnull_filtervar &&
diff --git a/src/documents/consumer.py b/src/documents/consumer.py
index 424e22ce2..8035f3857 100644
--- a/src/documents/consumer.py
+++ b/src/documents/consumer.py
@@ -213,14 +213,12 @@ class ConsumerPluginMixin:
message,
current_progress,
max_progress,
- extra_args={
- "document_id": document_id,
- "owner_id": self.metadata.owner_id if self.metadata.owner_id else None,
- "users_can_view": (self.metadata.view_users or [])
- + (self.metadata.change_users or []),
- "groups_can_view": (self.metadata.view_groups or [])
- + (self.metadata.change_groups or []),
- },
+ document_id=document_id,
+ owner_id=self.metadata.owner_id if self.metadata.owner_id else None,
+ users_can_view=(self.metadata.view_users or [])
+ + (self.metadata.change_users or []),
+ groups_can_view=(self.metadata.view_groups or [])
+ + (self.metadata.change_groups or []),
)
def _fail(
diff --git a/src/documents/filters.py b/src/documents/filters.py
index 2f7de1cd4..b2b226ee1 100644
--- a/src/documents/filters.py
+++ b/src/documents/filters.py
@@ -3,6 +3,7 @@ from __future__ import annotations
import functools
import inspect
import json
+import logging
import operator
from contextlib import contextmanager
from typing import TYPE_CHECKING
@@ -77,6 +78,8 @@ DATETIME_KWARGS = [
CUSTOM_FIELD_QUERY_MAX_DEPTH = 10
CUSTOM_FIELD_QUERY_MAX_ATOMS = 20
+logger = logging.getLogger("paperless.api")
+
class CorrespondentFilterSet(FilterSet):
class Meta:
@@ -162,9 +165,13 @@ class InboxFilter(Filter):
@extend_schema_field(serializers.CharField)
class TitleContentFilter(Filter):
+ # Deprecated but retained for existing saved views. UI uses Tantivy-backed `text` / `title_search` params.
def filter(self, qs: Any, value: Any) -> Any:
value = value.strip() if isinstance(value, str) else value
if value:
+ logger.warning(
+ "Deprecated document filter parameter 'title_content' used; use `text` instead.",
+ )
try:
return qs.filter(
Q(title__icontains=value) | Q(effective_content__icontains=value),
@@ -243,6 +250,9 @@ class CustomFieldsFilter(Filter):
def filter(self, qs, value):
value = value.strip() if isinstance(value, str) else value
if value:
+ logger.warning(
+ "Deprecated document filter parameter 'custom_fields__icontains' used; use `custom_field_query` or advanced Tantivy field syntax instead.",
+ )
fields_with_matching_selects = CustomField.objects.filter(
extra_data__icontains=value,
)
@@ -747,6 +757,7 @@ class DocumentFilterSet(FilterSet):
is_in_inbox = InboxFilter()
+ # Deprecated, but keep for now for existing saved views
title_content = TitleContentFilter()
content__istartswith = EffectiveContentFilter(lookup_expr="istartswith")
@@ -756,6 +767,7 @@ class DocumentFilterSet(FilterSet):
owner__id__none = ObjectFilter(field_name="owner", exclude=True)
+ # Deprecated, UI no longer includes CF text-search mode, but keep for now for existing saved views
custom_fields__icontains = CustomFieldsFilter()
custom_fields__id__all = ObjectFilter(field_name="custom_fields__field")
diff --git a/src/documents/management/commands/document_exporter.py b/src/documents/management/commands/document_exporter.py
index ee3b44e0c..562a2ca8d 100644
--- a/src/documents/management/commands/document_exporter.py
+++ b/src/documents/management/commands/document_exporter.py
@@ -45,6 +45,8 @@ from documents.models import DocumentType
from documents.models import Note
from documents.models import SavedView
from documents.models import SavedViewFilterRule
+from documents.models import ShareLink
+from documents.models import ShareLinkBundle
from documents.models import StoragePath
from documents.models import Tag
from documents.models import UiSettings
@@ -55,6 +57,7 @@ from documents.models import WorkflowActionWebhook
from documents.models import WorkflowTrigger
from documents.settings import EXPORTER_ARCHIVE_NAME
from documents.settings import EXPORTER_FILE_NAME
+from documents.settings import EXPORTER_SHARE_LINK_BUNDLE_NAME
from documents.settings import EXPORTER_THUMBNAIL_NAME
from documents.utils import compute_checksum
from documents.utils import copy_file_with_basic_stats
@@ -389,6 +392,8 @@ class Command(CryptMixin, PaperlessCommand):
"app_configs": ApplicationConfiguration.objects.all(),
"notes": Note.global_objects.all(),
"documents": Document.global_objects.order_by("id").all(),
+ "share_links": ShareLink.global_objects.all(),
+ "share_link_bundles": ShareLinkBundle.objects.order_by("id").all(),
"social_accounts": SocialAccount.objects.all(),
"social_apps": SocialApp.objects.all(),
"social_tokens": SocialToken.objects.all(),
@@ -409,6 +414,7 @@ class Command(CryptMixin, PaperlessCommand):
)
document_manifest: list[dict] = []
+ share_link_bundle_manifest: list[dict] = []
manifest_path = (self.target / "manifest.json").resolve()
with StreamingManifestWriter(
@@ -427,6 +433,15 @@ class Command(CryptMixin, PaperlessCommand):
for record in batch:
self._encrypt_record_inline(record)
document_manifest.extend(batch)
+ elif key == "share_link_bundles":
+ # Accumulate for file-copy loop; written to manifest after
+ for batch in serialize_queryset_batched(
+ qs,
+ batch_size=self.batch_size,
+ ):
+ for record in batch:
+ self._encrypt_record_inline(record)
+ share_link_bundle_manifest.extend(batch)
elif self.split_manifest and key in (
"notes",
"custom_field_instances",
@@ -445,6 +460,12 @@ class Command(CryptMixin, PaperlessCommand):
document_map: dict[int, Document] = {
d.pk: d for d in Document.global_objects.order_by("id")
}
+ share_link_bundle_map: dict[int, ShareLinkBundle] = {
+ b.pk: b
+ for b in ShareLinkBundle.objects.order_by("id").prefetch_related(
+ "documents",
+ )
+ }
# 3. Export files from each document
for index, document_dict in enumerate(
@@ -478,6 +499,19 @@ class Command(CryptMixin, PaperlessCommand):
else:
writer.write_record(document_dict)
+ for bundle_dict in share_link_bundle_manifest:
+ bundle = share_link_bundle_map[bundle_dict["pk"]]
+
+ bundle_target = self.generate_share_link_bundle_target(
+ bundle,
+ bundle_dict,
+ )
+
+ if not self.data_only and bundle_target is not None:
+ self.copy_share_link_bundle_file(bundle, bundle_target)
+
+ writer.write_record(bundle_dict)
+
# 4.2 write version information to target folder
extra_metadata_path = (self.target / "metadata.json").resolve()
metadata: dict[str, str | int | dict[str, str | int]] = {
@@ -598,6 +632,48 @@ class Command(CryptMixin, PaperlessCommand):
archive_target,
)
+ def generate_share_link_bundle_target(
+ self,
+ bundle: ShareLinkBundle,
+ bundle_dict: dict,
+ ) -> Path | None:
+ """
+ Returns the resolved export path for the bundle's file, or None when bundle.file_path is empty; also rewrites bundle_dict in place with portable paths.
+ """
+ if not bundle.file_path:
+ return None
+
+ stored_bundle_path = Path(bundle.file_path)
+ portable_bundle_path = (
+ stored_bundle_path
+ if not stored_bundle_path.is_absolute()
+ else Path(stored_bundle_path.name)  # absolute paths are reduced to their file name
+ )
+ export_bundle_path = Path("share_link_bundles") / portable_bundle_path
+
+ bundle_dict["fields"]["file_path"] = portable_bundle_path.as_posix()  # exported record stores the portable path
+ bundle_dict[EXPORTER_SHARE_LINK_BUNDLE_NAME] = export_bundle_path.as_posix()  # location of the file relative to self.target
+
+ return (self.target / export_bundle_path).resolve()
+
+ def copy_share_link_bundle_file(
+ self,
+ bundle: ShareLinkBundle,
+ bundle_target: Path,
+ ) -> None:
+ """
+ Copies a share link bundle ZIP to bundle_target via check_and_copy; raises FileNotFoundError when bundle.absolute_file_path is None.
+ """
+ bundle_source_path = bundle.absolute_file_path
+ if bundle_source_path is None:
+ raise FileNotFoundError(f"Share link bundle {bundle.pk} has no file path")
+
+ self.check_and_copy(
+ bundle_source_path,
+ None,  # NOTE(review): meaning of this positional argument is not visible here (presumably a source checksum) -- confirm
+ bundle_target,
+ )
+
def _encrypt_record_inline(self, record: dict) -> None:
"""Encrypt sensitive fields in a single record, if passphrase is set."""
if not self.passphrase:
diff --git a/src/documents/management/commands/document_importer.py b/src/documents/management/commands/document_importer.py
index 4572b4617..becdf7b76 100644
--- a/src/documents/management/commands/document_importer.py
+++ b/src/documents/management/commands/document_importer.py
@@ -32,10 +32,12 @@ from documents.models import CustomFieldInstance
from documents.models import Document
from documents.models import DocumentType
from documents.models import Note
+from documents.models import ShareLinkBundle
from documents.models import Tag
from documents.settings import EXPORTER_ARCHIVE_NAME
from documents.settings import EXPORTER_CRYPTO_SETTINGS_NAME
from documents.settings import EXPORTER_FILE_NAME
+from documents.settings import EXPORTER_SHARE_LINK_BUNDLE_NAME
from documents.settings import EXPORTER_THUMBNAIL_NAME
from documents.signals.handlers import check_paths_and_prune_custom_fields
from documents.signals.handlers import update_filename_and_move_files
@@ -348,18 +350,42 @@ class Command(CryptMixin, PaperlessCommand):
f"Failed to read from archive file {doc_archive_path}",
) from e
+ def check_share_link_bundle_validity(bundle_record: dict) -> None:
+ if EXPORTER_SHARE_LINK_BUNDLE_NAME not in bundle_record:
+ return
+
+ bundle_file = bundle_record[EXPORTER_SHARE_LINK_BUNDLE_NAME]
+ bundle_path: Path = self.source / bundle_file
+ if not bundle_path.exists():
+ raise CommandError(
+ f'The manifest file refers to "{bundle_file}" which does not '
+ "appear to be in the source directory.",
+ )
+ try:
+ with bundle_path.open(mode="rb"):
+ pass
+ except Exception as e:
+ raise CommandError(
+ f"Failed to read from share link bundle file {bundle_path}",
+ ) from e
+
self.stdout.write("Checking the manifest")
for manifest_path in self.manifest_paths:
for record in iter_manifest_records(manifest_path):
# Only check if the document files exist if this is not data only
# We don't care about documents for a data only import
- if not self.data_only and record["model"] == "documents.document":
+ if self.data_only:
+ continue
+ if record["model"] == "documents.document":
check_document_validity(record)
+ elif record["model"] == "documents.sharelinkbundle":
+ check_share_link_bundle_validity(record)
def _import_files_from_manifest(self) -> None:
settings.ORIGINALS_DIR.mkdir(parents=True, exist_ok=True)
settings.THUMBNAIL_DIR.mkdir(parents=True, exist_ok=True)
settings.ARCHIVE_DIR.mkdir(parents=True, exist_ok=True)
+ settings.SHARE_LINK_BUNDLE_DIR.mkdir(parents=True, exist_ok=True)
self.stdout.write("Copy files into paperless...")
@@ -374,6 +400,18 @@ class Command(CryptMixin, PaperlessCommand):
for record in iter_manifest_records(manifest_path)
if record["model"] == "documents.document"
]
+ share_link_bundle_records = [
+ {
+ "pk": record["pk"],
+ EXPORTER_SHARE_LINK_BUNDLE_NAME: record.get(
+ EXPORTER_SHARE_LINK_BUNDLE_NAME,
+ ),
+ }
+ for manifest_path in self.manifest_paths
+ for record in iter_manifest_records(manifest_path)
+ if record["model"] == "documents.sharelinkbundle"
+ and record.get(EXPORTER_SHARE_LINK_BUNDLE_NAME)
+ ]
for record in self.track(document_records, description="Copying files..."):
document = Document.global_objects.get(pk=record["pk"])
@@ -416,6 +454,26 @@ class Command(CryptMixin, PaperlessCommand):
document.save()
+ for record in self.track(
+ share_link_bundle_records,
+ description="Copying share link bundles...",
+ ):
+ bundle = ShareLinkBundle.objects.get(pk=record["pk"])
+ bundle_file = record[EXPORTER_SHARE_LINK_BUNDLE_NAME]
+ bundle_source_path = (self.source / bundle_file).resolve()
+ bundle_target_path = bundle.absolute_file_path
+ if bundle_target_path is None:
+ raise CommandError(
+ f"Share link bundle {bundle.pk} does not have a valid file path.",
+ )
+
+ with FileLock(settings.MEDIA_LOCK):
+ bundle_target_path.parent.mkdir(parents=True, exist_ok=True)
+ copy_file_with_basic_stats(
+ bundle_source_path,
+ bundle_target_path,
+ )
+
def _decrypt_record_if_needed(self, record: dict) -> dict:
fields = self.CRYPT_FIELDS_BY_MODEL.get(record.get("model", ""))
if fields:
diff --git a/src/documents/migrations/0018_saved_view_simple_search_rules.py b/src/documents/migrations/0018_saved_view_simple_search_rules.py
new file mode 100644
index 000000000..6d128c593
--- /dev/null
+++ b/src/documents/migrations/0018_saved_view_simple_search_rules.py
@@ -0,0 +1,92 @@
+# Generated by Django 5.2.12 on 2026-04-01 18:20
+
+from django.db import migrations
+from django.db import models
+
+OLD_TITLE_RULE = 0
+OLD_TITLE_CONTENT_RULE = 19
+NEW_SIMPLE_TITLE_RULE = 48
+NEW_SIMPLE_TEXT_RULE = 49
+
+
+# Re-types existing saved-view rules: 0 (title) -> 48 and 19 (title or content) -> 49; ids mirror SavedViewFilterRule in documents/models.py
+def migrate_saved_view_rules_forward(apps, schema_editor):
+ SavedViewFilterRule = apps.get_model("documents", "SavedViewFilterRule")  # model as provided by the migration's app registry
+ SavedViewFilterRule.objects.filter(rule_type=OLD_TITLE_RULE).update(
+ rule_type=NEW_SIMPLE_TITLE_RULE,
+ )
+ SavedViewFilterRule.objects.filter(rule_type=OLD_TITLE_CONTENT_RULE).update(
+ rule_type=NEW_SIMPLE_TEXT_RULE,
+ )
+
+
+class Migration(migrations.Migration):
+ dependencies = [
+ ("documents", "0017_migrate_fulltext_query_field_prefixes"),
+ ]
+
+ operations = [
+ migrations.AlterField(  # choice list now includes the new rule types 48 and 49
+ model_name="savedviewfilterrule",
+ name="rule_type",
+ field=models.PositiveSmallIntegerField(
+ choices=[
+ (0, "title contains"),
+ (1, "content contains"),
+ (2, "ASN is"),
+ (3, "correspondent is"),
+ (4, "document type is"),
+ (5, "is in inbox"),
+ (6, "has tag"),
+ (7, "has any tag"),
+ (8, "created before"),
+ (9, "created after"),
+ (10, "created year is"),
+ (11, "created month is"),
+ (12, "created day is"),
+ (13, "added before"),
+ (14, "added after"),
+ (15, "modified before"),
+ (16, "modified after"),
+ (17, "does not have tag"),
+ (18, "does not have ASN"),
+ (19, "title or content contains"),
+ (20, "fulltext query"),
+ (21, "more like this"),
+ (22, "has tags in"),
+ (23, "ASN greater than"),
+ (24, "ASN less than"),
+ (25, "storage path is"),
+ (26, "has correspondent in"),
+ (27, "does not have correspondent in"),
+ (28, "has document type in"),
+ (29, "does not have document type in"),
+ (30, "has storage path in"),
+ (31, "does not have storage path in"),
+ (32, "owner is"),
+ (33, "has owner in"),
+ (34, "does not have owner"),
+ (35, "does not have owner in"),
+ (36, "has custom field value"),
+ (37, "is shared by me"),
+ (38, "has custom fields"),
+ (39, "has custom field in"),
+ (40, "does not have custom field in"),
+ (41, "does not have custom field"),
+ (42, "custom fields query"),
+ (43, "created to"),
+ (44, "created from"),
+ (45, "added to"),
+ (46, "added from"),
+ (47, "mime type is"),
+ (48, "simple title search"),
+ (49, "simple text search"),
+ ],
+ verbose_name="rule type",
+ ),
+ ),
+ migrations.RunPython(
+ migrate_saved_view_rules_forward,
+ migrations.RunPython.noop,  # reverse is a no-op: rule types are not converted back to 0 / 19
+ ),
+ ]
diff --git a/src/documents/models.py b/src/documents/models.py
index 96f027b94..9af5fbc23 100644
--- a/src/documents/models.py
+++ b/src/documents/models.py
@@ -623,6 +623,8 @@ class SavedViewFilterRule(models.Model):
(45, _("added to")),
(46, _("added from")),
(47, _("mime type is")),
+ (48, _("simple title search")),
+ (49, _("simple text search")),
]
saved_view = models.ForeignKey(
diff --git a/src/documents/plugins/helpers.py b/src/documents/plugins/helpers.py
index e5cfde3b8..e30591125 100644
--- a/src/documents/plugins/helpers.py
+++ b/src/documents/plugins/helpers.py
@@ -1,6 +1,9 @@
import enum
-from collections.abc import Mapping
from typing import TYPE_CHECKING
+from typing import Literal
+from typing import Self
+from typing import TypeAlias
+from typing import TypedDict
from asgiref.sync import async_to_sync
from channels.layers import get_channel_layer
@@ -16,6 +19,59 @@ class ProgressStatusOptions(enum.StrEnum):
FAILED = "FAILED"
+class PermissionsData(TypedDict, total=False):  # total=False: every key is optional
+ """Permission fields included in status messages for access control."""
+
+ owner_id: int | None
+ users_can_view: list[int]
+ groups_can_view: list[int]
+
+
+class ProgressUpdateData(TypedDict):  # "data" body of a "status_update" message
+ filename: str | None
+ task_id: str | None
+ current_progress: int
+ max_progress: int
+ status: str
+ message: str
+ document_id: int | None
+ owner_id: int | None
+ users_can_view: list[int]
+ groups_can_view: list[int]
+
+
+class StatusUpdatePayload(TypedDict):  # envelope built by ProgressManager
+ type: Literal["status_update"]
+ data: ProgressUpdateData
+
+
+class DocumentsDeletedData(TypedDict):  # "data" body of a "documents_deleted" message
+ documents: list[int]
+
+
+class DocumentsDeletedPayload(TypedDict):  # envelope built by DocumentsStatusManager.send_documents_deleted
+ type: Literal["documents_deleted"]
+ data: DocumentsDeletedData
+
+
+class DocumentUpdatedData(TypedDict):  # "data" body of a "document_updated" message
+ document_id: int
+ modified: str
+ owner_id: int | None
+ users_can_view: list[int]
+ groups_can_view: list[int]
+
+
+class DocumentUpdatedPayload(TypedDict):  # envelope built by DocumentsStatusManager.send_document_updated
+ type: Literal["document_updated"]
+ data: DocumentUpdatedData
+
+
+WebsocketPayload: TypeAlias = (  # every payload shape accepted by BaseStatusManager.send
+ StatusUpdatePayload | DocumentsDeletedPayload | DocumentUpdatedPayload
+)
+
+
class BaseStatusManager:
"""
Handles sending of progress information via the channel layer, with proper management
@@ -25,11 +81,11 @@ class BaseStatusManager:
def __init__(self) -> None:
self._channel: RedisPubSubChannelLayer | None = None
- def __enter__(self):
+ def __enter__(self) -> Self:
self.open()
return self
- def __exit__(self, exc_type, exc_val, exc_tb):
+ def __exit__(self, exc_type: object, exc_val: object, exc_tb: object) -> None:
self.close()
def open(self) -> None:
@@ -48,7 +104,7 @@ class BaseStatusManager:
async_to_sync(self._channel.flush)
self._channel = None
- def send(self, payload: Mapping[str, object]) -> None:
+ def send(self, payload: WebsocketPayload) -> None:
# Ensure the layer is open
self.open()
@@ -72,36 +128,36 @@ class ProgressManager(BaseStatusManager):
message: str,
current_progress: int,
max_progress: int,
- extra_args: dict[str, str | int | None] | None = None,
+ *,
+ document_id: int | None = None,
+ owner_id: int | None = None,
+ users_can_view: list[int] | None = None,
+ groups_can_view: list[int] | None = None,
) -> None:
- data: dict[str, object] = {
+ data: ProgressUpdateData = {
"filename": self.filename,
"task_id": self.task_id,
"current_progress": current_progress,
"max_progress": max_progress,
"status": status,
"message": message,
+ "document_id": document_id,
+ "owner_id": owner_id,
+ "users_can_view": users_can_view or [],
+ "groups_can_view": groups_can_view or [],
}
- if extra_args is not None:
- data.update(extra_args)
-
- payload: dict[str, object] = {
- "type": "status_update",
- "data": data,
- }
-
+ payload: StatusUpdatePayload = {"type": "status_update", "data": data}
self.send(payload)
class DocumentsStatusManager(BaseStatusManager):
def send_documents_deleted(self, documents: list[int]) -> None:
- payload: dict[str, object] = {
+ payload: DocumentsDeletedPayload = {
"type": "documents_deleted",
"data": {
"documents": documents,
},
}
-
self.send(payload)
def send_document_updated(
@@ -113,7 +169,7 @@ class DocumentsStatusManager(BaseStatusManager):
users_can_view: list[int] | None = None,
groups_can_view: list[int] | None = None,
) -> None:
- payload: dict[str, object] = {
+ payload: DocumentUpdatedPayload = {
"type": "document_updated",
"data": {
"document_id": document_id,
@@ -123,5 +179,4 @@ class DocumentsStatusManager(BaseStatusManager):
"groups_can_view": groups_can_view or [],
},
}
-
self.send(payload)
diff --git a/src/documents/search/__init__.py b/src/documents/search/__init__.py
index b0a89f242..a4145d7ef 100644
--- a/src/documents/search/__init__.py
+++ b/src/documents/search/__init__.py
@@ -1,4 +1,5 @@
from documents.search._backend import SearchIndexLockError
+from documents.search._backend import SearchMode
from documents.search._backend import SearchResults
from documents.search._backend import TantivyBackend
from documents.search._backend import TantivyRelevanceList
@@ -10,6 +11,7 @@ from documents.search._schema import wipe_index
__all__ = [
"SearchIndexLockError",
+ "SearchMode",
"SearchResults",
"TantivyBackend",
"TantivyRelevanceList",
diff --git a/src/documents/search/_backend.py b/src/documents/search/_backend.py
index a1bff8a9f..2005a436f 100644
--- a/src/documents/search/_backend.py
+++ b/src/documents/search/_backend.py
@@ -2,11 +2,11 @@ from __future__ import annotations
import logging
import threading
-import unicodedata
from collections import Counter
from dataclasses import dataclass
from datetime import UTC
from datetime import datetime
+from enum import StrEnum
from typing import TYPE_CHECKING
from typing import Self
from typing import TypedDict
@@ -19,7 +19,10 @@ from django.conf import settings
from django.utils.timezone import get_current_timezone
from guardian.shortcuts import get_users_with_perms
+from documents.search._normalize import ascii_fold
from documents.search._query import build_permission_filter
+from documents.search._query import parse_simple_text_query
+from documents.search._query import parse_simple_title_query
from documents.search._query import parse_user_query
from documents.search._schema import _write_sentinels
from documents.search._schema import build_schema
@@ -45,14 +48,10 @@ _AUTOCOMPLETE_REGEX_TIMEOUT = 1.0 # seconds; guards against ReDoS on untrusted
T = TypeVar("T")
-def _ascii_fold(s: str) -> str:
- """
- Normalize unicode to ASCII equivalent characters for search consistency.
-
- Converts accented characters (e.g., "café") to their ASCII base forms ("cafe")
- to enable cross-language searching without requiring exact diacritic matching.
- """
- return unicodedata.normalize("NFD", s).encode("ascii", "ignore").decode()
+class SearchMode(StrEnum):  # selects how the backend's search interprets the query string
+ QUERY = "query"  # advanced query syntax (natural date keywords, field filters); the default
+ TEXT = "text"  # plain text searched in title and content
+ TITLE = "title"  # plain text searched in title only
def _extract_autocomplete_words(text_sources: list[str]) -> set[str]:
@@ -74,7 +73,7 @@ def _extract_autocomplete_words(text_sources: list[str]) -> set[str]:
)
continue
for token in tokens:
- normalized = _ascii_fold(token.lower())
+ normalized = ascii_fold(token.lower())
if normalized:
words.add(normalized)
return words
@@ -294,8 +293,10 @@ class TantivyBackend:
doc.add_text("checksum", document.checksum)
doc.add_text("title", document.title)
doc.add_text("title_sort", document.title)
+ doc.add_text("simple_title", document.title)
doc.add_text("content", content)
doc.add_text("bigram_content", content)
+ doc.add_text("simple_content", content)
# Original filename - only add if not None/empty
if document.original_filename:
@@ -433,6 +434,7 @@ class TantivyBackend:
sort_field: str | None,
*,
sort_reverse: bool,
+ search_mode: SearchMode = SearchMode.QUERY,
) -> SearchResults:
"""
Execute a search query against the document index.
@@ -441,20 +443,32 @@ class TantivyBackend:
permission filtering before executing against Tantivy. Supports both
relevance-based and field-based sorting.
+ QUERY search mode supports natural date keywords, field filters, etc.
+ TITLE search mode treats the query as plain text to search for in title only
+ TEXT search mode treats the query as plain text to search for in title and content
+
Args:
- query: User's search query (supports natural date keywords, field filters)
+ query: User's search query
user: User for permission filtering (None for superuser/no filtering)
page: Page number (1-indexed) for pagination
page_size: Number of results per page
sort_field: Field to sort by (None for relevance ranking)
sort_reverse: Whether to reverse the sort order
+ search_mode: "query" for advanced Tantivy syntax, "text" for
+ plain-text search over title and content only, "title" for
+ plain-text search over title only
Returns:
SearchResults with hits, total count, and processed query
"""
self._ensure_open()
tz = get_current_timezone()
- user_query = parse_user_query(self._index, query, tz)
+ if search_mode is SearchMode.TEXT:
+ user_query = parse_simple_text_query(self._index, query)
+ elif search_mode is SearchMode.TITLE:
+ user_query = parse_simple_title_query(self._index, query)
+ else:
+ user_query = parse_user_query(self._index, query, tz)
# Apply permission filter if user is not None (not superuser)
if user is not None:
@@ -518,6 +532,7 @@ class TantivyBackend:
# Build result hits with highlights
hits: list[SearchHit] = []
snippet_generator = None
+ notes_snippet_generator = None
for rank, (doc_address, score) in enumerate(page_hits, start=offset + 1):
# Get the actual document from the searcher using the doc address
@@ -544,13 +559,16 @@ class TantivyBackend:
# Try notes highlights
if "notes" in doc_dict:
- notes_generator = tantivy.SnippetGenerator.create(
- searcher,
- final_query,
- self._schema,
- "notes",
+ if notes_snippet_generator is None:
+ notes_snippet_generator = tantivy.SnippetGenerator.create(
+ searcher,
+ final_query,
+ self._schema,
+ "notes",
+ )
+ notes_snippet = notes_snippet_generator.snippet_from_doc(
+ actual_doc,
)
- notes_snippet = notes_generator.snippet_from_doc(actual_doc)
if notes_snippet:
highlights["notes"] = str(notes_snippet)
@@ -594,7 +612,7 @@ class TantivyBackend:
List of word suggestions ordered by frequency, then alphabetically
"""
self._ensure_open()
- normalized_term = _ascii_fold(term.lower())
+ normalized_term = ascii_fold(term.lower())
searcher = self._index.searcher()
diff --git a/src/documents/search/_normalize.py b/src/documents/search/_normalize.py
new file mode 100644
index 000000000..3d7b23f33
--- /dev/null
+++ b/src/documents/search/_normalize.py
@@ -0,0 +1,8 @@
+from __future__ import annotations
+
+import unicodedata
+
+
+def ascii_fold(text: str) -> str:
+ """Fold to ASCII for search: strip diacritics, drop other non-ASCII characters."""
+ return unicodedata.normalize("NFD", text).encode("ascii", "ignore").decode()
diff --git a/src/documents/search/_query.py b/src/documents/search/_query.py
index 212df1516..b7bcbbe9c 100644
--- a/src/documents/search/_query.py
+++ b/src/documents/search/_query.py
@@ -12,6 +12,8 @@ import tantivy
from dateutil.relativedelta import relativedelta
from django.conf import settings
+from documents.search._normalize import ascii_fold
+
if TYPE_CHECKING:
from datetime import tzinfo
@@ -51,6 +53,7 @@ _WHOOSH_REL_RANGE_RE = regex.compile(
)
# Whoosh-style 8-digit date: field:YYYYMMDD — field-aware so timezone can be applied correctly
_DATE8_RE = regex.compile(r"(?P\w+):(?P\d{8})\b")
+_SIMPLE_QUERY_TOKEN_RE = regex.compile(r"\S+")
def _fmt(dt: datetime) -> str:
@@ -436,7 +439,37 @@ DEFAULT_SEARCH_FIELDS = [
"document_type",
"tag",
]
+SIMPLE_SEARCH_FIELDS = ["simple_title", "simple_content"]
+TITLE_SEARCH_FIELDS = ["simple_title"]
_FIELD_BOOSTS = {"title": 2.0}
+_SIMPLE_FIELD_BOOSTS = {"simple_title": 2.0}
+
+
+def _build_simple_field_query(
+ index: tantivy.Index,
+ field: str,
+ tokens: list[str],
+) -> tantivy.Query:
+ patterns = []
+ for idx, token in enumerate(tokens):
+ escaped = regex.escape(token)
+ # For multi-token substring search, only the first token can begin mid-word.
+ # Later tokens follow a whitespace boundary in the original query, so anchor
+ # them to the start of the next indexed token to reduce false positives like
+ # matching "Z-Berichte 16" for the query "Z-Berichte 6".
+ if idx == 0:
+ patterns.append(f".*{escaped}.*")
+ else:
+ patterns.append(f"{escaped}.*")
+ if len(patterns) == 1:
+ query = tantivy.Query.regex_query(index.schema, field, patterns[0])
+ else:
+ query = tantivy.Query.regex_phrase_query(index.schema, field, patterns)
+
+ boost = _SIMPLE_FIELD_BOOSTS.get(field, 1.0)
+ if boost > 1.0:
+ return tantivy.Query.boost_query(query, boost)
+ return query
def parse_user_query(
@@ -495,3 +528,52 @@ def parse_user_query(
)
return exact
+
+
+def parse_simple_query(
+ index: tantivy.Index,
+ raw_query: str,
+ fields: list[str],
+) -> tantivy.Query:
+ """
+ Parse a plain-text query using Tantivy over a restricted field set.
+
+ The query is lowercased, ASCII-folded and escaped so it matches as "simple" text.
+ """
+ tokens = [
+ ascii_fold(token.lower())
+ for token in _SIMPLE_QUERY_TOKEN_RE.findall(raw_query, timeout=_REGEX_TIMEOUT)
+ ]
+ tokens = [token for token in tokens if token]
+ if not tokens:
+ return tantivy.Query.empty_query()
+
+ field_queries = [
+ (tantivy.Occur.Should, _build_simple_field_query(index, field, tokens))
+ for field in fields
+ ]
+ if len(field_queries) == 1:
+ return field_queries[0][1]
+ return tantivy.Query.boolean_query(field_queries)
+
+
+def parse_simple_text_query(
+ index: tantivy.Index,
+ raw_query: str,
+) -> tantivy.Query:
+ """
+ Parse a plain-text query over title/content for simple search inputs.
+ """
+
+ return parse_simple_query(index, raw_query, SIMPLE_SEARCH_FIELDS)
+
+
+def parse_simple_title_query(
+ index: tantivy.Index,
+ raw_query: str,
+) -> tantivy.Query:
+ """
+ Parse a plain-text query over the title field only.
+ """
+
+ return parse_simple_query(index, raw_query, TITLE_SEARCH_FIELDS)
diff --git a/src/documents/search/_schema.py b/src/documents/search/_schema.py
index ba6646007..5e9404235 100644
--- a/src/documents/search/_schema.py
+++ b/src/documents/search/_schema.py
@@ -53,6 +53,18 @@ def build_schema() -> tantivy.Schema:
# CJK support - not stored, indexed only
sb.add_text_field("bigram_content", stored=False, tokenizer_name="bigram_analyzer")
+ # Simple substring search support for title/content - not stored, indexed only
+ sb.add_text_field(
+ "simple_title",
+ stored=False,
+ tokenizer_name="simple_search_analyzer",
+ )
+ sb.add_text_field(
+ "simple_content",
+ stored=False,
+ tokenizer_name="simple_search_analyzer",
+ )
+
# Autocomplete prefix scan - stored, not indexed
sb.add_text_field("autocomplete_word", stored=True, tokenizer_name="raw")
diff --git a/src/documents/search/_tokenizer.py b/src/documents/search/_tokenizer.py
index e597a879e..2079ca4cc 100644
--- a/src/documents/search/_tokenizer.py
+++ b/src/documents/search/_tokenizer.py
@@ -70,6 +70,7 @@ def register_tokenizers(index: tantivy.Index, language: str | None) -> None:
index.register_tokenizer("paperless_text", _paperless_text(language))
index.register_tokenizer("simple_analyzer", _simple_analyzer())
index.register_tokenizer("bigram_analyzer", _bigram_analyzer())
+ index.register_tokenizer("simple_search_analyzer", _simple_search_analyzer())
# Fast-field tokenizer required for fast=True text fields in the schema
index.register_fast_field_tokenizer("simple_analyzer", _simple_analyzer())
@@ -114,3 +115,16 @@ def _bigram_analyzer() -> tantivy.TextAnalyzer:
.filter(tantivy.Filter.lowercase())
.build()
)
+
+
+def _simple_search_analyzer() -> tantivy.TextAnalyzer:
+ """Tokenizer for simple substring search fields: non-whitespace chunks -> remove_long(65) -> lowercase -> ascii_fold."""
+ return (
+ tantivy.TextAnalyzerBuilder(
+ tantivy.Tokenizer.regex(r"\S+"),
+ )
+ .filter(tantivy.Filter.remove_long(65))
+ .filter(tantivy.Filter.lowercase())
+ .filter(tantivy.Filter.ascii_fold())
+ .build()
+ )
diff --git a/src/documents/settings.py b/src/documents/settings.py
index 9dff44c95..c4c87b8a7 100644
--- a/src/documents/settings.py
+++ b/src/documents/settings.py
@@ -3,6 +3,7 @@
EXPORTER_FILE_NAME = "__exported_file_name__"
EXPORTER_THUMBNAIL_NAME = "__exported_thumbnail_name__"
EXPORTER_ARCHIVE_NAME = "__exported_archive_name__"
+EXPORTER_SHARE_LINK_BUNDLE_NAME = "__exported_share_link_bundle_name__"
EXPORTER_CRYPTO_SETTINGS_NAME = "__crypto__"
EXPORTER_CRYPTO_SALT_NAME = "__salt_hex__"
diff --git a/src/documents/tasks.py b/src/documents/tasks.py
index bc4ed1abe..57c819492 100644
--- a/src/documents/tasks.py
+++ b/src/documents/tasks.py
@@ -62,6 +62,7 @@ from documents.utils import compute_checksum
from documents.utils import identity
from documents.workflows.utils import get_workflows_for_trigger
from paperless.config import AIConfig
+from paperless.logging import consume_task_id
from paperless.parsers import ParserContext
from paperless.parsers.registry import get_parser_registry
from paperless_ai.indexing import llm_index_add_or_update_document
@@ -148,76 +149,85 @@ def consume_file(
input_doc: ConsumableDocument,
overrides: DocumentMetadataOverrides | None = None,
):
- # Default no overrides
- if overrides is None:
- overrides = DocumentMetadataOverrides()
+ token = consume_task_id.set((self.request.id or "")[:8])
+ try:
+ # Default no overrides
+ if overrides is None:
+ overrides = DocumentMetadataOverrides()
- plugins: list[type[ConsumeTaskPlugin]] = (
- [
- ConsumerPreflightPlugin,
- ConsumerPlugin,
- ]
- if input_doc.root_document_id is not None
- else [
- ConsumerPreflightPlugin,
- AsnCheckPlugin,
- CollatePlugin,
- BarcodePlugin,
- AsnCheckPlugin, # Re-run ASN check after barcode reading
- WorkflowTriggerPlugin,
- ConsumerPlugin,
- ]
- )
+ plugins: list[type[ConsumeTaskPlugin]] = (
+ [
+ ConsumerPreflightPlugin,
+ ConsumerPlugin,
+ ]
+ if input_doc.root_document_id is not None
+ else [
+ ConsumerPreflightPlugin,
+ AsnCheckPlugin,
+ CollatePlugin,
+ BarcodePlugin,
+ AsnCheckPlugin, # Re-run ASN check after barcode reading
+ WorkflowTriggerPlugin,
+ ConsumerPlugin,
+ ]
+ )
- with (
- ProgressManager(
- overrides.filename or input_doc.original_file.name,
- self.request.id,
- ) as status_mgr,
- TemporaryDirectory(dir=settings.SCRATCH_DIR) as tmp_dir,
- ):
- tmp_dir = Path(tmp_dir)
- for plugin_class in plugins:
- plugin_name = plugin_class.NAME
-
- plugin = plugin_class(
- input_doc,
- overrides,
- status_mgr,
- tmp_dir,
+ with (
+ ProgressManager(
+ overrides.filename or input_doc.original_file.name,
self.request.id,
- )
+ ) as status_mgr,
+ TemporaryDirectory(dir=settings.SCRATCH_DIR) as tmp_dir,
+ ):
+ tmp_dir = Path(tmp_dir)
+ for plugin_class in plugins:
+ plugin_name = plugin_class.NAME
- if not plugin.able_to_run:
- logger.debug(f"Skipping plugin {plugin_name}")
- continue
+ plugin = plugin_class(
+ input_doc,
+ overrides,
+ status_mgr,
+ tmp_dir,
+ self.request.id,
+ )
- try:
- logger.debug(f"Executing plugin {plugin_name}")
- plugin.setup()
+ if not plugin.able_to_run:
+ logger.debug(f"Skipping plugin {plugin_name}")
+ continue
- msg = plugin.run()
+ try:
+ logger.debug(f"Executing plugin {plugin_name}")
+ plugin.setup()
- if msg is not None:
- logger.info(f"{plugin_name} completed with: {msg}")
- else:
- logger.info(f"{plugin_name} completed with no message")
+ msg = plugin.run()
- overrides = plugin.metadata
+ if msg is not None:
+ logger.info(f"{plugin_name} completed with: {msg}")
+ else:
+ logger.info(f"{plugin_name} completed with no message")
- except StopConsumeTaskError as e:
- logger.info(f"{plugin_name} requested task exit: {e.message}")
- return e.message
+ overrides = plugin.metadata
- except Exception as e:
- logger.exception(f"{plugin_name} failed: {e}")
- status_mgr.send_progress(ProgressStatusOptions.FAILED, f"{e}", 100, 100)
- raise
+ except StopConsumeTaskError as e:
+ logger.info(f"{plugin_name} requested task exit: {e.message}")
+ return e.message
- finally:
- plugin.cleanup()
+ except Exception as e:
+ logger.exception(f"{plugin_name} failed: {e}")
+ status_mgr.send_progress(
+ ProgressStatusOptions.FAILED,
+ f"{e}",
+ 100,
+ 100,
+ )
+ raise
- return msg
+ finally:
+ plugin.cleanup()
+
+ return msg
+ finally:
+ consume_task_id.reset(token)
@shared_task
diff --git a/src/documents/tests/search/test_backend.py b/src/documents/tests/search/test_backend.py
index 5c92da447..ff9638e63 100644
--- a/src/documents/tests/search/test_backend.py
+++ b/src/documents/tests/search/test_backend.py
@@ -5,6 +5,7 @@ from documents.models import CustomField
from documents.models import CustomFieldInstance
from documents.models import Document
from documents.models import Note
+from documents.search._backend import SearchMode
from documents.search._backend import TantivyBackend
from documents.search._backend import get_backend
from documents.search._backend import reset_backend
@@ -46,6 +47,258 @@ class TestWriteBatch:
class TestSearch:
"""Test search functionality."""
+ def test_text_mode_limits_default_search_to_title_and_content(
+ self,
+ backend: TantivyBackend,
+ ):
+ """Simple text mode must not match metadata-only fields."""
+ doc = Document.objects.create(
+ title="Invoice document",
+ content="monthly statement",
+ checksum="TXT1",
+ pk=9,
+ )
+ backend.add_or_update(doc)
+
+ metadata_only = backend.search(
+ "document_type:invoice",
+ user=None,
+ page=1,
+ page_size=10,
+ sort_field=None,
+ sort_reverse=False,
+ search_mode=SearchMode.TEXT,
+ )
+ assert metadata_only.total == 0
+
+ content_match = backend.search(
+ "monthly",
+ user=None,
+ page=1,
+ page_size=10,
+ sort_field=None,
+ sort_reverse=False,
+ search_mode=SearchMode.TEXT,
+ )
+ assert content_match.total == 1
+
+ def test_title_mode_limits_default_search_to_title_only(
+ self,
+ backend: TantivyBackend,
+ ):
+ """Title mode must not match content-only terms."""
+ doc = Document.objects.create(
+ title="Invoice document",
+ content="monthly statement",
+ checksum="TXT2",
+ pk=10,
+ )
+ backend.add_or_update(doc)
+
+ content_only = backend.search(
+ "monthly",
+ user=None,
+ page=1,
+ page_size=10,
+ sort_field=None,
+ sort_reverse=False,
+ search_mode=SearchMode.TITLE,
+ )
+ assert content_only.total == 0
+
+ title_match = backend.search(
+ "invoice",
+ user=None,
+ page=1,
+ page_size=10,
+ sort_field=None,
+ sort_reverse=False,
+ search_mode=SearchMode.TITLE,
+ )
+ assert title_match.total == 1
+
+ def test_text_mode_matches_partial_term_substrings(
+ self,
+ backend: TantivyBackend,
+ ):
+ """Simple text mode should support substring matching within tokens."""
+ doc = Document.objects.create(
+ title="Account access",
+ content="password reset instructions",
+ checksum="TXT3",
+ pk=11,
+ )
+ backend.add_or_update(doc)
+
+ prefix_match = backend.search(
+ "pass",
+ user=None,
+ page=1,
+ page_size=10,
+ sort_field=None,
+ sort_reverse=False,
+ search_mode=SearchMode.TEXT,
+ )
+ assert prefix_match.total == 1
+
+ infix_match = backend.search(
+ "sswo",
+ user=None,
+ page=1,
+ page_size=10,
+ sort_field=None,
+ sort_reverse=False,
+ search_mode=SearchMode.TEXT,
+ )
+ assert infix_match.total == 1
+
+ phrase_match = backend.search(
+ "sswo re",
+ user=None,
+ page=1,
+ page_size=10,
+ sort_field=None,
+ sort_reverse=False,
+ search_mode=SearchMode.TEXT,
+ )
+ assert phrase_match.total == 1
+
+ def test_text_mode_does_not_match_on_partial_term_overlap(
+ self,
+ backend: TantivyBackend,
+ ):
+ """Simple text mode should not match documents that merely share partial fragments."""
+ doc = Document.objects.create(
+ title="Adobe Acrobat PDF Files",
+ content="Lorem ipsum dolor sit amet, consectetur adipiscing elit.",
+ checksum="TXT7",
+ pk=13,
+ )
+ backend.add_or_update(doc)
+
+ non_match = backend.search(
+ "raptor",
+ user=None,
+ page=1,
+ page_size=10,
+ sort_field=None,
+ sort_reverse=False,
+ search_mode=SearchMode.TEXT,
+ )
+ assert non_match.total == 0
+
+ def test_text_mode_anchors_later_query_tokens_to_token_starts(
+ self,
+ backend: TantivyBackend,
+ ):
+ """Multi-token simple search should not match later tokens in the middle of a word."""
+ exact_doc = Document.objects.create(
+ title="Z-Berichte 6",
+ content="monthly report",
+ checksum="TXT9",
+ pk=15,
+ )
+ prefix_doc = Document.objects.create(
+ title="Z-Berichte 60",
+ content="monthly report",
+ checksum="TXT10",
+ pk=16,
+ )
+ false_positive = Document.objects.create(
+ title="Z-Berichte 16",
+ content="monthly report",
+ checksum="TXT11",
+ pk=17,
+ )
+ backend.add_or_update(exact_doc)
+ backend.add_or_update(prefix_doc)
+ backend.add_or_update(false_positive)
+
+ results = backend.search(
+ "Z-Berichte 6",
+ user=None,
+ page=1,
+ page_size=10,
+ sort_field=None,
+ sort_reverse=False,
+ search_mode=SearchMode.TEXT,
+ )
+ result_ids = {hit["id"] for hit in results.hits}
+
+ assert exact_doc.id in result_ids
+ assert prefix_doc.id in result_ids
+ assert false_positive.id not in result_ids
+
+ def test_text_mode_ignores_queries_without_searchable_tokens(
+ self,
+ backend: TantivyBackend,
+ ):
+ """Simple text mode should safely return no hits for symbol-only strings."""
+ doc = Document.objects.create(
+ title="Guide",
+ content="This is a guide.",
+ checksum="TXT8",
+ pk=14,
+ )
+ backend.add_or_update(doc)
+
+ no_tokens = backend.search(
+ "!!!",
+ user=None,
+ page=1,
+ page_size=10,
+ sort_field=None,
+ sort_reverse=False,
+ search_mode=SearchMode.TEXT,
+ )
+ assert no_tokens.total == 0
+
+ def test_title_mode_matches_partial_term_substrings(
+ self,
+ backend: TantivyBackend,
+ ):
+ """Title mode should support substring matching within title tokens."""
+ doc = Document.objects.create(
+ title="Password guide",
+ content="reset instructions",
+ checksum="TXT4",
+ pk=12,
+ )
+ backend.add_or_update(doc)
+
+ prefix_match = backend.search(
+ "pass",
+ user=None,
+ page=1,
+ page_size=10,
+ sort_field=None,
+ sort_reverse=False,
+ search_mode=SearchMode.TITLE,
+ )
+ assert prefix_match.total == 1
+
+ infix_match = backend.search(
+ "sswo",
+ user=None,
+ page=1,
+ page_size=10,
+ sort_field=None,
+ sort_reverse=False,
+ search_mode=SearchMode.TITLE,
+ )
+ assert infix_match.total == 1
+
+ phrase_match = backend.search(
+ "sswo gu",
+ user=None,
+ page=1,
+ page_size=10,
+ sort_field=None,
+ sort_reverse=False,
+ search_mode=SearchMode.TITLE,
+ )
+ assert phrase_match.total == 1
+
def test_scores_normalised_top_hit_is_one(self, backend: TantivyBackend):
"""Search scores must be normalized so top hit has score 1.0 for UI consistency."""
for i, title in enumerate(["bank invoice", "bank statement", "bank receipt"]):
diff --git a/src/documents/tests/search/test_tokenizer.py b/src/documents/tests/search/test_tokenizer.py
index aee52a567..fc2c41231 100644
--- a/src/documents/tests/search/test_tokenizer.py
+++ b/src/documents/tests/search/test_tokenizer.py
@@ -8,6 +8,7 @@ import tantivy
from documents.search._tokenizer import _bigram_analyzer
from documents.search._tokenizer import _paperless_text
+from documents.search._tokenizer import _simple_search_analyzer
from documents.search._tokenizer import register_tokenizers
if TYPE_CHECKING:
@@ -41,6 +42,20 @@ class TestTokenizers:
idx.register_tokenizer("bigram_analyzer", _bigram_analyzer())
return idx
+ @pytest.fixture
+ def simple_search_index(self) -> tantivy.Index:
+ """Index with simple-search field for Latin substring tests."""
+ sb = tantivy.SchemaBuilder()
+ sb.add_text_field(
+ "simple_content",
+ stored=False,
+ tokenizer_name="simple_search_analyzer",
+ )
+ schema = sb.build()
+ idx = tantivy.Index(schema, path=None)
+ idx.register_tokenizer("simple_search_analyzer", _simple_search_analyzer())
+ return idx
+
def test_ascii_fold_finds_accented_content(
self,
content_index: tantivy.Index,
@@ -66,6 +81,24 @@ class TestTokenizers:
q = bigram_index.parse_query("東京", ["bigram_content"])
assert bigram_index.searcher().search(q, limit=5).count == 1
+ def test_simple_search_analyzer_supports_regex_substrings(
+ self,
+ simple_search_index: tantivy.Index,
+ ) -> None:
+ """Whitespace-splitting simple search analyzer supports substring regex matching."""
+ writer = simple_search_index.writer()
+ doc = tantivy.Document()
+ doc.add_text("simple_content", "tag:invoice password-reset")
+ writer.add_document(doc)
+ writer.commit()
+ simple_search_index.reload()
+ q = tantivy.Query.regex_query(
+ simple_search_index.schema,
+ "simple_content",
+ ".*sswo.*",
+ )
+ assert simple_search_index.searcher().search(q, limit=5).count == 1
+
def test_unsupported_language_logs_warning(self, caplog: LogCaptureFixture) -> None:
"""Unsupported language codes should log a warning and disable stemming gracefully."""
sb = tantivy.SchemaBuilder()
diff --git a/src/documents/tests/test_api_search.py b/src/documents/tests/test_api_search.py
index 69bd65198..9e0879e89 100644
--- a/src/documents/tests/test_api_search.py
+++ b/src/documents/tests/test_api_search.py
@@ -91,6 +91,135 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
self.assertEqual(response.data["count"], 0)
self.assertEqual(len(results), 0)
+ def test_simple_text_search(self) -> None:
+ tagged = Tag.objects.create(name="invoice")
+ matching_doc = Document.objects.create(
+ title="Quarterly summary",
+ content="Monthly bank report",
+ checksum="T1",
+ pk=11,
+ )
+ matching_doc.tags.add(tagged)
+
+ metadata_only_doc = Document.objects.create(
+ title="Completely unrelated",
+ content="No matching terms here",
+ checksum="T2",
+ pk=12,
+ )
+ metadata_only_doc.tags.add(tagged)
+
+ backend = get_backend()
+ backend.add_or_update(matching_doc)
+ backend.add_or_update(metadata_only_doc)
+
+ response = self.client.get("/api/documents/?text=monthly")
+ self.assertEqual(response.status_code, status.HTTP_200_OK)
+ self.assertEqual(response.data["count"], 1)
+ self.assertEqual(response.data["results"][0]["id"], matching_doc.id)
+
+ response = self.client.get("/api/documents/?text=tag:invoice")
+ self.assertEqual(response.status_code, status.HTTP_200_OK)
+ self.assertEqual(response.data["count"], 0)
+
+ def test_simple_text_search_matches_substrings(self) -> None:
+ matching_doc = Document.objects.create(
+ title="Quarterly summary",
+ content="Password reset instructions",
+ checksum="T5",
+ pk=15,
+ )
+
+ backend = get_backend()
+ backend.add_or_update(matching_doc)
+
+ response = self.client.get("/api/documents/?text=pass")
+ self.assertEqual(response.status_code, status.HTTP_200_OK)
+ self.assertEqual(response.data["count"], 1)
+ self.assertEqual(response.data["results"][0]["id"], matching_doc.id)
+
+ response = self.client.get("/api/documents/?text=sswo")
+ self.assertEqual(response.status_code, status.HTTP_200_OK)
+ self.assertEqual(response.data["count"], 1)
+ self.assertEqual(response.data["results"][0]["id"], matching_doc.id)
+
+ response = self.client.get("/api/documents/?text=sswo re")
+ self.assertEqual(response.status_code, status.HTTP_200_OK)
+ self.assertEqual(response.data["count"], 1)
+ self.assertEqual(response.data["results"][0]["id"], matching_doc.id)
+
+ def test_simple_text_search_does_not_match_on_partial_term_overlap(self) -> None:
+ non_matching_doc = Document.objects.create(
+ title="Adobe Acrobat PDF Files",
+ content="Lorem ipsum dolor sit amet, consectetur adipiscing elit.",
+ checksum="T7",
+ pk=17,
+ )
+
+ backend = get_backend()
+ backend.add_or_update(non_matching_doc)
+
+ response = self.client.get("/api/documents/?text=raptor")
+ self.assertEqual(response.status_code, status.HTTP_200_OK)
+ self.assertEqual(response.data["count"], 0)
+
+ def test_simple_title_search(self) -> None:
+ title_match = Document.objects.create(
+ title="Quarterly summary",
+ content="No matching content here",
+ checksum="T3",
+ pk=13,
+ )
+ content_only = Document.objects.create(
+ title="Completely unrelated",
+ content="Quarterly summary appears only in content",
+ checksum="T4",
+ pk=14,
+ )
+
+ backend = get_backend()
+ backend.add_or_update(title_match)
+ backend.add_or_update(content_only)
+
+ response = self.client.get("/api/documents/?title_search=quarterly")
+ self.assertEqual(response.status_code, status.HTTP_200_OK)
+ self.assertEqual(response.data["count"], 1)
+ self.assertEqual(response.data["results"][0]["id"], title_match.id)
+
+ def test_simple_title_search_matches_substrings(self) -> None:
+ title_match = Document.objects.create(
+ title="Password handbook",
+ content="No matching content here",
+ checksum="T6",
+ pk=16,
+ )
+
+ backend = get_backend()
+ backend.add_or_update(title_match)
+
+ response = self.client.get("/api/documents/?title_search=pass")
+ self.assertEqual(response.status_code, status.HTTP_200_OK)
+ self.assertEqual(response.data["count"], 1)
+ self.assertEqual(response.data["results"][0]["id"], title_match.id)
+
+ response = self.client.get("/api/documents/?title_search=sswo")
+ self.assertEqual(response.status_code, status.HTTP_200_OK)
+ self.assertEqual(response.data["count"], 1)
+ self.assertEqual(response.data["results"][0]["id"], title_match.id)
+
+ response = self.client.get("/api/documents/?title_search=sswo hand")
+ self.assertEqual(response.status_code, status.HTTP_200_OK)
+ self.assertEqual(response.data["count"], 1)
+ self.assertEqual(response.data["results"][0]["id"], title_match.id)
+
+ def test_search_rejects_multiple_search_modes(self) -> None:
+ response = self.client.get("/api/documents/?text=bank&query=bank")
+ self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
+ self.assertEqual(
+ response.data["detail"],
+ "Specify only one of text, title_search, query, or more_like_id.",
+ )
+
def test_search_returns_all_for_api_version_9(self) -> None:
d1 = Document.objects.create(
title="invoice",
@@ -1493,6 +1622,31 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
self.assertEqual(results["custom_fields"][0]["id"], custom_field1.id)
self.assertEqual(results["workflows"][0]["id"], workflow1.id)
+ def test_global_search_db_only_limits_documents_to_title_matches(self) -> None:
+ title_match = Document.objects.create(
+ title="bank statement",
+ content="no additional terms",
+ checksum="GS1",
+ pk=21,
+ )
+ content_only = Document.objects.create(
+ title="not a title match",
+ content="bank appears only in content",
+ checksum="GS2",
+ pk=22,
+ )
+
+ backend = get_backend()
+ backend.add_or_update(title_match)
+ backend.add_or_update(content_only)
+
+ self.client.force_authenticate(self.user)
+
+ response = self.client.get("/api/search/?query=bank&db_only=true")
+ self.assertEqual(response.status_code, status.HTTP_200_OK)
+ self.assertEqual(len(response.data["documents"]), 1)
+ self.assertEqual(response.data["documents"][0]["id"], title_match.id)
+
def test_global_search_filters_owned_mail_objects(self) -> None:
user1 = User.objects.create_user("mail-search-user")
user2 = User.objects.create_user("other-mail-search-user")
diff --git a/src/documents/tests/test_management_exporter.py b/src/documents/tests/test_management_exporter.py
index a214ef51d..4ee7677ca 100644
--- a/src/documents/tests/test_management_exporter.py
+++ b/src/documents/tests/test_management_exporter.py
@@ -2,6 +2,7 @@ import hashlib
import json
import shutil
import tempfile
+from datetime import timedelta
from io import StringIO
from pathlib import Path
from unittest import mock
@@ -11,6 +12,7 @@ import pytest
from allauth.socialaccount.models import SocialAccount
from allauth.socialaccount.models import SocialApp
from allauth.socialaccount.models import SocialToken
+from django.conf import settings
from django.contrib.auth.models import Group
from django.contrib.auth.models import Permission
from django.contrib.contenttypes.models import ContentType
@@ -31,6 +33,8 @@ from documents.models import CustomFieldInstance
from documents.models import Document
from documents.models import DocumentType
from documents.models import Note
+from documents.models import ShareLink
+from documents.models import ShareLinkBundle
from documents.models import StoragePath
from documents.models import Tag
from documents.models import User
@@ -39,6 +43,7 @@ from documents.models import WorkflowAction
from documents.models import WorkflowTrigger
from documents.sanity_checker import check_sanity
from documents.settings import EXPORTER_FILE_NAME
+from documents.settings import EXPORTER_SHARE_LINK_BUNDLE_NAME
from documents.tests.utils import DirectoriesMixin
from documents.tests.utils import FileSystemAssertsMixin
from documents.tests.utils import SampleDirMixin
@@ -306,6 +311,108 @@ class TestExportImport(
):
self.test_exporter(use_filename_format=True)
+ def test_exporter_includes_share_links_and_bundles(self) -> None:
+ shutil.rmtree(Path(self.dirs.media_dir) / "documents")
+ shutil.copytree(
+ Path(__file__).parent / "samples" / "documents",
+ Path(self.dirs.media_dir) / "documents",
+ )
+
+ share_link = ShareLink.objects.create(
+ slug="share-link-slug",
+ document=self.d1,
+ owner=self.user,
+ file_version=ShareLink.FileVersion.ORIGINAL,
+ expiration=timezone.now() + timedelta(days=7),
+ )
+
+ bundle_relative_path = Path("nested") / "share-bundle.zip"
+ bundle_source_path = settings.SHARE_LINK_BUNDLE_DIR / bundle_relative_path
+ bundle_source_path.parent.mkdir(parents=True, exist_ok=True)
+ bundle_source_path.write_bytes(b"share-bundle-contents")
+ bundle = ShareLinkBundle.objects.create(
+ slug="share-bundle-slug",
+ owner=self.user,
+ file_version=ShareLink.FileVersion.ARCHIVE,
+ expiration=timezone.now() + timedelta(days=7),
+ status=ShareLinkBundle.Status.READY,
+ size_bytes=bundle_source_path.stat().st_size,
+ file_path=str(bundle_relative_path),
+ built_at=timezone.now(),
+ )
+ bundle.documents.set([self.d1, self.d2])
+
+ manifest = self._do_export()
+
+ share_link_records = [
+ record for record in manifest if record["model"] == "documents.sharelink"
+ ]
+ self.assertEqual(len(share_link_records), 1)
+ self.assertEqual(share_link_records[0]["pk"], share_link.pk)
+ self.assertEqual(share_link_records[0]["fields"]["document"], self.d1.pk)
+ self.assertEqual(share_link_records[0]["fields"]["owner"], self.user.pk)
+
+ share_link_bundle_records = [
+ record
+ for record in manifest
+ if record["model"] == "documents.sharelinkbundle"
+ ]
+ self.assertEqual(len(share_link_bundle_records), 1)
+ bundle_record = share_link_bundle_records[0]
+ self.assertEqual(bundle_record["pk"], bundle.pk)
+ self.assertEqual(
+ bundle_record["fields"]["documents"],
+ [self.d1.pk, self.d2.pk],
+ )
+ self.assertEqual(
+ bundle_record[EXPORTER_SHARE_LINK_BUNDLE_NAME],
+ "share_link_bundles/nested/share-bundle.zip",
+ )
+ self.assertEqual(
+ bundle_record["fields"]["file_path"],
+ "nested/share-bundle.zip",
+ )
+ self.assertIsFile(self.target / bundle_record[EXPORTER_SHARE_LINK_BUNDLE_NAME])
+
+ with paperless_environment():
+ ShareLink.objects.all().delete()
+ ShareLinkBundle.objects.all().delete()
+ shutil.rmtree(settings.SHARE_LINK_BUNDLE_DIR, ignore_errors=True)
+
+ call_command(
+ "document_importer",
+ "--no-progress-bar",
+ self.target,
+ skip_checks=True,
+ )
+
+ imported_share_link = ShareLink.objects.get(pk=share_link.pk)
+ self.assertEqual(imported_share_link.document_id, self.d1.pk)
+ self.assertEqual(imported_share_link.owner_id, self.user.pk)
+ self.assertEqual(
+ imported_share_link.file_version,
+ ShareLink.FileVersion.ORIGINAL,
+ )
+
+ imported_bundle = ShareLinkBundle.objects.get(pk=bundle.pk)
+ imported_bundle_path = imported_bundle.absolute_file_path
+ self.assertEqual(imported_bundle.owner_id, self.user.pk)
+ self.assertEqual(
+ list(
+ imported_bundle.documents.order_by("pk").values_list(
+ "pk",
+ flat=True,
+ ),
+ ),
+ [self.d1.pk, self.d2.pk],
+ )
+ self.assertEqual(imported_bundle.file_path, "nested/share-bundle.zip")
+ self.assertIsNotNone(imported_bundle_path)
+ self.assertEqual(
+ imported_bundle_path.read_bytes(),
+ b"share-bundle-contents",
+ )
+
def test_update_export_changed_time(self) -> None:
shutil.rmtree(Path(self.dirs.media_dir) / "documents")
shutil.copytree(
diff --git a/src/documents/tests/utils.py b/src/documents/tests/utils.py
index cc4190974..98c8258b8 100644
--- a/src/documents/tests/utils.py
+++ b/src/documents/tests/utils.py
@@ -435,7 +435,11 @@ class DummyProgressManager:
message: str,
current_progress: int,
max_progress: int,
- extra_args: dict[str, str | int] | None = None,
+ *,
+ document_id: int | None = None,
+ owner_id: int | None = None,
+ users_can_view: list[int] | None = None,
+ groups_can_view: list[int] | None = None,
) -> None:
# Ensure the layer is open
self.open()
@@ -449,9 +453,10 @@ class DummyProgressManager:
"max_progress": max_progress,
"status": status,
"message": message,
+ "document_id": document_id,
+ "owner_id": owner_id,
+ "users_can_view": users_can_view or [],
+ "groups_can_view": groups_can_view or [],
},
}
- if extra_args is not None:
- payload["data"].update(extra_args)
-
self.payloads.append(payload)
diff --git a/src/documents/views.py b/src/documents/views.py
index 024e846a0..68d2b7961 100644
--- a/src/documents/views.py
+++ b/src/documents/views.py
@@ -1995,11 +1995,23 @@ class ChatStreamingView(GenericAPIView):
list=extend_schema(
description="Document views including search",
parameters=[
+ OpenApiParameter(
+ name="text",
+ type=OpenApiTypes.STR,
+ location=OpenApiParameter.QUERY,
+ description="Simple Tantivy-backed text search query string",
+ ),
+ OpenApiParameter(
+ name="title_search",
+ type=OpenApiTypes.STR,
+ location=OpenApiParameter.QUERY,
+ description="Simple Tantivy-backed title-only search query string",
+ ),
OpenApiParameter(
name="query",
type=OpenApiTypes.STR,
location=OpenApiParameter.QUERY,
- description="Advanced search query string",
+ description="Advanced Tantivy search query string",
),
OpenApiParameter(
name="full_perms",
@@ -2025,22 +2037,28 @@ class ChatStreamingView(GenericAPIView):
),
)
class UnifiedSearchViewSet(DocumentViewSet):
+ SEARCH_PARAM_NAMES = ("text", "title_search", "query", "more_like_id")
+
def get_serializer_class(self):
if self._is_search_request():
return SearchResultSerializer
else:
return DocumentSerializer
+ def _get_active_search_params(self, request: Request | None = None) -> list[str]:
+ request = request or self.request
+ return [
+ param for param in self.SEARCH_PARAM_NAMES if param in request.query_params
+ ]
+
def _is_search_request(self):
- return (
- "query" in self.request.query_params
- or "more_like_id" in self.request.query_params
- )
+ return bool(self._get_active_search_params())
def list(self, request, *args, **kwargs):
if not self._is_search_request():
return super().list(request)
+ from documents.search import SearchMode
from documents.search import TantivyRelevanceList
from documents.search import get_backend
@@ -2050,9 +2068,31 @@ class UnifiedSearchViewSet(DocumentViewSet):
filtered_qs = self.filter_queryset(self.get_queryset())
user = None if request.user.is_superuser else request.user
+ active_search_params = self._get_active_search_params(request)
- if "query" in request.query_params:
- query_str = request.query_params["query"]
+ if len(active_search_params) > 1:
+ raise ValidationError(
+ {
+ "detail": _(
+ "Specify only one of text, title_search, query, or more_like_id.",
+ ),
+ },
+ )
+
+ if (
+ "text" in request.query_params
+ or "title_search" in request.query_params
+ or "query" in request.query_params
+ ):
+ if "text" in request.query_params:
+ search_mode = SearchMode.TEXT
+ query_str = request.query_params["text"]
+ elif "title_search" in request.query_params:
+ search_mode = SearchMode.TITLE
+ query_str = request.query_params["title_search"]
+ else:
+ search_mode = SearchMode.QUERY
+ query_str = request.query_params["query"]
results = backend.search(
query_str,
user=user,
@@ -2060,6 +2100,7 @@ class UnifiedSearchViewSet(DocumentViewSet):
page_size=10000,
sort_field=None,
sort_reverse=False,
+ search_mode=search_mode,
)
else:
# more_like_id — validate permission on the seed document first
@@ -2132,6 +2173,8 @@ class UnifiedSearchViewSet(DocumentViewSet):
if str(e.detail) == str(invalid_more_like_id_message):
return HttpResponseForbidden(invalid_more_like_id_message)
return HttpResponseForbidden(_("Insufficient permissions."))
+ except ValidationError:
+ raise
except Exception as e:
logger.warning(f"An error occurred listing search results: {e!s}")
return HttpResponseBadRequest(
@@ -3003,6 +3046,9 @@ class GlobalSearchView(PassUserMixin):
serializer_class = SearchResultSerializer
def get(self, request, *args, **kwargs):
+ from documents.search import SearchMode
+ from documents.search import get_backend
+
query = request.query_params.get("query", None)
if query is None:
return HttpResponseBadRequest("Query required")
@@ -3019,25 +3065,25 @@ class GlobalSearchView(PassUserMixin):
"view_document",
Document,
)
- # First search by title
- docs = all_docs.filter(title__icontains=query)
- if not db_only and len(docs) < OBJECT_LIMIT:
- # If we don't have enough results, search by content.
- # Over-fetch from Tantivy (no permission filter) and rely on
- # the ORM all_docs queryset for authoritative permission gating.
- from documents.search import get_backend
-
+ if db_only:
+ docs = all_docs.filter(title__icontains=query)[:OBJECT_LIMIT]
+ else:
+ user = None if request.user.is_superuser else request.user
fts_results = get_backend().search(
query,
- user=None,
+ user=user,
page=1,
page_size=1000,
sort_field=None,
sort_reverse=False,
+ search_mode=SearchMode.TEXT,
)
- fts_ids = {h["id"] for h in fts_results.hits}
- docs = docs | all_docs.filter(id__in=fts_ids)
- docs = docs[:OBJECT_LIMIT]
+ docs_by_id = all_docs.in_bulk([hit["id"] for hit in fts_results.hits])
+ docs = [
+ docs_by_id[hit["id"]]
+ for hit in fts_results.hits
+ if hit["id"] in docs_by_id
+ ][:OBJECT_LIMIT]
saved_views = (
get_objects_for_user_owner_aware(
request.user,
diff --git a/src/locale/en_US/LC_MESSAGES/django.po b/src/locale/en_US/LC_MESSAGES/django.po
index 57ade319a..03fdcc6e1 100644
--- a/src/locale/en_US/LC_MESSAGES/django.po
+++ b/src/locale/en_US/LC_MESSAGES/django.po
@@ -2,7 +2,7 @@ msgid ""
msgstr ""
"Project-Id-Version: paperless-ngx\n"
"Report-Msgid-Bugs-To: \n"
-"POT-Creation-Date: 2026-04-03 03:25+0000\n"
+"POT-Creation-Date: 2026-04-03 20:54+0000\n"
"PO-Revision-Date: 2022-02-17 04:17\n"
"Last-Translator: \n"
"Language-Team: English\n"
@@ -21,67 +21,67 @@ msgstr ""
msgid "Documents"
msgstr ""
-#: documents/filters.py:421
+#: documents/filters.py:431
msgid "Value must be valid JSON."
msgstr ""
-#: documents/filters.py:440
+#: documents/filters.py:450
msgid "Invalid custom field query expression"
msgstr ""
-#: documents/filters.py:450
+#: documents/filters.py:460
msgid "Invalid expression list. Must be nonempty."
msgstr ""
-#: documents/filters.py:471
+#: documents/filters.py:481
msgid "Invalid logical operator {op!r}"
msgstr ""
-#: documents/filters.py:485
+#: documents/filters.py:495
msgid "Maximum number of query conditions exceeded."
msgstr ""
-#: documents/filters.py:550
+#: documents/filters.py:560
msgid "{name!r} is not a valid custom field."
msgstr ""
-#: documents/filters.py:587
+#: documents/filters.py:597
msgid "{data_type} does not support query expr {expr!r}."
msgstr ""
-#: documents/filters.py:695 documents/models.py:137
+#: documents/filters.py:705 documents/models.py:137
msgid "Maximum nesting depth exceeded."
msgstr ""
-#: documents/filters.py:907
+#: documents/filters.py:919
msgid "Custom field not found"
msgstr ""
-#: documents/models.py:40 documents/models.py:842 documents/models.py:890
+#: documents/models.py:40 documents/models.py:844 documents/models.py:892
msgid "owner"
msgstr ""
-#: documents/models.py:57 documents/models.py:1172
+#: documents/models.py:57 documents/models.py:1174
msgid "None"
msgstr ""
-#: documents/models.py:58 documents/models.py:1173
+#: documents/models.py:58 documents/models.py:1175
msgid "Any word"
msgstr ""
-#: documents/models.py:59 documents/models.py:1174
+#: documents/models.py:59 documents/models.py:1176
msgid "All words"
msgstr ""
-#: documents/models.py:60 documents/models.py:1175
+#: documents/models.py:60 documents/models.py:1177
msgid "Exact match"
msgstr ""
-#: documents/models.py:61 documents/models.py:1176
+#: documents/models.py:61 documents/models.py:1178
msgid "Regular expression"
msgstr ""
-#: documents/models.py:62 documents/models.py:1177
+#: documents/models.py:62 documents/models.py:1179
msgid "Fuzzy word"
msgstr ""
@@ -89,20 +89,20 @@ msgstr ""
msgid "Automatic"
msgstr ""
-#: documents/models.py:66 documents/models.py:536 documents/models.py:1755
+#: documents/models.py:66 documents/models.py:536 documents/models.py:1757
#: paperless_mail/models.py:23 paperless_mail/models.py:143
msgid "name"
msgstr ""
-#: documents/models.py:68 documents/models.py:1241
+#: documents/models.py:68 documents/models.py:1243
msgid "match"
msgstr ""
-#: documents/models.py:71 documents/models.py:1244
+#: documents/models.py:71 documents/models.py:1246
msgid "matching algorithm"
msgstr ""
-#: documents/models.py:76 documents/models.py:1249
+#: documents/models.py:76 documents/models.py:1251
msgid "is insensitive"
msgstr ""
@@ -168,7 +168,7 @@ msgstr ""
msgid "title"
msgstr ""
-#: documents/models.py:191 documents/models.py:756
+#: documents/models.py:191 documents/models.py:758
msgid "content"
msgstr ""
@@ -206,8 +206,8 @@ msgstr ""
msgid "The number of pages of the document."
msgstr ""
-#: documents/models.py:246 documents/models.py:762 documents/models.py:800
-#: documents/models.py:862 documents/models.py:980 documents/models.py:1039
+#: documents/models.py:246 documents/models.py:764 documents/models.py:802
+#: documents/models.py:864 documents/models.py:982 documents/models.py:1041
msgid "created"
msgstr ""
@@ -271,12 +271,12 @@ msgstr ""
msgid "Optional short label for a document version."
msgstr ""
-#: documents/models.py:340 documents/models.py:773 documents/models.py:827
-#: documents/models.py:1798
+#: documents/models.py:340 documents/models.py:775 documents/models.py:829
+#: documents/models.py:1800
msgid "document"
msgstr ""
-#: documents/models.py:341 documents/models.py:933
+#: documents/models.py:341 documents/models.py:935
msgid "documents"
msgstr ""
@@ -296,11 +296,11 @@ msgstr ""
msgid "Title"
msgstr ""
-#: documents/models.py:523 documents/models.py:1193
+#: documents/models.py:523 documents/models.py:1195
msgid "Created"
msgstr ""
-#: documents/models.py:524 documents/models.py:1192
+#: documents/models.py:524 documents/models.py:1194
msgid "Added"
msgstr ""
@@ -360,7 +360,7 @@ msgstr ""
msgid "Document display fields"
msgstr ""
-#: documents/models.py:569 documents/models.py:632
+#: documents/models.py:569 documents/models.py:634
msgid "saved view"
msgstr ""
@@ -560,748 +560,756 @@ msgstr ""
msgid "mime type is"
msgstr ""
-#: documents/models.py:635
-msgid "rule type"
+#: documents/models.py:626
+msgid "simple title search"
+msgstr ""
+
+#: documents/models.py:627
+msgid "simple text search"
msgstr ""
#: documents/models.py:637
+msgid "rule type"
+msgstr ""
+
+#: documents/models.py:639
msgid "value"
msgstr ""
-#: documents/models.py:640
+#: documents/models.py:642
msgid "filter rule"
msgstr ""
-#: documents/models.py:641
+#: documents/models.py:643
msgid "filter rules"
msgstr ""
-#: documents/models.py:665
+#: documents/models.py:667
msgid "Auto Task"
msgstr ""
-#: documents/models.py:666
+#: documents/models.py:668
msgid "Scheduled Task"
msgstr ""
-#: documents/models.py:667
+#: documents/models.py:669
msgid "Manual Task"
msgstr ""
-#: documents/models.py:670
+#: documents/models.py:672
msgid "Consume File"
msgstr ""
-#: documents/models.py:671
+#: documents/models.py:673
msgid "Train Classifier"
msgstr ""
-#: documents/models.py:672
+#: documents/models.py:674
msgid "Check Sanity"
msgstr ""
-#: documents/models.py:673
+#: documents/models.py:675
msgid "Index Optimize"
msgstr ""
-#: documents/models.py:674
+#: documents/models.py:676
msgid "LLM Index Update"
msgstr ""
-#: documents/models.py:679
+#: documents/models.py:681
msgid "Task ID"
msgstr ""
-#: documents/models.py:680
+#: documents/models.py:682
msgid "Celery ID for the Task that was run"
msgstr ""
-#: documents/models.py:685
+#: documents/models.py:687
msgid "Acknowledged"
msgstr ""
-#: documents/models.py:686
+#: documents/models.py:688
msgid "If the task is acknowledged via the frontend or API"
msgstr ""
-#: documents/models.py:692
+#: documents/models.py:694
msgid "Task Filename"
msgstr ""
-#: documents/models.py:693
+#: documents/models.py:695
msgid "Name of the file which the Task was run for"
msgstr ""
-#: documents/models.py:700
+#: documents/models.py:702
msgid "Task Name"
msgstr ""
-#: documents/models.py:701
+#: documents/models.py:703
msgid "Name of the task that was run"
msgstr ""
-#: documents/models.py:708
+#: documents/models.py:710
msgid "Task State"
msgstr ""
-#: documents/models.py:709
+#: documents/models.py:711
msgid "Current state of the task being run"
msgstr ""
-#: documents/models.py:715
+#: documents/models.py:717
msgid "Created DateTime"
msgstr ""
-#: documents/models.py:716
+#: documents/models.py:718
msgid "Datetime field when the task result was created in UTC"
msgstr ""
-#: documents/models.py:722
+#: documents/models.py:724
msgid "Started DateTime"
msgstr ""
-#: documents/models.py:723
+#: documents/models.py:725
msgid "Datetime field when the task was started in UTC"
msgstr ""
-#: documents/models.py:729
+#: documents/models.py:731
msgid "Completed DateTime"
msgstr ""
-#: documents/models.py:730
+#: documents/models.py:732
msgid "Datetime field when the task was completed in UTC"
msgstr ""
-#: documents/models.py:736
+#: documents/models.py:738
msgid "Result Data"
msgstr ""
-#: documents/models.py:738
+#: documents/models.py:740
msgid "The data returned by the task"
msgstr ""
-#: documents/models.py:746
+#: documents/models.py:748
msgid "Task Type"
msgstr ""
-#: documents/models.py:747
+#: documents/models.py:749
msgid "The type of task that was run"
msgstr ""
-#: documents/models.py:758
+#: documents/models.py:760
msgid "Note for the document"
msgstr ""
-#: documents/models.py:782
+#: documents/models.py:784
msgid "user"
msgstr ""
-#: documents/models.py:787
+#: documents/models.py:789
msgid "note"
msgstr ""
-#: documents/models.py:788
+#: documents/models.py:790
msgid "notes"
msgstr ""
-#: documents/models.py:796
+#: documents/models.py:798
msgid "Archive"
msgstr ""
-#: documents/models.py:797
+#: documents/models.py:799
msgid "Original"
msgstr ""
-#: documents/models.py:808 documents/models.py:870 paperless_mail/models.py:75
+#: documents/models.py:810 documents/models.py:872 paperless_mail/models.py:75
msgid "expiration"
msgstr ""
-#: documents/models.py:815 documents/models.py:877
+#: documents/models.py:817 documents/models.py:879
msgid "slug"
msgstr ""
-#: documents/models.py:847
+#: documents/models.py:849
msgid "share link"
msgstr ""
-#: documents/models.py:848
+#: documents/models.py:850
msgid "share links"
msgstr ""
-#: documents/models.py:856
+#: documents/models.py:858
msgid "Pending"
msgstr ""
-#: documents/models.py:857
+#: documents/models.py:859
msgid "Processing"
msgstr ""
-#: documents/models.py:858
+#: documents/models.py:860
msgid "Ready"
msgstr ""
-#: documents/models.py:859
+#: documents/models.py:861
msgid "Failed"
msgstr ""
-#: documents/models.py:906
+#: documents/models.py:908
msgid "size (bytes)"
msgstr ""
-#: documents/models.py:912
+#: documents/models.py:914
msgid "last error"
msgstr ""
-#: documents/models.py:919
+#: documents/models.py:921
msgid "file path"
msgstr ""
-#: documents/models.py:925
+#: documents/models.py:927
msgid "built at"
msgstr ""
-#: documents/models.py:938
+#: documents/models.py:940
msgid "share link bundle"
msgstr ""
-#: documents/models.py:939
+#: documents/models.py:941
msgid "share link bundles"
msgstr ""
-#: documents/models.py:942
+#: documents/models.py:944
#, python-format
msgid "Share link bundle %(slug)s"
msgstr ""
-#: documents/models.py:968
+#: documents/models.py:970
msgid "String"
msgstr ""
-#: documents/models.py:969
+#: documents/models.py:971
msgid "URL"
msgstr ""
-#: documents/models.py:970
+#: documents/models.py:972
msgid "Date"
msgstr ""
-#: documents/models.py:971
+#: documents/models.py:973
msgid "Boolean"
msgstr ""
-#: documents/models.py:972
+#: documents/models.py:974
msgid "Integer"
msgstr ""
-#: documents/models.py:973
+#: documents/models.py:975
msgid "Float"
msgstr ""
-#: documents/models.py:974
+#: documents/models.py:976
msgid "Monetary"
msgstr ""
-#: documents/models.py:975
+#: documents/models.py:977
msgid "Document Link"
msgstr ""
-#: documents/models.py:976
+#: documents/models.py:978
msgid "Select"
msgstr ""
-#: documents/models.py:977
+#: documents/models.py:979
msgid "Long Text"
msgstr ""
-#: documents/models.py:989
+#: documents/models.py:991
msgid "data type"
msgstr ""
-#: documents/models.py:996
+#: documents/models.py:998
msgid "extra data"
msgstr ""
-#: documents/models.py:1000
+#: documents/models.py:1002
msgid "Extra data for the custom field, such as select options"
msgstr ""
-#: documents/models.py:1006
+#: documents/models.py:1008
msgid "custom field"
msgstr ""
-#: documents/models.py:1007
+#: documents/models.py:1009
msgid "custom fields"
msgstr ""
-#: documents/models.py:1107
+#: documents/models.py:1109
msgid "custom field instance"
msgstr ""
-#: documents/models.py:1108
+#: documents/models.py:1110
msgid "custom field instances"
msgstr ""
-#: documents/models.py:1180
+#: documents/models.py:1182
msgid "Consumption Started"
msgstr ""
-#: documents/models.py:1181
+#: documents/models.py:1183
msgid "Document Added"
msgstr ""
-#: documents/models.py:1182
+#: documents/models.py:1184
msgid "Document Updated"
msgstr ""
-#: documents/models.py:1183
+#: documents/models.py:1185
msgid "Scheduled"
msgstr ""
-#: documents/models.py:1186
+#: documents/models.py:1188
msgid "Consume Folder"
msgstr ""
-#: documents/models.py:1187
+#: documents/models.py:1189
msgid "Api Upload"
msgstr ""
-#: documents/models.py:1188
+#: documents/models.py:1190
msgid "Mail Fetch"
msgstr ""
-#: documents/models.py:1189
+#: documents/models.py:1191
msgid "Web UI"
msgstr ""
-#: documents/models.py:1194
+#: documents/models.py:1196
msgid "Modified"
msgstr ""
-#: documents/models.py:1195
+#: documents/models.py:1197
msgid "Custom Field"
msgstr ""
-#: documents/models.py:1198
+#: documents/models.py:1200
msgid "Workflow Trigger Type"
msgstr ""
-#: documents/models.py:1210
+#: documents/models.py:1212
msgid "filter path"
msgstr ""
-#: documents/models.py:1215
+#: documents/models.py:1217
msgid ""
"Only consume documents with a path that matches this if specified. Wildcards "
"specified as * are allowed. Case insensitive."
msgstr ""
-#: documents/models.py:1222
+#: documents/models.py:1224
msgid "filter filename"
msgstr ""
-#: documents/models.py:1227 paperless_mail/models.py:200
+#: documents/models.py:1229 paperless_mail/models.py:200
msgid ""
"Only consume documents which entirely match this filename if specified. "
"Wildcards such as *.pdf or *invoice* are allowed. Case insensitive."
msgstr ""
-#: documents/models.py:1238
+#: documents/models.py:1240
msgid "filter documents from this mail rule"
msgstr ""
-#: documents/models.py:1254
+#: documents/models.py:1256
msgid "has these tag(s)"
msgstr ""
-#: documents/models.py:1261
+#: documents/models.py:1263
msgid "has all of these tag(s)"
msgstr ""
-#: documents/models.py:1268
+#: documents/models.py:1270
msgid "does not have these tag(s)"
msgstr ""
-#: documents/models.py:1276
+#: documents/models.py:1278
msgid "has this document type"
msgstr ""
-#: documents/models.py:1283
+#: documents/models.py:1285
msgid "has one of these document types"
msgstr ""
-#: documents/models.py:1290
+#: documents/models.py:1292
msgid "does not have these document type(s)"
msgstr ""
-#: documents/models.py:1298
+#: documents/models.py:1300
msgid "has this correspondent"
msgstr ""
-#: documents/models.py:1305
+#: documents/models.py:1307
msgid "does not have these correspondent(s)"
msgstr ""
-#: documents/models.py:1312
+#: documents/models.py:1314
msgid "has one of these correspondents"
msgstr ""
-#: documents/models.py:1320
+#: documents/models.py:1322
msgid "has this storage path"
msgstr ""
-#: documents/models.py:1327
+#: documents/models.py:1329
msgid "has one of these storage paths"
msgstr ""
-#: documents/models.py:1334
+#: documents/models.py:1336
msgid "does not have these storage path(s)"
msgstr ""
-#: documents/models.py:1338
+#: documents/models.py:1340
msgid "filter custom field query"
msgstr ""
-#: documents/models.py:1341
+#: documents/models.py:1343
msgid "JSON-encoded custom field query expression."
msgstr ""
-#: documents/models.py:1345
+#: documents/models.py:1347
msgid "schedule offset days"
msgstr ""
-#: documents/models.py:1348
+#: documents/models.py:1350
msgid "The number of days to offset the schedule trigger by."
msgstr ""
-#: documents/models.py:1353
+#: documents/models.py:1355
msgid "schedule is recurring"
msgstr ""
-#: documents/models.py:1356
+#: documents/models.py:1358
msgid "If the schedule should be recurring."
msgstr ""
-#: documents/models.py:1361
+#: documents/models.py:1363
msgid "schedule recurring delay in days"
msgstr ""
-#: documents/models.py:1365
+#: documents/models.py:1367
msgid "The number of days between recurring schedule triggers."
msgstr ""
-#: documents/models.py:1370
+#: documents/models.py:1372
msgid "schedule date field"
msgstr ""
-#: documents/models.py:1375
+#: documents/models.py:1377
msgid "The field to check for a schedule trigger."
msgstr ""
-#: documents/models.py:1384
+#: documents/models.py:1386
msgid "schedule date custom field"
msgstr ""
-#: documents/models.py:1388
+#: documents/models.py:1390
msgid "workflow trigger"
msgstr ""
-#: documents/models.py:1389
+#: documents/models.py:1391
msgid "workflow triggers"
msgstr ""
-#: documents/models.py:1397
+#: documents/models.py:1399
msgid "email subject"
msgstr ""
-#: documents/models.py:1401
+#: documents/models.py:1403
msgid ""
"The subject of the email, can include some placeholders, see documentation."
msgstr ""
-#: documents/models.py:1407
+#: documents/models.py:1409
msgid "email body"
msgstr ""
-#: documents/models.py:1410
+#: documents/models.py:1412
msgid ""
"The body (message) of the email, can include some placeholders, see "
"documentation."
msgstr ""
-#: documents/models.py:1416
+#: documents/models.py:1418
msgid "emails to"
msgstr ""
-#: documents/models.py:1419
+#: documents/models.py:1421
msgid "The destination email addresses, comma separated."
msgstr ""
-#: documents/models.py:1425
+#: documents/models.py:1427
msgid "include document in email"
msgstr ""
-#: documents/models.py:1436
+#: documents/models.py:1438
msgid "webhook url"
msgstr ""
-#: documents/models.py:1439
+#: documents/models.py:1441
msgid "The destination URL for the notification."
msgstr ""
-#: documents/models.py:1444
+#: documents/models.py:1446
msgid "use parameters"
msgstr ""
-#: documents/models.py:1449
+#: documents/models.py:1451
msgid "send as JSON"
msgstr ""
-#: documents/models.py:1453
+#: documents/models.py:1455
msgid "webhook parameters"
msgstr ""
-#: documents/models.py:1456
+#: documents/models.py:1458
msgid "The parameters to send with the webhook URL if body not used."
msgstr ""
-#: documents/models.py:1460
+#: documents/models.py:1462
msgid "webhook body"
msgstr ""
-#: documents/models.py:1463
+#: documents/models.py:1465
msgid "The body to send with the webhook URL if parameters not used."
msgstr ""
-#: documents/models.py:1467
+#: documents/models.py:1469
msgid "webhook headers"
msgstr ""
-#: documents/models.py:1470
+#: documents/models.py:1472
msgid "The headers to send with the webhook URL."
msgstr ""
-#: documents/models.py:1475
+#: documents/models.py:1477
msgid "include document in webhook"
msgstr ""
-#: documents/models.py:1486
+#: documents/models.py:1488
msgid "Assignment"
msgstr ""
-#: documents/models.py:1490
+#: documents/models.py:1492
msgid "Removal"
msgstr ""
-#: documents/models.py:1494 documents/templates/account/password_reset.html:15
+#: documents/models.py:1496 documents/templates/account/password_reset.html:15
msgid "Email"
msgstr ""
-#: documents/models.py:1498
+#: documents/models.py:1500
msgid "Webhook"
msgstr ""
-#: documents/models.py:1502
+#: documents/models.py:1504
msgid "Password removal"
msgstr ""
-#: documents/models.py:1506
+#: documents/models.py:1508
msgid "Move to trash"
msgstr ""
-#: documents/models.py:1510
+#: documents/models.py:1512
msgid "Workflow Action Type"
msgstr ""
-#: documents/models.py:1515 documents/models.py:1757
+#: documents/models.py:1517 documents/models.py:1759
#: paperless_mail/models.py:145
msgid "order"
msgstr ""
-#: documents/models.py:1518
+#: documents/models.py:1520
msgid "assign title"
msgstr ""
-#: documents/models.py:1522
+#: documents/models.py:1524
msgid "Assign a document title, must be a Jinja2 template, see documentation."
msgstr ""
-#: documents/models.py:1530 paperless_mail/models.py:274
+#: documents/models.py:1532 paperless_mail/models.py:274
msgid "assign this tag"
msgstr ""
-#: documents/models.py:1539 paperless_mail/models.py:282
+#: documents/models.py:1541 paperless_mail/models.py:282
msgid "assign this document type"
msgstr ""
-#: documents/models.py:1548 paperless_mail/models.py:296
+#: documents/models.py:1550 paperless_mail/models.py:296
msgid "assign this correspondent"
msgstr ""
-#: documents/models.py:1557
+#: documents/models.py:1559
msgid "assign this storage path"
msgstr ""
-#: documents/models.py:1566
+#: documents/models.py:1568
msgid "assign this owner"
msgstr ""
-#: documents/models.py:1573
+#: documents/models.py:1575
msgid "grant view permissions to these users"
msgstr ""
-#: documents/models.py:1580
+#: documents/models.py:1582
msgid "grant view permissions to these groups"
msgstr ""
-#: documents/models.py:1587
+#: documents/models.py:1589
msgid "grant change permissions to these users"
msgstr ""
-#: documents/models.py:1594
+#: documents/models.py:1596
msgid "grant change permissions to these groups"
msgstr ""
-#: documents/models.py:1601
+#: documents/models.py:1603
msgid "assign these custom fields"
msgstr ""
-#: documents/models.py:1605
+#: documents/models.py:1607
msgid "custom field values"
msgstr ""
-#: documents/models.py:1609
+#: documents/models.py:1611
msgid "Optional values to assign to the custom fields."
msgstr ""
-#: documents/models.py:1618
+#: documents/models.py:1620
msgid "remove these tag(s)"
msgstr ""
-#: documents/models.py:1623
+#: documents/models.py:1625
msgid "remove all tags"
msgstr ""
-#: documents/models.py:1630
+#: documents/models.py:1632
msgid "remove these document type(s)"
msgstr ""
-#: documents/models.py:1635
+#: documents/models.py:1637
msgid "remove all document types"
msgstr ""
-#: documents/models.py:1642
+#: documents/models.py:1644
msgid "remove these correspondent(s)"
msgstr ""
-#: documents/models.py:1647
+#: documents/models.py:1649
msgid "remove all correspondents"
msgstr ""
-#: documents/models.py:1654
+#: documents/models.py:1656
msgid "remove these storage path(s)"
msgstr ""
-#: documents/models.py:1659
+#: documents/models.py:1661
msgid "remove all storage paths"
msgstr ""
-#: documents/models.py:1666
+#: documents/models.py:1668
msgid "remove these owner(s)"
msgstr ""
-#: documents/models.py:1671
+#: documents/models.py:1673
msgid "remove all owners"
msgstr ""
-#: documents/models.py:1678
+#: documents/models.py:1680
msgid "remove view permissions for these users"
msgstr ""
-#: documents/models.py:1685
+#: documents/models.py:1687
msgid "remove view permissions for these groups"
msgstr ""
-#: documents/models.py:1692
+#: documents/models.py:1694
msgid "remove change permissions for these users"
msgstr ""
-#: documents/models.py:1699
+#: documents/models.py:1701
msgid "remove change permissions for these groups"
msgstr ""
-#: documents/models.py:1704
+#: documents/models.py:1706
msgid "remove all permissions"
msgstr ""
-#: documents/models.py:1711
+#: documents/models.py:1713
msgid "remove these custom fields"
msgstr ""
-#: documents/models.py:1716
+#: documents/models.py:1718
msgid "remove all custom fields"
msgstr ""
-#: documents/models.py:1725
+#: documents/models.py:1727
msgid "email"
msgstr ""
-#: documents/models.py:1734
+#: documents/models.py:1736
msgid "webhook"
msgstr ""
-#: documents/models.py:1738
+#: documents/models.py:1740
msgid "passwords"
msgstr ""
-#: documents/models.py:1742
+#: documents/models.py:1744
msgid ""
"Passwords to try when removing PDF protection. Separate with commas or new "
"lines."
msgstr ""
-#: documents/models.py:1747
+#: documents/models.py:1749
msgid "workflow action"
msgstr ""
-#: documents/models.py:1748
+#: documents/models.py:1750
msgid "workflow actions"
msgstr ""
-#: documents/models.py:1763
+#: documents/models.py:1765
msgid "triggers"
msgstr ""
-#: documents/models.py:1770
+#: documents/models.py:1772
msgid "actions"
msgstr ""
-#: documents/models.py:1773 paperless_mail/models.py:154
+#: documents/models.py:1775 paperless_mail/models.py:154
msgid "enabled"
msgstr ""
-#: documents/models.py:1784
+#: documents/models.py:1786
msgid "workflow"
msgstr ""
-#: documents/models.py:1788
+#: documents/models.py:1790
msgid "workflow trigger type"
msgstr ""
-#: documents/models.py:1802
+#: documents/models.py:1804
msgid "date run"
msgstr ""
-#: documents/models.py:1808
+#: documents/models.py:1810
msgid "workflow run"
msgstr ""
-#: documents/models.py:1809
+#: documents/models.py:1811
msgid "workflow runs"
msgstr ""
#: documents/serialisers.py:463 documents/serialisers.py:815
-#: documents/serialisers.py:2545 documents/views.py:2079
-#: documents/views.py:2134 paperless_mail/serialisers.py:143
+#: documents/serialisers.py:2545 documents/views.py:2120
+#: documents/views.py:2175 paperless_mail/serialisers.py:143
msgid "Insufficient permissions."
msgstr ""
@@ -1341,7 +1349,7 @@ msgstr ""
msgid "Duplicate document identifiers are not allowed."
msgstr ""
-#: documents/serialisers.py:2631 documents/views.py:3738
+#: documents/serialisers.py:2631 documents/views.py:3784
#, python-format
msgid "Documents not found: %(ids)s"
msgstr ""
@@ -1609,24 +1617,28 @@ msgstr ""
msgid "Unable to parse URI {value}"
msgstr ""
-#: documents/views.py:2072 documents/views.py:2131
+#: documents/views.py:2077
+msgid "Specify only one of text, title_search, query, or more_like_id."
+msgstr ""
+
+#: documents/views.py:2113 documents/views.py:2172
msgid "Invalid more_like_id"
msgstr ""
-#: documents/views.py:3750
+#: documents/views.py:3796
#, python-format
msgid "Insufficient permissions to share document %(id)s."
msgstr ""
-#: documents/views.py:3793
+#: documents/views.py:3839
msgid "Bundle is already being processed."
msgstr ""
-#: documents/views.py:3850
+#: documents/views.py:3896
msgid "The share link bundle is still being prepared. Please try again later."
msgstr ""
-#: documents/views.py:3860
+#: documents/views.py:3906
msgid "The share link bundle is unavailable."
msgstr ""
diff --git a/src/paperless/consumers.py b/src/paperless/consumers.py
index 9d59a1a5a..4a3cda8fe 100644
--- a/src/paperless/consumers.py
+++ b/src/paperless/consumers.py
@@ -1,16 +1,27 @@
+from __future__ import annotations
+
import json
-from typing import Any
+from typing import TYPE_CHECKING
from channels.generic.websocket import AsyncWebsocketConsumer
+if TYPE_CHECKING:
+ from django.contrib.auth.base_user import AbstractBaseUser
+ from django.contrib.auth.models import AnonymousUser
+
+ from documents.plugins.helpers import DocumentsDeletedPayload
+ from documents.plugins.helpers import DocumentUpdatedPayload
+ from documents.plugins.helpers import PermissionsData
+ from documents.plugins.helpers import StatusUpdatePayload
+
class StatusConsumer(AsyncWebsocketConsumer):
def _authenticated(self) -> bool:
- user: Any = self.scope.get("user")
+ user: AbstractBaseUser | AnonymousUser | None = self.scope.get("user")
return user is not None and user.is_authenticated
- async def _can_view(self, data: dict[str, Any]) -> bool:
- user: Any = self.scope.get("user")
+ async def _can_view(self, data: PermissionsData) -> bool:
+ user: AbstractBaseUser | AnonymousUser | None = self.scope.get("user")
if user is None:
return False
owner_id = data.get("owner_id")
@@ -32,19 +43,19 @@ class StatusConsumer(AsyncWebsocketConsumer):
async def disconnect(self, code: int) -> None:
await self.channel_layer.group_discard("status_updates", self.channel_name)
- async def status_update(self, event: dict[str, Any]) -> None:
+ async def status_update(self, event: StatusUpdatePayload) -> None:
if not self._authenticated():
await self.close()
elif await self._can_view(event["data"]):
await self.send(json.dumps(event))
- async def documents_deleted(self, event: dict[str, Any]) -> None:
+ async def documents_deleted(self, event: DocumentsDeletedPayload) -> None:
if not self._authenticated():
await self.close()
else:
await self.send(json.dumps(event))
- async def document_updated(self, event: dict[str, Any]) -> None:
+ async def document_updated(self, event: DocumentUpdatedPayload) -> None:
if not self._authenticated():
await self.close()
elif await self._can_view(event["data"]):
diff --git a/src/paperless/logging.py b/src/paperless/logging.py
new file mode 100644
index 000000000..ce2eff4fc
--- /dev/null
+++ b/src/paperless/logging.py
@@ -0,0 +1,33 @@
+from __future__ import annotations
+
+import logging
+from contextvars import ContextVar
+
+consume_task_id: ContextVar[str] = ContextVar("consume_task_id", default="")
+
+
+class ConsumeTaskFormatter(logging.Formatter):
+    """
+    Logging formatter that prepends a short task correlation ID to messages
+    emitted during document consumption.
+
+    The ID is the first 8 characters of the Celery task UUID, set via the
+    ``consume_task_id`` ContextVar at the entry of ``consume_file``. When
+    the ContextVar is empty (any log outside a consume task) no prefix is
+    added and the output is identical to the standard verbose format.
+    """
+
+    def __init__(self) -> None:
+        # {task_prefix} is not a standard LogRecord attribute, but init-time
+        # validation only checks replacement-field syntax, so it passes as is;
+        # format() sets record.task_prefix before the format string is applied.
+        super().__init__(
+            fmt="[{asctime}] [{levelname}] [{name}] {task_prefix}{message}",
+            style="{",
+        )
+
+    def format(self, record: logging.LogRecord) -> str:
+        """Attach ``task_prefix`` to the record, then render it as usual."""
+        task_id = consume_task_id.get()
+        record.task_prefix = f"[{task_id}] " if task_id else ""
+        return super().format(record)
diff --git a/src/paperless/settings/__init__.py b/src/paperless/settings/__init__.py
index b960e9dc4..772c51801 100644
--- a/src/paperless/settings/__init__.py
+++ b/src/paperless/settings/__init__.py
@@ -592,8 +592,7 @@ LOGGING = {
"disable_existing_loggers": False,
"formatters": {
"verbose": {
- "format": "[{asctime}] [{levelname}] [{name}] {message}",
- "style": "{",
+ "()": "paperless.logging.ConsumeTaskFormatter",
},
"simple": {
"format": "{levelname} {message}",
diff --git a/src/paperless/tests/test_logging.py b/src/paperless/tests/test_logging.py
new file mode 100644
index 000000000..dbd36c7d0
--- /dev/null
+++ b/src/paperless/tests/test_logging.py
@@ -0,0 +1,34 @@
+import logging
+
+from paperless.logging import ConsumeTaskFormatter
+from paperless.logging import consume_task_id
+
+
+def _make_record(msg: str = "Test message") -> logging.LogRecord:
+    """Build a minimal INFO record on the ``paperless.consumer`` logger."""
+    record_fields = {
+        "name": "paperless.consumer",
+        "level": logging.INFO,
+        "pathname": "",
+        "lineno": 0,
+        "exc_info": None,
+    }
+    return logging.LogRecord(msg=msg, args=(), **record_fields)
+
+
+def test_formatter_includes_task_id_when_set():
+    """With a task ID in context, the message carries the ``[<id>] `` prefix."""
+    reset_token = consume_task_id.set("a8098c1a")
+    try:
+        rendered = ConsumeTaskFormatter().format(_make_record())
+        assert "[a8098c1a] Test message" in rendered
+    finally:
+        consume_task_id.reset(reset_token)
+
+
+def test_formatter_omits_prefix_when_no_task_id():
+    # ContextVar default is "" (no task active), so the logger name must be
+    # followed directly by the message, exactly as in the plain verbose format.
+    output = ConsumeTaskFormatter().format(_make_record())
+    assert "[] " not in output
+    assert output.endswith("[INFO] [paperless.consumer] Test message")
diff --git a/src/paperless/tests/test_websockets.py b/src/paperless/tests/test_websockets.py
index bffc44f82..9f7c9a652 100644
--- a/src/paperless/tests/test_websockets.py
+++ b/src/paperless/tests/test_websockets.py
@@ -200,7 +200,10 @@ class TestWebSockets:
"Test message",
1,
10,
- extra_args={"foo": "bar"},
+ document_id=42,
+ owner_id=1,
+ users_can_view=[2, 3],
+ groups_can_view=[4],
)
assert mock_group_send.call_args[0][1] == {
@@ -212,7 +215,10 @@ class TestWebSockets:
"max_progress": 10,
"status": ProgressStatusOptions.STARTED,
"message": "Test message",
- "foo": "bar",
+ "document_id": 42,
+ "owner_id": 1,
+ "users_can_view": [2, 3],
+ "groups_can_view": [4],
},
}