Merge remote-tracking branch 'origin/dev' into feature-archive-ocr-decoupling

This commit is contained in:
Trenton Holmes
2026-04-05 13:32:00 -07:00
55 changed files with 1882 additions and 452 deletions
+5
View File
@@ -24,6 +24,7 @@ jobs:
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
fetch-depth: 0
persist-credentials: false
- name: Decide run mode
id: force
run: |
@@ -72,6 +73,8 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false
- name: Start containers
run: |
docker compose --file docker/compose/docker-compose.ci-test.yml pull --quiet
@@ -145,6 +148,8 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false
- name: Set up Python
id: setup-python
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
+2
View File
@@ -42,6 +42,8 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false
- name: Determine ref name
id: ref
run: |
+3
View File
@@ -26,6 +26,7 @@ jobs:
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
fetch-depth: 0
persist-credentials: false
- name: Decide run mode
id: force
run: |
@@ -71,6 +72,8 @@ jobs:
- uses: actions/configure-pages@45bfe0192ca1faeb007ade9deae92b16b8254a0d # v6.0.0
- name: Checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false
- name: Set up Python
id: setup-python
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
+9
View File
@@ -62,6 +62,8 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false
- name: Install pnpm
uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0
with:
@@ -90,6 +92,8 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false
- name: Install pnpm
uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0
with:
@@ -125,6 +129,8 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false
- name: Install pnpm
uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0
with:
@@ -176,6 +182,8 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false
- name: Install pnpm
uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0
with:
@@ -209,6 +217,7 @@ jobs:
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
fetch-depth: 2
persist-credentials: false
- name: Install pnpm
uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0
with:
+2
View File
@@ -16,6 +16,8 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false
- name: Install Python
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
+3
View File
@@ -29,6 +29,8 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false
# ---- Frontend Build ----
- name: Install pnpm
uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0
@@ -179,6 +181,7 @@ jobs:
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
ref: main
persist-credentials: true # for pushing changelog branch
- name: Set up Python
id: setup-python
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
+42
View File
@@ -0,0 +1,42 @@
name: Static Analysis
on:
push:
branches-ignore:
- 'translations**'
pull_request:
branches-ignore:
- 'translations**'
workflow_dispatch:
concurrency:
group: static-analysis-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
permissions:
contents: read
jobs:
zizmor:
name: Run zizmor
runs-on: ubuntu-24.04
permissions:
contents: read
actions: read
security-events: write
steps:
- name: Checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false
- name: Run zizmor
uses: zizmorcore/zizmor-action@71321a20a9ded102f6e9ce5718a2fcec2c4f70d8 # v0.5.2
semgrep:
name: Semgrep CE
runs-on: ubuntu-24.04
container:
image: semgrep/semgrep:1.155.0@sha256:cc869c685dcc0fe497c86258da9f205397d8108e56d21a86082ea4886e52784d
if: github.actor != 'dependabot[bot]'
steps:
- name: Checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false
- name: Run Semgrep
run: semgrep scan --config auto
+2
View File
@@ -35,6 +35,8 @@ jobs:
steps:
- name: Checkout repository
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@c793b717bc78562f491db7b0e93a3a178b099162 # v4.32.5
+1
View File
@@ -16,6 +16,7 @@ jobs:
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
token: ${{ secrets.PNGX_BOT_PAT }}
persist-credentials: false
- name: crowdin action
uses: crowdin/github-action@8818ff65bfc4322384f983ea37e3926948c11745 # v2.15.0
with:
+1
View File
@@ -17,6 +17,7 @@ jobs:
with:
token: ${{ secrets.PNGX_BOT_PAT }}
ref: ${{ env.GH_REF }}
persist-credentials: true # for pushing translation branch
- name: Set up Python
id: setup-python
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
+8 -2
View File
@@ -62,10 +62,14 @@ The REST api provides five different forms of authentication.
## Searching for documents
Full text searching is available on the `/api/documents/` endpoint. Two
specific query parameters cause the API to return full text search
Full text searching is available on the `/api/documents/` endpoint. The
following query parameters cause the API to return Tantivy-backed search
results:
- `/api/documents/?text=your%20search%20query`: Search title and content
using simple substring-style search.
- `/api/documents/?title_search=your%20search%20query`: Search title only
using simple substring-style search.
- `/api/documents/?query=your%20search%20query`: Search for a document
using a full text query. For details on the syntax, see [Basic Usage - Searching](usage.md#basic-usage_searching).
- `/api/documents/?more_like_id=1234`: Search for documents similar to
@@ -439,3 +443,5 @@ Initial API version.
- The `all` parameter of list endpoints is now deprecated and will be removed in a future version.
- The bulk edit objects endpoint now supports `all` and `filters` parameters to avoid having to send
large lists of object IDs for operations affecting many objects.
- The legacy `title_content` document search parameter is deprecated and will be removed in a future version.
Clients should use `text` for simple title-and-content search and `title_search` for title-only search.
+19
View File
@@ -1,5 +1,24 @@
# v3 Migration Guide
## Secret Key is Now Required
The `PAPERLESS_SECRET_KEY` environment variable is now required. This is a critical security setting used for cryptographic signing and should be set to a long, random value.
### Action Required
If you are upgrading an existing installation, you must now set `PAPERLESS_SECRET_KEY` explicitly.
If your installation was relying on the previous built-in default key, you have two options:
- Set `PAPERLESS_SECRET_KEY` to that previous value to preserve existing sessions and tokens.
- Set `PAPERLESS_SECRET_KEY` to a new random value to improve security, understanding that this will invalidate existing sessions and other signed tokens.
For new installations, or if you choose to rotate the key, you may generate a new secret key with:
```bash
python3 -c "import secrets; print(secrets.token_urlsafe(64))"
```
## Consumer Settings Changes
The v3 consumer command uses a [different library](https://watchfiles.helpmanual.io/) to unify
@@ -49,11 +49,11 @@ test('text filtering', async ({ page }) => {
await page.getByRole('main').getByRole('combobox').click()
await page.getByRole('main').getByRole('combobox').fill('test')
await expect(page.locator('pngx-document-list')).toHaveText(/32 documents/)
await expect(page).toHaveURL(/title_content=test/)
await expect(page).toHaveURL(/text=test/)
await page.getByRole('button', { name: 'Title & content' }).click()
await page.getByRole('button', { name: 'Title', exact: true }).click()
await expect(page.locator('pngx-document-list')).toHaveText(/9 documents/)
await expect(page).toHaveURL(/title__icontains=test/)
await expect(page).toHaveURL(/title_search=test/)
await page.getByRole('button', { name: 'Title', exact: true }).click()
await page.getByRole('button', { name: 'Advanced search' }).click()
await expect(page).toHaveURL(/query=test/)
@@ -3545,7 +3545,7 @@
"time": 1.091,
"request": {
"method": "GET",
"url": "http://localhost:8000/api/documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true&title_content=test",
"url": "http://localhost:8000/api/documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true&text=test",
"httpVersion": "HTTP/1.1",
"cookies": [],
"headers": [
@@ -3579,7 +3579,7 @@
"value": "true"
},
{
"name": "title_content",
"name": "text",
"value": "test"
}
],
@@ -4303,7 +4303,7 @@
"time": 0.603,
"request": {
"method": "GET",
"url": "http://localhost:8000/api/documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true&title__icontains=test",
"url": "http://localhost:8000/api/documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true&title_search=test",
"httpVersion": "HTTP/1.1",
"cookies": [],
"headers": [
@@ -4337,7 +4337,7 @@
"value": "true"
},
{
"name": "title__icontains",
"name": "title_search",
"value": "test"
}
],
+39 -29
View File
@@ -1081,7 +1081,7 @@
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
<context context-type="linenumber">205</context>
<context context-type="linenumber">203</context>
</context-group>
</trans-unit>
<trans-unit id="8901931207592071833" datatype="html">
@@ -3027,10 +3027,6 @@
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.html</context>
<context context-type="linenumber">84</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
<context context-type="linenumber">200</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/manage/document-attributes/document-attributes.component.ts</context>
<context context-type="linenumber">129</context>
@@ -7504,7 +7500,7 @@
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
<context context-type="linenumber">192</context>
<context context-type="linenumber">194</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/data/document.ts</context>
@@ -8817,7 +8813,7 @@
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
<context context-type="linenumber">197</context>
<context context-type="linenumber">199</context>
</context-group>
<context-group purpose="location">
<context context-type="sourcefile">src/app/data/document.ts</context>
@@ -9020,56 +9016,63 @@
<source>Title &amp; content</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
<context context-type="linenumber">195</context>
<context context-type="linenumber">197</context>
</context-group>
</trans-unit>
<trans-unit id="7408932238599462499" datatype="html">
<source>File type</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
<context context-type="linenumber">202</context>
<context context-type="linenumber">200</context>
</context-group>
</trans-unit>
<trans-unit id="8492379284173601938" datatype="html">
<source>Custom fields (Deprecated)</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
<context context-type="linenumber">210</context>
</context-group>
</trans-unit>
<trans-unit id="2649431021108393503" datatype="html">
<source>More like</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
<context context-type="linenumber">211</context>
<context context-type="linenumber">215</context>
</context-group>
</trans-unit>
<trans-unit id="3697582909018473071" datatype="html">
<source>equals</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
<context context-type="linenumber">217</context>
<context context-type="linenumber">221</context>
</context-group>
</trans-unit>
<trans-unit id="5325481293405718739" datatype="html">
<source>is empty</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
<context context-type="linenumber">221</context>
<context context-type="linenumber">225</context>
</context-group>
</trans-unit>
<trans-unit id="6166785695326182482" datatype="html">
<source>is not empty</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
<context context-type="linenumber">225</context>
<context context-type="linenumber">229</context>
</context-group>
</trans-unit>
<trans-unit id="4686622206659266699" datatype="html">
<source>greater than</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
<context context-type="linenumber">229</context>
<context context-type="linenumber">233</context>
</context-group>
</trans-unit>
<trans-unit id="8014012170270529279" datatype="html">
<source>less than</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
<context context-type="linenumber">233</context>
<context context-type="linenumber">237</context>
</context-group>
</trans-unit>
<trans-unit id="5195932016807797291" datatype="html">
@@ -9078,14 +9081,14 @@
)?.name"/></source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
<context context-type="linenumber">274,278</context>
<context context-type="linenumber">278,282</context>
</context-group>
</trans-unit>
<trans-unit id="8170755470576301659" datatype="html">
<source>Without correspondent</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
<context context-type="linenumber">280</context>
<context context-type="linenumber">284</context>
</context-group>
</trans-unit>
<trans-unit id="317796810569008208" datatype="html">
@@ -9094,14 +9097,14 @@
)?.name"/></source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
<context context-type="linenumber">286,290</context>
<context context-type="linenumber">290,294</context>
</context-group>
</trans-unit>
<trans-unit id="4362173610367509215" datatype="html">
<source>Without document type</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
<context context-type="linenumber">292</context>
<context context-type="linenumber">296</context>
</context-group>
</trans-unit>
<trans-unit id="232202047340644471" datatype="html">
@@ -9110,70 +9113,77 @@
)?.name"/></source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
<context context-type="linenumber">298,302</context>
<context context-type="linenumber">302,306</context>
</context-group>
</trans-unit>
<trans-unit id="1562820715074533164" datatype="html">
<source>Without storage path</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
<context context-type="linenumber">304</context>
<context context-type="linenumber">308</context>
</context-group>
</trans-unit>
<trans-unit id="8180755793012580465" datatype="html">
<source>Tag: <x id="PH" equiv-text="this.tagSelectionModel.items.find((t) =&gt; t.id == +rule.value)?.name"/></source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
<context context-type="linenumber">308,310</context>
<context context-type="linenumber">312,314</context>
</context-group>
</trans-unit>
<trans-unit id="6494566478302448576" datatype="html">
<source>Without any tag</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
<context context-type="linenumber">314</context>
<context context-type="linenumber">318</context>
</context-group>
</trans-unit>
<trans-unit id="8644099678903817943" datatype="html">
<source>Custom fields query</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
<context context-type="linenumber">318</context>
<context context-type="linenumber">322</context>
</context-group>
</trans-unit>
<trans-unit id="6523384805359286307" datatype="html">
<source>Title: <x id="PH" equiv-text="rule.value"/></source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
<context context-type="linenumber">321</context>
<context context-type="linenumber">326</context>
</context-group>
</trans-unit>
<trans-unit id="2578442194354349017" datatype="html">
<source>Title &amp; content: <x id="PH" equiv-text="rule.value"/></source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
<context context-type="linenumber">330</context>
</context-group>
</trans-unit>
<trans-unit id="1872523635812236432" datatype="html">
<source>ASN: <x id="PH" equiv-text="rule.value"/></source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
<context context-type="linenumber">324</context>
<context context-type="linenumber">333</context>
</context-group>
</trans-unit>
<trans-unit id="102674688969746976" datatype="html">
<source>Owner: <x id="PH" equiv-text="rule.value"/></source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
<context context-type="linenumber">327</context>
<context context-type="linenumber">336</context>
</context-group>
</trans-unit>
<trans-unit id="3550877650686009106" datatype="html">
<source>Owner not in: <x id="PH" equiv-text="rule.value"/></source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
<context context-type="linenumber">330</context>
<context context-type="linenumber">339</context>
</context-group>
</trans-unit>
<trans-unit id="1082034558646673343" datatype="html">
<source>Without an owner</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
<context context-type="linenumber">333</context>
<context context-type="linenumber">342</context>
</context-group>
</trans-unit>
<trans-unit id="7210076240260527720" datatype="html">
@@ -24,7 +24,7 @@ import {
FILTER_HAS_DOCUMENT_TYPE_ANY,
FILTER_HAS_STORAGE_PATH_ANY,
FILTER_HAS_TAGS_ALL,
FILTER_TITLE_CONTENT,
FILTER_SIMPLE_TEXT,
} from 'src/app/data/filter-rule-type'
import { GlobalSearchType, SETTINGS_KEYS } from 'src/app/data/ui-settings'
import { DocumentListViewService } from 'src/app/services/document-list-view.service'
@@ -545,7 +545,7 @@ describe('GlobalSearchComponent', () => {
component.query = 'test'
component.runFullSearch()
expect(qfSpy).toHaveBeenCalledWith([
{ rule_type: FILTER_TITLE_CONTENT, value: 'test' },
{ rule_type: FILTER_SIMPLE_TEXT, value: 'test' },
])
settingsService.set(
@@ -25,7 +25,7 @@ import {
FILTER_HAS_DOCUMENT_TYPE_ANY,
FILTER_HAS_STORAGE_PATH_ANY,
FILTER_HAS_TAGS_ALL,
FILTER_TITLE_CONTENT,
FILTER_SIMPLE_TEXT,
} from 'src/app/data/filter-rule-type'
import { ObjectWithId } from 'src/app/data/object-with-id'
import { GlobalSearchType, SETTINGS_KEYS } from 'src/app/data/ui-settings'
@@ -410,7 +410,7 @@ export class GlobalSearchComponent implements OnInit {
public runFullSearch() {
const ruleType = this.useAdvancedForFullSearch
? FILTER_FULLTEXT_QUERY
: FILTER_TITLE_CONTENT
: FILTER_SIMPLE_TEXT
this.documentService.searchQuery = this.useAdvancedForFullSearch
? this.query
: ''
@@ -4,7 +4,7 @@ import { ComponentFixture, TestBed } from '@angular/core/testing'
import { By } from '@angular/platform-browser'
import { NgbAccordionButton, NgbActiveModal } from '@ng-bootstrap/ng-bootstrap'
import { of, throwError } from 'rxjs'
import { FILTER_TITLE } from 'src/app/data/filter-rule-type'
import { FILTER_SIMPLE_TITLE } from 'src/app/data/filter-rule-type'
import { DocumentService } from 'src/app/services/rest/document.service'
import { StoragePathService } from 'src/app/services/rest/storage-path.service'
import { SettingsService } from 'src/app/services/settings.service'
@@ -105,7 +105,7 @@ describe('StoragePathEditDialogComponent', () => {
null,
'created',
true,
[{ rule_type: FILTER_TITLE, value: 'bar' }],
[{ rule_type: FILTER_SIMPLE_TITLE, value: 'bar' }],
{ truncate_content: true }
)
listSpy.mockReturnValueOnce(
@@ -23,7 +23,7 @@ import {
} from 'rxjs'
import { EditDialogComponent } from 'src/app/components/common/edit-dialog/edit-dialog.component'
import { Document } from 'src/app/data/document'
import { FILTER_TITLE } from 'src/app/data/filter-rule-type'
import { FILTER_SIMPLE_TITLE } from 'src/app/data/filter-rule-type'
import { DEFAULT_MATCHING_ALGORITHM } from 'src/app/data/matching-model'
import { StoragePath } from 'src/app/data/storage-path'
import { IfOwnerDirective } from 'src/app/directives/if-owner.directive'
@@ -146,7 +146,7 @@ export class StoragePathEditDialogComponent
null,
'created',
true,
[{ rule_type: FILTER_TITLE, value: title }],
[{ rule_type: FILTER_SIMPLE_TITLE, value: title }],
{ truncate_content: true }
)
.pipe(
@@ -3,7 +3,7 @@ import { provideHttpClientTesting } from '@angular/common/http/testing'
import { ComponentFixture, TestBed } from '@angular/core/testing'
import { NG_VALUE_ACCESSOR } from '@angular/forms'
import { of, throwError } from 'rxjs'
import { FILTER_TITLE } from 'src/app/data/filter-rule-type'
import { FILTER_SIMPLE_TITLE } from 'src/app/data/filter-rule-type'
import { DocumentService } from 'src/app/services/rest/document.service'
import { DocumentLinkComponent } from './document-link.component'
@@ -99,7 +99,7 @@ describe('DocumentLinkComponent', () => {
null,
'created',
true,
[{ rule_type: FILTER_TITLE, value: 'bar' }],
[{ rule_type: FILTER_SIMPLE_TITLE, value: 'bar' }],
{ truncate_content: true }
)
listSpy.mockReturnValueOnce(throwError(() => new Error()))
@@ -28,7 +28,7 @@ import {
tap,
} from 'rxjs'
import { Document } from 'src/app/data/document'
import { FILTER_TITLE } from 'src/app/data/filter-rule-type'
import { FILTER_SIMPLE_TITLE } from 'src/app/data/filter-rule-type'
import { CustomDatePipe } from 'src/app/pipes/custom-date.pipe'
import { DocumentService } from 'src/app/services/rest/document.service'
import { AbstractInputComponent } from '../abstract-input'
@@ -121,7 +121,7 @@ export class DocumentLinkComponent
null,
'created',
true,
[{ rule_type: FILTER_TITLE, value: title }],
[{ rule_type: FILTER_SIMPLE_TITLE, value: title }],
{ truncate_content: true }
)
.pipe(
@@ -428,7 +428,7 @@ describe('BulkEditorComponent', () => {
req.flush(true)
expect(req.request.body).toEqual({
all: true,
filters: { title__icontains: 'apple' },
filters: { title_search: 'apple' },
method: 'modify_tags',
parameters: { add_tags: [101], remove_tags: [] },
})
@@ -67,6 +67,8 @@ import {
FILTER_OWNER_DOES_NOT_INCLUDE,
FILTER_OWNER_ISNULL,
FILTER_SHARED_BY_USER,
FILTER_SIMPLE_TEXT,
FILTER_SIMPLE_TITLE,
FILTER_STORAGE_PATH,
FILTER_TITLE,
FILTER_TITLE_CONTENT,
@@ -312,7 +314,7 @@ describe('FilterEditorComponent', () => {
expect(component.textFilter).toEqual(null)
component.filterRules = [
{
rule_type: FILTER_TITLE_CONTENT,
rule_type: FILTER_SIMPLE_TEXT,
value: 'foo',
},
]
@@ -320,6 +322,18 @@ describe('FilterEditorComponent', () => {
expect(component.textFilterTarget).toEqual('title-content') // TEXT_FILTER_TARGET_TITLE_CONTENT
}))
it('should ingest legacy text filter rules for doc title + content', fakeAsync(() => {
expect(component.textFilter).toEqual(null)
component.filterRules = [
{
rule_type: FILTER_TITLE_CONTENT,
value: 'legacy foo',
},
]
expect(component.textFilter).toEqual('legacy foo')
expect(component.textFilterTarget).toEqual('title-content') // TEXT_FILTER_TARGET_TITLE_CONTENT
}))
it('should ingest text filter rules for doc asn', fakeAsync(() => {
expect(component.textFilter).toEqual(null)
component.filterRules = [
@@ -1117,7 +1131,7 @@ describe('FilterEditorComponent', () => {
expect(component.textFilter).toEqual('foo')
expect(component.filterRules).toEqual([
{
rule_type: FILTER_TITLE_CONTENT,
rule_type: FILTER_SIMPLE_TEXT,
value: 'foo',
},
])
@@ -1136,7 +1150,7 @@ describe('FilterEditorComponent', () => {
expect(component.textFilterTarget).toEqual('title')
expect(component.filterRules).toEqual([
{
rule_type: FILTER_TITLE,
rule_type: FILTER_SIMPLE_TITLE,
value: 'foo',
},
])
@@ -1250,30 +1264,12 @@ describe('FilterEditorComponent', () => {
])
}))
it('should convert user input to correct filter rules on custom fields query', fakeAsync(() => {
component.textFilterInput.nativeElement.value = 'foo'
component.textFilterInput.nativeElement.dispatchEvent(new Event('input'))
const textFieldTargetDropdown = fixture.debugElement.queryAll(
By.directive(NgbDropdownItem)
)[3]
textFieldTargetDropdown.triggerEventHandler('click') // TEXT_FILTER_TARGET_CUSTOM_FIELDS
fixture.detectChanges()
tick(400)
expect(component.textFilterTarget).toEqual('custom-fields')
expect(component.filterRules).toEqual([
{
rule_type: FILTER_CUSTOM_FIELDS_TEXT,
value: 'foo',
},
])
}))
it('should convert user input to correct filter rules on mime type', fakeAsync(() => {
component.textFilterInput.nativeElement.value = 'pdf'
component.textFilterInput.nativeElement.dispatchEvent(new Event('input'))
const textFieldTargetDropdown = fixture.debugElement.queryAll(
By.directive(NgbDropdownItem)
)[4]
)[3]
textFieldTargetDropdown.triggerEventHandler('click') // TEXT_FILTER_TARGET_MIME_TYPE
fixture.detectChanges()
tick(400)
@@ -1291,8 +1287,8 @@ describe('FilterEditorComponent', () => {
component.textFilterInput.nativeElement.dispatchEvent(new Event('input'))
const textFieldTargetDropdown = fixture.debugElement.queryAll(
By.directive(NgbDropdownItem)
)[5]
textFieldTargetDropdown.triggerEventHandler('click') // TEXT_FILTER_TARGET_ASN
)[4]
textFieldTargetDropdown.triggerEventHandler('click') // TEXT_FILTER_TARGET_FULLTEXT_QUERY
fixture.detectChanges()
tick(400)
expect(component.textFilterTarget).toEqual('fulltext-query')
@@ -1696,12 +1692,56 @@ describe('FilterEditorComponent', () => {
])
}))
it('should convert legacy title filters into full text query when adding a created relative date', fakeAsync(() => {
component.filterRules = [
{
rule_type: FILTER_TITLE,
value: 'foo',
},
]
const dateCreatedDropdown = fixture.debugElement.queryAll(
By.directive(DatesDropdownComponent)
)[0]
component.dateCreatedRelativeDate = RelativeDate.WITHIN_1_WEEK
dateCreatedDropdown.triggerEventHandler('datesSet')
fixture.detectChanges()
tick(400)
expect(component.filterRules).toEqual([
{
rule_type: FILTER_FULLTEXT_QUERY,
value: 'foo,created:[-1 week to now]',
},
])
}))
it('should convert simple title filters into full text query when adding a created relative date', fakeAsync(() => {
component.filterRules = [
{
rule_type: FILTER_SIMPLE_TITLE,
value: 'foo',
},
]
const dateCreatedDropdown = fixture.debugElement.queryAll(
By.directive(DatesDropdownComponent)
)[0]
component.dateCreatedRelativeDate = RelativeDate.WITHIN_1_WEEK
dateCreatedDropdown.triggerEventHandler('datesSet')
fixture.detectChanges()
tick(400)
expect(component.filterRules).toEqual([
{
rule_type: FILTER_FULLTEXT_QUERY,
value: 'foo,created:[-1 week to now]',
},
])
}))
it('should leave relative dates not in quick list intact', fakeAsync(() => {
component.textFilterInput.nativeElement.value = 'created:[-2 week to now]'
component.textFilterInput.nativeElement.dispatchEvent(new Event('input'))
const textFieldTargetDropdown = fixture.debugElement.queryAll(
By.directive(NgbDropdownItem)
)[5]
)[4]
textFieldTargetDropdown.triggerEventHandler('click')
fixture.detectChanges()
tick(400)
@@ -2031,12 +2071,30 @@ describe('FilterEditorComponent', () => {
component.filterRules = [
{
rule_type: FILTER_TITLE,
rule_type: FILTER_SIMPLE_TITLE,
value: 'foo',
},
]
expect(component.generateFilterName()).toEqual('Title: foo')
component.filterRules = [
{
rule_type: FILTER_TITLE_CONTENT,
value: 'legacy foo',
},
]
expect(component.generateFilterName()).toEqual(
'Title & content: legacy foo'
)
component.filterRules = [
{
rule_type: FILTER_SIMPLE_TEXT,
value: 'foo',
},
]
expect(component.generateFilterName()).toEqual('Title & content: foo')
component.filterRules = [
{
rule_type: FILTER_ASN,
@@ -2156,6 +2214,36 @@ describe('FilterEditorComponent', () => {
})
})
it('should hide deprecated custom fields target from default text filter targets', () => {
expect(component.textFilterTargets).not.toContainEqual({
id: 'custom-fields',
name: $localize`Custom fields (Deprecated)`,
})
})
it('should keep deprecated custom fields target available for legacy filters', fakeAsync(() => {
component.filterRules = [
{
rule_type: FILTER_CUSTOM_FIELDS_TEXT,
value: 'foo',
},
]
fixture.detectChanges()
tick()
expect(component.textFilterTarget).toEqual('custom-fields')
expect(component.textFilterTargets).toContainEqual({
id: 'custom-fields',
name: $localize`Custom fields (Deprecated)`,
})
expect(component.filterRules).toEqual([
{
rule_type: FILTER_CUSTOM_FIELDS_TEXT,
value: 'foo',
},
])
}))
it('should call autocomplete endpoint on input', fakeAsync(() => {
component.textFilterTarget = 'fulltext-query' // TEXT_FILTER_TARGET_FULLTEXT_QUERY
const autocompleteSpy = jest.spyOn(searchService, 'autocomplete')
@@ -71,6 +71,8 @@ import {
FILTER_OWNER_DOES_NOT_INCLUDE,
FILTER_OWNER_ISNULL,
FILTER_SHARED_BY_USER,
FILTER_SIMPLE_TEXT,
FILTER_SIMPLE_TITLE,
FILTER_STORAGE_PATH,
FILTER_TITLE,
FILTER_TITLE_CONTENT,
@@ -195,10 +197,6 @@ const DEFAULT_TEXT_FILTER_TARGET_OPTIONS = [
name: $localize`Title & content`,
},
{ id: TEXT_FILTER_TARGET_ASN, name: $localize`ASN` },
{
id: TEXT_FILTER_TARGET_CUSTOM_FIELDS,
name: $localize`Custom fields`,
},
{ id: TEXT_FILTER_TARGET_MIME_TYPE, name: $localize`File type` },
{
id: TEXT_FILTER_TARGET_FULLTEXT_QUERY,
@@ -206,6 +204,12 @@ const DEFAULT_TEXT_FILTER_TARGET_OPTIONS = [
},
]
const DEPRECATED_CUSTOM_FIELDS_TEXT_FILTER_TARGET_OPTION = {
// Kept only so legacy saved views can render and be edited away from, remove me eventually
id: TEXT_FILTER_TARGET_CUSTOM_FIELDS,
name: $localize`Custom fields (Deprecated)`,
}
const TEXT_FILTER_TARGET_MORELIKE_OPTION = {
id: TEXT_FILTER_TARGET_FULLTEXT_MORELIKE,
name: $localize`More like`,
@@ -318,8 +322,13 @@ export class FilterEditorComponent
return $localize`Custom fields query`
case FILTER_TITLE:
case FILTER_SIMPLE_TITLE:
return $localize`Title: ${rule.value}`
case FILTER_TITLE_CONTENT:
case FILTER_SIMPLE_TEXT:
return $localize`Title & content: ${rule.value}`
case FILTER_ASN:
return $localize`ASN: ${rule.value}`
@@ -353,12 +362,16 @@ export class FilterEditorComponent
_moreLikeDoc: Document
get textFilterTargets() {
let targets = DEFAULT_TEXT_FILTER_TARGET_OPTIONS
if (this.textFilterTarget == TEXT_FILTER_TARGET_FULLTEXT_MORELIKE) {
return DEFAULT_TEXT_FILTER_TARGET_OPTIONS.concat([
TEXT_FILTER_TARGET_MORELIKE_OPTION,
targets = targets.concat([TEXT_FILTER_TARGET_MORELIKE_OPTION])
}
if (this.textFilterTarget == TEXT_FILTER_TARGET_CUSTOM_FIELDS) {
targets = targets.concat([
DEPRECATED_CUSTOM_FIELDS_TEXT_FILTER_TARGET_OPTION,
])
}
return DEFAULT_TEXT_FILTER_TARGET_OPTIONS
return targets
}
textFilterTarget = TEXT_FILTER_TARGET_TITLE_CONTENT
@@ -437,10 +450,12 @@ export class FilterEditorComponent
value.forEach((rule) => {
switch (rule.rule_type) {
case FILTER_TITLE:
case FILTER_SIMPLE_TITLE:
this._textFilter = rule.value
this.textFilterTarget = TEXT_FILTER_TARGET_TITLE
break
case FILTER_TITLE_CONTENT:
case FILTER_SIMPLE_TEXT:
this._textFilter = rule.value
this.textFilterTarget = TEXT_FILTER_TARGET_TITLE_CONTENT
break
@@ -762,12 +777,15 @@ export class FilterEditorComponent
this.textFilterTarget == TEXT_FILTER_TARGET_TITLE_CONTENT
) {
filterRules.push({
rule_type: FILTER_TITLE_CONTENT,
rule_type: FILTER_SIMPLE_TEXT,
value: this._textFilter.trim(),
})
}
if (this._textFilter && this.textFilterTarget == TEXT_FILTER_TARGET_TITLE) {
filterRules.push({ rule_type: FILTER_TITLE, value: this._textFilter })
filterRules.push({
rule_type: FILTER_SIMPLE_TITLE,
value: this._textFilter,
})
}
if (this.textFilterTarget == TEXT_FILTER_TARGET_ASN) {
if (
@@ -1009,7 +1027,10 @@ export class FilterEditorComponent
) {
existingRule = filterRules.find(
(fr) =>
fr.rule_type == FILTER_TITLE_CONTENT || fr.rule_type == FILTER_TITLE
fr.rule_type == FILTER_TITLE_CONTENT ||
fr.rule_type == FILTER_SIMPLE_TEXT ||
fr.rule_type == FILTER_TITLE ||
fr.rule_type == FILTER_SIMPLE_TITLE
)
existingRule.rule_type = FILTER_FULLTEXT_QUERY
}
+21 -3
View File
@@ -3,7 +3,7 @@ import { DataType } from './datatype'
export const NEGATIVE_NULL_FILTER_VALUE = -1
// These correspond to src/documents/models.py and changes here require a DB migration (and vice versa)
export const FILTER_TITLE = 0
export const FILTER_TITLE = 0 // Deprecated in favor of Tantivy-backed `title_search`. Keep for now for existing saved views
export const FILTER_CONTENT = 1
export const FILTER_ASN = 2
@@ -46,7 +46,9 @@ export const FILTER_ADDED_FROM = 46
export const FILTER_MODIFIED_BEFORE = 15
export const FILTER_MODIFIED_AFTER = 16
export const FILTER_TITLE_CONTENT = 19
export const FILTER_TITLE_CONTENT = 19 // Deprecated in favor of Tantivy-backed `text` filtervar. Keep for now for existing saved views
export const FILTER_SIMPLE_TITLE = 48
export const FILTER_SIMPLE_TEXT = 49
export const FILTER_FULLTEXT_QUERY = 20
export const FILTER_FULLTEXT_MORELIKE = 21
@@ -56,7 +58,7 @@ export const FILTER_OWNER_ISNULL = 34
export const FILTER_OWNER_DOES_NOT_INCLUDE = 35
export const FILTER_SHARED_BY_USER = 37
export const FILTER_CUSTOM_FIELDS_TEXT = 36
export const FILTER_CUSTOM_FIELDS_TEXT = 36 // Deprecated. UI no longer includes CF text-search mode. Keep for now for existing saved views
export const FILTER_HAS_CUSTOM_FIELDS_ALL = 38
export const FILTER_HAS_CUSTOM_FIELDS_ANY = 39
export const FILTER_DOES_NOT_HAVE_CUSTOM_FIELDS = 40
@@ -66,6 +68,9 @@ export const FILTER_CUSTOM_FIELDS_QUERY = 42
export const FILTER_MIME_TYPE = 47
export const SIMPLE_TEXT_PARAMETER = 'text'
export const SIMPLE_TITLE_PARAMETER = 'title_search'
export const FILTER_RULE_TYPES: FilterRuleType[] = [
{
id: FILTER_TITLE,
@@ -74,6 +79,13 @@ export const FILTER_RULE_TYPES: FilterRuleType[] = [
multi: false,
default: '',
},
{
id: FILTER_SIMPLE_TITLE,
filtervar: SIMPLE_TITLE_PARAMETER,
datatype: 'string',
multi: false,
default: '',
},
{
id: FILTER_CONTENT,
filtervar: 'content__icontains',
@@ -279,6 +291,12 @@ export const FILTER_RULE_TYPES: FilterRuleType[] = [
datatype: 'string',
multi: false,
},
{
id: FILTER_SIMPLE_TEXT,
filtervar: SIMPLE_TEXT_PARAMETER,
datatype: 'string',
multi: false,
},
{
id: FILTER_FULLTEXT_QUERY,
filtervar: 'query',
@@ -10,7 +10,7 @@ import {
DOCUMENT_SORT_FIELDS,
DOCUMENT_SORT_FIELDS_FULLTEXT,
} from 'src/app/data/document'
import { FILTER_TITLE } from 'src/app/data/filter-rule-type'
import { FILTER_SIMPLE_TITLE } from 'src/app/data/filter-rule-type'
import { SETTINGS_KEYS } from 'src/app/data/ui-settings'
import { environment } from 'src/environments/environment'
import { PermissionsService } from '../permissions.service'
@@ -138,13 +138,13 @@ describe(`DocumentService`, () => {
subscription = service
.listAllFilteredIds([
{
rule_type: FILTER_TITLE,
rule_type: FILTER_SIMPLE_TITLE,
value: 'apple',
},
])
.subscribe()
const req = httpTestingController.expectOne(
`${environment.apiBaseUrl}${endpoint}/?page=1&page_size=100000&fields=id&title__icontains=apple`
`${environment.apiBaseUrl}${endpoint}/?page=1&page_size=100000&fields=id&title_search=apple`
)
expect(req.request.method).toEqual('GET')
})
+48
View File
@@ -8,6 +8,10 @@ import {
FILTER_HAS_CUSTOM_FIELDS_ALL,
FILTER_HAS_CUSTOM_FIELDS_ANY,
FILTER_HAS_TAGS_ALL,
FILTER_SIMPLE_TEXT,
FILTER_SIMPLE_TITLE,
FILTER_TITLE,
FILTER_TITLE_CONTENT,
NEGATIVE_NULL_FILTER_VALUE,
} from '../data/filter-rule-type'
import {
@@ -128,6 +132,26 @@ describe('QueryParams Utils', () => {
is_tagged: 0,
})
params = queryParamsFromFilterRules([
{
rule_type: FILTER_TITLE_CONTENT,
value: 'bank statement',
},
])
expect(params).toEqual({
text: 'bank statement',
})
params = queryParamsFromFilterRules([
{
rule_type: FILTER_TITLE,
value: 'invoice',
},
])
expect(params).toEqual({
title_search: 'invoice',
})
params = queryParamsFromFilterRules([
{
rule_type: FILTER_HAS_TAGS_ALL,
@@ -148,6 +172,30 @@ describe('QueryParams Utils', () => {
it('should convert filter rules to query params', () => {
let rules = filterRulesFromQueryParams(
convertToParamMap({
text: 'bank statement',
})
)
expect(rules).toEqual([
{
rule_type: FILTER_SIMPLE_TEXT,
value: 'bank statement',
},
])
rules = filterRulesFromQueryParams(
convertToParamMap({
title_search: 'invoice',
})
)
expect(rules).toEqual([
{
rule_type: FILTER_SIMPLE_TITLE,
value: 'invoice',
},
])
rules = filterRulesFromQueryParams(
convertToParamMap({
tags__id__all,
})
+19 -2
View File
@@ -9,8 +9,14 @@ import {
FILTER_HAS_CUSTOM_FIELDS_ALL,
FILTER_HAS_CUSTOM_FIELDS_ANY,
FILTER_RULE_TYPES,
FILTER_SIMPLE_TEXT,
FILTER_SIMPLE_TITLE,
FILTER_TITLE,
FILTER_TITLE_CONTENT,
FilterRuleType,
NEGATIVE_NULL_FILTER_VALUE,
SIMPLE_TEXT_PARAMETER,
SIMPLE_TITLE_PARAMETER,
} from '../data/filter-rule-type'
import { ListViewState } from '../services/document-list-view.service'
@@ -97,6 +103,8 @@ export function transformLegacyFilterRules(
export function filterRulesFromQueryParams(
queryParams: ParamMap
): FilterRule[] {
let filterRulesFromQueryParams: FilterRule[] = []
const allFilterRuleQueryParams: string[] = FILTER_RULE_TYPES.map(
(rt) => rt.filtervar
)
@@ -104,7 +112,6 @@ export function filterRulesFromQueryParams(
.filter((rt) => rt !== undefined)
// transform query params to filter rules
let filterRulesFromQueryParams: FilterRule[] = []
allFilterRuleQueryParams
.filter((frqp) => queryParams.has(frqp))
.forEach((filterQueryParamName) => {
@@ -146,7 +153,17 @@ export function queryParamsFromFilterRules(filterRules: FilterRule[]): Params {
let params = {}
for (let rule of filterRules) {
let ruleType = FILTER_RULE_TYPES.find((t) => t.id == rule.rule_type)
if (ruleType.isnull_filtervar && rule.value == null) {
if (
rule.rule_type === FILTER_TITLE_CONTENT ||
rule.rule_type === FILTER_SIMPLE_TEXT
) {
params[SIMPLE_TEXT_PARAMETER] = rule.value
} else if (
rule.rule_type === FILTER_TITLE ||
rule.rule_type === FILTER_SIMPLE_TITLE
) {
params[SIMPLE_TITLE_PARAMETER] = rule.value
} else if (ruleType.isnull_filtervar && rule.value == null) {
params[ruleType.isnull_filtervar] = 1
} else if (
ruleType.isnull_filtervar &&
+6 -8
View File
@@ -213,14 +213,12 @@ class ConsumerPluginMixin:
message,
current_progress,
max_progress,
extra_args={
"document_id": document_id,
"owner_id": self.metadata.owner_id if self.metadata.owner_id else None,
"users_can_view": (self.metadata.view_users or [])
+ (self.metadata.change_users or []),
"groups_can_view": (self.metadata.view_groups or [])
+ (self.metadata.change_groups or []),
},
document_id=document_id,
owner_id=self.metadata.owner_id if self.metadata.owner_id else None,
users_can_view=(self.metadata.view_users or [])
+ (self.metadata.change_users or []),
groups_can_view=(self.metadata.view_groups or [])
+ (self.metadata.change_groups or []),
)
def _fail(
+12
View File
@@ -3,6 +3,7 @@ from __future__ import annotations
import functools
import inspect
import json
import logging
import operator
from contextlib import contextmanager
from typing import TYPE_CHECKING
@@ -77,6 +78,8 @@ DATETIME_KWARGS = [
CUSTOM_FIELD_QUERY_MAX_DEPTH = 10
CUSTOM_FIELD_QUERY_MAX_ATOMS = 20
logger = logging.getLogger("paperless.api")
class CorrespondentFilterSet(FilterSet):
class Meta:
@@ -162,9 +165,13 @@ class InboxFilter(Filter):
@extend_schema_field(serializers.CharField)
class TitleContentFilter(Filter):
# Deprecated but retained for existing saved views. UI uses Tantivy-backed `text` / `title_search` params.
def filter(self, qs: Any, value: Any) -> Any:
value = value.strip() if isinstance(value, str) else value
if value:
logger.warning(
"Deprecated document filter parameter 'title_content' used; use `text` instead.",
)
try:
return qs.filter(
Q(title__icontains=value) | Q(effective_content__icontains=value),
@@ -243,6 +250,9 @@ class CustomFieldsFilter(Filter):
def filter(self, qs, value):
value = value.strip() if isinstance(value, str) else value
if value:
logger.warning(
"Deprecated document filter parameter 'custom_fields__icontains' used; use `custom_field_query` or advanced Tantivy field syntax instead.",
)
fields_with_matching_selects = CustomField.objects.filter(
extra_data__icontains=value,
)
@@ -747,6 +757,7 @@ class DocumentFilterSet(FilterSet):
is_in_inbox = InboxFilter()
# Deprecated, but keep for now for existing saved views
title_content = TitleContentFilter()
content__istartswith = EffectiveContentFilter(lookup_expr="istartswith")
@@ -756,6 +767,7 @@ class DocumentFilterSet(FilterSet):
owner__id__none = ObjectFilter(field_name="owner", exclude=True)
# Deprecated, UI no longer includes CF text-search mode, but keep for now for existing saved views
custom_fields__icontains = CustomFieldsFilter()
custom_fields__id__all = ObjectFilter(field_name="custom_fields__field")
@@ -45,6 +45,8 @@ from documents.models import DocumentType
from documents.models import Note
from documents.models import SavedView
from documents.models import SavedViewFilterRule
from documents.models import ShareLink
from documents.models import ShareLinkBundle
from documents.models import StoragePath
from documents.models import Tag
from documents.models import UiSettings
@@ -55,6 +57,7 @@ from documents.models import WorkflowActionWebhook
from documents.models import WorkflowTrigger
from documents.settings import EXPORTER_ARCHIVE_NAME
from documents.settings import EXPORTER_FILE_NAME
from documents.settings import EXPORTER_SHARE_LINK_BUNDLE_NAME
from documents.settings import EXPORTER_THUMBNAIL_NAME
from documents.utils import compute_checksum
from documents.utils import copy_file_with_basic_stats
@@ -389,6 +392,8 @@ class Command(CryptMixin, PaperlessCommand):
"app_configs": ApplicationConfiguration.objects.all(),
"notes": Note.global_objects.all(),
"documents": Document.global_objects.order_by("id").all(),
"share_links": ShareLink.global_objects.all(),
"share_link_bundles": ShareLinkBundle.objects.order_by("id").all(),
"social_accounts": SocialAccount.objects.all(),
"social_apps": SocialApp.objects.all(),
"social_tokens": SocialToken.objects.all(),
@@ -409,6 +414,7 @@ class Command(CryptMixin, PaperlessCommand):
)
document_manifest: list[dict] = []
share_link_bundle_manifest: list[dict] = []
manifest_path = (self.target / "manifest.json").resolve()
with StreamingManifestWriter(
@@ -427,6 +433,15 @@ class Command(CryptMixin, PaperlessCommand):
for record in batch:
self._encrypt_record_inline(record)
document_manifest.extend(batch)
elif key == "share_link_bundles":
# Accumulate for file-copy loop; written to manifest after
for batch in serialize_queryset_batched(
qs,
batch_size=self.batch_size,
):
for record in batch:
self._encrypt_record_inline(record)
share_link_bundle_manifest.extend(batch)
elif self.split_manifest and key in (
"notes",
"custom_field_instances",
@@ -445,6 +460,12 @@ class Command(CryptMixin, PaperlessCommand):
document_map: dict[int, Document] = {
d.pk: d for d in Document.global_objects.order_by("id")
}
share_link_bundle_map: dict[int, ShareLinkBundle] = {
b.pk: b
for b in ShareLinkBundle.objects.order_by("id").prefetch_related(
"documents",
)
}
# 3. Export files from each document
for index, document_dict in enumerate(
@@ -478,6 +499,19 @@ class Command(CryptMixin, PaperlessCommand):
else:
writer.write_record(document_dict)
for bundle_dict in share_link_bundle_manifest:
bundle = share_link_bundle_map[bundle_dict["pk"]]
bundle_target = self.generate_share_link_bundle_target(
bundle,
bundle_dict,
)
if not self.data_only and bundle_target is not None:
self.copy_share_link_bundle_file(bundle, bundle_target)
writer.write_record(bundle_dict)
# 4.2 write version information to target folder
extra_metadata_path = (self.target / "metadata.json").resolve()
metadata: dict[str, str | int | dict[str, str | int]] = {
@@ -598,6 +632,48 @@ class Command(CryptMixin, PaperlessCommand):
archive_target,
)
def generate_share_link_bundle_target(
self,
bundle: ShareLinkBundle,
bundle_dict: dict,
) -> Path | None:
"""
Generates the export target for a share link bundle file, when present.
"""
if not bundle.file_path:
return None
stored_bundle_path = Path(bundle.file_path)
portable_bundle_path = (
stored_bundle_path
if not stored_bundle_path.is_absolute()
else Path(stored_bundle_path.name)
)
export_bundle_path = Path("share_link_bundles") / portable_bundle_path
bundle_dict["fields"]["file_path"] = portable_bundle_path.as_posix()
bundle_dict[EXPORTER_SHARE_LINK_BUNDLE_NAME] = export_bundle_path.as_posix()
return (self.target / export_bundle_path).resolve()
def copy_share_link_bundle_file(
self,
bundle: ShareLinkBundle,
bundle_target: Path,
) -> None:
"""
Copies a share link bundle ZIP into the export directory.
"""
bundle_source_path = bundle.absolute_file_path
if bundle_source_path is None:
raise FileNotFoundError(f"Share link bundle {bundle.pk} has no file path")
self.check_and_copy(
bundle_source_path,
None,
bundle_target,
)
def _encrypt_record_inline(self, record: dict) -> None:
"""Encrypt sensitive fields in a single record, if passphrase is set."""
if not self.passphrase:
@@ -32,10 +32,12 @@ from documents.models import CustomFieldInstance
from documents.models import Document
from documents.models import DocumentType
from documents.models import Note
from documents.models import ShareLinkBundle
from documents.models import Tag
from documents.settings import EXPORTER_ARCHIVE_NAME
from documents.settings import EXPORTER_CRYPTO_SETTINGS_NAME
from documents.settings import EXPORTER_FILE_NAME
from documents.settings import EXPORTER_SHARE_LINK_BUNDLE_NAME
from documents.settings import EXPORTER_THUMBNAIL_NAME
from documents.signals.handlers import check_paths_and_prune_custom_fields
from documents.signals.handlers import update_filename_and_move_files
@@ -348,18 +350,42 @@ class Command(CryptMixin, PaperlessCommand):
f"Failed to read from archive file {doc_archive_path}",
) from e
def check_share_link_bundle_validity(bundle_record: dict) -> None:
if EXPORTER_SHARE_LINK_BUNDLE_NAME not in bundle_record:
return
bundle_file = bundle_record[EXPORTER_SHARE_LINK_BUNDLE_NAME]
bundle_path: Path = self.source / bundle_file
if not bundle_path.exists():
raise CommandError(
f'The manifest file refers to "{bundle_file}" which does not '
"appear to be in the source directory.",
)
try:
with bundle_path.open(mode="rb"):
pass
except Exception as e:
raise CommandError(
f"Failed to read from share link bundle file {bundle_path}",
) from e
self.stdout.write("Checking the manifest")
for manifest_path in self.manifest_paths:
for record in iter_manifest_records(manifest_path):
# Only check if the document files exist if this is not data only
# We don't care about documents for a data only import
if not self.data_only and record["model"] == "documents.document":
if self.data_only:
continue
if record["model"] == "documents.document":
check_document_validity(record)
elif record["model"] == "documents.sharelinkbundle":
check_share_link_bundle_validity(record)
def _import_files_from_manifest(self) -> None:
settings.ORIGINALS_DIR.mkdir(parents=True, exist_ok=True)
settings.THUMBNAIL_DIR.mkdir(parents=True, exist_ok=True)
settings.ARCHIVE_DIR.mkdir(parents=True, exist_ok=True)
settings.SHARE_LINK_BUNDLE_DIR.mkdir(parents=True, exist_ok=True)
self.stdout.write("Copy files into paperless...")
@@ -374,6 +400,18 @@ class Command(CryptMixin, PaperlessCommand):
for record in iter_manifest_records(manifest_path)
if record["model"] == "documents.document"
]
share_link_bundle_records = [
{
"pk": record["pk"],
EXPORTER_SHARE_LINK_BUNDLE_NAME: record.get(
EXPORTER_SHARE_LINK_BUNDLE_NAME,
),
}
for manifest_path in self.manifest_paths
for record in iter_manifest_records(manifest_path)
if record["model"] == "documents.sharelinkbundle"
and record.get(EXPORTER_SHARE_LINK_BUNDLE_NAME)
]
for record in self.track(document_records, description="Copying files..."):
document = Document.global_objects.get(pk=record["pk"])
@@ -416,6 +454,26 @@ class Command(CryptMixin, PaperlessCommand):
document.save()
for record in self.track(
share_link_bundle_records,
description="Copying share link bundles...",
):
bundle = ShareLinkBundle.objects.get(pk=record["pk"])
bundle_file = record[EXPORTER_SHARE_LINK_BUNDLE_NAME]
bundle_source_path = (self.source / bundle_file).resolve()
bundle_target_path = bundle.absolute_file_path
if bundle_target_path is None:
raise CommandError(
f"Share link bundle {bundle.pk} does not have a valid file path.",
)
with FileLock(settings.MEDIA_LOCK):
bundle_target_path.parent.mkdir(parents=True, exist_ok=True)
copy_file_with_basic_stats(
bundle_source_path,
bundle_target_path,
)
def _decrypt_record_if_needed(self, record: dict) -> dict:
fields = self.CRYPT_FIELDS_BY_MODEL.get(record.get("model", ""))
if fields:
@@ -0,0 +1,92 @@
# Generated by Django 5.2.12 on 2026-04-01 18:20
from django.db import migrations
from django.db import models
OLD_TITLE_RULE = 0
OLD_TITLE_CONTENT_RULE = 19
NEW_SIMPLE_TITLE_RULE = 48
NEW_SIMPLE_TEXT_RULE = 49
# See documents/models.py SavedViewFilterRule
def migrate_saved_view_rules_forward(apps, schema_editor):
SavedViewFilterRule = apps.get_model("documents", "SavedViewFilterRule")
SavedViewFilterRule.objects.filter(rule_type=OLD_TITLE_RULE).update(
rule_type=NEW_SIMPLE_TITLE_RULE,
)
SavedViewFilterRule.objects.filter(rule_type=OLD_TITLE_CONTENT_RULE).update(
rule_type=NEW_SIMPLE_TEXT_RULE,
)
class Migration(migrations.Migration):
dependencies = [
("documents", "0017_migrate_fulltext_query_field_prefixes"),
]
operations = [
migrations.AlterField(
model_name="savedviewfilterrule",
name="rule_type",
field=models.PositiveSmallIntegerField(
choices=[
(0, "title contains"),
(1, "content contains"),
(2, "ASN is"),
(3, "correspondent is"),
(4, "document type is"),
(5, "is in inbox"),
(6, "has tag"),
(7, "has any tag"),
(8, "created before"),
(9, "created after"),
(10, "created year is"),
(11, "created month is"),
(12, "created day is"),
(13, "added before"),
(14, "added after"),
(15, "modified before"),
(16, "modified after"),
(17, "does not have tag"),
(18, "does not have ASN"),
(19, "title or content contains"),
(20, "fulltext query"),
(21, "more like this"),
(22, "has tags in"),
(23, "ASN greater than"),
(24, "ASN less than"),
(25, "storage path is"),
(26, "has correspondent in"),
(27, "does not have correspondent in"),
(28, "has document type in"),
(29, "does not have document type in"),
(30, "has storage path in"),
(31, "does not have storage path in"),
(32, "owner is"),
(33, "has owner in"),
(34, "does not have owner"),
(35, "does not have owner in"),
(36, "has custom field value"),
(37, "is shared by me"),
(38, "has custom fields"),
(39, "has custom field in"),
(40, "does not have custom field in"),
(41, "does not have custom field"),
(42, "custom fields query"),
(43, "created to"),
(44, "created from"),
(45, "added to"),
(46, "added from"),
(47, "mime type is"),
(48, "simple title search"),
(49, "simple text search"),
],
verbose_name="rule type",
),
),
migrations.RunPython(
migrate_saved_view_rules_forward,
migrations.RunPython.noop,
),
]
+2
View File
@@ -623,6 +623,8 @@ class SavedViewFilterRule(models.Model):
(45, _("added to")),
(46, _("added from")),
(47, _("mime type is")),
(48, _("simple title search")),
(49, _("simple text search")),
]
saved_view = models.ForeignKey(
+73 -18
View File
@@ -1,6 +1,9 @@
import enum
from collections.abc import Mapping
from typing import TYPE_CHECKING
from typing import Literal
from typing import Self
from typing import TypeAlias
from typing import TypedDict
from asgiref.sync import async_to_sync
from channels.layers import get_channel_layer
@@ -16,6 +19,59 @@ class ProgressStatusOptions(enum.StrEnum):
FAILED = "FAILED"
class PermissionsData(TypedDict, total=False):
"""Permission fields included in status messages for access control."""
owner_id: int | None
users_can_view: list[int]
groups_can_view: list[int]
class ProgressUpdateData(TypedDict):
filename: str | None
task_id: str | None
current_progress: int
max_progress: int
status: str
message: str
document_id: int | None
owner_id: int | None
users_can_view: list[int]
groups_can_view: list[int]
class StatusUpdatePayload(TypedDict):
type: Literal["status_update"]
data: ProgressUpdateData
class DocumentsDeletedData(TypedDict):
documents: list[int]
class DocumentsDeletedPayload(TypedDict):
type: Literal["documents_deleted"]
data: DocumentsDeletedData
class DocumentUpdatedData(TypedDict):
document_id: int
modified: str
owner_id: int | None
users_can_view: list[int]
groups_can_view: list[int]
class DocumentUpdatedPayload(TypedDict):
type: Literal["document_updated"]
data: DocumentUpdatedData
WebsocketPayload: TypeAlias = (
StatusUpdatePayload | DocumentsDeletedPayload | DocumentUpdatedPayload
)
class BaseStatusManager:
"""
Handles sending of progress information via the channel layer, with proper management
@@ -25,11 +81,11 @@ class BaseStatusManager:
def __init__(self) -> None:
self._channel: RedisPubSubChannelLayer | None = None
def __enter__(self):
def __enter__(self) -> Self:
self.open()
return self
def __exit__(self, exc_type, exc_val, exc_tb):
def __exit__(self, exc_type: object, exc_val: object, exc_tb: object) -> None:
self.close()
def open(self) -> None:
@@ -48,7 +104,7 @@ class BaseStatusManager:
async_to_sync(self._channel.flush)
self._channel = None
def send(self, payload: Mapping[str, object]) -> None:
def send(self, payload: WebsocketPayload) -> None:
# Ensure the layer is open
self.open()
@@ -72,36 +128,36 @@ class ProgressManager(BaseStatusManager):
message: str,
current_progress: int,
max_progress: int,
extra_args: dict[str, str | int | None] | None = None,
*,
document_id: int | None = None,
owner_id: int | None = None,
users_can_view: list[int] | None = None,
groups_can_view: list[int] | None = None,
) -> None:
data: dict[str, object] = {
data: ProgressUpdateData = {
"filename": self.filename,
"task_id": self.task_id,
"current_progress": current_progress,
"max_progress": max_progress,
"status": status,
"message": message,
"document_id": document_id,
"owner_id": owner_id,
"users_can_view": users_can_view or [],
"groups_can_view": groups_can_view or [],
}
if extra_args is not None:
data.update(extra_args)
payload: dict[str, object] = {
"type": "status_update",
"data": data,
}
payload: StatusUpdatePayload = {"type": "status_update", "data": data}
self.send(payload)
class DocumentsStatusManager(BaseStatusManager):
def send_documents_deleted(self, documents: list[int]) -> None:
payload: dict[str, object] = {
payload: DocumentsDeletedPayload = {
"type": "documents_deleted",
"data": {
"documents": documents,
},
}
self.send(payload)
def send_document_updated(
@@ -113,7 +169,7 @@ class DocumentsStatusManager(BaseStatusManager):
users_can_view: list[int] | None = None,
groups_can_view: list[int] | None = None,
) -> None:
payload: dict[str, object] = {
payload: DocumentUpdatedPayload = {
"type": "document_updated",
"data": {
"document_id": document_id,
@@ -123,5 +179,4 @@ class DocumentsStatusManager(BaseStatusManager):
"groups_can_view": groups_can_view or [],
},
}
self.send(payload)
+2
View File
@@ -1,4 +1,5 @@
from documents.search._backend import SearchIndexLockError
from documents.search._backend import SearchMode
from documents.search._backend import SearchResults
from documents.search._backend import TantivyBackend
from documents.search._backend import TantivyRelevanceList
@@ -10,6 +11,7 @@ from documents.search._schema import wipe_index
__all__ = [
"SearchIndexLockError",
"SearchMode",
"SearchResults",
"TantivyBackend",
"TantivyRelevanceList",
+37 -19
View File
@@ -2,11 +2,11 @@ from __future__ import annotations
import logging
import threading
import unicodedata
from collections import Counter
from dataclasses import dataclass
from datetime import UTC
from datetime import datetime
from enum import StrEnum
from typing import TYPE_CHECKING
from typing import Self
from typing import TypedDict
@@ -19,7 +19,10 @@ from django.conf import settings
from django.utils.timezone import get_current_timezone
from guardian.shortcuts import get_users_with_perms
from documents.search._normalize import ascii_fold
from documents.search._query import build_permission_filter
from documents.search._query import parse_simple_text_query
from documents.search._query import parse_simple_title_query
from documents.search._query import parse_user_query
from documents.search._schema import _write_sentinels
from documents.search._schema import build_schema
@@ -45,14 +48,10 @@ _AUTOCOMPLETE_REGEX_TIMEOUT = 1.0 # seconds; guards against ReDoS on untrusted
T = TypeVar("T")
def _ascii_fold(s: str) -> str:
"""
Normalize unicode to ASCII equivalent characters for search consistency.
Converts accented characters (e.g., "café") to their ASCII base forms ("cafe")
to enable cross-language searching without requiring exact diacritic matching.
"""
return unicodedata.normalize("NFD", s).encode("ascii", "ignore").decode()
class SearchMode(StrEnum):
QUERY = "query"
TEXT = "text"
TITLE = "title"
def _extract_autocomplete_words(text_sources: list[str]) -> set[str]:
@@ -74,7 +73,7 @@ def _extract_autocomplete_words(text_sources: list[str]) -> set[str]:
)
continue
for token in tokens:
normalized = _ascii_fold(token.lower())
normalized = ascii_fold(token.lower())
if normalized:
words.add(normalized)
return words
@@ -294,8 +293,10 @@ class TantivyBackend:
doc.add_text("checksum", document.checksum)
doc.add_text("title", document.title)
doc.add_text("title_sort", document.title)
doc.add_text("simple_title", document.title)
doc.add_text("content", content)
doc.add_text("bigram_content", content)
doc.add_text("simple_content", content)
# Original filename - only add if not None/empty
if document.original_filename:
@@ -433,6 +434,7 @@ class TantivyBackend:
sort_field: str | None,
*,
sort_reverse: bool,
search_mode: SearchMode = SearchMode.QUERY,
) -> SearchResults:
"""
Execute a search query against the document index.
@@ -441,20 +443,32 @@ class TantivyBackend:
permission filtering before executing against Tantivy. Supports both
relevance-based and field-based sorting.
QUERY search mode supports natural date keywords, field filters, etc.
TITLE search mode treats the query as plain text to search for in title only
TEXT search mode treats the query as plain text to search for in title and content
Args:
query: User's search query (supports natural date keywords, field filters)
query: User's search query
user: User for permission filtering (None for superuser/no filtering)
page: Page number (1-indexed) for pagination
page_size: Number of results per page
sort_field: Field to sort by (None for relevance ranking)
sort_reverse: Whether to reverse the sort order
search_mode: "query" for advanced Tantivy syntax, "text" for
plain-text search over title and content only, "title" for
plain-text search over title only
Returns:
SearchResults with hits, total count, and processed query
"""
self._ensure_open()
tz = get_current_timezone()
user_query = parse_user_query(self._index, query, tz)
if search_mode is SearchMode.TEXT:
user_query = parse_simple_text_query(self._index, query)
elif search_mode is SearchMode.TITLE:
user_query = parse_simple_title_query(self._index, query)
else:
user_query = parse_user_query(self._index, query, tz)
# Apply permission filter if user is not None (not superuser)
if user is not None:
@@ -518,6 +532,7 @@ class TantivyBackend:
# Build result hits with highlights
hits: list[SearchHit] = []
snippet_generator = None
notes_snippet_generator = None
for rank, (doc_address, score) in enumerate(page_hits, start=offset + 1):
# Get the actual document from the searcher using the doc address
@@ -544,13 +559,16 @@ class TantivyBackend:
# Try notes highlights
if "notes" in doc_dict:
notes_generator = tantivy.SnippetGenerator.create(
searcher,
final_query,
self._schema,
"notes",
if notes_snippet_generator is None:
notes_snippet_generator = tantivy.SnippetGenerator.create(
searcher,
final_query,
self._schema,
"notes",
)
notes_snippet = notes_snippet_generator.snippet_from_doc(
actual_doc,
)
notes_snippet = notes_generator.snippet_from_doc(actual_doc)
if notes_snippet:
highlights["notes"] = str(notes_snippet)
@@ -594,7 +612,7 @@ class TantivyBackend:
List of word suggestions ordered by frequency, then alphabetically
"""
self._ensure_open()
normalized_term = _ascii_fold(term.lower())
normalized_term = ascii_fold(term.lower())
searcher = self._index.searcher()
+8
View File
@@ -0,0 +1,8 @@
from __future__ import annotations
import unicodedata
def ascii_fold(text: str) -> str:
"""Normalize unicode text to ASCII equivalents for search consistency."""
return unicodedata.normalize("NFD", text).encode("ascii", "ignore").decode()
+82
View File
@@ -12,6 +12,8 @@ import tantivy
from dateutil.relativedelta import relativedelta
from django.conf import settings
from documents.search._normalize import ascii_fold
if TYPE_CHECKING:
from datetime import tzinfo
@@ -51,6 +53,7 @@ _WHOOSH_REL_RANGE_RE = regex.compile(
)
# Whoosh-style 8-digit date: field:YYYYMMDD — field-aware so timezone can be applied correctly
_DATE8_RE = regex.compile(r"(?P<field>\w+):(?P<date8>\d{8})\b")
_SIMPLE_QUERY_TOKEN_RE = regex.compile(r"\S+")
def _fmt(dt: datetime) -> str:
@@ -436,7 +439,37 @@ DEFAULT_SEARCH_FIELDS = [
"document_type",
"tag",
]
SIMPLE_SEARCH_FIELDS = ["simple_title", "simple_content"]
TITLE_SEARCH_FIELDS = ["simple_title"]
_FIELD_BOOSTS = {"title": 2.0}
_SIMPLE_FIELD_BOOSTS = {"simple_title": 2.0}
def _build_simple_field_query(
index: tantivy.Index,
field: str,
tokens: list[str],
) -> tantivy.Query:
patterns = []
for idx, token in enumerate(tokens):
escaped = regex.escape(token)
# For multi-token substring search, only the first token can begin mid-word.
# Later tokens follow a whitespace boundary in the original query, so anchor
# them to the start of the next indexed token to reduce false positives like
# matching "Z-Berichte 16" for the query "Z-Berichte 6".
if idx == 0:
patterns.append(f".*{escaped}.*")
else:
patterns.append(f"{escaped}.*")
if len(patterns) == 1:
query = tantivy.Query.regex_query(index.schema, field, patterns[0])
else:
query = tantivy.Query.regex_phrase_query(index.schema, field, patterns)
boost = _SIMPLE_FIELD_BOOSTS.get(field, 1.0)
if boost > 1.0:
return tantivy.Query.boost_query(query, boost)
return query
def parse_user_query(
@@ -495,3 +528,52 @@ def parse_user_query(
)
return exact
def parse_simple_query(
index: tantivy.Index,
raw_query: str,
fields: list[str],
) -> tantivy.Query:
"""
Parse a plain-text query using Tantivy over a restricted field set.
Query string is escaped and normalized to be treated as "simple" text query.
"""
tokens = [
ascii_fold(token.lower())
for token in _SIMPLE_QUERY_TOKEN_RE.findall(raw_query, timeout=_REGEX_TIMEOUT)
]
tokens = [token for token in tokens if token]
if not tokens:
return tantivy.Query.empty_query()
field_queries = [
(tantivy.Occur.Should, _build_simple_field_query(index, field, tokens))
for field in fields
]
if len(field_queries) == 1:
return field_queries[0][1]
return tantivy.Query.boolean_query(field_queries)
def parse_simple_text_query(
index: tantivy.Index,
raw_query: str,
) -> tantivy.Query:
"""
Parse a plain-text query over title/content for simple search inputs.
"""
return parse_simple_query(index, raw_query, SIMPLE_SEARCH_FIELDS)
def parse_simple_title_query(
index: tantivy.Index,
raw_query: str,
) -> tantivy.Query:
"""
Parse a plain-text query over the title field only.
"""
return parse_simple_query(index, raw_query, TITLE_SEARCH_FIELDS)
+12
View File
@@ -53,6 +53,18 @@ def build_schema() -> tantivy.Schema:
# CJK support - not stored, indexed only
sb.add_text_field("bigram_content", stored=False, tokenizer_name="bigram_analyzer")
# Simple substring search support for title/content - not stored, indexed only
sb.add_text_field(
"simple_title",
stored=False,
tokenizer_name="simple_search_analyzer",
)
sb.add_text_field(
"simple_content",
stored=False,
tokenizer_name="simple_search_analyzer",
)
# Autocomplete prefix scan - stored, not indexed
sb.add_text_field("autocomplete_word", stored=True, tokenizer_name="raw")
+14
View File
@@ -70,6 +70,7 @@ def register_tokenizers(index: tantivy.Index, language: str | None) -> None:
index.register_tokenizer("paperless_text", _paperless_text(language))
index.register_tokenizer("simple_analyzer", _simple_analyzer())
index.register_tokenizer("bigram_analyzer", _bigram_analyzer())
index.register_tokenizer("simple_search_analyzer", _simple_search_analyzer())
# Fast-field tokenizer required for fast=True text fields in the schema
index.register_fast_field_tokenizer("simple_analyzer", _simple_analyzer())
@@ -114,3 +115,16 @@ def _bigram_analyzer() -> tantivy.TextAnalyzer:
.filter(tantivy.Filter.lowercase())
.build()
)
def _simple_search_analyzer() -> tantivy.TextAnalyzer:
"""Tokenizer for simple substring search fields: non-whitespace chunks -> remove_long(65) -> lowercase -> ascii_fold."""
return (
tantivy.TextAnalyzerBuilder(
tantivy.Tokenizer.regex(r"\S+"),
)
.filter(tantivy.Filter.remove_long(65))
.filter(tantivy.Filter.lowercase())
.filter(tantivy.Filter.ascii_fold())
.build()
)
+1
View File
@@ -3,6 +3,7 @@
EXPORTER_FILE_NAME = "__exported_file_name__"
EXPORTER_THUMBNAIL_NAME = "__exported_thumbnail_name__"
EXPORTER_ARCHIVE_NAME = "__exported_archive_name__"
EXPORTER_SHARE_LINK_BUNDLE_NAME = "__exported_share_link_bundle_name__"
EXPORTER_CRYPTO_SETTINGS_NAME = "__crypto__"
EXPORTER_CRYPTO_SALT_NAME = "__salt_hex__"
+68 -58
View File
@@ -62,6 +62,7 @@ from documents.utils import compute_checksum
from documents.utils import identity
from documents.workflows.utils import get_workflows_for_trigger
from paperless.config import AIConfig
from paperless.logging import consume_task_id
from paperless.parsers import ParserContext
from paperless.parsers.registry import get_parser_registry
from paperless_ai.indexing import llm_index_add_or_update_document
@@ -148,76 +149,85 @@ def consume_file(
input_doc: ConsumableDocument,
overrides: DocumentMetadataOverrides | None = None,
):
# Default no overrides
if overrides is None:
overrides = DocumentMetadataOverrides()
token = consume_task_id.set((self.request.id or "")[:8])
try:
# Default no overrides
if overrides is None:
overrides = DocumentMetadataOverrides()
plugins: list[type[ConsumeTaskPlugin]] = (
[
ConsumerPreflightPlugin,
ConsumerPlugin,
]
if input_doc.root_document_id is not None
else [
ConsumerPreflightPlugin,
AsnCheckPlugin,
CollatePlugin,
BarcodePlugin,
AsnCheckPlugin, # Re-run ASN check after barcode reading
WorkflowTriggerPlugin,
ConsumerPlugin,
]
)
plugins: list[type[ConsumeTaskPlugin]] = (
[
ConsumerPreflightPlugin,
ConsumerPlugin,
]
if input_doc.root_document_id is not None
else [
ConsumerPreflightPlugin,
AsnCheckPlugin,
CollatePlugin,
BarcodePlugin,
AsnCheckPlugin, # Re-run ASN check after barcode reading
WorkflowTriggerPlugin,
ConsumerPlugin,
]
)
with (
ProgressManager(
overrides.filename or input_doc.original_file.name,
self.request.id,
) as status_mgr,
TemporaryDirectory(dir=settings.SCRATCH_DIR) as tmp_dir,
):
tmp_dir = Path(tmp_dir)
for plugin_class in plugins:
plugin_name = plugin_class.NAME
plugin = plugin_class(
input_doc,
overrides,
status_mgr,
tmp_dir,
with (
ProgressManager(
overrides.filename or input_doc.original_file.name,
self.request.id,
)
) as status_mgr,
TemporaryDirectory(dir=settings.SCRATCH_DIR) as tmp_dir,
):
tmp_dir = Path(tmp_dir)
for plugin_class in plugins:
plugin_name = plugin_class.NAME
if not plugin.able_to_run:
logger.debug(f"Skipping plugin {plugin_name}")
continue
plugin = plugin_class(
input_doc,
overrides,
status_mgr,
tmp_dir,
self.request.id,
)
try:
logger.debug(f"Executing plugin {plugin_name}")
plugin.setup()
if not plugin.able_to_run:
logger.debug(f"Skipping plugin {plugin_name}")
continue
msg = plugin.run()
try:
logger.debug(f"Executing plugin {plugin_name}")
plugin.setup()
if msg is not None:
logger.info(f"{plugin_name} completed with: {msg}")
else:
logger.info(f"{plugin_name} completed with no message")
msg = plugin.run()
overrides = plugin.metadata
if msg is not None:
logger.info(f"{plugin_name} completed with: {msg}")
else:
logger.info(f"{plugin_name} completed with no message")
except StopConsumeTaskError as e:
logger.info(f"{plugin_name} requested task exit: {e.message}")
return e.message
overrides = plugin.metadata
except Exception as e:
logger.exception(f"{plugin_name} failed: {e}")
status_mgr.send_progress(ProgressStatusOptions.FAILED, f"{e}", 100, 100)
raise
except StopConsumeTaskError as e:
logger.info(f"{plugin_name} requested task exit: {e.message}")
return e.message
finally:
plugin.cleanup()
except Exception as e:
logger.exception(f"{plugin_name} failed: {e}")
status_mgr.send_progress(
ProgressStatusOptions.FAILED,
f"{e}",
100,
100,
)
raise
return msg
finally:
plugin.cleanup()
return msg
finally:
consume_task_id.reset(token)
@shared_task
+253
View File
@@ -5,6 +5,7 @@ from documents.models import CustomField
from documents.models import CustomFieldInstance
from documents.models import Document
from documents.models import Note
from documents.search._backend import SearchMode
from documents.search._backend import TantivyBackend
from documents.search._backend import get_backend
from documents.search._backend import reset_backend
@@ -46,6 +47,258 @@ class TestWriteBatch:
class TestSearch:
"""Test search functionality."""
def test_text_mode_limits_default_search_to_title_and_content(
self,
backend: TantivyBackend,
):
"""Simple text mode must not match metadata-only fields."""
doc = Document.objects.create(
title="Invoice document",
content="monthly statement",
checksum="TXT1",
pk=9,
)
backend.add_or_update(doc)
metadata_only = backend.search(
"document_type:invoice",
user=None,
page=1,
page_size=10,
sort_field=None,
sort_reverse=False,
search_mode=SearchMode.TEXT,
)
assert metadata_only.total == 0
content_match = backend.search(
"monthly",
user=None,
page=1,
page_size=10,
sort_field=None,
sort_reverse=False,
search_mode=SearchMode.TEXT,
)
assert content_match.total == 1
def test_title_mode_limits_default_search_to_title_only(
self,
backend: TantivyBackend,
):
"""Title mode must not match content-only terms."""
doc = Document.objects.create(
title="Invoice document",
content="monthly statement",
checksum="TXT2",
pk=10,
)
backend.add_or_update(doc)
content_only = backend.search(
"monthly",
user=None,
page=1,
page_size=10,
sort_field=None,
sort_reverse=False,
search_mode=SearchMode.TITLE,
)
assert content_only.total == 0
title_match = backend.search(
"invoice",
user=None,
page=1,
page_size=10,
sort_field=None,
sort_reverse=False,
search_mode=SearchMode.TITLE,
)
assert title_match.total == 1
def test_text_mode_matches_partial_term_substrings(
self,
backend: TantivyBackend,
):
"""Simple text mode should support substring matching within tokens."""
doc = Document.objects.create(
title="Account access",
content="password reset instructions",
checksum="TXT3",
pk=11,
)
backend.add_or_update(doc)
prefix_match = backend.search(
"pass",
user=None,
page=1,
page_size=10,
sort_field=None,
sort_reverse=False,
search_mode=SearchMode.TEXT,
)
assert prefix_match.total == 1
infix_match = backend.search(
"sswo",
user=None,
page=1,
page_size=10,
sort_field=None,
sort_reverse=False,
search_mode=SearchMode.TEXT,
)
assert infix_match.total == 1
phrase_match = backend.search(
"sswo re",
user=None,
page=1,
page_size=10,
sort_field=None,
sort_reverse=False,
search_mode=SearchMode.TEXT,
)
assert phrase_match.total == 1
def test_text_mode_does_not_match_on_partial_term_overlap(
self,
backend: TantivyBackend,
):
"""Simple text mode should not match documents that merely share partial fragments."""
doc = Document.objects.create(
title="Adobe Acrobat PDF Files",
content="Lorem ipsum dolor sit amet, consectetur adipiscing elit.",
checksum="TXT7",
pk=13,
)
backend.add_or_update(doc)
non_match = backend.search(
"raptor",
user=None,
page=1,
page_size=10,
sort_field=None,
sort_reverse=False,
search_mode=SearchMode.TEXT,
)
assert non_match.total == 0
def test_text_mode_anchors_later_query_tokens_to_token_starts(
self,
backend: TantivyBackend,
):
"""Multi-token simple search should not match later tokens in the middle of a word."""
exact_doc = Document.objects.create(
title="Z-Berichte 6",
content="monthly report",
checksum="TXT9",
pk=15,
)
prefix_doc = Document.objects.create(
title="Z-Berichte 60",
content="monthly report",
checksum="TXT10",
pk=16,
)
false_positive = Document.objects.create(
title="Z-Berichte 16",
content="monthly report",
checksum="TXT11",
pk=17,
)
backend.add_or_update(exact_doc)
backend.add_or_update(prefix_doc)
backend.add_or_update(false_positive)
results = backend.search(
"Z-Berichte 6",
user=None,
page=1,
page_size=10,
sort_field=None,
sort_reverse=False,
search_mode=SearchMode.TEXT,
)
result_ids = {hit["id"] for hit in results.hits}
assert exact_doc.id in result_ids
assert prefix_doc.id in result_ids
assert false_positive.id not in result_ids
def test_text_mode_ignores_queries_without_searchable_tokens(
self,
backend: TantivyBackend,
):
"""Simple text mode should safely return no hits for symbol-only strings."""
doc = Document.objects.create(
title="Guide",
content="This is a guide.",
checksum="TXT8",
pk=14,
)
backend.add_or_update(doc)
no_tokens = backend.search(
"!!!",
user=None,
page=1,
page_size=10,
sort_field=None,
sort_reverse=False,
search_mode=SearchMode.TEXT,
)
assert no_tokens.total == 0
def test_title_mode_matches_partial_term_substrings(
self,
backend: TantivyBackend,
):
"""Title mode should support substring matching within title tokens."""
doc = Document.objects.create(
title="Password guide",
content="reset instructions",
checksum="TXT4",
pk=12,
)
backend.add_or_update(doc)
prefix_match = backend.search(
"pass",
user=None,
page=1,
page_size=10,
sort_field=None,
sort_reverse=False,
search_mode=SearchMode.TITLE,
)
assert prefix_match.total == 1
infix_match = backend.search(
"sswo",
user=None,
page=1,
page_size=10,
sort_field=None,
sort_reverse=False,
search_mode=SearchMode.TITLE,
)
assert infix_match.total == 1
phrase_match = backend.search(
"sswo gu",
user=None,
page=1,
page_size=10,
sort_field=None,
sort_reverse=False,
search_mode=SearchMode.TITLE,
)
assert phrase_match.total == 1
def test_scores_normalised_top_hit_is_one(self, backend: TantivyBackend):
"""Search scores must be normalized so top hit has score 1.0 for UI consistency."""
for i, title in enumerate(["bank invoice", "bank statement", "bank receipt"]):
@@ -8,6 +8,7 @@ import tantivy
from documents.search._tokenizer import _bigram_analyzer
from documents.search._tokenizer import _paperless_text
from documents.search._tokenizer import _simple_search_analyzer
from documents.search._tokenizer import register_tokenizers
if TYPE_CHECKING:
@@ -41,6 +42,20 @@ class TestTokenizers:
idx.register_tokenizer("bigram_analyzer", _bigram_analyzer())
return idx
@pytest.fixture
def simple_search_index(self) -> tantivy.Index:
"""Index with simple-search field for Latin substring tests."""
sb = tantivy.SchemaBuilder()
sb.add_text_field(
"simple_content",
stored=False,
tokenizer_name="simple_search_analyzer",
)
schema = sb.build()
idx = tantivy.Index(schema, path=None)
idx.register_tokenizer("simple_search_analyzer", _simple_search_analyzer())
return idx
def test_ascii_fold_finds_accented_content(
self,
content_index: tantivy.Index,
@@ -66,6 +81,24 @@ class TestTokenizers:
q = bigram_index.parse_query("東京", ["bigram_content"])
assert bigram_index.searcher().search(q, limit=5).count == 1
def test_simple_search_analyzer_supports_regex_substrings(
self,
simple_search_index: tantivy.Index,
) -> None:
"""Whitespace-preserving simple search analyzer supports substring regex matching."""
writer = simple_search_index.writer()
doc = tantivy.Document()
doc.add_text("simple_content", "tag:invoice password-reset")
writer.add_document(doc)
writer.commit()
simple_search_index.reload()
q = tantivy.Query.regex_query(
simple_search_index.schema,
"simple_content",
".*sswo.*",
)
assert simple_search_index.searcher().search(q, limit=5).count == 1
def test_unsupported_language_logs_warning(self, caplog: LogCaptureFixture) -> None:
"""Unsupported language codes should log a warning and disable stemming gracefully."""
sb = tantivy.SchemaBuilder()
+154
View File
@@ -91,6 +91,135 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
self.assertEqual(response.data["count"], 0)
self.assertEqual(len(results), 0)
def test_simple_text_search(self) -> None:
tagged = Tag.objects.create(name="invoice")
matching_doc = Document.objects.create(
title="Quarterly summary",
content="Monthly bank report",
checksum="T1",
pk=11,
)
matching_doc.tags.add(tagged)
metadata_only_doc = Document.objects.create(
title="Completely unrelated",
content="No matching terms here",
checksum="T2",
pk=12,
)
metadata_only_doc.tags.add(tagged)
backend = get_backend()
backend.add_or_update(matching_doc)
backend.add_or_update(metadata_only_doc)
response = self.client.get("/api/documents/?text=monthly")
self.assertEqual(response.status_code, status.HTTP_200_OK)
self.assertEqual(response.data["count"], 1)
self.assertEqual(response.data["results"][0]["id"], matching_doc.id)
response = self.client.get("/api/documents/?text=tag:invoice")
self.assertEqual(response.status_code, status.HTTP_200_OK)
self.assertEqual(response.data["count"], 0)
def test_simple_text_search_matches_substrings(self) -> None:
matching_doc = Document.objects.create(
title="Quarterly summary",
content="Password reset instructions",
checksum="T5",
pk=15,
)
backend = get_backend()
backend.add_or_update(matching_doc)
response = self.client.get("/api/documents/?text=pass")
self.assertEqual(response.status_code, status.HTTP_200_OK)
self.assertEqual(response.data["count"], 1)
self.assertEqual(response.data["results"][0]["id"], matching_doc.id)
response = self.client.get("/api/documents/?text=sswo")
self.assertEqual(response.status_code, status.HTTP_200_OK)
self.assertEqual(response.data["count"], 1)
self.assertEqual(response.data["results"][0]["id"], matching_doc.id)
response = self.client.get("/api/documents/?text=sswo re")
self.assertEqual(response.status_code, status.HTTP_200_OK)
self.assertEqual(response.data["count"], 1)
self.assertEqual(response.data["results"][0]["id"], matching_doc.id)
def test_simple_text_search_does_not_match_on_partial_term_overlap(self) -> None:
non_matching_doc = Document.objects.create(
title="Adobe Acrobat PDF Files",
content="Lorem ipsum dolor sit amet, consectetur adipiscing elit.",
checksum="T7",
pk=17,
)
backend = get_backend()
backend.add_or_update(non_matching_doc)
response = self.client.get("/api/documents/?text=raptor")
self.assertEqual(response.status_code, status.HTTP_200_OK)
self.assertEqual(response.data["count"], 0)
def test_simple_title_search(self) -> None:
title_match = Document.objects.create(
title="Quarterly summary",
content="No matching content here",
checksum="T3",
pk=13,
)
content_only = Document.objects.create(
title="Completely unrelated",
content="Quarterly summary appears only in content",
checksum="T4",
pk=14,
)
backend = get_backend()
backend.add_or_update(title_match)
backend.add_or_update(content_only)
response = self.client.get("/api/documents/?title_search=quarterly")
self.assertEqual(response.status_code, status.HTTP_200_OK)
self.assertEqual(response.data["count"], 1)
self.assertEqual(response.data["results"][0]["id"], title_match.id)
def test_simple_title_search_matches_substrings(self) -> None:
title_match = Document.objects.create(
title="Password handbook",
content="No matching content here",
checksum="T6",
pk=16,
)
backend = get_backend()
backend.add_or_update(title_match)
response = self.client.get("/api/documents/?title_search=pass")
self.assertEqual(response.status_code, status.HTTP_200_OK)
self.assertEqual(response.data["count"], 1)
self.assertEqual(response.data["results"][0]["id"], title_match.id)
response = self.client.get("/api/documents/?title_search=sswo")
self.assertEqual(response.status_code, status.HTTP_200_OK)
self.assertEqual(response.data["count"], 1)
self.assertEqual(response.data["results"][0]["id"], title_match.id)
response = self.client.get("/api/documents/?title_search=sswo hand")
self.assertEqual(response.status_code, status.HTTP_200_OK)
self.assertEqual(response.data["count"], 1)
self.assertEqual(response.data["results"][0]["id"], title_match.id)
def test_search_rejects_multiple_search_modes(self) -> None:
response = self.client.get("/api/documents/?text=bank&query=bank")
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertEqual(
response.data["detail"],
"Specify only one of text, title_search, query, or more_like_id.",
)
def test_search_returns_all_for_api_version_9(self) -> None:
d1 = Document.objects.create(
title="invoice",
@@ -1493,6 +1622,31 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
self.assertEqual(results["custom_fields"][0]["id"], custom_field1.id)
self.assertEqual(results["workflows"][0]["id"], workflow1.id)
def test_global_search_db_only_limits_documents_to_title_matches(self) -> None:
title_match = Document.objects.create(
title="bank statement",
content="no additional terms",
checksum="GS1",
pk=21,
)
content_only = Document.objects.create(
title="not a title match",
content="bank appears only in content",
checksum="GS2",
pk=22,
)
backend = get_backend()
backend.add_or_update(title_match)
backend.add_or_update(content_only)
self.client.force_authenticate(self.user)
response = self.client.get("/api/search/?query=bank&db_only=true")
self.assertEqual(response.status_code, status.HTTP_200_OK)
self.assertEqual(len(response.data["documents"]), 1)
self.assertEqual(response.data["documents"][0]["id"], title_match.id)
def test_global_search_filters_owned_mail_objects(self) -> None:
user1 = User.objects.create_user("mail-search-user")
user2 = User.objects.create_user("other-mail-search-user")
@@ -2,6 +2,7 @@ import hashlib
import json
import shutil
import tempfile
from datetime import timedelta
from io import StringIO
from pathlib import Path
from unittest import mock
@@ -11,6 +12,7 @@ import pytest
from allauth.socialaccount.models import SocialAccount
from allauth.socialaccount.models import SocialApp
from allauth.socialaccount.models import SocialToken
from django.conf import settings
from django.contrib.auth.models import Group
from django.contrib.auth.models import Permission
from django.contrib.contenttypes.models import ContentType
@@ -31,6 +33,8 @@ from documents.models import CustomFieldInstance
from documents.models import Document
from documents.models import DocumentType
from documents.models import Note
from documents.models import ShareLink
from documents.models import ShareLinkBundle
from documents.models import StoragePath
from documents.models import Tag
from documents.models import User
@@ -39,6 +43,7 @@ from documents.models import WorkflowAction
from documents.models import WorkflowTrigger
from documents.sanity_checker import check_sanity
from documents.settings import EXPORTER_FILE_NAME
from documents.settings import EXPORTER_SHARE_LINK_BUNDLE_NAME
from documents.tests.utils import DirectoriesMixin
from documents.tests.utils import FileSystemAssertsMixin
from documents.tests.utils import SampleDirMixin
@@ -306,6 +311,108 @@ class TestExportImport(
):
self.test_exporter(use_filename_format=True)
def test_exporter_includes_share_links_and_bundles(self) -> None:
shutil.rmtree(Path(self.dirs.media_dir) / "documents")
shutil.copytree(
Path(__file__).parent / "samples" / "documents",
Path(self.dirs.media_dir) / "documents",
)
share_link = ShareLink.objects.create(
slug="share-link-slug",
document=self.d1,
owner=self.user,
file_version=ShareLink.FileVersion.ORIGINAL,
expiration=timezone.now() + timedelta(days=7),
)
bundle_relative_path = Path("nested") / "share-bundle.zip"
bundle_source_path = settings.SHARE_LINK_BUNDLE_DIR / bundle_relative_path
bundle_source_path.parent.mkdir(parents=True, exist_ok=True)
bundle_source_path.write_bytes(b"share-bundle-contents")
bundle = ShareLinkBundle.objects.create(
slug="share-bundle-slug",
owner=self.user,
file_version=ShareLink.FileVersion.ARCHIVE,
expiration=timezone.now() + timedelta(days=7),
status=ShareLinkBundle.Status.READY,
size_bytes=bundle_source_path.stat().st_size,
file_path=str(bundle_relative_path),
built_at=timezone.now(),
)
bundle.documents.set([self.d1, self.d2])
manifest = self._do_export()
share_link_records = [
record for record in manifest if record["model"] == "documents.sharelink"
]
self.assertEqual(len(share_link_records), 1)
self.assertEqual(share_link_records[0]["pk"], share_link.pk)
self.assertEqual(share_link_records[0]["fields"]["document"], self.d1.pk)
self.assertEqual(share_link_records[0]["fields"]["owner"], self.user.pk)
share_link_bundle_records = [
record
for record in manifest
if record["model"] == "documents.sharelinkbundle"
]
self.assertEqual(len(share_link_bundle_records), 1)
bundle_record = share_link_bundle_records[0]
self.assertEqual(bundle_record["pk"], bundle.pk)
self.assertEqual(
bundle_record["fields"]["documents"],
[self.d1.pk, self.d2.pk],
)
self.assertEqual(
bundle_record[EXPORTER_SHARE_LINK_BUNDLE_NAME],
"share_link_bundles/nested/share-bundle.zip",
)
self.assertEqual(
bundle_record["fields"]["file_path"],
"nested/share-bundle.zip",
)
self.assertIsFile(self.target / bundle_record[EXPORTER_SHARE_LINK_BUNDLE_NAME])
with paperless_environment():
ShareLink.objects.all().delete()
ShareLinkBundle.objects.all().delete()
shutil.rmtree(settings.SHARE_LINK_BUNDLE_DIR, ignore_errors=True)
call_command(
"document_importer",
"--no-progress-bar",
self.target,
skip_checks=True,
)
imported_share_link = ShareLink.objects.get(pk=share_link.pk)
self.assertEqual(imported_share_link.document_id, self.d1.pk)
self.assertEqual(imported_share_link.owner_id, self.user.pk)
self.assertEqual(
imported_share_link.file_version,
ShareLink.FileVersion.ORIGINAL,
)
imported_bundle = ShareLinkBundle.objects.get(pk=bundle.pk)
imported_bundle_path = imported_bundle.absolute_file_path
self.assertEqual(imported_bundle.owner_id, self.user.pk)
self.assertEqual(
list(
imported_bundle.documents.order_by("pk").values_list(
"pk",
flat=True,
),
),
[self.d1.pk, self.d2.pk],
)
self.assertEqual(imported_bundle.file_path, "nested/share-bundle.zip")
self.assertIsNotNone(imported_bundle_path)
self.assertEqual(
imported_bundle_path.read_bytes(),
b"share-bundle-contents",
)
def test_update_export_changed_time(self) -> None:
shutil.rmtree(Path(self.dirs.media_dir) / "documents")
shutil.copytree(
+9 -4
View File
@@ -435,7 +435,11 @@ class DummyProgressManager:
message: str,
current_progress: int,
max_progress: int,
extra_args: dict[str, str | int] | None = None,
*,
document_id: int | None = None,
owner_id: int | None = None,
users_can_view: list[int] | None = None,
groups_can_view: list[int] | None = None,
) -> None:
# Ensure the layer is open
self.open()
@@ -449,9 +453,10 @@ class DummyProgressManager:
"max_progress": max_progress,
"status": status,
"message": message,
"document_id": document_id,
"owner_id": owner_id,
"users_can_view": users_can_view or [],
"groups_can_view": groups_can_view or [],
},
}
if extra_args is not None:
payload["data"].update(extra_args)
self.payloads.append(payload)
+65 -19
View File
@@ -1995,11 +1995,23 @@ class ChatStreamingView(GenericAPIView):
list=extend_schema(
description="Document views including search",
parameters=[
OpenApiParameter(
name="text",
type=OpenApiTypes.STR,
location=OpenApiParameter.QUERY,
description="Simple Tantivy-backed text search query string",
),
OpenApiParameter(
name="title_search",
type=OpenApiTypes.STR,
location=OpenApiParameter.QUERY,
description="Simple Tantivy-backed title-only search query string",
),
OpenApiParameter(
name="query",
type=OpenApiTypes.STR,
location=OpenApiParameter.QUERY,
description="Advanced search query string",
description="Advanced Tantivy search query string",
),
OpenApiParameter(
name="full_perms",
@@ -2025,22 +2037,28 @@ class ChatStreamingView(GenericAPIView):
),
)
class UnifiedSearchViewSet(DocumentViewSet):
SEARCH_PARAM_NAMES = ("text", "title_search", "query", "more_like_id")
def get_serializer_class(self):
if self._is_search_request():
return SearchResultSerializer
else:
return DocumentSerializer
def _get_active_search_params(self, request: Request | None = None) -> list[str]:
request = request or self.request
return [
param for param in self.SEARCH_PARAM_NAMES if param in request.query_params
]
def _is_search_request(self):
return (
"query" in self.request.query_params
or "more_like_id" in self.request.query_params
)
return bool(self._get_active_search_params())
def list(self, request, *args, **kwargs):
if not self._is_search_request():
return super().list(request)
from documents.search import SearchMode
from documents.search import TantivyRelevanceList
from documents.search import get_backend
@@ -2050,9 +2068,31 @@ class UnifiedSearchViewSet(DocumentViewSet):
filtered_qs = self.filter_queryset(self.get_queryset())
user = None if request.user.is_superuser else request.user
active_search_params = self._get_active_search_params(request)
if "query" in request.query_params:
query_str = request.query_params["query"]
if len(active_search_params) > 1:
raise ValidationError(
{
"detail": _(
"Specify only one of text, title_search, query, or more_like_id.",
),
},
)
if (
"text" in request.query_params
or "title_search" in request.query_params
or "query" in request.query_params
):
if "text" in request.query_params:
search_mode = SearchMode.TEXT
query_str = request.query_params["text"]
elif "title_search" in request.query_params:
search_mode = SearchMode.TITLE
query_str = request.query_params["title_search"]
else:
search_mode = SearchMode.QUERY
query_str = request.query_params["query"]
results = backend.search(
query_str,
user=user,
@@ -2060,6 +2100,7 @@ class UnifiedSearchViewSet(DocumentViewSet):
page_size=10000,
sort_field=None,
sort_reverse=False,
search_mode=search_mode,
)
else:
# more_like_id — validate permission on the seed document first
@@ -2132,6 +2173,8 @@ class UnifiedSearchViewSet(DocumentViewSet):
if str(e.detail) == str(invalid_more_like_id_message):
return HttpResponseForbidden(invalid_more_like_id_message)
return HttpResponseForbidden(_("Insufficient permissions."))
except ValidationError:
raise
except Exception as e:
logger.warning(f"An error occurred listing search results: {e!s}")
return HttpResponseBadRequest(
@@ -3003,6 +3046,9 @@ class GlobalSearchView(PassUserMixin):
serializer_class = SearchResultSerializer
def get(self, request, *args, **kwargs):
from documents.search import SearchMode
from documents.search import get_backend
query = request.query_params.get("query", None)
if query is None:
return HttpResponseBadRequest("Query required")
@@ -3019,25 +3065,25 @@ class GlobalSearchView(PassUserMixin):
"view_document",
Document,
)
# First search by title
docs = all_docs.filter(title__icontains=query)
if not db_only and len(docs) < OBJECT_LIMIT:
# If we don't have enough results, search by content.
# Over-fetch from Tantivy (no permission filter) and rely on
# the ORM all_docs queryset for authoritative permission gating.
from documents.search import get_backend
if db_only:
docs = all_docs.filter(title__icontains=query)[:OBJECT_LIMIT]
else:
user = None if request.user.is_superuser else request.user
fts_results = get_backend().search(
query,
user=None,
user=user,
page=1,
page_size=1000,
sort_field=None,
sort_reverse=False,
search_mode=SearchMode.TEXT,
)
fts_ids = {h["id"] for h in fts_results.hits}
docs = docs | all_docs.filter(id__in=fts_ids)
docs = docs[:OBJECT_LIMIT]
docs_by_id = all_docs.in_bulk([hit["id"] for hit in fts_results.hits])
docs = [
docs_by_id[hit["id"]]
for hit in fts_results.hits
if hit["id"] in docs_by_id
][:OBJECT_LIMIT]
saved_views = (
get_objects_for_user_owner_aware(
request.user,
File diff suppressed because it is too large Load Diff
+18 -7
View File
@@ -1,16 +1,27 @@
from __future__ import annotations
import json
from typing import Any
from typing import TYPE_CHECKING
from channels.generic.websocket import AsyncWebsocketConsumer
if TYPE_CHECKING:
from django.contrib.auth.base_user import AbstractBaseUser
from django.contrib.auth.models import AnonymousUser
from documents.plugins.helpers import DocumentsDeletedPayload
from documents.plugins.helpers import DocumentUpdatedPayload
from documents.plugins.helpers import PermissionsData
from documents.plugins.helpers import StatusUpdatePayload
class StatusConsumer(AsyncWebsocketConsumer):
def _authenticated(self) -> bool:
user: Any = self.scope.get("user")
user: AbstractBaseUser | AnonymousUser | None = self.scope.get("user")
return user is not None and user.is_authenticated
async def _can_view(self, data: dict[str, Any]) -> bool:
user: Any = self.scope.get("user")
async def _can_view(self, data: PermissionsData) -> bool:
user: AbstractBaseUser | AnonymousUser | None = self.scope.get("user")
if user is None:
return False
owner_id = data.get("owner_id")
@@ -32,19 +43,19 @@ class StatusConsumer(AsyncWebsocketConsumer):
async def disconnect(self, code: int) -> None:
await self.channel_layer.group_discard("status_updates", self.channel_name)
async def status_update(self, event: dict[str, Any]) -> None:
async def status_update(self, event: StatusUpdatePayload) -> None:
if not self._authenticated():
await self.close()
elif await self._can_view(event["data"]):
await self.send(json.dumps(event))
async def documents_deleted(self, event: dict[str, Any]) -> None:
async def documents_deleted(self, event: DocumentsDeletedPayload) -> None:
if not self._authenticated():
await self.close()
else:
await self.send(json.dumps(event))
async def document_updated(self, event: dict[str, Any]) -> None:
async def document_updated(self, event: DocumentUpdatedPayload) -> None:
if not self._authenticated():
await self.close()
elif await self._can_view(event["data"]):
+33
View File
@@ -0,0 +1,33 @@
from __future__ import annotations
import logging
from contextvars import ContextVar
consume_task_id: ContextVar[str] = ContextVar("consume_task_id", default="")
class ConsumeTaskFormatter(logging.Formatter):
"""
Logging formatter that prepends a short task correlation ID to messages
emitted during document consumption.
The ID is the first 8 characters of the Celery task UUID, set via the
``consume_task_id`` ContextVar at the entry of ``consume_file``. When
the ContextVar is empty (any log outside a consume task) no prefix is
added and the output is identical to the standard verbose format.
"""
def __init__(self) -> None:
super().__init__(
fmt="[{asctime}] [{levelname}] [{name}] {task_prefix}{message}",
style="{",
validate=False, # {task_prefix} is not a standard LogRecord attribute, so Python's
# init-time format-string validation would raise ValueError without
# this. Runtime safety comes from format() always setting
# record.task_prefix before calling super().format().
)
def format(self, record: logging.LogRecord) -> str:
task_id = consume_task_id.get()
record.task_prefix = f"[{task_id}] " if task_id else ""
return super().format(record)
+1 -2
View File
@@ -592,8 +592,7 @@ LOGGING = {
"disable_existing_loggers": False,
"formatters": {
"verbose": {
"format": "[{asctime}] [{levelname}] [{name}] {message}",
"style": "{",
"()": "paperless.logging.ConsumeTaskFormatter",
},
"simple": {
"format": "{levelname} {message}",
+34
View File
@@ -0,0 +1,34 @@
import logging
from paperless.logging import ConsumeTaskFormatter
from paperless.logging import consume_task_id
def _make_record(msg: str = "Test message") -> logging.LogRecord:
return logging.LogRecord(
name="paperless.consumer",
level=logging.INFO,
pathname="",
lineno=0,
msg=msg,
args=(),
exc_info=None,
)
def test_formatter_includes_task_id_when_set():
token = consume_task_id.set("a8098c1a")
try:
formatter = ConsumeTaskFormatter()
output = formatter.format(_make_record())
assert "[a8098c1a] Test message" in output
finally:
consume_task_id.reset(token)
def test_formatter_omits_prefix_when_no_task_id():
# ContextVar default is "" — no task active
formatter = ConsumeTaskFormatter()
output = formatter.format(_make_record())
assert "[] " not in output
assert "Test message" in output
+8 -2
View File
@@ -200,7 +200,10 @@ class TestWebSockets:
"Test message",
1,
10,
extra_args={"foo": "bar"},
document_id=42,
owner_id=1,
users_can_view=[2, 3],
groups_can_view=[4],
)
assert mock_group_send.call_args[0][1] == {
@@ -212,7 +215,10 @@ class TestWebSockets:
"max_progress": 10,
"status": ProgressStatusOptions.STARTED,
"message": "Test message",
"foo": "bar",
"document_id": 42,
"owner_id": 1,
"users_can_view": [2, 3],
"groups_can_view": [4],
},
}