diff --git a/.github/workflows/ci-backend.yml b/.github/workflows/ci-backend.yml index f92d1fb00..cff139e8c 100644 --- a/.github/workflows/ci-backend.yml +++ b/.github/workflows/ci-backend.yml @@ -24,6 +24,7 @@ jobs: uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: fetch-depth: 0 + persist-credentials: false - name: Decide run mode id: force run: | @@ -72,6 +73,8 @@ jobs: steps: - name: Checkout uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false - name: Start containers run: | docker compose --file docker/compose/docker-compose.ci-test.yml pull --quiet @@ -145,6 +148,8 @@ jobs: steps: - name: Checkout uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false - name: Set up Python id: setup-python uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 diff --git a/.github/workflows/ci-docker.yml b/.github/workflows/ci-docker.yml index 48d258dca..43b79728d 100644 --- a/.github/workflows/ci-docker.yml +++ b/.github/workflows/ci-docker.yml @@ -42,6 +42,8 @@ jobs: steps: - name: Checkout uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false - name: Determine ref name id: ref run: | diff --git a/.github/workflows/ci-docs.yml b/.github/workflows/ci-docs.yml index 68f202264..a598a3c9d 100644 --- a/.github/workflows/ci-docs.yml +++ b/.github/workflows/ci-docs.yml @@ -26,6 +26,7 @@ jobs: uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: fetch-depth: 0 + persist-credentials: false - name: Decide run mode id: force run: | @@ -71,6 +72,8 @@ jobs: - uses: actions/configure-pages@45bfe0192ca1faeb007ade9deae92b16b8254a0d # v6.0.0 - name: Checkout uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false - name: Set up Python id: setup-python uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 diff --git a/.github/workflows/ci-frontend.yml b/.github/workflows/ci-frontend.yml index dffb54e6b..9d4e23a1a 100644 --- a/.github/workflows/ci-frontend.yml +++ b/.github/workflows/ci-frontend.yml @@ -62,6 +62,8 @@ jobs: steps: - name: Checkout uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false - name: Install pnpm uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0 with: @@ -90,6 +92,8 @@ jobs: steps: - name: Checkout uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false - name: Install pnpm uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0 with: @@ -125,6 +129,8 @@ jobs: steps: - name: Checkout uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false - name: Install pnpm uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0 with: @@ -176,6 +182,8 @@ jobs: steps: - name: Checkout uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false - name: Install pnpm uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0 with: @@ -209,6 +217,7 @@ jobs: uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: fetch-depth: 2 + persist-credentials: false - name: Install pnpm uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0 with: diff --git a/.github/workflows/ci-lint.yml b/.github/workflows/ci-lint.yml index bf1458e1d..314250719 100644 --- a/.github/workflows/ci-lint.yml +++ b/.github/workflows/ci-lint.yml @@ -16,6 +16,8 @@ jobs: steps: - name: Checkout uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false - name: Install Python uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: diff --git a/.github/workflows/ci-release.yml b/.github/workflows/ci-release.yml index b38ecbc40..030e3bcad 100644 --- a/.github/workflows/ci-release.yml +++ b/.github/workflows/ci-release.yml @@ -29,6 +29,8 @@ jobs: steps: - name: Checkout uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false # ---- Frontend Build ---- - name: Install pnpm uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0 @@ -179,6 +181,7 @@ jobs: uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: ref: main + persist-credentials: true # for pushing changelog branch - name: Set up Python id: setup-python uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 diff --git a/.github/workflows/ci-static-analysis.yml b/.github/workflows/ci-static-analysis.yml new file mode 100644 index 000000000..99388354a --- /dev/null +++ b/.github/workflows/ci-static-analysis.yml @@ -0,0 +1,42 @@ +name: Static Analysis +on: + push: + branches-ignore: + - 'translations**' + pull_request: + branches-ignore: + - 'translations**' + workflow_dispatch: +concurrency: + group: static-analysis-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true +permissions: + contents: read +jobs: + zizmor: + name: Run zizmor + runs-on: ubuntu-24.04 + permissions: + contents: read + actions: read + security-events: write + steps: + - name: Checkout + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + - name: Run zizmor + uses: zizmorcore/zizmor-action@71321a20a9ded102f6e9ce5718a2fcec2c4f70d8 # v0.5.2 + semgrep: + name: Semgrep CE + runs-on: ubuntu-24.04 + container: + image: semgrep/semgrep:1.155.0@sha256:cc869c685dcc0fe497c86258da9f205397d8108e56d21a86082ea4886e52784d + if: github.actor != 'dependabot[bot]' + steps: + - name: Checkout + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + - name: Run Semgrep + run: semgrep scan --config auto diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 08c2bc1a2..e295e938d 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -35,6 +35,8 @@ jobs: steps: - name: Checkout repository uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL uses: github/codeql-action/init@c793b717bc78562f491db7b0e93a3a178b099162 # v4.32.5 diff --git a/.github/workflows/crowdin.yml b/.github/workflows/crowdin.yml index 38e73bbb5..29b4be02f 100644 --- a/.github/workflows/crowdin.yml +++ b/.github/workflows/crowdin.yml @@ -16,6 +16,7 @@ jobs: uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: token: ${{ secrets.PNGX_BOT_PAT }} + persist-credentials: false - name: crowdin action uses: crowdin/github-action@8818ff65bfc4322384f983ea37e3926948c11745 # v2.15.0 with: diff --git a/.github/workflows/translate-strings.yml b/.github/workflows/translate-strings.yml index c38886bc2..ad894abe7 100644 --- a/.github/workflows/translate-strings.yml +++ b/.github/workflows/translate-strings.yml @@ -17,6 +17,7 @@ jobs: with: token: ${{ secrets.PNGX_BOT_PAT }} ref: ${{ env.GH_REF }} + persist-credentials: true # for pushing translation branch - name: Set up Python id: setup-python uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 diff --git a/docs/api.md b/docs/api.md index 2284d9d29..af1190f3d 100644 --- a/docs/api.md +++ b/docs/api.md @@ -62,10 +62,14 @@ The REST api provides five different forms of authentication. ## Searching for documents -Full text searching is available on the `/api/documents/` endpoint. Two -specific query parameters cause the API to return full text search +Full text searching is available on the `/api/documents/` endpoint. The +following query parameters cause the API to return Tantivy-backed search results: +- `/api/documents/?text=your%20search%20query`: Search title and content + using simple substring-style search. +- `/api/documents/?title_search=your%20search%20query`: Search title only + using simple substring-style search. - `/api/documents/?query=your%20search%20query`: Search for a document using a full text query. For details on the syntax, see [Basic Usage - Searching](usage.md#basic-usage_searching). - `/api/documents/?more_like_id=1234`: Search for documents similar to @@ -439,3 +443,5 @@ Initial API version. - The `all` parameter of list endpoints is now deprecated and will be removed in a future version. - The bulk edit objects endpoint now supports `all` and `filters` parameters to avoid having to send large lists of object IDs for operations affecting many objects. +- The legacy `title_content` document search parameter is deprecated and will be removed in a future version. + Clients should use `text` for simple title-and-content search and `title_search` for title-only search. diff --git a/docs/migration-v3.md b/docs/migration-v3.md index 900bcd5c5..c76996cc0 100644 --- a/docs/migration-v3.md +++ b/docs/migration-v3.md @@ -1,5 +1,24 @@ # v3 Migration Guide +## Secret Key is Now Required + +The `PAPERLESS_SECRET_KEY` environment variable is now required. This is a critical security setting used for cryptographic signing and should be set to a long, random value. + +### Action Required + +If you are upgrading an existing installation, you must now set `PAPERLESS_SECRET_KEY` explicitly. + +If your installation was relying on the previous built-in default key, you have two options: + +- Set `PAPERLESS_SECRET_KEY` to that previous value to preserve existing sessions and tokens. +- Set `PAPERLESS_SECRET_KEY` to a new random value to improve security, understanding that this will invalidate existing sessions and other signed tokens. + +For new installations, or if you choose to rotate the key, you may generate a new secret key with: + +```bash +python3 -c "import secrets; print(secrets.token_urlsafe(64))" +``` + ## Consumer Settings Changes The v3 consumer command uses a [different library](https://watchfiles.helpmanual.io/) to unify diff --git a/src-ui/e2e/document-list/document-list.spec.ts b/src-ui/e2e/document-list/document-list.spec.ts index 700304186..0cea8effa 100644 --- a/src-ui/e2e/document-list/document-list.spec.ts +++ b/src-ui/e2e/document-list/document-list.spec.ts @@ -49,11 +49,11 @@ test('text filtering', async ({ page }) => { await page.getByRole('main').getByRole('combobox').click() await page.getByRole('main').getByRole('combobox').fill('test') await expect(page.locator('pngx-document-list')).toHaveText(/32 documents/) - await expect(page).toHaveURL(/title_content=test/) + await expect(page).toHaveURL(/text=test/) await page.getByRole('button', { name: 'Title & content' }).click() await page.getByRole('button', { name: 'Title', exact: true }).click() await expect(page.locator('pngx-document-list')).toHaveText(/9 documents/) - await expect(page).toHaveURL(/title__icontains=test/) + await expect(page).toHaveURL(/title_search=test/) await page.getByRole('button', { name: 'Title', exact: true }).click() await page.getByRole('button', { name: 'Advanced search' }).click() await expect(page).toHaveURL(/query=test/) diff --git a/src-ui/e2e/document-list/requests/api-document-list2.har b/src-ui/e2e/document-list/requests/api-document-list2.har index 3cbc9e8a6..f6a488b26 100644 --- a/src-ui/e2e/document-list/requests/api-document-list2.har +++ b/src-ui/e2e/document-list/requests/api-document-list2.har @@ -3545,7 +3545,7 @@ "time": 1.091, "request": { "method": "GET", - "url": "http://localhost:8000/api/documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true&title_content=test", + "url": "http://localhost:8000/api/documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true&text=test", "httpVersion": "HTTP/1.1", "cookies": [], "headers": [ @@ -3579,7 +3579,7 @@ "value": "true" }, { - "name": "title_content", + "name": "text", "value": "test" } ], @@ -4303,7 +4303,7 @@ "time": 0.603, "request": { "method": "GET", - "url": "http://localhost:8000/api/documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true&title__icontains=test", + "url": "http://localhost:8000/api/documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true&title_search=test", "httpVersion": "HTTP/1.1", "cookies": [], "headers": [ @@ -4337,7 +4337,7 @@ "value": "true" }, { - "name": "title__icontains", + "name": "title_search", "value": "test" } ], diff --git a/src-ui/messages.xlf b/src-ui/messages.xlf index 19b2f7ce2..f30605a4e 100644 --- a/src-ui/messages.xlf +++ b/src-ui/messages.xlf @@ -1081,7 +1081,7 @@ src/app/components/document-list/filter-editor/filter-editor.component.ts - 205 + 203 @@ -3027,10 +3027,6 @@ src/app/components/document-list/filter-editor/filter-editor.component.html 84 - - src/app/components/document-list/filter-editor/filter-editor.component.ts - 200 - src/app/components/manage/document-attributes/document-attributes.component.ts 129 @@ -7504,7 +7500,7 @@ src/app/components/document-list/filter-editor/filter-editor.component.ts - 192 + 194 src/app/data/document.ts @@ -8817,7 +8813,7 @@ src/app/components/document-list/filter-editor/filter-editor.component.ts - 197 + 199 src/app/data/document.ts @@ -9020,56 +9016,63 @@ Title & content src/app/components/document-list/filter-editor/filter-editor.component.ts - 195 + 197 File type src/app/components/document-list/filter-editor/filter-editor.component.ts - 202 + 200 + + + + Custom fields (Deprecated) + + src/app/components/document-list/filter-editor/filter-editor.component.ts + 210 More like src/app/components/document-list/filter-editor/filter-editor.component.ts - 211 + 215 equals src/app/components/document-list/filter-editor/filter-editor.component.ts - 217 + 221 is empty src/app/components/document-list/filter-editor/filter-editor.component.ts - 221 + 225 is not empty src/app/components/document-list/filter-editor/filter-editor.component.ts - 225 + 229 greater than src/app/components/document-list/filter-editor/filter-editor.component.ts - 229 + 233 less than src/app/components/document-list/filter-editor/filter-editor.component.ts - 233 + 237 @@ -9078,14 +9081,14 @@ )?.name"/> src/app/components/document-list/filter-editor/filter-editor.component.ts - 274,278 + 278,282 Without correspondent src/app/components/document-list/filter-editor/filter-editor.component.ts - 280 + 284 @@ -9094,14 +9097,14 @@ )?.name"/> src/app/components/document-list/filter-editor/filter-editor.component.ts - 286,290 + 290,294 Without document type src/app/components/document-list/filter-editor/filter-editor.component.ts - 292 + 296 @@ -9110,70 +9113,77 @@ )?.name"/> src/app/components/document-list/filter-editor/filter-editor.component.ts - 298,302 + 302,306 Without storage path src/app/components/document-list/filter-editor/filter-editor.component.ts - 304 + 308 Tag: src/app/components/document-list/filter-editor/filter-editor.component.ts - 308,310 + 312,314 Without any tag src/app/components/document-list/filter-editor/filter-editor.component.ts - 314 + 318 Custom fields query src/app/components/document-list/filter-editor/filter-editor.component.ts - 318 + 322 Title: src/app/components/document-list/filter-editor/filter-editor.component.ts - 321 + 326 + + + + Title & content: + + src/app/components/document-list/filter-editor/filter-editor.component.ts + 330 ASN: src/app/components/document-list/filter-editor/filter-editor.component.ts - 324 + 333 Owner: src/app/components/document-list/filter-editor/filter-editor.component.ts - 327 + 336 Owner not in: src/app/components/document-list/filter-editor/filter-editor.component.ts - 330 + 339 Without an owner src/app/components/document-list/filter-editor/filter-editor.component.ts - 333 + 342 diff --git a/src-ui/src/app/components/app-frame/global-search/global-search.component.spec.ts b/src-ui/src/app/components/app-frame/global-search/global-search.component.spec.ts index eaae4a814..1be801478 100644 --- a/src-ui/src/app/components/app-frame/global-search/global-search.component.spec.ts +++ b/src-ui/src/app/components/app-frame/global-search/global-search.component.spec.ts @@ -24,7 +24,7 @@ import { FILTER_HAS_DOCUMENT_TYPE_ANY, FILTER_HAS_STORAGE_PATH_ANY, FILTER_HAS_TAGS_ALL, - FILTER_TITLE_CONTENT, + FILTER_SIMPLE_TEXT, } from 'src/app/data/filter-rule-type' import { GlobalSearchType, SETTINGS_KEYS } from 'src/app/data/ui-settings' import { DocumentListViewService } from 'src/app/services/document-list-view.service' @@ -545,7 +545,7 @@ describe('GlobalSearchComponent', () => { component.query = 'test' component.runFullSearch() expect(qfSpy).toHaveBeenCalledWith([ - { rule_type: FILTER_TITLE_CONTENT, value: 'test' }, + { rule_type: FILTER_SIMPLE_TEXT, value: 'test' }, ]) settingsService.set( diff --git a/src-ui/src/app/components/app-frame/global-search/global-search.component.ts b/src-ui/src/app/components/app-frame/global-search/global-search.component.ts index 4f9a2467c..e95b52cfc 100644 --- a/src-ui/src/app/components/app-frame/global-search/global-search.component.ts +++ b/src-ui/src/app/components/app-frame/global-search/global-search.component.ts @@ -25,7 +25,7 @@ import { FILTER_HAS_DOCUMENT_TYPE_ANY, FILTER_HAS_STORAGE_PATH_ANY, FILTER_HAS_TAGS_ALL, - FILTER_TITLE_CONTENT, + FILTER_SIMPLE_TEXT, } from 'src/app/data/filter-rule-type' import { ObjectWithId } from 'src/app/data/object-with-id' import { GlobalSearchType, SETTINGS_KEYS } from 'src/app/data/ui-settings' @@ -410,7 +410,7 @@ export class GlobalSearchComponent implements OnInit { public runFullSearch() { const ruleType = this.useAdvancedForFullSearch ? FILTER_FULLTEXT_QUERY - : FILTER_TITLE_CONTENT + : FILTER_SIMPLE_TEXT this.documentService.searchQuery = this.useAdvancedForFullSearch ? this.query : '' diff --git a/src-ui/src/app/components/common/edit-dialog/storage-path-edit-dialog/storage-path-edit-dialog.component.spec.ts b/src-ui/src/app/components/common/edit-dialog/storage-path-edit-dialog/storage-path-edit-dialog.component.spec.ts index 89e7b1fee..2466ced73 100644 --- a/src-ui/src/app/components/common/edit-dialog/storage-path-edit-dialog/storage-path-edit-dialog.component.spec.ts +++ b/src-ui/src/app/components/common/edit-dialog/storage-path-edit-dialog/storage-path-edit-dialog.component.spec.ts @@ -4,7 +4,7 @@ import { ComponentFixture, TestBed } from '@angular/core/testing' import { By } from '@angular/platform-browser' import { NgbAccordionButton, NgbActiveModal } from '@ng-bootstrap/ng-bootstrap' import { of, throwError } from 'rxjs' -import { FILTER_TITLE } from 'src/app/data/filter-rule-type' +import { FILTER_SIMPLE_TITLE } from 'src/app/data/filter-rule-type' import { DocumentService } from 'src/app/services/rest/document.service' import { StoragePathService } from 'src/app/services/rest/storage-path.service' import { SettingsService } from 'src/app/services/settings.service' @@ -105,7 +105,7 @@ describe('StoragePathEditDialogComponent', () => { null, 'created', true, - [{ rule_type: FILTER_TITLE, value: 'bar' }], + [{ rule_type: FILTER_SIMPLE_TITLE, value: 'bar' }], { truncate_content: true } ) listSpy.mockReturnValueOnce( diff --git a/src-ui/src/app/components/common/edit-dialog/storage-path-edit-dialog/storage-path-edit-dialog.component.ts b/src-ui/src/app/components/common/edit-dialog/storage-path-edit-dialog/storage-path-edit-dialog.component.ts index f06831588..68ce40f5e 100644 --- a/src-ui/src/app/components/common/edit-dialog/storage-path-edit-dialog/storage-path-edit-dialog.component.ts +++ b/src-ui/src/app/components/common/edit-dialog/storage-path-edit-dialog/storage-path-edit-dialog.component.ts @@ -23,7 +23,7 @@ import { } from 'rxjs' import { EditDialogComponent } from 'src/app/components/common/edit-dialog/edit-dialog.component' import { Document } from 'src/app/data/document' -import { FILTER_TITLE } from 'src/app/data/filter-rule-type' +import { FILTER_SIMPLE_TITLE } from 'src/app/data/filter-rule-type' import { DEFAULT_MATCHING_ALGORITHM } from 'src/app/data/matching-model' import { StoragePath } from 'src/app/data/storage-path' import { IfOwnerDirective } from 'src/app/directives/if-owner.directive' @@ -146,7 +146,7 @@ export class StoragePathEditDialogComponent null, 'created', true, - [{ rule_type: FILTER_TITLE, value: title }], + [{ rule_type: FILTER_SIMPLE_TITLE, value: title }], { truncate_content: true } ) .pipe( diff --git a/src-ui/src/app/components/common/input/document-link/document-link.component.spec.ts b/src-ui/src/app/components/common/input/document-link/document-link.component.spec.ts index 7021012ab..f8a8f3817 100644 --- a/src-ui/src/app/components/common/input/document-link/document-link.component.spec.ts +++ b/src-ui/src/app/components/common/input/document-link/document-link.component.spec.ts @@ -3,7 +3,7 @@ import { provideHttpClientTesting } from '@angular/common/http/testing' import { ComponentFixture, TestBed } from '@angular/core/testing' import { NG_VALUE_ACCESSOR } from '@angular/forms' import { of, throwError } from 'rxjs' -import { FILTER_TITLE } from 'src/app/data/filter-rule-type' +import { FILTER_SIMPLE_TITLE } from 'src/app/data/filter-rule-type' import { DocumentService } from 'src/app/services/rest/document.service' import { DocumentLinkComponent } from './document-link.component' @@ -99,7 +99,7 @@ describe('DocumentLinkComponent', () => { null, 'created', true, - [{ rule_type: FILTER_TITLE, value: 'bar' }], + [{ rule_type: FILTER_SIMPLE_TITLE, value: 'bar' }], { truncate_content: true } ) listSpy.mockReturnValueOnce(throwError(() => new Error())) diff --git a/src-ui/src/app/components/common/input/document-link/document-link.component.ts b/src-ui/src/app/components/common/input/document-link/document-link.component.ts index b50f5701d..9bfb60063 100644 --- a/src-ui/src/app/components/common/input/document-link/document-link.component.ts +++ b/src-ui/src/app/components/common/input/document-link/document-link.component.ts @@ -28,7 +28,7 @@ import { tap, } from 'rxjs' import { Document } from 'src/app/data/document' -import { FILTER_TITLE } from 'src/app/data/filter-rule-type' +import { FILTER_SIMPLE_TITLE } from 'src/app/data/filter-rule-type' import { CustomDatePipe } from 'src/app/pipes/custom-date.pipe' import { DocumentService } from 'src/app/services/rest/document.service' import { AbstractInputComponent } from '../abstract-input' @@ -121,7 +121,7 @@ export class DocumentLinkComponent null, 'created', true, - [{ rule_type: FILTER_TITLE, value: title }], + [{ rule_type: FILTER_SIMPLE_TITLE, value: title }], { truncate_content: true } ) .pipe( diff --git a/src-ui/src/app/components/document-list/bulk-editor/bulk-editor.component.spec.ts b/src-ui/src/app/components/document-list/bulk-editor/bulk-editor.component.spec.ts index f283a75f3..8f82be1ab 100644 --- a/src-ui/src/app/components/document-list/bulk-editor/bulk-editor.component.spec.ts +++ b/src-ui/src/app/components/document-list/bulk-editor/bulk-editor.component.spec.ts @@ -428,7 +428,7 @@ describe('BulkEditorComponent', () => { req.flush(true) expect(req.request.body).toEqual({ all: true, - filters: { title__icontains: 'apple' }, + filters: { title_search: 'apple' }, method: 'modify_tags', parameters: { add_tags: [101], remove_tags: [] }, }) diff --git a/src-ui/src/app/components/document-list/filter-editor/filter-editor.component.spec.ts b/src-ui/src/app/components/document-list/filter-editor/filter-editor.component.spec.ts index bf5240f1b..d75e38630 100644 --- a/src-ui/src/app/components/document-list/filter-editor/filter-editor.component.spec.ts +++ b/src-ui/src/app/components/document-list/filter-editor/filter-editor.component.spec.ts @@ -67,6 +67,8 @@ import { FILTER_OWNER_DOES_NOT_INCLUDE, FILTER_OWNER_ISNULL, FILTER_SHARED_BY_USER, + FILTER_SIMPLE_TEXT, + FILTER_SIMPLE_TITLE, FILTER_STORAGE_PATH, FILTER_TITLE, FILTER_TITLE_CONTENT, @@ -312,7 +314,7 @@ describe('FilterEditorComponent', () => { expect(component.textFilter).toEqual(null) component.filterRules = [ { - rule_type: FILTER_TITLE_CONTENT, + rule_type: FILTER_SIMPLE_TEXT, value: 'foo', }, ] @@ -320,6 +322,18 @@ describe('FilterEditorComponent', () => { expect(component.textFilterTarget).toEqual('title-content') // TEXT_FILTER_TARGET_TITLE_CONTENT })) + it('should ingest legacy text filter rules for doc title + content', fakeAsync(() => { + expect(component.textFilter).toEqual(null) + component.filterRules = [ + { + rule_type: FILTER_TITLE_CONTENT, + value: 'legacy foo', + }, + ] + expect(component.textFilter).toEqual('legacy foo') + expect(component.textFilterTarget).toEqual('title-content') // TEXT_FILTER_TARGET_TITLE_CONTENT + })) + it('should ingest text filter rules for doc asn', fakeAsync(() => { expect(component.textFilter).toEqual(null) component.filterRules = [ @@ -1117,7 +1131,7 @@ describe('FilterEditorComponent', () => { expect(component.textFilter).toEqual('foo') expect(component.filterRules).toEqual([ { - rule_type: FILTER_TITLE_CONTENT, + rule_type: FILTER_SIMPLE_TEXT, value: 'foo', }, ]) @@ -1136,7 +1150,7 @@ describe('FilterEditorComponent', () => { expect(component.textFilterTarget).toEqual('title') expect(component.filterRules).toEqual([ { - rule_type: FILTER_TITLE, + rule_type: FILTER_SIMPLE_TITLE, value: 'foo', }, ]) @@ -1250,30 +1264,12 @@ describe('FilterEditorComponent', () => { ]) })) - it('should convert user input to correct filter rules on custom fields query', fakeAsync(() => { - component.textFilterInput.nativeElement.value = 'foo' - component.textFilterInput.nativeElement.dispatchEvent(new Event('input')) - const textFieldTargetDropdown = fixture.debugElement.queryAll( - By.directive(NgbDropdownItem) - )[3] - textFieldTargetDropdown.triggerEventHandler('click') // TEXT_FILTER_TARGET_CUSTOM_FIELDS - fixture.detectChanges() - tick(400) - expect(component.textFilterTarget).toEqual('custom-fields') - expect(component.filterRules).toEqual([ - { - rule_type: FILTER_CUSTOM_FIELDS_TEXT, - value: 'foo', - }, - ]) - })) - it('should convert user input to correct filter rules on mime type', fakeAsync(() => { component.textFilterInput.nativeElement.value = 'pdf' component.textFilterInput.nativeElement.dispatchEvent(new Event('input')) const textFieldTargetDropdown = fixture.debugElement.queryAll( By.directive(NgbDropdownItem) - )[4] + )[3] textFieldTargetDropdown.triggerEventHandler('click') // TEXT_FILTER_TARGET_MIME_TYPE fixture.detectChanges() tick(400) @@ -1291,8 +1287,8 @@ describe('FilterEditorComponent', () => { component.textFilterInput.nativeElement.dispatchEvent(new Event('input')) const textFieldTargetDropdown = fixture.debugElement.queryAll( By.directive(NgbDropdownItem) - )[5] - textFieldTargetDropdown.triggerEventHandler('click') // TEXT_FILTER_TARGET_ASN + )[4] + textFieldTargetDropdown.triggerEventHandler('click') // TEXT_FILTER_TARGET_FULLTEXT_QUERY fixture.detectChanges() tick(400) expect(component.textFilterTarget).toEqual('fulltext-query') @@ -1696,12 +1692,56 @@ describe('FilterEditorComponent', () => { ]) })) + it('should convert legacy title filters into full text query when adding a created relative date', fakeAsync(() => { + component.filterRules = [ + { + rule_type: FILTER_TITLE, + value: 'foo', + }, + ] + const dateCreatedDropdown = fixture.debugElement.queryAll( + By.directive(DatesDropdownComponent) + )[0] + component.dateCreatedRelativeDate = RelativeDate.WITHIN_1_WEEK + dateCreatedDropdown.triggerEventHandler('datesSet') + fixture.detectChanges() + tick(400) + expect(component.filterRules).toEqual([ + { + rule_type: FILTER_FULLTEXT_QUERY, + value: 'foo,created:[-1 week to now]', + }, + ]) + })) + + it('should convert simple title filters into full text query when adding a created relative date', fakeAsync(() => { + component.filterRules = [ + { + rule_type: FILTER_SIMPLE_TITLE, + value: 'foo', + }, + ] + const dateCreatedDropdown = fixture.debugElement.queryAll( + By.directive(DatesDropdownComponent) + )[0] + component.dateCreatedRelativeDate = RelativeDate.WITHIN_1_WEEK + dateCreatedDropdown.triggerEventHandler('datesSet') + fixture.detectChanges() + tick(400) + expect(component.filterRules).toEqual([ + { + rule_type: FILTER_FULLTEXT_QUERY, + value: 'foo,created:[-1 week to now]', + }, + ]) + })) + it('should leave relative dates not in quick list intact', fakeAsync(() => { component.textFilterInput.nativeElement.value = 'created:[-2 week to now]' component.textFilterInput.nativeElement.dispatchEvent(new Event('input')) const textFieldTargetDropdown = fixture.debugElement.queryAll( By.directive(NgbDropdownItem) - )[5] + )[4] textFieldTargetDropdown.triggerEventHandler('click') fixture.detectChanges() tick(400) @@ -2031,12 +2071,30 @@ describe('FilterEditorComponent', () => { component.filterRules = [ { - rule_type: FILTER_TITLE, + rule_type: FILTER_SIMPLE_TITLE, value: 'foo', }, ] expect(component.generateFilterName()).toEqual('Title: foo') + component.filterRules = [ + { + rule_type: FILTER_TITLE_CONTENT, + value: 'legacy foo', + }, + ] + expect(component.generateFilterName()).toEqual( + 'Title & content: legacy foo' + ) + + component.filterRules = [ + { + rule_type: FILTER_SIMPLE_TEXT, + value: 'foo', + }, + ] + expect(component.generateFilterName()).toEqual('Title & content: foo') + component.filterRules = [ { rule_type: FILTER_ASN, @@ -2156,6 +2214,36 @@ describe('FilterEditorComponent', () => { }) }) + it('should hide deprecated custom fields target from default text filter targets', () => { + expect(component.textFilterTargets).not.toContainEqual({ + id: 'custom-fields', + name: $localize`Custom fields (Deprecated)`, + }) + }) + + it('should keep deprecated custom fields target available for legacy filters', fakeAsync(() => { + component.filterRules = [ + { + rule_type: FILTER_CUSTOM_FIELDS_TEXT, + value: 'foo', + }, + ] + fixture.detectChanges() + tick() + + expect(component.textFilterTarget).toEqual('custom-fields') + expect(component.textFilterTargets).toContainEqual({ + id: 'custom-fields', + name: $localize`Custom fields (Deprecated)`, + }) + expect(component.filterRules).toEqual([ + { + rule_type: FILTER_CUSTOM_FIELDS_TEXT, + value: 'foo', + }, + ]) + })) + it('should call autocomplete endpoint on input', fakeAsync(() => { component.textFilterTarget = 'fulltext-query' // TEXT_FILTER_TARGET_FULLTEXT_QUERY const autocompleteSpy = jest.spyOn(searchService, 'autocomplete') diff --git a/src-ui/src/app/components/document-list/filter-editor/filter-editor.component.ts b/src-ui/src/app/components/document-list/filter-editor/filter-editor.component.ts index f7b50181b..b4e63317a 100644 --- a/src-ui/src/app/components/document-list/filter-editor/filter-editor.component.ts +++ b/src-ui/src/app/components/document-list/filter-editor/filter-editor.component.ts @@ -71,6 +71,8 @@ import { FILTER_OWNER_DOES_NOT_INCLUDE, FILTER_OWNER_ISNULL, FILTER_SHARED_BY_USER, + FILTER_SIMPLE_TEXT, + FILTER_SIMPLE_TITLE, FILTER_STORAGE_PATH, FILTER_TITLE, FILTER_TITLE_CONTENT, @@ -195,10 +197,6 @@ const DEFAULT_TEXT_FILTER_TARGET_OPTIONS = [ name: $localize`Title & content`, }, { id: TEXT_FILTER_TARGET_ASN, name: $localize`ASN` }, - { - id: TEXT_FILTER_TARGET_CUSTOM_FIELDS, - name: $localize`Custom fields`, - }, { id: TEXT_FILTER_TARGET_MIME_TYPE, name: $localize`File type` }, { id: TEXT_FILTER_TARGET_FULLTEXT_QUERY, @@ -206,6 +204,12 @@ const DEFAULT_TEXT_FILTER_TARGET_OPTIONS = [ }, ] +const DEPRECATED_CUSTOM_FIELDS_TEXT_FILTER_TARGET_OPTION = { + // Kept only so legacy saved views can render and be edited away from, remove me eventually + id: TEXT_FILTER_TARGET_CUSTOM_FIELDS, + name: $localize`Custom fields (Deprecated)`, +} + const TEXT_FILTER_TARGET_MORELIKE_OPTION = { id: TEXT_FILTER_TARGET_FULLTEXT_MORELIKE, name: $localize`More like`, @@ -318,8 +322,13 @@ export class FilterEditorComponent return $localize`Custom fields query` case FILTER_TITLE: + case FILTER_SIMPLE_TITLE: return $localize`Title: ${rule.value}` + case FILTER_TITLE_CONTENT: + case FILTER_SIMPLE_TEXT: + return $localize`Title & content: ${rule.value}` + case FILTER_ASN: return $localize`ASN: ${rule.value}` @@ -353,12 +362,16 @@ export class FilterEditorComponent _moreLikeDoc: Document get textFilterTargets() { + let targets = DEFAULT_TEXT_FILTER_TARGET_OPTIONS if (this.textFilterTarget == TEXT_FILTER_TARGET_FULLTEXT_MORELIKE) { - return DEFAULT_TEXT_FILTER_TARGET_OPTIONS.concat([ - TEXT_FILTER_TARGET_MORELIKE_OPTION, + targets = targets.concat([TEXT_FILTER_TARGET_MORELIKE_OPTION]) + } + if (this.textFilterTarget == TEXT_FILTER_TARGET_CUSTOM_FIELDS) { + targets = targets.concat([ + DEPRECATED_CUSTOM_FIELDS_TEXT_FILTER_TARGET_OPTION, ]) } - return DEFAULT_TEXT_FILTER_TARGET_OPTIONS + return targets } textFilterTarget = TEXT_FILTER_TARGET_TITLE_CONTENT @@ -437,10 +450,12 @@ export class FilterEditorComponent value.forEach((rule) => { switch (rule.rule_type) { case FILTER_TITLE: + case FILTER_SIMPLE_TITLE: this._textFilter = rule.value this.textFilterTarget = TEXT_FILTER_TARGET_TITLE break case FILTER_TITLE_CONTENT: + case FILTER_SIMPLE_TEXT: this._textFilter = rule.value this.textFilterTarget = TEXT_FILTER_TARGET_TITLE_CONTENT break @@ -762,12 +777,15 @@ export class FilterEditorComponent this.textFilterTarget == TEXT_FILTER_TARGET_TITLE_CONTENT ) { filterRules.push({ - rule_type: FILTER_TITLE_CONTENT, + rule_type: FILTER_SIMPLE_TEXT, value: this._textFilter.trim(), }) } if (this._textFilter && this.textFilterTarget == TEXT_FILTER_TARGET_TITLE) { - filterRules.push({ rule_type: FILTER_TITLE, value: this._textFilter }) + filterRules.push({ + rule_type: FILTER_SIMPLE_TITLE, + value: this._textFilter, + }) } if (this.textFilterTarget == TEXT_FILTER_TARGET_ASN) { if ( @@ -1009,7 +1027,10 @@ export class FilterEditorComponent ) { existingRule = filterRules.find( (fr) => - fr.rule_type == FILTER_TITLE_CONTENT || fr.rule_type == FILTER_TITLE + fr.rule_type == FILTER_TITLE_CONTENT || + fr.rule_type == FILTER_SIMPLE_TEXT || + fr.rule_type == FILTER_TITLE || + fr.rule_type == FILTER_SIMPLE_TITLE ) existingRule.rule_type = FILTER_FULLTEXT_QUERY } diff --git a/src-ui/src/app/data/filter-rule-type.ts b/src-ui/src/app/data/filter-rule-type.ts index 7f0f0d56d..6330eb44c 100644 --- a/src-ui/src/app/data/filter-rule-type.ts +++ b/src-ui/src/app/data/filter-rule-type.ts @@ -3,7 +3,7 @@ import { DataType } from './datatype' export const NEGATIVE_NULL_FILTER_VALUE = -1 // These correspond to src/documents/models.py and changes here require a DB migration (and vice versa) -export const FILTER_TITLE = 0 +export const FILTER_TITLE = 0 // Deprecated in favor of Tantivy-backed `title_search`. Keep for now for existing saved views export const FILTER_CONTENT = 1 export const FILTER_ASN = 2 @@ -46,7 +46,9 @@ export const FILTER_ADDED_FROM = 46 export const FILTER_MODIFIED_BEFORE = 15 export const FILTER_MODIFIED_AFTER = 16 -export const FILTER_TITLE_CONTENT = 19 +export const FILTER_TITLE_CONTENT = 19 // Deprecated in favor of Tantivy-backed `text` filtervar. Keep for now for existing saved views +export const FILTER_SIMPLE_TITLE = 48 +export const FILTER_SIMPLE_TEXT = 49 export const FILTER_FULLTEXT_QUERY = 20 export const FILTER_FULLTEXT_MORELIKE = 21 @@ -56,7 +58,7 @@ export const FILTER_OWNER_ISNULL = 34 export const FILTER_OWNER_DOES_NOT_INCLUDE = 35 export const FILTER_SHARED_BY_USER = 37 -export const FILTER_CUSTOM_FIELDS_TEXT = 36 +export const FILTER_CUSTOM_FIELDS_TEXT = 36 // Deprecated. UI no longer includes CF text-search mode. Keep for now for existing saved views export const FILTER_HAS_CUSTOM_FIELDS_ALL = 38 export const FILTER_HAS_CUSTOM_FIELDS_ANY = 39 export const FILTER_DOES_NOT_HAVE_CUSTOM_FIELDS = 40 @@ -66,6 +68,9 @@ export const FILTER_CUSTOM_FIELDS_QUERY = 42 export const FILTER_MIME_TYPE = 47 +export const SIMPLE_TEXT_PARAMETER = 'text' +export const SIMPLE_TITLE_PARAMETER = 'title_search' + export const FILTER_RULE_TYPES: FilterRuleType[] = [ { id: FILTER_TITLE, @@ -74,6 +79,13 @@ export const FILTER_RULE_TYPES: FilterRuleType[] = [ multi: false, default: '', }, + { + id: FILTER_SIMPLE_TITLE, + filtervar: SIMPLE_TITLE_PARAMETER, + datatype: 'string', + multi: false, + default: '', + }, { id: FILTER_CONTENT, filtervar: 'content__icontains', @@ -279,6 +291,12 @@ export const FILTER_RULE_TYPES: FilterRuleType[] = [ datatype: 'string', multi: false, }, + { + id: FILTER_SIMPLE_TEXT, + filtervar: SIMPLE_TEXT_PARAMETER, + datatype: 'string', + multi: false, + }, { id: FILTER_FULLTEXT_QUERY, filtervar: 'query', diff --git a/src-ui/src/app/services/rest/document.service.spec.ts b/src-ui/src/app/services/rest/document.service.spec.ts index 711aab743..03375e367 100644 --- a/src-ui/src/app/services/rest/document.service.spec.ts +++ b/src-ui/src/app/services/rest/document.service.spec.ts @@ -10,7 +10,7 @@ import { DOCUMENT_SORT_FIELDS, DOCUMENT_SORT_FIELDS_FULLTEXT, } from 'src/app/data/document' -import { FILTER_TITLE } from 'src/app/data/filter-rule-type' +import { FILTER_SIMPLE_TITLE } from 'src/app/data/filter-rule-type' import { SETTINGS_KEYS } from 'src/app/data/ui-settings' import { environment } from 'src/environments/environment' import { PermissionsService } from '../permissions.service' @@ -138,13 +138,13 @@ describe(`DocumentService`, () => { subscription = service .listAllFilteredIds([ { - rule_type: FILTER_TITLE, + rule_type: FILTER_SIMPLE_TITLE, value: 'apple', }, ]) .subscribe() const req = httpTestingController.expectOne( - `${environment.apiBaseUrl}${endpoint}/?page=1&page_size=100000&fields=id&title__icontains=apple` + `${environment.apiBaseUrl}${endpoint}/?page=1&page_size=100000&fields=id&title_search=apple` ) expect(req.request.method).toEqual('GET') }) diff --git a/src-ui/src/app/utils/query-params.spec.ts b/src-ui/src/app/utils/query-params.spec.ts index c22c90d11..7fd8f6808 100644 --- a/src-ui/src/app/utils/query-params.spec.ts +++ b/src-ui/src/app/utils/query-params.spec.ts @@ -8,6 +8,10 @@ import { FILTER_HAS_CUSTOM_FIELDS_ALL, FILTER_HAS_CUSTOM_FIELDS_ANY, FILTER_HAS_TAGS_ALL, + FILTER_SIMPLE_TEXT, + FILTER_SIMPLE_TITLE, + FILTER_TITLE, + FILTER_TITLE_CONTENT, NEGATIVE_NULL_FILTER_VALUE, } from '../data/filter-rule-type' import { @@ -128,6 +132,26 @@ describe('QueryParams Utils', () => { is_tagged: 0, }) + params = queryParamsFromFilterRules([ + { + rule_type: FILTER_TITLE_CONTENT, + value: 'bank statement', + }, + ]) + expect(params).toEqual({ + text: 'bank statement', + }) + + params = queryParamsFromFilterRules([ + { + rule_type: FILTER_TITLE, + value: 'invoice', + }, + ]) + expect(params).toEqual({ + title_search: 'invoice', + }) + params = queryParamsFromFilterRules([ { rule_type: FILTER_HAS_TAGS_ALL, @@ -148,6 +172,30 @@ describe('QueryParams Utils', () => { it('should convert filter rules to query params', () => { let rules = filterRulesFromQueryParams( + convertToParamMap({ + text: 'bank statement', + }) + ) + expect(rules).toEqual([ + { + rule_type: FILTER_SIMPLE_TEXT, + value: 'bank statement', + }, + ]) + + rules = filterRulesFromQueryParams( + convertToParamMap({ + title_search: 'invoice', + }) + ) + expect(rules).toEqual([ + { + rule_type: FILTER_SIMPLE_TITLE, + value: 'invoice', + }, + ]) + + rules = filterRulesFromQueryParams( convertToParamMap({ tags__id__all, }) diff --git a/src-ui/src/app/utils/query-params.ts b/src-ui/src/app/utils/query-params.ts index 27716cc2d..be33ba724 100644 --- a/src-ui/src/app/utils/query-params.ts +++ b/src-ui/src/app/utils/query-params.ts @@ -9,8 +9,14 @@ import { FILTER_HAS_CUSTOM_FIELDS_ALL, FILTER_HAS_CUSTOM_FIELDS_ANY, FILTER_RULE_TYPES, + FILTER_SIMPLE_TEXT, + FILTER_SIMPLE_TITLE, + FILTER_TITLE, + FILTER_TITLE_CONTENT, FilterRuleType, NEGATIVE_NULL_FILTER_VALUE, + SIMPLE_TEXT_PARAMETER, + SIMPLE_TITLE_PARAMETER, } from '../data/filter-rule-type' import { ListViewState } from '../services/document-list-view.service' @@ -97,6 +103,8 @@ export function transformLegacyFilterRules( export function filterRulesFromQueryParams( queryParams: ParamMap ): FilterRule[] { + let filterRulesFromQueryParams: FilterRule[] = [] + const allFilterRuleQueryParams: string[] = FILTER_RULE_TYPES.map( (rt) => rt.filtervar ) @@ -104,7 +112,6 @@ export function filterRulesFromQueryParams( .filter((rt) => rt !== undefined) // transform query params to filter rules - let filterRulesFromQueryParams: FilterRule[] = [] allFilterRuleQueryParams .filter((frqp) => queryParams.has(frqp)) .forEach((filterQueryParamName) => { @@ -146,7 +153,17 @@ export function queryParamsFromFilterRules(filterRules: FilterRule[]): Params { let params = {} for (let rule of filterRules) { let ruleType = FILTER_RULE_TYPES.find((t) => t.id == rule.rule_type) - if (ruleType.isnull_filtervar && rule.value == null) { + if ( + rule.rule_type === FILTER_TITLE_CONTENT || + rule.rule_type === FILTER_SIMPLE_TEXT + ) { + params[SIMPLE_TEXT_PARAMETER] = rule.value + } else if ( + rule.rule_type === FILTER_TITLE || + rule.rule_type === FILTER_SIMPLE_TITLE + ) { + params[SIMPLE_TITLE_PARAMETER] = rule.value + } else if (ruleType.isnull_filtervar && rule.value == null) { params[ruleType.isnull_filtervar] = 1 } else if ( ruleType.isnull_filtervar && diff --git a/src/documents/consumer.py b/src/documents/consumer.py index 424e22ce2..8035f3857 100644 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -213,14 +213,12 @@ class ConsumerPluginMixin: message, current_progress, max_progress, - extra_args={ - "document_id": document_id, - "owner_id": self.metadata.owner_id if self.metadata.owner_id else None, - "users_can_view": (self.metadata.view_users or []) - + (self.metadata.change_users or []), - "groups_can_view": (self.metadata.view_groups or []) - + (self.metadata.change_groups or []), - }, + document_id=document_id, + owner_id=self.metadata.owner_id if self.metadata.owner_id else None, + users_can_view=(self.metadata.view_users or []) + + (self.metadata.change_users or []), + groups_can_view=(self.metadata.view_groups or []) + + (self.metadata.change_groups or []), ) def _fail( diff --git a/src/documents/filters.py b/src/documents/filters.py index 2f7de1cd4..b2b226ee1 100644 --- a/src/documents/filters.py +++ b/src/documents/filters.py @@ -3,6 +3,7 @@ from __future__ import annotations import functools import inspect import json +import logging import operator from contextlib import contextmanager from typing import TYPE_CHECKING @@ -77,6 +78,8 @@ DATETIME_KWARGS = [ CUSTOM_FIELD_QUERY_MAX_DEPTH = 10 CUSTOM_FIELD_QUERY_MAX_ATOMS = 20 +logger = logging.getLogger("paperless.api") + class CorrespondentFilterSet(FilterSet): class Meta: @@ -162,9 +165,13 @@ class InboxFilter(Filter): @extend_schema_field(serializers.CharField) class TitleContentFilter(Filter): + # Deprecated but retained for existing saved views. UI uses Tantivy-backed `text` / `title_search` params. def filter(self, qs: Any, value: Any) -> Any: value = value.strip() if isinstance(value, str) else value if value: + logger.warning( + "Deprecated document filter parameter 'title_content' used; use `text` instead.", + ) try: return qs.filter( Q(title__icontains=value) | Q(effective_content__icontains=value), @@ -243,6 +250,9 @@ class CustomFieldsFilter(Filter): def filter(self, qs, value): value = value.strip() if isinstance(value, str) else value if value: + logger.warning( + "Deprecated document filter parameter 'custom_fields__icontains' used; use `custom_field_query` or advanced Tantivy field syntax instead.", + ) fields_with_matching_selects = CustomField.objects.filter( extra_data__icontains=value, ) @@ -747,6 +757,7 @@ class DocumentFilterSet(FilterSet): is_in_inbox = InboxFilter() + # Deprecated, but keep for now for existing saved views title_content = TitleContentFilter() content__istartswith = EffectiveContentFilter(lookup_expr="istartswith") @@ -756,6 +767,7 @@ class DocumentFilterSet(FilterSet): owner__id__none = ObjectFilter(field_name="owner", exclude=True) + # Deprecated, UI no longer includes CF text-search mode, but keep for now for existing saved views custom_fields__icontains = CustomFieldsFilter() custom_fields__id__all = ObjectFilter(field_name="custom_fields__field") diff --git a/src/documents/management/commands/document_exporter.py b/src/documents/management/commands/document_exporter.py index ee3b44e0c..562a2ca8d 100644 --- a/src/documents/management/commands/document_exporter.py +++ b/src/documents/management/commands/document_exporter.py @@ -45,6 +45,8 @@ from documents.models import DocumentType from documents.models import Note from documents.models import SavedView from documents.models import SavedViewFilterRule +from documents.models import ShareLink +from documents.models import ShareLinkBundle from documents.models import StoragePath from documents.models import Tag from documents.models import UiSettings @@ -55,6 +57,7 @@ from documents.models import WorkflowActionWebhook from documents.models import WorkflowTrigger from documents.settings import EXPORTER_ARCHIVE_NAME from documents.settings import EXPORTER_FILE_NAME +from documents.settings import EXPORTER_SHARE_LINK_BUNDLE_NAME from documents.settings import EXPORTER_THUMBNAIL_NAME from documents.utils import compute_checksum from documents.utils import copy_file_with_basic_stats @@ -389,6 +392,8 @@ class Command(CryptMixin, PaperlessCommand): "app_configs": ApplicationConfiguration.objects.all(), "notes": Note.global_objects.all(), "documents": Document.global_objects.order_by("id").all(), + "share_links": ShareLink.global_objects.all(), + "share_link_bundles": ShareLinkBundle.objects.order_by("id").all(), "social_accounts": SocialAccount.objects.all(), "social_apps": SocialApp.objects.all(), "social_tokens": SocialToken.objects.all(), @@ -409,6 +414,7 @@ class Command(CryptMixin, PaperlessCommand): ) document_manifest: list[dict] = [] + share_link_bundle_manifest: list[dict] = [] manifest_path = (self.target / "manifest.json").resolve() with StreamingManifestWriter( @@ -427,6 +433,15 @@ class Command(CryptMixin, PaperlessCommand): for record in batch: self._encrypt_record_inline(record) document_manifest.extend(batch) + elif key == "share_link_bundles": + # Accumulate for file-copy loop; written to manifest after + for batch in serialize_queryset_batched( + qs, + batch_size=self.batch_size, + ): + for record in batch: + self._encrypt_record_inline(record) + share_link_bundle_manifest.extend(batch) elif self.split_manifest and key in ( "notes", "custom_field_instances", @@ -445,6 +460,12 @@ class Command(CryptMixin, PaperlessCommand): document_map: dict[int, Document] = { d.pk: d for d in Document.global_objects.order_by("id") } + share_link_bundle_map: dict[int, ShareLinkBundle] = { + b.pk: b + for b in ShareLinkBundle.objects.order_by("id").prefetch_related( + "documents", + ) + } # 3. Export files from each document for index, document_dict in enumerate( @@ -478,6 +499,19 @@ class Command(CryptMixin, PaperlessCommand): else: writer.write_record(document_dict) + for bundle_dict in share_link_bundle_manifest: + bundle = share_link_bundle_map[bundle_dict["pk"]] + + bundle_target = self.generate_share_link_bundle_target( + bundle, + bundle_dict, + ) + + if not self.data_only and bundle_target is not None: + self.copy_share_link_bundle_file(bundle, bundle_target) + + writer.write_record(bundle_dict) + # 4.2 write version information to target folder extra_metadata_path = (self.target / "metadata.json").resolve() metadata: dict[str, str | int | dict[str, str | int]] = { @@ -598,6 +632,48 @@ class Command(CryptMixin, PaperlessCommand): archive_target, ) + def generate_share_link_bundle_target( + self, + bundle: ShareLinkBundle, + bundle_dict: dict, + ) -> Path | None: + """ + Generates the export target for a share link bundle file, when present. + """ + if not bundle.file_path: + return None + + stored_bundle_path = Path(bundle.file_path) + portable_bundle_path = ( + stored_bundle_path + if not stored_bundle_path.is_absolute() + else Path(stored_bundle_path.name) + ) + export_bundle_path = Path("share_link_bundles") / portable_bundle_path + + bundle_dict["fields"]["file_path"] = portable_bundle_path.as_posix() + bundle_dict[EXPORTER_SHARE_LINK_BUNDLE_NAME] = export_bundle_path.as_posix() + + return (self.target / export_bundle_path).resolve() + + def copy_share_link_bundle_file( + self, + bundle: ShareLinkBundle, + bundle_target: Path, + ) -> None: + """ + Copies a share link bundle ZIP into the export directory. + """ + bundle_source_path = bundle.absolute_file_path + if bundle_source_path is None: + raise FileNotFoundError(f"Share link bundle {bundle.pk} has no file path") + + self.check_and_copy( + bundle_source_path, + None, + bundle_target, + ) + def _encrypt_record_inline(self, record: dict) -> None: """Encrypt sensitive fields in a single record, if passphrase is set.""" if not self.passphrase: diff --git a/src/documents/management/commands/document_importer.py b/src/documents/management/commands/document_importer.py index 4572b4617..becdf7b76 100644 --- a/src/documents/management/commands/document_importer.py +++ b/src/documents/management/commands/document_importer.py @@ -32,10 +32,12 @@ from documents.models import CustomFieldInstance from documents.models import Document from documents.models import DocumentType from documents.models import Note +from documents.models import ShareLinkBundle from documents.models import Tag from documents.settings import EXPORTER_ARCHIVE_NAME from documents.settings import EXPORTER_CRYPTO_SETTINGS_NAME from documents.settings import EXPORTER_FILE_NAME +from documents.settings import EXPORTER_SHARE_LINK_BUNDLE_NAME from documents.settings import EXPORTER_THUMBNAIL_NAME from documents.signals.handlers import check_paths_and_prune_custom_fields from documents.signals.handlers import update_filename_and_move_files @@ -348,18 +350,42 @@ class Command(CryptMixin, PaperlessCommand): f"Failed to read from archive file {doc_archive_path}", ) from e + def check_share_link_bundle_validity(bundle_record: dict) -> None: + if EXPORTER_SHARE_LINK_BUNDLE_NAME not in bundle_record: + return + + bundle_file = bundle_record[EXPORTER_SHARE_LINK_BUNDLE_NAME] + bundle_path: Path = self.source / bundle_file + if not bundle_path.exists(): + raise CommandError( + f'The manifest file refers to "{bundle_file}" which does not ' + "appear to be in the source directory.", + ) + try: + with bundle_path.open(mode="rb"): + pass + except Exception as e: + raise CommandError( + f"Failed to read from share link bundle file {bundle_path}", + ) from e + self.stdout.write("Checking the manifest") for manifest_path in self.manifest_paths: for record in iter_manifest_records(manifest_path): # Only check if the document files exist if this is not data only # We don't care about documents for a data only import - if not self.data_only and record["model"] == "documents.document": + if self.data_only: + continue + if record["model"] == "documents.document": check_document_validity(record) + elif record["model"] == "documents.sharelinkbundle": + check_share_link_bundle_validity(record) def _import_files_from_manifest(self) -> None: settings.ORIGINALS_DIR.mkdir(parents=True, exist_ok=True) settings.THUMBNAIL_DIR.mkdir(parents=True, exist_ok=True) settings.ARCHIVE_DIR.mkdir(parents=True, exist_ok=True) + settings.SHARE_LINK_BUNDLE_DIR.mkdir(parents=True, exist_ok=True) self.stdout.write("Copy files into paperless...") @@ -374,6 +400,18 @@ class Command(CryptMixin, PaperlessCommand): for record in iter_manifest_records(manifest_path) if record["model"] == "documents.document" ] + share_link_bundle_records = [ + { + "pk": record["pk"], + EXPORTER_SHARE_LINK_BUNDLE_NAME: record.get( + EXPORTER_SHARE_LINK_BUNDLE_NAME, + ), + } + for manifest_path in self.manifest_paths + for record in iter_manifest_records(manifest_path) + if record["model"] == "documents.sharelinkbundle" + and record.get(EXPORTER_SHARE_LINK_BUNDLE_NAME) + ] for record in self.track(document_records, description="Copying files..."): document = Document.global_objects.get(pk=record["pk"]) @@ -416,6 +454,26 @@ class Command(CryptMixin, PaperlessCommand): document.save() + for record in self.track( + share_link_bundle_records, + description="Copying share link bundles...", + ): + bundle = ShareLinkBundle.objects.get(pk=record["pk"]) + bundle_file = record[EXPORTER_SHARE_LINK_BUNDLE_NAME] + bundle_source_path = (self.source / bundle_file).resolve() + bundle_target_path = bundle.absolute_file_path + if bundle_target_path is None: + raise CommandError( + f"Share link bundle {bundle.pk} does not have a valid file path.", + ) + + with FileLock(settings.MEDIA_LOCK): + bundle_target_path.parent.mkdir(parents=True, exist_ok=True) + copy_file_with_basic_stats( + bundle_source_path, + bundle_target_path, + ) + def _decrypt_record_if_needed(self, record: dict) -> dict: fields = self.CRYPT_FIELDS_BY_MODEL.get(record.get("model", "")) if fields: diff --git a/src/documents/migrations/0018_saved_view_simple_search_rules.py b/src/documents/migrations/0018_saved_view_simple_search_rules.py new file mode 100644 index 000000000..6d128c593 --- /dev/null +++ b/src/documents/migrations/0018_saved_view_simple_search_rules.py @@ -0,0 +1,92 @@ +# Generated by Django 5.2.12 on 2026-04-01 18:20 + +from django.db import migrations +from django.db import models + +OLD_TITLE_RULE = 0 +OLD_TITLE_CONTENT_RULE = 19 +NEW_SIMPLE_TITLE_RULE = 48 +NEW_SIMPLE_TEXT_RULE = 49 + + +# See documents/models.py SavedViewFilterRule +def migrate_saved_view_rules_forward(apps, schema_editor): + SavedViewFilterRule = apps.get_model("documents", "SavedViewFilterRule") + SavedViewFilterRule.objects.filter(rule_type=OLD_TITLE_RULE).update( + rule_type=NEW_SIMPLE_TITLE_RULE, + ) + SavedViewFilterRule.objects.filter(rule_type=OLD_TITLE_CONTENT_RULE).update( + rule_type=NEW_SIMPLE_TEXT_RULE, + ) + + +class Migration(migrations.Migration): + dependencies = [ + ("documents", "0017_migrate_fulltext_query_field_prefixes"), + ] + + operations = [ + migrations.AlterField( + model_name="savedviewfilterrule", + name="rule_type", + field=models.PositiveSmallIntegerField( + choices=[ + (0, "title contains"), + (1, "content contains"), + (2, "ASN is"), + (3, "correspondent is"), + (4, "document type is"), + (5, "is in inbox"), + (6, "has tag"), + (7, "has any tag"), + (8, "created before"), + (9, "created after"), + (10, "created year is"), + (11, "created month is"), + (12, "created day is"), + (13, "added before"), + (14, "added after"), + (15, "modified before"), + (16, "modified after"), + (17, "does not have tag"), + (18, "does not have ASN"), + (19, "title or content contains"), + (20, "fulltext query"), + (21, "more like this"), + (22, "has tags in"), + (23, "ASN greater than"), + (24, "ASN less than"), + (25, "storage path is"), + (26, "has correspondent in"), + (27, "does not have correspondent in"), + (28, "has document type in"), + (29, "does not have document type in"), + (30, "has storage path in"), + (31, "does not have storage path in"), + (32, "owner is"), + (33, "has owner in"), + (34, "does not have owner"), + (35, "does not have owner in"), + (36, "has custom field value"), + (37, "is shared by me"), + (38, "has custom fields"), + (39, "has custom field in"), + (40, "does not have custom field in"), + (41, "does not have custom field"), + (42, "custom fields query"), + (43, "created to"), + (44, "created from"), + (45, "added to"), + (46, "added from"), + (47, "mime type is"), + (48, "simple title search"), + (49, "simple text search"), + ], + verbose_name="rule type", + ), + ), + migrations.RunPython( + migrate_saved_view_rules_forward, + migrations.RunPython.noop, + ), + ] diff --git a/src/documents/models.py b/src/documents/models.py index 96f027b94..9af5fbc23 100644 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -623,6 +623,8 @@ class SavedViewFilterRule(models.Model): (45, _("added to")), (46, _("added from")), (47, _("mime type is")), + (48, _("simple title search")), + (49, _("simple text search")), ] saved_view = models.ForeignKey( diff --git a/src/documents/plugins/helpers.py b/src/documents/plugins/helpers.py index e5cfde3b8..e30591125 100644 --- a/src/documents/plugins/helpers.py +++ b/src/documents/plugins/helpers.py @@ -1,6 +1,9 @@ import enum -from collections.abc import Mapping from typing import TYPE_CHECKING +from typing import Literal +from typing import Self +from typing import TypeAlias +from typing import TypedDict from asgiref.sync import async_to_sync from channels.layers import get_channel_layer @@ -16,6 +19,59 @@ class ProgressStatusOptions(enum.StrEnum): FAILED = "FAILED" +class PermissionsData(TypedDict, total=False): + """Permission fields included in status messages for access control.""" + + owner_id: int | None + users_can_view: list[int] + groups_can_view: list[int] + + +class ProgressUpdateData(TypedDict): + filename: str | None + task_id: str | None + current_progress: int + max_progress: int + status: str + message: str + document_id: int | None + owner_id: int | None + users_can_view: list[int] + groups_can_view: list[int] + + +class StatusUpdatePayload(TypedDict): + type: Literal["status_update"] + data: ProgressUpdateData + + +class DocumentsDeletedData(TypedDict): + documents: list[int] + + +class DocumentsDeletedPayload(TypedDict): + type: Literal["documents_deleted"] + data: DocumentsDeletedData + + +class DocumentUpdatedData(TypedDict): + document_id: int + modified: str + owner_id: int | None + users_can_view: list[int] + groups_can_view: list[int] + + +class DocumentUpdatedPayload(TypedDict): + type: Literal["document_updated"] + data: DocumentUpdatedData + + +WebsocketPayload: TypeAlias = ( + StatusUpdatePayload | DocumentsDeletedPayload | DocumentUpdatedPayload +) + + class BaseStatusManager: """ Handles sending of progress information via the channel layer, with proper management @@ -25,11 +81,11 @@ class BaseStatusManager: def __init__(self) -> None: self._channel: RedisPubSubChannelLayer | None = None - def __enter__(self): + def __enter__(self) -> Self: self.open() return self - def __exit__(self, exc_type, exc_val, exc_tb): + def __exit__(self, exc_type: object, exc_val: object, exc_tb: object) -> None: self.close() def open(self) -> None: @@ -48,7 +104,7 @@ class BaseStatusManager: async_to_sync(self._channel.flush) self._channel = None - def send(self, payload: Mapping[str, object]) -> None: + def send(self, payload: WebsocketPayload) -> None: # Ensure the layer is open self.open() @@ -72,36 +128,36 @@ class ProgressManager(BaseStatusManager): message: str, current_progress: int, max_progress: int, - extra_args: dict[str, str | int | None] | None = None, + *, + document_id: int | None = None, + owner_id: int | None = None, + users_can_view: list[int] | None = None, + groups_can_view: list[int] | None = None, ) -> None: - data: dict[str, object] = { + data: ProgressUpdateData = { "filename": self.filename, "task_id": self.task_id, "current_progress": current_progress, "max_progress": max_progress, "status": status, "message": message, + "document_id": document_id, + "owner_id": owner_id, + "users_can_view": users_can_view or [], + "groups_can_view": groups_can_view or [], } - if extra_args is not None: - data.update(extra_args) - - payload: dict[str, object] = { - "type": "status_update", - "data": data, - } - + payload: StatusUpdatePayload = {"type": "status_update", "data": data} self.send(payload) class DocumentsStatusManager(BaseStatusManager): def send_documents_deleted(self, documents: list[int]) -> None: - payload: dict[str, object] = { + payload: DocumentsDeletedPayload = { "type": "documents_deleted", "data": { "documents": documents, }, } - self.send(payload) def send_document_updated( @@ -113,7 +169,7 @@ class DocumentsStatusManager(BaseStatusManager): users_can_view: list[int] | None = None, groups_can_view: list[int] | None = None, ) -> None: - payload: dict[str, object] = { + payload: DocumentUpdatedPayload = { "type": "document_updated", "data": { "document_id": document_id, @@ -123,5 +179,4 @@ class DocumentsStatusManager(BaseStatusManager): "groups_can_view": groups_can_view or [], }, } - self.send(payload) diff --git a/src/documents/search/__init__.py b/src/documents/search/__init__.py index b0a89f242..a4145d7ef 100644 --- a/src/documents/search/__init__.py +++ b/src/documents/search/__init__.py @@ -1,4 +1,5 @@ from documents.search._backend import SearchIndexLockError +from documents.search._backend import SearchMode from documents.search._backend import SearchResults from documents.search._backend import TantivyBackend from documents.search._backend import TantivyRelevanceList @@ -10,6 +11,7 @@ from documents.search._schema import wipe_index __all__ = [ "SearchIndexLockError", + "SearchMode", "SearchResults", "TantivyBackend", "TantivyRelevanceList", diff --git a/src/documents/search/_backend.py b/src/documents/search/_backend.py index a1bff8a9f..2005a436f 100644 --- a/src/documents/search/_backend.py +++ b/src/documents/search/_backend.py @@ -2,11 +2,11 @@ from __future__ import annotations import logging import threading -import unicodedata from collections import Counter from dataclasses import dataclass from datetime import UTC from datetime import datetime +from enum import StrEnum from typing import TYPE_CHECKING from typing import Self from typing import TypedDict @@ -19,7 +19,10 @@ from django.conf import settings from django.utils.timezone import get_current_timezone from guardian.shortcuts import get_users_with_perms +from documents.search._normalize import ascii_fold from documents.search._query import build_permission_filter +from documents.search._query import parse_simple_text_query +from documents.search._query import parse_simple_title_query from documents.search._query import parse_user_query from documents.search._schema import _write_sentinels from documents.search._schema import build_schema @@ -45,14 +48,10 @@ _AUTOCOMPLETE_REGEX_TIMEOUT = 1.0 # seconds; guards against ReDoS on untrusted T = TypeVar("T") -def _ascii_fold(s: str) -> str: - """ - Normalize unicode to ASCII equivalent characters for search consistency. - - Converts accented characters (e.g., "café") to their ASCII base forms ("cafe") - to enable cross-language searching without requiring exact diacritic matching. - """ - return unicodedata.normalize("NFD", s).encode("ascii", "ignore").decode() +class SearchMode(StrEnum): + QUERY = "query" + TEXT = "text" + TITLE = "title" def _extract_autocomplete_words(text_sources: list[str]) -> set[str]: @@ -74,7 +73,7 @@ def _extract_autocomplete_words(text_sources: list[str]) -> set[str]: ) continue for token in tokens: - normalized = _ascii_fold(token.lower()) + normalized = ascii_fold(token.lower()) if normalized: words.add(normalized) return words @@ -294,8 +293,10 @@ class TantivyBackend: doc.add_text("checksum", document.checksum) doc.add_text("title", document.title) doc.add_text("title_sort", document.title) + doc.add_text("simple_title", document.title) doc.add_text("content", content) doc.add_text("bigram_content", content) + doc.add_text("simple_content", content) # Original filename - only add if not None/empty if document.original_filename: @@ -433,6 +434,7 @@ class TantivyBackend: sort_field: str | None, *, sort_reverse: bool, + search_mode: SearchMode = SearchMode.QUERY, ) -> SearchResults: """ Execute a search query against the document index. @@ -441,20 +443,32 @@ class TantivyBackend: permission filtering before executing against Tantivy. Supports both relevance-based and field-based sorting. + QUERY search mode supports natural date keywords, field filters, etc. + TITLE search mode treats the query as plain text to search for in title only + TEXT search mode treats the query as plain text to search for in title and content + Args: - query: User's search query (supports natural date keywords, field filters) + query: User's search query user: User for permission filtering (None for superuser/no filtering) page: Page number (1-indexed) for pagination page_size: Number of results per page sort_field: Field to sort by (None for relevance ranking) sort_reverse: Whether to reverse the sort order + search_mode: "query" for advanced Tantivy syntax, "text" for + plain-text search over title and content only, "title" for + plain-text search over title only Returns: SearchResults with hits, total count, and processed query """ self._ensure_open() tz = get_current_timezone() - user_query = parse_user_query(self._index, query, tz) + if search_mode is SearchMode.TEXT: + user_query = parse_simple_text_query(self._index, query) + elif search_mode is SearchMode.TITLE: + user_query = parse_simple_title_query(self._index, query) + else: + user_query = parse_user_query(self._index, query, tz) # Apply permission filter if user is not None (not superuser) if user is not None: @@ -518,6 +532,7 @@ class TantivyBackend: # Build result hits with highlights hits: list[SearchHit] = [] snippet_generator = None + notes_snippet_generator = None for rank, (doc_address, score) in enumerate(page_hits, start=offset + 1): # Get the actual document from the searcher using the doc address @@ -544,13 +559,16 @@ class TantivyBackend: # Try notes highlights if "notes" in doc_dict: - notes_generator = tantivy.SnippetGenerator.create( - searcher, - final_query, - self._schema, - "notes", + if notes_snippet_generator is None: + notes_snippet_generator = tantivy.SnippetGenerator.create( + searcher, + final_query, + self._schema, + "notes", + ) + notes_snippet = notes_snippet_generator.snippet_from_doc( + actual_doc, ) - notes_snippet = notes_generator.snippet_from_doc(actual_doc) if notes_snippet: highlights["notes"] = str(notes_snippet) @@ -594,7 +612,7 @@ class TantivyBackend: List of word suggestions ordered by frequency, then alphabetically """ self._ensure_open() - normalized_term = _ascii_fold(term.lower()) + normalized_term = ascii_fold(term.lower()) searcher = self._index.searcher() diff --git a/src/documents/search/_normalize.py b/src/documents/search/_normalize.py new file mode 100644 index 000000000..3d7b23f33 --- /dev/null +++ b/src/documents/search/_normalize.py @@ -0,0 +1,8 @@ +from __future__ import annotations + +import unicodedata + + +def ascii_fold(text: str) -> str: + """Normalize unicode text to ASCII equivalents for search consistency.""" + return unicodedata.normalize("NFD", text).encode("ascii", "ignore").decode() diff --git a/src/documents/search/_query.py b/src/documents/search/_query.py index 212df1516..b7bcbbe9c 100644 --- a/src/documents/search/_query.py +++ b/src/documents/search/_query.py @@ -12,6 +12,8 @@ import tantivy from dateutil.relativedelta import relativedelta from django.conf import settings +from documents.search._normalize import ascii_fold + if TYPE_CHECKING: from datetime import tzinfo @@ -51,6 +53,7 @@ _WHOOSH_REL_RANGE_RE = regex.compile( ) # Whoosh-style 8-digit date: field:YYYYMMDD — field-aware so timezone can be applied correctly _DATE8_RE = regex.compile(r"(?P\w+):(?P\d{8})\b") +_SIMPLE_QUERY_TOKEN_RE = regex.compile(r"\S+") def _fmt(dt: datetime) -> str: @@ -436,7 +439,37 @@ DEFAULT_SEARCH_FIELDS = [ "document_type", "tag", ] +SIMPLE_SEARCH_FIELDS = ["simple_title", "simple_content"] +TITLE_SEARCH_FIELDS = ["simple_title"] _FIELD_BOOSTS = {"title": 2.0} +_SIMPLE_FIELD_BOOSTS = {"simple_title": 2.0} + + +def _build_simple_field_query( + index: tantivy.Index, + field: str, + tokens: list[str], +) -> tantivy.Query: + patterns = [] + for idx, token in enumerate(tokens): + escaped = regex.escape(token) + # For multi-token substring search, only the first token can begin mid-word. + # Later tokens follow a whitespace boundary in the original query, so anchor + # them to the start of the next indexed token to reduce false positives like + # matching "Z-Berichte 16" for the query "Z-Berichte 6". + if idx == 0: + patterns.append(f".*{escaped}.*") + else: + patterns.append(f"{escaped}.*") + if len(patterns) == 1: + query = tantivy.Query.regex_query(index.schema, field, patterns[0]) + else: + query = tantivy.Query.regex_phrase_query(index.schema, field, patterns) + + boost = _SIMPLE_FIELD_BOOSTS.get(field, 1.0) + if boost > 1.0: + return tantivy.Query.boost_query(query, boost) + return query def parse_user_query( @@ -495,3 +528,52 @@ def parse_user_query( ) return exact + + +def parse_simple_query( + index: tantivy.Index, + raw_query: str, + fields: list[str], +) -> tantivy.Query: + """ + Parse a plain-text query using Tantivy over a restricted field set. + + Query string is escaped and normalized to be treated as "simple" text query. + """ + tokens = [ + ascii_fold(token.lower()) + for token in _SIMPLE_QUERY_TOKEN_RE.findall(raw_query, timeout=_REGEX_TIMEOUT) + ] + tokens = [token for token in tokens if token] + if not tokens: + return tantivy.Query.empty_query() + + field_queries = [ + (tantivy.Occur.Should, _build_simple_field_query(index, field, tokens)) + for field in fields + ] + if len(field_queries) == 1: + return field_queries[0][1] + return tantivy.Query.boolean_query(field_queries) + + +def parse_simple_text_query( + index: tantivy.Index, + raw_query: str, +) -> tantivy.Query: + """ + Parse a plain-text query over title/content for simple search inputs. + """ + + return parse_simple_query(index, raw_query, SIMPLE_SEARCH_FIELDS) + + +def parse_simple_title_query( + index: tantivy.Index, + raw_query: str, +) -> tantivy.Query: + """ + Parse a plain-text query over the title field only. + """ + + return parse_simple_query(index, raw_query, TITLE_SEARCH_FIELDS) diff --git a/src/documents/search/_schema.py b/src/documents/search/_schema.py index ba6646007..5e9404235 100644 --- a/src/documents/search/_schema.py +++ b/src/documents/search/_schema.py @@ -53,6 +53,18 @@ def build_schema() -> tantivy.Schema: # CJK support - not stored, indexed only sb.add_text_field("bigram_content", stored=False, tokenizer_name="bigram_analyzer") + # Simple substring search support for title/content - not stored, indexed only + sb.add_text_field( + "simple_title", + stored=False, + tokenizer_name="simple_search_analyzer", + ) + sb.add_text_field( + "simple_content", + stored=False, + tokenizer_name="simple_search_analyzer", + ) + # Autocomplete prefix scan - stored, not indexed sb.add_text_field("autocomplete_word", stored=True, tokenizer_name="raw") diff --git a/src/documents/search/_tokenizer.py b/src/documents/search/_tokenizer.py index e597a879e..2079ca4cc 100644 --- a/src/documents/search/_tokenizer.py +++ b/src/documents/search/_tokenizer.py @@ -70,6 +70,7 @@ def register_tokenizers(index: tantivy.Index, language: str | None) -> None: index.register_tokenizer("paperless_text", _paperless_text(language)) index.register_tokenizer("simple_analyzer", _simple_analyzer()) index.register_tokenizer("bigram_analyzer", _bigram_analyzer()) + index.register_tokenizer("simple_search_analyzer", _simple_search_analyzer()) # Fast-field tokenizer required for fast=True text fields in the schema index.register_fast_field_tokenizer("simple_analyzer", _simple_analyzer()) @@ -114,3 +115,16 @@ def _bigram_analyzer() -> tantivy.TextAnalyzer: .filter(tantivy.Filter.lowercase()) .build() ) + + +def _simple_search_analyzer() -> tantivy.TextAnalyzer: + """Tokenizer for simple substring search fields: non-whitespace chunks -> remove_long(65) -> lowercase -> ascii_fold.""" + return ( + tantivy.TextAnalyzerBuilder( + tantivy.Tokenizer.regex(r"\S+"), + ) + .filter(tantivy.Filter.remove_long(65)) + .filter(tantivy.Filter.lowercase()) + .filter(tantivy.Filter.ascii_fold()) + .build() + ) diff --git a/src/documents/settings.py b/src/documents/settings.py index 9dff44c95..c4c87b8a7 100644 --- a/src/documents/settings.py +++ b/src/documents/settings.py @@ -3,6 +3,7 @@ EXPORTER_FILE_NAME = "__exported_file_name__" EXPORTER_THUMBNAIL_NAME = "__exported_thumbnail_name__" EXPORTER_ARCHIVE_NAME = "__exported_archive_name__" +EXPORTER_SHARE_LINK_BUNDLE_NAME = "__exported_share_link_bundle_name__" EXPORTER_CRYPTO_SETTINGS_NAME = "__crypto__" EXPORTER_CRYPTO_SALT_NAME = "__salt_hex__" diff --git a/src/documents/tasks.py b/src/documents/tasks.py index bc4ed1abe..57c819492 100644 --- a/src/documents/tasks.py +++ b/src/documents/tasks.py @@ -62,6 +62,7 @@ from documents.utils import compute_checksum from documents.utils import identity from documents.workflows.utils import get_workflows_for_trigger from paperless.config import AIConfig +from paperless.logging import consume_task_id from paperless.parsers import ParserContext from paperless.parsers.registry import get_parser_registry from paperless_ai.indexing import llm_index_add_or_update_document @@ -148,76 +149,85 @@ def consume_file( input_doc: ConsumableDocument, overrides: DocumentMetadataOverrides | None = None, ): - # Default no overrides - if overrides is None: - overrides = DocumentMetadataOverrides() + token = consume_task_id.set((self.request.id or "")[:8]) + try: + # Default no overrides + if overrides is None: + overrides = DocumentMetadataOverrides() - plugins: list[type[ConsumeTaskPlugin]] = ( - [ - ConsumerPreflightPlugin, - ConsumerPlugin, - ] - if input_doc.root_document_id is not None - else [ - ConsumerPreflightPlugin, - AsnCheckPlugin, - CollatePlugin, - BarcodePlugin, - AsnCheckPlugin, # Re-run ASN check after barcode reading - WorkflowTriggerPlugin, - ConsumerPlugin, - ] - ) + plugins: list[type[ConsumeTaskPlugin]] = ( + [ + ConsumerPreflightPlugin, + ConsumerPlugin, + ] + if input_doc.root_document_id is not None + else [ + ConsumerPreflightPlugin, + AsnCheckPlugin, + CollatePlugin, + BarcodePlugin, + AsnCheckPlugin, # Re-run ASN check after barcode reading + WorkflowTriggerPlugin, + ConsumerPlugin, + ] + ) - with ( - ProgressManager( - overrides.filename or input_doc.original_file.name, - self.request.id, - ) as status_mgr, - TemporaryDirectory(dir=settings.SCRATCH_DIR) as tmp_dir, - ): - tmp_dir = Path(tmp_dir) - for plugin_class in plugins: - plugin_name = plugin_class.NAME - - plugin = plugin_class( - input_doc, - overrides, - status_mgr, - tmp_dir, + with ( + ProgressManager( + overrides.filename or input_doc.original_file.name, self.request.id, - ) + ) as status_mgr, + TemporaryDirectory(dir=settings.SCRATCH_DIR) as tmp_dir, + ): + tmp_dir = Path(tmp_dir) + for plugin_class in plugins: + plugin_name = plugin_class.NAME - if not plugin.able_to_run: - logger.debug(f"Skipping plugin {plugin_name}") - continue + plugin = plugin_class( + input_doc, + overrides, + status_mgr, + tmp_dir, + self.request.id, + ) - try: - logger.debug(f"Executing plugin {plugin_name}") - plugin.setup() + if not plugin.able_to_run: + logger.debug(f"Skipping plugin {plugin_name}") + continue - msg = plugin.run() + try: + logger.debug(f"Executing plugin {plugin_name}") + plugin.setup() - if msg is not None: - logger.info(f"{plugin_name} completed with: {msg}") - else: - logger.info(f"{plugin_name} completed with no message") + msg = plugin.run() - overrides = plugin.metadata + if msg is not None: + logger.info(f"{plugin_name} completed with: {msg}") + else: + logger.info(f"{plugin_name} completed with no message") - except StopConsumeTaskError as e: - logger.info(f"{plugin_name} requested task exit: {e.message}") - return e.message + overrides = plugin.metadata - except Exception as e: - logger.exception(f"{plugin_name} failed: {e}") - status_mgr.send_progress(ProgressStatusOptions.FAILED, f"{e}", 100, 100) - raise + except StopConsumeTaskError as e: + logger.info(f"{plugin_name} requested task exit: {e.message}") + return e.message - finally: - plugin.cleanup() + except Exception as e: + logger.exception(f"{plugin_name} failed: {e}") + status_mgr.send_progress( + ProgressStatusOptions.FAILED, + f"{e}", + 100, + 100, + ) + raise - return msg + finally: + plugin.cleanup() + + return msg + finally: + consume_task_id.reset(token) @shared_task diff --git a/src/documents/tests/search/test_backend.py b/src/documents/tests/search/test_backend.py index 5c92da447..ff9638e63 100644 --- a/src/documents/tests/search/test_backend.py +++ b/src/documents/tests/search/test_backend.py @@ -5,6 +5,7 @@ from documents.models import CustomField from documents.models import CustomFieldInstance from documents.models import Document from documents.models import Note +from documents.search._backend import SearchMode from documents.search._backend import TantivyBackend from documents.search._backend import get_backend from documents.search._backend import reset_backend @@ -46,6 +47,258 @@ class TestWriteBatch: class TestSearch: """Test search functionality.""" + def test_text_mode_limits_default_search_to_title_and_content( + self, + backend: TantivyBackend, + ): + """Simple text mode must not match metadata-only fields.""" + doc = Document.objects.create( + title="Invoice document", + content="monthly statement", + checksum="TXT1", + pk=9, + ) + backend.add_or_update(doc) + + metadata_only = backend.search( + "document_type:invoice", + user=None, + page=1, + page_size=10, + sort_field=None, + sort_reverse=False, + search_mode=SearchMode.TEXT, + ) + assert metadata_only.total == 0 + + content_match = backend.search( + "monthly", + user=None, + page=1, + page_size=10, + sort_field=None, + sort_reverse=False, + search_mode=SearchMode.TEXT, + ) + assert content_match.total == 1 + + def test_title_mode_limits_default_search_to_title_only( + self, + backend: TantivyBackend, + ): + """Title mode must not match content-only terms.""" + doc = Document.objects.create( + title="Invoice document", + content="monthly statement", + checksum="TXT2", + pk=10, + ) + backend.add_or_update(doc) + + content_only = backend.search( + "monthly", + user=None, + page=1, + page_size=10, + sort_field=None, + sort_reverse=False, + search_mode=SearchMode.TITLE, + ) + assert content_only.total == 0 + + title_match = backend.search( + "invoice", + user=None, + page=1, + page_size=10, + sort_field=None, + sort_reverse=False, + search_mode=SearchMode.TITLE, + ) + assert title_match.total == 1 + + def test_text_mode_matches_partial_term_substrings( + self, + backend: TantivyBackend, + ): + """Simple text mode should support substring matching within tokens.""" + doc = Document.objects.create( + title="Account access", + content="password reset instructions", + checksum="TXT3", + pk=11, + ) + backend.add_or_update(doc) + + prefix_match = backend.search( + "pass", + user=None, + page=1, + page_size=10, + sort_field=None, + sort_reverse=False, + search_mode=SearchMode.TEXT, + ) + assert prefix_match.total == 1 + + infix_match = backend.search( + "sswo", + user=None, + page=1, + page_size=10, + sort_field=None, + sort_reverse=False, + search_mode=SearchMode.TEXT, + ) + assert infix_match.total == 1 + + phrase_match = backend.search( + "sswo re", + user=None, + page=1, + page_size=10, + sort_field=None, + sort_reverse=False, + search_mode=SearchMode.TEXT, + ) + assert phrase_match.total == 1 + + def test_text_mode_does_not_match_on_partial_term_overlap( + self, + backend: TantivyBackend, + ): + """Simple text mode should not match documents that merely share partial fragments.""" + doc = Document.objects.create( + title="Adobe Acrobat PDF Files", + content="Lorem ipsum dolor sit amet, consectetur adipiscing elit.", + checksum="TXT7", + pk=13, + ) + backend.add_or_update(doc) + + non_match = backend.search( + "raptor", + user=None, + page=1, + page_size=10, + sort_field=None, + sort_reverse=False, + search_mode=SearchMode.TEXT, + ) + assert non_match.total == 0 + + def test_text_mode_anchors_later_query_tokens_to_token_starts( + self, + backend: TantivyBackend, + ): + """Multi-token simple search should not match later tokens in the middle of a word.""" + exact_doc = Document.objects.create( + title="Z-Berichte 6", + content="monthly report", + checksum="TXT9", + pk=15, + ) + prefix_doc = Document.objects.create( + title="Z-Berichte 60", + content="monthly report", + checksum="TXT10", + pk=16, + ) + false_positive = Document.objects.create( + title="Z-Berichte 16", + content="monthly report", + checksum="TXT11", + pk=17, + ) + backend.add_or_update(exact_doc) + backend.add_or_update(prefix_doc) + backend.add_or_update(false_positive) + + results = backend.search( + "Z-Berichte 6", + user=None, + page=1, + page_size=10, + sort_field=None, + sort_reverse=False, + search_mode=SearchMode.TEXT, + ) + result_ids = {hit["id"] for hit in results.hits} + + assert exact_doc.id in result_ids + assert prefix_doc.id in result_ids + assert false_positive.id not in result_ids + + def test_text_mode_ignores_queries_without_searchable_tokens( + self, + backend: TantivyBackend, + ): + """Simple text mode should safely return no hits for symbol-only strings.""" + doc = Document.objects.create( + title="Guide", + content="This is a guide.", + checksum="TXT8", + pk=14, + ) + backend.add_or_update(doc) + + no_tokens = backend.search( + "!!!", + user=None, + page=1, + page_size=10, + sort_field=None, + sort_reverse=False, + search_mode=SearchMode.TEXT, + ) + assert no_tokens.total == 0 + + def test_title_mode_matches_partial_term_substrings( + self, + backend: TantivyBackend, + ): + """Title mode should support substring matching within title tokens.""" + doc = Document.objects.create( + title="Password guide", + content="reset instructions", + checksum="TXT4", + pk=12, + ) + backend.add_or_update(doc) + + prefix_match = backend.search( + "pass", + user=None, + page=1, + page_size=10, + sort_field=None, + sort_reverse=False, + search_mode=SearchMode.TITLE, + ) + assert prefix_match.total == 1 + + infix_match = backend.search( + "sswo", + user=None, + page=1, + page_size=10, + sort_field=None, + sort_reverse=False, + search_mode=SearchMode.TITLE, + ) + assert infix_match.total == 1 + + phrase_match = backend.search( + "sswo gu", + user=None, + page=1, + page_size=10, + sort_field=None, + sort_reverse=False, + search_mode=SearchMode.TITLE, + ) + assert phrase_match.total == 1 + def test_scores_normalised_top_hit_is_one(self, backend: TantivyBackend): """Search scores must be normalized so top hit has score 1.0 for UI consistency.""" for i, title in enumerate(["bank invoice", "bank statement", "bank receipt"]): diff --git a/src/documents/tests/search/test_tokenizer.py b/src/documents/tests/search/test_tokenizer.py index aee52a567..fc2c41231 100644 --- a/src/documents/tests/search/test_tokenizer.py +++ b/src/documents/tests/search/test_tokenizer.py @@ -8,6 +8,7 @@ import tantivy from documents.search._tokenizer import _bigram_analyzer from documents.search._tokenizer import _paperless_text +from documents.search._tokenizer import _simple_search_analyzer from documents.search._tokenizer import register_tokenizers if TYPE_CHECKING: @@ -41,6 +42,20 @@ class TestTokenizers: idx.register_tokenizer("bigram_analyzer", _bigram_analyzer()) return idx + @pytest.fixture + def simple_search_index(self) -> tantivy.Index: + """Index with simple-search field for Latin substring tests.""" + sb = tantivy.SchemaBuilder() + sb.add_text_field( + "simple_content", + stored=False, + tokenizer_name="simple_search_analyzer", + ) + schema = sb.build() + idx = tantivy.Index(schema, path=None) + idx.register_tokenizer("simple_search_analyzer", _simple_search_analyzer()) + return idx + def test_ascii_fold_finds_accented_content( self, content_index: tantivy.Index, @@ -66,6 +81,24 @@ class TestTokenizers: q = bigram_index.parse_query("東京", ["bigram_content"]) assert bigram_index.searcher().search(q, limit=5).count == 1 + def test_simple_search_analyzer_supports_regex_substrings( + self, + simple_search_index: tantivy.Index, + ) -> None: + """Whitespace-preserving simple search analyzer supports substring regex matching.""" + writer = simple_search_index.writer() + doc = tantivy.Document() + doc.add_text("simple_content", "tag:invoice password-reset") + writer.add_document(doc) + writer.commit() + simple_search_index.reload() + q = tantivy.Query.regex_query( + simple_search_index.schema, + "simple_content", + ".*sswo.*", + ) + assert simple_search_index.searcher().search(q, limit=5).count == 1 + def test_unsupported_language_logs_warning(self, caplog: LogCaptureFixture) -> None: """Unsupported language codes should log a warning and disable stemming gracefully.""" sb = tantivy.SchemaBuilder() diff --git a/src/documents/tests/test_api_search.py b/src/documents/tests/test_api_search.py index 69bd65198..9e0879e89 100644 --- a/src/documents/tests/test_api_search.py +++ b/src/documents/tests/test_api_search.py @@ -91,6 +91,135 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase): self.assertEqual(response.data["count"], 0) self.assertEqual(len(results), 0) + def test_simple_text_search(self) -> None: + tagged = Tag.objects.create(name="invoice") + matching_doc = Document.objects.create( + title="Quarterly summary", + content="Monthly bank report", + checksum="T1", + pk=11, + ) + matching_doc.tags.add(tagged) + + metadata_only_doc = Document.objects.create( + title="Completely unrelated", + content="No matching terms here", + checksum="T2", + pk=12, + ) + metadata_only_doc.tags.add(tagged) + + backend = get_backend() + backend.add_or_update(matching_doc) + backend.add_or_update(metadata_only_doc) + + response = self.client.get("/api/documents/?text=monthly") + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(response.data["count"], 1) + self.assertEqual(response.data["results"][0]["id"], matching_doc.id) + + response = self.client.get("/api/documents/?text=tag:invoice") + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(response.data["count"], 0) + + def test_simple_text_search_matches_substrings(self) -> None: + matching_doc = Document.objects.create( + title="Quarterly summary", + content="Password reset instructions", + checksum="T5", + pk=15, + ) + + backend = get_backend() + backend.add_or_update(matching_doc) + + response = self.client.get("/api/documents/?text=pass") + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(response.data["count"], 1) + self.assertEqual(response.data["results"][0]["id"], matching_doc.id) + + response = self.client.get("/api/documents/?text=sswo") + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(response.data["count"], 1) + self.assertEqual(response.data["results"][0]["id"], matching_doc.id) + + response = self.client.get("/api/documents/?text=sswo re") + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(response.data["count"], 1) + self.assertEqual(response.data["results"][0]["id"], matching_doc.id) + + def test_simple_text_search_does_not_match_on_partial_term_overlap(self) -> None: + non_matching_doc = Document.objects.create( + title="Adobe Acrobat PDF Files", + content="Lorem ipsum dolor sit amet, consectetur adipiscing elit.", + checksum="T7", + pk=17, + ) + + backend = get_backend() + backend.add_or_update(non_matching_doc) + + response = self.client.get("/api/documents/?text=raptor") + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(response.data["count"], 0) + + def test_simple_title_search(self) -> None: + title_match = Document.objects.create( + title="Quarterly summary", + content="No matching content here", + checksum="T3", + pk=13, + ) + content_only = Document.objects.create( + title="Completely unrelated", + content="Quarterly summary appears only in content", + checksum="T4", + pk=14, + ) + + backend = get_backend() + backend.add_or_update(title_match) + backend.add_or_update(content_only) + + response = self.client.get("/api/documents/?title_search=quarterly") + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(response.data["count"], 1) + self.assertEqual(response.data["results"][0]["id"], title_match.id) + + def test_simple_title_search_matches_substrings(self) -> None: + title_match = Document.objects.create( + title="Password handbook", + content="No matching content here", + checksum="T6", + pk=16, + ) + + backend = get_backend() + backend.add_or_update(title_match) + + response = self.client.get("/api/documents/?title_search=pass") + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(response.data["count"], 1) + self.assertEqual(response.data["results"][0]["id"], title_match.id) + + response = self.client.get("/api/documents/?title_search=sswo") + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(response.data["count"], 1) + self.assertEqual(response.data["results"][0]["id"], title_match.id) + + response = self.client.get("/api/documents/?title_search=sswo hand") + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(response.data["count"], 1) + self.assertEqual(response.data["results"][0]["id"], title_match.id) + + def test_search_rejects_multiple_search_modes(self) -> None: + response = self.client.get("/api/documents/?text=bank&query=bank") + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertEqual( + response.data["detail"], + "Specify only one of text, title_search, query, or more_like_id.", + ) + def test_search_returns_all_for_api_version_9(self) -> None: d1 = Document.objects.create( title="invoice", @@ -1493,6 +1622,31 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase): self.assertEqual(results["custom_fields"][0]["id"], custom_field1.id) self.assertEqual(results["workflows"][0]["id"], workflow1.id) + def test_global_search_db_only_limits_documents_to_title_matches(self) -> None: + title_match = Document.objects.create( + title="bank statement", + content="no additional terms", + checksum="GS1", + pk=21, + ) + content_only = Document.objects.create( + title="not a title match", + content="bank appears only in content", + checksum="GS2", + pk=22, + ) + + backend = get_backend() + backend.add_or_update(title_match) + backend.add_or_update(content_only) + + self.client.force_authenticate(self.user) + + response = self.client.get("/api/search/?query=bank&db_only=true") + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(len(response.data["documents"]), 1) + self.assertEqual(response.data["documents"][0]["id"], title_match.id) + def test_global_search_filters_owned_mail_objects(self) -> None: user1 = User.objects.create_user("mail-search-user") user2 = User.objects.create_user("other-mail-search-user") diff --git a/src/documents/tests/test_management_exporter.py b/src/documents/tests/test_management_exporter.py index a214ef51d..4ee7677ca 100644 --- a/src/documents/tests/test_management_exporter.py +++ b/src/documents/tests/test_management_exporter.py @@ -2,6 +2,7 @@ import hashlib import json import shutil import tempfile +from datetime import timedelta from io import StringIO from pathlib import Path from unittest import mock @@ -11,6 +12,7 @@ import pytest from allauth.socialaccount.models import SocialAccount from allauth.socialaccount.models import SocialApp from allauth.socialaccount.models import SocialToken +from django.conf import settings from django.contrib.auth.models import Group from django.contrib.auth.models import Permission from django.contrib.contenttypes.models import ContentType @@ -31,6 +33,8 @@ from documents.models import CustomFieldInstance from documents.models import Document from documents.models import DocumentType from documents.models import Note +from documents.models import ShareLink +from documents.models import ShareLinkBundle from documents.models import StoragePath from documents.models import Tag from documents.models import User @@ -39,6 +43,7 @@ from documents.models import WorkflowAction from documents.models import WorkflowTrigger from documents.sanity_checker import check_sanity from documents.settings import EXPORTER_FILE_NAME +from documents.settings import EXPORTER_SHARE_LINK_BUNDLE_NAME from documents.tests.utils import DirectoriesMixin from documents.tests.utils import FileSystemAssertsMixin from documents.tests.utils import SampleDirMixin @@ -306,6 +311,108 @@ class TestExportImport( ): self.test_exporter(use_filename_format=True) + def test_exporter_includes_share_links_and_bundles(self) -> None: + shutil.rmtree(Path(self.dirs.media_dir) / "documents") + shutil.copytree( + Path(__file__).parent / "samples" / "documents", + Path(self.dirs.media_dir) / "documents", + ) + + share_link = ShareLink.objects.create( + slug="share-link-slug", + document=self.d1, + owner=self.user, + file_version=ShareLink.FileVersion.ORIGINAL, + expiration=timezone.now() + timedelta(days=7), + ) + + bundle_relative_path = Path("nested") / "share-bundle.zip" + bundle_source_path = settings.SHARE_LINK_BUNDLE_DIR / bundle_relative_path + bundle_source_path.parent.mkdir(parents=True, exist_ok=True) + bundle_source_path.write_bytes(b"share-bundle-contents") + bundle = ShareLinkBundle.objects.create( + slug="share-bundle-slug", + owner=self.user, + file_version=ShareLink.FileVersion.ARCHIVE, + expiration=timezone.now() + timedelta(days=7), + status=ShareLinkBundle.Status.READY, + size_bytes=bundle_source_path.stat().st_size, + file_path=str(bundle_relative_path), + built_at=timezone.now(), + ) + bundle.documents.set([self.d1, self.d2]) + + manifest = self._do_export() + + share_link_records = [ + record for record in manifest if record["model"] == "documents.sharelink" + ] + self.assertEqual(len(share_link_records), 1) + self.assertEqual(share_link_records[0]["pk"], share_link.pk) + self.assertEqual(share_link_records[0]["fields"]["document"], self.d1.pk) + self.assertEqual(share_link_records[0]["fields"]["owner"], self.user.pk) + + share_link_bundle_records = [ + record + for record in manifest + if record["model"] == "documents.sharelinkbundle" + ] + self.assertEqual(len(share_link_bundle_records), 1) + bundle_record = share_link_bundle_records[0] + self.assertEqual(bundle_record["pk"], bundle.pk) + self.assertEqual( + bundle_record["fields"]["documents"], + [self.d1.pk, self.d2.pk], + ) + self.assertEqual( + bundle_record[EXPORTER_SHARE_LINK_BUNDLE_NAME], + "share_link_bundles/nested/share-bundle.zip", + ) + self.assertEqual( + bundle_record["fields"]["file_path"], + "nested/share-bundle.zip", + ) + self.assertIsFile(self.target / bundle_record[EXPORTER_SHARE_LINK_BUNDLE_NAME]) + + with paperless_environment(): + ShareLink.objects.all().delete() + ShareLinkBundle.objects.all().delete() + shutil.rmtree(settings.SHARE_LINK_BUNDLE_DIR, ignore_errors=True) + + call_command( + "document_importer", + "--no-progress-bar", + self.target, + skip_checks=True, + ) + + imported_share_link = ShareLink.objects.get(pk=share_link.pk) + self.assertEqual(imported_share_link.document_id, self.d1.pk) + self.assertEqual(imported_share_link.owner_id, self.user.pk) + self.assertEqual( + imported_share_link.file_version, + ShareLink.FileVersion.ORIGINAL, + ) + + imported_bundle = ShareLinkBundle.objects.get(pk=bundle.pk) + imported_bundle_path = imported_bundle.absolute_file_path + self.assertEqual(imported_bundle.owner_id, self.user.pk) + self.assertEqual( + list( + imported_bundle.documents.order_by("pk").values_list( + "pk", + flat=True, + ), + ), + [self.d1.pk, self.d2.pk], + ) + self.assertEqual(imported_bundle.file_path, "nested/share-bundle.zip") + self.assertIsNotNone(imported_bundle_path) + self.assertEqual( + imported_bundle_path.read_bytes(), + b"share-bundle-contents", + ) + def test_update_export_changed_time(self) -> None: shutil.rmtree(Path(self.dirs.media_dir) / "documents") shutil.copytree( diff --git a/src/documents/tests/utils.py b/src/documents/tests/utils.py index cc4190974..98c8258b8 100644 --- a/src/documents/tests/utils.py +++ b/src/documents/tests/utils.py @@ -435,7 +435,11 @@ class DummyProgressManager: message: str, current_progress: int, max_progress: int, - extra_args: dict[str, str | int] | None = None, + *, + document_id: int | None = None, + owner_id: int | None = None, + users_can_view: list[int] | None = None, + groups_can_view: list[int] | None = None, ) -> None: # Ensure the layer is open self.open() @@ -449,9 +453,10 @@ class DummyProgressManager: "max_progress": max_progress, "status": status, "message": message, + "document_id": document_id, + "owner_id": owner_id, + "users_can_view": users_can_view or [], + "groups_can_view": groups_can_view or [], }, } - if extra_args is not None: - payload["data"].update(extra_args) - self.payloads.append(payload) diff --git a/src/documents/views.py b/src/documents/views.py index 024e846a0..68d2b7961 100644 --- a/src/documents/views.py +++ b/src/documents/views.py @@ -1995,11 +1995,23 @@ class ChatStreamingView(GenericAPIView): list=extend_schema( description="Document views including search", parameters=[ + OpenApiParameter( + name="text", + type=OpenApiTypes.STR, + location=OpenApiParameter.QUERY, + description="Simple Tantivy-backed text search query string", + ), + OpenApiParameter( + name="title_search", + type=OpenApiTypes.STR, + location=OpenApiParameter.QUERY, + description="Simple Tantivy-backed title-only search query string", + ), OpenApiParameter( name="query", type=OpenApiTypes.STR, location=OpenApiParameter.QUERY, - description="Advanced search query string", + description="Advanced Tantivy search query string", ), OpenApiParameter( name="full_perms", @@ -2025,22 +2037,28 @@ class ChatStreamingView(GenericAPIView): ), ) class UnifiedSearchViewSet(DocumentViewSet): + SEARCH_PARAM_NAMES = ("text", "title_search", "query", "more_like_id") + def get_serializer_class(self): if self._is_search_request(): return SearchResultSerializer else: return DocumentSerializer + def _get_active_search_params(self, request: Request | None = None) -> list[str]: + request = request or self.request + return [ + param for param in self.SEARCH_PARAM_NAMES if param in request.query_params + ] + def _is_search_request(self): - return ( - "query" in self.request.query_params - or "more_like_id" in self.request.query_params - ) + return bool(self._get_active_search_params()) def list(self, request, *args, **kwargs): if not self._is_search_request(): return super().list(request) + from documents.search import SearchMode from documents.search import TantivyRelevanceList from documents.search import get_backend @@ -2050,9 +2068,31 @@ class UnifiedSearchViewSet(DocumentViewSet): filtered_qs = self.filter_queryset(self.get_queryset()) user = None if request.user.is_superuser else request.user + active_search_params = self._get_active_search_params(request) - if "query" in request.query_params: - query_str = request.query_params["query"] + if len(active_search_params) > 1: + raise ValidationError( + { + "detail": _( + "Specify only one of text, title_search, query, or more_like_id.", + ), + }, + ) + + if ( + "text" in request.query_params + or "title_search" in request.query_params + or "query" in request.query_params + ): + if "text" in request.query_params: + search_mode = SearchMode.TEXT + query_str = request.query_params["text"] + elif "title_search" in request.query_params: + search_mode = SearchMode.TITLE + query_str = request.query_params["title_search"] + else: + search_mode = SearchMode.QUERY + query_str = request.query_params["query"] results = backend.search( query_str, user=user, @@ -2060,6 +2100,7 @@ class UnifiedSearchViewSet(DocumentViewSet): page_size=10000, sort_field=None, sort_reverse=False, + search_mode=search_mode, ) else: # more_like_id — validate permission on the seed document first @@ -2132,6 +2173,8 @@ class UnifiedSearchViewSet(DocumentViewSet): if str(e.detail) == str(invalid_more_like_id_message): return HttpResponseForbidden(invalid_more_like_id_message) return HttpResponseForbidden(_("Insufficient permissions.")) + except ValidationError: + raise except Exception as e: logger.warning(f"An error occurred listing search results: {e!s}") return HttpResponseBadRequest( @@ -3003,6 +3046,9 @@ class GlobalSearchView(PassUserMixin): serializer_class = SearchResultSerializer def get(self, request, *args, **kwargs): + from documents.search import SearchMode + from documents.search import get_backend + query = request.query_params.get("query", None) if query is None: return HttpResponseBadRequest("Query required") @@ -3019,25 +3065,25 @@ class GlobalSearchView(PassUserMixin): "view_document", Document, ) - # First search by title - docs = all_docs.filter(title__icontains=query) - if not db_only and len(docs) < OBJECT_LIMIT: - # If we don't have enough results, search by content. - # Over-fetch from Tantivy (no permission filter) and rely on - # the ORM all_docs queryset for authoritative permission gating. - from documents.search import get_backend - + if db_only: + docs = all_docs.filter(title__icontains=query)[:OBJECT_LIMIT] + else: + user = None if request.user.is_superuser else request.user fts_results = get_backend().search( query, - user=None, + user=user, page=1, page_size=1000, sort_field=None, sort_reverse=False, + search_mode=SearchMode.TEXT, ) - fts_ids = {h["id"] for h in fts_results.hits} - docs = docs | all_docs.filter(id__in=fts_ids) - docs = docs[:OBJECT_LIMIT] + docs_by_id = all_docs.in_bulk([hit["id"] for hit in fts_results.hits]) + docs = [ + docs_by_id[hit["id"]] + for hit in fts_results.hits + if hit["id"] in docs_by_id + ][:OBJECT_LIMIT] saved_views = ( get_objects_for_user_owner_aware( request.user, diff --git a/src/locale/en_US/LC_MESSAGES/django.po b/src/locale/en_US/LC_MESSAGES/django.po index 57ade319a..03fdcc6e1 100644 --- a/src/locale/en_US/LC_MESSAGES/django.po +++ b/src/locale/en_US/LC_MESSAGES/django.po @@ -2,7 +2,7 @@ msgid "" msgstr "" "Project-Id-Version: paperless-ngx\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2026-04-03 03:25+0000\n" +"POT-Creation-Date: 2026-04-03 20:54+0000\n" "PO-Revision-Date: 2022-02-17 04:17\n" "Last-Translator: \n" "Language-Team: English\n" @@ -21,67 +21,67 @@ msgstr "" msgid "Documents" msgstr "" -#: documents/filters.py:421 +#: documents/filters.py:431 msgid "Value must be valid JSON." msgstr "" -#: documents/filters.py:440 +#: documents/filters.py:450 msgid "Invalid custom field query expression" msgstr "" -#: documents/filters.py:450 +#: documents/filters.py:460 msgid "Invalid expression list. Must be nonempty." msgstr "" -#: documents/filters.py:471 +#: documents/filters.py:481 msgid "Invalid logical operator {op!r}" msgstr "" -#: documents/filters.py:485 +#: documents/filters.py:495 msgid "Maximum number of query conditions exceeded." msgstr "" -#: documents/filters.py:550 +#: documents/filters.py:560 msgid "{name!r} is not a valid custom field." msgstr "" -#: documents/filters.py:587 +#: documents/filters.py:597 msgid "{data_type} does not support query expr {expr!r}." msgstr "" -#: documents/filters.py:695 documents/models.py:137 +#: documents/filters.py:705 documents/models.py:137 msgid "Maximum nesting depth exceeded." msgstr "" -#: documents/filters.py:907 +#: documents/filters.py:919 msgid "Custom field not found" msgstr "" -#: documents/models.py:40 documents/models.py:842 documents/models.py:890 +#: documents/models.py:40 documents/models.py:844 documents/models.py:892 msgid "owner" msgstr "" -#: documents/models.py:57 documents/models.py:1172 +#: documents/models.py:57 documents/models.py:1174 msgid "None" msgstr "" -#: documents/models.py:58 documents/models.py:1173 +#: documents/models.py:58 documents/models.py:1175 msgid "Any word" msgstr "" -#: documents/models.py:59 documents/models.py:1174 +#: documents/models.py:59 documents/models.py:1176 msgid "All words" msgstr "" -#: documents/models.py:60 documents/models.py:1175 +#: documents/models.py:60 documents/models.py:1177 msgid "Exact match" msgstr "" -#: documents/models.py:61 documents/models.py:1176 +#: documents/models.py:61 documents/models.py:1178 msgid "Regular expression" msgstr "" -#: documents/models.py:62 documents/models.py:1177 +#: documents/models.py:62 documents/models.py:1179 msgid "Fuzzy word" msgstr "" @@ -89,20 +89,20 @@ msgstr "" msgid "Automatic" msgstr "" -#: documents/models.py:66 documents/models.py:536 documents/models.py:1755 +#: documents/models.py:66 documents/models.py:536 documents/models.py:1757 #: paperless_mail/models.py:23 paperless_mail/models.py:143 msgid "name" msgstr "" -#: documents/models.py:68 documents/models.py:1241 +#: documents/models.py:68 documents/models.py:1243 msgid "match" msgstr "" -#: documents/models.py:71 documents/models.py:1244 +#: documents/models.py:71 documents/models.py:1246 msgid "matching algorithm" msgstr "" -#: documents/models.py:76 documents/models.py:1249 +#: documents/models.py:76 documents/models.py:1251 msgid "is insensitive" msgstr "" @@ -168,7 +168,7 @@ msgstr "" msgid "title" msgstr "" -#: documents/models.py:191 documents/models.py:756 +#: documents/models.py:191 documents/models.py:758 msgid "content" msgstr "" @@ -206,8 +206,8 @@ msgstr "" msgid "The number of pages of the document." msgstr "" -#: documents/models.py:246 documents/models.py:762 documents/models.py:800 -#: documents/models.py:862 documents/models.py:980 documents/models.py:1039 +#: documents/models.py:246 documents/models.py:764 documents/models.py:802 +#: documents/models.py:864 documents/models.py:982 documents/models.py:1041 msgid "created" msgstr "" @@ -271,12 +271,12 @@ msgstr "" msgid "Optional short label for a document version." msgstr "" -#: documents/models.py:340 documents/models.py:773 documents/models.py:827 -#: documents/models.py:1798 +#: documents/models.py:340 documents/models.py:775 documents/models.py:829 +#: documents/models.py:1800 msgid "document" msgstr "" -#: documents/models.py:341 documents/models.py:933 +#: documents/models.py:341 documents/models.py:935 msgid "documents" msgstr "" @@ -296,11 +296,11 @@ msgstr "" msgid "Title" msgstr "" -#: documents/models.py:523 documents/models.py:1193 +#: documents/models.py:523 documents/models.py:1195 msgid "Created" msgstr "" -#: documents/models.py:524 documents/models.py:1192 +#: documents/models.py:524 documents/models.py:1194 msgid "Added" msgstr "" @@ -360,7 +360,7 @@ msgstr "" msgid "Document display fields" msgstr "" -#: documents/models.py:569 documents/models.py:632 +#: documents/models.py:569 documents/models.py:634 msgid "saved view" msgstr "" @@ -560,748 +560,756 @@ msgstr "" msgid "mime type is" msgstr "" -#: documents/models.py:635 -msgid "rule type" +#: documents/models.py:626 +msgid "simple title search" +msgstr "" + +#: documents/models.py:627 +msgid "simple text search" msgstr "" #: documents/models.py:637 +msgid "rule type" +msgstr "" + +#: documents/models.py:639 msgid "value" msgstr "" -#: documents/models.py:640 +#: documents/models.py:642 msgid "filter rule" msgstr "" -#: documents/models.py:641 +#: documents/models.py:643 msgid "filter rules" msgstr "" -#: documents/models.py:665 +#: documents/models.py:667 msgid "Auto Task" msgstr "" -#: documents/models.py:666 +#: documents/models.py:668 msgid "Scheduled Task" msgstr "" -#: documents/models.py:667 +#: documents/models.py:669 msgid "Manual Task" msgstr "" -#: documents/models.py:670 +#: documents/models.py:672 msgid "Consume File" msgstr "" -#: documents/models.py:671 +#: documents/models.py:673 msgid "Train Classifier" msgstr "" -#: documents/models.py:672 +#: documents/models.py:674 msgid "Check Sanity" msgstr "" -#: documents/models.py:673 +#: documents/models.py:675 msgid "Index Optimize" msgstr "" -#: documents/models.py:674 +#: documents/models.py:676 msgid "LLM Index Update" msgstr "" -#: documents/models.py:679 +#: documents/models.py:681 msgid "Task ID" msgstr "" -#: documents/models.py:680 +#: documents/models.py:682 msgid "Celery ID for the Task that was run" msgstr "" -#: documents/models.py:685 +#: documents/models.py:687 msgid "Acknowledged" msgstr "" -#: documents/models.py:686 +#: documents/models.py:688 msgid "If the task is acknowledged via the frontend or API" msgstr "" -#: documents/models.py:692 +#: documents/models.py:694 msgid "Task Filename" msgstr "" -#: documents/models.py:693 +#: documents/models.py:695 msgid "Name of the file which the Task was run for" msgstr "" -#: documents/models.py:700 +#: documents/models.py:702 msgid "Task Name" msgstr "" -#: documents/models.py:701 +#: documents/models.py:703 msgid "Name of the task that was run" msgstr "" -#: documents/models.py:708 +#: documents/models.py:710 msgid "Task State" msgstr "" -#: documents/models.py:709 +#: documents/models.py:711 msgid "Current state of the task being run" msgstr "" -#: documents/models.py:715 +#: documents/models.py:717 msgid "Created DateTime" msgstr "" -#: documents/models.py:716 +#: documents/models.py:718 msgid "Datetime field when the task result was created in UTC" msgstr "" -#: documents/models.py:722 +#: documents/models.py:724 msgid "Started DateTime" msgstr "" -#: documents/models.py:723 +#: documents/models.py:725 msgid "Datetime field when the task was started in UTC" msgstr "" -#: documents/models.py:729 +#: documents/models.py:731 msgid "Completed DateTime" msgstr "" -#: documents/models.py:730 +#: documents/models.py:732 msgid "Datetime field when the task was completed in UTC" msgstr "" -#: documents/models.py:736 +#: documents/models.py:738 msgid "Result Data" msgstr "" -#: documents/models.py:738 +#: documents/models.py:740 msgid "The data returned by the task" msgstr "" -#: documents/models.py:746 +#: documents/models.py:748 msgid "Task Type" msgstr "" -#: documents/models.py:747 +#: documents/models.py:749 msgid "The type of task that was run" msgstr "" -#: documents/models.py:758 +#: documents/models.py:760 msgid "Note for the document" msgstr "" -#: documents/models.py:782 +#: documents/models.py:784 msgid "user" msgstr "" -#: documents/models.py:787 +#: documents/models.py:789 msgid "note" msgstr "" -#: documents/models.py:788 +#: documents/models.py:790 msgid "notes" msgstr "" -#: documents/models.py:796 +#: documents/models.py:798 msgid "Archive" msgstr "" -#: documents/models.py:797 +#: documents/models.py:799 msgid "Original" msgstr "" -#: documents/models.py:808 documents/models.py:870 paperless_mail/models.py:75 +#: documents/models.py:810 documents/models.py:872 paperless_mail/models.py:75 msgid "expiration" msgstr "" -#: documents/models.py:815 documents/models.py:877 +#: documents/models.py:817 documents/models.py:879 msgid "slug" msgstr "" -#: documents/models.py:847 +#: documents/models.py:849 msgid "share link" msgstr "" -#: documents/models.py:848 +#: documents/models.py:850 msgid "share links" msgstr "" -#: documents/models.py:856 +#: documents/models.py:858 msgid "Pending" msgstr "" -#: documents/models.py:857 +#: documents/models.py:859 msgid "Processing" msgstr "" -#: documents/models.py:858 +#: documents/models.py:860 msgid "Ready" msgstr "" -#: documents/models.py:859 +#: documents/models.py:861 msgid "Failed" msgstr "" -#: documents/models.py:906 +#: documents/models.py:908 msgid "size (bytes)" msgstr "" -#: documents/models.py:912 +#: documents/models.py:914 msgid "last error" msgstr "" -#: documents/models.py:919 +#: documents/models.py:921 msgid "file path" msgstr "" -#: documents/models.py:925 +#: documents/models.py:927 msgid "built at" msgstr "" -#: documents/models.py:938 +#: documents/models.py:940 msgid "share link bundle" msgstr "" -#: documents/models.py:939 +#: documents/models.py:941 msgid "share link bundles" msgstr "" -#: documents/models.py:942 +#: documents/models.py:944 #, python-format msgid "Share link bundle %(slug)s" msgstr "" -#: documents/models.py:968 +#: documents/models.py:970 msgid "String" msgstr "" -#: documents/models.py:969 +#: documents/models.py:971 msgid "URL" msgstr "" -#: documents/models.py:970 +#: documents/models.py:972 msgid "Date" msgstr "" -#: documents/models.py:971 +#: documents/models.py:973 msgid "Boolean" msgstr "" -#: documents/models.py:972 +#: documents/models.py:974 msgid "Integer" msgstr "" -#: documents/models.py:973 +#: documents/models.py:975 msgid "Float" msgstr "" -#: documents/models.py:974 +#: documents/models.py:976 msgid "Monetary" msgstr "" -#: documents/models.py:975 +#: documents/models.py:977 msgid "Document Link" msgstr "" -#: documents/models.py:976 +#: documents/models.py:978 msgid "Select" msgstr "" -#: documents/models.py:977 +#: documents/models.py:979 msgid "Long Text" msgstr "" -#: documents/models.py:989 +#: documents/models.py:991 msgid "data type" msgstr "" -#: documents/models.py:996 +#: documents/models.py:998 msgid "extra data" msgstr "" -#: documents/models.py:1000 +#: documents/models.py:1002 msgid "Extra data for the custom field, such as select options" msgstr "" -#: documents/models.py:1006 +#: documents/models.py:1008 msgid "custom field" msgstr "" -#: documents/models.py:1007 +#: documents/models.py:1009 msgid "custom fields" msgstr "" -#: documents/models.py:1107 +#: documents/models.py:1109 msgid "custom field instance" msgstr "" -#: documents/models.py:1108 +#: documents/models.py:1110 msgid "custom field instances" msgstr "" -#: documents/models.py:1180 +#: documents/models.py:1182 msgid "Consumption Started" msgstr "" -#: documents/models.py:1181 +#: documents/models.py:1183 msgid "Document Added" msgstr "" -#: documents/models.py:1182 +#: documents/models.py:1184 msgid "Document Updated" msgstr "" -#: documents/models.py:1183 +#: documents/models.py:1185 msgid "Scheduled" msgstr "" -#: documents/models.py:1186 +#: documents/models.py:1188 msgid "Consume Folder" msgstr "" -#: documents/models.py:1187 +#: documents/models.py:1189 msgid "Api Upload" msgstr "" -#: documents/models.py:1188 +#: documents/models.py:1190 msgid "Mail Fetch" msgstr "" -#: documents/models.py:1189 +#: documents/models.py:1191 msgid "Web UI" msgstr "" -#: documents/models.py:1194 +#: documents/models.py:1196 msgid "Modified" msgstr "" -#: documents/models.py:1195 +#: documents/models.py:1197 msgid "Custom Field" msgstr "" -#: documents/models.py:1198 +#: documents/models.py:1200 msgid "Workflow Trigger Type" msgstr "" -#: documents/models.py:1210 +#: documents/models.py:1212 msgid "filter path" msgstr "" -#: documents/models.py:1215 +#: documents/models.py:1217 msgid "" "Only consume documents with a path that matches this if specified. Wildcards " "specified as * are allowed. Case insensitive." msgstr "" -#: documents/models.py:1222 +#: documents/models.py:1224 msgid "filter filename" msgstr "" -#: documents/models.py:1227 paperless_mail/models.py:200 +#: documents/models.py:1229 paperless_mail/models.py:200 msgid "" "Only consume documents which entirely match this filename if specified. " "Wildcards such as *.pdf or *invoice* are allowed. Case insensitive." msgstr "" -#: documents/models.py:1238 +#: documents/models.py:1240 msgid "filter documents from this mail rule" msgstr "" -#: documents/models.py:1254 +#: documents/models.py:1256 msgid "has these tag(s)" msgstr "" -#: documents/models.py:1261 +#: documents/models.py:1263 msgid "has all of these tag(s)" msgstr "" -#: documents/models.py:1268 +#: documents/models.py:1270 msgid "does not have these tag(s)" msgstr "" -#: documents/models.py:1276 +#: documents/models.py:1278 msgid "has this document type" msgstr "" -#: documents/models.py:1283 +#: documents/models.py:1285 msgid "has one of these document types" msgstr "" -#: documents/models.py:1290 +#: documents/models.py:1292 msgid "does not have these document type(s)" msgstr "" -#: documents/models.py:1298 +#: documents/models.py:1300 msgid "has this correspondent" msgstr "" -#: documents/models.py:1305 +#: documents/models.py:1307 msgid "does not have these correspondent(s)" msgstr "" -#: documents/models.py:1312 +#: documents/models.py:1314 msgid "has one of these correspondents" msgstr "" -#: documents/models.py:1320 +#: documents/models.py:1322 msgid "has this storage path" msgstr "" -#: documents/models.py:1327 +#: documents/models.py:1329 msgid "has one of these storage paths" msgstr "" -#: documents/models.py:1334 +#: documents/models.py:1336 msgid "does not have these storage path(s)" msgstr "" -#: documents/models.py:1338 +#: documents/models.py:1340 msgid "filter custom field query" msgstr "" -#: documents/models.py:1341 +#: documents/models.py:1343 msgid "JSON-encoded custom field query expression." msgstr "" -#: documents/models.py:1345 +#: documents/models.py:1347 msgid "schedule offset days" msgstr "" -#: documents/models.py:1348 +#: documents/models.py:1350 msgid "The number of days to offset the schedule trigger by." msgstr "" -#: documents/models.py:1353 +#: documents/models.py:1355 msgid "schedule is recurring" msgstr "" -#: documents/models.py:1356 +#: documents/models.py:1358 msgid "If the schedule should be recurring." msgstr "" -#: documents/models.py:1361 +#: documents/models.py:1363 msgid "schedule recurring delay in days" msgstr "" -#: documents/models.py:1365 +#: documents/models.py:1367 msgid "The number of days between recurring schedule triggers." msgstr "" -#: documents/models.py:1370 +#: documents/models.py:1372 msgid "schedule date field" msgstr "" -#: documents/models.py:1375 +#: documents/models.py:1377 msgid "The field to check for a schedule trigger." msgstr "" -#: documents/models.py:1384 +#: documents/models.py:1386 msgid "schedule date custom field" msgstr "" -#: documents/models.py:1388 +#: documents/models.py:1390 msgid "workflow trigger" msgstr "" -#: documents/models.py:1389 +#: documents/models.py:1391 msgid "workflow triggers" msgstr "" -#: documents/models.py:1397 +#: documents/models.py:1399 msgid "email subject" msgstr "" -#: documents/models.py:1401 +#: documents/models.py:1403 msgid "" "The subject of the email, can include some placeholders, see documentation." msgstr "" -#: documents/models.py:1407 +#: documents/models.py:1409 msgid "email body" msgstr "" -#: documents/models.py:1410 +#: documents/models.py:1412 msgid "" "The body (message) of the email, can include some placeholders, see " "documentation." msgstr "" -#: documents/models.py:1416 +#: documents/models.py:1418 msgid "emails to" msgstr "" -#: documents/models.py:1419 +#: documents/models.py:1421 msgid "The destination email addresses, comma separated." msgstr "" -#: documents/models.py:1425 +#: documents/models.py:1427 msgid "include document in email" msgstr "" -#: documents/models.py:1436 +#: documents/models.py:1438 msgid "webhook url" msgstr "" -#: documents/models.py:1439 +#: documents/models.py:1441 msgid "The destination URL for the notification." msgstr "" -#: documents/models.py:1444 +#: documents/models.py:1446 msgid "use parameters" msgstr "" -#: documents/models.py:1449 +#: documents/models.py:1451 msgid "send as JSON" msgstr "" -#: documents/models.py:1453 +#: documents/models.py:1455 msgid "webhook parameters" msgstr "" -#: documents/models.py:1456 +#: documents/models.py:1458 msgid "The parameters to send with the webhook URL if body not used." msgstr "" -#: documents/models.py:1460 +#: documents/models.py:1462 msgid "webhook body" msgstr "" -#: documents/models.py:1463 +#: documents/models.py:1465 msgid "The body to send with the webhook URL if parameters not used." msgstr "" -#: documents/models.py:1467 +#: documents/models.py:1469 msgid "webhook headers" msgstr "" -#: documents/models.py:1470 +#: documents/models.py:1472 msgid "The headers to send with the webhook URL." msgstr "" -#: documents/models.py:1475 +#: documents/models.py:1477 msgid "include document in webhook" msgstr "" -#: documents/models.py:1486 +#: documents/models.py:1488 msgid "Assignment" msgstr "" -#: documents/models.py:1490 +#: documents/models.py:1492 msgid "Removal" msgstr "" -#: documents/models.py:1494 documents/templates/account/password_reset.html:15 +#: documents/models.py:1496 documents/templates/account/password_reset.html:15 msgid "Email" msgstr "" -#: documents/models.py:1498 +#: documents/models.py:1500 msgid "Webhook" msgstr "" -#: documents/models.py:1502 +#: documents/models.py:1504 msgid "Password removal" msgstr "" -#: documents/models.py:1506 +#: documents/models.py:1508 msgid "Move to trash" msgstr "" -#: documents/models.py:1510 +#: documents/models.py:1512 msgid "Workflow Action Type" msgstr "" -#: documents/models.py:1515 documents/models.py:1757 +#: documents/models.py:1517 documents/models.py:1759 #: paperless_mail/models.py:145 msgid "order" msgstr "" -#: documents/models.py:1518 +#: documents/models.py:1520 msgid "assign title" msgstr "" -#: documents/models.py:1522 +#: documents/models.py:1524 msgid "Assign a document title, must be a Jinja2 template, see documentation." msgstr "" -#: documents/models.py:1530 paperless_mail/models.py:274 +#: documents/models.py:1532 paperless_mail/models.py:274 msgid "assign this tag" msgstr "" -#: documents/models.py:1539 paperless_mail/models.py:282 +#: documents/models.py:1541 paperless_mail/models.py:282 msgid "assign this document type" msgstr "" -#: documents/models.py:1548 paperless_mail/models.py:296 +#: documents/models.py:1550 paperless_mail/models.py:296 msgid "assign this correspondent" msgstr "" -#: documents/models.py:1557 +#: documents/models.py:1559 msgid "assign this storage path" msgstr "" -#: documents/models.py:1566 +#: documents/models.py:1568 msgid "assign this owner" msgstr "" -#: documents/models.py:1573 +#: documents/models.py:1575 msgid "grant view permissions to these users" msgstr "" -#: documents/models.py:1580 +#: documents/models.py:1582 msgid "grant view permissions to these groups" msgstr "" -#: documents/models.py:1587 +#: documents/models.py:1589 msgid "grant change permissions to these users" msgstr "" -#: documents/models.py:1594 +#: documents/models.py:1596 msgid "grant change permissions to these groups" msgstr "" -#: documents/models.py:1601 +#: documents/models.py:1603 msgid "assign these custom fields" msgstr "" -#: documents/models.py:1605 +#: documents/models.py:1607 msgid "custom field values" msgstr "" -#: documents/models.py:1609 +#: documents/models.py:1611 msgid "Optional values to assign to the custom fields." msgstr "" -#: documents/models.py:1618 +#: documents/models.py:1620 msgid "remove these tag(s)" msgstr "" -#: documents/models.py:1623 +#: documents/models.py:1625 msgid "remove all tags" msgstr "" -#: documents/models.py:1630 +#: documents/models.py:1632 msgid "remove these document type(s)" msgstr "" -#: documents/models.py:1635 +#: documents/models.py:1637 msgid "remove all document types" msgstr "" -#: documents/models.py:1642 +#: documents/models.py:1644 msgid "remove these correspondent(s)" msgstr "" -#: documents/models.py:1647 +#: documents/models.py:1649 msgid "remove all correspondents" msgstr "" -#: documents/models.py:1654 +#: documents/models.py:1656 msgid "remove these storage path(s)" msgstr "" -#: documents/models.py:1659 +#: documents/models.py:1661 msgid "remove all storage paths" msgstr "" -#: documents/models.py:1666 +#: documents/models.py:1668 msgid "remove these owner(s)" msgstr "" -#: documents/models.py:1671 +#: documents/models.py:1673 msgid "remove all owners" msgstr "" -#: documents/models.py:1678 +#: documents/models.py:1680 msgid "remove view permissions for these users" msgstr "" -#: documents/models.py:1685 +#: documents/models.py:1687 msgid "remove view permissions for these groups" msgstr "" -#: documents/models.py:1692 +#: documents/models.py:1694 msgid "remove change permissions for these users" msgstr "" -#: documents/models.py:1699 +#: documents/models.py:1701 msgid "remove change permissions for these groups" msgstr "" -#: documents/models.py:1704 +#: documents/models.py:1706 msgid "remove all permissions" msgstr "" -#: documents/models.py:1711 +#: documents/models.py:1713 msgid "remove these custom fields" msgstr "" -#: documents/models.py:1716 +#: documents/models.py:1718 msgid "remove all custom fields" msgstr "" -#: documents/models.py:1725 +#: documents/models.py:1727 msgid "email" msgstr "" -#: documents/models.py:1734 +#: documents/models.py:1736 msgid "webhook" msgstr "" -#: documents/models.py:1738 +#: documents/models.py:1740 msgid "passwords" msgstr "" -#: documents/models.py:1742 +#: documents/models.py:1744 msgid "" "Passwords to try when removing PDF protection. Separate with commas or new " "lines." msgstr "" -#: documents/models.py:1747 +#: documents/models.py:1749 msgid "workflow action" msgstr "" -#: documents/models.py:1748 +#: documents/models.py:1750 msgid "workflow actions" msgstr "" -#: documents/models.py:1763 +#: documents/models.py:1765 msgid "triggers" msgstr "" -#: documents/models.py:1770 +#: documents/models.py:1772 msgid "actions" msgstr "" -#: documents/models.py:1773 paperless_mail/models.py:154 +#: documents/models.py:1775 paperless_mail/models.py:154 msgid "enabled" msgstr "" -#: documents/models.py:1784 +#: documents/models.py:1786 msgid "workflow" msgstr "" -#: documents/models.py:1788 +#: documents/models.py:1790 msgid "workflow trigger type" msgstr "" -#: documents/models.py:1802 +#: documents/models.py:1804 msgid "date run" msgstr "" -#: documents/models.py:1808 +#: documents/models.py:1810 msgid "workflow run" msgstr "" -#: documents/models.py:1809 +#: documents/models.py:1811 msgid "workflow runs" msgstr "" #: documents/serialisers.py:463 documents/serialisers.py:815 -#: documents/serialisers.py:2545 documents/views.py:2079 -#: documents/views.py:2134 paperless_mail/serialisers.py:143 +#: documents/serialisers.py:2545 documents/views.py:2120 +#: documents/views.py:2175 paperless_mail/serialisers.py:143 msgid "Insufficient permissions." msgstr "" @@ -1341,7 +1349,7 @@ msgstr "" msgid "Duplicate document identifiers are not allowed." msgstr "" -#: documents/serialisers.py:2631 documents/views.py:3738 +#: documents/serialisers.py:2631 documents/views.py:3784 #, python-format msgid "Documents not found: %(ids)s" msgstr "" @@ -1609,24 +1617,28 @@ msgstr "" msgid "Unable to parse URI {value}" msgstr "" -#: documents/views.py:2072 documents/views.py:2131 +#: documents/views.py:2077 +msgid "Specify only one of text, title_search, query, or more_like_id." +msgstr "" + +#: documents/views.py:2113 documents/views.py:2172 msgid "Invalid more_like_id" msgstr "" -#: documents/views.py:3750 +#: documents/views.py:3796 #, python-format msgid "Insufficient permissions to share document %(id)s." msgstr "" -#: documents/views.py:3793 +#: documents/views.py:3839 msgid "Bundle is already being processed." msgstr "" -#: documents/views.py:3850 +#: documents/views.py:3896 msgid "The share link bundle is still being prepared. Please try again later." msgstr "" -#: documents/views.py:3860 +#: documents/views.py:3906 msgid "The share link bundle is unavailable." msgstr "" diff --git a/src/paperless/consumers.py b/src/paperless/consumers.py index 9d59a1a5a..4a3cda8fe 100644 --- a/src/paperless/consumers.py +++ b/src/paperless/consumers.py @@ -1,16 +1,27 @@ +from __future__ import annotations + import json -from typing import Any +from typing import TYPE_CHECKING from channels.generic.websocket import AsyncWebsocketConsumer +if TYPE_CHECKING: + from django.contrib.auth.base_user import AbstractBaseUser + from django.contrib.auth.models import AnonymousUser + + from documents.plugins.helpers import DocumentsDeletedPayload + from documents.plugins.helpers import DocumentUpdatedPayload + from documents.plugins.helpers import PermissionsData + from documents.plugins.helpers import StatusUpdatePayload + class StatusConsumer(AsyncWebsocketConsumer): def _authenticated(self) -> bool: - user: Any = self.scope.get("user") + user: AbstractBaseUser | AnonymousUser | None = self.scope.get("user") return user is not None and user.is_authenticated - async def _can_view(self, data: dict[str, Any]) -> bool: - user: Any = self.scope.get("user") + async def _can_view(self, data: PermissionsData) -> bool: + user: AbstractBaseUser | AnonymousUser | None = self.scope.get("user") if user is None: return False owner_id = data.get("owner_id") @@ -32,19 +43,19 @@ class StatusConsumer(AsyncWebsocketConsumer): async def disconnect(self, code: int) -> None: await self.channel_layer.group_discard("status_updates", self.channel_name) - async def status_update(self, event: dict[str, Any]) -> None: + async def status_update(self, event: StatusUpdatePayload) -> None: if not self._authenticated(): await self.close() elif await self._can_view(event["data"]): await self.send(json.dumps(event)) - async def documents_deleted(self, event: dict[str, Any]) -> None: + async def documents_deleted(self, event: DocumentsDeletedPayload) -> None: if not self._authenticated(): await self.close() else: await self.send(json.dumps(event)) - async def document_updated(self, event: dict[str, Any]) -> None: + async def document_updated(self, event: DocumentUpdatedPayload) -> None: if not self._authenticated(): await self.close() elif await self._can_view(event["data"]): diff --git a/src/paperless/logging.py b/src/paperless/logging.py new file mode 100644 index 000000000..ce2eff4fc --- /dev/null +++ b/src/paperless/logging.py @@ -0,0 +1,33 @@ +from __future__ import annotations + +import logging +from contextvars import ContextVar + +consume_task_id: ContextVar[str] = ContextVar("consume_task_id", default="") + + +class ConsumeTaskFormatter(logging.Formatter): + """ + Logging formatter that prepends a short task correlation ID to messages + emitted during document consumption. + + The ID is the first 8 characters of the Celery task UUID, set via the + ``consume_task_id`` ContextVar at the entry of ``consume_file``. When + the ContextVar is empty (any log outside a consume task) no prefix is + added and the output is identical to the standard verbose format. + """ + + def __init__(self) -> None: + super().__init__( + fmt="[{asctime}] [{levelname}] [{name}] {task_prefix}{message}", + style="{", + validate=False, # {task_prefix} is not a standard LogRecord attribute, so Python's + # init-time format-string validation would raise ValueError without + # this. Runtime safety comes from format() always setting + # record.task_prefix before calling super().format(). + ) + + def format(self, record: logging.LogRecord) -> str: + task_id = consume_task_id.get() + record.task_prefix = f"[{task_id}] " if task_id else "" + return super().format(record) diff --git a/src/paperless/settings/__init__.py b/src/paperless/settings/__init__.py index b960e9dc4..772c51801 100644 --- a/src/paperless/settings/__init__.py +++ b/src/paperless/settings/__init__.py @@ -592,8 +592,7 @@ LOGGING = { "disable_existing_loggers": False, "formatters": { "verbose": { - "format": "[{asctime}] [{levelname}] [{name}] {message}", - "style": "{", + "()": "paperless.logging.ConsumeTaskFormatter", }, "simple": { "format": "{levelname} {message}", diff --git a/src/paperless/tests/test_logging.py b/src/paperless/tests/test_logging.py new file mode 100644 index 000000000..dbd36c7d0 --- /dev/null +++ b/src/paperless/tests/test_logging.py @@ -0,0 +1,34 @@ +import logging + +from paperless.logging import ConsumeTaskFormatter +from paperless.logging import consume_task_id + + +def _make_record(msg: str = "Test message") -> logging.LogRecord: + return logging.LogRecord( + name="paperless.consumer", + level=logging.INFO, + pathname="", + lineno=0, + msg=msg, + args=(), + exc_info=None, + ) + + +def test_formatter_includes_task_id_when_set(): + token = consume_task_id.set("a8098c1a") + try: + formatter = ConsumeTaskFormatter() + output = formatter.format(_make_record()) + assert "[a8098c1a] Test message" in output + finally: + consume_task_id.reset(token) + + +def test_formatter_omits_prefix_when_no_task_id(): + # ContextVar default is "" — no task active + formatter = ConsumeTaskFormatter() + output = formatter.format(_make_record()) + assert "[] " not in output + assert "Test message" in output diff --git a/src/paperless/tests/test_websockets.py b/src/paperless/tests/test_websockets.py index bffc44f82..9f7c9a652 100644 --- a/src/paperless/tests/test_websockets.py +++ b/src/paperless/tests/test_websockets.py @@ -200,7 +200,10 @@ class TestWebSockets: "Test message", 1, 10, - extra_args={"foo": "bar"}, + document_id=42, + owner_id=1, + users_can_view=[2, 3], + groups_can_view=[4], ) assert mock_group_send.call_args[0][1] == { @@ -212,7 +215,10 @@ class TestWebSockets: "max_progress": 10, "status": ProgressStatusOptions.STARTED, "message": "Test message", - "foo": "bar", + "document_id": 42, + "owner_id": 1, + "users_can_view": [2, 3], + "groups_can_view": [4], }, }