mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-04-06 08:08:51 +00:00
Compare commits
39 Commits
feature-ve
...
feature-se
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0b5b6fdad5 | ||
|
|
d98dbd50f4 | ||
|
|
7649e4a6b1 | ||
|
|
610ba27891 | ||
|
|
a945cd9379 | ||
|
|
5f5fb263c9 | ||
|
|
7c50e0077c | ||
|
|
288740ea62 | ||
|
|
c039df423f | ||
|
|
d998d3fbaf | ||
|
|
6cf01dd383 | ||
|
|
0d915c58a4 | ||
|
|
46008d2da7 | ||
|
|
b807b107ad | ||
|
|
6768c1e6f8 | ||
|
|
5a94291b79 | ||
|
|
c2f02851da | ||
|
|
d0f8a98a9a | ||
|
|
566afdffca | ||
|
|
f32ad98d8e | ||
|
|
64debc87a5 | ||
|
|
eb758862c9 | ||
|
|
8c539bd862 | ||
|
|
d365f19962 | ||
|
|
2703c12f1a | ||
|
|
e7c7978d67 | ||
|
|
83501757df | ||
|
|
dda05a7c00 | ||
|
|
376af81b9c | ||
|
|
05c9e21fac | ||
|
|
aed9abe48c | ||
|
|
e01a762e81 | ||
|
|
14cc6a7ca4 | ||
|
|
32876f0334 | ||
|
|
e7884cb505 | ||
|
|
63f4e939d5 | ||
|
|
c813a1846d | ||
|
|
045afa7419 | ||
|
|
e827581f2a |
9
.github/workflows/ci-backend.yml
vendored
9
.github/workflows/ci-backend.yml
vendored
@@ -24,6 +24,7 @@ jobs:
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
- name: Decide run mode
|
||||
id: force
|
||||
run: |
|
||||
@@ -49,7 +50,7 @@ jobs:
|
||||
- name: Detect changes
|
||||
id: filter
|
||||
if: steps.force.outputs.run_all != 'true'
|
||||
uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36 # v3.0.2
|
||||
uses: dorny/paths-filter@fbd0ab8f3e69293af611ebaee6363fc25e6d187d # v4.0.1
|
||||
with:
|
||||
base: ${{ steps.range.outputs.base }}
|
||||
ref: ${{ steps.range.outputs.ref }}
|
||||
@@ -72,6 +73,8 @@ jobs:
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
persist-credentials: false
|
||||
- name: Start containers
|
||||
run: |
|
||||
docker compose --file docker/compose/docker-compose.ci-test.yml pull --quiet
|
||||
@@ -145,6 +148,8 @@ jobs:
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
persist-credentials: false
|
||||
- name: Set up Python
|
||||
id: setup-python
|
||||
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
@@ -173,7 +178,7 @@ jobs:
|
||||
check \
|
||||
src/
|
||||
- name: Cache Mypy
|
||||
uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.3
|
||||
uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4
|
||||
with:
|
||||
path: .mypy_cache
|
||||
# Keyed by OS, Python version, and dependency hashes
|
||||
|
||||
4
.github/workflows/ci-docker.yml
vendored
4
.github/workflows/ci-docker.yml
vendored
@@ -42,6 +42,8 @@ jobs:
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
persist-credentials: false
|
||||
- name: Determine ref name
|
||||
id: ref
|
||||
run: |
|
||||
@@ -169,7 +171,7 @@ jobs:
|
||||
packages: write
|
||||
steps:
|
||||
- name: Download digests
|
||||
uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8.0.0
|
||||
uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
|
||||
with:
|
||||
path: /tmp/digests
|
||||
pattern: digest-*.txt
|
||||
|
||||
9
.github/workflows/ci-docs.yml
vendored
9
.github/workflows/ci-docs.yml
vendored
@@ -26,6 +26,7 @@ jobs:
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
- name: Decide run mode
|
||||
id: force
|
||||
run: |
|
||||
@@ -51,7 +52,7 @@ jobs:
|
||||
- name: Detect changes
|
||||
id: filter
|
||||
if: steps.force.outputs.run_all != 'true'
|
||||
uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36 # v3.0.2
|
||||
uses: dorny/paths-filter@fbd0ab8f3e69293af611ebaee6363fc25e6d187d # v4.0.1
|
||||
with:
|
||||
base: ${{ steps.range.outputs.base }}
|
||||
ref: ${{ steps.range.outputs.ref }}
|
||||
@@ -68,9 +69,11 @@ jobs:
|
||||
name: Build Documentation
|
||||
runs-on: ubuntu-24.04
|
||||
steps:
|
||||
- uses: actions/configure-pages@983d7736d9b0ae728b81ab479565c72886d7745b # v5.0.0
|
||||
- uses: actions/configure-pages@45bfe0192ca1faeb007ade9deae92b16b8254a0d # v6.0.0
|
||||
- name: Checkout
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
persist-credentials: false
|
||||
- name: Set up Python
|
||||
id: setup-python
|
||||
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
@@ -107,7 +110,7 @@ jobs:
|
||||
url: ${{ steps.deployment.outputs.page_url }}
|
||||
steps:
|
||||
- name: Deploy GitHub Pages
|
||||
uses: actions/deploy-pages@d6db90164ac5ed86f2b6aed7e0febac5b3c0c03e # v4.0.5
|
||||
uses: actions/deploy-pages@cd2ce8fcbc39b97be8ca5fce6e763baed58fa128 # v5.0.0
|
||||
id: deployment
|
||||
with:
|
||||
artifact_name: github-pages-${{ github.run_id }}-${{ github.run_attempt }}
|
||||
|
||||
31
.github/workflows/ci-frontend.yml
vendored
31
.github/workflows/ci-frontend.yml
vendored
@@ -46,7 +46,7 @@ jobs:
|
||||
- name: Detect changes
|
||||
id: filter
|
||||
if: steps.force.outputs.run_all != 'true'
|
||||
uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36 # v3.0.2
|
||||
uses: dorny/paths-filter@fbd0ab8f3e69293af611ebaee6363fc25e6d187d # v4.0.1
|
||||
with:
|
||||
base: ${{ steps.range.outputs.base }}
|
||||
ref: ${{ steps.range.outputs.ref }}
|
||||
@@ -62,8 +62,10 @@ jobs:
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
persist-credentials: false
|
||||
- name: Install pnpm
|
||||
uses: pnpm/action-setup@41ff72655975bd51cab0327fa583b6e92b6d3061 # v4.2.0
|
||||
uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0
|
||||
with:
|
||||
version: 10
|
||||
- name: Use Node.js 24
|
||||
@@ -74,7 +76,7 @@ jobs:
|
||||
cache-dependency-path: 'src-ui/pnpm-lock.yaml'
|
||||
- name: Cache frontend dependencies
|
||||
id: cache-frontend-deps
|
||||
uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.3
|
||||
uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4
|
||||
with:
|
||||
path: |
|
||||
~/.pnpm-store
|
||||
@@ -90,8 +92,10 @@ jobs:
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
persist-credentials: false
|
||||
- name: Install pnpm
|
||||
uses: pnpm/action-setup@41ff72655975bd51cab0327fa583b6e92b6d3061 # v4.2.0
|
||||
uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0
|
||||
with:
|
||||
version: 10
|
||||
- name: Use Node.js 24
|
||||
@@ -101,7 +105,7 @@ jobs:
|
||||
cache: 'pnpm'
|
||||
cache-dependency-path: 'src-ui/pnpm-lock.yaml'
|
||||
- name: Cache frontend dependencies
|
||||
uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.3
|
||||
uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4
|
||||
with:
|
||||
path: |
|
||||
~/.pnpm-store
|
||||
@@ -125,8 +129,10 @@ jobs:
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
persist-credentials: false
|
||||
- name: Install pnpm
|
||||
uses: pnpm/action-setup@41ff72655975bd51cab0327fa583b6e92b6d3061 # v4.2.0
|
||||
uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0
|
||||
with:
|
||||
version: 10
|
||||
- name: Use Node.js 24
|
||||
@@ -136,7 +142,7 @@ jobs:
|
||||
cache: 'pnpm'
|
||||
cache-dependency-path: 'src-ui/pnpm-lock.yaml'
|
||||
- name: Cache frontend dependencies
|
||||
uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.3
|
||||
uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4
|
||||
with:
|
||||
path: |
|
||||
~/.pnpm-store
|
||||
@@ -176,8 +182,10 @@ jobs:
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
persist-credentials: false
|
||||
- name: Install pnpm
|
||||
uses: pnpm/action-setup@41ff72655975bd51cab0327fa583b6e92b6d3061 # v4.2.0
|
||||
uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0
|
||||
with:
|
||||
version: 10
|
||||
- name: Use Node.js 24
|
||||
@@ -187,7 +195,7 @@ jobs:
|
||||
cache: 'pnpm'
|
||||
cache-dependency-path: 'src-ui/pnpm-lock.yaml'
|
||||
- name: Cache frontend dependencies
|
||||
uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.3
|
||||
uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4
|
||||
with:
|
||||
path: |
|
||||
~/.pnpm-store
|
||||
@@ -209,8 +217,9 @@ jobs:
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
fetch-depth: 2
|
||||
persist-credentials: false
|
||||
- name: Install pnpm
|
||||
uses: pnpm/action-setup@41ff72655975bd51cab0327fa583b6e92b6d3061 # v4.2.0
|
||||
uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0
|
||||
with:
|
||||
version: 10
|
||||
- name: Use Node.js 24
|
||||
@@ -220,7 +229,7 @@ jobs:
|
||||
cache: 'pnpm'
|
||||
cache-dependency-path: 'src-ui/pnpm-lock.yaml'
|
||||
- name: Cache frontend dependencies
|
||||
uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.3
|
||||
uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4
|
||||
with:
|
||||
path: |
|
||||
~/.pnpm-store
|
||||
|
||||
4
.github/workflows/ci-lint.yml
vendored
4
.github/workflows/ci-lint.yml
vendored
@@ -16,9 +16,11 @@ jobs:
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
persist-credentials: false
|
||||
- name: Install Python
|
||||
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
with:
|
||||
python-version: "3.14"
|
||||
- name: Run prek
|
||||
uses: j178/prek-action@0bb87d7f00b0c99306c8bcb8b8beba1eb581c037 # v1.1.1
|
||||
uses: j178/prek-action@53276d8b0d10f8b6672aa85b4588c6921d0370cc # v2.0.1
|
||||
|
||||
11
.github/workflows/ci-release.yml
vendored
11
.github/workflows/ci-release.yml
vendored
@@ -29,9 +29,11 @@ jobs:
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
persist-credentials: false
|
||||
# ---- Frontend Build ----
|
||||
- name: Install pnpm
|
||||
uses: pnpm/action-setup@41ff72655975bd51cab0327fa583b6e92b6d3061 # v4.2.0
|
||||
uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0
|
||||
with:
|
||||
version: 10
|
||||
- name: Use Node.js 24
|
||||
@@ -133,7 +135,7 @@ jobs:
|
||||
version: ${{ steps.get-version.outputs.version }}
|
||||
steps:
|
||||
- name: Download release artifact
|
||||
uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8.0.0
|
||||
uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
|
||||
with:
|
||||
name: release
|
||||
path: ./
|
||||
@@ -148,7 +150,7 @@ jobs:
|
||||
fi
|
||||
- name: Create release and changelog
|
||||
id: create-release
|
||||
uses: release-drafter/release-drafter@6db134d15f3909ccc9eefd369f02bd1e9cffdf97 # v6.2.0
|
||||
uses: release-drafter/release-drafter@139054aeaa9adc52ab36ddf67437541f039b88e2 # v7.1.1
|
||||
with:
|
||||
name: Paperless-ngx ${{ steps.get-version.outputs.version }}
|
||||
tag: ${{ steps.get-version.outputs.version }}
|
||||
@@ -159,7 +161,7 @@ jobs:
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
- name: Upload release archive
|
||||
uses: shogo82148/actions-upload-release-asset@8f6863c6c894ba46f9e676ef5cccec4752723c1e # v1.9.2
|
||||
uses: shogo82148/actions-upload-release-asset@96bc1f0cb850b65efd58a6b5eaa0a69f88d38077 # v1.10.0
|
||||
with:
|
||||
github_token: ${{ secrets.GITHUB_TOKEN }}
|
||||
upload_url: ${{ steps.create-release.outputs.upload_url }}
|
||||
@@ -179,6 +181,7 @@ jobs:
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
ref: main
|
||||
persist-credentials: true # for pushing changelog branch
|
||||
- name: Set up Python
|
||||
id: setup-python
|
||||
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
|
||||
42
.github/workflows/ci-static-analysis.yml
vendored
Normal file
42
.github/workflows/ci-static-analysis.yml
vendored
Normal file
@@ -0,0 +1,42 @@
|
||||
name: Static Analysis
|
||||
on:
|
||||
push:
|
||||
branches-ignore:
|
||||
- 'translations**'
|
||||
pull_request:
|
||||
branches-ignore:
|
||||
- 'translations**'
|
||||
workflow_dispatch:
|
||||
concurrency:
|
||||
group: static-analysis-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
permissions:
|
||||
contents: read
|
||||
jobs:
|
||||
zizmor:
|
||||
name: Run zizmor
|
||||
runs-on: ubuntu-24.04
|
||||
permissions:
|
||||
contents: read
|
||||
actions: read
|
||||
security-events: write
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
persist-credentials: false
|
||||
- name: Run zizmor
|
||||
uses: zizmorcore/zizmor-action@71321a20a9ded102f6e9ce5718a2fcec2c4f70d8 # v0.5.2
|
||||
semgrep:
|
||||
name: Semgrep CE
|
||||
runs-on: ubuntu-24.04
|
||||
container:
|
||||
image: semgrep/semgrep:1.155.0@sha256:cc869c685dcc0fe497c86258da9f205397d8108e56d21a86082ea4886e52784d
|
||||
if: github.actor != 'dependabot[bot]'
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
persist-credentials: false
|
||||
- name: Run Semgrep
|
||||
run: semgrep scan --config auto
|
||||
2
.github/workflows/codeql-analysis.yml
vendored
2
.github/workflows/codeql-analysis.yml
vendored
@@ -35,6 +35,8 @@ jobs:
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
persist-credentials: false
|
||||
# Initializes the CodeQL tools for scanning.
|
||||
- name: Initialize CodeQL
|
||||
uses: github/codeql-action/init@c793b717bc78562f491db7b0e93a3a178b099162 # v4.32.5
|
||||
|
||||
1
.github/workflows/crowdin.yml
vendored
1
.github/workflows/crowdin.yml
vendored
@@ -16,6 +16,7 @@ jobs:
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
token: ${{ secrets.PNGX_BOT_PAT }}
|
||||
persist-credentials: false
|
||||
- name: crowdin action
|
||||
uses: crowdin/github-action@8818ff65bfc4322384f983ea37e3926948c11745 # v2.15.0
|
||||
with:
|
||||
|
||||
2
.github/workflows/project-actions.yml
vendored
2
.github/workflows/project-actions.yml
vendored
@@ -19,6 +19,6 @@ jobs:
|
||||
if: github.event_name == 'pull_request_target' && (github.event.action == 'opened' || github.event.action == 'reopened') && github.event.pull_request.user.login != 'dependabot'
|
||||
steps:
|
||||
- name: Label PR with release-drafter
|
||||
uses: release-drafter/release-drafter@6db134d15f3909ccc9eefd369f02bd1e9cffdf97 # v6.2.0
|
||||
uses: release-drafter/release-drafter@139054aeaa9adc52ab36ddf67437541f039b88e2 # v7.1.1
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
5
.github/workflows/translate-strings.yml
vendored
5
.github/workflows/translate-strings.yml
vendored
@@ -17,6 +17,7 @@ jobs:
|
||||
with:
|
||||
token: ${{ secrets.PNGX_BOT_PAT }}
|
||||
ref: ${{ env.GH_REF }}
|
||||
persist-credentials: true # for pushing translation branch
|
||||
- name: Set up Python
|
||||
id: setup-python
|
||||
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
@@ -36,7 +37,7 @@ jobs:
|
||||
- name: Generate backend translation strings
|
||||
run: cd src/ && uv run manage.py makemessages -l en_US -i "samples*"
|
||||
- name: Install pnpm
|
||||
uses: pnpm/action-setup@41ff72655975bd51cab0327fa583b6e92b6d3061 # v4.2.0
|
||||
uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0
|
||||
with:
|
||||
version: 10
|
||||
- name: Use Node.js 24
|
||||
@@ -47,7 +48,7 @@ jobs:
|
||||
cache-dependency-path: 'src-ui/pnpm-lock.yaml'
|
||||
- name: Cache frontend dependencies
|
||||
id: cache-frontend-deps
|
||||
uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.3
|
||||
uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4
|
||||
with:
|
||||
path: |
|
||||
~/.pnpm-store
|
||||
|
||||
@@ -50,12 +50,12 @@ repos:
|
||||
- 'prettier-plugin-organize-imports@4.3.0'
|
||||
# Python hooks
|
||||
- repo: https://github.com/astral-sh/ruff-pre-commit
|
||||
rev: v0.15.6
|
||||
rev: v0.15.8
|
||||
hooks:
|
||||
- id: ruff-check
|
||||
- id: ruff-format
|
||||
- repo: https://github.com/tox-dev/pyproject-fmt
|
||||
rev: "v2.12.1"
|
||||
rev: "v2.21.0"
|
||||
hooks:
|
||||
- id: pyproject-fmt
|
||||
# Dockerfile hooks
|
||||
|
||||
@@ -237,8 +237,8 @@ RUN set -eux \
|
||||
&& echo "Adjusting all permissions" \
|
||||
&& chown --from root:root --changes --recursive paperless:paperless /usr/src/paperless \
|
||||
&& echo "Collecting static files" \
|
||||
&& s6-setuidgid paperless python3 manage.py collectstatic --clear --no-input --link \
|
||||
&& s6-setuidgid paperless python3 manage.py compilemessages \
|
||||
&& PAPERLESS_SECRET_KEY=build-time-dummy s6-setuidgid paperless python3 manage.py collectstatic --clear --no-input --link \
|
||||
&& PAPERLESS_SECRET_KEY=build-time-dummy s6-setuidgid paperless python3 manage.py compilemessages \
|
||||
&& /usr/local/bin/deduplicate.py --verbose /usr/src/paperless/static/
|
||||
|
||||
VOLUME ["/usr/src/paperless/data", \
|
||||
|
||||
@@ -17,9 +17,9 @@
|
||||
# (if doing so please consider security measures such as reverse proxy)
|
||||
#PAPERLESS_URL=https://paperless.example.com
|
||||
|
||||
# Adjust this key if you plan to make paperless available publicly. It should
|
||||
# be a very long sequence of random characters. You don't need to remember it.
|
||||
#PAPERLESS_SECRET_KEY=change-me
|
||||
# Required. A unique secret key for session tokens and signing.
|
||||
# Generate with: python3 -c "import secrets; print(secrets.token_urlsafe(64))"
|
||||
PAPERLESS_SECRET_KEY=change-me
|
||||
|
||||
# Use this variable to set a timezone for the Paperless Docker containers. Defaults to UTC.
|
||||
#PAPERLESS_TIME_ZONE=America/Los_Angeles
|
||||
|
||||
@@ -3,26 +3,10 @@
|
||||
|
||||
declare -r log_prefix="[init-index]"
|
||||
|
||||
declare -r index_version=9
|
||||
declare -r data_dir="${PAPERLESS_DATA_DIR:-/usr/src/paperless/data}"
|
||||
declare -r index_version_file="${data_dir}/.index_version"
|
||||
|
||||
update_index () {
|
||||
echo "${log_prefix} Search index out of date. Updating..."
|
||||
cd "${PAPERLESS_SRC_DIR}"
|
||||
if [[ -n "${USER_IS_NON_ROOT}" ]]; then
|
||||
python3 manage.py document_index reindex --no-progress-bar
|
||||
echo ${index_version} | tee "${index_version_file}" > /dev/null
|
||||
else
|
||||
s6-setuidgid paperless python3 manage.py document_index reindex --no-progress-bar
|
||||
echo ${index_version} | s6-setuidgid paperless tee "${index_version_file}" > /dev/null
|
||||
fi
|
||||
}
|
||||
|
||||
if [[ (! -f "${index_version_file}") ]]; then
|
||||
echo "${log_prefix} No index version file found"
|
||||
update_index
|
||||
elif [[ $(<"${index_version_file}") != "$index_version" ]]; then
|
||||
echo "${log_prefix} index version updated"
|
||||
update_index
|
||||
echo "${log_prefix} Checking search index..."
|
||||
cd "${PAPERLESS_SRC_DIR}"
|
||||
if [[ -n "${USER_IS_NON_ROOT}" ]]; then
|
||||
python3 manage.py document_index reindex --if-needed --no-progress-bar
|
||||
else
|
||||
s6-setuidgid paperless python3 manage.py document_index reindex --if-needed --no-progress-bar
|
||||
fi
|
||||
|
||||
@@ -180,6 +180,16 @@ following:
|
||||
This might not actually do anything. Not every new paperless version
|
||||
comes with new database migrations.
|
||||
|
||||
4. Rebuild the search index if needed.
|
||||
|
||||
```shell-session
|
||||
cd src
|
||||
python3 manage.py document_index reindex --if-needed
|
||||
```
|
||||
|
||||
This is a no-op if the index is already up to date, so it is safe to
|
||||
run on every upgrade.
|
||||
|
||||
### Database Upgrades
|
||||
|
||||
Paperless-ngx is compatible with Django-supported versions of PostgreSQL and MariaDB and it is generally
|
||||
@@ -453,17 +463,42 @@ the search yields non-existing documents or won't find anything, you
|
||||
may need to recreate the index manually.
|
||||
|
||||
```
|
||||
document_index {reindex,optimize}
|
||||
document_index {reindex,optimize} [--recreate] [--if-needed]
|
||||
```
|
||||
|
||||
Specify `reindex` to have the index created from scratch. This may take
|
||||
some time.
|
||||
Specify `reindex` to rebuild the index from all documents in the database. This
|
||||
may take some time.
|
||||
|
||||
Specify `optimize` to optimize the index. This updates certain aspects
|
||||
of the index and usually makes queries faster and also ensures that the
|
||||
autocompletion works properly. This command is regularly invoked by the
|
||||
Pass `--recreate` to wipe the existing index before rebuilding. Use this when the
|
||||
index is corrupted or you want a fully clean rebuild.
|
||||
|
||||
Pass `--if-needed` to skip the rebuild if the index is already up to date (schema
|
||||
version and search language match). Safe to run on every startup or upgrade.
|
||||
|
||||
Specify `optimize` to optimize the index. This command is regularly invoked by the
|
||||
task scheduler.
|
||||
|
||||
!!! note
|
||||
|
||||
The `optimize` subcommand is deprecated and is now a no-op. Tantivy manages
|
||||
segment merging automatically; no manual optimization step is needed.
|
||||
|
||||
!!! note
|
||||
|
||||
**Docker users:** On every startup, the container runs
|
||||
`document_index reindex --if-needed` automatically. Schema changes, language
|
||||
changes, and missing indexes are all detected and rebuilt before the webserver
|
||||
starts. No manual step is required.
|
||||
|
||||
**Bare metal users:** Run the following command after each upgrade (and after
|
||||
changing `PAPERLESS_SEARCH_LANGUAGE`). It is a no-op if the index is already
|
||||
up to date:
|
||||
|
||||
```shell-session
|
||||
cd src
|
||||
python3 manage.py document_index reindex --if-needed
|
||||
```
|
||||
|
||||
### Clearing the database read cache
|
||||
|
||||
If the database read cache is enabled, **you must run this command** after making any changes to the database outside the application context.
|
||||
|
||||
15
docs/api.md
15
docs/api.md
@@ -62,10 +62,14 @@ The REST api provides five different forms of authentication.
|
||||
|
||||
## Searching for documents
|
||||
|
||||
Full text searching is available on the `/api/documents/` endpoint. Two
|
||||
specific query parameters cause the API to return full text search
|
||||
Full text searching is available on the `/api/documents/` endpoint. The
|
||||
following query parameters cause the API to return Tantivy-backed search
|
||||
results:
|
||||
|
||||
- `/api/documents/?text=your%20search%20query`: Search title and content
|
||||
using simple substring-style search.
|
||||
- `/api/documents/?title_search=your%20search%20query`: Search title only
|
||||
using simple substring-style search.
|
||||
- `/api/documents/?query=your%20search%20query`: Search for a document
|
||||
using a full text query. For details on the syntax, see [Basic Usage - Searching](usage.md#basic-usage_searching).
|
||||
- `/api/documents/?more_like_id=1234`: Search for documents similar to
|
||||
@@ -167,9 +171,8 @@ Query parameters:
|
||||
- `term`: The incomplete term.
|
||||
- `limit`: Amount of results. Defaults to 10.
|
||||
|
||||
Results returned by the endpoint are ordered by importance of the term
|
||||
in the document index. The first result is the term that has the highest
|
||||
[Tf/Idf](https://en.wikipedia.org/wiki/Tf%E2%80%93idf) score in the index.
|
||||
Results are ordered by how many of the user's visible documents contain
|
||||
each matching word. The first result is the word that appears in the most documents.
|
||||
|
||||
```json
|
||||
["term1", "term3", "term6", "term4"]
|
||||
@@ -440,3 +443,5 @@ Initial API version.
|
||||
- The `all` parameter of list endpoints is now deprecated and will be removed in a future version.
|
||||
- The bulk edit objects endpoint now supports `all` and `filters` parameters to avoid having to send
|
||||
large lists of object IDs for operations affecting many objects.
|
||||
- The legacy `title_content` document search parameter is deprecated and will be removed in a future version.
|
||||
Clients should use `text` for simple title-and-content search and `title_search` for title-only search.
|
||||
|
||||
@@ -402,6 +402,12 @@ Defaults to `/usr/share/nltk_data`
|
||||
|
||||
: This is where paperless will store the classification model.
|
||||
|
||||
!!! warning
|
||||
|
||||
The classification model uses Python's pickle serialization format.
|
||||
Ensure this file is only writable by the paperless user, as a
|
||||
maliciously crafted model file could execute arbitrary code when loaded.
|
||||
|
||||
Defaults to `PAPERLESS_DATA_DIR/classification_model.pickle`.
|
||||
|
||||
## Logging
|
||||
@@ -422,14 +428,20 @@ Defaults to `/usr/share/nltk_data`
|
||||
|
||||
#### [`PAPERLESS_SECRET_KEY=<key>`](#PAPERLESS_SECRET_KEY) {#PAPERLESS_SECRET_KEY}
|
||||
|
||||
: Paperless uses this to make session tokens. If you expose paperless
|
||||
on the internet, you need to change this, since the default secret
|
||||
is well known.
|
||||
: **Required.** Paperless uses this to make session tokens and sign
|
||||
sensitive data. Paperless will refuse to start if this is not set.
|
||||
|
||||
Use any sequence of characters. The more, the better. You don't
|
||||
need to remember this. Just face-roll your keyboard.
|
||||
need to remember this. You can generate a suitable key with:
|
||||
|
||||
Default is listed in the file `src/paperless/settings.py`.
|
||||
python3 -c "import secrets; print(secrets.token_urlsafe(64))"
|
||||
|
||||
!!! warning
|
||||
|
||||
This setting has no default value. You **must** set it before
|
||||
starting Paperless. Existing installations that relied on the
|
||||
previous default value should set `PAPERLESS_SECRET_KEY` to
|
||||
that value to avoid invalidating existing sessions and tokens.
|
||||
|
||||
#### [`PAPERLESS_URL=<url>`](#PAPERLESS_URL) {#PAPERLESS_URL}
|
||||
|
||||
@@ -770,6 +782,14 @@ If both the [PAPERLESS_ACCOUNT_DEFAULT_GROUPS](#PAPERLESS_ACCOUNT_DEFAULT_GROUPS
|
||||
|
||||
Defaults to 1209600 (2 weeks)
|
||||
|
||||
#### [`PAPERLESS_TOKEN_THROTTLE_RATE=<rate>`](#PAPERLESS_TOKEN_THROTTLE_RATE) {#PAPERLESS_TOKEN_THROTTLE_RATE}
|
||||
|
||||
: Rate limit for the API token authentication endpoint (`/api/token/`), used to mitigate brute-force login attempts.
|
||||
Uses Django REST Framework's [throttle rate format](https://www.django-rest-framework.org/api-guide/throttling/#setting-the-throttling-policy),
|
||||
e.g. `5/min`, `100/hour`, `1000/day`.
|
||||
|
||||
Defaults to `5/min`
|
||||
|
||||
## OCR settings {#ocr}
|
||||
|
||||
Paperless uses [OCRmyPDF](https://ocrmypdf.readthedocs.io/en/latest/)
|
||||
@@ -1103,6 +1123,32 @@ should be a valid crontab(5) expression describing when to run.
|
||||
|
||||
Defaults to `0 0 * * *` or daily at midnight.
|
||||
|
||||
#### [`PAPERLESS_SEARCH_LANGUAGE=<language>`](#PAPERLESS_SEARCH_LANGUAGE) {#PAPERLESS_SEARCH_LANGUAGE}
|
||||
|
||||
: Sets the stemmer language for the full-text search index.
|
||||
Stemming improves recall by matching word variants (e.g. "running" matches "run").
|
||||
Changing this setting causes the index to be rebuilt automatically on next startup.
|
||||
An invalid value raises an error at startup.
|
||||
|
||||
: Use the ISO 639-1 two-letter code (e.g. `en`, `de`, `fr`). Lowercase full names
|
||||
(e.g. `english`, `german`, `french`) are also accepted. The capitalized names shown
|
||||
in the [Tantivy Language enum](https://docs.rs/tantivy/latest/tantivy/tokenizer/enum.Language.html)
|
||||
documentation are **not** valid — use the lowercase equivalent.
|
||||
|
||||
: If not set, paperless infers the language from
|
||||
[`PAPERLESS_OCR_LANGUAGE`](#PAPERLESS_OCR_LANGUAGE). If the OCR language has no
|
||||
Tantivy stemmer equivalent, stemming is disabled.
|
||||
|
||||
Defaults to unset (inferred from `PAPERLESS_OCR_LANGUAGE`).
|
||||
|
||||
#### [`PAPERLESS_ADVANCED_FUZZY_SEARCH_THRESHOLD=<float>`](#PAPERLESS_ADVANCED_FUZZY_SEARCH_THRESHOLD) {#PAPERLESS_ADVANCED_FUZZY_SEARCH_THRESHOLD}
|
||||
|
||||
: When set to a float value, approximate/fuzzy matching is applied alongside exact
|
||||
matching. Fuzzy results rank below exact matches. A value of `0.5` is a reasonable
|
||||
starting point. Leave unset to disable fuzzy matching entirely.
|
||||
|
||||
Defaults to unset (disabled).
|
||||
|
||||
#### [`PAPERLESS_SANITY_TASK_CRON=<cron expression>`](#PAPERLESS_SANITY_TASK_CRON) {#PAPERLESS_SANITY_TASK_CRON}
|
||||
|
||||
: Configures the scheduled sanity checker frequency. The value should be a
|
||||
@@ -1394,6 +1440,14 @@ ports.
|
||||
|
||||
## Incoming Mail {#incoming_mail}
|
||||
|
||||
#### [`PAPERLESS_EMAIL_ALLOW_INTERNAL_HOSTS=<bool>`](#PAPERLESS_EMAIL_ALLOW_INTERNAL_HOSTS) {#PAPERLESS_EMAIL_ALLOW_INTERNAL_HOSTS}
|
||||
|
||||
: If set to false, incoming mail account connections are blocked when the
|
||||
configured IMAP hostname resolves to a non-public address (for example,
|
||||
localhost, link-local, or RFC1918 private ranges).
|
||||
|
||||
Defaults to true, which allows internal hosts.
|
||||
|
||||
### Email OAuth {#email_oauth}
|
||||
|
||||
#### [`PAPERLESS_OAUTH_CALLBACK_BASE_URL=<str>`](#PAPERLESS_OAUTH_CALLBACK_BASE_URL) {#PAPERLESS_OAUTH_CALLBACK_BASE_URL}
|
||||
|
||||
@@ -1,5 +1,24 @@
|
||||
# v3 Migration Guide
|
||||
|
||||
## Secret Key is Now Required
|
||||
|
||||
The `PAPERLESS_SECRET_KEY` environment variable is now required. This is a critical security setting used for cryptographic signing and should be set to a long, random value.
|
||||
|
||||
### Action Required
|
||||
|
||||
If you are upgrading an existing installation, you must now set `PAPERLESS_SECRET_KEY` explicitly.
|
||||
|
||||
If your installation was relying on the previous built-in default key, you have two options:
|
||||
|
||||
- Set `PAPERLESS_SECRET_KEY` to that previous value to preserve existing sessions and tokens.
|
||||
- Set `PAPERLESS_SECRET_KEY` to a new random value to improve security, understanding that this will invalidate existing sessions and other signed tokens.
|
||||
|
||||
For new installations, or if you choose to rotate the key, you may generate a new secret key with:
|
||||
|
||||
```bash
|
||||
python3 -c "import secrets; print(secrets.token_urlsafe(64))"
|
||||
```
|
||||
|
||||
## Consumer Settings Changes
|
||||
|
||||
The v3 consumer command uses a [different library](https://watchfiles.helpmanual.io/) to unify
|
||||
@@ -104,6 +123,37 @@ Multiple options are combined in a single value:
|
||||
PAPERLESS_DB_OPTIONS="sslmode=require;sslrootcert=/certs/ca.pem;pool.max_size=10"
|
||||
```
|
||||
|
||||
## Search Index (Whoosh -> Tantivy)
|
||||
|
||||
The full-text search backend has been replaced with [Tantivy](https://github.com/quickwit-oss/tantivy).
|
||||
The index format is incompatible with Whoosh, so **the search index is automatically rebuilt from
|
||||
scratch on first startup after upgrading**. No manual action is required for the rebuild itself.
|
||||
|
||||
### Note and custom field search syntax
|
||||
|
||||
The old Whoosh index exposed `note` and `custom_field` as flat text fields that were included in
|
||||
unqualified searches (e.g. just typing `invoice` would match note content). With Tantivy these are
|
||||
now structured JSON fields accessed via dotted paths:
|
||||
|
||||
| Old syntax | New syntax |
|
||||
| -------------------- | --------------------------- |
|
||||
| `note:query` | `notes.note:query` |
|
||||
| `custom_field:query` | `custom_fields.value:query` |
|
||||
|
||||
**Saved views are migrated automatically.** Any saved view filter rule that used an explicit
|
||||
`note:` or `custom_field:` field prefix in a fulltext query is rewritten to the new syntax by a
|
||||
data migration that runs on upgrade.
|
||||
|
||||
**Unqualified queries are not migrated.** If you had a saved view with a plain search term (e.g.
|
||||
`invoice`) that happened to match note content or custom field values, it will no longer return
|
||||
those matches. Update those queries to use the explicit prefix, for example:
|
||||
|
||||
```
|
||||
invoice OR notes.note:invoice OR custom_fields.value:invoice
|
||||
```
|
||||
|
||||
Custom field names can also be searched with `custom_fields.name:fieldname`.
|
||||
|
||||
## OpenID Connect Token Endpoint Authentication
|
||||
|
||||
Some existing OpenID Connect setups may require an explicit token endpoint authentication method after upgrading to v3.
|
||||
|
||||
1176
docs/superpowers/plans/2026-04-03-search-performance.md
Normal file
1176
docs/superpowers/plans/2026-04-03-search-performance.md
Normal file
File diff suppressed because it is too large
Load Diff
121
docs/superpowers/plans/profiling-baseline.txt
Normal file
121
docs/superpowers/plans/profiling-baseline.txt
Normal file
@@ -0,0 +1,121 @@
|
||||
============================= test session starts ==============================
|
||||
platform linux -- Python 3.14.3, pytest-9.0.2, pluggy-1.6.0 -- /home/trenton/Documents/projects/paperless-ngx/.venv/bin/python
|
||||
cachedir: .pytest_cache
|
||||
django: version: 5.2.12, settings: paperless.settings (from ini)
|
||||
rootdir: /home/trenton/Documents/projects/paperless-ngx
|
||||
configfile: pyproject.toml
|
||||
plugins: sugar-1.1.1, xdist-3.8.0, cov-7.0.0, httpx-0.36.0, django-4.12.0, Faker-40.8.0, env-1.5.0, time-machine-3.2.0, mock-3.15.1, anyio-4.12.1, rerunfailures-16.1
|
||||
collecting ... collected 6 items
|
||||
|
||||
src/documents/tests/test_search_profiling.py::TestSearchProfilingBaseline::test_profile_relevance_search Creating test database for alias 'default'...
|
||||
|
||||
============================================================
|
||||
Profile: BEFORE — relevance search (no ordering)
|
||||
============================================================
|
||||
Wall time: 0.9622s
|
||||
Queries: 33 (0.0000s)
|
||||
Memory delta: 16557.2 KiB
|
||||
Peak memory: 16584.0 KiB
|
||||
|
||||
Top 5 allocations:
|
||||
<frozen importlib._bootstrap_external>:511: size=5480 KiB (+5480 KiB), count=45642 (+45642), average=123 B
|
||||
/home/trenton/Documents/projects/paperless-ngx/.venv/lib/python3.14/site-packages/fido2/rpid.py:47: size=518 KiB (+518 KiB), count=9769 (+9769), average=54 B
|
||||
<frozen abc>:106: size=432 KiB (+432 KiB), count=1480 (+1480), average=299 B
|
||||
/home/trenton/Documents/projects/paperless-ngx/.venv/lib/python3.14/site-packages/langdetect/utils/ngram.py:257: size=391 KiB (+391 KiB), count=6667 (+6667), average=60 B
|
||||
<frozen importlib._bootstrap>:491: size=284 KiB (+284 KiB), count=2543 (+2543), average=114 B
|
||||
============================================================
|
||||
|
||||
PASSED
|
||||
src/documents/tests/test_search_profiling.py::TestSearchProfilingBaseline::test_profile_sorted_search
|
||||
============================================================
|
||||
Profile: BEFORE — sorted search (ordering=created)
|
||||
============================================================
|
||||
Wall time: 0.1320s
|
||||
Queries: 32 (0.0010s)
|
||||
Memory delta: 880.8 KiB
|
||||
Peak memory: 906.8 KiB
|
||||
|
||||
Top 5 allocations:
|
||||
/home/trenton/Documents/projects/paperless-ngx/src/documents/search/_backend.py:575: size=50.1 KiB (+50.1 KiB), count=521 (+521), average=99 B
|
||||
/home/trenton/.local/share/uv/python/cpython-3.14.3-linux-x86_64-gnu/lib/python3.14/copyreg.py:104: size=49.7 KiB (+49.7 KiB), count=315 (+315), average=162 B
|
||||
/home/trenton/Documents/projects/paperless-ngx/.venv/lib/python3.14/site-packages/django/db/models/sql/query.py:386: size=38.0 KiB (+38.0 KiB), count=160 (+160), average=243 B
|
||||
/home/trenton/Documents/projects/paperless-ngx/.venv/lib/python3.14/site-packages/django_filters/filterset.py:209: size=32.0 KiB (+32.0 KiB), count=82 (+82), average=400 B
|
||||
/home/trenton/Documents/projects/paperless-ngx/.venv/lib/python3.14/site-packages/django_filters/filters.py:158: size=21.4 KiB (+21.4 KiB), count=104 (+104), average=210 B
|
||||
============================================================
|
||||
|
||||
PASSED
|
||||
src/documents/tests/test_search_profiling.py::TestSearchProfilingBaseline::test_profile_paginated_search
|
||||
============================================================
|
||||
Profile: BEFORE — paginated search (page=2, page_size=25)
|
||||
============================================================
|
||||
Wall time: 0.1395s
|
||||
Queries: 32 (0.0000s)
|
||||
Memory delta: 868.1 KiB
|
||||
Peak memory: 893.5 KiB
|
||||
|
||||
Top 5 allocations:
|
||||
/home/trenton/Documents/projects/paperless-ngx/src/documents/search/_backend.py:575: size=50.1 KiB (+50.1 KiB), count=521 (+521), average=99 B
|
||||
/home/trenton/.local/share/uv/python/cpython-3.14.3-linux-x86_64-gnu/lib/python3.14/copyreg.py:104: size=49.2 KiB (+49.2 KiB), count=315 (+315), average=160 B
|
||||
/home/trenton/Documents/projects/paperless-ngx/.venv/lib/python3.14/site-packages/django/db/models/sql/query.py:386: size=38.1 KiB (+38.1 KiB), count=161 (+161), average=242 B
|
||||
/home/trenton/Documents/projects/paperless-ngx/.venv/lib/python3.14/site-packages/django_filters/filterset.py:209: size=32.0 KiB (+32.0 KiB), count=82 (+82), average=400 B
|
||||
/home/trenton/Documents/projects/paperless-ngx/.venv/lib/python3.14/site-packages/django_filters/filters.py:158: size=21.3 KiB (+21.3 KiB), count=104 (+104), average=209 B
|
||||
============================================================
|
||||
|
||||
PASSED
|
||||
src/documents/tests/test_search_profiling.py::TestSearchProfilingBaseline::test_profile_search_with_selection_data
|
||||
============================================================
|
||||
Profile: BEFORE — search with selection_data
|
||||
============================================================
|
||||
Wall time: 0.1656s
|
||||
Queries: 37 (0.0020s)
|
||||
Memory delta: 926.9 KiB
|
||||
Peak memory: 1084.3 KiB
|
||||
|
||||
Top 5 allocations:
|
||||
/home/trenton/Documents/projects/paperless-ngx/src/documents/search/_backend.py:575: size=50.1 KiB (+50.1 KiB), count=521 (+521), average=99 B
|
||||
/home/trenton/.local/share/uv/python/cpython-3.14.3-linux-x86_64-gnu/lib/python3.14/copyreg.py:104: size=49.6 KiB (+49.6 KiB), count=327 (+327), average=155 B
|
||||
/home/trenton/Documents/projects/paperless-ngx/.venv/lib/python3.14/site-packages/django/db/models/sql/query.py:386: size=38.1 KiB (+38.1 KiB), count=161 (+161), average=242 B
|
||||
/home/trenton/Documents/projects/paperless-ngx/.venv/lib/python3.14/site-packages/django_filters/filterset.py:209: size=32.0 KiB (+32.0 KiB), count=82 (+82), average=400 B
|
||||
/home/trenton/Documents/projects/paperless-ngx/.venv/lib/python3.14/site-packages/django/db/backends/sqlite3/operations.py:193: size=27.1 KiB (+27.1 KiB), count=37 (+37), average=751 B
|
||||
============================================================
|
||||
|
||||
PASSED
|
||||
src/documents/tests/test_search_profiling.py::TestSearchProfilingBaseline::test_profile_backend_search_only
|
||||
============================================================
|
||||
Profile: BEFORE — backend.search(page_size=10000, all highlights)
|
||||
============================================================
|
||||
Wall time: 0.0175s
|
||||
Queries: 0 (0.0000s)
|
||||
Memory delta: 88.6 KiB
|
||||
Peak memory: 100.3 KiB
|
||||
|
||||
Top 5 allocations:
|
||||
/home/trenton/Documents/projects/paperless-ngx/src/documents/search/_backend.py:575: size=51.2 KiB (+51.2 KiB), count=530 (+530), average=99 B
|
||||
/home/trenton/Documents/projects/paperless-ngx/src/documents/search/_backend.py:557: size=17.8 KiB (+17.8 KiB), count=200 (+200), average=91 B
|
||||
/home/trenton/Documents/projects/paperless-ngx/src/documents/search/_backend.py:542: size=8576 B (+8576 B), count=134 (+134), average=64 B
|
||||
/home/trenton/Documents/projects/paperless-ngx/src/documents/search/_backend.py:522: size=4800 B (+4800 B), count=200 (+200), average=24 B
|
||||
/home/trenton/Documents/projects/paperless-ngx/src/documents/search/_backend.py:515: size=2376 B (+2376 B), count=99 (+99), average=24 B
|
||||
============================================================
|
||||
|
||||
PASSED
|
||||
src/documents/tests/test_search_profiling.py::TestSearchProfilingBaseline::test_profile_backend_search_single_page
|
||||
============================================================
|
||||
Profile: BEFORE — backend.search(page_size=25)
|
||||
============================================================
|
||||
Wall time: 0.0070s
|
||||
Queries: 0 (0.0000s)
|
||||
Memory delta: 5.9 KiB
|
||||
Peak memory: 11.3 KiB
|
||||
|
||||
Top 5 allocations:
|
||||
/home/trenton/Documents/projects/paperless-ngx/src/documents/search/_backend.py:557: size=2275 B (+2275 B), count=25 (+25), average=91 B
|
||||
/home/trenton/Documents/projects/paperless-ngx/src/documents/search/_backend.py:575: size=1600 B (+1600 B), count=25 (+25), average=64 B
|
||||
/home/trenton/.local/share/uv/python/cpython-3.14.3-linux-x86_64-gnu/lib/python3.14/weakref.py:73: size=1280 B (+1280 B), count=20 (+20), average=64 B
|
||||
/home/trenton/Documents/projects/paperless-ngx/src/documents/search/_backend.py:574: size=256 B (+256 B), count=1 (+1), average=256 B
|
||||
/home/trenton/.local/share/uv/python/cpython-3.14.3-linux-x86_64-gnu/lib/python3.14/tracemalloc.py:560: size=240 B (+240 B), count=1 (+1), average=240 B
|
||||
============================================================
|
||||
|
||||
PASSEDDestroying test database for alias 'default'...
|
||||
|
||||
|
||||
======================== 6 passed in 241.83s (0:04:01) =========================
|
||||
@@ -804,13 +804,20 @@ contract you signed 8 years ago).
|
||||
|
||||
When you search paperless for a document, it tries to match this query
|
||||
against your documents. Paperless will look for matching documents by
|
||||
inspecting their content, title, correspondent, type and tags. Paperless
|
||||
returns a scored list of results, so that documents matching your query
|
||||
better will appear further up in the search results.
|
||||
inspecting their content, title, correspondent, type, tags, notes, and
|
||||
custom field values. Paperless returns a scored list of results, so that
|
||||
documents matching your query better will appear further up in the search
|
||||
results.
|
||||
|
||||
By default, paperless returns only documents which contain all words
|
||||
typed in the search bar. However, paperless also offers advanced search
|
||||
syntax if you want to drill down the results further.
|
||||
typed in the search bar. A few things to know about how matching works:
|
||||
|
||||
- **Word-order-independent**: "invoice unpaid" and "unpaid invoice" return the same results.
|
||||
- **Accent-insensitive**: searching `resume` also finds `résumé`, `cafe` finds `café`.
|
||||
- **Separator-agnostic**: punctuation and separators are stripped during indexing, so
|
||||
searching a partial number like `1312` finds documents containing `A-1312/B`.
|
||||
|
||||
Paperless also offers advanced search syntax if you want to drill down further.
|
||||
|
||||
Matching documents with logical expressions:
|
||||
|
||||
@@ -839,18 +846,69 @@ Matching inexact words:
|
||||
produ*name
|
||||
```
|
||||
|
||||
Matching natural date keywords:
|
||||
|
||||
```
|
||||
added:today
|
||||
modified:yesterday
|
||||
created:this_week
|
||||
added:last_month
|
||||
modified:this_year
|
||||
```
|
||||
|
||||
Supported date keywords: `today`, `yesterday`, `this_week`, `last_week`,
|
||||
`this_month`, `last_month`, `this_year`, `last_year`.
|
||||
|
||||
#### Searching custom fields
|
||||
|
||||
Custom field values are included in the full-text index, so a plain search
|
||||
already matches documents whose custom field values contain your search terms.
|
||||
To narrow by field name or value specifically:
|
||||
|
||||
```
|
||||
custom_fields.value:policy
|
||||
custom_fields.name:"Contract Number"
|
||||
custom_fields.name:Insurance custom_fields.value:policy
|
||||
```
|
||||
|
||||
- `custom_fields.value` matches against the value of any custom field.
|
||||
- `custom_fields.name` matches the name of the field (use quotes for multi-word names).
|
||||
- Combine both to find documents where a specific named field contains a specific value.
|
||||
|
||||
Because separators are stripped during indexing, individual parts of formatted
|
||||
codes are searchable on their own. A value stored as `A-1312/99.50` produces the
|
||||
tokens `a`, `1312`, `99`, `50` — each searchable independently:
|
||||
|
||||
```
|
||||
custom_fields.value:1312
|
||||
custom_fields.name:"Contract Number" custom_fields.value:1312
|
||||
```
|
||||
|
||||
!!! note
|
||||
|
||||
Inexact terms are hard for search indexes. These queries might take a
|
||||
while to execute. That's why paperless offers auto complete and query
|
||||
correction.
|
||||
Custom date fields do not support relative date syntax (e.g. `[now to 2 weeks]`).
|
||||
For date ranges on custom date fields, use the document list filters in the web UI.
|
||||
|
||||
#### Searching notes
|
||||
|
||||
Notes content is included in full-text search automatically. To search
|
||||
by note author or content specifically:
|
||||
|
||||
```
|
||||
notes.user:alice
|
||||
notes.note:reminder
|
||||
notes.user:alice notes.note:insurance
|
||||
```
|
||||
|
||||
All of these constructs can be combined as you see fit. If you want to
|
||||
learn more about the query language used by paperless, paperless uses
|
||||
Whoosh's default query language. Head over to [Whoosh query
|
||||
language](https://whoosh.readthedocs.io/en/latest/querylang.html). For
|
||||
details on what date parsing utilities are available, see [Date
|
||||
parsing](https://whoosh.readthedocs.io/en/latest/dates.html#parsing-date-queries).
|
||||
learn more about the query language used by paperless, see the
|
||||
[Tantivy query language documentation](https://docs.rs/tantivy/latest/tantivy/query/struct.QueryParser.html).
|
||||
|
||||
!!! note
|
||||
|
||||
Fuzzy (approximate) matching can be enabled by setting
|
||||
[`PAPERLESS_ADVANCED_FUZZY_SEARCH_THRESHOLD`](configuration.md#PAPERLESS_ADVANCED_FUZZY_SEARCH_THRESHOLD).
|
||||
When enabled, paperless will include near-miss results ranked below exact matches.
|
||||
|
||||
## Keyboard shortcuts / hotkeys
|
||||
|
||||
|
||||
@@ -23,7 +23,8 @@
|
||||
|
||||
# Security and hosting
|
||||
|
||||
#PAPERLESS_SECRET_KEY=change-me
|
||||
# Required. Generate with: python3 -c "import secrets; print(secrets.token_urlsafe(64))"
|
||||
PAPERLESS_SECRET_KEY=change-me
|
||||
#PAPERLESS_URL=https://example.com
|
||||
#PAPERLESS_CSRF_TRUSTED_ORIGINS=https://example.com # can be set using PAPERLESS_URL
|
||||
#PAPERLESS_ALLOWED_HOSTS=example.com,www.example.com # can be set using PAPERLESS_URL
|
||||
|
||||
204
pyproject.toml
204
pyproject.toml
@@ -13,7 +13,6 @@ classifiers = [
|
||||
]
|
||||
# TODO: Move certain things to groups and then utilize that further
|
||||
# This will allow testing to not install a webserver, mysql, etc
|
||||
|
||||
dependencies = [
|
||||
"azure-ai-documentintelligence>=1.0.2",
|
||||
"babel>=2.17",
|
||||
@@ -47,7 +46,7 @@ dependencies = [
|
||||
"faiss-cpu>=1.10",
|
||||
"filelock~=3.25.2",
|
||||
"flower~=2.0.1",
|
||||
"gotenberg-client~=0.13.1",
|
||||
"gotenberg-client~=0.14.0",
|
||||
"httpx-oauth~=0.16",
|
||||
"ijson>=3.2",
|
||||
"imap-tools~=1.11.0",
|
||||
@@ -60,7 +59,7 @@ dependencies = [
|
||||
"llama-index-llms-openai>=0.6.13",
|
||||
"llama-index-vector-stores-faiss>=0.5.2",
|
||||
"nltk~=3.9.1",
|
||||
"ocrmypdf~=17.3.0",
|
||||
"ocrmypdf~=17.4.0",
|
||||
"openai>=1.76",
|
||||
"pathvalidate~=3.3.1",
|
||||
"pdf2image~=1.17.0",
|
||||
@@ -75,39 +74,40 @@ dependencies = [
|
||||
"scikit-learn~=1.8.0",
|
||||
"sentence-transformers>=4.1",
|
||||
"setproctitle~=1.3.4",
|
||||
"tika-client~=0.10.0",
|
||||
"tantivy>=0.25.1",
|
||||
"tika-client~=0.11.0",
|
||||
"torch~=2.10.0",
|
||||
"watchfiles>=1.1.1",
|
||||
"whitenoise~=6.11",
|
||||
"whoosh-reloaded>=2.7.5",
|
||||
"zxing-cpp~=3.0.0",
|
||||
]
|
||||
|
||||
optional-dependencies.mariadb = [
|
||||
[project.optional-dependencies]
|
||||
mariadb = [
|
||||
"mysqlclient~=2.2.7",
|
||||
]
|
||||
optional-dependencies.postgres = [
|
||||
postgres = [
|
||||
"psycopg[c,pool]==3.3",
|
||||
# Direct dependency for proper resolution of the pre-built wheels
|
||||
"psycopg-c==3.3",
|
||||
"psycopg-pool==3.3",
|
||||
]
|
||||
optional-dependencies.webserver = [
|
||||
webserver = [
|
||||
"granian[uvloop]~=2.7.0",
|
||||
]
|
||||
|
||||
[dependency-groups]
|
||||
|
||||
dev = [
|
||||
{ "include-group" = "docs" },
|
||||
{ "include-group" = "testing" },
|
||||
{ "include-group" = "lint" },
|
||||
{ include-group = "docs" },
|
||||
{ include-group = "lint" },
|
||||
{ include-group = "testing" },
|
||||
]
|
||||
|
||||
docs = [
|
||||
"zensical>=0.0.21",
|
||||
]
|
||||
|
||||
lint = [
|
||||
"prek~=0.3.0",
|
||||
"ruff~=0.15.0",
|
||||
]
|
||||
testing = [
|
||||
"daphne",
|
||||
"factory-boy~=3.3.1",
|
||||
@@ -119,17 +119,12 @@ testing = [
|
||||
"pytest-env~=1.5.0",
|
||||
"pytest-httpx",
|
||||
"pytest-mock~=3.15.1",
|
||||
#"pytest-randomly~=4.0.1",
|
||||
# "pytest-randomly~=4.0.1",
|
||||
"pytest-rerunfailures~=16.1",
|
||||
"pytest-sugar",
|
||||
"pytest-xdist~=3.8.0",
|
||||
"time-machine>=2.13",
|
||||
]
|
||||
|
||||
lint = [
|
||||
"prek~=0.3.0",
|
||||
"ruff~=0.15.0",
|
||||
]
|
||||
|
||||
typing = [
|
||||
"celery-types",
|
||||
"django-filter-stubs",
|
||||
@@ -154,24 +149,21 @@ typing = [
|
||||
|
||||
[tool.uv]
|
||||
required-version = ">=0.9.0"
|
||||
package = false
|
||||
environments = [
|
||||
"sys_platform == 'darwin'",
|
||||
"sys_platform == 'linux'",
|
||||
]
|
||||
|
||||
package = false
|
||||
[[tool.uv.index]]
|
||||
name = "pytorch-cpu"
|
||||
url = "https://download.pytorch.org/whl/cpu"
|
||||
explicit = true
|
||||
|
||||
[tool.uv.sources]
|
||||
# Markers are chosen to select these almost exclusively when building the Docker image
|
||||
psycopg-c = [
|
||||
{ url = "https://github.com/paperless-ngx/builder/releases/download/psycopg-trixie-3.3.0/psycopg_c-3.3.0-cp312-cp312-linux_x86_64.whl", marker = "sys_platform == 'linux' and platform_machine == 'x86_64' and python_version == '3.12'" },
|
||||
{ url = "https://github.com/paperless-ngx/builder/releases/download/psycopg-trixie-3.3.0/psycopg_c-3.3.0-cp312-cp312-linux_aarch64.whl", marker = "sys_platform == 'linux' and platform_machine == 'aarch64' and python_version == '3.12'" },
|
||||
]
|
||||
|
||||
torch = [
|
||||
{ index = "pytorch-cpu" },
|
||||
]
|
||||
@@ -186,10 +178,10 @@ respect-gitignore = true
|
||||
# https://docs.astral.sh/ruff/settings/
|
||||
fix = true
|
||||
show-fixes = true
|
||||
|
||||
output-format = "grouped"
|
||||
[tool.ruff.lint]
|
||||
# https://docs.astral.sh/ruff/rules/
|
||||
lint.extend-select = [
|
||||
extend-select = [
|
||||
"COM", # https://docs.astral.sh/ruff/rules/#flake8-commas-com
|
||||
"DJ", # https://docs.astral.sh/ruff/rules/#flake8-django-dj
|
||||
"EXE", # https://docs.astral.sh/ruff/rules/#flake8-executable-exe
|
||||
@@ -214,115 +206,52 @@ lint.extend-select = [
|
||||
"UP", # https://docs.astral.sh/ruff/rules/#pyupgrade-up
|
||||
"W", # https://docs.astral.sh/ruff/rules/#pycodestyle-e-w
|
||||
]
|
||||
lint.ignore = [
|
||||
ignore = [
|
||||
"DJ001",
|
||||
"PLC0415",
|
||||
"RUF012",
|
||||
"SIM105",
|
||||
]
|
||||
# Migrations
|
||||
lint.per-file-ignores."*/migrations/*.py" = [
|
||||
per-file-ignores."*/migrations/*.py" = [
|
||||
"E501",
|
||||
"SIM",
|
||||
"T201",
|
||||
]
|
||||
# Testing
|
||||
lint.per-file-ignores."*/tests/*.py" = [
|
||||
per-file-ignores."*/tests/*.py" = [
|
||||
"E501",
|
||||
"SIM117",
|
||||
]
|
||||
lint.per-file-ignores.".github/scripts/*.py" = [
|
||||
per-file-ignores.".github/scripts/*.py" = [
|
||||
"E501",
|
||||
"INP001",
|
||||
"SIM117",
|
||||
]
|
||||
# Docker specific
|
||||
lint.per-file-ignores."docker/rootfs/usr/local/bin/wait-for-redis.py" = [
|
||||
per-file-ignores."docker/rootfs/usr/local/bin/wait-for-redis.py" = [
|
||||
"INP001",
|
||||
"T201",
|
||||
]
|
||||
lint.per-file-ignores."docker/wait-for-redis.py" = [
|
||||
per-file-ignores."docker/wait-for-redis.py" = [
|
||||
"INP001",
|
||||
"T201",
|
||||
]
|
||||
lint.per-file-ignores."src/documents/models.py" = [
|
||||
per-file-ignores."src/documents/models.py" = [
|
||||
"SIM115",
|
||||
]
|
||||
|
||||
lint.isort.force-single-line = true
|
||||
isort.force-single-line = true
|
||||
|
||||
[tool.codespell]
|
||||
write-changes = true
|
||||
ignore-words-list = "criterias,afterall,valeu,ureue,equest,ure,assertIn,Oktober,commitish"
|
||||
skip = "src-ui/src/locale/*,src-ui/pnpm-lock.yaml,src-ui/e2e/*,src/paperless_mail/tests/samples/*,src/paperless/tests/samples/mail/*,src/documents/tests/samples/*,*.po,*.json"
|
||||
skip = """\
|
||||
src-ui/src/locale/*,src-ui/pnpm-lock.yaml,src-ui/e2e/*,src/paperless_mail/tests/samples/*,src/paperless/tests/samples\
|
||||
/mail/*,src/documents/tests/samples/*,*.po,*.json\
|
||||
"""
|
||||
|
||||
[tool.pytest]
|
||||
minversion = "9.0"
|
||||
pythonpath = [ "src" ]
|
||||
|
||||
strict_config = true
|
||||
strict_markers = true
|
||||
strict_parametrization_ids = true
|
||||
strict_xfail = true
|
||||
|
||||
testpaths = [
|
||||
"src/documents/tests/",
|
||||
"src/paperless/tests/",
|
||||
"src/paperless_mail/tests/",
|
||||
"src/paperless_ai/tests",
|
||||
]
|
||||
|
||||
addopts = [
|
||||
"--pythonwarnings=all",
|
||||
"--cov",
|
||||
"--cov-report=html",
|
||||
"--cov-report=xml",
|
||||
"--numprocesses=auto",
|
||||
"--maxprocesses=16",
|
||||
"--dist=loadscope",
|
||||
"--durations=50",
|
||||
"--durations-min=0.5",
|
||||
"--junitxml=junit.xml",
|
||||
"-o",
|
||||
"junit_family=legacy",
|
||||
]
|
||||
|
||||
norecursedirs = [ "src/locale/", ".venv/", "src-ui/" ]
|
||||
|
||||
DJANGO_SETTINGS_MODULE = "paperless.settings"
|
||||
|
||||
markers = [
|
||||
"live: Integration tests requiring external services (Gotenberg, Tika, nginx, etc)",
|
||||
"nginx: Tests that make HTTP requests to the local nginx service",
|
||||
"gotenberg: Tests requiring Gotenberg service",
|
||||
"tika: Tests requiring Tika service",
|
||||
"greenmail: Tests requiring Greenmail service",
|
||||
"date_parsing: Tests which cover date parsing from content or filename",
|
||||
"management: Tests which cover management commands/functionality",
|
||||
]
|
||||
|
||||
[tool.pytest_env]
|
||||
PAPERLESS_DISABLE_DBHANDLER = "true"
|
||||
PAPERLESS_CACHE_BACKEND = "django.core.cache.backends.locmem.LocMemCache"
|
||||
PAPERLESS_CHANNELS_BACKEND = "channels.layers.InMemoryChannelLayer"
|
||||
|
||||
[tool.coverage.report]
|
||||
exclude_also = [
|
||||
"if settings.AUDIT_LOG_ENABLED:",
|
||||
"if AUDIT_LOG_ENABLED:",
|
||||
"if TYPE_CHECKING:",
|
||||
]
|
||||
|
||||
[tool.coverage.run]
|
||||
source = [
|
||||
"src/",
|
||||
]
|
||||
omit = [
|
||||
"*/tests/*",
|
||||
"manage.py",
|
||||
"paperless/wsgi.py",
|
||||
"paperless/auth.py",
|
||||
]
|
||||
[tool.pyproject-fmt]
|
||||
table_format = "long"
|
||||
|
||||
[tool.mypy]
|
||||
mypy_path = "src"
|
||||
@@ -345,6 +274,71 @@ python-platform = "linux"
|
||||
[tool.django-stubs]
|
||||
django_settings_module = "paperless.settings"
|
||||
|
||||
[tool.pytest]
|
||||
minversion = "9.0"
|
||||
pythonpath = [ "src" ]
|
||||
strict_config = true
|
||||
strict_markers = true
|
||||
strict_parametrization_ids = true
|
||||
strict_xfail = true
|
||||
testpaths = [
|
||||
"src/documents/tests/",
|
||||
"src/paperless/tests/",
|
||||
"src/paperless_mail/tests/",
|
||||
"src/paperless_ai/tests",
|
||||
]
|
||||
addopts = [
|
||||
"--pythonwarnings=all",
|
||||
"--cov",
|
||||
"--cov-report=html",
|
||||
"--cov-report=xml",
|
||||
"--numprocesses=auto",
|
||||
"--maxprocesses=16",
|
||||
"--dist=loadscope",
|
||||
"--durations=50",
|
||||
"--durations-min=0.5",
|
||||
"--junitxml=junit.xml",
|
||||
"-o",
|
||||
"junit_family=legacy",
|
||||
]
|
||||
norecursedirs = [ "src/locale/", ".venv/", "src-ui/" ]
|
||||
DJANGO_SETTINGS_MODULE = "paperless.settings"
|
||||
markers = [
|
||||
"live: Integration tests requiring external services (Gotenberg, Tika, nginx, etc)",
|
||||
"nginx: Tests that make HTTP requests to the local nginx service",
|
||||
"gotenberg: Tests requiring Gotenberg service",
|
||||
"tika: Tests requiring Tika service",
|
||||
"greenmail: Tests requiring Greenmail service",
|
||||
"date_parsing: Tests which cover date parsing from content or filename",
|
||||
"management: Tests which cover management commands/functionality",
|
||||
"search: Tests for the Tantivy search backend",
|
||||
]
|
||||
|
||||
[tool.pytest_env]
|
||||
PAPERLESS_SECRET_KEY = "test-secret-key-do-not-use-in-production"
|
||||
PAPERLESS_DISABLE_DBHANDLER = "true"
|
||||
PAPERLESS_CACHE_BACKEND = "django.core.cache.backends.locmem.LocMemCache"
|
||||
PAPERLESS_CHANNELS_BACKEND = "channels.layers.InMemoryChannelLayer"
|
||||
# I don't think anything hits this, but just in case, basically infinite
|
||||
PAPERLESS_TOKEN_THROTTLE_RATE = "1000/min"
|
||||
|
||||
[tool.coverage.report]
|
||||
exclude_also = [
|
||||
"if settings.AUDIT_LOG_ENABLED:",
|
||||
"if AUDIT_LOG_ENABLED:",
|
||||
"if TYPE_CHECKING:",
|
||||
]
|
||||
[tool.coverage.run]
|
||||
source = [
|
||||
"src/",
|
||||
]
|
||||
omit = [
|
||||
"*/tests/*",
|
||||
"manage.py",
|
||||
"paperless/wsgi.py",
|
||||
"paperless/auth.py",
|
||||
]
|
||||
|
||||
[tool.mypy-baseline]
|
||||
baseline_path = ".mypy-baseline.txt"
|
||||
sort_baseline = true
|
||||
|
||||
@@ -49,11 +49,11 @@ test('text filtering', async ({ page }) => {
|
||||
await page.getByRole('main').getByRole('combobox').click()
|
||||
await page.getByRole('main').getByRole('combobox').fill('test')
|
||||
await expect(page.locator('pngx-document-list')).toHaveText(/32 documents/)
|
||||
await expect(page).toHaveURL(/title_content=test/)
|
||||
await expect(page).toHaveURL(/text=test/)
|
||||
await page.getByRole('button', { name: 'Title & content' }).click()
|
||||
await page.getByRole('button', { name: 'Title', exact: true }).click()
|
||||
await expect(page.locator('pngx-document-list')).toHaveText(/9 documents/)
|
||||
await expect(page).toHaveURL(/title__icontains=test/)
|
||||
await expect(page).toHaveURL(/title_search=test/)
|
||||
await page.getByRole('button', { name: 'Title', exact: true }).click()
|
||||
await page.getByRole('button', { name: 'Advanced search' }).click()
|
||||
await expect(page).toHaveURL(/query=test/)
|
||||
|
||||
@@ -3545,7 +3545,7 @@
|
||||
"time": 1.091,
|
||||
"request": {
|
||||
"method": "GET",
|
||||
"url": "http://localhost:8000/api/documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true&title_content=test",
|
||||
"url": "http://localhost:8000/api/documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true&text=test",
|
||||
"httpVersion": "HTTP/1.1",
|
||||
"cookies": [],
|
||||
"headers": [
|
||||
@@ -3579,7 +3579,7 @@
|
||||
"value": "true"
|
||||
},
|
||||
{
|
||||
"name": "title_content",
|
||||
"name": "text",
|
||||
"value": "test"
|
||||
}
|
||||
],
|
||||
@@ -4303,7 +4303,7 @@
|
||||
"time": 0.603,
|
||||
"request": {
|
||||
"method": "GET",
|
||||
"url": "http://localhost:8000/api/documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true&title__icontains=test",
|
||||
"url": "http://localhost:8000/api/documents/?page=1&page_size=50&ordering=-created&truncate_content=true&include_selection_data=true&title_search=test",
|
||||
"httpVersion": "HTTP/1.1",
|
||||
"cookies": [],
|
||||
"headers": [
|
||||
@@ -4337,7 +4337,7 @@
|
||||
"value": "true"
|
||||
},
|
||||
{
|
||||
"name": "title__icontains",
|
||||
"name": "title_search",
|
||||
"value": "test"
|
||||
}
|
||||
],
|
||||
|
||||
@@ -5,14 +5,14 @@
|
||||
<trans-unit id="ngb.alert.close" datatype="html">
|
||||
<source>Close</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/alert/alert.ts</context>
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.6_@angular+core@21.2.6_@angular+_0766f480734948ad660a180d719522cc/node_modules/src/alert/alert.ts</context>
|
||||
<context context-type="linenumber">50</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="ngb.carousel.slide-number" datatype="html">
|
||||
<source> Slide <x id="INTERPOLATION" equiv-text="ueryList<NgbSli"/> of <x id="INTERPOLATION_1" equiv-text="EventSource = N"/> </source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/carousel/carousel.ts</context>
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.6_@angular+core@21.2.6_@angular+_0766f480734948ad660a180d719522cc/node_modules/src/carousel/carousel.ts</context>
|
||||
<context context-type="linenumber">131,135</context>
|
||||
</context-group>
|
||||
<note priority="1" from="description">Currently selected slide number read by screen reader</note>
|
||||
@@ -20,114 +20,114 @@
|
||||
<trans-unit id="ngb.carousel.previous" datatype="html">
|
||||
<source>Previous</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/carousel/carousel.ts</context>
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.6_@angular+core@21.2.6_@angular+_0766f480734948ad660a180d719522cc/node_modules/src/carousel/carousel.ts</context>
|
||||
<context context-type="linenumber">159,162</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="ngb.carousel.next" datatype="html">
|
||||
<source>Next</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/carousel/carousel.ts</context>
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.6_@angular+core@21.2.6_@angular+_0766f480734948ad660a180d719522cc/node_modules/src/carousel/carousel.ts</context>
|
||||
<context context-type="linenumber">202,203</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="ngb.datepicker.select-month" datatype="html">
|
||||
<source>Select month</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/datepicker/datepicker-navigation-select.ts</context>
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.6_@angular+core@21.2.6_@angular+_0766f480734948ad660a180d719522cc/node_modules/src/datepicker/datepicker-navigation-select.ts</context>
|
||||
<context context-type="linenumber">91</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/datepicker/datepicker-navigation-select.ts</context>
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.6_@angular+core@21.2.6_@angular+_0766f480734948ad660a180d719522cc/node_modules/src/datepicker/datepicker-navigation-select.ts</context>
|
||||
<context context-type="linenumber">91</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="ngb.datepicker.select-year" datatype="html">
|
||||
<source>Select year</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/datepicker/datepicker-navigation-select.ts</context>
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.6_@angular+core@21.2.6_@angular+_0766f480734948ad660a180d719522cc/node_modules/src/datepicker/datepicker-navigation-select.ts</context>
|
||||
<context context-type="linenumber">91</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/datepicker/datepicker-navigation-select.ts</context>
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.6_@angular+core@21.2.6_@angular+_0766f480734948ad660a180d719522cc/node_modules/src/datepicker/datepicker-navigation-select.ts</context>
|
||||
<context context-type="linenumber">91</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="ngb.datepicker.previous-month" datatype="html">
|
||||
<source>Previous month</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/datepicker/datepicker-navigation.ts</context>
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.6_@angular+core@21.2.6_@angular+_0766f480734948ad660a180d719522cc/node_modules/src/datepicker/datepicker-navigation.ts</context>
|
||||
<context context-type="linenumber">83,85</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/datepicker/datepicker-navigation.ts</context>
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.6_@angular+core@21.2.6_@angular+_0766f480734948ad660a180d719522cc/node_modules/src/datepicker/datepicker-navigation.ts</context>
|
||||
<context context-type="linenumber">112</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="ngb.datepicker.next-month" datatype="html">
|
||||
<source>Next month</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/datepicker/datepicker-navigation.ts</context>
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.6_@angular+core@21.2.6_@angular+_0766f480734948ad660a180d719522cc/node_modules/src/datepicker/datepicker-navigation.ts</context>
|
||||
<context context-type="linenumber">112</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/datepicker/datepicker-navigation.ts</context>
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.6_@angular+core@21.2.6_@angular+_0766f480734948ad660a180d719522cc/node_modules/src/datepicker/datepicker-navigation.ts</context>
|
||||
<context context-type="linenumber">112</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="ngb.pagination.first" datatype="html">
|
||||
<source>««</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/pagination/pagination-config.ts</context>
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.6_@angular+core@21.2.6_@angular+_0766f480734948ad660a180d719522cc/node_modules/src/pagination/pagination-config.ts</context>
|
||||
<context context-type="linenumber">20</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="ngb.pagination.previous" datatype="html">
|
||||
<source>«</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/pagination/pagination-config.ts</context>
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.6_@angular+core@21.2.6_@angular+_0766f480734948ad660a180d719522cc/node_modules/src/pagination/pagination-config.ts</context>
|
||||
<context context-type="linenumber">20</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="ngb.pagination.next" datatype="html">
|
||||
<source>»</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/pagination/pagination-config.ts</context>
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.6_@angular+core@21.2.6_@angular+_0766f480734948ad660a180d719522cc/node_modules/src/pagination/pagination-config.ts</context>
|
||||
<context context-type="linenumber">20</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="ngb.pagination.last" datatype="html">
|
||||
<source>»»</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/pagination/pagination-config.ts</context>
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.6_@angular+core@21.2.6_@angular+_0766f480734948ad660a180d719522cc/node_modules/src/pagination/pagination-config.ts</context>
|
||||
<context context-type="linenumber">20</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="ngb.pagination.first-aria" datatype="html">
|
||||
<source>First</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/pagination/pagination-config.ts</context>
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.6_@angular+core@21.2.6_@angular+_0766f480734948ad660a180d719522cc/node_modules/src/pagination/pagination-config.ts</context>
|
||||
<context context-type="linenumber">20</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="ngb.pagination.previous-aria" datatype="html">
|
||||
<source>Previous</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/pagination/pagination-config.ts</context>
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.6_@angular+core@21.2.6_@angular+_0766f480734948ad660a180d719522cc/node_modules/src/pagination/pagination-config.ts</context>
|
||||
<context context-type="linenumber">20</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="ngb.pagination.next-aria" datatype="html">
|
||||
<source>Next</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/pagination/pagination-config.ts</context>
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.6_@angular+core@21.2.6_@angular+_0766f480734948ad660a180d719522cc/node_modules/src/pagination/pagination-config.ts</context>
|
||||
<context context-type="linenumber">20</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="ngb.pagination.last-aria" datatype="html">
|
||||
<source>Last</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/pagination/pagination-config.ts</context>
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.6_@angular+core@21.2.6_@angular+_0766f480734948ad660a180d719522cc/node_modules/src/pagination/pagination-config.ts</context>
|
||||
<context context-type="linenumber">20</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
@@ -135,105 +135,105 @@
|
||||
<source><x id="INTERPOLATION" equiv-text="barConfig);
|
||||
pu"/></source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/progressbar/progressbar.ts</context>
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.6_@angular+core@21.2.6_@angular+_0766f480734948ad660a180d719522cc/node_modules/src/progressbar/progressbar.ts</context>
|
||||
<context context-type="linenumber">41,42</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="ngb.timepicker.HH" datatype="html">
|
||||
<source>HH</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/timepicker/timepicker-config.ts</context>
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.6_@angular+core@21.2.6_@angular+_0766f480734948ad660a180d719522cc/node_modules/src/timepicker/timepicker-config.ts</context>
|
||||
<context context-type="linenumber">21</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="ngb.timepicker.hours" datatype="html">
|
||||
<source>Hours</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/timepicker/timepicker-config.ts</context>
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.6_@angular+core@21.2.6_@angular+_0766f480734948ad660a180d719522cc/node_modules/src/timepicker/timepicker-config.ts</context>
|
||||
<context context-type="linenumber">21</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="ngb.timepicker.MM" datatype="html">
|
||||
<source>MM</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/timepicker/timepicker-config.ts</context>
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.6_@angular+core@21.2.6_@angular+_0766f480734948ad660a180d719522cc/node_modules/src/timepicker/timepicker-config.ts</context>
|
||||
<context context-type="linenumber">21</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="ngb.timepicker.minutes" datatype="html">
|
||||
<source>Minutes</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/timepicker/timepicker-config.ts</context>
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.6_@angular+core@21.2.6_@angular+_0766f480734948ad660a180d719522cc/node_modules/src/timepicker/timepicker-config.ts</context>
|
||||
<context context-type="linenumber">21</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="ngb.timepicker.increment-hours" datatype="html">
|
||||
<source>Increment hours</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/timepicker/timepicker-config.ts</context>
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.6_@angular+core@21.2.6_@angular+_0766f480734948ad660a180d719522cc/node_modules/src/timepicker/timepicker-config.ts</context>
|
||||
<context context-type="linenumber">21</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="ngb.timepicker.decrement-hours" datatype="html">
|
||||
<source>Decrement hours</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/timepicker/timepicker-config.ts</context>
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.6_@angular+core@21.2.6_@angular+_0766f480734948ad660a180d719522cc/node_modules/src/timepicker/timepicker-config.ts</context>
|
||||
<context context-type="linenumber">21</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="ngb.timepicker.increment-minutes" datatype="html">
|
||||
<source>Increment minutes</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/timepicker/timepicker-config.ts</context>
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.6_@angular+core@21.2.6_@angular+_0766f480734948ad660a180d719522cc/node_modules/src/timepicker/timepicker-config.ts</context>
|
||||
<context context-type="linenumber">21</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="ngb.timepicker.decrement-minutes" datatype="html">
|
||||
<source>Decrement minutes</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/timepicker/timepicker-config.ts</context>
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.6_@angular+core@21.2.6_@angular+_0766f480734948ad660a180d719522cc/node_modules/src/timepicker/timepicker-config.ts</context>
|
||||
<context context-type="linenumber">21</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="ngb.timepicker.SS" datatype="html">
|
||||
<source>SS</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/timepicker/timepicker-config.ts</context>
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.6_@angular+core@21.2.6_@angular+_0766f480734948ad660a180d719522cc/node_modules/src/timepicker/timepicker-config.ts</context>
|
||||
<context context-type="linenumber">21</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="ngb.timepicker.seconds" datatype="html">
|
||||
<source>Seconds</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/timepicker/timepicker-config.ts</context>
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.6_@angular+core@21.2.6_@angular+_0766f480734948ad660a180d719522cc/node_modules/src/timepicker/timepicker-config.ts</context>
|
||||
<context context-type="linenumber">21</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="ngb.timepicker.increment-seconds" datatype="html">
|
||||
<source>Increment seconds</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/timepicker/timepicker-config.ts</context>
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.6_@angular+core@21.2.6_@angular+_0766f480734948ad660a180d719522cc/node_modules/src/timepicker/timepicker-config.ts</context>
|
||||
<context context-type="linenumber">21</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="ngb.timepicker.decrement-seconds" datatype="html">
|
||||
<source>Decrement seconds</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/timepicker/timepicker-config.ts</context>
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.6_@angular+core@21.2.6_@angular+_0766f480734948ad660a180d719522cc/node_modules/src/timepicker/timepicker-config.ts</context>
|
||||
<context context-type="linenumber">21</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="ngb.timepicker.PM" datatype="html">
|
||||
<source><x id="INTERPOLATION"/></source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/timepicker/timepicker-config.ts</context>
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.6_@angular+core@21.2.6_@angular+_0766f480734948ad660a180d719522cc/node_modules/src/timepicker/timepicker-config.ts</context>
|
||||
<context context-type="linenumber">21</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="ngb.toast.close-aria" datatype="html">
|
||||
<source>Close</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.4_@angular+core@21.2.4_@angular+_a674c967733fd102e5fef61ea5e6b837/node_modules/src/toast/toast-config.ts</context>
|
||||
<context context-type="sourcefile">node_modules/.pnpm/@ng-bootstrap+ng-bootstrap@20.0.0_@angular+common@21.2.6_@angular+core@21.2.6_@angular+_0766f480734948ad660a180d719522cc/node_modules/src/toast/toast-config.ts</context>
|
||||
<context context-type="linenumber">54</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
@@ -1081,7 +1081,7 @@
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
|
||||
<context context-type="linenumber">205</context>
|
||||
<context context-type="linenumber">203</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="8901931207592071833" datatype="html">
|
||||
@@ -3027,10 +3027,6 @@
|
||||
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.html</context>
|
||||
<context context-type="linenumber">84</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
|
||||
<context context-type="linenumber">200</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/manage/document-attributes/document-attributes.component.ts</context>
|
||||
<context context-type="linenumber">129</context>
|
||||
@@ -7504,7 +7500,7 @@
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
|
||||
<context context-type="linenumber">192</context>
|
||||
<context context-type="linenumber">194</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/data/document.ts</context>
|
||||
@@ -8817,7 +8813,7 @@
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
|
||||
<context context-type="linenumber">197</context>
|
||||
<context context-type="linenumber">199</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/data/document.ts</context>
|
||||
@@ -9020,56 +9016,63 @@
|
||||
<source>Title & content</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
|
||||
<context context-type="linenumber">195</context>
|
||||
<context context-type="linenumber">197</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="7408932238599462499" datatype="html">
|
||||
<source>File type</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
|
||||
<context context-type="linenumber">202</context>
|
||||
<context context-type="linenumber">200</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="8492379284173601938" datatype="html">
|
||||
<source>Custom fields (Deprecated)</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
|
||||
<context context-type="linenumber">210</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="2649431021108393503" datatype="html">
|
||||
<source>More like</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
|
||||
<context context-type="linenumber">211</context>
|
||||
<context context-type="linenumber">215</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="3697582909018473071" datatype="html">
|
||||
<source>equals</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
|
||||
<context context-type="linenumber">217</context>
|
||||
<context context-type="linenumber">221</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="5325481293405718739" datatype="html">
|
||||
<source>is empty</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
|
||||
<context context-type="linenumber">221</context>
|
||||
<context context-type="linenumber">225</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6166785695326182482" datatype="html">
|
||||
<source>is not empty</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
|
||||
<context context-type="linenumber">225</context>
|
||||
<context context-type="linenumber">229</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="4686622206659266699" datatype="html">
|
||||
<source>greater than</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
|
||||
<context context-type="linenumber">229</context>
|
||||
<context context-type="linenumber">233</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="8014012170270529279" datatype="html">
|
||||
<source>less than</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
|
||||
<context context-type="linenumber">233</context>
|
||||
<context context-type="linenumber">237</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="5195932016807797291" datatype="html">
|
||||
@@ -9078,14 +9081,14 @@
|
||||
)?.name"/></source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
|
||||
<context context-type="linenumber">274,278</context>
|
||||
<context context-type="linenumber">278,282</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="8170755470576301659" datatype="html">
|
||||
<source>Without correspondent</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
|
||||
<context context-type="linenumber">280</context>
|
||||
<context context-type="linenumber">284</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="317796810569008208" datatype="html">
|
||||
@@ -9094,14 +9097,14 @@
|
||||
)?.name"/></source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
|
||||
<context context-type="linenumber">286,290</context>
|
||||
<context context-type="linenumber">290,294</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="4362173610367509215" datatype="html">
|
||||
<source>Without document type</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
|
||||
<context context-type="linenumber">292</context>
|
||||
<context context-type="linenumber">296</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="232202047340644471" datatype="html">
|
||||
@@ -9110,70 +9113,77 @@
|
||||
)?.name"/></source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
|
||||
<context context-type="linenumber">298,302</context>
|
||||
<context context-type="linenumber">302,306</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="1562820715074533164" datatype="html">
|
||||
<source>Without storage path</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
|
||||
<context context-type="linenumber">304</context>
|
||||
<context context-type="linenumber">308</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="8180755793012580465" datatype="html">
|
||||
<source>Tag: <x id="PH" equiv-text="this.tagSelectionModel.items.find((t) => t.id == +rule.value)?.name"/></source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
|
||||
<context context-type="linenumber">308,310</context>
|
||||
<context context-type="linenumber">312,314</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6494566478302448576" datatype="html">
|
||||
<source>Without any tag</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
|
||||
<context context-type="linenumber">314</context>
|
||||
<context context-type="linenumber">318</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="8644099678903817943" datatype="html">
|
||||
<source>Custom fields query</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
|
||||
<context context-type="linenumber">318</context>
|
||||
<context context-type="linenumber">322</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6523384805359286307" datatype="html">
|
||||
<source>Title: <x id="PH" equiv-text="rule.value"/></source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
|
||||
<context context-type="linenumber">321</context>
|
||||
<context context-type="linenumber">326</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="2578442194354349017" datatype="html">
|
||||
<source>Title & content: <x id="PH" equiv-text="rule.value"/></source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
|
||||
<context context-type="linenumber">330</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="1872523635812236432" datatype="html">
|
||||
<source>ASN: <x id="PH" equiv-text="rule.value"/></source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
|
||||
<context context-type="linenumber">324</context>
|
||||
<context context-type="linenumber">333</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="102674688969746976" datatype="html">
|
||||
<source>Owner: <x id="PH" equiv-text="rule.value"/></source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
|
||||
<context context-type="linenumber">327</context>
|
||||
<context context-type="linenumber">336</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="3550877650686009106" datatype="html">
|
||||
<source>Owner not in: <x id="PH" equiv-text="rule.value"/></source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
|
||||
<context context-type="linenumber">330</context>
|
||||
<context context-type="linenumber">339</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="1082034558646673343" datatype="html">
|
||||
<source>Without an owner</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/filter-editor/filter-editor.component.ts</context>
|
||||
<context context-type="linenumber">333</context>
|
||||
<context context-type="linenumber">342</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="7210076240260527720" datatype="html">
|
||||
|
||||
@@ -11,15 +11,15 @@
|
||||
},
|
||||
"private": true,
|
||||
"dependencies": {
|
||||
"@angular/cdk": "^21.2.2",
|
||||
"@angular/common": "~21.2.4",
|
||||
"@angular/compiler": "~21.2.4",
|
||||
"@angular/core": "~21.2.4",
|
||||
"@angular/forms": "~21.2.4",
|
||||
"@angular/localize": "~21.2.4",
|
||||
"@angular/platform-browser": "~21.2.4",
|
||||
"@angular/platform-browser-dynamic": "~21.2.4",
|
||||
"@angular/router": "~21.2.4",
|
||||
"@angular/cdk": "^21.2.4",
|
||||
"@angular/common": "~21.2.6",
|
||||
"@angular/compiler": "~21.2.6",
|
||||
"@angular/core": "~21.2.6",
|
||||
"@angular/forms": "~21.2.6",
|
||||
"@angular/localize": "~21.2.6",
|
||||
"@angular/platform-browser": "~21.2.6",
|
||||
"@angular/platform-browser-dynamic": "~21.2.6",
|
||||
"@angular/router": "~21.2.6",
|
||||
"@ng-bootstrap/ng-bootstrap": "^20.0.0",
|
||||
"@ng-select/ng-select": "^21.5.2",
|
||||
"@ngneat/dirty-check-forms": "^3.0.3",
|
||||
@@ -29,7 +29,7 @@
|
||||
"mime-names": "^1.0.0",
|
||||
"ngx-bootstrap-icons": "^1.9.3",
|
||||
"ngx-color": "^10.1.0",
|
||||
"ngx-cookie-service": "^21.1.0",
|
||||
"ngx-cookie-service": "^21.3.1",
|
||||
"ngx-device-detector": "^11.0.0",
|
||||
"ngx-ui-tour-ng-bootstrap": "^18.0.0",
|
||||
"pdfjs-dist": "^5.4.624",
|
||||
@@ -42,24 +42,24 @@
|
||||
"devDependencies": {
|
||||
"@angular-builders/custom-webpack": "^21.0.3",
|
||||
"@angular-builders/jest": "^21.0.3",
|
||||
"@angular-devkit/core": "^21.2.2",
|
||||
"@angular-devkit/schematics": "^21.2.2",
|
||||
"@angular-eslint/builder": "21.3.0",
|
||||
"@angular-eslint/eslint-plugin": "21.3.0",
|
||||
"@angular-eslint/eslint-plugin-template": "21.3.0",
|
||||
"@angular-eslint/schematics": "21.3.0",
|
||||
"@angular-eslint/template-parser": "21.3.0",
|
||||
"@angular/build": "^21.2.2",
|
||||
"@angular/cli": "~21.2.2",
|
||||
"@angular/compiler-cli": "~21.2.4",
|
||||
"@angular-devkit/core": "^21.2.3",
|
||||
"@angular-devkit/schematics": "^21.2.3",
|
||||
"@angular-eslint/builder": "21.3.1",
|
||||
"@angular-eslint/eslint-plugin": "21.3.1",
|
||||
"@angular-eslint/eslint-plugin-template": "21.3.1",
|
||||
"@angular-eslint/schematics": "21.3.1",
|
||||
"@angular-eslint/template-parser": "21.3.1",
|
||||
"@angular/build": "^21.2.3",
|
||||
"@angular/cli": "~21.2.3",
|
||||
"@angular/compiler-cli": "~21.2.6",
|
||||
"@codecov/webpack-plugin": "^1.9.1",
|
||||
"@playwright/test": "^1.58.2",
|
||||
"@types/jest": "^30.0.0",
|
||||
"@types/node": "^25.4.0",
|
||||
"@typescript-eslint/eslint-plugin": "^8.57.0",
|
||||
"@typescript-eslint/parser": "^8.57.0",
|
||||
"@typescript-eslint/utils": "^8.57.0",
|
||||
"eslint": "^10.0.3",
|
||||
"@types/node": "^25.5.0",
|
||||
"@typescript-eslint/eslint-plugin": "^8.57.2",
|
||||
"@typescript-eslint/parser": "^8.57.2",
|
||||
"@typescript-eslint/utils": "^8.57.2",
|
||||
"eslint": "^10.1.0",
|
||||
"jest": "30.3.0",
|
||||
"jest-environment-jsdom": "^30.3.0",
|
||||
"jest-junit": "^16.0.0",
|
||||
|
||||
1723
src-ui/pnpm-lock.yaml
generated
1723
src-ui/pnpm-lock.yaml
generated
File diff suppressed because it is too large
Load Diff
@@ -24,7 +24,7 @@ import {
|
||||
FILTER_HAS_DOCUMENT_TYPE_ANY,
|
||||
FILTER_HAS_STORAGE_PATH_ANY,
|
||||
FILTER_HAS_TAGS_ALL,
|
||||
FILTER_TITLE_CONTENT,
|
||||
FILTER_SIMPLE_TEXT,
|
||||
} from 'src/app/data/filter-rule-type'
|
||||
import { GlobalSearchType, SETTINGS_KEYS } from 'src/app/data/ui-settings'
|
||||
import { DocumentListViewService } from 'src/app/services/document-list-view.service'
|
||||
@@ -545,7 +545,7 @@ describe('GlobalSearchComponent', () => {
|
||||
component.query = 'test'
|
||||
component.runFullSearch()
|
||||
expect(qfSpy).toHaveBeenCalledWith([
|
||||
{ rule_type: FILTER_TITLE_CONTENT, value: 'test' },
|
||||
{ rule_type: FILTER_SIMPLE_TEXT, value: 'test' },
|
||||
])
|
||||
|
||||
settingsService.set(
|
||||
|
||||
@@ -25,7 +25,7 @@ import {
|
||||
FILTER_HAS_DOCUMENT_TYPE_ANY,
|
||||
FILTER_HAS_STORAGE_PATH_ANY,
|
||||
FILTER_HAS_TAGS_ALL,
|
||||
FILTER_TITLE_CONTENT,
|
||||
FILTER_SIMPLE_TEXT,
|
||||
} from 'src/app/data/filter-rule-type'
|
||||
import { ObjectWithId } from 'src/app/data/object-with-id'
|
||||
import { GlobalSearchType, SETTINGS_KEYS } from 'src/app/data/ui-settings'
|
||||
@@ -410,7 +410,7 @@ export class GlobalSearchComponent implements OnInit {
|
||||
public runFullSearch() {
|
||||
const ruleType = this.useAdvancedForFullSearch
|
||||
? FILTER_FULLTEXT_QUERY
|
||||
: FILTER_TITLE_CONTENT
|
||||
: FILTER_SIMPLE_TEXT
|
||||
this.documentService.searchQuery = this.useAdvancedForFullSearch
|
||||
? this.query
|
||||
: ''
|
||||
|
||||
@@ -4,7 +4,7 @@ import { ComponentFixture, TestBed } from '@angular/core/testing'
|
||||
import { By } from '@angular/platform-browser'
|
||||
import { NgbAccordionButton, NgbActiveModal } from '@ng-bootstrap/ng-bootstrap'
|
||||
import { of, throwError } from 'rxjs'
|
||||
import { FILTER_TITLE } from 'src/app/data/filter-rule-type'
|
||||
import { FILTER_SIMPLE_TITLE } from 'src/app/data/filter-rule-type'
|
||||
import { DocumentService } from 'src/app/services/rest/document.service'
|
||||
import { StoragePathService } from 'src/app/services/rest/storage-path.service'
|
||||
import { SettingsService } from 'src/app/services/settings.service'
|
||||
@@ -105,7 +105,7 @@ describe('StoragePathEditDialogComponent', () => {
|
||||
null,
|
||||
'created',
|
||||
true,
|
||||
[{ rule_type: FILTER_TITLE, value: 'bar' }],
|
||||
[{ rule_type: FILTER_SIMPLE_TITLE, value: 'bar' }],
|
||||
{ truncate_content: true }
|
||||
)
|
||||
listSpy.mockReturnValueOnce(
|
||||
|
||||
@@ -23,7 +23,7 @@ import {
|
||||
} from 'rxjs'
|
||||
import { EditDialogComponent } from 'src/app/components/common/edit-dialog/edit-dialog.component'
|
||||
import { Document } from 'src/app/data/document'
|
||||
import { FILTER_TITLE } from 'src/app/data/filter-rule-type'
|
||||
import { FILTER_SIMPLE_TITLE } from 'src/app/data/filter-rule-type'
|
||||
import { DEFAULT_MATCHING_ALGORITHM } from 'src/app/data/matching-model'
|
||||
import { StoragePath } from 'src/app/data/storage-path'
|
||||
import { IfOwnerDirective } from 'src/app/directives/if-owner.directive'
|
||||
@@ -146,7 +146,7 @@ export class StoragePathEditDialogComponent
|
||||
null,
|
||||
'created',
|
||||
true,
|
||||
[{ rule_type: FILTER_TITLE, value: title }],
|
||||
[{ rule_type: FILTER_SIMPLE_TITLE, value: title }],
|
||||
{ truncate_content: true }
|
||||
)
|
||||
.pipe(
|
||||
|
||||
@@ -3,7 +3,7 @@ import { provideHttpClientTesting } from '@angular/common/http/testing'
|
||||
import { ComponentFixture, TestBed } from '@angular/core/testing'
|
||||
import { NG_VALUE_ACCESSOR } from '@angular/forms'
|
||||
import { of, throwError } from 'rxjs'
|
||||
import { FILTER_TITLE } from 'src/app/data/filter-rule-type'
|
||||
import { FILTER_SIMPLE_TITLE } from 'src/app/data/filter-rule-type'
|
||||
import { DocumentService } from 'src/app/services/rest/document.service'
|
||||
import { DocumentLinkComponent } from './document-link.component'
|
||||
|
||||
@@ -99,7 +99,7 @@ describe('DocumentLinkComponent', () => {
|
||||
null,
|
||||
'created',
|
||||
true,
|
||||
[{ rule_type: FILTER_TITLE, value: 'bar' }],
|
||||
[{ rule_type: FILTER_SIMPLE_TITLE, value: 'bar' }],
|
||||
{ truncate_content: true }
|
||||
)
|
||||
listSpy.mockReturnValueOnce(throwError(() => new Error()))
|
||||
|
||||
@@ -28,7 +28,7 @@ import {
|
||||
tap,
|
||||
} from 'rxjs'
|
||||
import { Document } from 'src/app/data/document'
|
||||
import { FILTER_TITLE } from 'src/app/data/filter-rule-type'
|
||||
import { FILTER_SIMPLE_TITLE } from 'src/app/data/filter-rule-type'
|
||||
import { CustomDatePipe } from 'src/app/pipes/custom-date.pipe'
|
||||
import { DocumentService } from 'src/app/services/rest/document.service'
|
||||
import { AbstractInputComponent } from '../abstract-input'
|
||||
@@ -121,7 +121,7 @@ export class DocumentLinkComponent
|
||||
null,
|
||||
'created',
|
||||
true,
|
||||
[{ rule_type: FILTER_TITLE, value: title }],
|
||||
[{ rule_type: FILTER_SIMPLE_TITLE, value: title }],
|
||||
{ truncate_content: true }
|
||||
)
|
||||
.pipe(
|
||||
|
||||
@@ -428,7 +428,7 @@ describe('BulkEditorComponent', () => {
|
||||
req.flush(true)
|
||||
expect(req.request.body).toEqual({
|
||||
all: true,
|
||||
filters: { title__icontains: 'apple' },
|
||||
filters: { title_search: 'apple' },
|
||||
method: 'modify_tags',
|
||||
parameters: { add_tags: [101], remove_tags: [] },
|
||||
})
|
||||
|
||||
@@ -67,6 +67,8 @@ import {
|
||||
FILTER_OWNER_DOES_NOT_INCLUDE,
|
||||
FILTER_OWNER_ISNULL,
|
||||
FILTER_SHARED_BY_USER,
|
||||
FILTER_SIMPLE_TEXT,
|
||||
FILTER_SIMPLE_TITLE,
|
||||
FILTER_STORAGE_PATH,
|
||||
FILTER_TITLE,
|
||||
FILTER_TITLE_CONTENT,
|
||||
@@ -312,7 +314,7 @@ describe('FilterEditorComponent', () => {
|
||||
expect(component.textFilter).toEqual(null)
|
||||
component.filterRules = [
|
||||
{
|
||||
rule_type: FILTER_TITLE_CONTENT,
|
||||
rule_type: FILTER_SIMPLE_TEXT,
|
||||
value: 'foo',
|
||||
},
|
||||
]
|
||||
@@ -320,6 +322,18 @@ describe('FilterEditorComponent', () => {
|
||||
expect(component.textFilterTarget).toEqual('title-content') // TEXT_FILTER_TARGET_TITLE_CONTENT
|
||||
}))
|
||||
|
||||
it('should ingest legacy text filter rules for doc title + content', fakeAsync(() => {
|
||||
expect(component.textFilter).toEqual(null)
|
||||
component.filterRules = [
|
||||
{
|
||||
rule_type: FILTER_TITLE_CONTENT,
|
||||
value: 'legacy foo',
|
||||
},
|
||||
]
|
||||
expect(component.textFilter).toEqual('legacy foo')
|
||||
expect(component.textFilterTarget).toEqual('title-content') // TEXT_FILTER_TARGET_TITLE_CONTENT
|
||||
}))
|
||||
|
||||
it('should ingest text filter rules for doc asn', fakeAsync(() => {
|
||||
expect(component.textFilter).toEqual(null)
|
||||
component.filterRules = [
|
||||
@@ -1117,7 +1131,7 @@ describe('FilterEditorComponent', () => {
|
||||
expect(component.textFilter).toEqual('foo')
|
||||
expect(component.filterRules).toEqual([
|
||||
{
|
||||
rule_type: FILTER_TITLE_CONTENT,
|
||||
rule_type: FILTER_SIMPLE_TEXT,
|
||||
value: 'foo',
|
||||
},
|
||||
])
|
||||
@@ -1136,7 +1150,7 @@ describe('FilterEditorComponent', () => {
|
||||
expect(component.textFilterTarget).toEqual('title')
|
||||
expect(component.filterRules).toEqual([
|
||||
{
|
||||
rule_type: FILTER_TITLE,
|
||||
rule_type: FILTER_SIMPLE_TITLE,
|
||||
value: 'foo',
|
||||
},
|
||||
])
|
||||
@@ -1250,30 +1264,12 @@ describe('FilterEditorComponent', () => {
|
||||
])
|
||||
}))
|
||||
|
||||
it('should convert user input to correct filter rules on custom fields query', fakeAsync(() => {
|
||||
component.textFilterInput.nativeElement.value = 'foo'
|
||||
component.textFilterInput.nativeElement.dispatchEvent(new Event('input'))
|
||||
const textFieldTargetDropdown = fixture.debugElement.queryAll(
|
||||
By.directive(NgbDropdownItem)
|
||||
)[3]
|
||||
textFieldTargetDropdown.triggerEventHandler('click') // TEXT_FILTER_TARGET_CUSTOM_FIELDS
|
||||
fixture.detectChanges()
|
||||
tick(400)
|
||||
expect(component.textFilterTarget).toEqual('custom-fields')
|
||||
expect(component.filterRules).toEqual([
|
||||
{
|
||||
rule_type: FILTER_CUSTOM_FIELDS_TEXT,
|
||||
value: 'foo',
|
||||
},
|
||||
])
|
||||
}))
|
||||
|
||||
it('should convert user input to correct filter rules on mime type', fakeAsync(() => {
|
||||
component.textFilterInput.nativeElement.value = 'pdf'
|
||||
component.textFilterInput.nativeElement.dispatchEvent(new Event('input'))
|
||||
const textFieldTargetDropdown = fixture.debugElement.queryAll(
|
||||
By.directive(NgbDropdownItem)
|
||||
)[4]
|
||||
)[3]
|
||||
textFieldTargetDropdown.triggerEventHandler('click') // TEXT_FILTER_TARGET_MIME_TYPE
|
||||
fixture.detectChanges()
|
||||
tick(400)
|
||||
@@ -1291,8 +1287,8 @@ describe('FilterEditorComponent', () => {
|
||||
component.textFilterInput.nativeElement.dispatchEvent(new Event('input'))
|
||||
const textFieldTargetDropdown = fixture.debugElement.queryAll(
|
||||
By.directive(NgbDropdownItem)
|
||||
)[5]
|
||||
textFieldTargetDropdown.triggerEventHandler('click') // TEXT_FILTER_TARGET_ASN
|
||||
)[4]
|
||||
textFieldTargetDropdown.triggerEventHandler('click') // TEXT_FILTER_TARGET_FULLTEXT_QUERY
|
||||
fixture.detectChanges()
|
||||
tick(400)
|
||||
expect(component.textFilterTarget).toEqual('fulltext-query')
|
||||
@@ -1696,12 +1692,56 @@ describe('FilterEditorComponent', () => {
|
||||
])
|
||||
}))
|
||||
|
||||
it('should convert legacy title filters into full text query when adding a created relative date', fakeAsync(() => {
|
||||
component.filterRules = [
|
||||
{
|
||||
rule_type: FILTER_TITLE,
|
||||
value: 'foo',
|
||||
},
|
||||
]
|
||||
const dateCreatedDropdown = fixture.debugElement.queryAll(
|
||||
By.directive(DatesDropdownComponent)
|
||||
)[0]
|
||||
component.dateCreatedRelativeDate = RelativeDate.WITHIN_1_WEEK
|
||||
dateCreatedDropdown.triggerEventHandler('datesSet')
|
||||
fixture.detectChanges()
|
||||
tick(400)
|
||||
expect(component.filterRules).toEqual([
|
||||
{
|
||||
rule_type: FILTER_FULLTEXT_QUERY,
|
||||
value: 'foo,created:[-1 week to now]',
|
||||
},
|
||||
])
|
||||
}))
|
||||
|
||||
it('should convert simple title filters into full text query when adding a created relative date', fakeAsync(() => {
|
||||
component.filterRules = [
|
||||
{
|
||||
rule_type: FILTER_SIMPLE_TITLE,
|
||||
value: 'foo',
|
||||
},
|
||||
]
|
||||
const dateCreatedDropdown = fixture.debugElement.queryAll(
|
||||
By.directive(DatesDropdownComponent)
|
||||
)[0]
|
||||
component.dateCreatedRelativeDate = RelativeDate.WITHIN_1_WEEK
|
||||
dateCreatedDropdown.triggerEventHandler('datesSet')
|
||||
fixture.detectChanges()
|
||||
tick(400)
|
||||
expect(component.filterRules).toEqual([
|
||||
{
|
||||
rule_type: FILTER_FULLTEXT_QUERY,
|
||||
value: 'foo,created:[-1 week to now]',
|
||||
},
|
||||
])
|
||||
}))
|
||||
|
||||
it('should leave relative dates not in quick list intact', fakeAsync(() => {
|
||||
component.textFilterInput.nativeElement.value = 'created:[-2 week to now]'
|
||||
component.textFilterInput.nativeElement.dispatchEvent(new Event('input'))
|
||||
const textFieldTargetDropdown = fixture.debugElement.queryAll(
|
||||
By.directive(NgbDropdownItem)
|
||||
)[5]
|
||||
)[4]
|
||||
textFieldTargetDropdown.triggerEventHandler('click')
|
||||
fixture.detectChanges()
|
||||
tick(400)
|
||||
@@ -2031,12 +2071,30 @@ describe('FilterEditorComponent', () => {
|
||||
|
||||
component.filterRules = [
|
||||
{
|
||||
rule_type: FILTER_TITLE,
|
||||
rule_type: FILTER_SIMPLE_TITLE,
|
||||
value: 'foo',
|
||||
},
|
||||
]
|
||||
expect(component.generateFilterName()).toEqual('Title: foo')
|
||||
|
||||
component.filterRules = [
|
||||
{
|
||||
rule_type: FILTER_TITLE_CONTENT,
|
||||
value: 'legacy foo',
|
||||
},
|
||||
]
|
||||
expect(component.generateFilterName()).toEqual(
|
||||
'Title & content: legacy foo'
|
||||
)
|
||||
|
||||
component.filterRules = [
|
||||
{
|
||||
rule_type: FILTER_SIMPLE_TEXT,
|
||||
value: 'foo',
|
||||
},
|
||||
]
|
||||
expect(component.generateFilterName()).toEqual('Title & content: foo')
|
||||
|
||||
component.filterRules = [
|
||||
{
|
||||
rule_type: FILTER_ASN,
|
||||
@@ -2156,6 +2214,36 @@ describe('FilterEditorComponent', () => {
|
||||
})
|
||||
})
|
||||
|
||||
it('should hide deprecated custom fields target from default text filter targets', () => {
|
||||
expect(component.textFilterTargets).not.toContainEqual({
|
||||
id: 'custom-fields',
|
||||
name: $localize`Custom fields (Deprecated)`,
|
||||
})
|
||||
})
|
||||
|
||||
it('should keep deprecated custom fields target available for legacy filters', fakeAsync(() => {
|
||||
component.filterRules = [
|
||||
{
|
||||
rule_type: FILTER_CUSTOM_FIELDS_TEXT,
|
||||
value: 'foo',
|
||||
},
|
||||
]
|
||||
fixture.detectChanges()
|
||||
tick()
|
||||
|
||||
expect(component.textFilterTarget).toEqual('custom-fields')
|
||||
expect(component.textFilterTargets).toContainEqual({
|
||||
id: 'custom-fields',
|
||||
name: $localize`Custom fields (Deprecated)`,
|
||||
})
|
||||
expect(component.filterRules).toEqual([
|
||||
{
|
||||
rule_type: FILTER_CUSTOM_FIELDS_TEXT,
|
||||
value: 'foo',
|
||||
},
|
||||
])
|
||||
}))
|
||||
|
||||
it('should call autocomplete endpoint on input', fakeAsync(() => {
|
||||
component.textFilterTarget = 'fulltext-query' // TEXT_FILTER_TARGET_FULLTEXT_QUERY
|
||||
const autocompleteSpy = jest.spyOn(searchService, 'autocomplete')
|
||||
|
||||
@@ -71,6 +71,8 @@ import {
|
||||
FILTER_OWNER_DOES_NOT_INCLUDE,
|
||||
FILTER_OWNER_ISNULL,
|
||||
FILTER_SHARED_BY_USER,
|
||||
FILTER_SIMPLE_TEXT,
|
||||
FILTER_SIMPLE_TITLE,
|
||||
FILTER_STORAGE_PATH,
|
||||
FILTER_TITLE,
|
||||
FILTER_TITLE_CONTENT,
|
||||
@@ -195,10 +197,6 @@ const DEFAULT_TEXT_FILTER_TARGET_OPTIONS = [
|
||||
name: $localize`Title & content`,
|
||||
},
|
||||
{ id: TEXT_FILTER_TARGET_ASN, name: $localize`ASN` },
|
||||
{
|
||||
id: TEXT_FILTER_TARGET_CUSTOM_FIELDS,
|
||||
name: $localize`Custom fields`,
|
||||
},
|
||||
{ id: TEXT_FILTER_TARGET_MIME_TYPE, name: $localize`File type` },
|
||||
{
|
||||
id: TEXT_FILTER_TARGET_FULLTEXT_QUERY,
|
||||
@@ -206,6 +204,12 @@ const DEFAULT_TEXT_FILTER_TARGET_OPTIONS = [
|
||||
},
|
||||
]
|
||||
|
||||
const DEPRECATED_CUSTOM_FIELDS_TEXT_FILTER_TARGET_OPTION = {
|
||||
// Kept only so legacy saved views can render and be edited away from, remove me eventually
|
||||
id: TEXT_FILTER_TARGET_CUSTOM_FIELDS,
|
||||
name: $localize`Custom fields (Deprecated)`,
|
||||
}
|
||||
|
||||
const TEXT_FILTER_TARGET_MORELIKE_OPTION = {
|
||||
id: TEXT_FILTER_TARGET_FULLTEXT_MORELIKE,
|
||||
name: $localize`More like`,
|
||||
@@ -318,8 +322,13 @@ export class FilterEditorComponent
|
||||
return $localize`Custom fields query`
|
||||
|
||||
case FILTER_TITLE:
|
||||
case FILTER_SIMPLE_TITLE:
|
||||
return $localize`Title: ${rule.value}`
|
||||
|
||||
case FILTER_TITLE_CONTENT:
|
||||
case FILTER_SIMPLE_TEXT:
|
||||
return $localize`Title & content: ${rule.value}`
|
||||
|
||||
case FILTER_ASN:
|
||||
return $localize`ASN: ${rule.value}`
|
||||
|
||||
@@ -353,12 +362,16 @@ export class FilterEditorComponent
|
||||
_moreLikeDoc: Document
|
||||
|
||||
get textFilterTargets() {
|
||||
let targets = DEFAULT_TEXT_FILTER_TARGET_OPTIONS
|
||||
if (this.textFilterTarget == TEXT_FILTER_TARGET_FULLTEXT_MORELIKE) {
|
||||
return DEFAULT_TEXT_FILTER_TARGET_OPTIONS.concat([
|
||||
TEXT_FILTER_TARGET_MORELIKE_OPTION,
|
||||
targets = targets.concat([TEXT_FILTER_TARGET_MORELIKE_OPTION])
|
||||
}
|
||||
if (this.textFilterTarget == TEXT_FILTER_TARGET_CUSTOM_FIELDS) {
|
||||
targets = targets.concat([
|
||||
DEPRECATED_CUSTOM_FIELDS_TEXT_FILTER_TARGET_OPTION,
|
||||
])
|
||||
}
|
||||
return DEFAULT_TEXT_FILTER_TARGET_OPTIONS
|
||||
return targets
|
||||
}
|
||||
|
||||
textFilterTarget = TEXT_FILTER_TARGET_TITLE_CONTENT
|
||||
@@ -437,10 +450,12 @@ export class FilterEditorComponent
|
||||
value.forEach((rule) => {
|
||||
switch (rule.rule_type) {
|
||||
case FILTER_TITLE:
|
||||
case FILTER_SIMPLE_TITLE:
|
||||
this._textFilter = rule.value
|
||||
this.textFilterTarget = TEXT_FILTER_TARGET_TITLE
|
||||
break
|
||||
case FILTER_TITLE_CONTENT:
|
||||
case FILTER_SIMPLE_TEXT:
|
||||
this._textFilter = rule.value
|
||||
this.textFilterTarget = TEXT_FILTER_TARGET_TITLE_CONTENT
|
||||
break
|
||||
@@ -762,12 +777,15 @@ export class FilterEditorComponent
|
||||
this.textFilterTarget == TEXT_FILTER_TARGET_TITLE_CONTENT
|
||||
) {
|
||||
filterRules.push({
|
||||
rule_type: FILTER_TITLE_CONTENT,
|
||||
rule_type: FILTER_SIMPLE_TEXT,
|
||||
value: this._textFilter.trim(),
|
||||
})
|
||||
}
|
||||
if (this._textFilter && this.textFilterTarget == TEXT_FILTER_TARGET_TITLE) {
|
||||
filterRules.push({ rule_type: FILTER_TITLE, value: this._textFilter })
|
||||
filterRules.push({
|
||||
rule_type: FILTER_SIMPLE_TITLE,
|
||||
value: this._textFilter,
|
||||
})
|
||||
}
|
||||
if (this.textFilterTarget == TEXT_FILTER_TARGET_ASN) {
|
||||
if (
|
||||
@@ -1009,7 +1027,10 @@ export class FilterEditorComponent
|
||||
) {
|
||||
existingRule = filterRules.find(
|
||||
(fr) =>
|
||||
fr.rule_type == FILTER_TITLE_CONTENT || fr.rule_type == FILTER_TITLE
|
||||
fr.rule_type == FILTER_TITLE_CONTENT ||
|
||||
fr.rule_type == FILTER_SIMPLE_TEXT ||
|
||||
fr.rule_type == FILTER_TITLE ||
|
||||
fr.rule_type == FILTER_SIMPLE_TITLE
|
||||
)
|
||||
existingRule.rule_type = FILTER_FULLTEXT_QUERY
|
||||
}
|
||||
|
||||
@@ -3,7 +3,7 @@ import { DataType } from './datatype'
|
||||
export const NEGATIVE_NULL_FILTER_VALUE = -1
|
||||
|
||||
// These correspond to src/documents/models.py and changes here require a DB migration (and vice versa)
|
||||
export const FILTER_TITLE = 0
|
||||
export const FILTER_TITLE = 0 // Deprecated in favor of Tantivy-backed `title_search`. Keep for now for existing saved views
|
||||
export const FILTER_CONTENT = 1
|
||||
|
||||
export const FILTER_ASN = 2
|
||||
@@ -46,7 +46,9 @@ export const FILTER_ADDED_FROM = 46
|
||||
export const FILTER_MODIFIED_BEFORE = 15
|
||||
export const FILTER_MODIFIED_AFTER = 16
|
||||
|
||||
export const FILTER_TITLE_CONTENT = 19
|
||||
export const FILTER_TITLE_CONTENT = 19 // Deprecated in favor of Tantivy-backed `text` filtervar. Keep for now for existing saved views
|
||||
export const FILTER_SIMPLE_TITLE = 48
|
||||
export const FILTER_SIMPLE_TEXT = 49
|
||||
export const FILTER_FULLTEXT_QUERY = 20
|
||||
export const FILTER_FULLTEXT_MORELIKE = 21
|
||||
|
||||
@@ -56,7 +58,7 @@ export const FILTER_OWNER_ISNULL = 34
|
||||
export const FILTER_OWNER_DOES_NOT_INCLUDE = 35
|
||||
export const FILTER_SHARED_BY_USER = 37
|
||||
|
||||
export const FILTER_CUSTOM_FIELDS_TEXT = 36
|
||||
export const FILTER_CUSTOM_FIELDS_TEXT = 36 // Deprecated. UI no longer includes CF text-search mode. Keep for now for existing saved views
|
||||
export const FILTER_HAS_CUSTOM_FIELDS_ALL = 38
|
||||
export const FILTER_HAS_CUSTOM_FIELDS_ANY = 39
|
||||
export const FILTER_DOES_NOT_HAVE_CUSTOM_FIELDS = 40
|
||||
@@ -66,6 +68,9 @@ export const FILTER_CUSTOM_FIELDS_QUERY = 42
|
||||
|
||||
export const FILTER_MIME_TYPE = 47
|
||||
|
||||
export const SIMPLE_TEXT_PARAMETER = 'text'
|
||||
export const SIMPLE_TITLE_PARAMETER = 'title_search'
|
||||
|
||||
export const FILTER_RULE_TYPES: FilterRuleType[] = [
|
||||
{
|
||||
id: FILTER_TITLE,
|
||||
@@ -74,6 +79,13 @@ export const FILTER_RULE_TYPES: FilterRuleType[] = [
|
||||
multi: false,
|
||||
default: '',
|
||||
},
|
||||
{
|
||||
id: FILTER_SIMPLE_TITLE,
|
||||
filtervar: SIMPLE_TITLE_PARAMETER,
|
||||
datatype: 'string',
|
||||
multi: false,
|
||||
default: '',
|
||||
},
|
||||
{
|
||||
id: FILTER_CONTENT,
|
||||
filtervar: 'content__icontains',
|
||||
@@ -279,6 +291,12 @@ export const FILTER_RULE_TYPES: FilterRuleType[] = [
|
||||
datatype: 'string',
|
||||
multi: false,
|
||||
},
|
||||
{
|
||||
id: FILTER_SIMPLE_TEXT,
|
||||
filtervar: SIMPLE_TEXT_PARAMETER,
|
||||
datatype: 'string',
|
||||
multi: false,
|
||||
},
|
||||
{
|
||||
id: FILTER_FULLTEXT_QUERY,
|
||||
filtervar: 'query',
|
||||
|
||||
@@ -10,7 +10,7 @@ import {
|
||||
DOCUMENT_SORT_FIELDS,
|
||||
DOCUMENT_SORT_FIELDS_FULLTEXT,
|
||||
} from 'src/app/data/document'
|
||||
import { FILTER_TITLE } from 'src/app/data/filter-rule-type'
|
||||
import { FILTER_SIMPLE_TITLE } from 'src/app/data/filter-rule-type'
|
||||
import { SETTINGS_KEYS } from 'src/app/data/ui-settings'
|
||||
import { environment } from 'src/environments/environment'
|
||||
import { PermissionsService } from '../permissions.service'
|
||||
@@ -138,13 +138,13 @@ describe(`DocumentService`, () => {
|
||||
subscription = service
|
||||
.listAllFilteredIds([
|
||||
{
|
||||
rule_type: FILTER_TITLE,
|
||||
rule_type: FILTER_SIMPLE_TITLE,
|
||||
value: 'apple',
|
||||
},
|
||||
])
|
||||
.subscribe()
|
||||
const req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}${endpoint}/?page=1&page_size=100000&fields=id&title__icontains=apple`
|
||||
`${environment.apiBaseUrl}${endpoint}/?page=1&page_size=100000&fields=id&title_search=apple`
|
||||
)
|
||||
expect(req.request.method).toEqual('GET')
|
||||
})
|
||||
|
||||
@@ -8,6 +8,10 @@ import {
|
||||
FILTER_HAS_CUSTOM_FIELDS_ALL,
|
||||
FILTER_HAS_CUSTOM_FIELDS_ANY,
|
||||
FILTER_HAS_TAGS_ALL,
|
||||
FILTER_SIMPLE_TEXT,
|
||||
FILTER_SIMPLE_TITLE,
|
||||
FILTER_TITLE,
|
||||
FILTER_TITLE_CONTENT,
|
||||
NEGATIVE_NULL_FILTER_VALUE,
|
||||
} from '../data/filter-rule-type'
|
||||
import {
|
||||
@@ -128,6 +132,26 @@ describe('QueryParams Utils', () => {
|
||||
is_tagged: 0,
|
||||
})
|
||||
|
||||
params = queryParamsFromFilterRules([
|
||||
{
|
||||
rule_type: FILTER_TITLE_CONTENT,
|
||||
value: 'bank statement',
|
||||
},
|
||||
])
|
||||
expect(params).toEqual({
|
||||
text: 'bank statement',
|
||||
})
|
||||
|
||||
params = queryParamsFromFilterRules([
|
||||
{
|
||||
rule_type: FILTER_TITLE,
|
||||
value: 'invoice',
|
||||
},
|
||||
])
|
||||
expect(params).toEqual({
|
||||
title_search: 'invoice',
|
||||
})
|
||||
|
||||
params = queryParamsFromFilterRules([
|
||||
{
|
||||
rule_type: FILTER_HAS_TAGS_ALL,
|
||||
@@ -148,6 +172,30 @@ describe('QueryParams Utils', () => {
|
||||
|
||||
it('should convert filter rules to query params', () => {
|
||||
let rules = filterRulesFromQueryParams(
|
||||
convertToParamMap({
|
||||
text: 'bank statement',
|
||||
})
|
||||
)
|
||||
expect(rules).toEqual([
|
||||
{
|
||||
rule_type: FILTER_SIMPLE_TEXT,
|
||||
value: 'bank statement',
|
||||
},
|
||||
])
|
||||
|
||||
rules = filterRulesFromQueryParams(
|
||||
convertToParamMap({
|
||||
title_search: 'invoice',
|
||||
})
|
||||
)
|
||||
expect(rules).toEqual([
|
||||
{
|
||||
rule_type: FILTER_SIMPLE_TITLE,
|
||||
value: 'invoice',
|
||||
},
|
||||
])
|
||||
|
||||
rules = filterRulesFromQueryParams(
|
||||
convertToParamMap({
|
||||
tags__id__all,
|
||||
})
|
||||
|
||||
@@ -9,8 +9,14 @@ import {
|
||||
FILTER_HAS_CUSTOM_FIELDS_ALL,
|
||||
FILTER_HAS_CUSTOM_FIELDS_ANY,
|
||||
FILTER_RULE_TYPES,
|
||||
FILTER_SIMPLE_TEXT,
|
||||
FILTER_SIMPLE_TITLE,
|
||||
FILTER_TITLE,
|
||||
FILTER_TITLE_CONTENT,
|
||||
FilterRuleType,
|
||||
NEGATIVE_NULL_FILTER_VALUE,
|
||||
SIMPLE_TEXT_PARAMETER,
|
||||
SIMPLE_TITLE_PARAMETER,
|
||||
} from '../data/filter-rule-type'
|
||||
import { ListViewState } from '../services/document-list-view.service'
|
||||
|
||||
@@ -97,6 +103,8 @@ export function transformLegacyFilterRules(
|
||||
export function filterRulesFromQueryParams(
|
||||
queryParams: ParamMap
|
||||
): FilterRule[] {
|
||||
let filterRulesFromQueryParams: FilterRule[] = []
|
||||
|
||||
const allFilterRuleQueryParams: string[] = FILTER_RULE_TYPES.map(
|
||||
(rt) => rt.filtervar
|
||||
)
|
||||
@@ -104,7 +112,6 @@ export function filterRulesFromQueryParams(
|
||||
.filter((rt) => rt !== undefined)
|
||||
|
||||
// transform query params to filter rules
|
||||
let filterRulesFromQueryParams: FilterRule[] = []
|
||||
allFilterRuleQueryParams
|
||||
.filter((frqp) => queryParams.has(frqp))
|
||||
.forEach((filterQueryParamName) => {
|
||||
@@ -146,7 +153,17 @@ export function queryParamsFromFilterRules(filterRules: FilterRule[]): Params {
|
||||
let params = {}
|
||||
for (let rule of filterRules) {
|
||||
let ruleType = FILTER_RULE_TYPES.find((t) => t.id == rule.rule_type)
|
||||
if (ruleType.isnull_filtervar && rule.value == null) {
|
||||
if (
|
||||
rule.rule_type === FILTER_TITLE_CONTENT ||
|
||||
rule.rule_type === FILTER_SIMPLE_TEXT
|
||||
) {
|
||||
params[SIMPLE_TEXT_PARAMETER] = rule.value
|
||||
} else if (
|
||||
rule.rule_type === FILTER_TITLE ||
|
||||
rule.rule_type === FILTER_SIMPLE_TITLE
|
||||
) {
|
||||
params[SIMPLE_TITLE_PARAMETER] = rule.value
|
||||
} else if (ruleType.isnull_filtervar && rule.value == null) {
|
||||
params[ruleType.isnull_filtervar] = 1
|
||||
} else if (
|
||||
ruleType.isnull_filtervar &&
|
||||
|
||||
@@ -100,24 +100,23 @@ class DocumentAdmin(GuardedModelAdmin):
|
||||
return Document.global_objects.all()
|
||||
|
||||
def delete_queryset(self, request, queryset):
|
||||
from documents import index
|
||||
from documents.search import get_backend
|
||||
|
||||
with index.open_index_writer() as writer:
|
||||
with get_backend().batch_update() as batch:
|
||||
for o in queryset:
|
||||
index.remove_document(writer, o)
|
||||
|
||||
batch.remove(o.pk)
|
||||
super().delete_queryset(request, queryset)
|
||||
|
||||
def delete_model(self, request, obj):
|
||||
from documents import index
|
||||
from documents.search import get_backend
|
||||
|
||||
index.remove_document_from_index(obj)
|
||||
get_backend().remove(obj.pk)
|
||||
super().delete_model(request, obj)
|
||||
|
||||
def save_model(self, request, obj, form, change):
|
||||
from documents import index
|
||||
from documents.search import get_backend
|
||||
|
||||
index.add_or_update_document(obj)
|
||||
get_backend().add_or_update(obj)
|
||||
super().save_model(request, obj, form, change)
|
||||
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@ from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import regex as regex_mod
|
||||
from django.conf import settings
|
||||
from pdf2image import convert_from_path
|
||||
from pikepdf import Page
|
||||
@@ -22,6 +23,8 @@ from documents.plugins.base import ConsumeTaskPlugin
|
||||
from documents.plugins.base import StopConsumeTaskError
|
||||
from documents.plugins.helpers import ProgressManager
|
||||
from documents.plugins.helpers import ProgressStatusOptions
|
||||
from documents.regex import safe_regex_match
|
||||
from documents.regex import safe_regex_sub
|
||||
from documents.utils import copy_basic_file_stats
|
||||
from documents.utils import copy_file_with_basic_stats
|
||||
from documents.utils import maybe_override_pixel_limit
|
||||
@@ -68,8 +71,8 @@ class Barcode:
|
||||
Note: This does NOT exclude ASN or separator barcodes - they can also be used
|
||||
as tags if they match a tag mapping pattern (e.g., {"ASN12.*": "JOHN"}).
|
||||
"""
|
||||
for regex in self.settings.barcode_tag_mapping:
|
||||
if re.match(regex, self.value, flags=re.IGNORECASE):
|
||||
for pattern in self.settings.barcode_tag_mapping:
|
||||
if safe_regex_match(pattern, self.value, flags=regex_mod.IGNORECASE):
|
||||
return True
|
||||
return False
|
||||
|
||||
@@ -392,11 +395,16 @@ class BarcodePlugin(ConsumeTaskPlugin):
|
||||
for raw in tag_texts.split(","):
|
||||
try:
|
||||
tag_str: str | None = None
|
||||
for regex in self.settings.barcode_tag_mapping:
|
||||
if re.match(regex, raw, flags=re.IGNORECASE):
|
||||
sub = self.settings.barcode_tag_mapping[regex]
|
||||
for pattern in self.settings.barcode_tag_mapping:
|
||||
if safe_regex_match(pattern, raw, flags=regex_mod.IGNORECASE):
|
||||
sub = self.settings.barcode_tag_mapping[pattern]
|
||||
tag_str = (
|
||||
re.sub(regex, sub, raw, flags=re.IGNORECASE)
|
||||
safe_regex_sub(
|
||||
pattern,
|
||||
sub,
|
||||
raw,
|
||||
flags=regex_mod.IGNORECASE,
|
||||
)
|
||||
if sub
|
||||
else raw
|
||||
)
|
||||
|
||||
@@ -349,11 +349,11 @@ def delete(doc_ids: list[int]) -> Literal["OK"]:
|
||||
|
||||
Document.objects.filter(id__in=delete_ids).delete()
|
||||
|
||||
from documents import index
|
||||
from documents.search import get_backend
|
||||
|
||||
with index.open_index_writer() as writer:
|
||||
with get_backend().batch_update() as batch:
|
||||
for id in delete_ids:
|
||||
index.remove_document_by_id(writer, id)
|
||||
batch.remove(id)
|
||||
|
||||
status_mgr = DocumentsStatusManager()
|
||||
status_mgr.send_documents_deleted(delete_ids)
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import hmac
|
||||
import logging
|
||||
import pickle
|
||||
import re
|
||||
@@ -75,7 +76,7 @@ def load_classifier(*, raise_exception: bool = False) -> DocumentClassifier | No
|
||||
"Unrecoverable error while loading document "
|
||||
"classification model, deleting model file.",
|
||||
)
|
||||
Path(settings.MODEL_FILE).unlink
|
||||
Path(settings.MODEL_FILE).unlink()
|
||||
classifier = None
|
||||
if raise_exception:
|
||||
raise e
|
||||
@@ -97,7 +98,10 @@ class DocumentClassifier:
|
||||
# v7 - Updated scikit-learn package version
|
||||
# v8 - Added storage path classifier
|
||||
# v9 - Changed from hashing to time/ids for re-train check
|
||||
FORMAT_VERSION = 9
|
||||
# v10 - HMAC-signed model file
|
||||
FORMAT_VERSION = 10
|
||||
|
||||
HMAC_SIZE = 32 # SHA-256 digest length
|
||||
|
||||
def __init__(self) -> None:
|
||||
# last time a document changed and therefore training might be required
|
||||
@@ -128,67 +132,89 @@ class DocumentClassifier:
|
||||
pickle.dumps(self.data_vectorizer),
|
||||
).hexdigest()
|
||||
|
||||
@staticmethod
|
||||
def _compute_hmac(data: bytes) -> bytes:
|
||||
return hmac.new(
|
||||
settings.SECRET_KEY.encode(),
|
||||
data,
|
||||
sha256,
|
||||
).digest()
|
||||
|
||||
def load(self) -> None:
|
||||
from sklearn.exceptions import InconsistentVersionWarning
|
||||
|
||||
raw = Path(settings.MODEL_FILE).read_bytes()
|
||||
|
||||
if len(raw) <= self.HMAC_SIZE:
|
||||
raise ClassifierModelCorruptError
|
||||
|
||||
signature = raw[: self.HMAC_SIZE]
|
||||
data = raw[self.HMAC_SIZE :]
|
||||
|
||||
if not hmac.compare_digest(signature, self._compute_hmac(data)):
|
||||
raise ClassifierModelCorruptError
|
||||
|
||||
# Catch warnings for processing
|
||||
with warnings.catch_warnings(record=True) as w:
|
||||
with Path(settings.MODEL_FILE).open("rb") as f:
|
||||
schema_version = pickle.load(f)
|
||||
try:
|
||||
(
|
||||
schema_version,
|
||||
self.last_doc_change_time,
|
||||
self.last_auto_type_hash,
|
||||
self.data_vectorizer,
|
||||
self.tags_binarizer,
|
||||
self.tags_classifier,
|
||||
self.correspondent_classifier,
|
||||
self.document_type_classifier,
|
||||
self.storage_path_classifier,
|
||||
) = pickle.loads(data)
|
||||
except Exception as err:
|
||||
raise ClassifierModelCorruptError from err
|
||||
|
||||
if schema_version != self.FORMAT_VERSION:
|
||||
raise IncompatibleClassifierVersionError(
|
||||
"Cannot load classifier, incompatible versions.",
|
||||
)
|
||||
else:
|
||||
try:
|
||||
self.last_doc_change_time = pickle.load(f)
|
||||
self.last_auto_type_hash = pickle.load(f)
|
||||
|
||||
self.data_vectorizer = pickle.load(f)
|
||||
self._update_data_vectorizer_hash()
|
||||
self.tags_binarizer = pickle.load(f)
|
||||
|
||||
self.tags_classifier = pickle.load(f)
|
||||
self.correspondent_classifier = pickle.load(f)
|
||||
self.document_type_classifier = pickle.load(f)
|
||||
self.storage_path_classifier = pickle.load(f)
|
||||
except Exception as err:
|
||||
raise ClassifierModelCorruptError from err
|
||||
|
||||
# Check for the warning about unpickling from differing versions
|
||||
# and consider it incompatible
|
||||
sk_learn_warning_url = (
|
||||
"https://scikit-learn.org/stable/"
|
||||
"model_persistence.html"
|
||||
"#security-maintainability-limitations"
|
||||
if schema_version != self.FORMAT_VERSION:
|
||||
raise IncompatibleClassifierVersionError(
|
||||
"Cannot load classifier, incompatible versions.",
|
||||
)
|
||||
for warning in w:
|
||||
# The warning is inconsistent, the MLPClassifier is a specific warning, others have not updated yet
|
||||
if issubclass(warning.category, InconsistentVersionWarning) or (
|
||||
issubclass(warning.category, UserWarning)
|
||||
and sk_learn_warning_url in str(warning.message)
|
||||
):
|
||||
raise IncompatibleClassifierVersionError("sklearn version update")
|
||||
|
||||
self._update_data_vectorizer_hash()
|
||||
|
||||
# Check for the warning about unpickling from differing versions
|
||||
# and consider it incompatible
|
||||
sk_learn_warning_url = (
|
||||
"https://scikit-learn.org/stable/"
|
||||
"model_persistence.html"
|
||||
"#security-maintainability-limitations"
|
||||
)
|
||||
for warning in w:
|
||||
# The warning is inconsistent, the MLPClassifier is a specific warning, others have not updated yet
|
||||
if issubclass(warning.category, InconsistentVersionWarning) or (
|
||||
issubclass(warning.category, UserWarning)
|
||||
and sk_learn_warning_url in str(warning.message)
|
||||
):
|
||||
raise IncompatibleClassifierVersionError("sklearn version update")
|
||||
|
||||
def save(self) -> None:
|
||||
target_file: Path = settings.MODEL_FILE
|
||||
target_file_temp: Path = target_file.with_suffix(".pickle.part")
|
||||
|
||||
data = pickle.dumps(
|
||||
(
|
||||
self.FORMAT_VERSION,
|
||||
self.last_doc_change_time,
|
||||
self.last_auto_type_hash,
|
||||
self.data_vectorizer,
|
||||
self.tags_binarizer,
|
||||
self.tags_classifier,
|
||||
self.correspondent_classifier,
|
||||
self.document_type_classifier,
|
||||
self.storage_path_classifier,
|
||||
),
|
||||
)
|
||||
|
||||
signature = self._compute_hmac(data)
|
||||
|
||||
with target_file_temp.open("wb") as f:
|
||||
pickle.dump(self.FORMAT_VERSION, f)
|
||||
|
||||
pickle.dump(self.last_doc_change_time, f)
|
||||
pickle.dump(self.last_auto_type_hash, f)
|
||||
|
||||
pickle.dump(self.data_vectorizer, f)
|
||||
|
||||
pickle.dump(self.tags_binarizer, f)
|
||||
pickle.dump(self.tags_classifier, f)
|
||||
|
||||
pickle.dump(self.correspondent_classifier, f)
|
||||
pickle.dump(self.document_type_classifier, f)
|
||||
pickle.dump(self.storage_path_classifier, f)
|
||||
f.write(signature + data)
|
||||
|
||||
target_file_temp.rename(target_file)
|
||||
|
||||
|
||||
@@ -139,14 +139,12 @@ class ConsumerPluginMixin:
|
||||
message,
|
||||
current_progress,
|
||||
max_progress,
|
||||
extra_args={
|
||||
"document_id": document_id,
|
||||
"owner_id": self.metadata.owner_id if self.metadata.owner_id else None,
|
||||
"users_can_view": (self.metadata.view_users or [])
|
||||
+ (self.metadata.change_users or []),
|
||||
"groups_can_view": (self.metadata.view_groups or [])
|
||||
+ (self.metadata.change_groups or []),
|
||||
},
|
||||
document_id=document_id,
|
||||
owner_id=self.metadata.owner_id if self.metadata.owner_id else None,
|
||||
users_can_view=(self.metadata.view_users or [])
|
||||
+ (self.metadata.change_users or []),
|
||||
groups_can_view=(self.metadata.view_groups or [])
|
||||
+ (self.metadata.change_groups or []),
|
||||
)
|
||||
|
||||
def _fail(
|
||||
|
||||
@@ -3,6 +3,7 @@ from __future__ import annotations
|
||||
import functools
|
||||
import inspect
|
||||
import json
|
||||
import logging
|
||||
import operator
|
||||
from contextlib import contextmanager
|
||||
from typing import TYPE_CHECKING
|
||||
@@ -77,6 +78,8 @@ DATETIME_KWARGS = [
|
||||
CUSTOM_FIELD_QUERY_MAX_DEPTH = 10
|
||||
CUSTOM_FIELD_QUERY_MAX_ATOMS = 20
|
||||
|
||||
logger = logging.getLogger("paperless.api")
|
||||
|
||||
|
||||
class CorrespondentFilterSet(FilterSet):
|
||||
class Meta:
|
||||
@@ -162,9 +165,13 @@ class InboxFilter(Filter):
|
||||
|
||||
@extend_schema_field(serializers.CharField)
|
||||
class TitleContentFilter(Filter):
|
||||
# Deprecated but retained for existing saved views. UI uses Tantivy-backed `text` / `title_search` params.
|
||||
def filter(self, qs: Any, value: Any) -> Any:
|
||||
value = value.strip() if isinstance(value, str) else value
|
||||
if value:
|
||||
logger.warning(
|
||||
"Deprecated document filter parameter 'title_content' used; use `text` instead.",
|
||||
)
|
||||
try:
|
||||
return qs.filter(
|
||||
Q(title__icontains=value) | Q(effective_content__icontains=value),
|
||||
@@ -243,6 +250,9 @@ class CustomFieldsFilter(Filter):
|
||||
def filter(self, qs, value):
|
||||
value = value.strip() if isinstance(value, str) else value
|
||||
if value:
|
||||
logger.warning(
|
||||
"Deprecated document filter parameter 'custom_fields__icontains' used; use `custom_field_query` or advanced Tantivy field syntax instead.",
|
||||
)
|
||||
fields_with_matching_selects = CustomField.objects.filter(
|
||||
extra_data__icontains=value,
|
||||
)
|
||||
@@ -747,6 +757,7 @@ class DocumentFilterSet(FilterSet):
|
||||
|
||||
is_in_inbox = InboxFilter()
|
||||
|
||||
# Deprecated, but keep for now for existing saved views
|
||||
title_content = TitleContentFilter()
|
||||
|
||||
content__istartswith = EffectiveContentFilter(lookup_expr="istartswith")
|
||||
@@ -756,6 +767,7 @@ class DocumentFilterSet(FilterSet):
|
||||
|
||||
owner__id__none = ObjectFilter(field_name="owner", exclude=True)
|
||||
|
||||
# Deprecated, UI no longer includes CF text-search mode, but keep for now for existing saved views
|
||||
custom_fields__icontains = CustomFieldsFilter()
|
||||
|
||||
custom_fields__id__all = ObjectFilter(field_name="custom_fields__field")
|
||||
|
||||
@@ -1,675 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import math
|
||||
import re
|
||||
from collections import Counter
|
||||
from contextlib import contextmanager
|
||||
from datetime import UTC
|
||||
from datetime import datetime
|
||||
from datetime import time
|
||||
from datetime import timedelta
|
||||
from shutil import rmtree
|
||||
from time import sleep
|
||||
from typing import TYPE_CHECKING
|
||||
from typing import Literal
|
||||
|
||||
from dateutil.relativedelta import relativedelta
|
||||
from django.conf import settings
|
||||
from django.utils import timezone as django_timezone
|
||||
from django.utils.timezone import get_current_timezone
|
||||
from django.utils.timezone import now
|
||||
from guardian.shortcuts import get_users_with_perms
|
||||
from whoosh import classify
|
||||
from whoosh import highlight
|
||||
from whoosh import query
|
||||
from whoosh.fields import BOOLEAN
|
||||
from whoosh.fields import DATETIME
|
||||
from whoosh.fields import KEYWORD
|
||||
from whoosh.fields import NUMERIC
|
||||
from whoosh.fields import TEXT
|
||||
from whoosh.fields import Schema
|
||||
from whoosh.highlight import HtmlFormatter
|
||||
from whoosh.idsets import BitSet
|
||||
from whoosh.idsets import DocIdSet
|
||||
from whoosh.index import FileIndex
|
||||
from whoosh.index import LockError
|
||||
from whoosh.index import create_in
|
||||
from whoosh.index import exists_in
|
||||
from whoosh.index import open_dir
|
||||
from whoosh.qparser import MultifieldParser
|
||||
from whoosh.qparser import QueryParser
|
||||
from whoosh.qparser.dateparse import DateParserPlugin
|
||||
from whoosh.qparser.dateparse import English
|
||||
from whoosh.qparser.plugins import FieldsPlugin
|
||||
from whoosh.scoring import TF_IDF
|
||||
from whoosh.util.times import timespan
|
||||
from whoosh.writing import AsyncWriter
|
||||
|
||||
from documents.models import CustomFieldInstance
|
||||
from documents.models import Document
|
||||
from documents.models import Note
|
||||
from documents.models import User
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from django.db.models import QuerySet
|
||||
from whoosh.reading import IndexReader
|
||||
from whoosh.searching import ResultsPage
|
||||
from whoosh.searching import Searcher
|
||||
|
||||
logger = logging.getLogger("paperless.index")
|
||||
|
||||
|
||||
def get_schema() -> Schema:
|
||||
return Schema(
|
||||
id=NUMERIC(stored=True, unique=True),
|
||||
title=TEXT(sortable=True),
|
||||
content=TEXT(),
|
||||
asn=NUMERIC(sortable=True, signed=False),
|
||||
correspondent=TEXT(sortable=True),
|
||||
correspondent_id=NUMERIC(),
|
||||
has_correspondent=BOOLEAN(),
|
||||
tag=KEYWORD(commas=True, scorable=True, lowercase=True),
|
||||
tag_id=KEYWORD(commas=True, scorable=True),
|
||||
has_tag=BOOLEAN(),
|
||||
type=TEXT(sortable=True),
|
||||
type_id=NUMERIC(),
|
||||
has_type=BOOLEAN(),
|
||||
created=DATETIME(sortable=True),
|
||||
modified=DATETIME(sortable=True),
|
||||
added=DATETIME(sortable=True),
|
||||
path=TEXT(sortable=True),
|
||||
path_id=NUMERIC(),
|
||||
has_path=BOOLEAN(),
|
||||
notes=TEXT(),
|
||||
num_notes=NUMERIC(sortable=True, signed=False),
|
||||
custom_fields=TEXT(),
|
||||
custom_field_count=NUMERIC(sortable=True, signed=False),
|
||||
has_custom_fields=BOOLEAN(),
|
||||
custom_fields_id=KEYWORD(commas=True),
|
||||
owner=TEXT(),
|
||||
owner_id=NUMERIC(),
|
||||
has_owner=BOOLEAN(),
|
||||
viewer_id=KEYWORD(commas=True),
|
||||
checksum=TEXT(),
|
||||
page_count=NUMERIC(sortable=True),
|
||||
original_filename=TEXT(sortable=True),
|
||||
is_shared=BOOLEAN(),
|
||||
)
|
||||
|
||||
|
||||
def open_index(*, recreate=False) -> FileIndex:
|
||||
transient_exceptions = (FileNotFoundError, LockError)
|
||||
max_retries = 3
|
||||
retry_delay = 0.1
|
||||
|
||||
for attempt in range(max_retries + 1):
|
||||
try:
|
||||
if exists_in(settings.INDEX_DIR) and not recreate:
|
||||
return open_dir(settings.INDEX_DIR, schema=get_schema())
|
||||
break
|
||||
except transient_exceptions as exc:
|
||||
is_last_attempt = attempt == max_retries or recreate
|
||||
if is_last_attempt:
|
||||
logger.exception(
|
||||
"Error while opening the index after retries, recreating.",
|
||||
)
|
||||
break
|
||||
|
||||
logger.warning(
|
||||
"Transient error while opening the index (attempt %s/%s): %s. Retrying.",
|
||||
attempt + 1,
|
||||
max_retries + 1,
|
||||
exc,
|
||||
)
|
||||
sleep(retry_delay)
|
||||
except Exception:
|
||||
logger.exception("Error while opening the index, recreating.")
|
||||
break
|
||||
|
||||
# create_in doesn't handle corrupted indexes very well, remove the directory entirely first
|
||||
if settings.INDEX_DIR.is_dir():
|
||||
rmtree(settings.INDEX_DIR)
|
||||
settings.INDEX_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
return create_in(settings.INDEX_DIR, get_schema())
|
||||
|
||||
|
||||
@contextmanager
|
||||
def open_index_writer(*, optimize=False) -> AsyncWriter:
|
||||
writer = AsyncWriter(open_index())
|
||||
|
||||
try:
|
||||
yield writer
|
||||
except Exception as e:
|
||||
logger.exception(str(e))
|
||||
writer.cancel()
|
||||
finally:
|
||||
writer.commit(optimize=optimize)
|
||||
|
||||
|
||||
@contextmanager
|
||||
def open_index_searcher() -> Searcher:
|
||||
searcher = open_index().searcher()
|
||||
|
||||
try:
|
||||
yield searcher
|
||||
finally:
|
||||
searcher.close()
|
||||
|
||||
|
||||
def update_document(
|
||||
writer: AsyncWriter,
|
||||
doc: Document,
|
||||
effective_content: str | None = None,
|
||||
) -> None:
|
||||
tags = ",".join([t.name for t in doc.tags.all()])
|
||||
tags_ids = ",".join([str(t.id) for t in doc.tags.all()])
|
||||
notes = ",".join([str(c.note) for c in Note.objects.filter(document=doc)])
|
||||
custom_fields = ",".join(
|
||||
[str(c) for c in CustomFieldInstance.objects.filter(document=doc)],
|
||||
)
|
||||
custom_fields_ids = ",".join(
|
||||
[str(f.field.id) for f in CustomFieldInstance.objects.filter(document=doc)],
|
||||
)
|
||||
asn: int | None = doc.archive_serial_number
|
||||
if asn is not None and (
|
||||
asn < Document.ARCHIVE_SERIAL_NUMBER_MIN
|
||||
or asn > Document.ARCHIVE_SERIAL_NUMBER_MAX
|
||||
):
|
||||
logger.error(
|
||||
f"Not indexing Archive Serial Number {asn} of document {doc.pk}. "
|
||||
f"ASN is out of range "
|
||||
f"[{Document.ARCHIVE_SERIAL_NUMBER_MIN:,}, "
|
||||
f"{Document.ARCHIVE_SERIAL_NUMBER_MAX:,}.",
|
||||
)
|
||||
asn = 0
|
||||
users_with_perms = get_users_with_perms(
|
||||
doc,
|
||||
only_with_perms_in=["view_document"],
|
||||
)
|
||||
viewer_ids: str = ",".join([str(u.id) for u in users_with_perms])
|
||||
writer.update_document(
|
||||
id=doc.pk,
|
||||
title=doc.title,
|
||||
content=effective_content or doc.content,
|
||||
correspondent=doc.correspondent.name if doc.correspondent else None,
|
||||
correspondent_id=doc.correspondent.id if doc.correspondent else None,
|
||||
has_correspondent=doc.correspondent is not None,
|
||||
tag=tags if tags else None,
|
||||
tag_id=tags_ids if tags_ids else None,
|
||||
has_tag=len(tags) > 0,
|
||||
type=doc.document_type.name if doc.document_type else None,
|
||||
type_id=doc.document_type.id if doc.document_type else None,
|
||||
has_type=doc.document_type is not None,
|
||||
created=datetime.combine(doc.created, time.min),
|
||||
added=doc.added,
|
||||
asn=asn,
|
||||
modified=doc.modified,
|
||||
path=doc.storage_path.name if doc.storage_path else None,
|
||||
path_id=doc.storage_path.id if doc.storage_path else None,
|
||||
has_path=doc.storage_path is not None,
|
||||
notes=notes,
|
||||
num_notes=len(notes),
|
||||
custom_fields=custom_fields,
|
||||
custom_field_count=len(doc.custom_fields.all()),
|
||||
has_custom_fields=len(custom_fields) > 0,
|
||||
custom_fields_id=custom_fields_ids if custom_fields_ids else None,
|
||||
owner=doc.owner.username if doc.owner else None,
|
||||
owner_id=doc.owner.id if doc.owner else None,
|
||||
has_owner=doc.owner is not None,
|
||||
viewer_id=viewer_ids if viewer_ids else None,
|
||||
checksum=doc.checksum,
|
||||
page_count=doc.page_count,
|
||||
original_filename=doc.original_filename,
|
||||
is_shared=len(viewer_ids) > 0,
|
||||
)
|
||||
logger.debug(f"Index updated for document {doc.pk}.")
|
||||
|
||||
|
||||
def remove_document(writer: AsyncWriter, doc: Document) -> None:
|
||||
remove_document_by_id(writer, doc.pk)
|
||||
|
||||
|
||||
def remove_document_by_id(writer: AsyncWriter, doc_id) -> None:
|
||||
writer.delete_by_term("id", doc_id)
|
||||
|
||||
|
||||
def add_or_update_document(
|
||||
document: Document,
|
||||
effective_content: str | None = None,
|
||||
) -> None:
|
||||
with open_index_writer() as writer:
|
||||
update_document(writer, document, effective_content=effective_content)
|
||||
|
||||
|
||||
def remove_document_from_index(document: Document) -> None:
|
||||
with open_index_writer() as writer:
|
||||
remove_document(writer, document)
|
||||
|
||||
|
||||
class MappedDocIdSet(DocIdSet):
|
||||
"""
|
||||
A DocIdSet backed by a set of `Document` IDs.
|
||||
Supports efficiently looking up if a whoosh docnum is in the provided `filter_queryset`.
|
||||
"""
|
||||
|
||||
def __init__(self, filter_queryset: QuerySet, ixreader: IndexReader) -> None:
|
||||
super().__init__()
|
||||
document_ids = filter_queryset.order_by("id").values_list("id", flat=True)
|
||||
max_id = document_ids.last() or 0
|
||||
self.document_ids = BitSet(document_ids, size=max_id)
|
||||
self.ixreader = ixreader
|
||||
|
||||
def __contains__(self, docnum) -> bool:
|
||||
document_id = self.ixreader.stored_fields(docnum)["id"]
|
||||
return document_id in self.document_ids
|
||||
|
||||
def __bool__(self) -> Literal[True]:
|
||||
# searcher.search ignores a filter if it's "falsy".
|
||||
# We use this hack so this DocIdSet, when used as a filter, is never ignored.
|
||||
return True
|
||||
|
||||
|
||||
class DelayedQuery:
|
||||
def _get_query(self):
|
||||
raise NotImplementedError # pragma: no cover
|
||||
|
||||
def _get_query_sortedby(self) -> tuple[None, Literal[False]] | tuple[str, bool]:
|
||||
if "ordering" not in self.query_params:
|
||||
return None, False
|
||||
|
||||
field: str = self.query_params["ordering"]
|
||||
|
||||
sort_fields_map: dict[str, str] = {
|
||||
"created": "created",
|
||||
"modified": "modified",
|
||||
"added": "added",
|
||||
"title": "title",
|
||||
"correspondent__name": "correspondent",
|
||||
"document_type__name": "type",
|
||||
"archive_serial_number": "asn",
|
||||
"num_notes": "num_notes",
|
||||
"owner": "owner",
|
||||
"page_count": "page_count",
|
||||
}
|
||||
|
||||
if field.startswith("-"):
|
||||
field = field[1:]
|
||||
reverse = True
|
||||
else:
|
||||
reverse = False
|
||||
|
||||
if field not in sort_fields_map:
|
||||
return None, False
|
||||
else:
|
||||
return sort_fields_map[field], reverse
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
searcher: Searcher,
|
||||
query_params,
|
||||
page_size,
|
||||
filter_queryset: QuerySet,
|
||||
) -> None:
|
||||
self.searcher = searcher
|
||||
self.query_params = query_params
|
||||
self.page_size = page_size
|
||||
self.saved_results = dict()
|
||||
self.first_score = None
|
||||
self.filter_queryset = filter_queryset
|
||||
self.suggested_correction = None
|
||||
self._manual_hits_cache: list | None = None
|
||||
|
||||
def __len__(self) -> int:
|
||||
if self._manual_sort_requested():
|
||||
manual_hits = self._manual_hits()
|
||||
return len(manual_hits)
|
||||
|
||||
page = self[0:1]
|
||||
return len(page)
|
||||
|
||||
def _manual_sort_requested(self):
|
||||
ordering = self.query_params.get("ordering", "")
|
||||
return ordering.lstrip("-").startswith("custom_field_")
|
||||
|
||||
def _manual_hits(self):
|
||||
if self._manual_hits_cache is None:
|
||||
q, mask, suggested_correction = self._get_query()
|
||||
self.suggested_correction = suggested_correction
|
||||
|
||||
results = self.searcher.search(
|
||||
q,
|
||||
mask=mask,
|
||||
filter=MappedDocIdSet(self.filter_queryset, self.searcher.ixreader),
|
||||
limit=None,
|
||||
)
|
||||
results.fragmenter = highlight.ContextFragmenter(surround=50)
|
||||
results.formatter = HtmlFormatter(tagname="span", between=" ... ")
|
||||
|
||||
if not self.first_score and len(results) > 0:
|
||||
self.first_score = results[0].score
|
||||
|
||||
if self.first_score:
|
||||
results.top_n = [
|
||||
(
|
||||
(hit[0] / self.first_score) if self.first_score else None,
|
||||
hit[1],
|
||||
)
|
||||
for hit in results.top_n
|
||||
]
|
||||
|
||||
hits_by_id = {hit["id"]: hit for hit in results}
|
||||
matching_ids = list(hits_by_id.keys())
|
||||
|
||||
ordered_ids = list(
|
||||
self.filter_queryset.filter(id__in=matching_ids).values_list(
|
||||
"id",
|
||||
flat=True,
|
||||
),
|
||||
)
|
||||
ordered_ids = list(dict.fromkeys(ordered_ids))
|
||||
|
||||
self._manual_hits_cache = [
|
||||
hits_by_id[_id] for _id in ordered_ids if _id in hits_by_id
|
||||
]
|
||||
return self._manual_hits_cache
|
||||
|
||||
def get_result_ids(self) -> list[int]:
|
||||
"""
|
||||
Return all matching document IDs for the current query and ordering.
|
||||
"""
|
||||
if self._manual_sort_requested():
|
||||
return [hit["id"] for hit in self._manual_hits()]
|
||||
|
||||
q, mask, suggested_correction = self._get_query()
|
||||
self.suggested_correction = suggested_correction
|
||||
sortedby, reverse = self._get_query_sortedby()
|
||||
results = self.searcher.search(
|
||||
q,
|
||||
mask=mask,
|
||||
filter=MappedDocIdSet(self.filter_queryset, self.searcher.ixreader),
|
||||
limit=None,
|
||||
sortedby=sortedby,
|
||||
reverse=reverse,
|
||||
)
|
||||
return [hit["id"] for hit in results]
|
||||
|
||||
def __getitem__(self, item):
|
||||
if item.start in self.saved_results:
|
||||
return self.saved_results[item.start]
|
||||
|
||||
if self._manual_sort_requested():
|
||||
manual_hits = self._manual_hits()
|
||||
start = 0 if item.start is None else item.start
|
||||
stop = item.stop
|
||||
hits = manual_hits[start:stop] if stop is not None else manual_hits[start:]
|
||||
page = ManualResultsPage(hits)
|
||||
self.saved_results[start] = page
|
||||
return page
|
||||
|
||||
q, mask, suggested_correction = self._get_query()
|
||||
self.suggested_correction = suggested_correction
|
||||
sortedby, reverse = self._get_query_sortedby()
|
||||
|
||||
page: ResultsPage = self.searcher.search_page(
|
||||
q,
|
||||
mask=mask,
|
||||
filter=MappedDocIdSet(self.filter_queryset, self.searcher.ixreader),
|
||||
pagenum=math.floor(item.start / self.page_size) + 1,
|
||||
pagelen=self.page_size,
|
||||
sortedby=sortedby,
|
||||
reverse=reverse,
|
||||
)
|
||||
page.results.fragmenter = highlight.ContextFragmenter(surround=50)
|
||||
page.results.formatter = HtmlFormatter(tagname="span", between=" ... ")
|
||||
|
||||
if not self.first_score and len(page.results) > 0 and sortedby is None:
|
||||
self.first_score = page.results[0].score
|
||||
|
||||
page.results.top_n = [
|
||||
(
|
||||
(hit[0] / self.first_score) if self.first_score else None,
|
||||
hit[1],
|
||||
)
|
||||
for hit in page.results.top_n
|
||||
]
|
||||
|
||||
self.saved_results[item.start] = page
|
||||
|
||||
return page
|
||||
|
||||
|
||||
class ManualResultsPage(list):
|
||||
def __init__(self, hits) -> None:
|
||||
super().__init__(hits)
|
||||
self.results = ManualResults(hits)
|
||||
|
||||
|
||||
class ManualResults:
|
||||
def __init__(self, hits) -> None:
|
||||
self._docnums = [hit.docnum for hit in hits]
|
||||
|
||||
def docs(self):
|
||||
return self._docnums
|
||||
|
||||
|
||||
class LocalDateParser(English):
|
||||
def reverse_timezone_offset(self, d):
|
||||
return (d.replace(tzinfo=django_timezone.get_current_timezone())).astimezone(
|
||||
UTC,
|
||||
)
|
||||
|
||||
def date_from(self, *args, **kwargs):
|
||||
d = super().date_from(*args, **kwargs)
|
||||
if isinstance(d, timespan):
|
||||
d.start = self.reverse_timezone_offset(d.start)
|
||||
d.end = self.reverse_timezone_offset(d.end)
|
||||
elif isinstance(d, datetime):
|
||||
d = self.reverse_timezone_offset(d)
|
||||
return d
|
||||
|
||||
|
||||
class DelayedFullTextQuery(DelayedQuery):
|
||||
def _get_query(self) -> tuple:
|
||||
q_str = self.query_params["query"]
|
||||
q_str = rewrite_natural_date_keywords(q_str)
|
||||
qp = MultifieldParser(
|
||||
[
|
||||
"content",
|
||||
"title",
|
||||
"correspondent",
|
||||
"tag",
|
||||
"type",
|
||||
"notes",
|
||||
"custom_fields",
|
||||
],
|
||||
self.searcher.ixreader.schema,
|
||||
)
|
||||
qp.add_plugin(
|
||||
DateParserPlugin(
|
||||
basedate=django_timezone.now(),
|
||||
dateparser=LocalDateParser(),
|
||||
),
|
||||
)
|
||||
q = qp.parse(q_str)
|
||||
suggested_correction = None
|
||||
try:
|
||||
corrected = self.searcher.correct_query(q, q_str)
|
||||
if corrected.string != q_str:
|
||||
corrected_results = self.searcher.search(
|
||||
corrected.query,
|
||||
limit=1,
|
||||
filter=MappedDocIdSet(self.filter_queryset, self.searcher.ixreader),
|
||||
scored=False,
|
||||
)
|
||||
if len(corrected_results) > 0:
|
||||
suggested_correction = corrected.string
|
||||
except Exception as e:
|
||||
logger.info(
|
||||
"Error while correcting query %s: %s",
|
||||
f"{q_str!r}",
|
||||
e,
|
||||
)
|
||||
|
||||
return q, None, suggested_correction
|
||||
|
||||
|
||||
class DelayedMoreLikeThisQuery(DelayedQuery):
|
||||
def _get_query(self) -> tuple:
|
||||
more_like_doc_id = int(self.query_params["more_like_id"])
|
||||
content = Document.objects.get(id=more_like_doc_id).content
|
||||
|
||||
docnum = self.searcher.document_number(id=more_like_doc_id)
|
||||
kts = self.searcher.key_terms_from_text(
|
||||
"content",
|
||||
content,
|
||||
numterms=20,
|
||||
model=classify.Bo1Model,
|
||||
normalize=False,
|
||||
)
|
||||
q = query.Or(
|
||||
[query.Term("content", word, boost=weight) for word, weight in kts],
|
||||
)
|
||||
mask: set = {docnum}
|
||||
|
||||
return q, mask, None
|
||||
|
||||
|
||||
def autocomplete(
    ix: FileIndex,
    term: str,
    limit: int = 10,
    user: User | None = None,
) -> list:
    """
    Mimics whoosh.reading.IndexReader.most_distinctive_terms with permissions
    and without scoring.

    Returns up to ``limit`` indexed terms (as bytes) from the content field
    that start with ``term``, ordered by the number of matching documents
    containing them. An exact match of ``term`` itself is moved to the front.
    """
    terms = []

    with ix.searcher(weighting=TF_IDF()) as s:
        qp = QueryParser("content", schema=ix.schema)
        # Don't let searches with a query that happen to match a field override the
        # content field query instead and return bogus, not text data
        qp.remove_plugin_class(FieldsPlugin)
        q = qp.parse(f"{term.lower()}*")
        user_criterias: list = get_permissions_criterias(user)

        # An empty criteria list means the user may see everything (superuser).
        # Passing query.Or([]) as a filter normalizes to a match-nothing query
        # in Whoosh, so only apply a filter when there are actual criteria.
        results = s.search(
            q,
            terms=True,
            filter=query.Or(user_criterias) if user_criterias else None,
        )

        term_counts: Counter = Counter()
        if results.has_matched_terms():
            for hit in results:
                for _, match in hit.matched_terms():
                    term_counts[match] += 1
        terms = [t for t, _ in term_counts.most_common(limit)]

        term_encoded: bytes = term.encode("UTF-8")
        if term_encoded in terms:
            # Promote an exact match to the top of the suggestions.
            terms.insert(0, terms.pop(terms.index(term_encoded)))

    return terms
|
||||
|
||||
|
||||
def get_permissions_criterias(user: User | None = None) -> list:
    """
    Build the list of Whoosh query terms restricting results to documents
    the given user may view. An empty list means no restriction applies.
    """
    if user is None:
        # Anonymous access: only unowned documents are visible.
        return [query.Term("has_owner", text=False)]
    if user.is_superuser:  # superusers see all docs
        return []
    return [
        query.Term("has_owner", text=False),
        query.Term("owner_id", user.id),
        query.Term("viewer_id", str(user.id)),
    ]
|
||||
|
||||
|
||||
def rewrite_natural_date_keywords(query_string: str) -> str:
    """
    Rewrites natural date keywords (e.g. added:today or added:"yesterday") to UTC range syntax for Whoosh.

    This resolves timezone issues with date parsing in Whoosh as well as adding support for more
    natural date keywords.
    """
    local_tz = get_current_timezone()
    local_now = now().astimezone(local_tz)
    today = local_now.date()

    # all supported Keywords
    pattern = r"(\b(?:added|created|modified))\s*:\s*[\"']?(today|yesterday|this month|previous month|previous week|previous quarter|this year|previous year)[\"']?"

    def replace_keyword(matched):
        field = matched.group(1)
        keyword = matched.group(2).lower()

        if keyword == "today":
            start = datetime.combine(today, time.min, tzinfo=local_tz)
            end = datetime.combine(today, time.max, tzinfo=local_tz)
        elif keyword == "yesterday":
            yesterday = today - timedelta(days=1)
            start = datetime.combine(yesterday, time.min, tzinfo=local_tz)
            end = datetime.combine(yesterday, time.max, tzinfo=local_tz)
        elif keyword == "this month":
            start = datetime(local_now.year, local_now.month, 1, 0, 0, 0, tzinfo=local_tz)
            end = start + relativedelta(months=1) - timedelta(seconds=1)
        elif keyword == "previous month":
            this_month_start = datetime(
                local_now.year,
                local_now.month,
                1,
                0,
                0,
                0,
                tzinfo=local_tz,
            )
            start = this_month_start - relativedelta(months=1)
            end = this_month_start - timedelta(seconds=1)
        elif keyword == "this year":
            start = datetime(local_now.year, 1, 1, 0, 0, 0, tzinfo=local_tz)
            end = datetime(local_now.year, 12, 31, 23, 59, 59, tzinfo=local_tz)
        elif keyword == "previous week":
            # weekday() == 0 on Monday, so this yields the current week's start.
            this_week_start = datetime.combine(
                today - timedelta(days=local_now.weekday()),
                time.min,
                tzinfo=local_tz,
            )
            start = this_week_start - timedelta(days=7)
            end = this_week_start - timedelta(seconds=1)
        elif keyword == "previous quarter":
            current_quarter = (local_now.month - 1) // 3 + 1
            this_quarter_start = datetime(
                local_now.year,
                (current_quarter - 1) * 3 + 1,
                1,
                0,
                0,
                0,
                tzinfo=local_tz,
            )
            start = this_quarter_start - relativedelta(months=3)
            end = this_quarter_start - timedelta(seconds=1)
        else:  # "previous year" is the only remaining keyword the pattern allows
            start = datetime(local_now.year - 1, 1, 1, 0, 0, 0, tzinfo=local_tz)
            end = datetime(local_now.year - 1, 12, 31, 23, 59, 59, tzinfo=local_tz)

        # Convert to UTC and format
        start_str = start.astimezone(UTC).strftime("%Y%m%d%H%M%S")
        end_str = end.astimezone(UTC).strftime("%Y%m%d%H%M%S")
        return f"{field}:[{start_str} TO {end_str}]"

    return re.sub(pattern, replace_keyword, query_string, flags=re.IGNORECASE)
|
||||
@@ -45,6 +45,8 @@ from documents.models import DocumentType
|
||||
from documents.models import Note
|
||||
from documents.models import SavedView
|
||||
from documents.models import SavedViewFilterRule
|
||||
from documents.models import ShareLink
|
||||
from documents.models import ShareLinkBundle
|
||||
from documents.models import StoragePath
|
||||
from documents.models import Tag
|
||||
from documents.models import UiSettings
|
||||
@@ -55,6 +57,7 @@ from documents.models import WorkflowActionWebhook
|
||||
from documents.models import WorkflowTrigger
|
||||
from documents.settings import EXPORTER_ARCHIVE_NAME
|
||||
from documents.settings import EXPORTER_FILE_NAME
|
||||
from documents.settings import EXPORTER_SHARE_LINK_BUNDLE_NAME
|
||||
from documents.settings import EXPORTER_THUMBNAIL_NAME
|
||||
from documents.utils import compute_checksum
|
||||
from documents.utils import copy_file_with_basic_stats
|
||||
@@ -389,6 +392,8 @@ class Command(CryptMixin, PaperlessCommand):
|
||||
"app_configs": ApplicationConfiguration.objects.all(),
|
||||
"notes": Note.global_objects.all(),
|
||||
"documents": Document.global_objects.order_by("id").all(),
|
||||
"share_links": ShareLink.global_objects.all(),
|
||||
"share_link_bundles": ShareLinkBundle.objects.order_by("id").all(),
|
||||
"social_accounts": SocialAccount.objects.all(),
|
||||
"social_apps": SocialApp.objects.all(),
|
||||
"social_tokens": SocialToken.objects.all(),
|
||||
@@ -409,6 +414,7 @@ class Command(CryptMixin, PaperlessCommand):
|
||||
)
|
||||
|
||||
document_manifest: list[dict] = []
|
||||
share_link_bundle_manifest: list[dict] = []
|
||||
manifest_path = (self.target / "manifest.json").resolve()
|
||||
|
||||
with StreamingManifestWriter(
|
||||
@@ -427,6 +433,15 @@ class Command(CryptMixin, PaperlessCommand):
|
||||
for record in batch:
|
||||
self._encrypt_record_inline(record)
|
||||
document_manifest.extend(batch)
|
||||
elif key == "share_link_bundles":
|
||||
# Accumulate for file-copy loop; written to manifest after
|
||||
for batch in serialize_queryset_batched(
|
||||
qs,
|
||||
batch_size=self.batch_size,
|
||||
):
|
||||
for record in batch:
|
||||
self._encrypt_record_inline(record)
|
||||
share_link_bundle_manifest.extend(batch)
|
||||
elif self.split_manifest and key in (
|
||||
"notes",
|
||||
"custom_field_instances",
|
||||
@@ -445,6 +460,12 @@ class Command(CryptMixin, PaperlessCommand):
|
||||
document_map: dict[int, Document] = {
|
||||
d.pk: d for d in Document.global_objects.order_by("id")
|
||||
}
|
||||
share_link_bundle_map: dict[int, ShareLinkBundle] = {
|
||||
b.pk: b
|
||||
for b in ShareLinkBundle.objects.order_by("id").prefetch_related(
|
||||
"documents",
|
||||
)
|
||||
}
|
||||
|
||||
# 3. Export files from each document
|
||||
for index, document_dict in enumerate(
|
||||
@@ -478,6 +499,19 @@ class Command(CryptMixin, PaperlessCommand):
|
||||
else:
|
||||
writer.write_record(document_dict)
|
||||
|
||||
for bundle_dict in share_link_bundle_manifest:
|
||||
bundle = share_link_bundle_map[bundle_dict["pk"]]
|
||||
|
||||
bundle_target = self.generate_share_link_bundle_target(
|
||||
bundle,
|
||||
bundle_dict,
|
||||
)
|
||||
|
||||
if not self.data_only and bundle_target is not None:
|
||||
self.copy_share_link_bundle_file(bundle, bundle_target)
|
||||
|
||||
writer.write_record(bundle_dict)
|
||||
|
||||
# 4.2 write version information to target folder
|
||||
extra_metadata_path = (self.target / "metadata.json").resolve()
|
||||
metadata: dict[str, str | int | dict[str, str | int]] = {
|
||||
@@ -598,6 +632,48 @@ class Command(CryptMixin, PaperlessCommand):
|
||||
archive_target,
|
||||
)
|
||||
|
||||
def generate_share_link_bundle_target(
|
||||
self,
|
||||
bundle: ShareLinkBundle,
|
||||
bundle_dict: dict,
|
||||
) -> Path | None:
|
||||
"""
|
||||
Generates the export target for a share link bundle file, when present.
|
||||
"""
|
||||
if not bundle.file_path:
|
||||
return None
|
||||
|
||||
stored_bundle_path = Path(bundle.file_path)
|
||||
portable_bundle_path = (
|
||||
stored_bundle_path
|
||||
if not stored_bundle_path.is_absolute()
|
||||
else Path(stored_bundle_path.name)
|
||||
)
|
||||
export_bundle_path = Path("share_link_bundles") / portable_bundle_path
|
||||
|
||||
bundle_dict["fields"]["file_path"] = portable_bundle_path.as_posix()
|
||||
bundle_dict[EXPORTER_SHARE_LINK_BUNDLE_NAME] = export_bundle_path.as_posix()
|
||||
|
||||
return (self.target / export_bundle_path).resolve()
|
||||
|
||||
def copy_share_link_bundle_file(
|
||||
self,
|
||||
bundle: ShareLinkBundle,
|
||||
bundle_target: Path,
|
||||
) -> None:
|
||||
"""
|
||||
Copies a share link bundle ZIP into the export directory.
|
||||
"""
|
||||
bundle_source_path = bundle.absolute_file_path
|
||||
if bundle_source_path is None:
|
||||
raise FileNotFoundError(f"Share link bundle {bundle.pk} has no file path")
|
||||
|
||||
self.check_and_copy(
|
||||
bundle_source_path,
|
||||
None,
|
||||
bundle_target,
|
||||
)
|
||||
|
||||
def _encrypt_record_inline(self, record: dict) -> None:
|
||||
"""Encrypt sensitive fields in a single record, if passphrase is set."""
|
||||
if not self.passphrase:
|
||||
|
||||
@@ -32,10 +32,12 @@ from documents.models import CustomFieldInstance
|
||||
from documents.models import Document
|
||||
from documents.models import DocumentType
|
||||
from documents.models import Note
|
||||
from documents.models import ShareLinkBundle
|
||||
from documents.models import Tag
|
||||
from documents.settings import EXPORTER_ARCHIVE_NAME
|
||||
from documents.settings import EXPORTER_CRYPTO_SETTINGS_NAME
|
||||
from documents.settings import EXPORTER_FILE_NAME
|
||||
from documents.settings import EXPORTER_SHARE_LINK_BUNDLE_NAME
|
||||
from documents.settings import EXPORTER_THUMBNAIL_NAME
|
||||
from documents.signals.handlers import check_paths_and_prune_custom_fields
|
||||
from documents.signals.handlers import update_filename_and_move_files
|
||||
@@ -348,18 +350,42 @@ class Command(CryptMixin, PaperlessCommand):
|
||||
f"Failed to read from archive file {doc_archive_path}",
|
||||
) from e
|
||||
|
||||
def check_share_link_bundle_validity(bundle_record: dict) -> None:
|
||||
if EXPORTER_SHARE_LINK_BUNDLE_NAME not in bundle_record:
|
||||
return
|
||||
|
||||
bundle_file = bundle_record[EXPORTER_SHARE_LINK_BUNDLE_NAME]
|
||||
bundle_path: Path = self.source / bundle_file
|
||||
if not bundle_path.exists():
|
||||
raise CommandError(
|
||||
f'The manifest file refers to "{bundle_file}" which does not '
|
||||
"appear to be in the source directory.",
|
||||
)
|
||||
try:
|
||||
with bundle_path.open(mode="rb"):
|
||||
pass
|
||||
except Exception as e:
|
||||
raise CommandError(
|
||||
f"Failed to read from share link bundle file {bundle_path}",
|
||||
) from e
|
||||
|
||||
self.stdout.write("Checking the manifest")
|
||||
for manifest_path in self.manifest_paths:
|
||||
for record in iter_manifest_records(manifest_path):
|
||||
# Only check if the document files exist if this is not data only
|
||||
# We don't care about documents for a data only import
|
||||
if not self.data_only and record["model"] == "documents.document":
|
||||
if self.data_only:
|
||||
continue
|
||||
if record["model"] == "documents.document":
|
||||
check_document_validity(record)
|
||||
elif record["model"] == "documents.sharelinkbundle":
|
||||
check_share_link_bundle_validity(record)
|
||||
|
||||
def _import_files_from_manifest(self) -> None:
|
||||
settings.ORIGINALS_DIR.mkdir(parents=True, exist_ok=True)
|
||||
settings.THUMBNAIL_DIR.mkdir(parents=True, exist_ok=True)
|
||||
settings.ARCHIVE_DIR.mkdir(parents=True, exist_ok=True)
|
||||
settings.SHARE_LINK_BUNDLE_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
self.stdout.write("Copy files into paperless...")
|
||||
|
||||
@@ -374,6 +400,18 @@ class Command(CryptMixin, PaperlessCommand):
|
||||
for record in iter_manifest_records(manifest_path)
|
||||
if record["model"] == "documents.document"
|
||||
]
|
||||
share_link_bundle_records = [
|
||||
{
|
||||
"pk": record["pk"],
|
||||
EXPORTER_SHARE_LINK_BUNDLE_NAME: record.get(
|
||||
EXPORTER_SHARE_LINK_BUNDLE_NAME,
|
||||
),
|
||||
}
|
||||
for manifest_path in self.manifest_paths
|
||||
for record in iter_manifest_records(manifest_path)
|
||||
if record["model"] == "documents.sharelinkbundle"
|
||||
and record.get(EXPORTER_SHARE_LINK_BUNDLE_NAME)
|
||||
]
|
||||
|
||||
for record in self.track(document_records, description="Copying files..."):
|
||||
document = Document.global_objects.get(pk=record["pk"])
|
||||
@@ -416,6 +454,26 @@ class Command(CryptMixin, PaperlessCommand):
|
||||
|
||||
document.save()
|
||||
|
||||
for record in self.track(
|
||||
share_link_bundle_records,
|
||||
description="Copying share link bundles...",
|
||||
):
|
||||
bundle = ShareLinkBundle.objects.get(pk=record["pk"])
|
||||
bundle_file = record[EXPORTER_SHARE_LINK_BUNDLE_NAME]
|
||||
bundle_source_path = (self.source / bundle_file).resolve()
|
||||
bundle_target_path = bundle.absolute_file_path
|
||||
if bundle_target_path is None:
|
||||
raise CommandError(
|
||||
f"Share link bundle {bundle.pk} does not have a valid file path.",
|
||||
)
|
||||
|
||||
with FileLock(settings.MEDIA_LOCK):
|
||||
bundle_target_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
copy_file_with_basic_stats(
|
||||
bundle_source_path,
|
||||
bundle_target_path,
|
||||
)
|
||||
|
||||
def _decrypt_record_if_needed(self, record: dict) -> dict:
|
||||
fields = self.CRYPT_FIELDS_BY_MODEL.get(record.get("model", ""))
|
||||
if fields:
|
||||
|
||||
@@ -1,11 +1,26 @@
|
||||
import logging
|
||||
|
||||
from django.conf import settings
|
||||
from django.db import transaction
|
||||
|
||||
from documents.management.commands.base import PaperlessCommand
|
||||
from documents.tasks import index_optimize
|
||||
from documents.tasks import index_reindex
|
||||
from documents.models import Document
|
||||
from documents.search import get_backend
|
||||
from documents.search import needs_rebuild
|
||||
from documents.search import reset_backend
|
||||
from documents.search import wipe_index
|
||||
|
||||
logger = logging.getLogger("paperless.management.document_index")
|
||||
|
||||
|
||||
class Command(PaperlessCommand):
|
||||
"""
|
||||
Django management command for search index operations.
|
||||
|
||||
Provides subcommands for reindexing documents and optimizing the search index.
|
||||
Supports conditional reindexing based on schema version and language changes.
|
||||
"""
|
||||
|
||||
help = "Manages the document index."
|
||||
|
||||
supports_progress_bar = True
|
||||
@@ -14,15 +29,49 @@ class Command(PaperlessCommand):
|
||||
def add_arguments(self, parser):
|
||||
super().add_arguments(parser)
|
||||
parser.add_argument("command", choices=["reindex", "optimize"])
|
||||
parser.add_argument(
|
||||
"--recreate",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help="Wipe and recreate the index from scratch (only used with reindex).",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--if-needed",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help=(
|
||||
"Skip reindex if the index is already up to date. "
|
||||
"Checks schema version and search language sentinels. "
|
||||
"Safe to run on every startup or upgrade."
|
||||
),
|
||||
)
|
||||
|
||||
def handle(self, *args, **options):
|
||||
with transaction.atomic():
|
||||
if options["command"] == "reindex":
|
||||
index_reindex(
|
||||
if options.get("if_needed") and not needs_rebuild(settings.INDEX_DIR):
|
||||
self.stdout.write("Search index is up to date.")
|
||||
return
|
||||
if options.get("recreate"):
|
||||
wipe_index(settings.INDEX_DIR)
|
||||
|
||||
documents = Document.objects.select_related(
|
||||
"correspondent",
|
||||
"document_type",
|
||||
"storage_path",
|
||||
"owner",
|
||||
).prefetch_related("tags", "notes", "custom_fields", "versions")
|
||||
get_backend().rebuild(
|
||||
documents,
|
||||
iter_wrapper=lambda docs: self.track(
|
||||
docs,
|
||||
description="Indexing documents...",
|
||||
),
|
||||
)
|
||||
reset_backend()
|
||||
|
||||
elif options["command"] == "optimize":
|
||||
index_optimize()
|
||||
logger.info(
|
||||
"document_index optimize is a no-op — Tantivy manages "
|
||||
"segment merging automatically.",
|
||||
)
|
||||
|
||||
@@ -0,0 +1,39 @@
|
||||
import re
|
||||
|
||||
from django.db import migrations
|
||||
|
||||
# Matches "note:" when NOT preceded by a word character or dot.
|
||||
# This avoids false positives like "denote:" or already-migrated "notes.note:".
|
||||
# Handles start-of-string, whitespace, parentheses, +/- operators per Whoosh syntax.
|
||||
_NOTE_RE = re.compile(r"(?<![.\w])note:")
|
||||
|
||||
# Same logic for "custom_field:" -> "custom_fields.value:"
|
||||
_CUSTOM_FIELD_RE = re.compile(r"(?<![.\w])custom_field:")
|
||||
|
||||
|
||||
def migrate_fulltext_query_field_prefixes(apps, schema_editor):
|
||||
SavedViewFilterRule = apps.get_model("documents", "SavedViewFilterRule")
|
||||
|
||||
# rule_type 20 = "fulltext query" — value is a search query string
|
||||
for rule in SavedViewFilterRule.objects.filter(rule_type=20).exclude(
|
||||
value__isnull=True,
|
||||
):
|
||||
new_value = _NOTE_RE.sub("notes.note:", rule.value)
|
||||
new_value = _CUSTOM_FIELD_RE.sub("custom_fields.value:", new_value)
|
||||
|
||||
if new_value != rule.value:
|
||||
rule.value = new_value
|
||||
rule.save(update_fields=["value"])
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
dependencies = [
|
||||
("documents", "0016_sha256_checksums"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.RunPython(
|
||||
migrate_fulltext_query_field_prefixes,
|
||||
migrations.RunPython.noop,
|
||||
),
|
||||
]
|
||||
@@ -0,0 +1,92 @@
|
||||
# Generated by Django 5.2.12 on 2026-04-01 18:20
|
||||
|
||||
from django.db import migrations
|
||||
from django.db import models
|
||||
|
||||
OLD_TITLE_RULE = 0
|
||||
OLD_TITLE_CONTENT_RULE = 19
|
||||
NEW_SIMPLE_TITLE_RULE = 48
|
||||
NEW_SIMPLE_TEXT_RULE = 49
|
||||
|
||||
|
||||
# See documents/models.py SavedViewFilterRule
|
||||
def migrate_saved_view_rules_forward(apps, schema_editor):
|
||||
SavedViewFilterRule = apps.get_model("documents", "SavedViewFilterRule")
|
||||
SavedViewFilterRule.objects.filter(rule_type=OLD_TITLE_RULE).update(
|
||||
rule_type=NEW_SIMPLE_TITLE_RULE,
|
||||
)
|
||||
SavedViewFilterRule.objects.filter(rule_type=OLD_TITLE_CONTENT_RULE).update(
|
||||
rule_type=NEW_SIMPLE_TEXT_RULE,
|
||||
)
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
dependencies = [
|
||||
("documents", "0017_migrate_fulltext_query_field_prefixes"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterField(
|
||||
model_name="savedviewfilterrule",
|
||||
name="rule_type",
|
||||
field=models.PositiveSmallIntegerField(
|
||||
choices=[
|
||||
(0, "title contains"),
|
||||
(1, "content contains"),
|
||||
(2, "ASN is"),
|
||||
(3, "correspondent is"),
|
||||
(4, "document type is"),
|
||||
(5, "is in inbox"),
|
||||
(6, "has tag"),
|
||||
(7, "has any tag"),
|
||||
(8, "created before"),
|
||||
(9, "created after"),
|
||||
(10, "created year is"),
|
||||
(11, "created month is"),
|
||||
(12, "created day is"),
|
||||
(13, "added before"),
|
||||
(14, "added after"),
|
||||
(15, "modified before"),
|
||||
(16, "modified after"),
|
||||
(17, "does not have tag"),
|
||||
(18, "does not have ASN"),
|
||||
(19, "title or content contains"),
|
||||
(20, "fulltext query"),
|
||||
(21, "more like this"),
|
||||
(22, "has tags in"),
|
||||
(23, "ASN greater than"),
|
||||
(24, "ASN less than"),
|
||||
(25, "storage path is"),
|
||||
(26, "has correspondent in"),
|
||||
(27, "does not have correspondent in"),
|
||||
(28, "has document type in"),
|
||||
(29, "does not have document type in"),
|
||||
(30, "has storage path in"),
|
||||
(31, "does not have storage path in"),
|
||||
(32, "owner is"),
|
||||
(33, "has owner in"),
|
||||
(34, "does not have owner"),
|
||||
(35, "does not have owner in"),
|
||||
(36, "has custom field value"),
|
||||
(37, "is shared by me"),
|
||||
(38, "has custom fields"),
|
||||
(39, "has custom field in"),
|
||||
(40, "does not have custom field in"),
|
||||
(41, "does not have custom field"),
|
||||
(42, "custom fields query"),
|
||||
(43, "created to"),
|
||||
(44, "created from"),
|
||||
(45, "added to"),
|
||||
(46, "added from"),
|
||||
(47, "mime type is"),
|
||||
(48, "simple title search"),
|
||||
(49, "simple text search"),
|
||||
],
|
||||
verbose_name="rule type",
|
||||
),
|
||||
),
|
||||
migrations.RunPython(
|
||||
migrate_saved_view_rules_forward,
|
||||
migrations.RunPython.noop,
|
||||
),
|
||||
]
|
||||
@@ -623,6 +623,8 @@ class SavedViewFilterRule(models.Model):
|
||||
(45, _("added to")),
|
||||
(46, _("added from")),
|
||||
(47, _("mime type is")),
|
||||
(48, _("simple title search")),
|
||||
(49, _("simple text search")),
|
||||
]
|
||||
|
||||
saved_view = models.ForeignKey(
|
||||
@@ -1114,19 +1116,7 @@ class CustomFieldInstance(SoftDeleteModel):
|
||||
]
|
||||
|
||||
def __str__(self) -> str:
|
||||
value = (
|
||||
next(
|
||||
option.get("label")
|
||||
for option in self.field.extra_data["select_options"]
|
||||
if option.get("id") == self.value_select
|
||||
)
|
||||
if (
|
||||
self.field.data_type == CustomField.FieldDataType.SELECT
|
||||
and self.value_select is not None
|
||||
)
|
||||
else self.value
|
||||
)
|
||||
return str(self.field.name) + f" : {value}"
|
||||
return str(self.field.name) + f" : {self.value_for_search}"
|
||||
|
||||
@classmethod
|
||||
def get_value_field_name(cls, data_type: CustomField.FieldDataType):
|
||||
@@ -1144,6 +1134,25 @@ class CustomFieldInstance(SoftDeleteModel):
|
||||
value_field_name = self.get_value_field_name(self.field.data_type)
|
||||
return getattr(self, value_field_name)
|
||||
|
||||
@property
|
||||
def value_for_search(self) -> str | None:
|
||||
"""
|
||||
Return the value suitable for full-text indexing and display, or None
|
||||
if the value is unset.
|
||||
|
||||
For SELECT fields, resolves the human-readable label rather than the
|
||||
opaque option ID stored in value_select.
|
||||
"""
|
||||
if self.value is None:
|
||||
return None
|
||||
if self.field.data_type == CustomField.FieldDataType.SELECT:
|
||||
options = (self.field.extra_data or {}).get("select_options", [])
|
||||
return next(
|
||||
(o["label"] for o in options if o.get("id") == self.value),
|
||||
None,
|
||||
)
|
||||
return str(self.value)
|
||||
|
||||
|
||||
if settings.AUDIT_LOG_ENABLED:
|
||||
auditlog.register(
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
import datetime
|
||||
import re
|
||||
from collections.abc import Iterator
|
||||
from re import Match
|
||||
|
||||
import regex
|
||||
from regex import Match
|
||||
|
||||
from documents.plugins.date_parsing.base import DateParserPluginBase
|
||||
from documents.regex import safe_regex_finditer
|
||||
|
||||
|
||||
class RegexDateParserPlugin(DateParserPluginBase):
|
||||
@@ -14,7 +16,7 @@ class RegexDateParserPlugin(DateParserPluginBase):
|
||||
passed to its constructor.
|
||||
"""
|
||||
|
||||
DATE_REGEX = re.compile(
|
||||
DATE_REGEX = regex.compile(
|
||||
r"(\b|(?!=([_-])))(\d{1,2})[\.\/-](\d{1,2})[\.\/-](\d{4}|\d{2})(\b|(?=([_-])))|"
|
||||
r"(\b|(?!=([_-])))(\d{4}|\d{2})[\.\/-](\d{1,2})[\.\/-](\d{1,2})(\b|(?=([_-])))|"
|
||||
r"(\b|(?!=([_-])))(\d{1,2}[\. ]+[a-zéûäëčžúřěáíóńźçŞğü]{3,9} \d{4}|[a-zéûäëčžúřěáíóńźçŞğü]{3,9} \d{1,2}, \d{4})(\b|(?=([_-])))|"
|
||||
@@ -22,7 +24,7 @@ class RegexDateParserPlugin(DateParserPluginBase):
|
||||
r"(\b|(?!=([_-])))([^\W\d_]{3,9} \d{4})(\b|(?=([_-])))|"
|
||||
r"(\b|(?!=([_-])))(\d{1,2}[^ 0-9]{2}[\. ]+[^ ]{3,9}[ \.\/-]\d{4})(\b|(?=([_-])))|"
|
||||
r"(\b|(?!=([_-])))(\b\d{1,2}[ \.\/-][a-zéûäëčžúřěáíóńźçŞğü]{3}[ \.\/-]\d{4})(\b|(?=([_-])))",
|
||||
re.IGNORECASE,
|
||||
regex.IGNORECASE,
|
||||
)
|
||||
|
||||
def _process_match(
|
||||
@@ -45,7 +47,7 @@ class RegexDateParserPlugin(DateParserPluginBase):
|
||||
"""
|
||||
Finds all regex matches in content and yields valid dates.
|
||||
"""
|
||||
for m in re.finditer(self.DATE_REGEX, content):
|
||||
for m in safe_regex_finditer(self.DATE_REGEX, content):
|
||||
date = self._process_match(m, date_order)
|
||||
if date is not None:
|
||||
yield date
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
import enum
|
||||
from collections.abc import Mapping
|
||||
from typing import TYPE_CHECKING
|
||||
from typing import Literal
|
||||
from typing import Self
|
||||
from typing import TypeAlias
|
||||
from typing import TypedDict
|
||||
|
||||
from asgiref.sync import async_to_sync
|
||||
from channels.layers import get_channel_layer
|
||||
@@ -16,6 +19,59 @@ class ProgressStatusOptions(enum.StrEnum):
|
||||
FAILED = "FAILED"
|
||||
|
||||
|
||||
class PermissionsData(TypedDict, total=False):
|
||||
"""Permission fields included in status messages for access control."""
|
||||
|
||||
owner_id: int | None
|
||||
users_can_view: list[int]
|
||||
groups_can_view: list[int]
|
||||
|
||||
|
||||
class ProgressUpdateData(TypedDict):
|
||||
filename: str | None
|
||||
task_id: str | None
|
||||
current_progress: int
|
||||
max_progress: int
|
||||
status: str
|
||||
message: str
|
||||
document_id: int | None
|
||||
owner_id: int | None
|
||||
users_can_view: list[int]
|
||||
groups_can_view: list[int]
|
||||
|
||||
|
||||
class StatusUpdatePayload(TypedDict):
|
||||
type: Literal["status_update"]
|
||||
data: ProgressUpdateData
|
||||
|
||||
|
||||
class DocumentsDeletedData(TypedDict):
|
||||
documents: list[int]
|
||||
|
||||
|
||||
class DocumentsDeletedPayload(TypedDict):
|
||||
type: Literal["documents_deleted"]
|
||||
data: DocumentsDeletedData
|
||||
|
||||
|
||||
class DocumentUpdatedData(TypedDict):
|
||||
document_id: int
|
||||
modified: str
|
||||
owner_id: int | None
|
||||
users_can_view: list[int]
|
||||
groups_can_view: list[int]
|
||||
|
||||
|
||||
class DocumentUpdatedPayload(TypedDict):
|
||||
type: Literal["document_updated"]
|
||||
data: DocumentUpdatedData
|
||||
|
||||
|
||||
WebsocketPayload: TypeAlias = (
|
||||
StatusUpdatePayload | DocumentsDeletedPayload | DocumentUpdatedPayload
|
||||
)
|
||||
|
||||
|
||||
class BaseStatusManager:
|
||||
"""
|
||||
Handles sending of progress information via the channel layer, with proper management
|
||||
@@ -25,11 +81,11 @@ class BaseStatusManager:
|
||||
def __init__(self) -> None:
|
||||
self._channel: RedisPubSubChannelLayer | None = None
|
||||
|
||||
def __enter__(self):
|
||||
def __enter__(self) -> Self:
|
||||
self.open()
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
def __exit__(self, exc_type: object, exc_val: object, exc_tb: object) -> None:
|
||||
self.close()
|
||||
|
||||
def open(self) -> None:
|
||||
@@ -48,7 +104,7 @@ class BaseStatusManager:
|
||||
async_to_sync(self._channel.flush)
|
||||
self._channel = None
|
||||
|
||||
def send(self, payload: Mapping[str, object]) -> None:
|
||||
def send(self, payload: WebsocketPayload) -> None:
|
||||
# Ensure the layer is open
|
||||
self.open()
|
||||
|
||||
@@ -72,36 +128,36 @@ class ProgressManager(BaseStatusManager):
|
||||
message: str,
|
||||
current_progress: int,
|
||||
max_progress: int,
|
||||
extra_args: dict[str, str | int | None] | None = None,
|
||||
*,
|
||||
document_id: int | None = None,
|
||||
owner_id: int | None = None,
|
||||
users_can_view: list[int] | None = None,
|
||||
groups_can_view: list[int] | None = None,
|
||||
) -> None:
|
||||
data: dict[str, object] = {
|
||||
data: ProgressUpdateData = {
|
||||
"filename": self.filename,
|
||||
"task_id": self.task_id,
|
||||
"current_progress": current_progress,
|
||||
"max_progress": max_progress,
|
||||
"status": status,
|
||||
"message": message,
|
||||
"document_id": document_id,
|
||||
"owner_id": owner_id,
|
||||
"users_can_view": users_can_view or [],
|
||||
"groups_can_view": groups_can_view or [],
|
||||
}
|
||||
if extra_args is not None:
|
||||
data.update(extra_args)
|
||||
|
||||
payload: dict[str, object] = {
|
||||
"type": "status_update",
|
||||
"data": data,
|
||||
}
|
||||
|
||||
payload: StatusUpdatePayload = {"type": "status_update", "data": data}
|
||||
self.send(payload)
|
||||
|
||||
|
||||
class DocumentsStatusManager(BaseStatusManager):
|
||||
def send_documents_deleted(self, documents: list[int]) -> None:
|
||||
payload: dict[str, object] = {
|
||||
payload: DocumentsDeletedPayload = {
|
||||
"type": "documents_deleted",
|
||||
"data": {
|
||||
"documents": documents,
|
||||
},
|
||||
}
|
||||
|
||||
self.send(payload)
|
||||
|
||||
def send_document_updated(
|
||||
@@ -113,7 +169,7 @@ class DocumentsStatusManager(BaseStatusManager):
|
||||
users_can_view: list[int] | None = None,
|
||||
groups_can_view: list[int] | None = None,
|
||||
) -> None:
|
||||
payload: dict[str, object] = {
|
||||
payload: DocumentUpdatedPayload = {
|
||||
"type": "document_updated",
|
||||
"data": {
|
||||
"document_id": document_id,
|
||||
@@ -123,5 +179,4 @@ class DocumentsStatusManager(BaseStatusManager):
|
||||
"groups_can_view": groups_can_view or [],
|
||||
},
|
||||
}
|
||||
|
||||
self.send(payload)
|
||||
|
||||
@@ -48,3 +48,73 @@ def safe_regex_search(pattern: str, text: str, *, flags: int = 0):
|
||||
textwrap.shorten(pattern, width=80, placeholder="…"),
|
||||
)
|
||||
return None
|
||||
|
||||
|
||||
def safe_regex_match(pattern: str, text: str, *, flags: int = 0):
    """
    Run a regex match with a timeout. Returns a match object or None.
    Validation errors and timeouts are logged and treated as no match.
    """
    try:
        validate_regex_pattern(pattern)
        prepared = regex.compile(pattern, flags=flags)
    except (regex.error, ValueError) as err:
        logger.error(
            "Error while processing regular expression %s: %s",
            textwrap.shorten(pattern, width=80, placeholder="…"),
            err,
        )
        return None

    result = None
    try:
        result = prepared.match(text, timeout=REGEX_TIMEOUT_SECONDS)
    except TimeoutError:
        logger.warning(
            "Regular expression matching timed out for pattern %s",
            textwrap.shorten(pattern, width=80, placeholder="…"),
        )
    return result
|
||||
|
||||
|
||||
def safe_regex_sub(pattern: str, repl: str, text: str, *, flags: int = 0) -> str | None:
    """
    Run a regex substitution with a timeout. Returns the substituted string,
    or None on error/timeout.
    """
    try:
        validate_regex_pattern(pattern)
        prepared = regex.compile(pattern, flags=flags)
    except (regex.error, ValueError) as err:
        logger.error(
            "Error while processing regular expression %s: %s",
            textwrap.shorten(pattern, width=80, placeholder="…"),
            err,
        )
        return None

    result = None
    try:
        result = prepared.sub(repl, text, timeout=REGEX_TIMEOUT_SECONDS)
    except TimeoutError:
        logger.warning(
            "Regular expression substitution timed out for pattern %s",
            textwrap.shorten(pattern, width=80, placeholder="…"),
        )
    return result
|
||||
|
||||
|
||||
def safe_regex_finditer(compiled_pattern: regex.Pattern, text: str):
|
||||
"""
|
||||
Run regex finditer with a timeout. Yields match objects.
|
||||
Stops iteration on timeout.
|
||||
"""
|
||||
|
||||
try:
|
||||
yield from compiled_pattern.finditer(text, timeout=REGEX_TIMEOUT_SECONDS)
|
||||
except TimeoutError:
|
||||
logger.warning(
|
||||
"Regular expression finditer timed out for pattern %s",
|
||||
textwrap.shorten(compiled_pattern.pattern, width=80, placeholder="…"),
|
||||
)
|
||||
return
|
||||
|
||||
@@ -9,19 +9,14 @@ to wrap the document queryset (e.g., with a progress bar). The default
|
||||
is an identity function that adds no overhead.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import uuid
|
||||
from collections import defaultdict
|
||||
from collections.abc import Callable
|
||||
from collections.abc import Iterable
|
||||
from collections.abc import Iterator
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
from typing import Final
|
||||
from typing import TypedDict
|
||||
from typing import TypeVar
|
||||
|
||||
from celery import states
|
||||
from django.conf import settings
|
||||
@@ -29,14 +24,13 @@ from django.utils import timezone
|
||||
|
||||
from documents.models import Document
|
||||
from documents.models import PaperlessTask
|
||||
from documents.utils import IterWrapper
|
||||
from documents.utils import compute_checksum
|
||||
from documents.utils import identity
|
||||
from paperless.config import GeneralConfig
|
||||
|
||||
logger = logging.getLogger("paperless.sanity_checker")
|
||||
|
||||
_T = TypeVar("_T")
|
||||
IterWrapper = Callable[[Iterable[_T]], Iterable[_T]]
|
||||
|
||||
|
||||
class MessageEntry(TypedDict):
|
||||
"""A single sanity check message with its severity level."""
|
||||
@@ -45,11 +39,6 @@ class MessageEntry(TypedDict):
|
||||
message: str
|
||||
|
||||
|
||||
def _identity(iterable: Iterable[_T]) -> Iterable[_T]:
|
||||
"""Pass through an iterable unchanged (default iter_wrapper)."""
|
||||
return iterable
|
||||
|
||||
|
||||
class SanityCheckMessages:
|
||||
"""Collects sanity check messages grouped by document primary key.
|
||||
|
||||
@@ -296,7 +285,7 @@ def _check_document(
|
||||
def check_sanity(
|
||||
*,
|
||||
scheduled: bool = True,
|
||||
iter_wrapper: IterWrapper[Document] = _identity,
|
||||
iter_wrapper: IterWrapper[Document] = identity,
|
||||
) -> SanityCheckMessages:
|
||||
"""Run a full sanity check on the document archive.
|
||||
|
||||
|
||||
25
src/documents/search/__init__.py
Normal file
25
src/documents/search/__init__.py
Normal file
@@ -0,0 +1,25 @@
|
||||
from documents.search._backend import SearchHit
|
||||
from documents.search._backend import SearchIndexLockError
|
||||
from documents.search._backend import SearchMode
|
||||
from documents.search._backend import SearchResults
|
||||
from documents.search._backend import TantivyBackend
|
||||
from documents.search._backend import TantivyRelevanceList
|
||||
from documents.search._backend import WriteBatch
|
||||
from documents.search._backend import get_backend
|
||||
from documents.search._backend import reset_backend
|
||||
from documents.search._schema import needs_rebuild
|
||||
from documents.search._schema import wipe_index
|
||||
|
||||
__all__ = [
|
||||
"SearchHit",
|
||||
"SearchIndexLockError",
|
||||
"SearchMode",
|
||||
"SearchResults",
|
||||
"TantivyBackend",
|
||||
"TantivyRelevanceList",
|
||||
"WriteBatch",
|
||||
"get_backend",
|
||||
"needs_rebuild",
|
||||
"reset_backend",
|
||||
"wipe_index",
|
||||
]
|
||||
1154
src/documents/search/_backend.py
Normal file
1154
src/documents/search/_backend.py
Normal file
File diff suppressed because it is too large
Load Diff
8
src/documents/search/_normalize.py
Normal file
8
src/documents/search/_normalize.py
Normal file
@@ -0,0 +1,8 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import unicodedata
|
||||
|
||||
|
||||
def ascii_fold(text: str) -> str:
|
||||
"""Normalize unicode text to ASCII equivalents for search consistency."""
|
||||
return unicodedata.normalize("NFD", text).encode("ascii", "ignore").decode()
|
||||
579
src/documents/search/_query.py
Normal file
579
src/documents/search/_query.py
Normal file
@@ -0,0 +1,579 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import UTC
|
||||
from datetime import date
|
||||
from datetime import datetime
|
||||
from datetime import timedelta
|
||||
from typing import TYPE_CHECKING
|
||||
from typing import Final
|
||||
|
||||
import regex
|
||||
import tantivy
|
||||
from dateutil.relativedelta import relativedelta
|
||||
from django.conf import settings
|
||||
|
||||
from documents.search._normalize import ascii_fold
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from datetime import tzinfo
|
||||
|
||||
from django.contrib.auth.base_user import AbstractBaseUser
|
||||
|
||||
# Maximum seconds any single regex substitution may run.
|
||||
# Prevents ReDoS on adversarial user-supplied query strings.
|
||||
_REGEX_TIMEOUT: Final[float] = 1.0
|
||||
|
||||
_DATE_ONLY_FIELDS = frozenset({"created"})
|
||||
|
||||
_DATE_KEYWORDS = frozenset(
|
||||
{
|
||||
"today",
|
||||
"yesterday",
|
||||
"this_week",
|
||||
"last_week",
|
||||
"this_month",
|
||||
"last_month",
|
||||
"this_year",
|
||||
"last_year",
|
||||
},
|
||||
)
|
||||
|
||||
_FIELD_DATE_RE = regex.compile(
|
||||
r"(\w+):(" + "|".join(_DATE_KEYWORDS) + r")\b",
|
||||
)
|
||||
_COMPACT_DATE_RE = regex.compile(r"\b(\d{14})\b")
|
||||
_RELATIVE_RANGE_RE = regex.compile(
|
||||
r"\[now([+-]\d+[dhm])?\s+TO\s+now([+-]\d+[dhm])?\]",
|
||||
regex.IGNORECASE,
|
||||
)
|
||||
# Whoosh-style relative date range: e.g. [-1 week to now], [-7 days to now]
|
||||
_WHOOSH_REL_RANGE_RE = regex.compile(
|
||||
r"\[-(?P<n>\d+)\s+(?P<unit>second|minute|hour|day|week|month|year)s?\s+to\s+now\]",
|
||||
regex.IGNORECASE,
|
||||
)
|
||||
# Whoosh-style 8-digit date: field:YYYYMMDD — field-aware so timezone can be applied correctly
|
||||
_DATE8_RE = regex.compile(r"(?P<field>\w+):(?P<date8>\d{8})\b")
|
||||
_SIMPLE_QUERY_TOKEN_RE = regex.compile(r"\S+")
|
||||
|
||||
|
||||
def _fmt(dt: datetime) -> str:
|
||||
"""Format a datetime as an ISO 8601 UTC string for use in Tantivy range queries."""
|
||||
return dt.astimezone(UTC).strftime("%Y-%m-%dT%H:%M:%SZ")
|
||||
|
||||
|
||||
def _iso_range(lo: datetime, hi: datetime) -> str:
|
||||
"""Format a [lo TO hi] range string in ISO 8601 for Tantivy query syntax."""
|
||||
return f"[{_fmt(lo)} TO {_fmt(hi)}]"
|
||||
|
||||
|
||||
def _date_only_range(keyword: str, tz: tzinfo) -> str:
|
||||
"""
|
||||
For `created` (DateField): use the local calendar date, converted to
|
||||
midnight UTC boundaries. No offset arithmetic — date only.
|
||||
"""
|
||||
|
||||
today = datetime.now(tz).date()
|
||||
|
||||
if keyword == "today":
|
||||
lo = datetime(today.year, today.month, today.day, tzinfo=UTC)
|
||||
return _iso_range(lo, lo + timedelta(days=1))
|
||||
if keyword == "yesterday":
|
||||
y = today - timedelta(days=1)
|
||||
lo = datetime(y.year, y.month, y.day, tzinfo=UTC)
|
||||
hi = datetime(today.year, today.month, today.day, tzinfo=UTC)
|
||||
return _iso_range(lo, hi)
|
||||
if keyword == "this_week":
|
||||
mon = today - timedelta(days=today.weekday())
|
||||
lo = datetime(mon.year, mon.month, mon.day, tzinfo=UTC)
|
||||
return _iso_range(lo, lo + timedelta(weeks=1))
|
||||
if keyword == "last_week":
|
||||
this_mon = today - timedelta(days=today.weekday())
|
||||
last_mon = this_mon - timedelta(weeks=1)
|
||||
lo = datetime(last_mon.year, last_mon.month, last_mon.day, tzinfo=UTC)
|
||||
hi = datetime(this_mon.year, this_mon.month, this_mon.day, tzinfo=UTC)
|
||||
return _iso_range(lo, hi)
|
||||
if keyword == "this_month":
|
||||
lo = datetime(today.year, today.month, 1, tzinfo=UTC)
|
||||
if today.month == 12:
|
||||
hi = datetime(today.year + 1, 1, 1, tzinfo=UTC)
|
||||
else:
|
||||
hi = datetime(today.year, today.month + 1, 1, tzinfo=UTC)
|
||||
return _iso_range(lo, hi)
|
||||
if keyword == "last_month":
|
||||
if today.month == 1:
|
||||
lo = datetime(today.year - 1, 12, 1, tzinfo=UTC)
|
||||
else:
|
||||
lo = datetime(today.year, today.month - 1, 1, tzinfo=UTC)
|
||||
hi = datetime(today.year, today.month, 1, tzinfo=UTC)
|
||||
return _iso_range(lo, hi)
|
||||
if keyword == "this_year":
|
||||
lo = datetime(today.year, 1, 1, tzinfo=UTC)
|
||||
return _iso_range(lo, datetime(today.year + 1, 1, 1, tzinfo=UTC))
|
||||
if keyword == "last_year":
|
||||
lo = datetime(today.year - 1, 1, 1, tzinfo=UTC)
|
||||
return _iso_range(lo, datetime(today.year, 1, 1, tzinfo=UTC))
|
||||
raise ValueError(f"Unknown keyword: {keyword}")
|
||||
|
||||
|
||||
def _datetime_range(keyword: str, tz: tzinfo) -> str:
|
||||
"""
|
||||
For `added` / `modified` (DateTimeField, stored as UTC): convert local day
|
||||
boundaries to UTC — full offset arithmetic required.
|
||||
"""
|
||||
|
||||
now_local = datetime.now(tz)
|
||||
today = now_local.date()
|
||||
|
||||
def _midnight(d: date) -> datetime:
|
||||
return datetime(d.year, d.month, d.day, tzinfo=tz).astimezone(UTC)
|
||||
|
||||
if keyword == "today":
|
||||
return _iso_range(_midnight(today), _midnight(today + timedelta(days=1)))
|
||||
if keyword == "yesterday":
|
||||
y = today - timedelta(days=1)
|
||||
return _iso_range(_midnight(y), _midnight(today))
|
||||
if keyword == "this_week":
|
||||
mon = today - timedelta(days=today.weekday())
|
||||
return _iso_range(_midnight(mon), _midnight(mon + timedelta(weeks=1)))
|
||||
if keyword == "last_week":
|
||||
this_mon = today - timedelta(days=today.weekday())
|
||||
last_mon = this_mon - timedelta(weeks=1)
|
||||
return _iso_range(_midnight(last_mon), _midnight(this_mon))
|
||||
if keyword == "this_month":
|
||||
first = today.replace(day=1)
|
||||
if today.month == 12:
|
||||
next_first = date(today.year + 1, 1, 1)
|
||||
else:
|
||||
next_first = date(today.year, today.month + 1, 1)
|
||||
return _iso_range(_midnight(first), _midnight(next_first))
|
||||
if keyword == "last_month":
|
||||
this_first = today.replace(day=1)
|
||||
if today.month == 1:
|
||||
last_first = date(today.year - 1, 12, 1)
|
||||
else:
|
||||
last_first = date(today.year, today.month - 1, 1)
|
||||
return _iso_range(_midnight(last_first), _midnight(this_first))
|
||||
if keyword == "this_year":
|
||||
return _iso_range(
|
||||
_midnight(date(today.year, 1, 1)),
|
||||
_midnight(date(today.year + 1, 1, 1)),
|
||||
)
|
||||
if keyword == "last_year":
|
||||
return _iso_range(
|
||||
_midnight(date(today.year - 1, 1, 1)),
|
||||
_midnight(date(today.year, 1, 1)),
|
||||
)
|
||||
raise ValueError(f"Unknown keyword: {keyword}")
|
||||
|
||||
|
||||
def _rewrite_compact_date(query: str) -> str:
|
||||
"""Rewrite Whoosh compact date tokens (14-digit YYYYMMDDHHmmss) to ISO 8601."""
|
||||
|
||||
def _sub(m: regex.Match[str]) -> str:
|
||||
raw = m.group(1)
|
||||
try:
|
||||
dt = datetime(
|
||||
int(raw[0:4]),
|
||||
int(raw[4:6]),
|
||||
int(raw[6:8]),
|
||||
int(raw[8:10]),
|
||||
int(raw[10:12]),
|
||||
int(raw[12:14]),
|
||||
tzinfo=UTC,
|
||||
)
|
||||
return dt.strftime("%Y-%m-%dT%H:%M:%SZ")
|
||||
except ValueError:
|
||||
return str(m.group(0))
|
||||
|
||||
try:
|
||||
return _COMPACT_DATE_RE.sub(_sub, query, timeout=_REGEX_TIMEOUT)
|
||||
except TimeoutError: # pragma: no cover
|
||||
raise ValueError(
|
||||
"Query too complex to process (compact date rewrite timed out)",
|
||||
)
|
||||
|
||||
|
||||
def _rewrite_relative_range(query: str) -> str:
|
||||
"""Rewrite Whoosh relative ranges ([now-7d TO now]) to concrete ISO 8601 UTC boundaries."""
|
||||
|
||||
def _sub(m: regex.Match[str]) -> str:
|
||||
now = datetime.now(UTC)
|
||||
|
||||
def _offset(s: str | None) -> timedelta:
|
||||
if not s:
|
||||
return timedelta(0)
|
||||
sign = 1 if s[0] == "+" else -1
|
||||
n, unit = int(s[1:-1]), s[-1]
|
||||
return (
|
||||
sign
|
||||
* {
|
||||
"d": timedelta(days=n),
|
||||
"h": timedelta(hours=n),
|
||||
"m": timedelta(minutes=n),
|
||||
}[unit]
|
||||
)
|
||||
|
||||
lo, hi = now + _offset(m.group(1)), now + _offset(m.group(2))
|
||||
if lo > hi:
|
||||
lo, hi = hi, lo
|
||||
return f"[{_fmt(lo)} TO {_fmt(hi)}]"
|
||||
|
||||
try:
|
||||
return _RELATIVE_RANGE_RE.sub(_sub, query, timeout=_REGEX_TIMEOUT)
|
||||
except TimeoutError: # pragma: no cover
|
||||
raise ValueError(
|
||||
"Query too complex to process (relative range rewrite timed out)",
|
||||
)
|
||||
|
||||
|
||||
def _rewrite_whoosh_relative_range(query: str) -> str:
|
||||
"""Rewrite Whoosh-style relative date ranges ([-N unit to now]) to ISO 8601.
|
||||
|
||||
Supports: second, minute, hour, day, week, month, year (singular and plural).
|
||||
Example: ``added:[-1 week to now]`` → ``added:[2025-01-01T… TO 2025-01-08T…]``
|
||||
"""
|
||||
now = datetime.now(UTC)
|
||||
|
||||
def _sub(m: regex.Match[str]) -> str:
|
||||
n = int(m.group("n"))
|
||||
unit = m.group("unit").lower()
|
||||
delta_map: dict[str, timedelta | relativedelta] = {
|
||||
"second": timedelta(seconds=n),
|
||||
"minute": timedelta(minutes=n),
|
||||
"hour": timedelta(hours=n),
|
||||
"day": timedelta(days=n),
|
||||
"week": timedelta(weeks=n),
|
||||
"month": relativedelta(months=n),
|
||||
"year": relativedelta(years=n),
|
||||
}
|
||||
lo = now - delta_map[unit]
|
||||
return f"[{_fmt(lo)} TO {_fmt(now)}]"
|
||||
|
||||
try:
|
||||
return _WHOOSH_REL_RANGE_RE.sub(_sub, query, timeout=_REGEX_TIMEOUT)
|
||||
except TimeoutError: # pragma: no cover
|
||||
raise ValueError(
|
||||
"Query too complex to process (Whoosh relative range rewrite timed out)",
|
||||
)
|
||||
|
||||
|
||||
def _rewrite_8digit_date(query: str, tz: tzinfo) -> str:
|
||||
"""Rewrite field:YYYYMMDD date tokens to an ISO 8601 day range.
|
||||
|
||||
Runs after ``_rewrite_compact_date`` so 14-digit timestamps are already
|
||||
converted and won't spuriously match here.
|
||||
|
||||
For DateField fields (e.g. ``created``) uses UTC midnight boundaries.
|
||||
For DateTimeField fields (e.g. ``added``, ``modified``) uses local TZ
|
||||
midnight boundaries converted to UTC — matching the ``_datetime_range``
|
||||
behaviour for keyword dates.
|
||||
"""
|
||||
|
||||
def _sub(m: regex.Match[str]) -> str:
|
||||
field = m.group("field")
|
||||
raw = m.group("date8")
|
||||
try:
|
||||
year, month, day = int(raw[0:4]), int(raw[4:6]), int(raw[6:8])
|
||||
d = date(year, month, day)
|
||||
if field in _DATE_ONLY_FIELDS:
|
||||
lo = datetime(d.year, d.month, d.day, tzinfo=UTC)
|
||||
hi = lo + timedelta(days=1)
|
||||
else:
|
||||
# DateTimeField: use local-timezone midnight → UTC
|
||||
lo = datetime(d.year, d.month, d.day, tzinfo=tz).astimezone(UTC)
|
||||
hi = datetime(
|
||||
(d + timedelta(days=1)).year,
|
||||
(d + timedelta(days=1)).month,
|
||||
(d + timedelta(days=1)).day,
|
||||
tzinfo=tz,
|
||||
).astimezone(UTC)
|
||||
return f"{field}:[{_fmt(lo)} TO {_fmt(hi)}]"
|
||||
except ValueError:
|
||||
return m.group(0)
|
||||
|
||||
try:
|
||||
return _DATE8_RE.sub(_sub, query, timeout=_REGEX_TIMEOUT)
|
||||
except TimeoutError: # pragma: no cover
|
||||
raise ValueError(
|
||||
"Query too complex to process (8-digit date rewrite timed out)",
|
||||
)
|
||||
|
||||
|
||||
def rewrite_natural_date_keywords(query: str, tz: tzinfo) -> str:
|
||||
"""
|
||||
Rewrite natural date syntax to ISO 8601 format for Tantivy compatibility.
|
||||
|
||||
Performs the first stage of query preprocessing, converting various date
|
||||
formats and keywords to ISO 8601 datetime ranges that Tantivy can parse:
|
||||
- Compact 14-digit dates (YYYYMMDDHHmmss)
|
||||
- Whoosh relative ranges ([-7 days to now], [now-1h TO now+2h])
|
||||
- 8-digit dates with field awareness (created:20240115)
|
||||
- Natural keywords (field:today, field:last_week, etc.)
|
||||
|
||||
Args:
|
||||
query: Raw user query string
|
||||
tz: Timezone for converting local date boundaries to UTC
|
||||
|
||||
Returns:
|
||||
Query with date syntax rewritten to ISO 8601 ranges
|
||||
|
||||
Note:
|
||||
Bare keywords without field prefixes pass through unchanged.
|
||||
"""
|
||||
query = _rewrite_compact_date(query)
|
||||
query = _rewrite_whoosh_relative_range(query)
|
||||
query = _rewrite_8digit_date(query, tz)
|
||||
query = _rewrite_relative_range(query)
|
||||
|
||||
def _replace(m: regex.Match[str]) -> str:
|
||||
field, keyword = m.group(1), m.group(2)
|
||||
if field in _DATE_ONLY_FIELDS:
|
||||
return f"{field}:{_date_only_range(keyword, tz)}"
|
||||
return f"{field}:{_datetime_range(keyword, tz)}"
|
||||
|
||||
try:
|
||||
return _FIELD_DATE_RE.sub(_replace, query, timeout=_REGEX_TIMEOUT)
|
||||
except TimeoutError: # pragma: no cover
|
||||
raise ValueError(
|
||||
"Query too complex to process (date keyword rewrite timed out)",
|
||||
)
|
||||
|
||||
|
||||
def normalize_query(query: str) -> str:
|
||||
"""
|
||||
Normalize query syntax for better search behavior.
|
||||
|
||||
Expands comma-separated field values to explicit AND clauses and
|
||||
collapses excessive whitespace for cleaner parsing:
|
||||
- tag:foo,bar → tag:foo AND tag:bar
|
||||
- multiple spaces → single spaces
|
||||
|
||||
Args:
|
||||
query: Query string after date rewriting
|
||||
|
||||
Returns:
|
||||
Normalized query string ready for Tantivy parsing
|
||||
"""
|
||||
|
||||
def _expand(m: regex.Match[str]) -> str:
|
||||
field = m.group(1)
|
||||
values = [v.strip() for v in m.group(2).split(",") if v.strip()]
|
||||
return " AND ".join(f"{field}:{v}" for v in values)
|
||||
|
||||
try:
|
||||
query = regex.sub(
|
||||
r"(\w+):([^\s\[\]]+(?:,[^\s\[\]]+)+)",
|
||||
_expand,
|
||||
query,
|
||||
timeout=_REGEX_TIMEOUT,
|
||||
)
|
||||
return regex.sub(r" {2,}", " ", query, timeout=_REGEX_TIMEOUT).strip()
|
||||
except TimeoutError: # pragma: no cover
|
||||
raise ValueError("Query too complex to process (normalization timed out)")
|
||||
|
||||
|
||||
_MAX_U64 = 2**64 - 1 # u64 max — used as inclusive upper bound for "any owner" range
|
||||
|
||||
|
||||
def build_permission_filter(
|
||||
schema: tantivy.Schema,
|
||||
user: AbstractBaseUser,
|
||||
) -> tantivy.Query:
|
||||
"""
|
||||
Build a query filter for user document permissions.
|
||||
|
||||
Creates a query that matches only documents visible to the specified user
|
||||
according to paperless-ngx permission rules:
|
||||
- Public documents (no owner) are visible to all users
|
||||
- Private documents are visible to their owner
|
||||
- Documents explicitly shared with the user are visible
|
||||
|
||||
Args:
|
||||
schema: Tantivy schema for field validation
|
||||
user: User to check permissions for
|
||||
|
||||
Returns:
|
||||
Tantivy query that filters results to visible documents
|
||||
|
||||
Implementation Notes:
|
||||
- Uses range_query instead of term_query to work around unsigned integer
|
||||
type detection bug in tantivy-py 0.25
|
||||
- Uses boolean_query for "no owner" check since exists_query is not
|
||||
available in tantivy-py 0.25.1 (available in master)
|
||||
- Uses disjunction_max_query to combine permission clauses with OR logic
|
||||
"""
|
||||
owner_any = tantivy.Query.range_query(
|
||||
schema,
|
||||
"owner_id",
|
||||
tantivy.FieldType.Unsigned,
|
||||
1,
|
||||
_MAX_U64,
|
||||
)
|
||||
no_owner = tantivy.Query.boolean_query(
|
||||
[
|
||||
(tantivy.Occur.Must, tantivy.Query.all_query()),
|
||||
(tantivy.Occur.MustNot, owner_any),
|
||||
],
|
||||
)
|
||||
owned = tantivy.Query.range_query(
|
||||
schema,
|
||||
"owner_id",
|
||||
tantivy.FieldType.Unsigned,
|
||||
user.pk,
|
||||
user.pk,
|
||||
)
|
||||
shared = tantivy.Query.range_query(
|
||||
schema,
|
||||
"viewer_id",
|
||||
tantivy.FieldType.Unsigned,
|
||||
user.pk,
|
||||
user.pk,
|
||||
)
|
||||
return tantivy.Query.disjunction_max_query([no_owner, owned, shared])
|
||||
|
||||
|
||||
DEFAULT_SEARCH_FIELDS = [
|
||||
"title",
|
||||
"content",
|
||||
"correspondent",
|
||||
"document_type",
|
||||
"tag",
|
||||
]
|
||||
SIMPLE_SEARCH_FIELDS = ["simple_title", "simple_content"]
|
||||
TITLE_SEARCH_FIELDS = ["simple_title"]
|
||||
_FIELD_BOOSTS = {"title": 2.0}
|
||||
_SIMPLE_FIELD_BOOSTS = {"simple_title": 2.0}
|
||||
|
||||
|
||||
def _build_simple_field_query(
|
||||
index: tantivy.Index,
|
||||
field: str,
|
||||
tokens: list[str],
|
||||
) -> tantivy.Query:
|
||||
patterns = []
|
||||
for idx, token in enumerate(tokens):
|
||||
escaped = regex.escape(token)
|
||||
# For multi-token substring search, only the first token can begin mid-word.
|
||||
# Later tokens follow a whitespace boundary in the original query, so anchor
|
||||
# them to the start of the next indexed token to reduce false positives like
|
||||
# matching "Z-Berichte 16" for the query "Z-Berichte 6".
|
||||
if idx == 0:
|
||||
patterns.append(f".*{escaped}.*")
|
||||
else:
|
||||
patterns.append(f"{escaped}.*")
|
||||
if len(patterns) == 1:
|
||||
query = tantivy.Query.regex_query(index.schema, field, patterns[0])
|
||||
else:
|
||||
query = tantivy.Query.regex_phrase_query(index.schema, field, patterns)
|
||||
|
||||
boost = _SIMPLE_FIELD_BOOSTS.get(field, 1.0)
|
||||
if boost > 1.0:
|
||||
return tantivy.Query.boost_query(query, boost)
|
||||
return query
|
||||
|
||||
|
||||
def parse_user_query(
|
||||
index: tantivy.Index,
|
||||
raw_query: str,
|
||||
tz: tzinfo,
|
||||
) -> tantivy.Query:
|
||||
"""
|
||||
Parse user query through the complete preprocessing pipeline.
|
||||
|
||||
Transforms the raw user query through multiple stages:
|
||||
1. Date keyword rewriting (today → ISO 8601 ranges)
|
||||
2. Query normalization (comma expansion, whitespace cleanup)
|
||||
3. Tantivy parsing with field boosts
|
||||
4. Optional fuzzy query blending (if ADVANCED_FUZZY_SEARCH_THRESHOLD set)
|
||||
|
||||
Args:
|
||||
index: Tantivy index with registered tokenizers
|
||||
raw_query: Original user query string
|
||||
tz: Timezone for date boundary calculations
|
||||
|
||||
Returns:
|
||||
Parsed Tantivy query ready for execution
|
||||
|
||||
Note:
|
||||
When ADVANCED_FUZZY_SEARCH_THRESHOLD is configured, adds a low-priority
|
||||
fuzzy query as a Should clause (0.1 boost) to catch approximate matches
|
||||
while keeping exact matches ranked higher. The threshold value is applied
|
||||
as a post-search score filter, not during query construction.
|
||||
"""
|
||||
|
||||
query_str = rewrite_natural_date_keywords(raw_query, tz)
|
||||
query_str = normalize_query(query_str)
|
||||
|
||||
exact = index.parse_query(
|
||||
query_str,
|
||||
DEFAULT_SEARCH_FIELDS,
|
||||
field_boosts=_FIELD_BOOSTS,
|
||||
)
|
||||
|
||||
threshold = settings.ADVANCED_FUZZY_SEARCH_THRESHOLD
|
||||
if threshold is not None:
|
||||
fuzzy = index.parse_query(
|
||||
query_str,
|
||||
DEFAULT_SEARCH_FIELDS,
|
||||
field_boosts=_FIELD_BOOSTS,
|
||||
# (prefix=True, distance=1, transposition_cost_one=True) — edit-distance fuzziness
|
||||
fuzzy_fields={f: (True, 1, True) for f in DEFAULT_SEARCH_FIELDS},
|
||||
)
|
||||
return tantivy.Query.boolean_query(
|
||||
[
|
||||
(tantivy.Occur.Should, exact),
|
||||
# 0.1 boost keeps fuzzy hits ranked below exact matches (intentional)
|
||||
(tantivy.Occur.Should, tantivy.Query.boost_query(fuzzy, 0.1)),
|
||||
],
|
||||
)
|
||||
|
||||
return exact
|
||||
|
||||
|
||||
def parse_simple_query(
|
||||
index: tantivy.Index,
|
||||
raw_query: str,
|
||||
fields: list[str],
|
||||
) -> tantivy.Query:
|
||||
"""
|
||||
Parse a plain-text query using Tantivy over a restricted field set.
|
||||
|
||||
Query string is escaped and normalized to be treated as "simple" text query.
|
||||
"""
|
||||
tokens = [
|
||||
ascii_fold(token.lower())
|
||||
for token in _SIMPLE_QUERY_TOKEN_RE.findall(raw_query, timeout=_REGEX_TIMEOUT)
|
||||
]
|
||||
tokens = [token for token in tokens if token]
|
||||
if not tokens:
|
||||
return tantivy.Query.empty_query()
|
||||
|
||||
field_queries = [
|
||||
(tantivy.Occur.Should, _build_simple_field_query(index, field, tokens))
|
||||
for field in fields
|
||||
]
|
||||
if len(field_queries) == 1:
|
||||
return field_queries[0][1]
|
||||
return tantivy.Query.boolean_query(field_queries)
|
||||
|
||||
|
||||
def parse_simple_text_query(
|
||||
index: tantivy.Index,
|
||||
raw_query: str,
|
||||
) -> tantivy.Query:
|
||||
"""
|
||||
Parse a plain-text query over title/content for simple search inputs.
|
||||
"""
|
||||
|
||||
return parse_simple_query(index, raw_query, SIMPLE_SEARCH_FIELDS)
|
||||
|
||||
|
||||
def parse_simple_title_query(
|
||||
index: tantivy.Index,
|
||||
raw_query: str,
|
||||
) -> tantivy.Query:
|
||||
"""
|
||||
Parse a plain-text query over the title field only.
|
||||
"""
|
||||
|
||||
return parse_simple_query(index, raw_query, TITLE_SEARCH_FIELDS)
|
||||
177
src/documents/search/_schema.py
Normal file
177
src/documents/search/_schema.py
Normal file
@@ -0,0 +1,177 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import shutil
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import tantivy
|
||||
from django.conf import settings
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pathlib import Path
|
||||
|
||||
logger = logging.getLogger("paperless.search")
|
||||
|
||||
SCHEMA_VERSION = 1
|
||||
|
||||
|
||||
def build_schema() -> tantivy.Schema:
|
||||
"""
|
||||
Build the Tantivy schema for the paperless document index.
|
||||
|
||||
Creates a comprehensive schema supporting full-text search, filtering,
|
||||
sorting, and autocomplete functionality. Includes fields for document
|
||||
content, metadata, permissions, custom fields, and notes.
|
||||
|
||||
Returns:
|
||||
Configured Tantivy schema ready for index creation
|
||||
"""
|
||||
sb = tantivy.SchemaBuilder()
|
||||
|
||||
sb.add_unsigned_field("id", stored=True, indexed=True, fast=True)
|
||||
sb.add_text_field("checksum", stored=True, tokenizer_name="raw")
|
||||
|
||||
for field in (
|
||||
"title",
|
||||
"correspondent",
|
||||
"document_type",
|
||||
"storage_path",
|
||||
"original_filename",
|
||||
"content",
|
||||
):
|
||||
sb.add_text_field(field, stored=True, tokenizer_name="paperless_text")
|
||||
|
||||
# Shadow sort fields - fast, not stored/indexed
|
||||
for field in ("title_sort", "correspondent_sort", "type_sort"):
|
||||
sb.add_text_field(
|
||||
field,
|
||||
stored=False,
|
||||
tokenizer_name="simple_analyzer",
|
||||
fast=True,
|
||||
)
|
||||
|
||||
# CJK support - not stored, indexed only
|
||||
sb.add_text_field("bigram_content", stored=False, tokenizer_name="bigram_analyzer")
|
||||
|
||||
# Simple substring search support for title/content - not stored, indexed only
|
||||
sb.add_text_field(
|
||||
"simple_title",
|
||||
stored=False,
|
||||
tokenizer_name="simple_search_analyzer",
|
||||
)
|
||||
sb.add_text_field(
|
||||
"simple_content",
|
||||
stored=False,
|
||||
tokenizer_name="simple_search_analyzer",
|
||||
)
|
||||
|
||||
# Autocomplete prefix scan - stored, not indexed
|
||||
sb.add_text_field("autocomplete_word", stored=True, tokenizer_name="raw")
|
||||
|
||||
sb.add_text_field("tag", stored=True, tokenizer_name="paperless_text")
|
||||
|
||||
# JSON fields — structured queries: notes.user:alice, custom_fields.name:invoice
|
||||
sb.add_json_field("notes", stored=True, tokenizer_name="paperless_text")
|
||||
sb.add_json_field("custom_fields", stored=True, tokenizer_name="paperless_text")
|
||||
|
||||
for field in (
|
||||
"correspondent_id",
|
||||
"document_type_id",
|
||||
"storage_path_id",
|
||||
"tag_id",
|
||||
"owner_id",
|
||||
"viewer_id",
|
||||
):
|
||||
sb.add_unsigned_field(field, stored=False, indexed=True, fast=True)
|
||||
|
||||
for field in ("created", "modified", "added"):
|
||||
sb.add_date_field(field, stored=True, indexed=True, fast=True)
|
||||
|
||||
for field in ("asn", "page_count", "num_notes"):
|
||||
sb.add_unsigned_field(field, stored=True, indexed=True, fast=True)
|
||||
|
||||
return sb.build()
|
||||
|
||||
|
||||
def needs_rebuild(index_dir: Path) -> bool:
|
||||
"""
|
||||
Check if the search index needs rebuilding.
|
||||
|
||||
Compares the current schema version and search language configuration
|
||||
against sentinel files to determine if the index is compatible with
|
||||
the current paperless-ngx version and settings.
|
||||
|
||||
Args:
|
||||
index_dir: Path to the search index directory
|
||||
|
||||
Returns:
|
||||
True if the index needs rebuilding, False if it's up to date
|
||||
"""
|
||||
version_file = index_dir / ".schema_version"
|
||||
if not version_file.exists():
|
||||
return True
|
||||
try:
|
||||
if int(version_file.read_text().strip()) != SCHEMA_VERSION:
|
||||
logger.info("Search index schema version mismatch - rebuilding.")
|
||||
return True
|
||||
except ValueError:
|
||||
return True
|
||||
|
||||
language_file = index_dir / ".schema_language"
|
||||
if not language_file.exists():
|
||||
logger.info("Search index language sentinel missing - rebuilding.")
|
||||
return True
|
||||
if language_file.read_text().strip() != (settings.SEARCH_LANGUAGE or ""):
|
||||
logger.info("Search index language changed - rebuilding.")
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def wipe_index(index_dir: Path) -> None:
|
||||
"""
|
||||
Delete all contents of the index directory to prepare for rebuild.
|
||||
|
||||
Recursively removes all files and subdirectories within the index
|
||||
directory while preserving the directory itself.
|
||||
|
||||
Args:
|
||||
index_dir: Path to the search index directory to clear
|
||||
"""
|
||||
for child in index_dir.iterdir():
|
||||
if child.is_dir():
|
||||
shutil.rmtree(child)
|
||||
else:
|
||||
child.unlink()
|
||||
|
||||
|
||||
def _write_sentinels(index_dir: Path) -> None:
|
||||
"""Write schema version and language sentinel files so the next index open can skip rebuilding."""
|
||||
(index_dir / ".schema_version").write_text(str(SCHEMA_VERSION))
|
||||
(index_dir / ".schema_language").write_text(settings.SEARCH_LANGUAGE or "")
|
||||
|
||||
|
||||
def open_or_rebuild_index(index_dir: Path | None = None) -> tantivy.Index:
|
||||
"""
|
||||
Open the Tantivy index, creating or rebuilding as needed.
|
||||
|
||||
Checks if the index needs rebuilding due to schema version or language
|
||||
changes. If rebuilding is needed, wipes the directory and creates a fresh
|
||||
index with the current schema and configuration.
|
||||
|
||||
Args:
|
||||
index_dir: Path to index directory (defaults to settings.INDEX_DIR)
|
||||
|
||||
Returns:
|
||||
Opened Tantivy index (caller must register custom tokenizers)
|
||||
"""
|
||||
if index_dir is None:
|
||||
index_dir = settings.INDEX_DIR
|
||||
if not index_dir.exists():
|
||||
return tantivy.Index(build_schema())
|
||||
if needs_rebuild(index_dir):
|
||||
wipe_index(index_dir)
|
||||
idx = tantivy.Index(build_schema(), path=str(index_dir))
|
||||
_write_sentinels(index_dir)
|
||||
return idx
|
||||
return tantivy.Index.open(str(index_dir))
|
||||
130
src/documents/search/_tokenizer.py
Normal file
130
src/documents/search/_tokenizer.py
Normal file
@@ -0,0 +1,130 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
import tantivy
|
||||
|
||||
logger = logging.getLogger("paperless.search")
|
||||
|
||||
# Mapping of ISO 639-1 codes (and common aliases) -> Tantivy Snowball name
|
||||
_LANGUAGE_MAP: dict[str, str] = {
|
||||
"ar": "Arabic",
|
||||
"arabic": "Arabic",
|
||||
"da": "Danish",
|
||||
"danish": "Danish",
|
||||
"nl": "Dutch",
|
||||
"dutch": "Dutch",
|
||||
"en": "English",
|
||||
"english": "English",
|
||||
"fi": "Finnish",
|
||||
"finnish": "Finnish",
|
||||
"fr": "French",
|
||||
"french": "French",
|
||||
"de": "German",
|
||||
"german": "German",
|
||||
"el": "Greek",
|
||||
"greek": "Greek",
|
||||
"hu": "Hungarian",
|
||||
"hungarian": "Hungarian",
|
||||
"it": "Italian",
|
||||
"italian": "Italian",
|
||||
"no": "Norwegian",
|
||||
"norwegian": "Norwegian",
|
||||
"pt": "Portuguese",
|
||||
"portuguese": "Portuguese",
|
||||
"ro": "Romanian",
|
||||
"romanian": "Romanian",
|
||||
"ru": "Russian",
|
||||
"russian": "Russian",
|
||||
"es": "Spanish",
|
||||
"spanish": "Spanish",
|
||||
"sv": "Swedish",
|
||||
"swedish": "Swedish",
|
||||
"ta": "Tamil",
|
||||
"tamil": "Tamil",
|
||||
"tr": "Turkish",
|
||||
"turkish": "Turkish",
|
||||
}
|
||||
|
||||
SUPPORTED_LANGUAGES: frozenset[str] = frozenset(_LANGUAGE_MAP)
|
||||
|
||||
|
||||
def register_tokenizers(index: tantivy.Index, language: str | None) -> None:
|
||||
"""
|
||||
Register all custom tokenizers required by the paperless schema.
|
||||
|
||||
Must be called on every Index instance since Tantivy requires tokenizer
|
||||
re-registration after each index open/creation. Registers tokenizers for
|
||||
full-text search, sorting, CJK language support, and fast-field indexing.
|
||||
|
||||
Args:
|
||||
index: Tantivy index instance to register tokenizers on
|
||||
language: ISO 639-1 language code for stemming (None to disable)
|
||||
|
||||
Note:
|
||||
simple_analyzer is registered as both a text and fast-field tokenizer
|
||||
since sort shadow fields (title_sort, correspondent_sort, type_sort)
|
||||
use fast=True and Tantivy requires fast-field tokenizers to exist
|
||||
even for documents that omit those fields.
|
||||
"""
|
||||
index.register_tokenizer("paperless_text", _paperless_text(language))
|
||||
index.register_tokenizer("simple_analyzer", _simple_analyzer())
|
||||
index.register_tokenizer("bigram_analyzer", _bigram_analyzer())
|
||||
index.register_tokenizer("simple_search_analyzer", _simple_search_analyzer())
|
||||
# Fast-field tokenizer required for fast=True text fields in the schema
|
||||
index.register_fast_field_tokenizer("simple_analyzer", _simple_analyzer())
|
||||
|
||||
|
||||
def _paperless_text(language: str | None) -> tantivy.TextAnalyzer:
|
||||
"""Main full-text tokenizer for content, title, etc: simple -> remove_long(65) -> lowercase -> ascii_fold [-> stemmer]"""
|
||||
builder = (
|
||||
tantivy.TextAnalyzerBuilder(tantivy.Tokenizer.simple())
|
||||
.filter(tantivy.Filter.remove_long(65))
|
||||
.filter(tantivy.Filter.lowercase())
|
||||
.filter(tantivy.Filter.ascii_fold())
|
||||
)
|
||||
if language:
|
||||
tantivy_lang = _LANGUAGE_MAP.get(language.lower())
|
||||
if tantivy_lang:
|
||||
builder = builder.filter(tantivy.Filter.stemmer(tantivy_lang))
|
||||
else:
|
||||
logger.warning(
|
||||
"Unsupported search language '%s' - stemming disabled. Supported: %s",
|
||||
language,
|
||||
", ".join(sorted(SUPPORTED_LANGUAGES)),
|
||||
)
|
||||
return builder.build()
|
||||
|
||||
|
||||
def _simple_analyzer() -> tantivy.TextAnalyzer:
|
||||
"""Tokenizer for shadow sort fields (title_sort, correspondent_sort, type_sort): simple -> lowercase -> ascii_fold."""
|
||||
return (
|
||||
tantivy.TextAnalyzerBuilder(tantivy.Tokenizer.simple())
|
||||
.filter(tantivy.Filter.lowercase())
|
||||
.filter(tantivy.Filter.ascii_fold())
|
||||
.build()
|
||||
)
|
||||
|
||||
|
||||
def _bigram_analyzer() -> tantivy.TextAnalyzer:
|
||||
"""Enables substring search in CJK text: ngram(2,2) -> lowercase. CJK / no-whitespace language support."""
|
||||
return (
|
||||
tantivy.TextAnalyzerBuilder(
|
||||
tantivy.Tokenizer.ngram(min_gram=2, max_gram=2, prefix_only=False),
|
||||
)
|
||||
.filter(tantivy.Filter.lowercase())
|
||||
.build()
|
||||
)
|
||||
|
||||
|
||||
def _simple_search_analyzer() -> tantivy.TextAnalyzer:
|
||||
"""Tokenizer for simple substring search fields: non-whitespace chunks -> remove_long(65) -> lowercase -> ascii_fold."""
|
||||
return (
|
||||
tantivy.TextAnalyzerBuilder(
|
||||
tantivy.Tokenizer.regex(r"\S+"),
|
||||
)
|
||||
.filter(tantivy.Filter.remove_long(65))
|
||||
.filter(tantivy.Filter.lowercase())
|
||||
.filter(tantivy.Filter.ascii_fold())
|
||||
.build()
|
||||
)
|
||||
@@ -1293,22 +1293,18 @@ class SearchResultSerializer(DocumentSerializer):
|
||||
documents = self.context.get("documents")
|
||||
# Otherwise we fetch this document.
|
||||
if documents is None: # pragma: no cover
|
||||
# In practice we only serialize **lists** of whoosh.searching.Hit.
|
||||
# I'm keeping this check for completeness but marking it no cover for now.
|
||||
# In practice we only serialize **lists** of SearchHit dicts.
|
||||
# Keeping this check for completeness but marking it no cover for now.
|
||||
documents = self.fetch_documents([hit["id"]])
|
||||
document = documents[hit["id"]]
|
||||
|
||||
notes = ",".join(
|
||||
[str(c.note) for c in document.notes.all()],
|
||||
)
|
||||
highlights = hit.get("highlights", {})
|
||||
r = super().to_representation(document)
|
||||
r["__search_hit__"] = {
|
||||
"score": hit.score,
|
||||
"highlights": hit.highlights("content", text=document.content),
|
||||
"note_highlights": (
|
||||
hit.highlights("notes", text=notes) if document else None
|
||||
),
|
||||
"rank": hit.rank,
|
||||
"score": hit["score"],
|
||||
"highlights": highlights.get("content", ""),
|
||||
"note_highlights": highlights.get("notes") or None,
|
||||
"rank": hit["rank"],
|
||||
}
|
||||
|
||||
return r
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
EXPORTER_FILE_NAME = "__exported_file_name__"
|
||||
EXPORTER_THUMBNAIL_NAME = "__exported_thumbnail_name__"
|
||||
EXPORTER_ARCHIVE_NAME = "__exported_archive_name__"
|
||||
EXPORTER_SHARE_LINK_BUNDLE_NAME = "__exported_share_link_bundle_name__"
|
||||
|
||||
EXPORTER_CRYPTO_SETTINGS_NAME = "__crypto__"
|
||||
EXPORTER_CRYPTO_SALT_NAME = "__salt_hex__"
|
||||
|
||||
@@ -790,15 +790,12 @@ def cleanup_user_deletion(sender, instance: User | Group, **kwargs) -> None:
|
||||
|
||||
|
||||
def add_to_index(sender, document, **kwargs) -> None:
|
||||
from documents import index
|
||||
from documents.search import get_backend
|
||||
|
||||
index.add_or_update_document(document)
|
||||
if document.root_document_id is not None and document.root_document is not None:
|
||||
# keep in sync when a new version is consumed.
|
||||
index.add_or_update_document(
|
||||
document.root_document,
|
||||
effective_content=document.content,
|
||||
)
|
||||
get_backend().add_or_update(
|
||||
document,
|
||||
effective_content=document.get_effective_content(),
|
||||
)
|
||||
|
||||
|
||||
def run_workflows_added(
|
||||
|
||||
@@ -4,11 +4,9 @@ import shutil
|
||||
import uuid
|
||||
import zipfile
|
||||
from collections.abc import Callable
|
||||
from collections.abc import Iterable
|
||||
from pathlib import Path
|
||||
from tempfile import TemporaryDirectory
|
||||
from tempfile import mkstemp
|
||||
from typing import TypeVar
|
||||
|
||||
from celery import Task
|
||||
from celery import shared_task
|
||||
@@ -20,9 +18,7 @@ from django.db import transaction
|
||||
from django.db.models.signals import post_save
|
||||
from django.utils import timezone
|
||||
from filelock import FileLock
|
||||
from whoosh.writing import AsyncWriter
|
||||
|
||||
from documents import index
|
||||
from documents import sanity_checker
|
||||
from documents.barcodes import BarcodePlugin
|
||||
from documents.bulk_download import ArchiveOnlyStrategy
|
||||
@@ -60,43 +56,28 @@ from documents.signals import document_updated
|
||||
from documents.signals.handlers import cleanup_document_deletion
|
||||
from documents.signals.handlers import run_workflows
|
||||
from documents.signals.handlers import send_websocket_document_updated
|
||||
from documents.utils import IterWrapper
|
||||
from documents.utils import compute_checksum
|
||||
from documents.utils import identity
|
||||
from documents.workflows.utils import get_workflows_for_trigger
|
||||
from paperless.config import AIConfig
|
||||
from paperless.logging import consume_task_id
|
||||
from paperless.parsers import ParserContext
|
||||
from paperless.parsers.registry import get_parser_registry
|
||||
from paperless_ai.indexing import llm_index_add_or_update_document
|
||||
from paperless_ai.indexing import llm_index_remove_document
|
||||
from paperless_ai.indexing import update_llm_index
|
||||
|
||||
_T = TypeVar("_T")
|
||||
IterWrapper = Callable[[Iterable[_T]], Iterable[_T]]
|
||||
|
||||
|
||||
if settings.AUDIT_LOG_ENABLED:
|
||||
from auditlog.models import LogEntry
|
||||
logger = logging.getLogger("paperless.tasks")
|
||||
|
||||
|
||||
def _identity(iterable: Iterable[_T]) -> Iterable[_T]:
|
||||
return iterable
|
||||
|
||||
|
||||
@shared_task
|
||||
def index_optimize() -> None:
|
||||
ix = index.open_index()
|
||||
writer = AsyncWriter(ix)
|
||||
writer.commit(optimize=True)
|
||||
|
||||
|
||||
def index_reindex(*, iter_wrapper: IterWrapper[Document] = _identity) -> None:
|
||||
documents = Document.objects.all()
|
||||
|
||||
ix = index.open_index(recreate=True)
|
||||
|
||||
with AsyncWriter(ix) as writer:
|
||||
for document in iter_wrapper(documents):
|
||||
index.update_document(writer, document)
|
||||
logger.info(
|
||||
"index_optimize is a no-op — Tantivy manages segment merging automatically.",
|
||||
)
|
||||
|
||||
|
||||
@shared_task
|
||||
@@ -167,76 +148,85 @@ def consume_file(
|
||||
input_doc: ConsumableDocument,
|
||||
overrides: DocumentMetadataOverrides | None = None,
|
||||
):
|
||||
# Default no overrides
|
||||
if overrides is None:
|
||||
overrides = DocumentMetadataOverrides()
|
||||
token = consume_task_id.set((self.request.id or "")[:8])
|
||||
try:
|
||||
# Default no overrides
|
||||
if overrides is None:
|
||||
overrides = DocumentMetadataOverrides()
|
||||
|
||||
plugins: list[type[ConsumeTaskPlugin]] = (
|
||||
[
|
||||
ConsumerPreflightPlugin,
|
||||
ConsumerPlugin,
|
||||
]
|
||||
if input_doc.root_document_id is not None
|
||||
else [
|
||||
ConsumerPreflightPlugin,
|
||||
AsnCheckPlugin,
|
||||
CollatePlugin,
|
||||
BarcodePlugin,
|
||||
AsnCheckPlugin, # Re-run ASN check after barcode reading
|
||||
WorkflowTriggerPlugin,
|
||||
ConsumerPlugin,
|
||||
]
|
||||
)
|
||||
plugins: list[type[ConsumeTaskPlugin]] = (
|
||||
[
|
||||
ConsumerPreflightPlugin,
|
||||
ConsumerPlugin,
|
||||
]
|
||||
if input_doc.root_document_id is not None
|
||||
else [
|
||||
ConsumerPreflightPlugin,
|
||||
AsnCheckPlugin,
|
||||
CollatePlugin,
|
||||
BarcodePlugin,
|
||||
AsnCheckPlugin, # Re-run ASN check after barcode reading
|
||||
WorkflowTriggerPlugin,
|
||||
ConsumerPlugin,
|
||||
]
|
||||
)
|
||||
|
||||
with (
|
||||
ProgressManager(
|
||||
overrides.filename or input_doc.original_file.name,
|
||||
self.request.id,
|
||||
) as status_mgr,
|
||||
TemporaryDirectory(dir=settings.SCRATCH_DIR) as tmp_dir,
|
||||
):
|
||||
tmp_dir = Path(tmp_dir)
|
||||
for plugin_class in plugins:
|
||||
plugin_name = plugin_class.NAME
|
||||
|
||||
plugin = plugin_class(
|
||||
input_doc,
|
||||
overrides,
|
||||
status_mgr,
|
||||
tmp_dir,
|
||||
with (
|
||||
ProgressManager(
|
||||
overrides.filename or input_doc.original_file.name,
|
||||
self.request.id,
|
||||
)
|
||||
) as status_mgr,
|
||||
TemporaryDirectory(dir=settings.SCRATCH_DIR) as tmp_dir,
|
||||
):
|
||||
tmp_dir = Path(tmp_dir)
|
||||
for plugin_class in plugins:
|
||||
plugin_name = plugin_class.NAME
|
||||
|
||||
if not plugin.able_to_run:
|
||||
logger.debug(f"Skipping plugin {plugin_name}")
|
||||
continue
|
||||
plugin = plugin_class(
|
||||
input_doc,
|
||||
overrides,
|
||||
status_mgr,
|
||||
tmp_dir,
|
||||
self.request.id,
|
||||
)
|
||||
|
||||
try:
|
||||
logger.debug(f"Executing plugin {plugin_name}")
|
||||
plugin.setup()
|
||||
if not plugin.able_to_run:
|
||||
logger.debug(f"Skipping plugin {plugin_name}")
|
||||
continue
|
||||
|
||||
msg = plugin.run()
|
||||
try:
|
||||
logger.debug(f"Executing plugin {plugin_name}")
|
||||
plugin.setup()
|
||||
|
||||
if msg is not None:
|
||||
logger.info(f"{plugin_name} completed with: {msg}")
|
||||
else:
|
||||
logger.info(f"{plugin_name} completed with no message")
|
||||
msg = plugin.run()
|
||||
|
||||
overrides = plugin.metadata
|
||||
if msg is not None:
|
||||
logger.info(f"{plugin_name} completed with: {msg}")
|
||||
else:
|
||||
logger.info(f"{plugin_name} completed with no message")
|
||||
|
||||
except StopConsumeTaskError as e:
|
||||
logger.info(f"{plugin_name} requested task exit: {e.message}")
|
||||
return e.message
|
||||
overrides = plugin.metadata
|
||||
|
||||
except Exception as e:
|
||||
logger.exception(f"{plugin_name} failed: {e}")
|
||||
status_mgr.send_progress(ProgressStatusOptions.FAILED, f"{e}", 100, 100)
|
||||
raise
|
||||
except StopConsumeTaskError as e:
|
||||
logger.info(f"{plugin_name} requested task exit: {e.message}")
|
||||
return e.message
|
||||
|
||||
finally:
|
||||
plugin.cleanup()
|
||||
except Exception as e:
|
||||
logger.exception(f"{plugin_name} failed: {e}")
|
||||
status_mgr.send_progress(
|
||||
ProgressStatusOptions.FAILED,
|
||||
f"{e}",
|
||||
100,
|
||||
100,
|
||||
)
|
||||
raise
|
||||
|
||||
return msg
|
||||
finally:
|
||||
plugin.cleanup()
|
||||
|
||||
return msg
|
||||
finally:
|
||||
consume_task_id.reset(token)
|
||||
|
||||
|
||||
@shared_task
|
||||
@@ -270,9 +260,9 @@ def sanity_check(*, scheduled=True, raise_on_error=True):
|
||||
|
||||
@shared_task
|
||||
def bulk_update_documents(document_ids) -> None:
|
||||
documents = Document.objects.filter(id__in=document_ids)
|
||||
from documents.search import get_backend
|
||||
|
||||
ix = index.open_index()
|
||||
documents = Document.objects.filter(id__in=document_ids)
|
||||
|
||||
for doc in documents:
|
||||
clear_document_caches(doc.pk)
|
||||
@@ -283,9 +273,9 @@ def bulk_update_documents(document_ids) -> None:
|
||||
)
|
||||
post_save.send(Document, instance=doc, created=False)
|
||||
|
||||
with AsyncWriter(ix) as writer:
|
||||
with get_backend().batch_update() as batch:
|
||||
for doc in documents:
|
||||
index.update_document(writer, doc)
|
||||
batch.add_or_update(doc)
|
||||
|
||||
ai_config = AIConfig()
|
||||
if ai_config.llm_index_enabled:
|
||||
@@ -389,8 +379,9 @@ def update_document_content_maybe_archive_file(document_id) -> None:
|
||||
logger.info(
|
||||
f"Updating index for document {document_id} ({document.archive_checksum})",
|
||||
)
|
||||
with index.open_index_writer() as writer:
|
||||
index.update_document(writer, document)
|
||||
from documents.search import get_backend
|
||||
|
||||
get_backend().add_or_update(document)
|
||||
|
||||
ai_config = AIConfig()
|
||||
if ai_config.llm_index_enabled:
|
||||
@@ -633,7 +624,7 @@ def update_document_parent_tags(tag: Tag, new_parent: Tag) -> None:
|
||||
@shared_task
|
||||
def llmindex_index(
|
||||
*,
|
||||
iter_wrapper: IterWrapper[Document] = _identity,
|
||||
iter_wrapper: IterWrapper[Document] = identity,
|
||||
rebuild=False,
|
||||
scheduled=True,
|
||||
auto=False,
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import shutil
|
||||
import zoneinfo
|
||||
from collections.abc import Generator
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
@@ -92,6 +93,26 @@ def sample_doc(
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def _search_index(
|
||||
tmp_path: Path,
|
||||
settings: SettingsWrapper,
|
||||
) -> Generator[None, None, None]:
|
||||
"""Create a temp index directory and point INDEX_DIR at it.
|
||||
|
||||
Resets the backend singleton before and after so each test gets a clean
|
||||
index rather than reusing a stale singleton from another test.
|
||||
"""
|
||||
from documents.search import reset_backend
|
||||
|
||||
index_dir = tmp_path / "index"
|
||||
index_dir.mkdir()
|
||||
settings.INDEX_DIR = index_dir
|
||||
reset_backend()
|
||||
yield
|
||||
reset_backend()
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def settings_timezone(settings: SettingsWrapper) -> zoneinfo.ZoneInfo:
|
||||
return zoneinfo.ZoneInfo(settings.TIME_ZONE)
|
||||
|
||||
Binary file not shown.
0
src/documents/tests/search/__init__.py
Normal file
0
src/documents/tests/search/__init__.py
Normal file
33
src/documents/tests/search/conftest.py
Normal file
33
src/documents/tests/search/conftest.py
Normal file
@@ -0,0 +1,33 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import pytest
|
||||
|
||||
from documents.search._backend import TantivyBackend
|
||||
from documents.search._backend import reset_backend
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Generator
|
||||
from pathlib import Path
|
||||
|
||||
from pytest_django.fixtures import SettingsWrapper
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def index_dir(tmp_path: Path, settings: SettingsWrapper) -> Path:
|
||||
path = tmp_path / "index"
|
||||
path.mkdir()
|
||||
settings.INDEX_DIR = path
|
||||
return path
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def backend() -> Generator[TantivyBackend, None, None]:
|
||||
b = TantivyBackend() # path=None → in-memory index
|
||||
b.open()
|
||||
try:
|
||||
yield b
|
||||
finally:
|
||||
b.close()
|
||||
reset_backend()
|
||||
932
src/documents/tests/search/test_backend.py
Normal file
932
src/documents/tests/search/test_backend.py
Normal file
@@ -0,0 +1,932 @@
|
||||
import pytest
|
||||
from django.contrib.auth.models import User
|
||||
|
||||
from documents.models import CustomField
|
||||
from documents.models import CustomFieldInstance
|
||||
from documents.models import Document
|
||||
from documents.models import Note
|
||||
from documents.search._backend import SearchMode
|
||||
from documents.search._backend import TantivyBackend
|
||||
from documents.search._backend import get_backend
|
||||
from documents.search._backend import reset_backend
|
||||
|
||||
pytestmark = [pytest.mark.search, pytest.mark.django_db]
|
||||
|
||||
|
||||
class TestWriteBatch:
|
||||
"""Test WriteBatch context manager functionality."""
|
||||
|
||||
def test_rolls_back_on_exception(self, backend: TantivyBackend):
|
||||
"""Batch operations must rollback on exception to preserve index integrity."""
|
||||
doc = Document.objects.create(
|
||||
title="Rollback Target",
|
||||
content="should survive",
|
||||
checksum="RB1",
|
||||
pk=1,
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
|
||||
try:
|
||||
with backend.batch_update() as batch:
|
||||
batch.remove(doc.pk)
|
||||
raise RuntimeError("simulated failure")
|
||||
except RuntimeError:
|
||||
pass
|
||||
|
||||
r = backend.search(
|
||||
"should survive",
|
||||
user=None,
|
||||
page=1,
|
||||
page_size=10,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
)
|
||||
assert r.total == 1
|
||||
|
||||
|
||||
class TestSearch:
|
||||
"""Test search functionality."""
|
||||
|
||||
def test_text_mode_limits_default_search_to_title_and_content(
|
||||
self,
|
||||
backend: TantivyBackend,
|
||||
):
|
||||
"""Simple text mode must not match metadata-only fields."""
|
||||
doc = Document.objects.create(
|
||||
title="Invoice document",
|
||||
content="monthly statement",
|
||||
checksum="TXT1",
|
||||
pk=9,
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
|
||||
metadata_only = backend.search(
|
||||
"document_type:invoice",
|
||||
user=None,
|
||||
page=1,
|
||||
page_size=10,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
search_mode=SearchMode.TEXT,
|
||||
)
|
||||
assert metadata_only.total == 0
|
||||
|
||||
content_match = backend.search(
|
||||
"monthly",
|
||||
user=None,
|
||||
page=1,
|
||||
page_size=10,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
search_mode=SearchMode.TEXT,
|
||||
)
|
||||
assert content_match.total == 1
|
||||
|
||||
def test_title_mode_limits_default_search_to_title_only(
|
||||
self,
|
||||
backend: TantivyBackend,
|
||||
):
|
||||
"""Title mode must not match content-only terms."""
|
||||
doc = Document.objects.create(
|
||||
title="Invoice document",
|
||||
content="monthly statement",
|
||||
checksum="TXT2",
|
||||
pk=10,
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
|
||||
content_only = backend.search(
|
||||
"monthly",
|
||||
user=None,
|
||||
page=1,
|
||||
page_size=10,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
search_mode=SearchMode.TITLE,
|
||||
)
|
||||
assert content_only.total == 0
|
||||
|
||||
title_match = backend.search(
|
||||
"invoice",
|
||||
user=None,
|
||||
page=1,
|
||||
page_size=10,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
search_mode=SearchMode.TITLE,
|
||||
)
|
||||
assert title_match.total == 1
|
||||
|
||||
def test_text_mode_matches_partial_term_substrings(
|
||||
self,
|
||||
backend: TantivyBackend,
|
||||
):
|
||||
"""Simple text mode should support substring matching within tokens."""
|
||||
doc = Document.objects.create(
|
||||
title="Account access",
|
||||
content="password reset instructions",
|
||||
checksum="TXT3",
|
||||
pk=11,
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
|
||||
prefix_match = backend.search(
|
||||
"pass",
|
||||
user=None,
|
||||
page=1,
|
||||
page_size=10,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
search_mode=SearchMode.TEXT,
|
||||
)
|
||||
assert prefix_match.total == 1
|
||||
|
||||
infix_match = backend.search(
|
||||
"sswo",
|
||||
user=None,
|
||||
page=1,
|
||||
page_size=10,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
search_mode=SearchMode.TEXT,
|
||||
)
|
||||
assert infix_match.total == 1
|
||||
|
||||
phrase_match = backend.search(
|
||||
"sswo re",
|
||||
user=None,
|
||||
page=1,
|
||||
page_size=10,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
search_mode=SearchMode.TEXT,
|
||||
)
|
||||
assert phrase_match.total == 1
|
||||
|
||||
def test_text_mode_does_not_match_on_partial_term_overlap(
|
||||
self,
|
||||
backend: TantivyBackend,
|
||||
):
|
||||
"""Simple text mode should not match documents that merely share partial fragments."""
|
||||
doc = Document.objects.create(
|
||||
title="Adobe Acrobat PDF Files",
|
||||
content="Lorem ipsum dolor sit amet, consectetur adipiscing elit.",
|
||||
checksum="TXT7",
|
||||
pk=13,
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
|
||||
non_match = backend.search(
|
||||
"raptor",
|
||||
user=None,
|
||||
page=1,
|
||||
page_size=10,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
search_mode=SearchMode.TEXT,
|
||||
)
|
||||
assert non_match.total == 0
|
||||
|
||||
def test_text_mode_anchors_later_query_tokens_to_token_starts(
|
||||
self,
|
||||
backend: TantivyBackend,
|
||||
):
|
||||
"""Multi-token simple search should not match later tokens in the middle of a word."""
|
||||
exact_doc = Document.objects.create(
|
||||
title="Z-Berichte 6",
|
||||
content="monthly report",
|
||||
checksum="TXT9",
|
||||
pk=15,
|
||||
)
|
||||
prefix_doc = Document.objects.create(
|
||||
title="Z-Berichte 60",
|
||||
content="monthly report",
|
||||
checksum="TXT10",
|
||||
pk=16,
|
||||
)
|
||||
false_positive = Document.objects.create(
|
||||
title="Z-Berichte 16",
|
||||
content="monthly report",
|
||||
checksum="TXT11",
|
||||
pk=17,
|
||||
)
|
||||
backend.add_or_update(exact_doc)
|
||||
backend.add_or_update(prefix_doc)
|
||||
backend.add_or_update(false_positive)
|
||||
|
||||
results = backend.search(
|
||||
"Z-Berichte 6",
|
||||
user=None,
|
||||
page=1,
|
||||
page_size=10,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
search_mode=SearchMode.TEXT,
|
||||
)
|
||||
result_ids = {hit["id"] for hit in results.hits}
|
||||
|
||||
assert exact_doc.id in result_ids
|
||||
assert prefix_doc.id in result_ids
|
||||
assert false_positive.id not in result_ids
|
||||
|
||||
def test_text_mode_ignores_queries_without_searchable_tokens(
|
||||
self,
|
||||
backend: TantivyBackend,
|
||||
):
|
||||
"""Simple text mode should safely return no hits for symbol-only strings."""
|
||||
doc = Document.objects.create(
|
||||
title="Guide",
|
||||
content="This is a guide.",
|
||||
checksum="TXT8",
|
||||
pk=14,
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
|
||||
no_tokens = backend.search(
|
||||
"!!!",
|
||||
user=None,
|
||||
page=1,
|
||||
page_size=10,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
search_mode=SearchMode.TEXT,
|
||||
)
|
||||
assert no_tokens.total == 0
|
||||
|
||||
def test_title_mode_matches_partial_term_substrings(
|
||||
self,
|
||||
backend: TantivyBackend,
|
||||
):
|
||||
"""Title mode should support substring matching within title tokens."""
|
||||
doc = Document.objects.create(
|
||||
title="Password guide",
|
||||
content="reset instructions",
|
||||
checksum="TXT4",
|
||||
pk=12,
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
|
||||
prefix_match = backend.search(
|
||||
"pass",
|
||||
user=None,
|
||||
page=1,
|
||||
page_size=10,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
search_mode=SearchMode.TITLE,
|
||||
)
|
||||
assert prefix_match.total == 1
|
||||
|
||||
infix_match = backend.search(
|
||||
"sswo",
|
||||
user=None,
|
||||
page=1,
|
||||
page_size=10,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
search_mode=SearchMode.TITLE,
|
||||
)
|
||||
assert infix_match.total == 1
|
||||
|
||||
phrase_match = backend.search(
|
||||
"sswo gu",
|
||||
user=None,
|
||||
page=1,
|
||||
page_size=10,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
search_mode=SearchMode.TITLE,
|
||||
)
|
||||
assert phrase_match.total == 1
|
||||
|
||||
def test_scores_normalised_top_hit_is_one(self, backend: TantivyBackend):
|
||||
"""Search scores must be normalized so top hit has score 1.0 for UI consistency."""
|
||||
for i, title in enumerate(["bank invoice", "bank statement", "bank receipt"]):
|
||||
doc = Document.objects.create(
|
||||
title=title,
|
||||
content=title,
|
||||
checksum=f"SN{i}",
|
||||
pk=10 + i,
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
r = backend.search(
|
||||
"bank",
|
||||
user=None,
|
||||
page=1,
|
||||
page_size=10,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
)
|
||||
assert r.hits[0]["score"] == pytest.approx(1.0)
|
||||
assert all(0.0 <= h["score"] <= 1.0 for h in r.hits)
|
||||
|
||||
def test_sort_field_ascending(self, backend: TantivyBackend):
|
||||
"""Searching with sort_reverse=False must return results in ascending ASN order."""
|
||||
for asn in [30, 10, 20]:
|
||||
doc = Document.objects.create(
|
||||
title="sortable",
|
||||
content="sortable content",
|
||||
checksum=f"SFA{asn}",
|
||||
archive_serial_number=asn,
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
|
||||
r = backend.search(
|
||||
"sortable",
|
||||
user=None,
|
||||
page=1,
|
||||
page_size=10,
|
||||
sort_field="archive_serial_number",
|
||||
sort_reverse=False,
|
||||
)
|
||||
assert r.total == 3
|
||||
asns = [Document.objects.get(pk=h["id"]).archive_serial_number for h in r.hits]
|
||||
assert asns == [10, 20, 30]
|
||||
|
||||
def test_sort_field_descending(self, backend: TantivyBackend):
|
||||
"""Searching with sort_reverse=True must return results in descending ASN order."""
|
||||
for asn in [30, 10, 20]:
|
||||
doc = Document.objects.create(
|
||||
title="sortable",
|
||||
content="sortable content",
|
||||
checksum=f"SFD{asn}",
|
||||
archive_serial_number=asn,
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
|
||||
r = backend.search(
|
||||
"sortable",
|
||||
user=None,
|
||||
page=1,
|
||||
page_size=10,
|
||||
sort_field="archive_serial_number",
|
||||
sort_reverse=True,
|
||||
)
|
||||
assert r.total == 3
|
||||
asns = [Document.objects.get(pk=h["id"]).archive_serial_number for h in r.hits]
|
||||
assert asns == [30, 20, 10]
|
||||
|
||||
def test_fuzzy_threshold_filters_low_score_hits(
|
||||
self,
|
||||
backend: TantivyBackend,
|
||||
settings,
|
||||
):
|
||||
"""When ADVANCED_FUZZY_SEARCH_THRESHOLD exceeds all normalized scores, hits must be filtered out."""
|
||||
doc = Document.objects.create(
|
||||
title="Invoice document",
|
||||
content="financial report",
|
||||
checksum="FT1",
|
||||
pk=120,
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
|
||||
# Threshold above 1.0 filters every hit (normalized scores top out at 1.0)
|
||||
settings.ADVANCED_FUZZY_SEARCH_THRESHOLD = 1.1
|
||||
r = backend.search(
|
||||
"invoice",
|
||||
user=None,
|
||||
page=1,
|
||||
page_size=10,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
)
|
||||
assert r.hits == []
|
||||
|
||||
def test_owner_filter(self, backend: TantivyBackend):
|
||||
"""Document owners can search their private documents; other users cannot access them."""
|
||||
owner = User.objects.create_user("owner")
|
||||
other = User.objects.create_user("other")
|
||||
doc = Document.objects.create(
|
||||
title="Private",
|
||||
content="secret",
|
||||
checksum="PF1",
|
||||
pk=20,
|
||||
owner=owner,
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
|
||||
assert (
|
||||
backend.search(
|
||||
"secret",
|
||||
user=owner,
|
||||
page=1,
|
||||
page_size=10,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
).total
|
||||
== 1
|
||||
)
|
||||
assert (
|
||||
backend.search(
|
||||
"secret",
|
||||
user=other,
|
||||
page=1,
|
||||
page_size=10,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
).total
|
||||
== 0
|
||||
)
|
||||
|
||||
def test_highlight_page_only_highlights_requested_slice(
|
||||
self,
|
||||
backend: TantivyBackend,
|
||||
):
|
||||
"""Only hits in the highlight_page slice should have non-empty highlights."""
|
||||
for i in range(6):
|
||||
doc = Document.objects.create(
|
||||
title=f"highlight doc {i}",
|
||||
content=f"searchable highlight content number {i}",
|
||||
checksum=f"HP{i}",
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
|
||||
r = backend.search(
|
||||
"searchable",
|
||||
user=None,
|
||||
page=1,
|
||||
page_size=10000,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
highlight_page=1,
|
||||
highlight_page_size=3,
|
||||
)
|
||||
assert r.total == 6
|
||||
assert len(r.hits) == 6
|
||||
highlighted = [h for h in r.hits if h["highlights"]]
|
||||
not_highlighted = [h for h in r.hits if not h["highlights"]]
|
||||
assert len(highlighted) == 3
|
||||
assert len(not_highlighted) == 3
|
||||
|
||||
def test_highlight_page_2_highlights_correct_slice(self, backend: TantivyBackend):
|
||||
"""highlight_page=2 should highlight only the second page of results."""
|
||||
for i in range(6):
|
||||
doc = Document.objects.create(
|
||||
title=f"page2 doc {i}",
|
||||
content=f"searchable page2 content number {i}",
|
||||
checksum=f"HP2{i}",
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
|
||||
r = backend.search(
|
||||
"searchable",
|
||||
user=None,
|
||||
page=1,
|
||||
page_size=10000,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
highlight_page=2,
|
||||
highlight_page_size=2,
|
||||
)
|
||||
assert r.total == 6
|
||||
assert len(r.hits) == 6
|
||||
highlighted = [h for h in r.hits if h["highlights"]]
|
||||
not_highlighted = [h for h in r.hits if not h["highlights"]]
|
||||
# Only 2 hits (the second page) should have highlights
|
||||
assert len(highlighted) == 2
|
||||
assert len(not_highlighted) == 4
|
||||
|
||||
def test_no_highlight_page_highlights_all(self, backend: TantivyBackend):
|
||||
"""When highlight_page is not specified, all hits get highlights (backward compat)."""
|
||||
for i in range(3):
|
||||
doc = Document.objects.create(
|
||||
title=f"compat doc {i}",
|
||||
content=f"searchable compat content {i}",
|
||||
checksum=f"HC{i}",
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
|
||||
r = backend.search(
|
||||
"searchable",
|
||||
user=None,
|
||||
page=1,
|
||||
page_size=10000,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
)
|
||||
assert len(r.hits) == 3
|
||||
for hit in r.hits:
|
||||
assert "content" in hit["highlights"]
|
||||
|
||||
|
||||
class TestSearchIds:
    """Test lightweight ID-only search."""

    def test_returns_matching_ids(self, backend: TantivyBackend):
        """search_ids must return IDs of all matching documents."""
        # Five documents sharing the same content, plus one that must not match.
        matching = [
            Document.objects.create(
                title=f"findable doc {n}",
                content="common keyword",
                checksum=f"SI{n}",
            )
            for n in range(5)
        ]
        for document in matching:
            backend.add_or_update(document)
        unrelated = Document.objects.create(
            title="unrelated",
            content="nothing here",
            checksum="SI_other",
        )
        backend.add_or_update(unrelated)

        found = backend.search_ids(
            "common keyword",
            user=None,
            search_mode=SearchMode.QUERY,
        )
        assert set(found) == {document.pk for document in matching}
        assert unrelated.pk not in found

    def test_respects_permission_filter(self, backend: TantivyBackend):
        """search_ids must respect user permission filtering."""
        doc_owner = User.objects.create_user("ids_owner")
        stranger = User.objects.create_user("ids_other")
        private = Document.objects.create(
            title="private doc",
            content="secret keyword",
            checksum="SIP1",
            owner=doc_owner,
        )
        backend.add_or_update(private)

        # The owner sees the document; an unrelated user does not.
        owner_hits = backend.search_ids(
            "secret",
            user=doc_owner,
            search_mode=SearchMode.QUERY,
        )
        stranger_hits = backend.search_ids(
            "secret",
            user=stranger,
            search_mode=SearchMode.QUERY,
        )
        assert owner_hits == [private.pk]
        assert stranger_hits == []

    def test_respects_fuzzy_threshold(self, backend: TantivyBackend, settings):
        """search_ids must apply the same fuzzy threshold as search()."""
        indexed = Document.objects.create(
            title="threshold test",
            content="unique term",
            checksum="SIT1",
        )
        backend.add_or_update(indexed)

        # A threshold above 1.0 can never be satisfied, so nothing matches.
        settings.ADVANCED_FUZZY_SEARCH_THRESHOLD = 1.1
        found = backend.search_ids("unique", user=None, search_mode=SearchMode.QUERY)
        assert found == []

    def test_returns_ids_for_text_mode(self, backend: TantivyBackend):
        """search_ids must work with TEXT search mode."""
        document = Document.objects.create(
            title="text mode doc",
            content="findable phrase",
            checksum="SIM1",
        )
        backend.add_or_update(document)

        found = backend.search_ids("findable", user=None, search_mode=SearchMode.TEXT)
        assert found == [document.pk]
class TestRebuild:
    """Test index rebuilding functionality."""

    def test_with_iter_wrapper_called(self, backend: TantivyBackend):
        """Index rebuild must pass documents through iter_wrapper for progress tracking."""
        observed_pks = []

        def tracking_wrapper(documents):
            # Record each document's pk as it streams through the rebuild.
            for document in documents:
                observed_pks.append(document.pk)
                yield document

        Document.objects.create(title="Tracked", content="x", checksum="TW1", pk=30)
        backend.rebuild(Document.objects.all(), iter_wrapper=tracking_wrapper)
        assert 30 in observed_pks
class TestAutocomplete:
    """Test autocomplete functionality."""

    def test_basic_functionality(self, backend: TantivyBackend):
        """Autocomplete must return words matching the given prefix."""
        backend.add_or_update(
            Document.objects.create(
                title="Invoice from Microsoft Corporation",
                content="payment details",
                checksum="AC1",
                pk=40,
            ),
        )

        suggestions = backend.autocomplete("micro", limit=10)
        assert "microsoft" in suggestions

    def test_results_ordered_by_document_frequency(self, backend: TantivyBackend):
        """Autocomplete results must be ordered by document frequency to prioritize common terms."""
        # "payment" appears in 3 docs; "payslip" in 1 — "pay" prefix should
        # return "payment" before "payslip".
        corpus = [
            ("payment invoice", "AF1"),
            ("payment receipt", "AF2"),
            ("payment confirmation", "AF3"),
            ("payslip march", "AF4"),
        ]
        for pk, (title, checksum) in enumerate(corpus, start=41):
            backend.add_or_update(
                Document.objects.create(
                    title=title,
                    content="details",
                    checksum=checksum,
                    pk=pk,
                ),
            )

        suggestions = backend.autocomplete("pay", limit=10)
        assert suggestions.index("payment") < suggestions.index("payslip")
class TestMoreLikeThis:
    """Test more like this functionality."""

    def test_excludes_original(self, backend: TantivyBackend):
        """More like this queries must exclude the reference document from results."""
        # Both documents share the term "financial", giving MLT overlap to match on.
        doc1 = Document.objects.create(
            title="Important document",
            content="financial information",
            checksum="MLT1",
            pk=50,
        )
        doc2 = Document.objects.create(
            title="Another document",
            content="financial report",
            checksum="MLT2",
            pk=51,
        )
        backend.add_or_update(doc1)
        backend.add_or_update(doc2)

        results = backend.more_like_this(doc_id=50, user=None, page=1, page_size=10)
        returned_ids = [hit["id"] for hit in results.hits]
        assert 50 not in returned_ids  # Original document excluded

    def test_with_user_applies_permission_filter(self, backend: TantivyBackend):
        """more_like_this with a user must exclude documents that user cannot see."""
        viewer = User.objects.create_user("mlt_viewer")
        other = User.objects.create_user("mlt_other")
        # Identical content so both would rank as similar absent permission filtering.
        public_doc = Document.objects.create(
            title="Public financial document",
            content="quarterly financial analysis report figures",
            checksum="MLT3",
            pk=52,
        )
        private_doc = Document.objects.create(
            title="Private financial document",
            content="quarterly financial analysis report figures",
            checksum="MLT4",
            pk=53,
            owner=other,
        )
        backend.add_or_update(public_doc)
        backend.add_or_update(private_doc)

        results = backend.more_like_this(doc_id=52, user=viewer, page=1, page_size=10)
        returned_ids = [hit["id"] for hit in results.hits]
        # private_doc is owned by other, so viewer cannot see it
        assert 53 not in returned_ids

    def test_document_not_in_index_returns_empty(self, backend: TantivyBackend):
        """more_like_this for a doc_id absent from the index must return empty results."""
        # 9999 was never indexed; the backend must not raise, just return nothing.
        results = backend.more_like_this(doc_id=9999, user=None, page=1, page_size=10)
        assert results.hits == []
        assert results.total == 0

    def test_more_like_this_ids_excludes_original(self, backend: TantivyBackend):
        """more_like_this_ids must return IDs of similar documents, excluding the original."""
        doc1 = Document.objects.create(
            title="Important document",
            content="financial information report",
            checksum="MLTI1",
            pk=150,
        )
        doc2 = Document.objects.create(
            title="Another document",
            content="financial information report",
            checksum="MLTI2",
            pk=151,
        )
        backend.add_or_update(doc1)
        backend.add_or_update(doc2)

        # Same contract as more_like_this(), but ID-only.
        ids = backend.more_like_this_ids(doc_id=150, user=None)
        assert 150 not in ids
        assert 151 in ids
class TestSingleton:
    """Test get_backend() and reset_backend() singleton lifecycle."""

    @pytest.fixture(autouse=True)
    def _clean(self):
        # Every test starts and ends with a pristine singleton state.
        reset_backend()
        yield
        reset_backend()

    def test_returns_same_instance_on_repeated_calls(self, index_dir):
        """Singleton pattern: repeated calls to get_backend() must return the same instance."""
        first = get_backend()
        second = get_backend()
        assert first is second

    def test_reinitializes_when_index_dir_changes(self, tmp_path, settings):
        """Backend singleton must reinitialize when INDEX_DIR setting changes for test isolation."""
        dir_a = tmp_path / "a"
        settings.INDEX_DIR = dir_a
        dir_a.mkdir()
        backend_a = get_backend()

        dir_b = tmp_path / "b"
        settings.INDEX_DIR = dir_b
        dir_b.mkdir()
        backend_b = get_backend()

        assert backend_a is not backend_b
        assert backend_b._path == dir_b

    def test_reset_forces_new_instance(self, index_dir):
        """reset_backend() must force creation of a new backend instance on next get_backend() call."""
        before = get_backend()
        reset_backend()
        assert before is not get_backend()
class TestFieldHandling:
    """Test handling of various document fields."""

    def test_none_values_handled_correctly(self, backend: TantivyBackend):
        """Document fields with None values must not cause indexing errors."""
        doc = Document.objects.create(
            title="Test Doc",
            content="test content",
            checksum="NV1",
            pk=60,
            original_filename=None,
            page_count=None,
        )
        # Should not raise an exception
        backend.add_or_update(doc)

        # The document must still be fully searchable despite the None fields.
        results = backend.search(
            "test",
            user=None,
            page=1,
            page_size=10,
            sort_field=None,
            sort_reverse=False,
        )
        assert results.total == 1

    def test_custom_fields_include_name_and_value(self, backend: TantivyBackend):
        """Custom fields must be indexed with both field name and value for structured queries."""
        # Create a custom field
        field = CustomField.objects.create(
            name="Invoice Number",
            data_type=CustomField.FieldDataType.STRING,
        )
        doc = Document.objects.create(
            title="Invoice",
            content="test",
            checksum="CF1",
            pk=70,
        )
        CustomFieldInstance.objects.create(
            document=doc,
            field=field,
            value_text="INV-2024-001",
        )

        # Should not raise an exception during indexing
        backend.add_or_update(doc)

        results = backend.search(
            "invoice",
            user=None,
            page=1,
            page_size=10,
            sort_field=None,
            sort_reverse=False,
        )
        assert results.total == 1

    def test_select_custom_field_indexes_label_not_id(self, backend: TantivyBackend):
        """SELECT custom fields must index the human-readable label, not the opaque option ID."""
        field = CustomField.objects.create(
            name="Category",
            data_type=CustomField.FieldDataType.SELECT,
            extra_data={
                "select_options": [
                    {"id": "opt_abc", "label": "Invoice"},
                    {"id": "opt_def", "label": "Receipt"},
                ],
            },
        )
        doc = Document.objects.create(
            title="Categorised doc",
            content="test",
            checksum="SEL1",
            pk=71,
        )
        # The instance stores the option ID; the index must resolve it to its label.
        CustomFieldInstance.objects.create(
            document=doc,
            field=field,
            value_select="opt_abc",
        )
        backend.add_or_update(doc)

        # Label should be findable
        results = backend.search(
            "custom_fields.value:invoice",
            user=None,
            page=1,
            page_size=10,
            sort_field=None,
            sort_reverse=False,
        )
        assert results.total == 1

        # Opaque ID must not appear in the index
        results = backend.search(
            "custom_fields.value:opt_abc",
            user=None,
            page=1,
            page_size=10,
            sort_field=None,
            sort_reverse=False,
        )
        assert results.total == 0

    def test_none_custom_field_value_not_indexed(self, backend: TantivyBackend):
        """Custom field instances with no value set must not produce an index entry."""
        field = CustomField.objects.create(
            name="Optional",
            data_type=CustomField.FieldDataType.SELECT,
            extra_data={"select_options": [{"id": "opt_1", "label": "Yes"}]},
        )
        doc = Document.objects.create(
            title="Unset field doc",
            content="test",
            checksum="SEL2",
            pk=72,
        )
        CustomFieldInstance.objects.create(
            document=doc,
            field=field,
            value_select=None,
        )
        backend.add_or_update(doc)

        # The string "none" must not appear as an indexed value
        results = backend.search(
            "custom_fields.value:none",
            user=None,
            page=1,
            page_size=10,
            sort_field=None,
            sort_reverse=False,
        )
        assert results.total == 0

    def test_notes_include_user_information(self, backend: TantivyBackend):
        """Notes must be indexed with user information when available for structured queries."""
        user = User.objects.create_user("notewriter")
        doc = Document.objects.create(
            title="Doc with notes",
            content="test",
            checksum="NT1",
            pk=80,
        )
        Note.objects.create(document=doc, note="Important note", user=user)

        # Should not raise an exception during indexing
        backend.add_or_update(doc)

        # Test basic document search first
        results = backend.search(
            "test",
            user=None,
            page=1,
            page_size=10,
            sort_field=None,
            sort_reverse=False,
        )
        assert results.total == 1, (
            f"Expected 1, got {results.total}. Document content should be searchable."
        )

        # Test notes search — must use structured JSON syntax now that note
        # is no longer in DEFAULT_SEARCH_FIELDS
        results = backend.search(
            "notes.note:important",
            user=None,
            page=1,
            page_size=10,
            sort_field=None,
            sort_reverse=False,
        )
        assert results.total == 1, (
            f"Expected 1, got {results.total}. Note content should be searchable via notes.note: prefix."
        )
@@ -0,0 +1,138 @@
|
||||
import pytest
|
||||
|
||||
from documents.tests.utils import TestMigrations
|
||||
|
||||
pytestmark = pytest.mark.search
|
||||
|
||||
|
||||
class TestMigrateFulltextQueryFieldPrefixes(TestMigrations):
    # Exercises the data migration that rewrites legacy Whoosh field prefixes
    # (note:, custom_field:) in saved-view fulltext rules to the new
    # structured syntax (notes.note:, custom_fields.value:).
    migrate_from = "0016_sha256_checksums"
    migrate_to = "0017_migrate_fulltext_query_field_prefixes"

    def setUpBeforeMigration(self, apps) -> None:
        # Historical models: migration tests must use apps.get_model, not imports.
        User = apps.get_model("auth", "User")
        SavedView = apps.get_model("documents", "SavedView")
        SavedViewFilterRule = apps.get_model("documents", "SavedViewFilterRule")

        user = User.objects.create(username="testuser")

        def make_rule(value: str):
            # Each rule gets its own view so cases stay independent.
            view = SavedView.objects.create(
                owner=user,
                name=f"view-{value}",
                sort_field="created",
            )
            return SavedViewFilterRule.objects.create(
                saved_view=view,
                rule_type=20,  # fulltext query
                value=value,
            )

        # Simple field prefixes
        self.rule_note = make_rule("note:invoice")
        self.rule_cf = make_rule("custom_field:amount")

        # Combined query
        self.rule_combined = make_rule("note:invoice AND custom_field:total")

        # Parenthesized groups (Whoosh syntax)
        self.rule_parens = make_rule("(note:invoice OR note:receipt)")

        # Prefix operators
        self.rule_plus = make_rule("+note:foo")
        self.rule_minus = make_rule("-note:bar")

        # Boosted
        self.rule_boost = make_rule("note:test^2")

        # Should NOT be rewritten — no field prefix match
        self.rule_no_match = make_rule("title:hello content:world")

        # Should NOT false-positive on word boundaries
        self.rule_denote = make_rule("denote:foo")

        # Already using new syntax — should be idempotent
        self.rule_already_migrated = make_rule("notes.note:foo")
        self.rule_already_migrated_cf = make_rule("custom_fields.value:bar")

        # Null value — should not crash
        view_null = SavedView.objects.create(
            owner=user,
            name="view-null",
            sort_field="created",
        )
        self.rule_null = SavedViewFilterRule.objects.create(
            saved_view=view_null,
            rule_type=20,
            value=None,
        )

        # Non-fulltext rule type — should be untouched
        view_other = SavedView.objects.create(
            owner=user,
            name="view-other-type",
            sort_field="created",
        )
        self.rule_other_type = SavedViewFilterRule.objects.create(
            saved_view=view_other,
            rule_type=0,  # title contains
            value="note:something",
        )

    def test_note_prefix_rewritten(self):
        self.rule_note.refresh_from_db()
        self.assertEqual(self.rule_note.value, "notes.note:invoice")

    def test_custom_field_prefix_rewritten(self):
        self.rule_cf.refresh_from_db()
        self.assertEqual(self.rule_cf.value, "custom_fields.value:amount")

    def test_combined_query_rewritten(self):
        self.rule_combined.refresh_from_db()
        self.assertEqual(
            self.rule_combined.value,
            "notes.note:invoice AND custom_fields.value:total",
        )

    def test_parenthesized_groups(self):
        self.rule_parens.refresh_from_db()
        self.assertEqual(
            self.rule_parens.value,
            "(notes.note:invoice OR notes.note:receipt)",
        )

    def test_plus_prefix(self):
        self.rule_plus.refresh_from_db()
        self.assertEqual(self.rule_plus.value, "+notes.note:foo")

    def test_minus_prefix(self):
        self.rule_minus.refresh_from_db()
        self.assertEqual(self.rule_minus.value, "-notes.note:bar")

    def test_boosted(self):
        self.rule_boost.refresh_from_db()
        self.assertEqual(self.rule_boost.value, "notes.note:test^2")

    def test_no_match_unchanged(self):
        self.rule_no_match.refresh_from_db()
        self.assertEqual(self.rule_no_match.value, "title:hello content:world")

    def test_word_boundary_no_false_positive(self):
        # "denote:" must not be rewritten just because it ends in "note:".
        self.rule_denote.refresh_from_db()
        self.assertEqual(self.rule_denote.value, "denote:foo")

    def test_already_migrated_idempotent(self):
        self.rule_already_migrated.refresh_from_db()
        self.assertEqual(self.rule_already_migrated.value, "notes.note:foo")

    def test_already_migrated_cf_idempotent(self):
        self.rule_already_migrated_cf.refresh_from_db()
        self.assertEqual(self.rule_already_migrated_cf.value, "custom_fields.value:bar")

    def test_null_value_no_crash(self):
        self.rule_null.refresh_from_db()
        self.assertIsNone(self.rule_null.value)

    def test_non_fulltext_rule_untouched(self):
        self.rule_other_type.refresh_from_db()
        self.assertEqual(self.rule_other_type.value, "note:something")
530
src/documents/tests/search/test_query.py
Normal file
530
src/documents/tests/search/test_query.py
Normal file
@@ -0,0 +1,530 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from datetime import UTC
|
||||
from datetime import datetime
|
||||
from datetime import tzinfo
|
||||
from typing import TYPE_CHECKING
|
||||
from zoneinfo import ZoneInfo
|
||||
|
||||
import pytest
|
||||
import tantivy
|
||||
import time_machine
|
||||
|
||||
from documents.search._query import _date_only_range
|
||||
from documents.search._query import _datetime_range
|
||||
from documents.search._query import _rewrite_compact_date
|
||||
from documents.search._query import build_permission_filter
|
||||
from documents.search._query import normalize_query
|
||||
from documents.search._query import parse_user_query
|
||||
from documents.search._query import rewrite_natural_date_keywords
|
||||
from documents.search._schema import build_schema
|
||||
from documents.search._tokenizer import register_tokenizers
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from django.contrib.auth.base_user import AbstractBaseUser
|
||||
|
||||
pytestmark = pytest.mark.search
|
||||
|
||||
EASTERN = ZoneInfo("America/New_York") # UTC-5 / UTC-4 (DST)
|
||||
AUCKLAND = ZoneInfo("Pacific/Auckland") # UTC+13 in southern-hemisphere summer
|
||||
|
||||
|
||||
def _range(result: str, field: str) -> tuple[str, str]:
    """Extract the (lo, hi) bounds of the first ``field:[lo TO hi]`` range in *result*."""
    match = re.search(rf"{field}:\[(.+?) TO (.+?)\]", result)
    assert match, f"No range for {field!r} in: {result!r}"
    lo, hi = match.groups()
    return lo, hi
class TestCreatedDateField:
    """
    created is a Django DateField: indexed as midnight UTC of the local calendar
    date. No offset arithmetic needed - the local calendar date is what matters.
    """

    @pytest.mark.parametrize(
        ("tz", "expected_lo", "expected_hi"),
        [
            pytest.param(UTC, "2026-03-28T00:00:00Z", "2026-03-29T00:00:00Z", id="utc"),
            pytest.param(
                EASTERN,
                "2026-03-28T00:00:00Z",
                "2026-03-29T00:00:00Z",
                id="eastern_same_calendar_date",
            ),
        ],
    )
    @time_machine.travel(datetime(2026, 3, 28, 15, 30, tzinfo=UTC), tick=False)
    def test_today(self, tz: tzinfo, expected_lo: str, expected_hi: str) -> None:
        # Both timezones share the calendar date 2026-03-28 at this instant,
        # so the rewritten range is identical regardless of offset.
        lo, hi = _range(rewrite_natural_date_keywords("created:today", tz), "created")
        assert lo == expected_lo
        assert hi == expected_hi

    @time_machine.travel(datetime(2026, 3, 28, 3, 0, tzinfo=UTC), tick=False)
    def test_today_auckland_ahead_of_utc(self) -> None:
        # UTC 03:00 -> Auckland (UTC+13) = 16:00 same date; local date = 2026-03-28
        lo, _ = _range(
            rewrite_natural_date_keywords("created:today", AUCKLAND),
            "created",
        )
        assert lo == "2026-03-28T00:00:00Z"

    @pytest.mark.parametrize(
        ("field", "keyword", "expected_lo", "expected_hi"),
        [
            pytest.param(
                "created",
                "yesterday",
                "2026-03-27T00:00:00Z",
                "2026-03-28T00:00:00Z",
                id="yesterday",
            ),
            pytest.param(
                "created",
                "this_week",
                "2026-03-23T00:00:00Z",
                "2026-03-30T00:00:00Z",
                id="this_week_mon_sun",
            ),
            pytest.param(
                "created",
                "last_week",
                "2026-03-16T00:00:00Z",
                "2026-03-23T00:00:00Z",
                id="last_week",
            ),
            pytest.param(
                "created",
                "this_month",
                "2026-03-01T00:00:00Z",
                "2026-04-01T00:00:00Z",
                id="this_month",
            ),
            pytest.param(
                "created",
                "last_month",
                "2026-02-01T00:00:00Z",
                "2026-03-01T00:00:00Z",
                id="last_month",
            ),
            pytest.param(
                "created",
                "this_year",
                "2026-01-01T00:00:00Z",
                "2027-01-01T00:00:00Z",
                id="this_year",
            ),
            pytest.param(
                "created",
                "last_year",
                "2025-01-01T00:00:00Z",
                "2026-01-01T00:00:00Z",
                id="last_year",
            ),
        ],
    )
    @time_machine.travel(datetime(2026, 3, 28, 15, 0, tzinfo=UTC), tick=False)
    def test_date_keywords(
        self,
        field: str,
        keyword: str,
        expected_lo: str,
        expected_hi: str,
    ) -> None:
        # 2026-03-28 is Saturday; Mon-Sun week calculation built into expectations
        query = f"{field}:{keyword}"
        lo, hi = _range(rewrite_natural_date_keywords(query, UTC), field)
        assert lo == expected_lo
        assert hi == expected_hi

    @time_machine.travel(datetime(2026, 12, 15, 12, 0, tzinfo=UTC), tick=False)
    def test_this_month_december_wraps_to_next_year(self) -> None:
        # December: next month must roll over to January 1 of next year
        lo, hi = _range(
            rewrite_natural_date_keywords("created:this_month", UTC),
            "created",
        )
        assert lo == "2026-12-01T00:00:00Z"
        assert hi == "2027-01-01T00:00:00Z"

    @time_machine.travel(datetime(2026, 1, 15, 12, 0, tzinfo=UTC), tick=False)
    def test_last_month_january_wraps_to_previous_year(self) -> None:
        # January: last month must roll back to December 1 of previous year
        lo, hi = _range(
            rewrite_natural_date_keywords("created:last_month", UTC),
            "created",
        )
        assert lo == "2025-12-01T00:00:00Z"
        assert hi == "2026-01-01T00:00:00Z"

    def test_unknown_keyword_raises(self) -> None:
        # Unrecognized keywords must fail loudly rather than silently matching nothing.
        with pytest.raises(ValueError, match="Unknown keyword"):
            _date_only_range("bogus_keyword", UTC)
class TestDateTimeFields:
    """
    added/modified store full UTC datetimes. Natural keywords must convert
    the local day boundaries to UTC - timezone offset arithmetic IS required.
    """

    @time_machine.travel(datetime(2026, 3, 28, 15, 30, tzinfo=UTC), tick=False)
    def test_added_today_eastern(self) -> None:
        # EDT = UTC-4; local midnight 2026-03-28 00:00 EDT = 2026-03-28 04:00 UTC
        lo, hi = _range(rewrite_natural_date_keywords("added:today", EASTERN), "added")
        assert lo == "2026-03-28T04:00:00Z"
        assert hi == "2026-03-29T04:00:00Z"

    @time_machine.travel(datetime(2026, 3, 29, 2, 0, tzinfo=UTC), tick=False)
    def test_added_today_auckland_midnight_crossing(self) -> None:
        # UTC 02:00 on 2026-03-29 -> Auckland (UTC+13) = 2026-03-29 15:00 local
        # Auckland midnight = UTC 2026-03-28 11:00
        lo, hi = _range(rewrite_natural_date_keywords("added:today", AUCKLAND), "added")
        assert lo == "2026-03-28T11:00:00Z"
        assert hi == "2026-03-29T11:00:00Z"

    @time_machine.travel(datetime(2026, 3, 28, 15, 0, tzinfo=UTC), tick=False)
    def test_modified_today_utc(self) -> None:
        # In UTC the local/UTC boundaries coincide - no offset shift expected.
        lo, hi = _range(
            rewrite_natural_date_keywords("modified:today", UTC),
            "modified",
        )
        assert lo == "2026-03-28T00:00:00Z"
        assert hi == "2026-03-29T00:00:00Z"

    @pytest.mark.parametrize(
        ("keyword", "expected_lo", "expected_hi"),
        [
            pytest.param(
                "yesterday",
                "2026-03-27T00:00:00Z",
                "2026-03-28T00:00:00Z",
                id="yesterday",
            ),
            pytest.param(
                "this_week",
                "2026-03-23T00:00:00Z",
                "2026-03-30T00:00:00Z",
                id="this_week",
            ),
            pytest.param(
                "last_week",
                "2026-03-16T00:00:00Z",
                "2026-03-23T00:00:00Z",
                id="last_week",
            ),
            pytest.param(
                "this_month",
                "2026-03-01T00:00:00Z",
                "2026-04-01T00:00:00Z",
                id="this_month",
            ),
            pytest.param(
                "last_month",
                "2026-02-01T00:00:00Z",
                "2026-03-01T00:00:00Z",
                id="last_month",
            ),
            pytest.param(
                "this_year",
                "2026-01-01T00:00:00Z",
                "2027-01-01T00:00:00Z",
                id="this_year",
            ),
            pytest.param(
                "last_year",
                "2025-01-01T00:00:00Z",
                "2026-01-01T00:00:00Z",
                id="last_year",
            ),
        ],
    )
    @time_machine.travel(datetime(2026, 3, 28, 12, 0, tzinfo=UTC), tick=False)
    def test_datetime_keywords_utc(
        self,
        keyword: str,
        expected_lo: str,
        expected_hi: str,
    ) -> None:
        # 2026-03-28 is Saturday; weekday()==5 so Monday=2026-03-23
        lo, hi = _range(rewrite_natural_date_keywords(f"added:{keyword}", UTC), "added")
        assert lo == expected_lo
        assert hi == expected_hi

    @time_machine.travel(datetime(2026, 12, 15, 12, 0, tzinfo=UTC), tick=False)
    def test_this_month_december_wraps_to_next_year(self) -> None:
        # December: next month wraps to January of next year
        lo, hi = _range(rewrite_natural_date_keywords("added:this_month", UTC), "added")
        assert lo == "2026-12-01T00:00:00Z"
        assert hi == "2027-01-01T00:00:00Z"

    @time_machine.travel(datetime(2026, 1, 15, 12, 0, tzinfo=UTC), tick=False)
    def test_last_month_january_wraps_to_previous_year(self) -> None:
        # January: last month wraps back to December of previous year
        lo, hi = _range(rewrite_natural_date_keywords("added:last_month", UTC), "added")
        assert lo == "2025-12-01T00:00:00Z"
        assert hi == "2026-01-01T00:00:00Z"

    def test_unknown_keyword_raises(self) -> None:
        # Unrecognized keywords must fail loudly rather than silently matching nothing.
        with pytest.raises(ValueError, match="Unknown keyword"):
            _datetime_range("bogus_keyword", UTC)
class TestWhooshQueryRewriting:
    """All Whoosh query syntax variants must be rewritten to ISO 8601 before Tantivy parses them."""

    @time_machine.travel(datetime(2026, 3, 28, 15, 0, tzinfo=UTC), tick=False)
    def test_compact_date_shim_rewrites_to_iso(self) -> None:
        # Legacy 14-digit compact timestamps must become ISO dates.
        result = rewrite_natural_date_keywords("created:20240115120000", UTC)
        assert "2024-01-15" in result
        assert "20240115120000" not in result

    @time_machine.travel(datetime(2026, 3, 28, 15, 0, tzinfo=UTC), tick=False)
    def test_relative_range_shim_removes_now(self) -> None:
        # Whoosh's "now" token must be fully substituted with concrete dates.
        result = rewrite_natural_date_keywords("added:[now-7d TO now]", UTC)
        assert "now" not in result
        assert "2026-03-" in result

    @time_machine.travel(datetime(2026, 3, 28, 12, 0, tzinfo=UTC), tick=False)
    def test_bracket_minus_7_days(self) -> None:
        lo, hi = _range(
            rewrite_natural_date_keywords("added:[-7 days to now]", UTC),
            "added",
        )
        assert lo == "2026-03-21T12:00:00Z"
        assert hi == "2026-03-28T12:00:00Z"

    @time_machine.travel(datetime(2026, 3, 28, 12, 0, tzinfo=UTC), tick=False)
    def test_bracket_minus_1_week(self) -> None:
        lo, hi = _range(
            rewrite_natural_date_keywords("added:[-1 week to now]", UTC),
            "added",
        )
        assert lo == "2026-03-21T12:00:00Z"
        assert hi == "2026-03-28T12:00:00Z"

    @time_machine.travel(datetime(2026, 3, 28, 12, 0, tzinfo=UTC), tick=False)
    def test_bracket_minus_1_month_uses_relativedelta(self) -> None:
        # relativedelta(months=1) from 2026-03-28 = 2026-02-28 (not 29)
        lo, hi = _range(
            rewrite_natural_date_keywords("created:[-1 month to now]", UTC),
            "created",
        )
        assert lo == "2026-02-28T12:00:00Z"
        assert hi == "2026-03-28T12:00:00Z"

    @time_machine.travel(datetime(2026, 3, 28, 12, 0, tzinfo=UTC), tick=False)
    def test_bracket_minus_1_year(self) -> None:
        lo, hi = _range(
            rewrite_natural_date_keywords("modified:[-1 year to now]", UTC),
            "modified",
        )
        assert lo == "2025-03-28T12:00:00Z"
        assert hi == "2026-03-28T12:00:00Z"

    @time_machine.travel(datetime(2026, 3, 28, 12, 0, tzinfo=UTC), tick=False)
    def test_bracket_plural_unit_hours(self) -> None:
        lo, hi = _range(
            rewrite_natural_date_keywords("added:[-3 hours to now]", UTC),
            "added",
        )
        assert lo == "2026-03-28T09:00:00Z"
        assert hi == "2026-03-28T12:00:00Z"

    @time_machine.travel(datetime(2026, 3, 28, 12, 0, tzinfo=UTC), tick=False)
    def test_bracket_case_insensitive(self) -> None:
        # Uppercase unit names and TO/NOW keywords must be handled identically.
        result = rewrite_natural_date_keywords("added:[-1 WEEK TO NOW]", UTC)
        assert "now" not in result.lower()
        lo, hi = _range(result, "added")
        assert lo == "2026-03-21T12:00:00Z"
        assert hi == "2026-03-28T12:00:00Z"

    @time_machine.travel(datetime(2026, 3, 28, 12, 0, tzinfo=UTC), tick=False)
    def test_relative_range_swaps_bounds_when_lo_exceeds_hi(self) -> None:
        # [now+1h TO now-1h] has lo > hi before substitution; they must be swapped
        lo, hi = _range(
            rewrite_natural_date_keywords("added:[now+1h TO now-1h]", UTC),
            "added",
        )
        assert lo == "2026-03-28T11:00:00Z"
        assert hi == "2026-03-28T13:00:00Z"

    def test_8digit_created_date_field_always_uses_utc_midnight(self) -> None:
        # created is a DateField: boundaries are always UTC midnight, no TZ offset
        result = rewrite_natural_date_keywords("created:20231201", EASTERN)
        lo, hi = _range(result, "created")
        assert lo == "2023-12-01T00:00:00Z"
        assert hi == "2023-12-02T00:00:00Z"

    def test_8digit_added_datetime_field_converts_local_midnight_to_utc(self) -> None:
        # added is DateTimeField: midnight Dec 1 Eastern (EST = UTC-5) = 05:00 UTC
        result = rewrite_natural_date_keywords("added:20231201", EASTERN)
        lo, hi = _range(result, "added")
        assert lo == "2023-12-01T05:00:00Z"
        assert hi == "2023-12-02T05:00:00Z"

    def test_8digit_modified_datetime_field_converts_local_midnight_to_utc(
        self,
    ) -> None:
        # modified follows the same DateTimeField conversion rules as added.
        result = rewrite_natural_date_keywords("modified:20231201", EASTERN)
        lo, hi = _range(result, "modified")
        assert lo == "2023-12-01T05:00:00Z"
        assert hi == "2023-12-02T05:00:00Z"

    def test_8digit_invalid_date_passes_through_unchanged(self) -> None:
        # Month=13 is not a valid date, so the token must be left untouched.
        assert rewrite_natural_date_keywords("added:20231340", UTC) == "added:20231340"

    def test_compact_14digit_invalid_date_passes_through_unchanged(self) -> None:
        # Month=13 makes datetime() raise ValueError; the token must be left as-is
        assert _rewrite_compact_date("20231300120000") == "20231300120000"
class TestParseUserQuery:
|
||||
"""parse_user_query runs the full preprocessing pipeline."""
|
||||
|
||||
@pytest.fixture
|
||||
def query_index(self) -> tantivy.Index:
|
||||
schema = build_schema()
|
||||
idx = tantivy.Index(schema, path=None)
|
||||
register_tokenizers(idx, "")
|
||||
return idx
|
||||
|
||||
def test_returns_tantivy_query(self, query_index: tantivy.Index) -> None:
|
||||
assert isinstance(parse_user_query(query_index, "invoice", UTC), tantivy.Query)
|
||||
|
||||
def test_fuzzy_mode_does_not_raise(
|
||||
self,
|
||||
query_index: tantivy.Index,
|
||||
settings,
|
||||
) -> None:
|
||||
settings.ADVANCED_FUZZY_SEARCH_THRESHOLD = 0.5
|
||||
assert isinstance(parse_user_query(query_index, "invoice", UTC), tantivy.Query)
|
||||
|
||||
def test_date_rewriting_applied_before_tantivy_parse(
|
||||
self,
|
||||
query_index: tantivy.Index,
|
||||
) -> None:
|
||||
# created:today must be rewritten to an ISO range before Tantivy parses it;
|
||||
# if passed raw, Tantivy would reject "today" as an invalid date value
|
||||
with time_machine.travel(datetime(2026, 3, 28, 12, 0, tzinfo=UTC), tick=False):
|
||||
q = parse_user_query(query_index, "created:today", UTC)
|
||||
assert isinstance(q, tantivy.Query)
|
||||
|
||||
|
||||
class TestPassthrough:
|
||||
"""Queries without field prefixes or unrelated content pass through unchanged."""
|
||||
|
||||
def test_bare_keyword_no_field_prefix_unchanged(self) -> None:
|
||||
# Bare 'today' with no field: prefix passes through unchanged
|
||||
result = rewrite_natural_date_keywords("bank statement today", UTC)
|
||||
assert "today" in result
|
||||
|
||||
def test_unrelated_query_unchanged(self) -> None:
|
||||
assert rewrite_natural_date_keywords("title:invoice", UTC) == "title:invoice"
|
||||
|
||||
|
||||
class TestNormalizeQuery:
|
||||
"""normalize_query expands comma-separated values and collapses whitespace."""
|
||||
|
||||
def test_normalize_expands_comma_separated_tags(self) -> None:
|
||||
assert normalize_query("tag:foo,bar") == "tag:foo AND tag:bar"
|
||||
|
||||
def test_normalize_expands_three_values(self) -> None:
|
||||
assert normalize_query("tag:foo,bar,baz") == "tag:foo AND tag:bar AND tag:baz"
|
||||
|
||||
def test_normalize_collapses_whitespace(self) -> None:
|
||||
assert normalize_query("bank statement") == "bank statement"
|
||||
|
||||
def test_normalize_no_commas_unchanged(self) -> None:
|
||||
assert normalize_query("bank statement") == "bank statement"
|
||||
|
||||
|
||||
class TestPermissionFilter:
|
||||
"""
|
||||
build_permission_filter tests use an in-memory index — no DB access needed.
|
||||
|
||||
Users are constructed as unsaved model instances (django_user_model(pk=N))
|
||||
so no database round-trip occurs; only .pk is read by build_permission_filter.
|
||||
"""
|
||||
|
||||
@pytest.fixture
|
||||
def perm_index(self) -> tantivy.Index:
|
||||
schema = build_schema()
|
||||
idx = tantivy.Index(schema, path=None)
|
||||
register_tokenizers(idx, "")
|
||||
return idx
|
||||
|
||||
def _add_doc(
|
||||
self,
|
||||
idx: tantivy.Index,
|
||||
doc_id: int,
|
||||
owner_id: int | None = None,
|
||||
viewer_ids: tuple[int, ...] = (),
|
||||
) -> None:
|
||||
writer = idx.writer()
|
||||
doc = tantivy.Document()
|
||||
doc.add_unsigned("id", doc_id)
|
||||
# Only add owner_id field if the document has an owner
|
||||
if owner_id is not None:
|
||||
doc.add_unsigned("owner_id", owner_id)
|
||||
for vid in viewer_ids:
|
||||
doc.add_unsigned("viewer_id", vid)
|
||||
writer.add_document(doc)
|
||||
writer.commit()
|
||||
idx.reload()
|
||||
|
||||
def test_perm_no_owner_visible_to_any_user(
|
||||
self,
|
||||
perm_index: tantivy.Index,
|
||||
django_user_model: type[AbstractBaseUser],
|
||||
) -> None:
|
||||
"""Documents with no owner must be visible to every user."""
|
||||
self._add_doc(perm_index, doc_id=1, owner_id=None)
|
||||
user = django_user_model(pk=99)
|
||||
perm = build_permission_filter(perm_index.schema, user)
|
||||
assert perm_index.searcher().search(perm, limit=10).count == 1
|
||||
|
||||
def test_perm_owned_by_user_is_visible(
|
||||
self,
|
||||
perm_index: tantivy.Index,
|
||||
django_user_model: type[AbstractBaseUser],
|
||||
) -> None:
|
||||
"""A document owned by the requesting user must be visible."""
|
||||
self._add_doc(perm_index, doc_id=2, owner_id=42)
|
||||
user = django_user_model(pk=42)
|
||||
perm = build_permission_filter(perm_index.schema, user)
|
||||
assert perm_index.searcher().search(perm, limit=10).count == 1
|
||||
|
||||
def test_perm_owned_by_other_not_visible(
|
||||
self,
|
||||
perm_index: tantivy.Index,
|
||||
django_user_model: type[AbstractBaseUser],
|
||||
) -> None:
|
||||
"""A document owned by a different user must not be visible."""
|
||||
self._add_doc(perm_index, doc_id=3, owner_id=42)
|
||||
user = django_user_model(pk=99)
|
||||
perm = build_permission_filter(perm_index.schema, user)
|
||||
assert perm_index.searcher().search(perm, limit=10).count == 0
|
||||
|
||||
def test_perm_shared_viewer_is_visible(
|
||||
self,
|
||||
perm_index: tantivy.Index,
|
||||
django_user_model: type[AbstractBaseUser],
|
||||
) -> None:
|
||||
"""A document explicitly shared with a user must be visible to that user."""
|
||||
self._add_doc(perm_index, doc_id=4, owner_id=42, viewer_ids=(99,))
|
||||
user = django_user_model(pk=99)
|
||||
perm = build_permission_filter(perm_index.schema, user)
|
||||
assert perm_index.searcher().search(perm, limit=10).count == 1
|
||||
|
||||
def test_perm_only_owned_docs_hidden_from_others(
|
||||
self,
|
||||
perm_index: tantivy.Index,
|
||||
django_user_model: type[AbstractBaseUser],
|
||||
) -> None:
|
||||
"""Only unowned documents appear when the user owns none of them."""
|
||||
self._add_doc(perm_index, doc_id=5, owner_id=10) # owned by 10
|
||||
self._add_doc(perm_index, doc_id=6, owner_id=None) # unowned
|
||||
user = django_user_model(pk=20)
|
||||
perm = build_permission_filter(perm_index.schema, user)
|
||||
assert perm_index.searcher().search(perm, limit=10).count == 1 # only unowned
|
||||
63
src/documents/tests/search/test_schema.py
Normal file
63
src/documents/tests/search/test_schema.py
Normal file
@@ -0,0 +1,63 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import pytest
|
||||
|
||||
from documents.search._schema import SCHEMA_VERSION
|
||||
from documents.search._schema import needs_rebuild
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pathlib import Path
|
||||
|
||||
from pytest_django.fixtures import SettingsWrapper
|
||||
|
||||
pytestmark = pytest.mark.search
|
||||
|
||||
|
||||
class TestNeedsRebuild:
|
||||
"""needs_rebuild covers all sentinel-file states that require a full reindex."""
|
||||
|
||||
def test_returns_true_when_version_file_missing(self, index_dir: Path) -> None:
|
||||
assert needs_rebuild(index_dir) is True
|
||||
|
||||
def test_returns_false_when_version_and_language_match(
|
||||
self,
|
||||
index_dir: Path,
|
||||
settings: SettingsWrapper,
|
||||
) -> None:
|
||||
settings.SEARCH_LANGUAGE = "en"
|
||||
(index_dir / ".schema_version").write_text(str(SCHEMA_VERSION))
|
||||
(index_dir / ".schema_language").write_text("en")
|
||||
assert needs_rebuild(index_dir) is False
|
||||
|
||||
def test_returns_true_on_schema_version_mismatch(self, index_dir: Path) -> None:
|
||||
(index_dir / ".schema_version").write_text(str(SCHEMA_VERSION - 1))
|
||||
assert needs_rebuild(index_dir) is True
|
||||
|
||||
def test_returns_true_when_version_file_not_an_integer(
|
||||
self,
|
||||
index_dir: Path,
|
||||
) -> None:
|
||||
(index_dir / ".schema_version").write_text("not-a-number")
|
||||
assert needs_rebuild(index_dir) is True
|
||||
|
||||
def test_returns_true_when_language_sentinel_missing(
|
||||
self,
|
||||
index_dir: Path,
|
||||
settings: SettingsWrapper,
|
||||
) -> None:
|
||||
settings.SEARCH_LANGUAGE = "en"
|
||||
(index_dir / ".schema_version").write_text(str(SCHEMA_VERSION))
|
||||
# .schema_language intentionally absent
|
||||
assert needs_rebuild(index_dir) is True
|
||||
|
||||
def test_returns_true_when_language_sentinel_content_differs(
|
||||
self,
|
||||
index_dir: Path,
|
||||
settings: SettingsWrapper,
|
||||
) -> None:
|
||||
settings.SEARCH_LANGUAGE = "de"
|
||||
(index_dir / ".schema_version").write_text(str(SCHEMA_VERSION))
|
||||
(index_dir / ".schema_language").write_text("en")
|
||||
assert needs_rebuild(index_dir) is True
|
||||
111
src/documents/tests/search/test_tokenizer.py
Normal file
111
src/documents/tests/search/test_tokenizer.py
Normal file
@@ -0,0 +1,111 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import pytest
|
||||
import tantivy
|
||||
|
||||
from documents.search._tokenizer import _bigram_analyzer
|
||||
from documents.search._tokenizer import _paperless_text
|
||||
from documents.search._tokenizer import _simple_search_analyzer
|
||||
from documents.search._tokenizer import register_tokenizers
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from _pytest.logging import LogCaptureFixture
|
||||
|
||||
pytestmark = pytest.mark.search
|
||||
|
||||
|
||||
class TestTokenizers:
|
||||
@pytest.fixture
|
||||
def content_index(self) -> tantivy.Index:
|
||||
"""Index with just a content field for ASCII folding tests."""
|
||||
sb = tantivy.SchemaBuilder()
|
||||
sb.add_text_field("content", stored=True, tokenizer_name="paperless_text")
|
||||
schema = sb.build()
|
||||
idx = tantivy.Index(schema, path=None)
|
||||
idx.register_tokenizer("paperless_text", _paperless_text(""))
|
||||
return idx
|
||||
|
||||
@pytest.fixture
|
||||
def bigram_index(self) -> tantivy.Index:
|
||||
"""Index with bigram field for CJK tests."""
|
||||
sb = tantivy.SchemaBuilder()
|
||||
sb.add_text_field(
|
||||
"bigram_content",
|
||||
stored=False,
|
||||
tokenizer_name="bigram_analyzer",
|
||||
)
|
||||
schema = sb.build()
|
||||
idx = tantivy.Index(schema, path=None)
|
||||
idx.register_tokenizer("bigram_analyzer", _bigram_analyzer())
|
||||
return idx
|
||||
|
||||
@pytest.fixture
|
||||
def simple_search_index(self) -> tantivy.Index:
|
||||
"""Index with simple-search field for Latin substring tests."""
|
||||
sb = tantivy.SchemaBuilder()
|
||||
sb.add_text_field(
|
||||
"simple_content",
|
||||
stored=False,
|
||||
tokenizer_name="simple_search_analyzer",
|
||||
)
|
||||
schema = sb.build()
|
||||
idx = tantivy.Index(schema, path=None)
|
||||
idx.register_tokenizer("simple_search_analyzer", _simple_search_analyzer())
|
||||
return idx
|
||||
|
||||
def test_ascii_fold_finds_accented_content(
|
||||
self,
|
||||
content_index: tantivy.Index,
|
||||
) -> None:
|
||||
"""ASCII folding allows searching accented text with plain ASCII queries."""
|
||||
writer = content_index.writer()
|
||||
doc = tantivy.Document()
|
||||
doc.add_text("content", "café résumé")
|
||||
writer.add_document(doc)
|
||||
writer.commit()
|
||||
content_index.reload()
|
||||
q = content_index.parse_query("cafe resume", ["content"])
|
||||
assert content_index.searcher().search(q, limit=5).count == 1
|
||||
|
||||
def test_bigram_finds_cjk_substring(self, bigram_index: tantivy.Index) -> None:
|
||||
"""Bigram tokenizer enables substring search in CJK languages without whitespace delimiters."""
|
||||
writer = bigram_index.writer()
|
||||
doc = tantivy.Document()
|
||||
doc.add_text("bigram_content", "東京都")
|
||||
writer.add_document(doc)
|
||||
writer.commit()
|
||||
bigram_index.reload()
|
||||
q = bigram_index.parse_query("東京", ["bigram_content"])
|
||||
assert bigram_index.searcher().search(q, limit=5).count == 1
|
||||
|
||||
def test_simple_search_analyzer_supports_regex_substrings(
|
||||
self,
|
||||
simple_search_index: tantivy.Index,
|
||||
) -> None:
|
||||
"""Whitespace-preserving simple search analyzer supports substring regex matching."""
|
||||
writer = simple_search_index.writer()
|
||||
doc = tantivy.Document()
|
||||
doc.add_text("simple_content", "tag:invoice password-reset")
|
||||
writer.add_document(doc)
|
||||
writer.commit()
|
||||
simple_search_index.reload()
|
||||
q = tantivy.Query.regex_query(
|
||||
simple_search_index.schema,
|
||||
"simple_content",
|
||||
".*sswo.*",
|
||||
)
|
||||
assert simple_search_index.searcher().search(q, limit=5).count == 1
|
||||
|
||||
def test_unsupported_language_logs_warning(self, caplog: LogCaptureFixture) -> None:
|
||||
"""Unsupported language codes should log a warning and disable stemming gracefully."""
|
||||
sb = tantivy.SchemaBuilder()
|
||||
sb.add_text_field("content", stored=True, tokenizer_name="paperless_text")
|
||||
schema = sb.build()
|
||||
idx = tantivy.Index(schema, path=None)
|
||||
|
||||
with caplog.at_level(logging.WARNING, logger="paperless.search"):
|
||||
register_tokenizers(idx, "klingon")
|
||||
assert "klingon" in caplog.text
|
||||
@@ -1,6 +1,7 @@
|
||||
import types
|
||||
from unittest.mock import patch
|
||||
|
||||
import tantivy
|
||||
from django.contrib.admin.sites import AdminSite
|
||||
from django.contrib.auth.models import Permission
|
||||
from django.contrib.auth.models import User
|
||||
@@ -8,36 +9,54 @@ from django.test import TestCase
|
||||
from django.utils import timezone
|
||||
from rest_framework import status
|
||||
|
||||
from documents import index
|
||||
from documents.admin import DocumentAdmin
|
||||
from documents.admin import TagAdmin
|
||||
from documents.models import Document
|
||||
from documents.models import Tag
|
||||
from documents.search import get_backend
|
||||
from documents.search import reset_backend
|
||||
from documents.tests.utils import DirectoriesMixin
|
||||
from paperless.admin import PaperlessUserAdmin
|
||||
|
||||
|
||||
class TestDocumentAdmin(DirectoriesMixin, TestCase):
|
||||
def get_document_from_index(self, doc):
|
||||
ix = index.open_index()
|
||||
with ix.searcher() as searcher:
|
||||
return searcher.document(id=doc.id)
|
||||
backend = get_backend()
|
||||
searcher = backend._index.searcher()
|
||||
results = searcher.search(
|
||||
tantivy.Query.range_query(
|
||||
backend._schema,
|
||||
"id",
|
||||
tantivy.FieldType.Unsigned,
|
||||
doc.pk,
|
||||
doc.pk,
|
||||
),
|
||||
limit=1,
|
||||
)
|
||||
if results.hits:
|
||||
return searcher.doc(results.hits[0][1]).to_dict()
|
||||
return None
|
||||
|
||||
def setUp(self) -> None:
|
||||
super().setUp()
|
||||
reset_backend()
|
||||
self.doc_admin = DocumentAdmin(model=Document, admin_site=AdminSite())
|
||||
|
||||
def tearDown(self) -> None:
|
||||
reset_backend()
|
||||
super().tearDown()
|
||||
|
||||
def test_save_model(self) -> None:
|
||||
doc = Document.objects.create(title="test")
|
||||
|
||||
doc.title = "new title"
|
||||
self.doc_admin.save_model(None, doc, None, None)
|
||||
self.assertEqual(Document.objects.get(id=doc.id).title, "new title")
|
||||
self.assertEqual(self.get_document_from_index(doc)["id"], doc.id)
|
||||
self.assertEqual(self.get_document_from_index(doc)["id"], [doc.id])
|
||||
|
||||
def test_delete_model(self) -> None:
|
||||
doc = Document.objects.create(title="test")
|
||||
index.add_or_update_document(doc)
|
||||
get_backend().add_or_update(doc)
|
||||
self.assertIsNotNone(self.get_document_from_index(doc))
|
||||
|
||||
self.doc_admin.delete_model(None, doc)
|
||||
@@ -53,7 +72,7 @@ class TestDocumentAdmin(DirectoriesMixin, TestCase):
|
||||
checksum=f"{i:02}",
|
||||
)
|
||||
docs.append(doc)
|
||||
index.add_or_update_document(doc)
|
||||
get_backend().add_or_update(doc)
|
||||
|
||||
self.assertEqual(Document.objects.count(), 42)
|
||||
|
||||
|
||||
@@ -109,7 +109,7 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase):
|
||||
mime_type="application/pdf",
|
||||
)
|
||||
|
||||
with mock.patch("documents.index.remove_document_from_index"):
|
||||
with mock.patch("documents.search.get_backend"):
|
||||
resp = self.client.delete(f"/api/documents/{root.id}/versions/{root.id}/")
|
||||
|
||||
self.assertEqual(resp.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
@@ -137,10 +137,7 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase):
|
||||
content="v2-content",
|
||||
)
|
||||
|
||||
with (
|
||||
mock.patch("documents.index.remove_document_from_index"),
|
||||
mock.patch("documents.index.add_or_update_document"),
|
||||
):
|
||||
with mock.patch("documents.search.get_backend"):
|
||||
resp = self.client.delete(f"/api/documents/{root.id}/versions/{v2.id}/")
|
||||
|
||||
self.assertEqual(resp.status_code, status.HTTP_200_OK)
|
||||
@@ -149,10 +146,7 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase):
|
||||
root.refresh_from_db()
|
||||
self.assertEqual(root.content, "root-content")
|
||||
|
||||
with (
|
||||
mock.patch("documents.index.remove_document_from_index"),
|
||||
mock.patch("documents.index.add_or_update_document"),
|
||||
):
|
||||
with mock.patch("documents.search.get_backend"):
|
||||
resp = self.client.delete(f"/api/documents/{root.id}/versions/{v1.id}/")
|
||||
|
||||
self.assertEqual(resp.status_code, status.HTTP_200_OK)
|
||||
@@ -175,10 +169,7 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase):
|
||||
)
|
||||
version_id = version.id
|
||||
|
||||
with (
|
||||
mock.patch("documents.index.remove_document_from_index"),
|
||||
mock.patch("documents.index.add_or_update_document"),
|
||||
):
|
||||
with mock.patch("documents.search.get_backend"):
|
||||
resp = self.client.delete(
|
||||
f"/api/documents/{root.id}/versions/{version_id}/",
|
||||
)
|
||||
@@ -225,7 +216,7 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase):
|
||||
root_document=other_root,
|
||||
)
|
||||
|
||||
with mock.patch("documents.index.remove_document_from_index"):
|
||||
with mock.patch("documents.search.get_backend"):
|
||||
resp = self.client.delete(
|
||||
f"/api/documents/{root.id}/versions/{other_version.id}/",
|
||||
)
|
||||
@@ -245,10 +236,7 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase):
|
||||
root_document=root,
|
||||
)
|
||||
|
||||
with (
|
||||
mock.patch("documents.index.remove_document_from_index"),
|
||||
mock.patch("documents.index.add_or_update_document"),
|
||||
):
|
||||
with mock.patch("documents.search.get_backend"):
|
||||
resp = self.client.delete(
|
||||
f"/api/documents/{version.id}/versions/{version.id}/",
|
||||
)
|
||||
@@ -275,18 +263,17 @@ class TestDocumentVersioningApi(DirectoriesMixin, APITestCase):
|
||||
root_document=root,
|
||||
)
|
||||
|
||||
with (
|
||||
mock.patch("documents.index.remove_document_from_index") as remove_index,
|
||||
mock.patch("documents.index.add_or_update_document") as add_or_update,
|
||||
):
|
||||
with mock.patch("documents.search.get_backend") as mock_get_backend:
|
||||
mock_backend = mock.MagicMock()
|
||||
mock_get_backend.return_value = mock_backend
|
||||
resp = self.client.delete(
|
||||
f"/api/documents/{root.id}/versions/{version.id}/",
|
||||
)
|
||||
|
||||
self.assertEqual(resp.status_code, status.HTTP_200_OK)
|
||||
remove_index.assert_called_once_with(version)
|
||||
add_or_update.assert_called_once()
|
||||
self.assertEqual(add_or_update.call_args[0][0].id, root.id)
|
||||
mock_backend.remove.assert_called_once_with(version.pk)
|
||||
mock_backend.add_or_update.assert_called_once()
|
||||
self.assertEqual(mock_backend.add_or_update.call_args[0][0].id, root.id)
|
||||
|
||||
def test_delete_version_returns_403_without_permission(self) -> None:
|
||||
owner = User.objects.create_user(username="owner")
|
||||
|
||||
@@ -2,6 +2,7 @@ import datetime
|
||||
from datetime import timedelta
|
||||
from unittest import mock
|
||||
|
||||
import pytest
|
||||
from dateutil.relativedelta import relativedelta
|
||||
from django.contrib.auth.models import Group
|
||||
from django.contrib.auth.models import Permission
|
||||
@@ -11,9 +12,7 @@ from django.utils import timezone
|
||||
from guardian.shortcuts import assign_perm
|
||||
from rest_framework import status
|
||||
from rest_framework.test import APITestCase
|
||||
from whoosh.writing import AsyncWriter
|
||||
|
||||
from documents import index
|
||||
from documents.bulk_edit import set_permissions
|
||||
from documents.models import Correspondent
|
||||
from documents.models import CustomField
|
||||
@@ -25,18 +24,27 @@ from documents.models import SavedView
|
||||
from documents.models import StoragePath
|
||||
from documents.models import Tag
|
||||
from documents.models import Workflow
|
||||
from documents.search import get_backend
|
||||
from documents.search import reset_backend
|
||||
from documents.tests.utils import DirectoriesMixin
|
||||
from paperless_mail.models import MailAccount
|
||||
from paperless_mail.models import MailRule
|
||||
|
||||
pytestmark = pytest.mark.search
|
||||
|
||||
|
||||
class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
def setUp(self) -> None:
|
||||
super().setUp()
|
||||
reset_backend()
|
||||
|
||||
self.user = User.objects.create_superuser(username="temp_admin")
|
||||
self.client.force_authenticate(user=self.user)
|
||||
|
||||
def tearDown(self) -> None:
|
||||
reset_backend()
|
||||
super().tearDown()
|
||||
|
||||
def test_search(self) -> None:
|
||||
d1 = Document.objects.create(
|
||||
title="invoice",
|
||||
@@ -57,13 +65,11 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
checksum="C",
|
||||
original_filename="someepdf.pdf",
|
||||
)
|
||||
with AsyncWriter(index.open_index()) as writer:
|
||||
# Note to future self: there is a reason we dont use a model signal handler to update the index: some operations edit many documents at once
|
||||
# (retagger, renamer) and we don't want to open a writer for each of these, but rather perform the entire operation with one writer.
|
||||
# That's why we can't open the writer in a model on_save handler or something.
|
||||
index.update_document(writer, d1)
|
||||
index.update_document(writer, d2)
|
||||
index.update_document(writer, d3)
|
||||
backend = get_backend()
|
||||
backend.add_or_update(d1)
|
||||
backend.add_or_update(d2)
|
||||
backend.add_or_update(d3)
|
||||
|
||||
response = self.client.get("/api/documents/?query=bank")
|
||||
results = response.data["results"]
|
||||
self.assertEqual(response.data["count"], 3)
|
||||
@@ -85,6 +91,135 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
self.assertEqual(response.data["count"], 0)
|
||||
self.assertEqual(len(results), 0)
|
||||
|
||||
def test_simple_text_search(self) -> None:
|
||||
tagged = Tag.objects.create(name="invoice")
|
||||
matching_doc = Document.objects.create(
|
||||
title="Quarterly summary",
|
||||
content="Monthly bank report",
|
||||
checksum="T1",
|
||||
pk=11,
|
||||
)
|
||||
matching_doc.tags.add(tagged)
|
||||
|
||||
metadata_only_doc = Document.objects.create(
|
||||
title="Completely unrelated",
|
||||
content="No matching terms here",
|
||||
checksum="T2",
|
||||
pk=12,
|
||||
)
|
||||
metadata_only_doc.tags.add(tagged)
|
||||
|
||||
backend = get_backend()
|
||||
backend.add_or_update(matching_doc)
|
||||
backend.add_or_update(metadata_only_doc)
|
||||
|
||||
response = self.client.get("/api/documents/?text=monthly")
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
self.assertEqual(response.data["count"], 1)
|
||||
self.assertEqual(response.data["results"][0]["id"], matching_doc.id)
|
||||
|
||||
response = self.client.get("/api/documents/?text=tag:invoice")
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
self.assertEqual(response.data["count"], 0)
|
||||
|
||||
def test_simple_text_search_matches_substrings(self) -> None:
|
||||
matching_doc = Document.objects.create(
|
||||
title="Quarterly summary",
|
||||
content="Password reset instructions",
|
||||
checksum="T5",
|
||||
pk=15,
|
||||
)
|
||||
|
||||
backend = get_backend()
|
||||
backend.add_or_update(matching_doc)
|
||||
|
||||
response = self.client.get("/api/documents/?text=pass")
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
self.assertEqual(response.data["count"], 1)
|
||||
self.assertEqual(response.data["results"][0]["id"], matching_doc.id)
|
||||
|
||||
response = self.client.get("/api/documents/?text=sswo")
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
self.assertEqual(response.data["count"], 1)
|
||||
self.assertEqual(response.data["results"][0]["id"], matching_doc.id)
|
||||
|
||||
response = self.client.get("/api/documents/?text=sswo re")
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
self.assertEqual(response.data["count"], 1)
|
||||
self.assertEqual(response.data["results"][0]["id"], matching_doc.id)
|
||||
|
||||
def test_simple_text_search_does_not_match_on_partial_term_overlap(self) -> None:
|
||||
non_matching_doc = Document.objects.create(
|
||||
title="Adobe Acrobat PDF Files",
|
||||
content="Lorem ipsum dolor sit amet, consectetur adipiscing elit.",
|
||||
checksum="T7",
|
||||
pk=17,
|
||||
)
|
||||
|
||||
backend = get_backend()
|
||||
backend.add_or_update(non_matching_doc)
|
||||
|
||||
response = self.client.get("/api/documents/?text=raptor")
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
self.assertEqual(response.data["count"], 0)
|
||||
|
||||
def test_simple_title_search(self) -> None:
|
||||
title_match = Document.objects.create(
|
||||
title="Quarterly summary",
|
||||
content="No matching content here",
|
||||
checksum="T3",
|
||||
pk=13,
|
||||
)
|
||||
content_only = Document.objects.create(
|
||||
title="Completely unrelated",
|
||||
content="Quarterly summary appears only in content",
|
||||
checksum="T4",
|
||||
pk=14,
|
||||
)
|
||||
|
||||
backend = get_backend()
|
||||
backend.add_or_update(title_match)
|
||||
backend.add_or_update(content_only)
|
||||
|
||||
response = self.client.get("/api/documents/?title_search=quarterly")
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
self.assertEqual(response.data["count"], 1)
|
||||
self.assertEqual(response.data["results"][0]["id"], title_match.id)
|
||||
|
||||
def test_simple_title_search_matches_substrings(self) -> None:
|
||||
title_match = Document.objects.create(
|
||||
title="Password handbook",
|
||||
content="No matching content here",
|
||||
checksum="T6",
|
||||
pk=16,
|
||||
)
|
||||
|
||||
backend = get_backend()
|
||||
backend.add_or_update(title_match)
|
||||
|
||||
response = self.client.get("/api/documents/?title_search=pass")
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
self.assertEqual(response.data["count"], 1)
|
||||
self.assertEqual(response.data["results"][0]["id"], title_match.id)
|
||||
|
||||
response = self.client.get("/api/documents/?title_search=sswo")
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
self.assertEqual(response.data["count"], 1)
|
||||
self.assertEqual(response.data["results"][0]["id"], title_match.id)
|
||||
|
||||
response = self.client.get("/api/documents/?title_search=sswo hand")
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
self.assertEqual(response.data["count"], 1)
|
||||
self.assertEqual(response.data["results"][0]["id"], title_match.id)
|
||||
|
||||
def test_search_rejects_multiple_search_modes(self) -> None:
|
||||
response = self.client.get("/api/documents/?text=bank&query=bank")
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertEqual(
|
||||
response.data["detail"],
|
||||
"Specify only one of text, title_search, query, or more_like_id.",
|
||||
)
|
||||
|
||||
def test_search_returns_all_for_api_version_9(self) -> None:
|
||||
d1 = Document.objects.create(
|
||||
title="invoice",
|
||||
@@ -98,9 +233,9 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
checksum="B",
|
||||
pk=2,
|
||||
)
|
||||
with AsyncWriter(index.open_index()) as writer:
|
||||
index.update_document(writer, d1)
|
||||
index.update_document(writer, d2)
|
||||
backend = get_backend()
|
||||
backend.add_or_update(d1)
|
||||
backend.add_or_update(d2)
|
||||
|
||||
response = self.client.get(
|
||||
"/api/documents/?query=bank",
|
||||
@@ -127,8 +262,7 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
)
|
||||
matching_doc.tags.add(tag)
|
||||
|
||||
with AsyncWriter(index.open_index()) as writer:
|
||||
index.update_document(writer, matching_doc)
|
||||
get_backend().add_or_update(matching_doc)
|
||||
|
||||
response = self.client.get(
|
||||
"/api/documents/?query=bank&include_selection_data=true",
|
||||
@@ -187,10 +321,10 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
value_int=20,
|
||||
)
|
||||
|
||||
with AsyncWriter(index.open_index()) as writer:
|
||||
index.update_document(writer, d1)
|
||||
index.update_document(writer, d2)
|
||||
index.update_document(writer, d3)
|
||||
backend = get_backend()
|
||||
backend.add_or_update(d1)
|
||||
backend.add_or_update(d2)
|
||||
backend.add_or_update(d3)
|
||||
|
||||
response = self.client.get(
|
||||
f"/api/documents/?query=match&ordering=custom_field_{custom_field.pk}",
|
||||
@@ -211,15 +345,15 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
)
|
||||
|
||||
def test_search_multi_page(self) -> None:
|
||||
with AsyncWriter(index.open_index()) as writer:
|
||||
for i in range(55):
|
||||
doc = Document.objects.create(
|
||||
checksum=str(i),
|
||||
pk=i + 1,
|
||||
title=f"Document {i + 1}",
|
||||
content="content",
|
||||
)
|
||||
index.update_document(writer, doc)
|
||||
backend = get_backend()
|
||||
for i in range(55):
|
||||
doc = Document.objects.create(
|
||||
checksum=str(i),
|
||||
pk=i + 1,
|
||||
title=f"Document {i + 1}",
|
||||
content="content",
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
|
||||
# This is here so that we test that no document gets returned twice (might happen if the paging is not working)
|
||||
seen_ids = []
|
||||
@@ -246,15 +380,15 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
seen_ids.append(result["id"])
|
||||
|
||||
def test_search_invalid_page(self) -> None:
|
||||
with AsyncWriter(index.open_index()) as writer:
|
||||
for i in range(15):
|
||||
doc = Document.objects.create(
|
||||
checksum=str(i),
|
||||
pk=i + 1,
|
||||
title=f"Document {i + 1}",
|
||||
content="content",
|
||||
)
|
||||
index.update_document(writer, doc)
|
||||
backend = get_backend()
|
||||
for i in range(15):
|
||||
doc = Document.objects.create(
|
||||
checksum=str(i),
|
||||
pk=i + 1,
|
||||
title=f"Document {i + 1}",
|
||||
content="content",
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
|
||||
response = self.client.get("/api/documents/?query=content&page=0&page_size=10")
|
||||
self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND)
|
||||
@@ -292,26 +426,25 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
pk=3,
|
||||
checksum="C",
|
||||
)
|
||||
with index.open_index_writer() as writer:
|
||||
index.update_document(writer, d1)
|
||||
index.update_document(writer, d2)
|
||||
index.update_document(writer, d3)
|
||||
backend = get_backend()
|
||||
backend.add_or_update(d1)
|
||||
backend.add_or_update(d2)
|
||||
backend.add_or_update(d3)
|
||||
|
||||
response = self.client.get("/api/documents/?query=added:[-1 week to now]")
|
||||
results = response.data["results"]
|
||||
# Expect 3 documents returned
|
||||
self.assertEqual(len(results), 3)
|
||||
|
||||
for idx, subset in enumerate(
|
||||
[
|
||||
{"id": 1, "title": "invoice"},
|
||||
{"id": 2, "title": "bank statement 1"},
|
||||
{"id": 3, "title": "bank statement 3"},
|
||||
],
|
||||
):
|
||||
result = results[idx]
|
||||
# Assert subset in results
|
||||
self.assertDictEqual(result, {**result, **subset})
|
||||
result_map = {r["id"]: r for r in results}
|
||||
self.assertEqual(set(result_map.keys()), {1, 2, 3})
|
||||
for subset in [
|
||||
{"id": 1, "title": "invoice"},
|
||||
{"id": 2, "title": "bank statement 1"},
|
||||
{"id": 3, "title": "bank statement 3"},
|
||||
]:
|
||||
r = result_map[subset["id"]]
|
||||
self.assertDictEqual(r, {**r, **subset})
|
||||
|
||||
@override_settings(
|
||||
TIME_ZONE="America/Chicago",
|
||||
@@ -347,10 +480,10 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
# 7 days, 1 hour and 1 minute ago
|
||||
added=timezone.now() - timedelta(days=7, hours=1, minutes=1),
|
||||
)
|
||||
with index.open_index_writer() as writer:
|
||||
index.update_document(writer, d1)
|
||||
index.update_document(writer, d2)
|
||||
index.update_document(writer, d3)
|
||||
backend = get_backend()
|
||||
backend.add_or_update(d1)
|
||||
backend.add_or_update(d2)
|
||||
backend.add_or_update(d3)
|
||||
|
||||
response = self.client.get("/api/documents/?query=added:[-1 week to now]")
|
||||
results = response.data["results"]
|
||||
@@ -358,12 +491,14 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
# Expect 2 documents returned
|
||||
self.assertEqual(len(results), 2)
|
||||
|
||||
for idx, subset in enumerate(
|
||||
[{"id": 1, "title": "invoice"}, {"id": 2, "title": "bank statement 1"}],
|
||||
):
|
||||
result = results[idx]
|
||||
# Assert subset in results
|
||||
self.assertDictEqual(result, {**result, **subset})
|
||||
result_map = {r["id"]: r for r in results}
|
||||
self.assertEqual(set(result_map.keys()), {1, 2})
|
||||
for subset in [
|
||||
{"id": 1, "title": "invoice"},
|
||||
{"id": 2, "title": "bank statement 1"},
|
||||
]:
|
||||
r = result_map[subset["id"]]
|
||||
self.assertDictEqual(r, {**r, **subset})
|
||||
|
||||
@override_settings(
|
||||
TIME_ZONE="Europe/Sofia",
|
||||
@@ -399,10 +534,10 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
# 7 days, 1 hour and 1 minute ago
|
||||
added=timezone.now() - timedelta(days=7, hours=1, minutes=1),
|
||||
)
|
||||
with index.open_index_writer() as writer:
|
||||
index.update_document(writer, d1)
|
||||
index.update_document(writer, d2)
|
||||
index.update_document(writer, d3)
|
||||
backend = get_backend()
|
||||
backend.add_or_update(d1)
|
||||
backend.add_or_update(d2)
|
||||
backend.add_or_update(d3)
|
||||
|
||||
response = self.client.get("/api/documents/?query=added:[-1 week to now]")
|
||||
results = response.data["results"]
|
||||
@@ -410,12 +545,14 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
# Expect 2 documents returned
|
||||
self.assertEqual(len(results), 2)
|
||||
|
||||
for idx, subset in enumerate(
|
||||
[{"id": 1, "title": "invoice"}, {"id": 2, "title": "bank statement 1"}],
|
||||
):
|
||||
result = results[idx]
|
||||
# Assert subset in results
|
||||
self.assertDictEqual(result, {**result, **subset})
|
||||
result_map = {r["id"]: r for r in results}
|
||||
self.assertEqual(set(result_map.keys()), {1, 2})
|
||||
for subset in [
|
||||
{"id": 1, "title": "invoice"},
|
||||
{"id": 2, "title": "bank statement 1"},
|
||||
]:
|
||||
r = result_map[subset["id"]]
|
||||
self.assertDictEqual(r, {**r, **subset})
|
||||
|
||||
def test_search_added_in_last_month(self) -> None:
|
||||
"""
|
||||
@@ -451,10 +588,10 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
added=timezone.now() - timedelta(days=7, hours=1, minutes=1),
|
||||
)
|
||||
|
||||
with index.open_index_writer() as writer:
|
||||
index.update_document(writer, d1)
|
||||
index.update_document(writer, d2)
|
||||
index.update_document(writer, d3)
|
||||
backend = get_backend()
|
||||
backend.add_or_update(d1)
|
||||
backend.add_or_update(d2)
|
||||
backend.add_or_update(d3)
|
||||
|
||||
response = self.client.get("/api/documents/?query=added:[-1 month to now]")
|
||||
results = response.data["results"]
|
||||
@@ -462,12 +599,14 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
# Expect 2 documents returned
|
||||
self.assertEqual(len(results), 2)
|
||||
|
||||
for idx, subset in enumerate(
|
||||
[{"id": 1, "title": "invoice"}, {"id": 3, "title": "bank statement 3"}],
|
||||
):
|
||||
result = results[idx]
|
||||
# Assert subset in results
|
||||
self.assertDictEqual(result, {**result, **subset})
|
||||
result_map = {r["id"]: r for r in results}
|
||||
self.assertEqual(set(result_map.keys()), {1, 3})
|
||||
for subset in [
|
||||
{"id": 1, "title": "invoice"},
|
||||
{"id": 3, "title": "bank statement 3"},
|
||||
]:
|
||||
r = result_map[subset["id"]]
|
||||
self.assertDictEqual(r, {**r, **subset})
|
||||
|
||||
@override_settings(
|
||||
TIME_ZONE="America/Denver",
|
||||
@@ -507,10 +646,10 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
added=timezone.now() - timedelta(days=7, hours=1, minutes=1),
|
||||
)
|
||||
|
||||
with index.open_index_writer() as writer:
|
||||
index.update_document(writer, d1)
|
||||
index.update_document(writer, d2)
|
||||
index.update_document(writer, d3)
|
||||
backend = get_backend()
|
||||
backend.add_or_update(d1)
|
||||
backend.add_or_update(d2)
|
||||
backend.add_or_update(d3)
|
||||
|
||||
response = self.client.get("/api/documents/?query=added:[-1 month to now]")
|
||||
results = response.data["results"]
|
||||
@@ -518,12 +657,14 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
# Expect 2 documents returned
|
||||
self.assertEqual(len(results), 2)
|
||||
|
||||
for idx, subset in enumerate(
|
||||
[{"id": 1, "title": "invoice"}, {"id": 3, "title": "bank statement 3"}],
|
||||
):
|
||||
result = results[idx]
|
||||
# Assert subset in results
|
||||
self.assertDictEqual(result, {**result, **subset})
|
||||
result_map = {r["id"]: r for r in results}
|
||||
self.assertEqual(set(result_map.keys()), {1, 3})
|
||||
for subset in [
|
||||
{"id": 1, "title": "invoice"},
|
||||
{"id": 3, "title": "bank statement 3"},
|
||||
]:
|
||||
r = result_map[subset["id"]]
|
||||
self.assertDictEqual(r, {**r, **subset})
|
||||
|
||||
@override_settings(
|
||||
TIME_ZONE="Europe/Sofia",
|
||||
@@ -563,10 +704,10 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
# Django converts dates to UTC
|
||||
d3.refresh_from_db()
|
||||
|
||||
with index.open_index_writer() as writer:
|
||||
index.update_document(writer, d1)
|
||||
index.update_document(writer, d2)
|
||||
index.update_document(writer, d3)
|
||||
backend = get_backend()
|
||||
backend.add_or_update(d1)
|
||||
backend.add_or_update(d2)
|
||||
backend.add_or_update(d3)
|
||||
|
||||
response = self.client.get("/api/documents/?query=added:20231201")
|
||||
results = response.data["results"]
|
||||
@@ -574,12 +715,8 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
# Expect 1 document returned
|
||||
self.assertEqual(len(results), 1)
|
||||
|
||||
for idx, subset in enumerate(
|
||||
[{"id": 3, "title": "bank statement 3"}],
|
||||
):
|
||||
result = results[idx]
|
||||
# Assert subset in results
|
||||
self.assertDictEqual(result, {**result, **subset})
|
||||
self.assertEqual(results[0]["id"], 3)
|
||||
self.assertEqual(results[0]["title"], "bank statement 3")
|
||||
|
||||
def test_search_added_invalid_date(self) -> None:
|
||||
"""
|
||||
@@ -588,7 +725,7 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
WHEN:
|
||||
- Query with invalid added date
|
||||
THEN:
|
||||
- No documents returned
|
||||
- 400 Bad Request returned (Tantivy rejects invalid date field syntax)
|
||||
"""
|
||||
d1 = Document.objects.create(
|
||||
title="invoice",
|
||||
@@ -597,16 +734,14 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
pk=1,
|
||||
)
|
||||
|
||||
with index.open_index_writer() as writer:
|
||||
index.update_document(writer, d1)
|
||||
get_backend().add_or_update(d1)
|
||||
|
||||
response = self.client.get("/api/documents/?query=added:invalid-date")
|
||||
results = response.data["results"]
|
||||
|
||||
# Expect 0 document returned
|
||||
self.assertEqual(len(results), 0)
|
||||
# Tantivy rejects unparsable field queries with a 400
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
|
||||
@mock.patch("documents.index.autocomplete")
|
||||
@mock.patch("documents.search._backend.TantivyBackend.autocomplete")
|
||||
def test_search_autocomplete_limits(self, m) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
@@ -618,7 +753,7 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
- Limit requests are obeyed
|
||||
"""
|
||||
|
||||
m.side_effect = lambda ix, term, limit, user: [term for _ in range(limit)]
|
||||
m.side_effect = lambda term, limit, user=None: [term for _ in range(limit)]
|
||||
|
||||
response = self.client.get("/api/search/autocomplete/?term=test")
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
@@ -671,32 +806,29 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
owner=u1,
|
||||
)
|
||||
|
||||
with AsyncWriter(index.open_index()) as writer:
|
||||
index.update_document(writer, d1)
|
||||
index.update_document(writer, d2)
|
||||
index.update_document(writer, d3)
|
||||
backend = get_backend()
|
||||
backend.add_or_update(d1)
|
||||
backend.add_or_update(d2)
|
||||
backend.add_or_update(d3)
|
||||
|
||||
response = self.client.get("/api/search/autocomplete/?term=app")
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
self.assertEqual(response.data, [b"apples", b"applebaum", b"appletini"])
|
||||
self.assertEqual(response.data, ["applebaum", "apples", "appletini"])
|
||||
|
||||
d3.owner = u2
|
||||
|
||||
with AsyncWriter(index.open_index()) as writer:
|
||||
index.update_document(writer, d3)
|
||||
d3.save()
|
||||
backend.add_or_update(d3)
|
||||
|
||||
response = self.client.get("/api/search/autocomplete/?term=app")
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
self.assertEqual(response.data, [b"apples", b"applebaum"])
|
||||
self.assertEqual(response.data, ["applebaum", "apples"])
|
||||
|
||||
assign_perm("view_document", u1, d3)
|
||||
|
||||
with AsyncWriter(index.open_index()) as writer:
|
||||
index.update_document(writer, d3)
|
||||
backend.add_or_update(d3)
|
||||
|
||||
response = self.client.get("/api/search/autocomplete/?term=app")
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
self.assertEqual(response.data, [b"apples", b"applebaum", b"appletini"])
|
||||
self.assertEqual(response.data, ["applebaum", "apples", "appletini"])
|
||||
|
||||
def test_search_autocomplete_field_name_match(self) -> None:
|
||||
"""
|
||||
@@ -714,8 +846,7 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
checksum="1",
|
||||
)
|
||||
|
||||
with AsyncWriter(index.open_index()) as writer:
|
||||
index.update_document(writer, d1)
|
||||
get_backend().add_or_update(d1)
|
||||
|
||||
response = self.client.get("/api/search/autocomplete/?term=created:2023")
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
@@ -736,33 +867,36 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
checksum="1",
|
||||
)
|
||||
|
||||
with AsyncWriter(index.open_index()) as writer:
|
||||
index.update_document(writer, d1)
|
||||
get_backend().add_or_update(d1)
|
||||
|
||||
response = self.client.get("/api/search/autocomplete/?term=auto")
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
self.assertEqual(response.data[0], b"auto")
|
||||
self.assertEqual(response.data[0], "auto")
|
||||
|
||||
def test_search_spelling_suggestion(self) -> None:
|
||||
with AsyncWriter(index.open_index()) as writer:
|
||||
for i in range(55):
|
||||
doc = Document.objects.create(
|
||||
checksum=str(i),
|
||||
pk=i + 1,
|
||||
title=f"Document {i + 1}",
|
||||
content=f"Things document {i + 1}",
|
||||
)
|
||||
index.update_document(writer, doc)
|
||||
def test_search_no_spelling_suggestion(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- Documents exist with various terms
|
||||
WHEN:
|
||||
- Query for documents with any term
|
||||
THEN:
|
||||
- corrected_query is always None (Tantivy has no spell correction)
|
||||
"""
|
||||
backend = get_backend()
|
||||
for i in range(5):
|
||||
doc = Document.objects.create(
|
||||
checksum=str(i),
|
||||
pk=i + 1,
|
||||
title=f"Document {i + 1}",
|
||||
content=f"Things document {i + 1}",
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
|
||||
response = self.client.get("/api/documents/?query=thing")
|
||||
correction = response.data["corrected_query"]
|
||||
|
||||
self.assertEqual(correction, "things")
|
||||
self.assertIsNone(response.data["corrected_query"])
|
||||
|
||||
response = self.client.get("/api/documents/?query=things")
|
||||
correction = response.data["corrected_query"]
|
||||
|
||||
self.assertEqual(correction, None)
|
||||
self.assertIsNone(response.data["corrected_query"])
|
||||
|
||||
def test_search_spelling_suggestion_suppressed_for_private_terms(self):
|
||||
owner = User.objects.create_user("owner")
|
||||
@@ -771,24 +905,24 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
Permission.objects.get(codename="view_document"),
|
||||
)
|
||||
|
||||
with AsyncWriter(index.open_index()) as writer:
|
||||
for i in range(55):
|
||||
private_doc = Document.objects.create(
|
||||
checksum=f"p{i}",
|
||||
pk=100 + i,
|
||||
title=f"Private Document {i + 1}",
|
||||
content=f"treasury document {i + 1}",
|
||||
owner=owner,
|
||||
)
|
||||
visible_doc = Document.objects.create(
|
||||
checksum=f"v{i}",
|
||||
pk=200 + i,
|
||||
title=f"Visible Document {i + 1}",
|
||||
content=f"public ledger {i + 1}",
|
||||
owner=attacker,
|
||||
)
|
||||
index.update_document(writer, private_doc)
|
||||
index.update_document(writer, visible_doc)
|
||||
backend = get_backend()
|
||||
for i in range(5):
|
||||
private_doc = Document.objects.create(
|
||||
checksum=f"p{i}",
|
||||
pk=100 + i,
|
||||
title=f"Private Document {i + 1}",
|
||||
content=f"treasury document {i + 1}",
|
||||
owner=owner,
|
||||
)
|
||||
visible_doc = Document.objects.create(
|
||||
checksum=f"v{i}",
|
||||
pk=200 + i,
|
||||
title=f"Visible Document {i + 1}",
|
||||
content=f"public ledger {i + 1}",
|
||||
owner=attacker,
|
||||
)
|
||||
backend.add_or_update(private_doc)
|
||||
backend.add_or_update(visible_doc)
|
||||
|
||||
self.client.force_authenticate(user=attacker)
|
||||
|
||||
@@ -798,26 +932,6 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
self.assertEqual(response.data["count"], 0)
|
||||
self.assertIsNone(response.data["corrected_query"])
|
||||
|
||||
@mock.patch(
|
||||
"whoosh.searching.Searcher.correct_query",
|
||||
side_effect=Exception("Test error"),
|
||||
)
|
||||
def test_corrected_query_error(self, mock_correct_query) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- A query that raises an error on correction
|
||||
WHEN:
|
||||
- API request for search with that query
|
||||
THEN:
|
||||
- The error is logged and the search proceeds
|
||||
"""
|
||||
with self.assertLogs("paperless.index", level="INFO") as cm:
|
||||
response = self.client.get("/api/documents/?query=2025-06-04")
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
error_str = cm.output[0]
|
||||
expected_str = "Error while correcting query '2025-06-04': Test error"
|
||||
self.assertIn(expected_str, error_str)
|
||||
|
||||
def test_search_more_like(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
@@ -847,16 +961,16 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
checksum="C",
|
||||
)
|
||||
d4 = Document.objects.create(
|
||||
title="Monty Python & the Holy Grail",
|
||||
content="And now for something completely different",
|
||||
title="Quarterly Report",
|
||||
content="quarterly revenue profit margin earnings growth",
|
||||
pk=4,
|
||||
checksum="ABC",
|
||||
)
|
||||
with AsyncWriter(index.open_index()) as writer:
|
||||
index.update_document(writer, d1)
|
||||
index.update_document(writer, d2)
|
||||
index.update_document(writer, d3)
|
||||
index.update_document(writer, d4)
|
||||
backend = get_backend()
|
||||
backend.add_or_update(d1)
|
||||
backend.add_or_update(d2)
|
||||
backend.add_or_update(d3)
|
||||
backend.add_or_update(d4)
|
||||
|
||||
response = self.client.get(f"/api/documents/?more_like_id={d2.id}")
|
||||
|
||||
@@ -864,9 +978,10 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
|
||||
results = response.data["results"]
|
||||
|
||||
self.assertEqual(len(results), 2)
|
||||
self.assertEqual(results[0]["id"], d3.id)
|
||||
self.assertEqual(results[1]["id"], d1.id)
|
||||
self.assertGreaterEqual(len(results), 1)
|
||||
result_ids = [r["id"] for r in results]
|
||||
self.assertIn(d3.id, result_ids)
|
||||
self.assertNotIn(d4.id, result_ids)
|
||||
|
||||
def test_search_more_like_requires_view_permission_on_seed_document(
|
||||
self,
|
||||
@@ -908,10 +1023,10 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
pk=12,
|
||||
)
|
||||
|
||||
with AsyncWriter(index.open_index()) as writer:
|
||||
index.update_document(writer, private_seed)
|
||||
index.update_document(writer, visible_doc)
|
||||
index.update_document(writer, other_doc)
|
||||
backend = get_backend()
|
||||
backend.add_or_update(private_seed)
|
||||
backend.add_or_update(visible_doc)
|
||||
backend.add_or_update(other_doc)
|
||||
|
||||
self.client.force_authenticate(user=attacker)
|
||||
|
||||
@@ -985,9 +1100,9 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
value_text="foobard4",
|
||||
)
|
||||
|
||||
with AsyncWriter(index.open_index()) as writer:
|
||||
for doc in Document.objects.all():
|
||||
index.update_document(writer, doc)
|
||||
backend = get_backend()
|
||||
for doc in Document.objects.all():
|
||||
backend.add_or_update(doc)
|
||||
|
||||
def search_query(q):
|
||||
r = self.client.get("/api/documents/?query=test" + q)
|
||||
@@ -1203,9 +1318,9 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
Document.objects.create(checksum="3", content="test 3", owner=u2)
|
||||
Document.objects.create(checksum="4", content="test 4")
|
||||
|
||||
with AsyncWriter(index.open_index()) as writer:
|
||||
for doc in Document.objects.all():
|
||||
index.update_document(writer, doc)
|
||||
backend = get_backend()
|
||||
for doc in Document.objects.all():
|
||||
backend.add_or_update(doc)
|
||||
|
||||
self.client.force_authenticate(user=u1)
|
||||
r = self.client.get("/api/documents/?query=test")
|
||||
@@ -1256,9 +1371,9 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
d3 = Document.objects.create(checksum="3", content="test 3", owner=u2)
|
||||
Document.objects.create(checksum="4", content="test 4")
|
||||
|
||||
with AsyncWriter(index.open_index()) as writer:
|
||||
for doc in Document.objects.all():
|
||||
index.update_document(writer, doc)
|
||||
backend = get_backend()
|
||||
for doc in Document.objects.all():
|
||||
backend.add_or_update(doc)
|
||||
|
||||
self.client.force_authenticate(user=u1)
|
||||
r = self.client.get("/api/documents/?query=test")
|
||||
@@ -1278,9 +1393,9 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
assign_perm("view_document", u1, d3)
|
||||
assign_perm("view_document", u2, d1)
|
||||
|
||||
with AsyncWriter(index.open_index()) as writer:
|
||||
for doc in [d1, d2, d3]:
|
||||
index.update_document(writer, doc)
|
||||
backend.add_or_update(d1)
|
||||
backend.add_or_update(d2)
|
||||
backend.add_or_update(d3)
|
||||
|
||||
self.client.force_authenticate(user=u1)
|
||||
r = self.client.get("/api/documents/?query=test")
|
||||
@@ -1343,9 +1458,9 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
user=u1,
|
||||
)
|
||||
|
||||
with AsyncWriter(index.open_index()) as writer:
|
||||
for doc in Document.objects.all():
|
||||
index.update_document(writer, doc)
|
||||
backend = get_backend()
|
||||
for doc in Document.objects.all():
|
||||
backend.add_or_update(doc)
|
||||
|
||||
def search_query(q):
|
||||
r = self.client.get("/api/documents/?query=test" + q)
|
||||
@@ -1378,15 +1493,99 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
search_query("&ordering=-num_notes"),
|
||||
[d1.id, d3.id, d2.id],
|
||||
)
|
||||
# owner sort: ORM orders by owner_id (integer); NULLs first in SQLite ASC
|
||||
self.assertListEqual(
|
||||
search_query("&ordering=owner"),
|
||||
[d1.id, d2.id, d3.id],
|
||||
[d3.id, d1.id, d2.id],
|
||||
)
|
||||
self.assertListEqual(
|
||||
search_query("&ordering=-owner"),
|
||||
[d3.id, d2.id, d1.id],
|
||||
[d2.id, d1.id, d3.id],
|
||||
)
|
||||
|
||||
def test_search_with_tantivy_native_sort(self) -> None:
|
||||
"""When ordering by a Tantivy-sortable field, results must be correctly sorted."""
|
||||
backend = get_backend()
|
||||
for i, asn in enumerate([30, 10, 20]):
|
||||
doc = Document.objects.create(
|
||||
title=f"sortable doc {i}",
|
||||
content="searchable content",
|
||||
checksum=f"TNS{i}",
|
||||
archive_serial_number=asn,
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
|
||||
response = self.client.get(
|
||||
"/api/documents/?query=searchable&ordering=archive_serial_number",
|
||||
)
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
asns = [doc["archive_serial_number"] for doc in response.data["results"]]
|
||||
self.assertEqual(asns, [10, 20, 30])
|
||||
|
||||
response = self.client.get(
|
||||
"/api/documents/?query=searchable&ordering=-archive_serial_number",
|
||||
)
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
asns = [doc["archive_serial_number"] for doc in response.data["results"]]
|
||||
self.assertEqual(asns, [30, 20, 10])
|
||||
|
||||
def test_search_page_2_returns_correct_slice(self) -> None:
|
||||
"""Page 2 must return the second slice, not overlap with page 1."""
|
||||
backend = get_backend()
|
||||
for i in range(10):
|
||||
doc = Document.objects.create(
|
||||
title=f"doc {i}",
|
||||
content="paginated content",
|
||||
checksum=f"PG2{i}",
|
||||
archive_serial_number=i + 1,
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
|
||||
response = self.client.get(
|
||||
"/api/documents/?query=paginated&ordering=archive_serial_number&page=1&page_size=3",
|
||||
)
|
||||
page1_ids = [r["id"] for r in response.data["results"]]
|
||||
self.assertEqual(len(page1_ids), 3)
|
||||
|
||||
response = self.client.get(
|
||||
"/api/documents/?query=paginated&ordering=archive_serial_number&page=2&page_size=3",
|
||||
)
|
||||
page2_ids = [r["id"] for r in response.data["results"]]
|
||||
self.assertEqual(len(page2_ids), 3)
|
||||
|
||||
# No overlap between pages
|
||||
self.assertEqual(set(page1_ids) & set(page2_ids), set())
|
||||
# Page 2 ASNs are higher than page 1
|
||||
page1_asns = [
|
||||
Document.objects.get(pk=pk).archive_serial_number for pk in page1_ids
|
||||
]
|
||||
page2_asns = [
|
||||
Document.objects.get(pk=pk).archive_serial_number for pk in page2_ids
|
||||
]
|
||||
self.assertTrue(max(page1_asns) < min(page2_asns))
|
||||
|
||||
def test_search_all_field_contains_all_ids_when_paginated(self) -> None:
|
||||
"""The 'all' field must contain every matching ID, even when paginated."""
|
||||
backend = get_backend()
|
||||
doc_ids = []
|
||||
for i in range(10):
|
||||
doc = Document.objects.create(
|
||||
title=f"all field doc {i}",
|
||||
content="allfield content",
|
||||
checksum=f"AF{i}",
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
doc_ids.append(doc.pk)
|
||||
|
||||
response = self.client.get(
|
||||
"/api/documents/?query=allfield&page=1&page_size=3",
|
||||
headers={"Accept": "application/json; version=9"},
|
||||
)
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
self.assertEqual(len(response.data["results"]), 3)
|
||||
# "all" must contain ALL 10 matching IDs
|
||||
self.assertCountEqual(response.data["all"], doc_ids)
|
||||
|
||||
@mock.patch("documents.bulk_edit.bulk_update_documents")
|
||||
def test_global_search(self, m) -> None:
|
||||
"""
|
||||
@@ -1441,12 +1640,12 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
)
|
||||
set_permissions([4, 5], set_permissions={}, owner=user2, merge=False)
|
||||
|
||||
with index.open_index_writer() as writer:
|
||||
index.update_document(writer, d1)
|
||||
index.update_document(writer, d2)
|
||||
index.update_document(writer, d3)
|
||||
index.update_document(writer, d4)
|
||||
index.update_document(writer, d5)
|
||||
backend = get_backend()
|
||||
backend.add_or_update(d1)
|
||||
backend.add_or_update(d2)
|
||||
backend.add_or_update(d3)
|
||||
backend.add_or_update(d4)
|
||||
backend.add_or_update(d5)
|
||||
|
||||
correspondent1 = Correspondent.objects.create(name="bank correspondent 1")
|
||||
Correspondent.objects.create(name="correspondent 2")
|
||||
@@ -1506,6 +1705,31 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
self.assertEqual(results["custom_fields"][0]["id"], custom_field1.id)
|
||||
self.assertEqual(results["workflows"][0]["id"], workflow1.id)
|
||||
|
||||
def test_global_search_db_only_limits_documents_to_title_matches(self) -> None:
|
||||
title_match = Document.objects.create(
|
||||
title="bank statement",
|
||||
content="no additional terms",
|
||||
checksum="GS1",
|
||||
pk=21,
|
||||
)
|
||||
content_only = Document.objects.create(
|
||||
title="not a title match",
|
||||
content="bank appears only in content",
|
||||
checksum="GS2",
|
||||
pk=22,
|
||||
)
|
||||
|
||||
backend = get_backend()
|
||||
backend.add_or_update(title_match)
|
||||
backend.add_or_update(content_only)
|
||||
|
||||
self.client.force_authenticate(self.user)
|
||||
|
||||
response = self.client.get("/api/search/?query=bank&db_only=true")
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
self.assertEqual(len(response.data["documents"]), 1)
|
||||
self.assertEqual(response.data["documents"][0]["id"], title_match.id)
|
||||
|
||||
def test_global_search_filters_owned_mail_objects(self) -> None:
|
||||
user1 = User.objects.create_user("mail-search-user")
|
||||
user2 = User.objects.create_user("other-mail-search-user")
|
||||
|
||||
@@ -191,40 +191,42 @@ class TestSystemStatus(APITestCase):
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
self.assertEqual(response.data["tasks"]["celery_status"], "OK")
|
||||
|
||||
@override_settings(INDEX_DIR=Path("/tmp/index"))
|
||||
@mock.patch("whoosh.index.FileIndex.last_modified")
|
||||
def test_system_status_index_ok(self, mock_last_modified) -> None:
|
||||
@mock.patch("documents.search.get_backend")
|
||||
def test_system_status_index_ok(self, mock_get_backend) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- The index last modified time is set
|
||||
- The index is accessible
|
||||
WHEN:
|
||||
- The user requests the system status
|
||||
THEN:
|
||||
- The response contains the correct index status
|
||||
"""
|
||||
mock_last_modified.return_value = 1707839087
|
||||
self.client.force_login(self.user)
|
||||
response = self.client.get(self.ENDPOINT)
|
||||
mock_get_backend.return_value = mock.MagicMock()
|
||||
# Use the temp dir created in setUp (self.tmp_dir) as a real INDEX_DIR
|
||||
# with a real file so the mtime lookup works
|
||||
sentinel = self.tmp_dir / "sentinel.txt"
|
||||
sentinel.write_text("ok")
|
||||
with self.settings(INDEX_DIR=self.tmp_dir):
|
||||
self.client.force_login(self.user)
|
||||
response = self.client.get(self.ENDPOINT)
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
self.assertEqual(response.data["tasks"]["index_status"], "OK")
|
||||
self.assertIsNotNone(response.data["tasks"]["index_last_modified"])
|
||||
|
||||
@override_settings(INDEX_DIR=Path("/tmp/index/"))
|
||||
@mock.patch("documents.index.open_index", autospec=True)
|
||||
def test_system_status_index_error(self, mock_open_index) -> None:
|
||||
@mock.patch("documents.search.get_backend")
|
||||
def test_system_status_index_error(self, mock_get_backend) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- The index is not found
|
||||
- The index cannot be opened
|
||||
WHEN:
|
||||
- The user requests the system status
|
||||
THEN:
|
||||
- The response contains the correct index status
|
||||
"""
|
||||
mock_open_index.return_value = None
|
||||
mock_open_index.side_effect = Exception("Index error")
|
||||
mock_get_backend.side_effect = Exception("Index error")
|
||||
self.client.force_login(self.user)
|
||||
response = self.client.get(self.ENDPOINT)
|
||||
mock_open_index.assert_called_once()
|
||||
mock_get_backend.assert_called_once()
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
self.assertEqual(response.data["tasks"]["index_status"], "ERROR")
|
||||
self.assertIsNotNone(response.data["tasks"]["index_error"])
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import re
|
||||
import shutil
|
||||
import warnings
|
||||
from pathlib import Path
|
||||
from unittest import mock
|
||||
|
||||
@@ -366,8 +366,7 @@ class TestClassifier(DirectoriesMixin, TestCase):
|
||||
|
||||
self.assertCountEqual(new_classifier.predict_tags(self.doc2.content), [45, 12])
|
||||
|
||||
@mock.patch("documents.classifier.pickle.load")
|
||||
def test_load_corrupt_file(self, patched_pickle_load: mock.MagicMock) -> None:
|
||||
def test_load_corrupt_file(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- Corrupted classifier pickle file
|
||||
@@ -378,36 +377,116 @@ class TestClassifier(DirectoriesMixin, TestCase):
|
||||
"""
|
||||
self.generate_train_and_save()
|
||||
|
||||
# First load is the schema version,allow it
|
||||
patched_pickle_load.side_effect = [DocumentClassifier.FORMAT_VERSION, OSError()]
|
||||
# Write garbage data (valid HMAC length but invalid content)
|
||||
Path(settings.MODEL_FILE).write_bytes(b"\x00" * 64)
|
||||
|
||||
with self.assertRaises(ClassifierModelCorruptError):
|
||||
self.classifier.load()
|
||||
patched_pickle_load.assert_called()
|
||||
|
||||
patched_pickle_load.reset_mock()
|
||||
patched_pickle_load.side_effect = [
|
||||
DocumentClassifier.FORMAT_VERSION,
|
||||
ClassifierModelCorruptError(),
|
||||
]
|
||||
|
||||
self.assertIsNone(load_classifier())
|
||||
patched_pickle_load.assert_called()
|
||||
|
||||
def test_load_corrupt_pickle_valid_hmac(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- A classifier file with valid HMAC but unparsable pickle data
|
||||
WHEN:
|
||||
- An attempt is made to load the classifier
|
||||
THEN:
|
||||
- The ClassifierModelCorruptError is raised
|
||||
"""
|
||||
garbage_data = b"this is not valid pickle data"
|
||||
signature = DocumentClassifier._compute_hmac(garbage_data)
|
||||
Path(settings.MODEL_FILE).write_bytes(signature + garbage_data)
|
||||
|
||||
with self.assertRaises(ClassifierModelCorruptError):
|
||||
self.classifier.load()
|
||||
|
||||
def test_load_tampered_file(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- A classifier model file whose data has been modified
|
||||
WHEN:
|
||||
- An attempt is made to load the classifier
|
||||
THEN:
|
||||
- The ClassifierModelCorruptError is raised due to HMAC mismatch
|
||||
"""
|
||||
self.generate_train_and_save()
|
||||
|
||||
raw = Path(settings.MODEL_FILE).read_bytes()
|
||||
# Flip a byte in the data portion (after the 32-byte HMAC)
|
||||
tampered = raw[:32] + bytes([raw[32] ^ 0xFF]) + raw[33:]
|
||||
Path(settings.MODEL_FILE).write_bytes(tampered)
|
||||
|
||||
with self.assertRaises(ClassifierModelCorruptError):
|
||||
self.classifier.load()
|
||||
|
||||
def test_load_wrong_secret_key(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- A classifier model file signed with a different SECRET_KEY
|
||||
WHEN:
|
||||
- An attempt is made to load the classifier
|
||||
THEN:
|
||||
- The ClassifierModelCorruptError is raised due to HMAC mismatch
|
||||
"""
|
||||
self.generate_train_and_save()
|
||||
|
||||
with override_settings(SECRET_KEY="different-secret-key"):
|
||||
with self.assertRaises(ClassifierModelCorruptError):
|
||||
self.classifier.load()
|
||||
|
||||
def test_load_truncated_file(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- A classifier model file that is too short to contain an HMAC
|
||||
WHEN:
|
||||
- An attempt is made to load the classifier
|
||||
THEN:
|
||||
- The ClassifierModelCorruptError is raised
|
||||
"""
|
||||
Path(settings.MODEL_FILE).write_bytes(b"\x00" * 16)
|
||||
|
||||
with self.assertRaises(ClassifierModelCorruptError):
|
||||
self.classifier.load()
|
||||
|
||||
def test_load_new_scikit_learn_version(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- classifier pickle file created with a different scikit-learn version
|
||||
- classifier pickle file triggers an InconsistentVersionWarning
|
||||
WHEN:
|
||||
- An attempt is made to load the classifier
|
||||
THEN:
|
||||
- The classifier reports the warning was captured and processed
|
||||
- IncompatibleClassifierVersionError is raised
|
||||
"""
|
||||
# TODO: This wasn't testing the warning anymore, as the schema changed
|
||||
# but as it was implemented, it would require installing an old version
|
||||
# rebuilding the file and committing that. Not developer friendly
|
||||
# Need to rethink how to pass the load through to a file with a single
|
||||
# old model?
|
||||
from sklearn.exceptions import InconsistentVersionWarning
|
||||
|
||||
self.generate_train_and_save()
|
||||
|
||||
fake_warning = warnings.WarningMessage(
|
||||
message=InconsistentVersionWarning(
|
||||
estimator_name="MLPClassifier",
|
||||
current_sklearn_version="1.0",
|
||||
original_sklearn_version="0.9",
|
||||
),
|
||||
category=InconsistentVersionWarning,
|
||||
filename="",
|
||||
lineno=0,
|
||||
)
|
||||
|
||||
real_catch_warnings = warnings.catch_warnings
|
||||
|
||||
class PatchedCatchWarnings(real_catch_warnings):
|
||||
def __enter__(self):
|
||||
w = super().__enter__()
|
||||
w.append(fake_warning)
|
||||
return w
|
||||
|
||||
with mock.patch(
|
||||
"documents.classifier.warnings.catch_warnings",
|
||||
PatchedCatchWarnings,
|
||||
):
|
||||
with self.assertRaises(IncompatibleClassifierVersionError):
|
||||
self.classifier.load()
|
||||
|
||||
def test_one_correspondent_predict(self) -> None:
|
||||
c1 = Correspondent.objects.create(
|
||||
@@ -685,17 +764,6 @@ class TestClassifier(DirectoriesMixin, TestCase):
|
||||
self.assertIsNone(load_classifier())
|
||||
self.assertTrue(Path(settings.MODEL_FILE).exists())
|
||||
|
||||
def test_load_old_classifier_version(self) -> None:
|
||||
shutil.copy(
|
||||
Path(__file__).parent / "data" / "v1.17.4.model.pickle",
|
||||
self.dirs.scratch_dir,
|
||||
)
|
||||
with override_settings(
|
||||
MODEL_FILE=self.dirs.scratch_dir / "v1.17.4.model.pickle",
|
||||
):
|
||||
classifier = load_classifier()
|
||||
self.assertIsNone(classifier)
|
||||
|
||||
@mock.patch("documents.classifier.DocumentClassifier.load")
|
||||
def test_load_classifier_raise_exception(self, mock_load) -> None:
|
||||
Path(settings.MODEL_FILE).touch()
|
||||
|
||||
@@ -1,58 +0,0 @@
|
||||
from django.test import TestCase
|
||||
from whoosh import query
|
||||
|
||||
from documents.index import get_permissions_criterias
|
||||
from documents.models import User
|
||||
|
||||
|
||||
class TestDelayedQuery(TestCase):
|
||||
def setUp(self) -> None:
|
||||
super().setUp()
|
||||
# all tests run without permission criteria, so has_no_owner query will always
|
||||
# be appended.
|
||||
self.has_no_owner = query.Or([query.Term("has_owner", text=False)])
|
||||
|
||||
def _get_testset__id__in(self, param, field):
|
||||
return (
|
||||
{f"{param}__id__in": "42,43"},
|
||||
query.And(
|
||||
[
|
||||
query.Or(
|
||||
[
|
||||
query.Term(f"{field}_id", "42"),
|
||||
query.Term(f"{field}_id", "43"),
|
||||
],
|
||||
),
|
||||
self.has_no_owner,
|
||||
],
|
||||
),
|
||||
)
|
||||
|
||||
def _get_testset__id__none(self, param, field):
|
||||
return (
|
||||
{f"{param}__id__none": "42,43"},
|
||||
query.And(
|
||||
[
|
||||
query.Not(query.Term(f"{field}_id", "42")),
|
||||
query.Not(query.Term(f"{field}_id", "43")),
|
||||
self.has_no_owner,
|
||||
],
|
||||
),
|
||||
)
|
||||
|
||||
def test_get_permission_criteria(self) -> None:
|
||||
# tests contains tuples of user instances and the expected filter
|
||||
tests = (
|
||||
(None, [query.Term("has_owner", text=False)]),
|
||||
(User(42, username="foo", is_superuser=True), []),
|
||||
(
|
||||
User(42, username="foo", is_superuser=False),
|
||||
[
|
||||
query.Term("has_owner", text=False),
|
||||
query.Term("owner_id", 42),
|
||||
query.Term("viewer_id", "42"),
|
||||
],
|
||||
),
|
||||
)
|
||||
for user, expected in tests:
|
||||
self.assertEqual(get_permissions_criterias(user), expected)
|
||||
@@ -1,371 +0,0 @@
|
||||
from datetime import datetime
|
||||
from unittest import mock
|
||||
|
||||
from django.conf import settings
|
||||
from django.contrib.auth.models import User
|
||||
from django.test import SimpleTestCase
|
||||
from django.test import TestCase
|
||||
from django.test import override_settings
|
||||
from django.utils.timezone import get_current_timezone
|
||||
from django.utils.timezone import timezone
|
||||
|
||||
from documents import index
|
||||
from documents.models import Document
|
||||
from documents.tests.utils import DirectoriesMixin
|
||||
|
||||
|
||||
class TestAutoComplete(DirectoriesMixin, TestCase):
|
||||
def test_auto_complete(self) -> None:
|
||||
doc1 = Document.objects.create(
|
||||
title="doc1",
|
||||
checksum="A",
|
||||
content="test test2 test3",
|
||||
)
|
||||
doc2 = Document.objects.create(title="doc2", checksum="B", content="test test2")
|
||||
doc3 = Document.objects.create(title="doc3", checksum="C", content="test2")
|
||||
|
||||
index.add_or_update_document(doc1)
|
||||
index.add_or_update_document(doc2)
|
||||
index.add_or_update_document(doc3)
|
||||
|
||||
ix = index.open_index()
|
||||
|
||||
self.assertListEqual(
|
||||
index.autocomplete(ix, "tes"),
|
||||
[b"test2", b"test", b"test3"],
|
||||
)
|
||||
self.assertListEqual(
|
||||
index.autocomplete(ix, "tes", limit=3),
|
||||
[b"test2", b"test", b"test3"],
|
||||
)
|
||||
self.assertListEqual(index.autocomplete(ix, "tes", limit=1), [b"test2"])
|
||||
self.assertListEqual(index.autocomplete(ix, "tes", limit=0), [])
|
||||
|
||||
def test_archive_serial_number_ranging(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- Document with an archive serial number above schema allowed size
|
||||
WHEN:
|
||||
- Document is provided to the index
|
||||
THEN:
|
||||
- Error is logged
|
||||
- Document ASN is reset to 0 for the index
|
||||
"""
|
||||
doc1 = Document.objects.create(
|
||||
title="doc1",
|
||||
checksum="A",
|
||||
content="test test2 test3",
|
||||
# yes, this is allowed, unless full_clean is run
|
||||
# DRF does call the validators, this test won't
|
||||
archive_serial_number=Document.ARCHIVE_SERIAL_NUMBER_MAX + 1,
|
||||
)
|
||||
with self.assertLogs("paperless.index", level="ERROR") as cm:
|
||||
with mock.patch(
|
||||
"documents.index.AsyncWriter.update_document",
|
||||
) as mocked_update_doc:
|
||||
index.add_or_update_document(doc1)
|
||||
|
||||
mocked_update_doc.assert_called_once()
|
||||
_, kwargs = mocked_update_doc.call_args
|
||||
|
||||
self.assertEqual(kwargs["asn"], 0)
|
||||
|
||||
error_str = cm.output[0]
|
||||
expected_str = "ERROR:paperless.index:Not indexing Archive Serial Number 4294967296 of document 1"
|
||||
self.assertIn(expected_str, error_str)
|
||||
|
||||
def test_archive_serial_number_is_none(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- Document with no archive serial number
|
||||
WHEN:
|
||||
- Document is provided to the index
|
||||
THEN:
|
||||
- ASN isn't touched
|
||||
"""
|
||||
doc1 = Document.objects.create(
|
||||
title="doc1",
|
||||
checksum="A",
|
||||
content="test test2 test3",
|
||||
)
|
||||
with mock.patch(
|
||||
"documents.index.AsyncWriter.update_document",
|
||||
) as mocked_update_doc:
|
||||
index.add_or_update_document(doc1)
|
||||
|
||||
mocked_update_doc.assert_called_once()
|
||||
_, kwargs = mocked_update_doc.call_args
|
||||
|
||||
self.assertIsNone(kwargs["asn"])
|
||||
|
||||
@override_settings(TIME_ZONE="Pacific/Auckland")
|
||||
def test_added_today_respects_local_timezone_boundary(self) -> None:
|
||||
tz = get_current_timezone()
|
||||
fixed_now = datetime(2025, 7, 20, 15, 0, 0, tzinfo=tz)
|
||||
|
||||
# Fake a time near the local boundary (1 AM NZT = 13:00 UTC on previous UTC day)
|
||||
local_dt = datetime(2025, 7, 20, 1, 0, 0).replace(tzinfo=tz)
|
||||
utc_dt = local_dt.astimezone(timezone.utc)
|
||||
|
||||
doc = Document.objects.create(
|
||||
title="Time zone",
|
||||
content="Testing added:today",
|
||||
checksum="edgecase123",
|
||||
added=utc_dt,
|
||||
)
|
||||
|
||||
with index.open_index_writer() as writer:
|
||||
index.update_document(writer, doc)
|
||||
|
||||
superuser = User.objects.create_superuser(username="testuser")
|
||||
self.client.force_login(superuser)
|
||||
|
||||
with mock.patch("documents.index.now", return_value=fixed_now):
|
||||
response = self.client.get("/api/documents/?query=added:today")
|
||||
results = response.json()["results"]
|
||||
self.assertEqual(len(results), 1)
|
||||
self.assertEqual(results[0]["id"], doc.id)
|
||||
|
||||
response = self.client.get("/api/documents/?query=added:yesterday")
|
||||
results = response.json()["results"]
|
||||
self.assertEqual(len(results), 0)
|
||||
|
||||
|
||||
@override_settings(TIME_ZONE="UTC")
|
||||
class TestRewriteNaturalDateKeywords(SimpleTestCase):
|
||||
"""
|
||||
Unit tests for rewrite_natural_date_keywords function.
|
||||
"""
|
||||
|
||||
def _rewrite_with_now(self, query: str, now_dt: datetime) -> str:
|
||||
with mock.patch("documents.index.now", return_value=now_dt):
|
||||
return index.rewrite_natural_date_keywords(query)
|
||||
|
||||
def _assert_rewrite_contains(
|
||||
self,
|
||||
query: str,
|
||||
now_dt: datetime,
|
||||
*expected_fragments: str,
|
||||
) -> str:
|
||||
result = self._rewrite_with_now(query, now_dt)
|
||||
for fragment in expected_fragments:
|
||||
self.assertIn(fragment, result)
|
||||
return result
|
||||
|
||||
def test_range_keywords(self) -> None:
|
||||
"""
|
||||
Test various different range keywords
|
||||
"""
|
||||
cases = [
|
||||
(
|
||||
"added:today",
|
||||
datetime(2025, 7, 20, 15, 30, 45, tzinfo=timezone.utc),
|
||||
("added:[20250720", "TO 20250720"),
|
||||
),
|
||||
(
|
||||
"added:yesterday",
|
||||
datetime(2025, 7, 20, 15, 30, 45, tzinfo=timezone.utc),
|
||||
("added:[20250719", "TO 20250719"),
|
||||
),
|
||||
(
|
||||
"added:this month",
|
||||
datetime(2025, 7, 15, 12, 0, 0, tzinfo=timezone.utc),
|
||||
("added:[20250701", "TO 20250731"),
|
||||
),
|
||||
(
|
||||
"added:previous month",
|
||||
datetime(2025, 7, 15, 12, 0, 0, tzinfo=timezone.utc),
|
||||
("added:[20250601", "TO 20250630"),
|
||||
),
|
||||
(
|
||||
"added:this year",
|
||||
datetime(2025, 7, 15, 12, 0, 0, tzinfo=timezone.utc),
|
||||
("added:[20250101", "TO 20251231"),
|
||||
),
|
||||
(
|
||||
"added:previous year",
|
||||
datetime(2025, 7, 15, 12, 0, 0, tzinfo=timezone.utc),
|
||||
("added:[20240101", "TO 20241231"),
|
||||
),
|
||||
# Previous quarter from July 15, 2025 is April-June.
|
||||
(
|
||||
"added:previous quarter",
|
||||
datetime(2025, 7, 15, 12, 0, 0, tzinfo=timezone.utc),
|
||||
("added:[20250401", "TO 20250630"),
|
||||
),
|
||||
# July 20, 2025 is a Sunday (weekday 6) so previous week is July 7-13.
|
||||
(
|
||||
"added:previous week",
|
||||
datetime(2025, 7, 20, 12, 0, 0, tzinfo=timezone.utc),
|
||||
("added:[20250707", "TO 20250713"),
|
||||
),
|
||||
]
|
||||
|
||||
for query, now_dt, fragments in cases:
|
||||
with self.subTest(query=query):
|
||||
self._assert_rewrite_contains(query, now_dt, *fragments)
|
||||
|
||||
def test_additional_fields(self) -> None:
|
||||
fixed_now = datetime(2025, 7, 20, 15, 30, 45, tzinfo=timezone.utc)
|
||||
# created
|
||||
self._assert_rewrite_contains("created:today", fixed_now, "created:[20250720")
|
||||
# modified
|
||||
self._assert_rewrite_contains("modified:today", fixed_now, "modified:[20250720")
|
||||
|
||||
def test_basic_syntax_variants(self) -> None:
|
||||
"""
|
||||
Test that quoting, casing, and multi-clause queries are parsed.
|
||||
"""
|
||||
fixed_now = datetime(2025, 7, 20, 15, 30, 45, tzinfo=timezone.utc)
|
||||
|
||||
# quoted keywords
|
||||
result1 = self._rewrite_with_now('added:"today"', fixed_now)
|
||||
result2 = self._rewrite_with_now("added:'today'", fixed_now)
|
||||
self.assertIn("added:[20250720", result1)
|
||||
self.assertIn("added:[20250720", result2)
|
||||
|
||||
# case insensitivity
|
||||
for query in ("added:TODAY", "added:Today", "added:ToDaY"):
|
||||
with self.subTest(case_variant=query):
|
||||
self._assert_rewrite_contains(query, fixed_now, "added:[20250720")
|
||||
|
||||
# multiple clauses
|
||||
result = self._rewrite_with_now("added:today created:yesterday", fixed_now)
|
||||
self.assertIn("added:[20250720", result)
|
||||
self.assertIn("created:[20250719", result)
|
||||
|
||||
def test_no_match(self) -> None:
|
||||
"""
|
||||
Test that queries without keywords are unchanged.
|
||||
"""
|
||||
query = "title:test content:example"
|
||||
result = index.rewrite_natural_date_keywords(query)
|
||||
self.assertEqual(query, result)
|
||||
|
||||
@override_settings(TIME_ZONE="Pacific/Auckland")
|
||||
def test_timezone_awareness(self) -> None:
|
||||
"""
|
||||
Test timezone conversion.
|
||||
"""
|
||||
# July 20, 2025 1:00 AM NZST = July 19, 2025 13:00 UTC
|
||||
fixed_now = datetime(2025, 7, 20, 1, 0, 0, tzinfo=get_current_timezone())
|
||||
result = self._rewrite_with_now("added:today", fixed_now)
|
||||
# Should convert to UTC properly
|
||||
self.assertIn("added:[20250719", result)
|
||||
|
||||
|
||||
class TestIndexResilience(DirectoriesMixin, SimpleTestCase):
|
||||
def _assert_recreate_called(self, mock_create_in) -> None:
|
||||
mock_create_in.assert_called_once()
|
||||
path_arg, schema_arg = mock_create_in.call_args.args
|
||||
self.assertEqual(path_arg, settings.INDEX_DIR)
|
||||
self.assertEqual(schema_arg.__class__.__name__, "Schema")
|
||||
|
||||
def test_transient_missing_segment_does_not_force_recreate(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- Index directory exists
|
||||
WHEN:
|
||||
- open_index is called
|
||||
- Opening the index raises FileNotFoundError once due to a
|
||||
transient missing segment
|
||||
THEN:
|
||||
- Index is opened successfully on retry
|
||||
- Index is not recreated
|
||||
"""
|
||||
file_marker = settings.INDEX_DIR / "file_marker.txt"
|
||||
file_marker.write_text("keep")
|
||||
expected_index = object()
|
||||
|
||||
with (
|
||||
mock.patch("documents.index.exists_in", return_value=True),
|
||||
mock.patch(
|
||||
"documents.index.open_dir",
|
||||
side_effect=[FileNotFoundError("missing"), expected_index],
|
||||
) as mock_open_dir,
|
||||
mock.patch(
|
||||
"documents.index.create_in",
|
||||
) as mock_create_in,
|
||||
mock.patch(
|
||||
"documents.index.rmtree",
|
||||
) as mock_rmtree,
|
||||
):
|
||||
ix = index.open_index()
|
||||
|
||||
self.assertIs(ix, expected_index)
|
||||
self.assertGreaterEqual(mock_open_dir.call_count, 2)
|
||||
mock_rmtree.assert_not_called()
|
||||
mock_create_in.assert_not_called()
|
||||
self.assertEqual(file_marker.read_text(), "keep")
|
||||
|
||||
def test_transient_errors_exhaust_retries_and_recreate(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- Index directory exists
|
||||
WHEN:
|
||||
- open_index is called
|
||||
- Opening the index raises FileNotFoundError multiple times due to
|
||||
transient missing segments
|
||||
THEN:
|
||||
- Index is recreated after retries are exhausted
|
||||
"""
|
||||
recreated_index = object()
|
||||
|
||||
with (
|
||||
self.assertLogs("paperless.index", level="ERROR") as cm,
|
||||
mock.patch("documents.index.exists_in", return_value=True),
|
||||
mock.patch(
|
||||
"documents.index.open_dir",
|
||||
side_effect=FileNotFoundError("missing"),
|
||||
) as mock_open_dir,
|
||||
mock.patch("documents.index.rmtree") as mock_rmtree,
|
||||
mock.patch(
|
||||
"documents.index.create_in",
|
||||
return_value=recreated_index,
|
||||
) as mock_create_in,
|
||||
):
|
||||
ix = index.open_index()
|
||||
|
||||
self.assertIs(ix, recreated_index)
|
||||
self.assertEqual(mock_open_dir.call_count, 4)
|
||||
mock_rmtree.assert_called_once_with(settings.INDEX_DIR)
|
||||
self._assert_recreate_called(mock_create_in)
|
||||
self.assertIn(
|
||||
"Error while opening the index after retries, recreating.",
|
||||
cm.output[0],
|
||||
)
|
||||
|
||||
def test_non_transient_error_recreates_index(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- Index directory exists
|
||||
WHEN:
|
||||
- open_index is called
|
||||
- Opening the index raises a "non-transient" error
|
||||
THEN:
|
||||
- Index is recreated
|
||||
"""
|
||||
recreated_index = object()
|
||||
|
||||
with (
|
||||
self.assertLogs("paperless.index", level="ERROR") as cm,
|
||||
mock.patch("documents.index.exists_in", return_value=True),
|
||||
mock.patch(
|
||||
"documents.index.open_dir",
|
||||
side_effect=RuntimeError("boom"),
|
||||
),
|
||||
mock.patch("documents.index.rmtree") as mock_rmtree,
|
||||
mock.patch(
|
||||
"documents.index.create_in",
|
||||
return_value=recreated_index,
|
||||
) as mock_create_in,
|
||||
):
|
||||
ix = index.open_index()
|
||||
|
||||
self.assertIs(ix, recreated_index)
|
||||
mock_rmtree.assert_called_once_with(settings.INDEX_DIR)
|
||||
self._assert_recreate_called(mock_create_in)
|
||||
self.assertIn(
|
||||
"Error while opening the index, recreating.",
|
||||
cm.output[0],
|
||||
)
|
||||
@@ -103,16 +103,75 @@ class TestArchiver(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
|
||||
|
||||
@pytest.mark.management
|
||||
class TestMakeIndex(TestCase):
|
||||
@mock.patch("documents.management.commands.document_index.index_reindex")
|
||||
def test_reindex(self, m) -> None:
|
||||
@pytest.mark.django_db
|
||||
class TestMakeIndex:
|
||||
def test_reindex(self, mocker: MockerFixture) -> None:
|
||||
"""Reindex command must call the backend rebuild method to recreate the index."""
|
||||
mock_get_backend = mocker.patch(
|
||||
"documents.management.commands.document_index.get_backend",
|
||||
)
|
||||
call_command("document_index", "reindex", skip_checks=True)
|
||||
m.assert_called_once()
|
||||
mock_get_backend.return_value.rebuild.assert_called_once()
|
||||
|
||||
@mock.patch("documents.management.commands.document_index.index_optimize")
|
||||
def test_optimize(self, m) -> None:
|
||||
def test_optimize(self) -> None:
|
||||
"""Optimize command must execute without error (Tantivy handles optimization automatically)."""
|
||||
call_command("document_index", "optimize", skip_checks=True)
|
||||
m.assert_called_once()
|
||||
|
||||
def test_reindex_recreate_wipes_index(self, mocker: MockerFixture) -> None:
|
||||
"""Reindex with --recreate must wipe the index before rebuilding."""
|
||||
mock_wipe = mocker.patch(
|
||||
"documents.management.commands.document_index.wipe_index",
|
||||
)
|
||||
mock_get_backend = mocker.patch(
|
||||
"documents.management.commands.document_index.get_backend",
|
||||
)
|
||||
call_command("document_index", "reindex", recreate=True, skip_checks=True)
|
||||
mock_wipe.assert_called_once()
|
||||
mock_get_backend.return_value.rebuild.assert_called_once()
|
||||
|
||||
def test_reindex_without_recreate_does_not_wipe_index(
|
||||
self,
|
||||
mocker: MockerFixture,
|
||||
) -> None:
|
||||
"""Reindex without --recreate must not wipe the index."""
|
||||
mock_wipe = mocker.patch(
|
||||
"documents.management.commands.document_index.wipe_index",
|
||||
)
|
||||
mocker.patch(
|
||||
"documents.management.commands.document_index.get_backend",
|
||||
)
|
||||
call_command("document_index", "reindex", skip_checks=True)
|
||||
mock_wipe.assert_not_called()
|
||||
|
||||
def test_reindex_if_needed_skips_when_up_to_date(
|
||||
self,
|
||||
mocker: MockerFixture,
|
||||
) -> None:
|
||||
"""Conditional reindex must skip rebuild when schema version and language match."""
|
||||
mocker.patch(
|
||||
"documents.management.commands.document_index.needs_rebuild",
|
||||
return_value=False,
|
||||
)
|
||||
mock_get_backend = mocker.patch(
|
||||
"documents.management.commands.document_index.get_backend",
|
||||
)
|
||||
call_command("document_index", "reindex", if_needed=True, skip_checks=True)
|
||||
mock_get_backend.return_value.rebuild.assert_not_called()
|
||||
|
||||
def test_reindex_if_needed_runs_when_rebuild_needed(
|
||||
self,
|
||||
mocker: MockerFixture,
|
||||
) -> None:
|
||||
"""Conditional reindex must proceed with rebuild when schema version or language changed."""
|
||||
mocker.patch(
|
||||
"documents.management.commands.document_index.needs_rebuild",
|
||||
return_value=True,
|
||||
)
|
||||
mock_get_backend = mocker.patch(
|
||||
"documents.management.commands.document_index.get_backend",
|
||||
)
|
||||
call_command("document_index", "reindex", if_needed=True, skip_checks=True)
|
||||
mock_get_backend.return_value.rebuild.assert_called_once()
|
||||
|
||||
|
||||
@pytest.mark.management
|
||||
|
||||
@@ -2,6 +2,7 @@ import hashlib
|
||||
import json
|
||||
import shutil
|
||||
import tempfile
|
||||
from datetime import timedelta
|
||||
from io import StringIO
|
||||
from pathlib import Path
|
||||
from unittest import mock
|
||||
@@ -11,6 +12,7 @@ import pytest
|
||||
from allauth.socialaccount.models import SocialAccount
|
||||
from allauth.socialaccount.models import SocialApp
|
||||
from allauth.socialaccount.models import SocialToken
|
||||
from django.conf import settings
|
||||
from django.contrib.auth.models import Group
|
||||
from django.contrib.auth.models import Permission
|
||||
from django.contrib.contenttypes.models import ContentType
|
||||
@@ -31,6 +33,8 @@ from documents.models import CustomFieldInstance
|
||||
from documents.models import Document
|
||||
from documents.models import DocumentType
|
||||
from documents.models import Note
|
||||
from documents.models import ShareLink
|
||||
from documents.models import ShareLinkBundle
|
||||
from documents.models import StoragePath
|
||||
from documents.models import Tag
|
||||
from documents.models import User
|
||||
@@ -39,6 +43,7 @@ from documents.models import WorkflowAction
|
||||
from documents.models import WorkflowTrigger
|
||||
from documents.sanity_checker import check_sanity
|
||||
from documents.settings import EXPORTER_FILE_NAME
|
||||
from documents.settings import EXPORTER_SHARE_LINK_BUNDLE_NAME
|
||||
from documents.tests.utils import DirectoriesMixin
|
||||
from documents.tests.utils import FileSystemAssertsMixin
|
||||
from documents.tests.utils import SampleDirMixin
|
||||
@@ -306,6 +311,108 @@ class TestExportImport(
|
||||
):
|
||||
self.test_exporter(use_filename_format=True)
|
||||
|
||||
def test_exporter_includes_share_links_and_bundles(self) -> None:
|
||||
shutil.rmtree(Path(self.dirs.media_dir) / "documents")
|
||||
shutil.copytree(
|
||||
Path(__file__).parent / "samples" / "documents",
|
||||
Path(self.dirs.media_dir) / "documents",
|
||||
)
|
||||
|
||||
share_link = ShareLink.objects.create(
|
||||
slug="share-link-slug",
|
||||
document=self.d1,
|
||||
owner=self.user,
|
||||
file_version=ShareLink.FileVersion.ORIGINAL,
|
||||
expiration=timezone.now() + timedelta(days=7),
|
||||
)
|
||||
|
||||
bundle_relative_path = Path("nested") / "share-bundle.zip"
|
||||
bundle_source_path = settings.SHARE_LINK_BUNDLE_DIR / bundle_relative_path
|
||||
bundle_source_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
bundle_source_path.write_bytes(b"share-bundle-contents")
|
||||
bundle = ShareLinkBundle.objects.create(
|
||||
slug="share-bundle-slug",
|
||||
owner=self.user,
|
||||
file_version=ShareLink.FileVersion.ARCHIVE,
|
||||
expiration=timezone.now() + timedelta(days=7),
|
||||
status=ShareLinkBundle.Status.READY,
|
||||
size_bytes=bundle_source_path.stat().st_size,
|
||||
file_path=str(bundle_relative_path),
|
||||
built_at=timezone.now(),
|
||||
)
|
||||
bundle.documents.set([self.d1, self.d2])
|
||||
|
||||
manifest = self._do_export()
|
||||
|
||||
share_link_records = [
|
||||
record for record in manifest if record["model"] == "documents.sharelink"
|
||||
]
|
||||
self.assertEqual(len(share_link_records), 1)
|
||||
self.assertEqual(share_link_records[0]["pk"], share_link.pk)
|
||||
self.assertEqual(share_link_records[0]["fields"]["document"], self.d1.pk)
|
||||
self.assertEqual(share_link_records[0]["fields"]["owner"], self.user.pk)
|
||||
|
||||
share_link_bundle_records = [
|
||||
record
|
||||
for record in manifest
|
||||
if record["model"] == "documents.sharelinkbundle"
|
||||
]
|
||||
self.assertEqual(len(share_link_bundle_records), 1)
|
||||
bundle_record = share_link_bundle_records[0]
|
||||
self.assertEqual(bundle_record["pk"], bundle.pk)
|
||||
self.assertEqual(
|
||||
bundle_record["fields"]["documents"],
|
||||
[self.d1.pk, self.d2.pk],
|
||||
)
|
||||
self.assertEqual(
|
||||
bundle_record[EXPORTER_SHARE_LINK_BUNDLE_NAME],
|
||||
"share_link_bundles/nested/share-bundle.zip",
|
||||
)
|
||||
self.assertEqual(
|
||||
bundle_record["fields"]["file_path"],
|
||||
"nested/share-bundle.zip",
|
||||
)
|
||||
self.assertIsFile(self.target / bundle_record[EXPORTER_SHARE_LINK_BUNDLE_NAME])
|
||||
|
||||
with paperless_environment():
|
||||
ShareLink.objects.all().delete()
|
||||
ShareLinkBundle.objects.all().delete()
|
||||
shutil.rmtree(settings.SHARE_LINK_BUNDLE_DIR, ignore_errors=True)
|
||||
|
||||
call_command(
|
||||
"document_importer",
|
||||
"--no-progress-bar",
|
||||
self.target,
|
||||
skip_checks=True,
|
||||
)
|
||||
|
||||
imported_share_link = ShareLink.objects.get(pk=share_link.pk)
|
||||
self.assertEqual(imported_share_link.document_id, self.d1.pk)
|
||||
self.assertEqual(imported_share_link.owner_id, self.user.pk)
|
||||
self.assertEqual(
|
||||
imported_share_link.file_version,
|
||||
ShareLink.FileVersion.ORIGINAL,
|
||||
)
|
||||
|
||||
imported_bundle = ShareLinkBundle.objects.get(pk=bundle.pk)
|
||||
imported_bundle_path = imported_bundle.absolute_file_path
|
||||
self.assertEqual(imported_bundle.owner_id, self.user.pk)
|
||||
self.assertEqual(
|
||||
list(
|
||||
imported_bundle.documents.order_by("pk").values_list(
|
||||
"pk",
|
||||
flat=True,
|
||||
),
|
||||
),
|
||||
[self.d1.pk, self.d2.pk],
|
||||
)
|
||||
self.assertEqual(imported_bundle.file_path, "nested/share-bundle.zip")
|
||||
self.assertIsNotNone(imported_bundle_path)
|
||||
self.assertEqual(
|
||||
imported_bundle_path.read_bytes(),
|
||||
b"share-bundle-contents",
|
||||
)
|
||||
|
||||
def test_update_export_changed_time(self) -> None:
|
||||
shutil.rmtree(Path(self.dirs.media_dir) / "documents")
|
||||
shutil.copytree(
|
||||
|
||||
@@ -452,7 +452,10 @@ class TestDocumentConsumptionFinishedSignal(TestCase):
|
||||
"""
|
||||
|
||||
def setUp(self) -> None:
|
||||
from documents.search import reset_backend
|
||||
|
||||
TestCase.setUp(self)
|
||||
reset_backend()
|
||||
User.objects.create_user(username="test_consumer", password="12345")
|
||||
self.doc_contains = Document.objects.create(
|
||||
content="I contain the keyword.",
|
||||
@@ -464,6 +467,9 @@ class TestDocumentConsumptionFinishedSignal(TestCase):
|
||||
override_settings(INDEX_DIR=self.index_dir).enable()
|
||||
|
||||
def tearDown(self) -> None:
|
||||
from documents.search import reset_backend
|
||||
|
||||
reset_backend()
|
||||
shutil.rmtree(self.index_dir, ignore_errors=True)
|
||||
|
||||
def test_tag_applied_any(self) -> None:
|
||||
|
||||
128
src/documents/tests/test_regex.py
Normal file
128
src/documents/tests/test_regex.py
Normal file
@@ -0,0 +1,128 @@
|
||||
import pytest
|
||||
import regex
|
||||
from pytest_mock import MockerFixture
|
||||
|
||||
from documents.regex import safe_regex_finditer
|
||||
from documents.regex import safe_regex_match
|
||||
from documents.regex import safe_regex_search
|
||||
from documents.regex import safe_regex_sub
|
||||
from documents.regex import validate_regex_pattern
|
||||
|
||||
|
||||
class TestValidateRegexPattern:
|
||||
def test_valid_pattern(self):
|
||||
validate_regex_pattern(r"\d+")
|
||||
|
||||
def test_invalid_pattern_raises(self):
|
||||
with pytest.raises(ValueError):
|
||||
validate_regex_pattern(r"[invalid")
|
||||
|
||||
|
||||
class TestSafeRegexSearchAndMatch:
|
||||
"""Tests for safe_regex_search and safe_regex_match (same contract)."""
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
("func", "pattern", "text", "expected_group"),
|
||||
[
|
||||
pytest.param(
|
||||
safe_regex_search,
|
||||
r"\d+",
|
||||
"abc123def",
|
||||
"123",
|
||||
id="search-match-found",
|
||||
),
|
||||
pytest.param(
|
||||
safe_regex_match,
|
||||
r"\d+",
|
||||
"123abc",
|
||||
"123",
|
||||
id="match-match-found",
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_match_found(self, func, pattern, text, expected_group):
|
||||
result = func(pattern, text)
|
||||
assert result is not None
|
||||
assert result.group() == expected_group
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
("func", "pattern", "text"),
|
||||
[
|
||||
pytest.param(safe_regex_search, r"\d+", "abcdef", id="search-no-match"),
|
||||
pytest.param(safe_regex_match, r"\d+", "abc123", id="match-no-match"),
|
||||
],
|
||||
)
|
||||
def test_no_match(self, func, pattern, text):
|
||||
assert func(pattern, text) is None
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"func",
|
||||
[
|
||||
pytest.param(safe_regex_search, id="search"),
|
||||
pytest.param(safe_regex_match, id="match"),
|
||||
],
|
||||
)
|
||||
def test_invalid_pattern_returns_none(self, func):
|
||||
assert func(r"[invalid", "test") is None
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"func",
|
||||
[
|
||||
pytest.param(safe_regex_search, id="search"),
|
||||
pytest.param(safe_regex_match, id="match"),
|
||||
],
|
||||
)
|
||||
def test_flags_respected(self, func):
|
||||
assert func(r"abc", "ABC", flags=regex.IGNORECASE) is not None
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
("func", "method_name"),
|
||||
[
|
||||
pytest.param(safe_regex_search, "search", id="search"),
|
||||
pytest.param(safe_regex_match, "match", id="match"),
|
||||
],
|
||||
)
|
||||
def test_timeout_returns_none(self, func, method_name, mocker: MockerFixture):
|
||||
mock_compile = mocker.patch("documents.regex.regex.compile")
|
||||
getattr(mock_compile.return_value, method_name).side_effect = TimeoutError
|
||||
assert func(r"\d+", "test") is None
|
||||
|
||||
|
||||
class TestSafeRegexSub:
|
||||
@pytest.mark.parametrize(
|
||||
("pattern", "repl", "text", "expected"),
|
||||
[
|
||||
pytest.param(r"\d+", "NUM", "abc123def456", "abcNUMdefNUM", id="basic-sub"),
|
||||
pytest.param(r"\d+", "NUM", "abcdef", "abcdef", id="no-match"),
|
||||
pytest.param(r"abc", "X", "ABC", "X", id="flags"),
|
||||
],
|
||||
)
|
||||
def test_substitution(self, pattern, repl, text, expected):
|
||||
flags = regex.IGNORECASE if pattern == r"abc" else 0
|
||||
result = safe_regex_sub(pattern, repl, text, flags=flags)
|
||||
assert result == expected
|
||||
|
||||
def test_invalid_pattern_returns_none(self):
|
||||
assert safe_regex_sub(r"[invalid", "x", "test") is None
|
||||
|
||||
def test_timeout_returns_none(self, mocker: MockerFixture):
|
||||
mock_compile = mocker.patch("documents.regex.regex.compile")
|
||||
mock_compile.return_value.sub.side_effect = TimeoutError
|
||||
assert safe_regex_sub(r"\d+", "X", "test") is None
|
||||
|
||||
|
||||
class TestSafeRegexFinditer:
|
||||
def test_yields_matches(self):
|
||||
pattern = regex.compile(r"\d+")
|
||||
matches = list(safe_regex_finditer(pattern, "a1b22c333"))
|
||||
assert [m.group() for m in matches] == ["1", "22", "333"]
|
||||
|
||||
def test_no_matches(self):
|
||||
pattern = regex.compile(r"\d+")
|
||||
assert list(safe_regex_finditer(pattern, "abcdef")) == []
|
||||
|
||||
def test_timeout_stops_iteration(self, mocker: MockerFixture):
|
||||
mock_pattern = mocker.MagicMock()
|
||||
mock_pattern.finditer.side_effect = TimeoutError
|
||||
mock_pattern.pattern = r"\d+"
|
||||
assert list(safe_regex_finditer(mock_pattern, "test")) == []
|
||||
@@ -11,10 +11,12 @@ from documents.models import WorkflowAction
|
||||
from documents.models import WorkflowTrigger
|
||||
from documents.serialisers import TagSerializer
|
||||
from documents.signals.handlers import run_workflows
|
||||
from documents.tests.utils import DirectoriesMixin
|
||||
|
||||
|
||||
class TestTagHierarchy(APITestCase):
|
||||
class TestTagHierarchy(DirectoriesMixin, APITestCase):
|
||||
def setUp(self) -> None:
|
||||
super().setUp()
|
||||
self.user = User.objects.create_superuser(username="admin")
|
||||
self.client.force_authenticate(user=self.user)
|
||||
|
||||
|
||||
@@ -2,6 +2,7 @@ import uuid
|
||||
from unittest import mock
|
||||
|
||||
import celery
|
||||
from django.contrib.auth import get_user_model
|
||||
from django.test import TestCase
|
||||
|
||||
from documents.data_models import ConsumableDocument
|
||||
@@ -20,6 +21,11 @@ from documents.tests.utils import DirectoriesMixin
|
||||
|
||||
@mock.patch("documents.consumer.magic.from_file", fake_magic_from_file)
|
||||
class TestTaskSignalHandler(DirectoriesMixin, TestCase):
|
||||
@classmethod
|
||||
def setUpTestData(cls) -> None:
|
||||
super().setUpTestData()
|
||||
cls.user = get_user_model().objects.create_user(username="testuser")
|
||||
|
||||
def util_call_before_task_publish_handler(
|
||||
self,
|
||||
headers_to_use,
|
||||
@@ -57,7 +63,7 @@ class TestTaskSignalHandler(DirectoriesMixin, TestCase):
|
||||
),
|
||||
DocumentMetadataOverrides(
|
||||
title="Hello world",
|
||||
owner_id=1,
|
||||
owner_id=self.user.id,
|
||||
),
|
||||
),
|
||||
# kwargs
|
||||
@@ -75,7 +81,7 @@ class TestTaskSignalHandler(DirectoriesMixin, TestCase):
|
||||
self.assertEqual(headers["id"], task.task_id)
|
||||
self.assertEqual("hello-999.pdf", task.task_file_name)
|
||||
self.assertEqual(PaperlessTask.TaskName.CONSUME_FILE, task.task_name)
|
||||
self.assertEqual(1, task.owner_id)
|
||||
self.assertEqual(self.user.id, task.owner_id)
|
||||
self.assertEqual(celery.states.PENDING, task.status)
|
||||
|
||||
def test_task_prerun_handler(self) -> None:
|
||||
@@ -208,10 +214,12 @@ class TestTaskSignalHandler(DirectoriesMixin, TestCase):
|
||||
mime_type="application/pdf",
|
||||
)
|
||||
|
||||
with mock.patch("documents.index.add_or_update_document") as add:
|
||||
with mock.patch("documents.search.get_backend") as mock_get_backend:
|
||||
mock_backend = mock.MagicMock()
|
||||
mock_get_backend.return_value = mock_backend
|
||||
add_to_index(sender=None, document=root)
|
||||
|
||||
add.assert_called_once_with(root)
|
||||
mock_backend.add_or_update.assert_called_once_with(root, effective_content="")
|
||||
|
||||
def test_add_to_index_reindexes_root_for_version_documents(self) -> None:
|
||||
root = Document.objects.create(
|
||||
@@ -226,13 +234,17 @@ class TestTaskSignalHandler(DirectoriesMixin, TestCase):
|
||||
root_document=root,
|
||||
)
|
||||
|
||||
with mock.patch("documents.index.add_or_update_document") as add:
|
||||
with mock.patch("documents.search.get_backend") as mock_get_backend:
|
||||
mock_backend = mock.MagicMock()
|
||||
mock_get_backend.return_value = mock_backend
|
||||
add_to_index(sender=None, document=version)
|
||||
|
||||
self.assertEqual(add.call_count, 2)
|
||||
self.assertEqual(add.call_args_list[0].args[0].id, version.id)
|
||||
self.assertEqual(add.call_args_list[1].args[0].id, root.id)
|
||||
self.assertEqual(mock_backend.add_or_update.call_count, 1)
|
||||
self.assertEqual(
|
||||
add.call_args_list[1].kwargs,
|
||||
mock_backend.add_or_update.call_args_list[0].args[0].id,
|
||||
version.id,
|
||||
)
|
||||
self.assertEqual(
|
||||
mock_backend.add_or_update.call_args_list[0].kwargs,
|
||||
{"effective_content": version.content},
|
||||
)
|
||||
|
||||
@@ -23,29 +23,10 @@ from documents.tests.utils import DirectoriesMixin
|
||||
from documents.tests.utils import FileSystemAssertsMixin
|
||||
|
||||
|
||||
class TestIndexReindex(DirectoriesMixin, TestCase):
|
||||
def test_index_reindex(self) -> None:
|
||||
Document.objects.create(
|
||||
title="test",
|
||||
content="my document",
|
||||
checksum="wow",
|
||||
added=timezone.now(),
|
||||
created=timezone.now(),
|
||||
modified=timezone.now(),
|
||||
)
|
||||
|
||||
tasks.index_reindex()
|
||||
|
||||
@pytest.mark.django_db
|
||||
class TestIndexOptimize:
|
||||
def test_index_optimize(self) -> None:
|
||||
Document.objects.create(
|
||||
title="test",
|
||||
content="my document",
|
||||
checksum="wow",
|
||||
added=timezone.now(),
|
||||
created=timezone.now(),
|
||||
modified=timezone.now(),
|
||||
)
|
||||
|
||||
"""Index optimization task must execute without error (Tantivy handles optimization automatically)."""
|
||||
tasks.index_optimize()
|
||||
|
||||
|
||||
|
||||
@@ -31,6 +31,11 @@ from paperless.models import ApplicationConfiguration
|
||||
|
||||
|
||||
class TestViews(DirectoriesMixin, TestCase):
|
||||
@classmethod
|
||||
def setUpTestData(cls) -> None:
|
||||
super().setUpTestData()
|
||||
ApplicationConfiguration.objects.get_or_create()
|
||||
|
||||
def setUp(self) -> None:
|
||||
self.user = User.objects.create_user("testuser")
|
||||
super().setUp()
|
||||
|
||||
@@ -4802,6 +4802,7 @@ class TestWebhookSecurity:
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
@pytest.mark.usefixtures("_search_index")
|
||||
class TestDateWorkflowLocalization(
|
||||
SampleDirMixin,
|
||||
):
|
||||
|
||||
@@ -157,11 +157,17 @@ class DirectoriesMixin:
|
||||
"""
|
||||
|
||||
def setUp(self) -> None:
|
||||
from documents.search import reset_backend
|
||||
|
||||
reset_backend()
|
||||
self.dirs = setup_directories()
|
||||
super().setUp()
|
||||
|
||||
def tearDown(self) -> None:
|
||||
from documents.search import reset_backend
|
||||
|
||||
super().tearDown()
|
||||
reset_backend()
|
||||
remove_dirs(self.dirs)
|
||||
|
||||
|
||||
@@ -429,7 +435,11 @@ class DummyProgressManager:
|
||||
message: str,
|
||||
current_progress: int,
|
||||
max_progress: int,
|
||||
extra_args: dict[str, str | int] | None = None,
|
||||
*,
|
||||
document_id: int | None = None,
|
||||
owner_id: int | None = None,
|
||||
users_can_view: list[int] | None = None,
|
||||
groups_can_view: list[int] | None = None,
|
||||
) -> None:
|
||||
# Ensure the layer is open
|
||||
self.open()
|
||||
@@ -443,9 +453,10 @@ class DummyProgressManager:
|
||||
"max_progress": max_progress,
|
||||
"status": status,
|
||||
"message": message,
|
||||
"document_id": document_id,
|
||||
"owner_id": owner_id,
|
||||
"users_can_view": users_can_view or [],
|
||||
"groups_can_view": groups_can_view or [],
|
||||
},
|
||||
}
|
||||
if extra_args is not None:
|
||||
payload["data"].update(extra_args)
|
||||
|
||||
self.payloads.append(payload)
|
||||
|
||||
@@ -1,14 +1,27 @@
|
||||
import hashlib
|
||||
import logging
|
||||
import shutil
|
||||
from collections.abc import Callable
|
||||
from collections.abc import Iterable
|
||||
from os import utime
|
||||
from pathlib import Path
|
||||
from subprocess import CompletedProcess
|
||||
from subprocess import run
|
||||
from typing import TypeVar
|
||||
|
||||
from django.conf import settings
|
||||
from PIL import Image
|
||||
|
||||
_T = TypeVar("_T")
|
||||
|
||||
# A function that wraps an iterable — typically used to inject a progress bar.
|
||||
IterWrapper = Callable[[Iterable[_T]], Iterable[_T]]
|
||||
|
||||
|
||||
def identity(iterable: Iterable[_T]) -> Iterable[_T]:
|
||||
"""Return the iterable unchanged; the no-op default for IterWrapper."""
|
||||
return iterable
|
||||
|
||||
|
||||
def _coerce_to_path(
|
||||
source: Path | str,
|
||||
|
||||
@@ -100,7 +100,6 @@ from rest_framework.viewsets import ReadOnlyModelViewSet
|
||||
from rest_framework.viewsets import ViewSet
|
||||
|
||||
from documents import bulk_edit
|
||||
from documents import index
|
||||
from documents.bulk_download import ArchiveOnlyStrategy
|
||||
from documents.bulk_download import OriginalAndArchiveStrategy
|
||||
from documents.bulk_download import OriginalsOnlyStrategy
|
||||
@@ -1029,9 +1028,9 @@ class DocumentViewSet(
|
||||
response_data["content"] = content_doc.content
|
||||
response = Response(response_data)
|
||||
|
||||
from documents import index
|
||||
from documents.search import get_backend
|
||||
|
||||
index.add_or_update_document(refreshed_doc)
|
||||
get_backend().add_or_update(refreshed_doc)
|
||||
|
||||
document_updated.send(
|
||||
sender=self.__class__,
|
||||
@@ -1060,9 +1059,9 @@ class DocumentViewSet(
|
||||
return Response({"results": serializer.data, "selection_data": selection_data})
|
||||
|
||||
def destroy(self, request, *args, **kwargs):
|
||||
from documents import index
|
||||
from documents.search import get_backend
|
||||
|
||||
index.remove_document_from_index(self.get_object())
|
||||
get_backend().remove(self.get_object().pk)
|
||||
try:
|
||||
return super().destroy(request, *args, **kwargs)
|
||||
except Exception as e:
|
||||
@@ -1469,9 +1468,9 @@ class DocumentViewSet(
|
||||
doc.modified = timezone.now()
|
||||
doc.save()
|
||||
|
||||
from documents import index
|
||||
from documents.search import get_backend
|
||||
|
||||
index.add_or_update_document(doc)
|
||||
get_backend().add_or_update(doc)
|
||||
|
||||
notes = serializer.to_representation(doc).get("notes")
|
||||
|
||||
@@ -1506,9 +1505,9 @@ class DocumentViewSet(
|
||||
doc.modified = timezone.now()
|
||||
doc.save()
|
||||
|
||||
from documents import index
|
||||
from documents.search import get_backend
|
||||
|
||||
index.add_or_update_document(doc)
|
||||
get_backend().add_or_update(doc)
|
||||
|
||||
notes = serializer.to_representation(doc).get("notes")
|
||||
|
||||
@@ -1820,12 +1819,13 @@ class DocumentViewSet(
|
||||
"Cannot delete the root/original version. Delete the document instead.",
|
||||
)
|
||||
|
||||
from documents import index
|
||||
from documents.search import get_backend
|
||||
|
||||
index.remove_document_from_index(version_doc)
|
||||
_backend = get_backend()
|
||||
_backend.remove(version_doc.pk)
|
||||
version_doc_id = version_doc.id
|
||||
version_doc.delete()
|
||||
index.add_or_update_document(root_doc)
|
||||
_backend.add_or_update(root_doc)
|
||||
if settings.AUDIT_LOG_ENABLED:
|
||||
actor = (
|
||||
request.user if request.user and request.user.is_authenticated else None
|
||||
@@ -1995,11 +1995,23 @@ class ChatStreamingView(GenericAPIView):
|
||||
list=extend_schema(
|
||||
description="Document views including search",
|
||||
parameters=[
|
||||
OpenApiParameter(
|
||||
name="text",
|
||||
type=OpenApiTypes.STR,
|
||||
location=OpenApiParameter.QUERY,
|
||||
description="Simple Tantivy-backed text search query string",
|
||||
),
|
||||
OpenApiParameter(
|
||||
name="title_search",
|
||||
type=OpenApiTypes.STR,
|
||||
location=OpenApiParameter.QUERY,
|
||||
description="Simple Tantivy-backed title-only search query string",
|
||||
),
|
||||
OpenApiParameter(
|
||||
name="query",
|
||||
type=OpenApiTypes.STR,
|
||||
location=OpenApiParameter.QUERY,
|
||||
description="Advanced search query string",
|
||||
description="Advanced Tantivy search query string",
|
||||
),
|
||||
OpenApiParameter(
|
||||
name="full_perms",
|
||||
@@ -2025,9 +2037,7 @@ class ChatStreamingView(GenericAPIView):
|
||||
),
|
||||
)
|
||||
class UnifiedSearchViewSet(DocumentViewSet):
|
||||
def __init__(self, *args: Any, **kwargs: Any) -> None:
|
||||
super().__init__(*args, **kwargs)
|
||||
self.searcher = None
|
||||
SEARCH_PARAM_NAMES = ("text", "title_search", "query", "more_like_id")
|
||||
|
||||
def get_serializer_class(self):
|
||||
if self._is_search_request():
|
||||
@@ -2035,23 +2045,117 @@ class UnifiedSearchViewSet(DocumentViewSet):
|
||||
else:
|
||||
return DocumentSerializer
|
||||
|
||||
def _get_active_search_params(self, request: Request | None = None) -> list[str]:
|
||||
request = request or self.request
|
||||
return [
|
||||
param for param in self.SEARCH_PARAM_NAMES if param in request.query_params
|
||||
]
|
||||
|
||||
def _is_search_request(self):
|
||||
return (
|
||||
"query" in self.request.query_params
|
||||
or "more_like_id" in self.request.query_params
|
||||
)
|
||||
return bool(self._get_active_search_params())
|
||||
|
||||
def filter_queryset(self, queryset):
|
||||
filtered_queryset = super().filter_queryset(queryset)
|
||||
def list(self, request, *args, **kwargs):
|
||||
if not self._is_search_request():
|
||||
return super().list(request)
|
||||
|
||||
if self._is_search_request():
|
||||
if "query" in self.request.query_params:
|
||||
from documents import index
|
||||
from documents.search import SearchHit
|
||||
from documents.search import SearchMode
|
||||
from documents.search import TantivyBackend
|
||||
from documents.search import TantivyRelevanceList
|
||||
from documents.search import get_backend
|
||||
|
||||
query_class = index.DelayedFullTextQuery
|
||||
elif "more_like_id" in self.request.query_params:
|
||||
try:
|
||||
backend = get_backend()
|
||||
filtered_qs = self.filter_queryset(self.get_queryset())
|
||||
|
||||
user = None if request.user.is_superuser else request.user
|
||||
active_search_params = self._get_active_search_params(request)
|
||||
|
||||
if len(active_search_params) > 1:
|
||||
raise ValidationError(
|
||||
{
|
||||
"detail": _(
|
||||
"Specify only one of text, title_search, query, or more_like_id.",
|
||||
),
|
||||
},
|
||||
)
|
||||
|
||||
# Parse ordering param
|
||||
ordering_param = request.query_params.get("ordering", "")
|
||||
sort_reverse = ordering_param.startswith("-")
|
||||
sort_field_name = ordering_param.lstrip("-") or None
|
||||
|
||||
use_tantivy_sort = (
|
||||
sort_field_name in TantivyBackend.SORTABLE_FIELDS
|
||||
or sort_field_name is None
|
||||
)
|
||||
|
||||
# Compute the DRF page so we can tell Tantivy which slice to highlight
|
||||
try:
|
||||
requested_page = int(request.query_params.get("page", 1))
|
||||
except (TypeError, ValueError):
|
||||
requested_page = 1
|
||||
try:
|
||||
requested_page_size = int(
|
||||
request.query_params.get("page_size", self.paginator.page_size),
|
||||
)
|
||||
except (TypeError, ValueError):
|
||||
requested_page_size = self.paginator.page_size
|
||||
|
||||
if (
|
||||
"text" in request.query_params
|
||||
or "title_search" in request.query_params
|
||||
or "query" in request.query_params
|
||||
):
|
||||
if "text" in request.query_params:
|
||||
search_mode = SearchMode.TEXT
|
||||
query_str = request.query_params["text"]
|
||||
elif "title_search" in request.query_params:
|
||||
search_mode = SearchMode.TITLE
|
||||
query_str = request.query_params["title_search"]
|
||||
else:
|
||||
search_mode = SearchMode.QUERY
|
||||
query_str = request.query_params["query"]
|
||||
|
||||
# Step 1: Get all matching IDs (lightweight, no highlights)
|
||||
all_ids = backend.search_ids(
|
||||
query_str,
|
||||
user=user,
|
||||
sort_field=sort_field_name if use_tantivy_sort else None,
|
||||
sort_reverse=sort_reverse,
|
||||
search_mode=search_mode,
|
||||
)
|
||||
|
||||
# Step 2: Intersect with ORM-visible IDs (field filters)
|
||||
orm_ids = set(filtered_qs.values_list("pk", flat=True))
|
||||
|
||||
if use_tantivy_sort:
|
||||
# Fast path: Tantivy already ordered the IDs
|
||||
ordered_ids = [doc_id for doc_id in all_ids if doc_id in orm_ids]
|
||||
else:
|
||||
# Slow path: ORM must re-sort
|
||||
id_set = set(all_ids) & orm_ids
|
||||
ordered_ids = list(
|
||||
filtered_qs.filter(id__in=id_set).values_list(
|
||||
"pk",
|
||||
flat=True,
|
||||
),
|
||||
)
|
||||
|
||||
# Step 3: Fetch highlights for the displayed page only
|
||||
page_offset = (requested_page - 1) * requested_page_size
|
||||
page_ids = ordered_ids[page_offset : page_offset + requested_page_size]
|
||||
|
||||
page_hits = backend.highlight_hits(
|
||||
query_str,
|
||||
page_ids,
|
||||
search_mode=search_mode,
|
||||
)
|
||||
|
||||
else:
|
||||
# more_like_id path
|
||||
try:
|
||||
more_like_doc_id = int(self.request.query_params["more_like_id"])
|
||||
more_like_doc_id = int(request.query_params["more_like_id"])
|
||||
more_like_doc = Document.objects.select_related("owner").get(
|
||||
pk=more_like_doc_id,
|
||||
)
|
||||
@@ -2059,76 +2163,63 @@ class UnifiedSearchViewSet(DocumentViewSet):
|
||||
raise PermissionDenied(_("Invalid more_like_id"))
|
||||
|
||||
if not has_perms_owner_aware(
|
||||
self.request.user,
|
||||
request.user,
|
||||
"view_document",
|
||||
more_like_doc,
|
||||
):
|
||||
raise PermissionDenied(_("Insufficient permissions."))
|
||||
|
||||
from documents import index
|
||||
|
||||
query_class = index.DelayedMoreLikeThisQuery
|
||||
else:
|
||||
raise ValueError
|
||||
|
||||
return query_class(
|
||||
self.searcher,
|
||||
self.request.query_params,
|
||||
self.paginator.get_page_size(self.request),
|
||||
filter_queryset=filtered_queryset,
|
||||
)
|
||||
else:
|
||||
return filtered_queryset
|
||||
|
||||
def list(self, request, *args, **kwargs):
|
||||
if self._is_search_request():
|
||||
from documents import index
|
||||
|
||||
try:
|
||||
with index.open_index_searcher() as s:
|
||||
self.searcher = s
|
||||
queryset = self.filter_queryset(self.get_queryset())
|
||||
page = self.paginate_queryset(queryset)
|
||||
|
||||
serializer = self.get_serializer(page, many=True)
|
||||
response = self.get_paginated_response(serializer.data)
|
||||
|
||||
response.data["corrected_query"] = (
|
||||
queryset.suggested_correction
|
||||
if hasattr(queryset, "suggested_correction")
|
||||
else None
|
||||
)
|
||||
|
||||
if get_boolean(
|
||||
str(
|
||||
request.query_params.get(
|
||||
"include_selection_data",
|
||||
"false",
|
||||
),
|
||||
),
|
||||
):
|
||||
result_ids = queryset.get_result_ids()
|
||||
response.data["selection_data"] = (
|
||||
self._get_selection_data_for_queryset(
|
||||
Document.objects.filter(pk__in=result_ids),
|
||||
)
|
||||
)
|
||||
|
||||
return response
|
||||
except NotFound:
|
||||
raise
|
||||
except PermissionDenied as e:
|
||||
invalid_more_like_id_message = _("Invalid more_like_id")
|
||||
if str(e.detail) == str(invalid_more_like_id_message):
|
||||
return HttpResponseForbidden(invalid_more_like_id_message)
|
||||
return HttpResponseForbidden(_("Insufficient permissions."))
|
||||
except Exception as e:
|
||||
logger.warning(f"An error occurred listing search results: {e!s}")
|
||||
return HttpResponseBadRequest(
|
||||
"Error listing search results, check logs for more detail.",
|
||||
# Step 1: Get all matching IDs (lightweight)
|
||||
all_ids = backend.more_like_this_ids(
|
||||
more_like_doc_id,
|
||||
user=user,
|
||||
)
|
||||
else:
|
||||
return super().list(request)
|
||||
orm_ids = set(filtered_qs.values_list("pk", flat=True))
|
||||
ordered_ids = [doc_id for doc_id in all_ids if doc_id in orm_ids]
|
||||
|
||||
# Step 2: Build hit dicts for the displayed page
|
||||
# MLT has no text query, so no highlights needed
|
||||
page_offset = (requested_page - 1) * requested_page_size
|
||||
page_ids = ordered_ids[page_offset : page_offset + requested_page_size]
|
||||
page_hits = [
|
||||
SearchHit(id=doc_id, score=0.0, rank=rank, highlights={})
|
||||
for rank, doc_id in enumerate(page_ids, start=page_offset + 1)
|
||||
]
|
||||
|
||||
rl = TantivyRelevanceList(ordered_ids, page_hits, page_offset)
|
||||
page = self.paginate_queryset(rl)
|
||||
|
||||
if page is not None:
|
||||
serializer = self.get_serializer(page, many=True)
|
||||
response = self.get_paginated_response(serializer.data)
|
||||
response.data["corrected_query"] = None
|
||||
if get_boolean(
|
||||
str(request.query_params.get("include_selection_data", "false")),
|
||||
):
|
||||
response.data["selection_data"] = (
|
||||
self._get_selection_data_for_queryset(
|
||||
filtered_qs.filter(pk__in=ordered_ids),
|
||||
)
|
||||
)
|
||||
return response
|
||||
|
||||
serializer = self.get_serializer(page_hits, many=True)
|
||||
return Response(serializer.data)
|
||||
|
||||
except NotFound:
|
||||
raise
|
||||
except PermissionDenied as e:
|
||||
invalid_more_like_id_message = _("Invalid more_like_id")
|
||||
if str(e.detail) == str(invalid_more_like_id_message):
|
||||
return HttpResponseForbidden(invalid_more_like_id_message)
|
||||
return HttpResponseForbidden(_("Insufficient permissions."))
|
||||
except ValidationError:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.warning(f"An error occurred listing search results: {e!s}")
|
||||
return HttpResponseBadRequest(
|
||||
"Error listing search results, check logs for more detail.",
|
||||
)
|
||||
|
||||
@action(detail=False, methods=["GET"], name="Get Next ASN")
|
||||
def next_asn(self, request, *args, **kwargs):
|
||||
@@ -2946,18 +3037,9 @@ class SearchAutoCompleteView(GenericAPIView):
|
||||
else:
|
||||
limit = 10
|
||||
|
||||
from documents import index
|
||||
from documents.search import get_backend
|
||||
|
||||
ix = index.open_index()
|
||||
|
||||
return Response(
|
||||
index.autocomplete(
|
||||
ix,
|
||||
term,
|
||||
limit,
|
||||
user,
|
||||
),
|
||||
)
|
||||
return Response(get_backend().autocomplete(term, limit, user))
|
||||
|
||||
|
||||
@extend_schema_view(
|
||||
@@ -3004,6 +3086,9 @@ class GlobalSearchView(PassUserMixin):
|
||||
serializer_class = SearchResultSerializer
|
||||
|
||||
def get(self, request, *args, **kwargs):
|
||||
from documents.search import SearchMode
|
||||
from documents.search import get_backend
|
||||
|
||||
query = request.query_params.get("query", None)
|
||||
if query is None:
|
||||
return HttpResponseBadRequest("Query required")
|
||||
@@ -3020,24 +3105,25 @@ class GlobalSearchView(PassUserMixin):
|
||||
"view_document",
|
||||
Document,
|
||||
)
|
||||
# First search by title
|
||||
docs = all_docs.filter(title__icontains=query)
|
||||
if not db_only and len(docs) < OBJECT_LIMIT:
|
||||
# If we don't have enough results, search by content
|
||||
from documents import index
|
||||
|
||||
with index.open_index_searcher() as s:
|
||||
fts_query = index.DelayedFullTextQuery(
|
||||
s,
|
||||
request.query_params,
|
||||
OBJECT_LIMIT,
|
||||
filter_queryset=all_docs,
|
||||
)
|
||||
results = fts_query[0:1]
|
||||
docs = docs | Document.objects.filter(
|
||||
id__in=[r["id"] for r in results],
|
||||
)
|
||||
docs = docs[:OBJECT_LIMIT]
|
||||
if db_only:
|
||||
docs = all_docs.filter(title__icontains=query)[:OBJECT_LIMIT]
|
||||
else:
|
||||
user = None if request.user.is_superuser else request.user
|
||||
fts_results = get_backend().search(
|
||||
query,
|
||||
user=user,
|
||||
page=1,
|
||||
page_size=1000,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
search_mode=SearchMode.TEXT,
|
||||
)
|
||||
docs_by_id = all_docs.in_bulk([hit["id"] for hit in fts_results.hits])
|
||||
docs = [
|
||||
docs_by_id[hit["id"]]
|
||||
for hit in fts_results.hits
|
||||
if hit["id"] in docs_by_id
|
||||
][:OBJECT_LIMIT]
|
||||
saved_views = (
|
||||
get_objects_for_user_owner_aware(
|
||||
request.user,
|
||||
@@ -4279,10 +4365,16 @@ class SystemStatusView(PassUserMixin):
|
||||
|
||||
index_error = None
|
||||
try:
|
||||
ix = index.open_index()
|
||||
from documents.search import get_backend
|
||||
|
||||
get_backend() # triggers open/rebuild; raises on error
|
||||
index_status = "OK"
|
||||
index_last_modified = make_aware(
|
||||
datetime.fromtimestamp(ix.last_modified()),
|
||||
# Use the most-recently modified file in the index directory as a proxy
|
||||
# for last index write time (Tantivy has no single last_modified() call).
|
||||
index_dir = settings.INDEX_DIR
|
||||
mtimes = [p.stat().st_mtime for p in index_dir.iterdir() if p.is_file()]
|
||||
index_last_modified = (
|
||||
make_aware(datetime.fromtimestamp(max(mtimes))) if mtimes else None
|
||||
)
|
||||
except Exception as e:
|
||||
index_status = "ERROR"
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,11 +1,59 @@
|
||||
import hmac
|
||||
import os
|
||||
import pickle
|
||||
from hashlib import sha256
|
||||
|
||||
from celery import Celery
|
||||
from celery.signals import worker_process_init
|
||||
from kombu.serialization import register
|
||||
|
||||
# Set the default Django settings module for the 'celery' program.
|
||||
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "paperless.settings")
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Signed-pickle serializer: pickle with HMAC-SHA256 integrity verification.
|
||||
#
|
||||
# Protects against malicious pickle injection via an exposed Redis broker.
|
||||
# Messages are signed on the producer side and verified before deserialization
|
||||
# on the worker side using Django's SECRET_KEY.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
HMAC_SIZE = 32 # SHA-256 digest length
|
||||
|
||||
|
||||
def _get_signing_key() -> bytes:
|
||||
from django.conf import settings
|
||||
|
||||
return settings.SECRET_KEY.encode()
|
||||
|
||||
|
||||
def signed_pickle_dumps(obj: object) -> bytes:
|
||||
data = pickle.dumps(obj, protocol=pickle.HIGHEST_PROTOCOL)
|
||||
signature = hmac.new(_get_signing_key(), data, sha256).digest()
|
||||
return signature + data
|
||||
|
||||
|
||||
def signed_pickle_loads(payload: bytes) -> object:
|
||||
if len(payload) < HMAC_SIZE:
|
||||
msg = "Signed-pickle payload too short"
|
||||
raise ValueError(msg)
|
||||
signature = payload[:HMAC_SIZE]
|
||||
data = payload[HMAC_SIZE:]
|
||||
expected = hmac.new(_get_signing_key(), data, sha256).digest()
|
||||
if not hmac.compare_digest(signature, expected):
|
||||
msg = "Signed-pickle HMAC verification failed — message may have been tampered with"
|
||||
raise ValueError(msg)
|
||||
return pickle.loads(data)
|
||||
|
||||
|
||||
register(
|
||||
"signed-pickle",
|
||||
signed_pickle_dumps,
|
||||
signed_pickle_loads,
|
||||
content_type="application/x-signed-pickle",
|
||||
content_encoding="binary",
|
||||
)
|
||||
|
||||
app = Celery("paperless")
|
||||
|
||||
# Using a string here means the worker doesn't have to serialize
|
||||
|
||||
@@ -1,16 +1,27 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import Any
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from channels.generic.websocket import AsyncWebsocketConsumer
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from django.contrib.auth.base_user import AbstractBaseUser
|
||||
from django.contrib.auth.models import AnonymousUser
|
||||
|
||||
from documents.plugins.helpers import DocumentsDeletedPayload
|
||||
from documents.plugins.helpers import DocumentUpdatedPayload
|
||||
from documents.plugins.helpers import PermissionsData
|
||||
from documents.plugins.helpers import StatusUpdatePayload
|
||||
|
||||
|
||||
class StatusConsumer(AsyncWebsocketConsumer):
|
||||
def _authenticated(self) -> bool:
|
||||
user: Any = self.scope.get("user")
|
||||
user: AbstractBaseUser | AnonymousUser | None = self.scope.get("user")
|
||||
return user is not None and user.is_authenticated
|
||||
|
||||
async def _can_view(self, data: dict[str, Any]) -> bool:
|
||||
user: Any = self.scope.get("user")
|
||||
async def _can_view(self, data: PermissionsData) -> bool:
|
||||
user: AbstractBaseUser | AnonymousUser | None = self.scope.get("user")
|
||||
if user is None:
|
||||
return False
|
||||
owner_id = data.get("owner_id")
|
||||
@@ -32,19 +43,19 @@ class StatusConsumer(AsyncWebsocketConsumer):
|
||||
async def disconnect(self, code: int) -> None:
|
||||
await self.channel_layer.group_discard("status_updates", self.channel_name)
|
||||
|
||||
async def status_update(self, event: dict[str, Any]) -> None:
|
||||
async def status_update(self, event: StatusUpdatePayload) -> None:
|
||||
if not self._authenticated():
|
||||
await self.close()
|
||||
elif await self._can_view(event["data"]):
|
||||
await self.send(json.dumps(event))
|
||||
|
||||
async def documents_deleted(self, event: dict[str, Any]) -> None:
|
||||
async def documents_deleted(self, event: DocumentsDeletedPayload) -> None:
|
||||
if not self._authenticated():
|
||||
await self.close()
|
||||
else:
|
||||
await self.send(json.dumps(event))
|
||||
|
||||
async def document_updated(self, event: dict[str, Any]) -> None:
|
||||
async def document_updated(self, event: DocumentUpdatedPayload) -> None:
|
||||
if not self._authenticated():
|
||||
await self.close()
|
||||
elif await self._can_view(event["data"]):
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user