mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-04-10 18:18:50 +00:00
Compare commits
3 Commits
feature-se
...
feature-cl
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1a26514a96 | ||
|
|
1fefd506b7 | ||
|
|
68b866aeee |
2
.github/dependabot.yml
vendored
2
.github/dependabot.yml
vendored
@@ -164,8 +164,6 @@ updates:
|
||||
directory: "/" # Location of package manifests
|
||||
schedule:
|
||||
interval: "monthly"
|
||||
cooldown:
|
||||
default-days: 7
|
||||
groups:
|
||||
pre-commit-dependencies:
|
||||
patterns:
|
||||
|
||||
61
.github/workflows/ci-backend.yml
vendored
61
.github/workflows/ci-backend.yml
vendored
@@ -30,13 +30,10 @@ jobs:
|
||||
persist-credentials: false
|
||||
- name: Decide run mode
|
||||
id: force
|
||||
env:
|
||||
EVENT_NAME: ${{ github.event_name }}
|
||||
REF_NAME: ${{ github.ref_name }}
|
||||
run: |
|
||||
if [[ "${EVENT_NAME}" == "workflow_dispatch" ]]; then
|
||||
if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
|
||||
echo "run_all=true" >> "$GITHUB_OUTPUT"
|
||||
elif [[ "${EVENT_NAME}" == "push" && ( "${REF_NAME}" == "main" || "${REF_NAME}" == "dev" ) ]]; then
|
||||
elif [[ "${{ github.event_name }}" == "push" && ( "${{ github.ref_name }}" == "main" || "${{ github.ref_name }}" == "dev" ) ]]; then
|
||||
echo "run_all=true" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "run_all=false" >> "$GITHUB_OUTPUT"
|
||||
@@ -44,22 +41,15 @@ jobs:
|
||||
- name: Set diff range
|
||||
id: range
|
||||
if: steps.force.outputs.run_all != 'true'
|
||||
env:
|
||||
BEFORE_SHA: ${{ github.event.before }}
|
||||
DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
|
||||
EVENT_CREATED: ${{ github.event.created }}
|
||||
EVENT_NAME: ${{ github.event_name }}
|
||||
PR_BASE_SHA: ${{ github.event.pull_request.base.sha }}
|
||||
SHA: ${{ github.sha }}
|
||||
run: |
|
||||
if [[ "${EVENT_NAME}" == "pull_request" ]]; then
|
||||
echo "base=${PR_BASE_SHA}" >> "$GITHUB_OUTPUT"
|
||||
elif [[ "${EVENT_CREATED}" == "true" ]]; then
|
||||
echo "base=${DEFAULT_BRANCH}" >> "$GITHUB_OUTPUT"
|
||||
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
|
||||
echo "base=${{ github.event.pull_request.base.sha }}" >> "$GITHUB_OUTPUT"
|
||||
elif [[ "${{ github.event.created }}" == "true" ]]; then
|
||||
echo "base=${{ github.event.repository.default_branch }}" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "base=${BEFORE_SHA}" >> "$GITHUB_OUTPUT"
|
||||
echo "base=${{ github.event.before }}" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
echo "ref=${SHA}" >> "$GITHUB_OUTPUT"
|
||||
echo "ref=${{ github.sha }}" >> "$GITHUB_OUTPUT"
|
||||
- name: Detect changes
|
||||
id: filter
|
||||
if: steps.force.outputs.run_all != 'true'
|
||||
@@ -100,7 +90,7 @@ jobs:
|
||||
with:
|
||||
python-version: "${{ matrix.python-version }}"
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@cec208311dfd045dd5311c1add060b2062131d57 # v8.0.0
|
||||
uses: astral-sh/setup-uv@5a095e7a2014a4212f075830d4f7277575a9d098 # v7.3.1
|
||||
with:
|
||||
version: ${{ env.DEFAULT_UV_VERSION }}
|
||||
enable-cache: true
|
||||
@@ -114,11 +104,9 @@ jobs:
|
||||
run: |
|
||||
sudo cp docker/rootfs/etc/ImageMagick-6/paperless-policy.xml /etc/ImageMagick-6/policy.xml
|
||||
- name: Install Python dependencies
|
||||
env:
|
||||
PYTHON_VERSION: ${{ steps.setup-python.outputs.python-version }}
|
||||
run: |
|
||||
uv sync \
|
||||
--python "${PYTHON_VERSION}" \
|
||||
--python ${{ steps.setup-python.outputs.python-version }} \
|
||||
--group testing \
|
||||
--frozen
|
||||
- name: List installed Python dependencies
|
||||
@@ -126,27 +114,26 @@ jobs:
|
||||
uv pip list
|
||||
- name: Install NLTK data
|
||||
run: |
|
||||
uv run python -m nltk.downloader punkt punkt_tab snowball_data stopwords -d "${NLTK_DATA}"
|
||||
uv run python -m nltk.downloader punkt punkt_tab snowball_data stopwords -d ${{ env.NLTK_DATA }}
|
||||
- name: Run tests
|
||||
env:
|
||||
NLTK_DATA: ${{ env.NLTK_DATA }}
|
||||
PAPERLESS_CI_TEST: 1
|
||||
PYTHON_VERSION: ${{ steps.setup-python.outputs.python-version }}
|
||||
run: |
|
||||
uv run \
|
||||
--python "${PYTHON_VERSION}" \
|
||||
--python ${{ steps.setup-python.outputs.python-version }} \
|
||||
--dev \
|
||||
--frozen \
|
||||
pytest
|
||||
- name: Upload test results to Codecov
|
||||
if: always()
|
||||
uses: codecov/codecov-action@57e3a136b779b570ffcdbf80b3bdc90e7fab3de2 # v6.0.0
|
||||
uses: codecov/codecov-action@671740ac38dd9b0130fbe1cec585b89eea48d3de # v5.5.2
|
||||
with:
|
||||
flags: backend-python-${{ matrix.python-version }}
|
||||
files: junit.xml
|
||||
report_type: test_results
|
||||
- name: Upload coverage to Codecov
|
||||
uses: codecov/codecov-action@57e3a136b779b570ffcdbf80b3bdc90e7fab3de2 # v6.0.0
|
||||
uses: codecov/codecov-action@671740ac38dd9b0130fbe1cec585b89eea48d3de # v5.5.2
|
||||
with:
|
||||
flags: backend-python-${{ matrix.python-version }}
|
||||
files: coverage.xml
|
||||
@@ -176,17 +163,15 @@ jobs:
|
||||
with:
|
||||
python-version: "${{ env.DEFAULT_PYTHON }}"
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@cec208311dfd045dd5311c1add060b2062131d57 # v8.0.0
|
||||
uses: astral-sh/setup-uv@5a095e7a2014a4212f075830d4f7277575a9d098 # v7.3.1
|
||||
with:
|
||||
version: ${{ env.DEFAULT_UV_VERSION }}
|
||||
enable-cache: true
|
||||
python-version: ${{ steps.setup-python.outputs.python-version }}
|
||||
- name: Install Python dependencies
|
||||
env:
|
||||
PYTHON_VERSION: ${{ steps.setup-python.outputs.python-version }}
|
||||
run: |
|
||||
uv sync \
|
||||
--python "${PYTHON_VERSION}" \
|
||||
--python ${{ steps.setup-python.outputs.python-version }} \
|
||||
--group testing \
|
||||
--group typing \
|
||||
--frozen
|
||||
@@ -222,23 +207,19 @@ jobs:
|
||||
runs-on: ubuntu-slim
|
||||
steps:
|
||||
- name: Check gate
|
||||
env:
|
||||
BACKEND_CHANGED: ${{ needs.changes.outputs.backend_changed }}
|
||||
TEST_RESULT: ${{ needs.test.result }}
|
||||
TYPING_RESULT: ${{ needs.typing.result }}
|
||||
run: |
|
||||
if [[ "${BACKEND_CHANGED}" != "true" ]]; then
|
||||
if [[ "${{ needs.changes.outputs.backend_changed }}" != "true" ]]; then
|
||||
echo "No backend-relevant changes detected."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
if [[ "${TEST_RESULT}" != "success" ]]; then
|
||||
echo "::error::Backend test job result: ${TEST_RESULT}"
|
||||
if [[ "${{ needs.test.result }}" != "success" ]]; then
|
||||
echo "::error::Backend test job result: ${{ needs.test.result }}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [[ "${TYPING_RESULT}" != "success" ]]; then
|
||||
echo "::error::Backend typing job result: ${TYPING_RESULT}"
|
||||
if [[ "${{ needs.typing.result }}" != "success" ]]; then
|
||||
echo "::error::Backend typing job result: ${{ needs.typing.result }}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
||||
1
.github/workflows/ci-docker.yml
vendored
1
.github/workflows/ci-docker.yml
vendored
@@ -166,7 +166,6 @@ jobs:
|
||||
runs-on: ubuntu-24.04
|
||||
needs: build-arch
|
||||
if: needs.build-arch.outputs.should-push == 'true'
|
||||
environment: image-publishing
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
|
||||
2
.github/workflows/ci-docs.yml
vendored
2
.github/workflows/ci-docs.yml
vendored
@@ -78,7 +78,7 @@ jobs:
|
||||
with:
|
||||
python-version: ${{ env.DEFAULT_PYTHON_VERSION }}
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@cec208311dfd045dd5311c1add060b2062131d57 # v8.0.0
|
||||
uses: astral-sh/setup-uv@5a095e7a2014a4212f075830d4f7277575a9d098 # v7.3.1
|
||||
with:
|
||||
version: ${{ env.DEFAULT_UV_VERSION }}
|
||||
enable-cache: true
|
||||
|
||||
62
.github/workflows/ci-frontend.yml
vendored
62
.github/workflows/ci-frontend.yml
vendored
@@ -27,13 +27,10 @@ jobs:
|
||||
persist-credentials: false
|
||||
- name: Decide run mode
|
||||
id: force
|
||||
env:
|
||||
EVENT_NAME: ${{ github.event_name }}
|
||||
REF_NAME: ${{ github.ref_name }}
|
||||
run: |
|
||||
if [[ "${EVENT_NAME}" == "workflow_dispatch" ]]; then
|
||||
if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
|
||||
echo "run_all=true" >> "$GITHUB_OUTPUT"
|
||||
elif [[ "${EVENT_NAME}" == "push" && ( "${REF_NAME}" == "main" || "${REF_NAME}" == "dev" ) ]]; then
|
||||
elif [[ "${{ github.event_name }}" == "push" && ( "${{ github.ref_name }}" == "main" || "${{ github.ref_name }}" == "dev" ) ]]; then
|
||||
echo "run_all=true" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "run_all=false" >> "$GITHUB_OUTPUT"
|
||||
@@ -41,22 +38,15 @@ jobs:
|
||||
- name: Set diff range
|
||||
id: range
|
||||
if: steps.force.outputs.run_all != 'true'
|
||||
env:
|
||||
BEFORE_SHA: ${{ github.event.before }}
|
||||
DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
|
||||
EVENT_CREATED: ${{ github.event.created }}
|
||||
EVENT_NAME: ${{ github.event_name }}
|
||||
PR_BASE_SHA: ${{ github.event.pull_request.base.sha }}
|
||||
SHA: ${{ github.sha }}
|
||||
run: |
|
||||
if [[ "${EVENT_NAME}" == "pull_request" ]]; then
|
||||
echo "base=${PR_BASE_SHA}" >> "$GITHUB_OUTPUT"
|
||||
elif [[ "${EVENT_CREATED}" == "true" ]]; then
|
||||
echo "base=${DEFAULT_BRANCH}" >> "$GITHUB_OUTPUT"
|
||||
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
|
||||
echo "base=${{ github.event.pull_request.base.sha }}" >> "$GITHUB_OUTPUT"
|
||||
elif [[ "${{ github.event.created }}" == "true" ]]; then
|
||||
echo "base=${{ github.event.repository.default_branch }}" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "base=${BEFORE_SHA}" >> "$GITHUB_OUTPUT"
|
||||
echo "base=${{ github.event.before }}" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
echo "ref=${SHA}" >> "$GITHUB_OUTPUT"
|
||||
echo "ref=${{ github.sha }}" >> "$GITHUB_OUTPUT"
|
||||
- name: Detect changes
|
||||
id: filter
|
||||
if: steps.force.outputs.run_all != 'true'
|
||||
@@ -174,13 +164,13 @@ jobs:
|
||||
run: cd src-ui && pnpm run test --max-workers=2 --shard=${{ matrix.shard-index }}/${{ matrix.shard-count }}
|
||||
- name: Upload test results to Codecov
|
||||
if: always()
|
||||
uses: codecov/codecov-action@57e3a136b779b570ffcdbf80b3bdc90e7fab3de2 # v6.0.0
|
||||
uses: codecov/codecov-action@671740ac38dd9b0130fbe1cec585b89eea48d3de # v5.5.2
|
||||
with:
|
||||
flags: frontend-node-${{ matrix.node-version }}
|
||||
directory: src-ui/
|
||||
report_type: test_results
|
||||
- name: Upload coverage to Codecov
|
||||
uses: codecov/codecov-action@57e3a136b779b570ffcdbf80b3bdc90e7fab3de2 # v6.0.0
|
||||
uses: codecov/codecov-action@671740ac38dd9b0130fbe1cec585b89eea48d3de # v5.5.2
|
||||
with:
|
||||
flags: frontend-node-${{ matrix.node-version }}
|
||||
directory: src-ui/coverage/
|
||||
@@ -191,7 +181,7 @@ jobs:
|
||||
runs-on: ubuntu-24.04
|
||||
permissions:
|
||||
contents: read
|
||||
container: mcr.microsoft.com/playwright:v1.59.0-noble
|
||||
container: mcr.microsoft.com/playwright:v1.58.2-noble
|
||||
env:
|
||||
PLAYWRIGHT_BROWSERS_PATH: /ms-playwright
|
||||
PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD: 1
|
||||
@@ -234,7 +224,6 @@ jobs:
|
||||
needs: [changes, unit-tests, e2e-tests]
|
||||
if: needs.changes.outputs.frontend_changed == 'true'
|
||||
runs-on: ubuntu-24.04
|
||||
environment: bundle-analysis
|
||||
permissions:
|
||||
contents: read
|
||||
steps:
|
||||
@@ -273,41 +262,34 @@ jobs:
|
||||
runs-on: ubuntu-slim
|
||||
steps:
|
||||
- name: Check gate
|
||||
env:
|
||||
BUNDLE_ANALYSIS_RESULT: ${{ needs['bundle-analysis'].result }}
|
||||
E2E_RESULT: ${{ needs['e2e-tests'].result }}
|
||||
FRONTEND_CHANGED: ${{ needs.changes.outputs.frontend_changed }}
|
||||
INSTALL_RESULT: ${{ needs['install-dependencies'].result }}
|
||||
LINT_RESULT: ${{ needs.lint.result }}
|
||||
UNIT_RESULT: ${{ needs['unit-tests'].result }}
|
||||
run: |
|
||||
if [[ "${FRONTEND_CHANGED}" != "true" ]]; then
|
||||
if [[ "${{ needs.changes.outputs.frontend_changed }}" != "true" ]]; then
|
||||
echo "No frontend-relevant changes detected."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
if [[ "${INSTALL_RESULT}" != "success" ]]; then
|
||||
echo "::error::Frontend install job result: ${INSTALL_RESULT}"
|
||||
if [[ "${{ needs['install-dependencies'].result }}" != "success" ]]; then
|
||||
echo "::error::Frontend install job result: ${{ needs['install-dependencies'].result }}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [[ "${LINT_RESULT}" != "success" ]]; then
|
||||
echo "::error::Frontend lint job result: ${LINT_RESULT}"
|
||||
if [[ "${{ needs.lint.result }}" != "success" ]]; then
|
||||
echo "::error::Frontend lint job result: ${{ needs.lint.result }}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [[ "${UNIT_RESULT}" != "success" ]]; then
|
||||
echo "::error::Frontend unit-tests job result: ${UNIT_RESULT}"
|
||||
if [[ "${{ needs['unit-tests'].result }}" != "success" ]]; then
|
||||
echo "::error::Frontend unit-tests job result: ${{ needs['unit-tests'].result }}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [[ "${E2E_RESULT}" != "success" ]]; then
|
||||
echo "::error::Frontend e2e-tests job result: ${E2E_RESULT}"
|
||||
if [[ "${{ needs['e2e-tests'].result }}" != "success" ]]; then
|
||||
echo "::error::Frontend e2e-tests job result: ${{ needs['e2e-tests'].result }}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [[ "${BUNDLE_ANALYSIS_RESULT}" != "success" ]]; then
|
||||
echo "::error::Frontend bundle-analysis job result: ${BUNDLE_ANALYSIS_RESULT}"
|
||||
if [[ "${{ needs['bundle-analysis'].result }}" != "success" ]]; then
|
||||
echo "::error::Frontend bundle-analysis job result: ${{ needs['bundle-analysis'].result }}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
||||
42
.github/workflows/ci-release.yml
vendored
42
.github/workflows/ci-release.yml
vendored
@@ -58,27 +58,23 @@ jobs:
|
||||
with:
|
||||
python-version: ${{ env.DEFAULT_PYTHON_VERSION }}
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@cec208311dfd045dd5311c1add060b2062131d57 # v8.0.0
|
||||
uses: astral-sh/setup-uv@5a095e7a2014a4212f075830d4f7277575a9d098 # v7.3.1
|
||||
with:
|
||||
version: ${{ env.DEFAULT_UV_VERSION }}
|
||||
enable-cache: false
|
||||
python-version: ${{ steps.setup-python.outputs.python-version }}
|
||||
- name: Install Python dependencies
|
||||
env:
|
||||
PYTHON_VERSION: ${{ steps.setup-python.outputs.python-version }}
|
||||
run: |
|
||||
uv sync --python "${PYTHON_VERSION}" --dev --frozen
|
||||
uv sync --python ${{ steps.setup-python.outputs.python-version }} --dev --frozen
|
||||
- name: Install system dependencies
|
||||
run: |
|
||||
sudo apt-get update -qq
|
||||
sudo apt-get install -qq --no-install-recommends gettext liblept5
|
||||
# ---- Build Documentation ----
|
||||
- name: Build documentation
|
||||
env:
|
||||
PYTHON_VERSION: ${{ steps.setup-python.outputs.python-version }}
|
||||
run: |
|
||||
uv run \
|
||||
--python "${PYTHON_VERSION}" \
|
||||
--python ${{ steps.setup-python.outputs.python-version }} \
|
||||
--dev \
|
||||
--frozen \
|
||||
zensical build --clean
|
||||
@@ -87,20 +83,16 @@ jobs:
|
||||
run: |
|
||||
uv export --quiet --no-dev --all-extras --format requirements-txt --output-file requirements.txt
|
||||
- name: Compile messages
|
||||
env:
|
||||
PYTHON_VERSION: ${{ steps.setup-python.outputs.python-version }}
|
||||
run: |
|
||||
cd src/
|
||||
uv run \
|
||||
--python "${PYTHON_VERSION}" \
|
||||
--python ${{ steps.setup-python.outputs.python-version }} \
|
||||
manage.py compilemessages
|
||||
- name: Collect static files
|
||||
env:
|
||||
PYTHON_VERSION: ${{ steps.setup-python.outputs.python-version }}
|
||||
run: |
|
||||
cd src/
|
||||
uv run \
|
||||
--python "${PYTHON_VERSION}" \
|
||||
--python ${{ steps.setup-python.outputs.python-version }} \
|
||||
manage.py collectstatic --no-input --clear
|
||||
- name: Assemble release package
|
||||
run: |
|
||||
@@ -209,7 +201,7 @@ jobs:
|
||||
with:
|
||||
python-version: ${{ env.DEFAULT_PYTHON_VERSION }}
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@cec208311dfd045dd5311c1add060b2062131d57 # v8.0.0
|
||||
uses: astral-sh/setup-uv@5a095e7a2014a4212f075830d4f7277575a9d098 # v7.3.1
|
||||
with:
|
||||
version: ${{ env.DEFAULT_UV_VERSION }}
|
||||
enable-cache: false
|
||||
@@ -218,13 +210,9 @@ jobs:
|
||||
working-directory: docs
|
||||
env:
|
||||
CHANGELOG: ${{ needs.publish-release.outputs.changelog }}
|
||||
PYTHON_VERSION: ${{ steps.setup-python.outputs.python-version }}
|
||||
VERSION: ${{ needs.publish-release.outputs.version }}
|
||||
run: |
|
||||
branch_name="${VERSION}-changelog"
|
||||
|
||||
git branch "${branch_name}"
|
||||
git checkout "${branch_name}"
|
||||
git branch ${{ needs.publish-release.outputs.version }}-changelog
|
||||
git checkout ${{ needs.publish-release.outputs.version }}-changelog
|
||||
|
||||
printf '# Changelog\n\n%s\n' "${CHANGELOG}" > changelog-new.md
|
||||
|
||||
@@ -239,28 +227,24 @@ jobs:
|
||||
mv changelog-new.md changelog.md
|
||||
|
||||
uv run \
|
||||
--python "${PYTHON_VERSION}" \
|
||||
--python ${{ steps.setup-python.outputs.python-version }} \
|
||||
--dev \
|
||||
prek run --files changelog.md || true
|
||||
|
||||
git config --global user.name "github-actions"
|
||||
git config --global user.email "41898282+github-actions[bot]@users.noreply.github.com"
|
||||
git commit -am "Changelog ${VERSION} - GHA"
|
||||
git push origin "${branch_name}"
|
||||
git commit -am "Changelog ${{ needs.publish-release.outputs.version }} - GHA"
|
||||
git push origin ${{ needs.publish-release.outputs.version }}-changelog
|
||||
- name: Create pull request
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
|
||||
env:
|
||||
VERSION: ${{ needs.publish-release.outputs.version }}
|
||||
with:
|
||||
script: |
|
||||
const { repo, owner } = context.repo;
|
||||
const version = process.env.VERSION;
|
||||
const head = `${version}-changelog`;
|
||||
const result = await github.rest.pulls.create({
|
||||
title: `Documentation: Add ${version} changelog`,
|
||||
title: 'Documentation: Add ${{ needs.publish-release.outputs.version }} changelog',
|
||||
owner,
|
||||
repo,
|
||||
head,
|
||||
head: '${{ needs.publish-release.outputs.version }}-changelog',
|
||||
base: 'main',
|
||||
body: 'This PR is auto-generated by CI.'
|
||||
});
|
||||
|
||||
2
.github/workflows/cleanup-tags.yml
vendored
2
.github/workflows/cleanup-tags.yml
vendored
@@ -18,7 +18,6 @@ jobs:
|
||||
name: Cleanup Image Tags for ${{ matrix.primary-name }}
|
||||
if: github.repository_owner == 'paperless-ngx'
|
||||
runs-on: ubuntu-24.04
|
||||
environment: registry-maintenance
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
@@ -45,7 +44,6 @@ jobs:
|
||||
runs-on: ubuntu-24.04
|
||||
needs:
|
||||
- cleanup-images
|
||||
environment: registry-maintenance
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
|
||||
4
.github/workflows/codeql-analysis.yml
vendored
4
.github/workflows/codeql-analysis.yml
vendored
@@ -39,7 +39,7 @@ jobs:
|
||||
persist-credentials: false
|
||||
# Initializes the CodeQL tools for scanning.
|
||||
- name: Initialize CodeQL
|
||||
uses: github/codeql-action/init@c10b8064de6f491fea524254123dbe5e09572f13 # v4.35.1
|
||||
uses: github/codeql-action/init@c793b717bc78562f491db7b0e93a3a178b099162 # v4.32.5
|
||||
with:
|
||||
languages: ${{ matrix.language }}
|
||||
# If you wish to specify custom queries, you can do so here or in a config file.
|
||||
@@ -47,4 +47,4 @@ jobs:
|
||||
# Prefix the list here with "+" to use these queries and those in the config file.
|
||||
# queries: ./path/to/local/query, your-org/your-repo/queries@main
|
||||
- name: Perform CodeQL Analysis
|
||||
uses: github/codeql-action/analyze@c10b8064de6f491fea524254123dbe5e09572f13 # v4.35.1
|
||||
uses: github/codeql-action/analyze@c793b717bc78562f491db7b0e93a3a178b099162 # v4.32.5
|
||||
|
||||
3
.github/workflows/crowdin.yml
vendored
3
.github/workflows/crowdin.yml
vendored
@@ -14,7 +14,6 @@ jobs:
|
||||
name: Crowdin Sync
|
||||
if: github.repository_owner == 'paperless-ngx'
|
||||
runs-on: ubuntu-24.04
|
||||
environment: translation-sync
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
@@ -22,7 +21,7 @@ jobs:
|
||||
token: ${{ secrets.PNGX_BOT_PAT }}
|
||||
persist-credentials: false
|
||||
- name: crowdin action
|
||||
uses: crowdin/github-action@7ca9c452bfe9197d3bb7fa83a4d7e2b0c9ae835d # v2.16.0
|
||||
uses: crowdin/github-action@8818ff65bfc4322384f983ea37e3926948c11745 # v2.15.0
|
||||
with:
|
||||
upload_translations: false
|
||||
download_translations: true
|
||||
|
||||
3
.github/workflows/translate-strings.yml
vendored
3
.github/workflows/translate-strings.yml
vendored
@@ -7,7 +7,6 @@ jobs:
|
||||
generate-translate-strings:
|
||||
name: Generate Translation Strings
|
||||
runs-on: ubuntu-latest
|
||||
environment: translation-sync
|
||||
permissions:
|
||||
contents: write
|
||||
steps:
|
||||
@@ -27,7 +26,7 @@ jobs:
|
||||
sudo apt-get update -qq
|
||||
sudo apt-get install -qq --no-install-recommends gettext
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@cec208311dfd045dd5311c1add060b2062131d57 # v8.0.0
|
||||
uses: astral-sh/setup-uv@5a095e7a2014a4212f075830d4f7277575a9d098 # v7.3.1
|
||||
with:
|
||||
enable-cache: true
|
||||
- name: Install backend python dependencies
|
||||
|
||||
32
.github/zizmor.yml
vendored
32
.github/zizmor.yml
vendored
@@ -3,22 +3,54 @@ rules:
|
||||
ignore:
|
||||
# github.event_name is a GitHub-internal constant (push/pull_request/etc.),
|
||||
# not attacker-controllable.
|
||||
- ci-backend.yml:35
|
||||
- ci-docker.yml:74
|
||||
- ci-docs.yml:33
|
||||
- ci-frontend.yml:32
|
||||
# github.event.repository.default_branch refers to the target repo's setting,
|
||||
# which only admins can change; not influenced by fork PR authors.
|
||||
- ci-backend.yml:47
|
||||
- ci-docs.yml:45
|
||||
- ci-frontend.yml:44
|
||||
# steps.setup-python.outputs.python-version is always a semver string (e.g. "3.12.0")
|
||||
# produced by actions/setup-python from a hardcoded env var input.
|
||||
- ci-backend.yml:106
|
||||
- ci-backend.yml:121
|
||||
- ci-backend.yml:169
|
||||
- ci-docs.yml:88
|
||||
- ci-docs.yml:92
|
||||
- ci-release.yml:69
|
||||
- ci-release.yml:78
|
||||
- ci-release.yml:90
|
||||
- ci-release.yml:96
|
||||
- ci-release.yml:229
|
||||
# needs.*.result is always one of: success/failure/cancelled/skipped.
|
||||
- ci-backend.yml:211
|
||||
- ci-backend.yml:212
|
||||
- ci-backend.yml:216
|
||||
- ci-docs.yml:131
|
||||
- ci-docs.yml:132
|
||||
- ci-frontend.yml:259
|
||||
- ci-frontend.yml:260
|
||||
- ci-frontend.yml:264
|
||||
- ci-frontend.yml:269
|
||||
- ci-frontend.yml:274
|
||||
- ci-frontend.yml:279
|
||||
# needs.changes.outputs.* is always "true" or "false".
|
||||
- ci-backend.yml:206
|
||||
- ci-docs.yml:126
|
||||
- ci-frontend.yml:254
|
||||
# steps.build.outputs.digest is always a SHA256 digest (sha256:[a-f0-9]{64}).
|
||||
- ci-docker.yml:152
|
||||
# needs.publish-release.outputs.version is the git tag name (e.g. v2.14.0);
|
||||
# only maintainers can push tags upstream, and the tag pattern excludes
|
||||
# shell metacharacters. Used in git commands and github-script JS, not eval.
|
||||
- ci-release.yml:215
|
||||
- ci-release.yml:216
|
||||
- ci-release.yml:231
|
||||
- ci-release.yml:237
|
||||
- ci-release.yml:245
|
||||
- ci-release.yml:248
|
||||
dangerous-triggers:
|
||||
ignore:
|
||||
# Both workflows use pull_request_target solely to label/comment on fork PRs
|
||||
|
||||
@@ -398,27 +398,25 @@ Global permissions define what areas of the app and API endpoints users can acce
|
||||
determine if a user can create, edit, delete or view _any_ documents, but individual documents themselves
|
||||
still have "object-level" permissions.
|
||||
|
||||
| Type | Details |
|
||||
| ---------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| AppConfig | _Change_ or higher permissions grants access to the "Application Configuration" area. |
|
||||
| Correspondent | Add, edit, delete or view Correspondents. |
|
||||
| CustomField | Add, edit, delete or view Custom Fields. |
|
||||
| Document | Add, edit, delete or view Documents. |
|
||||
| DocumentType | Add, edit, delete or view Document Types. |
|
||||
| Group | Add, edit, delete or view Groups. |
|
||||
| GlobalStatistics | View aggregate object counts and statistics. This does not grant access to view individual documents. |
|
||||
| MailAccount | Add, edit, delete or view Mail Accounts. |
|
||||
| MailRule | Add, edit, delete or view Mail Rules. |
|
||||
| Note | Add, edit, delete or view Notes. |
|
||||
| PaperlessTask | View or dismiss (_Change_) File Tasks. |
|
||||
| SavedView | Add, edit, delete or view Saved Views. |
|
||||
| ShareLink | Add, delete or view Share Links. |
|
||||
| StoragePath | Add, edit, delete or view Storage Paths. |
|
||||
| SystemStatus | View the system status dialog and corresponding API endpoint. Admin users also retain system status access. |
|
||||
| Tag | Add, edit, delete or view Tags. |
|
||||
| UISettings | Add, edit, delete or view the UI settings that are used by the web app.<br/>:warning: **Users that will access the web UI must be granted at least _View_ permissions.** |
|
||||
| User | Add, edit, delete or view Users. |
|
||||
| Workflow | Add, edit, delete or view Workflows.<br/>Note that Workflows are global; all users who can access workflows see the same set. Workflows have other permission implications — see [Workflow permissions](#workflow-permissions). |
|
||||
| Type | Details |
|
||||
| ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| AppConfig | _Change_ or higher permissions grants access to the "Application Configuration" area. |
|
||||
| Correspondent | Add, edit, delete or view Correspondents. |
|
||||
| CustomField | Add, edit, delete or view Custom Fields. |
|
||||
| Document | Add, edit, delete or view Documents. |
|
||||
| DocumentType | Add, edit, delete or view Document Types. |
|
||||
| Group | Add, edit, delete or view Groups. |
|
||||
| MailAccount | Add, edit, delete or view Mail Accounts. |
|
||||
| MailRule | Add, edit, delete or view Mail Rules. |
|
||||
| Note | Add, edit, delete or view Notes. |
|
||||
| PaperlessTask | View or dismiss (_Change_) File Tasks. |
|
||||
| SavedView | Add, edit, delete or view Saved Views. |
|
||||
| ShareLink | Add, delete or view Share Links. |
|
||||
| StoragePath | Add, edit, delete or view Storage Paths. |
|
||||
| Tag | Add, edit, delete or view Tags. |
|
||||
| UISettings | Add, edit, delete or view the UI settings that are used by the web app.<br/>:warning: **Users that will access the web UI must be granted at least _View_ permissions.** |
|
||||
| User | Add, edit, delete or view Users. |
|
||||
| Workflow | Add, edit, delete or view Workflows.<br/>Note that Workflows are global; all users who can access workflows see the same set. Workflows have other permission implications — see [Workflow permissions](#workflow-permissions). |
|
||||
|
||||
#### Detailed Explanation of Object Permissions {#object-permissions}
|
||||
|
||||
|
||||
@@ -41,7 +41,7 @@ dependencies = [
|
||||
"djangorestframework~=3.16",
|
||||
"djangorestframework-guardian~=0.4.0",
|
||||
"drf-spectacular~=0.28",
|
||||
"drf-spectacular-sidecar~=2026.4.1",
|
||||
"drf-spectacular-sidecar~=2026.3.1",
|
||||
"drf-writable-nested~=0.7.1",
|
||||
"faiss-cpu>=1.10",
|
||||
"filelock~=3.25.2",
|
||||
@@ -76,7 +76,7 @@ dependencies = [
|
||||
"setproctitle~=1.3.4",
|
||||
"tantivy>=0.25.1",
|
||||
"tika-client~=0.11.0",
|
||||
"torch~=2.11.0",
|
||||
"torch~=2.10.0",
|
||||
"watchfiles>=1.1.1",
|
||||
"whitenoise~=6.11",
|
||||
"zxing-cpp~=3.0.0",
|
||||
@@ -111,12 +111,12 @@ lint = [
|
||||
testing = [
|
||||
"daphne",
|
||||
"factory-boy~=3.3.1",
|
||||
"faker~=40.12.0",
|
||||
"faker~=40.8.0",
|
||||
"imagehash",
|
||||
"pytest~=9.0.0",
|
||||
"pytest-cov~=7.1.0",
|
||||
"pytest-cov~=7.0.0",
|
||||
"pytest-django~=4.12.0",
|
||||
"pytest-env~=1.6.0",
|
||||
"pytest-env~=1.5.0",
|
||||
"pytest-httpx",
|
||||
"pytest-mock~=3.15.1",
|
||||
# "pytest-randomly~=4.0.1",
|
||||
|
||||
@@ -316,11 +316,11 @@
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">195</context>
|
||||
<context context-type="linenumber">193</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">199</context>
|
||||
<context context-type="linenumber">197</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/app-frame/app-frame.component.html</context>
|
||||
@@ -518,7 +518,7 @@
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">138</context>
|
||||
<context context-type="linenumber">136</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="2180291763949669799" datatype="html">
|
||||
@@ -540,7 +540,7 @@
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">401</context>
|
||||
<context context-type="linenumber">399</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/confirm-dialog/confirm-dialog.component.ts</context>
|
||||
@@ -615,7 +615,7 @@
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">402</context>
|
||||
<context context-type="linenumber">400</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/edit-dialog/correspondent-edit-dialog/correspondent-edit-dialog.component.html</context>
|
||||
@@ -922,126 +922,126 @@
|
||||
<source>Open Django Admin</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">32</context>
|
||||
<context context-type="linenumber">30</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6439365426343089851" datatype="html">
|
||||
<source>General</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">42</context>
|
||||
<context context-type="linenumber">40</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="8671234314555525900" datatype="html">
|
||||
<source>Appearance</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">46</context>
|
||||
<context context-type="linenumber">44</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="3777637051272512093" datatype="html">
|
||||
<source>Display language</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">49</context>
|
||||
<context context-type="linenumber">47</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="53523152145406584" datatype="html">
|
||||
<source>You need to reload the page after applying a new language.</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">62</context>
|
||||
<context context-type="linenumber">60</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="3766032098416558788" datatype="html">
|
||||
<source>Date display</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">70</context>
|
||||
<context context-type="linenumber">68</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="3733378544613473393" datatype="html">
|
||||
<source>Date format</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">87</context>
|
||||
<context context-type="linenumber">85</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="3407788781115661841" datatype="html">
|
||||
<source>Short: <x id="INTERPOLATION" equiv-text="{{today | customDate:'shortDate':null:computedDateLocale}}"/></source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">93,94</context>
|
||||
<context context-type="linenumber">91,92</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6290748171049664628" datatype="html">
|
||||
<source>Medium: <x id="INTERPOLATION" equiv-text="{{today | customDate:'mediumDate':null:computedDateLocale}}"/></source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">97,98</context>
|
||||
<context context-type="linenumber">95,96</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="7189855711197998347" datatype="html">
|
||||
<source>Long: <x id="INTERPOLATION" equiv-text="{{today | customDate:'longDate':null:computedDateLocale}}"/></source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">101,102</context>
|
||||
<context context-type="linenumber">99,100</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="3982403428275430291" datatype="html">
|
||||
<source>Sidebar</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">109</context>
|
||||
<context context-type="linenumber">107</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="4608457133854405683" datatype="html">
|
||||
<source>Use 'slim' sidebar (icons only)</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">113</context>
|
||||
<context context-type="linenumber">111</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="1356890996281769972" datatype="html">
|
||||
<source>Dark mode</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">120</context>
|
||||
<context context-type="linenumber">118</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="4913823100518391922" datatype="html">
|
||||
<source>Use system settings</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">123</context>
|
||||
<context context-type="linenumber">121</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="5782828784040423650" datatype="html">
|
||||
<source>Enable dark mode</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">124</context>
|
||||
<context context-type="linenumber">122</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6336642923114460405" datatype="html">
|
||||
<source>Invert thumbnails in dark mode</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">125</context>
|
||||
<context context-type="linenumber">123</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="7983234071833154796" datatype="html">
|
||||
<source>Theme Color</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">131</context>
|
||||
<context context-type="linenumber">129</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6760166989231109310" datatype="html">
|
||||
<source>Global search</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">144</context>
|
||||
<context context-type="linenumber">142</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/app-frame/global-search/global-search.component.ts</context>
|
||||
@@ -1052,28 +1052,28 @@
|
||||
<source>Do not include advanced search results</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">147</context>
|
||||
<context context-type="linenumber">145</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="3969258421469113318" datatype="html">
|
||||
<source>Full search links to</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">153</context>
|
||||
<context context-type="linenumber">151</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6631288852577115923" datatype="html">
|
||||
<source>Title and content search</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">157</context>
|
||||
<context context-type="linenumber">155</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="1010505078885609376" datatype="html">
|
||||
<source>Advanced search</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">158</context>
|
||||
<context context-type="linenumber">156</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/app-frame/global-search/global-search.component.html</context>
|
||||
@@ -1088,21 +1088,21 @@
|
||||
<source>Update checking</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">163</context>
|
||||
<context context-type="linenumber">161</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="5070799004079086984" datatype="html">
|
||||
<source>Enable update checking</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">166</context>
|
||||
<context context-type="linenumber">164</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="5752465522295465624" datatype="html">
|
||||
<source>What's this?</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">167</context>
|
||||
<context context-type="linenumber">165</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/page-header/page-header.component.html</context>
|
||||
@@ -1121,21 +1121,21 @@
|
||||
<source> Update checking works by pinging the public GitHub API for the latest release to determine whether a new version is available. Actual updating of the app must still be performed manually. </source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">171,173</context>
|
||||
<context context-type="linenumber">169,171</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="8416061320800650487" datatype="html">
|
||||
<source>No tracking data is collected by the app in any way.</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">175</context>
|
||||
<context context-type="linenumber">173</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="5775451530782446954" datatype="html">
|
||||
<source>Saved Views</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">181</context>
|
||||
<context context-type="linenumber">179</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/app-frame/app-frame.component.html</context>
|
||||
@@ -1154,126 +1154,126 @@
|
||||
<source>Show warning when closing saved views with unsaved changes</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">184</context>
|
||||
<context context-type="linenumber">182</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="4975481913502931184" datatype="html">
|
||||
<source>Show document counts in sidebar saved views</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">185</context>
|
||||
<context context-type="linenumber">183</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="8939587804990976924" datatype="html">
|
||||
<source>Items per page</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">202</context>
|
||||
<context context-type="linenumber">200</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="908152367861642592" datatype="html">
|
||||
<source>Document editing</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">214</context>
|
||||
<context context-type="linenumber">212</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6708098108196142028" datatype="html">
|
||||
<source>Use PDF viewer provided by the browser</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">217</context>
|
||||
<context context-type="linenumber">215</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="9003921625412907981" datatype="html">
|
||||
<source>This is usually faster for displaying large PDF documents, but it might not work on some browsers.</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">217</context>
|
||||
<context context-type="linenumber">215</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="2678648946508279627" datatype="html">
|
||||
<source>Default zoom</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">223</context>
|
||||
<context context-type="linenumber">221</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="2222784219255971268" datatype="html">
|
||||
<source>Fit width</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">227</context>
|
||||
<context context-type="linenumber">225</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="8409221133589393872" datatype="html">
|
||||
<source>Fit page</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">228</context>
|
||||
<context context-type="linenumber">226</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="7019985100624067992" datatype="html">
|
||||
<source>Only applies to the Paperless-ngx PDF viewer.</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">230</context>
|
||||
<context context-type="linenumber">228</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="2959590948110714366" datatype="html">
|
||||
<source>Automatically remove inbox tag(s) on save</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">236</context>
|
||||
<context context-type="linenumber">234</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="8793267604636304297" datatype="html">
|
||||
<source>Show document thumbnail during loading</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">242</context>
|
||||
<context context-type="linenumber">240</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="1783600598811723080" datatype="html">
|
||||
<source>Built-in fields to show:</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">248</context>
|
||||
<context context-type="linenumber">246</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="3467966318201103991" datatype="html">
|
||||
<source>Uncheck fields to hide them on the document details page.</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">260</context>
|
||||
<context context-type="linenumber">258</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="8508424367627989968" datatype="html">
|
||||
<source>Bulk editing</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">266</context>
|
||||
<context context-type="linenumber">264</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="8158899674926420054" datatype="html">
|
||||
<source>Show confirmation dialogs</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">269</context>
|
||||
<context context-type="linenumber">267</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="290238406234356122" datatype="html">
|
||||
<source>Apply on close</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">270</context>
|
||||
<context context-type="linenumber">268</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="5084275925647254161" datatype="html">
|
||||
<source>PDF Editor</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">274</context>
|
||||
<context context-type="linenumber">272</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.html</context>
|
||||
@@ -1288,14 +1288,14 @@
|
||||
<source>Default editing mode</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">277</context>
|
||||
<context context-type="linenumber">275</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="7273640930165035289" datatype="html">
|
||||
<source>Create new document(s)</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">281</context>
|
||||
<context context-type="linenumber">279</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/pdf-editor/pdf-editor.component.html</context>
|
||||
@@ -1306,7 +1306,7 @@
|
||||
<source>Add document version</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">282</context>
|
||||
<context context-type="linenumber">280</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/pdf-editor/pdf-editor.component.html</context>
|
||||
@@ -1317,7 +1317,7 @@
|
||||
<source>Notes</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">287</context>
|
||||
<context context-type="linenumber">285</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/document-list.component.html</context>
|
||||
@@ -1336,14 +1336,14 @@
|
||||
<source>Enable notes</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">290</context>
|
||||
<context context-type="linenumber">288</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="7314814725704332646" datatype="html">
|
||||
<source>Permissions</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">299</context>
|
||||
<context context-type="linenumber">297</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/edit-dialog/group-edit-dialog/group-edit-dialog.component.html</context>
|
||||
@@ -1394,28 +1394,28 @@
|
||||
<source>Default Permissions</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">302</context>
|
||||
<context context-type="linenumber">300</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6544153565064275581" datatype="html">
|
||||
<source> Settings apply to this user account for objects (Tags, Mail Rules, etc. but not documents) created via the web UI. </source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">306,308</context>
|
||||
<context context-type="linenumber">304,306</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="4292903881380648974" datatype="html">
|
||||
<source>Default Owner</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">313</context>
|
||||
<context context-type="linenumber">311</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="734147282056744882" datatype="html">
|
||||
<source>Objects without an owner can be viewed and edited by all users</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">317</context>
|
||||
<context context-type="linenumber">315</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/input/permissions/permissions-form/permissions-form.component.html</context>
|
||||
@@ -1426,18 +1426,18 @@
|
||||
<source>Default View Permissions</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">322</context>
|
||||
<context context-type="linenumber">320</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="2191775412581217688" datatype="html">
|
||||
<source>Users:</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">327</context>
|
||||
<context context-type="linenumber">325</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">354</context>
|
||||
<context context-type="linenumber">352</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.html</context>
|
||||
@@ -1468,11 +1468,11 @@
|
||||
<source>Groups:</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">337</context>
|
||||
<context context-type="linenumber">335</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">364</context>
|
||||
<context context-type="linenumber">362</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.html</context>
|
||||
@@ -1503,14 +1503,14 @@
|
||||
<source>Default Edit Permissions</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">349</context>
|
||||
<context context-type="linenumber">347</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="3728984448750213892" datatype="html">
|
||||
<source>Edit permissions also grant viewing permissions</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">373</context>
|
||||
<context context-type="linenumber">371</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.html</context>
|
||||
@@ -1529,7 +1529,7 @@
|
||||
<source>Notifications</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">381</context>
|
||||
<context context-type="linenumber">379</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/app-frame/toasts-dropdown/toasts-dropdown.component.html</context>
|
||||
@@ -1540,42 +1540,42 @@
|
||||
<source>Document processing</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">384</context>
|
||||
<context context-type="linenumber">382</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="3656786776644872398" datatype="html">
|
||||
<source>Show notifications when new documents are detected</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">388</context>
|
||||
<context context-type="linenumber">386</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6057053428592387613" datatype="html">
|
||||
<source>Show notifications when document processing completes successfully</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">389</context>
|
||||
<context context-type="linenumber">387</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="370315664367425513" datatype="html">
|
||||
<source>Show notifications when document processing fails</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">390</context>
|
||||
<context context-type="linenumber">388</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6838309441164918531" datatype="html">
|
||||
<source>Suppress notifications on dashboard</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">391</context>
|
||||
<context context-type="linenumber">389</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="2741919327232918179" datatype="html">
|
||||
<source>This will suppress all messages about document processing status on the dashboard.</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">391</context>
|
||||
<context context-type="linenumber">389</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6839066544204061364" datatype="html">
|
||||
@@ -4800,8 +4800,8 @@
|
||||
<context context-type="linenumber">26</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="5409927574404161431" datatype="html">
|
||||
<source>Access system status, logs, Django backend</source>
|
||||
<trans-unit id="8563400529811056364" datatype="html">
|
||||
<source>Access logs, Django backend</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/edit-dialog/user-edit-dialog/user-edit-dialog.component.html</context>
|
||||
<context context-type="linenumber">26</context>
|
||||
@@ -4814,8 +4814,8 @@
|
||||
<context context-type="linenumber">30</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="5622335314381948156" datatype="html">
|
||||
<source>Grants all permissions and can view all objects</source>
|
||||
<trans-unit id="1403759966357927756" datatype="html">
|
||||
<source>(Grants all permissions and can view objects)</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/edit-dialog/user-edit-dialog/user-edit-dialog.component.html</context>
|
||||
<context context-type="linenumber">30</context>
|
||||
@@ -6198,7 +6198,7 @@
|
||||
<source>Inherited from group</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/permissions-select/permissions-select.component.ts</context>
|
||||
<context context-type="linenumber">85</context>
|
||||
<context context-type="linenumber">78</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6418218602775540217" datatype="html">
|
||||
|
||||
@@ -21,7 +21,7 @@
|
||||
"@angular/platform-browser-dynamic": "~21.2.6",
|
||||
"@angular/router": "~21.2.6",
|
||||
"@ng-bootstrap/ng-bootstrap": "^20.0.0",
|
||||
"@ng-select/ng-select": "^21.7.0",
|
||||
"@ng-select/ng-select": "^21.5.2",
|
||||
"@ngneat/dirty-check-forms": "^3.0.3",
|
||||
"@popperjs/core": "^2.11.8",
|
||||
"bootstrap": "^5.3.8",
|
||||
@@ -32,7 +32,7 @@
|
||||
"ngx-cookie-service": "^21.3.1",
|
||||
"ngx-device-detector": "^11.0.0",
|
||||
"ngx-ui-tour-ng-bootstrap": "^18.0.0",
|
||||
"pdfjs-dist": "^5.6.205",
|
||||
"pdfjs-dist": "^5.4.624",
|
||||
"rxjs": "^7.8.2",
|
||||
"tslib": "^2.8.1",
|
||||
"utif": "^3.1.0",
|
||||
@@ -42,28 +42,28 @@
|
||||
"devDependencies": {
|
||||
"@angular-builders/custom-webpack": "^21.0.3",
|
||||
"@angular-builders/jest": "^21.0.3",
|
||||
"@angular-devkit/core": "^21.2.6",
|
||||
"@angular-devkit/schematics": "^21.2.6",
|
||||
"@angular-devkit/core": "^21.2.3",
|
||||
"@angular-devkit/schematics": "^21.2.3",
|
||||
"@angular-eslint/builder": "21.3.1",
|
||||
"@angular-eslint/eslint-plugin": "21.3.1",
|
||||
"@angular-eslint/eslint-plugin-template": "21.3.1",
|
||||
"@angular-eslint/schematics": "21.3.1",
|
||||
"@angular-eslint/template-parser": "21.3.1",
|
||||
"@angular/build": "^21.2.6",
|
||||
"@angular/cli": "~21.2.6",
|
||||
"@angular/build": "^21.2.3",
|
||||
"@angular/cli": "~21.2.3",
|
||||
"@angular/compiler-cli": "~21.2.6",
|
||||
"@codecov/webpack-plugin": "^1.9.1",
|
||||
"@playwright/test": "^1.59.0",
|
||||
"@playwright/test": "^1.58.2",
|
||||
"@types/jest": "^30.0.0",
|
||||
"@types/node": "^25.5.0",
|
||||
"@typescript-eslint/eslint-plugin": "^8.58.0",
|
||||
"@typescript-eslint/parser": "^8.58.0",
|
||||
"@typescript-eslint/utils": "^8.58.0",
|
||||
"@typescript-eslint/eslint-plugin": "^8.57.2",
|
||||
"@typescript-eslint/parser": "^8.57.2",
|
||||
"@typescript-eslint/utils": "^8.57.2",
|
||||
"eslint": "^10.1.0",
|
||||
"jest": "30.3.0",
|
||||
"jest-environment-jsdom": "^30.3.0",
|
||||
"jest-junit": "^16.0.0",
|
||||
"jest-preset-angular": "^16.1.2",
|
||||
"jest-preset-angular": "^16.1.1",
|
||||
"jest-websocket-mock": "^2.5.0",
|
||||
"prettier-plugin-organize-imports": "^4.3.0",
|
||||
"ts-node": "~10.9.1",
|
||||
|
||||
1035
src-ui/pnpm-lock.yaml
generated
1035
src-ui/pnpm-lock.yaml
generated
File diff suppressed because it is too large
Load Diff
@@ -7,7 +7,7 @@
|
||||
<button class="btn btn-sm btn-outline-primary" (click)="tourService.start()">
|
||||
<i-bs class="me-2" name="airplane"></i-bs><ng-container i18n>Start tour</ng-container>
|
||||
</button>
|
||||
@if (canViewSystemStatus) {
|
||||
@if (permissionsService.isAdmin()) {
|
||||
<button class="btn btn-sm btn-outline-primary position-relative ms-md-5 me-1" (click)="showSystemStatus()"
|
||||
[disabled]="!systemStatus">
|
||||
@if (!systemStatus) {
|
||||
@@ -26,8 +26,6 @@
|
||||
}
|
||||
<ng-container i18n>System Status</ng-container>
|
||||
</button>
|
||||
}
|
||||
@if (permissionsService.isAdmin()) {
|
||||
<a class="btn btn-sm btn-primary" href="admin/" target="_blank">
|
||||
<ng-container i18n>Open Django Admin</ng-container>
|
||||
<i-bs class="ms-2" name="arrow-up-right"></i-bs>
|
||||
|
||||
@@ -29,11 +29,7 @@ import { IfOwnerDirective } from 'src/app/directives/if-owner.directive'
|
||||
import { IfPermissionsDirective } from 'src/app/directives/if-permissions.directive'
|
||||
import { PermissionsGuard } from 'src/app/guards/permissions.guard'
|
||||
import { CustomDatePipe } from 'src/app/pipes/custom-date.pipe'
|
||||
import {
|
||||
PermissionAction,
|
||||
PermissionType,
|
||||
PermissionsService,
|
||||
} from 'src/app/services/permissions.service'
|
||||
import { PermissionsService } from 'src/app/services/permissions.service'
|
||||
import { GroupService } from 'src/app/services/rest/group.service'
|
||||
import { SavedViewService } from 'src/app/services/rest/saved-view.service'
|
||||
import { UserService } from 'src/app/services/rest/user.service'
|
||||
@@ -332,13 +328,7 @@ describe('SettingsComponent', () => {
|
||||
|
||||
it('should load system status on initialize, show errors if needed', () => {
|
||||
jest.spyOn(systemStatusService, 'get').mockReturnValue(of(status))
|
||||
jest
|
||||
.spyOn(permissionsService, 'currentUserCan')
|
||||
.mockImplementation(
|
||||
(action, type) =>
|
||||
action === PermissionAction.View &&
|
||||
type === PermissionType.SystemStatus
|
||||
)
|
||||
jest.spyOn(permissionsService, 'isAdmin').mockReturnValue(true)
|
||||
completeSetup()
|
||||
expect(component['systemStatus']).toEqual(status) // private
|
||||
expect(component.systemStatusHasErrors).toBeTruthy()
|
||||
@@ -354,13 +344,7 @@ describe('SettingsComponent', () => {
|
||||
it('should open system status dialog', () => {
|
||||
const modalOpenSpy = jest.spyOn(modalService, 'open')
|
||||
jest.spyOn(systemStatusService, 'get').mockReturnValue(of(status))
|
||||
jest
|
||||
.spyOn(permissionsService, 'currentUserCan')
|
||||
.mockImplementation(
|
||||
(action, type) =>
|
||||
action === PermissionAction.View &&
|
||||
type === PermissionType.SystemStatus
|
||||
)
|
||||
jest.spyOn(permissionsService, 'isAdmin').mockReturnValue(true)
|
||||
completeSetup()
|
||||
component.showSystemStatus()
|
||||
expect(modalOpenSpy).toHaveBeenCalledWith(SystemStatusDialogComponent, {
|
||||
|
||||
@@ -429,7 +429,7 @@ export class SettingsComponent
|
||||
this.settingsForm.patchValue(currentFormValue)
|
||||
}
|
||||
|
||||
if (this.canViewSystemStatus) {
|
||||
if (this.permissionsService.isAdmin()) {
|
||||
this.systemStatusService.get().subscribe((status) => {
|
||||
this.systemStatus = status
|
||||
})
|
||||
@@ -647,16 +647,6 @@ export class SettingsComponent
|
||||
.setValue(Array.from(hiddenFields))
|
||||
}
|
||||
|
||||
/**
 * Whether the current user may see the system status button and data.
 * Admins always qualify; anyone else needs the View permission on
 * SystemStatus.
 */
public get canViewSystemStatus(): boolean {
  // Admin check first so the permission lookup is skipped for admins.
  if (this.permissionsService.isAdmin()) {
    return true
  }
  return this.permissionsService.currentUserCan(
    PermissionAction.View,
    PermissionType.SystemStatus
  )
}
|
||||
|
||||
showSystemStatus() {
|
||||
const modal: NgbModalRef = this.modalService.open(
|
||||
SystemStatusDialogComponent,
|
||||
|
||||
@@ -23,11 +23,11 @@
|
||||
</div>
|
||||
<div class="form-check form-switch form-check-inline">
|
||||
<input type="checkbox" class="form-check-input" id="is_staff" formControlName="is_staff">
|
||||
<label class="form-check-label" for="is_staff"><ng-container i18n>Admin</ng-container> <small class="form-text text-muted ms-1" i18n>Access system status, logs, Django backend</small></label>
|
||||
<label class="form-check-label" for="is_staff"><ng-container i18n>Admin</ng-container> <small class="form-text text-muted ms-1" i18n>Access logs, Django backend</small></label>
|
||||
</div>
|
||||
<div class="form-check form-switch form-check-inline">
|
||||
<input type="checkbox" class="form-check-input" id="is_superuser" formControlName="is_superuser" (change)="onToggleSuperUser()">
|
||||
<label class="form-check-label" for="is_superuser"><ng-container i18n>Superuser</ng-container> <small class="form-text text-muted ms-1" i18n>Grants all permissions and can view all objects</small></label>
|
||||
<label class="form-check-label" for="is_superuser"><ng-container i18n>Superuser</ng-container> <small class="form-text text-muted ms-1" i18n>(Grants all permissions and can view objects)</small></label>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
@@ -26,8 +26,8 @@
|
||||
<input type="checkbox" class="form-check-input" id="{{type}}_all" (change)="toggleAll($event, type)" [checked]="typesWithAllActions.has(type) || isInherited(type)" [attr.disabled]="disabled || isInherited(type) ? true : null">
|
||||
<label class="form-check-label visually-hidden" for="{{type}}_all" i18n>All</label>
|
||||
</div>
|
||||
@for (action of PermissionAction | keyvalue: sortActions; track action.key) {
|
||||
<div class="col form-check form-check-inline" [class.invisible]="!isActionSupported(PermissionType[type], action.value)" [ngbPopover]="inheritedWarning" [disablePopover]="!isInherited(type, action.key)" placement="left" triggers="mouseenter:mouseleave">
|
||||
@for (action of PermissionAction | keyvalue; track action) {
|
||||
<div class="col form-check form-check-inline" [ngbPopover]="inheritedWarning" [disablePopover]="!isInherited(type, action.key)" placement="left" triggers="mouseenter:mouseleave">
|
||||
<input type="checkbox" class="form-check-input" id="{{type}}_{{action.key}}" formControlName="{{action.key}}">
|
||||
<label class="form-check-label visually-hidden" for="{{type}}_{{action.key}}">{{action.key}}</label>
|
||||
</div>
|
||||
|
||||
@@ -26,6 +26,7 @@ const inheritedPermissions = ['change_tag', 'view_documenttype']
|
||||
describe('PermissionsSelectComponent', () => {
|
||||
let component: PermissionsSelectComponent
|
||||
let fixture: ComponentFixture<PermissionsSelectComponent>
|
||||
let permissionsChangeResult: Permissions
|
||||
let settingsService: SettingsService
|
||||
|
||||
beforeEach(async () => {
|
||||
@@ -44,7 +45,7 @@ describe('PermissionsSelectComponent', () => {
|
||||
fixture = TestBed.createComponent(PermissionsSelectComponent)
|
||||
fixture.debugElement.injector.get(NG_VALUE_ACCESSOR)
|
||||
component = fixture.componentInstance
|
||||
component.registerOnChange((r) => r)
|
||||
component.registerOnChange((r) => (permissionsChangeResult = r))
|
||||
fixture.detectChanges()
|
||||
})
|
||||
|
||||
@@ -74,6 +75,7 @@ describe('PermissionsSelectComponent', () => {
|
||||
it('should update on permissions set', () => {
|
||||
component.ngOnInit()
|
||||
component.writeValue(permissions)
|
||||
expect(permissionsChangeResult).toEqual(permissions)
|
||||
expect(component.typesWithAllActions).toContain('Document')
|
||||
})
|
||||
|
||||
@@ -90,12 +92,13 @@ describe('PermissionsSelectComponent', () => {
|
||||
it('disable checkboxes when permissions are inherited', () => {
|
||||
component.ngOnInit()
|
||||
component.inheritedPermissions = inheritedPermissions
|
||||
fixture.detectChanges()
|
||||
expect(component.isInherited('Document', 'Add')).toBeFalsy()
|
||||
expect(component.isInherited('Document')).toBeFalsy()
|
||||
expect(component.isInherited('Tag', 'Change')).toBeTruthy()
|
||||
expect(component.form.get('Document').get('Add').disabled).toBeFalsy()
|
||||
expect(component.form.get('Tag').get('Change').disabled).toBeTruthy()
|
||||
const input1 = fixture.debugElement.query(By.css('input#Document_Add'))
|
||||
expect(input1.nativeElement.disabled).toBeFalsy()
|
||||
const input2 = fixture.debugElement.query(By.css('input#Tag_Change'))
|
||||
expect(input2.nativeElement.disabled).toBeTruthy()
|
||||
})
|
||||
|
||||
it('should exclude history permissions if disabled', () => {
|
||||
@@ -104,60 +107,4 @@ describe('PermissionsSelectComponent', () => {
|
||||
component = fixture.componentInstance
|
||||
expect(component.allowedTypes).not.toContain('History')
|
||||
})
|
||||
|
||||
it('should treat global statistics as view-only', () => {
|
||||
component.ngOnInit()
|
||||
fixture.detectChanges()
|
||||
|
||||
expect(
|
||||
component.isActionSupported(
|
||||
PermissionType.GlobalStatistics,
|
||||
PermissionAction.View
|
||||
)
|
||||
).toBeTruthy()
|
||||
expect(
|
||||
component.isActionSupported(
|
||||
PermissionType.GlobalStatistics,
|
||||
PermissionAction.Add
|
||||
)
|
||||
).toBeFalsy()
|
||||
|
||||
const addInput = fixture.debugElement.query(
|
||||
By.css('input#GlobalStatistics_Add')
|
||||
)
|
||||
const viewInput = fixture.debugElement.query(
|
||||
By.css('input#GlobalStatistics_View')
|
||||
)
|
||||
|
||||
expect(addInput.nativeElement.disabled).toBeTruthy()
|
||||
expect(viewInput.nativeElement.disabled).toBeFalsy()
|
||||
})
|
||||
|
||||
it('should treat system status as view-only', () => {
|
||||
component.ngOnInit()
|
||||
fixture.detectChanges()
|
||||
|
||||
expect(
|
||||
component.isActionSupported(
|
||||
PermissionType.SystemStatus,
|
||||
PermissionAction.View
|
||||
)
|
||||
).toBeTruthy()
|
||||
expect(
|
||||
component.isActionSupported(
|
||||
PermissionType.SystemStatus,
|
||||
PermissionAction.Change
|
||||
)
|
||||
).toBeFalsy()
|
||||
|
||||
const changeInput = fixture.debugElement.query(
|
||||
By.css('input#SystemStatus_Change')
|
||||
)
|
||||
const viewInput = fixture.debugElement.query(
|
||||
By.css('input#SystemStatus_View')
|
||||
)
|
||||
|
||||
expect(changeInput.nativeElement.disabled).toBeTruthy()
|
||||
expect(viewInput.nativeElement.disabled).toBeFalsy()
|
||||
})
|
||||
})
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { KeyValue, KeyValuePipe } from '@angular/common'
|
||||
import { KeyValuePipe } from '@angular/common'
|
||||
import { Component, forwardRef, inject, Input, OnInit } from '@angular/core'
|
||||
import {
|
||||
AbstractControl,
|
||||
@@ -58,13 +58,6 @@ export class PermissionsSelectComponent
|
||||
|
||||
typesWithAllActions: Set<string> = new Set()
|
||||
|
||||
private readonly actionOrder = [
|
||||
PermissionAction.Add,
|
||||
PermissionAction.Change,
|
||||
PermissionAction.Delete,
|
||||
PermissionAction.View,
|
||||
]
|
||||
|
||||
_inheritedPermissions: string[] = []
|
||||
|
||||
@Input()
|
||||
@@ -93,7 +86,7 @@ export class PermissionsSelectComponent
|
||||
}
|
||||
this.allowedTypes.forEach((type) => {
|
||||
const control = new FormGroup({})
|
||||
for (const action of Object.keys(PermissionAction)) {
|
||||
for (const action in PermissionAction) {
|
||||
control.addControl(action, new FormControl(null))
|
||||
}
|
||||
this.form.addControl(type, control)
|
||||
@@ -113,14 +106,18 @@ export class PermissionsSelectComponent
|
||||
this.permissionsService.getPermissionKeys(permissionStr)
|
||||
|
||||
if (actionKey && typeKey) {
|
||||
this.form
|
||||
.get(typeKey)
|
||||
?.get(actionKey)
|
||||
?.patchValue(true, { emitEvent: false })
|
||||
if (this.form.get(typeKey)?.get(actionKey)) {
|
||||
this.form
|
||||
.get(typeKey)
|
||||
.get(actionKey)
|
||||
.patchValue(true, { emitEvent: false })
|
||||
}
|
||||
}
|
||||
})
|
||||
this.allowedTypes.forEach((type) => {
|
||||
if (this.typeHasAllActionsSelected(type)) {
|
||||
if (
|
||||
Object.values(this.form.get(type).value).every((val) => val == true)
|
||||
) {
|
||||
this.typesWithAllActions.add(type)
|
||||
} else {
|
||||
this.typesWithAllActions.delete(type)
|
||||
@@ -152,16 +149,12 @@ export class PermissionsSelectComponent
|
||||
this.form.valueChanges.subscribe((newValue) => {
|
||||
let permissions = []
|
||||
Object.entries(newValue).forEach(([typeKey, typeValue]) => {
|
||||
// e.g. [Document, { Add: true, View: true ... }]
|
||||
const selectedActions = Object.entries(typeValue).filter(
|
||||
([actionKey, actionValue]) =>
|
||||
actionValue &&
|
||||
this.isActionSupported(
|
||||
PermissionType[typeKey],
|
||||
PermissionAction[actionKey]
|
||||
)
|
||||
([actionKey, actionValue]) => actionValue == true
|
||||
)
|
||||
|
||||
selectedActions.forEach(([actionKey]) => {
|
||||
selectedActions.forEach(([actionKey, actionValue]) => {
|
||||
permissions.push(
|
||||
(PermissionType[typeKey] as string).replace(
|
||||
'%s',
|
||||
@@ -170,7 +163,7 @@ export class PermissionsSelectComponent
|
||||
)
|
||||
})
|
||||
|
||||
if (this.typeHasAllActionsSelected(typeKey)) {
|
||||
if (selectedActions.length == Object.entries(typeValue).length) {
|
||||
this.typesWithAllActions.add(typeKey)
|
||||
} else {
|
||||
this.typesWithAllActions.delete(typeKey)
|
||||
@@ -181,23 +174,19 @@ export class PermissionsSelectComponent
|
||||
permissions.filter((p) => !this._inheritedPermissions.includes(p))
|
||||
)
|
||||
})
|
||||
|
||||
this.updateDisabledStates()
|
||||
}
|
||||
|
||||
toggleAll(event, type) {
|
||||
const typeGroup = this.form.get(type)
|
||||
Object.keys(PermissionAction)
|
||||
.filter((action) =>
|
||||
this.isActionSupported(PermissionType[type], PermissionAction[action])
|
||||
)
|
||||
.forEach((action) => {
|
||||
typeGroup.get(action).patchValue(event.target.checked)
|
||||
if (event.target.checked) {
|
||||
Object.keys(PermissionAction).forEach((action) => {
|
||||
typeGroup.get(action).patchValue(true)
|
||||
})
|
||||
|
||||
if (this.typeHasAllActionsSelected(type)) {
|
||||
this.typesWithAllActions.add(type)
|
||||
} else {
|
||||
Object.keys(PermissionAction).forEach((action) => {
|
||||
typeGroup.get(action).patchValue(false)
|
||||
})
|
||||
this.typesWithAllActions.delete(type)
|
||||
}
|
||||
}
|
||||
@@ -212,21 +201,14 @@ export class PermissionsSelectComponent
|
||||
)
|
||||
)
|
||||
} else {
|
||||
return Object.keys(PermissionAction)
|
||||
.filter((action) =>
|
||||
this.isActionSupported(
|
||||
PermissionType[typeKey],
|
||||
PermissionAction[action]
|
||||
return Object.values(PermissionAction).every((action) => {
|
||||
return this._inheritedPermissions.includes(
|
||||
this.permissionsService.getPermissionCode(
|
||||
action as PermissionAction,
|
||||
PermissionType[typeKey]
|
||||
)
|
||||
)
|
||||
.every((action) => {
|
||||
return this._inheritedPermissions.includes(
|
||||
this.permissionsService.getPermissionCode(
|
||||
PermissionAction[action],
|
||||
PermissionType[typeKey]
|
||||
)
|
||||
)
|
||||
})
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -234,55 +216,12 @@ export class PermissionsSelectComponent
|
||||
this.allowedTypes.forEach((type) => {
|
||||
const control = this.form.get(type)
|
||||
let actionControl: AbstractControl
|
||||
for (const action of Object.keys(PermissionAction)) {
|
||||
for (const action in PermissionAction) {
|
||||
actionControl = control.get(action)
|
||||
if (
|
||||
!this.isActionSupported(
|
||||
PermissionType[type],
|
||||
PermissionAction[action]
|
||||
)
|
||||
) {
|
||||
actionControl.patchValue(false, { emitEvent: false })
|
||||
actionControl.disable({ emitEvent: false })
|
||||
continue
|
||||
}
|
||||
|
||||
this.isInherited(type, action) || this.disabled
|
||||
? actionControl.disable({ emitEvent: false })
|
||||
: actionControl.enable({ emitEvent: false })
|
||||
? actionControl.disable()
|
||||
: actionControl.enable()
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/**
 * Reports whether `action` is a meaningful permission for `type`.
 * GlobalStatistics and SystemStatus accept only View; every other type
 * accepts every action.
 */
public isActionSupported(
  type: PermissionType,
  action: PermissionAction
): boolean {
  const isViewOnlyType =
    type === PermissionType.GlobalStatistics ||
    type === PermissionType.SystemStatus
  return isViewOnlyType ? action === PermissionAction.View : true
}
|
||||
|
||||
/**
 * True when every action that is supported for `typeKey` currently has a
 * truthy value in the form. Unsupported actions are ignored.
 */
private typeHasAllActionsSelected(typeKey: string): boolean {
  for (const actionKey of Object.keys(PermissionAction)) {
    const supported = this.isActionSupported(
      PermissionType[typeKey],
      PermissionAction[actionKey]
    )
    if (!supported) {
      continue
    }
    // A missing group/control counts as "not selected".
    if (!this.form.get(typeKey)?.get(actionKey)?.value) {
      return false
    }
  }
  return true
}
|
||||
|
||||
/**
 * Comparator for the `keyvalue` pipe: orders PermissionAction entries by
 * their position in `actionOrder`.
 */
public sortActions = (
  a: KeyValue<string, PermissionAction>,
  b: KeyValue<string, PermissionAction>
): number => {
  const rankA = this.actionOrder.indexOf(a.value)
  const rankB = this.actionOrder.indexOf(b.value)
  return rankA - rankB
}
|
||||
}
|
||||
|
||||
@@ -43,7 +43,7 @@
|
||||
</div>
|
||||
<p class="card-text">
|
||||
@if (document) {
|
||||
@if (hasSearchHighlights) {
|
||||
@if (document.__search_hit__ && document.__search_hit__.highlights) {
|
||||
<span [innerHtml]="document.__search_hit__.highlights"></span>
|
||||
}
|
||||
@for (highlight of searchNoteHighlights; track highlight) {
|
||||
@@ -52,7 +52,7 @@
|
||||
<span [innerHtml]="highlight"></span>
|
||||
</span>
|
||||
}
|
||||
@if (shouldShowContentFallback) {
|
||||
@if (!document.__search_hit__?.score) {
|
||||
<span class="result-content">{{contentTrimmed}}</span>
|
||||
}
|
||||
} @else {
|
||||
|
||||
@@ -127,19 +127,6 @@ describe('DocumentCardLargeComponent', () => {
|
||||
expect(component.searchNoteHighlights).toContain('<span>bananas</span>')
|
||||
})
|
||||
|
||||
it('should fall back to document content when a search hit has no highlights', () => {
|
||||
component.document.__search_hit__ = {
|
||||
score: 0.9,
|
||||
rank: 1,
|
||||
highlights: '',
|
||||
note_highlights: null,
|
||||
}
|
||||
fixture.detectChanges()
|
||||
|
||||
expect(fixture.nativeElement.textContent).toContain('Cupcake ipsum')
|
||||
expect(component.shouldShowContentFallback).toBe(true)
|
||||
})
|
||||
|
||||
it('should try to close the preview on mouse leave', () => {
|
||||
component.popupPreview = {
|
||||
close: jest.fn(),
|
||||
|
||||
@@ -164,17 +164,6 @@ export class DocumentCardLargeComponent
|
||||
)
|
||||
}
|
||||
|
||||
/**
 * True when the search hit carries highlight text that is non-empty after
 * trimming whitespace.
 */
get hasSearchHighlights() {
  const trimmedLength =
    this.document?.__search_hit__?.highlights?.trim()?.length
  return Boolean(trimmedLength)
}
|
||||
|
||||
/**
 * Whether the card should show the trimmed document content: either the
 * document has no search score, or it has neither content highlights nor
 * note highlights.
 */
get shouldShowContentFallback() {
  if (this.document?.__search_hit__?.score == null) {
    return true
  }
  return !this.hasSearchHighlights && this.searchNoteHighlights.length === 0
}
|
||||
|
||||
get notesEnabled(): boolean {
|
||||
return this.settingsService.get(SETTINGS_KEYS.NOTES_ENABLED)
|
||||
}
|
||||
|
||||
@@ -6,11 +6,6 @@ import {
|
||||
PermissionsService,
|
||||
} from './permissions.service'
|
||||
|
||||
const VIEW_ONLY_PERMISSION_TYPES = new Set<PermissionType>([
|
||||
PermissionType.GlobalStatistics,
|
||||
PermissionType.SystemStatus,
|
||||
])
|
||||
|
||||
describe('PermissionsService', () => {
|
||||
let permissionsService: PermissionsService
|
||||
|
||||
@@ -269,8 +264,6 @@ describe('PermissionsService', () => {
|
||||
'change_applicationconfiguration',
|
||||
'delete_applicationconfiguration',
|
||||
'view_applicationconfiguration',
|
||||
'view_global_statistics',
|
||||
'view_system_status',
|
||||
],
|
||||
{
|
||||
username: 'testuser',
|
||||
@@ -281,10 +274,7 @@ describe('PermissionsService', () => {
|
||||
|
||||
Object.values(PermissionType).forEach((type) => {
|
||||
Object.values(PermissionAction).forEach((action) => {
|
||||
expect(permissionsService.currentUserCan(action, type)).toBe(
|
||||
!VIEW_ONLY_PERMISSION_TYPES.has(type) ||
|
||||
action === PermissionAction.View
|
||||
)
|
||||
expect(permissionsService.currentUserCan(action, type)).toBeTruthy()
|
||||
})
|
||||
})
|
||||
|
||||
|
||||
@@ -29,8 +29,6 @@ export enum PermissionType {
|
||||
CustomField = '%s_customfield',
|
||||
Workflow = '%s_workflow',
|
||||
ProcessedMail = '%s_processedmail',
|
||||
GlobalStatistics = '%s_global_statistics',
|
||||
SystemStatus = '%s_system_status',
|
||||
}
|
||||
|
||||
@Injectable({
|
||||
|
||||
@@ -73,7 +73,7 @@ describe('LocalizedDateParserFormatter', () => {
|
||||
|
||||
it('should handle years when current year % 100 < 50', () => {
|
||||
jest.useFakeTimers()
|
||||
jest.setSystemTime(new Date(2026, 5, 15).getTime())
|
||||
jest.setSystemTime(new Date(2026, 5, 15))
|
||||
let val = dateParserFormatter.parse('5/4/26')
|
||||
expect(val).toEqual({ day: 4, month: 5, year: 2026 })
|
||||
|
||||
@@ -87,7 +87,7 @@ describe('LocalizedDateParserFormatter', () => {
|
||||
|
||||
it('should handle years when current year % 100 >= 50', () => {
|
||||
jest.useFakeTimers()
|
||||
jest.setSystemTime(new Date(2076, 5, 15).getTime())
|
||||
jest.setSystemTime(new Date(2076, 5, 15))
|
||||
const val = dateParserFormatter.parse('5/4/00')
|
||||
expect(val).toEqual({ day: 4, month: 5, year: 2100 })
|
||||
jest.useRealTimers()
|
||||
|
||||
@@ -11,7 +11,6 @@ from typing import TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Callable
|
||||
from collections.abc import Iterator
|
||||
from datetime import datetime
|
||||
|
||||
from numpy import ndarray
|
||||
@@ -19,6 +18,7 @@ if TYPE_CHECKING:
|
||||
from django.conf import settings
|
||||
from django.core.cache import cache
|
||||
from django.core.cache import caches
|
||||
from django.db.models import Max
|
||||
|
||||
from documents.caching import CACHE_5_MINUTES
|
||||
from documents.caching import CACHE_50_MINUTES
|
||||
@@ -99,7 +99,8 @@ class DocumentClassifier:
|
||||
# v8 - Added storage path classifier
|
||||
# v9 - Changed from hashing to time/ids for re-train check
|
||||
# v10 - HMAC-signed model file
|
||||
FORMAT_VERSION = 10
|
||||
# v11 - Added auto-label-set digest for fast skip without full document scan
|
||||
FORMAT_VERSION = 11
|
||||
|
||||
HMAC_SIZE = 32 # SHA-256 digest length
|
||||
|
||||
@@ -108,6 +109,8 @@ class DocumentClassifier:
|
||||
self.last_doc_change_time: datetime | None = None
|
||||
# Hash of primary keys of AUTO matching values last used in training
|
||||
self.last_auto_type_hash: bytes | None = None
|
||||
# Digest of the set of all MATCH_AUTO label PKs (fast-skip guard)
|
||||
self.last_auto_label_set_digest: bytes | None = None
|
||||
|
||||
self.data_vectorizer = None
|
||||
self.data_vectorizer_hash = None
|
||||
@@ -140,6 +143,29 @@ class DocumentClassifier:
|
||||
sha256,
|
||||
).digest()
|
||||
|
||||
@staticmethod
def _compute_auto_label_set_digest() -> bytes:
    """
    Return a SHA-256 digest identifying the current set of MATCH_AUTO labels.

    One query per label type (Correspondent, DocumentType, Tag,
    StoragePath) fetches the primary keys of the labels whose matching
    algorithm is MATCH_AUTO. The digest depends only on which labels are
    AUTO, not on how documents are assigned to them.

    Each label type's section is framed with the model name and the number
    of keys, so the same key appearing under a different label type yields
    a different digest (e.g. Correspondent {1, 2} / Tag {} must not collide
    with Correspondent {1} / Tag {2}).
    """
    from documents.models import Correspondent
    from documents.models import DocumentType
    from documents.models import StoragePath
    from documents.models import Tag

    hasher = sha256()
    for model in (Correspondent, DocumentType, Tag, StoragePath):
        pks = sorted(
            model.objects.filter(
                matching_algorithm=MatchingModel.MATCH_AUTO,
            ).values_list("pk", flat=True),
        )
        # Section header: which model these keys belong to and how many
        # follow, making the concatenated byte stream unambiguous.
        hasher.update(model.__name__.encode("utf-8"))
        hasher.update(len(pks).to_bytes(8, "little", signed=False))
        for pk in pks:
            # 8 bytes so keys above 2**32 - 1 do not raise OverflowError.
            hasher.update(pk.to_bytes(8, "little", signed=False))
    return hasher.digest()
|
||||
|
||||
def load(self) -> None:
|
||||
from sklearn.exceptions import InconsistentVersionWarning
|
||||
|
||||
@@ -161,6 +187,7 @@ class DocumentClassifier:
|
||||
schema_version,
|
||||
self.last_doc_change_time,
|
||||
self.last_auto_type_hash,
|
||||
self.last_auto_label_set_digest,
|
||||
self.data_vectorizer,
|
||||
self.tags_binarizer,
|
||||
self.tags_classifier,
|
||||
@@ -202,6 +229,7 @@ class DocumentClassifier:
|
||||
self.FORMAT_VERSION,
|
||||
self.last_doc_change_time,
|
||||
self.last_auto_type_hash,
|
||||
self.last_auto_label_set_digest,
|
||||
self.data_vectorizer,
|
||||
self.tags_binarizer,
|
||||
self.tags_classifier,
|
||||
@@ -224,6 +252,39 @@ class DocumentClassifier:
|
||||
) -> bool:
|
||||
notify = status_callback if status_callback is not None else lambda _: None
|
||||
|
||||
# Fast skip: avoid the expensive per-document label scan when nothing
|
||||
# has changed. Requires a prior training run to have populated both
|
||||
# last_doc_change_time and last_auto_label_set_digest.
|
||||
if (
|
||||
self.last_doc_change_time is not None
|
||||
and self.last_auto_label_set_digest is not None
|
||||
):
|
||||
latest_mod = Document.objects.exclude(
|
||||
tags__is_inbox_tag=True,
|
||||
).aggregate(Max("modified"))["modified__max"]
|
||||
if latest_mod is not None and latest_mod <= self.last_doc_change_time:
|
||||
current_digest = self._compute_auto_label_set_digest()
|
||||
if current_digest == self.last_auto_label_set_digest:
|
||||
logger.info("No updates since last training")
|
||||
cache.set(
|
||||
CLASSIFIER_MODIFIED_KEY,
|
||||
self.last_doc_change_time,
|
||||
CACHE_50_MINUTES,
|
||||
)
|
||||
cache.set(
|
||||
CLASSIFIER_HASH_KEY,
|
||||
self.last_auto_type_hash.hex()
|
||||
if self.last_auto_type_hash
|
||||
else "",
|
||||
CACHE_50_MINUTES,
|
||||
)
|
||||
cache.set(
|
||||
CLASSIFIER_VERSION_KEY,
|
||||
self.FORMAT_VERSION,
|
||||
CACHE_50_MINUTES,
|
||||
)
|
||||
return False
|
||||
|
||||
# Get non-inbox documents
|
||||
docs_queryset = (
|
||||
Document.objects.exclude(
|
||||
@@ -242,12 +303,15 @@ class DocumentClassifier:
|
||||
labels_correspondent = []
|
||||
labels_document_type = []
|
||||
labels_storage_path = []
|
||||
doc_contents: list[str] = []
|
||||
|
||||
# Step 1: Extract and preprocess training data from the database.
|
||||
# Step 1: Extract labels and capture content in a single pass.
|
||||
logger.debug("Gathering data from database...")
|
||||
notify(f"Gathering data from {docs_queryset.count()} document(s)...")
|
||||
hasher = sha256()
|
||||
for doc in docs_queryset:
|
||||
doc_contents.append(doc.content)
|
||||
|
||||
y = -1
|
||||
dt = doc.document_type
|
||||
if dt and dt.matching_algorithm == MatchingModel.MATCH_AUTO:
|
||||
@@ -282,25 +346,7 @@ class DocumentClassifier:
|
||||
|
||||
num_tags = len(labels_tags_unique)
|
||||
|
||||
# Check if retraining is actually required.
|
||||
# A document has been updated since the classifier was trained
|
||||
# New auto tags, types, correspondent, storage paths exist
|
||||
latest_doc_change = docs_queryset.latest("modified").modified
|
||||
if (
|
||||
self.last_doc_change_time is not None
|
||||
and self.last_doc_change_time >= latest_doc_change
|
||||
) and self.last_auto_type_hash == hasher.digest():
|
||||
logger.info("No updates since last training")
|
||||
# Set the classifier information into the cache
|
||||
# Caching for 50 minutes, so slightly less than the normal retrain time
|
||||
cache.set(
|
||||
CLASSIFIER_MODIFIED_KEY,
|
||||
self.last_doc_change_time,
|
||||
CACHE_50_MINUTES,
|
||||
)
|
||||
cache.set(CLASSIFIER_HASH_KEY, hasher.hexdigest(), CACHE_50_MINUTES)
|
||||
cache.set(CLASSIFIER_VERSION_KEY, self.FORMAT_VERSION, CACHE_50_MINUTES)
|
||||
return False
|
||||
|
||||
# subtract 1 since -1 (null) is also part of the classes.
|
||||
|
||||
@@ -317,21 +363,16 @@ class DocumentClassifier:
|
||||
)
|
||||
|
||||
from sklearn.feature_extraction.text import CountVectorizer
|
||||
from sklearn.multiclass import OneVsRestClassifier
|
||||
from sklearn.neural_network import MLPClassifier
|
||||
from sklearn.preprocessing import LabelBinarizer
|
||||
from sklearn.preprocessing import MultiLabelBinarizer
|
||||
from sklearn.svm import LinearSVC
|
||||
|
||||
# Step 2: vectorize data
|
||||
logger.debug("Vectorizing data...")
|
||||
notify("Vectorizing document content...")
|
||||
|
||||
def content_generator() -> Iterator[str]:
|
||||
"""
|
||||
Generates the content for documents, but once at a time
|
||||
"""
|
||||
for doc in docs_queryset:
|
||||
yield self.preprocess_content(doc.content, shared_cache=False)
|
||||
|
||||
self.data_vectorizer = CountVectorizer(
|
||||
analyzer="word",
|
||||
ngram_range=(1, 2),
|
||||
@@ -339,7 +380,8 @@ class DocumentClassifier:
|
||||
)
|
||||
|
||||
data_vectorized: ndarray = self.data_vectorizer.fit_transform(
|
||||
content_generator(),
|
||||
self.preprocess_content(content, shared_cache=False)
|
||||
for content in doc_contents
|
||||
)
|
||||
|
||||
# See the notes here:
|
||||
@@ -353,8 +395,10 @@ class DocumentClassifier:
|
||||
notify(f"Training tags classifier ({num_tags} tag(s))...")
|
||||
|
||||
if num_tags == 1:
|
||||
# Special case where only one tag has auto:
|
||||
# Fallback to binary classification.
|
||||
# Special case: only one AUTO tag — use binary classification.
|
||||
# MLPClassifier is used here because LinearSVC requires at least
|
||||
# 2 distinct classes in training data, which cannot be guaranteed
|
||||
# when all documents share the single AUTO tag.
|
||||
labels_tags = [
|
||||
label[0] if len(label) == 1 else -1 for label in labels_tags
|
||||
]
|
||||
@@ -362,11 +406,15 @@ class DocumentClassifier:
|
||||
labels_tags_vectorized: ndarray = self.tags_binarizer.fit_transform(
|
||||
labels_tags,
|
||||
).ravel()
|
||||
self.tags_classifier = MLPClassifier(tol=0.01)
|
||||
else:
|
||||
# General multi-label case: LinearSVC via OneVsRestClassifier.
|
||||
# Vastly more memory- and time-efficient than MLPClassifier for
|
||||
# large class counts (e.g. hundreds of AUTO tags).
|
||||
self.tags_binarizer = MultiLabelBinarizer()
|
||||
labels_tags_vectorized = self.tags_binarizer.fit_transform(labels_tags)
|
||||
self.tags_classifier = OneVsRestClassifier(LinearSVC())
|
||||
|
||||
self.tags_classifier = MLPClassifier(tol=0.01)
|
||||
self.tags_classifier.fit(data_vectorized, labels_tags_vectorized)
|
||||
else:
|
||||
self.tags_classifier = None
|
||||
@@ -416,6 +464,7 @@ class DocumentClassifier:
|
||||
|
||||
self.last_doc_change_time = latest_doc_change
|
||||
self.last_auto_type_hash = hasher.digest()
|
||||
self.last_auto_label_set_digest = self._compute_auto_label_set_digest()
|
||||
self._update_data_vectorizer_hash()
|
||||
|
||||
# Set the classifier information into the cache
|
||||
|
||||
@@ -56,26 +56,6 @@ class PaperlessAdminPermissions(BasePermission):
|
||||
return request.user.is_staff
|
||||
|
||||
|
||||
def has_global_statistics_permission(user: User | None) -> bool:
|
||||
if user is None or not getattr(user, "is_authenticated", False):
|
||||
return False
|
||||
|
||||
return getattr(user, "is_superuser", False) or user.has_perm(
|
||||
"paperless.view_global_statistics",
|
||||
)
|
||||
|
||||
|
||||
def has_system_status_permission(user: User | None) -> bool:
|
||||
if user is None or not getattr(user, "is_authenticated", False):
|
||||
return False
|
||||
|
||||
return (
|
||||
getattr(user, "is_superuser", False)
|
||||
or getattr(user, "is_staff", False)
|
||||
or user.has_perm("paperless.view_system_status")
|
||||
)
|
||||
|
||||
|
||||
def get_groups_with_only_permission(obj, codename):
|
||||
ctype = ContentType.objects.get_for_model(obj)
|
||||
permission = Permission.objects.get(content_type=ctype, codename=codename)
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from documents.search._backend import SearchHit
|
||||
from documents.search._backend import SearchIndexLockError
|
||||
from documents.search._backend import SearchMode
|
||||
from documents.search._backend import SearchResults
|
||||
from documents.search._backend import TantivyBackend
|
||||
from documents.search._backend import TantivyRelevanceList
|
||||
from documents.search._backend import WriteBatch
|
||||
@@ -10,9 +10,9 @@ from documents.search._schema import needs_rebuild
|
||||
from documents.search._schema import wipe_index
|
||||
|
||||
__all__ = [
|
||||
"SearchHit",
|
||||
"SearchIndexLockError",
|
||||
"SearchMode",
|
||||
"SearchResults",
|
||||
"TantivyBackend",
|
||||
"TantivyRelevanceList",
|
||||
"WriteBatch",
|
||||
|
||||
@@ -1,13 +1,12 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
import threading
|
||||
from collections import Counter
|
||||
from dataclasses import dataclass
|
||||
from datetime import UTC
|
||||
from datetime import datetime
|
||||
from enum import StrEnum
|
||||
from html import escape
|
||||
from typing import TYPE_CHECKING
|
||||
from typing import Self
|
||||
from typing import TypedDict
|
||||
@@ -55,36 +54,6 @@ class SearchMode(StrEnum):
|
||||
TITLE = "title"
|
||||
|
||||
|
||||
def _render_snippet_html(snippet: tantivy.Snippet) -> str:
|
||||
fragment = snippet.fragment()
|
||||
highlighted = sorted(snippet.highlighted(), key=lambda r: r.start)
|
||||
|
||||
if not highlighted:
|
||||
return escape(fragment)
|
||||
|
||||
parts: list[str] = []
|
||||
cursor = 0
|
||||
fragment_len = len(fragment)
|
||||
|
||||
for highlight in highlighted:
|
||||
start = max(0, min(fragment_len, highlight.start))
|
||||
end = max(start, min(fragment_len, highlight.end))
|
||||
|
||||
if end <= cursor:
|
||||
continue
|
||||
|
||||
if start > cursor:
|
||||
parts.append(escape(fragment[cursor:start]))
|
||||
|
||||
parts.append(f'<span class="match">{escape(fragment[start:end])}</span>')
|
||||
cursor = end
|
||||
|
||||
if cursor < fragment_len:
|
||||
parts.append(escape(fragment[cursor:]))
|
||||
|
||||
return "".join(parts)
|
||||
|
||||
|
||||
def _extract_autocomplete_words(text_sources: list[str]) -> set[str]:
|
||||
"""Extract and normalize words for autocomplete.
|
||||
|
||||
@@ -119,63 +88,45 @@ class SearchHit(TypedDict):
|
||||
highlights: dict[str, str]
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class SearchResults:
|
||||
"""
|
||||
Container for search results with pagination metadata.
|
||||
|
||||
Attributes:
|
||||
hits: List of search results with scores and highlights
|
||||
total: Total matching documents across all pages (for pagination)
|
||||
query: Preprocessed query string after date/syntax rewriting
|
||||
"""
|
||||
|
||||
hits: list[SearchHit]
|
||||
total: int # total matching documents (for pagination)
|
||||
query: str # preprocessed query string
|
||||
|
||||
|
||||
class TantivyRelevanceList:
|
||||
"""
|
||||
DRF-compatible list wrapper for Tantivy search results.
|
||||
DRF-compatible list wrapper for Tantivy search hits.
|
||||
|
||||
Holds a lightweight ordered list of IDs (for pagination count and
|
||||
``selection_data``) together with a small page of rich ``SearchHit``
|
||||
dicts (for serialization). DRF's ``PageNumberPagination`` calls
|
||||
``__len__`` to compute the total page count and ``__getitem__`` to
|
||||
slice the displayed page.
|
||||
Provides paginated access to search results while storing all hits in memory
|
||||
for efficient ID retrieval. Used by Django REST framework for pagination.
|
||||
|
||||
Args:
|
||||
ordered_ids: All matching document IDs in display order.
|
||||
page_hits: Rich SearchHit dicts for the requested DRF page only.
|
||||
page_offset: Index into *ordered_ids* where *page_hits* starts.
|
||||
Methods:
|
||||
__len__: Returns total hit count for pagination calculations
|
||||
__getitem__: Slices the hit list for page-specific results
|
||||
|
||||
Note: Stores ALL post-filter hits so get_all_result_ids() can return
|
||||
every matching document ID without requiring a second search query.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
ordered_ids: list[int],
|
||||
page_hits: list[SearchHit],
|
||||
page_offset: int = 0,
|
||||
) -> None:
|
||||
self._ordered_ids = ordered_ids
|
||||
self._page_hits = page_hits
|
||||
self._page_offset = page_offset
|
||||
def __init__(self, hits: list[SearchHit]) -> None:
|
||||
self._hits = hits
|
||||
|
||||
def __len__(self) -> int:
|
||||
return len(self._ordered_ids)
|
||||
return len(self._hits)
|
||||
|
||||
def __getitem__(self, key: int | slice) -> SearchHit | list[SearchHit]:
|
||||
if isinstance(key, int):
|
||||
idx = key if key >= 0 else len(self._ordered_ids) + key
|
||||
if self._page_offset <= idx < self._page_offset + len(self._page_hits):
|
||||
return self._page_hits[idx - self._page_offset]
|
||||
return SearchHit(
|
||||
id=self._ordered_ids[key],
|
||||
score=0.0,
|
||||
rank=idx + 1,
|
||||
highlights={},
|
||||
)
|
||||
start = key.start or 0
|
||||
stop = key.stop or len(self._ordered_ids)
|
||||
# DRF slices to extract the current page. If the slice aligns
|
||||
# with our pre-fetched page_hits, return them directly.
|
||||
# We only check start — DRF always slices with stop=start+page_size,
|
||||
# which exceeds page_hits length on the last page.
|
||||
if start == self._page_offset:
|
||||
return self._page_hits[: stop - start]
|
||||
# Fallback: return stub dicts (no highlights).
|
||||
return [
|
||||
SearchHit(id=doc_id, score=0.0, rank=start + i + 1, highlights={})
|
||||
for i, doc_id in enumerate(self._ordered_ids[key])
|
||||
]
|
||||
|
||||
def get_all_ids(self) -> list[int]:
|
||||
"""Return all matching document IDs in display order."""
|
||||
return self._ordered_ids
|
||||
def __getitem__(self, key: slice) -> list[SearchHit]:
|
||||
return self._hits[key]
|
||||
|
||||
|
||||
class SearchIndexLockError(Exception):
|
||||
@@ -255,13 +206,10 @@ class WriteBatch:
|
||||
"""
|
||||
Remove a document from the batch by its primary key.
|
||||
|
||||
Uses range_query instead of term_query to work around a tantivy-py bug
|
||||
where Python integers are inferred as i64, producing Terms that never
|
||||
match u64 fields.
|
||||
|
||||
TODO: Replace with term_query("id", doc_id) once
|
||||
https://github.com/quickwit-oss/tantivy-py/pull/642 lands.
|
||||
Uses range query instead of term query to work around unsigned integer
|
||||
type detection bug in tantivy-py 0.25.
|
||||
"""
|
||||
# Use range query to work around u64 deletion bug
|
||||
self._writer.delete_documents_by_query(
|
||||
tantivy.Query.range_query(
|
||||
self._backend._schema,
|
||||
@@ -286,34 +234,6 @@ class TantivyBackend:
|
||||
the underlying index directory changes (e.g., during test isolation).
|
||||
"""
|
||||
|
||||
# Maps DRF ordering field names to Tantivy index field names.
|
||||
SORT_FIELD_MAP: dict[str, str] = {
|
||||
"title": "title_sort",
|
||||
"correspondent__name": "correspondent_sort",
|
||||
"document_type__name": "type_sort",
|
||||
"created": "created",
|
||||
"added": "added",
|
||||
"modified": "modified",
|
||||
"archive_serial_number": "asn",
|
||||
"page_count": "page_count",
|
||||
"num_notes": "num_notes",
|
||||
}
|
||||
|
||||
# Fields where Tantivy's sort order matches the ORM's sort order.
|
||||
# Text-based fields (title, correspondent__name, document_type__name)
|
||||
# are excluded because Tantivy's tokenized fast fields produce different
|
||||
# ordering than the ORM's collation-based ordering.
|
||||
SORTABLE_FIELDS: frozenset[str] = frozenset(
|
||||
{
|
||||
"created",
|
||||
"added",
|
||||
"modified",
|
||||
"archive_serial_number",
|
||||
"page_count",
|
||||
"num_notes",
|
||||
},
|
||||
)
|
||||
|
||||
def __init__(self, path: Path | None = None):
|
||||
# path=None → in-memory index (for tests)
|
||||
# path=some_dir → on-disk index (for production)
|
||||
@@ -352,36 +272,6 @@ class TantivyBackend:
|
||||
if self._index is None:
|
||||
self.open() # pragma: no cover
|
||||
|
||||
def _parse_query(
|
||||
self,
|
||||
query: str,
|
||||
search_mode: SearchMode,
|
||||
) -> tantivy.Query:
|
||||
"""Parse a user query string into a Tantivy Query object."""
|
||||
tz = get_current_timezone()
|
||||
if search_mode is SearchMode.TEXT:
|
||||
return parse_simple_text_query(self._index, query)
|
||||
elif search_mode is SearchMode.TITLE:
|
||||
return parse_simple_title_query(self._index, query)
|
||||
else:
|
||||
return parse_user_query(self._index, query, tz)
|
||||
|
||||
def _apply_permission_filter(
|
||||
self,
|
||||
query: tantivy.Query,
|
||||
user: AbstractBaseUser | None,
|
||||
) -> tantivy.Query:
|
||||
"""Wrap a query with a permission filter if the user is not a superuser."""
|
||||
if user is not None:
|
||||
permission_filter = build_permission_filter(self._schema, user)
|
||||
return tantivy.Query.boolean_query(
|
||||
[
|
||||
(tantivy.Occur.Must, query),
|
||||
(tantivy.Occur.Must, permission_filter),
|
||||
],
|
||||
)
|
||||
return query
|
||||
|
||||
def _build_tantivy_doc(
|
||||
self,
|
||||
document: Document,
|
||||
@@ -436,17 +326,12 @@ class TantivyBackend:
|
||||
doc.add_unsigned("tag_id", tag.pk)
|
||||
tag_names.append(tag.name)
|
||||
|
||||
# Notes — JSON for structured queries (notes.user:alice, notes.note:text).
|
||||
# notes_text is a plain-text companion for snippet/highlight generation;
|
||||
# tantivy's SnippetGenerator does not support JSON fields.
|
||||
# Notes — JSON for structured queries (notes.user:alice, notes.note:text),
|
||||
# companion text field for default full-text search.
|
||||
num_notes = 0
|
||||
note_texts: list[str] = []
|
||||
for note in document.notes.all():
|
||||
num_notes += 1
|
||||
doc.add_json("notes", {"note": note.note, "user": note.user.username})
|
||||
note_texts.append(note.note)
|
||||
if note_texts:
|
||||
doc.add_text("notes_text", " ".join(note_texts))
|
||||
|
||||
# Custom fields — JSON for structured queries (custom_fields.name:x, custom_fields.value:y),
|
||||
# companion text field for default full-text search.
|
||||
@@ -540,127 +425,155 @@ class TantivyBackend:
|
||||
with self.batch_update(lock_timeout=5.0) as batch:
|
||||
batch.remove(doc_id)
|
||||
|
||||
def highlight_hits(
|
||||
def search(
|
||||
self,
|
||||
query: str,
|
||||
doc_ids: list[int],
|
||||
user: AbstractBaseUser | None,
|
||||
page: int,
|
||||
page_size: int,
|
||||
sort_field: str | None,
|
||||
*,
|
||||
sort_reverse: bool,
|
||||
search_mode: SearchMode = SearchMode.QUERY,
|
||||
rank_start: int = 1,
|
||||
) -> list[SearchHit]:
|
||||
) -> SearchResults:
|
||||
"""
|
||||
Generate SearchHit dicts with highlights for specific document IDs.
|
||||
Execute a search query against the document index.
|
||||
|
||||
Unlike search(), this does not execute a ranked query — it looks up
|
||||
each document by ID and generates snippets against the provided query.
|
||||
Use this when you already know which documents to display (from
|
||||
search_ids + ORM filtering) and just need highlight data.
|
||||
Processes the user query through date rewriting, normalization, and
|
||||
permission filtering before executing against Tantivy. Supports both
|
||||
relevance-based and field-based sorting.
|
||||
|
||||
Note: Each doc_id requires an individual index lookup because tantivy-py
|
||||
does not yet expose a batch fast-field read API. This is acceptable for
|
||||
page-sized batches (typically 25 docs) but should not be called with
|
||||
thousands of IDs.
|
||||
|
||||
TODO: When https://github.com/quickwit-oss/tantivy-py/pull/641 lands,
|
||||
the per-doc range_query lookups here can be replaced with a single
|
||||
collect_u64_fast_field("id", doc_addresses) call.
|
||||
QUERY search mode supports natural date keywords, field filters, etc.
|
||||
TITLE search mode treats the query as plain text to search for in title only
|
||||
TEXT search mode treats the query as plain text to search for in title and content
|
||||
|
||||
Args:
|
||||
query: The search query (used for snippet generation)
|
||||
doc_ids: Ordered list of document IDs to generate hits for
|
||||
search_mode: Query parsing mode (for building the snippet query)
|
||||
rank_start: Starting rank value (1-based absolute position in the
|
||||
full result set; pass ``page_offset + 1`` for paginated calls)
|
||||
query: User's search query
|
||||
user: User for permission filtering (None for superuser/no filtering)
|
||||
page: Page number (1-indexed) for pagination
|
||||
page_size: Number of results per page
|
||||
sort_field: Field to sort by (None for relevance ranking)
|
||||
sort_reverse: Whether to reverse the sort order
|
||||
search_mode: "query" for advanced Tantivy syntax, "text" for
|
||||
plain-text search over title and content only, "title" for
|
||||
plain-text search over title only
|
||||
|
||||
Returns:
|
||||
List of SearchHit dicts in the same order as doc_ids
|
||||
SearchResults with hits, total count, and processed query
|
||||
"""
|
||||
if not doc_ids:
|
||||
return []
|
||||
|
||||
self._ensure_open()
|
||||
user_query = self._parse_query(query, search_mode)
|
||||
tz = get_current_timezone()
|
||||
if search_mode is SearchMode.TEXT:
|
||||
user_query = parse_simple_text_query(self._index, query)
|
||||
elif search_mode is SearchMode.TITLE:
|
||||
user_query = parse_simple_title_query(self._index, query)
|
||||
else:
|
||||
user_query = parse_user_query(self._index, query, tz)
|
||||
|
||||
# For notes_text snippet generation, we need a query that targets the
|
||||
# notes_text field directly. user_query may contain JSON-field terms
|
||||
# (e.g. notes.note:urgent) that the SnippetGenerator cannot resolve
|
||||
# against a text field. Strip field:value prefixes so bare terms like
|
||||
# "urgent" are re-parsed against notes_text, producing highlights even
|
||||
# when the original query used structured syntax.
|
||||
bare_query = re.sub(r"\w[\w.]*:", "", query).strip()
|
||||
try:
|
||||
notes_text_query = (
|
||||
self._index.parse_query(bare_query, ["notes_text"])
|
||||
if bare_query
|
||||
else user_query
|
||||
)
|
||||
except Exception:
|
||||
notes_text_query = user_query
|
||||
|
||||
searcher = self._index.searcher()
|
||||
snippet_generator = None
|
||||
notes_snippet_generator = None
|
||||
hits: list[SearchHit] = []
|
||||
|
||||
for rank, doc_id in enumerate(doc_ids, start=rank_start):
|
||||
# Look up document by ID, scoring against the user query so that
|
||||
# the returned SearchHit carries a real BM25 relevance score.
|
||||
id_query = tantivy.Query.range_query(
|
||||
self._schema,
|
||||
"id",
|
||||
tantivy.FieldType.Unsigned,
|
||||
doc_id,
|
||||
doc_id,
|
||||
)
|
||||
scored_query = tantivy.Query.boolean_query(
|
||||
# Apply permission filter if user is not None (not superuser)
|
||||
if user is not None:
|
||||
permission_filter = build_permission_filter(self._schema, user)
|
||||
final_query = tantivy.Query.boolean_query(
|
||||
[
|
||||
(tantivy.Occur.Must, user_query),
|
||||
(tantivy.Occur.Must, id_query),
|
||||
(tantivy.Occur.Must, permission_filter),
|
||||
],
|
||||
)
|
||||
results = searcher.search(scored_query, limit=1)
|
||||
else:
|
||||
final_query = user_query
|
||||
|
||||
if not results.hits:
|
||||
continue
|
||||
searcher = self._index.searcher()
|
||||
offset = (page - 1) * page_size
|
||||
|
||||
score, doc_address = results.hits[0]
|
||||
# Map sort fields
|
||||
sort_field_map = {
|
||||
"title": "title_sort",
|
||||
"correspondent__name": "correspondent_sort",
|
||||
"document_type__name": "type_sort",
|
||||
"created": "created",
|
||||
"added": "added",
|
||||
"modified": "modified",
|
||||
"archive_serial_number": "asn",
|
||||
"page_count": "page_count",
|
||||
"num_notes": "num_notes",
|
||||
}
|
||||
|
||||
# Perform search
|
||||
if sort_field and sort_field in sort_field_map:
|
||||
mapped_field = sort_field_map[sort_field]
|
||||
results = searcher.search(
|
||||
final_query,
|
||||
limit=offset + page_size,
|
||||
order_by_field=mapped_field,
|
||||
order=tantivy.Order.Desc if sort_reverse else tantivy.Order.Asc,
|
||||
)
|
||||
# Field sorting: hits are still (score, DocAddress) tuples; score unused
|
||||
all_hits = [(hit[1], 0.0) for hit in results.hits]
|
||||
else:
|
||||
# Score-based search: hits are (score, DocAddress) tuples
|
||||
results = searcher.search(final_query, limit=offset + page_size)
|
||||
all_hits = [(hit[1], hit[0]) for hit in results.hits]
|
||||
|
||||
total = results.count
|
||||
|
||||
# Normalize scores for score-based searches
|
||||
if not sort_field and all_hits:
|
||||
max_score = max(hit[1] for hit in all_hits) or 1.0
|
||||
all_hits = [(hit[0], hit[1] / max_score) for hit in all_hits]
|
||||
|
||||
# Apply threshold filter if configured (score-based search only)
|
||||
threshold = settings.ADVANCED_FUZZY_SEARCH_THRESHOLD
|
||||
if threshold is not None and not sort_field:
|
||||
all_hits = [hit for hit in all_hits if hit[1] >= threshold]
|
||||
|
||||
# Get the page's hits
|
||||
page_hits = all_hits[offset : offset + page_size]
|
||||
|
||||
# Build result hits with highlights
|
||||
hits: list[SearchHit] = []
|
||||
snippet_generator = None
|
||||
notes_snippet_generator = None
|
||||
|
||||
for rank, (doc_address, score) in enumerate(page_hits, start=offset + 1):
|
||||
# Get the actual document from the searcher using the doc address
|
||||
actual_doc = searcher.doc(doc_address)
|
||||
doc_dict = actual_doc.to_dict()
|
||||
doc_id = doc_dict["id"][0]
|
||||
|
||||
highlights: dict[str, str] = {}
|
||||
try:
|
||||
if snippet_generator is None:
|
||||
snippet_generator = tantivy.SnippetGenerator.create(
|
||||
searcher,
|
||||
user_query,
|
||||
self._schema,
|
||||
"content",
|
||||
)
|
||||
|
||||
content_html = _render_snippet_html(
|
||||
snippet_generator.snippet_from_doc(actual_doc),
|
||||
)
|
||||
if content_html:
|
||||
highlights["content"] = content_html
|
||||
|
||||
if "notes_text" in doc_dict:
|
||||
# Use notes_text (plain text) for snippet generation — tantivy's
|
||||
# SnippetGenerator does not support JSON fields.
|
||||
if notes_snippet_generator is None:
|
||||
notes_snippet_generator = tantivy.SnippetGenerator.create(
|
||||
# Generate highlights if score > 0
|
||||
if score > 0:
|
||||
try:
|
||||
if snippet_generator is None:
|
||||
snippet_generator = tantivy.SnippetGenerator.create(
|
||||
searcher,
|
||||
notes_text_query,
|
||||
final_query,
|
||||
self._schema,
|
||||
"notes_text",
|
||||
"content",
|
||||
)
|
||||
notes_html = _render_snippet_html(
|
||||
notes_snippet_generator.snippet_from_doc(actual_doc),
|
||||
)
|
||||
if notes_html:
|
||||
highlights["notes"] = notes_html
|
||||
|
||||
except Exception: # pragma: no cover
|
||||
logger.debug("Failed to generate highlights for doc %s", doc_id)
|
||||
content_snippet = snippet_generator.snippet_from_doc(actual_doc)
|
||||
if content_snippet:
|
||||
highlights["content"] = str(content_snippet)
|
||||
|
||||
# Try notes highlights
|
||||
if "notes" in doc_dict:
|
||||
if notes_snippet_generator is None:
|
||||
notes_snippet_generator = tantivy.SnippetGenerator.create(
|
||||
searcher,
|
||||
final_query,
|
||||
self._schema,
|
||||
"notes",
|
||||
)
|
||||
notes_snippet = notes_snippet_generator.snippet_from_doc(
|
||||
actual_doc,
|
||||
)
|
||||
if notes_snippet:
|
||||
highlights["notes"] = str(notes_snippet)
|
||||
|
||||
except Exception: # pragma: no cover
|
||||
logger.debug("Failed to generate highlights for doc %s", doc_id)
|
||||
|
||||
hits.append(
|
||||
SearchHit(
|
||||
@@ -671,69 +584,11 @@ class TantivyBackend:
|
||||
),
|
||||
)
|
||||
|
||||
return hits
|
||||
|
||||
def search_ids(
|
||||
self,
|
||||
query: str,
|
||||
user: AbstractBaseUser | None,
|
||||
*,
|
||||
sort_field: str | None = None,
|
||||
sort_reverse: bool = False,
|
||||
search_mode: SearchMode = SearchMode.QUERY,
|
||||
limit: int | None = None,
|
||||
) -> list[int]:
|
||||
"""
|
||||
Return document IDs matching a query — no highlights or scores.
|
||||
|
||||
This is the lightweight companion to search(). Use it when you need the
|
||||
full set of matching IDs (e.g. for ``selection_data``) but don't need
|
||||
scores, ranks, or highlights.
|
||||
|
||||
Args:
|
||||
query: User's search query
|
||||
user: User for permission filtering (None for superuser/no filtering)
|
||||
sort_field: Field to sort by (None for relevance ranking)
|
||||
sort_reverse: Whether to reverse the sort order
|
||||
search_mode: Query parsing mode (QUERY, TEXT, or TITLE)
|
||||
limit: Maximum number of IDs to return (None = all matching docs)
|
||||
|
||||
Returns:
|
||||
List of document IDs in the requested order
|
||||
"""
|
||||
self._ensure_open()
|
||||
user_query = self._parse_query(query, search_mode)
|
||||
final_query = self._apply_permission_filter(user_query, user)
|
||||
|
||||
searcher = self._index.searcher()
|
||||
effective_limit = limit if limit is not None else searcher.num_docs
|
||||
|
||||
if sort_field and sort_field in self.SORT_FIELD_MAP:
|
||||
mapped_field = self.SORT_FIELD_MAP[sort_field]
|
||||
results = searcher.search(
|
||||
final_query,
|
||||
limit=effective_limit,
|
||||
order_by_field=mapped_field,
|
||||
order=tantivy.Order.Desc if sort_reverse else tantivy.Order.Asc,
|
||||
)
|
||||
all_hits = [(hit[1],) for hit in results.hits]
|
||||
else:
|
||||
results = searcher.search(final_query, limit=effective_limit)
|
||||
all_hits = [(hit[1], hit[0]) for hit in results.hits]
|
||||
|
||||
# Normalize scores and apply threshold (relevance search only)
|
||||
if all_hits:
|
||||
max_score = max(hit[1] for hit in all_hits) or 1.0
|
||||
all_hits = [(hit[0], hit[1] / max_score) for hit in all_hits]
|
||||
|
||||
threshold = settings.ADVANCED_FUZZY_SEARCH_THRESHOLD
|
||||
if threshold is not None:
|
||||
all_hits = [hit for hit in all_hits if hit[1] >= threshold]
|
||||
|
||||
# TODO: Replace with searcher.collect_u64_fast_field("id", addrs) once
|
||||
# https://github.com/quickwit-oss/tantivy-py/pull/641 lands — eliminates
|
||||
# one stored-doc fetch per result (~80% reduction in search_ids latency).
|
||||
return [searcher.doc(doc_addr).to_dict()["id"][0] for doc_addr, *_ in all_hits]
|
||||
return SearchResults(
|
||||
hits=hits,
|
||||
total=total,
|
||||
query=query,
|
||||
)
|
||||
|
||||
def autocomplete(
|
||||
self,
|
||||
@@ -748,10 +603,6 @@ class TantivyBackend:
|
||||
frequency (how many documents contain each word). Optionally filters
|
||||
results to only words from documents visible to the specified user.
|
||||
|
||||
NOTE: This is the hottest search path (called per keystroke).
|
||||
A future improvement would be to cache results in Redis, keyed by
|
||||
(prefix, user_id), and invalidate on index writes.
|
||||
|
||||
Args:
|
||||
term: Prefix to match against autocomplete words
|
||||
limit: Maximum number of suggestions to return
|
||||
@@ -762,94 +613,64 @@ class TantivyBackend:
|
||||
"""
|
||||
self._ensure_open()
|
||||
normalized_term = ascii_fold(term.lower())
|
||||
if not normalized_term:
|
||||
return []
|
||||
|
||||
searcher = self._index.searcher()
|
||||
|
||||
# Build a prefix query on autocomplete_word so we only scan docs
|
||||
# containing words that start with the prefix, not the entire index.
|
||||
# tantivy regex is implicitly anchored; .+ avoids the empty-match
|
||||
# error that .* triggers. We OR with term_query to also match the
|
||||
# exact prefix as a complete word.
|
||||
escaped = re.escape(normalized_term)
|
||||
prefix_query = tantivy.Query.boolean_query(
|
||||
[
|
||||
(
|
||||
tantivy.Occur.Should,
|
||||
tantivy.Query.term_query(
|
||||
self._schema,
|
||||
"autocomplete_word",
|
||||
normalized_term,
|
||||
),
|
||||
),
|
||||
(
|
||||
tantivy.Occur.Should,
|
||||
tantivy.Query.regex_query(
|
||||
self._schema,
|
||||
"autocomplete_word",
|
||||
f"{escaped}.+",
|
||||
),
|
||||
),
|
||||
],
|
||||
)
|
||||
|
||||
# Intersect with permission filter so autocomplete words from
|
||||
# invisible documents don't leak to other users.
|
||||
# Apply permission filter for non-superusers so autocomplete words
|
||||
# from invisible documents don't leak to other users.
|
||||
if user is not None and not user.is_superuser:
|
||||
final_query = tantivy.Query.boolean_query(
|
||||
[
|
||||
(tantivy.Occur.Must, prefix_query),
|
||||
(tantivy.Occur.Must, build_permission_filter(self._schema, user)),
|
||||
],
|
||||
)
|
||||
base_query = build_permission_filter(self._schema, user)
|
||||
else:
|
||||
final_query = prefix_query
|
||||
base_query = tantivy.Query.all_query()
|
||||
|
||||
results = searcher.search(final_query, limit=searcher.num_docs)
|
||||
results = searcher.search(base_query, limit=10000)
|
||||
|
||||
# Count how many visible documents each matching word appears in.
|
||||
# Count how many visible documents each word appears in.
|
||||
# Using Counter (not set) preserves per-word document frequency so
|
||||
# we can rank suggestions by how commonly they occur — the same
|
||||
# signal Whoosh used for Tf/Idf-based autocomplete ordering.
|
||||
word_counts: Counter[str] = Counter()
|
||||
for _score, doc_address in results.hits:
|
||||
stored_doc = searcher.doc(doc_address)
|
||||
doc_dict = stored_doc.to_dict()
|
||||
if "autocomplete_word" in doc_dict:
|
||||
for word in doc_dict["autocomplete_word"]:
|
||||
if word.startswith(normalized_term):
|
||||
word_counts[word] += 1
|
||||
word_counts.update(doc_dict["autocomplete_word"])
|
||||
|
||||
# Sort by document frequency descending; break ties alphabetically.
|
||||
# Filter to prefix matches, sort by document frequency descending;
|
||||
# break ties alphabetically for stable, deterministic output.
|
||||
matches = sorted(
|
||||
word_counts,
|
||||
(w for w in word_counts if w.startswith(normalized_term)),
|
||||
key=lambda w: (-word_counts[w], w),
|
||||
)
|
||||
|
||||
return matches[:limit]
|
||||
|
||||
def more_like_this_ids(
|
||||
def more_like_this(
|
||||
self,
|
||||
doc_id: int,
|
||||
user: AbstractBaseUser | None,
|
||||
*,
|
||||
limit: int | None = None,
|
||||
) -> list[int]:
|
||||
page: int,
|
||||
page_size: int,
|
||||
) -> SearchResults:
|
||||
"""
|
||||
Return IDs of documents similar to the given document — no highlights.
|
||||
Find documents similar to the given document using content analysis.
|
||||
|
||||
Lightweight companion to more_like_this(). The original document is
|
||||
excluded from results.
|
||||
Uses Tantivy's "more like this" query to find documents with similar
|
||||
content patterns. The original document is excluded from results.
|
||||
|
||||
Args:
|
||||
doc_id: Primary key of the reference document
|
||||
user: User for permission filtering (None for no filtering)
|
||||
limit: Maximum number of IDs to return (None = all matching docs)
|
||||
page: Page number (1-indexed) for pagination
|
||||
page_size: Number of results per page
|
||||
|
||||
Returns:
|
||||
List of similar document IDs (excluding the original)
|
||||
SearchResults with similar documents (excluding the original)
|
||||
"""
|
||||
self._ensure_open()
|
||||
searcher = self._index.searcher()
|
||||
|
||||
# First find the document address
|
||||
id_query = tantivy.Query.range_query(
|
||||
self._schema,
|
||||
"id",
|
||||
@@ -860,9 +681,13 @@ class TantivyBackend:
|
||||
results = searcher.search(id_query, limit=1)
|
||||
|
||||
if not results.hits:
|
||||
return []
|
||||
# Document not found
|
||||
return SearchResults(hits=[], total=0, query=f"more_like:{doc_id}")
|
||||
|
||||
# Extract doc_address from (score, doc_address) tuple
|
||||
doc_address = results.hits[0][1]
|
||||
|
||||
# Build more like this query
|
||||
mlt_query = tantivy.Query.more_like_this_query(
|
||||
doc_address,
|
||||
min_doc_frequency=1,
|
||||
@@ -874,21 +699,59 @@ class TantivyBackend:
|
||||
boost_factor=None,
|
||||
)
|
||||
|
||||
final_query = self._apply_permission_filter(mlt_query, user)
|
||||
# Apply permission filter
|
||||
if user is not None:
|
||||
permission_filter = build_permission_filter(self._schema, user)
|
||||
final_query = tantivy.Query.boolean_query(
|
||||
[
|
||||
(tantivy.Occur.Must, mlt_query),
|
||||
(tantivy.Occur.Must, permission_filter),
|
||||
],
|
||||
)
|
||||
else:
|
||||
final_query = mlt_query
|
||||
|
||||
effective_limit = limit if limit is not None else searcher.num_docs
|
||||
# Fetch one extra to account for excluding the original document
|
||||
results = searcher.search(final_query, limit=effective_limit + 1)
|
||||
# Search
|
||||
offset = (page - 1) * page_size
|
||||
results = searcher.search(final_query, limit=offset + page_size)
|
||||
|
||||
# TODO: Replace with collect_u64_fast_field("id", addrs) once
|
||||
# https://github.com/quickwit-oss/tantivy-py/pull/641 lands.
|
||||
ids = []
|
||||
for _score, doc_address in results.hits:
|
||||
result_doc_id = searcher.doc(doc_address).to_dict()["id"][0]
|
||||
if result_doc_id != doc_id:
|
||||
ids.append(result_doc_id)
|
||||
total = results.count
|
||||
# Convert from (score, doc_address) to (doc_address, score)
|
||||
all_hits = [(hit[1], hit[0]) for hit in results.hits]
|
||||
|
||||
return ids[:limit] if limit is not None else ids
|
||||
# Normalize scores
|
||||
if all_hits:
|
||||
max_score = max(hit[1] for hit in all_hits) or 1.0
|
||||
all_hits = [(hit[0], hit[1] / max_score) for hit in all_hits]
|
||||
|
||||
# Get page hits
|
||||
page_hits = all_hits[offset : offset + page_size]
|
||||
|
||||
# Build results
|
||||
hits: list[SearchHit] = []
|
||||
for rank, (doc_address, score) in enumerate(page_hits, start=offset + 1):
|
||||
actual_doc = searcher.doc(doc_address)
|
||||
doc_dict = actual_doc.to_dict()
|
||||
result_doc_id = doc_dict["id"][0]
|
||||
|
||||
# Skip the original document
|
||||
if result_doc_id == doc_id:
|
||||
continue
|
||||
|
||||
hits.append(
|
||||
SearchHit(
|
||||
id=result_doc_id,
|
||||
score=score,
|
||||
rank=rank,
|
||||
highlights={}, # MLT doesn't generate highlights
|
||||
),
|
||||
)
|
||||
|
||||
return SearchResults(
|
||||
hits=hits,
|
||||
total=max(0, total - 1), # Subtract 1 for the original document
|
||||
query=f"more_like:{doc_id}",
|
||||
)
|
||||
|
||||
def batch_update(self, lock_timeout: float = 30.0) -> WriteBatch:
|
||||
"""
|
||||
|
||||
@@ -396,17 +396,10 @@ def build_permission_filter(
|
||||
Tantivy query that filters results to visible documents
|
||||
|
||||
Implementation Notes:
|
||||
- Uses range_query instead of term_query for owner_id/viewer_id to work
|
||||
around a tantivy-py bug where Python ints are inferred as i64, causing
|
||||
term_query to return no hits on u64 fields.
|
||||
TODO: Replace with term_query once
|
||||
https://github.com/quickwit-oss/tantivy-py/pull/642 lands.
|
||||
|
||||
- Uses range_query(owner_id, 1, MAX_U64) as an "owner exists" check
|
||||
because exists_query is not yet available in tantivy-py 0.25.
|
||||
TODO: Replace with exists_query("owner_id") once that is exposed in
|
||||
a tantivy-py release.
|
||||
|
||||
- Uses range_query instead of term_query to work around unsigned integer
|
||||
type detection bug in tantivy-py 0.25
|
||||
- Uses boolean_query for "no owner" check since exists_query is not
|
||||
available in tantivy-py 0.25.1 (available in master)
|
||||
- Uses disjunction_max_query to combine permission clauses with OR logic
|
||||
"""
|
||||
owner_any = tantivy.Query.range_query(
|
||||
|
||||
@@ -72,9 +72,6 @@ def build_schema() -> tantivy.Schema:
|
||||
|
||||
# JSON fields — structured queries: notes.user:alice, custom_fields.name:invoice
|
||||
sb.add_json_field("notes", stored=True, tokenizer_name="paperless_text")
|
||||
# Plain-text companion for notes — tantivy's SnippetGenerator does not support
|
||||
# JSON fields, so highlights require a text field with the same content.
|
||||
sb.add_text_field("notes_text", stored=True, tokenizer_name="paperless_text")
|
||||
sb.add_json_field("custom_fields", stored=True, tokenizer_name="paperless_text")
|
||||
|
||||
for field in (
|
||||
|
||||
@@ -33,12 +33,19 @@ class TestWriteBatch:
|
||||
except RuntimeError:
|
||||
pass
|
||||
|
||||
ids = backend.search_ids("should survive", user=None)
|
||||
assert len(ids) == 1
|
||||
r = backend.search(
|
||||
"should survive",
|
||||
user=None,
|
||||
page=1,
|
||||
page_size=10,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
)
|
||||
assert r.total == 1
|
||||
|
||||
|
||||
class TestSearch:
|
||||
"""Test search query parsing and matching via search_ids."""
|
||||
"""Test search functionality."""
|
||||
|
||||
def test_text_mode_limits_default_search_to_title_and_content(
|
||||
self,
|
||||
@@ -53,20 +60,27 @@ class TestSearch:
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
|
||||
assert (
|
||||
len(
|
||||
backend.search_ids(
|
||||
"document_type:invoice",
|
||||
user=None,
|
||||
search_mode=SearchMode.TEXT,
|
||||
),
|
||||
)
|
||||
== 0
|
||||
metadata_only = backend.search(
|
||||
"document_type:invoice",
|
||||
user=None,
|
||||
page=1,
|
||||
page_size=10,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
search_mode=SearchMode.TEXT,
|
||||
)
|
||||
assert (
|
||||
len(backend.search_ids("monthly", user=None, search_mode=SearchMode.TEXT))
|
||||
== 1
|
||||
assert metadata_only.total == 0
|
||||
|
||||
content_match = backend.search(
|
||||
"monthly",
|
||||
user=None,
|
||||
page=1,
|
||||
page_size=10,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
search_mode=SearchMode.TEXT,
|
||||
)
|
||||
assert content_match.total == 1
|
||||
|
||||
def test_title_mode_limits_default_search_to_title_only(
|
||||
self,
|
||||
@@ -81,14 +95,27 @@ class TestSearch:
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
|
||||
assert (
|
||||
len(backend.search_ids("monthly", user=None, search_mode=SearchMode.TITLE))
|
||||
== 0
|
||||
content_only = backend.search(
|
||||
"monthly",
|
||||
user=None,
|
||||
page=1,
|
||||
page_size=10,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
search_mode=SearchMode.TITLE,
|
||||
)
|
||||
assert (
|
||||
len(backend.search_ids("invoice", user=None, search_mode=SearchMode.TITLE))
|
||||
== 1
|
||||
assert content_only.total == 0
|
||||
|
||||
title_match = backend.search(
|
||||
"invoice",
|
||||
user=None,
|
||||
page=1,
|
||||
page_size=10,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
search_mode=SearchMode.TITLE,
|
||||
)
|
||||
assert title_match.total == 1
|
||||
|
||||
def test_text_mode_matches_partial_term_substrings(
|
||||
self,
|
||||
@@ -103,16 +130,38 @@ class TestSearch:
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
|
||||
assert (
|
||||
len(backend.search_ids("pass", user=None, search_mode=SearchMode.TEXT)) == 1
|
||||
prefix_match = backend.search(
|
||||
"pass",
|
||||
user=None,
|
||||
page=1,
|
||||
page_size=10,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
search_mode=SearchMode.TEXT,
|
||||
)
|
||||
assert (
|
||||
len(backend.search_ids("sswo", user=None, search_mode=SearchMode.TEXT)) == 1
|
||||
assert prefix_match.total == 1
|
||||
|
||||
infix_match = backend.search(
|
||||
"sswo",
|
||||
user=None,
|
||||
page=1,
|
||||
page_size=10,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
search_mode=SearchMode.TEXT,
|
||||
)
|
||||
assert (
|
||||
len(backend.search_ids("sswo re", user=None, search_mode=SearchMode.TEXT))
|
||||
== 1
|
||||
assert infix_match.total == 1
|
||||
|
||||
phrase_match = backend.search(
|
||||
"sswo re",
|
||||
user=None,
|
||||
page=1,
|
||||
page_size=10,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
search_mode=SearchMode.TEXT,
|
||||
)
|
||||
assert phrase_match.total == 1
|
||||
|
||||
def test_text_mode_does_not_match_on_partial_term_overlap(
|
||||
self,
|
||||
@@ -127,10 +176,16 @@ class TestSearch:
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
|
||||
assert (
|
||||
len(backend.search_ids("raptor", user=None, search_mode=SearchMode.TEXT))
|
||||
== 0
|
||||
non_match = backend.search(
|
||||
"raptor",
|
||||
user=None,
|
||||
page=1,
|
||||
page_size=10,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
search_mode=SearchMode.TEXT,
|
||||
)
|
||||
assert non_match.total == 0
|
||||
|
||||
def test_text_mode_anchors_later_query_tokens_to_token_starts(
|
||||
self,
|
||||
@@ -159,9 +214,16 @@ class TestSearch:
|
||||
backend.add_or_update(prefix_doc)
|
||||
backend.add_or_update(false_positive)
|
||||
|
||||
result_ids = set(
|
||||
backend.search_ids("Z-Berichte 6", user=None, search_mode=SearchMode.TEXT),
|
||||
results = backend.search(
|
||||
"Z-Berichte 6",
|
||||
user=None,
|
||||
page=1,
|
||||
page_size=10,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
search_mode=SearchMode.TEXT,
|
||||
)
|
||||
result_ids = {hit["id"] for hit in results.hits}
|
||||
|
||||
assert exact_doc.id in result_ids
|
||||
assert prefix_doc.id in result_ids
|
||||
@@ -180,9 +242,16 @@ class TestSearch:
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
|
||||
assert (
|
||||
len(backend.search_ids("!!!", user=None, search_mode=SearchMode.TEXT)) == 0
|
||||
no_tokens = backend.search(
|
||||
"!!!",
|
||||
user=None,
|
||||
page=1,
|
||||
page_size=10,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
search_mode=SearchMode.TEXT,
|
||||
)
|
||||
assert no_tokens.total == 0
|
||||
|
||||
def test_title_mode_matches_partial_term_substrings(
|
||||
self,
|
||||
@@ -197,18 +266,59 @@ class TestSearch:
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
|
||||
assert (
|
||||
len(backend.search_ids("pass", user=None, search_mode=SearchMode.TITLE))
|
||||
== 1
|
||||
prefix_match = backend.search(
|
||||
"pass",
|
||||
user=None,
|
||||
page=1,
|
||||
page_size=10,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
search_mode=SearchMode.TITLE,
|
||||
)
|
||||
assert (
|
||||
len(backend.search_ids("sswo", user=None, search_mode=SearchMode.TITLE))
|
||||
== 1
|
||||
assert prefix_match.total == 1
|
||||
|
||||
infix_match = backend.search(
|
||||
"sswo",
|
||||
user=None,
|
||||
page=1,
|
||||
page_size=10,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
search_mode=SearchMode.TITLE,
|
||||
)
|
||||
assert (
|
||||
len(backend.search_ids("sswo gu", user=None, search_mode=SearchMode.TITLE))
|
||||
== 1
|
||||
assert infix_match.total == 1
|
||||
|
||||
phrase_match = backend.search(
|
||||
"sswo gu",
|
||||
user=None,
|
||||
page=1,
|
||||
page_size=10,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
search_mode=SearchMode.TITLE,
|
||||
)
|
||||
assert phrase_match.total == 1
|
||||
|
||||
def test_scores_normalised_top_hit_is_one(self, backend: TantivyBackend):
|
||||
"""Search scores must be normalized so top hit has score 1.0 for UI consistency."""
|
||||
for i, title in enumerate(["bank invoice", "bank statement", "bank receipt"]):
|
||||
doc = Document.objects.create(
|
||||
title=title,
|
||||
content=title,
|
||||
checksum=f"SN{i}",
|
||||
pk=10 + i,
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
r = backend.search(
|
||||
"bank",
|
||||
user=None,
|
||||
page=1,
|
||||
page_size=10,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
)
|
||||
assert r.hits[0]["score"] == pytest.approx(1.0)
|
||||
assert all(0.0 <= h["score"] <= 1.0 for h in r.hits)
|
||||
|
||||
def test_sort_field_ascending(self, backend: TantivyBackend):
|
||||
"""Searching with sort_reverse=False must return results in ascending ASN order."""
|
||||
@@ -221,14 +331,16 @@ class TestSearch:
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
|
||||
ids = backend.search_ids(
|
||||
r = backend.search(
|
||||
"sortable",
|
||||
user=None,
|
||||
page=1,
|
||||
page_size=10,
|
||||
sort_field="archive_serial_number",
|
||||
sort_reverse=False,
|
||||
)
|
||||
assert len(ids) == 3
|
||||
asns = [Document.objects.get(pk=doc_id).archive_serial_number for doc_id in ids]
|
||||
assert r.total == 3
|
||||
asns = [Document.objects.get(pk=h["id"]).archive_serial_number for h in r.hits]
|
||||
assert asns == [10, 20, 30]
|
||||
|
||||
def test_sort_field_descending(self, backend: TantivyBackend):
|
||||
@@ -242,91 +354,79 @@ class TestSearch:
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
|
||||
ids = backend.search_ids(
|
||||
r = backend.search(
|
||||
"sortable",
|
||||
user=None,
|
||||
page=1,
|
||||
page_size=10,
|
||||
sort_field="archive_serial_number",
|
||||
sort_reverse=True,
|
||||
)
|
||||
assert len(ids) == 3
|
||||
asns = [Document.objects.get(pk=doc_id).archive_serial_number for doc_id in ids]
|
||||
assert r.total == 3
|
||||
asns = [Document.objects.get(pk=h["id"]).archive_serial_number for h in r.hits]
|
||||
assert asns == [30, 20, 10]
|
||||
|
||||
|
||||
class TestSearchIds:
|
||||
"""Test lightweight ID-only search."""
|
||||
|
||||
def test_returns_matching_ids(self, backend: TantivyBackend):
|
||||
"""search_ids must return IDs of all matching documents."""
|
||||
docs = []
|
||||
for i in range(5):
|
||||
doc = Document.objects.create(
|
||||
title=f"findable doc {i}",
|
||||
content="common keyword",
|
||||
checksum=f"SI{i}",
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
docs.append(doc)
|
||||
other = Document.objects.create(
|
||||
title="unrelated",
|
||||
content="nothing here",
|
||||
checksum="SI_other",
|
||||
)
|
||||
backend.add_or_update(other)
|
||||
|
||||
ids = backend.search_ids(
|
||||
"common keyword",
|
||||
user=None,
|
||||
search_mode=SearchMode.QUERY,
|
||||
)
|
||||
assert set(ids) == {d.pk for d in docs}
|
||||
assert other.pk not in ids
|
||||
|
||||
def test_respects_permission_filter(self, backend: TantivyBackend):
|
||||
"""search_ids must respect user permission filtering."""
|
||||
owner = User.objects.create_user("ids_owner")
|
||||
other = User.objects.create_user("ids_other")
|
||||
def test_fuzzy_threshold_filters_low_score_hits(
|
||||
self,
|
||||
backend: TantivyBackend,
|
||||
settings,
|
||||
):
|
||||
"""When ADVANCED_FUZZY_SEARCH_THRESHOLD exceeds all normalized scores, hits must be filtered out."""
|
||||
doc = Document.objects.create(
|
||||
title="private doc",
|
||||
content="secret keyword",
|
||||
checksum="SIP1",
|
||||
title="Invoice document",
|
||||
content="financial report",
|
||||
checksum="FT1",
|
||||
pk=120,
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
|
||||
# Threshold above 1.0 filters every hit (normalized scores top out at 1.0)
|
||||
settings.ADVANCED_FUZZY_SEARCH_THRESHOLD = 1.1
|
||||
r = backend.search(
|
||||
"invoice",
|
||||
user=None,
|
||||
page=1,
|
||||
page_size=10,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
)
|
||||
assert r.hits == []
|
||||
|
||||
def test_owner_filter(self, backend: TantivyBackend):
|
||||
"""Document owners can search their private documents; other users cannot access them."""
|
||||
owner = User.objects.create_user("owner")
|
||||
other = User.objects.create_user("other")
|
||||
doc = Document.objects.create(
|
||||
title="Private",
|
||||
content="secret",
|
||||
checksum="PF1",
|
||||
pk=20,
|
||||
owner=owner,
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
|
||||
assert backend.search_ids(
|
||||
"secret",
|
||||
user=owner,
|
||||
search_mode=SearchMode.QUERY,
|
||||
) == [doc.pk]
|
||||
assert (
|
||||
backend.search_ids("secret", user=other, search_mode=SearchMode.QUERY) == []
|
||||
backend.search(
|
||||
"secret",
|
||||
user=owner,
|
||||
page=1,
|
||||
page_size=10,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
).total
|
||||
== 1
|
||||
)
|
||||
|
||||
def test_respects_fuzzy_threshold(self, backend: TantivyBackend, settings):
|
||||
"""search_ids must apply the same fuzzy threshold as search()."""
|
||||
doc = Document.objects.create(
|
||||
title="threshold test",
|
||||
content="unique term",
|
||||
checksum="SIT1",
|
||||
assert (
|
||||
backend.search(
|
||||
"secret",
|
||||
user=other,
|
||||
page=1,
|
||||
page_size=10,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
).total
|
||||
== 0
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
|
||||
settings.ADVANCED_FUZZY_SEARCH_THRESHOLD = 1.1
|
||||
ids = backend.search_ids("unique", user=None, search_mode=SearchMode.QUERY)
|
||||
assert ids == []
|
||||
|
||||
def test_returns_ids_for_text_mode(self, backend: TantivyBackend):
|
||||
"""search_ids must work with TEXT search mode."""
|
||||
doc = Document.objects.create(
|
||||
title="text mode doc",
|
||||
content="findable phrase",
|
||||
checksum="SIM1",
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
|
||||
ids = backend.search_ids("findable", user=None, search_mode=SearchMode.TEXT)
|
||||
assert ids == [doc.pk]
|
||||
|
||||
|
||||
class TestRebuild:
|
||||
@@ -390,26 +490,57 @@ class TestAutocomplete:
|
||||
class TestMoreLikeThis:
|
||||
"""Test more like this functionality."""
|
||||
|
||||
def test_more_like_this_ids_excludes_original(self, backend: TantivyBackend):
|
||||
"""more_like_this_ids must return IDs of similar documents, excluding the original."""
|
||||
def test_excludes_original(self, backend: TantivyBackend):
|
||||
"""More like this queries must exclude the reference document from results."""
|
||||
doc1 = Document.objects.create(
|
||||
title="Important document",
|
||||
content="financial information report",
|
||||
checksum="MLTI1",
|
||||
pk=150,
|
||||
content="financial information",
|
||||
checksum="MLT1",
|
||||
pk=50,
|
||||
)
|
||||
doc2 = Document.objects.create(
|
||||
title="Another document",
|
||||
content="financial information report",
|
||||
checksum="MLTI2",
|
||||
pk=151,
|
||||
content="financial report",
|
||||
checksum="MLT2",
|
||||
pk=51,
|
||||
)
|
||||
backend.add_or_update(doc1)
|
||||
backend.add_or_update(doc2)
|
||||
|
||||
ids = backend.more_like_this_ids(doc_id=150, user=None)
|
||||
assert 150 not in ids
|
||||
assert 151 in ids
|
||||
results = backend.more_like_this(doc_id=50, user=None, page=1, page_size=10)
|
||||
returned_ids = [hit["id"] for hit in results.hits]
|
||||
assert 50 not in returned_ids # Original document excluded
|
||||
|
||||
def test_with_user_applies_permission_filter(self, backend: TantivyBackend):
|
||||
"""more_like_this with a user must exclude documents that user cannot see."""
|
||||
viewer = User.objects.create_user("mlt_viewer")
|
||||
other = User.objects.create_user("mlt_other")
|
||||
public_doc = Document.objects.create(
|
||||
title="Public financial document",
|
||||
content="quarterly financial analysis report figures",
|
||||
checksum="MLT3",
|
||||
pk=52,
|
||||
)
|
||||
private_doc = Document.objects.create(
|
||||
title="Private financial document",
|
||||
content="quarterly financial analysis report figures",
|
||||
checksum="MLT4",
|
||||
pk=53,
|
||||
owner=other,
|
||||
)
|
||||
backend.add_or_update(public_doc)
|
||||
backend.add_or_update(private_doc)
|
||||
|
||||
results = backend.more_like_this(doc_id=52, user=viewer, page=1, page_size=10)
|
||||
returned_ids = [hit["id"] for hit in results.hits]
|
||||
# private_doc is owned by other, so viewer cannot see it
|
||||
assert 53 not in returned_ids
|
||||
|
||||
def test_document_not_in_index_returns_empty(self, backend: TantivyBackend):
|
||||
"""more_like_this for a doc_id absent from the index must return empty results."""
|
||||
results = backend.more_like_this(doc_id=9999, user=None, page=1, page_size=10)
|
||||
assert results.hits == []
|
||||
assert results.total == 0
|
||||
|
||||
|
||||
class TestSingleton:
|
||||
@@ -462,10 +593,19 @@ class TestFieldHandling:
|
||||
# Should not raise an exception
|
||||
backend.add_or_update(doc)
|
||||
|
||||
assert len(backend.search_ids("test", user=None)) == 1
|
||||
results = backend.search(
|
||||
"test",
|
||||
user=None,
|
||||
page=1,
|
||||
page_size=10,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
)
|
||||
assert results.total == 1
|
||||
|
||||
def test_custom_fields_include_name_and_value(self, backend: TantivyBackend):
|
||||
"""Custom fields must be indexed with both field name and value for structured queries."""
|
||||
# Create a custom field
|
||||
field = CustomField.objects.create(
|
||||
name="Invoice Number",
|
||||
data_type=CustomField.FieldDataType.STRING,
|
||||
@@ -482,9 +622,18 @@ class TestFieldHandling:
|
||||
value_text="INV-2024-001",
|
||||
)
|
||||
|
||||
# Should not raise an exception during indexing
|
||||
backend.add_or_update(doc)
|
||||
|
||||
assert len(backend.search_ids("invoice", user=None)) == 1
|
||||
results = backend.search(
|
||||
"invoice",
|
||||
user=None,
|
||||
page=1,
|
||||
page_size=10,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
)
|
||||
assert results.total == 1
|
||||
|
||||
def test_select_custom_field_indexes_label_not_id(self, backend: TantivyBackend):
|
||||
"""SELECT custom fields must index the human-readable label, not the opaque option ID."""
|
||||
@@ -511,8 +660,27 @@ class TestFieldHandling:
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
|
||||
assert len(backend.search_ids("custom_fields.value:invoice", user=None)) == 1
|
||||
assert len(backend.search_ids("custom_fields.value:opt_abc", user=None)) == 0
|
||||
# Label should be findable
|
||||
results = backend.search(
|
||||
"custom_fields.value:invoice",
|
||||
user=None,
|
||||
page=1,
|
||||
page_size=10,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
)
|
||||
assert results.total == 1
|
||||
|
||||
# Opaque ID must not appear in the index
|
||||
results = backend.search(
|
||||
"custom_fields.value:opt_abc",
|
||||
user=None,
|
||||
page=1,
|
||||
page_size=10,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
)
|
||||
assert results.total == 0
|
||||
|
||||
def test_none_custom_field_value_not_indexed(self, backend: TantivyBackend):
|
||||
"""Custom field instances with no value set must not produce an index entry."""
|
||||
@@ -534,7 +702,16 @@ class TestFieldHandling:
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
|
||||
assert len(backend.search_ids("custom_fields.value:none", user=None)) == 0
|
||||
# The string "none" must not appear as an indexed value
|
||||
results = backend.search(
|
||||
"custom_fields.value:none",
|
||||
user=None,
|
||||
page=1,
|
||||
page_size=10,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
)
|
||||
assert results.total == 0
|
||||
|
||||
def test_notes_include_user_information(self, backend: TantivyBackend):
|
||||
"""Notes must be indexed with user information when available for structured queries."""
|
||||
@@ -547,101 +724,32 @@ class TestFieldHandling:
|
||||
)
|
||||
Note.objects.create(document=doc, note="Important note", user=user)
|
||||
|
||||
# Should not raise an exception during indexing
|
||||
backend.add_or_update(doc)
|
||||
|
||||
ids = backend.search_ids("test", user=None)
|
||||
assert len(ids) == 1, (
|
||||
f"Expected 1, got {len(ids)}. Document content should be searchable."
|
||||
# Test basic document search first
|
||||
results = backend.search(
|
||||
"test",
|
||||
user=None,
|
||||
page=1,
|
||||
page_size=10,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
)
|
||||
assert results.total == 1, (
|
||||
f"Expected 1, got {results.total}. Document content should be searchable."
|
||||
)
|
||||
|
||||
ids = backend.search_ids("notes.note:important", user=None)
|
||||
assert len(ids) == 1, (
|
||||
f"Expected 1, got {len(ids)}. Note content should be searchable via notes.note: prefix."
|
||||
# Test notes search — must use structured JSON syntax now that note
|
||||
# is no longer in DEFAULT_SEARCH_FIELDS
|
||||
results = backend.search(
|
||||
"notes.note:important",
|
||||
user=None,
|
||||
page=1,
|
||||
page_size=10,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
)
|
||||
|
||||
|
||||
class TestHighlightHits:
|
||||
"""Test highlight_hits returns proper HTML strings, not raw Snippet objects."""
|
||||
|
||||
def test_highlights_content_returns_match_span_html(
|
||||
self,
|
||||
backend: TantivyBackend,
|
||||
):
|
||||
"""highlight_hits must return frontend-ready highlight spans."""
|
||||
doc = Document.objects.create(
|
||||
title="Highlight Test",
|
||||
content="The quick brown fox jumps over the lazy dog",
|
||||
checksum="HH1",
|
||||
pk=90,
|
||||
assert results.total == 1, (
|
||||
f"Expected 1, got {results.total}. Note content should be searchable via notes.note: prefix."
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
|
||||
hits = backend.highlight_hits("quick", [doc.pk])
|
||||
|
||||
assert len(hits) == 1
|
||||
highlights = hits[0]["highlights"]
|
||||
assert "content" in highlights
|
||||
content_highlight = highlights["content"]
|
||||
assert isinstance(content_highlight, str), (
|
||||
f"Expected str, got {type(content_highlight)}: {content_highlight!r}"
|
||||
)
|
||||
assert '<span class="match">' in content_highlight, (
|
||||
f"Expected HTML with match span, got: {content_highlight!r}"
|
||||
)
|
||||
|
||||
def test_highlights_notes_returns_match_span_html(
|
||||
self,
|
||||
backend: TantivyBackend,
|
||||
):
|
||||
"""Note highlights must be frontend-ready HTML via notes_text companion field.
|
||||
|
||||
The notes JSON field does not support tantivy SnippetGenerator; the
|
||||
notes_text plain-text field is used instead. We use the full-text
|
||||
query "urgent" (not notes.note:) because notes_text IS in
|
||||
DEFAULT_SEARCH_FIELDS via the normal search path… actually, we use
|
||||
notes.note: prefix so the query targets notes content directly, but
|
||||
the snippet is generated from notes_text which stores the same text.
|
||||
"""
|
||||
user = User.objects.create_user("hl_noteuser")
|
||||
doc = Document.objects.create(
|
||||
title="Doc with matching note",
|
||||
content="unrelated content",
|
||||
checksum="HH2",
|
||||
pk=91,
|
||||
)
|
||||
Note.objects.create(document=doc, note="urgent payment required", user=user)
|
||||
backend.add_or_update(doc)
|
||||
|
||||
# Use notes.note: prefix so the document matches the query and the
|
||||
# notes_text snippet generator can produce highlights.
|
||||
hits = backend.highlight_hits("notes.note:urgent", [doc.pk])
|
||||
|
||||
assert len(hits) == 1
|
||||
highlights = hits[0]["highlights"]
|
||||
assert "notes" in highlights
|
||||
note_highlight = highlights["notes"]
|
||||
assert isinstance(note_highlight, str), (
|
||||
f"Expected str, got {type(note_highlight)}: {note_highlight!r}"
|
||||
)
|
||||
assert '<span class="match">' in note_highlight, (
|
||||
f"Expected HTML with match span, got: {note_highlight!r}"
|
||||
)
|
||||
|
||||
def test_empty_doc_list_returns_empty_hits(self, backend: TantivyBackend):
|
||||
"""highlight_hits with no doc IDs must return an empty list."""
|
||||
hits = backend.highlight_hits("anything", [])
|
||||
assert hits == []
|
||||
|
||||
def test_no_highlights_when_no_match(self, backend: TantivyBackend):
|
||||
"""Documents not matching the query should not appear in results."""
|
||||
doc = Document.objects.create(
|
||||
title="Unrelated",
|
||||
content="completely different text",
|
||||
checksum="HH3",
|
||||
pk=92,
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
|
||||
hits = backend.highlight_hits("quick", [doc.pk])
|
||||
|
||||
assert len(hits) == 0
|
||||
|
||||
@@ -1309,7 +1309,7 @@ class TestCustomFieldsAPI(DirectoriesMixin, APITestCase):
|
||||
# Test as user without access to the document
|
||||
non_superuser = User.objects.create_user(username="non_superuser")
|
||||
non_superuser.user_permissions.add(
|
||||
*Permission.objects.exclude(codename="view_global_statistics"),
|
||||
*Permission.objects.all(),
|
||||
)
|
||||
non_superuser.save()
|
||||
self.client.force_authenticate(user=non_superuser)
|
||||
|
||||
@@ -1314,41 +1314,6 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
self.assertEqual(response.data["documents_inbox"], 0)
|
||||
|
||||
def test_statistics_with_statistics_permission(self) -> None:
|
||||
owner = User.objects.create_user("owner")
|
||||
stats_user = User.objects.create_user("stats-user")
|
||||
stats_user.user_permissions.add(
|
||||
Permission.objects.get(codename="view_global_statistics"),
|
||||
)
|
||||
|
||||
inbox_tag = Tag.objects.create(
|
||||
name="stats_inbox",
|
||||
is_inbox_tag=True,
|
||||
owner=owner,
|
||||
)
|
||||
Document.objects.create(
|
||||
title="owned-doc",
|
||||
checksum="stats-A",
|
||||
mime_type="application/pdf",
|
||||
content="abcdef",
|
||||
owner=owner,
|
||||
).tags.add(inbox_tag)
|
||||
Correspondent.objects.create(name="stats-correspondent", owner=owner)
|
||||
DocumentType.objects.create(name="stats-type", owner=owner)
|
||||
StoragePath.objects.create(name="stats-path", path="archive", owner=owner)
|
||||
|
||||
self.client.force_authenticate(user=stats_user)
|
||||
response = self.client.get("/api/statistics/")
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
self.assertEqual(response.data["documents_total"], 1)
|
||||
self.assertEqual(response.data["documents_inbox"], 1)
|
||||
self.assertEqual(response.data["inbox_tags"], [inbox_tag.pk])
|
||||
self.assertEqual(response.data["character_count"], 6)
|
||||
self.assertEqual(response.data["correspondent_count"], 1)
|
||||
self.assertEqual(response.data["document_type_count"], 1)
|
||||
self.assertEqual(response.data["storage_path_count"], 1)
|
||||
|
||||
def test_upload(self) -> None:
|
||||
self.consume_file_mock.return_value = celery.result.AsyncResult(
|
||||
id=str(uuid.uuid4()),
|
||||
|
||||
@@ -1503,126 +1503,6 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
[d2.id, d1.id, d3.id],
|
||||
)
|
||||
|
||||
def test_search_ordering_by_score(self) -> None:
|
||||
"""ordering=-score must return results in descending relevance order (best first)."""
|
||||
backend = get_backend()
|
||||
# doc_high has more occurrences of the search term → higher BM25 score
|
||||
doc_low = Document.objects.create(
|
||||
title="score sort low",
|
||||
content="apple",
|
||||
checksum="SCL1",
|
||||
)
|
||||
doc_high = Document.objects.create(
|
||||
title="score sort high",
|
||||
content="apple apple apple apple apple",
|
||||
checksum="SCH1",
|
||||
)
|
||||
backend.add_or_update(doc_low)
|
||||
backend.add_or_update(doc_high)
|
||||
|
||||
# -score = descending = best first (highest score)
|
||||
response = self.client.get("/api/documents/?query=apple&ordering=-score")
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
ids = [r["id"] for r in response.data["results"]]
|
||||
self.assertEqual(
|
||||
ids[0],
|
||||
doc_high.id,
|
||||
"Most relevant doc should be first for -score",
|
||||
)
|
||||
|
||||
# score = ascending = worst first (lowest score)
|
||||
response = self.client.get("/api/documents/?query=apple&ordering=score")
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
ids = [r["id"] for r in response.data["results"]]
|
||||
self.assertEqual(
|
||||
ids[0],
|
||||
doc_low.id,
|
||||
"Least relevant doc should be first for +score",
|
||||
)
|
||||
|
||||
def test_search_with_tantivy_native_sort(self) -> None:
|
||||
"""When ordering by a Tantivy-sortable field, results must be correctly sorted."""
|
||||
backend = get_backend()
|
||||
for i, asn in enumerate([30, 10, 20]):
|
||||
doc = Document.objects.create(
|
||||
title=f"sortable doc {i}",
|
||||
content="searchable content",
|
||||
checksum=f"TNS{i}",
|
||||
archive_serial_number=asn,
|
||||
)
|
||||
backend.add_or_update(doc)
|
||||
|
||||
response = self.client.get(
|
||||
"/api/documents/?query=searchable&ordering=archive_serial_number",
|
||||
)
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
asns = [doc["archive_serial_number"] for doc in response.data["results"]]
|
||||
self.assertEqual(asns, [10, 20, 30])
|
||||
|
||||
response = self.client.get(
|
||||
"/api/documents/?query=searchable&ordering=-archive_serial_number",
|
||||
)
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
asns = [doc["archive_serial_number"] for doc in response.data["results"]]
|
||||
self.assertEqual(asns, [30, 20, 10])
|
||||
|
||||
def test_search_page_2_returns_correct_slice(self) -> None:
    """Page 2 must return the second slice, not overlap with page 1.

    Ten matching documents are indexed with archive serial numbers 1..10.
    Pages 1 and 2 (three results each, ordered by archive serial number)
    must be disjoint, and every serial number on page 2 must be greater
    than every serial number on page 1.
    """
    backend = get_backend()
    for i in range(10):
        doc = Document.objects.create(
            title=f"doc {i}",
            content="paginated content",
            checksum=f"PG2{i}",
            archive_serial_number=i + 1,
        )
        backend.add_or_update(doc)

    response = self.client.get(
        "/api/documents/?query=paginated&ordering=archive_serial_number&page=1&page_size=3",
    )
    # Check the status first so a failed request is reported as such rather
    # than as a KeyError/TypeError when indexing response.data below.
    self.assertEqual(response.status_code, status.HTTP_200_OK)
    page1_ids = [r["id"] for r in response.data["results"]]
    self.assertEqual(len(page1_ids), 3)

    response = self.client.get(
        "/api/documents/?query=paginated&ordering=archive_serial_number&page=2&page_size=3",
    )
    self.assertEqual(response.status_code, status.HTTP_200_OK)
    page2_ids = [r["id"] for r in response.data["results"]]
    self.assertEqual(len(page2_ids), 3)

    # No overlap between pages
    self.assertEqual(set(page1_ids) & set(page2_ids), set())

    # Page 2 ASNs are higher than page 1
    page1_asns = [
        Document.objects.get(pk=pk).archive_serial_number for pk in page1_ids
    ]
    page2_asns = [
        Document.objects.get(pk=pk).archive_serial_number for pk in page2_ids
    ]
    # assertLess prints both operands on failure; assertTrue(a < b) would
    # only report "False is not true".
    self.assertLess(max(page1_asns), min(page2_asns))
|
||||
|
||||
def test_search_all_field_contains_all_ids_when_paginated(self) -> None:
    """The ``all`` field lists every matching ID even when results are paged.

    Ten matching documents are indexed; the first page is requested with a
    page size of three using API version 9.
    """
    index = get_backend()
    expected_ids = []
    for n in range(10):
        created = Document.objects.create(
            title=f"all field doc {n}",
            content="allfield content",
            checksum=f"AF{n}",
        )
        index.add_or_update(created)
        expected_ids.append(created.pk)

    paged = self.client.get(
        "/api/documents/?query=allfield&page=1&page_size=3",
        headers={"Accept": "application/json; version=9"},
    )
    self.assertEqual(paged.status_code, status.HTTP_200_OK)
    # Only one page worth of results is returned ...
    self.assertEqual(len(paged.data["results"]), 3)
    # ... but "all" must contain ALL 10 matching IDs
    self.assertCountEqual(paged.data["all"], expected_ids)
|
||||
|
||||
@mock.patch("documents.bulk_edit.bulk_update_documents")
|
||||
def test_global_search(self, m) -> None:
|
||||
"""
|
||||
|
||||
@@ -5,14 +5,12 @@ from pathlib import Path
|
||||
from unittest import mock
|
||||
|
||||
from celery import states
|
||||
from django.contrib.auth.models import Permission
|
||||
from django.contrib.auth.models import User
|
||||
from django.test import override_settings
|
||||
from rest_framework import status
|
||||
from rest_framework.test import APITestCase
|
||||
|
||||
from documents.models import PaperlessTask
|
||||
from documents.permissions import has_system_status_permission
|
||||
from paperless import version
|
||||
|
||||
|
||||
@@ -93,22 +91,6 @@ class TestSystemStatus(APITestCase):
|
||||
self.client.force_login(normal_user)
|
||||
response = self.client.get(self.ENDPOINT)
|
||||
self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
|
||||
# test the permission helper function directly for good measure
|
||||
self.assertFalse(has_system_status_permission(None))
|
||||
|
||||
def test_system_status_with_system_status_permission(self) -> None:
|
||||
response = self.client.get(self.ENDPOINT)
|
||||
self.assertEqual(response.status_code, status.HTTP_401_UNAUTHORIZED)
|
||||
|
||||
user = User.objects.create_user(username="status_user")
|
||||
user.user_permissions.add(
|
||||
Permission.objects.get(codename="view_system_status"),
|
||||
)
|
||||
|
||||
self.client.force_login(user)
|
||||
response = self.client.get(self.ENDPOINT)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
|
||||
def test_system_status_with_bad_basic_auth_challenges(self) -> None:
|
||||
self.client.credentials(HTTP_AUTHORIZATION="Basic invalid")
|
||||
|
||||
134
src/documents/tests/test_classifier_single_pass.py
Normal file
134
src/documents/tests/test_classifier_single_pass.py
Normal file
@@ -0,0 +1,134 @@
|
||||
"""
|
||||
Phase 2 — Single queryset pass in DocumentClassifier.train()
|
||||
|
||||
The document queryset must be iterated exactly once: during the label
|
||||
extraction loop, which now also captures doc.content for vectorization.
|
||||
The previous content_generator() caused a second full table scan.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest import mock
|
||||
|
||||
import pytest
|
||||
from django.db.models.query import QuerySet
|
||||
|
||||
from documents.classifier import DocumentClassifier
|
||||
from documents.models import Correspondent
|
||||
from documents.models import Document
|
||||
from documents.models import DocumentType
|
||||
from documents.models import MatchingModel
|
||||
from documents.models import StoragePath
|
||||
from documents.models import Tag
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fixtures (mirrors test_classifier_train_skip.py)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture()
def classifier_settings(settings, tmp_path):
    """Point ``settings.MODEL_FILE`` at ``model.pickle`` under ``tmp_path``.

    Returns the (mutated) settings object so dependent fixtures can use it.
    """
    model_path = tmp_path / "model.pickle"
    settings.MODEL_FILE = model_path
    return settings
|
||||
|
||||
|
||||
@pytest.fixture()
def classifier(classifier_settings):
    """Return a new ``DocumentClassifier``.

    Depends on ``classifier_settings`` so MODEL_FILE is redirected before the
    classifier is constructed.
    """
    fresh = DocumentClassifier()
    return fresh
|
||||
|
||||
|
||||
@pytest.fixture()
def label_corpus(classifier_settings):
    """Create a small corpus: one MATCH_AUTO label of each kind and three docs.

    ``doc_a`` and ``doc_b`` carry the AUTO correspondent, document type,
    storage path and tag; ``doc_c`` has no labels at all.

    Returns a dict mapping ``"doc_a"``, ``"doc_b"`` and ``"doc_c"`` to the
    created documents.
    """
    auto = {"matching_algorithm": MatchingModel.MATCH_AUTO}
    correspondent = Correspondent.objects.create(name="Auto Corp", **auto)
    document_type = DocumentType.objects.create(name="Invoice", **auto)
    tag = Tag.objects.create(name="finance", **auto)
    storage_path = StoragePath.objects.create(
        name="Finance Path",
        path="finance/{correspondent}",
        **auto,
    )

    # Label assignments shared by both invoice documents.
    labels = {
        "correspondent": correspondent,
        "document_type": document_type,
        "storage_path": storage_path,
    }
    invoices = (
        (
            "doc_a",
            "Invoice A",
            "quarterly invoice payment tax financial statement revenue",
            "aaa",
            "invoice_a.pdf",
        ),
        (
            "doc_b",
            "Invoice B",
            "monthly invoice billing statement account balance due",
            "bbb",
            "invoice_b.pdf",
        ),
    )

    corpus = {}
    for key, title, content, checksum, filename in invoices:
        document = Document.objects.create(
            title=title,
            content=content,
            checksum=checksum,
            mime_type="application/pdf",
            filename=filename,
            **labels,
        )
        document.tags.set([tag])
        corpus[key] = document

    # Unlabelled document.
    corpus["doc_c"] = Document.objects.create(
        title="Notes",
        content="meeting notes agenda discussion summary action items follow",
        checksum="ccc",
        mime_type="application/pdf",
        filename="notes_c.pdf",
    )
    return corpus
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.django_db()
class TestSingleQuerysetPass:
    """train() must walk the Document queryset once and still skip correctly."""

    def test_train_iterates_document_queryset_once(self, classifier, label_corpus):
        """
        train() must iterate the Document queryset exactly once.

        ``QuerySet.__iter__`` is wrapped so that every iteration over a
        queryset whose model is ``Document`` is recorded; iterations over
        other models pass through uncounted.
        """
        real_iter = QuerySet.__iter__
        document_iterations = []

        def counting_iter(queryset):
            # Record the call, then defer to the genuine iterator.
            if queryset.model is Document:
                document_iterations.append(1)
            return real_iter(queryset)

        with mock.patch.object(QuerySet, "__iter__", counting_iter):
            classifier.train()

        doc_iter_count = len(document_iterations)
        assert doc_iter_count == 1, (
            f"Expected 1 Document queryset iteration, got {doc_iter_count}. "
            "content_generator() may still be re-fetching from the DB."
        )

    def test_train_result_unchanged(self, classifier, label_corpus):
        """
        The first train() reports that it trained (True); an immediate second
        train() with no changes reports a skip (False).
        """
        first_run = classifier.train()
        assert first_run is True
        second_run = classifier.train()
        assert second_run is False
|
||||
300
src/documents/tests/test_classifier_tags_correctness.py
Normal file
300
src/documents/tests/test_classifier_tags_correctness.py
Normal file
@@ -0,0 +1,300 @@
|
||||
"""
|
||||
Tags classifier correctness test — Phase 3b gate.
|
||||
|
||||
This test must pass both BEFORE and AFTER the MLPClassifier → LinearSVC swap.
|
||||
It verifies that the tags classifier correctly learns discriminative signal and
|
||||
predicts the right tags on held-out documents.
|
||||
|
||||
Run before the swap to establish a baseline, then run again after to confirm
|
||||
the new algorithm is at least as correct.
|
||||
|
||||
Two scenarios are tested:
|
||||
1. Multi-tag (num_tags > 1) — the common case; uses MultiLabelBinarizer
|
||||
2. Single-tag (num_tags == 1) — special binary path; uses LabelBinarizer
|
||||
|
||||
Corpus design: each tag has a distinct vocabulary cluster. Each training
|
||||
document contains words from exactly one cluster (or two for multi-tag docs).
|
||||
Held-out test documents contain the same cluster words; correct classification
|
||||
requires the model to learn the vocabulary → tag mapping.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from documents.classifier import DocumentClassifier
|
||||
from documents.models import Correspondent
|
||||
from documents.models import Document
|
||||
from documents.models import DocumentType
|
||||
from documents.models import MatchingModel
|
||||
from documents.models import StoragePath
|
||||
from documents.models import Tag
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Vocabulary clusters — intentionally non-overlapping so both MLP and SVM
|
||||
# should learn them perfectly or near-perfectly.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Each constant is a single space-separated string of cluster vocabulary.
FINANCE_WORDS = " ".join(
    (
        "invoice payment tax revenue billing statement account receivable",
        "quarterly budget expense ledger debit credit profit loss fiscal",
    ),
)
LEGAL_WORDS = " ".join(
    (
        "contract agreement terms conditions clause liability indemnity",
        "jurisdiction arbitration compliance regulation statute obligation",
    ),
)
MEDICAL_WORDS = " ".join(
    (
        "prescription diagnosis treatment patient health symptom dosage",
        "physician referral therapy clinical examination procedure chronic",
    ),
)
HR_WORDS = " ".join(
    (
        "employee salary onboarding performance review appraisal benefits",
        "recruitment hiring resignation termination payroll department staff",
    ),
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fixtures
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture()
def classifier_settings(settings, tmp_path):
    """Point ``settings.MODEL_FILE`` at ``model.pickle`` under ``tmp_path``.

    Returns the (mutated) settings object so dependent fixtures can use it.
    """
    model_path = tmp_path / "model.pickle"
    settings.MODEL_FILE = model_path
    return settings
|
||||
|
||||
|
||||
@pytest.fixture()
def classifier(classifier_settings):
    """Return a new ``DocumentClassifier``.

    Depends on ``classifier_settings`` so MODEL_FILE is redirected before the
    classifier is constructed.
    """
    fresh = DocumentClassifier()
    return fresh
|
||||
|
||||
|
||||
def _make_doc(title, content, checksum, tags=(), **kwargs):
    """Create a PDF ``Document`` and attach ``tags`` when any are given.

    The filename is derived from ``checksum`` (``"<checksum>.pdf"``); any
    extra keyword arguments are forwarded to ``Document.objects.create``.
    Returns the created document.
    """
    fixed_fields = {
        "title": title,
        "content": content,
        "checksum": checksum,
        "mime_type": "application/pdf",
        "filename": f"{checksum}.pdf",
    }
    doc = Document.objects.create(**fixed_fields, **kwargs)
    if not tags:
        # Nothing to attach; leave the tag relation untouched.
        return doc
    doc.tags.set(tags)
    return doc
|
||||
|
||||
|
||||
def _words(cluster, extra=""):
|
||||
"""Repeat cluster words enough times to clear min_df=0.01 at ~40 docs."""
|
||||
return f"{cluster} {cluster} {extra}".strip()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Multi-tag correctness
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture()
def multi_tag_corpus(classifier_settings):
    """
    45 training documents across 4 AUTO tags with distinct vocabulary:
    10 single-tag docs per tag (40) plus 5 finance+legal two-tag docs.

    Every document is also assigned a MATCH_NONE correspondent, document
    type and storage path.

    Returns a dict with the four tags under the keys ``t_finance``,
    ``t_legal``, ``t_medical`` and ``t_hr``.
    """

    def auto_tag(name):
        return Tag.objects.create(
            name=name,
            matching_algorithm=MatchingModel.MATCH_AUTO,
        )

    t_finance = auto_tag("finance")
    t_legal = auto_tag("legal")
    t_medical = auto_tag("medical")
    t_hr = auto_tag("hr")

    # non-AUTO labels shared by every training document
    manual = {"matching_algorithm": MatchingModel.MATCH_NONE}
    shared_labels = {
        "correspondent": Correspondent.objects.create(name="org", **manual),
        "document_type": DocumentType.objects.create(name="doc", **manual),
        "storage_path": StoragePath.objects.create(
            name="archive",
            path="archive",
            **manual,
        ),
    }

    # One-element list used as a mutable counter for unique checksums.
    created = [0]

    def make(title, content, tags):
        created[0] += 1
        return _make_doc(
            title,
            content,
            f"{created[0]:04d}",
            tags=tags,
            **shared_labels,
        )

    # 10 single-tag training docs per tag, interleaved tag by tag
    clusters = (
        ("finance", FINANCE_WORDS, t_finance),
        ("legal", LEGAL_WORDS, t_legal),
        ("medical", MEDICAL_WORDS, t_medical),
        ("hr", HR_WORDS, t_hr),
    )
    for i in range(10):
        for label, vocabulary, tag in clusters:
            make(f"{label}-{i}", _words(vocabulary, f"doc{i}"), [tag])

    # 5 two-tag training docs
    for i in range(5):
        make(
            f"finance-legal-{i}",
            _words(FINANCE_WORDS + " " + LEGAL_WORDS, f"combo{i}"),
            [t_finance, t_legal],
        )

    return {
        "t_finance": t_finance,
        "t_legal": t_legal,
        "t_medical": t_medical,
        "t_hr": t_hr,
    }
|
||||
|
||||
|
||||
@pytest.mark.django_db()
class TestMultiTagCorrectness:
    """
    The tags classifier must predict the expected tags on held-out documents
    whose content belongs to one or two vocabulary clusters.

    A prediction counts as correct when the expected tag is present in the
    result; additional predicted tags are not treated as failures.
    """

    def test_single_cluster_docs_predicted_correctly(
        self,
        classifier,
        multi_tag_corpus,
    ):
        """Each single-cluster held-out doc's prediction includes its tag."""
        classifier.train()

        held_out = (
            (FINANCE_WORDS + " unique alpha", "t_finance"),
            (LEGAL_WORDS + " unique beta", "t_legal"),
            (MEDICAL_WORDS + " unique gamma", "t_medical"),
            (HR_WORDS + " unique delta", "t_hr"),
        )

        for content, tag_key in held_out:
            pk = multi_tag_corpus[tag_key].pk
            predicted = classifier.predict_tags(content)
            assert pk in predicted, (
                f"Expected tag pk={pk} in predictions for content starting "
                f"'{content[:40]}…', got {predicted}"
            )

    def test_multi_cluster_doc_gets_both_tags(self, classifier, multi_tag_corpus):
        """A document with finance + legal vocabulary gets both tags."""
        classifier.train()

        content = " ".join((FINANCE_WORDS, LEGAL_WORDS, "unique epsilon"))
        predicted = classifier.predict_tags(content)

        finance_pk = multi_tag_corpus["t_finance"].pk
        legal_pk = multi_tag_corpus["t_legal"].pk
        assert finance_pk in predicted, f"Expected finance tag in {predicted}"
        assert legal_pk in predicted, f"Expected legal tag in {predicted}"

    def test_unrelated_content_predicts_no_trained_tags(
        self,
        classifier,
        multi_tag_corpus,
    ):
        """
        Smoke check on content that shares no vocabulary with the training
        corpus: only asserts that predict_tags() returns a list.

        Whether any tag fires on such content is deliberately not asserted.
        """
        classifier.train()

        gibberish = "xyzzyx qwerty asdfgh zxcvbn plokij unique zeta"
        alien = f"{gibberish} {gibberish}"
        predicted = classifier.predict_tags(alien)
        # False positives on out-of-vocabulary content are tolerated here.
        assert isinstance(predicted, list)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Single-tag (binary) correctness
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture()
def single_tag_corpus(classifier_settings):
    """
    Corpus with exactly ONE AUTO tag: ten finance-vocabulary documents carry
    the tag and ten legal-vocabulary documents carry no tag.

    Every document is also assigned a MATCH_NONE correspondent and document
    type. Returns ``{"t_finance": <Tag>}``.
    """
    t_finance = Tag.objects.create(
        name="finance",
        matching_algorithm=MatchingModel.MATCH_AUTO,
    )
    manual = {"matching_algorithm": MatchingModel.MATCH_NONE}
    shared_labels = {
        "correspondent": Correspondent.objects.create(name="org", **manual),
        "document_type": DocumentType.objects.create(name="doc", **manual),
    }

    # One-element list used as a mutable counter for unique checksums.
    created = [0]

    def make(title, content, tags):
        created[0] += 1
        return _make_doc(
            title,
            content,
            f"s{created[0]:04d}",
            tags=tags,
            **shared_labels,
        )

    # Alternate tagged / untagged documents.
    samples = (
        ("finance", FINANCE_WORDS, [t_finance]),
        ("other", LEGAL_WORDS, []),
    )
    for i in range(10):
        for label, vocabulary, tags in samples:
            make(f"{label}-{i}", _words(vocabulary, f"s{i}"), tags)

    return {"t_finance": t_finance}
|
||||
|
||||
|
||||
@pytest.mark.django_db()
class TestSingleTagCorrectness:
    """Predictions when the corpus contains exactly one AUTO tag."""

    def test_finance_content_predicts_finance_tag(self, classifier, single_tag_corpus):
        """Finance vocabulary → finance tag predicted."""
        classifier.train()
        finance_pk = single_tag_corpus["t_finance"].pk

        predicted = classifier.predict_tags(FINANCE_WORDS + " unique alpha single")

        assert finance_pk in predicted, (
            f"Expected finance tag pk={finance_pk} in {predicted}"
        )

    def test_non_finance_content_predicts_no_tag(self, classifier, single_tag_corpus):
        """Non-finance vocabulary → empty prediction list."""
        classifier.train()

        predicted = classifier.predict_tags(LEGAL_WORDS + " unique beta single")

        assert predicted == [], f"Expected no tags, got {predicted}"
|
||||
325
src/documents/tests/test_classifier_train_skip.py
Normal file
325
src/documents/tests/test_classifier_train_skip.py
Normal file
@@ -0,0 +1,325 @@
|
||||
"""
|
||||
Phase 1 — fast-skip optimisation in DocumentClassifier.train()
|
||||
|
||||
The goal: when nothing has changed since the last training run, train() should
|
||||
return False after at most 5 DB queries (1x MAX(modified) + 4x MATCH_AUTO pk
|
||||
lists), not after a full per-document label scan.
|
||||
|
||||
Correctness invariant: the skip must NOT fire when the set of AUTO-matching
|
||||
labels has changed, even if no Document.modified timestamp has advanced (e.g.
|
||||
a Tag's matching_algorithm was flipped to MATCH_AUTO after the last train).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import pytest
|
||||
from django.db import connection
|
||||
from django.test.utils import CaptureQueriesContext
|
||||
|
||||
from documents.classifier import DocumentClassifier
|
||||
from documents.models import Correspondent
|
||||
from documents.models import Document
|
||||
from documents.models import DocumentType
|
||||
from documents.models import MatchingModel
|
||||
from documents.models import StoragePath
|
||||
from documents.models import Tag
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fixtures
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture()
def classifier_settings(settings, tmp_path: Path):
    """Redirect MODEL_FILE to ``model.pickle`` under ``tmp_path``.

    Returns the (mutated) settings object so dependent fixtures can use it.
    """
    model_path = tmp_path / "model.pickle"
    settings.MODEL_FILE = model_path
    return settings
|
||||
|
||||
|
||||
@pytest.fixture()
def classifier(classifier_settings):
    """Return a new ``DocumentClassifier`` built after the settings override.

    Depends on ``classifier_settings`` so MODEL_FILE is redirected before the
    classifier is constructed.
    """
    fresh = DocumentClassifier()
    return fresh
|
||||
|
||||
|
||||
@pytest.fixture()
def label_corpus(classifier_settings):
    """
    Small label + document corpus used by the fast-skip tests.

    For each label kind (Correspondent, DocumentType, Tag, StoragePath) one
    MATCH_AUTO object (``*_auto``) and one MATCH_NONE object (``*_none``) is
    created.

    Documents
        doc_a, doc_b — carry all four ``*_auto`` labels
        doc_c        — carries ``c_none``, ``dt_none`` and ``t_none`` only
                       (``sp_none`` is created but not assigned)

    Returns a dict of every created object, keyed by the names above, so
    individual tests can mutate them.
    """
    auto = {"matching_algorithm": MatchingModel.MATCH_AUTO}
    manual = {"matching_algorithm": MatchingModel.MATCH_NONE}

    objects = {}
    objects["c_auto"] = Correspondent.objects.create(name="Auto Corp", **auto)
    objects["c_none"] = Correspondent.objects.create(name="Manual Corp", **manual)

    objects["dt_auto"] = DocumentType.objects.create(name="Invoice", **auto)
    objects["dt_none"] = DocumentType.objects.create(name="Other", **manual)

    objects["t_auto"] = Tag.objects.create(name="finance", **auto)
    objects["t_none"] = Tag.objects.create(name="misc", **manual)

    objects["sp_auto"] = StoragePath.objects.create(
        name="Finance Path",
        path="finance/{correspondent}",
        **auto,
    )
    objects["sp_none"] = StoragePath.objects.create(
        name="Other Path",
        path="other/{correspondent}",
        **manual,
    )

    # Label assignments shared by the two AUTO-labelled invoices.
    auto_labels = {
        "correspondent": objects["c_auto"],
        "document_type": objects["dt_auto"],
        "storage_path": objects["sp_auto"],
    }
    invoices = (
        (
            "doc_a",
            "Invoice from Auto Corp Jan",
            "quarterly invoice payment tax financial statement revenue",
            "aaa",
            "invoice_a.pdf",
        ),
        (
            "doc_b",
            "Invoice from Auto Corp Feb",
            "monthly invoice billing statement account balance due",
            "bbb",
            "invoice_b.pdf",
        ),
    )
    for key, title, content, checksum, filename in invoices:
        invoice = Document.objects.create(
            title=title,
            content=content,
            checksum=checksum,
            mime_type="application/pdf",
            filename=filename,
            **auto_labels,
        )
        invoice.tags.set([objects["t_auto"]])
        objects[key] = invoice

    # Control document — no AUTO labels, but has enough content to vectorize
    control = Document.objects.create(
        title="Miscellaneous Notes",
        content="meeting notes agenda discussion summary action items follow",
        correspondent=objects["c_none"],
        document_type=objects["dt_none"],
        checksum="ccc",
        mime_type="application/pdf",
        filename="notes_c.pdf",
    )
    control.tags.set([objects["t_none"]])
    objects["doc_c"] = control

    return objects
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Happy-path skip tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.django_db()
class TestFastSkipFires:
    """The no-op path: nothing changed, so the second train() is skipped."""

    def test_first_train_returns_true(self, classifier, label_corpus):
        """A fresh classifier's first train() reports True (work was done)."""
        trained = classifier.train()
        assert trained is True

    def test_second_train_returns_false(self, classifier, label_corpus):
        """With nothing changed, the second train() reports False (skipped)."""
        classifier.train()
        retrained = classifier.train()
        assert retrained is False

    def test_fast_skip_runs_minimal_queries(self, classifier, label_corpus):
        """
        The no-op path must use at most 5 DB queries:
          1x Document.objects.aggregate(Max('modified'))
          4x MATCH_AUTO pk lists (Correspondent / DocumentType / Tag / StoragePath)

        Without the fast path, deciding to skip requires iterating every
        document to build the label hash, which is O(N). This test verifies
        the fast path is in place by counting queries on the second train().
        """
        classifier.train()

        with CaptureQueriesContext(connection) as ctx:
            result = classifier.train()

        assert result is False
        assert len(ctx.captured_queries) <= 5, (
            f"Fast skip used {len(ctx.captured_queries)} queries; expected ≤5.\n"
            + "\n".join(q["sql"] for q in ctx.captured_queries)
        )

    def test_fast_skip_refreshes_cache_keys(self, classifier, label_corpus):
        """
        A skipped train() must still re-populate the three classifier cache
        keys (modified / hash / version).
        """
        from django.core.cache import cache

        from documents.caching import CLASSIFIER_HASH_KEY
        from documents.caching import CLASSIFIER_MODIFIED_KEY
        from documents.caching import CLASSIFIER_VERSION_KEY

        cache_keys = (
            CLASSIFIER_MODIFIED_KEY,
            CLASSIFIER_HASH_KEY,
            CLASSIFIER_VERSION_KEY,
        )

        classifier.train()
        # Evict the keys to prove skip re-populates them
        for key in cache_keys:
            cache.delete(key)

        result = classifier.train()

        assert result is False
        for key in cache_keys:
            assert cache.get(key) is not None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Correctness tests — skip must NOT fire when the world has changed
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.django_db()
class TestFastSkipDoesNotFire:
    """The skip guard must yield to a full retrain whenever labels change."""

    def test_document_content_modification_triggers_retrain(
        self,
        classifier,
        label_corpus,
    ):
        """Saving a document with new content must force a retrain."""
        classifier.train()

        edited = label_corpus["doc_a"]
        edited.content = "completely different words here now nothing same"
        edited.save()

        retrained = classifier.train()
        assert retrained is True

    def test_document_label_reassignment_triggers_retrain(
        self,
        classifier,
        label_corpus,
    ):
        """
        Reassigning a document to a different AUTO correspondent and saving
        it must trigger a retrain.
        """
        replacement = Correspondent.objects.create(
            name="Second Auto Corp",
            matching_algorithm=MatchingModel.MATCH_AUTO,
        )
        classifier.train()

        reassigned = label_corpus["doc_a"]
        reassigned.correspondent = replacement
        reassigned.save()

        retrained = classifier.train()
        assert retrained is True

    def test_matching_algorithm_change_on_assigned_tag_triggers_retrain(
        self,
        classifier,
        label_corpus,
    ):
        """
        Flipping a tag's matching_algorithm to MATCH_AUTO after it is already
        assigned to a document must trigger a retrain — even though no
        document is saved in between.

        This is the key correctness case for the auto-label-set digest:
        ``t_none`` is attached to doc_c by the fixture, so once it becomes
        MATCH_AUTO the set of learnable AUTO tags grows.
        """
        classifier.train()

        flipped = label_corpus["t_none"]
        flipped.matching_algorithm = MatchingModel.MATCH_AUTO
        # Only the tag row is written; no Document is saved here.
        flipped.save(update_fields=["matching_algorithm"])

        retrained = classifier.train()
        assert retrained is True

    def test_new_auto_correspondent_triggers_retrain(self, classifier, label_corpus):
        """
        Adding a brand-new MATCH_AUTO correspondent (unassigned to any doc)
        must trigger a retrain: the auto-label-set has grown.
        """
        classifier.train()

        Correspondent.objects.create(
            name="New Auto Corp",
            matching_algorithm=MatchingModel.MATCH_AUTO,
        )

        retrained = classifier.train()
        assert retrained is True

    def test_removing_auto_label_triggers_retrain(self, classifier, label_corpus):
        """
        Deleting a MATCH_AUTO correspondent shrinks the auto-label-set and
        must trigger a retrain.
        """
        classifier.train()

        doomed = label_corpus["c_auto"]
        doomed.delete()

        retrained = classifier.train()
        assert retrained is True

    def test_fresh_classifier_always_trains(self, classifier, label_corpus):
        """
        A classifier that has never been trained (last_doc_change_time is None)
        must perform a full train.
        """
        assert classifier.last_doc_change_time is None
        trained = classifier.train()
        assert trained is True

    def test_no_documents_raises_value_error(self, classifier, classifier_settings):
        """train() with no documents in the database must raise ValueError."""
        with pytest.raises(ValueError, match="No training data"):
            classifier.train()
|
||||
@@ -38,7 +38,6 @@ from django.db.models import Model
|
||||
from django.db.models import OuterRef
|
||||
from django.db.models import Prefetch
|
||||
from django.db.models import Q
|
||||
from django.db.models import QuerySet
|
||||
from django.db.models import Subquery
|
||||
from django.db.models import Sum
|
||||
from django.db.models import When
|
||||
@@ -166,9 +165,7 @@ from documents.permissions import ViewDocumentsPermissions
|
||||
from documents.permissions import annotate_document_count_for_related_queryset
|
||||
from documents.permissions import get_document_count_filter_for_user
|
||||
from documents.permissions import get_objects_for_user_owner_aware
|
||||
from documents.permissions import has_global_statistics_permission
|
||||
from documents.permissions import has_perms_owner_aware
|
||||
from documents.permissions import has_system_status_permission
|
||||
from documents.permissions import set_permissions_for_object
|
||||
from documents.plugins.date_parsing import get_date_parser
|
||||
from documents.schema import generate_object_with_permissions_schema
|
||||
@@ -249,13 +246,6 @@ if settings.AUDIT_LOG_ENABLED:
|
||||
|
||||
logger = logging.getLogger("paperless.api")
|
||||
|
||||
# Crossover point for intersect_and_order: below this count use a targeted
|
||||
# IN-clause query; at or above this count fall back to a full-table scan +
|
||||
# Python set intersection. The IN-clause is faster for small result sets but
|
||||
# degrades on SQLite with thousands of parameters. PostgreSQL handles large IN
|
||||
# clauses efficiently, so this threshold mainly protects SQLite users.
|
||||
_TANTIVY_INTERSECT_THRESHOLD = 5_000
|
||||
|
||||
|
||||
class IndexView(TemplateView):
|
||||
template_name = "index.html"
|
||||
@@ -2068,16 +2058,19 @@ class UnifiedSearchViewSet(DocumentViewSet):
|
||||
if not self._is_search_request():
|
||||
return super().list(request)
|
||||
|
||||
from documents.search import SearchHit
|
||||
from documents.search import SearchMode
|
||||
from documents.search import TantivyBackend
|
||||
from documents.search import TantivyRelevanceList
|
||||
from documents.search import get_backend
|
||||
|
||||
def parse_search_params() -> tuple[str | None, bool, bool, int, int]:
|
||||
"""Extract query string, search mode, and ordering from request."""
|
||||
active = self._get_active_search_params(request)
|
||||
if len(active) > 1:
|
||||
try:
|
||||
backend = get_backend()
|
||||
# ORM-filtered queryset: permissions + field filters + ordering (DRF backends applied)
|
||||
filtered_qs = self.filter_queryset(self.get_queryset())
|
||||
|
||||
user = None if request.user.is_superuser else request.user
|
||||
active_search_params = self._get_active_search_params(request)
|
||||
|
||||
if len(active_search_params) > 1:
|
||||
raise ValidationError(
|
||||
{
|
||||
"detail": _(
|
||||
@@ -2086,161 +2079,73 @@ class UnifiedSearchViewSet(DocumentViewSet):
|
||||
},
|
||||
)
|
||||
|
||||
ordering_param = request.query_params.get("ordering", "")
|
||||
sort_reverse = ordering_param.startswith("-")
|
||||
sort_field_name = ordering_param.lstrip("-") or None
|
||||
# "score" means relevance order — Tantivy handles it natively,
|
||||
# so treat it as a Tantivy sort to preserve the ranked order through
|
||||
# the ORM intersection step.
|
||||
use_tantivy_sort = (
|
||||
sort_field_name in TantivyBackend.SORTABLE_FIELDS
|
||||
or sort_field_name is None
|
||||
or sort_field_name == "score"
|
||||
)
|
||||
|
||||
try:
|
||||
page_num = int(request.query_params.get("page", 1))
|
||||
except (TypeError, ValueError):
|
||||
page_num = 1
|
||||
page_size = (
|
||||
self.paginator.get_page_size(request) or self.paginator.page_size
|
||||
)
|
||||
|
||||
return sort_field_name, sort_reverse, use_tantivy_sort, page_num, page_size
|
||||
|
||||
def intersect_and_order(
|
||||
all_ids: list[int],
|
||||
filtered_qs: QuerySet[Document],
|
||||
*,
|
||||
use_tantivy_sort: bool,
|
||||
) -> list[int]:
|
||||
"""Intersect search IDs with ORM-visible IDs, preserving order."""
|
||||
if not all_ids:
|
||||
return []
|
||||
if use_tantivy_sort:
|
||||
if len(all_ids) <= _TANTIVY_INTERSECT_THRESHOLD:
|
||||
# Small result set: targeted IN-clause avoids a full-table scan.
|
||||
visible_ids = set(
|
||||
filtered_qs.filter(pk__in=all_ids).values_list("pk", flat=True),
|
||||
)
|
||||
else:
|
||||
# Large result set: full-table scan + Python intersection is faster
|
||||
# than a large IN-clause on SQLite.
|
||||
visible_ids = set(
|
||||
filtered_qs.values_list("pk", flat=True),
|
||||
)
|
||||
return [doc_id for doc_id in all_ids if doc_id in visible_ids]
|
||||
return list(
|
||||
filtered_qs.filter(id__in=all_ids).values_list("pk", flat=True),
|
||||
)
|
||||
|
||||
def run_text_search(
|
||||
backend: TantivyBackend,
|
||||
user: User | None,
|
||||
filtered_qs: QuerySet[Document],
|
||||
) -> tuple[list[int], list[SearchHit], int]:
|
||||
"""Handle text/title/query search: IDs, ORM intersection, page highlights."""
|
||||
if "text" in request.query_params:
|
||||
search_mode = SearchMode.TEXT
|
||||
query_str = request.query_params["text"]
|
||||
elif "title_search" in request.query_params:
|
||||
search_mode = SearchMode.TITLE
|
||||
query_str = request.query_params["title_search"]
|
||||
else:
|
||||
search_mode = SearchMode.QUERY
|
||||
query_str = request.query_params["query"]
|
||||
|
||||
# "score" is not a real Tantivy sort field — it means relevance order,
|
||||
# which is Tantivy's default when no sort field is specified.
|
||||
is_score_sort = sort_field_name == "score"
|
||||
all_ids = backend.search_ids(
|
||||
query_str,
|
||||
user=user,
|
||||
sort_field=(
|
||||
None if (not use_tantivy_sort or is_score_sort) else sort_field_name
|
||||
),
|
||||
sort_reverse=sort_reverse,
|
||||
search_mode=search_mode,
|
||||
)
|
||||
ordered_ids = intersect_and_order(
|
||||
all_ids,
|
||||
filtered_qs,
|
||||
use_tantivy_sort=use_tantivy_sort,
|
||||
)
|
||||
# Tantivy returns relevance results best-first (descending score).
|
||||
# ordering=score (ascending, worst-first) requires a reversal.
|
||||
if is_score_sort and not sort_reverse:
|
||||
ordered_ids = list(reversed(ordered_ids))
|
||||
|
||||
page_offset = (page_num - 1) * page_size
|
||||
page_ids = ordered_ids[page_offset : page_offset + page_size]
|
||||
page_hits = backend.highlight_hits(
|
||||
query_str,
|
||||
page_ids,
|
||||
search_mode=search_mode,
|
||||
rank_start=page_offset + 1,
|
||||
)
|
||||
return ordered_ids, page_hits, page_offset
|
||||
|
||||
def run_more_like_this(
|
||||
backend: TantivyBackend,
|
||||
user: User | None,
|
||||
filtered_qs: QuerySet[Document],
|
||||
) -> tuple[list[int], list[SearchHit], int]:
|
||||
"""Handle more_like_id search: permission check, IDs, stub hits."""
|
||||
try:
|
||||
more_like_doc_id = int(request.query_params["more_like_id"])
|
||||
more_like_doc = Document.objects.select_related("owner").get(
|
||||
pk=more_like_doc_id,
|
||||
)
|
||||
except (TypeError, ValueError, Document.DoesNotExist):
|
||||
raise PermissionDenied(_("Invalid more_like_id"))
|
||||
|
||||
if not has_perms_owner_aware(
|
||||
request.user,
|
||||
"view_document",
|
||||
more_like_doc,
|
||||
if (
|
||||
"text" in request.query_params
|
||||
or "title_search" in request.query_params
|
||||
or "query" in request.query_params
|
||||
):
|
||||
raise PermissionDenied(_("Insufficient permissions."))
|
||||
|
||||
all_ids = backend.more_like_this_ids(more_like_doc_id, user=user)
|
||||
ordered_ids = intersect_and_order(
|
||||
all_ids,
|
||||
filtered_qs,
|
||||
use_tantivy_sort=True,
|
||||
)
|
||||
|
||||
page_offset = (page_num - 1) * page_size
|
||||
page_ids = ordered_ids[page_offset : page_offset + page_size]
|
||||
page_hits = [
|
||||
SearchHit(id=doc_id, score=0.0, rank=rank, highlights={})
|
||||
for rank, doc_id in enumerate(page_ids, start=page_offset + 1)
|
||||
]
|
||||
return ordered_ids, page_hits, page_offset
|
||||
|
||||
try:
|
||||
sort_field_name, sort_reverse, use_tantivy_sort, page_num, page_size = (
|
||||
parse_search_params()
|
||||
)
|
||||
|
||||
backend = get_backend()
|
||||
filtered_qs = self.filter_queryset(self.get_queryset())
|
||||
user = None if request.user.is_superuser else request.user
|
||||
|
||||
if "more_like_id" in request.query_params:
|
||||
ordered_ids, page_hits, page_offset = run_more_like_this(
|
||||
backend,
|
||||
user,
|
||||
filtered_qs,
|
||||
if "text" in request.query_params:
|
||||
search_mode = SearchMode.TEXT
|
||||
query_str = request.query_params["text"]
|
||||
elif "title_search" in request.query_params:
|
||||
search_mode = SearchMode.TITLE
|
||||
query_str = request.query_params["title_search"]
|
||||
else:
|
||||
search_mode = SearchMode.QUERY
|
||||
query_str = request.query_params["query"]
|
||||
results = backend.search(
|
||||
query_str,
|
||||
user=user,
|
||||
page=1,
|
||||
page_size=10000,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
search_mode=search_mode,
|
||||
)
|
||||
else:
|
||||
ordered_ids, page_hits, page_offset = run_text_search(
|
||||
backend,
|
||||
user,
|
||||
filtered_qs,
|
||||
# more_like_id — validate permission on the seed document first
|
||||
try:
|
||||
more_like_doc_id = int(request.query_params["more_like_id"])
|
||||
more_like_doc = Document.objects.select_related("owner").get(
|
||||
pk=more_like_doc_id,
|
||||
)
|
||||
except (TypeError, ValueError, Document.DoesNotExist):
|
||||
raise PermissionDenied(_("Invalid more_like_id"))
|
||||
|
||||
if not has_perms_owner_aware(
|
||||
request.user,
|
||||
"view_document",
|
||||
more_like_doc,
|
||||
):
|
||||
raise PermissionDenied(_("Insufficient permissions."))
|
||||
|
||||
results = backend.more_like_this(
|
||||
more_like_doc_id,
|
||||
user=user,
|
||||
page=1,
|
||||
page_size=10000,
|
||||
)
|
||||
|
||||
rl = TantivyRelevanceList(ordered_ids, page_hits, page_offset)
|
||||
hits_by_id = {h["id"]: h for h in results.hits}
|
||||
|
||||
# Determine sort order: no ordering param -> Tantivy relevance; otherwise -> ORM order
|
||||
ordering_param = request.query_params.get("ordering", "").lstrip("-")
|
||||
if not ordering_param:
|
||||
# Preserve Tantivy relevance order; intersect with ORM-visible IDs
|
||||
orm_ids = set(filtered_qs.values_list("pk", flat=True))
|
||||
ordered_hits = [h for h in results.hits if h["id"] in orm_ids]
|
||||
else:
|
||||
# Use ORM ordering (already applied by DocumentsOrderingFilter)
|
||||
hit_ids = set(hits_by_id.keys())
|
||||
orm_ordered_ids = filtered_qs.filter(id__in=hit_ids).values_list(
|
||||
"pk",
|
||||
flat=True,
|
||||
)
|
||||
ordered_hits = [
|
||||
hits_by_id[pk] for pk in orm_ordered_ids if pk in hits_by_id
|
||||
]
|
||||
|
||||
rl = TantivyRelevanceList(ordered_hits)
|
||||
page = self.paginate_queryset(rl)
|
||||
|
||||
if page is not None:
|
||||
@@ -2250,18 +2155,15 @@ class UnifiedSearchViewSet(DocumentViewSet):
|
||||
if get_boolean(
|
||||
str(request.query_params.get("include_selection_data", "false")),
|
||||
):
|
||||
# NOTE: pk__in=ordered_ids generates a large SQL IN clause
|
||||
# for big result sets. Acceptable today but may need a temp
|
||||
# table or chunked approach if selection_data becomes slow
|
||||
# at scale (tens of thousands of matching documents).
|
||||
all_ids = [h["id"] for h in ordered_hits]
|
||||
response.data["selection_data"] = (
|
||||
self._get_selection_data_for_queryset(
|
||||
filtered_qs.filter(pk__in=ordered_ids),
|
||||
filtered_qs.filter(pk__in=all_ids),
|
||||
)
|
||||
)
|
||||
return response
|
||||
|
||||
serializer = self.get_serializer(page_hits, many=True)
|
||||
serializer = self.get_serializer(ordered_hits, many=True)
|
||||
return Response(serializer.data)
|
||||
|
||||
except NotFound:
|
||||
@@ -3167,17 +3069,20 @@ class GlobalSearchView(PassUserMixin):
|
||||
docs = all_docs.filter(title__icontains=query)[:OBJECT_LIMIT]
|
||||
else:
|
||||
user = None if request.user.is_superuser else request.user
|
||||
matching_ids = get_backend().search_ids(
|
||||
fts_results = get_backend().search(
|
||||
query,
|
||||
user=user,
|
||||
page=1,
|
||||
page_size=1000,
|
||||
sort_field=None,
|
||||
sort_reverse=False,
|
||||
search_mode=SearchMode.TEXT,
|
||||
limit=OBJECT_LIMIT * 3,
|
||||
)
|
||||
docs_by_id = all_docs.in_bulk(matching_ids)
|
||||
docs_by_id = all_docs.in_bulk([hit["id"] for hit in fts_results.hits])
|
||||
docs = [
|
||||
docs_by_id[doc_id]
|
||||
for doc_id in matching_ids
|
||||
if doc_id in docs_by_id
|
||||
docs_by_id[hit["id"]]
|
||||
for hit in fts_results.hits
|
||||
if hit["id"] in docs_by_id
|
||||
][:OBJECT_LIMIT]
|
||||
saved_views = (
|
||||
get_objects_for_user_owner_aware(
|
||||
@@ -3360,11 +3265,10 @@ class StatisticsView(GenericAPIView):
|
||||
|
||||
def get(self, request, format=None):
|
||||
user = request.user if request.user is not None else None
|
||||
can_view_global_stats = has_global_statistics_permission(user) or user is None
|
||||
|
||||
documents = (
|
||||
Document.objects.all()
|
||||
if can_view_global_stats
|
||||
if user is None
|
||||
else get_objects_for_user_owner_aware(
|
||||
user,
|
||||
"documents.view_document",
|
||||
@@ -3373,12 +3277,12 @@ class StatisticsView(GenericAPIView):
|
||||
)
|
||||
tags = (
|
||||
Tag.objects.all()
|
||||
if can_view_global_stats
|
||||
if user is None
|
||||
else get_objects_for_user_owner_aware(user, "documents.view_tag", Tag)
|
||||
).only("id", "is_inbox_tag")
|
||||
correspondent_count = (
|
||||
Correspondent.objects.count()
|
||||
if can_view_global_stats
|
||||
if user is None
|
||||
else get_objects_for_user_owner_aware(
|
||||
user,
|
||||
"documents.view_correspondent",
|
||||
@@ -3387,7 +3291,7 @@ class StatisticsView(GenericAPIView):
|
||||
)
|
||||
document_type_count = (
|
||||
DocumentType.objects.count()
|
||||
if can_view_global_stats
|
||||
if user is None
|
||||
else get_objects_for_user_owner_aware(
|
||||
user,
|
||||
"documents.view_documenttype",
|
||||
@@ -3396,7 +3300,7 @@ class StatisticsView(GenericAPIView):
|
||||
)
|
||||
storage_path_count = (
|
||||
StoragePath.objects.count()
|
||||
if can_view_global_stats
|
||||
if user is None
|
||||
else get_objects_for_user_owner_aware(
|
||||
user,
|
||||
"documents.view_storagepath",
|
||||
@@ -4353,7 +4257,7 @@ class SystemStatusView(PassUserMixin):
|
||||
permission_classes = (IsAuthenticated,)
|
||||
|
||||
def get(self, request, format=None):
|
||||
if not has_system_status_permission(request.user):
|
||||
if not request.user.is_staff:
|
||||
return HttpResponseForbidden("Insufficient permissions")
|
||||
|
||||
current_version = version.__full_version_str__
|
||||
|
||||
@@ -2,7 +2,7 @@ msgid ""
|
||||
msgstr ""
|
||||
"Project-Id-Version: paperless-ngx\n"
|
||||
"Report-Msgid-Bugs-To: \n"
|
||||
"POT-Creation-Date: 2026-04-08 15:41+0000\n"
|
||||
"POT-Creation-Date: 2026-04-06 22:51+0000\n"
|
||||
"PO-Revision-Date: 2022-02-17 04:17\n"
|
||||
"Last-Translator: \n"
|
||||
"Language-Team: English\n"
|
||||
@@ -1308,8 +1308,8 @@ msgid "workflow runs"
|
||||
msgstr ""
|
||||
|
||||
#: documents/serialisers.py:463 documents/serialisers.py:815
|
||||
#: documents/serialisers.py:2545 documents/views.py:2122
|
||||
#: documents/views.py:2177 paperless_mail/serialisers.py:143
|
||||
#: documents/serialisers.py:2545 documents/views.py:2120
|
||||
#: documents/views.py:2175 paperless_mail/serialisers.py:143
|
||||
msgid "Insufficient permissions."
|
||||
msgstr ""
|
||||
|
||||
@@ -1349,7 +1349,7 @@ msgstr ""
|
||||
msgid "Duplicate document identifiers are not allowed."
|
||||
msgstr ""
|
||||
|
||||
#: documents/serialisers.py:2631 documents/views.py:3787
|
||||
#: documents/serialisers.py:2631 documents/views.py:3784
|
||||
#, python-format
|
||||
msgid "Documents not found: %(ids)s"
|
||||
msgstr ""
|
||||
@@ -1617,28 +1617,28 @@ msgstr ""
|
||||
msgid "Unable to parse URI {value}"
|
||||
msgstr ""
|
||||
|
||||
#: documents/views.py:2079
|
||||
#: documents/views.py:2077
|
||||
msgid "Specify only one of text, title_search, query, or more_like_id."
|
||||
msgstr ""
|
||||
|
||||
#: documents/views.py:2115 documents/views.py:2174
|
||||
#: documents/views.py:2113 documents/views.py:2172
|
||||
msgid "Invalid more_like_id"
|
||||
msgstr ""
|
||||
|
||||
#: documents/views.py:3799
|
||||
#: documents/views.py:3796
|
||||
#, python-format
|
||||
msgid "Insufficient permissions to share document %(id)s."
|
||||
msgstr ""
|
||||
|
||||
#: documents/views.py:3842
|
||||
#: documents/views.py:3839
|
||||
msgid "Bundle is already being processed."
|
||||
msgstr ""
|
||||
|
||||
#: documents/views.py:3899
|
||||
#: documents/views.py:3896
|
||||
msgid "The share link bundle is still being prepared. Please try again later."
|
||||
msgstr ""
|
||||
|
||||
#: documents/views.py:3909
|
||||
#: documents/views.py:3906
|
||||
msgid "The share link bundle is unavailable."
|
||||
msgstr ""
|
||||
|
||||
|
||||
@@ -1,22 +0,0 @@
|
||||
# Generated by Django 5.2.12 on 2026-04-07 23:13
|
||||
|
||||
from django.db import migrations
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
dependencies = [
|
||||
("paperless", "0008_replace_skip_archive_file"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterModelOptions(
|
||||
name="applicationconfiguration",
|
||||
options={
|
||||
"permissions": [
|
||||
("view_global_statistics", "Can view global object counts"),
|
||||
("view_system_status", "Can view system status information"),
|
||||
],
|
||||
"verbose_name": "paperless application settings",
|
||||
},
|
||||
),
|
||||
]
|
||||
@@ -341,10 +341,6 @@ class ApplicationConfiguration(AbstractSingletonModel):
|
||||
|
||||
class Meta:
|
||||
verbose_name = _("paperless application settings")
|
||||
permissions = [
|
||||
("view_global_statistics", "Can view global object counts"),
|
||||
("view_system_status", "Can view system status information"),
|
||||
]
|
||||
|
||||
def __str__(self) -> str: # pragma: no cover
|
||||
return "ApplicationConfiguration"
|
||||
|
||||
@@ -89,7 +89,7 @@ class StandardPagination(PageNumberPagination):
|
||||
|
||||
query = self.page.paginator.object_list
|
||||
if isinstance(query, TantivyRelevanceList):
|
||||
return query.get_all_ids()
|
||||
return [h["id"] for h in query._hits]
|
||||
return self.page.paginator.object_list.values_list("pk", flat=True)
|
||||
|
||||
def get_paginated_response_schema(self, schema):
|
||||
|
||||
Reference in New Issue
Block a user