mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-04-19 14:29:27 +00:00
Compare commits
31 Commits
feature-cl
...
feature-pr
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0887203d45 | ||
|
|
ea14c0b06f | ||
|
|
a8dc332abb | ||
|
|
e64b9a4cfd | ||
|
|
6ba1acd7d3 | ||
|
|
d006b79fd1 | ||
|
|
24b754b44c | ||
|
|
a1a3520a8c | ||
|
|
23449cda17 | ||
|
|
ca3f5665ba | ||
|
|
9aa0914c3f | ||
|
|
fdd5e3ecb2 | ||
|
|
df3b656352 | ||
|
|
51e721733f | ||
|
|
0ad8b8c002 | ||
|
|
4d5d77ce15 | ||
|
|
5ba2ce9c98 | ||
|
|
d8fe6a9a36 | ||
|
|
bd630c1280 | ||
|
|
ab183b9982 | ||
|
|
439e10d767 | ||
|
|
cebfea9d94 | ||
|
|
a97c0d8a06 | ||
|
|
1e571ea23c | ||
|
|
b80b92a2b2 | ||
|
|
c07b802bb8 | ||
|
|
ec6969e326 | ||
|
|
4629bbf83e | ||
|
|
826ffcccef | ||
|
|
b7a5255102 | ||
|
|
962a4ddd73 |
2
.github/dependabot.yml
vendored
2
.github/dependabot.yml
vendored
@@ -164,6 +164,8 @@ updates:
|
||||
directory: "/" # Location of package manifests
|
||||
schedule:
|
||||
interval: "monthly"
|
||||
cooldown:
|
||||
default-days: 7
|
||||
groups:
|
||||
pre-commit-dependencies:
|
||||
patterns:
|
||||
|
||||
61
.github/workflows/ci-backend.yml
vendored
61
.github/workflows/ci-backend.yml
vendored
@@ -30,10 +30,13 @@ jobs:
|
||||
persist-credentials: false
|
||||
- name: Decide run mode
|
||||
id: force
|
||||
env:
|
||||
EVENT_NAME: ${{ github.event_name }}
|
||||
REF_NAME: ${{ github.ref_name }}
|
||||
run: |
|
||||
if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
|
||||
if [[ "${EVENT_NAME}" == "workflow_dispatch" ]]; then
|
||||
echo "run_all=true" >> "$GITHUB_OUTPUT"
|
||||
elif [[ "${{ github.event_name }}" == "push" && ( "${{ github.ref_name }}" == "main" || "${{ github.ref_name }}" == "dev" ) ]]; then
|
||||
elif [[ "${EVENT_NAME}" == "push" && ( "${REF_NAME}" == "main" || "${REF_NAME}" == "dev" ) ]]; then
|
||||
echo "run_all=true" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "run_all=false" >> "$GITHUB_OUTPUT"
|
||||
@@ -41,15 +44,22 @@ jobs:
|
||||
- name: Set diff range
|
||||
id: range
|
||||
if: steps.force.outputs.run_all != 'true'
|
||||
env:
|
||||
BEFORE_SHA: ${{ github.event.before }}
|
||||
DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
|
||||
EVENT_CREATED: ${{ github.event.created }}
|
||||
EVENT_NAME: ${{ github.event_name }}
|
||||
PR_BASE_SHA: ${{ github.event.pull_request.base.sha }}
|
||||
SHA: ${{ github.sha }}
|
||||
run: |
|
||||
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
|
||||
echo "base=${{ github.event.pull_request.base.sha }}" >> "$GITHUB_OUTPUT"
|
||||
elif [[ "${{ github.event.created }}" == "true" ]]; then
|
||||
echo "base=${{ github.event.repository.default_branch }}" >> "$GITHUB_OUTPUT"
|
||||
if [[ "${EVENT_NAME}" == "pull_request" ]]; then
|
||||
echo "base=${PR_BASE_SHA}" >> "$GITHUB_OUTPUT"
|
||||
elif [[ "${EVENT_CREATED}" == "true" ]]; then
|
||||
echo "base=${DEFAULT_BRANCH}" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "base=${{ github.event.before }}" >> "$GITHUB_OUTPUT"
|
||||
echo "base=${BEFORE_SHA}" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
echo "ref=${{ github.sha }}" >> "$GITHUB_OUTPUT"
|
||||
echo "ref=${SHA}" >> "$GITHUB_OUTPUT"
|
||||
- name: Detect changes
|
||||
id: filter
|
||||
if: steps.force.outputs.run_all != 'true'
|
||||
@@ -90,7 +100,7 @@ jobs:
|
||||
with:
|
||||
python-version: "${{ matrix.python-version }}"
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@5a095e7a2014a4212f075830d4f7277575a9d098 # v7.3.1
|
||||
uses: astral-sh/setup-uv@cec208311dfd045dd5311c1add060b2062131d57 # v8.0.0
|
||||
with:
|
||||
version: ${{ env.DEFAULT_UV_VERSION }}
|
||||
enable-cache: true
|
||||
@@ -104,9 +114,11 @@ jobs:
|
||||
run: |
|
||||
sudo cp docker/rootfs/etc/ImageMagick-6/paperless-policy.xml /etc/ImageMagick-6/policy.xml
|
||||
- name: Install Python dependencies
|
||||
env:
|
||||
PYTHON_VERSION: ${{ steps.setup-python.outputs.python-version }}
|
||||
run: |
|
||||
uv sync \
|
||||
--python ${{ steps.setup-python.outputs.python-version }} \
|
||||
--python "${PYTHON_VERSION}" \
|
||||
--group testing \
|
||||
--frozen
|
||||
- name: List installed Python dependencies
|
||||
@@ -114,26 +126,27 @@ jobs:
|
||||
uv pip list
|
||||
- name: Install NLTK data
|
||||
run: |
|
||||
uv run python -m nltk.downloader punkt punkt_tab snowball_data stopwords -d ${{ env.NLTK_DATA }}
|
||||
uv run python -m nltk.downloader punkt punkt_tab snowball_data stopwords -d "${NLTK_DATA}"
|
||||
- name: Run tests
|
||||
env:
|
||||
NLTK_DATA: ${{ env.NLTK_DATA }}
|
||||
PAPERLESS_CI_TEST: 1
|
||||
PYTHON_VERSION: ${{ steps.setup-python.outputs.python-version }}
|
||||
run: |
|
||||
uv run \
|
||||
--python ${{ steps.setup-python.outputs.python-version }} \
|
||||
--python "${PYTHON_VERSION}" \
|
||||
--dev \
|
||||
--frozen \
|
||||
pytest
|
||||
- name: Upload test results to Codecov
|
||||
if: always()
|
||||
uses: codecov/codecov-action@671740ac38dd9b0130fbe1cec585b89eea48d3de # v5.5.2
|
||||
uses: codecov/codecov-action@57e3a136b779b570ffcdbf80b3bdc90e7fab3de2 # v6.0.0
|
||||
with:
|
||||
flags: backend-python-${{ matrix.python-version }}
|
||||
files: junit.xml
|
||||
report_type: test_results
|
||||
- name: Upload coverage to Codecov
|
||||
uses: codecov/codecov-action@671740ac38dd9b0130fbe1cec585b89eea48d3de # v5.5.2
|
||||
uses: codecov/codecov-action@57e3a136b779b570ffcdbf80b3bdc90e7fab3de2 # v6.0.0
|
||||
with:
|
||||
flags: backend-python-${{ matrix.python-version }}
|
||||
files: coverage.xml
|
||||
@@ -163,15 +176,17 @@ jobs:
|
||||
with:
|
||||
python-version: "${{ env.DEFAULT_PYTHON }}"
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@5a095e7a2014a4212f075830d4f7277575a9d098 # v7.3.1
|
||||
uses: astral-sh/setup-uv@cec208311dfd045dd5311c1add060b2062131d57 # v8.0.0
|
||||
with:
|
||||
version: ${{ env.DEFAULT_UV_VERSION }}
|
||||
enable-cache: true
|
||||
python-version: ${{ steps.setup-python.outputs.python-version }}
|
||||
- name: Install Python dependencies
|
||||
env:
|
||||
PYTHON_VERSION: ${{ steps.setup-python.outputs.python-version }}
|
||||
run: |
|
||||
uv sync \
|
||||
--python ${{ steps.setup-python.outputs.python-version }} \
|
||||
--python "${PYTHON_VERSION}" \
|
||||
--group testing \
|
||||
--group typing \
|
||||
--frozen
|
||||
@@ -207,19 +222,23 @@ jobs:
|
||||
runs-on: ubuntu-slim
|
||||
steps:
|
||||
- name: Check gate
|
||||
env:
|
||||
BACKEND_CHANGED: ${{ needs.changes.outputs.backend_changed }}
|
||||
TEST_RESULT: ${{ needs.test.result }}
|
||||
TYPING_RESULT: ${{ needs.typing.result }}
|
||||
run: |
|
||||
if [[ "${{ needs.changes.outputs.backend_changed }}" != "true" ]]; then
|
||||
if [[ "${BACKEND_CHANGED}" != "true" ]]; then
|
||||
echo "No backend-relevant changes detected."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
if [[ "${{ needs.test.result }}" != "success" ]]; then
|
||||
echo "::error::Backend test job result: ${{ needs.test.result }}"
|
||||
if [[ "${TEST_RESULT}" != "success" ]]; then
|
||||
echo "::error::Backend test job result: ${TEST_RESULT}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [[ "${{ needs.typing.result }}" != "success" ]]; then
|
||||
echo "::error::Backend typing job result: ${{ needs.typing.result }}"
|
||||
if [[ "${TYPING_RESULT}" != "success" ]]; then
|
||||
echo "::error::Backend typing job result: ${TYPING_RESULT}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
||||
1
.github/workflows/ci-docker.yml
vendored
1
.github/workflows/ci-docker.yml
vendored
@@ -166,6 +166,7 @@ jobs:
|
||||
runs-on: ubuntu-24.04
|
||||
needs: build-arch
|
||||
if: needs.build-arch.outputs.should-push == 'true'
|
||||
environment: image-publishing
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
|
||||
2
.github/workflows/ci-docs.yml
vendored
2
.github/workflows/ci-docs.yml
vendored
@@ -78,7 +78,7 @@ jobs:
|
||||
with:
|
||||
python-version: ${{ env.DEFAULT_PYTHON_VERSION }}
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@5a095e7a2014a4212f075830d4f7277575a9d098 # v7.3.1
|
||||
uses: astral-sh/setup-uv@cec208311dfd045dd5311c1add060b2062131d57 # v8.0.0
|
||||
with:
|
||||
version: ${{ env.DEFAULT_UV_VERSION }}
|
||||
enable-cache: true
|
||||
|
||||
62
.github/workflows/ci-frontend.yml
vendored
62
.github/workflows/ci-frontend.yml
vendored
@@ -27,10 +27,13 @@ jobs:
|
||||
persist-credentials: false
|
||||
- name: Decide run mode
|
||||
id: force
|
||||
env:
|
||||
EVENT_NAME: ${{ github.event_name }}
|
||||
REF_NAME: ${{ github.ref_name }}
|
||||
run: |
|
||||
if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
|
||||
if [[ "${EVENT_NAME}" == "workflow_dispatch" ]]; then
|
||||
echo "run_all=true" >> "$GITHUB_OUTPUT"
|
||||
elif [[ "${{ github.event_name }}" == "push" && ( "${{ github.ref_name }}" == "main" || "${{ github.ref_name }}" == "dev" ) ]]; then
|
||||
elif [[ "${EVENT_NAME}" == "push" && ( "${REF_NAME}" == "main" || "${REF_NAME}" == "dev" ) ]]; then
|
||||
echo "run_all=true" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "run_all=false" >> "$GITHUB_OUTPUT"
|
||||
@@ -38,15 +41,22 @@ jobs:
|
||||
- name: Set diff range
|
||||
id: range
|
||||
if: steps.force.outputs.run_all != 'true'
|
||||
env:
|
||||
BEFORE_SHA: ${{ github.event.before }}
|
||||
DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
|
||||
EVENT_CREATED: ${{ github.event.created }}
|
||||
EVENT_NAME: ${{ github.event_name }}
|
||||
PR_BASE_SHA: ${{ github.event.pull_request.base.sha }}
|
||||
SHA: ${{ github.sha }}
|
||||
run: |
|
||||
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
|
||||
echo "base=${{ github.event.pull_request.base.sha }}" >> "$GITHUB_OUTPUT"
|
||||
elif [[ "${{ github.event.created }}" == "true" ]]; then
|
||||
echo "base=${{ github.event.repository.default_branch }}" >> "$GITHUB_OUTPUT"
|
||||
if [[ "${EVENT_NAME}" == "pull_request" ]]; then
|
||||
echo "base=${PR_BASE_SHA}" >> "$GITHUB_OUTPUT"
|
||||
elif [[ "${EVENT_CREATED}" == "true" ]]; then
|
||||
echo "base=${DEFAULT_BRANCH}" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "base=${{ github.event.before }}" >> "$GITHUB_OUTPUT"
|
||||
echo "base=${BEFORE_SHA}" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
echo "ref=${{ github.sha }}" >> "$GITHUB_OUTPUT"
|
||||
echo "ref=${SHA}" >> "$GITHUB_OUTPUT"
|
||||
- name: Detect changes
|
||||
id: filter
|
||||
if: steps.force.outputs.run_all != 'true'
|
||||
@@ -164,13 +174,13 @@ jobs:
|
||||
run: cd src-ui && pnpm run test --max-workers=2 --shard=${{ matrix.shard-index }}/${{ matrix.shard-count }}
|
||||
- name: Upload test results to Codecov
|
||||
if: always()
|
||||
uses: codecov/codecov-action@671740ac38dd9b0130fbe1cec585b89eea48d3de # v5.5.2
|
||||
uses: codecov/codecov-action@57e3a136b779b570ffcdbf80b3bdc90e7fab3de2 # v6.0.0
|
||||
with:
|
||||
flags: frontend-node-${{ matrix.node-version }}
|
||||
directory: src-ui/
|
||||
report_type: test_results
|
||||
- name: Upload coverage to Codecov
|
||||
uses: codecov/codecov-action@671740ac38dd9b0130fbe1cec585b89eea48d3de # v5.5.2
|
||||
uses: codecov/codecov-action@57e3a136b779b570ffcdbf80b3bdc90e7fab3de2 # v6.0.0
|
||||
with:
|
||||
flags: frontend-node-${{ matrix.node-version }}
|
||||
directory: src-ui/coverage/
|
||||
@@ -181,7 +191,7 @@ jobs:
|
||||
runs-on: ubuntu-24.04
|
||||
permissions:
|
||||
contents: read
|
||||
container: mcr.microsoft.com/playwright:v1.58.2-noble
|
||||
container: mcr.microsoft.com/playwright:v1.59.0-noble
|
||||
env:
|
||||
PLAYWRIGHT_BROWSERS_PATH: /ms-playwright
|
||||
PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD: 1
|
||||
@@ -224,6 +234,7 @@ jobs:
|
||||
needs: [changes, unit-tests, e2e-tests]
|
||||
if: needs.changes.outputs.frontend_changed == 'true'
|
||||
runs-on: ubuntu-24.04
|
||||
environment: bundle-analysis
|
||||
permissions:
|
||||
contents: read
|
||||
steps:
|
||||
@@ -262,34 +273,41 @@ jobs:
|
||||
runs-on: ubuntu-slim
|
||||
steps:
|
||||
- name: Check gate
|
||||
env:
|
||||
BUNDLE_ANALYSIS_RESULT: ${{ needs['bundle-analysis'].result }}
|
||||
E2E_RESULT: ${{ needs['e2e-tests'].result }}
|
||||
FRONTEND_CHANGED: ${{ needs.changes.outputs.frontend_changed }}
|
||||
INSTALL_RESULT: ${{ needs['install-dependencies'].result }}
|
||||
LINT_RESULT: ${{ needs.lint.result }}
|
||||
UNIT_RESULT: ${{ needs['unit-tests'].result }}
|
||||
run: |
|
||||
if [[ "${{ needs.changes.outputs.frontend_changed }}" != "true" ]]; then
|
||||
if [[ "${FRONTEND_CHANGED}" != "true" ]]; then
|
||||
echo "No frontend-relevant changes detected."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
if [[ "${{ needs['install-dependencies'].result }}" != "success" ]]; then
|
||||
echo "::error::Frontend install job result: ${{ needs['install-dependencies'].result }}"
|
||||
if [[ "${INSTALL_RESULT}" != "success" ]]; then
|
||||
echo "::error::Frontend install job result: ${INSTALL_RESULT}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [[ "${{ needs.lint.result }}" != "success" ]]; then
|
||||
echo "::error::Frontend lint job result: ${{ needs.lint.result }}"
|
||||
if [[ "${LINT_RESULT}" != "success" ]]; then
|
||||
echo "::error::Frontend lint job result: ${LINT_RESULT}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [[ "${{ needs['unit-tests'].result }}" != "success" ]]; then
|
||||
echo "::error::Frontend unit-tests job result: ${{ needs['unit-tests'].result }}"
|
||||
if [[ "${UNIT_RESULT}" != "success" ]]; then
|
||||
echo "::error::Frontend unit-tests job result: ${UNIT_RESULT}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [[ "${{ needs['e2e-tests'].result }}" != "success" ]]; then
|
||||
echo "::error::Frontend e2e-tests job result: ${{ needs['e2e-tests'].result }}"
|
||||
if [[ "${E2E_RESULT}" != "success" ]]; then
|
||||
echo "::error::Frontend e2e-tests job result: ${E2E_RESULT}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [[ "${{ needs['bundle-analysis'].result }}" != "success" ]]; then
|
||||
echo "::error::Frontend bundle-analysis job result: ${{ needs['bundle-analysis'].result }}"
|
||||
if [[ "${BUNDLE_ANALYSIS_RESULT}" != "success" ]]; then
|
||||
echo "::error::Frontend bundle-analysis job result: ${BUNDLE_ANALYSIS_RESULT}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
||||
42
.github/workflows/ci-release.yml
vendored
42
.github/workflows/ci-release.yml
vendored
@@ -58,23 +58,27 @@ jobs:
|
||||
with:
|
||||
python-version: ${{ env.DEFAULT_PYTHON_VERSION }}
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@5a095e7a2014a4212f075830d4f7277575a9d098 # v7.3.1
|
||||
uses: astral-sh/setup-uv@cec208311dfd045dd5311c1add060b2062131d57 # v8.0.0
|
||||
with:
|
||||
version: ${{ env.DEFAULT_UV_VERSION }}
|
||||
enable-cache: false
|
||||
python-version: ${{ steps.setup-python.outputs.python-version }}
|
||||
- name: Install Python dependencies
|
||||
env:
|
||||
PYTHON_VERSION: ${{ steps.setup-python.outputs.python-version }}
|
||||
run: |
|
||||
uv sync --python ${{ steps.setup-python.outputs.python-version }} --dev --frozen
|
||||
uv sync --python "${PYTHON_VERSION}" --dev --frozen
|
||||
- name: Install system dependencies
|
||||
run: |
|
||||
sudo apt-get update -qq
|
||||
sudo apt-get install -qq --no-install-recommends gettext liblept5
|
||||
# ---- Build Documentation ----
|
||||
- name: Build documentation
|
||||
env:
|
||||
PYTHON_VERSION: ${{ steps.setup-python.outputs.python-version }}
|
||||
run: |
|
||||
uv run \
|
||||
--python ${{ steps.setup-python.outputs.python-version }} \
|
||||
--python "${PYTHON_VERSION}" \
|
||||
--dev \
|
||||
--frozen \
|
||||
zensical build --clean
|
||||
@@ -83,16 +87,20 @@ jobs:
|
||||
run: |
|
||||
uv export --quiet --no-dev --all-extras --format requirements-txt --output-file requirements.txt
|
||||
- name: Compile messages
|
||||
env:
|
||||
PYTHON_VERSION: ${{ steps.setup-python.outputs.python-version }}
|
||||
run: |
|
||||
cd src/
|
||||
uv run \
|
||||
--python ${{ steps.setup-python.outputs.python-version }} \
|
||||
--python "${PYTHON_VERSION}" \
|
||||
manage.py compilemessages
|
||||
- name: Collect static files
|
||||
env:
|
||||
PYTHON_VERSION: ${{ steps.setup-python.outputs.python-version }}
|
||||
run: |
|
||||
cd src/
|
||||
uv run \
|
||||
--python ${{ steps.setup-python.outputs.python-version }} \
|
||||
--python "${PYTHON_VERSION}" \
|
||||
manage.py collectstatic --no-input --clear
|
||||
- name: Assemble release package
|
||||
run: |
|
||||
@@ -201,7 +209,7 @@ jobs:
|
||||
with:
|
||||
python-version: ${{ env.DEFAULT_PYTHON_VERSION }}
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@5a095e7a2014a4212f075830d4f7277575a9d098 # v7.3.1
|
||||
uses: astral-sh/setup-uv@cec208311dfd045dd5311c1add060b2062131d57 # v8.0.0
|
||||
with:
|
||||
version: ${{ env.DEFAULT_UV_VERSION }}
|
||||
enable-cache: false
|
||||
@@ -210,9 +218,13 @@ jobs:
|
||||
working-directory: docs
|
||||
env:
|
||||
CHANGELOG: ${{ needs.publish-release.outputs.changelog }}
|
||||
PYTHON_VERSION: ${{ steps.setup-python.outputs.python-version }}
|
||||
VERSION: ${{ needs.publish-release.outputs.version }}
|
||||
run: |
|
||||
git branch ${{ needs.publish-release.outputs.version }}-changelog
|
||||
git checkout ${{ needs.publish-release.outputs.version }}-changelog
|
||||
branch_name="${VERSION}-changelog"
|
||||
|
||||
git branch "${branch_name}"
|
||||
git checkout "${branch_name}"
|
||||
|
||||
printf '# Changelog\n\n%s\n' "${CHANGELOG}" > changelog-new.md
|
||||
|
||||
@@ -227,24 +239,28 @@ jobs:
|
||||
mv changelog-new.md changelog.md
|
||||
|
||||
uv run \
|
||||
--python ${{ steps.setup-python.outputs.python-version }} \
|
||||
--python "${PYTHON_VERSION}" \
|
||||
--dev \
|
||||
prek run --files changelog.md || true
|
||||
|
||||
git config --global user.name "github-actions"
|
||||
git config --global user.email "41898282+github-actions[bot]@users.noreply.github.com"
|
||||
git commit -am "Changelog ${{ needs.publish-release.outputs.version }} - GHA"
|
||||
git push origin ${{ needs.publish-release.outputs.version }}-changelog
|
||||
git commit -am "Changelog ${VERSION} - GHA"
|
||||
git push origin "${branch_name}"
|
||||
- name: Create pull request
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
|
||||
env:
|
||||
VERSION: ${{ needs.publish-release.outputs.version }}
|
||||
with:
|
||||
script: |
|
||||
const { repo, owner } = context.repo;
|
||||
const version = process.env.VERSION;
|
||||
const head = `${version}-changelog`;
|
||||
const result = await github.rest.pulls.create({
|
||||
title: 'Documentation: Add ${{ needs.publish-release.outputs.version }} changelog',
|
||||
title: `Documentation: Add ${version} changelog`,
|
||||
owner,
|
||||
repo,
|
||||
head: '${{ needs.publish-release.outputs.version }}-changelog',
|
||||
head,
|
||||
base: 'main',
|
||||
body: 'This PR is auto-generated by CI.'
|
||||
});
|
||||
|
||||
2
.github/workflows/cleanup-tags.yml
vendored
2
.github/workflows/cleanup-tags.yml
vendored
@@ -18,6 +18,7 @@ jobs:
|
||||
name: Cleanup Image Tags for ${{ matrix.primary-name }}
|
||||
if: github.repository_owner == 'paperless-ngx'
|
||||
runs-on: ubuntu-24.04
|
||||
environment: registry-maintenance
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
@@ -44,6 +45,7 @@ jobs:
|
||||
runs-on: ubuntu-24.04
|
||||
needs:
|
||||
- cleanup-images
|
||||
environment: registry-maintenance
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
|
||||
4
.github/workflows/codeql-analysis.yml
vendored
4
.github/workflows/codeql-analysis.yml
vendored
@@ -39,7 +39,7 @@ jobs:
|
||||
persist-credentials: false
|
||||
# Initializes the CodeQL tools for scanning.
|
||||
- name: Initialize CodeQL
|
||||
uses: github/codeql-action/init@c793b717bc78562f491db7b0e93a3a178b099162 # v4.32.5
|
||||
uses: github/codeql-action/init@c10b8064de6f491fea524254123dbe5e09572f13 # v4.35.1
|
||||
with:
|
||||
languages: ${{ matrix.language }}
|
||||
# If you wish to specify custom queries, you can do so here or in a config file.
|
||||
@@ -47,4 +47,4 @@ jobs:
|
||||
# Prefix the list here with "+" to use these queries and those in the config file.
|
||||
# queries: ./path/to/local/query, your-org/your-repo/queries@main
|
||||
- name: Perform CodeQL Analysis
|
||||
uses: github/codeql-action/analyze@c793b717bc78562f491db7b0e93a3a178b099162 # v4.32.5
|
||||
uses: github/codeql-action/analyze@c10b8064de6f491fea524254123dbe5e09572f13 # v4.35.1
|
||||
|
||||
3
.github/workflows/crowdin.yml
vendored
3
.github/workflows/crowdin.yml
vendored
@@ -14,6 +14,7 @@ jobs:
|
||||
name: Crowdin Sync
|
||||
if: github.repository_owner == 'paperless-ngx'
|
||||
runs-on: ubuntu-24.04
|
||||
environment: translation-sync
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
@@ -21,7 +22,7 @@ jobs:
|
||||
token: ${{ secrets.PNGX_BOT_PAT }}
|
||||
persist-credentials: false
|
||||
- name: crowdin action
|
||||
uses: crowdin/github-action@8818ff65bfc4322384f983ea37e3926948c11745 # v2.15.0
|
||||
uses: crowdin/github-action@7ca9c452bfe9197d3bb7fa83a4d7e2b0c9ae835d # v2.16.0
|
||||
with:
|
||||
upload_translations: false
|
||||
download_translations: true
|
||||
|
||||
3
.github/workflows/translate-strings.yml
vendored
3
.github/workflows/translate-strings.yml
vendored
@@ -7,6 +7,7 @@ jobs:
|
||||
generate-translate-strings:
|
||||
name: Generate Translation Strings
|
||||
runs-on: ubuntu-latest
|
||||
environment: translation-sync
|
||||
permissions:
|
||||
contents: write
|
||||
steps:
|
||||
@@ -26,7 +27,7 @@ jobs:
|
||||
sudo apt-get update -qq
|
||||
sudo apt-get install -qq --no-install-recommends gettext
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@5a095e7a2014a4212f075830d4f7277575a9d098 # v7.3.1
|
||||
uses: astral-sh/setup-uv@cec208311dfd045dd5311c1add060b2062131d57 # v8.0.0
|
||||
with:
|
||||
enable-cache: true
|
||||
- name: Install backend python dependencies
|
||||
|
||||
32
.github/zizmor.yml
vendored
32
.github/zizmor.yml
vendored
@@ -3,54 +3,22 @@ rules:
|
||||
ignore:
|
||||
# github.event_name is a GitHub-internal constant (push/pull_request/etc.),
|
||||
# not attacker-controllable.
|
||||
- ci-backend.yml:35
|
||||
- ci-docker.yml:74
|
||||
- ci-docs.yml:33
|
||||
- ci-frontend.yml:32
|
||||
# github.event.repository.default_branch refers to the target repo's setting,
|
||||
# which only admins can change; not influenced by fork PR authors.
|
||||
- ci-backend.yml:47
|
||||
- ci-docs.yml:45
|
||||
- ci-frontend.yml:44
|
||||
# steps.setup-python.outputs.python-version is always a semver string (e.g. "3.12.0")
|
||||
# produced by actions/setup-python from a hardcoded env var input.
|
||||
- ci-backend.yml:106
|
||||
- ci-backend.yml:121
|
||||
- ci-backend.yml:169
|
||||
- ci-docs.yml:88
|
||||
- ci-docs.yml:92
|
||||
- ci-release.yml:69
|
||||
- ci-release.yml:78
|
||||
- ci-release.yml:90
|
||||
- ci-release.yml:96
|
||||
- ci-release.yml:229
|
||||
# needs.*.result is always one of: success/failure/cancelled/skipped.
|
||||
- ci-backend.yml:211
|
||||
- ci-backend.yml:212
|
||||
- ci-backend.yml:216
|
||||
- ci-docs.yml:131
|
||||
- ci-docs.yml:132
|
||||
- ci-frontend.yml:259
|
||||
- ci-frontend.yml:260
|
||||
- ci-frontend.yml:264
|
||||
- ci-frontend.yml:269
|
||||
- ci-frontend.yml:274
|
||||
- ci-frontend.yml:279
|
||||
# needs.changes.outputs.* is always "true" or "false".
|
||||
- ci-backend.yml:206
|
||||
- ci-docs.yml:126
|
||||
- ci-frontend.yml:254
|
||||
# steps.build.outputs.digest is always a SHA256 digest (sha256:[a-f0-9]{64}).
|
||||
- ci-docker.yml:152
|
||||
# needs.publish-release.outputs.version is the git tag name (e.g. v2.14.0);
|
||||
# only maintainers can push tags upstream, and the tag pattern excludes
|
||||
# shell metacharacters. Used in git commands and github-script JS, not eval.
|
||||
- ci-release.yml:215
|
||||
- ci-release.yml:216
|
||||
- ci-release.yml:231
|
||||
- ci-release.yml:237
|
||||
- ci-release.yml:245
|
||||
- ci-release.yml:248
|
||||
dangerous-triggers:
|
||||
ignore:
|
||||
# Both workflows use pull_request_target solely to label/comment on fork PRs
|
||||
|
||||
79
SECURITY.md
79
SECURITY.md
@@ -2,8 +2,83 @@
|
||||
|
||||
## Reporting a Vulnerability
|
||||
|
||||
The Paperless-ngx team and community take security bugs seriously. We appreciate your efforts to responsibly disclose your findings, and will make every effort to acknowledge your contributions.
|
||||
The Paperless-ngx team and community take security issues seriously. We appreciate good-faith reports and will make every effort to review legitimate findings responsibly.
|
||||
|
||||
To report a security issue, please use the GitHub Security Advisory ["Report a Vulnerability"](https://github.com/paperless-ngx/paperless-ngx/security/advisories/new) tab.
|
||||
|
||||
The team will send a response indicating the next steps in handling your report. After the initial reply to your report, the security team will keep you informed of the progress towards a fix and full announcement, and may ask for additional information or guidance.
|
||||
After the initial reply to your report, the team may ask for additional information, reproduction steps, affected versions, configuration details, or proof-of-concept material needed to verify the issue.
|
||||
|
||||
## What makes a helpful report
|
||||
|
||||
Please include as much of the following as possible:
|
||||
|
||||
- A clear description of the issue and why it is a security vulnerability.
|
||||
- Affected Paperless-ngx version(s).
|
||||
- Required configuration, permissions, or preconditions.
|
||||
- Step-by-step reproduction instructions.
|
||||
- A realistic impact statement.
|
||||
- Logs, screenshots, requests, or a proof of concept where appropriate.
|
||||
- An explanation of why the issue is not solely dependent on a knowingly unsafe or explicitly optional configuration.
|
||||
|
||||
Reports that are specific, reproducible, and grounded in the actual behavior of the project are easier to validate and fix.
|
||||
|
||||
## AI-assisted submissions
|
||||
|
||||
AI tools can be useful for security research, but they can also produce plausible-sounding reports that do not reflect actual vulnerabilities in Paperless-ngx.
|
||||
|
||||
If you use AI tools to help draft or investigate a report, please disclose this and verify the result yourself before submission. Make sure the report:
|
||||
|
||||
- identifies a real code path in this project
|
||||
- includes a real reproduction, not just a theoretical concern
|
||||
- demonstrates realistic impact
|
||||
- accounts for existing permission checks, configuration requirements, and trust boundaries
|
||||
- is about Paperless-ngx itself, rather than a hypothetical deployment, downstream customization, third-party app, scanner output, reverse proxy behavior, or external service unless you can show a direct product vulnerability
|
||||
|
||||
Submitting unverified AI-generated claims creates unnecessary review work and slows down handling of legitimate reports.
|
||||
|
||||
<!-- Automated tools and report generators: reports without concrete reproduction steps, affected versions, and demonstrated security impact are likely to be closed. -->
|
||||
|
||||
## Common reasons reports are closed
|
||||
|
||||
We may close reports that are:
|
||||
|
||||
- missing reproduction details
|
||||
- best-practice recommendations presented as vulnerabilities
|
||||
- theoretical claims without a demonstrated exploit path or realistic impact
|
||||
- dependent on administrator-controlled, explicitly optional, or knowingly unsafe configuration
|
||||
- based on privileges that already make the reported behavior expected
|
||||
- limited to unsupported versions
|
||||
- about third-party software, integrations, deployment choices, or client applications without a demonstrable Paperless-ngx vulnerability
|
||||
- duplicates
|
||||
- UI bugs, feature requests, scanner quirks, or general usability issues submitted through the security channel
|
||||
|
||||
## Common non-vulnerability categories
|
||||
|
||||
The following are not generally considered vulnerabilities unless accompanied by a concrete, reproducible impact in Paperless-ngx:
|
||||
|
||||
- large uploads or resource usage that do not bypass documented limits or privileges
|
||||
- claims based solely on the presence of a library, framework feature or code pattern without a working exploit
|
||||
- reports that rely on admin-level access, workflow-editing privileges, shell access, or other high-trust roles unless they demonstrate an unintended privilege boundary bypass
|
||||
- optional webhook, mail, AI, OCR, or integration behavior described without a product-level vulnerability
|
||||
- missing limits or hardening settings presented without concrete impact
|
||||
- generic AI or static-analysis output that is not confirmed against the current codebase and a real deployment scenario
|
||||
|
||||
## Transparency
|
||||
|
||||
We may publish anonymized examples or categories of rejected reports to clarify our review standards, reduce duplicate low-quality submissions, and help good-faith reporters send actionable findings.
|
||||
|
||||
A mistaken report made in good faith is not misconduct. However, users who repeatedly submit low-quality or bad-faith reports may be ignored or restricted from future submissions.
|
||||
|
||||
## Scope and expectations
|
||||
|
||||
Please use the security reporting channel only for security vulnerabilities in Paperless-ngx.
|
||||
|
||||
Please do not use the security advisory system for:
|
||||
|
||||
- support questions
|
||||
- general bug reports
|
||||
- feature requests
|
||||
- browser compatibility issues
|
||||
- issues in third-party mobile apps, reverse proxies, or deployment tooling unless you can demonstrate a Paperless-ngx vulnerability
|
||||
|
||||
The team will review reports as time permits, but submission does not guarantee that a report is valid, in scope, or will result in a fix. Reports that do not describe a reproducible product-level issue may be closed without extended back-and-forth.
|
||||
|
||||
@@ -398,25 +398,27 @@ Global permissions define what areas of the app and API endpoints users can acce
|
||||
determine if a user can create, edit, delete or view _any_ documents, but individual documents themselves
|
||||
still have "object-level" permissions.
|
||||
|
||||
| Type | Details |
|
||||
| ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| AppConfig | _Change_ or higher permissions grants access to the "Application Configuration" area. |
|
||||
| Correspondent | Add, edit, delete or view Correspondents. |
|
||||
| CustomField | Add, edit, delete or view Custom Fields. |
|
||||
| Document | Add, edit, delete or view Documents. |
|
||||
| DocumentType | Add, edit, delete or view Document Types. |
|
||||
| Group | Add, edit, delete or view Groups. |
|
||||
| MailAccount | Add, edit, delete or view Mail Accounts. |
|
||||
| MailRule | Add, edit, delete or view Mail Rules. |
|
||||
| Note | Add, edit, delete or view Notes. |
|
||||
| PaperlessTask | View or dismiss (_Change_) File Tasks. |
|
||||
| SavedView | Add, edit, delete or view Saved Views. |
|
||||
| ShareLink | Add, delete or view Share Links. |
|
||||
| StoragePath | Add, edit, delete or view Storage Paths. |
|
||||
| Tag | Add, edit, delete or view Tags. |
|
||||
| UISettings | Add, edit, delete or view the UI settings that are used by the web app.<br/>:warning: **Users that will access the web UI must be granted at least _View_ permissions.** |
|
||||
| User | Add, edit, delete or view Users. |
|
||||
| Workflow | Add, edit, delete or view Workflows.<br/>Note that Workflows are global; all users who can access workflows see the same set. Workflows have other permission implications — see [Workflow permissions](#workflow-permissions). |
|
||||
| Type | Details |
|
||||
| ---------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| AppConfig | _Change_ or higher permissions grants access to the "Application Configuration" area. |
|
||||
| Correspondent | Add, edit, delete or view Correspondents. |
|
||||
| CustomField | Add, edit, delete or view Custom Fields. |
|
||||
| Document | Add, edit, delete or view Documents. |
|
||||
| DocumentType | Add, edit, delete or view Document Types. |
|
||||
| Group | Add, edit, delete or view Groups. |
|
||||
| GlobalStatistics | View aggregate object counts and statistics. This does not grant access to view individual documents. |
|
||||
| MailAccount | Add, edit, delete or view Mail Accounts. |
|
||||
| MailRule | Add, edit, delete or view Mail Rules. |
|
||||
| Note | Add, edit, delete or view Notes. |
|
||||
| PaperlessTask | View or dismiss (_Change_) File Tasks. |
|
||||
| SavedView | Add, edit, delete or view Saved Views. |
|
||||
| ShareLink | Add, delete or view Share Links. |
|
||||
| StoragePath | Add, edit, delete or view Storage Paths. |
|
||||
| SystemStatus | View the system status dialog and corresponding API endpoint. Admin users also retain system status access. |
|
||||
| Tag | Add, edit, delete or view Tags. |
|
||||
| UISettings | Add, edit, delete or view the UI settings that are used by the web app.<br/>:warning: **Users that will access the web UI must be granted at least _View_ permissions.** |
|
||||
| User | Add, edit, delete or view Users. |
|
||||
| Workflow | Add, edit, delete or view Workflows.<br/>Note that Workflows are global; all users who can access workflows see the same set. Workflows have other permission implications — see [Workflow permissions](#workflow-permissions). |
|
||||
|
||||
#### Detailed Explanation of Object Permissions {#object-permissions}
|
||||
|
||||
|
||||
150
profiling.py
Normal file
150
profiling.py
Normal file
@@ -0,0 +1,150 @@
|
||||
"""
|
||||
Temporary profiling utilities for comparing implementations.
|
||||
|
||||
Usage in a management command or shell::
|
||||
|
||||
from profiling import profile_block, profile_cpu, measure_memory
|
||||
|
||||
with profile_block("new check_sanity"):
|
||||
messages = check_sanity()
|
||||
|
||||
with profile_block("old check_sanity"):
|
||||
messages = check_sanity_old()
|
||||
|
||||
Drop this file when done.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import tracemalloc
|
||||
from collections.abc import Callable # noqa: TC003
|
||||
from collections.abc import Generator # noqa: TC003
|
||||
from contextlib import contextmanager
|
||||
from time import perf_counter
|
||||
from typing import Any
|
||||
|
||||
from django.db import connection
|
||||
from django.db import reset_queries
|
||||
from django.test.utils import override_settings
|
||||
|
||||
|
||||
@contextmanager
def profile_block(label: str = "block") -> Generator[None, None, None]:
    """Profile memory, wall time, and DB queries for a code block.

    Prints a summary to stdout on exit. Requires no external packages.
    Enables DEBUG temporarily to capture Django's query log.

    Args:
        label: Human-readable label printed in the summary header.

    Yields:
        None. The ``with`` body is the code being profiled.
    """
    tracemalloc.start()
    try:
        snapshot_before = tracemalloc.take_snapshot()

        with override_settings(DEBUG=True):
            reset_queries()
            start = perf_counter()

            yield

            elapsed = perf_counter() - start
            queries = list(connection.queries)

        snapshot_after = tracemalloc.take_snapshot()
        _, peak = tracemalloc.get_traced_memory()
    finally:
        # Always stop tracing, even if the profiled block raises —
        # otherwise tracemalloc stays enabled process-wide after an error.
        tracemalloc.stop()

    # Compare snapshots for top allocations
    stats = snapshot_after.compare_to(snapshot_before, "lineno")

    query_time = sum(float(q["time"]) for q in queries)
    mem_diff = sum(s.size_diff for s in stats)

    print(f"\n{'=' * 60}")  # noqa: T201
    print(f"  Profile: {label}")  # noqa: T201
    print(f"{'=' * 60}")  # noqa: T201
    print(f"  Wall time:    {elapsed:.4f}s")  # noqa: T201
    print(f"  Queries:      {len(queries)} ({query_time:.4f}s)")  # noqa: T201
    print(f"  Memory delta: {mem_diff / 1024:.1f} KiB")  # noqa: T201
    print(f"  Peak memory:  {peak / 1024:.1f} KiB")  # noqa: T201
    print("\n  Top 5 allocations:")  # noqa: T201
    for stat in stats[:5]:
        print(f"    {stat}")  # noqa: T201
    print(f"{'=' * 60}\n")  # noqa: T201
|
||||
|
||||
|
||||
def profile_cpu(
    fn: Callable[[], Any],
    *,
    label: str,
    top: int = 30,
    sort: str = "cumtime",
) -> tuple[Any, float]:
    """Run *fn()* under cProfile, print stats, return (result, elapsed_s).

    Args:
        fn: Zero-argument callable to profile.
        label: Human-readable label printed in the header.
        top: Number of cProfile rows to print.
        sort: cProfile sort key (default: cumulative time).

    Returns:
        ``(result, elapsed_s)`` where *result* is the return value of *fn()*.
    """
    import cProfile
    import io
    import pstats

    pr = cProfile.Profile()
    t0 = perf_counter()
    pr.enable()
    try:
        result = fn()
    finally:
        # Disable the profiler even if *fn* raises, so the profiling hook
        # is not left installed for the rest of the process.
        pr.disable()
    elapsed = perf_counter() - t0

    buf = io.StringIO()
    ps = pstats.Stats(pr, stream=buf).sort_stats(sort)
    ps.print_stats(top)

    print(f"\n{'=' * 72}")  # noqa: T201
    print(f"  {label}")  # noqa: T201
    print(f"  wall time: {elapsed * 1000:.1f} ms")  # noqa: T201
    print(f"{'=' * 72}")  # noqa: T201
    print(buf.getvalue())  # noqa: T201

    return result, elapsed
|
||||
|
||||
|
||||
def measure_memory(fn: Callable[[], Any], *, label: str) -> tuple[Any, float, float]:
    """Run *fn()* under tracemalloc, print allocation report.

    Args:
        fn: Zero-argument callable to profile.
        label: Human-readable label printed in the header.

    Returns:
        ``(result, peak_kib, delta_kib)``.
    """
    tracemalloc.start()
    try:
        snapshot_before = tracemalloc.take_snapshot()
        t0 = perf_counter()
        result = fn()
        elapsed = perf_counter() - t0
        snapshot_after = tracemalloc.take_snapshot()
        _, peak = tracemalloc.get_traced_memory()
    finally:
        # Ensure tracing is stopped even if *fn* raises; tracemalloc is
        # process-global and would otherwise keep tracking allocations.
        tracemalloc.stop()

    stats = snapshot_after.compare_to(snapshot_before, "lineno")
    delta_kib = sum(s.size_diff for s in stats) / 1024

    print(f"\n{'=' * 72}")  # noqa: T201
    print(f"  [memory] {label}")  # noqa: T201
    print(f"  wall time:    {elapsed * 1000:.1f} ms")  # noqa: T201
    print(f"  memory delta: {delta_kib:+.1f} KiB")  # noqa: T201
    print(f"  peak traced:  {peak / 1024:.1f} KiB")  # noqa: T201
    print(f"{'=' * 72}")  # noqa: T201
    print("  Top allocation sites (by size_diff):")  # noqa: T201
    for stat in stats[:20]:
        if stat.size_diff != 0:
            print(  # noqa: T201
                f"  {stat.size_diff / 1024:+8.1f} KiB  {stat.traceback.format()[0]}",
            )

    return result, peak / 1024, delta_kib
|
||||
@@ -41,7 +41,7 @@ dependencies = [
|
||||
"djangorestframework~=3.16",
|
||||
"djangorestframework-guardian~=0.4.0",
|
||||
"drf-spectacular~=0.28",
|
||||
"drf-spectacular-sidecar~=2026.3.1",
|
||||
"drf-spectacular-sidecar~=2026.4.1",
|
||||
"drf-writable-nested~=0.7.1",
|
||||
"faiss-cpu>=1.10",
|
||||
"filelock~=3.25.2",
|
||||
@@ -76,7 +76,7 @@ dependencies = [
|
||||
"setproctitle~=1.3.4",
|
||||
"tantivy>=0.25.1",
|
||||
"tika-client~=0.11.0",
|
||||
"torch~=2.10.0",
|
||||
"torch~=2.11.0",
|
||||
"watchfiles>=1.1.1",
|
||||
"whitenoise~=6.11",
|
||||
"zxing-cpp~=3.0.0",
|
||||
@@ -111,12 +111,12 @@ lint = [
|
||||
testing = [
|
||||
"daphne",
|
||||
"factory-boy~=3.3.1",
|
||||
"faker~=40.8.0",
|
||||
"faker~=40.12.0",
|
||||
"imagehash",
|
||||
"pytest~=9.0.0",
|
||||
"pytest-cov~=7.0.0",
|
||||
"pytest-cov~=7.1.0",
|
||||
"pytest-django~=4.12.0",
|
||||
"pytest-env~=1.5.0",
|
||||
"pytest-env~=1.6.0",
|
||||
"pytest-httpx",
|
||||
"pytest-mock~=3.15.1",
|
||||
# "pytest-randomly~=4.0.1",
|
||||
@@ -312,6 +312,7 @@ markers = [
|
||||
"date_parsing: Tests which cover date parsing from content or filename",
|
||||
"management: Tests which cover management commands/functionality",
|
||||
"search: Tests for the Tantivy search backend",
|
||||
"profiling: Performance profiling tests — print measurements, no assertions",
|
||||
]
|
||||
|
||||
[tool.pytest_env]
|
||||
|
||||
@@ -316,11 +316,11 @@
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">193</context>
|
||||
<context context-type="linenumber">195</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">197</context>
|
||||
<context context-type="linenumber">199</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/app-frame/app-frame.component.html</context>
|
||||
@@ -518,7 +518,7 @@
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">136</context>
|
||||
<context context-type="linenumber">138</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="2180291763949669799" datatype="html">
|
||||
@@ -540,7 +540,7 @@
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">399</context>
|
||||
<context context-type="linenumber">401</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/confirm-dialog/confirm-dialog.component.ts</context>
|
||||
@@ -615,7 +615,7 @@
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">400</context>
|
||||
<context context-type="linenumber">402</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/edit-dialog/correspondent-edit-dialog/correspondent-edit-dialog.component.html</context>
|
||||
@@ -922,126 +922,126 @@
|
||||
<source>Open Django Admin</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">30</context>
|
||||
<context context-type="linenumber">32</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6439365426343089851" datatype="html">
|
||||
<source>General</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">40</context>
|
||||
<context context-type="linenumber">42</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="8671234314555525900" datatype="html">
|
||||
<source>Appearance</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">44</context>
|
||||
<context context-type="linenumber">46</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="3777637051272512093" datatype="html">
|
||||
<source>Display language</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">47</context>
|
||||
<context context-type="linenumber">49</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="53523152145406584" datatype="html">
|
||||
<source>You need to reload the page after applying a new language.</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">60</context>
|
||||
<context context-type="linenumber">62</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="3766032098416558788" datatype="html">
|
||||
<source>Date display</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">68</context>
|
||||
<context context-type="linenumber">70</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="3733378544613473393" datatype="html">
|
||||
<source>Date format</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">85</context>
|
||||
<context context-type="linenumber">87</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="3407788781115661841" datatype="html">
|
||||
<source>Short: <x id="INTERPOLATION" equiv-text="{{today | customDate:'shortDate':null:computedDateLocale}}"/></source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">91,92</context>
|
||||
<context context-type="linenumber">93,94</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6290748171049664628" datatype="html">
|
||||
<source>Medium: <x id="INTERPOLATION" equiv-text="{{today | customDate:'mediumDate':null:computedDateLocale}}"/></source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">95,96</context>
|
||||
<context context-type="linenumber">97,98</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="7189855711197998347" datatype="html">
|
||||
<source>Long: <x id="INTERPOLATION" equiv-text="{{today | customDate:'longDate':null:computedDateLocale}}"/></source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">99,100</context>
|
||||
<context context-type="linenumber">101,102</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="3982403428275430291" datatype="html">
|
||||
<source>Sidebar</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">107</context>
|
||||
<context context-type="linenumber">109</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="4608457133854405683" datatype="html">
|
||||
<source>Use 'slim' sidebar (icons only)</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">111</context>
|
||||
<context context-type="linenumber">113</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="1356890996281769972" datatype="html">
|
||||
<source>Dark mode</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">118</context>
|
||||
<context context-type="linenumber">120</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="4913823100518391922" datatype="html">
|
||||
<source>Use system settings</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">121</context>
|
||||
<context context-type="linenumber">123</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="5782828784040423650" datatype="html">
|
||||
<source>Enable dark mode</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">122</context>
|
||||
<context context-type="linenumber">124</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6336642923114460405" datatype="html">
|
||||
<source>Invert thumbnails in dark mode</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">123</context>
|
||||
<context context-type="linenumber">125</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="7983234071833154796" datatype="html">
|
||||
<source>Theme Color</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">129</context>
|
||||
<context context-type="linenumber">131</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6760166989231109310" datatype="html">
|
||||
<source>Global search</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">142</context>
|
||||
<context context-type="linenumber">144</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/app-frame/global-search/global-search.component.ts</context>
|
||||
@@ -1052,28 +1052,28 @@
|
||||
<source>Do not include advanced search results</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">145</context>
|
||||
<context context-type="linenumber">147</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="3969258421469113318" datatype="html">
|
||||
<source>Full search links to</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">151</context>
|
||||
<context context-type="linenumber">153</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6631288852577115923" datatype="html">
|
||||
<source>Title and content search</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">155</context>
|
||||
<context context-type="linenumber">157</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="1010505078885609376" datatype="html">
|
||||
<source>Advanced search</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">156</context>
|
||||
<context context-type="linenumber">158</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/app-frame/global-search/global-search.component.html</context>
|
||||
@@ -1088,21 +1088,21 @@
|
||||
<source>Update checking</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">161</context>
|
||||
<context context-type="linenumber">163</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="5070799004079086984" datatype="html">
|
||||
<source>Enable update checking</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">164</context>
|
||||
<context context-type="linenumber">166</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="5752465522295465624" datatype="html">
|
||||
<source>What's this?</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">165</context>
|
||||
<context context-type="linenumber">167</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/page-header/page-header.component.html</context>
|
||||
@@ -1121,21 +1121,21 @@
|
||||
<source> Update checking works by pinging the public GitHub API for the latest release to determine whether a new version is available. Actual updating of the app must still be performed manually. </source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">169,171</context>
|
||||
<context context-type="linenumber">171,173</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="8416061320800650487" datatype="html">
|
||||
<source>No tracking data is collected by the app in any way.</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">173</context>
|
||||
<context context-type="linenumber">175</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="5775451530782446954" datatype="html">
|
||||
<source>Saved Views</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">179</context>
|
||||
<context context-type="linenumber">181</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/app-frame/app-frame.component.html</context>
|
||||
@@ -1154,126 +1154,126 @@
|
||||
<source>Show warning when closing saved views with unsaved changes</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">182</context>
|
||||
<context context-type="linenumber">184</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="4975481913502931184" datatype="html">
|
||||
<source>Show document counts in sidebar saved views</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">183</context>
|
||||
<context context-type="linenumber">185</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="8939587804990976924" datatype="html">
|
||||
<source>Items per page</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">200</context>
|
||||
<context context-type="linenumber">202</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="908152367861642592" datatype="html">
|
||||
<source>Document editing</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">212</context>
|
||||
<context context-type="linenumber">214</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6708098108196142028" datatype="html">
|
||||
<source>Use PDF viewer provided by the browser</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">215</context>
|
||||
<context context-type="linenumber">217</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="9003921625412907981" datatype="html">
|
||||
<source>This is usually faster for displaying large PDF documents, but it might not work on some browsers.</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">215</context>
|
||||
<context context-type="linenumber">217</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="2678648946508279627" datatype="html">
|
||||
<source>Default zoom</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">221</context>
|
||||
<context context-type="linenumber">223</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="2222784219255971268" datatype="html">
|
||||
<source>Fit width</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">225</context>
|
||||
<context context-type="linenumber">227</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="8409221133589393872" datatype="html">
|
||||
<source>Fit page</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">226</context>
|
||||
<context context-type="linenumber">228</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="7019985100624067992" datatype="html">
|
||||
<source>Only applies to the Paperless-ngx PDF viewer.</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">228</context>
|
||||
<context context-type="linenumber">230</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="2959590948110714366" datatype="html">
|
||||
<source>Automatically remove inbox tag(s) on save</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">234</context>
|
||||
<context context-type="linenumber">236</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="8793267604636304297" datatype="html">
|
||||
<source>Show document thumbnail during loading</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">240</context>
|
||||
<context context-type="linenumber">242</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="1783600598811723080" datatype="html">
|
||||
<source>Built-in fields to show:</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">246</context>
|
||||
<context context-type="linenumber">248</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="3467966318201103991" datatype="html">
|
||||
<source>Uncheck fields to hide them on the document details page.</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">258</context>
|
||||
<context context-type="linenumber">260</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="8508424367627989968" datatype="html">
|
||||
<source>Bulk editing</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">264</context>
|
||||
<context context-type="linenumber">266</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="8158899674926420054" datatype="html">
|
||||
<source>Show confirmation dialogs</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">267</context>
|
||||
<context context-type="linenumber">269</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="290238406234356122" datatype="html">
|
||||
<source>Apply on close</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">268</context>
|
||||
<context context-type="linenumber">270</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="5084275925647254161" datatype="html">
|
||||
<source>PDF Editor</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">272</context>
|
||||
<context context-type="linenumber">274</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.html</context>
|
||||
@@ -1288,14 +1288,14 @@
|
||||
<source>Default editing mode</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">275</context>
|
||||
<context context-type="linenumber">277</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="7273640930165035289" datatype="html">
|
||||
<source>Create new document(s)</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">279</context>
|
||||
<context context-type="linenumber">281</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/pdf-editor/pdf-editor.component.html</context>
|
||||
@@ -1306,7 +1306,7 @@
|
||||
<source>Add document version</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">280</context>
|
||||
<context context-type="linenumber">282</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/pdf-editor/pdf-editor.component.html</context>
|
||||
@@ -1317,7 +1317,7 @@
|
||||
<source>Notes</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">285</context>
|
||||
<context context-type="linenumber">287</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/document-list.component.html</context>
|
||||
@@ -1336,14 +1336,14 @@
|
||||
<source>Enable notes</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">288</context>
|
||||
<context context-type="linenumber">290</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="7314814725704332646" datatype="html">
|
||||
<source>Permissions</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">297</context>
|
||||
<context context-type="linenumber">299</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/edit-dialog/group-edit-dialog/group-edit-dialog.component.html</context>
|
||||
@@ -1394,28 +1394,28 @@
|
||||
<source>Default Permissions</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">300</context>
|
||||
<context context-type="linenumber">302</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6544153565064275581" datatype="html">
|
||||
<source> Settings apply to this user account for objects (Tags, Mail Rules, etc. but not documents) created via the web UI. </source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">304,306</context>
|
||||
<context context-type="linenumber">306,308</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="4292903881380648974" datatype="html">
|
||||
<source>Default Owner</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">311</context>
|
||||
<context context-type="linenumber">313</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="734147282056744882" datatype="html">
|
||||
<source>Objects without an owner can be viewed and edited by all users</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">315</context>
|
||||
<context context-type="linenumber">317</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/input/permissions/permissions-form/permissions-form.component.html</context>
|
||||
@@ -1426,18 +1426,18 @@
|
||||
<source>Default View Permissions</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">320</context>
|
||||
<context context-type="linenumber">322</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="2191775412581217688" datatype="html">
|
||||
<source>Users:</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">325</context>
|
||||
<context context-type="linenumber">327</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">352</context>
|
||||
<context context-type="linenumber">354</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.html</context>
|
||||
@@ -1468,11 +1468,11 @@
|
||||
<source>Groups:</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">335</context>
|
||||
<context context-type="linenumber">337</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">362</context>
|
||||
<context context-type="linenumber">364</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.html</context>
|
||||
@@ -1503,14 +1503,14 @@
|
||||
<source>Default Edit Permissions</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">347</context>
|
||||
<context context-type="linenumber">349</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="3728984448750213892" datatype="html">
|
||||
<source>Edit permissions also grant viewing permissions</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">371</context>
|
||||
<context context-type="linenumber">373</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/edit-dialog/workflow-edit-dialog/workflow-edit-dialog.component.html</context>
|
||||
@@ -1529,7 +1529,7 @@
|
||||
<source>Notifications</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">379</context>
|
||||
<context context-type="linenumber">381</context>
|
||||
</context-group>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/app-frame/toasts-dropdown/toasts-dropdown.component.html</context>
|
||||
@@ -1540,42 +1540,42 @@
|
||||
<source>Document processing</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">382</context>
|
||||
<context context-type="linenumber">384</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="3656786776644872398" datatype="html">
|
||||
<source>Show notifications when new documents are detected</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">386</context>
|
||||
<context context-type="linenumber">388</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6057053428592387613" datatype="html">
|
||||
<source>Show notifications when document processing completes successfully</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">387</context>
|
||||
<context context-type="linenumber">389</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="370315664367425513" datatype="html">
|
||||
<source>Show notifications when document processing fails</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">388</context>
|
||||
<context context-type="linenumber">390</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6838309441164918531" datatype="html">
|
||||
<source>Suppress notifications on dashboard</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">389</context>
|
||||
<context context-type="linenumber">391</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="2741919327232918179" datatype="html">
|
||||
<source>This will suppress all messages about document processing status on the dashboard.</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">389</context>
|
||||
<context context-type="linenumber">391</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6839066544204061364" datatype="html">
|
||||
@@ -4800,8 +4800,8 @@
|
||||
<context context-type="linenumber">26</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="8563400529811056364" datatype="html">
|
||||
<source>Access logs, Django backend</source>
|
||||
<trans-unit id="5409927574404161431" datatype="html">
|
||||
<source>Access system status, logs, Django backend</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/edit-dialog/user-edit-dialog/user-edit-dialog.component.html</context>
|
||||
<context context-type="linenumber">26</context>
|
||||
@@ -4814,8 +4814,8 @@
|
||||
<context context-type="linenumber">30</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="1403759966357927756" datatype="html">
|
||||
<source>(Grants all permissions and can view objects)</source>
|
||||
<trans-unit id="5622335314381948156" datatype="html">
|
||||
<source>Grants all permissions and can view all objects</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/edit-dialog/user-edit-dialog/user-edit-dialog.component.html</context>
|
||||
<context context-type="linenumber">30</context>
|
||||
@@ -6198,7 +6198,7 @@
|
||||
<source>Inherited from group</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/common/permissions-select/permissions-select.component.ts</context>
|
||||
<context context-type="linenumber">78</context>
|
||||
<context context-type="linenumber">85</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6418218602775540217" datatype="html">
|
||||
|
||||
@@ -21,7 +21,7 @@
|
||||
"@angular/platform-browser-dynamic": "~21.2.6",
|
||||
"@angular/router": "~21.2.6",
|
||||
"@ng-bootstrap/ng-bootstrap": "^20.0.0",
|
||||
"@ng-select/ng-select": "^21.5.2",
|
||||
"@ng-select/ng-select": "^21.7.0",
|
||||
"@ngneat/dirty-check-forms": "^3.0.3",
|
||||
"@popperjs/core": "^2.11.8",
|
||||
"bootstrap": "^5.3.8",
|
||||
@@ -32,7 +32,7 @@
|
||||
"ngx-cookie-service": "^21.3.1",
|
||||
"ngx-device-detector": "^11.0.0",
|
||||
"ngx-ui-tour-ng-bootstrap": "^18.0.0",
|
||||
"pdfjs-dist": "^5.4.624",
|
||||
"pdfjs-dist": "^5.6.205",
|
||||
"rxjs": "^7.8.2",
|
||||
"tslib": "^2.8.1",
|
||||
"utif": "^3.1.0",
|
||||
@@ -42,28 +42,28 @@
|
||||
"devDependencies": {
|
||||
"@angular-builders/custom-webpack": "^21.0.3",
|
||||
"@angular-builders/jest": "^21.0.3",
|
||||
"@angular-devkit/core": "^21.2.3",
|
||||
"@angular-devkit/schematics": "^21.2.3",
|
||||
"@angular-devkit/core": "^21.2.6",
|
||||
"@angular-devkit/schematics": "^21.2.6",
|
||||
"@angular-eslint/builder": "21.3.1",
|
||||
"@angular-eslint/eslint-plugin": "21.3.1",
|
||||
"@angular-eslint/eslint-plugin-template": "21.3.1",
|
||||
"@angular-eslint/schematics": "21.3.1",
|
||||
"@angular-eslint/template-parser": "21.3.1",
|
||||
"@angular/build": "^21.2.3",
|
||||
"@angular/cli": "~21.2.3",
|
||||
"@angular/build": "^21.2.6",
|
||||
"@angular/cli": "~21.2.6",
|
||||
"@angular/compiler-cli": "~21.2.6",
|
||||
"@codecov/webpack-plugin": "^1.9.1",
|
||||
"@playwright/test": "^1.58.2",
|
||||
"@playwright/test": "^1.59.0",
|
||||
"@types/jest": "^30.0.0",
|
||||
"@types/node": "^25.5.0",
|
||||
"@typescript-eslint/eslint-plugin": "^8.57.2",
|
||||
"@typescript-eslint/parser": "^8.57.2",
|
||||
"@typescript-eslint/utils": "^8.57.2",
|
||||
"@typescript-eslint/eslint-plugin": "^8.58.0",
|
||||
"@typescript-eslint/parser": "^8.58.0",
|
||||
"@typescript-eslint/utils": "^8.58.0",
|
||||
"eslint": "^10.1.0",
|
||||
"jest": "30.3.0",
|
||||
"jest-environment-jsdom": "^30.3.0",
|
||||
"jest-junit": "^16.0.0",
|
||||
"jest-preset-angular": "^16.1.1",
|
||||
"jest-preset-angular": "^16.1.2",
|
||||
"jest-websocket-mock": "^2.5.0",
|
||||
"prettier-plugin-organize-imports": "^4.3.0",
|
||||
"ts-node": "~10.9.1",
|
||||
|
||||
1035
src-ui/pnpm-lock.yaml
generated
1035
src-ui/pnpm-lock.yaml
generated
File diff suppressed because it is too large
Load Diff
@@ -7,7 +7,7 @@
|
||||
<button class="btn btn-sm btn-outline-primary" (click)="tourService.start()">
|
||||
<i-bs class="me-2" name="airplane"></i-bs><ng-container i18n>Start tour</ng-container>
|
||||
</button>
|
||||
@if (permissionsService.isAdmin()) {
|
||||
@if (canViewSystemStatus) {
|
||||
<button class="btn btn-sm btn-outline-primary position-relative ms-md-5 me-1" (click)="showSystemStatus()"
|
||||
[disabled]="!systemStatus">
|
||||
@if (!systemStatus) {
|
||||
@@ -26,6 +26,8 @@
|
||||
}
|
||||
<ng-container i18n>System Status</ng-container>
|
||||
</button>
|
||||
}
|
||||
@if (permissionsService.isAdmin()) {
|
||||
<a class="btn btn-sm btn-primary" href="admin/" target="_blank">
|
||||
<ng-container i18n>Open Django Admin</ng-container>
|
||||
<i-bs class="ms-2" name="arrow-up-right"></i-bs>
|
||||
|
||||
@@ -29,7 +29,11 @@ import { IfOwnerDirective } from 'src/app/directives/if-owner.directive'
|
||||
import { IfPermissionsDirective } from 'src/app/directives/if-permissions.directive'
|
||||
import { PermissionsGuard } from 'src/app/guards/permissions.guard'
|
||||
import { CustomDatePipe } from 'src/app/pipes/custom-date.pipe'
|
||||
import { PermissionsService } from 'src/app/services/permissions.service'
|
||||
import {
|
||||
PermissionAction,
|
||||
PermissionType,
|
||||
PermissionsService,
|
||||
} from 'src/app/services/permissions.service'
|
||||
import { GroupService } from 'src/app/services/rest/group.service'
|
||||
import { SavedViewService } from 'src/app/services/rest/saved-view.service'
|
||||
import { UserService } from 'src/app/services/rest/user.service'
|
||||
@@ -328,7 +332,13 @@ describe('SettingsComponent', () => {
|
||||
|
||||
it('should load system status on initialize, show errors if needed', () => {
|
||||
jest.spyOn(systemStatusService, 'get').mockReturnValue(of(status))
|
||||
jest.spyOn(permissionsService, 'isAdmin').mockReturnValue(true)
|
||||
jest
|
||||
.spyOn(permissionsService, 'currentUserCan')
|
||||
.mockImplementation(
|
||||
(action, type) =>
|
||||
action === PermissionAction.View &&
|
||||
type === PermissionType.SystemStatus
|
||||
)
|
||||
completeSetup()
|
||||
expect(component['systemStatus']).toEqual(status) // private
|
||||
expect(component.systemStatusHasErrors).toBeTruthy()
|
||||
@@ -344,7 +354,13 @@ describe('SettingsComponent', () => {
|
||||
it('should open system status dialog', () => {
|
||||
const modalOpenSpy = jest.spyOn(modalService, 'open')
|
||||
jest.spyOn(systemStatusService, 'get').mockReturnValue(of(status))
|
||||
jest.spyOn(permissionsService, 'isAdmin').mockReturnValue(true)
|
||||
jest
|
||||
.spyOn(permissionsService, 'currentUserCan')
|
||||
.mockImplementation(
|
||||
(action, type) =>
|
||||
action === PermissionAction.View &&
|
||||
type === PermissionType.SystemStatus
|
||||
)
|
||||
completeSetup()
|
||||
component.showSystemStatus()
|
||||
expect(modalOpenSpy).toHaveBeenCalledWith(SystemStatusDialogComponent, {
|
||||
|
||||
@@ -429,7 +429,7 @@ export class SettingsComponent
|
||||
this.settingsForm.patchValue(currentFormValue)
|
||||
}
|
||||
|
||||
if (this.permissionsService.isAdmin()) {
|
||||
if (this.canViewSystemStatus) {
|
||||
this.systemStatusService.get().subscribe((status) => {
|
||||
this.systemStatus = status
|
||||
})
|
||||
@@ -647,6 +647,16 @@ export class SettingsComponent
|
||||
.setValue(Array.from(hiddenFields))
|
||||
}
|
||||
|
||||
public get canViewSystemStatus(): boolean {
|
||||
return (
|
||||
this.permissionsService.isAdmin() ||
|
||||
this.permissionsService.currentUserCan(
|
||||
PermissionAction.View,
|
||||
PermissionType.SystemStatus
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
showSystemStatus() {
|
||||
const modal: NgbModalRef = this.modalService.open(
|
||||
SystemStatusDialogComponent,
|
||||
|
||||
@@ -23,11 +23,11 @@
|
||||
</div>
|
||||
<div class="form-check form-switch form-check-inline">
|
||||
<input type="checkbox" class="form-check-input" id="is_staff" formControlName="is_staff">
|
||||
<label class="form-check-label" for="is_staff"><ng-container i18n>Admin</ng-container> <small class="form-text text-muted ms-1" i18n>Access logs, Django backend</small></label>
|
||||
<label class="form-check-label" for="is_staff"><ng-container i18n>Admin</ng-container> <small class="form-text text-muted ms-1" i18n>Access system status, logs, Django backend</small></label>
|
||||
</div>
|
||||
<div class="form-check form-switch form-check-inline">
|
||||
<input type="checkbox" class="form-check-input" id="is_superuser" formControlName="is_superuser" (change)="onToggleSuperUser()">
|
||||
<label class="form-check-label" for="is_superuser"><ng-container i18n>Superuser</ng-container> <small class="form-text text-muted ms-1" i18n>(Grants all permissions and can view objects)</small></label>
|
||||
<label class="form-check-label" for="is_superuser"><ng-container i18n>Superuser</ng-container> <small class="form-text text-muted ms-1" i18n>Grants all permissions and can view all objects</small></label>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
@@ -26,8 +26,8 @@
|
||||
<input type="checkbox" class="form-check-input" id="{{type}}_all" (change)="toggleAll($event, type)" [checked]="typesWithAllActions.has(type) || isInherited(type)" [attr.disabled]="disabled || isInherited(type) ? true : null">
|
||||
<label class="form-check-label visually-hidden" for="{{type}}_all" i18n>All</label>
|
||||
</div>
|
||||
@for (action of PermissionAction | keyvalue; track action) {
|
||||
<div class="col form-check form-check-inline" [ngbPopover]="inheritedWarning" [disablePopover]="!isInherited(type, action.key)" placement="left" triggers="mouseenter:mouseleave">
|
||||
@for (action of PermissionAction | keyvalue: sortActions; track action.key) {
|
||||
<div class="col form-check form-check-inline" [class.invisible]="!isActionSupported(PermissionType[type], action.value)" [ngbPopover]="inheritedWarning" [disablePopover]="!isInherited(type, action.key)" placement="left" triggers="mouseenter:mouseleave">
|
||||
<input type="checkbox" class="form-check-input" id="{{type}}_{{action.key}}" formControlName="{{action.key}}">
|
||||
<label class="form-check-label visually-hidden" for="{{type}}_{{action.key}}">{{action.key}}</label>
|
||||
</div>
|
||||
|
||||
@@ -26,7 +26,6 @@ const inheritedPermissions = ['change_tag', 'view_documenttype']
|
||||
describe('PermissionsSelectComponent', () => {
|
||||
let component: PermissionsSelectComponent
|
||||
let fixture: ComponentFixture<PermissionsSelectComponent>
|
||||
let permissionsChangeResult: Permissions
|
||||
let settingsService: SettingsService
|
||||
|
||||
beforeEach(async () => {
|
||||
@@ -45,7 +44,7 @@ describe('PermissionsSelectComponent', () => {
|
||||
fixture = TestBed.createComponent(PermissionsSelectComponent)
|
||||
fixture.debugElement.injector.get(NG_VALUE_ACCESSOR)
|
||||
component = fixture.componentInstance
|
||||
component.registerOnChange((r) => (permissionsChangeResult = r))
|
||||
component.registerOnChange((r) => r)
|
||||
fixture.detectChanges()
|
||||
})
|
||||
|
||||
@@ -75,7 +74,6 @@ describe('PermissionsSelectComponent', () => {
|
||||
it('should update on permissions set', () => {
|
||||
component.ngOnInit()
|
||||
component.writeValue(permissions)
|
||||
expect(permissionsChangeResult).toEqual(permissions)
|
||||
expect(component.typesWithAllActions).toContain('Document')
|
||||
})
|
||||
|
||||
@@ -92,13 +90,12 @@ describe('PermissionsSelectComponent', () => {
|
||||
it('disable checkboxes when permissions are inherited', () => {
|
||||
component.ngOnInit()
|
||||
component.inheritedPermissions = inheritedPermissions
|
||||
fixture.detectChanges()
|
||||
expect(component.isInherited('Document', 'Add')).toBeFalsy()
|
||||
expect(component.isInherited('Document')).toBeFalsy()
|
||||
expect(component.isInherited('Tag', 'Change')).toBeTruthy()
|
||||
const input1 = fixture.debugElement.query(By.css('input#Document_Add'))
|
||||
expect(input1.nativeElement.disabled).toBeFalsy()
|
||||
const input2 = fixture.debugElement.query(By.css('input#Tag_Change'))
|
||||
expect(input2.nativeElement.disabled).toBeTruthy()
|
||||
expect(component.form.get('Document').get('Add').disabled).toBeFalsy()
|
||||
expect(component.form.get('Tag').get('Change').disabled).toBeTruthy()
|
||||
})
|
||||
|
||||
it('should exclude history permissions if disabled', () => {
|
||||
@@ -107,4 +104,60 @@ describe('PermissionsSelectComponent', () => {
|
||||
component = fixture.componentInstance
|
||||
expect(component.allowedTypes).not.toContain('History')
|
||||
})
|
||||
|
||||
it('should treat global statistics as view-only', () => {
|
||||
component.ngOnInit()
|
||||
fixture.detectChanges()
|
||||
|
||||
expect(
|
||||
component.isActionSupported(
|
||||
PermissionType.GlobalStatistics,
|
||||
PermissionAction.View
|
||||
)
|
||||
).toBeTruthy()
|
||||
expect(
|
||||
component.isActionSupported(
|
||||
PermissionType.GlobalStatistics,
|
||||
PermissionAction.Add
|
||||
)
|
||||
).toBeFalsy()
|
||||
|
||||
const addInput = fixture.debugElement.query(
|
||||
By.css('input#GlobalStatistics_Add')
|
||||
)
|
||||
const viewInput = fixture.debugElement.query(
|
||||
By.css('input#GlobalStatistics_View')
|
||||
)
|
||||
|
||||
expect(addInput.nativeElement.disabled).toBeTruthy()
|
||||
expect(viewInput.nativeElement.disabled).toBeFalsy()
|
||||
})
|
||||
|
||||
it('should treat system status as view-only', () => {
|
||||
component.ngOnInit()
|
||||
fixture.detectChanges()
|
||||
|
||||
expect(
|
||||
component.isActionSupported(
|
||||
PermissionType.SystemStatus,
|
||||
PermissionAction.View
|
||||
)
|
||||
).toBeTruthy()
|
||||
expect(
|
||||
component.isActionSupported(
|
||||
PermissionType.SystemStatus,
|
||||
PermissionAction.Change
|
||||
)
|
||||
).toBeFalsy()
|
||||
|
||||
const changeInput = fixture.debugElement.query(
|
||||
By.css('input#SystemStatus_Change')
|
||||
)
|
||||
const viewInput = fixture.debugElement.query(
|
||||
By.css('input#SystemStatus_View')
|
||||
)
|
||||
|
||||
expect(changeInput.nativeElement.disabled).toBeTruthy()
|
||||
expect(viewInput.nativeElement.disabled).toBeFalsy()
|
||||
})
|
||||
})
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { KeyValuePipe } from '@angular/common'
|
||||
import { KeyValue, KeyValuePipe } from '@angular/common'
|
||||
import { Component, forwardRef, inject, Input, OnInit } from '@angular/core'
|
||||
import {
|
||||
AbstractControl,
|
||||
@@ -58,6 +58,13 @@ export class PermissionsSelectComponent
|
||||
|
||||
typesWithAllActions: Set<string> = new Set()
|
||||
|
||||
private readonly actionOrder = [
|
||||
PermissionAction.Add,
|
||||
PermissionAction.Change,
|
||||
PermissionAction.Delete,
|
||||
PermissionAction.View,
|
||||
]
|
||||
|
||||
_inheritedPermissions: string[] = []
|
||||
|
||||
@Input()
|
||||
@@ -86,7 +93,7 @@ export class PermissionsSelectComponent
|
||||
}
|
||||
this.allowedTypes.forEach((type) => {
|
||||
const control = new FormGroup({})
|
||||
for (const action in PermissionAction) {
|
||||
for (const action of Object.keys(PermissionAction)) {
|
||||
control.addControl(action, new FormControl(null))
|
||||
}
|
||||
this.form.addControl(type, control)
|
||||
@@ -106,18 +113,14 @@ export class PermissionsSelectComponent
|
||||
this.permissionsService.getPermissionKeys(permissionStr)
|
||||
|
||||
if (actionKey && typeKey) {
|
||||
if (this.form.get(typeKey)?.get(actionKey)) {
|
||||
this.form
|
||||
.get(typeKey)
|
||||
.get(actionKey)
|
||||
.patchValue(true, { emitEvent: false })
|
||||
}
|
||||
this.form
|
||||
.get(typeKey)
|
||||
?.get(actionKey)
|
||||
?.patchValue(true, { emitEvent: false })
|
||||
}
|
||||
})
|
||||
this.allowedTypes.forEach((type) => {
|
||||
if (
|
||||
Object.values(this.form.get(type).value).every((val) => val == true)
|
||||
) {
|
||||
if (this.typeHasAllActionsSelected(type)) {
|
||||
this.typesWithAllActions.add(type)
|
||||
} else {
|
||||
this.typesWithAllActions.delete(type)
|
||||
@@ -149,12 +152,16 @@ export class PermissionsSelectComponent
|
||||
this.form.valueChanges.subscribe((newValue) => {
|
||||
let permissions = []
|
||||
Object.entries(newValue).forEach(([typeKey, typeValue]) => {
|
||||
// e.g. [Document, { Add: true, View: true ... }]
|
||||
const selectedActions = Object.entries(typeValue).filter(
|
||||
([actionKey, actionValue]) => actionValue == true
|
||||
([actionKey, actionValue]) =>
|
||||
actionValue &&
|
||||
this.isActionSupported(
|
||||
PermissionType[typeKey],
|
||||
PermissionAction[actionKey]
|
||||
)
|
||||
)
|
||||
|
||||
selectedActions.forEach(([actionKey, actionValue]) => {
|
||||
selectedActions.forEach(([actionKey]) => {
|
||||
permissions.push(
|
||||
(PermissionType[typeKey] as string).replace(
|
||||
'%s',
|
||||
@@ -163,7 +170,7 @@ export class PermissionsSelectComponent
|
||||
)
|
||||
})
|
||||
|
||||
if (selectedActions.length == Object.entries(typeValue).length) {
|
||||
if (this.typeHasAllActionsSelected(typeKey)) {
|
||||
this.typesWithAllActions.add(typeKey)
|
||||
} else {
|
||||
this.typesWithAllActions.delete(typeKey)
|
||||
@@ -174,19 +181,23 @@ export class PermissionsSelectComponent
|
||||
permissions.filter((p) => !this._inheritedPermissions.includes(p))
|
||||
)
|
||||
})
|
||||
|
||||
this.updateDisabledStates()
|
||||
}
|
||||
|
||||
toggleAll(event, type) {
|
||||
const typeGroup = this.form.get(type)
|
||||
if (event.target.checked) {
|
||||
Object.keys(PermissionAction).forEach((action) => {
|
||||
typeGroup.get(action).patchValue(true)
|
||||
Object.keys(PermissionAction)
|
||||
.filter((action) =>
|
||||
this.isActionSupported(PermissionType[type], PermissionAction[action])
|
||||
)
|
||||
.forEach((action) => {
|
||||
typeGroup.get(action).patchValue(event.target.checked)
|
||||
})
|
||||
|
||||
if (this.typeHasAllActionsSelected(type)) {
|
||||
this.typesWithAllActions.add(type)
|
||||
} else {
|
||||
Object.keys(PermissionAction).forEach((action) => {
|
||||
typeGroup.get(action).patchValue(false)
|
||||
})
|
||||
this.typesWithAllActions.delete(type)
|
||||
}
|
||||
}
|
||||
@@ -201,14 +212,21 @@ export class PermissionsSelectComponent
|
||||
)
|
||||
)
|
||||
} else {
|
||||
return Object.values(PermissionAction).every((action) => {
|
||||
return this._inheritedPermissions.includes(
|
||||
this.permissionsService.getPermissionCode(
|
||||
action as PermissionAction,
|
||||
PermissionType[typeKey]
|
||||
return Object.keys(PermissionAction)
|
||||
.filter((action) =>
|
||||
this.isActionSupported(
|
||||
PermissionType[typeKey],
|
||||
PermissionAction[action]
|
||||
)
|
||||
)
|
||||
})
|
||||
.every((action) => {
|
||||
return this._inheritedPermissions.includes(
|
||||
this.permissionsService.getPermissionCode(
|
||||
PermissionAction[action],
|
||||
PermissionType[typeKey]
|
||||
)
|
||||
)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -216,12 +234,55 @@ export class PermissionsSelectComponent
|
||||
this.allowedTypes.forEach((type) => {
|
||||
const control = this.form.get(type)
|
||||
let actionControl: AbstractControl
|
||||
for (const action in PermissionAction) {
|
||||
for (const action of Object.keys(PermissionAction)) {
|
||||
actionControl = control.get(action)
|
||||
if (
|
||||
!this.isActionSupported(
|
||||
PermissionType[type],
|
||||
PermissionAction[action]
|
||||
)
|
||||
) {
|
||||
actionControl.patchValue(false, { emitEvent: false })
|
||||
actionControl.disable({ emitEvent: false })
|
||||
continue
|
||||
}
|
||||
|
||||
this.isInherited(type, action) || this.disabled
|
||||
? actionControl.disable()
|
||||
: actionControl.enable()
|
||||
? actionControl.disable({ emitEvent: false })
|
||||
: actionControl.enable({ emitEvent: false })
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
public isActionSupported(
|
||||
type: PermissionType,
|
||||
action: PermissionAction
|
||||
): boolean {
|
||||
// Global statistics and system status only support view
|
||||
if (
|
||||
type === PermissionType.GlobalStatistics ||
|
||||
type === PermissionType.SystemStatus
|
||||
) {
|
||||
return action === PermissionAction.View
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
private typeHasAllActionsSelected(typeKey: string): boolean {
|
||||
return Object.keys(PermissionAction)
|
||||
.filter((action) =>
|
||||
this.isActionSupported(
|
||||
PermissionType[typeKey],
|
||||
PermissionAction[action]
|
||||
)
|
||||
)
|
||||
.every((action) => !!this.form.get(typeKey)?.get(action)?.value)
|
||||
}
|
||||
|
||||
public sortActions = (
|
||||
a: KeyValue<string, PermissionAction>,
|
||||
b: KeyValue<string, PermissionAction>
|
||||
): number =>
|
||||
this.actionOrder.indexOf(a.value) - this.actionOrder.indexOf(b.value)
|
||||
}
|
||||
|
||||
@@ -6,6 +6,11 @@ import {
|
||||
PermissionsService,
|
||||
} from './permissions.service'
|
||||
|
||||
const VIEW_ONLY_PERMISSION_TYPES = new Set<PermissionType>([
|
||||
PermissionType.GlobalStatistics,
|
||||
PermissionType.SystemStatus,
|
||||
])
|
||||
|
||||
describe('PermissionsService', () => {
|
||||
let permissionsService: PermissionsService
|
||||
|
||||
@@ -264,6 +269,8 @@ describe('PermissionsService', () => {
|
||||
'change_applicationconfiguration',
|
||||
'delete_applicationconfiguration',
|
||||
'view_applicationconfiguration',
|
||||
'view_global_statistics',
|
||||
'view_system_status',
|
||||
],
|
||||
{
|
||||
username: 'testuser',
|
||||
@@ -274,7 +281,10 @@ describe('PermissionsService', () => {
|
||||
|
||||
Object.values(PermissionType).forEach((type) => {
|
||||
Object.values(PermissionAction).forEach((action) => {
|
||||
expect(permissionsService.currentUserCan(action, type)).toBeTruthy()
|
||||
expect(permissionsService.currentUserCan(action, type)).toBe(
|
||||
!VIEW_ONLY_PERMISSION_TYPES.has(type) ||
|
||||
action === PermissionAction.View
|
||||
)
|
||||
})
|
||||
})
|
||||
|
||||
|
||||
@@ -29,6 +29,8 @@ export enum PermissionType {
|
||||
CustomField = '%s_customfield',
|
||||
Workflow = '%s_workflow',
|
||||
ProcessedMail = '%s_processedmail',
|
||||
GlobalStatistics = '%s_global_statistics',
|
||||
SystemStatus = '%s_system_status',
|
||||
}
|
||||
|
||||
@Injectable({
|
||||
|
||||
@@ -73,7 +73,7 @@ describe('LocalizedDateParserFormatter', () => {
|
||||
|
||||
it('should handle years when current year % 100 < 50', () => {
|
||||
jest.useFakeTimers()
|
||||
jest.setSystemTime(new Date(2026, 5, 15))
|
||||
jest.setSystemTime(new Date(2026, 5, 15).getTime())
|
||||
let val = dateParserFormatter.parse('5/4/26')
|
||||
expect(val).toEqual({ day: 4, month: 5, year: 2026 })
|
||||
|
||||
@@ -87,7 +87,7 @@ describe('LocalizedDateParserFormatter', () => {
|
||||
|
||||
it('should handle years when current year % 100 >= 50', () => {
|
||||
jest.useFakeTimers()
|
||||
jest.setSystemTime(new Date(2076, 5, 15))
|
||||
jest.setSystemTime(new Date(2076, 5, 15).getTime())
|
||||
const val = dateParserFormatter.parse('5/4/00')
|
||||
expect(val).toEqual({ day: 4, month: 5, year: 2100 })
|
||||
jest.useRealTimers()
|
||||
|
||||
@@ -56,6 +56,26 @@ class PaperlessAdminPermissions(BasePermission):
|
||||
return request.user.is_staff
|
||||
|
||||
|
||||
def has_global_statistics_permission(user: User | None) -> bool:
|
||||
if user is None or not getattr(user, "is_authenticated", False):
|
||||
return False
|
||||
|
||||
return getattr(user, "is_superuser", False) or user.has_perm(
|
||||
"paperless.view_global_statistics",
|
||||
)
|
||||
|
||||
|
||||
def has_system_status_permission(user: User | None) -> bool:
|
||||
if user is None or not getattr(user, "is_authenticated", False):
|
||||
return False
|
||||
|
||||
return (
|
||||
getattr(user, "is_superuser", False)
|
||||
or getattr(user, "is_staff", False)
|
||||
or user.has_perm("paperless.view_system_status")
|
||||
)
|
||||
|
||||
|
||||
def get_groups_with_only_permission(obj, codename):
|
||||
ctype = ContentType.objects.get_for_model(obj)
|
||||
permission = Permission.objects.get(content_type=ctype, codename=codename)
|
||||
|
||||
@@ -6,6 +6,8 @@ from unittest.mock import patch
|
||||
from django.contrib.auth.models import User
|
||||
from django.core.files.uploadedfile import SimpleUploadedFile
|
||||
from django.test import override_settings
|
||||
from PIL import Image
|
||||
from PIL.PngImagePlugin import PngInfo
|
||||
from rest_framework import status
|
||||
from rest_framework.test import APITestCase
|
||||
|
||||
@@ -201,6 +203,156 @@ class TestApiAppConfig(DirectoriesMixin, APITestCase):
|
||||
)
|
||||
self.assertFalse(Path(old_logo.path).exists())
|
||||
|
||||
def test_api_strips_exif_data_from_uploaded_logo(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- A JPEG logo upload containing EXIF metadata
|
||||
WHEN:
|
||||
- Uploaded via PATCH to app config
|
||||
THEN:
|
||||
- Stored logo image has EXIF metadata removed
|
||||
"""
|
||||
image = Image.new("RGB", (12, 12), "blue")
|
||||
exif = Image.Exif()
|
||||
exif[315] = "Paperless Test Author"
|
||||
|
||||
logo = BytesIO()
|
||||
image.save(logo, format="JPEG", exif=exif)
|
||||
logo.seek(0)
|
||||
|
||||
response = self.client.patch(
|
||||
f"{self.ENDPOINT}1/",
|
||||
{
|
||||
"app_logo": SimpleUploadedFile(
|
||||
name="logo-with-exif.jpg",
|
||||
content=logo.getvalue(),
|
||||
content_type="image/jpeg",
|
||||
),
|
||||
},
|
||||
)
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
|
||||
config = ApplicationConfiguration.objects.first()
|
||||
with Image.open(config.app_logo.path) as stored_logo:
|
||||
stored_exif = stored_logo.getexif()
|
||||
|
||||
self.assertEqual(len(stored_exif), 0)
|
||||
|
||||
def test_api_strips_png_metadata_from_uploaded_logo(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- A PNG logo upload containing text metadata
|
||||
WHEN:
|
||||
- Uploaded via PATCH to app config
|
||||
THEN:
|
||||
- Stored logo image has metadata removed
|
||||
"""
|
||||
image = Image.new("RGB", (12, 12), "green")
|
||||
pnginfo = PngInfo()
|
||||
pnginfo.add_text("Author", "Paperless Test Author")
|
||||
|
||||
logo = BytesIO()
|
||||
image.save(logo, format="PNG", pnginfo=pnginfo)
|
||||
logo.seek(0)
|
||||
|
||||
response = self.client.patch(
|
||||
f"{self.ENDPOINT}1/",
|
||||
{
|
||||
"app_logo": SimpleUploadedFile(
|
||||
name="logo-with-metadata.png",
|
||||
content=logo.getvalue(),
|
||||
content_type="image/png",
|
||||
),
|
||||
},
|
||||
)
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
|
||||
config = ApplicationConfiguration.objects.first()
|
||||
with Image.open(config.app_logo.path) as stored_logo:
|
||||
stored_text = stored_logo.text
|
||||
|
||||
self.assertEqual(stored_text, {})
|
||||
|
||||
def test_api_accepts_valid_gif_logo(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- A valid GIF logo upload
|
||||
WHEN:
|
||||
- Uploaded via PATCH to app config
|
||||
THEN:
|
||||
- Upload succeeds
|
||||
"""
|
||||
image = Image.new("RGB", (12, 12), "red")
|
||||
|
||||
logo = BytesIO()
|
||||
image.save(logo, format="GIF", comment=b"Paperless Test Comment")
|
||||
logo.seek(0)
|
||||
|
||||
response = self.client.patch(
|
||||
f"{self.ENDPOINT}1/",
|
||||
{
|
||||
"app_logo": SimpleUploadedFile(
|
||||
name="logo.gif",
|
||||
content=logo.getvalue(),
|
||||
content_type="image/gif",
|
||||
),
|
||||
},
|
||||
)
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
|
||||
def test_api_rejects_invalid_raster_logo(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- A file named as a JPEG but containing non-image payload data
|
||||
WHEN:
|
||||
- Uploaded via PATCH to app config
|
||||
THEN:
|
||||
- Upload is rejected with 400
|
||||
"""
|
||||
response = self.client.patch(
|
||||
f"{self.ENDPOINT}1/",
|
||||
{
|
||||
"app_logo": SimpleUploadedFile(
|
||||
name="not-an-image.jpg",
|
||||
content=b"<script>alert('xss')</script>",
|
||||
content_type="image/jpeg",
|
||||
),
|
||||
},
|
||||
)
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn("invalid logo image", str(response.data).lower())
|
||||
|
||||
@override_settings(MAX_IMAGE_PIXELS=100)
|
||||
def test_api_rejects_logo_exceeding_max_image_pixels(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- A raster logo larger than the configured MAX_IMAGE_PIXELS limit
|
||||
WHEN:
|
||||
- Uploaded via PATCH to app config
|
||||
THEN:
|
||||
- Upload is rejected with 400
|
||||
"""
|
||||
image = Image.new("RGB", (12, 12), "purple")
|
||||
logo = BytesIO()
|
||||
image.save(logo, format="PNG")
|
||||
logo.seek(0)
|
||||
|
||||
response = self.client.patch(
|
||||
f"{self.ENDPOINT}1/",
|
||||
{
|
||||
"app_logo": SimpleUploadedFile(
|
||||
name="too-large.png",
|
||||
content=logo.getvalue(),
|
||||
content_type="image/png",
|
||||
),
|
||||
},
|
||||
)
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn(
|
||||
"uploaded logo exceeds the maximum allowed image size",
|
||||
str(response.data).lower(),
|
||||
)
|
||||
|
||||
def test_api_rejects_malicious_svg_logo(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
|
||||
@@ -1309,7 +1309,7 @@ class TestCustomFieldsAPI(DirectoriesMixin, APITestCase):
|
||||
# Test as user without access to the document
|
||||
non_superuser = User.objects.create_user(username="non_superuser")
|
||||
non_superuser.user_permissions.add(
|
||||
*Permission.objects.all(),
|
||||
*Permission.objects.exclude(codename="view_global_statistics"),
|
||||
)
|
||||
non_superuser.save()
|
||||
self.client.force_authenticate(user=non_superuser)
|
||||
|
||||
@@ -18,6 +18,7 @@ from django.contrib.auth.models import Permission
|
||||
from django.contrib.auth.models import User
|
||||
from django.core import mail
|
||||
from django.core.cache import cache
|
||||
from django.core.files.uploadedfile import SimpleUploadedFile
|
||||
from django.db import DataError
|
||||
from django.test import override_settings
|
||||
from django.utils import timezone
|
||||
@@ -1314,6 +1315,41 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
self.assertEqual(response.data["documents_inbox"], 0)
|
||||
|
||||
def test_statistics_with_statistics_permission(self) -> None:
|
||||
owner = User.objects.create_user("owner")
|
||||
stats_user = User.objects.create_user("stats-user")
|
||||
stats_user.user_permissions.add(
|
||||
Permission.objects.get(codename="view_global_statistics"),
|
||||
)
|
||||
|
||||
inbox_tag = Tag.objects.create(
|
||||
name="stats_inbox",
|
||||
is_inbox_tag=True,
|
||||
owner=owner,
|
||||
)
|
||||
Document.objects.create(
|
||||
title="owned-doc",
|
||||
checksum="stats-A",
|
||||
mime_type="application/pdf",
|
||||
content="abcdef",
|
||||
owner=owner,
|
||||
).tags.add(inbox_tag)
|
||||
Correspondent.objects.create(name="stats-correspondent", owner=owner)
|
||||
DocumentType.objects.create(name="stats-type", owner=owner)
|
||||
StoragePath.objects.create(name="stats-path", path="archive", owner=owner)
|
||||
|
||||
self.client.force_authenticate(user=stats_user)
|
||||
response = self.client.get("/api/statistics/")
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
self.assertEqual(response.data["documents_total"], 1)
|
||||
self.assertEqual(response.data["documents_inbox"], 1)
|
||||
self.assertEqual(response.data["inbox_tags"], [inbox_tag.pk])
|
||||
self.assertEqual(response.data["character_count"], 6)
|
||||
self.assertEqual(response.data["correspondent_count"], 1)
|
||||
self.assertEqual(response.data["document_type_count"], 1)
|
||||
self.assertEqual(response.data["storage_path_count"], 1)
|
||||
|
||||
def test_upload(self) -> None:
|
||||
self.consume_file_mock.return_value = celery.result.AsyncResult(
|
||||
id=str(uuid.uuid4()),
|
||||
@@ -1342,6 +1378,79 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
|
||||
self.assertIsNone(overrides.document_type_id)
|
||||
self.assertIsNone(overrides.tag_ids)
|
||||
|
||||
def test_upload_with_path_traversal_filename_is_reduced_to_basename(self) -> None:
|
||||
self.consume_file_mock.return_value = celery.result.AsyncResult(
|
||||
id=str(uuid.uuid4()),
|
||||
)
|
||||
|
||||
payload = SimpleUploadedFile(
|
||||
"../../outside.pdf",
|
||||
(Path(__file__).parent / "samples" / "simple.pdf").read_bytes(),
|
||||
content_type="application/pdf",
|
||||
)
|
||||
|
||||
response = self.client.post(
|
||||
"/api/documents/post_document/",
|
||||
{"document": payload},
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
self.consume_file_mock.assert_called_once()
|
||||
|
||||
input_doc, overrides = self.get_last_consume_delay_call_args()
|
||||
|
||||
self.assertEqual(input_doc.original_file.name, "outside.pdf")
|
||||
self.assertEqual(overrides.filename, "outside.pdf")
|
||||
self.assertNotIn("..", input_doc.original_file.name)
|
||||
self.assertNotIn("..", overrides.filename)
|
||||
self.assertTrue(
|
||||
input_doc.original_file.resolve(strict=False).is_relative_to(
|
||||
Path(settings.SCRATCH_DIR).resolve(strict=False),
|
||||
),
|
||||
)
|
||||
|
||||
def test_upload_with_path_traversal_content_disposition_filename_is_reduced_to_basename(
|
||||
self,
|
||||
) -> None:
|
||||
self.consume_file_mock.return_value = celery.result.AsyncResult(
|
||||
id=str(uuid.uuid4()),
|
||||
)
|
||||
|
||||
pdf_bytes = (Path(__file__).parent / "samples" / "simple.pdf").read_bytes()
|
||||
boundary = "paperless-boundary"
|
||||
payload = (
|
||||
(
|
||||
f"--{boundary}\r\n"
|
||||
'Content-Disposition: form-data; name="document"; '
|
||||
'filename="../../outside.pdf"\r\n'
|
||||
"Content-Type: application/pdf\r\n\r\n"
|
||||
).encode()
|
||||
+ pdf_bytes
|
||||
+ f"\r\n--{boundary}--\r\n".encode()
|
||||
)
|
||||
|
||||
response = self.client.generic(
|
||||
"POST",
|
||||
"/api/documents/post_document/",
|
||||
payload,
|
||||
content_type=f"multipart/form-data; boundary={boundary}",
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
self.consume_file_mock.assert_called_once()
|
||||
|
||||
input_doc, overrides = self.get_last_consume_delay_call_args()
|
||||
|
||||
self.assertEqual(input_doc.original_file.name, "outside.pdf")
|
||||
self.assertEqual(overrides.filename, "outside.pdf")
|
||||
self.assertNotIn("..", input_doc.original_file.name)
|
||||
self.assertNotIn("..", overrides.filename)
|
||||
self.assertTrue(
|
||||
input_doc.original_file.resolve(strict=False).is_relative_to(
|
||||
Path(settings.SCRATCH_DIR).resolve(strict=False),
|
||||
),
|
||||
)
|
||||
|
||||
def test_document_filters_use_latest_version_content(self) -> None:
|
||||
root = Document.objects.create(
|
||||
title="versioned root",
|
||||
|
||||
@@ -5,12 +5,14 @@ from pathlib import Path
|
||||
from unittest import mock
|
||||
|
||||
from celery import states
|
||||
from django.contrib.auth.models import Permission
|
||||
from django.contrib.auth.models import User
|
||||
from django.test import override_settings
|
||||
from rest_framework import status
|
||||
from rest_framework.test import APITestCase
|
||||
|
||||
from documents.models import PaperlessTask
|
||||
from documents.permissions import has_system_status_permission
|
||||
from paperless import version
|
||||
|
||||
|
||||
@@ -91,6 +93,22 @@ class TestSystemStatus(APITestCase):
|
||||
self.client.force_login(normal_user)
|
||||
response = self.client.get(self.ENDPOINT)
|
||||
self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
|
||||
# test the permission helper function directly for good measure
|
||||
self.assertFalse(has_system_status_permission(None))
|
||||
|
||||
def test_system_status_with_system_status_permission(self) -> None:
|
||||
response = self.client.get(self.ENDPOINT)
|
||||
self.assertEqual(response.status_code, status.HTTP_401_UNAUTHORIZED)
|
||||
|
||||
user = User.objects.create_user(username="status_user")
|
||||
user.user_permissions.add(
|
||||
Permission.objects.get(codename="view_system_status"),
|
||||
)
|
||||
|
||||
self.client.force_login(user)
|
||||
response = self.client.get(self.ENDPOINT)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
|
||||
def test_system_status_with_bad_basic_auth_challenges(self) -> None:
|
||||
self.client.credentials(HTTP_AUTHORIZATION="Basic invalid")
|
||||
|
||||
@@ -165,7 +165,9 @@ from documents.permissions import ViewDocumentsPermissions
|
||||
from documents.permissions import annotate_document_count_for_related_queryset
|
||||
from documents.permissions import get_document_count_filter_for_user
|
||||
from documents.permissions import get_objects_for_user_owner_aware
|
||||
from documents.permissions import has_global_statistics_permission
|
||||
from documents.permissions import has_perms_owner_aware
|
||||
from documents.permissions import has_system_status_permission
|
||||
from documents.permissions import set_permissions_for_object
|
||||
from documents.plugins.date_parsing import get_date_parser
|
||||
from documents.schema import generate_object_with_permissions_schema
|
||||
@@ -3265,10 +3267,11 @@ class StatisticsView(GenericAPIView):
|
||||
|
||||
def get(self, request, format=None):
|
||||
user = request.user if request.user is not None else None
|
||||
can_view_global_stats = has_global_statistics_permission(user) or user is None
|
||||
|
||||
documents = (
|
||||
Document.objects.all()
|
||||
if user is None
|
||||
if can_view_global_stats
|
||||
else get_objects_for_user_owner_aware(
|
||||
user,
|
||||
"documents.view_document",
|
||||
@@ -3277,12 +3280,12 @@ class StatisticsView(GenericAPIView):
|
||||
)
|
||||
tags = (
|
||||
Tag.objects.all()
|
||||
if user is None
|
||||
if can_view_global_stats
|
||||
else get_objects_for_user_owner_aware(user, "documents.view_tag", Tag)
|
||||
).only("id", "is_inbox_tag")
|
||||
correspondent_count = (
|
||||
Correspondent.objects.count()
|
||||
if user is None
|
||||
if can_view_global_stats
|
||||
else get_objects_for_user_owner_aware(
|
||||
user,
|
||||
"documents.view_correspondent",
|
||||
@@ -3291,7 +3294,7 @@ class StatisticsView(GenericAPIView):
|
||||
)
|
||||
document_type_count = (
|
||||
DocumentType.objects.count()
|
||||
if user is None
|
||||
if can_view_global_stats
|
||||
else get_objects_for_user_owner_aware(
|
||||
user,
|
||||
"documents.view_documenttype",
|
||||
@@ -3300,7 +3303,7 @@ class StatisticsView(GenericAPIView):
|
||||
)
|
||||
storage_path_count = (
|
||||
StoragePath.objects.count()
|
||||
if user is None
|
||||
if can_view_global_stats
|
||||
else get_objects_for_user_owner_aware(
|
||||
user,
|
||||
"documents.view_storagepath",
|
||||
@@ -4257,7 +4260,7 @@ class SystemStatusView(PassUserMixin):
|
||||
permission_classes = (IsAuthenticated,)
|
||||
|
||||
def get(self, request, format=None):
|
||||
if not request.user.is_staff:
|
||||
if not has_system_status_permission(request.user):
|
||||
return HttpResponseForbidden("Insufficient permissions")
|
||||
|
||||
current_version = version.__full_version_str__
|
||||
|
||||
@@ -2,7 +2,7 @@ msgid ""
|
||||
msgstr ""
|
||||
"Project-Id-Version: paperless-ngx\n"
|
||||
"Report-Msgid-Bugs-To: \n"
|
||||
"POT-Creation-Date: 2026-04-06 22:51+0000\n"
|
||||
"POT-Creation-Date: 2026-04-08 15:41+0000\n"
|
||||
"PO-Revision-Date: 2022-02-17 04:17\n"
|
||||
"Last-Translator: \n"
|
||||
"Language-Team: English\n"
|
||||
@@ -1308,8 +1308,8 @@ msgid "workflow runs"
|
||||
msgstr ""
|
||||
|
||||
#: documents/serialisers.py:463 documents/serialisers.py:815
|
||||
#: documents/serialisers.py:2545 documents/views.py:2120
|
||||
#: documents/views.py:2175 paperless_mail/serialisers.py:143
|
||||
#: documents/serialisers.py:2545 documents/views.py:2122
|
||||
#: documents/views.py:2177 paperless_mail/serialisers.py:143
|
||||
msgid "Insufficient permissions."
|
||||
msgstr ""
|
||||
|
||||
@@ -1349,7 +1349,7 @@ msgstr ""
|
||||
msgid "Duplicate document identifiers are not allowed."
|
||||
msgstr ""
|
||||
|
||||
#: documents/serialisers.py:2631 documents/views.py:3784
|
||||
#: documents/serialisers.py:2631 documents/views.py:3787
|
||||
#, python-format
|
||||
msgid "Documents not found: %(ids)s"
|
||||
msgstr ""
|
||||
@@ -1617,28 +1617,28 @@ msgstr ""
|
||||
msgid "Unable to parse URI {value}"
|
||||
msgstr ""
|
||||
|
||||
#: documents/views.py:2077
|
||||
#: documents/views.py:2079
|
||||
msgid "Specify only one of text, title_search, query, or more_like_id."
|
||||
msgstr ""
|
||||
|
||||
#: documents/views.py:2113 documents/views.py:2172
|
||||
#: documents/views.py:2115 documents/views.py:2174
|
||||
msgid "Invalid more_like_id"
|
||||
msgstr ""
|
||||
|
||||
#: documents/views.py:3796
|
||||
#: documents/views.py:3799
|
||||
#, python-format
|
||||
msgid "Insufficient permissions to share document %(id)s."
|
||||
msgstr ""
|
||||
|
||||
#: documents/views.py:3839
|
||||
#: documents/views.py:3842
|
||||
msgid "Bundle is already being processed."
|
||||
msgstr ""
|
||||
|
||||
#: documents/views.py:3896
|
||||
#: documents/views.py:3899
|
||||
msgid "The share link bundle is still being prepared. Please try again later."
|
||||
msgstr ""
|
||||
|
||||
#: documents/views.py:3906
|
||||
#: documents/views.py:3909
|
||||
msgid "The share link bundle is unavailable."
|
||||
msgstr ""
|
||||
|
||||
|
||||
@@ -0,0 +1,22 @@
|
||||
# Generated by Django 5.2.12 on 2026-04-07 23:13
|
||||
|
||||
from django.db import migrations
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
dependencies = [
|
||||
("paperless", "0008_replace_skip_archive_file"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterModelOptions(
|
||||
name="applicationconfiguration",
|
||||
options={
|
||||
"permissions": [
|
||||
("view_global_statistics", "Can view global object counts"),
|
||||
("view_system_status", "Can view system status information"),
|
||||
],
|
||||
"verbose_name": "paperless application settings",
|
||||
},
|
||||
),
|
||||
]
|
||||
@@ -341,6 +341,10 @@ class ApplicationConfiguration(AbstractSingletonModel):
|
||||
|
||||
class Meta:
|
||||
verbose_name = _("paperless application settings")
|
||||
permissions = [
|
||||
("view_global_statistics", "Can view global object counts"),
|
||||
("view_system_status", "Can view system status information"),
|
||||
]
|
||||
|
||||
def __str__(self) -> str: # pragma: no cover
|
||||
return "ApplicationConfiguration"
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import logging
|
||||
from io import BytesIO
|
||||
|
||||
import magic
|
||||
from allauth.mfa.adapter import get_adapter as get_mfa_adapter
|
||||
@@ -11,13 +12,16 @@ from django.contrib.auth.models import Group
|
||||
from django.contrib.auth.models import Permission
|
||||
from django.contrib.auth.models import User
|
||||
from django.contrib.auth.password_validation import validate_password
|
||||
from django.core.files.uploadedfile import InMemoryUploadedFile
|
||||
from django.core.files.uploadedfile import UploadedFile
|
||||
from PIL import Image
|
||||
from rest_framework import serializers
|
||||
from rest_framework.authtoken.serializers import AuthTokenSerializer
|
||||
|
||||
from paperless.models import ApplicationConfiguration
|
||||
from paperless.network import validate_outbound_http_url
|
||||
from paperless.validators import reject_dangerous_svg
|
||||
from paperless.validators import validate_raster_image
|
||||
from paperless_mail.serialisers import ObfuscatedPasswordField
|
||||
|
||||
logger = logging.getLogger("paperless.settings")
|
||||
@@ -233,9 +237,40 @@ class ApplicationConfigurationSerializer(serializers.ModelSerializer):
|
||||
instance.app_logo.delete()
|
||||
return super().update(instance, validated_data)
|
||||
|
||||
def _sanitize_raster_image(self, file: UploadedFile) -> UploadedFile:
|
||||
try:
|
||||
data = BytesIO()
|
||||
image = Image.open(file)
|
||||
image.save(data, format=image.format)
|
||||
data.seek(0)
|
||||
|
||||
return InMemoryUploadedFile(
|
||||
file=data,
|
||||
field_name=file.field_name,
|
||||
name=file.name,
|
||||
content_type=file.content_type,
|
||||
size=data.getbuffer().nbytes,
|
||||
charset=getattr(file, "charset", None),
|
||||
)
|
||||
finally:
|
||||
image.close()
|
||||
|
||||
def validate_app_logo(self, file: UploadedFile):
|
||||
if file and magic.from_buffer(file.read(2048), mime=True) == "image/svg+xml":
|
||||
reject_dangerous_svg(file)
|
||||
"""
|
||||
Validates and sanitizes the uploaded app logo image. Model field already restricts to
|
||||
jpg/png/gif/svg.
|
||||
"""
|
||||
if file:
|
||||
mime_type = magic.from_buffer(file.read(2048), mime=True)
|
||||
|
||||
if mime_type == "image/svg+xml":
|
||||
reject_dangerous_svg(file)
|
||||
else:
|
||||
validate_raster_image(file)
|
||||
|
||||
if mime_type in {"image/jpeg", "image/png"}:
|
||||
file = self._sanitize_raster_image(file)
|
||||
|
||||
return file
|
||||
|
||||
def validate_llm_endpoint(self, value: str | None) -> str | None:
|
||||
|
||||
@@ -1,6 +1,10 @@
|
||||
from io import BytesIO
|
||||
|
||||
from django.conf import settings
|
||||
from django.core.exceptions import ValidationError
|
||||
from django.core.files.uploadedfile import UploadedFile
|
||||
from lxml import etree
|
||||
from PIL import Image
|
||||
|
||||
ALLOWED_SVG_TAGS: set[str] = {
|
||||
# Basic shapes
|
||||
@@ -254,3 +258,30 @@ def reject_dangerous_svg(file: UploadedFile) -> None:
|
||||
raise ValidationError(
|
||||
f"URI scheme not allowed in {attr_name}: must be #anchor, relative path, or data:image/*",
|
||||
)
|
||||
|
||||
|
||||
def validate_raster_image(file: UploadedFile) -> None:
|
||||
"""
|
||||
Validates that the uploaded file is a valid raster image (JPEG, PNG, etc.)
|
||||
and does not exceed maximum pixel limits.
|
||||
Raises ValidationError if the image is invalid or exceeds the allowed size.
|
||||
"""
|
||||
|
||||
file.seek(0)
|
||||
image_data = file.read()
|
||||
try:
|
||||
with Image.open(BytesIO(image_data)) as image:
|
||||
image.verify()
|
||||
|
||||
if (
|
||||
settings.MAX_IMAGE_PIXELS is not None
|
||||
and settings.MAX_IMAGE_PIXELS > 0
|
||||
and image.width * image.height > settings.MAX_IMAGE_PIXELS
|
||||
):
|
||||
raise ValidationError(
|
||||
"Uploaded logo exceeds the maximum allowed image size.",
|
||||
)
|
||||
if image.format is None: # pragma: no cover
|
||||
raise ValidationError("Invalid logo image.")
|
||||
except (OSError, Image.DecompressionBombError) as e:
|
||||
raise ValidationError("Invalid logo image.") from e
|
||||
|
||||
346
test_backend_profile.py
Normal file
346
test_backend_profile.py
Normal file
@@ -0,0 +1,346 @@
|
||||
# ruff: noqa: T201
|
||||
"""
|
||||
cProfile-based search pipeline profiling with a 20k-document dataset.
|
||||
|
||||
Run with:
|
||||
uv run pytest ../test_backend_profile.py \
|
||||
-m profiling --override-ini="addopts=" -s -v
|
||||
|
||||
Each scenario prints:
|
||||
- Wall time for the operation
|
||||
- cProfile stats sorted by cumulative time (top 25 callers)
|
||||
|
||||
This is a developer tool, not a correctness test. Nothing here should
|
||||
fail unless the code is broken.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import random
|
||||
import time
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import pytest
|
||||
from profiling import profile_cpu
|
||||
|
||||
from documents.models import Document
|
||||
from documents.search._backend import TantivyBackend
|
||||
from documents.search._backend import reset_backend
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pathlib import Path
|
||||
|
||||
# transaction=False (default): tests roll back, but the module-scoped fixture
|
||||
# commits its data outside the test transaction so it remains visible throughout.
|
||||
pytestmark = [pytest.mark.profiling, pytest.mark.django_db]
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dataset constants
|
||||
# ---------------------------------------------------------------------------
|
||||
NUM_DOCS = 20_000
|
||||
SEED = 42
|
||||
|
||||
# Terms and their approximate match rates across the corpus.
|
||||
# "rechnung" -> ~70% of docs (~14 000)
|
||||
# "mahnung" -> ~20% of docs (~4 000)
|
||||
# "kontonummer" -> ~5% of docs (~1 000)
|
||||
# "rarewort" -> ~1% of docs (~200)
|
||||
COMMON_TERM = "rechnung"
|
||||
MEDIUM_TERM = "mahnung"
|
||||
RARE_TERM = "kontonummer"
|
||||
VERY_RARE_TERM = "rarewort"
|
||||
|
||||
PAGE_SIZE = 25
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_FILLER_WORDS = [
|
||||
"dokument", # codespell:ignore
|
||||
"seite",
|
||||
"datum",
|
||||
"betrag",
|
||||
"nummer",
|
||||
"konto",
|
||||
"firma",
|
||||
"vertrag",
|
||||
"lieferant",
|
||||
"bestellung",
|
||||
"steuer",
|
||||
"mwst",
|
||||
"leistung",
|
||||
"auftrag",
|
||||
"zahlung",
|
||||
]
|
||||
|
||||
|
||||
def _build_content(rng: random.Random) -> str:
|
||||
"""Return a short paragraph with terms embedded at the desired rates."""
|
||||
words = rng.choices(_FILLER_WORDS, k=15)
|
||||
if rng.random() < 0.70:
|
||||
words.append(COMMON_TERM)
|
||||
if rng.random() < 0.20:
|
||||
words.append(MEDIUM_TERM)
|
||||
if rng.random() < 0.05:
|
||||
words.append(RARE_TERM)
|
||||
if rng.random() < 0.01:
|
||||
words.append(VERY_RARE_TERM)
|
||||
rng.shuffle(words)
|
||||
return " ".join(words)
|
||||
|
||||
|
||||
def _time(fn, *, label: str, runs: int = 3):
|
||||
"""Run *fn()* several times and report min/avg/max wall time (no cProfile)."""
|
||||
times = []
|
||||
result = None
|
||||
for _ in range(runs):
|
||||
t0 = time.perf_counter()
|
||||
result = fn()
|
||||
times.append(time.perf_counter() - t0)
|
||||
mn, avg, mx = min(times), sum(times) / len(times), max(times)
|
||||
print(
|
||||
f" {label}: min={mn * 1000:.1f}ms avg={avg * 1000:.1f}ms max={mx * 1000:.1f}ms (n={runs})",
|
||||
)
|
||||
return result
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fixtures
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def module_db(django_db_setup, django_db_blocker):
|
||||
"""Unlock the DB for the whole module (module-scoped)."""
|
||||
with django_db_blocker.unblock():
|
||||
yield
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def large_backend(tmp_path_factory, module_db) -> TantivyBackend:
|
||||
"""
|
||||
Build a 20 000-document DB + on-disk Tantivy index, shared across all
|
||||
profiling scenarios in this module. Teardown deletes all documents.
|
||||
"""
|
||||
index_path: Path = tmp_path_factory.mktemp("tantivy_profile")
|
||||
|
||||
# ---- 1. Bulk-create Document rows ----------------------------------------
|
||||
rng = random.Random(SEED)
|
||||
docs = [
|
||||
Document(
|
||||
title=f"Document {i:05d}",
|
||||
content=_build_content(rng),
|
||||
checksum=f"{i:064x}",
|
||||
pk=i + 1,
|
||||
)
|
||||
for i in range(NUM_DOCS)
|
||||
]
|
||||
t0 = time.perf_counter()
|
||||
Document.objects.bulk_create(docs, batch_size=1_000)
|
||||
db_time = time.perf_counter() - t0
|
||||
print(f"\n[setup] bulk_create {NUM_DOCS} docs: {db_time:.2f}s")
|
||||
|
||||
# ---- 2. Build Tantivy index -----------------------------------------------
|
||||
backend = TantivyBackend(path=index_path)
|
||||
backend.open()
|
||||
|
||||
t0 = time.perf_counter()
|
||||
with backend.batch_update() as batch:
|
||||
for doc in Document.objects.iterator(chunk_size=500):
|
||||
batch.add_or_update(doc)
|
||||
idx_time = time.perf_counter() - t0
|
||||
print(f"[setup] index {NUM_DOCS} docs: {idx_time:.2f}s")
|
||||
|
||||
# ---- 3. Report corpus stats -----------------------------------------------
|
||||
for term in (COMMON_TERM, MEDIUM_TERM, RARE_TERM, VERY_RARE_TERM):
|
||||
count = len(backend.search_ids(term, user=None))
|
||||
print(f"[setup] '{term}' -> {count} hits")
|
||||
|
||||
yield backend
|
||||
|
||||
# ---- Teardown ------------------------------------------------------------
|
||||
backend.close()
|
||||
reset_backend()
|
||||
Document.objects.all().delete()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Profiling tests — each scenario is a separate function so pytest can run
|
||||
# them individually or all together with -m profiling.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestSearchIdsProfile:
|
||||
"""Profile backend.search_ids() — pure Tantivy, no DB."""
|
||||
|
||||
def test_search_ids_large(self, large_backend: TantivyBackend):
|
||||
"""~14 000 hits: how long does Tantivy take to collect all IDs?"""
|
||||
profile_cpu(
|
||||
lambda: large_backend.search_ids(COMMON_TERM, user=None),
|
||||
label=f"search_ids('{COMMON_TERM}') [large result set ~14k]",
|
||||
)
|
||||
|
||||
def test_search_ids_medium(self, large_backend: TantivyBackend):
|
||||
"""~4 000 hits."""
|
||||
profile_cpu(
|
||||
lambda: large_backend.search_ids(MEDIUM_TERM, user=None),
|
||||
label=f"search_ids('{MEDIUM_TERM}') [medium result set ~4k]",
|
||||
)
|
||||
|
||||
def test_search_ids_rare(self, large_backend: TantivyBackend):
|
||||
"""~1 000 hits."""
|
||||
profile_cpu(
|
||||
lambda: large_backend.search_ids(RARE_TERM, user=None),
|
||||
label=f"search_ids('{RARE_TERM}') [rare result set ~1k]",
|
||||
)
|
||||
|
||||
|
||||
class TestIntersectAndOrderProfile:
|
||||
"""
|
||||
Profile the DB intersection step: filter(pk__in=search_ids).
|
||||
This is the 'intersect_and_order' logic from views.py.
|
||||
"""
|
||||
|
||||
def test_intersect_large(self, large_backend: TantivyBackend):
|
||||
"""Intersect 14k Tantivy IDs with all 20k ORM-visible docs."""
|
||||
all_ids = large_backend.search_ids(COMMON_TERM, user=None)
|
||||
qs = Document.objects.all()
|
||||
|
||||
print(f"\n Tantivy returned {len(all_ids)} IDs")
|
||||
|
||||
profile_cpu(
|
||||
lambda: list(qs.filter(pk__in=all_ids).values_list("pk", flat=True)),
|
||||
label=f"filter(pk__in={len(all_ids)} ids) [large, use_tantivy_sort=True path]",
|
||||
)
|
||||
|
||||
# Also time it a few times to get stable numbers
|
||||
print()
|
||||
_time(
|
||||
lambda: list(qs.filter(pk__in=all_ids).values_list("pk", flat=True)),
|
||||
label=f"filter(pk__in={len(all_ids)}) repeated",
|
||||
)
|
||||
|
||||
def test_intersect_rare(self, large_backend: TantivyBackend):
|
||||
"""Intersect ~1k Tantivy IDs — the happy path."""
|
||||
all_ids = large_backend.search_ids(RARE_TERM, user=None)
|
||||
qs = Document.objects.all()
|
||||
|
||||
print(f"\n Tantivy returned {len(all_ids)} IDs")
|
||||
|
||||
profile_cpu(
|
||||
lambda: list(qs.filter(pk__in=all_ids).values_list("pk", flat=True)),
|
||||
label=f"filter(pk__in={len(all_ids)} ids) [rare, use_tantivy_sort=True path]",
|
||||
)
|
||||
|
||||
|
||||
class TestHighlightHitsProfile:
|
||||
"""Profile backend.highlight_hits() — per-doc Tantivy lookups with BM25 scoring."""
|
||||
|
||||
def test_highlight_page1(self, large_backend: TantivyBackend):
|
||||
"""25-doc highlight for page 1 (rank_start=1)."""
|
||||
all_ids = large_backend.search_ids(COMMON_TERM, user=None)
|
||||
page_ids = all_ids[:PAGE_SIZE]
|
||||
|
||||
profile_cpu(
|
||||
lambda: large_backend.highlight_hits(
|
||||
COMMON_TERM,
|
||||
page_ids,
|
||||
rank_start=1,
|
||||
),
|
||||
label=f"highlight_hits page 1 (ids {all_ids[0]}..{all_ids[PAGE_SIZE - 1]})",
|
||||
)
|
||||
|
||||
def test_highlight_page_middle(self, large_backend: TantivyBackend):
|
||||
"""25-doc highlight for a mid-corpus page (rank_start=page_offset+1)."""
|
||||
all_ids = large_backend.search_ids(COMMON_TERM, user=None)
|
||||
mid = len(all_ids) // 2
|
||||
page_ids = all_ids[mid : mid + PAGE_SIZE]
|
||||
page_offset = mid
|
||||
|
||||
profile_cpu(
|
||||
lambda: large_backend.highlight_hits(
|
||||
COMMON_TERM,
|
||||
page_ids,
|
||||
rank_start=page_offset + 1,
|
||||
),
|
||||
label=f"highlight_hits page ~{mid // PAGE_SIZE} (offset {page_offset})",
|
||||
)
|
||||
|
||||
def test_highlight_repeated(self, large_backend: TantivyBackend):
|
||||
"""Multiple runs of page-1 highlight to see variance."""
|
||||
all_ids = large_backend.search_ids(COMMON_TERM, user=None)
|
||||
page_ids = all_ids[:PAGE_SIZE]
|
||||
|
||||
print()
|
||||
_time(
|
||||
lambda: large_backend.highlight_hits(COMMON_TERM, page_ids, rank_start=1),
|
||||
label="highlight_hits page 1",
|
||||
runs=5,
|
||||
)
|
||||
|
||||
|
||||
class TestFullPipelineProfile:
|
||||
"""
|
||||
Profile the combined pipeline as it runs in views.py:
|
||||
search_ids -> filter(pk__in) -> highlight_hits
|
||||
"""
|
||||
|
||||
def _run_pipeline(
|
||||
self,
|
||||
backend: TantivyBackend,
|
||||
term: str,
|
||||
page: int = 1,
|
||||
):
|
||||
all_ids = backend.search_ids(term, user=None)
|
||||
qs = Document.objects.all()
|
||||
visible_ids = set(qs.filter(pk__in=all_ids).values_list("pk", flat=True))
|
||||
ordered_ids = [i for i in all_ids if i in visible_ids]
|
||||
|
||||
page_offset = (page - 1) * PAGE_SIZE
|
||||
page_ids = ordered_ids[page_offset : page_offset + PAGE_SIZE]
|
||||
hits = backend.highlight_hits(
|
||||
term,
|
||||
page_ids,
|
||||
rank_start=page_offset + 1,
|
||||
)
|
||||
return ordered_ids, hits
|
||||
|
||||
def test_pipeline_large_page1(self, large_backend: TantivyBackend):
|
||||
"""Full pipeline: large result set, page 1."""
|
||||
ordered_ids, hits = profile_cpu(
|
||||
lambda: self._run_pipeline(large_backend, COMMON_TERM, page=1),
|
||||
label=f"full pipeline '{COMMON_TERM}' page 1",
|
||||
)[0]
|
||||
print(f" -> {len(ordered_ids)} total results, {len(hits)} hits on page")
|
||||
|
||||
def test_pipeline_large_page5(self, large_backend: TantivyBackend):
|
||||
"""Full pipeline: large result set, page 5."""
|
||||
ordered_ids, hits = profile_cpu(
|
||||
lambda: self._run_pipeline(large_backend, COMMON_TERM, page=5),
|
||||
label=f"full pipeline '{COMMON_TERM}' page 5",
|
||||
)[0]
|
||||
print(f" -> {len(ordered_ids)} total results, {len(hits)} hits on page")
|
||||
|
||||
def test_pipeline_rare(self, large_backend: TantivyBackend):
|
||||
"""Full pipeline: rare term, page 1 (fast path)."""
|
||||
ordered_ids, hits = profile_cpu(
|
||||
lambda: self._run_pipeline(large_backend, RARE_TERM, page=1),
|
||||
label=f"full pipeline '{RARE_TERM}' page 1",
|
||||
)[0]
|
||||
print(f" -> {len(ordered_ids)} total results, {len(hits)} hits on page")
|
||||
|
||||
def test_pipeline_repeated(self, large_backend: TantivyBackend):
|
||||
"""Repeated runs to get stable timing (no cProfile overhead)."""
|
||||
print()
|
||||
for term, label in [
|
||||
(COMMON_TERM, f"'{COMMON_TERM}' (large)"),
|
||||
(MEDIUM_TERM, f"'{MEDIUM_TERM}' (medium)"),
|
||||
(RARE_TERM, f"'{RARE_TERM}' (rare)"),
|
||||
]:
|
||||
_time(
|
||||
lambda t=term: self._run_pipeline(large_backend, t, page=1),
|
||||
label=f"full pipeline {label} page 1",
|
||||
runs=3,
|
||||
)
|
||||
605
test_classifier_profile.py
Normal file
605
test_classifier_profile.py
Normal file
@@ -0,0 +1,605 @@
|
||||
# ruff: noqa: T201
|
||||
"""
|
||||
cProfile + tracemalloc classifier profiling test.
|
||||
|
||||
Run with:
|
||||
uv run pytest ../test_classifier_profile.py \
|
||||
-m profiling --override-ini="addopts=" -s -v
|
||||
|
||||
Corpus: 5 000 documents, 40 correspondents (25 AUTO), 25 doc types (15 AUTO),
|
||||
50 tags (30 AUTO), 20 storage paths (12 AUTO).
|
||||
|
||||
Document content is generated with Faker for realistic base text, with a
|
||||
per-label fingerprint injected so the MLP has a real learning signal.
|
||||
|
||||
Scenarios:
|
||||
- train() full corpus — memory and CPU profiles
|
||||
- second train() no-op path — shows cost of the skip check
|
||||
- save()/load() round-trip — model file size and memory cost
|
||||
- _update_data_vectorizer_hash() isolated hash overhead
|
||||
- predict_*() four independent calls per document — the 4x redundant
|
||||
vectorization path used by the signal handlers
|
||||
- _vectorize() cache-miss vs cache-hit breakdown
|
||||
|
||||
Memory: tracemalloc (delta + peak + top-20 allocation sites).
|
||||
CPU: cProfile sorted by cumulative time (top 30).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import random
|
||||
import time
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import pytest
|
||||
from django.test import override_settings
|
||||
from faker import Faker
|
||||
from profiling import measure_memory
|
||||
from profiling import profile_cpu
|
||||
|
||||
from documents.classifier import DocumentClassifier
|
||||
from documents.models import Correspondent
|
||||
from documents.models import Document
|
||||
from documents.models import DocumentType
|
||||
from documents.models import MatchingModel
|
||||
from documents.models import StoragePath
|
||||
from documents.models import Tag
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pathlib import Path
|
||||
|
||||
pytestmark = [pytest.mark.profiling, pytest.mark.django_db]
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Corpus parameters
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
NUM_DOCS = 5_000
|
||||
NUM_CORRESPONDENTS = 40 # first 25 are MATCH_AUTO
|
||||
NUM_DOC_TYPES = 25 # first 15 are MATCH_AUTO
|
||||
NUM_TAGS = 50 # first 30 are MATCH_AUTO
|
||||
NUM_STORAGE_PATHS = 20 # first 12 are MATCH_AUTO
|
||||
|
||||
NUM_AUTO_CORRESPONDENTS = 25
|
||||
NUM_AUTO_DOC_TYPES = 15
|
||||
NUM_AUTO_TAGS = 30
|
||||
NUM_AUTO_STORAGE_PATHS = 12
|
||||
|
||||
SEED = 42
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Content generation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _make_label_fingerprint(
|
||||
fake: Faker,
|
||||
label_seed: int,
|
||||
n_words: int = 6,
|
||||
) -> list[str]:
|
||||
"""
|
||||
Generate a small set of unique-looking words to use as the learning
|
||||
fingerprint for a label. Each label gets its own seeded Faker so the
|
||||
fingerprints are distinct and reproducible.
|
||||
"""
|
||||
per_label_fake = Faker()
|
||||
per_label_fake.seed_instance(label_seed)
|
||||
# Mix word() and last_name() to get varied, pronounceable tokens
|
||||
words: list[str] = []
|
||||
while len(words) < n_words:
|
||||
w = per_label_fake.word().lower()
|
||||
if w not in words:
|
||||
words.append(w)
|
||||
return words
|
||||
|
||||
|
||||
def _build_fingerprints(
|
||||
num_correspondents: int,
|
||||
num_doc_types: int,
|
||||
num_tags: int,
|
||||
num_paths: int,
|
||||
) -> tuple[list[list[str]], list[list[str]], list[list[str]], list[list[str]]]:
|
||||
"""Pre-generate per-label fingerprints. Expensive once, free to reuse."""
|
||||
fake = Faker()
|
||||
# Use deterministic seeds offset by type so fingerprints don't collide
|
||||
corr_fps = [
|
||||
_make_label_fingerprint(fake, 1_000 + i) for i in range(num_correspondents)
|
||||
]
|
||||
dtype_fps = [_make_label_fingerprint(fake, 2_000 + i) for i in range(num_doc_types)]
|
||||
tag_fps = [_make_label_fingerprint(fake, 3_000 + i) for i in range(num_tags)]
|
||||
path_fps = [_make_label_fingerprint(fake, 4_000 + i) for i in range(num_paths)]
|
||||
return corr_fps, dtype_fps, tag_fps, path_fps
|
||||
|
||||
|
||||
def _build_content(
|
||||
fake: Faker,
|
||||
corr_fp: list[str] | None,
|
||||
dtype_fp: list[str] | None,
|
||||
tag_fps: list[list[str]],
|
||||
path_fp: list[str] | None,
|
||||
) -> str:
|
||||
"""
|
||||
Combine a Faker paragraph (realistic base text) with per-label
|
||||
fingerprint words so the classifier has a genuine learning signal.
|
||||
"""
|
||||
# 3-sentence paragraph provides realistic vocabulary
|
||||
base = fake.paragraph(nb_sentences=3)
|
||||
|
||||
extras: list[str] = []
|
||||
if corr_fp:
|
||||
extras.extend(corr_fp)
|
||||
if dtype_fp:
|
||||
extras.extend(dtype_fp)
|
||||
for fp in tag_fps:
|
||||
extras.extend(fp)
|
||||
if path_fp:
|
||||
extras.extend(path_fp)
|
||||
|
||||
if extras:
|
||||
return base + " " + " ".join(extras)
|
||||
return base
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Module-scoped corpus fixture
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def module_db(django_db_setup, django_db_blocker):
|
||||
"""Unlock the DB for the whole module (module-scoped)."""
|
||||
with django_db_blocker.unblock():
|
||||
yield
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def classifier_corpus(tmp_path_factory, module_db):
|
||||
"""
|
||||
Build the full 5 000-document corpus once for all profiling tests.
|
||||
|
||||
Label objects are created individually (small number), documents are
|
||||
bulk-inserted, and tag M2M rows go through the through-table.
|
||||
|
||||
Yields a dict with the model path and a sample content string for
|
||||
prediction tests. All rows are deleted on teardown.
|
||||
"""
|
||||
model_path: Path = tmp_path_factory.mktemp("cls_profile") / "model.pickle"
|
||||
|
||||
with override_settings(MODEL_FILE=model_path):
|
||||
fake = Faker()
|
||||
Faker.seed(SEED)
|
||||
rng = random.Random(SEED)
|
||||
|
||||
# Pre-generate fingerprints for all labels
|
||||
print("\n[setup] Generating label fingerprints...")
|
||||
corr_fps, dtype_fps, tag_fps, path_fps = _build_fingerprints(
|
||||
NUM_CORRESPONDENTS,
|
||||
NUM_DOC_TYPES,
|
||||
NUM_TAGS,
|
||||
NUM_STORAGE_PATHS,
|
||||
)
|
||||
|
||||
# -----------------------------------------------------------------
|
||||
# 1. Create label objects
|
||||
# -----------------------------------------------------------------
|
||||
print(f"[setup] Creating {NUM_CORRESPONDENTS} correspondents...")
|
||||
correspondents: list[Correspondent] = []
|
||||
for i in range(NUM_CORRESPONDENTS):
|
||||
algo = (
|
||||
MatchingModel.MATCH_AUTO
|
||||
if i < NUM_AUTO_CORRESPONDENTS
|
||||
else MatchingModel.MATCH_NONE
|
||||
)
|
||||
correspondents.append(
|
||||
Correspondent.objects.create(
|
||||
name=fake.company(),
|
||||
matching_algorithm=algo,
|
||||
),
|
||||
)
|
||||
|
||||
print(f"[setup] Creating {NUM_DOC_TYPES} document types...")
|
||||
doc_types: list[DocumentType] = []
|
||||
for i in range(NUM_DOC_TYPES):
|
||||
algo = (
|
||||
MatchingModel.MATCH_AUTO
|
||||
if i < NUM_AUTO_DOC_TYPES
|
||||
else MatchingModel.MATCH_NONE
|
||||
)
|
||||
doc_types.append(
|
||||
DocumentType.objects.create(
|
||||
name=fake.bs()[:64],
|
||||
matching_algorithm=algo,
|
||||
),
|
||||
)
|
||||
|
||||
print(f"[setup] Creating {NUM_TAGS} tags...")
|
||||
tags: list[Tag] = []
|
||||
for i in range(NUM_TAGS):
|
||||
algo = (
|
||||
MatchingModel.MATCH_AUTO
|
||||
if i < NUM_AUTO_TAGS
|
||||
else MatchingModel.MATCH_NONE
|
||||
)
|
||||
tags.append(
|
||||
Tag.objects.create(
|
||||
name=f"{fake.word()} {i}",
|
||||
matching_algorithm=algo,
|
||||
is_inbox_tag=False,
|
||||
),
|
||||
)
|
||||
|
||||
print(f"[setup] Creating {NUM_STORAGE_PATHS} storage paths...")
|
||||
storage_paths: list[StoragePath] = []
|
||||
for i in range(NUM_STORAGE_PATHS):
|
||||
algo = (
|
||||
MatchingModel.MATCH_AUTO
|
||||
if i < NUM_AUTO_STORAGE_PATHS
|
||||
else MatchingModel.MATCH_NONE
|
||||
)
|
||||
storage_paths.append(
|
||||
StoragePath.objects.create(
|
||||
name=fake.word(),
|
||||
path=f"{fake.word()}/{fake.word()}/{{title}}",
|
||||
matching_algorithm=algo,
|
||||
),
|
||||
)
|
||||
|
||||
# -----------------------------------------------------------------
|
||||
# 2. Build document rows and M2M assignments
|
||||
# -----------------------------------------------------------------
|
||||
print(f"[setup] Building {NUM_DOCS} document rows...")
|
||||
doc_rows: list[Document] = []
|
||||
doc_tag_map: list[tuple[int, int]] = [] # (doc_position, tag_index)
|
||||
|
||||
for i in range(NUM_DOCS):
|
||||
corr_idx = (
|
||||
rng.randrange(NUM_CORRESPONDENTS) if rng.random() < 0.80 else None
|
||||
)
|
||||
dt_idx = rng.randrange(NUM_DOC_TYPES) if rng.random() < 0.80 else None
|
||||
sp_idx = rng.randrange(NUM_STORAGE_PATHS) if rng.random() < 0.70 else None
|
||||
|
||||
# 1-4 tags; most documents get at least one
|
||||
n_tags = rng.randint(1, 4) if rng.random() < 0.85 else 0
|
||||
assigned_tag_indices = rng.sample(range(NUM_TAGS), min(n_tags, NUM_TAGS))
|
||||
|
||||
content = _build_content(
|
||||
fake,
|
||||
corr_fp=corr_fps[corr_idx] if corr_idx is not None else None,
|
||||
dtype_fp=dtype_fps[dt_idx] if dt_idx is not None else None,
|
||||
tag_fps=[tag_fps[ti] for ti in assigned_tag_indices],
|
||||
path_fp=path_fps[sp_idx] if sp_idx is not None else None,
|
||||
)
|
||||
|
||||
doc_rows.append(
|
||||
Document(
|
||||
title=fake.sentence(nb_words=5),
|
||||
content=content,
|
||||
checksum=f"{i:064x}",
|
||||
correspondent=correspondents[corr_idx]
|
||||
if corr_idx is not None
|
||||
else None,
|
||||
document_type=doc_types[dt_idx] if dt_idx is not None else None,
|
||||
storage_path=storage_paths[sp_idx] if sp_idx is not None else None,
|
||||
),
|
||||
)
|
||||
for ti in assigned_tag_indices:
|
||||
doc_tag_map.append((i, ti))
|
||||
|
||||
t0 = time.perf_counter()
|
||||
Document.objects.bulk_create(doc_rows, batch_size=500)
|
||||
print(
|
||||
f"[setup] bulk_create {NUM_DOCS} documents: {time.perf_counter() - t0:.2f}s",
|
||||
)
|
||||
|
||||
# -----------------------------------------------------------------
|
||||
# 3. Bulk-create M2M through-table rows
|
||||
# -----------------------------------------------------------------
|
||||
created_docs = list(Document.objects.order_by("pk"))
|
||||
through_rows = [
|
||||
Document.tags.through(
|
||||
document_id=created_docs[pos].pk,
|
||||
tag_id=tags[ti].pk,
|
||||
)
|
||||
for pos, ti in doc_tag_map
|
||||
if pos < len(created_docs)
|
||||
]
|
||||
t0 = time.perf_counter()
|
||||
Document.tags.through.objects.bulk_create(
|
||||
through_rows,
|
||||
batch_size=1_000,
|
||||
ignore_conflicts=True,
|
||||
)
|
||||
print(
|
||||
f"[setup] bulk_create {len(through_rows)} tag M2M rows: "
|
||||
f"{time.perf_counter() - t0:.2f}s",
|
||||
)
|
||||
|
||||
# Sample content for prediction tests
|
||||
sample_content = _build_content(
|
||||
fake,
|
||||
corr_fp=corr_fps[0],
|
||||
dtype_fp=dtype_fps[0],
|
||||
tag_fps=[tag_fps[0], tag_fps[1], tag_fps[5]],
|
||||
path_fp=path_fps[0],
|
||||
)
|
||||
|
||||
yield {
|
||||
"model_path": model_path,
|
||||
"sample_content": sample_content,
|
||||
}
|
||||
|
||||
# Teardown
|
||||
print("\n[teardown] Removing corpus...")
|
||||
Document.objects.all().delete()
|
||||
Correspondent.objects.all().delete()
|
||||
DocumentType.objects.all().delete()
|
||||
Tag.objects.all().delete()
|
||||
StoragePath.objects.all().delete()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Training profiles
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestClassifierTrainingProfile:
|
||||
"""Profile DocumentClassifier.train() on the full corpus."""
|
||||
|
||||
def test_train_memory(self, classifier_corpus, tmp_path):
|
||||
"""
|
||||
Peak memory allocated during train().
|
||||
tracemalloc reports the delta and top allocation sites.
|
||||
"""
|
||||
model_path = tmp_path / "model.pickle"
|
||||
with override_settings(MODEL_FILE=model_path):
|
||||
classifier = DocumentClassifier()
|
||||
|
||||
result, _, _ = measure_memory(
|
||||
classifier.train,
|
||||
label=(
|
||||
f"train() [{NUM_DOCS} docs | "
|
||||
f"{NUM_CORRESPONDENTS} correspondents ({NUM_AUTO_CORRESPONDENTS} AUTO) | "
|
||||
f"{NUM_DOC_TYPES} doc types ({NUM_AUTO_DOC_TYPES} AUTO) | "
|
||||
f"{NUM_TAGS} tags ({NUM_AUTO_TAGS} AUTO) | "
|
||||
f"{NUM_STORAGE_PATHS} paths ({NUM_AUTO_STORAGE_PATHS} AUTO)]"
|
||||
),
|
||||
)
|
||||
assert result is True, "train() must return True on first run"
|
||||
|
||||
print("\n Classifiers trained:")
|
||||
print(
|
||||
f" tags_classifier: {classifier.tags_classifier is not None}",
|
||||
)
|
||||
print(
|
||||
f" correspondent_classifier: {classifier.correspondent_classifier is not None}",
|
||||
)
|
||||
print(
|
||||
f" document_type_classifier: {classifier.document_type_classifier is not None}",
|
||||
)
|
||||
print(
|
||||
f" storage_path_classifier: {classifier.storage_path_classifier is not None}",
|
||||
)
|
||||
if classifier.data_vectorizer is not None:
|
||||
vocab_size = len(classifier.data_vectorizer.vocabulary_)
|
||||
print(f" vocabulary size: {vocab_size} terms")
|
||||
|
||||
def test_train_cpu(self, classifier_corpus, tmp_path):
|
||||
"""
|
||||
CPU profile of train() — shows time spent in DB queries,
|
||||
CountVectorizer.fit_transform(), and four MLPClassifier.fit() calls.
|
||||
"""
|
||||
model_path = tmp_path / "model_cpu.pickle"
|
||||
with override_settings(MODEL_FILE=model_path):
|
||||
classifier = DocumentClassifier()
|
||||
profile_cpu(
|
||||
classifier.train,
|
||||
label=f"train() [{NUM_DOCS} docs]",
|
||||
top=30,
|
||||
)
|
||||
|
||||
def test_train_second_call_noop(self, classifier_corpus, tmp_path):
|
||||
"""
|
||||
No-op path: second train() on unchanged data should return False.
|
||||
Still queries the DB to build the hash — shown here as the remaining cost.
|
||||
"""
|
||||
model_path = tmp_path / "model_noop.pickle"
|
||||
with override_settings(MODEL_FILE=model_path):
|
||||
classifier = DocumentClassifier()
|
||||
|
||||
t0 = time.perf_counter()
|
||||
classifier.train()
|
||||
first_ms = (time.perf_counter() - t0) * 1000
|
||||
|
||||
result, second_elapsed = profile_cpu(
|
||||
classifier.train,
|
||||
label="train() second call (no-op — same data unchanged)",
|
||||
top=20,
|
||||
)
|
||||
assert result is False, "second train() should skip and return False"
|
||||
|
||||
print(f"\n First train: {first_ms:.1f} ms (full fit)")
|
||||
print(f" Second train: {second_elapsed * 1000:.1f} ms (skip)")
|
||||
print(f" Speedup: {first_ms / (second_elapsed * 1000):.1f}x")
|
||||
|
||||
def test_vectorizer_hash_cost(self, classifier_corpus, tmp_path):
|
||||
"""
|
||||
Isolate _update_data_vectorizer_hash() — pickles the entire
|
||||
CountVectorizer just to SHA256 it. Called at both save and load.
|
||||
"""
|
||||
import pickle
|
||||
|
||||
model_path = tmp_path / "model_hash.pickle"
|
||||
with override_settings(MODEL_FILE=model_path):
|
||||
classifier = DocumentClassifier()
|
||||
classifier.train()
|
||||
|
||||
profile_cpu(
|
||||
classifier._update_data_vectorizer_hash,
|
||||
label="_update_data_vectorizer_hash() [pickle.dumps vectorizer + sha256]",
|
||||
top=10,
|
||||
)
|
||||
|
||||
pickled_size = len(pickle.dumps(classifier.data_vectorizer))
|
||||
vocab_size = len(classifier.data_vectorizer.vocabulary_)
|
||||
print(f"\n Vocabulary size: {vocab_size} terms")
|
||||
print(f" Pickled vectorizer: {pickled_size / 1024:.1f} KiB")
|
||||
|
||||
def test_save_load_roundtrip(self, classifier_corpus, tmp_path):
|
||||
"""
|
||||
Profile save() and load() — model file size directly reflects how
|
||||
much memory the classifier occupies on disk (and roughly in RAM).
|
||||
"""
|
||||
model_path = tmp_path / "model_saveload.pickle"
|
||||
with override_settings(MODEL_FILE=model_path):
|
||||
classifier = DocumentClassifier()
|
||||
classifier.train()
|
||||
|
||||
_, save_peak, _ = measure_memory(
|
||||
classifier.save,
|
||||
label="save() [pickle.dumps + HMAC + atomic rename]",
|
||||
)
|
||||
|
||||
file_size_kib = model_path.stat().st_size / 1024
|
||||
print(f"\n Model file size: {file_size_kib:.1f} KiB")
|
||||
|
||||
classifier2 = DocumentClassifier()
|
||||
_, load_peak, _ = measure_memory(
|
||||
classifier2.load,
|
||||
label="load() [read file + verify HMAC + pickle.loads]",
|
||||
)
|
||||
|
||||
print("\n Summary:")
|
||||
print(f" Model file size: {file_size_kib:.1f} KiB")
|
||||
print(f" Save peak memory: {save_peak:.1f} KiB")
|
||||
print(f" Load peak memory: {load_peak:.1f} KiB")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Prediction profiles
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestClassifierPredictionProfile:
|
||||
"""
|
||||
Profile the four predict_*() methods — specifically the redundant
|
||||
per-call vectorization overhead from the signal handler pattern.
|
||||
"""
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def trained_classifier(self, classifier_corpus, tmp_path):
|
||||
model_path = tmp_path / "model_pred.pickle"
|
||||
self._ctx = override_settings(MODEL_FILE=model_path)
|
||||
self._ctx.enable()
|
||||
self.classifier = DocumentClassifier()
|
||||
self.classifier.train()
|
||||
self.content = classifier_corpus["sample_content"]
|
||||
yield
|
||||
self._ctx.disable()
|
||||
|
||||
def test_predict_all_four_separately_cpu(self):
|
||||
"""
|
||||
Profile all four predict_*() calls in the order the signal handlers
|
||||
fire them. Call 1 is a cache miss; calls 2-4 hit the locmem cache
|
||||
but still pay sha256 + pickle.loads each time.
|
||||
"""
|
||||
from django.core.cache import caches
|
||||
|
||||
caches["read-cache"].clear()
|
||||
|
||||
content = self.content
|
||||
print(f"\n Content length: {len(content)} chars")
|
||||
|
||||
calls = [
|
||||
("predict_correspondent", self.classifier.predict_correspondent),
|
||||
("predict_document_type", self.classifier.predict_document_type),
|
||||
("predict_tags", self.classifier.predict_tags),
|
||||
("predict_storage_path", self.classifier.predict_storage_path),
|
||||
]
|
||||
|
||||
timings: list[tuple[str, float]] = []
|
||||
for name, fn in calls:
|
||||
_, elapsed = profile_cpu(
|
||||
lambda f=fn: f(content),
|
||||
label=f"{name}() [call {len(timings) + 1}/4]",
|
||||
top=15,
|
||||
)
|
||||
timings.append((name, elapsed * 1000))
|
||||
|
||||
print("\n Per-call timings (sequential, locmem cache):")
|
||||
for name, ms in timings:
|
||||
print(f" {name:<32s} {ms:8.3f} ms")
|
||||
print(f" {'TOTAL':<32s} {sum(t for _, t in timings):8.3f} ms")
|
||||
|
||||
def test_predict_all_four_memory(self):
|
||||
"""
|
||||
Memory allocated for the full four-prediction sequence, both cold
|
||||
and warm, to show pickle serialization allocation per call.
|
||||
"""
|
||||
from django.core.cache import caches
|
||||
|
||||
content = self.content
|
||||
calls = [
|
||||
self.classifier.predict_correspondent,
|
||||
self.classifier.predict_document_type,
|
||||
self.classifier.predict_tags,
|
||||
self.classifier.predict_storage_path,
|
||||
]
|
||||
|
||||
caches["read-cache"].clear()
|
||||
measure_memory(
|
||||
lambda: [fn(content) for fn in calls],
|
||||
label="all four predict_*() [cache COLD — first call misses]",
|
||||
)
|
||||
|
||||
measure_memory(
|
||||
lambda: [fn(content) for fn in calls],
|
||||
label="all four predict_*() [cache WARM — all calls hit]",
|
||||
)
|
||||
|
||||
def test_vectorize_cache_miss_vs_hit(self):
|
||||
"""
|
||||
Isolate the cost of a cache miss (sha256 + transform + pickle.dumps)
|
||||
vs a cache hit (sha256 + pickle.loads).
|
||||
"""
|
||||
from django.core.cache import caches
|
||||
|
||||
read_cache = caches["read-cache"]
|
||||
content = self.content
|
||||
|
||||
read_cache.clear()
|
||||
_, miss_elapsed = profile_cpu(
|
||||
lambda: self.classifier._vectorize(content),
|
||||
label="_vectorize() [MISS: sha256 + transform + pickle.dumps]",
|
||||
top=15,
|
||||
)
|
||||
|
||||
_, hit_elapsed = profile_cpu(
|
||||
lambda: self.classifier._vectorize(content),
|
||||
label="_vectorize() [HIT: sha256 + pickle.loads]",
|
||||
top=15,
|
||||
)
|
||||
|
||||
print(f"\n Cache miss: {miss_elapsed * 1000:.3f} ms")
|
||||
print(f" Cache hit: {hit_elapsed * 1000:.3f} ms")
|
||||
print(f" Hit is {miss_elapsed / hit_elapsed:.1f}x faster than miss")
|
||||
|
||||
def test_content_hash_overhead(self):
|
||||
"""
|
||||
Micro-benchmark the sha256 of the content string — paid on every
|
||||
_vectorize() call regardless of cache state, including x4 per doc.
|
||||
"""
|
||||
import hashlib
|
||||
|
||||
content = self.content
|
||||
encoded = content.encode()
|
||||
runs = 5_000
|
||||
|
||||
t0 = time.perf_counter()
|
||||
for _ in range(runs):
|
||||
hashlib.sha256(encoded).hexdigest()
|
||||
us_per_call = (time.perf_counter() - t0) / runs * 1_000_000
|
||||
|
||||
print(f"\n Content: {len(content)} chars / {len(encoded)} bytes")
|
||||
print(f" sha256 cost per call: {us_per_call:.2f} us (avg over {runs} runs)")
|
||||
print(f" x4 calls per document: {us_per_call * 4:.2f} us total overhead")
|
||||
293
test_doclist_profile.py
Normal file
293
test_doclist_profile.py
Normal file
@@ -0,0 +1,293 @@
|
||||
"""
|
||||
Document list API profiling — no search, pure ORM path.
|
||||
|
||||
Run with:
|
||||
uv run pytest ../test_doclist_profile.py \
|
||||
-m profiling --override-ini="addopts=" -s -v
|
||||
|
||||
Corpus: 5 000 documents, 30 correspondents, 20 doc types, 80 tags,
|
||||
~500 notes (10 %), 10 custom fields with instances on ~50 % of docs.
|
||||
|
||||
Scenarios
|
||||
---------
|
||||
TestDocListProfile
|
||||
- test_list_default_ordering GET /api/documents/ created desc, page 1, page_size=25
|
||||
- test_list_title_ordering same with ordering=title
|
||||
- test_list_page_size_comparison page_size=10 / 25 / 100 in sequence
|
||||
- test_list_detail_fields GET /api/documents/{id}/ — single document serializer cost
|
||||
- test_list_cpu_profile cProfile of one list request
|
||||
|
||||
TestSelectionDataProfile
|
||||
- test_selection_data_unfiltered _get_selection_data_for_queryset(all docs) in isolation
|
||||
- test_selection_data_via_api GET /api/documents/?include_selection_data=true
|
||||
- test_selection_data_filtered filtered vs unfiltered COUNT query comparison
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime
|
||||
import random
|
||||
import time
|
||||
|
||||
import pytest
|
||||
from django.contrib.auth.models import User
|
||||
from faker import Faker
|
||||
from profiling import profile_block
|
||||
from profiling import profile_cpu
|
||||
from rest_framework.test import APIClient
|
||||
|
||||
from documents.models import Correspondent
|
||||
from documents.models import CustomField
|
||||
from documents.models import CustomFieldInstance
|
||||
from documents.models import Document
|
||||
from documents.models import DocumentType
|
||||
from documents.models import Note
|
||||
from documents.models import Tag
|
||||
from documents.views import DocumentViewSet
|
||||
|
||||
pytestmark = [pytest.mark.profiling, pytest.mark.django_db]
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Corpus parameters
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
NUM_DOCS = 5_000
|
||||
NUM_CORRESPONDENTS = 30
|
||||
NUM_DOC_TYPES = 20
|
||||
NUM_TAGS = 80
|
||||
NOTE_FRACTION = 0.10
|
||||
CUSTOM_FIELD_COUNT = 10
|
||||
CUSTOM_FIELD_FRACTION = 0.50
|
||||
PAGE_SIZE = 25
|
||||
SEED = 42
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Module-scoped corpus fixture
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def module_db(django_db_setup, django_db_blocker):
|
||||
"""Unlock the DB for the whole module (module-scoped)."""
|
||||
with django_db_blocker.unblock():
|
||||
yield
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def doclist_corpus(module_db):
    """
    Build a 5 000-document corpus with tags, notes, custom fields, correspondents,
    and doc types. All objects are deleted on teardown.

    Yields a dict with the superuser ``owner``, ``first_doc_pk`` (for detail
    requests), and the ``tags`` list used by the selection-data tests.
    """
    # Seed both Faker and a private Random so the corpus is deterministic.
    # NOTE(review): the exact generated values depend on the order of draws
    # below — do not reorder the fake/rng calls.
    fake = Faker()
    Faker.seed(SEED)
    rng = random.Random(SEED)

    print(f"\n[setup] Creating {NUM_CORRESPONDENTS} correspondents...")  # noqa: T201
    correspondents = [
        Correspondent.objects.create(name=f"dlcorp-{i}-{fake.company()}"[:128])
        for i in range(NUM_CORRESPONDENTS)
    ]

    print(f"[setup] Creating {NUM_DOC_TYPES} doc types...")  # noqa: T201
    doc_types = [
        DocumentType.objects.create(name=f"dltype-{i}-{fake.word()}"[:128])
        for i in range(NUM_DOC_TYPES)
    ]

    print(f"[setup] Creating {NUM_TAGS} tags...")  # noqa: T201
    tags = [
        Tag.objects.create(name=f"dltag-{i}-{fake.word()}"[:100])
        for i in range(NUM_TAGS)
    ]

    print(f"[setup] Creating {CUSTOM_FIELD_COUNT} custom fields...")  # noqa: T201
    custom_fields = [
        CustomField.objects.create(
            name=f"Field {i}",
            data_type=CustomField.FieldDataType.STRING,
        )
        for i in range(CUSTOM_FIELD_COUNT)
    ]

    note_user = User.objects.create_user(username="doclistnoteuser", password="x")
    owner = User.objects.create_superuser(username="doclistowner", password="admin")

    print(f"[setup] Building {NUM_DOCS} document rows...")  # noqa: T201
    base_date = datetime.date(2018, 1, 1)
    raw_docs = []
    for i in range(NUM_DOCS):
        # Spread created dates over ~6 years so ordering/filtering is realistic.
        day_offset = rng.randint(0, 6 * 365)
        raw_docs.append(
            Document(
                title=fake.sentence(nb_words=rng.randint(3, 8)).rstrip("."),
                content="\n\n".join(
                    fake.paragraph(nb_sentences=rng.randint(2, 5))
                    for _ in range(rng.randint(1, 3))
                ),
                checksum=f"DL{i:07d}",
                # Padding the choice pool with None yields docs that have no
                # correspondent (~1/7) or no document type (~1/6).
                correspondent=rng.choice(correspondents + [None] * 5),
                document_type=rng.choice(doc_types + [None] * 4),
                created=base_date + datetime.timedelta(days=day_offset),
                owner=owner if rng.random() < 0.8 else None,
            ),
        )
    t0 = time.perf_counter()
    documents = Document.objects.bulk_create(raw_docs)
    print(f"[setup] bulk_create {NUM_DOCS} docs: {time.perf_counter() - t0:.2f}s")  # noqa: T201

    # M2M rows cannot go through bulk_create above; assign 0-5 tags per doc.
    t0 = time.perf_counter()
    for doc in documents:
        k = rng.randint(0, 5)
        if k:
            doc.tags.add(*rng.sample(tags, k))
    print(f"[setup] tag M2M assignments: {time.perf_counter() - t0:.2f}s")  # noqa: T201

    # ~10 % of documents get exactly one note each.
    note_docs = rng.sample(documents, int(NUM_DOCS * NOTE_FRACTION))
    Note.objects.bulk_create(
        [
            Note(
                document=doc,
                note=fake.sentence(nb_words=rng.randint(4, 15)),
                user=note_user,
            )
            for doc in note_docs
        ],
    )

    # ~50 % of documents get exactly one custom field value each.
    cf_docs = rng.sample(documents, int(NUM_DOCS * CUSTOM_FIELD_FRACTION))
    CustomFieldInstance.objects.bulk_create(
        [
            CustomFieldInstance(
                document=doc,
                field=rng.choice(custom_fields),
                value_text=fake.word(),
            )
            for doc in cf_docs
        ],
    )

    first_doc_pk = documents[0].pk

    yield {"owner": owner, "first_doc_pk": first_doc_pk, "tags": tags}

    # Teardown: wipe everything the fixture created (and any cascades).
    print("\n[teardown] Removing doclist corpus...")  # noqa: T201
    Document.objects.all().delete()
    Correspondent.objects.all().delete()
    DocumentType.objects.all().delete()
    Tag.objects.all().delete()
    CustomField.objects.all().delete()
    User.objects.filter(username__in=["doclistnoteuser", "doclistowner"]).delete()
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# TestDocListProfile
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestDocListProfile:
    """Profile GET /api/documents/ — pure ORM path, no Tantivy."""

    @pytest.fixture(autouse=True)
    def _client(self, doclist_corpus):
        # Authenticated client reused by every test in the class.
        self.client = APIClient()
        self.client.force_authenticate(user=doclist_corpus["owner"])
        self.first_doc_pk = doclist_corpus["first_doc_pk"]

    def test_list_default_ordering(self):
        """GET /api/documents/ default ordering (-created), page 1, page_size=25."""
        label = f"GET /api/documents/ default ordering [page_size={PAGE_SIZE}]"
        url = f"/api/documents/?page=1&page_size={PAGE_SIZE}"
        with profile_block(label):
            resp = self.client.get(url)
        assert resp.status_code == 200

    def test_list_title_ordering(self):
        """GET /api/documents/ ordered by title — tests ORM sort path."""
        label = f"GET /api/documents/?ordering=title [page_size={PAGE_SIZE}]"
        url = f"/api/documents/?ordering=title&page=1&page_size={PAGE_SIZE}"
        with profile_block(label):
            resp = self.client.get(url)
        assert resp.status_code == 200

    def test_list_page_size_comparison(self):
        """Compare serializer cost at page_size=10, 25, 100."""
        for size in (10, 25, 100):
            with profile_block(f"GET /api/documents/ [page_size={size}]"):
                resp = self.client.get(f"/api/documents/?page=1&page_size={size}")
            assert resp.status_code == 200

    def test_list_detail_fields(self):
        """GET /api/documents/{id}/ — per-doc serializer cost with all relations."""
        pk = self.first_doc_pk
        with profile_block(f"GET /api/documents/{pk}/ — single doc serializer"):
            resp = self.client.get(f"/api/documents/{pk}/")
        assert resp.status_code == 200

    def test_list_cpu_profile(self):
        """cProfile of one list request — surfaces hot frames in serializer."""
        url = f"/api/documents/?page=1&page_size={PAGE_SIZE}"
        profile_cpu(
            lambda: self.client.get(url),
            label=f"GET /api/documents/ cProfile [page_size={PAGE_SIZE}]",
            top=30,
        )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# TestSelectionDataProfile
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestSelectionDataProfile:
    """Profile _get_selection_data_for_queryset — the 5+ COUNT queries per request."""

    @pytest.fixture(autouse=True)
    def _setup(self, doclist_corpus):
        self.client = APIClient()
        self.client.force_authenticate(user=doclist_corpus["owner"])
        self.tags = doclist_corpus["tags"]

    def test_selection_data_unfiltered(self):
        """Call _get_selection_data_for_queryset(all docs) directly — COUNT queries in isolation."""
        everything = Document.objects.all()
        with profile_block("_get_selection_data_for_queryset(all docs) — direct call"):
            DocumentViewSet()._get_selection_data_for_queryset(everything)

    def test_selection_data_via_api(self):
        """Full API round-trip with include_selection_data=true."""
        label = f"GET /api/documents/?include_selection_data=true [page_size={PAGE_SIZE}]"
        url = f"/api/documents/?page=1&page_size={PAGE_SIZE}&include_selection_data=true"
        with profile_block(label):
            resp = self.client.get(url)
        assert resp.status_code == 200
        assert "selection_data" in resp.data

    def test_selection_data_filtered(self):
        """selection_data on a tag-filtered queryset — filtered COUNT vs unfiltered."""
        tag = self.tags[0]
        viewset = DocumentViewSet()
        by_tag = Document.objects.filter(tags=tag)
        everything = Document.objects.all()

        print(f"\n Tag '{tag.name}' matches {by_tag.count()} docs")  # noqa: T201

        with profile_block("_get_selection_data_for_queryset(unfiltered)"):
            viewset._get_selection_data_for_queryset(everything)

        with profile_block("_get_selection_data_for_queryset(filtered by tag)"):
            viewset._get_selection_data_for_queryset(by_tag)
|
||||
284
test_matching_profile.py
Normal file
284
test_matching_profile.py
Normal file
@@ -0,0 +1,284 @@
|
||||
"""
|
||||
Matching pipeline profiling.
|
||||
|
||||
Run with:
|
||||
uv run pytest ../test_matching_profile.py \
|
||||
-m profiling --override-ini="addopts=" -s -v
|
||||
|
||||
Corpus: 1 document + 50 correspondents, 100 tags, 25 doc types, 20 storage
|
||||
paths. Labels are spread across all six matching algorithms
|
||||
(NONE, ANY, ALL, LITERAL, REGEX, FUZZY, AUTO).
|
||||
|
||||
Classifier is passed as None -- MATCH_AUTO models skip prediction gracefully,
|
||||
which is correct for isolating the ORM query and Python-side evaluation cost.
|
||||
|
||||
Scenarios
|
||||
---------
|
||||
TestMatchingPipelineProfile
|
||||
- test_match_correspondents 50 correspondents, algorithm mix
|
||||
- test_match_tags 100 tags
|
||||
- test_match_document_types 25 doc types
|
||||
- test_match_storage_paths 20 storage paths
|
||||
- test_full_match_sequence all four in order (cumulative consumption cost)
|
||||
- test_algorithm_breakdown each MATCH_* algorithm in isolation
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import random
|
||||
|
||||
import pytest
|
||||
from faker import Faker
|
||||
from profiling import profile_block
|
||||
|
||||
from documents.matching import match_correspondents
|
||||
from documents.matching import match_document_types
|
||||
from documents.matching import match_storage_paths
|
||||
from documents.matching import match_tags
|
||||
from documents.models import Correspondent
|
||||
from documents.models import Document
|
||||
from documents.models import DocumentType
|
||||
from documents.models import MatchingModel
|
||||
from documents.models import StoragePath
|
||||
from documents.models import Tag
|
||||
|
||||
# Profiling tests: excluded from the default run, require database access.
pytestmark = [pytest.mark.profiling, pytest.mark.django_db]

NUM_CORRESPONDENTS = 50  # correspondents with a mix of matching algorithms
NUM_TAGS = 100  # tags — the densest matching-model set in real installs
NUM_DOC_TYPES = 25  # document types
NUM_STORAGE_PATHS = 20  # storage paths
SEED = 42  # fixed seed so Faker/random output is deterministic

# Algorithm distribution across labels (cycles through in order)
_ALGORITHMS = [
    MatchingModel.MATCH_NONE,
    MatchingModel.MATCH_ANY,
    MatchingModel.MATCH_ALL,
    MatchingModel.MATCH_LITERAL,
    MatchingModel.MATCH_REGEX,
    MatchingModel.MATCH_FUZZY,
    MatchingModel.MATCH_AUTO,
]
|
||||
|
||||
|
||||
def _algo(i: int) -> int:
    """Return the matching algorithm for label index *i* (round-robin over _ALGORITHMS)."""
    idx = i % len(_ALGORITHMS)
    return _ALGORITHMS[idx]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Module-scoped corpus fixture
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def module_db(django_db_setup, django_db_blocker):
    """Hold the database unblocked for the lifetime of this test module."""
    unblock_ctx = django_db_blocker.unblock()
    with unblock_ctx:
        yield
|
||||
|
||||
|
||||
def _fake_match_text(fake: Faker, algo: int) -> str:
    """Generate match text for *algo*, mirroring the draw order of the
    previous inline code exactly (Faker.seed makes order significant).

    NONE/AUTO get an empty match, REGEX a word-boundary pattern, everything
    else a single fake word.
    """
    text = (
        fake.word()
        if algo not in (MatchingModel.MATCH_NONE, MatchingModel.MATCH_AUTO)
        else ""
    )
    if algo == MatchingModel.MATCH_REGEX:
        # Draws a second word on purpose so the seeded Faker stream stays
        # aligned with the pre-refactor behavior.
        text = r"\b" + fake.word() + r"\b"
    return text


@pytest.fixture(scope="module")
def matching_corpus(module_db):
    """
    1 document with realistic content + dense matching model sets.
    Classifier=None so MATCH_AUTO models are simply skipped.

    Yields a dict with the single profiled document; everything created here
    is removed on teardown. The four label loops previously duplicated the
    match-text logic verbatim — it now lives in _fake_match_text().
    """
    fake = Faker()
    Faker.seed(SEED)
    random.seed(SEED)

    # ---- matching models ---------------------------------------------------
    print(f"\n[setup] Creating {NUM_CORRESPONDENTS} correspondents...")  # noqa: T201
    correspondents = []
    for i in range(NUM_CORRESPONDENTS):
        algo = _algo(i)
        # Match text must be drawn before the name's fake.company() call.
        match_text = _fake_match_text(fake, algo)
        correspondents.append(
            Correspondent.objects.create(
                name=f"mcorp-{i}-{fake.company()}"[:128],
                matching_algorithm=algo,
                match=match_text,
            ),
        )

    print(f"[setup] Creating {NUM_TAGS} tags...")  # noqa: T201
    tags = []
    for i in range(NUM_TAGS):
        algo = _algo(i)
        match_text = _fake_match_text(fake, algo)
        tags.append(
            Tag.objects.create(
                name=f"mtag-{i}-{fake.word()}"[:100],
                matching_algorithm=algo,
                match=match_text,
            ),
        )

    print(f"[setup] Creating {NUM_DOC_TYPES} doc types...")  # noqa: T201
    doc_types = []
    for i in range(NUM_DOC_TYPES):
        algo = _algo(i)
        match_text = _fake_match_text(fake, algo)
        doc_types.append(
            DocumentType.objects.create(
                name=f"mtype-{i}-{fake.word()}"[:128],
                matching_algorithm=algo,
                match=match_text,
            ),
        )

    print(f"[setup] Creating {NUM_STORAGE_PATHS} storage paths...")  # noqa: T201
    storage_paths = []
    for i in range(NUM_STORAGE_PATHS):
        algo = _algo(i)
        match_text = _fake_match_text(fake, algo)
        storage_paths.append(
            StoragePath.objects.create(
                name=f"mpath-{i}-{fake.word()}",
                path=f"{fake.word()}/{{title}}",
                matching_algorithm=algo,
                match=match_text,
            ),
        )

    # ---- document with diverse content ------------------------------------
    doc = Document.objects.create(
        title="quarterly invoice payment tax financial statement",
        content=" ".join(fake.paragraph(nb_sentences=5) for _ in range(3)),
        checksum="MATCHPROF0001",
    )

    print(f"[setup] Document pk={doc.pk}, content length={len(doc.content)} chars")  # noqa: T201
    print(  # noqa: T201
        f" Correspondents: {NUM_CORRESPONDENTS} "
        f"({sum(1 for c in correspondents if c.matching_algorithm == MatchingModel.MATCH_AUTO)} AUTO)",
    )
    print(  # noqa: T201
        f" Tags: {NUM_TAGS} "
        f"({sum(1 for t in tags if t.matching_algorithm == MatchingModel.MATCH_AUTO)} AUTO)",
    )

    yield {"doc": doc}

    # Teardown
    print("\n[teardown] Removing matching corpus...")  # noqa: T201
    Document.objects.all().delete()
    Correspondent.objects.all().delete()
    Tag.objects.all().delete()
    DocumentType.objects.all().delete()
    StoragePath.objects.all().delete()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# TestMatchingPipelineProfile
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestMatchingPipelineProfile:
    """Profile the matching functions called per document during consumption.

    Every match_*() call passes classifier=None, so MATCH_AUTO models skip
    prediction and only ORM + Python-side evaluation cost is measured.
    """

    @pytest.fixture(autouse=True)
    def _setup(self, matching_corpus):
        # Single profiled document built by the module-scoped corpus fixture.
        self.doc = matching_corpus["doc"]

    def test_match_correspondents(self):
        """50 correspondents, algorithm mix. Query count + time."""
        with profile_block(
            f"match_correspondents() [{NUM_CORRESPONDENTS} correspondents, mixed algorithms]",
        ):
            result = match_correspondents(self.doc, classifier=None)
        print(f" -> {len(result)} matched")  # noqa: T201

    def test_match_tags(self):
        """100 tags -- densest set in real installs."""
        with profile_block(f"match_tags() [{NUM_TAGS} tags, mixed algorithms]"):
            result = match_tags(self.doc, classifier=None)
        print(f" -> {len(result)} matched")  # noqa: T201

    def test_match_document_types(self):
        """25 doc types."""
        with profile_block(
            f"match_document_types() [{NUM_DOC_TYPES} types, mixed algorithms]",
        ):
            result = match_document_types(self.doc, classifier=None)
        print(f" -> {len(result)} matched")  # noqa: T201

    def test_match_storage_paths(self):
        """20 storage paths."""
        with profile_block(
            f"match_storage_paths() [{NUM_STORAGE_PATHS} paths, mixed algorithms]",
        ):
            result = match_storage_paths(self.doc, classifier=None)
        print(f" -> {len(result)} matched")  # noqa: T201

    def test_full_match_sequence(self):
        """All four match_*() calls in order -- cumulative cost per document consumed."""
        with profile_block(
            "full match sequence: correspondents + doc_types + tags + storage_paths",
        ):
            match_correspondents(self.doc, classifier=None)
            match_document_types(self.doc, classifier=None)
            match_tags(self.doc, classifier=None)
            match_storage_paths(self.doc, classifier=None)

    def test_algorithm_breakdown(self):
        """Create one correspondent per algorithm and time each independently."""
        # Local imports keep `time` and `matches` out of the module namespace.
        import time

        from documents.matching import matches

        fake = Faker()
        # NOTE(review): MATCH_AUTO is absent — without a classifier it would
        # never match, so timing it here would be meaningless.
        algo_names = {
            MatchingModel.MATCH_NONE: "MATCH_NONE",
            MatchingModel.MATCH_ANY: "MATCH_ANY",
            MatchingModel.MATCH_ALL: "MATCH_ALL",
            MatchingModel.MATCH_LITERAL: "MATCH_LITERAL",
            MatchingModel.MATCH_REGEX: "MATCH_REGEX",
            MatchingModel.MATCH_FUZZY: "MATCH_FUZZY",
        }
        doc = self.doc
        print()  # noqa: T201

        for algo, name in algo_names.items():
            match_text = fake.word() if algo != MatchingModel.MATCH_NONE else ""
            if algo == MatchingModel.MATCH_REGEX:
                match_text = r"\b" + fake.word() + r"\b"
            # Unsaved model instance: matches() only reads attributes, no pk needed.
            model = Correspondent(
                name=f"algo-test-{name}",
                matching_algorithm=algo,
                match=match_text,
            )
            # Time 1000 iterations to get stable microsecond readings
            runs = 1_000
            t0 = time.perf_counter()
            for _ in range(runs):
                matches(model, doc)
            us_per_call = (time.perf_counter() - t0) / runs * 1_000_000
            print(  # noqa: T201
                f" {name:<20s} {us_per_call:8.2f} us/call (match={match_text[:20]!r})",
            )
||||
154
test_sanity_profile.py
Normal file
154
test_sanity_profile.py
Normal file
@@ -0,0 +1,154 @@
|
||||
"""
|
||||
Sanity checker profiling.
|
||||
|
||||
Run with:
|
||||
uv run pytest ../test_sanity_profile.py \
|
||||
-m profiling --override-ini="addopts=" -s -v
|
||||
|
||||
Corpus: 2 000 documents with stub files (original + archive + thumbnail)
|
||||
created in a temp MEDIA_ROOT.
|
||||
|
||||
Scenarios
|
||||
---------
|
||||
TestSanityCheckerProfile
|
||||
- test_sanity_full_corpus full check_sanity() -- cProfile + tracemalloc
|
||||
- test_sanity_query_pattern profile_block summary: query count + time
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import time
|
||||
|
||||
import pytest
|
||||
from django.test import override_settings
|
||||
from profiling import measure_memory
|
||||
from profiling import profile_block
|
||||
from profiling import profile_cpu
|
||||
|
||||
from documents.models import Document
|
||||
from documents.sanity_checker import check_sanity
|
||||
|
||||
# Profiling tests: excluded from the default suite, need database access.
pytestmark = [pytest.mark.profiling, pytest.mark.django_db]

NUM_DOCS = 2_000  # documents (each with original/archive/thumbnail stub files)
SEED = 42  # kept for parity with the other profiling modules
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Module-scoped fixture: temp directories + corpus
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def module_db(django_db_setup, django_db_blocker):
    """Expose an unblocked database to every test in this module."""
    ctx = django_db_blocker.unblock()
    with ctx:
        yield
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def sanity_corpus(tmp_path_factory, module_db):
    """
    Build a 2 000-document corpus. For each document create stub files
    (1-byte placeholders) in ORIGINALS_DIR, ARCHIVE_DIR, and THUMBNAIL_DIR
    so the sanity checker's file-existence and checksum checks have real targets.

    Yields a dict with the temp MEDIA_ROOT.

    Fix: setup + teardown now run inside try/finally. Previously, if corpus
    creation raised before ``yield``, the post-yield code never ran and the
    ``override_settings`` stayed enabled, leaking the temp MEDIA_ROOT into
    every later test in the session.
    """
    media = tmp_path_factory.mktemp("sanity_media")
    originals_dir = media / "documents" / "originals"
    archive_dir = media / "documents" / "archive"
    thumb_dir = media / "documents" / "thumbnails"
    for d in (originals_dir, archive_dir, thumb_dir):
        d.mkdir(parents=True)

    # Use override_settings as a context manager for the whole fixture lifetime
    settings_ctx = override_settings(
        MEDIA_ROOT=media,
        ORIGINALS_DIR=originals_dir,
        ARCHIVE_DIR=archive_dir,
        THUMBNAIL_DIR=thumb_dir,
        MEDIA_LOCK=media / "media.lock",
    )
    settings_ctx.enable()
    try:
        print(f"\n[setup] Creating {NUM_DOCS} documents with stub files...")  # noqa: T201
        t0 = time.perf_counter()
        docs = []
        for i in range(NUM_DOCS):
            # Real (tiny) content so stored checksums match the files on disk.
            content = f"document content for doc {i}"
            checksum = hashlib.sha256(content.encode()).hexdigest()

            orig_filename = f"{i:07d}.pdf"
            arch_filename = f"{i:07d}.pdf"

            orig_path = originals_dir / orig_filename
            arch_path = archive_dir / arch_filename

            orig_path.write_bytes(content.encode())
            arch_path.write_bytes(content.encode())

            docs.append(
                Document(
                    title=f"Document {i:05d}",
                    content=content,
                    checksum=checksum,
                    archive_checksum=checksum,
                    filename=orig_filename,
                    archive_filename=arch_filename,
                    mime_type="application/pdf",
                ),
            )

        created = Document.objects.bulk_create(docs, batch_size=500)

        # Thumbnails use doc.pk, so create them after bulk_create assigns pks
        for doc in created:
            thumb_path = thumb_dir / f"{doc.pk:07d}.webp"
            thumb_path.write_bytes(b"\x00")  # minimal thumbnail stub

        print(  # noqa: T201
            f"[setup] bulk_create + file creation: {time.perf_counter() - t0:.2f}s",
        )

        yield {"media": media}
    finally:
        # Teardown runs on success AND on setup failure, so overridden
        # settings can never leak past this module.
        print("\n[teardown] Removing sanity corpus...")  # noqa: T201
        Document.objects.all().delete()
        settings_ctx.disable()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# TestSanityCheckerProfile
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestSanityCheckerProfile:
    """Profile check_sanity() on a realistic corpus with real files."""

    @pytest.fixture(autouse=True)
    def _setup(self, sanity_corpus):
        self.media = sanity_corpus["media"]

    def test_sanity_full_corpus(self):
        """Full check_sanity() -- cProfile surfaces hot frames, tracemalloc shows peak."""

        def run_check():
            return check_sanity(scheduled=False)

        _, elapsed = profile_cpu(
            run_check,
            label=f"check_sanity() [{NUM_DOCS} docs, real files]",
            top=25,
        )

        _, peak_kib, delta_kib = measure_memory(
            run_check,
            label=f"check_sanity() [{NUM_DOCS} docs] -- memory",
        )

        print("\n Summary:")  # noqa: T201
        print(f" Wall time (CPU profile run): {elapsed * 1000:.1f} ms")  # noqa: T201
        print(f" Peak memory (second run): {peak_kib:.1f} KiB")  # noqa: T201
        print(f" Memory delta: {delta_kib:+.1f} KiB")  # noqa: T201

    def test_sanity_query_pattern(self):
        """profile_block view: query count + query time + wall time in one summary."""
        label = f"check_sanity() [{NUM_DOCS} docs] -- query count"
        with profile_block(label):
            check_sanity(scheduled=False)
|
||||
273
test_search_profiling.py
Normal file
273
test_search_profiling.py
Normal file
@@ -0,0 +1,273 @@
|
||||
"""
|
||||
Search performance profiling tests.
|
||||
|
||||
Run explicitly — excluded from the normal test suite:
|
||||
|
||||
uv run pytest -m profiling -s -p no:xdist --override-ini="addopts=" -v
|
||||
|
||||
The ``-s`` flag is required to see profile_block() output.
|
||||
The ``-p no:xdist`` flag disables parallel execution for accurate measurements.
|
||||
|
||||
Corpus: 5 000 documents generated deterministically from a fixed Faker seed,
|
||||
with realistic variety: 30 correspondents, 15 document types, 50 tags, ~500
|
||||
notes spread across ~10 % of documents.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import random
|
||||
|
||||
import pytest
|
||||
from django.contrib.auth.models import User
|
||||
from faker import Faker
|
||||
from profiling import profile_block
|
||||
from rest_framework.test import APIClient
|
||||
|
||||
from documents.models import Correspondent
|
||||
from documents.models import Document
|
||||
from documents.models import DocumentType
|
||||
from documents.models import Note
|
||||
from documents.models import Tag
|
||||
from documents.search import get_backend
|
||||
from documents.search import reset_backend
|
||||
from documents.search._backend import SearchMode
|
||||
|
||||
# Profiling + search-marked tests; excluded from the default suite, need DB.
pytestmark = [pytest.mark.profiling, pytest.mark.search, pytest.mark.django_db]

# ---------------------------------------------------------------------------
# Corpus parameters
# ---------------------------------------------------------------------------

DOC_COUNT = 5_000  # documents inserted and indexed for each test
SEED = 42  # fixed seed -> deterministic corpus across runs
NUM_CORRESPONDENTS = 30
NUM_DOC_TYPES = 15
NUM_TAGS = 50
NOTE_FRACTION = 0.10  # ~500 documents get a note
PAGE_SIZE = 25  # page size used by the API round-trip tests
|
||||
|
||||
|
||||
def _build_corpus(rng: random.Random, fake: Faker) -> None:
    """
    Insert the full corpus into the database and index it.

    Uses bulk_create for the Document rows (fast) then handles the M2M tag
    relationships individually. Notes are inserted with a single bulk_create
    (previously one INSERT per note, ~500 queries) — consistent with the
    doclist corpus builder. Indexes the full corpus with a single
    backend.rebuild() call.
    """
    import datetime

    # ---- lookup objects -------------------------------------------------
    correspondents = [
        Correspondent.objects.create(name=f"profcorp-{i}-{fake.company()}"[:128])
        for i in range(NUM_CORRESPONDENTS)
    ]
    doc_types = [
        DocumentType.objects.create(name=f"proftype-{i}-{fake.word()}"[:128])
        for i in range(NUM_DOC_TYPES)
    ]
    tags = [
        Tag.objects.create(name=f"proftag-{i}-{fake.word()}"[:100])
        for i in range(NUM_TAGS)
    ]
    note_user = User.objects.create_user(username="profnoteuser", password="x")

    # ---- bulk-create documents ------------------------------------------
    base_date = datetime.date(2018, 1, 1)
    raw_docs = []
    for i in range(DOC_COUNT):
        # Spread created dates over ~6 years for realistic sort/filter spread.
        day_offset = rng.randint(0, 6 * 365)
        created = base_date + datetime.timedelta(days=day_offset)
        raw_docs.append(
            Document(
                title=fake.sentence(nb_words=rng.randint(3, 9)).rstrip("."),
                content="\n\n".join(
                    fake.paragraph(nb_sentences=rng.randint(3, 7))
                    for _ in range(rng.randint(2, 5))
                ),
                checksum=f"PROF{i:07d}",
                # None padding produces docs without correspondent/type.
                correspondent=rng.choice(correspondents + [None] * 8),
                document_type=rng.choice(doc_types + [None] * 4),
                created=created,
            ),
        )
    documents = Document.objects.bulk_create(raw_docs)

    # ---- tags (M2M, post-bulk) ------------------------------------------
    for doc in documents:
        k = rng.randint(0, 5)
        if k:
            doc.tags.add(*rng.sample(tags, k))

    # ---- notes on ~10 % of docs -----------------------------------------
    note_docs = rng.sample(documents, int(DOC_COUNT * NOTE_FRACTION))
    Note.objects.bulk_create(
        [
            Note(
                document=doc,
                note=fake.sentence(nb_words=rng.randint(6, 20)),
                user=note_user,
            )
            for doc in note_docs
        ],
    )

    # ---- build Tantivy index --------------------------------------------
    backend = get_backend()
    qs = Document.objects.select_related(
        "correspondent",
        "document_type",
        "storage_path",
        "owner",
    ).prefetch_related("tags", "notes__user", "custom_fields__field")
    backend.rebuild(qs)
|
||||
|
||||
|
||||
class TestSearchProfiling:
|
||||
"""
|
||||
Performance profiling for the Tantivy search backend and DRF API layer.
|
||||
|
||||
Each test builds a fresh 5 000-document corpus, exercises one hot path,
|
||||
and prints profile_block() measurements to stdout. No correctness
|
||||
assertions — the goal is to surface hot spots and track regressions.
|
||||
"""
|
||||
|
||||
@pytest.fixture(autouse=True)
def _setup(self, tmp_path, settings):
    """Fresh Tantivy index dir, superuser client, and a 5 000-doc corpus per test."""
    index_dir = tmp_path / "index"
    index_dir.mkdir()
    settings.INDEX_DIR = index_dir

    reset_backend()
    corpus_rng = random.Random(SEED)
    corpus_fake = Faker()
    Faker.seed(SEED)

    self.user = User.objects.create_superuser(
        username="profiler",
        password="admin",
    )
    self.client = APIClient()
    self.client.force_authenticate(user=self.user)

    _build_corpus(corpus_rng, corpus_fake)
    yield
    reset_backend()
|
||||
|
||||
# -- 1. Backend: search_ids relevance ---------------------------------
|
||||
|
||||
def test_profile_search_ids_relevance(self):
    """Profile: search_ids() with relevance ordering across several queries."""
    backend = get_backend()
    queries = (
        "invoice payment",
        "annual report",
        "bank statement",
        "contract agreement",
        "receipt",
    )
    with profile_block(f"search_ids — relevance ({len(queries)} queries)"):
        for query in queries:
            backend.search_ids(query, user=None)
|
||||
|
||||
# -- 2. Backend: search_ids with Tantivy-native sort ------------------
|
||||
|
||||
def test_profile_search_ids_sorted(self):
    """Profile: search_ids() sorted by a Tantivy fast field (created)."""
    backend = get_backend()
    with profile_block("search_ids — sorted by created (asc + desc)"):
        for descending in (False, True):
            backend.search_ids(
                "the",
                user=None,
                sort_field="created",
                sort_reverse=descending,
            )
|
||||
|
||||
# -- 3. Backend: highlight_hits for a page of 25 ----------------------
|
||||
|
||||
def test_profile_highlight_hits(self):
    """Profile: highlight_hits() for a 25-document page."""
    backend = get_backend()
    page_ids = backend.search_ids("report", user=None)[:PAGE_SIZE]
    with profile_block(f"highlight_hits — {len(page_ids)} docs"):
        backend.highlight_hits("report", page_ids)
|
||||
|
||||
# -- 4. Backend: autocomplete -----------------------------------------
|
||||
|
||||
def test_profile_autocomplete(self):
    """Profile: autocomplete() with eight common prefixes."""
    backend = get_backend()
    prefixes = ("inv", "pay", "con", "rep", "sta", "acc", "doc", "fin")
    with profile_block(f"autocomplete — {len(prefixes)} prefixes"):
        for pre in prefixes:
            backend.autocomplete(pre, limit=10)
|
||||
|
||||
# -- 5. Backend: simple-mode search (TEXT and TITLE) ------------------
|
||||
|
||||
def test_profile_search_ids_simple_modes(self):
    """Profile: search_ids() in TEXT and TITLE simple-search modes."""
    backend = get_backend()
    queries = ["invoice 2023", "annual report", "bank statement"]
    label = f"search_ids — TEXT + TITLE modes ({len(queries)} queries each)"
    with profile_block(label):
        for query in queries:
            # TEXT first, then TITLE, for each query in turn.
            for mode in (SearchMode.TEXT, SearchMode.TITLE):
                backend.search_ids(query, user=None, search_mode=mode)
|
||||
|
||||
# -- 6. API: full round-trip, relevance + page 1 ----------------------
|
||||
|
||||
def test_profile_api_relevance_search(self):
    """Profile: full API search round-trip, relevance order, page 1."""
    label = f"API /documents/?query=… relevance (page 1, page_size={PAGE_SIZE})"
    with profile_block(label):
        response = self.client.get(
            f"/api/documents/?query=invoice+payment&page=1&page_size={PAGE_SIZE}",
        )
    assert response.status_code == 200
|
||||
|
||||
# -- 7. API: full round-trip, ORM-ordered (title) ---------------------
|
||||
|
||||
def test_profile_api_orm_sorted_search(self):
    """Profile: full API search round-trip with ORM-delegated sort (title)."""
    with profile_block("API /documents/?query=…&ordering=title"):
        # ordering=title pushes the sort down into the ORM query.
        result = self.client.get(
            f"/api/documents/?query=report&ordering=title&page=1&page_size={PAGE_SIZE}",
        )
    assert result.status_code == 200
|
||||
|
||||
# -- 8. API: full round-trip, score sort ------------------------------
|
||||
|
||||
def test_profile_api_score_sort(self):
    """Profile: full API search with ordering=-score (relevance, preserve order)."""
    with profile_block("API /documents/?query=…&ordering=-score"):
        result = self.client.get(
            f"/api/documents/?query=statement&ordering=-score&page=1&page_size={PAGE_SIZE}",
        )
    assert result.status_code == 200
|
||||
|
||||
# -- 9. API: full round-trip, with selection_data ---------------------
|
||||
|
||||
def test_profile_api_with_selection_data(self):
    """Profile: full API search including include_selection_data=true."""
    with profile_block("API /documents/?query=…&include_selection_data=true"):
        result = self.client.get(
            f"/api/documents/?query=contract&page=1&page_size={PAGE_SIZE}"
            "&include_selection_data=true",
        )
    assert result.status_code == 200
    # The aggregate selection payload must ride along with the page.
    assert "selection_data" in result.data
|
||||
|
||||
# -- 10. API: paginated (page 2) --------------------------------------
|
||||
|
||||
def test_profile_api_page_2(self):
    """Profile: full API search, page 2 — exercises page offset arithmetic."""
    with profile_block(f"API /documents/?query=…&page=2&page_size={PAGE_SIZE}"):
        # "the" matches broadly, so page 2 is guaranteed to exist.
        result = self.client.get(
            f"/api/documents/?query=the&page=2&page_size={PAGE_SIZE}",
        )
    assert result.status_code == 200
|
||||
231
test_workflow_profile.py
Normal file
231
test_workflow_profile.py
Normal file
@@ -0,0 +1,231 @@
|
||||
"""
|
||||
Workflow trigger matching profiling.
|
||||
|
||||
Run with:
|
||||
uv run pytest ../test_workflow_profile.py \
|
||||
-m profiling --override-ini="addopts=" -s -v
|
||||
|
||||
Corpus: 500 documents + correspondents + tags + sets of WorkflowTrigger
|
||||
objects at 5 and 20 count to allow scaling comparisons.
|
||||
|
||||
Scenarios
|
||||
---------
|
||||
TestWorkflowMatchingProfile
|
||||
- test_existing_document_5_workflows existing_document_matches_workflow x 5 triggers
|
||||
- test_existing_document_20_workflows same x 20 triggers
|
||||
- test_workflow_prefilter prefilter_documents_by_workflowtrigger on 500 docs
|
||||
- test_trigger_type_comparison compare DOCUMENT_ADDED vs DOCUMENT_UPDATED overhead
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import random
|
||||
import time
|
||||
|
||||
import pytest
|
||||
from faker import Faker
|
||||
from profiling import profile_block
|
||||
|
||||
from documents.matching import existing_document_matches_workflow
|
||||
from documents.matching import prefilter_documents_by_workflowtrigger
|
||||
from documents.models import Correspondent
|
||||
from documents.models import Document
|
||||
from documents.models import Tag
|
||||
from documents.models import Workflow
|
||||
from documents.models import WorkflowAction
|
||||
from documents.models import WorkflowTrigger
|
||||
|
||||
pytestmark = [pytest.mark.profiling, pytest.mark.django_db]
|
||||
|
||||
# Corpus sizing — modest on purpose so the profiling run stays quick.
NUM_DOCS = 500  # documents created by the workflow_corpus fixture
NUM_CORRESPONDENTS = 10  # distinct Correspondent rows to pick from
NUM_TAGS = 20  # distinct Tag rows; each document receives 0–3 of them
SEED = 42  # shared seed for Faker and random.Random — deterministic corpus
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Module-scoped fixture
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def module_db(django_db_setup, django_db_blocker):
    """Unlock the DB for the whole module (module-scoped).

    pytest-django normally blocks database access outside function-scoped
    fixtures; releasing the blocker here lets the module-scoped corpus
    fixture create its rows exactly once per module run.
    """
    # Keep the blocker released for the module's entire lifetime.
    with django_db_blocker.unblock():
        yield
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def workflow_corpus(module_db):
    """
    Build a deterministic workflow-matching corpus (module-scoped).

    Creates 500 documents plus correspondents and tags, then three sets of
    WorkflowTrigger objects (5 and 20 DOCUMENT_UPDATED triggers, and 5
    DOCUMENT_ADDED triggers) so tests can compare trigger-count scaling and
    trigger-type overhead.  Yields a dict with a sample document and the
    three trigger lists; tears the corpus down afterwards.
    """
    fake = Faker()
    # Seed both Faker and the local RNG so the corpus is identical on every
    # run — keeps timings comparable between profiling sessions.
    Faker.seed(SEED)
    rng = random.Random(SEED)

    # ---- lookup objects ---------------------------------------------------
    print("\n[setup] Creating lookup objects...")  # noqa: T201
    correspondents = [
        # Slices keep generated names within the model field lengths.
        Correspondent.objects.create(name=f"wfcorp-{i}-{fake.company()}"[:128])
        for i in range(NUM_CORRESPONDENTS)
    ]
    tags = [
        Tag.objects.create(name=f"wftag-{i}-{fake.word()}"[:100])
        for i in range(NUM_TAGS)
    ]

    # ---- documents --------------------------------------------------------
    print(f"[setup] Building {NUM_DOCS} documents...")  # noqa: T201
    raw_docs = []
    for i in range(NUM_DOCS):
        raw_docs.append(
            Document(
                title=fake.sentence(nb_words=4).rstrip("."),
                content=fake.paragraph(nb_sentences=3),
                checksum=f"WF{i:07d}",
                # Three None entries are padded in so a fraction of the
                # documents carry no correspondent at all.
                correspondent=rng.choice(correspondents + [None] * 3),
            ),
        )
    documents = Document.objects.bulk_create(raw_docs, batch_size=500)
    # M2M tag assignment cannot go through bulk_create — add per document.
    for doc in documents:
        k = rng.randint(0, 3)
        if k:
            doc.tags.add(*rng.sample(tags, k))

    sample_doc = documents[0]
    print(f"[setup] Sample doc pk={sample_doc.pk}")  # noqa: T201

    # ---- build triggers at scale 5 and 20 --------------------------------
    # Mutable one-element counter gives each Workflow a unique name across
    # multiple _make_triggers calls.
    _wf_counter = [0]

    def _make_triggers(n: int, trigger_type: int) -> list[WorkflowTrigger]:
        # Create n triggers of the given type, each wrapped in its own
        # Workflow together with a single assignment action.
        triggers = []
        for i in range(n):
            # Alternate between no filter and a correspondent filter
            corr = correspondents[i % NUM_CORRESPONDENTS] if i % 3 == 0 else None
            trigger = WorkflowTrigger.objects.create(
                type=trigger_type,
                filter_has_correspondent=corr,
            )
            action = WorkflowAction.objects.create(
                type=WorkflowAction.WorkflowActionType.ASSIGNMENT,
            )
            idx = _wf_counter[0]
            _wf_counter[0] += 1
            wf = Workflow.objects.create(name=f"wf-profile-{idx}")
            wf.triggers.add(trigger)
            wf.actions.add(action)
            triggers.append(trigger)
        return triggers

    print("[setup] Creating workflow triggers...")  # noqa: T201
    triggers_5 = _make_triggers(5, WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED)
    triggers_20 = _make_triggers(
        20,
        WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED,
    )
    triggers_added = _make_triggers(
        5,
        WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
    )

    yield {
        "doc": sample_doc,
        "triggers_5": triggers_5,
        "triggers_20": triggers_20,
        "triggers_added": triggers_added,
    }

    # Teardown
    print("\n[teardown] Removing workflow corpus...")  # noqa: T201
    Workflow.objects.all().delete()
    WorkflowTrigger.objects.all().delete()
    WorkflowAction.objects.all().delete()
    Document.objects.all().delete()
    Correspondent.objects.all().delete()
    Tag.objects.all().delete()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# TestWorkflowMatchingProfile
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestWorkflowMatchingProfile:
    """Profile workflow trigger evaluation per document save."""

    @pytest.fixture(autouse=True)
    def _setup(self, workflow_corpus):
        # Expose the shared corpus pieces as instance attributes.
        self.doc = workflow_corpus["doc"]
        for key in ("triggers_5", "triggers_20", "triggers_added"):
            setattr(self, key, workflow_corpus[key])

    def test_existing_document_5_workflows(self):
        """existing_document_matches_workflow x 5 DOCUMENT_UPDATED triggers."""
        document = self.doc
        trigger_set = self.triggers_5

        with profile_block(
            f"existing_document_matches_workflow [{len(trigger_set)} triggers]",
        ):
            for candidate in trigger_set:
                existing_document_matches_workflow(document, candidate)

    def test_existing_document_20_workflows(self):
        """existing_document_matches_workflow x 20 triggers -- shows linear scaling."""
        document = self.doc
        trigger_set = self.triggers_20

        with profile_block(
            f"existing_document_matches_workflow [{len(trigger_set)} triggers]",
        ):
            for candidate in trigger_set:
                existing_document_matches_workflow(document, candidate)

        # Time each call on its own to expose the per-trigger overhead.
        samples = []
        for candidate in trigger_set:
            started = time.perf_counter()
            existing_document_matches_workflow(document, candidate)
            samples.append((time.perf_counter() - started) * 1_000_000)
        avg_us = sum(samples) / len(samples)
        print(f"\n Per-trigger avg: {avg_us:.1f} us (n={len(samples)})")  # noqa: T201

    def test_workflow_prefilter(self):
        """prefilter_documents_by_workflowtrigger on 500 docs -- tag + correspondent filters."""
        queryset = Document.objects.all()
        print(f"\n Corpus: {queryset.count()} documents")  # noqa: T201

        for trigger in self.triggers_20[:3]:
            label = (
                f"prefilter_documents_by_workflowtrigger "
                f"[corr={trigger.filter_has_correspondent_id}]"
            )
            with profile_block(label):
                filtered = prefilter_documents_by_workflowtrigger(queryset, trigger)
                # Force queryset evaluation so the DB round-trip is timed.
                matched = filtered.count()
            print(f" -> {matched} docs passed filter")  # noqa: T201

    def test_trigger_type_comparison(self):
        """Compare per-call overhead of DOCUMENT_UPDATED vs DOCUMENT_ADDED."""
        document = self.doc
        runs = 200

        scenarios = (
            ("DOCUMENT_UPDATED", self.triggers_5),
            ("DOCUMENT_ADDED", self.triggers_added),
        )
        for label, trigger_set in scenarios:
            started = time.perf_counter()
            for _ in range(runs):
                for candidate in trigger_set:
                    existing_document_matches_workflow(document, candidate)
            total_calls = runs * len(trigger_set)
            us_per_call = (time.perf_counter() - started) / total_calls * 1_000_000
            print(  # noqa: T201
                f" {label:<22s} {us_per_call:.2f} us/call "
                f"({total_calls} calls, {len(trigger_set)} triggers)",
            )
|
||||
Reference in New Issue
Block a user