Compare commits

..

8 Commits

Author SHA1 Message Date
shamoon
f85a360c21 Add to diagram 2026-03-09 10:58:07 -07:00
shamoon
add2c68b7c Move migration 2026-03-09 10:58:06 -07:00
shamoon
3f0770a9a6 make it a separate one 2026-03-09 10:58:06 -07:00
shamoon
c0c09bd0da Save this 2026-03-09 10:51:45 -07:00
GitHub Actions
0c7d56c5e7 Auto translate strings 2026-03-09 17:45:53 +00:00
Trenton H
0bcf904e3a Chore: Finish settings refactor (#12263) 2026-03-09 17:43:51 +00:00
Trenton H
bcc2f11152 Performance: Stream JSON during import for memory improvements (#12276)
* Perf: stream manifest parsing with ijson in document_importer

Replace bulk json.load of the full manifest (which materializes the
entire JSON array into memory) with incremental ijson streaming.
Eliminates self.manifest entirely — records are never all in memory
at once.

- Add ijson>=3.2 dependency
- New module-level iter_manifest_records() generator
- load_manifest_files() collects paths only; no parsing at load time
- check_manifest_validity() streams without accumulating records
- decrypt_secret_fields() streams each manifest to a .decrypted.json
  temp file record-by-record; temp files cleaned up after file copy
- _import_files_from_manifest() collects only document records (small
  fraction of manifest) for the tqdm progress bar

Measured on 200 docs + 200 CustomFieldInstances:
- Streaming validation: peak memory 3081 KiB -> 333 KiB (89% reduction)
- Stream-decrypt to file: peak memory 3081 KiB -> 549 KiB (82% reduction)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* Perf: slim dict in _import_files_from_manifest, discard fields

When collecting document records for the file-copy step, extract only
the 4 keys the loop actually uses (pk + 3 exported filename keys) and
discard the full fields dict (content, checksum, tags, etc.).

Peak memory for the document-record list: 939 KiB -> 375 KiB (60% reduction).
Wall time unchanged.
2026-03-09 10:20:48 -07:00
shamoon
e18b1fd99d Chore: use unified "gates" for ci tests and docs checks (#12277) 2026-03-09 17:02:34 +00:00
32 changed files with 1664 additions and 1177 deletions

View File

@@ -3,21 +3,9 @@ on:
push:
branches-ignore:
- 'translations**'
paths:
- 'src/**'
- 'pyproject.toml'
- 'uv.lock'
- 'docker/compose/docker-compose.ci-test.yml'
- '.github/workflows/ci-backend.yml'
pull_request:
branches-ignore:
- 'translations**'
paths:
- 'src/**'
- 'pyproject.toml'
- 'uv.lock'
- 'docker/compose/docker-compose.ci-test.yml'
- '.github/workflows/ci-backend.yml'
workflow_dispatch:
concurrency:
group: backend-${{ github.event.pull_request.number || github.ref }}
@@ -26,7 +14,55 @@ env:
DEFAULT_UV_VERSION: "0.10.x"
NLTK_DATA: "/usr/share/nltk_data"
jobs:
changes:
name: Detect Backend Changes
runs-on: ubuntu-slim
outputs:
backend_changed: ${{ steps.force.outputs.run_all == 'true' || steps.filter.outputs.backend == 'true' }}
steps:
- name: Checkout
uses: actions/checkout@v6.0.2
with:
fetch-depth: 0
- name: Decide run mode
id: force
run: |
if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
echo "run_all=true" >> "$GITHUB_OUTPUT"
elif [[ "${{ github.event_name }}" == "push" && ( "${{ github.ref_name }}" == "main" || "${{ github.ref_name }}" == "dev" ) ]]; then
echo "run_all=true" >> "$GITHUB_OUTPUT"
else
echo "run_all=false" >> "$GITHUB_OUTPUT"
fi
- name: Set diff range
id: range
if: steps.force.outputs.run_all != 'true'
run: |
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
echo "base=${{ github.event.pull_request.base.sha }}" >> "$GITHUB_OUTPUT"
elif [[ "${{ github.event.created }}" == "true" ]]; then
echo "base=origin/${{ github.event.repository.default_branch }}" >> "$GITHUB_OUTPUT"
else
echo "base=${{ github.event.before }}" >> "$GITHUB_OUTPUT"
fi
echo "ref=${{ github.sha }}" >> "$GITHUB_OUTPUT"
- name: Detect changes
id: filter
if: steps.force.outputs.run_all != 'true'
uses: dorny/paths-filter@v3.0.2
with:
base: ${{ steps.range.outputs.base }}
ref: ${{ steps.range.outputs.ref }}
filters: |
backend:
- 'src/**'
- 'pyproject.toml'
- 'uv.lock'
- 'docker/compose/docker-compose.ci-test.yml'
- '.github/workflows/ci-backend.yml'
test:
needs: changes
if: needs.changes.outputs.backend_changed == 'true'
name: "Python ${{ matrix.python-version }}"
runs-on: ubuntu-24.04
strategy:
@@ -100,6 +136,8 @@ jobs:
docker compose --file docker/compose/docker-compose.ci-test.yml logs
docker compose --file docker/compose/docker-compose.ci-test.yml down
typing:
needs: changes
if: needs.changes.outputs.backend_changed == 'true'
name: Check project typing
runs-on: ubuntu-24.04
env:
@@ -150,3 +188,27 @@ jobs:
--show-error-codes \
--warn-unused-configs \
src/ | uv run mypy-baseline filter
gate:
name: Backend CI Gate
needs: [changes, test, typing]
if: always()
runs-on: ubuntu-slim
steps:
- name: Check gate
run: |
if [[ "${{ needs.changes.outputs.backend_changed }}" != "true" ]]; then
echo "No backend-relevant changes detected."
exit 0
fi
if [[ "${{ needs.test.result }}" != "success" ]]; then
echo "::error::Backend test job result: ${{ needs.test.result }}"
exit 1
fi
if [[ "${{ needs.typing.result }}" != "success" ]]; then
echo "::error::Backend typing job result: ${{ needs.typing.result }}"
exit 1
fi
echo "Backend checks passed."

View File

@@ -1,22 +1,9 @@
name: Documentation
on:
push:
branches:
- main
- dev
paths:
- 'docs/**'
- 'zensical.toml'
- 'pyproject.toml'
- 'uv.lock'
- '.github/workflows/ci-docs.yml'
branches-ignore:
- 'translations**'
pull_request:
paths:
- 'docs/**'
- 'zensical.toml'
- 'pyproject.toml'
- 'uv.lock'
- '.github/workflows/ci-docs.yml'
workflow_dispatch:
concurrency:
group: docs-${{ github.event.pull_request.number || github.ref }}
@@ -29,7 +16,55 @@ env:
DEFAULT_UV_VERSION: "0.10.x"
DEFAULT_PYTHON_VERSION: "3.12"
jobs:
changes:
name: Detect Docs Changes
runs-on: ubuntu-slim
outputs:
docs_changed: ${{ steps.force.outputs.run_all == 'true' || steps.filter.outputs.docs == 'true' }}
steps:
- name: Checkout
uses: actions/checkout@v6.0.2
with:
fetch-depth: 0
- name: Decide run mode
id: force
run: |
if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
echo "run_all=true" >> "$GITHUB_OUTPUT"
elif [[ "${{ github.event_name }}" == "push" && ( "${{ github.ref_name }}" == "main" || "${{ github.ref_name }}" == "dev" ) ]]; then
echo "run_all=true" >> "$GITHUB_OUTPUT"
else
echo "run_all=false" >> "$GITHUB_OUTPUT"
fi
- name: Set diff range
id: range
if: steps.force.outputs.run_all != 'true'
run: |
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
echo "base=${{ github.event.pull_request.base.sha }}" >> "$GITHUB_OUTPUT"
elif [[ "${{ github.event.created }}" == "true" ]]; then
echo "base=origin/${{ github.event.repository.default_branch }}" >> "$GITHUB_OUTPUT"
else
echo "base=${{ github.event.before }}" >> "$GITHUB_OUTPUT"
fi
echo "ref=${{ github.sha }}" >> "$GITHUB_OUTPUT"
- name: Detect changes
id: filter
if: steps.force.outputs.run_all != 'true'
uses: dorny/paths-filter@v3.0.2
with:
base: ${{ steps.range.outputs.base }}
ref: ${{ steps.range.outputs.ref }}
filters: |
docs:
- 'docs/**'
- 'zensical.toml'
- 'pyproject.toml'
- 'uv.lock'
- '.github/workflows/ci-docs.yml'
build:
needs: changes
if: needs.changes.outputs.docs_changed == 'true'
name: Build Documentation
runs-on: ubuntu-24.04
steps:
@@ -64,8 +99,8 @@ jobs:
name: github-pages-${{ github.run_id }}-${{ github.run_attempt }}
deploy:
name: Deploy Documentation
needs: build
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
needs: [changes, build]
if: github.event_name == 'push' && github.ref == 'refs/heads/main' && needs.changes.outputs.docs_changed == 'true'
runs-on: ubuntu-24.04
environment:
name: github-pages
@@ -76,3 +111,22 @@ jobs:
id: deployment
with:
artifact_name: github-pages-${{ github.run_id }}-${{ github.run_attempt }}
gate:
name: Docs CI Gate
needs: [changes, build]
if: always()
runs-on: ubuntu-slim
steps:
- name: Check gate
run: |
if [[ "${{ needs.changes.outputs.docs_changed }}" != "true" ]]; then
echo "No docs-relevant changes detected."
exit 0
fi
if [[ "${{ needs.build.result }}" != "success" ]]; then
echo "::error::Docs build job result: ${{ needs.build.result }}"
exit 1
fi
echo "Docs checks passed."

View File

@@ -3,21 +3,60 @@ on:
push:
branches-ignore:
- 'translations**'
paths:
- 'src-ui/**'
- '.github/workflows/ci-frontend.yml'
pull_request:
branches-ignore:
- 'translations**'
paths:
- 'src-ui/**'
- '.github/workflows/ci-frontend.yml'
workflow_dispatch:
concurrency:
group: frontend-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
changes:
name: Detect Frontend Changes
runs-on: ubuntu-slim
outputs:
frontend_changed: ${{ steps.force.outputs.run_all == 'true' || steps.filter.outputs.frontend == 'true' }}
steps:
- name: Checkout
uses: actions/checkout@v6.0.2
with:
fetch-depth: 0
- name: Decide run mode
id: force
run: |
if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
echo "run_all=true" >> "$GITHUB_OUTPUT"
elif [[ "${{ github.event_name }}" == "push" && ( "${{ github.ref_name }}" == "main" || "${{ github.ref_name }}" == "dev" ) ]]; then
echo "run_all=true" >> "$GITHUB_OUTPUT"
else
echo "run_all=false" >> "$GITHUB_OUTPUT"
fi
- name: Set diff range
id: range
if: steps.force.outputs.run_all != 'true'
run: |
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
echo "base=${{ github.event.pull_request.base.sha }}" >> "$GITHUB_OUTPUT"
elif [[ "${{ github.event.created }}" == "true" ]]; then
echo "base=origin/${{ github.event.repository.default_branch }}" >> "$GITHUB_OUTPUT"
else
echo "base=${{ github.event.before }}" >> "$GITHUB_OUTPUT"
fi
echo "ref=${{ github.sha }}" >> "$GITHUB_OUTPUT"
- name: Detect changes
id: filter
if: steps.force.outputs.run_all != 'true'
uses: dorny/paths-filter@v3.0.2
with:
base: ${{ steps.range.outputs.base }}
ref: ${{ steps.range.outputs.ref }}
filters: |
frontend:
- 'src-ui/**'
- '.github/workflows/ci-frontend.yml'
install-dependencies:
needs: changes
if: needs.changes.outputs.frontend_changed == 'true'
name: Install Dependencies
runs-on: ubuntu-24.04
steps:
@@ -45,7 +84,8 @@ jobs:
run: cd src-ui && pnpm install
lint:
name: Lint
needs: install-dependencies
needs: [changes, install-dependencies]
if: needs.changes.outputs.frontend_changed == 'true'
runs-on: ubuntu-24.04
steps:
- name: Checkout
@@ -73,7 +113,8 @@ jobs:
run: cd src-ui && pnpm run lint
unit-tests:
name: "Unit Tests (${{ matrix.shard-index }}/${{ matrix.shard-count }})"
needs: install-dependencies
needs: [changes, install-dependencies]
if: needs.changes.outputs.frontend_changed == 'true'
runs-on: ubuntu-24.04
strategy:
fail-fast: false
@@ -119,7 +160,8 @@ jobs:
directory: src-ui/coverage/
e2e-tests:
name: "E2E Tests (${{ matrix.shard-index }}/${{ matrix.shard-count }})"
needs: install-dependencies
needs: [changes, install-dependencies]
if: needs.changes.outputs.frontend_changed == 'true'
runs-on: ubuntu-24.04
container: mcr.microsoft.com/playwright:v1.58.2-noble
env:
@@ -159,7 +201,8 @@ jobs:
run: cd src-ui && pnpm exec playwright test --shard ${{ matrix.shard-index }}/${{ matrix.shard-count }}
bundle-analysis:
name: Bundle Analysis
needs: [unit-tests, e2e-tests]
needs: [changes, unit-tests, e2e-tests]
if: needs.changes.outputs.frontend_changed == 'true'
runs-on: ubuntu-24.04
steps:
- name: Checkout
@@ -189,3 +232,42 @@ jobs:
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
run: cd src-ui && pnpm run build --configuration=production
gate:
name: Frontend CI Gate
needs: [changes, install-dependencies, lint, unit-tests, e2e-tests, bundle-analysis]
if: always()
runs-on: ubuntu-slim
steps:
- name: Check gate
run: |
if [[ "${{ needs.changes.outputs.frontend_changed }}" != "true" ]]; then
echo "No frontend-relevant changes detected."
exit 0
fi
if [[ "${{ needs['install-dependencies'].result }}" != "success" ]]; then
echo "::error::Frontend install job result: ${{ needs['install-dependencies'].result }}"
exit 1
fi
if [[ "${{ needs.lint.result }}" != "success" ]]; then
echo "::error::Frontend lint job result: ${{ needs.lint.result }}"
exit 1
fi
if [[ "${{ needs['unit-tests'].result }}" != "success" ]]; then
echo "::error::Frontend unit-tests job result: ${{ needs['unit-tests'].result }}"
exit 1
fi
if [[ "${{ needs['e2e-tests'].result }}" != "success" ]]; then
echo "::error::Frontend e2e-tests job result: ${{ needs['e2e-tests'].result }}"
exit 1
fi
if [[ "${{ needs['bundle-analysis'].result }}" != "success" ]]; then
echo "::error::Frontend bundle-analysis job result: ${{ needs['bundle-analysis'].result }}"
exit 1
fi
echo "Frontend checks passed."

View File

@@ -458,7 +458,7 @@ fields and permissions, which will be merged.
#### Types {#workflow-trigger-types}
Currently, there are four events that correspond to workflow trigger 'types':
Currently, there are five events that correspond to workflow trigger 'types':
1. **Consumption Started**: _before_ a document is consumed, so events can include filters by source (mail, consumption
folder or API), file path, file name, mail rule
@@ -470,8 +470,10 @@ Currently, there are four events that correspond to workflow trigger 'types':
4. **Scheduled**: a scheduled trigger that can be used to run workflows at a specific time. The date used can be either the document
added, created, updated date or you can specify a (date) custom field. You can also specify a day offset from the date (positive
offsets will trigger after the date, negative offsets will trigger before).
5. **Version Added**: when a new version is added for an existing document. This trigger evaluates filters against the root document
and applies actions to the root document.
The following flow diagram illustrates the four document trigger types:
The following flow diagram illustrates the document trigger types:
```mermaid
flowchart TD
@@ -487,6 +489,10 @@ flowchart TD
'Updated'
trigger(s)"}
version{"Matching
'Version Added'
trigger(s)"}
scheduled{"Documents
matching
trigger(s)"}
@@ -503,11 +509,15 @@ flowchart TD
updated --> |Yes| J[Workflow Actions Run]
updated --> |No| K
J --> K[Document Saved]
L[Scheduled Task Check<br/>hourly at :05] --> M[Get All Scheduled Triggers]
M --> scheduled
scheduled --> |Yes| N[Workflow Actions Run]
scheduled --> |No| O[Document Saved]
N --> O
L[New Document Version Added] --> version
version --> |Yes| V[Workflow Actions Run]
version --> |No| W
V --> W[Document Saved]
X[Scheduled Task Check<br/>hourly at :05] --> Y[Get All Scheduled Triggers]
Y --> scheduled
scheduled --> |Yes| Z[Workflow Actions Run]
scheduled --> |No| AA[Document Saved]
Z --> AA
```
#### Filters {#workflow-trigger-filters}

View File

@@ -49,6 +49,7 @@ dependencies = [
"flower~=2.0.1",
"gotenberg-client~=0.13.1",
"httpx-oauth~=0.16",
"ijson>=3.2",
"imap-tools~=1.11.0",
"jinja2~=3.1.5",
"langdetect~=1.0.9",

View File

@@ -164,7 +164,7 @@
<pngx-input-text i18n-title title="Filter path" formControlName="filter_path" horizontal="true" i18n-hint hint="Apply to documents that match this path. Wildcards specified as * are allowed. Case-normalized." [error]="error?.filter_path"></pngx-input-text>
<pngx-input-select i18n-title title="Filter mail rule" [items]="mailRules" horizontal="true" [allowNull]="true" formControlName="filter_mailrule" i18n-hint hint="Apply to documents consumed via this mail rule." [error]="error?.filter_mailrule"></pngx-input-select>
}
@if (formGroup.get('type').value === WorkflowTriggerType.DocumentAdded || formGroup.get('type').value === WorkflowTriggerType.DocumentUpdated || formGroup.get('type').value === WorkflowTriggerType.Scheduled) {
@if (formGroup.get('type').value === WorkflowTriggerType.DocumentAdded || formGroup.get('type').value === WorkflowTriggerType.DocumentUpdated || formGroup.get('type').value === WorkflowTriggerType.Scheduled || formGroup.get('type').value === WorkflowTriggerType.VersionAdded) {
<pngx-input-select i18n-title title="Content matching algorithm" horizontal="true" [items]="getMatchingAlgorithms()" formControlName="matching_algorithm"></pngx-input-select>
@if (matchingPatternRequired(formGroup)) {
<pngx-input-text i18n-title title="Content matching pattern" horizontal="true" formControlName="match" [error]="error?.match"></pngx-input-text>
@@ -175,7 +175,7 @@
}
</div>
</div>
@if (formGroup.get('type').value === WorkflowTriggerType.DocumentAdded || formGroup.get('type').value === WorkflowTriggerType.DocumentUpdated || formGroup.get('type').value === WorkflowTriggerType.Scheduled) {
@if (formGroup.get('type').value === WorkflowTriggerType.DocumentAdded || formGroup.get('type').value === WorkflowTriggerType.DocumentUpdated || formGroup.get('type').value === WorkflowTriggerType.Scheduled || formGroup.get('type').value === WorkflowTriggerType.VersionAdded) {
<div class="row mt-3">
<div class="col">
<div class="trigger-filters mb-3">

View File

@@ -120,6 +120,10 @@ export const WORKFLOW_TYPE_OPTIONS = [
id: WorkflowTriggerType.Scheduled,
name: $localize`Scheduled`,
},
{
id: WorkflowTriggerType.VersionAdded,
name: $localize`Version Added`,
},
]
export const WORKFLOW_ACTION_OPTIONS = [

View File

@@ -12,6 +12,7 @@ export enum WorkflowTriggerType {
DocumentAdded = 2,
DocumentUpdated = 3,
Scheduled = 4,
VersionAdded = 5,
}
export enum ScheduleDateField {

View File

@@ -10,11 +10,13 @@ class DocumentsConfig(AppConfig):
def ready(self) -> None:
from documents.signals import document_consumption_finished
from documents.signals import document_updated
from documents.signals import document_version_added
from documents.signals.handlers import add_inbox_tags
from documents.signals.handlers import add_or_update_document_in_llm_index
from documents.signals.handlers import add_to_index
from documents.signals.handlers import run_workflows_added
from documents.signals.handlers import run_workflows_updated
from documents.signals.handlers import run_workflows_version_added
from documents.signals.handlers import send_websocket_document_updated
from documents.signals.handlers import set_correspondent
from documents.signals.handlers import set_document_type
@@ -28,6 +30,7 @@ class DocumentsConfig(AppConfig):
document_consumption_finished.connect(set_storage_path)
document_consumption_finished.connect(add_to_index)
document_consumption_finished.connect(run_workflows_added)
document_version_added.connect(run_workflows_version_added)
document_consumption_finished.connect(add_or_update_document_in_llm_index)
document_updated.connect(run_workflows_updated)
document_updated.connect(send_websocket_document_updated)

View File

@@ -46,6 +46,7 @@ from documents.plugins.helpers import ProgressStatusOptions
from documents.signals import document_consumption_finished
from documents.signals import document_consumption_started
from documents.signals import document_updated
from documents.signals import document_version_added
from documents.signals.handlers import run_workflows
from documents.templating.workflows import parse_w_workflow_placeholders
from documents.utils import copy_basic_file_stats
@@ -601,6 +602,12 @@ class ConsumerPlugin(
if self.unmodified_original
else self.working_copy,
)
if document.root_document_id:
document_version_added.send(
sender=self.__class__,
document=document,
logging_group=self.logging_group,
)
# After everything is in the database, copy the files into
# place. If this fails, we'll also rollback the transaction.

View File

@@ -1,9 +1,8 @@
import hashlib
import io
import json
import os
import shutil
import zipfile
import tempfile
from itertools import islice
from pathlib import Path
from typing import TYPE_CHECKING
@@ -94,8 +93,6 @@ class StreamingManifestWriter:
*,
compare_json: bool = False,
files_in_export_dir: "set[Path] | None" = None,
zip_file: "zipfile.ZipFile | None" = None,
zip_arcname: str | None = None,
) -> None:
self._path = path.resolve()
self._tmp_path = self._path.with_suffix(self._path.suffix + ".tmp")
@@ -103,20 +100,12 @@ class StreamingManifestWriter:
self._files_in_export_dir: set[Path] = (
files_in_export_dir if files_in_export_dir is not None else set()
)
self._zip_file = zip_file
self._zip_arcname = zip_arcname
self._zip_mode = zip_file is not None
self._file = None
self._first = True
def open(self) -> None:
if self._zip_mode:
# zipfile only allows one open write handle at a time, so buffer
# the manifest in memory and write it atomically on close()
self._file = io.StringIO()
else:
self._path.parent.mkdir(parents=True, exist_ok=True)
self._file = self._tmp_path.open("w", encoding="utf-8")
self._path.parent.mkdir(parents=True, exist_ok=True)
self._file = self._tmp_path.open("w", encoding="utf-8")
self._file.write("[")
self._first = True
@@ -137,18 +126,15 @@ class StreamingManifestWriter:
if self._file is None:
return
self._file.write("\n]")
if self._zip_mode:
self._zip_file.writestr(self._zip_arcname, self._file.getvalue())
self._file.close()
self._file = None
if not self._zip_mode:
self._finalize()
self._finalize()
def discard(self) -> None:
if self._file is not None:
self._file.close()
self._file = None
if not self._zip_mode and self._tmp_path.exists():
if self._tmp_path.exists():
self._tmp_path.unlink()
def _finalize(self) -> None:
@@ -325,13 +311,18 @@ class Command(CryptMixin, PaperlessCommand):
self.files_in_export_dir: set[Path] = set()
self.exported_files: set[str] = set()
self.zip_file: zipfile.ZipFile | None = None
self._zip_dirs: set[str] = set()
# If zipping, save the original target for later and
# get a temporary directory for the target instead
temp_dir = None
self.original_target = self.target
if self.zip_export:
zip_name = options["zip_name"]
self.zip_path = (self.target / zip_name).with_suffix(".zip")
self.zip_tmp_path = self.zip_path.parent / (self.zip_path.name + ".tmp")
settings.SCRATCH_DIR.mkdir(parents=True, exist_ok=True)
temp_dir = tempfile.TemporaryDirectory(
dir=settings.SCRATCH_DIR,
prefix="paperless-export",
)
self.target = Path(temp_dir.name).resolve()
if not self.target.exists():
raise CommandError("That path doesn't exist")
@@ -342,53 +333,30 @@ class Command(CryptMixin, PaperlessCommand):
if not os.access(self.target, os.W_OK):
raise CommandError("That path doesn't appear to be writable")
if self.zip_export:
if self.compare_checksums:
self.stdout.write(
self.style.WARNING(
"--compare-checksums is ignored when --zip is used",
),
)
if self.compare_json:
self.stdout.write(
self.style.WARNING(
"--compare-json is ignored when --zip is used",
),
)
try:
# Prevent any ongoing changes in the documents
with FileLock(settings.MEDIA_LOCK):
if self.zip_export:
self.zip_file = zipfile.ZipFile(
self.zip_tmp_path,
"w",
compression=zipfile.ZIP_DEFLATED,
allowZip64=True,
)
self.dump()
if self.zip_export and self.zip_file is not None:
self.zip_file.close()
self.zip_file = None
self.zip_tmp_path.rename(self.zip_path)
# We've written everything to the temporary directory in this case,
# now make an archive in the original target, with all files stored
if self.zip_export and temp_dir is not None:
shutil.make_archive(
self.original_target / options["zip_name"],
format="zip",
root_dir=temp_dir.name,
)
finally:
# Ensure zip_file is closed and the incomplete .tmp is removed on failure
if self.zip_file is not None:
self.zip_file.close()
self.zip_file = None
if self.zip_export and self.zip_tmp_path.exists():
self.zip_tmp_path.unlink()
# Always cleanup the temporary directory, if one was created
if self.zip_export and temp_dir is not None:
temp_dir.cleanup()
def dump(self) -> None:
# 1. Take a snapshot of what files exist in the current export folder
# (skipped in zip mode — always write fresh, no skip/compare logic applies)
if not self.zip_export:
for x in self.target.glob("**/*"):
if x.is_file():
self.files_in_export_dir.add(x.resolve())
for x in self.target.glob("**/*"):
if x.is_file():
self.files_in_export_dir.add(x.resolve())
# 2. Create manifest, containing all correspondents, types, tags, storage paths
# note, documents and ui_settings
@@ -446,8 +414,6 @@ class Command(CryptMixin, PaperlessCommand):
manifest_path,
compare_json=self.compare_json,
files_in_export_dir=self.files_in_export_dir,
zip_file=self.zip_file,
zip_arcname="manifest.json",
) as writer:
with transaction.atomic():
for key, qs in manifest_key_to_object_query.items():
@@ -538,12 +504,8 @@ class Command(CryptMixin, PaperlessCommand):
self.target,
)
else:
# 5. Remove pre-existing files/dirs from target, keeping the
# in-progress zip (.tmp) and any prior zip at the final path
skip = {self.zip_path.resolve(), self.zip_tmp_path.resolve()}
for item in self.target.glob("*"):
if item.resolve() in skip:
continue
# 5. Remove anything in the original location (before moving the zip)
for item in self.original_target.glob("*"):
if item.is_dir():
shutil.rmtree(item)
else:
@@ -668,23 +630,9 @@ class Command(CryptMixin, PaperlessCommand):
if self.use_folder_prefix:
manifest_name = Path("json") / manifest_name
manifest_name = (self.target / manifest_name).resolve()
if not self.zip_export:
manifest_name.parent.mkdir(parents=True, exist_ok=True)
manifest_name.parent.mkdir(parents=True, exist_ok=True)
self.check_and_write_json(content, manifest_name)
def _ensure_zip_dirs(self, arcname: str) -> None:
"""Write directory marker entries for all parent directories of arcname.
Some zip viewers only show folder structure when explicit directory
entries exist, so we add them to avoid confusing users.
"""
parts = Path(arcname).parts[:-1]
for i in range(len(parts)):
dir_arc = "/".join(parts[: i + 1]) + "/"
if dir_arc not in self._zip_dirs:
self._zip_dirs.add(dir_arc)
self.zip_file.mkdir(dir_arc)
def check_and_write_json(
self,
content: list[dict] | dict,
@@ -697,20 +645,6 @@ class Command(CryptMixin, PaperlessCommand):
This preserves the file timestamps when no changes are made.
"""
if self.zip_export:
arcname = str(target.resolve().relative_to(self.target))
self._ensure_zip_dirs(arcname)
self.zip_file.writestr(
arcname,
json.dumps(
content,
cls=DjangoJSONEncoder,
indent=2,
ensure_ascii=False,
),
)
return
target = target.resolve()
perform_write = True
if target in self.files_in_export_dir:
@@ -749,12 +683,6 @@ class Command(CryptMixin, PaperlessCommand):
the source attributes
"""
if self.zip_export:
arcname = str(target.resolve().relative_to(self.target))
self._ensure_zip_dirs(arcname)
self.zip_file.write(source, arcname=arcname)
return
target = target.resolve()
if target in self.files_in_export_dir:
self.files_in_export_dir.remove(target)

View File

@@ -8,6 +8,7 @@ from pathlib import Path
from zipfile import ZipFile
from zipfile import is_zipfile
import ijson
from django.conf import settings
from django.contrib.auth.models import Permission
from django.contrib.auth.models import User
@@ -46,6 +47,15 @@ if settings.AUDIT_LOG_ENABLED:
from auditlog.registry import auditlog
def iter_manifest_records(path: Path) -> Generator[dict, None, None]:
"""Yield records one at a time from a manifest JSON array via ijson."""
try:
with path.open("rb") as f:
yield from ijson.items(f, "item")
except ijson.JSONError as e:
raise CommandError(f"Failed to parse manifest file {path}: {e}") from e
@contextmanager
def disable_signal(sig, receiver, sender, *, weak: bool | None = None) -> Generator:
try:
@@ -143,14 +153,9 @@ class Command(CryptMixin, PaperlessCommand):
Loads manifest data from the various JSON files for parsing and loading the database
"""
main_manifest_path: Path = self.source / "manifest.json"
with main_manifest_path.open() as infile:
self.manifest = json.load(infile)
self.manifest_paths.append(main_manifest_path)
for file in Path(self.source).glob("**/*-manifest.json"):
with file.open() as infile:
self.manifest += json.load(infile)
self.manifest_paths.append(file)
def load_metadata(self) -> None:
@@ -231,7 +236,6 @@ class Command(CryptMixin, PaperlessCommand):
self.version: str | None = None
self.salt: str | None = None
self.manifest_paths = []
self.manifest = []
# Create a temporary directory for extracting a zip file into it, even if supplied source is no zip file to keep code cleaner.
with tempfile.TemporaryDirectory() as tmp_dir:
@@ -291,6 +295,9 @@ class Command(CryptMixin, PaperlessCommand):
else:
self.stdout.write(self.style.NOTICE("Data only import completed"))
for tmp in getattr(self, "_decrypted_tmp_paths", []):
tmp.unlink(missing_ok=True)
self.stdout.write("Updating search index...")
call_command(
"document_index",
@@ -343,11 +350,12 @@ class Command(CryptMixin, PaperlessCommand):
) from e
self.stdout.write("Checking the manifest")
for record in self.manifest:
# Only check if the document files exist if this is not data only
# We don't care about documents for a data only import
if not self.data_only and record["model"] == "documents.document":
check_document_validity(record)
for manifest_path in self.manifest_paths:
for record in iter_manifest_records(manifest_path):
# Only check if the document files exist if this is not data only
# We don't care about documents for a data only import
if not self.data_only and record["model"] == "documents.document":
check_document_validity(record)
def _import_files_from_manifest(self) -> None:
settings.ORIGINALS_DIR.mkdir(parents=True, exist_ok=True)
@@ -356,23 +364,31 @@ class Command(CryptMixin, PaperlessCommand):
self.stdout.write("Copy files into paperless...")
manifest_documents = list(
filter(lambda r: r["model"] == "documents.document", self.manifest),
)
document_records = [
{
"pk": record["pk"],
EXPORTER_FILE_NAME: record[EXPORTER_FILE_NAME],
EXPORTER_THUMBNAIL_NAME: record.get(EXPORTER_THUMBNAIL_NAME),
EXPORTER_ARCHIVE_NAME: record.get(EXPORTER_ARCHIVE_NAME),
}
for manifest_path in self.manifest_paths
for record in iter_manifest_records(manifest_path)
if record["model"] == "documents.document"
]
for record in self.track(manifest_documents, description="Copying files..."):
for record in self.track(document_records, description="Copying files..."):
document = Document.objects.get(pk=record["pk"])
doc_file = record[EXPORTER_FILE_NAME]
document_path = self.source / doc_file
if EXPORTER_THUMBNAIL_NAME in record:
if record[EXPORTER_THUMBNAIL_NAME]:
thumb_file = record[EXPORTER_THUMBNAIL_NAME]
thumbnail_path = (self.source / thumb_file).resolve()
else:
thumbnail_path = None
if EXPORTER_ARCHIVE_NAME in record:
if record[EXPORTER_ARCHIVE_NAME]:
archive_file = record[EXPORTER_ARCHIVE_NAME]
archive_path = self.source / archive_file
else:
@@ -413,33 +429,43 @@ class Command(CryptMixin, PaperlessCommand):
document.save()
def _decrypt_record_if_needed(self, record: dict) -> dict:
fields = self.CRYPT_FIELDS_BY_MODEL.get(record.get("model", ""))
if fields:
for field in fields:
if record["fields"].get(field):
record["fields"][field] = self.decrypt_string(
value=record["fields"][field],
)
return record
def decrypt_secret_fields(self) -> None:
"""
The converse decryption of some fields out of the export before importing to database
The converse decryption of some fields out of the export before importing to database.
Streams records from each manifest path and writes decrypted content to a temp file.
"""
if self.passphrase:
# Salt has been loaded from metadata.json at this point, so it cannot be None
self.setup_crypto(passphrase=self.passphrase, salt=self.salt)
had_at_least_one_record = False
for crypt_config in self.CRYPT_FIELDS:
importer_model: str = crypt_config["model_name"]
crypt_fields: str = crypt_config["fields"]
for record in filter(
lambda x: x["model"] == importer_model,
self.manifest,
):
had_at_least_one_record = True
for field in crypt_fields:
if record["fields"][field]:
record["fields"][field] = self.decrypt_string(
value=record["fields"][field],
)
if had_at_least_one_record:
# It's annoying, but the DB is loaded from the JSON directly
# Maybe could change that in the future?
(self.source / "manifest.json").write_text(
json.dumps(self.manifest, indent=2, ensure_ascii=False),
)
if not self.passphrase:
return
# Salt has been loaded from metadata.json at this point, so it cannot be None
self.setup_crypto(passphrase=self.passphrase, salt=self.salt)
self._decrypted_tmp_paths: list[Path] = []
new_paths: list[Path] = []
for manifest_path in self.manifest_paths:
tmp = manifest_path.with_name(manifest_path.stem + ".decrypted.json")
with tmp.open("w", encoding="utf-8") as out:
out.write("[\n")
first = True
for record in iter_manifest_records(manifest_path):
if not first:
out.write(",\n")
json.dump(
self._decrypt_record_if_needed(record),
out,
indent=2,
ensure_ascii=False,
)
first = False
out.write("\n]\n")
self._decrypted_tmp_paths.append(tmp)
new_paths.append(tmp)
self.manifest_paths = new_paths

View File

@@ -689,6 +689,7 @@ def document_matches_workflow(
trigger_type == WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED
or trigger_type == WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED
or trigger_type == WorkflowTrigger.WorkflowTriggerType.SCHEDULED
or trigger_type == WorkflowTrigger.WorkflowTriggerType.VERSION_ADDED
):
trigger_matched, reason = existing_document_matches_workflow(
document,

View File

@@ -0,0 +1,28 @@
# Generated by Django 5.2.7 on 2026-03-02 00:00
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "0016_document_version_index_and_more"),
]
operations = [
migrations.AlterField(
model_name="workflowtrigger",
name="type",
field=models.PositiveSmallIntegerField(
choices=[
(1, "Consumption Started"),
(2, "Document Added"),
(3, "Document Updated"),
(4, "Scheduled"),
(5, "Version Added"),
],
default=1,
verbose_name="Workflow Trigger Type",
),
),
]

View File

@@ -1132,6 +1132,7 @@ class WorkflowTrigger(models.Model):
DOCUMENT_ADDED = 2, _("Document Added")
DOCUMENT_UPDATED = 3, _("Document Updated")
SCHEDULED = 4, _("Scheduled")
VERSION_ADDED = 5, _("Version Added")
class DocumentSourceChoices(models.IntegerChoices):
CONSUME_FOLDER = DocumentSource.ConsumeFolder.value, _("Consume Folder")

View File

@@ -2,5 +2,6 @@ from django.dispatch import Signal
document_consumption_started = Signal()
document_consumption_finished = Signal()
document_version_added = Signal()
document_consumer_declaration = Signal()
document_updated = Signal()

View File

@@ -783,6 +783,19 @@ def run_workflows_added(
)
def run_workflows_version_added(
sender,
document: Document,
logging_group: uuid.UUID | None = None,
**kwargs,
) -> None:
run_workflows(
trigger_type=WorkflowTrigger.WorkflowTriggerType.VERSION_ADDED,
document=document.root_document,
logging_group=logging_group,
)
def run_workflows_updated(
sender,
document: Document,

View File

@@ -715,9 +715,16 @@ class TestConsumer(
self._assert_first_last_send_progress()
@override_settings(AUDIT_LOG_ENABLED=True)
@mock.patch("documents.consumer.document_updated.send")
@mock.patch("documents.consumer.document_version_added.send")
@mock.patch("documents.consumer.load_classifier")
def test_consume_version_creates_new_version(self, m) -> None:
m.return_value = MagicMock()
def test_consume_version_creates_new_version(
self,
mock_load_classifier: mock.Mock,
mock_document_version_added_send: mock.Mock,
mock_document_updated_send: mock.Mock,
) -> None:
mock_load_classifier.return_value = MagicMock()
with self.get_consumer(self.get_test_file()) as consumer:
consumer.run()
@@ -785,6 +792,16 @@ class TestConsumer(
self.assertIsNone(version.archive_serial_number)
self.assertEqual(version.original_filename, version_file.name)
self.assertTrue(bool(version.content))
mock_document_version_added_send.assert_called_once()
self.assertEqual(
mock_document_version_added_send.call_args.kwargs["document"].id,
version.id,
)
mock_document_updated_send.assert_called_once()
self.assertEqual(
mock_document_updated_send.call_args.kwargs["document"].id,
root_doc.id,
)
@override_settings(AUDIT_LOG_ENABLED=True)
@mock.patch("documents.consumer.load_classifier")

View File

@@ -505,7 +505,7 @@ class TestExportImport(
self.assertIsFile(expected_file)
with ZipFile(expected_file) as zip:
# 11 files + 3 directory marker entries for the subdirectory structure
# Extras are from the directories, which also appear in the listing
self.assertEqual(len(zip.namelist()), 14)
self.assertIn("manifest.json", zip.namelist())
self.assertIn("metadata.json", zip.namelist())
@@ -557,59 +557,6 @@ class TestExportImport(
self.assertIn("manifest.json", zip.namelist())
self.assertIn("metadata.json", zip.namelist())
def test_export_zip_atomic_on_failure(self) -> None:
"""
GIVEN:
- Request to export documents to zipfile
WHEN:
- Export raises an exception mid-way
THEN:
- No .zip file is written at the final path
- The .tmp file is cleaned up
"""
args = ["document_exporter", self.target, "--zip"]
with mock.patch.object(
document_exporter.Command,
"dump",
side_effect=RuntimeError("simulated failure"),
):
with self.assertRaises(RuntimeError):
call_command(*args)
expected_zip = self.target / f"export-{timezone.localdate().isoformat()}.zip"
expected_tmp = (
self.target / f"export-{timezone.localdate().isoformat()}.zip.tmp"
)
self.assertIsNotFile(expected_zip)
self.assertIsNotFile(expected_tmp)
def test_export_zip_no_scratch_dir(self) -> None:
"""
GIVEN:
- Request to export documents to zipfile
WHEN:
- Documents are exported
THEN:
- No files are written under SCRATCH_DIR during the export
(the old workaround used a temp dir there)
"""
from django.conf import settings
shutil.rmtree(Path(self.dirs.media_dir) / "documents")
shutil.copytree(
Path(__file__).parent / "samples" / "documents",
Path(self.dirs.media_dir) / "documents",
)
scratch_before = set(settings.SCRATCH_DIR.glob("paperless-export*"))
args = ["document_exporter", self.target, "--zip"]
call_command(*args)
scratch_after = set(settings.SCRATCH_DIR.glob("paperless-export*"))
self.assertEqual(scratch_before, scratch_after)
def test_export_target_not_exists(self) -> None:
"""
GIVEN:

View File

@@ -119,15 +119,22 @@ class TestCommandImport(
# No read permissions
original_path.chmod(0o222)
manifest_path = Path(temp_dir) / "manifest.json"
manifest_path.write_text(
json.dumps(
[
{
"model": "documents.document",
EXPORTER_FILE_NAME: "original.pdf",
EXPORTER_ARCHIVE_NAME: "archive.pdf",
},
],
),
)
cmd = Command()
cmd.source = Path(temp_dir)
cmd.manifest = [
{
"model": "documents.document",
EXPORTER_FILE_NAME: "original.pdf",
EXPORTER_ARCHIVE_NAME: "archive.pdf",
},
]
cmd.manifest_paths = [manifest_path]
cmd.data_only = False
with self.assertRaises(CommandError) as cm:
cmd.check_manifest_validity()
@@ -296,7 +303,7 @@ class TestCommandImport(
(self.dirs.scratch_dir / "manifest.json").touch()
# We're not building a manifest, so it fails, but this test doesn't care
with self.assertRaises(json.decoder.JSONDecodeError):
with self.assertRaises(CommandError):
call_command(
"document_importer",
"--no-progress-bar",
@@ -325,7 +332,7 @@ class TestCommandImport(
)
# We're not building a manifest, so it fails, but this test doesn't care
with self.assertRaises(json.decoder.JSONDecodeError):
with self.assertRaises(CommandError):
call_command(
"document_importer",
"--no-progress-bar",

View File

@@ -60,6 +60,7 @@ from documents.models import WorkflowTrigger
from documents.plugins.base import StopConsumeTaskError
from documents.serialisers import WorkflowTriggerSerializer
from documents.signals import document_consumption_finished
from documents.signals import document_version_added
from documents.tests.utils import DirectoriesMixin
from documents.tests.utils import DummyProgressManager
from documents.tests.utils import FileSystemAssertsMixin
@@ -1786,6 +1787,53 @@ class TestWorkflows(
).exists(),
)
def test_version_added_workflow_runs_on_root_document(self) -> None:
trigger = WorkflowTrigger.objects.create(
type=WorkflowTrigger.WorkflowTriggerType.VERSION_ADDED,
)
action = WorkflowAction.objects.create(
assign_title="Updated by version",
assign_owner=self.user2,
)
workflow = Workflow.objects.create(
name="Version workflow",
order=0,
)
workflow.triggers.add(trigger)
workflow.actions.add(action)
root_doc = Document.objects.create(
title="root",
correspondent=self.c,
original_filename="root.pdf",
)
version_doc = Document.objects.create(
title="version",
correspondent=self.c,
original_filename="version.pdf",
root_document=root_doc,
)
document_version_added.send(
sender=self.__class__,
document=version_doc,
)
root_doc.refresh_from_db()
version_doc.refresh_from_db()
self.assertEqual(root_doc.title, "Updated by version")
self.assertEqual(root_doc.owner, self.user2)
self.assertIsNone(version_doc.owner)
self.assertEqual(
WorkflowRun.objects.filter(
workflow=workflow,
type=WorkflowTrigger.WorkflowTriggerType.VERSION_ADDED,
document=root_doc,
).count(),
1,
)
def test_document_updated_workflow(self) -> None:
trigger = WorkflowTrigger.objects.create(
type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED,

View File

@@ -2,7 +2,7 @@ msgid ""
msgstr ""
"Project-Id-Version: paperless-ngx\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2026-03-09 01:51+0000\n"
"POT-Creation-Date: 2026-03-09 17:44+0000\n"
"PO-Revision-Date: 2022-02-17 04:17\n"
"Last-Translator: \n"
"Language-Team: English\n"
@@ -1856,151 +1856,151 @@ msgstr ""
msgid "paperless application settings"
msgstr ""
#: paperless/settings/__init__.py:752
#: paperless/settings/__init__.py:521
msgid "English (US)"
msgstr ""
#: paperless/settings/__init__.py:753
#: paperless/settings/__init__.py:522
msgid "Arabic"
msgstr ""
#: paperless/settings/__init__.py:754
#: paperless/settings/__init__.py:523
msgid "Afrikaans"
msgstr ""
#: paperless/settings/__init__.py:755
#: paperless/settings/__init__.py:524
msgid "Belarusian"
msgstr ""
#: paperless/settings/__init__.py:756
#: paperless/settings/__init__.py:525
msgid "Bulgarian"
msgstr ""
#: paperless/settings/__init__.py:757
#: paperless/settings/__init__.py:526
msgid "Catalan"
msgstr ""
#: paperless/settings/__init__.py:758
#: paperless/settings/__init__.py:527
msgid "Czech"
msgstr ""
#: paperless/settings/__init__.py:759
#: paperless/settings/__init__.py:528
msgid "Danish"
msgstr ""
#: paperless/settings/__init__.py:760
#: paperless/settings/__init__.py:529
msgid "German"
msgstr ""
#: paperless/settings/__init__.py:761
#: paperless/settings/__init__.py:530
msgid "Greek"
msgstr ""
#: paperless/settings/__init__.py:762
#: paperless/settings/__init__.py:531
msgid "English (GB)"
msgstr ""
#: paperless/settings/__init__.py:763
#: paperless/settings/__init__.py:532
msgid "Spanish"
msgstr ""
#: paperless/settings/__init__.py:764
#: paperless/settings/__init__.py:533
msgid "Persian"
msgstr ""
#: paperless/settings/__init__.py:765
#: paperless/settings/__init__.py:534
msgid "Finnish"
msgstr ""
#: paperless/settings/__init__.py:766
#: paperless/settings/__init__.py:535
msgid "French"
msgstr ""
#: paperless/settings/__init__.py:767
#: paperless/settings/__init__.py:536
msgid "Hungarian"
msgstr ""
#: paperless/settings/__init__.py:768
#: paperless/settings/__init__.py:537
msgid "Indonesian"
msgstr ""
#: paperless/settings/__init__.py:769
#: paperless/settings/__init__.py:538
msgid "Italian"
msgstr ""
#: paperless/settings/__init__.py:770
#: paperless/settings/__init__.py:539
msgid "Japanese"
msgstr ""
#: paperless/settings/__init__.py:771
#: paperless/settings/__init__.py:540
msgid "Korean"
msgstr ""
#: paperless/settings/__init__.py:772
#: paperless/settings/__init__.py:541
msgid "Luxembourgish"
msgstr ""
#: paperless/settings/__init__.py:773
#: paperless/settings/__init__.py:542
msgid "Norwegian"
msgstr ""
#: paperless/settings/__init__.py:774
#: paperless/settings/__init__.py:543
msgid "Dutch"
msgstr ""
#: paperless/settings/__init__.py:775
#: paperless/settings/__init__.py:544
msgid "Polish"
msgstr ""
#: paperless/settings/__init__.py:776
#: paperless/settings/__init__.py:545
msgid "Portuguese (Brazil)"
msgstr ""
#: paperless/settings/__init__.py:777
#: paperless/settings/__init__.py:546
msgid "Portuguese"
msgstr ""
#: paperless/settings/__init__.py:778
#: paperless/settings/__init__.py:547
msgid "Romanian"
msgstr ""
#: paperless/settings/__init__.py:779
#: paperless/settings/__init__.py:548
msgid "Russian"
msgstr ""
#: paperless/settings/__init__.py:780
#: paperless/settings/__init__.py:549
msgid "Slovak"
msgstr ""
#: paperless/settings/__init__.py:781
#: paperless/settings/__init__.py:550
msgid "Slovenian"
msgstr ""
#: paperless/settings/__init__.py:782
#: paperless/settings/__init__.py:551
msgid "Serbian"
msgstr ""
#: paperless/settings/__init__.py:783
#: paperless/settings/__init__.py:552
msgid "Swedish"
msgstr ""
#: paperless/settings/__init__.py:784
#: paperless/settings/__init__.py:553
msgid "Turkish"
msgstr ""
#: paperless/settings/__init__.py:785
#: paperless/settings/__init__.py:554
msgid "Ukrainian"
msgstr ""
#: paperless/settings/__init__.py:786
#: paperless/settings/__init__.py:555
msgid "Vietnamese"
msgstr ""
#: paperless/settings/__init__.py:787
#: paperless/settings/__init__.py:556
msgid "Chinese Simplified"
msgstr ""
#: paperless/settings/__init__.py:788
#: paperless/settings/__init__.py:557
msgid "Chinese Traditional"
msgstr ""

View File

@@ -6,18 +6,25 @@ import math
import multiprocessing
import os
import tempfile
from os import PathLike
from pathlib import Path
from typing import Final
from urllib.parse import urlparse
from celery.schedules import crontab
from compression_middleware.middleware import CompressionMiddleware
from dateparser.languages.loader import LocaleDataLoader
from django.utils.translation import gettext_lazy as _
from dotenv import load_dotenv
from paperless.settings.custom import parse_beat_schedule
from paperless.settings.custom import parse_dateparser_languages
from paperless.settings.custom import parse_db_settings
from paperless.settings.custom import parse_hosting_settings
from paperless.settings.custom import parse_ignore_dates
from paperless.settings.custom import parse_redis_url
from paperless.settings.parsers import get_bool_from_env
from paperless.settings.parsers import get_float_from_env
from paperless.settings.parsers import get_int_from_env
from paperless.settings.parsers import get_list_from_env
from paperless.settings.parsers import get_path_from_env
logger = logging.getLogger("paperless.settings")
@@ -45,239 +52,8 @@ for path in [
os.environ["OMP_THREAD_LIMIT"] = "1"
def __get_boolean(key: str, default: str = "NO") -> bool:
"""
Return a boolean value based on whatever the user has supplied in the
environment based on whether the value "looks like" it's True or not.
"""
return bool(os.getenv(key, default).lower() in ("yes", "y", "1", "t", "true"))
def __get_int(key: str, default: int) -> int:
"""
Return an integer value based on the environment variable or a default
"""
return int(os.getenv(key, default))
def __get_optional_int(key: str) -> int | None:
"""
Returns None if the environment key is not present, otherwise an integer
"""
if key in os.environ:
return __get_int(key, -1) # pragma: no cover
return None
def __get_float(key: str, default: float) -> float:
"""
Return an integer value based on the environment variable or a default
"""
return float(os.getenv(key, default))
def __get_path(
key: str,
default: PathLike | str,
) -> Path:
"""
Return a normalized, absolute path based on the environment variable or a default,
if provided
"""
if key in os.environ:
return Path(os.environ[key]).resolve()
return Path(default).resolve()
def __get_optional_path(key: str) -> Path | None:
"""
Returns None if the environment key is not present, otherwise a fully resolved Path
"""
if key in os.environ:
return __get_path(key, "")
return None
def __get_list(
key: str,
default: list[str] | None = None,
sep: str = ",",
) -> list[str]:
"""
Return a list of elements from the environment, as separated by the given
string, or the default if the key does not exist
"""
if key in os.environ:
return list(filter(None, os.environ[key].split(sep)))
elif default is not None:
return default
else:
return []
def _parse_redis_url(env_redis: str | None) -> tuple[str, str]:
"""
Gets the Redis information from the environment or a default and handles
converting from incompatible django_channels and celery formats.
Returns a tuple of (celery_url, channels_url)
"""
# Not set, return a compatible default
if env_redis is None:
return ("redis://localhost:6379", "redis://localhost:6379")
if "unix" in env_redis.lower():
# channels_redis socket format, looks like:
# "unix:///path/to/redis.sock"
_, path = env_redis.split(":", 1)
# Optionally setting a db number
if "?db=" in env_redis:
path, number = path.split("?db=")
return (f"redis+socket:{path}?virtual_host={number}", env_redis)
else:
return (f"redis+socket:{path}", env_redis)
elif "+socket" in env_redis.lower():
# celery socket style, looks like:
# "redis+socket:///path/to/redis.sock"
_, path = env_redis.split(":", 1)
if "?virtual_host=" in env_redis:
# Virtual host (aka db number)
path, number = path.split("?virtual_host=")
return (env_redis, f"unix:{path}?db={number}")
else:
return (env_redis, f"unix:{path}")
# Not a socket
return (env_redis, env_redis)
def _parse_beat_schedule() -> dict:
"""
Configures the scheduled tasks, according to default or
environment variables. Task expiration is configured so the task will
expire (and not run), shortly before the default frequency will put another
of the same task into the queue
https://docs.celeryq.dev/en/stable/userguide/periodic-tasks.html#beat-entries
https://docs.celeryq.dev/en/latest/userguide/calling.html#expiration
"""
schedule = {}
tasks = [
{
"name": "Check all e-mail accounts",
"env_key": "PAPERLESS_EMAIL_TASK_CRON",
# Default every ten minutes
"env_default": "*/10 * * * *",
"task": "paperless_mail.tasks.process_mail_accounts",
"options": {
# 1 minute before default schedule sends again
"expires": 9.0 * 60.0,
},
},
{
"name": "Train the classifier",
"env_key": "PAPERLESS_TRAIN_TASK_CRON",
# Default hourly at 5 minutes past the hour
"env_default": "5 */1 * * *",
"task": "documents.tasks.train_classifier",
"options": {
# 1 minute before default schedule sends again
"expires": 59.0 * 60.0,
},
},
{
"name": "Optimize the index",
"env_key": "PAPERLESS_INDEX_TASK_CRON",
# Default daily at midnight
"env_default": "0 0 * * *",
"task": "documents.tasks.index_optimize",
"options": {
# 1 hour before default schedule sends again
"expires": 23.0 * 60.0 * 60.0,
},
},
{
"name": "Perform sanity check",
"env_key": "PAPERLESS_SANITY_TASK_CRON",
# Default Sunday at 00:30
"env_default": "30 0 * * sun",
"task": "documents.tasks.sanity_check",
"options": {
# 1 hour before default schedule sends again
"expires": ((7.0 * 24.0) - 1.0) * 60.0 * 60.0,
},
},
{
"name": "Empty trash",
"env_key": "PAPERLESS_EMPTY_TRASH_TASK_CRON",
# Default daily at 01:00
"env_default": "0 1 * * *",
"task": "documents.tasks.empty_trash",
"options": {
# 1 hour before default schedule sends again
"expires": 23.0 * 60.0 * 60.0,
},
},
{
"name": "Check and run scheduled workflows",
"env_key": "PAPERLESS_WORKFLOW_SCHEDULED_TASK_CRON",
# Default hourly at 5 minutes past the hour
"env_default": "5 */1 * * *",
"task": "documents.tasks.check_scheduled_workflows",
"options": {
# 1 minute before default schedule sends again
"expires": 59.0 * 60.0,
},
},
{
"name": "Rebuild LLM index",
"env_key": "PAPERLESS_LLM_INDEX_TASK_CRON",
# Default daily at 02:10
"env_default": "10 2 * * *",
"task": "documents.tasks.llmindex_index",
"options": {
# 1 hour before default schedule sends again
"expires": 23.0 * 60.0 * 60.0,
},
},
{
"name": "Cleanup expired share link bundles",
"env_key": "PAPERLESS_SHARE_LINK_BUNDLE_CLEANUP_CRON",
# Default daily at 02:00
"env_default": "0 2 * * *",
"task": "documents.tasks.cleanup_expired_share_link_bundles",
"options": {
# 1 hour before default schedule sends again
"expires": 23.0 * 60.0 * 60.0,
},
},
]
for task in tasks:
# Either get the environment setting or use the default
value = os.getenv(task["env_key"], task["env_default"])
# Don't add disabled tasks to the schedule
if value == "disable":
continue
# I find https://crontab.guru/ super helpful
# crontab(5) format
# - five time-and-date fields
# - separated by at least one blank
minute, hour, day_month, month, day_week = value.split(" ")
schedule[task["name"]] = {
"task": task["task"],
"schedule": crontab(minute, hour, day_week, day_month, month),
"options": task["options"],
}
return schedule
# NEVER RUN WITH DEBUG IN PRODUCTION.
DEBUG = __get_boolean("PAPERLESS_DEBUG", "NO")
DEBUG = get_bool_from_env("PAPERLESS_DEBUG", "NO")
###############################################################################
@@ -286,21 +62,21 @@ DEBUG = __get_boolean("PAPERLESS_DEBUG", "NO")
BASE_DIR: Path = Path(__file__).resolve().parent.parent.parent
STATIC_ROOT = __get_path("PAPERLESS_STATICDIR", BASE_DIR.parent / "static")
STATIC_ROOT = get_path_from_env("PAPERLESS_STATICDIR", BASE_DIR.parent / "static")
MEDIA_ROOT = __get_path("PAPERLESS_MEDIA_ROOT", BASE_DIR.parent / "media")
MEDIA_ROOT = get_path_from_env("PAPERLESS_MEDIA_ROOT", BASE_DIR.parent / "media")
ORIGINALS_DIR = MEDIA_ROOT / "documents" / "originals"
ARCHIVE_DIR = MEDIA_ROOT / "documents" / "archive"
THUMBNAIL_DIR = MEDIA_ROOT / "documents" / "thumbnails"
SHARE_LINK_BUNDLE_DIR = MEDIA_ROOT / "documents" / "share_link_bundles"
DATA_DIR = __get_path("PAPERLESS_DATA_DIR", BASE_DIR.parent / "data")
DATA_DIR = get_path_from_env("PAPERLESS_DATA_DIR", BASE_DIR.parent / "data")
NLTK_DIR = __get_path("PAPERLESS_NLTK_DIR", "/usr/share/nltk_data")
NLTK_DIR = get_path_from_env("PAPERLESS_NLTK_DIR", "/usr/share/nltk_data")
# Check deprecated setting first
EMPTY_TRASH_DIR = (
__get_path("PAPERLESS_TRASH_DIR", os.getenv("PAPERLESS_EMPTY_TRASH_DIR"))
get_path_from_env("PAPERLESS_TRASH_DIR", os.getenv("PAPERLESS_EMPTY_TRASH_DIR"))
if os.getenv("PAPERLESS_TRASH_DIR") or os.getenv("PAPERLESS_EMPTY_TRASH_DIR")
else None
)
@@ -309,21 +85,21 @@ EMPTY_TRASH_DIR = (
# threads.
MEDIA_LOCK = MEDIA_ROOT / "media.lock"
INDEX_DIR = DATA_DIR / "index"
MODEL_FILE = __get_path(
MODEL_FILE = get_path_from_env(
"PAPERLESS_MODEL_FILE",
DATA_DIR / "classification_model.pickle",
)
LLM_INDEX_DIR = DATA_DIR / "llm_index"
LOGGING_DIR = __get_path("PAPERLESS_LOGGING_DIR", DATA_DIR / "log")
LOGGING_DIR = get_path_from_env("PAPERLESS_LOGGING_DIR", DATA_DIR / "log")
CONSUMPTION_DIR = __get_path(
CONSUMPTION_DIR = get_path_from_env(
"PAPERLESS_CONSUMPTION_DIR",
BASE_DIR.parent / "consume",
)
# This will be created if it doesn't exist
SCRATCH_DIR = __get_path(
SCRATCH_DIR = get_path_from_env(
"PAPERLESS_SCRATCH_DIR",
Path(tempfile.gettempdir()) / "paperless",
)
@@ -332,7 +108,7 @@ SCRATCH_DIR = __get_path(
# Application Definition #
###############################################################################
env_apps = __get_list("PAPERLESS_APPS")
env_apps = get_list_from_env("PAPERLESS_APPS")
INSTALLED_APPS = [
"whitenoise.runserver_nostatic",
@@ -405,7 +181,7 @@ MIDDLEWARE = [
]
# Optional to enable compression
if __get_boolean("PAPERLESS_ENABLE_COMPRESSION", "yes"): # pragma: no cover
if get_bool_from_env("PAPERLESS_ENABLE_COMPRESSION", "yes"): # pragma: no cover
MIDDLEWARE.insert(0, "compression_middleware.middleware.CompressionMiddleware")
# Workaround to not compress streaming responses (e.g. chat).
@@ -424,20 +200,8 @@ CompressionMiddleware.process_response = patched_process_response
ROOT_URLCONF = "paperless.urls"
def _parse_base_paths() -> tuple[str, str, str, str, str]:
script_name = os.getenv("PAPERLESS_FORCE_SCRIPT_NAME")
base_url = (script_name or "") + "/"
login_url = base_url + "accounts/login/"
login_redirect_url = base_url + "dashboard"
logout_redirect_url = os.getenv(
"PAPERLESS_LOGOUT_REDIRECT_URL",
login_url + "?loggedout=1",
)
return script_name, base_url, login_url, login_redirect_url, logout_redirect_url
FORCE_SCRIPT_NAME, BASE_URL, LOGIN_URL, LOGIN_REDIRECT_URL, LOGOUT_REDIRECT_URL = (
_parse_base_paths()
parse_hosting_settings()
)
# DRF Spectacular settings
@@ -471,7 +235,7 @@ STORAGES = {
"default": {"BACKEND": "django.core.files.storage.FileSystemStorage"},
}
_CELERY_REDIS_URL, _CHANNELS_REDIS_URL = _parse_redis_url(
_CELERY_REDIS_URL, _CHANNELS_REDIS_URL = parse_redis_url(
os.getenv("PAPERLESS_REDIS", None),
)
_REDIS_KEY_PREFIX = os.getenv("PAPERLESS_REDIS_PREFIX", "")
@@ -520,8 +284,8 @@ EMAIL_PORT: Final[int] = int(os.getenv("PAPERLESS_EMAIL_PORT", 25))
EMAIL_HOST_USER: Final[str] = os.getenv("PAPERLESS_EMAIL_HOST_USER", "")
EMAIL_HOST_PASSWORD: Final[str] = os.getenv("PAPERLESS_EMAIL_HOST_PASSWORD", "")
DEFAULT_FROM_EMAIL: Final[str] = os.getenv("PAPERLESS_EMAIL_FROM", EMAIL_HOST_USER)
EMAIL_USE_TLS: Final[bool] = __get_boolean("PAPERLESS_EMAIL_USE_TLS")
EMAIL_USE_SSL: Final[bool] = __get_boolean("PAPERLESS_EMAIL_USE_SSL")
EMAIL_USE_TLS: Final[bool] = get_bool_from_env("PAPERLESS_EMAIL_USE_TLS")
EMAIL_USE_SSL: Final[bool] = get_bool_from_env("PAPERLESS_EMAIL_USE_SSL")
EMAIL_SUBJECT_PREFIX: Final[str] = "[Paperless-ngx] "
EMAIL_TIMEOUT = 30.0
EMAIL_ENABLED = EMAIL_HOST != "localhost" or EMAIL_HOST_USER != ""
@@ -546,20 +310,22 @@ ACCOUNT_DEFAULT_HTTP_PROTOCOL = os.getenv(
)
ACCOUNT_ADAPTER = "paperless.adapter.CustomAccountAdapter"
ACCOUNT_ALLOW_SIGNUPS = __get_boolean("PAPERLESS_ACCOUNT_ALLOW_SIGNUPS")
ACCOUNT_DEFAULT_GROUPS = __get_list("PAPERLESS_ACCOUNT_DEFAULT_GROUPS")
ACCOUNT_ALLOW_SIGNUPS = get_bool_from_env("PAPERLESS_ACCOUNT_ALLOW_SIGNUPS")
ACCOUNT_DEFAULT_GROUPS = get_list_from_env("PAPERLESS_ACCOUNT_DEFAULT_GROUPS")
SOCIALACCOUNT_ADAPTER = "paperless.adapter.CustomSocialAccountAdapter"
SOCIALACCOUNT_ALLOW_SIGNUPS = __get_boolean(
SOCIALACCOUNT_ALLOW_SIGNUPS = get_bool_from_env(
"PAPERLESS_SOCIALACCOUNT_ALLOW_SIGNUPS",
"yes",
)
SOCIALACCOUNT_AUTO_SIGNUP = __get_boolean("PAPERLESS_SOCIAL_AUTO_SIGNUP")
SOCIALACCOUNT_AUTO_SIGNUP = get_bool_from_env("PAPERLESS_SOCIAL_AUTO_SIGNUP")
SOCIALACCOUNT_PROVIDERS = json.loads(
os.getenv("PAPERLESS_SOCIALACCOUNT_PROVIDERS", "{}"),
)
SOCIAL_ACCOUNT_DEFAULT_GROUPS = __get_list("PAPERLESS_SOCIAL_ACCOUNT_DEFAULT_GROUPS")
SOCIAL_ACCOUNT_SYNC_GROUPS = __get_boolean("PAPERLESS_SOCIAL_ACCOUNT_SYNC_GROUPS")
SOCIAL_ACCOUNT_DEFAULT_GROUPS = get_list_from_env(
"PAPERLESS_SOCIAL_ACCOUNT_DEFAULT_GROUPS",
)
SOCIAL_ACCOUNT_SYNC_GROUPS = get_bool_from_env("PAPERLESS_SOCIAL_ACCOUNT_SYNC_GROUPS")
SOCIAL_ACCOUNT_SYNC_GROUPS_CLAIM: Final[str] = os.getenv(
"PAPERLESS_SOCIAL_ACCOUNT_SYNC_GROUPS_CLAIM",
"groups",
@@ -571,8 +337,8 @@ MFA_TOTP_ISSUER = "Paperless-ngx"
ACCOUNT_EMAIL_SUBJECT_PREFIX = "[Paperless-ngx] "
DISABLE_REGULAR_LOGIN = __get_boolean("PAPERLESS_DISABLE_REGULAR_LOGIN")
REDIRECT_LOGIN_TO_SSO = __get_boolean("PAPERLESS_REDIRECT_LOGIN_TO_SSO")
DISABLE_REGULAR_LOGIN = get_bool_from_env("PAPERLESS_DISABLE_REGULAR_LOGIN")
REDIRECT_LOGIN_TO_SSO = get_bool_from_env("PAPERLESS_REDIRECT_LOGIN_TO_SSO")
AUTO_LOGIN_USERNAME = os.getenv("PAPERLESS_AUTO_LOGIN_USERNAME")
@@ -585,12 +351,15 @@ ACCOUNT_EMAIL_VERIFICATION = (
)
)
ACCOUNT_EMAIL_UNKNOWN_ACCOUNTS = __get_boolean(
ACCOUNT_EMAIL_UNKNOWN_ACCOUNTS = get_bool_from_env(
"PAPERLESS_ACCOUNT_EMAIL_UNKNOWN_ACCOUNTS",
"True",
)
ACCOUNT_SESSION_REMEMBER = __get_boolean("PAPERLESS_ACCOUNT_SESSION_REMEMBER", "True")
ACCOUNT_SESSION_REMEMBER = get_bool_from_env(
"PAPERLESS_ACCOUNT_SESSION_REMEMBER",
"True",
)
SESSION_EXPIRE_AT_BROWSER_CLOSE = not ACCOUNT_SESSION_REMEMBER
SESSION_COOKIE_AGE = int(
os.getenv("PAPERLESS_SESSION_COOKIE_AGE", 60 * 60 * 24 * 7 * 3),
@@ -607,8 +376,8 @@ if AUTO_LOGIN_USERNAME:
def _parse_remote_user_settings() -> str:
global MIDDLEWARE, AUTHENTICATION_BACKENDS, REST_FRAMEWORK
enable = __get_boolean("PAPERLESS_ENABLE_HTTP_REMOTE_USER")
enable_api = __get_boolean("PAPERLESS_ENABLE_HTTP_REMOTE_USER_API")
enable = get_bool_from_env("PAPERLESS_ENABLE_HTTP_REMOTE_USER")
enable_api = get_bool_from_env("PAPERLESS_ENABLE_HTTP_REMOTE_USER_API")
if enable or enable_api:
MIDDLEWARE.append("paperless.auth.HttpRemoteUserMiddleware")
AUTHENTICATION_BACKENDS.insert(
@@ -636,16 +405,16 @@ HTTP_REMOTE_USER_HEADER_NAME = _parse_remote_user_settings()
X_FRAME_OPTIONS = "SAMEORIGIN"
# The next 3 settings can also be set using just PAPERLESS_URL
CSRF_TRUSTED_ORIGINS = __get_list("PAPERLESS_CSRF_TRUSTED_ORIGINS")
CSRF_TRUSTED_ORIGINS = get_list_from_env("PAPERLESS_CSRF_TRUSTED_ORIGINS")
if DEBUG:
# Allow access from the angular development server during debugging
CSRF_TRUSTED_ORIGINS.append("http://localhost:4200")
# We allow CORS from localhost:8000
CORS_ALLOWED_ORIGINS = __get_list(
CORS_ALLOWED_ORIGINS = get_list_from_env(
"PAPERLESS_CORS_ALLOWED_HOSTS",
["http://localhost:8000"],
default=["http://localhost:8000"],
)
if DEBUG:
@@ -658,7 +427,7 @@ CORS_EXPOSE_HEADERS = [
"Content-Disposition",
]
ALLOWED_HOSTS = __get_list("PAPERLESS_ALLOWED_HOSTS", ["*"])
ALLOWED_HOSTS = get_list_from_env("PAPERLESS_ALLOWED_HOSTS", default=["*"])
if ALLOWED_HOSTS != ["*"]:
# always allow localhost. Necessary e.g. for healthcheck in docker.
ALLOWED_HOSTS.append("localhost")
@@ -678,10 +447,10 @@ def _parse_paperless_url():
PAPERLESS_URL = _parse_paperless_url()
# For use with trusted proxies
TRUSTED_PROXIES = __get_list("PAPERLESS_TRUSTED_PROXIES")
TRUSTED_PROXIES = get_list_from_env("PAPERLESS_TRUSTED_PROXIES")
USE_X_FORWARDED_HOST = __get_boolean("PAPERLESS_USE_X_FORWARD_HOST", "false")
USE_X_FORWARDED_PORT = __get_boolean("PAPERLESS_USE_X_FORWARD_PORT", "false")
USE_X_FORWARDED_HOST = get_bool_from_env("PAPERLESS_USE_X_FORWARD_HOST", "false")
USE_X_FORWARDED_PORT = get_bool_from_env("PAPERLESS_USE_X_FORWARD_PORT", "false")
SECURE_PROXY_SSL_HEADER = (
tuple(json.loads(os.environ["PAPERLESS_PROXY_SSL_HEADER"]))
if "PAPERLESS_PROXY_SSL_HEADER" in os.environ
@@ -724,7 +493,7 @@ CSRF_COOKIE_NAME = f"{COOKIE_PREFIX}csrftoken"
SESSION_COOKIE_NAME = f"{COOKIE_PREFIX}sessionid"
LANGUAGE_COOKIE_NAME = f"{COOKIE_PREFIX}django_language"
EMAIL_CERTIFICATE_FILE = __get_optional_path("PAPERLESS_EMAIL_CERTIFICATE_LOCATION")
EMAIL_CERTIFICATE_FILE = get_path_from_env("PAPERLESS_EMAIL_CERTIFICATE_LOCATION")
###############################################################################
@@ -875,7 +644,7 @@ CELERY_BROKER_URL = _CELERY_REDIS_URL
CELERY_TIMEZONE = TIME_ZONE
CELERY_WORKER_HIJACK_ROOT_LOGGER = False
CELERY_WORKER_CONCURRENCY: Final[int] = __get_int("PAPERLESS_TASK_WORKERS", 1)
CELERY_WORKER_CONCURRENCY: Final[int] = get_int_from_env("PAPERLESS_TASK_WORKERS", 1)
TASK_WORKERS = CELERY_WORKER_CONCURRENCY
CELERY_WORKER_MAX_TASKS_PER_CHILD = 1
CELERY_WORKER_SEND_TASK_EVENTS = True
@@ -888,7 +657,7 @@ CELERY_BROKER_TRANSPORT_OPTIONS = {
}
CELERY_TASK_TRACK_STARTED = True
CELERY_TASK_TIME_LIMIT: Final[int] = __get_int("PAPERLESS_WORKER_TIMEOUT", 1800)
CELERY_TASK_TIME_LIMIT: Final[int] = get_int_from_env("PAPERLESS_WORKER_TIMEOUT", 1800)
CELERY_RESULT_EXTENDED = True
CELERY_RESULT_BACKEND = "django-db"
@@ -900,7 +669,7 @@ CELERY_TASK_SERIALIZER = "pickle"
CELERY_ACCEPT_CONTENT = ["application/json", "application/x-python-serialize"]
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#beat-schedule
CELERY_BEAT_SCHEDULE = _parse_beat_schedule()
CELERY_BEAT_SCHEDULE = parse_beat_schedule()
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#beat-schedule-filename
CELERY_BEAT_SCHEDULE_FILENAME = str(DATA_DIR / "celerybeat-schedule.db")
@@ -908,14 +677,14 @@ CELERY_BEAT_SCHEDULE_FILENAME = str(DATA_DIR / "celerybeat-schedule.db")
# Cachalot: Database read cache.
def _parse_cachalot_settings():
ttl = __get_int("PAPERLESS_READ_CACHE_TTL", 3600)
ttl = get_int_from_env("PAPERLESS_READ_CACHE_TTL", 3600)
ttl = min(ttl, 31536000) if ttl > 0 else 3600
_, redis_url = _parse_redis_url(
_, redis_url = parse_redis_url(
os.getenv("PAPERLESS_READ_CACHE_REDIS_URL", _CHANNELS_REDIS_URL),
)
result = {
"CACHALOT_CACHE": "read-cache",
"CACHALOT_ENABLED": __get_boolean(
"CACHALOT_ENABLED": get_bool_from_env(
"PAPERLESS_DB_READ_CACHE_ENABLED",
default="no",
),
@@ -1000,9 +769,9 @@ CONSUMER_POLLING_INTERVAL = float(os.getenv("PAPERLESS_CONSUMER_POLLING_INTERVAL
CONSUMER_STABILITY_DELAY = float(os.getenv("PAPERLESS_CONSUMER_STABILITY_DELAY", 5))
CONSUMER_DELETE_DUPLICATES = __get_boolean("PAPERLESS_CONSUMER_DELETE_DUPLICATES")
CONSUMER_DELETE_DUPLICATES = get_bool_from_env("PAPERLESS_CONSUMER_DELETE_DUPLICATES")
CONSUMER_RECURSIVE = __get_boolean("PAPERLESS_CONSUMER_RECURSIVE")
CONSUMER_RECURSIVE = get_bool_from_env("PAPERLESS_CONSUMER_RECURSIVE")
# Ignore regex patterns, matched against filename only
CONSUMER_IGNORE_PATTERNS = list(
@@ -1024,13 +793,13 @@ CONSUMER_IGNORE_DIRS = list(
),
)
CONSUMER_SUBDIRS_AS_TAGS = __get_boolean("PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS")
CONSUMER_SUBDIRS_AS_TAGS = get_bool_from_env("PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS")
CONSUMER_ENABLE_BARCODES: Final[bool] = __get_boolean(
CONSUMER_ENABLE_BARCODES: Final[bool] = get_bool_from_env(
"PAPERLESS_CONSUMER_ENABLE_BARCODES",
)
CONSUMER_BARCODE_TIFF_SUPPORT: Final[bool] = __get_boolean(
CONSUMER_BARCODE_TIFF_SUPPORT: Final[bool] = get_bool_from_env(
"PAPERLESS_CONSUMER_BARCODE_TIFF_SUPPORT",
)
@@ -1039,7 +808,7 @@ CONSUMER_BARCODE_STRING: Final[str] = os.getenv(
"PATCHT",
)
CONSUMER_ENABLE_ASN_BARCODE: Final[bool] = __get_boolean(
CONSUMER_ENABLE_ASN_BARCODE: Final[bool] = get_bool_from_env(
"PAPERLESS_CONSUMER_ENABLE_ASN_BARCODE",
)
@@ -1048,23 +817,26 @@ CONSUMER_ASN_BARCODE_PREFIX: Final[str] = os.getenv(
"ASN",
)
CONSUMER_BARCODE_UPSCALE: Final[float] = __get_float(
CONSUMER_BARCODE_UPSCALE: Final[float] = get_float_from_env(
"PAPERLESS_CONSUMER_BARCODE_UPSCALE",
0.0,
)
CONSUMER_BARCODE_DPI: Final[int] = __get_int("PAPERLESS_CONSUMER_BARCODE_DPI", 300)
CONSUMER_BARCODE_DPI: Final[int] = get_int_from_env(
"PAPERLESS_CONSUMER_BARCODE_DPI",
300,
)
CONSUMER_BARCODE_MAX_PAGES: Final[int] = __get_int(
CONSUMER_BARCODE_MAX_PAGES: Final[int] = get_int_from_env(
"PAPERLESS_CONSUMER_BARCODE_MAX_PAGES",
0,
)
CONSUMER_BARCODE_RETAIN_SPLIT_PAGES = __get_boolean(
CONSUMER_BARCODE_RETAIN_SPLIT_PAGES = get_bool_from_env(
"PAPERLESS_CONSUMER_BARCODE_RETAIN_SPLIT_PAGES",
)
CONSUMER_ENABLE_TAG_BARCODE: Final[bool] = __get_boolean(
CONSUMER_ENABLE_TAG_BARCODE: Final[bool] = get_bool_from_env(
"PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE",
)
@@ -1077,11 +849,11 @@ CONSUMER_TAG_BARCODE_MAPPING = dict(
),
)
CONSUMER_TAG_BARCODE_SPLIT: Final[bool] = __get_boolean(
CONSUMER_TAG_BARCODE_SPLIT: Final[bool] = get_bool_from_env(
"PAPERLESS_CONSUMER_TAG_BARCODE_SPLIT",
)
CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED: Final[bool] = __get_boolean(
CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED: Final[bool] = get_bool_from_env(
"PAPERLESS_CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED",
)
@@ -1090,13 +862,13 @@ CONSUMER_COLLATE_DOUBLE_SIDED_SUBDIR_NAME: Final[str] = os.getenv(
"double-sided",
)
CONSUMER_COLLATE_DOUBLE_SIDED_TIFF_SUPPORT: Final[bool] = __get_boolean(
CONSUMER_COLLATE_DOUBLE_SIDED_TIFF_SUPPORT: Final[bool] = get_bool_from_env(
"PAPERLESS_CONSUMER_COLLATE_DOUBLE_SIDED_TIFF_SUPPORT",
)
CONSUMER_PDF_RECOVERABLE_MIME_TYPES = ("application/octet-stream",)
OCR_PAGES = __get_optional_int("PAPERLESS_OCR_PAGES")
OCR_PAGES = get_int_from_env("PAPERLESS_OCR_PAGES")
# The default language that tesseract will attempt to use when parsing
# documents. It should be a 3-letter language code consistent with ISO 639.
@@ -1110,20 +882,20 @@ OCR_MODE = os.getenv("PAPERLESS_OCR_MODE", "skip")
OCR_SKIP_ARCHIVE_FILE = os.getenv("PAPERLESS_OCR_SKIP_ARCHIVE_FILE", "never")
OCR_IMAGE_DPI = __get_optional_int("PAPERLESS_OCR_IMAGE_DPI")
OCR_IMAGE_DPI = get_int_from_env("PAPERLESS_OCR_IMAGE_DPI")
OCR_CLEAN = os.getenv("PAPERLESS_OCR_CLEAN", "clean")
OCR_DESKEW: Final[bool] = __get_boolean("PAPERLESS_OCR_DESKEW", "true")
OCR_DESKEW: Final[bool] = get_bool_from_env("PAPERLESS_OCR_DESKEW", "true")
OCR_ROTATE_PAGES: Final[bool] = __get_boolean("PAPERLESS_OCR_ROTATE_PAGES", "true")
OCR_ROTATE_PAGES: Final[bool] = get_bool_from_env("PAPERLESS_OCR_ROTATE_PAGES", "true")
OCR_ROTATE_PAGES_THRESHOLD: Final[float] = __get_float(
OCR_ROTATE_PAGES_THRESHOLD: Final[float] = get_float_from_env(
"PAPERLESS_OCR_ROTATE_PAGES_THRESHOLD",
12.0,
)
OCR_MAX_IMAGE_PIXELS: Final[int | None] = __get_optional_int(
OCR_MAX_IMAGE_PIXELS: Final[int | None] = get_int_from_env(
"PAPERLESS_OCR_MAX_IMAGE_PIXELS",
)
@@ -1134,7 +906,7 @@ OCR_COLOR_CONVERSION_STRATEGY = os.getenv(
OCR_USER_ARGS = os.getenv("PAPERLESS_OCR_USER_ARGS")
MAX_IMAGE_PIXELS: Final[int | None] = __get_optional_int(
MAX_IMAGE_PIXELS: Final[int | None] = get_int_from_env(
"PAPERLESS_MAX_IMAGE_PIXELS",
)
@@ -1149,7 +921,7 @@ CONVERT_MEMORY_LIMIT = os.getenv("PAPERLESS_CONVERT_MEMORY_LIMIT")
GS_BINARY = os.getenv("PAPERLESS_GS_BINARY", "gs")
# Fallback layout for .eml consumption
EMAIL_PARSE_DEFAULT_LAYOUT = __get_int(
EMAIL_PARSE_DEFAULT_LAYOUT = get_int_from_env(
"PAPERLESS_EMAIL_PARSE_DEFAULT_LAYOUT",
1, # MailRule.PdfLayout.TEXT_HTML but that can't be imported here
)
@@ -1163,23 +935,9 @@ DATE_ORDER = os.getenv("PAPERLESS_DATE_ORDER", "DMY")
FILENAME_DATE_ORDER = os.getenv("PAPERLESS_FILENAME_DATE_ORDER")
def _parse_dateparser_languages(languages: str | None):
language_list = languages.split("+") if languages else []
# There is an unfixed issue in zh-Hant and zh-Hans locales in the dateparser lib.
# See: https://github.com/scrapinghub/dateparser/issues/875
for index, language in enumerate(language_list):
if language.startswith("zh-") and "zh" not in language_list:
logger.warning(
f'Chinese locale detected: {language}. dateparser might fail to parse some dates with this locale, so Chinese ("zh") will be used as a fallback.',
)
language_list.append("zh")
return list(LocaleDataLoader().get_locale_map(locales=language_list))
# If not set, we will infer it at runtime
DATE_PARSER_LANGUAGES = (
_parse_dateparser_languages(
parse_dateparser_languages(
os.getenv("PAPERLESS_DATE_PARSER_LANGUAGES"),
)
if os.getenv("PAPERLESS_DATE_PARSER_LANGUAGES")
@@ -1190,7 +948,7 @@ DATE_PARSER_LANGUAGES = (
# Maximum number of dates taken from document start to end to show as suggestions for
# `created` date in the frontend. Duplicates are removed, which can result in
# fewer dates shown.
NUMBER_OF_SUGGESTED_DATES = __get_int("PAPERLESS_NUMBER_OF_SUGGESTED_DATES", 3)
NUMBER_OF_SUGGESTED_DATES = get_int_from_env("PAPERLESS_NUMBER_OF_SUGGESTED_DATES", 3)
# Specify the filename format for out files
FILENAME_FORMAT = os.getenv("PAPERLESS_FILENAME_FORMAT")
@@ -1198,7 +956,7 @@ FILENAME_FORMAT = os.getenv("PAPERLESS_FILENAME_FORMAT")
# If this is enabled, variables in filename format will resolve to
# empty-string instead of 'none'.
# Directories with 'empty names' are omitted, too.
FILENAME_FORMAT_REMOVE_NONE = __get_boolean(
FILENAME_FORMAT_REMOVE_NONE = get_bool_from_env(
"PAPERLESS_FILENAME_FORMAT_REMOVE_NONE",
"NO",
)
@@ -1209,7 +967,7 @@ THUMBNAIL_FONT_NAME = os.getenv(
)
# Tika settings
TIKA_ENABLED = __get_boolean("PAPERLESS_TIKA_ENABLED", "NO")
TIKA_ENABLED = get_bool_from_env("PAPERLESS_TIKA_ENABLED", "NO")
TIKA_ENDPOINT = os.getenv("PAPERLESS_TIKA_ENDPOINT", "http://localhost:9998")
TIKA_GOTENBERG_ENDPOINT = os.getenv(
"PAPERLESS_TIKA_GOTENBERG_ENDPOINT",
@@ -1219,52 +977,21 @@ TIKA_GOTENBERG_ENDPOINT = os.getenv(
if TIKA_ENABLED:
INSTALLED_APPS.append("paperless_tika.apps.PaperlessTikaConfig")
AUDIT_LOG_ENABLED = __get_boolean("PAPERLESS_AUDIT_LOG_ENABLED", "true")
AUDIT_LOG_ENABLED = get_bool_from_env("PAPERLESS_AUDIT_LOG_ENABLED", "true")
if AUDIT_LOG_ENABLED:
INSTALLED_APPS.append("auditlog")
MIDDLEWARE.append("auditlog.middleware.AuditlogMiddleware")
def _parse_ignore_dates(
env_ignore: str,
date_order: str = DATE_ORDER,
) -> set[datetime.datetime]:
"""
If the PAPERLESS_IGNORE_DATES environment variable is set, parse the
user provided string(s) into dates
Args:
env_ignore (str): The value of the environment variable, comma separated dates
date_order (str, optional): The format of the date strings.
Defaults to DATE_ORDER.
Returns:
Set[datetime.datetime]: The set of parsed date objects
"""
import dateparser
ignored_dates = set()
for s in env_ignore.split(","):
d = dateparser.parse(
s,
settings={
"DATE_ORDER": date_order,
},
)
if d:
ignored_dates.add(d.date())
return ignored_dates
# List dates that should be ignored when trying to parse date from document text
IGNORE_DATES: set[datetime.date] = set()
if os.getenv("PAPERLESS_IGNORE_DATES") is not None:
IGNORE_DATES = _parse_ignore_dates(os.getenv("PAPERLESS_IGNORE_DATES"))
IGNORE_DATES = parse_ignore_dates(os.getenv("PAPERLESS_IGNORE_DATES"), DATE_ORDER)
ENABLE_UPDATE_CHECK = os.getenv("PAPERLESS_ENABLE_UPDATE_CHECK", "default")
if ENABLE_UPDATE_CHECK != "default":
ENABLE_UPDATE_CHECK = __get_boolean("PAPERLESS_ENABLE_UPDATE_CHECK")
ENABLE_UPDATE_CHECK = get_bool_from_env("PAPERLESS_ENABLE_UPDATE_CHECK")
APP_TITLE = os.getenv("PAPERLESS_APP_TITLE", None)
APP_LOGO = os.getenv("PAPERLESS_APP_LOGO", None)
@@ -1309,7 +1036,7 @@ def _get_nltk_language_setting(ocr_lang: str) -> str | None:
return iso_code_to_nltk.get(ocr_lang)
NLTK_ENABLED: Final[bool] = __get_boolean("PAPERLESS_ENABLE_NLTK", "yes")
NLTK_ENABLED: Final[bool] = get_bool_from_env("PAPERLESS_ENABLE_NLTK", "yes")
NLTK_LANGUAGE: str | None = _get_nltk_language_setting(OCR_LANGUAGE)
@@ -1318,7 +1045,7 @@ NLTK_LANGUAGE: str | None = _get_nltk_language_setting(OCR_LANGUAGE)
###############################################################################
EMAIL_GNUPG_HOME: Final[str | None] = os.getenv("PAPERLESS_EMAIL_GNUPG_HOME")
EMAIL_ENABLE_GPG_DECRYPTOR: Final[bool] = __get_boolean(
EMAIL_ENABLE_GPG_DECRYPTOR: Final[bool] = get_bool_from_env(
"PAPERLESS_ENABLE_GPG_DECRYPTOR",
)
@@ -1326,7 +1053,7 @@ EMAIL_ENABLE_GPG_DECRYPTOR: Final[bool] = __get_boolean(
###############################################################################
# Soft Delete #
###############################################################################
EMPTY_TRASH_DELAY = max(__get_int("PAPERLESS_EMPTY_TRASH_DELAY", 30), 1)
EMPTY_TRASH_DELAY = max(get_int_from_env("PAPERLESS_EMPTY_TRASH_DELAY", 30), 1)
###############################################################################
@@ -1351,21 +1078,17 @@ OUTLOOK_OAUTH_ENABLED = bool(
###############################################################################
# Webhooks
###############################################################################
WEBHOOKS_ALLOWED_SCHEMES = set(
WEBHOOKS_ALLOWED_SCHEMES = {
s.lower()
for s in __get_list(
for s in get_list_from_env(
"PAPERLESS_WEBHOOKS_ALLOWED_SCHEMES",
["http", "https"],
default=["http", "https"],
)
)
WEBHOOKS_ALLOWED_PORTS = set(
int(p)
for p in __get_list(
"PAPERLESS_WEBHOOKS_ALLOWED_PORTS",
[],
)
)
WEBHOOKS_ALLOW_INTERNAL_REQUESTS = __get_boolean(
}
WEBHOOKS_ALLOWED_PORTS = {
int(p) for p in get_list_from_env("PAPERLESS_WEBHOOKS_ALLOWED_PORTS", default=[])
}
WEBHOOKS_ALLOW_INTERNAL_REQUESTS = get_bool_from_env(
"PAPERLESS_WEBHOOKS_ALLOW_INTERNAL_REQUESTS",
"true",
)
@@ -1380,7 +1103,7 @@ REMOTE_OCR_ENDPOINT = os.getenv("PAPERLESS_REMOTE_OCR_ENDPOINT")
################################################################################
# AI Settings #
################################################################################
AI_ENABLED = __get_boolean("PAPERLESS_AI_ENABLED", "NO")
AI_ENABLED = get_bool_from_env("PAPERLESS_AI_ENABLED", "NO")
LLM_EMBEDDING_BACKEND = os.getenv(
"PAPERLESS_AI_LLM_EMBEDDING_BACKEND",
) # "huggingface" or "openai"

View File

@@ -1,11 +1,191 @@
import datetime
import logging
import os
from pathlib import Path
from typing import Any
from celery.schedules import crontab
from dateparser.languages.loader import LocaleDataLoader
from paperless.settings.parsers import get_choice_from_env
from paperless.settings.parsers import get_int_from_env
from paperless.settings.parsers import parse_dict_from_str
logger = logging.getLogger(__name__)
def parse_hosting_settings() -> tuple[str | None, str, str, str, str]:
script_name = os.getenv("PAPERLESS_FORCE_SCRIPT_NAME")
base_url = (script_name or "") + "/"
login_url = base_url + "accounts/login/"
login_redirect_url = base_url + "dashboard"
logout_redirect_url = os.getenv(
"PAPERLESS_LOGOUT_REDIRECT_URL",
login_url + "?loggedout=1",
)
return script_name, base_url, login_url, login_redirect_url, logout_redirect_url
def parse_redis_url(env_redis: str | None) -> tuple[str, str]:
"""
Gets the Redis information from the environment or a default and handles
converting from incompatible django_channels and celery formats.
Returns a tuple of (celery_url, channels_url)
"""
# Not set, return a compatible default
if env_redis is None:
return ("redis://localhost:6379", "redis://localhost:6379")
if "unix" in env_redis.lower():
# channels_redis socket format, looks like:
# "unix:///path/to/redis.sock"
_, path = env_redis.split(":", maxsplit=1)
# Optionally setting a db number
if "?db=" in env_redis:
path, number = path.split("?db=")
return (f"redis+socket:{path}?virtual_host={number}", env_redis)
else:
return (f"redis+socket:{path}", env_redis)
elif "+socket" in env_redis.lower():
# celery socket style, looks like:
# "redis+socket:///path/to/redis.sock"
_, path = env_redis.split(":", maxsplit=1)
if "?virtual_host=" in env_redis:
# Virtual host (aka db number)
path, number = path.split("?virtual_host=")
return (env_redis, f"unix:{path}?db={number}")
else:
return (env_redis, f"unix:{path}")
# Not a socket
return (env_redis, env_redis)
def parse_beat_schedule() -> dict:
"""
Configures the scheduled tasks, according to default or
environment variables. Task expiration is configured so the task will
expire (and not run), shortly before the default frequency will put another
of the same task into the queue
https://docs.celeryq.dev/en/stable/userguide/periodic-tasks.html#beat-entries
https://docs.celeryq.dev/en/latest/userguide/calling.html#expiration
"""
schedule = {}
tasks = [
{
"name": "Check all e-mail accounts",
"env_key": "PAPERLESS_EMAIL_TASK_CRON",
# Default every ten minutes
"env_default": "*/10 * * * *",
"task": "paperless_mail.tasks.process_mail_accounts",
"options": {
# 1 minute before default schedule sends again
"expires": 9.0 * 60.0,
},
},
{
"name": "Train the classifier",
"env_key": "PAPERLESS_TRAIN_TASK_CRON",
# Default hourly at 5 minutes past the hour
"env_default": "5 */1 * * *",
"task": "documents.tasks.train_classifier",
"options": {
# 1 minute before default schedule sends again
"expires": 59.0 * 60.0,
},
},
{
"name": "Optimize the index",
"env_key": "PAPERLESS_INDEX_TASK_CRON",
# Default daily at midnight
"env_default": "0 0 * * *",
"task": "documents.tasks.index_optimize",
"options": {
# 1 hour before default schedule sends again
"expires": 23.0 * 60.0 * 60.0,
},
},
{
"name": "Perform sanity check",
"env_key": "PAPERLESS_SANITY_TASK_CRON",
# Default Sunday at 00:30
"env_default": "30 0 * * sun",
"task": "documents.tasks.sanity_check",
"options": {
# 1 hour before default schedule sends again
"expires": ((7.0 * 24.0) - 1.0) * 60.0 * 60.0,
},
},
{
"name": "Empty trash",
"env_key": "PAPERLESS_EMPTY_TRASH_TASK_CRON",
# Default daily at 01:00
"env_default": "0 1 * * *",
"task": "documents.tasks.empty_trash",
"options": {
# 1 hour before default schedule sends again
"expires": 23.0 * 60.0 * 60.0,
},
},
{
"name": "Check and run scheduled workflows",
"env_key": "PAPERLESS_WORKFLOW_SCHEDULED_TASK_CRON",
# Default hourly at 5 minutes past the hour
"env_default": "5 */1 * * *",
"task": "documents.tasks.check_scheduled_workflows",
"options": {
# 1 minute before default schedule sends again
"expires": 59.0 * 60.0,
},
},
{
"name": "Rebuild LLM index",
"env_key": "PAPERLESS_LLM_INDEX_TASK_CRON",
# Default daily at 02:10
"env_default": "10 2 * * *",
"task": "documents.tasks.llmindex_index",
"options": {
# 1 hour before default schedule sends again
"expires": 23.0 * 60.0 * 60.0,
},
},
{
"name": "Cleanup expired share link bundles",
"env_key": "PAPERLESS_SHARE_LINK_BUNDLE_CLEANUP_CRON",
# Default daily at 02:00
"env_default": "0 2 * * *",
"task": "documents.tasks.cleanup_expired_share_link_bundles",
"options": {
# 1 hour before default schedule sends again
"expires": 23.0 * 60.0 * 60.0,
},
},
]
for task in tasks:
# Either get the environment setting or use the default
value = os.getenv(task["env_key"], task["env_default"])
# Don't add disabled tasks to the schedule
if value == "disable":
continue
# I find https://crontab.guru/ super helpful
# crontab(5) format
# - five time-and-date fields
# - separated by at least one blank
minute, hour, day_month, month, day_week = value.split(" ")
schedule[task["name"]] = {
"task": task["task"],
"schedule": crontab(minute, hour, day_week, day_month, month),
"options": task["options"],
}
return schedule
def parse_db_settings(data_dir: Path) -> dict[str, dict[str, Any]]:
"""Parse database settings from environment variables.
@@ -120,3 +300,48 @@ def parse_db_settings(data_dir: Path) -> dict[str, dict[str, Any]]:
)
return {"default": db_config}
def parse_dateparser_languages(languages: str | None) -> list[str]:
language_list = languages.split("+") if languages else []
# There is an unfixed issue in zh-Hant and zh-Hans locales in the dateparser lib.
# See: https://github.com/scrapinghub/dateparser/issues/875
for index, language in enumerate(language_list):
if language.startswith("zh-") and "zh" not in language_list:
logger.warning(
f"Chinese locale detected: {language}. dateparser might fail to parse"
f' some dates with this locale, so Chinese ("zh") will be used as a fallback.',
)
language_list.append("zh")
return list(LocaleDataLoader().get_locale_map(locales=language_list))
def parse_ignore_dates(
env_ignore: str,
date_order: str,
) -> set[datetime.date]:
"""
If the PAPERLESS_IGNORE_DATES environment variable is set, parse the
user provided string(s) into dates
Args:
env_ignore (str): The value of the environment variable, comma separated dates
date_order (str): The format of the date strings.
Returns:
set[datetime.date]: The set of parsed date objects
"""
import dateparser
ignored_dates = set()
for s in env_ignore.split(","):
d = dateparser.parse(
s,
settings={
"DATE_ORDER": date_order,
},
)
if d:
ignored_dates.add(d.date())
return ignored_dates

View File

@@ -156,6 +156,108 @@ def parse_dict_from_str(
return settings
def get_bool_from_env(key: str, default: str = "NO") -> bool:
"""
Return a boolean value based on whatever the user has supplied in the
environment based on whether the value "looks like" it's True or not.
"""
return str_to_bool(os.getenv(key, default))
@overload
def get_float_from_env(key: str) -> float | None: ...
@overload
def get_float_from_env(key: str, default: None) -> float | None: ...
@overload
def get_float_from_env(key: str, default: float) -> float: ...
def get_float_from_env(key: str, default: float | None = None) -> float | None:
"""
Return a float value based on the environment variable.
If default is provided, returns that value when key is missing.
If default is None, returns None when key is missing.
"""
if key not in os.environ:
return default
return float(os.environ[key])
@overload
def get_path_from_env(key: str) -> Path | None: ...
@overload
def get_path_from_env(key: str, default: None) -> Path | None: ...
@overload
def get_path_from_env(key: str, default: Path | str) -> Path: ...
def get_path_from_env(key: str, default: Path | str | None = None) -> Path | None:
"""
Return a Path object based on the environment variable.
If default is provided, returns that value when key is missing.
If default is None, returns None when key is missing.
"""
if key not in os.environ:
return default if default is None else Path(default).resolve()
return Path(os.environ[key]).resolve()
def get_list_from_env(
key: str,
separator: str = ",",
default: list[T] | None = None,
*,
strip_whitespace: bool = True,
remove_empty: bool = True,
required: bool = False,
) -> list[str] | list[T]:
"""
Get and parse a list from an environment variable or return a default.
Args:
key: Environment variable name
separator: Character(s) to split on (default: ',')
default: Default value to return if env var is not set or empty
strip_whitespace: Whether to strip whitespace from each element
remove_empty: Whether to remove empty strings from the result
required: If True, raise an error when the env var is missing and no default provided
Returns:
List of strings or list of type-cast values, or default if env var is empty/None
Raises:
ValueError: If required=True and env var is missing and there is no default
"""
# Get the environment variable value
env_value = os.environ.get(key)
# Handle required environment variables
if required and env_value is None and default is None:
raise ValueError(f"Required environment variable '{key}' is not set")
if env_value:
items = env_value.split(separator)
if strip_whitespace:
items = [item.strip() for item in items]
if remove_empty:
items = [item for item in items if item]
return items
elif default is not None:
return default
else:
return []
def get_choice_from_env(
env_key: str,
choices: set[str],

View File

@@ -1,10 +1,279 @@
import datetime
import os
from pathlib import Path
from typing import Any
import pytest
from celery.schedules import crontab
from pytest_mock import MockerFixture
from paperless.settings.custom import parse_beat_schedule
from paperless.settings.custom import parse_dateparser_languages
from paperless.settings.custom import parse_db_settings
from paperless.settings.custom import parse_hosting_settings
from paperless.settings.custom import parse_ignore_dates
from paperless.settings.custom import parse_redis_url
class TestRedisSocketConversion:
@pytest.mark.parametrize(
("input_url", "expected"),
[
pytest.param(
None,
("redis://localhost:6379", "redis://localhost:6379"),
id="none_uses_default",
),
pytest.param(
"redis+socket:///run/redis/redis.sock",
(
"redis+socket:///run/redis/redis.sock",
"unix:///run/redis/redis.sock",
),
id="celery_style_socket",
),
pytest.param(
"unix:///run/redis/redis.sock",
(
"redis+socket:///run/redis/redis.sock",
"unix:///run/redis/redis.sock",
),
id="redis_py_style_socket",
),
pytest.param(
"redis+socket:///run/redis/redis.sock?virtual_host=5",
(
"redis+socket:///run/redis/redis.sock?virtual_host=5",
"unix:///run/redis/redis.sock?db=5",
),
id="celery_style_socket_with_db",
),
pytest.param(
"unix:///run/redis/redis.sock?db=10",
(
"redis+socket:///run/redis/redis.sock?virtual_host=10",
"unix:///run/redis/redis.sock?db=10",
),
id="redis_py_style_socket_with_db",
),
pytest.param(
"redis://myredishost:6379",
("redis://myredishost:6379", "redis://myredishost:6379"),
id="host_with_port_unchanged",
),
# Credentials in unix:// URL contain multiple colons (user:password@)
# Regression test for https://github.com/paperless-ngx/paperless-ngx/pull/12239
pytest.param(
"unix://user:password@/run/redis/redis.sock",
(
"redis+socket://user:password@/run/redis/redis.sock",
"unix://user:password@/run/redis/redis.sock",
),
id="redis_py_style_socket_with_credentials",
),
pytest.param(
"redis+socket://user:password@/run/redis/redis.sock",
(
"redis+socket://user:password@/run/redis/redis.sock",
"unix://user:password@/run/redis/redis.sock",
),
id="celery_style_socket_with_credentials",
),
],
)
def test_redis_socket_parsing(
self,
input_url: str | None,
expected: tuple[str, str],
) -> None:
"""
GIVEN:
- Various Redis connection URI formats
WHEN:
- The URI is parsed
THEN:
- Socket based URIs are translated
- Non-socket URIs are unchanged
- None provided uses default
"""
result = parse_redis_url(input_url)
assert expected == result
class TestParseHostingSettings:
@pytest.mark.parametrize(
("env", "expected"),
[
pytest.param(
{},
(
None,
"/",
"/accounts/login/",
"/dashboard",
"/accounts/login/?loggedout=1",
),
id="no_env_vars",
),
pytest.param(
{"PAPERLESS_FORCE_SCRIPT_NAME": "/paperless"},
(
"/paperless",
"/paperless/",
"/paperless/accounts/login/",
"/paperless/dashboard",
"/paperless/accounts/login/?loggedout=1",
),
id="force_script_name_only",
),
pytest.param(
{
"PAPERLESS_FORCE_SCRIPT_NAME": "/docs",
"PAPERLESS_LOGOUT_REDIRECT_URL": "/custom/logout",
},
(
"/docs",
"/docs/",
"/docs/accounts/login/",
"/docs/dashboard",
"/custom/logout",
),
id="force_script_name_and_logout_redirect",
),
],
)
def test_parse_hosting_settings(
self,
mocker: MockerFixture,
env: dict[str, str],
expected: tuple[str | None, str, str, str, str],
) -> None:
"""Test parse_hosting_settings with various env configurations."""
mocker.patch.dict(os.environ, env, clear=True)
result = parse_hosting_settings()
assert result == expected
def make_expected_schedule(
overrides: dict[str, dict[str, Any]] | None = None,
disabled: set[str] | None = None,
) -> dict[str, Any]:
"""
Build the expected schedule with optional overrides and disabled tasks.
"""
mail_expire = 9.0 * 60.0
classifier_expire = 59.0 * 60.0
index_expire = 23.0 * 60.0 * 60.0
sanity_expire = ((7.0 * 24.0) - 1.0) * 60.0 * 60.0
empty_trash_expire = 23.0 * 60.0 * 60.0
workflow_expire = 59.0 * 60.0
llm_index_expire = 23.0 * 60.0 * 60.0
share_link_cleanup_expire = 23.0 * 60.0 * 60.0
schedule: dict[str, Any] = {
"Check all e-mail accounts": {
"task": "paperless_mail.tasks.process_mail_accounts",
"schedule": crontab(minute="*/10"),
"options": {"expires": mail_expire},
},
"Train the classifier": {
"task": "documents.tasks.train_classifier",
"schedule": crontab(minute="5", hour="*/1"),
"options": {"expires": classifier_expire},
},
"Optimize the index": {
"task": "documents.tasks.index_optimize",
"schedule": crontab(minute=0, hour=0),
"options": {"expires": index_expire},
},
"Perform sanity check": {
"task": "documents.tasks.sanity_check",
"schedule": crontab(minute=30, hour=0, day_of_week="sun"),
"options": {"expires": sanity_expire},
},
"Empty trash": {
"task": "documents.tasks.empty_trash",
"schedule": crontab(minute=0, hour="1"),
"options": {"expires": empty_trash_expire},
},
"Check and run scheduled workflows": {
"task": "documents.tasks.check_scheduled_workflows",
"schedule": crontab(minute="5", hour="*/1"),
"options": {"expires": workflow_expire},
},
"Rebuild LLM index": {
"task": "documents.tasks.llmindex_index",
"schedule": crontab(minute="10", hour="2"),
"options": {"expires": llm_index_expire},
},
"Cleanup expired share link bundles": {
"task": "documents.tasks.cleanup_expired_share_link_bundles",
"schedule": crontab(minute=0, hour="2"),
"options": {"expires": share_link_cleanup_expire},
},
}
overrides = overrides or {}
disabled = disabled or set()
for key, val in overrides.items():
schedule[key] = {**schedule.get(key, {}), **val}
for key in disabled:
schedule.pop(key, None)
return schedule
class TestParseBeatSchedule:
@pytest.mark.parametrize(
("env", "expected"),
[
pytest.param({}, make_expected_schedule(), id="defaults"),
pytest.param(
{"PAPERLESS_EMAIL_TASK_CRON": "*/50 * * * mon"},
make_expected_schedule(
overrides={
"Check all e-mail accounts": {
"schedule": crontab(minute="*/50", day_of_week="mon"),
},
},
),
id="email-changed",
),
pytest.param(
{"PAPERLESS_INDEX_TASK_CRON": "disable"},
make_expected_schedule(disabled={"Optimize the index"}),
id="index-disabled",
),
pytest.param(
{
"PAPERLESS_EMAIL_TASK_CRON": "disable",
"PAPERLESS_TRAIN_TASK_CRON": "disable",
"PAPERLESS_SANITY_TASK_CRON": "disable",
"PAPERLESS_INDEX_TASK_CRON": "disable",
"PAPERLESS_EMPTY_TRASH_TASK_CRON": "disable",
"PAPERLESS_WORKFLOW_SCHEDULED_TASK_CRON": "disable",
"PAPERLESS_LLM_INDEX_TASK_CRON": "disable",
"PAPERLESS_SHARE_LINK_BUNDLE_CLEANUP_CRON": "disable",
},
{},
id="all-disabled",
),
],
)
def test_parse_beat_schedule(
self,
env: dict[str, str],
expected: dict[str, Any],
mocker: MockerFixture,
) -> None:
mocker.patch.dict(os.environ, env, clear=False)
schedule = parse_beat_schedule()
assert schedule == expected
class TestParseDbSettings:
@@ -264,3 +533,85 @@ class TestParseDbSettings:
settings = parse_db_settings(tmp_path)
assert settings == expected_database_settings
class TestParseIgnoreDates:
"""Tests the parsing of the PAPERLESS_IGNORE_DATES setting value."""
def test_no_ignore_dates_set(self) -> None:
"""
GIVEN:
- No ignore dates are set
THEN:
- No ignore dates are parsed
"""
assert parse_ignore_dates("", "YMD") == set()
@pytest.mark.parametrize(
("env_str", "date_format", "expected"),
[
pytest.param(
"1985-05-01",
"YMD",
{datetime.date(1985, 5, 1)},
id="single-ymd",
),
pytest.param(
"1985-05-01,1991-12-05",
"YMD",
{datetime.date(1985, 5, 1), datetime.date(1991, 12, 5)},
id="multiple-ymd",
),
pytest.param(
"2010-12-13",
"YMD",
{datetime.date(2010, 12, 13)},
id="single-ymd-2",
),
pytest.param(
"11.01.10",
"DMY",
{datetime.date(2010, 1, 11)},
id="single-dmy",
),
pytest.param(
"11.01.2001,15-06-1996",
"DMY",
{datetime.date(2001, 1, 11), datetime.date(1996, 6, 15)},
id="multiple-dmy",
),
],
)
def test_ignore_dates_parsed(
self,
env_str: str,
date_format: str,
expected: set[datetime.date],
) -> None:
"""
GIVEN:
- Ignore dates are set per certain inputs
THEN:
- All ignore dates are parsed
"""
assert parse_ignore_dates(env_str, date_format) == expected
@pytest.mark.parametrize(
("languages", "expected"),
[
("de", ["de"]),
("zh", ["zh"]),
("fr+en", ["fr", "en"]),
# Locales must be supported
("en-001+fr-CA", ["en-001", "fr-CA"]),
("en-001+fr", ["en-001", "fr"]),
# Special case for Chinese: variants seem to miss some dates,
# so we always add "zh" as a fallback.
("en+zh-Hans-HK", ["en", "zh-Hans-HK", "zh"]),
("en+zh-Hans", ["en", "zh-Hans", "zh"]),
("en+zh-Hans+zh-Hant", ["en", "zh-Hans", "zh-Hant", "zh"]),
],
)
def test_parse_dateparser_languages(languages: str, expected: list[str]) -> None:
assert sorted(parse_dateparser_languages(languages)) == sorted(expected)

View File

@@ -4,8 +4,12 @@ from pathlib import Path
import pytest
from pytest_mock import MockerFixture
from paperless.settings.parsers import get_bool_from_env
from paperless.settings.parsers import get_choice_from_env
from paperless.settings.parsers import get_float_from_env
from paperless.settings.parsers import get_int_from_env
from paperless.settings.parsers import get_list_from_env
from paperless.settings.parsers import get_path_from_env
from paperless.settings.parsers import parse_dict_from_str
from paperless.settings.parsers import str_to_bool
@@ -205,6 +209,29 @@ class TestParseDictFromString:
assert isinstance(result["database"]["port"], int)
class TestGetBoolFromEnv:
def test_existing_env_var(self, mocker):
"""Test that an existing environment variable is read and converted."""
mocker.patch.dict(os.environ, {"TEST_VAR": "true"})
assert get_bool_from_env("TEST_VAR") is True
def test_missing_env_var_uses_default_no(self, mocker):
"""Test that a missing environment variable uses default 'NO' and returns False."""
mocker.patch.dict(os.environ, {}, clear=True)
assert get_bool_from_env("MISSING_VAR") is False
def test_missing_env_var_with_explicit_default(self, mocker):
"""Test that a missing environment variable uses the provided default."""
mocker.patch.dict(os.environ, {}, clear=True)
assert get_bool_from_env("MISSING_VAR", default="yes") is True
def test_invalid_value_raises_error(self, mocker):
"""Test that an invalid value raises ValueError (delegates to str_to_bool)."""
mocker.patch.dict(os.environ, {"INVALID_VAR": "maybe"})
with pytest.raises(ValueError):
get_bool_from_env("INVALID_VAR")
class TestGetIntFromEnv:
@pytest.mark.parametrize(
("env_value", "expected"),
@@ -259,6 +286,199 @@ class TestGetIntFromEnv:
get_int_from_env("INVALID_INT")
class TestGetFloatFromEnv:
@pytest.mark.parametrize(
("env_value", "expected"),
[
pytest.param("3.14", 3.14, id="pi"),
pytest.param("42", 42.0, id="int_as_float"),
pytest.param("-2.5", -2.5, id="negative"),
pytest.param("0.0", 0.0, id="zero_float"),
pytest.param("0", 0.0, id="zero_int"),
pytest.param("1.5e2", 150.0, id="sci_positive"),
pytest.param("1e-3", 0.001, id="sci_negative"),
pytest.param("-1.23e4", -12300.0, id="sci_large"),
],
)
def test_existing_env_var_valid_floats(self, mocker, env_value, expected):
"""Test that existing environment variables with valid floats return correct values."""
mocker.patch.dict(os.environ, {"FLOAT_VAR": env_value})
assert get_float_from_env("FLOAT_VAR") == expected
@pytest.mark.parametrize(
("default", "expected"),
[
pytest.param(3.14, 3.14, id="pi_default"),
pytest.param(0.0, 0.0, id="zero_default"),
pytest.param(-2.5, -2.5, id="negative_default"),
pytest.param(None, None, id="none_default"),
],
)
def test_missing_env_var_with_defaults(self, mocker, default, expected):
"""Test that missing environment variables return provided defaults."""
mocker.patch.dict(os.environ, {}, clear=True)
assert get_float_from_env("MISSING_VAR", default=default) == expected
def test_missing_env_var_no_default(self, mocker):
"""Test that missing environment variable with no default returns None."""
mocker.patch.dict(os.environ, {}, clear=True)
assert get_float_from_env("MISSING_VAR") is None
@pytest.mark.parametrize(
"invalid_value",
[
pytest.param("not_a_number", id="text"),
pytest.param("42.5.0", id="double_decimal"),
pytest.param("42a", id="alpha_suffix"),
pytest.param("", id="empty"),
pytest.param(" ", id="whitespace"),
pytest.param("true", id="boolean"),
pytest.param("1.2.3", id="triple_decimal"),
],
)
def test_invalid_float_values_raise_error(self, mocker, invalid_value):
"""Test that invalid float values raise ValueError."""
mocker.patch.dict(os.environ, {"INVALID_FLOAT": invalid_value})
with pytest.raises(ValueError):
get_float_from_env("INVALID_FLOAT")
class TestGetPathFromEnv:
@pytest.mark.parametrize(
"env_value",
[
pytest.param("/tmp/test", id="absolute"),
pytest.param("relative/path", id="relative"),
pytest.param("/path/with spaces/file.txt", id="spaces"),
pytest.param(".", id="current_dir"),
pytest.param("..", id="parent_dir"),
pytest.param("/", id="root"),
],
)
def test_existing_env_var_paths(self, mocker, env_value):
"""Test that existing environment variables with paths return resolved Path objects."""
mocker.patch.dict(os.environ, {"PATH_VAR": env_value})
result = get_path_from_env("PATH_VAR")
assert isinstance(result, Path)
assert result == Path(env_value).resolve()
def test_missing_env_var_no_default(self, mocker):
"""Test that missing environment variable with no default returns None."""
mocker.patch.dict(os.environ, {}, clear=True)
assert get_path_from_env("MISSING_VAR") is None
def test_missing_env_var_with_none_default(self, mocker):
"""Test that missing environment variable with None default returns None."""
mocker.patch.dict(os.environ, {}, clear=True)
assert get_path_from_env("MISSING_VAR", default=None) is None
@pytest.mark.parametrize(
"default_path_str",
[
pytest.param("/default/path", id="absolute_default"),
pytest.param("relative/default", id="relative_default"),
pytest.param(".", id="current_default"),
],
)
def test_missing_env_var_with_path_defaults(self, mocker, default_path_str):
"""Test that missing environment variables return resolved default Path objects."""
mocker.patch.dict(os.environ, {}, clear=True)
default_path = Path(default_path_str)
result = get_path_from_env("MISSING_VAR", default=default_path)
assert isinstance(result, Path)
assert result == default_path.resolve()
def test_relative_paths_are_resolved(self, mocker):
"""Test that relative paths are properly resolved to absolute paths."""
mocker.patch.dict(os.environ, {"REL_PATH": "relative/path"})
result = get_path_from_env("REL_PATH")
assert result is not None
assert result.is_absolute()
class TestGetListFromEnv:
@pytest.mark.parametrize(
("env_value", "expected"),
[
pytest.param("a,b,c", ["a", "b", "c"], id="basic_comma_separated"),
pytest.param("single", ["single"], id="single_element"),
pytest.param("", [], id="empty_string"),
pytest.param("a, b , c", ["a", "b", "c"], id="whitespace_trimmed"),
pytest.param("a,,b,c", ["a", "b", "c"], id="empty_elements_removed"),
],
)
def test_existing_env_var_basic_parsing(self, mocker, env_value, expected):
"""Test that existing environment variables are parsed correctly."""
mocker.patch.dict(os.environ, {"LIST_VAR": env_value})
result = get_list_from_env("LIST_VAR")
assert result == expected
@pytest.mark.parametrize(
("separator", "env_value", "expected"),
[
pytest.param("|", "a|b|c", ["a", "b", "c"], id="pipe_separator"),
pytest.param(":", "a:b:c", ["a", "b", "c"], id="colon_separator"),
pytest.param(";", "a;b;c", ["a", "b", "c"], id="semicolon_separator"),
],
)
def test_custom_separators(self, mocker, separator, env_value, expected):
"""Test that custom separators work correctly."""
mocker.patch.dict(os.environ, {"LIST_VAR": env_value})
result = get_list_from_env("LIST_VAR", separator=separator)
assert result == expected
@pytest.mark.parametrize(
("default", "expected"),
[
pytest.param(
["default1", "default2"],
["default1", "default2"],
id="string_list_default",
),
pytest.param([1, 2, 3], [1, 2, 3], id="int_list_default"),
pytest.param(None, [], id="none_default_returns_empty_list"),
],
)
def test_missing_env_var_with_defaults(self, mocker, default, expected):
"""Test that missing environment variables return provided defaults."""
mocker.patch.dict(os.environ, {}, clear=True)
result = get_list_from_env("MISSING_VAR", default=default)
assert result == expected
def test_missing_env_var_no_default(self, mocker):
"""Test that missing environment variable with no default returns empty list."""
mocker.patch.dict(os.environ, {}, clear=True)
result = get_list_from_env("MISSING_VAR")
assert result == []
def test_required_env_var_missing_raises_error(self, mocker):
"""Test that missing required environment variable raises ValueError."""
mocker.patch.dict(os.environ, {}, clear=True)
with pytest.raises(
ValueError,
match="Required environment variable 'REQUIRED_VAR' is not set",
):
get_list_from_env("REQUIRED_VAR", required=True)
def test_required_env_var_with_default_does_not_raise(self, mocker):
"""Test that required environment variable with default does not raise error."""
mocker.patch.dict(os.environ, {}, clear=True)
result = get_list_from_env("REQUIRED_VAR", default=["default"], required=True)
assert result == ["default"]
def test_strip_whitespace_false(self, mocker):
"""Test that whitespace is preserved when strip_whitespace=False."""
mocker.patch.dict(os.environ, {"LIST_VAR": " a , b , c "})
result = get_list_from_env("LIST_VAR", strip_whitespace=False)
assert result == [" a ", " b ", " c "]
def test_remove_empty_false(self, mocker):
"""Test that empty elements are preserved when remove_empty=False."""
mocker.patch.dict(os.environ, {"LIST_VAR": "a,,b,,c"})
result = get_list_from_env("LIST_VAR", remove_empty=False)
assert result == ["a", "", "b", "", "c"]
class TestGetEnvChoice:
@pytest.fixture
def valid_choices(self) -> set[str]:
@@ -394,21 +614,3 @@ class TestGetEnvChoice:
result = get_choice_from_env("TEST_ENV", large_choices)
assert result == "option_50"
def test_different_env_keys(
self,
mocker: MockerFixture,
valid_choices: set[str],
) -> None:
"""Test function works with different environment variable keys."""
test_cases = [
("DJANGO_ENV", "development"),
("DATABASE_BACKEND", "staging"),
("LOG_LEVEL", "production"),
("APP_MODE", "development"),
]
for env_key, env_value in test_cases:
mocker.patch.dict("os.environ", {env_key: env_value})
result = get_choice_from_env(env_key, valid_choices)
assert result == env_value

View File

@@ -0,0 +1,56 @@
import os
from unittest import TestCase
from unittest import mock
from paperless.settings import _parse_paperless_url
from paperless.settings import default_threads_per_worker
class TestThreadCalculation(TestCase):
def test_workers_threads(self) -> None:
"""
GIVEN:
- Certain CPU counts
WHEN:
- Threads per worker is calculated
THEN:
- Threads per worker less than or equal to CPU count
- At least 1 thread per worker
"""
default_workers = 1
for i in range(1, 64):
with mock.patch(
"paperless.settings.multiprocessing.cpu_count",
) as cpu_count:
cpu_count.return_value = i
default_threads = default_threads_per_worker(default_workers)
self.assertGreaterEqual(default_threads, 1)
self.assertLessEqual(default_workers * default_threads, i)
class TestPaperlessURLSettings(TestCase):
def test_paperless_url(self) -> None:
"""
GIVEN:
- PAPERLESS_URL is set
WHEN:
- The URL is parsed
THEN:
- The URL is returned and present in related settings
"""
with mock.patch.dict(
os.environ,
{
"PAPERLESS_URL": "https://example.com",
},
):
url = _parse_paperless_url()
self.assertEqual("https://example.com", url)
from django.conf import settings
self.assertIn(url, settings.CSRF_TRUSTED_ORIGINS)
self.assertIn(url, settings.CORS_ALLOWED_ORIGINS)

View File

@@ -1,482 +0,0 @@
import datetime
import os
from unittest import TestCase
from unittest import mock
import pytest
from celery.schedules import crontab
from paperless.settings import _parse_base_paths
from paperless.settings import _parse_beat_schedule
from paperless.settings import _parse_dateparser_languages
from paperless.settings import _parse_ignore_dates
from paperless.settings import _parse_paperless_url
from paperless.settings import _parse_redis_url
from paperless.settings import default_threads_per_worker
class TestIgnoreDateParsing(TestCase):
"""
Tests the parsing of the PAPERLESS_IGNORE_DATES setting value
"""
def _parse_checker(self, test_cases) -> None:
"""
Helper function to check ignore date parsing
Args:
test_cases (_type_): _description_
"""
for env_str, date_format, expected_date_set in test_cases:
self.assertSetEqual(
_parse_ignore_dates(env_str, date_format),
expected_date_set,
)
def test_no_ignore_dates_set(self) -> None:
"""
GIVEN:
- No ignore dates are set
THEN:
- No ignore dates are parsed
"""
self.assertSetEqual(_parse_ignore_dates(""), set())
def test_single_ignore_dates_set(self) -> None:
"""
GIVEN:
- Ignore dates are set per certain inputs
THEN:
- All ignore dates are parsed
"""
test_cases = [
("1985-05-01", "YMD", {datetime.date(1985, 5, 1)}),
(
"1985-05-01,1991-12-05",
"YMD",
{datetime.date(1985, 5, 1), datetime.date(1991, 12, 5)},
),
("2010-12-13", "YMD", {datetime.date(2010, 12, 13)}),
("11.01.10", "DMY", {datetime.date(2010, 1, 11)}),
(
"11.01.2001,15-06-1996",
"DMY",
{datetime.date(2001, 1, 11), datetime.date(1996, 6, 15)},
),
]
self._parse_checker(test_cases)
class TestThreadCalculation(TestCase):
def test_workers_threads(self) -> None:
"""
GIVEN:
- Certain CPU counts
WHEN:
- Threads per worker is calculated
THEN:
- Threads per worker less than or equal to CPU count
- At least 1 thread per worker
"""
default_workers = 1
for i in range(1, 64):
with mock.patch(
"paperless.settings.multiprocessing.cpu_count",
) as cpu_count:
cpu_count.return_value = i
default_threads = default_threads_per_worker(default_workers)
self.assertGreaterEqual(default_threads, 1)
self.assertLessEqual(default_workers * default_threads, i)
class TestRedisSocketConversion(TestCase):
def test_redis_socket_parsing(self) -> None:
"""
GIVEN:
- Various Redis connection URI formats
WHEN:
- The URI is parsed
THEN:
- Socket based URIs are translated
- Non-socket URIs are unchanged
- None provided uses default
"""
for input, expected in [
# Nothing is set
(None, ("redis://localhost:6379", "redis://localhost:6379")),
# celery style
(
"redis+socket:///run/redis/redis.sock",
(
"redis+socket:///run/redis/redis.sock",
"unix:///run/redis/redis.sock",
),
),
# redis-py / channels-redis style
(
"unix:///run/redis/redis.sock",
(
"redis+socket:///run/redis/redis.sock",
"unix:///run/redis/redis.sock",
),
),
# celery style with db
(
"redis+socket:///run/redis/redis.sock?virtual_host=5",
(
"redis+socket:///run/redis/redis.sock?virtual_host=5",
"unix:///run/redis/redis.sock?db=5",
),
),
# redis-py / channels-redis style with db
(
"unix:///run/redis/redis.sock?db=10",
(
"redis+socket:///run/redis/redis.sock?virtual_host=10",
"unix:///run/redis/redis.sock?db=10",
),
),
# Just a host with a port
(
"redis://myredishost:6379",
("redis://myredishost:6379", "redis://myredishost:6379"),
),
]:
result = _parse_redis_url(input)
self.assertTupleEqual(expected, result)
class TestCeleryScheduleParsing(TestCase):
MAIL_EXPIRE_TIME = 9.0 * 60.0
CLASSIFIER_EXPIRE_TIME = 59.0 * 60.0
INDEX_EXPIRE_TIME = 23.0 * 60.0 * 60.0
SANITY_EXPIRE_TIME = ((7.0 * 24.0) - 1.0) * 60.0 * 60.0
EMPTY_TRASH_EXPIRE_TIME = 23.0 * 60.0 * 60.0
RUN_SCHEDULED_WORKFLOWS_EXPIRE_TIME = 59.0 * 60.0
LLM_INDEX_EXPIRE_TIME = 23.0 * 60.0 * 60.0
CLEANUP_EXPIRED_SHARE_BUNDLES_EXPIRE_TIME = 23.0 * 60.0 * 60.0
def test_schedule_configuration_default(self) -> None:
"""
GIVEN:
- No configured task schedules
WHEN:
- The celery beat schedule is built
THEN:
- The default schedule is returned
"""
schedule = _parse_beat_schedule()
self.assertDictEqual(
{
"Check all e-mail accounts": {
"task": "paperless_mail.tasks.process_mail_accounts",
"schedule": crontab(minute="*/10"),
"options": {"expires": self.MAIL_EXPIRE_TIME},
},
"Train the classifier": {
"task": "documents.tasks.train_classifier",
"schedule": crontab(minute="5", hour="*/1"),
"options": {"expires": self.CLASSIFIER_EXPIRE_TIME},
},
"Optimize the index": {
"task": "documents.tasks.index_optimize",
"schedule": crontab(minute=0, hour=0),
"options": {"expires": self.INDEX_EXPIRE_TIME},
},
"Perform sanity check": {
"task": "documents.tasks.sanity_check",
"schedule": crontab(minute=30, hour=0, day_of_week="sun"),
"options": {"expires": self.SANITY_EXPIRE_TIME},
},
"Empty trash": {
"task": "documents.tasks.empty_trash",
"schedule": crontab(minute=0, hour="1"),
"options": {"expires": self.EMPTY_TRASH_EXPIRE_TIME},
},
"Check and run scheduled workflows": {
"task": "documents.tasks.check_scheduled_workflows",
"schedule": crontab(minute="5", hour="*/1"),
"options": {"expires": self.RUN_SCHEDULED_WORKFLOWS_EXPIRE_TIME},
},
"Rebuild LLM index": {
"task": "documents.tasks.llmindex_index",
"schedule": crontab(minute=10, hour=2),
"options": {
"expires": self.LLM_INDEX_EXPIRE_TIME,
},
},
"Cleanup expired share link bundles": {
"task": "documents.tasks.cleanup_expired_share_link_bundles",
"schedule": crontab(minute=0, hour=2),
"options": {
"expires": self.CLEANUP_EXPIRED_SHARE_BUNDLES_EXPIRE_TIME,
},
},
},
schedule,
)
def test_schedule_configuration_changed(self) -> None:
"""
GIVEN:
- Email task is configured non-default
WHEN:
- The celery beat schedule is built
THEN:
- The email task is configured per environment
- The default schedule is returned for other tasks
"""
with mock.patch.dict(
os.environ,
{"PAPERLESS_EMAIL_TASK_CRON": "*/50 * * * mon"},
):
schedule = _parse_beat_schedule()
self.assertDictEqual(
{
"Check all e-mail accounts": {
"task": "paperless_mail.tasks.process_mail_accounts",
"schedule": crontab(minute="*/50", day_of_week="mon"),
"options": {"expires": self.MAIL_EXPIRE_TIME},
},
"Train the classifier": {
"task": "documents.tasks.train_classifier",
"schedule": crontab(minute="5", hour="*/1"),
"options": {"expires": self.CLASSIFIER_EXPIRE_TIME},
},
"Optimize the index": {
"task": "documents.tasks.index_optimize",
"schedule": crontab(minute=0, hour=0),
"options": {"expires": self.INDEX_EXPIRE_TIME},
},
"Perform sanity check": {
"task": "documents.tasks.sanity_check",
"schedule": crontab(minute=30, hour=0, day_of_week="sun"),
"options": {"expires": self.SANITY_EXPIRE_TIME},
},
"Empty trash": {
"task": "documents.tasks.empty_trash",
"schedule": crontab(minute=0, hour="1"),
"options": {"expires": self.EMPTY_TRASH_EXPIRE_TIME},
},
"Check and run scheduled workflows": {
"task": "documents.tasks.check_scheduled_workflows",
"schedule": crontab(minute="5", hour="*/1"),
"options": {"expires": self.RUN_SCHEDULED_WORKFLOWS_EXPIRE_TIME},
},
"Rebuild LLM index": {
"task": "documents.tasks.llmindex_index",
"schedule": crontab(minute=10, hour=2),
"options": {
"expires": self.LLM_INDEX_EXPIRE_TIME,
},
},
"Cleanup expired share link bundles": {
"task": "documents.tasks.cleanup_expired_share_link_bundles",
"schedule": crontab(minute=0, hour=2),
"options": {
"expires": self.CLEANUP_EXPIRED_SHARE_BUNDLES_EXPIRE_TIME,
},
},
},
schedule,
)
def test_schedule_configuration_disabled(self) -> None:
"""
GIVEN:
- Search index task is disabled
WHEN:
- The celery beat schedule is built
THEN:
- The search index task is not present
- The default schedule is returned for other tasks
"""
with mock.patch.dict(os.environ, {"PAPERLESS_INDEX_TASK_CRON": "disable"}):
schedule = _parse_beat_schedule()
self.assertDictEqual(
{
"Check all e-mail accounts": {
"task": "paperless_mail.tasks.process_mail_accounts",
"schedule": crontab(minute="*/10"),
"options": {"expires": self.MAIL_EXPIRE_TIME},
},
"Train the classifier": {
"task": "documents.tasks.train_classifier",
"schedule": crontab(minute="5", hour="*/1"),
"options": {"expires": self.CLASSIFIER_EXPIRE_TIME},
},
"Perform sanity check": {
"task": "documents.tasks.sanity_check",
"schedule": crontab(minute=30, hour=0, day_of_week="sun"),
"options": {"expires": self.SANITY_EXPIRE_TIME},
},
"Empty trash": {
"task": "documents.tasks.empty_trash",
"schedule": crontab(minute=0, hour="1"),
"options": {"expires": self.EMPTY_TRASH_EXPIRE_TIME},
},
"Check and run scheduled workflows": {
"task": "documents.tasks.check_scheduled_workflows",
"schedule": crontab(minute="5", hour="*/1"),
"options": {"expires": self.RUN_SCHEDULED_WORKFLOWS_EXPIRE_TIME},
},
"Rebuild LLM index": {
"task": "documents.tasks.llmindex_index",
"schedule": crontab(minute=10, hour=2),
"options": {
"expires": self.LLM_INDEX_EXPIRE_TIME,
},
},
"Cleanup expired share link bundles": {
"task": "documents.tasks.cleanup_expired_share_link_bundles",
"schedule": crontab(minute=0, hour=2),
"options": {
"expires": self.CLEANUP_EXPIRED_SHARE_BUNDLES_EXPIRE_TIME,
},
},
},
schedule,
)
def test_schedule_configuration_disabled_all(self) -> None:
"""
GIVEN:
- All tasks are disabled
WHEN:
- The celery beat schedule is built
THEN:
- No tasks are scheduled
"""
with mock.patch.dict(
os.environ,
{
"PAPERLESS_EMAIL_TASK_CRON": "disable",
"PAPERLESS_TRAIN_TASK_CRON": "disable",
"PAPERLESS_SANITY_TASK_CRON": "disable",
"PAPERLESS_INDEX_TASK_CRON": "disable",
"PAPERLESS_EMPTY_TRASH_TASK_CRON": "disable",
"PAPERLESS_WORKFLOW_SCHEDULED_TASK_CRON": "disable",
"PAPERLESS_LLM_INDEX_TASK_CRON": "disable",
"PAPERLESS_SHARE_LINK_BUNDLE_CLEANUP_CRON": "disable",
},
):
schedule = _parse_beat_schedule()
self.assertDictEqual(
{},
schedule,
)
class TestPaperlessURLSettings(TestCase):
def test_paperless_url(self) -> None:
"""
GIVEN:
- PAPERLESS_URL is set
WHEN:
- The URL is parsed
THEN:
- The URL is returned and present in related settings
"""
with mock.patch.dict(
os.environ,
{
"PAPERLESS_URL": "https://example.com",
},
):
url = _parse_paperless_url()
self.assertEqual("https://example.com", url)
from django.conf import settings
self.assertIn(url, settings.CSRF_TRUSTED_ORIGINS)
self.assertIn(url, settings.CORS_ALLOWED_ORIGINS)
class TestPathSettings(TestCase):
def test_default_paths(self) -> None:
"""
GIVEN:
- PAPERLESS_FORCE_SCRIPT_NAME is not set
WHEN:
- Settings are parsed
THEN:
- Paths are as expected
"""
base_paths = _parse_base_paths()
self.assertEqual(None, base_paths[0]) # FORCE_SCRIPT_NAME
self.assertEqual("/", base_paths[1]) # BASE_URL
self.assertEqual("/accounts/login/", base_paths[2]) # LOGIN_URL
self.assertEqual("/dashboard", base_paths[3]) # LOGIN_REDIRECT_URL
self.assertEqual(
"/accounts/login/?loggedout=1",
base_paths[4],
) # LOGOUT_REDIRECT_URL
@mock.patch("os.environ", {"PAPERLESS_FORCE_SCRIPT_NAME": "/paperless"})
def test_subpath(self) -> None:
"""
GIVEN:
- PAPERLESS_FORCE_SCRIPT_NAME is set
WHEN:
- Settings are parsed
THEN:
- The path is returned and present in related settings
"""
base_paths = _parse_base_paths()
self.assertEqual("/paperless", base_paths[0]) # FORCE_SCRIPT_NAME
self.assertEqual("/paperless/", base_paths[1]) # BASE_URL
self.assertEqual("/paperless/accounts/login/", base_paths[2]) # LOGIN_URL
self.assertEqual("/paperless/dashboard", base_paths[3]) # LOGIN_REDIRECT_URL
self.assertEqual(
"/paperless/accounts/login/?loggedout=1",
base_paths[4],
) # LOGOUT_REDIRECT_URL
@mock.patch(
"os.environ",
{
"PAPERLESS_FORCE_SCRIPT_NAME": "/paperless",
"PAPERLESS_LOGOUT_REDIRECT_URL": "/foobar/",
},
)
def test_subpath_with_explicit_logout_url(self) -> None:
"""
GIVEN:
- PAPERLESS_FORCE_SCRIPT_NAME is set and so is PAPERLESS_LOGOUT_REDIRECT_URL
WHEN:
- Settings are parsed
THEN:
- The correct logout redirect URL is returned
"""
base_paths = _parse_base_paths()
self.assertEqual("/paperless/", base_paths[1]) # BASE_URL
self.assertEqual("/foobar/", base_paths[4]) # LOGOUT_REDIRECT_URL
@pytest.mark.parametrize(
("languages", "expected"),
[
("de", ["de"]),
("zh", ["zh"]),
("fr+en", ["fr", "en"]),
# Locales must be supported
("en-001+fr-CA", ["en-001", "fr-CA"]),
("en-001+fr", ["en-001", "fr"]),
# Special case for Chinese: variants seem to miss some dates,
# so we always add "zh" as a fallback.
("en+zh-Hans-HK", ["en", "zh-Hans-HK", "zh"]),
("en+zh-Hans", ["en", "zh-Hans", "zh"]),
("en+zh-Hans+zh-Hant", ["en", "zh-Hans", "zh-Hant", "zh"]),
],
)
def test_parser_date_parser_languages(languages, expected) -> None:
assert sorted(_parse_dateparser_languages(languages)) == sorted(expected)

69
uv.lock generated
View File

@@ -1748,6 +1748,73 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" },
]
[[package]]
name = "ijson"
version = "3.5.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/f4/57/60d1a6a512f2f0508d0bc8b4f1cc5616fd3196619b66bd6a01f9155a1292/ijson-3.5.0.tar.gz", hash = "sha256:94688760720e3f5212731b3cb8d30267f9a045fb38fb3870254e7b9504246f31", size = 68658, upload-time = "2026-02-24T03:58:30.974Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/65/da/644343198abca5e0f6e2486063f8d8f3c443ca0ef5e5c890e51ef6032e33/ijson-3.5.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5616311404b858d32740b7ad8b9a799c62165f5ecb85d0a8ed16c21665a90533", size = 88964, upload-time = "2026-02-24T03:56:53.099Z" },
{ url = "https://files.pythonhosted.org/packages/5b/63/8621190aa2baf96156dfd4c632b6aa9f1464411e50b98750c09acc0505ea/ijson-3.5.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e9733f94029dd41702d573ef64752e2556e72aea14623d6dbb7a44ca1ccf30fd", size = 60582, upload-time = "2026-02-24T03:56:54.261Z" },
{ url = "https://files.pythonhosted.org/packages/20/31/6a3f041fdd17dacff33b7d7d3ba3df6dca48740108340c6042f974b2ad20/ijson-3.5.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:db8398c6721b98412a4f618da8022550c8b9c5d9214040646071b5deb4d4a393", size = 60632, upload-time = "2026-02-24T03:56:55.159Z" },
{ url = "https://files.pythonhosted.org/packages/e4/68/474541998abbdecfd46a744536878335de89aceb9f085bff1aaf35575ceb/ijson-3.5.0-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:c061314845c08163b1784b6076ea5f075372461a32e6916f4e5f211fd4130b64", size = 131988, upload-time = "2026-02-24T03:56:56.35Z" },
{ url = "https://files.pythonhosted.org/packages/cd/32/e05ff8b72a44fe9d192f41c5dcbc35cfa87efc280cdbfe539ffaf4a7535e/ijson-3.5.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1111a1c5ac79119c5d6e836f900c1a53844b50a18af38311baa6bb61e2645aca", size = 138669, upload-time = "2026-02-24T03:56:57.555Z" },
{ url = "https://files.pythonhosted.org/packages/49/b5/955a83b031102c7a602e2c06d03aff0a0e584212f09edb94ccc754d203ac/ijson-3.5.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1e74aff8c681c24002b61b1822f9511d4c384f324f7dbc08c78538e01fdc9fcb", size = 135093, upload-time = "2026-02-24T03:56:59.267Z" },
{ url = "https://files.pythonhosted.org/packages/e8/f2/30250cfcb4d2766669b31f6732689aab2bb91de426a15a3ebe482df7ee48/ijson-3.5.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:739a7229b1b0cc5f7e2785a6e7a5fc915e850d3fed9588d0e89a09f88a417253", size = 138715, upload-time = "2026-02-24T03:57:00.491Z" },
{ url = "https://files.pythonhosted.org/packages/a2/05/785a145d7e75e04e04480d59b6323cd4b1d9013a6cd8643fa635fbc93490/ijson-3.5.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:ef88712160360cab3ca6471a4e5418243f8b267cf1fe1620879d1b5558babc71", size = 133194, upload-time = "2026-02-24T03:57:01.759Z" },
{ url = "https://files.pythonhosted.org/packages/14/eb/80d6f8a748dead4034cea0939494a67d10ccf88d6413bf6e860393139676/ijson-3.5.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6ca0d1b6b5f8166a6248f4309497585fb8553b04bc8179a0260fad636cfdb798", size = 135588, upload-time = "2026-02-24T03:57:03.131Z" },
{ url = "https://files.pythonhosted.org/packages/aa/17/9c63c7688025f3a8c47ea717b8306649c8c7244e49e20a2be4e3515dc75c/ijson-3.5.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:1ebefbe149a6106cc848a3eaf536af51a9b5ccc9082de801389f152dba6ab755", size = 88536, upload-time = "2026-02-24T03:57:06.809Z" },
{ url = "https://files.pythonhosted.org/packages/6f/dd/e15c2400244c117b06585452ebc63ae254f5a6964f712306afd1422daae0/ijson-3.5.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:19e30d9f00f82e64de689c0b8651b9cfed879c184b139d7e1ea5030cec401c21", size = 60499, upload-time = "2026-02-24T03:57:09.155Z" },
{ url = "https://files.pythonhosted.org/packages/77/a9/bf4fe3538a0c965f16b406f180a06105b875da83f0743e36246be64ef550/ijson-3.5.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a04a33ee78a6f27b9b8528c1ca3c207b1df3b8b867a4cf2fcc4109986f35c227", size = 60330, upload-time = "2026-02-24T03:57:10.574Z" },
{ url = "https://files.pythonhosted.org/packages/31/76/6f91bdb019dd978fce1bc5ea1cd620cfc096d258126c91db2c03a20a7f34/ijson-3.5.0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:7d48dc2984af02eb3c56edfb3f13b3f62f2f3e4fe36f058c8cfc75d93adf4fed", size = 138977, upload-time = "2026-02-24T03:57:11.932Z" },
{ url = "https://files.pythonhosted.org/packages/11/be/bbc983059e48a54b0121ee60042979faed7674490bbe7b2c41560db3f436/ijson-3.5.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f1e73a44844d9adbca9cf2c4132cd875933e83f3d4b23881fcaf82be83644c7d", size = 149785, upload-time = "2026-02-24T03:57:13.255Z" },
{ url = "https://files.pythonhosted.org/packages/6d/81/2fee58f9024a3449aee83edfa7167fb5ccd7e1af2557300e28531bb68e16/ijson-3.5.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7389a56b8562a19948bdf1d7bae3a2edc8c7f86fb59834dcb1c4c722818e645a", size = 149729, upload-time = "2026-02-24T03:57:14.191Z" },
{ url = "https://files.pythonhosted.org/packages/c7/56/f1706761fcc096c9d414b3dcd000b1e6e5c24364c21cfba429837f98ee8d/ijson-3.5.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3176f23f8ebec83f374ed0c3b4e5a0c4db7ede54c005864efebbed46da123608", size = 150697, upload-time = "2026-02-24T03:57:15.855Z" },
{ url = "https://files.pythonhosted.org/packages/d9/6e/ee0d9c875a0193b632b3e9ccd1b22a50685fb510256ad57ba483b6529f77/ijson-3.5.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:6babd88e508630c6ef86c9bebaaf13bb2fb8ec1d8f8868773a03c20253f599bc", size = 142873, upload-time = "2026-02-24T03:57:16.831Z" },
{ url = "https://files.pythonhosted.org/packages/d2/bf/f9d4399d0e6e3fd615035290a71e97c843f17f329b43638c0a01cf112d73/ijson-3.5.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:dc1b3836b174b6db2fa8319f1926fb5445abd195dc963368092103f8579cb8ed", size = 151583, upload-time = "2026-02-24T03:57:17.757Z" },
{ url = "https://files.pythonhosted.org/packages/a2/71/d67e764a712c3590627480643a3b51efcc3afa4ef3cb54ee4c989073c97e/ijson-3.5.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e9cedc10e40dd6023c351ed8bfc7dcfce58204f15c321c3c1546b9c7b12562a4", size = 88544, upload-time = "2026-02-24T03:57:21.293Z" },
{ url = "https://files.pythonhosted.org/packages/1a/39/f1c299371686153fa3cf5c0736b96247a87a1bee1b7145e6d21f359c505a/ijson-3.5.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:3647649f782ee06c97490b43680371186651f3f69bebe64c6083ee7615d185e5", size = 60495, upload-time = "2026-02-24T03:57:22.501Z" },
{ url = "https://files.pythonhosted.org/packages/16/94/b1438e204d75e01541bebe3e668fe3e68612d210e9931ae1611062dd0a56/ijson-3.5.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:90e74be1dce05fce73451c62d1118671f78f47c9f6be3991c82b91063bf01fc9", size = 60325, upload-time = "2026-02-24T03:57:23.332Z" },
{ url = "https://files.pythonhosted.org/packages/30/e2/4aa9c116fa86cc8b0f574f3c3a47409edc1cd4face05d0e589a5a176b05d/ijson-3.5.0-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:78e9ad73e7be2dd80627504bd5cbf512348c55ce2c06e362ed7683b5220e8568", size = 138774, upload-time = "2026-02-24T03:57:24.683Z" },
{ url = "https://files.pythonhosted.org/packages/d2/d2/738b88752a70c3be1505faa4dcd7110668c2712e582a6a36488ed1e295d4/ijson-3.5.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9577449313cc94be89a4fe4b3e716c65f09cc19636d5a6b2861c4e80dddebd58", size = 149820, upload-time = "2026-02-24T03:57:26.062Z" },
{ url = "https://files.pythonhosted.org/packages/ed/df/0b3ab9f393ca8f72ea03bc896ba9fdc987e90ae08cdb51c32a4ee0c14d5e/ijson-3.5.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3e4c1178fb50aff5f5701a30a5152ead82a14e189ce0f6102fa1b5f10b2f54ff", size = 149747, upload-time = "2026-02-24T03:57:27.308Z" },
{ url = "https://files.pythonhosted.org/packages/cc/a3/b0037119f75131b78cb00acc2657b1a9d0435475f1f2c5f8f5a170b66b9c/ijson-3.5.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0eb402ab026ffb37a918d75af2b7260fe6cfbce13232cc83728a714dd30bd81d", size = 151027, upload-time = "2026-02-24T03:57:28.522Z" },
{ url = "https://files.pythonhosted.org/packages/22/a0/cb344de1862bf09d8f769c9d25c944078c87dd59a1b496feec5ad96309a4/ijson-3.5.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:5b08ee08355f9f729612a8eb9bf69cc14f9310c3b2a487c6f1c3c65d85216ec4", size = 142996, upload-time = "2026-02-24T03:57:29.774Z" },
{ url = "https://files.pythonhosted.org/packages/ca/32/a8ffd67182e02ea61f70f62daf43ded4fa8a830a2520a851d2782460aba8/ijson-3.5.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:bda62b6d48442903e7bf56152108afb7f0f1293c2b9bef2f2c369defea76ab18", size = 152068, upload-time = "2026-02-24T03:57:30.969Z" },
{ url = "https://files.pythonhosted.org/packages/42/65/13e2492d17e19a2084523e18716dc2809159f2287fd2700c735f311e76c4/ijson-3.5.0-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:4d4b0cd676b8c842f7648c1a783448fac5cd3b98289abd83711b3e275e143524", size = 93019, upload-time = "2026-02-24T03:57:33.976Z" },
{ url = "https://files.pythonhosted.org/packages/33/92/483fc97ece0c3f1cecabf48f6a7a36e89d19369eec462faaeaa34c788992/ijson-3.5.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:252dec3680a48bb82d475e36b4ae1b3a9d7eb690b951bb98a76c5fe519e30188", size = 62714, upload-time = "2026-02-24T03:57:34.819Z" },
{ url = "https://files.pythonhosted.org/packages/4b/88/793fe020a0fe9d9eed4c285cf4a5cfdb0a935708b3bde0d72f35c794b513/ijson-3.5.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:aa1b5dca97d323931fde2501172337384c958914d81a9dac7f00f0d4bfc76bc7", size = 62460, upload-time = "2026-02-24T03:57:35.874Z" },
{ url = "https://files.pythonhosted.org/packages/51/69/f1a2690aa8d4df1f4e262b385e65a933ffdc250b091531bac9a449c19e16/ijson-3.5.0-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:7a5ec7fd86d606094bba6f6f8f87494897102fa4584ef653f3005c51a784c320", size = 199273, upload-time = "2026-02-24T03:57:37.07Z" },
{ url = "https://files.pythonhosted.org/packages/ea/a2/f1346d5299e79b988ab472dc773d5381ec2d57c23cb2f1af3ede4a810e62/ijson-3.5.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:009f41443e1521847701c6d87fa3923c0b1961be3c7e7de90947c8cb92ea7c44", size = 216884, upload-time = "2026-02-24T03:57:38.346Z" },
{ url = "https://files.pythonhosted.org/packages/28/3c/8b637e869be87799e6c2c3c275a30a546f086b1aed77e2b7f11512168c5a/ijson-3.5.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e4c3651d1f9fe2839a93fdf8fd1d5ca3a54975349894249f3b1b572bcc4bd577", size = 207306, upload-time = "2026-02-24T03:57:39.718Z" },
{ url = "https://files.pythonhosted.org/packages/7f/7c/18b1c1df6951ca056782d7580ec40cea4ff9a27a0947d92640d1cc8c4ae3/ijson-3.5.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:945b7abcfcfeae2cde17d8d900870f03536494245dda7ad4f8d056faa303256c", size = 211364, upload-time = "2026-02-24T03:57:40.953Z" },
{ url = "https://files.pythonhosted.org/packages/f3/55/e795812e82851574a9dba8a53fde045378f531ef14110c6fb55dbd23b443/ijson-3.5.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:0574b0a841ff97495c13e9d7260fbf3d85358b061f540c52a123db9dbbaa2ed6", size = 200608, upload-time = "2026-02-24T03:57:42.272Z" },
{ url = "https://files.pythonhosted.org/packages/5c/cd/013c85b4749b57a4cb4c2670014d1b32b8db4ab1a7be92ea7aeb5d7fe7b5/ijson-3.5.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f969ffb2b89c5cdf686652d7fb66252bc72126fa54d416317411497276056a18", size = 205127, upload-time = "2026-02-24T03:57:43.286Z" },
{ url = "https://files.pythonhosted.org/packages/7a/93/0868efe753dc1df80cc405cf0c1f2527a6991643607c741bff8dcb899b3b/ijson-3.5.0-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:25a5a6b2045c90bb83061df27cfa43572afa43ba9408611d7bfe237c20a731a9", size = 89094, upload-time = "2026-02-24T03:57:46.115Z" },
{ url = "https://files.pythonhosted.org/packages/24/94/fd5a832a0df52ef5e4e740f14ac8640725d61034a1b0c561e8b5fb424706/ijson-3.5.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:8976c54c0b864bc82b951bae06567566ac77ef63b90a773a69cd73aab47f4f4f", size = 60715, upload-time = "2026-02-24T03:57:47.552Z" },
{ url = "https://files.pythonhosted.org/packages/70/79/1b9a90af5732491f9eec751ee211b86b11011e1158c555c06576d52c3919/ijson-3.5.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:859eb2038f7f1b0664df4241957694cc35e6295992d71c98659b22c69b3cbc10", size = 60638, upload-time = "2026-02-24T03:57:48.428Z" },
{ url = "https://files.pythonhosted.org/packages/23/6f/2c551ea980fe56f68710a8d5389cfbd015fc45aaafd17c3c52c346db6aa1/ijson-3.5.0-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:c911aa02991c7c0d3639b6619b93a93210ff1e7f58bf7225d613abea10adc78e", size = 140667, upload-time = "2026-02-24T03:57:49.314Z" },
{ url = "https://files.pythonhosted.org/packages/25/0e/27b887879ba6a5bc29766e3c5af4942638c952220fd63e1e442674f7883a/ijson-3.5.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:903cbdc350173605220edc19796fbea9b2203c8b3951fb7335abfa8ed37afda8", size = 149850, upload-time = "2026-02-24T03:57:50.329Z" },
{ url = "https://files.pythonhosted.org/packages/da/1e/23e10e1bc04bf31193b21e2960dce14b17dbd5d0c62204e8401c59d62c08/ijson-3.5.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a4549d96ded5b8efa71639b2160235415f6bdb8c83367615e2dbabcb72755c33", size = 149206, upload-time = "2026-02-24T03:57:51.261Z" },
{ url = "https://files.pythonhosted.org/packages/8e/90/e552f6495063b235cf7fa2c592f6597c057077195e517b842a0374fd470c/ijson-3.5.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:6b2dcf6349e6042d83f3f8c39ce84823cf7577eba25bac5aae5e39bbbbbe9c1c", size = 150438, upload-time = "2026-02-24T03:57:52.198Z" },
{ url = "https://files.pythonhosted.org/packages/5c/18/45bf8f297c41b42a1c231d261141097babd953d2c28a07be57ae4c3a1a02/ijson-3.5.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:e44af39e6f8a17e5627dcd89715d8279bf3474153ff99aae031a936e5c5572e5", size = 144369, upload-time = "2026-02-24T03:57:53.22Z" },
{ url = "https://files.pythonhosted.org/packages/9b/3a/deb9772bb2c0cead7ad64f00c3598eec9072bdf511818e70e2c512eeabbe/ijson-3.5.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:9260332304b7e7828db56d43f08fc970a3ab741bf84ff10189361ea1b60c395b", size = 151352, upload-time = "2026-02-24T03:57:54.375Z" },
{ url = "https://files.pythonhosted.org/packages/9f/d9/86f7fac35e0835faa188085ae0579e813493d5261ce056484015ad533445/ijson-3.5.0-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:2ea4b676ec98e374c1df400a47929859e4fa1239274339024df4716e802aa7e4", size = 93069, upload-time = "2026-02-24T03:57:57.849Z" },
{ url = "https://files.pythonhosted.org/packages/33/d2/e7366ed9c6e60228d35baf4404bac01a126e7775ea8ce57f560125ed190a/ijson-3.5.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:014586eec043e23c80be9a923c56c3a0920a0f1f7d17478ce7bc20ba443968ef", size = 62767, upload-time = "2026-02-24T03:57:58.758Z" },
{ url = "https://files.pythonhosted.org/packages/35/8b/3e703e8cc4b3ada79f13b28070b51d9550c578f76d1968657905857b2ddd/ijson-3.5.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:d5b8b886b0248652d437f66e7c5ac318bbdcb2c7137a7e5327a68ca00b286f5f", size = 62467, upload-time = "2026-02-24T03:58:00.261Z" },
{ url = "https://files.pythonhosted.org/packages/21/42/0c91af32c1ee8a957fdac2e051b5780756d05fd34e4b60d94a08d51bac1d/ijson-3.5.0-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:498fd46ae2349297e43acf97cdc421e711dbd7198418677259393d2acdc62d78", size = 200447, upload-time = "2026-02-24T03:58:01.591Z" },
{ url = "https://files.pythonhosted.org/packages/f9/80/796ea0e391b7e2d45c5b1b451734bba03f81c2984cf955ea5eaa6c4920ad/ijson-3.5.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:22a51b4f9b81f12793731cf226266d1de2112c3c04ba4a04117ad4e466897e05", size = 217820, upload-time = "2026-02-24T03:58:02.598Z" },
{ url = "https://files.pythonhosted.org/packages/38/14/52b6613fdda4078c62eb5b4fe3efc724ddc55a4ad524c93de51830107aa3/ijson-3.5.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9636c710dc4ac4a281baa266a64f323b4cc165cec26836af702c44328b59a515", size = 208310, upload-time = "2026-02-24T03:58:04.759Z" },
{ url = "https://files.pythonhosted.org/packages/6a/ad/8b3105a78774fd4a65e534a21d975ef3a77e189489fe3029ebcaeba5e243/ijson-3.5.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:f7168a39e8211107666d71b25693fd1b2bac0b33735ef744114c403c6cac21e1", size = 211843, upload-time = "2026-02-24T03:58:05.836Z" },
{ url = "https://files.pythonhosted.org/packages/36/ab/a2739f6072d6e1160581bc3ed32da614c8cced023dcd519d9c5fa66e0425/ijson-3.5.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:8696454245415bc617ab03b0dc3ae4c86987df5dc6a90bad378fe72c5409d89e", size = 200906, upload-time = "2026-02-24T03:58:07.788Z" },
{ url = "https://files.pythonhosted.org/packages/6d/5e/e06c2de3c3d4a9cfb655c1ad08a68fb72838d271072cdd3196576ac4431a/ijson-3.5.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:c21bfb61f71f191565885bf1bc29e0a186292d866b4880637b833848360bdc1b", size = 205495, upload-time = "2026-02-24T03:58:09.163Z" },
{ url = "https://files.pythonhosted.org/packages/d9/3b/d31ecfa63a218978617446159f3d77aab2417a5bd2885c425b176353ff78/ijson-3.5.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:d64c624da0e9d692d6eb0ff63a79656b59d76bf80773a17c5b0f835e4e8ef627", size = 57715, upload-time = "2026-02-24T03:58:24.545Z" },
{ url = "https://files.pythonhosted.org/packages/30/51/b170e646d378e8cccf9637c05edb5419b00c2c4df64b0258c3af5355608e/ijson-3.5.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:876f7df73b7e0d6474f9caa729b9cdbfc8e76de9075a4887dfd689e29e85c4ca", size = 57205, upload-time = "2026-02-24T03:58:25.681Z" },
{ url = "https://files.pythonhosted.org/packages/ef/83/44dbd0231b0a8c6c14d27473d10c4e27dfbce7d5d9a833c79e3e6c33eb40/ijson-3.5.0-pp311-pypy311_pp73-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:e7dbff2c8d9027809b0cde663df44f3210da10ea377121d42896fb6ee405dd31", size = 71229, upload-time = "2026-02-24T03:58:27.103Z" },
{ url = "https://files.pythonhosted.org/packages/c8/98/cf84048b7c6cec888826e696a31f45bee7ebcac15e532b6be1fc4c2c9608/ijson-3.5.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4217a1edc278660679e1197c83a1a2a2d367792bfbb2a3279577f4b59b93730d", size = 71217, upload-time = "2026-02-24T03:58:28.021Z" },
{ url = "https://files.pythonhosted.org/packages/3c/0a/e34c729a87ff67dc6540f6bcc896626158e691d433ab57db0086d73decd2/ijson-3.5.0-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:04f0fc740311388ee745ba55a12292b722d6f52000b11acbb913982ba5fbdf87", size = 68618, upload-time = "2026-02-24T03:58:28.918Z" },
]
[[package]]
name = "imagehash"
version = "4.3.2"
@@ -2751,6 +2818,7 @@ dependencies = [
{ name = "flower", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "gotenberg-client", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "httpx-oauth", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "ijson", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "imap-tools", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "jinja2", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "langdetect", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -2898,6 +2966,7 @@ requires-dist = [
{ name = "gotenberg-client", specifier = "~=0.13.1" },
{ name = "granian", extras = ["uvloop"], marker = "extra == 'webserver'", specifier = "~=2.7.0" },
{ name = "httpx-oauth", specifier = "~=0.16" },
{ name = "ijson", specifier = ">=3.2" },
{ name = "imap-tools", specifier = "~=1.11.0" },
{ name = "jinja2", specifier = "~=3.1.5" },
{ name = "langdetect", specifier = "~=1.0.9" },