Compare commits

...

7 Commits

Author SHA1 Message Date
shamoon
d9628f7255 Update tests 2026-03-09 11:24:58 -07:00
shamoon
fcbe4b200c Use effective_content for matching 2026-03-09 11:23:46 -07:00
shamoon
2b434916a0 Add an effective_content for the model 2026-03-09 11:23:32 -07:00
shamoon
d85ee29976 Fix ci gate base 2026-03-09 11:16:46 -07:00
GitHub Actions
0c7d56c5e7 Auto translate strings 2026-03-09 17:45:53 +00:00
Trenton H
0bcf904e3a Chore: Finish settings refactor (#12263) 2026-03-09 17:43:51 +00:00
Trenton H
bcc2f11152 Performance: Stream JSON during import for memory improvements (#12276)
* Perf: stream manifest parsing with ijson in document_importer

Replace bulk json.load of the full manifest (which materializes the
entire JSON array into memory) with incremental ijson streaming.
Eliminates self.manifest entirely — records are never all in memory
at once.

- Add ijson>=3.2 dependency
- New module-level iter_manifest_records() generator
- load_manifest_files() collects paths only; no parsing at load time
- check_manifest_validity() streams without accumulating records
- decrypt_secret_fields() streams each manifest to a .decrypted.json
  temp file record-by-record; temp files cleaned up after file copy
- _import_files_from_manifest() collects only document records (small
  fraction of manifest) for the tqdm progress bar

Measured on 200 docs + 200 CustomFieldInstances:
- Streaming validation: peak memory 3081 KiB -> 333 KiB (89% reduction)
- Stream-decrypt to file: peak memory 3081 KiB -> 549 KiB (82% reduction)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* Perf: slim dict in _import_files_from_manifest, discard fields

When collecting document records for the file-copy step, extract only
the 4 keys the loop actually uses (pk + 3 exported filename keys) and
discard the full fields dict (content, checksum, tags, etc.).

Peak memory for the document-record list: 939 KiB -> 375 KiB (60% reduction).
Wall time unchanged.
2026-03-09 10:20:48 -07:00
21 changed files with 1384 additions and 976 deletions

View File

@@ -41,7 +41,7 @@ jobs:
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
echo "base=${{ github.event.pull_request.base.sha }}" >> "$GITHUB_OUTPUT"
elif [[ "${{ github.event.created }}" == "true" ]]; then
echo "base=origin/${{ github.event.repository.default_branch }}" >> "$GITHUB_OUTPUT"
echo "base=${{ github.event.repository.default_branch }}" >> "$GITHUB_OUTPUT"
else
echo "base=${{ github.event.before }}" >> "$GITHUB_OUTPUT"
fi

View File

@@ -43,7 +43,7 @@ jobs:
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
echo "base=${{ github.event.pull_request.base.sha }}" >> "$GITHUB_OUTPUT"
elif [[ "${{ github.event.created }}" == "true" ]]; then
echo "base=origin/${{ github.event.repository.default_branch }}" >> "$GITHUB_OUTPUT"
echo "base=${{ github.event.repository.default_branch }}" >> "$GITHUB_OUTPUT"
else
echo "base=${{ github.event.before }}" >> "$GITHUB_OUTPUT"
fi

View File

@@ -38,7 +38,7 @@ jobs:
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
echo "base=${{ github.event.pull_request.base.sha }}" >> "$GITHUB_OUTPUT"
elif [[ "${{ github.event.created }}" == "true" ]]; then
echo "base=origin/${{ github.event.repository.default_branch }}" >> "$GITHUB_OUTPUT"
echo "base=${{ github.event.repository.default_branch }}" >> "$GITHUB_OUTPUT"
else
echo "base=${{ github.event.before }}" >> "$GITHUB_OUTPUT"
fi

View File

@@ -49,6 +49,7 @@ dependencies = [
"flower~=2.0.1",
"gotenberg-client~=0.13.1",
"httpx-oauth~=0.16",
"ijson>=3.2",
"imap-tools~=1.11.0",
"jinja2~=3.1.5",
"langdetect~=1.0.9",

View File

@@ -8,6 +8,7 @@ from pathlib import Path
from zipfile import ZipFile
from zipfile import is_zipfile
import ijson
from django.conf import settings
from django.contrib.auth.models import Permission
from django.contrib.auth.models import User
@@ -46,6 +47,15 @@ if settings.AUDIT_LOG_ENABLED:
from auditlog.registry import auditlog
def iter_manifest_records(path: Path) -> Generator[dict, None, None]:
"""Yield records one at a time from a manifest JSON array via ijson."""
try:
with path.open("rb") as f:
yield from ijson.items(f, "item")
except ijson.JSONError as e:
raise CommandError(f"Failed to parse manifest file {path}: {e}") from e
@contextmanager
def disable_signal(sig, receiver, sender, *, weak: bool | None = None) -> Generator:
try:
@@ -143,14 +153,9 @@ class Command(CryptMixin, PaperlessCommand):
Loads manifest data from the various JSON files for parsing and loading the database
"""
main_manifest_path: Path = self.source / "manifest.json"
with main_manifest_path.open() as infile:
self.manifest = json.load(infile)
self.manifest_paths.append(main_manifest_path)
for file in Path(self.source).glob("**/*-manifest.json"):
with file.open() as infile:
self.manifest += json.load(infile)
self.manifest_paths.append(file)
def load_metadata(self) -> None:
@@ -231,7 +236,6 @@ class Command(CryptMixin, PaperlessCommand):
self.version: str | None = None
self.salt: str | None = None
self.manifest_paths = []
self.manifest = []
# Create a temporary directory for extracting a zip file into it, even if supplied source is no zip file to keep code cleaner.
with tempfile.TemporaryDirectory() as tmp_dir:
@@ -291,6 +295,9 @@ class Command(CryptMixin, PaperlessCommand):
else:
self.stdout.write(self.style.NOTICE("Data only import completed"))
for tmp in getattr(self, "_decrypted_tmp_paths", []):
tmp.unlink(missing_ok=True)
self.stdout.write("Updating search index...")
call_command(
"document_index",
@@ -343,11 +350,12 @@ class Command(CryptMixin, PaperlessCommand):
) from e
self.stdout.write("Checking the manifest")
for record in self.manifest:
# Only check if the document files exist if this is not data only
# We don't care about documents for a data only import
if not self.data_only and record["model"] == "documents.document":
check_document_validity(record)
for manifest_path in self.manifest_paths:
for record in iter_manifest_records(manifest_path):
# Only check if the document files exist if this is not data only
# We don't care about documents for a data only import
if not self.data_only and record["model"] == "documents.document":
check_document_validity(record)
def _import_files_from_manifest(self) -> None:
settings.ORIGINALS_DIR.mkdir(parents=True, exist_ok=True)
@@ -356,23 +364,31 @@ class Command(CryptMixin, PaperlessCommand):
self.stdout.write("Copy files into paperless...")
manifest_documents = list(
filter(lambda r: r["model"] == "documents.document", self.manifest),
)
document_records = [
{
"pk": record["pk"],
EXPORTER_FILE_NAME: record[EXPORTER_FILE_NAME],
EXPORTER_THUMBNAIL_NAME: record.get(EXPORTER_THUMBNAIL_NAME),
EXPORTER_ARCHIVE_NAME: record.get(EXPORTER_ARCHIVE_NAME),
}
for manifest_path in self.manifest_paths
for record in iter_manifest_records(manifest_path)
if record["model"] == "documents.document"
]
for record in self.track(manifest_documents, description="Copying files..."):
for record in self.track(document_records, description="Copying files..."):
document = Document.objects.get(pk=record["pk"])
doc_file = record[EXPORTER_FILE_NAME]
document_path = self.source / doc_file
if EXPORTER_THUMBNAIL_NAME in record:
if record[EXPORTER_THUMBNAIL_NAME]:
thumb_file = record[EXPORTER_THUMBNAIL_NAME]
thumbnail_path = (self.source / thumb_file).resolve()
else:
thumbnail_path = None
if EXPORTER_ARCHIVE_NAME in record:
if record[EXPORTER_ARCHIVE_NAME]:
archive_file = record[EXPORTER_ARCHIVE_NAME]
archive_path = self.source / archive_file
else:
@@ -413,33 +429,43 @@ class Command(CryptMixin, PaperlessCommand):
document.save()
def _decrypt_record_if_needed(self, record: dict) -> dict:
fields = self.CRYPT_FIELDS_BY_MODEL.get(record.get("model", ""))
if fields:
for field in fields:
if record["fields"].get(field):
record["fields"][field] = self.decrypt_string(
value=record["fields"][field],
)
return record
def decrypt_secret_fields(self) -> None:
"""
The converse decryption of some fields out of the export before importing to database
The converse decryption of some fields out of the export before importing to database.
Streams records from each manifest path and writes decrypted content to a temp file.
"""
if self.passphrase:
# Salt has been loaded from metadata.json at this point, so it cannot be None
self.setup_crypto(passphrase=self.passphrase, salt=self.salt)
had_at_least_one_record = False
for crypt_config in self.CRYPT_FIELDS:
importer_model: str = crypt_config["model_name"]
crypt_fields: str = crypt_config["fields"]
for record in filter(
lambda x: x["model"] == importer_model,
self.manifest,
):
had_at_least_one_record = True
for field in crypt_fields:
if record["fields"][field]:
record["fields"][field] = self.decrypt_string(
value=record["fields"][field],
)
if had_at_least_one_record:
# It's annoying, but the DB is loaded from the JSON directly
# Maybe could change that in the future?
(self.source / "manifest.json").write_text(
json.dumps(self.manifest, indent=2, ensure_ascii=False),
)
if not self.passphrase:
return
# Salt has been loaded from metadata.json at this point, so it cannot be None
self.setup_crypto(passphrase=self.passphrase, salt=self.salt)
self._decrypted_tmp_paths: list[Path] = []
new_paths: list[Path] = []
for manifest_path in self.manifest_paths:
tmp = manifest_path.with_name(manifest_path.stem + ".decrypted.json")
with tmp.open("w", encoding="utf-8") as out:
out.write("[\n")
first = True
for record in iter_manifest_records(manifest_path):
if not first:
out.write(",\n")
json.dump(
self._decrypt_record_if_needed(record),
out,
indent=2,
ensure_ascii=False,
)
first = False
out.write("\n]\n")
self._decrypted_tmp_paths.append(tmp)
new_paths.append(tmp)
self.manifest_paths = new_paths

View File

@@ -169,7 +169,7 @@ def match_storage_paths(document: Document, classifier: DocumentClassifier, user
def matches(matching_model: MatchingModel, document: Document):
search_flags = 0
document_content = document.content
document_content = document.get_effective_content() or ""
# Check that match is not empty
if not matching_model.match.strip():

View File

@@ -361,6 +361,42 @@ class Document(SoftDeleteModel, ModelWithOwner): # type: ignore[django-manager-
res += f" {self.title}"
return res
def get_effective_content(self) -> str | None:
"""
Returns the effective content for the document.
For root documents, this is the latest version's content when available.
For version documents, this is always the document's own content.
If the queryset already annotated ``effective_content``, that value is used.
"""
if hasattr(self, "effective_content"):
return getattr(self, "effective_content")
if self.root_document_id is not None or self.pk is None:
return self.content
prefetched_cache = getattr(self, "_prefetched_objects_cache", None)
prefetched_versions = (
prefetched_cache.get("versions")
if isinstance(prefetched_cache, dict)
else None
)
if prefetched_versions:
latest_prefetched = max(prefetched_versions, key=lambda doc: doc.id)
return latest_prefetched.content
latest_version_content = (
Document.objects.filter(root_document=self)
.order_by("-id")
.values_list("content", flat=True)
.first()
)
return (
latest_version_content
if latest_version_content is not None
else self.content
)
@property
def suggestion_content(self):
"""
@@ -373,15 +409,21 @@ class Document(SoftDeleteModel, ModelWithOwner): # type: ignore[django-manager-
This improves processing speed for large documents while keeping
enough context for accurate suggestions.
"""
if not self.content or len(self.content) <= 1200000:
return self.content
effective_content = self.get_effective_content()
if not effective_content or len(effective_content) <= 1200000:
return effective_content
else:
# Use 80% from the start and 20% from the end
# to preserve both opening and closing context.
head_len = 800000
tail_len = 200000
return " ".join((self.content[:head_len], self.content[-tail_len:]))
return " ".join(
(
effective_content[:head_len],
effective_content[-tail_len:],
),
)
@property
def source_path(self) -> Path:

View File

@@ -156,6 +156,46 @@ class TestDocument(TestCase):
)
self.assertEqual(doc.get_public_filename(), "2020-12-25 test")
def test_suggestion_content_uses_latest_version_content_for_root_documents(
self,
) -> None:
root = Document.objects.create(
title="root",
checksum="root",
mime_type="application/pdf",
content="outdated root content",
)
version = Document.objects.create(
title="v1",
checksum="v1",
mime_type="application/pdf",
root_document=root,
content="latest version content",
)
self.assertEqual(root.suggestion_content, version.content)
def test_content_length_is_per_document_row_for_versions(self) -> None:
root = Document.objects.create(
title="root",
checksum="root",
mime_type="application/pdf",
content="abc",
)
version = Document.objects.create(
title="v1",
checksum="v1",
mime_type="application/pdf",
root_document=root,
content="abcdefgh",
)
root.refresh_from_db()
version.refresh_from_db()
self.assertEqual(root.content_length, 3)
self.assertEqual(version.content_length, 8)
def test_suggestion_content() -> None:
"""

View File

@@ -119,15 +119,22 @@ class TestCommandImport(
# No read permissions
original_path.chmod(0o222)
manifest_path = Path(temp_dir) / "manifest.json"
manifest_path.write_text(
json.dumps(
[
{
"model": "documents.document",
EXPORTER_FILE_NAME: "original.pdf",
EXPORTER_ARCHIVE_NAME: "archive.pdf",
},
],
),
)
cmd = Command()
cmd.source = Path(temp_dir)
cmd.manifest = [
{
"model": "documents.document",
EXPORTER_FILE_NAME: "original.pdf",
EXPORTER_ARCHIVE_NAME: "archive.pdf",
},
]
cmd.manifest_paths = [manifest_path]
cmd.data_only = False
with self.assertRaises(CommandError) as cm:
cmd.check_manifest_validity()
@@ -296,7 +303,7 @@ class TestCommandImport(
(self.dirs.scratch_dir / "manifest.json").touch()
# We're not building a manifest, so it fails, but this test doesn't care
with self.assertRaises(json.decoder.JSONDecodeError):
with self.assertRaises(CommandError):
call_command(
"document_importer",
"--no-progress-bar",
@@ -325,7 +332,7 @@ class TestCommandImport(
)
# We're not building a manifest, so it fails, but this test doesn't care
with self.assertRaises(json.decoder.JSONDecodeError):
with self.assertRaises(CommandError):
call_command(
"document_importer",
"--no-progress-bar",

View File

@@ -48,6 +48,52 @@ class _TestMatchingBase(TestCase):
class TestMatching(_TestMatchingBase):
def test_matches_uses_latest_version_content_for_root_documents(self) -> None:
root = Document.objects.create(
title="root",
checksum="root",
mime_type="application/pdf",
content="root content without token",
)
Document.objects.create(
title="v1",
checksum="v1",
mime_type="application/pdf",
root_document=root,
content="latest version contains keyword",
)
tag = Tag.objects.create(
name="tag",
match="keyword",
matching_algorithm=Tag.MATCH_ANY,
)
self.assertTrue(matching.matches(tag, root))
def test_matches_does_not_fall_back_to_root_content_when_version_exists(
self,
) -> None:
root = Document.objects.create(
title="root",
checksum="root",
mime_type="application/pdf",
content="root contains keyword",
)
Document.objects.create(
title="v1",
checksum="v1",
mime_type="application/pdf",
root_document=root,
content="latest version without token",
)
tag = Tag.objects.create(
name="tag",
match="keyword",
matching_algorithm=Tag.MATCH_ANY,
)
self.assertFalse(matching.matches(tag, root))
def test_match_none(self) -> None:
self._test_matching(
"",

View File

@@ -2,7 +2,7 @@ msgid ""
msgstr ""
"Project-Id-Version: paperless-ngx\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2026-03-09 01:51+0000\n"
"POT-Creation-Date: 2026-03-09 17:44+0000\n"
"PO-Revision-Date: 2022-02-17 04:17\n"
"Last-Translator: \n"
"Language-Team: English\n"
@@ -1856,151 +1856,151 @@ msgstr ""
msgid "paperless application settings"
msgstr ""
#: paperless/settings/__init__.py:752
#: paperless/settings/__init__.py:521
msgid "English (US)"
msgstr ""
#: paperless/settings/__init__.py:753
#: paperless/settings/__init__.py:522
msgid "Arabic"
msgstr ""
#: paperless/settings/__init__.py:754
#: paperless/settings/__init__.py:523
msgid "Afrikaans"
msgstr ""
#: paperless/settings/__init__.py:755
#: paperless/settings/__init__.py:524
msgid "Belarusian"
msgstr ""
#: paperless/settings/__init__.py:756
#: paperless/settings/__init__.py:525
msgid "Bulgarian"
msgstr ""
#: paperless/settings/__init__.py:757
#: paperless/settings/__init__.py:526
msgid "Catalan"
msgstr ""
#: paperless/settings/__init__.py:758
#: paperless/settings/__init__.py:527
msgid "Czech"
msgstr ""
#: paperless/settings/__init__.py:759
#: paperless/settings/__init__.py:528
msgid "Danish"
msgstr ""
#: paperless/settings/__init__.py:760
#: paperless/settings/__init__.py:529
msgid "German"
msgstr ""
#: paperless/settings/__init__.py:761
#: paperless/settings/__init__.py:530
msgid "Greek"
msgstr ""
#: paperless/settings/__init__.py:762
#: paperless/settings/__init__.py:531
msgid "English (GB)"
msgstr ""
#: paperless/settings/__init__.py:763
#: paperless/settings/__init__.py:532
msgid "Spanish"
msgstr ""
#: paperless/settings/__init__.py:764
#: paperless/settings/__init__.py:533
msgid "Persian"
msgstr ""
#: paperless/settings/__init__.py:765
#: paperless/settings/__init__.py:534
msgid "Finnish"
msgstr ""
#: paperless/settings/__init__.py:766
#: paperless/settings/__init__.py:535
msgid "French"
msgstr ""
#: paperless/settings/__init__.py:767
#: paperless/settings/__init__.py:536
msgid "Hungarian"
msgstr ""
#: paperless/settings/__init__.py:768
#: paperless/settings/__init__.py:537
msgid "Indonesian"
msgstr ""
#: paperless/settings/__init__.py:769
#: paperless/settings/__init__.py:538
msgid "Italian"
msgstr ""
#: paperless/settings/__init__.py:770
#: paperless/settings/__init__.py:539
msgid "Japanese"
msgstr ""
#: paperless/settings/__init__.py:771
#: paperless/settings/__init__.py:540
msgid "Korean"
msgstr ""
#: paperless/settings/__init__.py:772
#: paperless/settings/__init__.py:541
msgid "Luxembourgish"
msgstr ""
#: paperless/settings/__init__.py:773
#: paperless/settings/__init__.py:542
msgid "Norwegian"
msgstr ""
#: paperless/settings/__init__.py:774
#: paperless/settings/__init__.py:543
msgid "Dutch"
msgstr ""
#: paperless/settings/__init__.py:775
#: paperless/settings/__init__.py:544
msgid "Polish"
msgstr ""
#: paperless/settings/__init__.py:776
#: paperless/settings/__init__.py:545
msgid "Portuguese (Brazil)"
msgstr ""
#: paperless/settings/__init__.py:777
#: paperless/settings/__init__.py:546
msgid "Portuguese"
msgstr ""
#: paperless/settings/__init__.py:778
#: paperless/settings/__init__.py:547
msgid "Romanian"
msgstr ""
#: paperless/settings/__init__.py:779
#: paperless/settings/__init__.py:548
msgid "Russian"
msgstr ""
#: paperless/settings/__init__.py:780
#: paperless/settings/__init__.py:549
msgid "Slovak"
msgstr ""
#: paperless/settings/__init__.py:781
#: paperless/settings/__init__.py:550
msgid "Slovenian"
msgstr ""
#: paperless/settings/__init__.py:782
#: paperless/settings/__init__.py:551
msgid "Serbian"
msgstr ""
#: paperless/settings/__init__.py:783
#: paperless/settings/__init__.py:552
msgid "Swedish"
msgstr ""
#: paperless/settings/__init__.py:784
#: paperless/settings/__init__.py:553
msgid "Turkish"
msgstr ""
#: paperless/settings/__init__.py:785
#: paperless/settings/__init__.py:554
msgid "Ukrainian"
msgstr ""
#: paperless/settings/__init__.py:786
#: paperless/settings/__init__.py:555
msgid "Vietnamese"
msgstr ""
#: paperless/settings/__init__.py:787
#: paperless/settings/__init__.py:556
msgid "Chinese Simplified"
msgstr ""
#: paperless/settings/__init__.py:788
#: paperless/settings/__init__.py:557
msgid "Chinese Traditional"
msgstr ""

View File

@@ -6,18 +6,25 @@ import math
import multiprocessing
import os
import tempfile
from os import PathLike
from pathlib import Path
from typing import Final
from urllib.parse import urlparse
from celery.schedules import crontab
from compression_middleware.middleware import CompressionMiddleware
from dateparser.languages.loader import LocaleDataLoader
from django.utils.translation import gettext_lazy as _
from dotenv import load_dotenv
from paperless.settings.custom import parse_beat_schedule
from paperless.settings.custom import parse_dateparser_languages
from paperless.settings.custom import parse_db_settings
from paperless.settings.custom import parse_hosting_settings
from paperless.settings.custom import parse_ignore_dates
from paperless.settings.custom import parse_redis_url
from paperless.settings.parsers import get_bool_from_env
from paperless.settings.parsers import get_float_from_env
from paperless.settings.parsers import get_int_from_env
from paperless.settings.parsers import get_list_from_env
from paperless.settings.parsers import get_path_from_env
logger = logging.getLogger("paperless.settings")
@@ -45,239 +52,8 @@ for path in [
os.environ["OMP_THREAD_LIMIT"] = "1"
def __get_boolean(key: str, default: str = "NO") -> bool:
"""
Return a boolean value based on whatever the user has supplied in the
environment based on whether the value "looks like" it's True or not.
"""
return bool(os.getenv(key, default).lower() in ("yes", "y", "1", "t", "true"))
def __get_int(key: str, default: int) -> int:
"""
Return an integer value based on the environment variable or a default
"""
return int(os.getenv(key, default))
def __get_optional_int(key: str) -> int | None:
"""
Returns None if the environment key is not present, otherwise an integer
"""
if key in os.environ:
return __get_int(key, -1) # pragma: no cover
return None
def __get_float(key: str, default: float) -> float:
"""
Return an integer value based on the environment variable or a default
"""
return float(os.getenv(key, default))
def __get_path(
key: str,
default: PathLike | str,
) -> Path:
"""
Return a normalized, absolute path based on the environment variable or a default,
if provided
"""
if key in os.environ:
return Path(os.environ[key]).resolve()
return Path(default).resolve()
def __get_optional_path(key: str) -> Path | None:
"""
Returns None if the environment key is not present, otherwise a fully resolved Path
"""
if key in os.environ:
return __get_path(key, "")
return None
def __get_list(
key: str,
default: list[str] | None = None,
sep: str = ",",
) -> list[str]:
"""
Return a list of elements from the environment, as separated by the given
string, or the default if the key does not exist
"""
if key in os.environ:
return list(filter(None, os.environ[key].split(sep)))
elif default is not None:
return default
else:
return []
def _parse_redis_url(env_redis: str | None) -> tuple[str, str]:
"""
Gets the Redis information from the environment or a default and handles
converting from incompatible django_channels and celery formats.
Returns a tuple of (celery_url, channels_url)
"""
# Not set, return a compatible default
if env_redis is None:
return ("redis://localhost:6379", "redis://localhost:6379")
if "unix" in env_redis.lower():
# channels_redis socket format, looks like:
# "unix:///path/to/redis.sock"
_, path = env_redis.split(":", 1)
# Optionally setting a db number
if "?db=" in env_redis:
path, number = path.split("?db=")
return (f"redis+socket:{path}?virtual_host={number}", env_redis)
else:
return (f"redis+socket:{path}", env_redis)
elif "+socket" in env_redis.lower():
# celery socket style, looks like:
# "redis+socket:///path/to/redis.sock"
_, path = env_redis.split(":", 1)
if "?virtual_host=" in env_redis:
# Virtual host (aka db number)
path, number = path.split("?virtual_host=")
return (env_redis, f"unix:{path}?db={number}")
else:
return (env_redis, f"unix:{path}")
# Not a socket
return (env_redis, env_redis)
def _parse_beat_schedule() -> dict:
"""
Configures the scheduled tasks, according to default or
environment variables. Task expiration is configured so the task will
expire (and not run), shortly before the default frequency will put another
of the same task into the queue
https://docs.celeryq.dev/en/stable/userguide/periodic-tasks.html#beat-entries
https://docs.celeryq.dev/en/latest/userguide/calling.html#expiration
"""
schedule = {}
tasks = [
{
"name": "Check all e-mail accounts",
"env_key": "PAPERLESS_EMAIL_TASK_CRON",
# Default every ten minutes
"env_default": "*/10 * * * *",
"task": "paperless_mail.tasks.process_mail_accounts",
"options": {
# 1 minute before default schedule sends again
"expires": 9.0 * 60.0,
},
},
{
"name": "Train the classifier",
"env_key": "PAPERLESS_TRAIN_TASK_CRON",
# Default hourly at 5 minutes past the hour
"env_default": "5 */1 * * *",
"task": "documents.tasks.train_classifier",
"options": {
# 1 minute before default schedule sends again
"expires": 59.0 * 60.0,
},
},
{
"name": "Optimize the index",
"env_key": "PAPERLESS_INDEX_TASK_CRON",
# Default daily at midnight
"env_default": "0 0 * * *",
"task": "documents.tasks.index_optimize",
"options": {
# 1 hour before default schedule sends again
"expires": 23.0 * 60.0 * 60.0,
},
},
{
"name": "Perform sanity check",
"env_key": "PAPERLESS_SANITY_TASK_CRON",
# Default Sunday at 00:30
"env_default": "30 0 * * sun",
"task": "documents.tasks.sanity_check",
"options": {
# 1 hour before default schedule sends again
"expires": ((7.0 * 24.0) - 1.0) * 60.0 * 60.0,
},
},
{
"name": "Empty trash",
"env_key": "PAPERLESS_EMPTY_TRASH_TASK_CRON",
# Default daily at 01:00
"env_default": "0 1 * * *",
"task": "documents.tasks.empty_trash",
"options": {
# 1 hour before default schedule sends again
"expires": 23.0 * 60.0 * 60.0,
},
},
{
"name": "Check and run scheduled workflows",
"env_key": "PAPERLESS_WORKFLOW_SCHEDULED_TASK_CRON",
# Default hourly at 5 minutes past the hour
"env_default": "5 */1 * * *",
"task": "documents.tasks.check_scheduled_workflows",
"options": {
# 1 minute before default schedule sends again
"expires": 59.0 * 60.0,
},
},
{
"name": "Rebuild LLM index",
"env_key": "PAPERLESS_LLM_INDEX_TASK_CRON",
# Default daily at 02:10
"env_default": "10 2 * * *",
"task": "documents.tasks.llmindex_index",
"options": {
# 1 hour before default schedule sends again
"expires": 23.0 * 60.0 * 60.0,
},
},
{
"name": "Cleanup expired share link bundles",
"env_key": "PAPERLESS_SHARE_LINK_BUNDLE_CLEANUP_CRON",
# Default daily at 02:00
"env_default": "0 2 * * *",
"task": "documents.tasks.cleanup_expired_share_link_bundles",
"options": {
# 1 hour before default schedule sends again
"expires": 23.0 * 60.0 * 60.0,
},
},
]
for task in tasks:
# Either get the environment setting or use the default
value = os.getenv(task["env_key"], task["env_default"])
# Don't add disabled tasks to the schedule
if value == "disable":
continue
# I find https://crontab.guru/ super helpful
# crontab(5) format
# - five time-and-date fields
# - separated by at least one blank
minute, hour, day_month, month, day_week = value.split(" ")
schedule[task["name"]] = {
"task": task["task"],
"schedule": crontab(minute, hour, day_week, day_month, month),
"options": task["options"],
}
return schedule
# NEVER RUN WITH DEBUG IN PRODUCTION.
DEBUG = __get_boolean("PAPERLESS_DEBUG", "NO")
DEBUG = get_bool_from_env("PAPERLESS_DEBUG", "NO")
###############################################################################
@@ -286,21 +62,21 @@ DEBUG = __get_boolean("PAPERLESS_DEBUG", "NO")
BASE_DIR: Path = Path(__file__).resolve().parent.parent.parent
STATIC_ROOT = __get_path("PAPERLESS_STATICDIR", BASE_DIR.parent / "static")
STATIC_ROOT = get_path_from_env("PAPERLESS_STATICDIR", BASE_DIR.parent / "static")
MEDIA_ROOT = __get_path("PAPERLESS_MEDIA_ROOT", BASE_DIR.parent / "media")
MEDIA_ROOT = get_path_from_env("PAPERLESS_MEDIA_ROOT", BASE_DIR.parent / "media")
ORIGINALS_DIR = MEDIA_ROOT / "documents" / "originals"
ARCHIVE_DIR = MEDIA_ROOT / "documents" / "archive"
THUMBNAIL_DIR = MEDIA_ROOT / "documents" / "thumbnails"
SHARE_LINK_BUNDLE_DIR = MEDIA_ROOT / "documents" / "share_link_bundles"
DATA_DIR = __get_path("PAPERLESS_DATA_DIR", BASE_DIR.parent / "data")
DATA_DIR = get_path_from_env("PAPERLESS_DATA_DIR", BASE_DIR.parent / "data")
NLTK_DIR = __get_path("PAPERLESS_NLTK_DIR", "/usr/share/nltk_data")
NLTK_DIR = get_path_from_env("PAPERLESS_NLTK_DIR", "/usr/share/nltk_data")
# Check deprecated setting first
EMPTY_TRASH_DIR = (
__get_path("PAPERLESS_TRASH_DIR", os.getenv("PAPERLESS_EMPTY_TRASH_DIR"))
get_path_from_env("PAPERLESS_TRASH_DIR", os.getenv("PAPERLESS_EMPTY_TRASH_DIR"))
if os.getenv("PAPERLESS_TRASH_DIR") or os.getenv("PAPERLESS_EMPTY_TRASH_DIR")
else None
)
@@ -309,21 +85,21 @@ EMPTY_TRASH_DIR = (
# threads.
MEDIA_LOCK = MEDIA_ROOT / "media.lock"
INDEX_DIR = DATA_DIR / "index"
MODEL_FILE = __get_path(
MODEL_FILE = get_path_from_env(
"PAPERLESS_MODEL_FILE",
DATA_DIR / "classification_model.pickle",
)
LLM_INDEX_DIR = DATA_DIR / "llm_index"
LOGGING_DIR = __get_path("PAPERLESS_LOGGING_DIR", DATA_DIR / "log")
LOGGING_DIR = get_path_from_env("PAPERLESS_LOGGING_DIR", DATA_DIR / "log")
CONSUMPTION_DIR = __get_path(
CONSUMPTION_DIR = get_path_from_env(
"PAPERLESS_CONSUMPTION_DIR",
BASE_DIR.parent / "consume",
)
# This will be created if it doesn't exist
SCRATCH_DIR = __get_path(
SCRATCH_DIR = get_path_from_env(
"PAPERLESS_SCRATCH_DIR",
Path(tempfile.gettempdir()) / "paperless",
)
@@ -332,7 +108,7 @@ SCRATCH_DIR = __get_path(
# Application Definition #
###############################################################################
env_apps = __get_list("PAPERLESS_APPS")
env_apps = get_list_from_env("PAPERLESS_APPS")
INSTALLED_APPS = [
"whitenoise.runserver_nostatic",
@@ -405,7 +181,7 @@ MIDDLEWARE = [
]
# Optional to enable compression
if __get_boolean("PAPERLESS_ENABLE_COMPRESSION", "yes"): # pragma: no cover
if get_bool_from_env("PAPERLESS_ENABLE_COMPRESSION", "yes"): # pragma: no cover
MIDDLEWARE.insert(0, "compression_middleware.middleware.CompressionMiddleware")
# Workaround to not compress streaming responses (e.g. chat).
@@ -424,20 +200,8 @@ CompressionMiddleware.process_response = patched_process_response
ROOT_URLCONF = "paperless.urls"
def _parse_base_paths() -> tuple[str, str, str, str, str]:
script_name = os.getenv("PAPERLESS_FORCE_SCRIPT_NAME")
base_url = (script_name or "") + "/"
login_url = base_url + "accounts/login/"
login_redirect_url = base_url + "dashboard"
logout_redirect_url = os.getenv(
"PAPERLESS_LOGOUT_REDIRECT_URL",
login_url + "?loggedout=1",
)
return script_name, base_url, login_url, login_redirect_url, logout_redirect_url
FORCE_SCRIPT_NAME, BASE_URL, LOGIN_URL, LOGIN_REDIRECT_URL, LOGOUT_REDIRECT_URL = (
_parse_base_paths()
parse_hosting_settings()
)
# DRF Spectacular settings
@@ -471,7 +235,7 @@ STORAGES = {
"default": {"BACKEND": "django.core.files.storage.FileSystemStorage"},
}
_CELERY_REDIS_URL, _CHANNELS_REDIS_URL = _parse_redis_url(
_CELERY_REDIS_URL, _CHANNELS_REDIS_URL = parse_redis_url(
os.getenv("PAPERLESS_REDIS", None),
)
_REDIS_KEY_PREFIX = os.getenv("PAPERLESS_REDIS_PREFIX", "")
@@ -520,8 +284,8 @@ EMAIL_PORT: Final[int] = int(os.getenv("PAPERLESS_EMAIL_PORT", 25))
EMAIL_HOST_USER: Final[str] = os.getenv("PAPERLESS_EMAIL_HOST_USER", "")
EMAIL_HOST_PASSWORD: Final[str] = os.getenv("PAPERLESS_EMAIL_HOST_PASSWORD", "")
DEFAULT_FROM_EMAIL: Final[str] = os.getenv("PAPERLESS_EMAIL_FROM", EMAIL_HOST_USER)
EMAIL_USE_TLS: Final[bool] = __get_boolean("PAPERLESS_EMAIL_USE_TLS")
EMAIL_USE_SSL: Final[bool] = __get_boolean("PAPERLESS_EMAIL_USE_SSL")
EMAIL_USE_TLS: Final[bool] = get_bool_from_env("PAPERLESS_EMAIL_USE_TLS")
EMAIL_USE_SSL: Final[bool] = get_bool_from_env("PAPERLESS_EMAIL_USE_SSL")
EMAIL_SUBJECT_PREFIX: Final[str] = "[Paperless-ngx] "
EMAIL_TIMEOUT = 30.0
EMAIL_ENABLED = EMAIL_HOST != "localhost" or EMAIL_HOST_USER != ""
@@ -546,20 +310,22 @@ ACCOUNT_DEFAULT_HTTP_PROTOCOL = os.getenv(
)
ACCOUNT_ADAPTER = "paperless.adapter.CustomAccountAdapter"
ACCOUNT_ALLOW_SIGNUPS = __get_boolean("PAPERLESS_ACCOUNT_ALLOW_SIGNUPS")
ACCOUNT_DEFAULT_GROUPS = __get_list("PAPERLESS_ACCOUNT_DEFAULT_GROUPS")
ACCOUNT_ALLOW_SIGNUPS = get_bool_from_env("PAPERLESS_ACCOUNT_ALLOW_SIGNUPS")
ACCOUNT_DEFAULT_GROUPS = get_list_from_env("PAPERLESS_ACCOUNT_DEFAULT_GROUPS")
SOCIALACCOUNT_ADAPTER = "paperless.adapter.CustomSocialAccountAdapter"
SOCIALACCOUNT_ALLOW_SIGNUPS = __get_boolean(
SOCIALACCOUNT_ALLOW_SIGNUPS = get_bool_from_env(
"PAPERLESS_SOCIALACCOUNT_ALLOW_SIGNUPS",
"yes",
)
SOCIALACCOUNT_AUTO_SIGNUP = __get_boolean("PAPERLESS_SOCIAL_AUTO_SIGNUP")
SOCIALACCOUNT_AUTO_SIGNUP = get_bool_from_env("PAPERLESS_SOCIAL_AUTO_SIGNUP")
SOCIALACCOUNT_PROVIDERS = json.loads(
os.getenv("PAPERLESS_SOCIALACCOUNT_PROVIDERS", "{}"),
)
SOCIAL_ACCOUNT_DEFAULT_GROUPS = __get_list("PAPERLESS_SOCIAL_ACCOUNT_DEFAULT_GROUPS")
SOCIAL_ACCOUNT_SYNC_GROUPS = __get_boolean("PAPERLESS_SOCIAL_ACCOUNT_SYNC_GROUPS")
SOCIAL_ACCOUNT_DEFAULT_GROUPS = get_list_from_env(
"PAPERLESS_SOCIAL_ACCOUNT_DEFAULT_GROUPS",
)
SOCIAL_ACCOUNT_SYNC_GROUPS = get_bool_from_env("PAPERLESS_SOCIAL_ACCOUNT_SYNC_GROUPS")
SOCIAL_ACCOUNT_SYNC_GROUPS_CLAIM: Final[str] = os.getenv(
"PAPERLESS_SOCIAL_ACCOUNT_SYNC_GROUPS_CLAIM",
"groups",
@@ -571,8 +337,8 @@ MFA_TOTP_ISSUER = "Paperless-ngx"
ACCOUNT_EMAIL_SUBJECT_PREFIX = "[Paperless-ngx] "
DISABLE_REGULAR_LOGIN = __get_boolean("PAPERLESS_DISABLE_REGULAR_LOGIN")
REDIRECT_LOGIN_TO_SSO = __get_boolean("PAPERLESS_REDIRECT_LOGIN_TO_SSO")
DISABLE_REGULAR_LOGIN = get_bool_from_env("PAPERLESS_DISABLE_REGULAR_LOGIN")
REDIRECT_LOGIN_TO_SSO = get_bool_from_env("PAPERLESS_REDIRECT_LOGIN_TO_SSO")
AUTO_LOGIN_USERNAME = os.getenv("PAPERLESS_AUTO_LOGIN_USERNAME")
@@ -585,12 +351,15 @@ ACCOUNT_EMAIL_VERIFICATION = (
)
)
ACCOUNT_EMAIL_UNKNOWN_ACCOUNTS = __get_boolean(
ACCOUNT_EMAIL_UNKNOWN_ACCOUNTS = get_bool_from_env(
"PAPERLESS_ACCOUNT_EMAIL_UNKNOWN_ACCOUNTS",
"True",
)
ACCOUNT_SESSION_REMEMBER = __get_boolean("PAPERLESS_ACCOUNT_SESSION_REMEMBER", "True")
ACCOUNT_SESSION_REMEMBER = get_bool_from_env(
"PAPERLESS_ACCOUNT_SESSION_REMEMBER",
"True",
)
SESSION_EXPIRE_AT_BROWSER_CLOSE = not ACCOUNT_SESSION_REMEMBER
SESSION_COOKIE_AGE = int(
os.getenv("PAPERLESS_SESSION_COOKIE_AGE", 60 * 60 * 24 * 7 * 3),
@@ -607,8 +376,8 @@ if AUTO_LOGIN_USERNAME:
def _parse_remote_user_settings() -> str:
global MIDDLEWARE, AUTHENTICATION_BACKENDS, REST_FRAMEWORK
enable = __get_boolean("PAPERLESS_ENABLE_HTTP_REMOTE_USER")
enable_api = __get_boolean("PAPERLESS_ENABLE_HTTP_REMOTE_USER_API")
enable = get_bool_from_env("PAPERLESS_ENABLE_HTTP_REMOTE_USER")
enable_api = get_bool_from_env("PAPERLESS_ENABLE_HTTP_REMOTE_USER_API")
if enable or enable_api:
MIDDLEWARE.append("paperless.auth.HttpRemoteUserMiddleware")
AUTHENTICATION_BACKENDS.insert(
@@ -636,16 +405,16 @@ HTTP_REMOTE_USER_HEADER_NAME = _parse_remote_user_settings()
X_FRAME_OPTIONS = "SAMEORIGIN"
# The next 3 settings can also be set using just PAPERLESS_URL
CSRF_TRUSTED_ORIGINS = __get_list("PAPERLESS_CSRF_TRUSTED_ORIGINS")
CSRF_TRUSTED_ORIGINS = get_list_from_env("PAPERLESS_CSRF_TRUSTED_ORIGINS")
if DEBUG:
# Allow access from the angular development server during debugging
CSRF_TRUSTED_ORIGINS.append("http://localhost:4200")
# We allow CORS from localhost:8000
CORS_ALLOWED_ORIGINS = __get_list(
CORS_ALLOWED_ORIGINS = get_list_from_env(
"PAPERLESS_CORS_ALLOWED_HOSTS",
["http://localhost:8000"],
default=["http://localhost:8000"],
)
if DEBUG:
@@ -658,7 +427,7 @@ CORS_EXPOSE_HEADERS = [
"Content-Disposition",
]
ALLOWED_HOSTS = __get_list("PAPERLESS_ALLOWED_HOSTS", ["*"])
ALLOWED_HOSTS = get_list_from_env("PAPERLESS_ALLOWED_HOSTS", default=["*"])
if ALLOWED_HOSTS != ["*"]:
# always allow localhost. Necessary e.g. for healthcheck in docker.
ALLOWED_HOSTS.append("localhost")
@@ -678,10 +447,10 @@ def _parse_paperless_url():
PAPERLESS_URL = _parse_paperless_url()
# For use with trusted proxies
TRUSTED_PROXIES = __get_list("PAPERLESS_TRUSTED_PROXIES")
TRUSTED_PROXIES = get_list_from_env("PAPERLESS_TRUSTED_PROXIES")
USE_X_FORWARDED_HOST = __get_boolean("PAPERLESS_USE_X_FORWARD_HOST", "false")
USE_X_FORWARDED_PORT = __get_boolean("PAPERLESS_USE_X_FORWARD_PORT", "false")
USE_X_FORWARDED_HOST = get_bool_from_env("PAPERLESS_USE_X_FORWARD_HOST", "false")
USE_X_FORWARDED_PORT = get_bool_from_env("PAPERLESS_USE_X_FORWARD_PORT", "false")
SECURE_PROXY_SSL_HEADER = (
tuple(json.loads(os.environ["PAPERLESS_PROXY_SSL_HEADER"]))
if "PAPERLESS_PROXY_SSL_HEADER" in os.environ
@@ -724,7 +493,7 @@ CSRF_COOKIE_NAME = f"{COOKIE_PREFIX}csrftoken"
SESSION_COOKIE_NAME = f"{COOKIE_PREFIX}sessionid"
LANGUAGE_COOKIE_NAME = f"{COOKIE_PREFIX}django_language"
EMAIL_CERTIFICATE_FILE = __get_optional_path("PAPERLESS_EMAIL_CERTIFICATE_LOCATION")
EMAIL_CERTIFICATE_FILE = get_path_from_env("PAPERLESS_EMAIL_CERTIFICATE_LOCATION")
###############################################################################
@@ -875,7 +644,7 @@ CELERY_BROKER_URL = _CELERY_REDIS_URL
CELERY_TIMEZONE = TIME_ZONE
CELERY_WORKER_HIJACK_ROOT_LOGGER = False
CELERY_WORKER_CONCURRENCY: Final[int] = __get_int("PAPERLESS_TASK_WORKERS", 1)
CELERY_WORKER_CONCURRENCY: Final[int] = get_int_from_env("PAPERLESS_TASK_WORKERS", 1)
TASK_WORKERS = CELERY_WORKER_CONCURRENCY
CELERY_WORKER_MAX_TASKS_PER_CHILD = 1
CELERY_WORKER_SEND_TASK_EVENTS = True
@@ -888,7 +657,7 @@ CELERY_BROKER_TRANSPORT_OPTIONS = {
}
CELERY_TASK_TRACK_STARTED = True
CELERY_TASK_TIME_LIMIT: Final[int] = __get_int("PAPERLESS_WORKER_TIMEOUT", 1800)
CELERY_TASK_TIME_LIMIT: Final[int] = get_int_from_env("PAPERLESS_WORKER_TIMEOUT", 1800)
CELERY_RESULT_EXTENDED = True
CELERY_RESULT_BACKEND = "django-db"
@@ -900,7 +669,7 @@ CELERY_TASK_SERIALIZER = "pickle"
CELERY_ACCEPT_CONTENT = ["application/json", "application/x-python-serialize"]
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#beat-schedule
CELERY_BEAT_SCHEDULE = _parse_beat_schedule()
CELERY_BEAT_SCHEDULE = parse_beat_schedule()
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#beat-schedule-filename
CELERY_BEAT_SCHEDULE_FILENAME = str(DATA_DIR / "celerybeat-schedule.db")
@@ -908,14 +677,14 @@ CELERY_BEAT_SCHEDULE_FILENAME = str(DATA_DIR / "celerybeat-schedule.db")
# Cachalot: Database read cache.
def _parse_cachalot_settings():
ttl = __get_int("PAPERLESS_READ_CACHE_TTL", 3600)
ttl = get_int_from_env("PAPERLESS_READ_CACHE_TTL", 3600)
ttl = min(ttl, 31536000) if ttl > 0 else 3600
_, redis_url = _parse_redis_url(
_, redis_url = parse_redis_url(
os.getenv("PAPERLESS_READ_CACHE_REDIS_URL", _CHANNELS_REDIS_URL),
)
result = {
"CACHALOT_CACHE": "read-cache",
"CACHALOT_ENABLED": __get_boolean(
"CACHALOT_ENABLED": get_bool_from_env(
"PAPERLESS_DB_READ_CACHE_ENABLED",
default="no",
),
@@ -1000,9 +769,9 @@ CONSUMER_POLLING_INTERVAL = float(os.getenv("PAPERLESS_CONSUMER_POLLING_INTERVAL
CONSUMER_STABILITY_DELAY = float(os.getenv("PAPERLESS_CONSUMER_STABILITY_DELAY", 5))
CONSUMER_DELETE_DUPLICATES = __get_boolean("PAPERLESS_CONSUMER_DELETE_DUPLICATES")
CONSUMER_DELETE_DUPLICATES = get_bool_from_env("PAPERLESS_CONSUMER_DELETE_DUPLICATES")
CONSUMER_RECURSIVE = __get_boolean("PAPERLESS_CONSUMER_RECURSIVE")
CONSUMER_RECURSIVE = get_bool_from_env("PAPERLESS_CONSUMER_RECURSIVE")
# Ignore regex patterns, matched against filename only
CONSUMER_IGNORE_PATTERNS = list(
@@ -1024,13 +793,13 @@ CONSUMER_IGNORE_DIRS = list(
),
)
CONSUMER_SUBDIRS_AS_TAGS = __get_boolean("PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS")
CONSUMER_SUBDIRS_AS_TAGS = get_bool_from_env("PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS")
CONSUMER_ENABLE_BARCODES: Final[bool] = __get_boolean(
CONSUMER_ENABLE_BARCODES: Final[bool] = get_bool_from_env(
"PAPERLESS_CONSUMER_ENABLE_BARCODES",
)
CONSUMER_BARCODE_TIFF_SUPPORT: Final[bool] = __get_boolean(
CONSUMER_BARCODE_TIFF_SUPPORT: Final[bool] = get_bool_from_env(
"PAPERLESS_CONSUMER_BARCODE_TIFF_SUPPORT",
)
@@ -1039,7 +808,7 @@ CONSUMER_BARCODE_STRING: Final[str] = os.getenv(
"PATCHT",
)
CONSUMER_ENABLE_ASN_BARCODE: Final[bool] = __get_boolean(
CONSUMER_ENABLE_ASN_BARCODE: Final[bool] = get_bool_from_env(
"PAPERLESS_CONSUMER_ENABLE_ASN_BARCODE",
)
@@ -1048,23 +817,26 @@ CONSUMER_ASN_BARCODE_PREFIX: Final[str] = os.getenv(
"ASN",
)
CONSUMER_BARCODE_UPSCALE: Final[float] = __get_float(
CONSUMER_BARCODE_UPSCALE: Final[float] = get_float_from_env(
"PAPERLESS_CONSUMER_BARCODE_UPSCALE",
0.0,
)
CONSUMER_BARCODE_DPI: Final[int] = __get_int("PAPERLESS_CONSUMER_BARCODE_DPI", 300)
CONSUMER_BARCODE_DPI: Final[int] = get_int_from_env(
"PAPERLESS_CONSUMER_BARCODE_DPI",
300,
)
CONSUMER_BARCODE_MAX_PAGES: Final[int] = __get_int(
CONSUMER_BARCODE_MAX_PAGES: Final[int] = get_int_from_env(
"PAPERLESS_CONSUMER_BARCODE_MAX_PAGES",
0,
)
CONSUMER_BARCODE_RETAIN_SPLIT_PAGES = __get_boolean(
CONSUMER_BARCODE_RETAIN_SPLIT_PAGES = get_bool_from_env(
"PAPERLESS_CONSUMER_BARCODE_RETAIN_SPLIT_PAGES",
)
CONSUMER_ENABLE_TAG_BARCODE: Final[bool] = __get_boolean(
CONSUMER_ENABLE_TAG_BARCODE: Final[bool] = get_bool_from_env(
"PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE",
)
@@ -1077,11 +849,11 @@ CONSUMER_TAG_BARCODE_MAPPING = dict(
),
)
CONSUMER_TAG_BARCODE_SPLIT: Final[bool] = __get_boolean(
CONSUMER_TAG_BARCODE_SPLIT: Final[bool] = get_bool_from_env(
"PAPERLESS_CONSUMER_TAG_BARCODE_SPLIT",
)
CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED: Final[bool] = __get_boolean(
CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED: Final[bool] = get_bool_from_env(
"PAPERLESS_CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED",
)
@@ -1090,13 +862,13 @@ CONSUMER_COLLATE_DOUBLE_SIDED_SUBDIR_NAME: Final[str] = os.getenv(
"double-sided",
)
CONSUMER_COLLATE_DOUBLE_SIDED_TIFF_SUPPORT: Final[bool] = __get_boolean(
CONSUMER_COLLATE_DOUBLE_SIDED_TIFF_SUPPORT: Final[bool] = get_bool_from_env(
"PAPERLESS_CONSUMER_COLLATE_DOUBLE_SIDED_TIFF_SUPPORT",
)
CONSUMER_PDF_RECOVERABLE_MIME_TYPES = ("application/octet-stream",)
OCR_PAGES = __get_optional_int("PAPERLESS_OCR_PAGES")
OCR_PAGES = get_int_from_env("PAPERLESS_OCR_PAGES")
# The default language that tesseract will attempt to use when parsing
# documents. It should be a 3-letter language code consistent with ISO 639.
@@ -1110,20 +882,20 @@ OCR_MODE = os.getenv("PAPERLESS_OCR_MODE", "skip")
OCR_SKIP_ARCHIVE_FILE = os.getenv("PAPERLESS_OCR_SKIP_ARCHIVE_FILE", "never")
OCR_IMAGE_DPI = __get_optional_int("PAPERLESS_OCR_IMAGE_DPI")
OCR_IMAGE_DPI = get_int_from_env("PAPERLESS_OCR_IMAGE_DPI")
OCR_CLEAN = os.getenv("PAPERLESS_OCR_CLEAN", "clean")
OCR_DESKEW: Final[bool] = __get_boolean("PAPERLESS_OCR_DESKEW", "true")
OCR_DESKEW: Final[bool] = get_bool_from_env("PAPERLESS_OCR_DESKEW", "true")
OCR_ROTATE_PAGES: Final[bool] = __get_boolean("PAPERLESS_OCR_ROTATE_PAGES", "true")
OCR_ROTATE_PAGES: Final[bool] = get_bool_from_env("PAPERLESS_OCR_ROTATE_PAGES", "true")
OCR_ROTATE_PAGES_THRESHOLD: Final[float] = __get_float(
OCR_ROTATE_PAGES_THRESHOLD: Final[float] = get_float_from_env(
"PAPERLESS_OCR_ROTATE_PAGES_THRESHOLD",
12.0,
)
OCR_MAX_IMAGE_PIXELS: Final[int | None] = __get_optional_int(
OCR_MAX_IMAGE_PIXELS: Final[int | None] = get_int_from_env(
"PAPERLESS_OCR_MAX_IMAGE_PIXELS",
)
@@ -1134,7 +906,7 @@ OCR_COLOR_CONVERSION_STRATEGY = os.getenv(
OCR_USER_ARGS = os.getenv("PAPERLESS_OCR_USER_ARGS")
MAX_IMAGE_PIXELS: Final[int | None] = __get_optional_int(
MAX_IMAGE_PIXELS: Final[int | None] = get_int_from_env(
"PAPERLESS_MAX_IMAGE_PIXELS",
)
@@ -1149,7 +921,7 @@ CONVERT_MEMORY_LIMIT = os.getenv("PAPERLESS_CONVERT_MEMORY_LIMIT")
GS_BINARY = os.getenv("PAPERLESS_GS_BINARY", "gs")
# Fallback layout for .eml consumption
EMAIL_PARSE_DEFAULT_LAYOUT = __get_int(
EMAIL_PARSE_DEFAULT_LAYOUT = get_int_from_env(
"PAPERLESS_EMAIL_PARSE_DEFAULT_LAYOUT",
1, # MailRule.PdfLayout.TEXT_HTML but that can't be imported here
)
@@ -1163,23 +935,9 @@ DATE_ORDER = os.getenv("PAPERLESS_DATE_ORDER", "DMY")
FILENAME_DATE_ORDER = os.getenv("PAPERLESS_FILENAME_DATE_ORDER")
def _parse_dateparser_languages(languages: str | None):
language_list = languages.split("+") if languages else []
# There is an unfixed issue in zh-Hant and zh-Hans locales in the dateparser lib.
# See: https://github.com/scrapinghub/dateparser/issues/875
for index, language in enumerate(language_list):
if language.startswith("zh-") and "zh" not in language_list:
logger.warning(
f'Chinese locale detected: {language}. dateparser might fail to parse some dates with this locale, so Chinese ("zh") will be used as a fallback.',
)
language_list.append("zh")
return list(LocaleDataLoader().get_locale_map(locales=language_list))
# If not set, we will infer it at runtime
DATE_PARSER_LANGUAGES = (
_parse_dateparser_languages(
parse_dateparser_languages(
os.getenv("PAPERLESS_DATE_PARSER_LANGUAGES"),
)
if os.getenv("PAPERLESS_DATE_PARSER_LANGUAGES")
@@ -1190,7 +948,7 @@ DATE_PARSER_LANGUAGES = (
# Maximum number of dates taken from document start to end to show as suggestions for
# `created` date in the frontend. Duplicates are removed, which can result in
# fewer dates shown.
NUMBER_OF_SUGGESTED_DATES = __get_int("PAPERLESS_NUMBER_OF_SUGGESTED_DATES", 3)
NUMBER_OF_SUGGESTED_DATES = get_int_from_env("PAPERLESS_NUMBER_OF_SUGGESTED_DATES", 3)
# Specify the filename format for out files
FILENAME_FORMAT = os.getenv("PAPERLESS_FILENAME_FORMAT")
@@ -1198,7 +956,7 @@ FILENAME_FORMAT = os.getenv("PAPERLESS_FILENAME_FORMAT")
# If this is enabled, variables in filename format will resolve to
# empty-string instead of 'none'.
# Directories with 'empty names' are omitted, too.
FILENAME_FORMAT_REMOVE_NONE = __get_boolean(
FILENAME_FORMAT_REMOVE_NONE = get_bool_from_env(
"PAPERLESS_FILENAME_FORMAT_REMOVE_NONE",
"NO",
)
@@ -1209,7 +967,7 @@ THUMBNAIL_FONT_NAME = os.getenv(
)
# Tika settings
TIKA_ENABLED = __get_boolean("PAPERLESS_TIKA_ENABLED", "NO")
TIKA_ENABLED = get_bool_from_env("PAPERLESS_TIKA_ENABLED", "NO")
TIKA_ENDPOINT = os.getenv("PAPERLESS_TIKA_ENDPOINT", "http://localhost:9998")
TIKA_GOTENBERG_ENDPOINT = os.getenv(
"PAPERLESS_TIKA_GOTENBERG_ENDPOINT",
@@ -1219,52 +977,21 @@ TIKA_GOTENBERG_ENDPOINT = os.getenv(
if TIKA_ENABLED:
INSTALLED_APPS.append("paperless_tika.apps.PaperlessTikaConfig")
AUDIT_LOG_ENABLED = __get_boolean("PAPERLESS_AUDIT_LOG_ENABLED", "true")
AUDIT_LOG_ENABLED = get_bool_from_env("PAPERLESS_AUDIT_LOG_ENABLED", "true")
if AUDIT_LOG_ENABLED:
INSTALLED_APPS.append("auditlog")
MIDDLEWARE.append("auditlog.middleware.AuditlogMiddleware")
def _parse_ignore_dates(
env_ignore: str,
date_order: str = DATE_ORDER,
) -> set[datetime.datetime]:
"""
If the PAPERLESS_IGNORE_DATES environment variable is set, parse the
user provided string(s) into dates
Args:
env_ignore (str): The value of the environment variable, comma separated dates
date_order (str, optional): The format of the date strings.
Defaults to DATE_ORDER.
Returns:
Set[datetime.datetime]: The set of parsed date objects
"""
import dateparser
ignored_dates = set()
for s in env_ignore.split(","):
d = dateparser.parse(
s,
settings={
"DATE_ORDER": date_order,
},
)
if d:
ignored_dates.add(d.date())
return ignored_dates
# List dates that should be ignored when trying to parse date from document text
IGNORE_DATES: set[datetime.date] = set()
if os.getenv("PAPERLESS_IGNORE_DATES") is not None:
IGNORE_DATES = _parse_ignore_dates(os.getenv("PAPERLESS_IGNORE_DATES"))
IGNORE_DATES = parse_ignore_dates(os.getenv("PAPERLESS_IGNORE_DATES"), DATE_ORDER)
ENABLE_UPDATE_CHECK = os.getenv("PAPERLESS_ENABLE_UPDATE_CHECK", "default")
if ENABLE_UPDATE_CHECK != "default":
ENABLE_UPDATE_CHECK = __get_boolean("PAPERLESS_ENABLE_UPDATE_CHECK")
ENABLE_UPDATE_CHECK = get_bool_from_env("PAPERLESS_ENABLE_UPDATE_CHECK")
APP_TITLE = os.getenv("PAPERLESS_APP_TITLE", None)
APP_LOGO = os.getenv("PAPERLESS_APP_LOGO", None)
@@ -1309,7 +1036,7 @@ def _get_nltk_language_setting(ocr_lang: str) -> str | None:
return iso_code_to_nltk.get(ocr_lang)
NLTK_ENABLED: Final[bool] = __get_boolean("PAPERLESS_ENABLE_NLTK", "yes")
NLTK_ENABLED: Final[bool] = get_bool_from_env("PAPERLESS_ENABLE_NLTK", "yes")
NLTK_LANGUAGE: str | None = _get_nltk_language_setting(OCR_LANGUAGE)
@@ -1318,7 +1045,7 @@ NLTK_LANGUAGE: str | None = _get_nltk_language_setting(OCR_LANGUAGE)
###############################################################################
EMAIL_GNUPG_HOME: Final[str | None] = os.getenv("PAPERLESS_EMAIL_GNUPG_HOME")
EMAIL_ENABLE_GPG_DECRYPTOR: Final[bool] = __get_boolean(
EMAIL_ENABLE_GPG_DECRYPTOR: Final[bool] = get_bool_from_env(
"PAPERLESS_ENABLE_GPG_DECRYPTOR",
)
@@ -1326,7 +1053,7 @@ EMAIL_ENABLE_GPG_DECRYPTOR: Final[bool] = __get_boolean(
###############################################################################
# Soft Delete #
###############################################################################
EMPTY_TRASH_DELAY = max(__get_int("PAPERLESS_EMPTY_TRASH_DELAY", 30), 1)
EMPTY_TRASH_DELAY = max(get_int_from_env("PAPERLESS_EMPTY_TRASH_DELAY", 30), 1)
###############################################################################
@@ -1351,21 +1078,17 @@ OUTLOOK_OAUTH_ENABLED = bool(
###############################################################################
# Webhooks
###############################################################################
WEBHOOKS_ALLOWED_SCHEMES = set(
WEBHOOKS_ALLOWED_SCHEMES = {
s.lower()
for s in __get_list(
for s in get_list_from_env(
"PAPERLESS_WEBHOOKS_ALLOWED_SCHEMES",
["http", "https"],
default=["http", "https"],
)
)
WEBHOOKS_ALLOWED_PORTS = set(
int(p)
for p in __get_list(
"PAPERLESS_WEBHOOKS_ALLOWED_PORTS",
[],
)
)
WEBHOOKS_ALLOW_INTERNAL_REQUESTS = __get_boolean(
}
WEBHOOKS_ALLOWED_PORTS = {
int(p) for p in get_list_from_env("PAPERLESS_WEBHOOKS_ALLOWED_PORTS", default=[])
}
WEBHOOKS_ALLOW_INTERNAL_REQUESTS = get_bool_from_env(
"PAPERLESS_WEBHOOKS_ALLOW_INTERNAL_REQUESTS",
"true",
)
@@ -1380,7 +1103,7 @@ REMOTE_OCR_ENDPOINT = os.getenv("PAPERLESS_REMOTE_OCR_ENDPOINT")
################################################################################
# AI Settings #
################################################################################
AI_ENABLED = __get_boolean("PAPERLESS_AI_ENABLED", "NO")
AI_ENABLED = get_bool_from_env("PAPERLESS_AI_ENABLED", "NO")
LLM_EMBEDDING_BACKEND = os.getenv(
"PAPERLESS_AI_LLM_EMBEDDING_BACKEND",
) # "huggingface" or "openai"

View File

@@ -1,11 +1,191 @@
import datetime
import logging
import os
from pathlib import Path
from typing import Any
from celery.schedules import crontab
from dateparser.languages.loader import LocaleDataLoader
from paperless.settings.parsers import get_choice_from_env
from paperless.settings.parsers import get_int_from_env
from paperless.settings.parsers import parse_dict_from_str
logger = logging.getLogger(__name__)
def parse_hosting_settings() -> tuple[str | None, str, str, str, str]:
script_name = os.getenv("PAPERLESS_FORCE_SCRIPT_NAME")
base_url = (script_name or "") + "/"
login_url = base_url + "accounts/login/"
login_redirect_url = base_url + "dashboard"
logout_redirect_url = os.getenv(
"PAPERLESS_LOGOUT_REDIRECT_URL",
login_url + "?loggedout=1",
)
return script_name, base_url, login_url, login_redirect_url, logout_redirect_url
def parse_redis_url(env_redis: str | None) -> tuple[str, str]:
"""
Gets the Redis information from the environment or a default and handles
converting from incompatible django_channels and celery formats.
Returns a tuple of (celery_url, channels_url)
"""
# Not set, return a compatible default
if env_redis is None:
return ("redis://localhost:6379", "redis://localhost:6379")
if "unix" in env_redis.lower():
# channels_redis socket format, looks like:
# "unix:///path/to/redis.sock"
_, path = env_redis.split(":", maxsplit=1)
# Optionally setting a db number
if "?db=" in env_redis:
path, number = path.split("?db=")
return (f"redis+socket:{path}?virtual_host={number}", env_redis)
else:
return (f"redis+socket:{path}", env_redis)
elif "+socket" in env_redis.lower():
# celery socket style, looks like:
# "redis+socket:///path/to/redis.sock"
_, path = env_redis.split(":", maxsplit=1)
if "?virtual_host=" in env_redis:
# Virtual host (aka db number)
path, number = path.split("?virtual_host=")
return (env_redis, f"unix:{path}?db={number}")
else:
return (env_redis, f"unix:{path}")
# Not a socket
return (env_redis, env_redis)
def parse_beat_schedule() -> dict:
"""
Configures the scheduled tasks, according to default or
environment variables. Task expiration is configured so the task will
expire (and not run), shortly before the default frequency will put another
of the same task into the queue
https://docs.celeryq.dev/en/stable/userguide/periodic-tasks.html#beat-entries
https://docs.celeryq.dev/en/latest/userguide/calling.html#expiration
"""
schedule = {}
tasks = [
{
"name": "Check all e-mail accounts",
"env_key": "PAPERLESS_EMAIL_TASK_CRON",
# Default every ten minutes
"env_default": "*/10 * * * *",
"task": "paperless_mail.tasks.process_mail_accounts",
"options": {
# 1 minute before default schedule sends again
"expires": 9.0 * 60.0,
},
},
{
"name": "Train the classifier",
"env_key": "PAPERLESS_TRAIN_TASK_CRON",
# Default hourly at 5 minutes past the hour
"env_default": "5 */1 * * *",
"task": "documents.tasks.train_classifier",
"options": {
# 1 minute before default schedule sends again
"expires": 59.0 * 60.0,
},
},
{
"name": "Optimize the index",
"env_key": "PAPERLESS_INDEX_TASK_CRON",
# Default daily at midnight
"env_default": "0 0 * * *",
"task": "documents.tasks.index_optimize",
"options": {
# 1 hour before default schedule sends again
"expires": 23.0 * 60.0 * 60.0,
},
},
{
"name": "Perform sanity check",
"env_key": "PAPERLESS_SANITY_TASK_CRON",
# Default Sunday at 00:30
"env_default": "30 0 * * sun",
"task": "documents.tasks.sanity_check",
"options": {
# 1 hour before default schedule sends again
"expires": ((7.0 * 24.0) - 1.0) * 60.0 * 60.0,
},
},
{
"name": "Empty trash",
"env_key": "PAPERLESS_EMPTY_TRASH_TASK_CRON",
# Default daily at 01:00
"env_default": "0 1 * * *",
"task": "documents.tasks.empty_trash",
"options": {
# 1 hour before default schedule sends again
"expires": 23.0 * 60.0 * 60.0,
},
},
{
"name": "Check and run scheduled workflows",
"env_key": "PAPERLESS_WORKFLOW_SCHEDULED_TASK_CRON",
# Default hourly at 5 minutes past the hour
"env_default": "5 */1 * * *",
"task": "documents.tasks.check_scheduled_workflows",
"options": {
# 1 minute before default schedule sends again
"expires": 59.0 * 60.0,
},
},
{
"name": "Rebuild LLM index",
"env_key": "PAPERLESS_LLM_INDEX_TASK_CRON",
# Default daily at 02:10
"env_default": "10 2 * * *",
"task": "documents.tasks.llmindex_index",
"options": {
# 1 hour before default schedule sends again
"expires": 23.0 * 60.0 * 60.0,
},
},
{
"name": "Cleanup expired share link bundles",
"env_key": "PAPERLESS_SHARE_LINK_BUNDLE_CLEANUP_CRON",
# Default daily at 02:00
"env_default": "0 2 * * *",
"task": "documents.tasks.cleanup_expired_share_link_bundles",
"options": {
# 1 hour before default schedule sends again
"expires": 23.0 * 60.0 * 60.0,
},
},
]
for task in tasks:
# Either get the environment setting or use the default
value = os.getenv(task["env_key"], task["env_default"])
# Don't add disabled tasks to the schedule
if value == "disable":
continue
# I find https://crontab.guru/ super helpful
# crontab(5) format
# - five time-and-date fields
# - separated by at least one blank
minute, hour, day_month, month, day_week = value.split(" ")
schedule[task["name"]] = {
"task": task["task"],
"schedule": crontab(minute, hour, day_week, day_month, month),
"options": task["options"],
}
return schedule
def parse_db_settings(data_dir: Path) -> dict[str, dict[str, Any]]:
"""Parse database settings from environment variables.
@@ -120,3 +300,48 @@ def parse_db_settings(data_dir: Path) -> dict[str, dict[str, Any]]:
)
return {"default": db_config}
def parse_dateparser_languages(languages: str | None) -> list[str]:
language_list = languages.split("+") if languages else []
# There is an unfixed issue in zh-Hant and zh-Hans locales in the dateparser lib.
# See: https://github.com/scrapinghub/dateparser/issues/875
for index, language in enumerate(language_list):
if language.startswith("zh-") and "zh" not in language_list:
logger.warning(
f"Chinese locale detected: {language}. dateparser might fail to parse"
f' some dates with this locale, so Chinese ("zh") will be used as a fallback.',
)
language_list.append("zh")
return list(LocaleDataLoader().get_locale_map(locales=language_list))
def parse_ignore_dates(
env_ignore: str,
date_order: str,
) -> set[datetime.date]:
"""
If the PAPERLESS_IGNORE_DATES environment variable is set, parse the
user provided string(s) into dates
Args:
env_ignore (str): The value of the environment variable, comma separated dates
date_order (str): The format of the date strings.
Returns:
set[datetime.date]: The set of parsed date objects
"""
import dateparser
ignored_dates = set()
for s in env_ignore.split(","):
d = dateparser.parse(
s,
settings={
"DATE_ORDER": date_order,
},
)
if d:
ignored_dates.add(d.date())
return ignored_dates

View File

@@ -156,6 +156,108 @@ def parse_dict_from_str(
return settings
def get_bool_from_env(key: str, default: str = "NO") -> bool:
"""
Return a boolean value based on whatever the user has supplied in the
environment based on whether the value "looks like" it's True or not.
"""
return str_to_bool(os.getenv(key, default))
@overload
def get_float_from_env(key: str) -> float | None: ...
@overload
def get_float_from_env(key: str, default: None) -> float | None: ...
@overload
def get_float_from_env(key: str, default: float) -> float: ...
def get_float_from_env(key: str, default: float | None = None) -> float | None:
"""
Return a float value based on the environment variable.
If default is provided, returns that value when key is missing.
If default is None, returns None when key is missing.
"""
if key not in os.environ:
return default
return float(os.environ[key])
@overload
def get_path_from_env(key: str) -> Path | None: ...
@overload
def get_path_from_env(key: str, default: None) -> Path | None: ...
@overload
def get_path_from_env(key: str, default: Path | str) -> Path: ...
def get_path_from_env(key: str, default: Path | str | None = None) -> Path | None:
"""
Return a Path object based on the environment variable.
If default is provided, returns that value when key is missing.
If default is None, returns None when key is missing.
"""
if key not in os.environ:
return default if default is None else Path(default).resolve()
return Path(os.environ[key]).resolve()
def get_list_from_env(
key: str,
separator: str = ",",
default: list[T] | None = None,
*,
strip_whitespace: bool = True,
remove_empty: bool = True,
required: bool = False,
) -> list[str] | list[T]:
"""
Get and parse a list from an environment variable or return a default.
Args:
key: Environment variable name
separator: Character(s) to split on (default: ',')
default: Default value to return if env var is not set or empty
strip_whitespace: Whether to strip whitespace from each element
remove_empty: Whether to remove empty strings from the result
required: If True, raise an error when the env var is missing and no default provided
Returns:
List of strings or list of type-cast values, or default if env var is empty/None
Raises:
ValueError: If required=True and env var is missing and there is no default
"""
# Get the environment variable value
env_value = os.environ.get(key)
# Handle required environment variables
if required and env_value is None and default is None:
raise ValueError(f"Required environment variable '{key}' is not set")
if env_value:
items = env_value.split(separator)
if strip_whitespace:
items = [item.strip() for item in items]
if remove_empty:
items = [item for item in items if item]
return items
elif default is not None:
return default
else:
return []
def get_choice_from_env(
env_key: str,
choices: set[str],

View File

@@ -1,10 +1,279 @@
import datetime
import os
from pathlib import Path
from typing import Any
import pytest
from celery.schedules import crontab
from pytest_mock import MockerFixture
from paperless.settings.custom import parse_beat_schedule
from paperless.settings.custom import parse_dateparser_languages
from paperless.settings.custom import parse_db_settings
from paperless.settings.custom import parse_hosting_settings
from paperless.settings.custom import parse_ignore_dates
from paperless.settings.custom import parse_redis_url
class TestRedisSocketConversion:
@pytest.mark.parametrize(
("input_url", "expected"),
[
pytest.param(
None,
("redis://localhost:6379", "redis://localhost:6379"),
id="none_uses_default",
),
pytest.param(
"redis+socket:///run/redis/redis.sock",
(
"redis+socket:///run/redis/redis.sock",
"unix:///run/redis/redis.sock",
),
id="celery_style_socket",
),
pytest.param(
"unix:///run/redis/redis.sock",
(
"redis+socket:///run/redis/redis.sock",
"unix:///run/redis/redis.sock",
),
id="redis_py_style_socket",
),
pytest.param(
"redis+socket:///run/redis/redis.sock?virtual_host=5",
(
"redis+socket:///run/redis/redis.sock?virtual_host=5",
"unix:///run/redis/redis.sock?db=5",
),
id="celery_style_socket_with_db",
),
pytest.param(
"unix:///run/redis/redis.sock?db=10",
(
"redis+socket:///run/redis/redis.sock?virtual_host=10",
"unix:///run/redis/redis.sock?db=10",
),
id="redis_py_style_socket_with_db",
),
pytest.param(
"redis://myredishost:6379",
("redis://myredishost:6379", "redis://myredishost:6379"),
id="host_with_port_unchanged",
),
# Credentials in unix:// URL contain multiple colons (user:password@)
# Regression test for https://github.com/paperless-ngx/paperless-ngx/pull/12239
pytest.param(
"unix://user:password@/run/redis/redis.sock",
(
"redis+socket://user:password@/run/redis/redis.sock",
"unix://user:password@/run/redis/redis.sock",
),
id="redis_py_style_socket_with_credentials",
),
pytest.param(
"redis+socket://user:password@/run/redis/redis.sock",
(
"redis+socket://user:password@/run/redis/redis.sock",
"unix://user:password@/run/redis/redis.sock",
),
id="celery_style_socket_with_credentials",
),
],
)
def test_redis_socket_parsing(
self,
input_url: str | None,
expected: tuple[str, str],
) -> None:
"""
GIVEN:
- Various Redis connection URI formats
WHEN:
- The URI is parsed
THEN:
- Socket based URIs are translated
- Non-socket URIs are unchanged
- None provided uses default
"""
result = parse_redis_url(input_url)
assert expected == result
class TestParseHostingSettings:
@pytest.mark.parametrize(
("env", "expected"),
[
pytest.param(
{},
(
None,
"/",
"/accounts/login/",
"/dashboard",
"/accounts/login/?loggedout=1",
),
id="no_env_vars",
),
pytest.param(
{"PAPERLESS_FORCE_SCRIPT_NAME": "/paperless"},
(
"/paperless",
"/paperless/",
"/paperless/accounts/login/",
"/paperless/dashboard",
"/paperless/accounts/login/?loggedout=1",
),
id="force_script_name_only",
),
pytest.param(
{
"PAPERLESS_FORCE_SCRIPT_NAME": "/docs",
"PAPERLESS_LOGOUT_REDIRECT_URL": "/custom/logout",
},
(
"/docs",
"/docs/",
"/docs/accounts/login/",
"/docs/dashboard",
"/custom/logout",
),
id="force_script_name_and_logout_redirect",
),
],
)
def test_parse_hosting_settings(
self,
mocker: MockerFixture,
env: dict[str, str],
expected: tuple[str | None, str, str, str, str],
) -> None:
"""Test parse_hosting_settings with various env configurations."""
mocker.patch.dict(os.environ, env, clear=True)
result = parse_hosting_settings()
assert result == expected
def make_expected_schedule(
overrides: dict[str, dict[str, Any]] | None = None,
disabled: set[str] | None = None,
) -> dict[str, Any]:
"""
Build the expected schedule with optional overrides and disabled tasks.
"""
mail_expire = 9.0 * 60.0
classifier_expire = 59.0 * 60.0
index_expire = 23.0 * 60.0 * 60.0
sanity_expire = ((7.0 * 24.0) - 1.0) * 60.0 * 60.0
empty_trash_expire = 23.0 * 60.0 * 60.0
workflow_expire = 59.0 * 60.0
llm_index_expire = 23.0 * 60.0 * 60.0
share_link_cleanup_expire = 23.0 * 60.0 * 60.0
schedule: dict[str, Any] = {
"Check all e-mail accounts": {
"task": "paperless_mail.tasks.process_mail_accounts",
"schedule": crontab(minute="*/10"),
"options": {"expires": mail_expire},
},
"Train the classifier": {
"task": "documents.tasks.train_classifier",
"schedule": crontab(minute="5", hour="*/1"),
"options": {"expires": classifier_expire},
},
"Optimize the index": {
"task": "documents.tasks.index_optimize",
"schedule": crontab(minute=0, hour=0),
"options": {"expires": index_expire},
},
"Perform sanity check": {
"task": "documents.tasks.sanity_check",
"schedule": crontab(minute=30, hour=0, day_of_week="sun"),
"options": {"expires": sanity_expire},
},
"Empty trash": {
"task": "documents.tasks.empty_trash",
"schedule": crontab(minute=0, hour="1"),
"options": {"expires": empty_trash_expire},
},
"Check and run scheduled workflows": {
"task": "documents.tasks.check_scheduled_workflows",
"schedule": crontab(minute="5", hour="*/1"),
"options": {"expires": workflow_expire},
},
"Rebuild LLM index": {
"task": "documents.tasks.llmindex_index",
"schedule": crontab(minute="10", hour="2"),
"options": {"expires": llm_index_expire},
},
"Cleanup expired share link bundles": {
"task": "documents.tasks.cleanup_expired_share_link_bundles",
"schedule": crontab(minute=0, hour="2"),
"options": {"expires": share_link_cleanup_expire},
},
}
overrides = overrides or {}
disabled = disabled or set()
for key, val in overrides.items():
schedule[key] = {**schedule.get(key, {}), **val}
for key in disabled:
schedule.pop(key, None)
return schedule
class TestParseBeatSchedule:
@pytest.mark.parametrize(
("env", "expected"),
[
pytest.param({}, make_expected_schedule(), id="defaults"),
pytest.param(
{"PAPERLESS_EMAIL_TASK_CRON": "*/50 * * * mon"},
make_expected_schedule(
overrides={
"Check all e-mail accounts": {
"schedule": crontab(minute="*/50", day_of_week="mon"),
},
},
),
id="email-changed",
),
pytest.param(
{"PAPERLESS_INDEX_TASK_CRON": "disable"},
make_expected_schedule(disabled={"Optimize the index"}),
id="index-disabled",
),
pytest.param(
{
"PAPERLESS_EMAIL_TASK_CRON": "disable",
"PAPERLESS_TRAIN_TASK_CRON": "disable",
"PAPERLESS_SANITY_TASK_CRON": "disable",
"PAPERLESS_INDEX_TASK_CRON": "disable",
"PAPERLESS_EMPTY_TRASH_TASK_CRON": "disable",
"PAPERLESS_WORKFLOW_SCHEDULED_TASK_CRON": "disable",
"PAPERLESS_LLM_INDEX_TASK_CRON": "disable",
"PAPERLESS_SHARE_LINK_BUNDLE_CLEANUP_CRON": "disable",
},
{},
id="all-disabled",
),
],
)
def test_parse_beat_schedule(
self,
env: dict[str, str],
expected: dict[str, Any],
mocker: MockerFixture,
) -> None:
mocker.patch.dict(os.environ, env, clear=False)
schedule = parse_beat_schedule()
assert schedule == expected
class TestParseDbSettings:
@@ -264,3 +533,85 @@ class TestParseDbSettings:
settings = parse_db_settings(tmp_path)
assert settings == expected_database_settings
class TestParseIgnoreDates:
"""Tests the parsing of the PAPERLESS_IGNORE_DATES setting value."""
def test_no_ignore_dates_set(self) -> None:
"""
GIVEN:
- No ignore dates are set
THEN:
- No ignore dates are parsed
"""
assert parse_ignore_dates("", "YMD") == set()
@pytest.mark.parametrize(
("env_str", "date_format", "expected"),
[
pytest.param(
"1985-05-01",
"YMD",
{datetime.date(1985, 5, 1)},
id="single-ymd",
),
pytest.param(
"1985-05-01,1991-12-05",
"YMD",
{datetime.date(1985, 5, 1), datetime.date(1991, 12, 5)},
id="multiple-ymd",
),
pytest.param(
"2010-12-13",
"YMD",
{datetime.date(2010, 12, 13)},
id="single-ymd-2",
),
pytest.param(
"11.01.10",
"DMY",
{datetime.date(2010, 1, 11)},
id="single-dmy",
),
pytest.param(
"11.01.2001,15-06-1996",
"DMY",
{datetime.date(2001, 1, 11), datetime.date(1996, 6, 15)},
id="multiple-dmy",
),
],
)
def test_ignore_dates_parsed(
self,
env_str: str,
date_format: str,
expected: set[datetime.date],
) -> None:
"""
GIVEN:
- Ignore dates are set per certain inputs
THEN:
- All ignore dates are parsed
"""
assert parse_ignore_dates(env_str, date_format) == expected
@pytest.mark.parametrize(
("languages", "expected"),
[
("de", ["de"]),
("zh", ["zh"]),
("fr+en", ["fr", "en"]),
# Locales must be supported
("en-001+fr-CA", ["en-001", "fr-CA"]),
("en-001+fr", ["en-001", "fr"]),
# Special case for Chinese: variants seem to miss some dates,
# so we always add "zh" as a fallback.
("en+zh-Hans-HK", ["en", "zh-Hans-HK", "zh"]),
("en+zh-Hans", ["en", "zh-Hans", "zh"]),
("en+zh-Hans+zh-Hant", ["en", "zh-Hans", "zh-Hant", "zh"]),
],
)
def test_parse_dateparser_languages(languages: str, expected: list[str]) -> None:
assert sorted(parse_dateparser_languages(languages)) == sorted(expected)

View File

@@ -4,8 +4,12 @@ from pathlib import Path
import pytest
from pytest_mock import MockerFixture
from paperless.settings.parsers import get_bool_from_env
from paperless.settings.parsers import get_choice_from_env
from paperless.settings.parsers import get_float_from_env
from paperless.settings.parsers import get_int_from_env
from paperless.settings.parsers import get_list_from_env
from paperless.settings.parsers import get_path_from_env
from paperless.settings.parsers import parse_dict_from_str
from paperless.settings.parsers import str_to_bool
@@ -205,6 +209,29 @@ class TestParseDictFromString:
assert isinstance(result["database"]["port"], int)
class TestGetBoolFromEnv:
def test_existing_env_var(self, mocker):
"""Test that an existing environment variable is read and converted."""
mocker.patch.dict(os.environ, {"TEST_VAR": "true"})
assert get_bool_from_env("TEST_VAR") is True
def test_missing_env_var_uses_default_no(self, mocker):
"""Test that a missing environment variable uses default 'NO' and returns False."""
mocker.patch.dict(os.environ, {}, clear=True)
assert get_bool_from_env("MISSING_VAR") is False
def test_missing_env_var_with_explicit_default(self, mocker):
"""Test that a missing environment variable uses the provided default."""
mocker.patch.dict(os.environ, {}, clear=True)
assert get_bool_from_env("MISSING_VAR", default="yes") is True
def test_invalid_value_raises_error(self, mocker):
"""Test that an invalid value raises ValueError (delegates to str_to_bool)."""
mocker.patch.dict(os.environ, {"INVALID_VAR": "maybe"})
with pytest.raises(ValueError):
get_bool_from_env("INVALID_VAR")
class TestGetIntFromEnv:
@pytest.mark.parametrize(
("env_value", "expected"),
@@ -259,6 +286,199 @@ class TestGetIntFromEnv:
get_int_from_env("INVALID_INT")
class TestGetFloatFromEnv:
@pytest.mark.parametrize(
("env_value", "expected"),
[
pytest.param("3.14", 3.14, id="pi"),
pytest.param("42", 42.0, id="int_as_float"),
pytest.param("-2.5", -2.5, id="negative"),
pytest.param("0.0", 0.0, id="zero_float"),
pytest.param("0", 0.0, id="zero_int"),
pytest.param("1.5e2", 150.0, id="sci_positive"),
pytest.param("1e-3", 0.001, id="sci_negative"),
pytest.param("-1.23e4", -12300.0, id="sci_large"),
],
)
def test_existing_env_var_valid_floats(self, mocker, env_value, expected):
"""Test that existing environment variables with valid floats return correct values."""
mocker.patch.dict(os.environ, {"FLOAT_VAR": env_value})
assert get_float_from_env("FLOAT_VAR") == expected
@pytest.mark.parametrize(
("default", "expected"),
[
pytest.param(3.14, 3.14, id="pi_default"),
pytest.param(0.0, 0.0, id="zero_default"),
pytest.param(-2.5, -2.5, id="negative_default"),
pytest.param(None, None, id="none_default"),
],
)
def test_missing_env_var_with_defaults(self, mocker, default, expected):
"""Test that missing environment variables return provided defaults."""
mocker.patch.dict(os.environ, {}, clear=True)
assert get_float_from_env("MISSING_VAR", default=default) == expected
def test_missing_env_var_no_default(self, mocker):
"""Test that missing environment variable with no default returns None."""
mocker.patch.dict(os.environ, {}, clear=True)
assert get_float_from_env("MISSING_VAR") is None
@pytest.mark.parametrize(
"invalid_value",
[
pytest.param("not_a_number", id="text"),
pytest.param("42.5.0", id="double_decimal"),
pytest.param("42a", id="alpha_suffix"),
pytest.param("", id="empty"),
pytest.param(" ", id="whitespace"),
pytest.param("true", id="boolean"),
pytest.param("1.2.3", id="triple_decimal"),
],
)
def test_invalid_float_values_raise_error(self, mocker, invalid_value):
"""Test that invalid float values raise ValueError."""
mocker.patch.dict(os.environ, {"INVALID_FLOAT": invalid_value})
with pytest.raises(ValueError):
get_float_from_env("INVALID_FLOAT")
class TestGetPathFromEnv:
@pytest.mark.parametrize(
"env_value",
[
pytest.param("/tmp/test", id="absolute"),
pytest.param("relative/path", id="relative"),
pytest.param("/path/with spaces/file.txt", id="spaces"),
pytest.param(".", id="current_dir"),
pytest.param("..", id="parent_dir"),
pytest.param("/", id="root"),
],
)
def test_existing_env_var_paths(self, mocker, env_value):
"""Test that existing environment variables with paths return resolved Path objects."""
mocker.patch.dict(os.environ, {"PATH_VAR": env_value})
result = get_path_from_env("PATH_VAR")
assert isinstance(result, Path)
assert result == Path(env_value).resolve()
def test_missing_env_var_no_default(self, mocker):
"""Test that missing environment variable with no default returns None."""
mocker.patch.dict(os.environ, {}, clear=True)
assert get_path_from_env("MISSING_VAR") is None
def test_missing_env_var_with_none_default(self, mocker):
"""Test that missing environment variable with None default returns None."""
mocker.patch.dict(os.environ, {}, clear=True)
assert get_path_from_env("MISSING_VAR", default=None) is None
@pytest.mark.parametrize(
"default_path_str",
[
pytest.param("/default/path", id="absolute_default"),
pytest.param("relative/default", id="relative_default"),
pytest.param(".", id="current_default"),
],
)
def test_missing_env_var_with_path_defaults(self, mocker, default_path_str):
"""Test that missing environment variables return resolved default Path objects."""
mocker.patch.dict(os.environ, {}, clear=True)
default_path = Path(default_path_str)
result = get_path_from_env("MISSING_VAR", default=default_path)
assert isinstance(result, Path)
assert result == default_path.resolve()
def test_relative_paths_are_resolved(self, mocker):
"""Test that relative paths are properly resolved to absolute paths."""
mocker.patch.dict(os.environ, {"REL_PATH": "relative/path"})
result = get_path_from_env("REL_PATH")
assert result is not None
assert result.is_absolute()
class TestGetListFromEnv:
@pytest.mark.parametrize(
("env_value", "expected"),
[
pytest.param("a,b,c", ["a", "b", "c"], id="basic_comma_separated"),
pytest.param("single", ["single"], id="single_element"),
pytest.param("", [], id="empty_string"),
pytest.param("a, b , c", ["a", "b", "c"], id="whitespace_trimmed"),
pytest.param("a,,b,c", ["a", "b", "c"], id="empty_elements_removed"),
],
)
def test_existing_env_var_basic_parsing(self, mocker, env_value, expected):
"""Test that existing environment variables are parsed correctly."""
mocker.patch.dict(os.environ, {"LIST_VAR": env_value})
result = get_list_from_env("LIST_VAR")
assert result == expected
@pytest.mark.parametrize(
("separator", "env_value", "expected"),
[
pytest.param("|", "a|b|c", ["a", "b", "c"], id="pipe_separator"),
pytest.param(":", "a:b:c", ["a", "b", "c"], id="colon_separator"),
pytest.param(";", "a;b;c", ["a", "b", "c"], id="semicolon_separator"),
],
)
def test_custom_separators(self, mocker, separator, env_value, expected):
"""Test that custom separators work correctly."""
mocker.patch.dict(os.environ, {"LIST_VAR": env_value})
result = get_list_from_env("LIST_VAR", separator=separator)
assert result == expected
@pytest.mark.parametrize(
("default", "expected"),
[
pytest.param(
["default1", "default2"],
["default1", "default2"],
id="string_list_default",
),
pytest.param([1, 2, 3], [1, 2, 3], id="int_list_default"),
pytest.param(None, [], id="none_default_returns_empty_list"),
],
)
def test_missing_env_var_with_defaults(self, mocker, default, expected):
"""Test that missing environment variables return provided defaults."""
mocker.patch.dict(os.environ, {}, clear=True)
result = get_list_from_env("MISSING_VAR", default=default)
assert result == expected
def test_missing_env_var_no_default(self, mocker):
"""Test that missing environment variable with no default returns empty list."""
mocker.patch.dict(os.environ, {}, clear=True)
result = get_list_from_env("MISSING_VAR")
assert result == []
def test_required_env_var_missing_raises_error(self, mocker):
"""Test that missing required environment variable raises ValueError."""
mocker.patch.dict(os.environ, {}, clear=True)
with pytest.raises(
ValueError,
match="Required environment variable 'REQUIRED_VAR' is not set",
):
get_list_from_env("REQUIRED_VAR", required=True)
def test_required_env_var_with_default_does_not_raise(self, mocker):
"""Test that required environment variable with default does not raise error."""
mocker.patch.dict(os.environ, {}, clear=True)
result = get_list_from_env("REQUIRED_VAR", default=["default"], required=True)
assert result == ["default"]
def test_strip_whitespace_false(self, mocker):
"""Test that whitespace is preserved when strip_whitespace=False."""
mocker.patch.dict(os.environ, {"LIST_VAR": " a , b , c "})
result = get_list_from_env("LIST_VAR", strip_whitespace=False)
assert result == [" a ", " b ", " c "]
def test_remove_empty_false(self, mocker):
"""Test that empty elements are preserved when remove_empty=False."""
mocker.patch.dict(os.environ, {"LIST_VAR": "a,,b,,c"})
result = get_list_from_env("LIST_VAR", remove_empty=False)
assert result == ["a", "", "b", "", "c"]
class TestGetEnvChoice:
@pytest.fixture
def valid_choices(self) -> set[str]:
@@ -394,21 +614,3 @@ class TestGetEnvChoice:
result = get_choice_from_env("TEST_ENV", large_choices)
assert result == "option_50"
def test_different_env_keys(
self,
mocker: MockerFixture,
valid_choices: set[str],
) -> None:
"""Test function works with different environment variable keys."""
test_cases = [
("DJANGO_ENV", "development"),
("DATABASE_BACKEND", "staging"),
("LOG_LEVEL", "production"),
("APP_MODE", "development"),
]
for env_key, env_value in test_cases:
mocker.patch.dict("os.environ", {env_key: env_value})
result = get_choice_from_env(env_key, valid_choices)
assert result == env_value

View File

@@ -0,0 +1,56 @@
import os
from unittest import TestCase
from unittest import mock
from paperless.settings import _parse_paperless_url
from paperless.settings import default_threads_per_worker
class TestThreadCalculation(TestCase):
def test_workers_threads(self) -> None:
"""
GIVEN:
- Certain CPU counts
WHEN:
- Threads per worker is calculated
THEN:
- Threads per worker less than or equal to CPU count
- At least 1 thread per worker
"""
default_workers = 1
for i in range(1, 64):
with mock.patch(
"paperless.settings.multiprocessing.cpu_count",
) as cpu_count:
cpu_count.return_value = i
default_threads = default_threads_per_worker(default_workers)
self.assertGreaterEqual(default_threads, 1)
self.assertLessEqual(default_workers * default_threads, i)
class TestPaperlessURLSettings(TestCase):
def test_paperless_url(self) -> None:
"""
GIVEN:
- PAPERLESS_URL is set
WHEN:
- The URL is parsed
THEN:
- The URL is returned and present in related settings
"""
with mock.patch.dict(
os.environ,
{
"PAPERLESS_URL": "https://example.com",
},
):
url = _parse_paperless_url()
self.assertEqual("https://example.com", url)
from django.conf import settings
self.assertIn(url, settings.CSRF_TRUSTED_ORIGINS)
self.assertIn(url, settings.CORS_ALLOWED_ORIGINS)

View File

@@ -1,482 +0,0 @@
import datetime
import os
from unittest import TestCase
from unittest import mock
import pytest
from celery.schedules import crontab
from paperless.settings import _parse_base_paths
from paperless.settings import _parse_beat_schedule
from paperless.settings import _parse_dateparser_languages
from paperless.settings import _parse_ignore_dates
from paperless.settings import _parse_paperless_url
from paperless.settings import _parse_redis_url
from paperless.settings import default_threads_per_worker
class TestIgnoreDateParsing(TestCase):
"""
Tests the parsing of the PAPERLESS_IGNORE_DATES setting value
"""
def _parse_checker(self, test_cases) -> None:
"""
Helper function to check ignore date parsing
Args:
test_cases (_type_): _description_
"""
for env_str, date_format, expected_date_set in test_cases:
self.assertSetEqual(
_parse_ignore_dates(env_str, date_format),
expected_date_set,
)
def test_no_ignore_dates_set(self) -> None:
"""
GIVEN:
- No ignore dates are set
THEN:
- No ignore dates are parsed
"""
self.assertSetEqual(_parse_ignore_dates(""), set())
def test_single_ignore_dates_set(self) -> None:
"""
GIVEN:
- Ignore dates are set per certain inputs
THEN:
- All ignore dates are parsed
"""
test_cases = [
("1985-05-01", "YMD", {datetime.date(1985, 5, 1)}),
(
"1985-05-01,1991-12-05",
"YMD",
{datetime.date(1985, 5, 1), datetime.date(1991, 12, 5)},
),
("2010-12-13", "YMD", {datetime.date(2010, 12, 13)}),
("11.01.10", "DMY", {datetime.date(2010, 1, 11)}),
(
"11.01.2001,15-06-1996",
"DMY",
{datetime.date(2001, 1, 11), datetime.date(1996, 6, 15)},
),
]
self._parse_checker(test_cases)
class TestThreadCalculation(TestCase):
def test_workers_threads(self) -> None:
"""
GIVEN:
- Certain CPU counts
WHEN:
- Threads per worker is calculated
THEN:
- Threads per worker less than or equal to CPU count
- At least 1 thread per worker
"""
default_workers = 1
for i in range(1, 64):
with mock.patch(
"paperless.settings.multiprocessing.cpu_count",
) as cpu_count:
cpu_count.return_value = i
default_threads = default_threads_per_worker(default_workers)
self.assertGreaterEqual(default_threads, 1)
self.assertLessEqual(default_workers * default_threads, i)
class TestRedisSocketConversion(TestCase):
def test_redis_socket_parsing(self) -> None:
"""
GIVEN:
- Various Redis connection URI formats
WHEN:
- The URI is parsed
THEN:
- Socket based URIs are translated
- Non-socket URIs are unchanged
- None provided uses default
"""
for input, expected in [
# Nothing is set
(None, ("redis://localhost:6379", "redis://localhost:6379")),
# celery style
(
"redis+socket:///run/redis/redis.sock",
(
"redis+socket:///run/redis/redis.sock",
"unix:///run/redis/redis.sock",
),
),
# redis-py / channels-redis style
(
"unix:///run/redis/redis.sock",
(
"redis+socket:///run/redis/redis.sock",
"unix:///run/redis/redis.sock",
),
),
# celery style with db
(
"redis+socket:///run/redis/redis.sock?virtual_host=5",
(
"redis+socket:///run/redis/redis.sock?virtual_host=5",
"unix:///run/redis/redis.sock?db=5",
),
),
# redis-py / channels-redis style with db
(
"unix:///run/redis/redis.sock?db=10",
(
"redis+socket:///run/redis/redis.sock?virtual_host=10",
"unix:///run/redis/redis.sock?db=10",
),
),
# Just a host with a port
(
"redis://myredishost:6379",
("redis://myredishost:6379", "redis://myredishost:6379"),
),
]:
result = _parse_redis_url(input)
self.assertTupleEqual(expected, result)
class TestCeleryScheduleParsing(TestCase):
MAIL_EXPIRE_TIME = 9.0 * 60.0
CLASSIFIER_EXPIRE_TIME = 59.0 * 60.0
INDEX_EXPIRE_TIME = 23.0 * 60.0 * 60.0
SANITY_EXPIRE_TIME = ((7.0 * 24.0) - 1.0) * 60.0 * 60.0
EMPTY_TRASH_EXPIRE_TIME = 23.0 * 60.0 * 60.0
RUN_SCHEDULED_WORKFLOWS_EXPIRE_TIME = 59.0 * 60.0
LLM_INDEX_EXPIRE_TIME = 23.0 * 60.0 * 60.0
CLEANUP_EXPIRED_SHARE_BUNDLES_EXPIRE_TIME = 23.0 * 60.0 * 60.0
def test_schedule_configuration_default(self) -> None:
"""
GIVEN:
- No configured task schedules
WHEN:
- The celery beat schedule is built
THEN:
- The default schedule is returned
"""
schedule = _parse_beat_schedule()
self.assertDictEqual(
{
"Check all e-mail accounts": {
"task": "paperless_mail.tasks.process_mail_accounts",
"schedule": crontab(minute="*/10"),
"options": {"expires": self.MAIL_EXPIRE_TIME},
},
"Train the classifier": {
"task": "documents.tasks.train_classifier",
"schedule": crontab(minute="5", hour="*/1"),
"options": {"expires": self.CLASSIFIER_EXPIRE_TIME},
},
"Optimize the index": {
"task": "documents.tasks.index_optimize",
"schedule": crontab(minute=0, hour=0),
"options": {"expires": self.INDEX_EXPIRE_TIME},
},
"Perform sanity check": {
"task": "documents.tasks.sanity_check",
"schedule": crontab(minute=30, hour=0, day_of_week="sun"),
"options": {"expires": self.SANITY_EXPIRE_TIME},
},
"Empty trash": {
"task": "documents.tasks.empty_trash",
"schedule": crontab(minute=0, hour="1"),
"options": {"expires": self.EMPTY_TRASH_EXPIRE_TIME},
},
"Check and run scheduled workflows": {
"task": "documents.tasks.check_scheduled_workflows",
"schedule": crontab(minute="5", hour="*/1"),
"options": {"expires": self.RUN_SCHEDULED_WORKFLOWS_EXPIRE_TIME},
},
"Rebuild LLM index": {
"task": "documents.tasks.llmindex_index",
"schedule": crontab(minute=10, hour=2),
"options": {
"expires": self.LLM_INDEX_EXPIRE_TIME,
},
},
"Cleanup expired share link bundles": {
"task": "documents.tasks.cleanup_expired_share_link_bundles",
"schedule": crontab(minute=0, hour=2),
"options": {
"expires": self.CLEANUP_EXPIRED_SHARE_BUNDLES_EXPIRE_TIME,
},
},
},
schedule,
)
def test_schedule_configuration_changed(self) -> None:
"""
GIVEN:
- Email task is configured non-default
WHEN:
- The celery beat schedule is built
THEN:
- The email task is configured per environment
- The default schedule is returned for other tasks
"""
with mock.patch.dict(
os.environ,
{"PAPERLESS_EMAIL_TASK_CRON": "*/50 * * * mon"},
):
schedule = _parse_beat_schedule()
self.assertDictEqual(
{
"Check all e-mail accounts": {
"task": "paperless_mail.tasks.process_mail_accounts",
"schedule": crontab(minute="*/50", day_of_week="mon"),
"options": {"expires": self.MAIL_EXPIRE_TIME},
},
"Train the classifier": {
"task": "documents.tasks.train_classifier",
"schedule": crontab(minute="5", hour="*/1"),
"options": {"expires": self.CLASSIFIER_EXPIRE_TIME},
},
"Optimize the index": {
"task": "documents.tasks.index_optimize",
"schedule": crontab(minute=0, hour=0),
"options": {"expires": self.INDEX_EXPIRE_TIME},
},
"Perform sanity check": {
"task": "documents.tasks.sanity_check",
"schedule": crontab(minute=30, hour=0, day_of_week="sun"),
"options": {"expires": self.SANITY_EXPIRE_TIME},
},
"Empty trash": {
"task": "documents.tasks.empty_trash",
"schedule": crontab(minute=0, hour="1"),
"options": {"expires": self.EMPTY_TRASH_EXPIRE_TIME},
},
"Check and run scheduled workflows": {
"task": "documents.tasks.check_scheduled_workflows",
"schedule": crontab(minute="5", hour="*/1"),
"options": {"expires": self.RUN_SCHEDULED_WORKFLOWS_EXPIRE_TIME},
},
"Rebuild LLM index": {
"task": "documents.tasks.llmindex_index",
"schedule": crontab(minute=10, hour=2),
"options": {
"expires": self.LLM_INDEX_EXPIRE_TIME,
},
},
"Cleanup expired share link bundles": {
"task": "documents.tasks.cleanup_expired_share_link_bundles",
"schedule": crontab(minute=0, hour=2),
"options": {
"expires": self.CLEANUP_EXPIRED_SHARE_BUNDLES_EXPIRE_TIME,
},
},
},
schedule,
)
def test_schedule_configuration_disabled(self) -> None:
"""
GIVEN:
- Search index task is disabled
WHEN:
- The celery beat schedule is built
THEN:
- The search index task is not present
- The default schedule is returned for other tasks
"""
with mock.patch.dict(os.environ, {"PAPERLESS_INDEX_TASK_CRON": "disable"}):
schedule = _parse_beat_schedule()
self.assertDictEqual(
{
"Check all e-mail accounts": {
"task": "paperless_mail.tasks.process_mail_accounts",
"schedule": crontab(minute="*/10"),
"options": {"expires": self.MAIL_EXPIRE_TIME},
},
"Train the classifier": {
"task": "documents.tasks.train_classifier",
"schedule": crontab(minute="5", hour="*/1"),
"options": {"expires": self.CLASSIFIER_EXPIRE_TIME},
},
"Perform sanity check": {
"task": "documents.tasks.sanity_check",
"schedule": crontab(minute=30, hour=0, day_of_week="sun"),
"options": {"expires": self.SANITY_EXPIRE_TIME},
},
"Empty trash": {
"task": "documents.tasks.empty_trash",
"schedule": crontab(minute=0, hour="1"),
"options": {"expires": self.EMPTY_TRASH_EXPIRE_TIME},
},
"Check and run scheduled workflows": {
"task": "documents.tasks.check_scheduled_workflows",
"schedule": crontab(minute="5", hour="*/1"),
"options": {"expires": self.RUN_SCHEDULED_WORKFLOWS_EXPIRE_TIME},
},
"Rebuild LLM index": {
"task": "documents.tasks.llmindex_index",
"schedule": crontab(minute=10, hour=2),
"options": {
"expires": self.LLM_INDEX_EXPIRE_TIME,
},
},
"Cleanup expired share link bundles": {
"task": "documents.tasks.cleanup_expired_share_link_bundles",
"schedule": crontab(minute=0, hour=2),
"options": {
"expires": self.CLEANUP_EXPIRED_SHARE_BUNDLES_EXPIRE_TIME,
},
},
},
schedule,
)
def test_schedule_configuration_disabled_all(self) -> None:
"""
GIVEN:
- All tasks are disabled
WHEN:
- The celery beat schedule is built
THEN:
- No tasks are scheduled
"""
with mock.patch.dict(
os.environ,
{
"PAPERLESS_EMAIL_TASK_CRON": "disable",
"PAPERLESS_TRAIN_TASK_CRON": "disable",
"PAPERLESS_SANITY_TASK_CRON": "disable",
"PAPERLESS_INDEX_TASK_CRON": "disable",
"PAPERLESS_EMPTY_TRASH_TASK_CRON": "disable",
"PAPERLESS_WORKFLOW_SCHEDULED_TASK_CRON": "disable",
"PAPERLESS_LLM_INDEX_TASK_CRON": "disable",
"PAPERLESS_SHARE_LINK_BUNDLE_CLEANUP_CRON": "disable",
},
):
schedule = _parse_beat_schedule()
self.assertDictEqual(
{},
schedule,
)
class TestPaperlessURLSettings(TestCase):
def test_paperless_url(self) -> None:
"""
GIVEN:
- PAPERLESS_URL is set
WHEN:
- The URL is parsed
THEN:
- The URL is returned and present in related settings
"""
with mock.patch.dict(
os.environ,
{
"PAPERLESS_URL": "https://example.com",
},
):
url = _parse_paperless_url()
self.assertEqual("https://example.com", url)
from django.conf import settings
self.assertIn(url, settings.CSRF_TRUSTED_ORIGINS)
self.assertIn(url, settings.CORS_ALLOWED_ORIGINS)
class TestPathSettings(TestCase):
def test_default_paths(self) -> None:
"""
GIVEN:
- PAPERLESS_FORCE_SCRIPT_NAME is not set
WHEN:
- Settings are parsed
THEN:
- Paths are as expected
"""
base_paths = _parse_base_paths()
self.assertEqual(None, base_paths[0]) # FORCE_SCRIPT_NAME
self.assertEqual("/", base_paths[1]) # BASE_URL
self.assertEqual("/accounts/login/", base_paths[2]) # LOGIN_URL
self.assertEqual("/dashboard", base_paths[3]) # LOGIN_REDIRECT_URL
self.assertEqual(
"/accounts/login/?loggedout=1",
base_paths[4],
) # LOGOUT_REDIRECT_URL
@mock.patch("os.environ", {"PAPERLESS_FORCE_SCRIPT_NAME": "/paperless"})
def test_subpath(self) -> None:
"""
GIVEN:
- PAPERLESS_FORCE_SCRIPT_NAME is set
WHEN:
- Settings are parsed
THEN:
- The path is returned and present in related settings
"""
base_paths = _parse_base_paths()
self.assertEqual("/paperless", base_paths[0]) # FORCE_SCRIPT_NAME
self.assertEqual("/paperless/", base_paths[1]) # BASE_URL
self.assertEqual("/paperless/accounts/login/", base_paths[2]) # LOGIN_URL
self.assertEqual("/paperless/dashboard", base_paths[3]) # LOGIN_REDIRECT_URL
self.assertEqual(
"/paperless/accounts/login/?loggedout=1",
base_paths[4],
) # LOGOUT_REDIRECT_URL
@mock.patch(
"os.environ",
{
"PAPERLESS_FORCE_SCRIPT_NAME": "/paperless",
"PAPERLESS_LOGOUT_REDIRECT_URL": "/foobar/",
},
)
def test_subpath_with_explicit_logout_url(self) -> None:
"""
GIVEN:
- PAPERLESS_FORCE_SCRIPT_NAME is set and so is PAPERLESS_LOGOUT_REDIRECT_URL
WHEN:
- Settings are parsed
THEN:
- The correct logout redirect URL is returned
"""
base_paths = _parse_base_paths()
self.assertEqual("/paperless/", base_paths[1]) # BASE_URL
self.assertEqual("/foobar/", base_paths[4]) # LOGOUT_REDIRECT_URL
@pytest.mark.parametrize(
("languages", "expected"),
[
("de", ["de"]),
("zh", ["zh"]),
("fr+en", ["fr", "en"]),
# Locales must be supported
("en-001+fr-CA", ["en-001", "fr-CA"]),
("en-001+fr", ["en-001", "fr"]),
# Special case for Chinese: variants seem to miss some dates,
# so we always add "zh" as a fallback.
("en+zh-Hans-HK", ["en", "zh-Hans-HK", "zh"]),
("en+zh-Hans", ["en", "zh-Hans", "zh"]),
("en+zh-Hans+zh-Hant", ["en", "zh-Hans", "zh-Hant", "zh"]),
],
)
def test_parser_date_parser_languages(languages, expected) -> None:
assert sorted(_parse_dateparser_languages(languages)) == sorted(expected)

69
uv.lock generated
View File

@@ -1748,6 +1748,73 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" },
]
[[package]]
name = "ijson"
version = "3.5.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/f4/57/60d1a6a512f2f0508d0bc8b4f1cc5616fd3196619b66bd6a01f9155a1292/ijson-3.5.0.tar.gz", hash = "sha256:94688760720e3f5212731b3cb8d30267f9a045fb38fb3870254e7b9504246f31", size = 68658, upload-time = "2026-02-24T03:58:30.974Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/65/da/644343198abca5e0f6e2486063f8d8f3c443ca0ef5e5c890e51ef6032e33/ijson-3.5.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5616311404b858d32740b7ad8b9a799c62165f5ecb85d0a8ed16c21665a90533", size = 88964, upload-time = "2026-02-24T03:56:53.099Z" },
{ url = "https://files.pythonhosted.org/packages/5b/63/8621190aa2baf96156dfd4c632b6aa9f1464411e50b98750c09acc0505ea/ijson-3.5.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e9733f94029dd41702d573ef64752e2556e72aea14623d6dbb7a44ca1ccf30fd", size = 60582, upload-time = "2026-02-24T03:56:54.261Z" },
{ url = "https://files.pythonhosted.org/packages/20/31/6a3f041fdd17dacff33b7d7d3ba3df6dca48740108340c6042f974b2ad20/ijson-3.5.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:db8398c6721b98412a4f618da8022550c8b9c5d9214040646071b5deb4d4a393", size = 60632, upload-time = "2026-02-24T03:56:55.159Z" },
{ url = "https://files.pythonhosted.org/packages/e4/68/474541998abbdecfd46a744536878335de89aceb9f085bff1aaf35575ceb/ijson-3.5.0-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:c061314845c08163b1784b6076ea5f075372461a32e6916f4e5f211fd4130b64", size = 131988, upload-time = "2026-02-24T03:56:56.35Z" },
{ url = "https://files.pythonhosted.org/packages/cd/32/e05ff8b72a44fe9d192f41c5dcbc35cfa87efc280cdbfe539ffaf4a7535e/ijson-3.5.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1111a1c5ac79119c5d6e836f900c1a53844b50a18af38311baa6bb61e2645aca", size = 138669, upload-time = "2026-02-24T03:56:57.555Z" },
{ url = "https://files.pythonhosted.org/packages/49/b5/955a83b031102c7a602e2c06d03aff0a0e584212f09edb94ccc754d203ac/ijson-3.5.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1e74aff8c681c24002b61b1822f9511d4c384f324f7dbc08c78538e01fdc9fcb", size = 135093, upload-time = "2026-02-24T03:56:59.267Z" },
{ url = "https://files.pythonhosted.org/packages/e8/f2/30250cfcb4d2766669b31f6732689aab2bb91de426a15a3ebe482df7ee48/ijson-3.5.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:739a7229b1b0cc5f7e2785a6e7a5fc915e850d3fed9588d0e89a09f88a417253", size = 138715, upload-time = "2026-02-24T03:57:00.491Z" },
{ url = "https://files.pythonhosted.org/packages/a2/05/785a145d7e75e04e04480d59b6323cd4b1d9013a6cd8643fa635fbc93490/ijson-3.5.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:ef88712160360cab3ca6471a4e5418243f8b267cf1fe1620879d1b5558babc71", size = 133194, upload-time = "2026-02-24T03:57:01.759Z" },
{ url = "https://files.pythonhosted.org/packages/14/eb/80d6f8a748dead4034cea0939494a67d10ccf88d6413bf6e860393139676/ijson-3.5.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6ca0d1b6b5f8166a6248f4309497585fb8553b04bc8179a0260fad636cfdb798", size = 135588, upload-time = "2026-02-24T03:57:03.131Z" },
{ url = "https://files.pythonhosted.org/packages/aa/17/9c63c7688025f3a8c47ea717b8306649c8c7244e49e20a2be4e3515dc75c/ijson-3.5.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:1ebefbe149a6106cc848a3eaf536af51a9b5ccc9082de801389f152dba6ab755", size = 88536, upload-time = "2026-02-24T03:57:06.809Z" },
{ url = "https://files.pythonhosted.org/packages/6f/dd/e15c2400244c117b06585452ebc63ae254f5a6964f712306afd1422daae0/ijson-3.5.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:19e30d9f00f82e64de689c0b8651b9cfed879c184b139d7e1ea5030cec401c21", size = 60499, upload-time = "2026-02-24T03:57:09.155Z" },
{ url = "https://files.pythonhosted.org/packages/77/a9/bf4fe3538a0c965f16b406f180a06105b875da83f0743e36246be64ef550/ijson-3.5.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a04a33ee78a6f27b9b8528c1ca3c207b1df3b8b867a4cf2fcc4109986f35c227", size = 60330, upload-time = "2026-02-24T03:57:10.574Z" },
{ url = "https://files.pythonhosted.org/packages/31/76/6f91bdb019dd978fce1bc5ea1cd620cfc096d258126c91db2c03a20a7f34/ijson-3.5.0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:7d48dc2984af02eb3c56edfb3f13b3f62f2f3e4fe36f058c8cfc75d93adf4fed", size = 138977, upload-time = "2026-02-24T03:57:11.932Z" },
{ url = "https://files.pythonhosted.org/packages/11/be/bbc983059e48a54b0121ee60042979faed7674490bbe7b2c41560db3f436/ijson-3.5.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f1e73a44844d9adbca9cf2c4132cd875933e83f3d4b23881fcaf82be83644c7d", size = 149785, upload-time = "2026-02-24T03:57:13.255Z" },
{ url = "https://files.pythonhosted.org/packages/6d/81/2fee58f9024a3449aee83edfa7167fb5ccd7e1af2557300e28531bb68e16/ijson-3.5.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7389a56b8562a19948bdf1d7bae3a2edc8c7f86fb59834dcb1c4c722818e645a", size = 149729, upload-time = "2026-02-24T03:57:14.191Z" },
{ url = "https://files.pythonhosted.org/packages/c7/56/f1706761fcc096c9d414b3dcd000b1e6e5c24364c21cfba429837f98ee8d/ijson-3.5.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3176f23f8ebec83f374ed0c3b4e5a0c4db7ede54c005864efebbed46da123608", size = 150697, upload-time = "2026-02-24T03:57:15.855Z" },
{ url = "https://files.pythonhosted.org/packages/d9/6e/ee0d9c875a0193b632b3e9ccd1b22a50685fb510256ad57ba483b6529f77/ijson-3.5.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:6babd88e508630c6ef86c9bebaaf13bb2fb8ec1d8f8868773a03c20253f599bc", size = 142873, upload-time = "2026-02-24T03:57:16.831Z" },
{ url = "https://files.pythonhosted.org/packages/d2/bf/f9d4399d0e6e3fd615035290a71e97c843f17f329b43638c0a01cf112d73/ijson-3.5.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:dc1b3836b174b6db2fa8319f1926fb5445abd195dc963368092103f8579cb8ed", size = 151583, upload-time = "2026-02-24T03:57:17.757Z" },
{ url = "https://files.pythonhosted.org/packages/a2/71/d67e764a712c3590627480643a3b51efcc3afa4ef3cb54ee4c989073c97e/ijson-3.5.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e9cedc10e40dd6023c351ed8bfc7dcfce58204f15c321c3c1546b9c7b12562a4", size = 88544, upload-time = "2026-02-24T03:57:21.293Z" },
{ url = "https://files.pythonhosted.org/packages/1a/39/f1c299371686153fa3cf5c0736b96247a87a1bee1b7145e6d21f359c505a/ijson-3.5.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:3647649f782ee06c97490b43680371186651f3f69bebe64c6083ee7615d185e5", size = 60495, upload-time = "2026-02-24T03:57:22.501Z" },
{ url = "https://files.pythonhosted.org/packages/16/94/b1438e204d75e01541bebe3e668fe3e68612d210e9931ae1611062dd0a56/ijson-3.5.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:90e74be1dce05fce73451c62d1118671f78f47c9f6be3991c82b91063bf01fc9", size = 60325, upload-time = "2026-02-24T03:57:23.332Z" },
{ url = "https://files.pythonhosted.org/packages/30/e2/4aa9c116fa86cc8b0f574f3c3a47409edc1cd4face05d0e589a5a176b05d/ijson-3.5.0-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:78e9ad73e7be2dd80627504bd5cbf512348c55ce2c06e362ed7683b5220e8568", size = 138774, upload-time = "2026-02-24T03:57:24.683Z" },
{ url = "https://files.pythonhosted.org/packages/d2/d2/738b88752a70c3be1505faa4dcd7110668c2712e582a6a36488ed1e295d4/ijson-3.5.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9577449313cc94be89a4fe4b3e716c65f09cc19636d5a6b2861c4e80dddebd58", size = 149820, upload-time = "2026-02-24T03:57:26.062Z" },
{ url = "https://files.pythonhosted.org/packages/ed/df/0b3ab9f393ca8f72ea03bc896ba9fdc987e90ae08cdb51c32a4ee0c14d5e/ijson-3.5.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3e4c1178fb50aff5f5701a30a5152ead82a14e189ce0f6102fa1b5f10b2f54ff", size = 149747, upload-time = "2026-02-24T03:57:27.308Z" },
{ url = "https://files.pythonhosted.org/packages/cc/a3/b0037119f75131b78cb00acc2657b1a9d0435475f1f2c5f8f5a170b66b9c/ijson-3.5.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0eb402ab026ffb37a918d75af2b7260fe6cfbce13232cc83728a714dd30bd81d", size = 151027, upload-time = "2026-02-24T03:57:28.522Z" },
{ url = "https://files.pythonhosted.org/packages/22/a0/cb344de1862bf09d8f769c9d25c944078c87dd59a1b496feec5ad96309a4/ijson-3.5.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:5b08ee08355f9f729612a8eb9bf69cc14f9310c3b2a487c6f1c3c65d85216ec4", size = 142996, upload-time = "2026-02-24T03:57:29.774Z" },
{ url = "https://files.pythonhosted.org/packages/ca/32/a8ffd67182e02ea61f70f62daf43ded4fa8a830a2520a851d2782460aba8/ijson-3.5.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:bda62b6d48442903e7bf56152108afb7f0f1293c2b9bef2f2c369defea76ab18", size = 152068, upload-time = "2026-02-24T03:57:30.969Z" },
{ url = "https://files.pythonhosted.org/packages/42/65/13e2492d17e19a2084523e18716dc2809159f2287fd2700c735f311e76c4/ijson-3.5.0-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:4d4b0cd676b8c842f7648c1a783448fac5cd3b98289abd83711b3e275e143524", size = 93019, upload-time = "2026-02-24T03:57:33.976Z" },
{ url = "https://files.pythonhosted.org/packages/33/92/483fc97ece0c3f1cecabf48f6a7a36e89d19369eec462faaeaa34c788992/ijson-3.5.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:252dec3680a48bb82d475e36b4ae1b3a9d7eb690b951bb98a76c5fe519e30188", size = 62714, upload-time = "2026-02-24T03:57:34.819Z" },
{ url = "https://files.pythonhosted.org/packages/4b/88/793fe020a0fe9d9eed4c285cf4a5cfdb0a935708b3bde0d72f35c794b513/ijson-3.5.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:aa1b5dca97d323931fde2501172337384c958914d81a9dac7f00f0d4bfc76bc7", size = 62460, upload-time = "2026-02-24T03:57:35.874Z" },
{ url = "https://files.pythonhosted.org/packages/51/69/f1a2690aa8d4df1f4e262b385e65a933ffdc250b091531bac9a449c19e16/ijson-3.5.0-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:7a5ec7fd86d606094bba6f6f8f87494897102fa4584ef653f3005c51a784c320", size = 199273, upload-time = "2026-02-24T03:57:37.07Z" },
{ url = "https://files.pythonhosted.org/packages/ea/a2/f1346d5299e79b988ab472dc773d5381ec2d57c23cb2f1af3ede4a810e62/ijson-3.5.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:009f41443e1521847701c6d87fa3923c0b1961be3c7e7de90947c8cb92ea7c44", size = 216884, upload-time = "2026-02-24T03:57:38.346Z" },
{ url = "https://files.pythonhosted.org/packages/28/3c/8b637e869be87799e6c2c3c275a30a546f086b1aed77e2b7f11512168c5a/ijson-3.5.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e4c3651d1f9fe2839a93fdf8fd1d5ca3a54975349894249f3b1b572bcc4bd577", size = 207306, upload-time = "2026-02-24T03:57:39.718Z" },
{ url = "https://files.pythonhosted.org/packages/7f/7c/18b1c1df6951ca056782d7580ec40cea4ff9a27a0947d92640d1cc8c4ae3/ijson-3.5.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:945b7abcfcfeae2cde17d8d900870f03536494245dda7ad4f8d056faa303256c", size = 211364, upload-time = "2026-02-24T03:57:40.953Z" },
{ url = "https://files.pythonhosted.org/packages/f3/55/e795812e82851574a9dba8a53fde045378f531ef14110c6fb55dbd23b443/ijson-3.5.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:0574b0a841ff97495c13e9d7260fbf3d85358b061f540c52a123db9dbbaa2ed6", size = 200608, upload-time = "2026-02-24T03:57:42.272Z" },
{ url = "https://files.pythonhosted.org/packages/5c/cd/013c85b4749b57a4cb4c2670014d1b32b8db4ab1a7be92ea7aeb5d7fe7b5/ijson-3.5.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f969ffb2b89c5cdf686652d7fb66252bc72126fa54d416317411497276056a18", size = 205127, upload-time = "2026-02-24T03:57:43.286Z" },
{ url = "https://files.pythonhosted.org/packages/7a/93/0868efe753dc1df80cc405cf0c1f2527a6991643607c741bff8dcb899b3b/ijson-3.5.0-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:25a5a6b2045c90bb83061df27cfa43572afa43ba9408611d7bfe237c20a731a9", size = 89094, upload-time = "2026-02-24T03:57:46.115Z" },
{ url = "https://files.pythonhosted.org/packages/24/94/fd5a832a0df52ef5e4e740f14ac8640725d61034a1b0c561e8b5fb424706/ijson-3.5.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:8976c54c0b864bc82b951bae06567566ac77ef63b90a773a69cd73aab47f4f4f", size = 60715, upload-time = "2026-02-24T03:57:47.552Z" },
{ url = "https://files.pythonhosted.org/packages/70/79/1b9a90af5732491f9eec751ee211b86b11011e1158c555c06576d52c3919/ijson-3.5.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:859eb2038f7f1b0664df4241957694cc35e6295992d71c98659b22c69b3cbc10", size = 60638, upload-time = "2026-02-24T03:57:48.428Z" },
{ url = "https://files.pythonhosted.org/packages/23/6f/2c551ea980fe56f68710a8d5389cfbd015fc45aaafd17c3c52c346db6aa1/ijson-3.5.0-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:c911aa02991c7c0d3639b6619b93a93210ff1e7f58bf7225d613abea10adc78e", size = 140667, upload-time = "2026-02-24T03:57:49.314Z" },
{ url = "https://files.pythonhosted.org/packages/25/0e/27b887879ba6a5bc29766e3c5af4942638c952220fd63e1e442674f7883a/ijson-3.5.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:903cbdc350173605220edc19796fbea9b2203c8b3951fb7335abfa8ed37afda8", size = 149850, upload-time = "2026-02-24T03:57:50.329Z" },
{ url = "https://files.pythonhosted.org/packages/da/1e/23e10e1bc04bf31193b21e2960dce14b17dbd5d0c62204e8401c59d62c08/ijson-3.5.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a4549d96ded5b8efa71639b2160235415f6bdb8c83367615e2dbabcb72755c33", size = 149206, upload-time = "2026-02-24T03:57:51.261Z" },
{ url = "https://files.pythonhosted.org/packages/8e/90/e552f6495063b235cf7fa2c592f6597c057077195e517b842a0374fd470c/ijson-3.5.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:6b2dcf6349e6042d83f3f8c39ce84823cf7577eba25bac5aae5e39bbbbbe9c1c", size = 150438, upload-time = "2026-02-24T03:57:52.198Z" },
{ url = "https://files.pythonhosted.org/packages/5c/18/45bf8f297c41b42a1c231d261141097babd953d2c28a07be57ae4c3a1a02/ijson-3.5.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:e44af39e6f8a17e5627dcd89715d8279bf3474153ff99aae031a936e5c5572e5", size = 144369, upload-time = "2026-02-24T03:57:53.22Z" },
{ url = "https://files.pythonhosted.org/packages/9b/3a/deb9772bb2c0cead7ad64f00c3598eec9072bdf511818e70e2c512eeabbe/ijson-3.5.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:9260332304b7e7828db56d43f08fc970a3ab741bf84ff10189361ea1b60c395b", size = 151352, upload-time = "2026-02-24T03:57:54.375Z" },
{ url = "https://files.pythonhosted.org/packages/9f/d9/86f7fac35e0835faa188085ae0579e813493d5261ce056484015ad533445/ijson-3.5.0-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:2ea4b676ec98e374c1df400a47929859e4fa1239274339024df4716e802aa7e4", size = 93069, upload-time = "2026-02-24T03:57:57.849Z" },
{ url = "https://files.pythonhosted.org/packages/33/d2/e7366ed9c6e60228d35baf4404bac01a126e7775ea8ce57f560125ed190a/ijson-3.5.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:014586eec043e23c80be9a923c56c3a0920a0f1f7d17478ce7bc20ba443968ef", size = 62767, upload-time = "2026-02-24T03:57:58.758Z" },
{ url = "https://files.pythonhosted.org/packages/35/8b/3e703e8cc4b3ada79f13b28070b51d9550c578f76d1968657905857b2ddd/ijson-3.5.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:d5b8b886b0248652d437f66e7c5ac318bbdcb2c7137a7e5327a68ca00b286f5f", size = 62467, upload-time = "2026-02-24T03:58:00.261Z" },
{ url = "https://files.pythonhosted.org/packages/21/42/0c91af32c1ee8a957fdac2e051b5780756d05fd34e4b60d94a08d51bac1d/ijson-3.5.0-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:498fd46ae2349297e43acf97cdc421e711dbd7198418677259393d2acdc62d78", size = 200447, upload-time = "2026-02-24T03:58:01.591Z" },
{ url = "https://files.pythonhosted.org/packages/f9/80/796ea0e391b7e2d45c5b1b451734bba03f81c2984cf955ea5eaa6c4920ad/ijson-3.5.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:22a51b4f9b81f12793731cf226266d1de2112c3c04ba4a04117ad4e466897e05", size = 217820, upload-time = "2026-02-24T03:58:02.598Z" },
{ url = "https://files.pythonhosted.org/packages/38/14/52b6613fdda4078c62eb5b4fe3efc724ddc55a4ad524c93de51830107aa3/ijson-3.5.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9636c710dc4ac4a281baa266a64f323b4cc165cec26836af702c44328b59a515", size = 208310, upload-time = "2026-02-24T03:58:04.759Z" },
{ url = "https://files.pythonhosted.org/packages/6a/ad/8b3105a78774fd4a65e534a21d975ef3a77e189489fe3029ebcaeba5e243/ijson-3.5.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:f7168a39e8211107666d71b25693fd1b2bac0b33735ef744114c403c6cac21e1", size = 211843, upload-time = "2026-02-24T03:58:05.836Z" },
{ url = "https://files.pythonhosted.org/packages/36/ab/a2739f6072d6e1160581bc3ed32da614c8cced023dcd519d9c5fa66e0425/ijson-3.5.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:8696454245415bc617ab03b0dc3ae4c86987df5dc6a90bad378fe72c5409d89e", size = 200906, upload-time = "2026-02-24T03:58:07.788Z" },
{ url = "https://files.pythonhosted.org/packages/6d/5e/e06c2de3c3d4a9cfb655c1ad08a68fb72838d271072cdd3196576ac4431a/ijson-3.5.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:c21bfb61f71f191565885bf1bc29e0a186292d866b4880637b833848360bdc1b", size = 205495, upload-time = "2026-02-24T03:58:09.163Z" },
{ url = "https://files.pythonhosted.org/packages/d9/3b/d31ecfa63a218978617446159f3d77aab2417a5bd2885c425b176353ff78/ijson-3.5.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:d64c624da0e9d692d6eb0ff63a79656b59d76bf80773a17c5b0f835e4e8ef627", size = 57715, upload-time = "2026-02-24T03:58:24.545Z" },
{ url = "https://files.pythonhosted.org/packages/30/51/b170e646d378e8cccf9637c05edb5419b00c2c4df64b0258c3af5355608e/ijson-3.5.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:876f7df73b7e0d6474f9caa729b9cdbfc8e76de9075a4887dfd689e29e85c4ca", size = 57205, upload-time = "2026-02-24T03:58:25.681Z" },
{ url = "https://files.pythonhosted.org/packages/ef/83/44dbd0231b0a8c6c14d27473d10c4e27dfbce7d5d9a833c79e3e6c33eb40/ijson-3.5.0-pp311-pypy311_pp73-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:e7dbff2c8d9027809b0cde663df44f3210da10ea377121d42896fb6ee405dd31", size = 71229, upload-time = "2026-02-24T03:58:27.103Z" },
{ url = "https://files.pythonhosted.org/packages/c8/98/cf84048b7c6cec888826e696a31f45bee7ebcac15e532b6be1fc4c2c9608/ijson-3.5.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4217a1edc278660679e1197c83a1a2a2d367792bfbb2a3279577f4b59b93730d", size = 71217, upload-time = "2026-02-24T03:58:28.021Z" },
{ url = "https://files.pythonhosted.org/packages/3c/0a/e34c729a87ff67dc6540f6bcc896626158e691d433ab57db0086d73decd2/ijson-3.5.0-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:04f0fc740311388ee745ba55a12292b722d6f52000b11acbb913982ba5fbdf87", size = 68618, upload-time = "2026-02-24T03:58:28.918Z" },
]
[[package]]
name = "imagehash"
version = "4.3.2"
@@ -2751,6 +2818,7 @@ dependencies = [
{ name = "flower", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "gotenberg-client", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "httpx-oauth", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "ijson", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "imap-tools", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "jinja2", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "langdetect", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -2898,6 +2966,7 @@ requires-dist = [
{ name = "gotenberg-client", specifier = "~=0.13.1" },
{ name = "granian", extras = ["uvloop"], marker = "extra == 'webserver'", specifier = "~=2.7.0" },
{ name = "httpx-oauth", specifier = "~=0.16" },
{ name = "ijson", specifier = ">=3.2" },
{ name = "imap-tools", specifier = "~=1.11.0" },
{ name = "jinja2", specifier = "~=3.1.5" },
{ name = "langdetect", specifier = "~=1.0.9" },