Update tests

Use effective_content for matching
Add an effective_content for the model
2026-03-09 18:51:25 +00:00 · 2026-03-09 11:24:58 -07:00 · 2026-03-09 11:23:46 -07:00 · 2026-03-09 11:23:32 -07:00 · 2026-03-09 11:16:46 -07:00 · 2026-03-09 17:45:53 +00:00
21 changed files with 1384 additions and 976 deletions
--- a/.github/workflows/ci-backend.yml
+++ b/.github/workflows/ci-backend.yml
@@ -41,7 +41,7 @@ jobs:
          if [[ "${{ github.event_name }}" == "pull_request" ]]; then
            echo "base=${{ github.event.pull_request.base.sha }}" >> "$GITHUB_OUTPUT"
          elif [[ "${{ github.event.created }}" == "true" ]]; then
-            echo "base=origin/${{ github.event.repository.default_branch }}" >> "$GITHUB_OUTPUT"
+            echo "base=${{ github.event.repository.default_branch }}" >> "$GITHUB_OUTPUT"
          else
            echo "base=${{ github.event.before }}" >> "$GITHUB_OUTPUT"
          fi
--- a/.github/workflows/ci-docs.yml
+++ b/.github/workflows/ci-docs.yml
@@ -43,7 +43,7 @@ jobs:
          if [[ "${{ github.event_name }}" == "pull_request" ]]; then
            echo "base=${{ github.event.pull_request.base.sha }}" >> "$GITHUB_OUTPUT"
          elif [[ "${{ github.event.created }}" == "true" ]]; then
-            echo "base=origin/${{ github.event.repository.default_branch }}" >> "$GITHUB_OUTPUT"
+            echo "base=${{ github.event.repository.default_branch }}" >> "$GITHUB_OUTPUT"
          else
            echo "base=${{ github.event.before }}" >> "$GITHUB_OUTPUT"
          fi
--- a/.github/workflows/ci-frontend.yml
+++ b/.github/workflows/ci-frontend.yml
@@ -38,7 +38,7 @@ jobs:
          if [[ "${{ github.event_name }}" == "pull_request" ]]; then
            echo "base=${{ github.event.pull_request.base.sha }}" >> "$GITHUB_OUTPUT"
          elif [[ "${{ github.event.created }}" == "true" ]]; then
-            echo "base=origin/${{ github.event.repository.default_branch }}" >> "$GITHUB_OUTPUT"
+            echo "base=${{ github.event.repository.default_branch }}" >> "$GITHUB_OUTPUT"
          else
            echo "base=${{ github.event.before }}" >> "$GITHUB_OUTPUT"
          fi
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -49,6 +49,7 @@ dependencies = [
  "flower~=2.0.1",
  "gotenberg-client~=0.13.1",
  "httpx-oauth~=0.16",
+  "ijson>=3.2",
  "imap-tools~=1.11.0",
  "jinja2~=3.1.5",
  "langdetect~=1.0.9",
--- a/src/documents/management/commands/document_importer.py
+++ b/src/documents/management/commands/document_importer.py
@@ -8,6 +8,7 @@ from pathlib import Path
 from zipfile import ZipFile
 from zipfile import is_zipfile

+import ijson
 from django.conf import settings
 from django.contrib.auth.models import Permission
 from django.contrib.auth.models import User
@@ -46,6 +47,15 @@ if settings.AUDIT_LOG_ENABLED:
    from auditlog.registry import auditlog


+def iter_manifest_records(path: Path) -> Generator[dict, None, None]:
+    """Stream manifest records via ijson, with floats as float (not Decimal)."""
+    try:
+        with path.open("rb") as f:
+            yield from ijson.items(f, "item", use_float=True)
+    except ijson.JSONError as e:
+        raise CommandError(f"Failed to parse manifest file {path}: {e}") from e
+
+
 @contextmanager
 def disable_signal(sig, receiver, sender, *, weak: bool | None = None) -> Generator:
    try:
@@ -143,14 +153,9 @@ class Command(CryptMixin, PaperlessCommand):
        Loads manifest data from the various JSON files for parsing and loading the database
        """
        main_manifest_path: Path = self.source / "manifest.json"
-
-        with main_manifest_path.open() as infile:
-            self.manifest = json.load(infile)
        self.manifest_paths.append(main_manifest_path)

        for file in Path(self.source).glob("**/*-manifest.json"):
-            with file.open() as infile:
-                self.manifest += json.load(infile)
            self.manifest_paths.append(file)

    def load_metadata(self) -> None:
@@ -231,7 +236,6 @@ class Command(CryptMixin, PaperlessCommand):
        self.version: str | None = None
        self.salt: str | None = None
        self.manifest_paths = []
-        self.manifest = []

        # Create a temporary directory for extracting a zip file into it, even if supplied source is no zip file to keep code cleaner.
        with tempfile.TemporaryDirectory() as tmp_dir:
@@ -291,6 +295,9 @@ class Command(CryptMixin, PaperlessCommand):
            else:
                self.stdout.write(self.style.NOTICE("Data only import completed"))

+            for tmp in getattr(self, "_decrypted_tmp_paths", []):
+                tmp.unlink(missing_ok=True)
+
        self.stdout.write("Updating search index...")
        call_command(
            "document_index",
@@ -343,11 +350,12 @@ class Command(CryptMixin, PaperlessCommand):
                    ) from e

        self.stdout.write("Checking the manifest")
-        for record in self.manifest:
-            # Only check if the document files exist if this is not data only
-            # We don't care about documents for a data only import
-            if not self.data_only and record["model"] == "documents.document":
-                check_document_validity(record)
+        for manifest_path in self.manifest_paths:
+            for record in iter_manifest_records(manifest_path):
+                # Only check if the document files exist if this is not data only
+                # We don't care about documents for a data only import
+                if not self.data_only and record["model"] == "documents.document":
+                    check_document_validity(record)

    def _import_files_from_manifest(self) -> None:
        settings.ORIGINALS_DIR.mkdir(parents=True, exist_ok=True)
@@ -356,23 +364,31 @@ class Command(CryptMixin, PaperlessCommand):

        self.stdout.write("Copy files into paperless...")

-        manifest_documents = list(
-            filter(lambda r: r["model"] == "documents.document", self.manifest),
-        )
+        document_records = [
+            {
+                "pk": record["pk"],
+                EXPORTER_FILE_NAME: record[EXPORTER_FILE_NAME],
+                EXPORTER_THUMBNAIL_NAME: record.get(EXPORTER_THUMBNAIL_NAME),
+                EXPORTER_ARCHIVE_NAME: record.get(EXPORTER_ARCHIVE_NAME),
+            }
+            for manifest_path in self.manifest_paths
+            for record in iter_manifest_records(manifest_path)
+            if record["model"] == "documents.document"
+        ]

-        for record in self.track(manifest_documents, description="Copying files..."):
+        for record in self.track(document_records, description="Copying files..."):
            document = Document.objects.get(pk=record["pk"])

            doc_file = record[EXPORTER_FILE_NAME]
            document_path = self.source / doc_file

-            if EXPORTER_THUMBNAIL_NAME in record:
+            if record[EXPORTER_THUMBNAIL_NAME]:
                thumb_file = record[EXPORTER_THUMBNAIL_NAME]
                thumbnail_path = (self.source / thumb_file).resolve()
            else:
                thumbnail_path = None

-            if EXPORTER_ARCHIVE_NAME in record:
+            if record[EXPORTER_ARCHIVE_NAME]:
                archive_file = record[EXPORTER_ARCHIVE_NAME]
                archive_path = self.source / archive_file
            else:
@@ -413,33 +429,43 @@ class Command(CryptMixin, PaperlessCommand):

            document.save()

+    def _decrypt_record_if_needed(self, record: dict) -> dict:
+        fields = self.CRYPT_FIELDS_BY_MODEL.get(record.get("model", ""))
+        if fields:
+            for field in fields:
+                if record["fields"].get(field):
+                    record["fields"][field] = self.decrypt_string(
+                        value=record["fields"][field],
+                    )
+        return record
+
    def decrypt_secret_fields(self) -> None:
        """
-        The converse decryption of some fields out of the export before importing to database
+        The converse decryption of some fields out of the export before importing to database.
+        Streams records from each manifest path and writes decrypted content to a temp file.
        """
-        if self.passphrase:
-            # Salt has been loaded from metadata.json at this point, so it cannot be None
-            self.setup_crypto(passphrase=self.passphrase, salt=self.salt)
-
-            had_at_least_one_record = False
-
-            for crypt_config in self.CRYPT_FIELDS:
-                importer_model: str = crypt_config["model_name"]
-                crypt_fields: str = crypt_config["fields"]
-                for record in filter(
-                    lambda x: x["model"] == importer_model,
-                    self.manifest,
-                ):
-                    had_at_least_one_record = True
-                    for field in crypt_fields:
-                        if record["fields"][field]:
-                            record["fields"][field] = self.decrypt_string(
-                                value=record["fields"][field],
-                            )
-
-            if had_at_least_one_record:
-                # It's annoying, but the DB is loaded from the JSON directly
-                # Maybe could change that in the future?
-                (self.source / "manifest.json").write_text(
-                    json.dumps(self.manifest, indent=2, ensure_ascii=False),
-                )
+        if not self.passphrase:
+            return
+        # Salt has been loaded from metadata.json at this point, so it cannot be None
+        self.setup_crypto(passphrase=self.passphrase, salt=self.salt)
+        self._decrypted_tmp_paths: list[Path] = []
+        new_paths: list[Path] = []
+        for manifest_path in self.manifest_paths:
+            tmp = manifest_path.with_name(manifest_path.stem + ".decrypted.json")
+            with tmp.open("w", encoding="utf-8") as out:
+                out.write("[\n")
+                first = True
+                for record in iter_manifest_records(manifest_path):
+                    if not first:
+                        out.write(",\n")
+                    json.dump(
+                        self._decrypt_record_if_needed(record),
+                        out,
+                        indent=2,
+                        ensure_ascii=False,
+                    )
+                    first = False
+                out.write("\n]\n")
+            self._decrypted_tmp_paths.append(tmp)
+            new_paths.append(tmp)
+        self.manifest_paths = new_paths
--- a/src/documents/matching.py
+++ b/src/documents/matching.py
@@ -169,7 +169,7 @@ def match_storage_paths(document: Document, classifier: DocumentClassifier, user
 def matches(matching_model: MatchingModel, document: Document):
    search_flags = 0

-    document_content = document.content
+    document_content = document.get_effective_content() or ""

    # Check that match is not empty
    if not matching_model.match.strip():
--- a/src/documents/models.py
+++ b/src/documents/models.py
@@ -361,6 +361,42 @@ class Document(SoftDeleteModel, ModelWithOwner):  # type: ignore[django-manager-
            res += f" {self.title}"
        return res

+    def get_effective_content(self) -> str | None:
+        """
+        Return the content that should represent this document.
+
+        A queryset-annotated ``effective_content`` wins when present. Version
+        documents and unsaved instances use their own ``content``; a saved root
+        uses the highest-id version's content, else its own when none exist.
+        """
+        if hasattr(self, "effective_content"):
+            return self.effective_content
+
+        if self.root_document_id is not None or self.pk is None:
+            return self.content
+
+        prefetched_cache = getattr(self, "_prefetched_objects_cache", None)
+        prefetched_versions = (
+            prefetched_cache.get("versions")
+            if isinstance(prefetched_cache, dict)
+            else None
+        )
+        if prefetched_versions:
+            latest_prefetched = max(prefetched_versions, key=lambda doc: doc.id)
+            return latest_prefetched.content
+
+        latest_version_content = (
+            Document.objects.filter(root_document=self)
+            .order_by("-id")
+            .values_list("content", flat=True)
+            .first()
+        )
+        return (
+            latest_version_content
+            if latest_version_content is not None
+            else self.content
+        )
+
    @property
    def suggestion_content(self):
        """
@@ -373,15 +409,21 @@ class Document(SoftDeleteModel, ModelWithOwner):  # type: ignore[django-manager-
        This improves processing speed for large documents while keeping
        enough context for accurate suggestions.
        """
-        if not self.content or len(self.content) <= 1200000:
-            return self.content
+        effective_content = self.get_effective_content()
+        if not effective_content or len(effective_content) <= 1200000:
+            return effective_content
        else:
            # Use 80% from the start and 20% from the end
            # to preserve both opening and closing context.
            head_len = 800000
            tail_len = 200000

-            return " ".join((self.content[:head_len], self.content[-tail_len:]))
+            return " ".join(
+                (
+                    effective_content[:head_len],
+                    effective_content[-tail_len:],
+                ),
+            )

    @property
    def source_path(self) -> Path:
--- a/src/documents/tests/test_document_model.py
+++ b/src/documents/tests/test_document_model.py
@@ -156,6 +156,46 @@ class TestDocument(TestCase):
        )
        self.assertEqual(doc.get_public_filename(), "2020-12-25 test")

+    def test_suggestion_content_uses_latest_version_content_for_root_documents(
+        self,
+    ) -> None:
+        root = Document.objects.create(
+            title="root",
+            checksum="root",
+            mime_type="application/pdf",
+            content="outdated root content",
+        )
+        version = Document.objects.create(
+            title="v1",
+            checksum="v1",
+            mime_type="application/pdf",
+            root_document=root,
+            content="latest version content",
+        )
+
+        self.assertEqual(root.suggestion_content, version.content)
+
+    def test_content_length_is_per_document_row_for_versions(self) -> None:
+        root = Document.objects.create(
+            title="root",
+            checksum="root",
+            mime_type="application/pdf",
+            content="abc",
+        )
+        version = Document.objects.create(
+            title="v1",
+            checksum="v1",
+            mime_type="application/pdf",
+            root_document=root,
+            content="abcdefgh",
+        )
+
+        root.refresh_from_db()
+        version.refresh_from_db()
+
+        self.assertEqual(root.content_length, 3)
+        self.assertEqual(version.content_length, 8)
+

 def test_suggestion_content() -> None:
    """
--- a/src/documents/tests/test_management_importer.py
+++ b/src/documents/tests/test_management_importer.py
@@ -119,15 +119,22 @@ class TestCommandImport(
            # No read permissions
            original_path.chmod(0o222)

+            manifest_path = Path(temp_dir) / "manifest.json"
+            manifest_path.write_text(
+                json.dumps(
+                    [
+                        {
+                            "model": "documents.document",
+                            EXPORTER_FILE_NAME: "original.pdf",
+                            EXPORTER_ARCHIVE_NAME: "archive.pdf",
+                        },
+                    ],
+                ),
+            )
+
            cmd = Command()
            cmd.source = Path(temp_dir)
-            cmd.manifest = [
-                {
-                    "model": "documents.document",
-                    EXPORTER_FILE_NAME: "original.pdf",
-                    EXPORTER_ARCHIVE_NAME: "archive.pdf",
-                },
-            ]
+            cmd.manifest_paths = [manifest_path]
            cmd.data_only = False
            with self.assertRaises(CommandError) as cm:
                cmd.check_manifest_validity()
@@ -296,7 +303,7 @@ class TestCommandImport(
        (self.dirs.scratch_dir / "manifest.json").touch()

        # We're not building a manifest, so it fails, but this test doesn't care
-        with self.assertRaises(json.decoder.JSONDecodeError):
+        with self.assertRaises(CommandError):
            call_command(
                "document_importer",
                "--no-progress-bar",
@@ -325,7 +332,7 @@ class TestCommandImport(
        )

        # We're not building a manifest, so it fails, but this test doesn't care
-        with self.assertRaises(json.decoder.JSONDecodeError):
+        with self.assertRaises(CommandError):
            call_command(
                "document_importer",
                "--no-progress-bar",
--- a/src/documents/tests/test_matchables.py
+++ b/src/documents/tests/test_matchables.py
@@ -48,6 +48,52 @@ class _TestMatchingBase(TestCase):


 class TestMatching(_TestMatchingBase):
+    def test_matches_uses_latest_version_content_for_root_documents(self) -> None:
+        root = Document.objects.create(
+            title="root",
+            checksum="root",
+            mime_type="application/pdf",
+            content="root content without token",
+        )
+        Document.objects.create(
+            title="v1",
+            checksum="v1",
+            mime_type="application/pdf",
+            root_document=root,
+            content="latest version contains keyword",
+        )
+        tag = Tag.objects.create(
+            name="tag",
+            match="keyword",
+            matching_algorithm=Tag.MATCH_ANY,
+        )
+
+        self.assertTrue(matching.matches(tag, root))
+
+    def test_matches_does_not_fall_back_to_root_content_when_version_exists(
+        self,
+    ) -> None:
+        root = Document.objects.create(
+            title="root",
+            checksum="root",
+            mime_type="application/pdf",
+            content="root contains keyword",
+        )
+        Document.objects.create(
+            title="v1",
+            checksum="v1",
+            mime_type="application/pdf",
+            root_document=root,
+            content="latest version without token",
+        )
+        tag = Tag.objects.create(
+            name="tag",
+            match="keyword",
+            matching_algorithm=Tag.MATCH_ANY,
+        )
+
+        self.assertFalse(matching.matches(tag, root))
+
    def test_match_none(self) -> None:
        self._test_matching(
            "",
--- a/src/locale/en_US/LC_MESSAGES/django.po
+++ b/src/locale/en_US/LC_MESSAGES/django.po
@@ -2,7 +2,7 @@ msgid ""
 msgstr ""
 "Project-Id-Version: paperless-ngx\n"
 "Report-Msgid-Bugs-To: \n"
-"POT-Creation-Date: 2026-03-09 01:51+0000\n"
+"POT-Creation-Date: 2026-03-09 17:44+0000\n"
 "PO-Revision-Date: 2022-02-17 04:17\n"
 "Last-Translator: \n"
 "Language-Team: English\n"
@@ -1856,151 +1856,151 @@ msgstr ""
 msgid "paperless application settings"
 msgstr ""

-#: paperless/settings/__init__.py:752
+#: paperless/settings/__init__.py:521
 msgid "English (US)"
 msgstr ""

-#: paperless/settings/__init__.py:753
+#: paperless/settings/__init__.py:522
 msgid "Arabic"
 msgstr ""

-#: paperless/settings/__init__.py:754
+#: paperless/settings/__init__.py:523
 msgid "Afrikaans"
 msgstr ""

-#: paperless/settings/__init__.py:755
+#: paperless/settings/__init__.py:524
 msgid "Belarusian"
 msgstr ""

-#: paperless/settings/__init__.py:756
+#: paperless/settings/__init__.py:525
 msgid "Bulgarian"
 msgstr ""

-#: paperless/settings/__init__.py:757
+#: paperless/settings/__init__.py:526
 msgid "Catalan"
 msgstr ""

-#: paperless/settings/__init__.py:758
+#: paperless/settings/__init__.py:527
 msgid "Czech"
 msgstr ""

-#: paperless/settings/__init__.py:759
+#: paperless/settings/__init__.py:528
 msgid "Danish"
 msgstr ""

-#: paperless/settings/__init__.py:760
+#: paperless/settings/__init__.py:529
 msgid "German"
 msgstr ""

-#: paperless/settings/__init__.py:761
+#: paperless/settings/__init__.py:530
 msgid "Greek"
 msgstr ""

-#: paperless/settings/__init__.py:762
+#: paperless/settings/__init__.py:531
 msgid "English (GB)"
 msgstr ""

-#: paperless/settings/__init__.py:763
+#: paperless/settings/__init__.py:532
 msgid "Spanish"
 msgstr ""

-#: paperless/settings/__init__.py:764
+#: paperless/settings/__init__.py:533
 msgid "Persian"
 msgstr ""

-#: paperless/settings/__init__.py:765
+#: paperless/settings/__init__.py:534
 msgid "Finnish"
 msgstr ""

-#: paperless/settings/__init__.py:766
+#: paperless/settings/__init__.py:535
 msgid "French"
 msgstr ""

-#: paperless/settings/__init__.py:767
+#: paperless/settings/__init__.py:536
 msgid "Hungarian"
 msgstr ""

-#: paperless/settings/__init__.py:768
+#: paperless/settings/__init__.py:537
 msgid "Indonesian"
 msgstr ""

-#: paperless/settings/__init__.py:769
+#: paperless/settings/__init__.py:538
 msgid "Italian"
 msgstr ""

-#: paperless/settings/__init__.py:770
+#: paperless/settings/__init__.py:539
 msgid "Japanese"
 msgstr ""

-#: paperless/settings/__init__.py:771
+#: paperless/settings/__init__.py:540
 msgid "Korean"
 msgstr ""

-#: paperless/settings/__init__.py:772
+#: paperless/settings/__init__.py:541
 msgid "Luxembourgish"
 msgstr ""

-#: paperless/settings/__init__.py:773
+#: paperless/settings/__init__.py:542
 msgid "Norwegian"
 msgstr ""

-#: paperless/settings/__init__.py:774
+#: paperless/settings/__init__.py:543
 msgid "Dutch"
 msgstr ""

-#: paperless/settings/__init__.py:775
+#: paperless/settings/__init__.py:544
 msgid "Polish"
 msgstr ""

-#: paperless/settings/__init__.py:776
+#: paperless/settings/__init__.py:545
 msgid "Portuguese (Brazil)"
 msgstr ""

-#: paperless/settings/__init__.py:777
+#: paperless/settings/__init__.py:546
 msgid "Portuguese"
 msgstr ""

-#: paperless/settings/__init__.py:778
+#: paperless/settings/__init__.py:547
 msgid "Romanian"
 msgstr ""

-#: paperless/settings/__init__.py:779
+#: paperless/settings/__init__.py:548
 msgid "Russian"
 msgstr ""

-#: paperless/settings/__init__.py:780
+#: paperless/settings/__init__.py:549
 msgid "Slovak"
 msgstr ""

-#: paperless/settings/__init__.py:781
+#: paperless/settings/__init__.py:550
 msgid "Slovenian"
 msgstr ""

-#: paperless/settings/__init__.py:782
+#: paperless/settings/__init__.py:551
 msgid "Serbian"
 msgstr ""

-#: paperless/settings/__init__.py:783
+#: paperless/settings/__init__.py:552
 msgid "Swedish"
 msgstr ""

-#: paperless/settings/__init__.py:784
+#: paperless/settings/__init__.py:553
 msgid "Turkish"
 msgstr ""

-#: paperless/settings/__init__.py:785
+#: paperless/settings/__init__.py:554
 msgid "Ukrainian"
 msgstr ""

-#: paperless/settings/__init__.py:786
+#: paperless/settings/__init__.py:555
 msgid "Vietnamese"
 msgstr ""

-#: paperless/settings/__init__.py:787
+#: paperless/settings/__init__.py:556
 msgid "Chinese Simplified"
 msgstr ""

-#: paperless/settings/__init__.py:788
+#: paperless/settings/__init__.py:557
 msgid "Chinese Traditional"
 msgstr ""

--- a/src/paperless/settings/__init__.py
+++ b/src/paperless/settings/__init__.py
@@ -6,18 +6,25 @@ import math
 import multiprocessing
 import os
 import tempfile
-from os import PathLike
 from pathlib import Path
 from typing import Final
 from urllib.parse import urlparse

-from celery.schedules import crontab
 from compression_middleware.middleware import CompressionMiddleware
-from dateparser.languages.loader import LocaleDataLoader
 from django.utils.translation import gettext_lazy as _
 from dotenv import load_dotenv

+from paperless.settings.custom import parse_beat_schedule
+from paperless.settings.custom import parse_dateparser_languages
 from paperless.settings.custom import parse_db_settings
+from paperless.settings.custom import parse_hosting_settings
+from paperless.settings.custom import parse_ignore_dates
+from paperless.settings.custom import parse_redis_url
+from paperless.settings.parsers import get_bool_from_env
+from paperless.settings.parsers import get_float_from_env
+from paperless.settings.parsers import get_int_from_env
+from paperless.settings.parsers import get_list_from_env
+from paperless.settings.parsers import get_path_from_env

 logger = logging.getLogger("paperless.settings")

@@ -45,239 +52,8 @@ for path in [
 os.environ["OMP_THREAD_LIMIT"] = "1"


-def __get_boolean(key: str, default: str = "NO") -> bool:
-    """
-    Return a boolean value based on whatever the user has supplied in the
-    environment based on whether the value "looks like" it's True or not.
-    """
-    return bool(os.getenv(key, default).lower() in ("yes", "y", "1", "t", "true"))
-
-
-def __get_int(key: str, default: int) -> int:
-    """
-    Return an integer value based on the environment variable or a default
-    """
-    return int(os.getenv(key, default))
-
-
-def __get_optional_int(key: str) -> int | None:
-    """
-    Returns None if the environment key is not present, otherwise an integer
-    """
-    if key in os.environ:
-        return __get_int(key, -1)  # pragma: no cover
-    return None
-
-
-def __get_float(key: str, default: float) -> float:
-    """
-    Return an integer value based on the environment variable or a default
-    """
-    return float(os.getenv(key, default))
-
-
-def __get_path(
-    key: str,
-    default: PathLike | str,
-) -> Path:
-    """
-    Return a normalized, absolute path based on the environment variable or a default,
-    if provided
-    """
-    if key in os.environ:
-        return Path(os.environ[key]).resolve()
-    return Path(default).resolve()
-
-
-def __get_optional_path(key: str) -> Path | None:
-    """
-    Returns None if the environment key is not present, otherwise a fully resolved Path
-    """
-    if key in os.environ:
-        return __get_path(key, "")
-    return None
-
-
-def __get_list(
-    key: str,
-    default: list[str] | None = None,
-    sep: str = ",",
-) -> list[str]:
-    """
-    Return a list of elements from the environment, as separated by the given
-    string, or the default if the key does not exist
-    """
-    if key in os.environ:
-        return list(filter(None, os.environ[key].split(sep)))
-    elif default is not None:
-        return default
-    else:
-        return []
-
-
-def _parse_redis_url(env_redis: str | None) -> tuple[str, str]:
-    """
-    Gets the Redis information from the environment or a default and handles
-    converting from incompatible django_channels and celery formats.
-
-    Returns a tuple of (celery_url, channels_url)
-    """
-
-    # Not set, return a compatible default
-    if env_redis is None:
-        return ("redis://localhost:6379", "redis://localhost:6379")
-
-    if "unix" in env_redis.lower():
-        # channels_redis socket format, looks like:
-        # "unix:///path/to/redis.sock"
-        _, path = env_redis.split(":", 1)
-        # Optionally setting a db number
-        if "?db=" in env_redis:
-            path, number = path.split("?db=")
-            return (f"redis+socket:{path}?virtual_host={number}", env_redis)
-        else:
-            return (f"redis+socket:{path}", env_redis)
-
-    elif "+socket" in env_redis.lower():
-        # celery socket style, looks like:
-        # "redis+socket:///path/to/redis.sock"
-        _, path = env_redis.split(":", 1)
-        if "?virtual_host=" in env_redis:
-            # Virtual host (aka db number)
-            path, number = path.split("?virtual_host=")
-            return (env_redis, f"unix:{path}?db={number}")
-        else:
-            return (env_redis, f"unix:{path}")
-
-    # Not a socket
-    return (env_redis, env_redis)
-
-
-def _parse_beat_schedule() -> dict:
-    """
-    Configures the scheduled tasks, according to default or
-    environment variables.  Task expiration is configured so the task will
-    expire (and not run), shortly before the default frequency will put another
-    of the same task into the queue
-
-
-    https://docs.celeryq.dev/en/stable/userguide/periodic-tasks.html#beat-entries
-    https://docs.celeryq.dev/en/latest/userguide/calling.html#expiration
-    """
-    schedule = {}
-    tasks = [
-        {
-            "name": "Check all e-mail accounts",
-            "env_key": "PAPERLESS_EMAIL_TASK_CRON",
-            # Default every ten minutes
-            "env_default": "*/10 * * * *",
-            "task": "paperless_mail.tasks.process_mail_accounts",
-            "options": {
-                # 1 minute before default schedule sends again
-                "expires": 9.0 * 60.0,
-            },
-        },
-        {
-            "name": "Train the classifier",
-            "env_key": "PAPERLESS_TRAIN_TASK_CRON",
-            # Default hourly at 5 minutes past the hour
-            "env_default": "5 */1 * * *",
-            "task": "documents.tasks.train_classifier",
-            "options": {
-                # 1 minute before default schedule sends again
-                "expires": 59.0 * 60.0,
-            },
-        },
-        {
-            "name": "Optimize the index",
-            "env_key": "PAPERLESS_INDEX_TASK_CRON",
-            # Default daily at midnight
-            "env_default": "0 0 * * *",
-            "task": "documents.tasks.index_optimize",
-            "options": {
-                # 1 hour before default schedule sends again
-                "expires": 23.0 * 60.0 * 60.0,
-            },
-        },
-        {
-            "name": "Perform sanity check",
-            "env_key": "PAPERLESS_SANITY_TASK_CRON",
-            # Default Sunday at 00:30
-            "env_default": "30 0 * * sun",
-            "task": "documents.tasks.sanity_check",
-            "options": {
-                # 1 hour before default schedule sends again
-                "expires": ((7.0 * 24.0) - 1.0) * 60.0 * 60.0,
-            },
-        },
-        {
-            "name": "Empty trash",
-            "env_key": "PAPERLESS_EMPTY_TRASH_TASK_CRON",
-            # Default daily at 01:00
-            "env_default": "0 1 * * *",
-            "task": "documents.tasks.empty_trash",
-            "options": {
-                # 1 hour before default schedule sends again
-                "expires": 23.0 * 60.0 * 60.0,
-            },
-        },
-        {
-            "name": "Check and run scheduled workflows",
-            "env_key": "PAPERLESS_WORKFLOW_SCHEDULED_TASK_CRON",
-            # Default hourly at 5 minutes past the hour
-            "env_default": "5 */1 * * *",
-            "task": "documents.tasks.check_scheduled_workflows",
-            "options": {
-                # 1 minute before default schedule sends again
-                "expires": 59.0 * 60.0,
-            },
-        },
-        {
-            "name": "Rebuild LLM index",
-            "env_key": "PAPERLESS_LLM_INDEX_TASK_CRON",
-            # Default daily at 02:10
-            "env_default": "10 2 * * *",
-            "task": "documents.tasks.llmindex_index",
-            "options": {
-                # 1 hour before default schedule sends again
-                "expires": 23.0 * 60.0 * 60.0,
-            },
-        },
-        {
-            "name": "Cleanup expired share link bundles",
-            "env_key": "PAPERLESS_SHARE_LINK_BUNDLE_CLEANUP_CRON",
-            # Default daily at 02:00
-            "env_default": "0 2 * * *",
-            "task": "documents.tasks.cleanup_expired_share_link_bundles",
-            "options": {
-                # 1 hour before default schedule sends again
-                "expires": 23.0 * 60.0 * 60.0,
-            },
-        },
-    ]
-    for task in tasks:
-        # Either get the environment setting or use the default
-        value = os.getenv(task["env_key"], task["env_default"])
-        # Don't add disabled tasks to the schedule
-        if value == "disable":
-            continue
-        # I find https://crontab.guru/ super helpful
-        # crontab(5) format
-        #   - five time-and-date fields
-        #   - separated by at least one blank
-        minute, hour, day_month, month, day_week = value.split(" ")
-
-        schedule[task["name"]] = {
-            "task": task["task"],
-            "schedule": crontab(minute, hour, day_week, day_month, month),
-            "options": task["options"],
-        }
-
-    return schedule
-
-
 # NEVER RUN WITH DEBUG IN PRODUCTION.
-DEBUG = __get_boolean("PAPERLESS_DEBUG", "NO")
+DEBUG = get_bool_from_env("PAPERLESS_DEBUG", "NO")


 ###############################################################################
@@ -286,21 +62,21 @@ DEBUG = __get_boolean("PAPERLESS_DEBUG", "NO")

 BASE_DIR: Path = Path(__file__).resolve().parent.parent.parent

-STATIC_ROOT = __get_path("PAPERLESS_STATICDIR", BASE_DIR.parent / "static")
+STATIC_ROOT = get_path_from_env("PAPERLESS_STATICDIR", BASE_DIR.parent / "static")

-MEDIA_ROOT = __get_path("PAPERLESS_MEDIA_ROOT", BASE_DIR.parent / "media")
+MEDIA_ROOT = get_path_from_env("PAPERLESS_MEDIA_ROOT", BASE_DIR.parent / "media")
 ORIGINALS_DIR = MEDIA_ROOT / "documents" / "originals"
 ARCHIVE_DIR = MEDIA_ROOT / "documents" / "archive"
 THUMBNAIL_DIR = MEDIA_ROOT / "documents" / "thumbnails"
 SHARE_LINK_BUNDLE_DIR = MEDIA_ROOT / "documents" / "share_link_bundles"

-DATA_DIR = __get_path("PAPERLESS_DATA_DIR", BASE_DIR.parent / "data")
+DATA_DIR = get_path_from_env("PAPERLESS_DATA_DIR", BASE_DIR.parent / "data")

-NLTK_DIR = __get_path("PAPERLESS_NLTK_DIR", "/usr/share/nltk_data")
+NLTK_DIR = get_path_from_env("PAPERLESS_NLTK_DIR", "/usr/share/nltk_data")

 # Check deprecated setting first
 EMPTY_TRASH_DIR = (
-    __get_path("PAPERLESS_TRASH_DIR", os.getenv("PAPERLESS_EMPTY_TRASH_DIR"))
+    get_path_from_env("PAPERLESS_TRASH_DIR", os.getenv("PAPERLESS_EMPTY_TRASH_DIR"))
    if os.getenv("PAPERLESS_TRASH_DIR") or os.getenv("PAPERLESS_EMPTY_TRASH_DIR")
    else None
 )
@@ -309,21 +85,21 @@ EMPTY_TRASH_DIR = (
 # threads.
 MEDIA_LOCK = MEDIA_ROOT / "media.lock"
 INDEX_DIR = DATA_DIR / "index"
-MODEL_FILE = __get_path(
+MODEL_FILE = get_path_from_env(
    "PAPERLESS_MODEL_FILE",
    DATA_DIR / "classification_model.pickle",
 )
 LLM_INDEX_DIR = DATA_DIR / "llm_index"

-LOGGING_DIR = __get_path("PAPERLESS_LOGGING_DIR", DATA_DIR / "log")
+LOGGING_DIR = get_path_from_env("PAPERLESS_LOGGING_DIR", DATA_DIR / "log")

-CONSUMPTION_DIR = __get_path(
+CONSUMPTION_DIR = get_path_from_env(
    "PAPERLESS_CONSUMPTION_DIR",
    BASE_DIR.parent / "consume",
 )

 # This will be created if it doesn't exist
-SCRATCH_DIR = __get_path(
+SCRATCH_DIR = get_path_from_env(
    "PAPERLESS_SCRATCH_DIR",
    Path(tempfile.gettempdir()) / "paperless",
 )
@@ -332,7 +108,7 @@ SCRATCH_DIR = __get_path(
 # Application Definition                                                      #
 ###############################################################################

-env_apps = __get_list("PAPERLESS_APPS")
+env_apps = get_list_from_env("PAPERLESS_APPS")

 INSTALLED_APPS = [
    "whitenoise.runserver_nostatic",
@@ -405,7 +181,7 @@ MIDDLEWARE = [
 ]

 # Optional to enable compression
-if __get_boolean("PAPERLESS_ENABLE_COMPRESSION", "yes"):  # pragma: no cover
+if get_bool_from_env("PAPERLESS_ENABLE_COMPRESSION", "yes"):  # pragma: no cover
    MIDDLEWARE.insert(0, "compression_middleware.middleware.CompressionMiddleware")

 # Workaround to not compress streaming responses (e.g. chat).
@@ -424,20 +200,8 @@ CompressionMiddleware.process_response = patched_process_response
 ROOT_URLCONF = "paperless.urls"


-def _parse_base_paths() -> tuple[str, str, str, str, str]:
-    script_name = os.getenv("PAPERLESS_FORCE_SCRIPT_NAME")
-    base_url = (script_name or "") + "/"
-    login_url = base_url + "accounts/login/"
-    login_redirect_url = base_url + "dashboard"
-    logout_redirect_url = os.getenv(
-        "PAPERLESS_LOGOUT_REDIRECT_URL",
-        login_url + "?loggedout=1",
-    )
-    return script_name, base_url, login_url, login_redirect_url, logout_redirect_url
-
-
 FORCE_SCRIPT_NAME, BASE_URL, LOGIN_URL, LOGIN_REDIRECT_URL, LOGOUT_REDIRECT_URL = (
-    _parse_base_paths()
+    parse_hosting_settings()
 )

 # DRF Spectacular settings
@@ -471,7 +235,7 @@ STORAGES = {
    "default": {"BACKEND": "django.core.files.storage.FileSystemStorage"},
 }

-_CELERY_REDIS_URL, _CHANNELS_REDIS_URL = _parse_redis_url(
+_CELERY_REDIS_URL, _CHANNELS_REDIS_URL = parse_redis_url(
    os.getenv("PAPERLESS_REDIS", None),
 )
 _REDIS_KEY_PREFIX = os.getenv("PAPERLESS_REDIS_PREFIX", "")
@@ -520,8 +284,8 @@ EMAIL_PORT: Final[int] = int(os.getenv("PAPERLESS_EMAIL_PORT", 25))
 EMAIL_HOST_USER: Final[str] = os.getenv("PAPERLESS_EMAIL_HOST_USER", "")
 EMAIL_HOST_PASSWORD: Final[str] = os.getenv("PAPERLESS_EMAIL_HOST_PASSWORD", "")
 DEFAULT_FROM_EMAIL: Final[str] = os.getenv("PAPERLESS_EMAIL_FROM", EMAIL_HOST_USER)
-EMAIL_USE_TLS: Final[bool] = __get_boolean("PAPERLESS_EMAIL_USE_TLS")
-EMAIL_USE_SSL: Final[bool] = __get_boolean("PAPERLESS_EMAIL_USE_SSL")
+EMAIL_USE_TLS: Final[bool] = get_bool_from_env("PAPERLESS_EMAIL_USE_TLS")
+EMAIL_USE_SSL: Final[bool] = get_bool_from_env("PAPERLESS_EMAIL_USE_SSL")
 EMAIL_SUBJECT_PREFIX: Final[str] = "[Paperless-ngx] "
 EMAIL_TIMEOUT = 30.0
 EMAIL_ENABLED = EMAIL_HOST != "localhost" or EMAIL_HOST_USER != ""
@@ -546,20 +310,22 @@ ACCOUNT_DEFAULT_HTTP_PROTOCOL = os.getenv(
 )

 ACCOUNT_ADAPTER = "paperless.adapter.CustomAccountAdapter"
-ACCOUNT_ALLOW_SIGNUPS = __get_boolean("PAPERLESS_ACCOUNT_ALLOW_SIGNUPS")
-ACCOUNT_DEFAULT_GROUPS = __get_list("PAPERLESS_ACCOUNT_DEFAULT_GROUPS")
+ACCOUNT_ALLOW_SIGNUPS = get_bool_from_env("PAPERLESS_ACCOUNT_ALLOW_SIGNUPS")
+ACCOUNT_DEFAULT_GROUPS = get_list_from_env("PAPERLESS_ACCOUNT_DEFAULT_GROUPS")

 SOCIALACCOUNT_ADAPTER = "paperless.adapter.CustomSocialAccountAdapter"
-SOCIALACCOUNT_ALLOW_SIGNUPS = __get_boolean(
+SOCIALACCOUNT_ALLOW_SIGNUPS = get_bool_from_env(
    "PAPERLESS_SOCIALACCOUNT_ALLOW_SIGNUPS",
    "yes",
 )
-SOCIALACCOUNT_AUTO_SIGNUP = __get_boolean("PAPERLESS_SOCIAL_AUTO_SIGNUP")
+SOCIALACCOUNT_AUTO_SIGNUP = get_bool_from_env("PAPERLESS_SOCIAL_AUTO_SIGNUP")
 SOCIALACCOUNT_PROVIDERS = json.loads(
    os.getenv("PAPERLESS_SOCIALACCOUNT_PROVIDERS", "{}"),
 )
-SOCIAL_ACCOUNT_DEFAULT_GROUPS = __get_list("PAPERLESS_SOCIAL_ACCOUNT_DEFAULT_GROUPS")
-SOCIAL_ACCOUNT_SYNC_GROUPS = __get_boolean("PAPERLESS_SOCIAL_ACCOUNT_SYNC_GROUPS")
+SOCIAL_ACCOUNT_DEFAULT_GROUPS = get_list_from_env(
+    "PAPERLESS_SOCIAL_ACCOUNT_DEFAULT_GROUPS",
+)
+SOCIAL_ACCOUNT_SYNC_GROUPS = get_bool_from_env("PAPERLESS_SOCIAL_ACCOUNT_SYNC_GROUPS")
 SOCIAL_ACCOUNT_SYNC_GROUPS_CLAIM: Final[str] = os.getenv(
    "PAPERLESS_SOCIAL_ACCOUNT_SYNC_GROUPS_CLAIM",
    "groups",
@@ -571,8 +337,8 @@ MFA_TOTP_ISSUER = "Paperless-ngx"

 ACCOUNT_EMAIL_SUBJECT_PREFIX = "[Paperless-ngx] "

-DISABLE_REGULAR_LOGIN = __get_boolean("PAPERLESS_DISABLE_REGULAR_LOGIN")
-REDIRECT_LOGIN_TO_SSO = __get_boolean("PAPERLESS_REDIRECT_LOGIN_TO_SSO")
+DISABLE_REGULAR_LOGIN = get_bool_from_env("PAPERLESS_DISABLE_REGULAR_LOGIN")
+REDIRECT_LOGIN_TO_SSO = get_bool_from_env("PAPERLESS_REDIRECT_LOGIN_TO_SSO")

 AUTO_LOGIN_USERNAME = os.getenv("PAPERLESS_AUTO_LOGIN_USERNAME")

@@ -585,12 +351,15 @@ ACCOUNT_EMAIL_VERIFICATION = (
    )
 )

-ACCOUNT_EMAIL_UNKNOWN_ACCOUNTS = __get_boolean(
+ACCOUNT_EMAIL_UNKNOWN_ACCOUNTS = get_bool_from_env(
    "PAPERLESS_ACCOUNT_EMAIL_UNKNOWN_ACCOUNTS",
    "True",
 )

-ACCOUNT_SESSION_REMEMBER = __get_boolean("PAPERLESS_ACCOUNT_SESSION_REMEMBER", "True")
+ACCOUNT_SESSION_REMEMBER = get_bool_from_env(
+    "PAPERLESS_ACCOUNT_SESSION_REMEMBER",
+    "True",
+)
 SESSION_EXPIRE_AT_BROWSER_CLOSE = not ACCOUNT_SESSION_REMEMBER
 SESSION_COOKIE_AGE = int(
    os.getenv("PAPERLESS_SESSION_COOKIE_AGE", 60 * 60 * 24 * 7 * 3),
@@ -607,8 +376,8 @@ if AUTO_LOGIN_USERNAME:

 def _parse_remote_user_settings() -> str:
    global MIDDLEWARE, AUTHENTICATION_BACKENDS, REST_FRAMEWORK
-    enable = __get_boolean("PAPERLESS_ENABLE_HTTP_REMOTE_USER")
-    enable_api = __get_boolean("PAPERLESS_ENABLE_HTTP_REMOTE_USER_API")
+    enable = get_bool_from_env("PAPERLESS_ENABLE_HTTP_REMOTE_USER")
+    enable_api = get_bool_from_env("PAPERLESS_ENABLE_HTTP_REMOTE_USER_API")
    if enable or enable_api:
        MIDDLEWARE.append("paperless.auth.HttpRemoteUserMiddleware")
        AUTHENTICATION_BACKENDS.insert(
@@ -636,16 +405,16 @@ HTTP_REMOTE_USER_HEADER_NAME = _parse_remote_user_settings()
 X_FRAME_OPTIONS = "SAMEORIGIN"

 # The next 3 settings can also be set using just PAPERLESS_URL
-CSRF_TRUSTED_ORIGINS = __get_list("PAPERLESS_CSRF_TRUSTED_ORIGINS")
+CSRF_TRUSTED_ORIGINS = get_list_from_env("PAPERLESS_CSRF_TRUSTED_ORIGINS")

 if DEBUG:
    # Allow access from the angular development server during debugging
    CSRF_TRUSTED_ORIGINS.append("http://localhost:4200")

 # We allow CORS from localhost:8000
-CORS_ALLOWED_ORIGINS = __get_list(
+CORS_ALLOWED_ORIGINS = get_list_from_env(
    "PAPERLESS_CORS_ALLOWED_HOSTS",
-    ["http://localhost:8000"],
+    default=["http://localhost:8000"],
 )

 if DEBUG:
@@ -658,7 +427,7 @@ CORS_EXPOSE_HEADERS = [
    "Content-Disposition",
 ]

-ALLOWED_HOSTS = __get_list("PAPERLESS_ALLOWED_HOSTS", ["*"])
+ALLOWED_HOSTS = get_list_from_env("PAPERLESS_ALLOWED_HOSTS", default=["*"])
 if ALLOWED_HOSTS != ["*"]:
    # always allow localhost. Necessary e.g. for healthcheck in docker.
    ALLOWED_HOSTS.append("localhost")
@@ -678,10 +447,10 @@ def _parse_paperless_url():
 PAPERLESS_URL = _parse_paperless_url()

 # For use with trusted proxies
-TRUSTED_PROXIES = __get_list("PAPERLESS_TRUSTED_PROXIES")
+TRUSTED_PROXIES = get_list_from_env("PAPERLESS_TRUSTED_PROXIES")

-USE_X_FORWARDED_HOST = __get_boolean("PAPERLESS_USE_X_FORWARD_HOST", "false")
-USE_X_FORWARDED_PORT = __get_boolean("PAPERLESS_USE_X_FORWARD_PORT", "false")
+USE_X_FORWARDED_HOST = get_bool_from_env("PAPERLESS_USE_X_FORWARD_HOST", "false")
+USE_X_FORWARDED_PORT = get_bool_from_env("PAPERLESS_USE_X_FORWARD_PORT", "false")
 SECURE_PROXY_SSL_HEADER = (
    tuple(json.loads(os.environ["PAPERLESS_PROXY_SSL_HEADER"]))
    if "PAPERLESS_PROXY_SSL_HEADER" in os.environ
@@ -724,7 +493,7 @@ CSRF_COOKIE_NAME = f"{COOKIE_PREFIX}csrftoken"
 SESSION_COOKIE_NAME = f"{COOKIE_PREFIX}sessionid"
 LANGUAGE_COOKIE_NAME = f"{COOKIE_PREFIX}django_language"

-EMAIL_CERTIFICATE_FILE = __get_optional_path("PAPERLESS_EMAIL_CERTIFICATE_LOCATION")
+EMAIL_CERTIFICATE_FILE = get_path_from_env("PAPERLESS_EMAIL_CERTIFICATE_LOCATION")


 ###############################################################################
@@ -875,7 +644,7 @@ CELERY_BROKER_URL = _CELERY_REDIS_URL
 CELERY_TIMEZONE = TIME_ZONE

 CELERY_WORKER_HIJACK_ROOT_LOGGER = False
-CELERY_WORKER_CONCURRENCY: Final[int] = __get_int("PAPERLESS_TASK_WORKERS", 1)
+CELERY_WORKER_CONCURRENCY: Final[int] = get_int_from_env("PAPERLESS_TASK_WORKERS", 1)
 TASK_WORKERS = CELERY_WORKER_CONCURRENCY
 CELERY_WORKER_MAX_TASKS_PER_CHILD = 1
 CELERY_WORKER_SEND_TASK_EVENTS = True
@@ -888,7 +657,7 @@ CELERY_BROKER_TRANSPORT_OPTIONS = {
 }

 CELERY_TASK_TRACK_STARTED = True
-CELERY_TASK_TIME_LIMIT: Final[int] = __get_int("PAPERLESS_WORKER_TIMEOUT", 1800)
+CELERY_TASK_TIME_LIMIT: Final[int] = get_int_from_env("PAPERLESS_WORKER_TIMEOUT", 1800)

 CELERY_RESULT_EXTENDED = True
 CELERY_RESULT_BACKEND = "django-db"
@@ -900,7 +669,7 @@ CELERY_TASK_SERIALIZER = "pickle"
 CELERY_ACCEPT_CONTENT = ["application/json", "application/x-python-serialize"]

 # https://docs.celeryq.dev/en/stable/userguide/configuration.html#beat-schedule
-CELERY_BEAT_SCHEDULE = _parse_beat_schedule()
+CELERY_BEAT_SCHEDULE = parse_beat_schedule()

 # https://docs.celeryq.dev/en/stable/userguide/configuration.html#beat-schedule-filename
 CELERY_BEAT_SCHEDULE_FILENAME = str(DATA_DIR / "celerybeat-schedule.db")
@@ -908,14 +677,14 @@ CELERY_BEAT_SCHEDULE_FILENAME = str(DATA_DIR / "celerybeat-schedule.db")

 # Cachalot: Database read cache.
 def _parse_cachalot_settings():
-    ttl = __get_int("PAPERLESS_READ_CACHE_TTL", 3600)
+    ttl = get_int_from_env("PAPERLESS_READ_CACHE_TTL", 3600)
    ttl = min(ttl, 31536000) if ttl > 0 else 3600
-    _, redis_url = _parse_redis_url(
+    _, redis_url = parse_redis_url(
        os.getenv("PAPERLESS_READ_CACHE_REDIS_URL", _CHANNELS_REDIS_URL),
    )
    result = {
        "CACHALOT_CACHE": "read-cache",
-        "CACHALOT_ENABLED": __get_boolean(
+        "CACHALOT_ENABLED": get_bool_from_env(
            "PAPERLESS_DB_READ_CACHE_ENABLED",
            default="no",
        ),
@@ -1000,9 +769,9 @@ CONSUMER_POLLING_INTERVAL = float(os.getenv("PAPERLESS_CONSUMER_POLLING_INTERVAL

 CONSUMER_STABILITY_DELAY = float(os.getenv("PAPERLESS_CONSUMER_STABILITY_DELAY", 5))

-CONSUMER_DELETE_DUPLICATES = __get_boolean("PAPERLESS_CONSUMER_DELETE_DUPLICATES")
+CONSUMER_DELETE_DUPLICATES = get_bool_from_env("PAPERLESS_CONSUMER_DELETE_DUPLICATES")

-CONSUMER_RECURSIVE = __get_boolean("PAPERLESS_CONSUMER_RECURSIVE")
+CONSUMER_RECURSIVE = get_bool_from_env("PAPERLESS_CONSUMER_RECURSIVE")

 # Ignore regex patterns, matched against filename only
 CONSUMER_IGNORE_PATTERNS = list(
@@ -1024,13 +793,13 @@ CONSUMER_IGNORE_DIRS = list(
    ),
 )

-CONSUMER_SUBDIRS_AS_TAGS = __get_boolean("PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS")
+CONSUMER_SUBDIRS_AS_TAGS = get_bool_from_env("PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS")

-CONSUMER_ENABLE_BARCODES: Final[bool] = __get_boolean(
+CONSUMER_ENABLE_BARCODES: Final[bool] = get_bool_from_env(
    "PAPERLESS_CONSUMER_ENABLE_BARCODES",
 )

-CONSUMER_BARCODE_TIFF_SUPPORT: Final[bool] = __get_boolean(
+CONSUMER_BARCODE_TIFF_SUPPORT: Final[bool] = get_bool_from_env(
    "PAPERLESS_CONSUMER_BARCODE_TIFF_SUPPORT",
 )

@@ -1039,7 +808,7 @@ CONSUMER_BARCODE_STRING: Final[str] = os.getenv(
    "PATCHT",
 )

-CONSUMER_ENABLE_ASN_BARCODE: Final[bool] = __get_boolean(
+CONSUMER_ENABLE_ASN_BARCODE: Final[bool] = get_bool_from_env(
    "PAPERLESS_CONSUMER_ENABLE_ASN_BARCODE",
 )

@@ -1048,23 +817,26 @@ CONSUMER_ASN_BARCODE_PREFIX: Final[str] = os.getenv(
    "ASN",
 )

-CONSUMER_BARCODE_UPSCALE: Final[float] = __get_float(
+CONSUMER_BARCODE_UPSCALE: Final[float] = get_float_from_env(
    "PAPERLESS_CONSUMER_BARCODE_UPSCALE",
    0.0,
 )

-CONSUMER_BARCODE_DPI: Final[int] = __get_int("PAPERLESS_CONSUMER_BARCODE_DPI", 300)
+CONSUMER_BARCODE_DPI: Final[int] = get_int_from_env(
+    "PAPERLESS_CONSUMER_BARCODE_DPI",
+    300,
+)

-CONSUMER_BARCODE_MAX_PAGES: Final[int] = __get_int(
+CONSUMER_BARCODE_MAX_PAGES: Final[int] = get_int_from_env(
    "PAPERLESS_CONSUMER_BARCODE_MAX_PAGES",
    0,
 )

-CONSUMER_BARCODE_RETAIN_SPLIT_PAGES = __get_boolean(
+CONSUMER_BARCODE_RETAIN_SPLIT_PAGES = get_bool_from_env(
    "PAPERLESS_CONSUMER_BARCODE_RETAIN_SPLIT_PAGES",
 )

-CONSUMER_ENABLE_TAG_BARCODE: Final[bool] = __get_boolean(
+CONSUMER_ENABLE_TAG_BARCODE: Final[bool] = get_bool_from_env(
    "PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE",
 )

@@ -1077,11 +849,11 @@ CONSUMER_TAG_BARCODE_MAPPING = dict(
    ),
 )

-CONSUMER_TAG_BARCODE_SPLIT: Final[bool] = __get_boolean(
+CONSUMER_TAG_BARCODE_SPLIT: Final[bool] = get_bool_from_env(
    "PAPERLESS_CONSUMER_TAG_BARCODE_SPLIT",
 )

-CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED: Final[bool] = __get_boolean(
+CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED: Final[bool] = get_bool_from_env(
    "PAPERLESS_CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED",
 )

@@ -1090,13 +862,13 @@ CONSUMER_COLLATE_DOUBLE_SIDED_SUBDIR_NAME: Final[str] = os.getenv(
    "double-sided",
 )

-CONSUMER_COLLATE_DOUBLE_SIDED_TIFF_SUPPORT: Final[bool] = __get_boolean(
+CONSUMER_COLLATE_DOUBLE_SIDED_TIFF_SUPPORT: Final[bool] = get_bool_from_env(
    "PAPERLESS_CONSUMER_COLLATE_DOUBLE_SIDED_TIFF_SUPPORT",
 )

 CONSUMER_PDF_RECOVERABLE_MIME_TYPES = ("application/octet-stream",)

-OCR_PAGES = __get_optional_int("PAPERLESS_OCR_PAGES")
+OCR_PAGES = get_int_from_env("PAPERLESS_OCR_PAGES")

 # The default language that tesseract will attempt to use when parsing
 # documents.  It should be a 3-letter language code consistent with ISO 639.
@@ -1110,20 +882,20 @@ OCR_MODE = os.getenv("PAPERLESS_OCR_MODE", "skip")

 OCR_SKIP_ARCHIVE_FILE = os.getenv("PAPERLESS_OCR_SKIP_ARCHIVE_FILE", "never")

-OCR_IMAGE_DPI = __get_optional_int("PAPERLESS_OCR_IMAGE_DPI")
+OCR_IMAGE_DPI = get_int_from_env("PAPERLESS_OCR_IMAGE_DPI")

 OCR_CLEAN = os.getenv("PAPERLESS_OCR_CLEAN", "clean")

-OCR_DESKEW: Final[bool] = __get_boolean("PAPERLESS_OCR_DESKEW", "true")
+OCR_DESKEW: Final[bool] = get_bool_from_env("PAPERLESS_OCR_DESKEW", "true")

-OCR_ROTATE_PAGES: Final[bool] = __get_boolean("PAPERLESS_OCR_ROTATE_PAGES", "true")
+OCR_ROTATE_PAGES: Final[bool] = get_bool_from_env("PAPERLESS_OCR_ROTATE_PAGES", "true")

-OCR_ROTATE_PAGES_THRESHOLD: Final[float] = __get_float(
+OCR_ROTATE_PAGES_THRESHOLD: Final[float] = get_float_from_env(
    "PAPERLESS_OCR_ROTATE_PAGES_THRESHOLD",
    12.0,
 )

-OCR_MAX_IMAGE_PIXELS: Final[int | None] = __get_optional_int(
+OCR_MAX_IMAGE_PIXELS: Final[int | None] = get_int_from_env(
    "PAPERLESS_OCR_MAX_IMAGE_PIXELS",
 )

@@ -1134,7 +906,7 @@ OCR_COLOR_CONVERSION_STRATEGY = os.getenv(

 OCR_USER_ARGS = os.getenv("PAPERLESS_OCR_USER_ARGS")

-MAX_IMAGE_PIXELS: Final[int | None] = __get_optional_int(
+MAX_IMAGE_PIXELS: Final[int | None] = get_int_from_env(
    "PAPERLESS_MAX_IMAGE_PIXELS",
 )

@@ -1149,7 +921,7 @@ CONVERT_MEMORY_LIMIT = os.getenv("PAPERLESS_CONVERT_MEMORY_LIMIT")
 GS_BINARY = os.getenv("PAPERLESS_GS_BINARY", "gs")

 # Fallback layout for .eml consumption
-EMAIL_PARSE_DEFAULT_LAYOUT = __get_int(
+EMAIL_PARSE_DEFAULT_LAYOUT = get_int_from_env(
    "PAPERLESS_EMAIL_PARSE_DEFAULT_LAYOUT",
    1,  # MailRule.PdfLayout.TEXT_HTML but that can't be imported here
 )
@@ -1163,23 +935,9 @@ DATE_ORDER = os.getenv("PAPERLESS_DATE_ORDER", "DMY")
 FILENAME_DATE_ORDER = os.getenv("PAPERLESS_FILENAME_DATE_ORDER")


-def _parse_dateparser_languages(languages: str | None):
-    language_list = languages.split("+") if languages else []
-    # There is an unfixed issue in zh-Hant and zh-Hans locales in the dateparser lib.
-    # See: https://github.com/scrapinghub/dateparser/issues/875
-    for index, language in enumerate(language_list):
-        if language.startswith("zh-") and "zh" not in language_list:
-            logger.warning(
-                f'Chinese locale detected: {language}. dateparser might fail to parse some dates with this locale, so Chinese ("zh") will be used as a fallback.',
-            )
-            language_list.append("zh")
-
-    return list(LocaleDataLoader().get_locale_map(locales=language_list))
-
-
 # If not set, we will infer it at runtime
 DATE_PARSER_LANGUAGES = (
-    _parse_dateparser_languages(
+    parse_dateparser_languages(
        os.getenv("PAPERLESS_DATE_PARSER_LANGUAGES"),
    )
    if os.getenv("PAPERLESS_DATE_PARSER_LANGUAGES")
@@ -1190,7 +948,7 @@ DATE_PARSER_LANGUAGES = (
 # Maximum number of dates taken from document start to end to show as suggestions for
 # `created` date in the frontend. Duplicates are removed, which can result in
 # fewer dates shown.
-NUMBER_OF_SUGGESTED_DATES = __get_int("PAPERLESS_NUMBER_OF_SUGGESTED_DATES", 3)
+NUMBER_OF_SUGGESTED_DATES = get_int_from_env("PAPERLESS_NUMBER_OF_SUGGESTED_DATES", 3)

 # Specify the filename format for out files
 FILENAME_FORMAT = os.getenv("PAPERLESS_FILENAME_FORMAT")
@@ -1198,7 +956,7 @@ FILENAME_FORMAT = os.getenv("PAPERLESS_FILENAME_FORMAT")
 # If this is enabled, variables in filename format will resolve to
 # empty-string instead of 'none'.
 # Directories with 'empty names' are omitted, too.
-FILENAME_FORMAT_REMOVE_NONE = __get_boolean(
+FILENAME_FORMAT_REMOVE_NONE = get_bool_from_env(
    "PAPERLESS_FILENAME_FORMAT_REMOVE_NONE",
    "NO",
 )
@@ -1209,7 +967,7 @@ THUMBNAIL_FONT_NAME = os.getenv(
 )

 # Tika settings
-TIKA_ENABLED = __get_boolean("PAPERLESS_TIKA_ENABLED", "NO")
+TIKA_ENABLED = get_bool_from_env("PAPERLESS_TIKA_ENABLED", "NO")
 TIKA_ENDPOINT = os.getenv("PAPERLESS_TIKA_ENDPOINT", "http://localhost:9998")
 TIKA_GOTENBERG_ENDPOINT = os.getenv(
    "PAPERLESS_TIKA_GOTENBERG_ENDPOINT",
@@ -1219,52 +977,21 @@ TIKA_GOTENBERG_ENDPOINT = os.getenv(
 if TIKA_ENABLED:
    INSTALLED_APPS.append("paperless_tika.apps.PaperlessTikaConfig")

-AUDIT_LOG_ENABLED = __get_boolean("PAPERLESS_AUDIT_LOG_ENABLED", "true")
+AUDIT_LOG_ENABLED = get_bool_from_env("PAPERLESS_AUDIT_LOG_ENABLED", "true")
 if AUDIT_LOG_ENABLED:
    INSTALLED_APPS.append("auditlog")
    MIDDLEWARE.append("auditlog.middleware.AuditlogMiddleware")


-def _parse_ignore_dates(
-    env_ignore: str,
-    date_order: str = DATE_ORDER,
-) -> set[datetime.datetime]:
-    """
-    If the PAPERLESS_IGNORE_DATES environment variable is set, parse the
-    user provided string(s) into dates
-
-    Args:
-        env_ignore (str): The value of the environment variable, comma separated dates
-        date_order (str, optional): The format of the date strings.
-                                    Defaults to DATE_ORDER.
-
-    Returns:
-        Set[datetime.datetime]: The set of parsed date objects
-    """
-    import dateparser
-
-    ignored_dates = set()
-    for s in env_ignore.split(","):
-        d = dateparser.parse(
-            s,
-            settings={
-                "DATE_ORDER": date_order,
-            },
-        )
-        if d:
-            ignored_dates.add(d.date())
-    return ignored_dates
-
-
 # List dates that should be ignored when trying to parse date from document text
 IGNORE_DATES: set[datetime.date] = set()

 if os.getenv("PAPERLESS_IGNORE_DATES") is not None:
-    IGNORE_DATES = _parse_ignore_dates(os.getenv("PAPERLESS_IGNORE_DATES"))
+    IGNORE_DATES = parse_ignore_dates(os.getenv("PAPERLESS_IGNORE_DATES"), DATE_ORDER)

 ENABLE_UPDATE_CHECK = os.getenv("PAPERLESS_ENABLE_UPDATE_CHECK", "default")
 if ENABLE_UPDATE_CHECK != "default":
-    ENABLE_UPDATE_CHECK = __get_boolean("PAPERLESS_ENABLE_UPDATE_CHECK")
+    ENABLE_UPDATE_CHECK = get_bool_from_env("PAPERLESS_ENABLE_UPDATE_CHECK")

 APP_TITLE = os.getenv("PAPERLESS_APP_TITLE", None)
 APP_LOGO = os.getenv("PAPERLESS_APP_LOGO", None)
@@ -1309,7 +1036,7 @@ def _get_nltk_language_setting(ocr_lang: str) -> str | None:
    return iso_code_to_nltk.get(ocr_lang)


-NLTK_ENABLED: Final[bool] = __get_boolean("PAPERLESS_ENABLE_NLTK", "yes")
+NLTK_ENABLED: Final[bool] = get_bool_from_env("PAPERLESS_ENABLE_NLTK", "yes")

 NLTK_LANGUAGE: str | None = _get_nltk_language_setting(OCR_LANGUAGE)

@@ -1318,7 +1045,7 @@ NLTK_LANGUAGE: str | None = _get_nltk_language_setting(OCR_LANGUAGE)
 ###############################################################################

 EMAIL_GNUPG_HOME: Final[str | None] = os.getenv("PAPERLESS_EMAIL_GNUPG_HOME")
-EMAIL_ENABLE_GPG_DECRYPTOR: Final[bool] = __get_boolean(
+EMAIL_ENABLE_GPG_DECRYPTOR: Final[bool] = get_bool_from_env(
    "PAPERLESS_ENABLE_GPG_DECRYPTOR",
 )

@@ -1326,7 +1053,7 @@ EMAIL_ENABLE_GPG_DECRYPTOR: Final[bool] = __get_boolean(
 ###############################################################################
 # Soft Delete                                                                 #
 ###############################################################################
-EMPTY_TRASH_DELAY = max(__get_int("PAPERLESS_EMPTY_TRASH_DELAY", 30), 1)
+EMPTY_TRASH_DELAY = max(get_int_from_env("PAPERLESS_EMPTY_TRASH_DELAY", 30), 1)


 ###############################################################################
@@ -1351,21 +1078,17 @@ OUTLOOK_OAUTH_ENABLED = bool(
 ###############################################################################
 # Webhooks
 ###############################################################################
-WEBHOOKS_ALLOWED_SCHEMES = set(
+WEBHOOKS_ALLOWED_SCHEMES = {
    s.lower()
-    for s in __get_list(
+    for s in get_list_from_env(
        "PAPERLESS_WEBHOOKS_ALLOWED_SCHEMES",
-        ["http", "https"],
+        default=["http", "https"],
    )
-)
-WEBHOOKS_ALLOWED_PORTS = set(
-    int(p)
-    for p in __get_list(
-        "PAPERLESS_WEBHOOKS_ALLOWED_PORTS",
-        [],
-    )
-)
-WEBHOOKS_ALLOW_INTERNAL_REQUESTS = __get_boolean(
+}
+WEBHOOKS_ALLOWED_PORTS = {
+    int(p) for p in get_list_from_env("PAPERLESS_WEBHOOKS_ALLOWED_PORTS", default=[])
+}
+WEBHOOKS_ALLOW_INTERNAL_REQUESTS = get_bool_from_env(
    "PAPERLESS_WEBHOOKS_ALLOW_INTERNAL_REQUESTS",
    "true",
 )
@@ -1380,7 +1103,7 @@ REMOTE_OCR_ENDPOINT = os.getenv("PAPERLESS_REMOTE_OCR_ENDPOINT")
 ################################################################################
 # AI Settings                                                                  #
 ################################################################################
-AI_ENABLED = __get_boolean("PAPERLESS_AI_ENABLED", "NO")
+AI_ENABLED = get_bool_from_env("PAPERLESS_AI_ENABLED", "NO")
 LLM_EMBEDDING_BACKEND = os.getenv(
    "PAPERLESS_AI_LLM_EMBEDDING_BACKEND",
 )  # "huggingface" or "openai"
--- a/src/paperless/settings/custom.py
+++ b/src/paperless/settings/custom.py
@@ -1,11 +1,191 @@
+import datetime
+import logging
 import os
 from pathlib import Path
 from typing import Any

+from celery.schedules import crontab
+from dateparser.languages.loader import LocaleDataLoader
+
 from paperless.settings.parsers import get_choice_from_env
 from paperless.settings.parsers import get_int_from_env
 from paperless.settings.parsers import parse_dict_from_str

+logger = logging.getLogger(__name__)
+
+
+def parse_hosting_settings() -> tuple[str | None, str, str, str, str]:
+    """Return (script name, base URL, login URL, login redirect, logout redirect)."""
+    prefix = os.getenv("PAPERLESS_FORCE_SCRIPT_NAME")
+    root = (prefix or "") + "/"
+    login = root + "accounts/login/"
+    logout_redirect = os.getenv(
+        "PAPERLESS_LOGOUT_REDIRECT_URL",
+        login + "?loggedout=1",
+    )
+    return prefix, root, login, root + "dashboard", logout_redirect
+
+
+def parse_redis_url(env_redis: str | None) -> tuple[str, str]:
+    """
+    Takes the configured Redis URL (or a default when unset) and handles
+    converting between the incompatible channels_redis and celery socket formats.
+
+    Returns a tuple of (celery_url, channels_url)
+    """
+    # Not set, return a compatible default
+    if env_redis is None:
+        return ("redis://localhost:6379", "redis://localhost:6379")
+
+    # Decide on the URL scheme only, so that "unix" or "+socket" appearing in a
+    # host name, password or socket path does not select the wrong conversion
+    scheme = env_redis.split(":", maxsplit=1)[0].lower()
+    if "unix" in scheme:
+        # channels_redis socket format, looks like:
+        # "unix:///path/to/redis.sock"
+        _, path = env_redis.split(":", maxsplit=1)
+        # Optionally setting a db number
+        if "?db=" in env_redis:
+            path, number = path.split("?db=")
+            return (f"redis+socket:{path}?virtual_host={number}", env_redis)
+        return (f"redis+socket:{path}", env_redis)
+
+    if "+socket" in scheme:
+        # celery socket style, looks like:
+        # "redis+socket:///path/to/redis.sock"
+        _, path = env_redis.split(":", maxsplit=1)
+        if "?virtual_host=" in env_redis:
+            # Virtual host (aka db number)
+            path, number = path.split("?virtual_host=")
+            return (env_redis, f"unix:{path}?db={number}")
+        return (env_redis, f"unix:{path}")
+
+    # Not a socket
+    return (env_redis, env_redis)
+
+
+def parse_beat_schedule() -> dict:
+    """
+    Configures the scheduled tasks, according to default or environment
+    variables; a task whose variable is set to "disable" is left out.  Task
+    expiration is configured so the task will expire (and not run), shortly
+    before the default frequency will put another of the same task into the queue.
+    Raises ValueError if a cron expression does not have exactly five fields.
+
+    https://docs.celeryq.dev/en/stable/userguide/periodic-tasks.html#beat-entries
+    https://docs.celeryq.dev/en/latest/userguide/calling.html#expiration
+    """
+    schedule = {}
+    tasks = [
+        {
+            "name": "Check all e-mail accounts",
+            "env_key": "PAPERLESS_EMAIL_TASK_CRON",
+            # Default every ten minutes
+            "env_default": "*/10 * * * *",
+            "task": "paperless_mail.tasks.process_mail_accounts",
+            "options": {
+                # 1 minute before default schedule sends again
+                "expires": 9.0 * 60.0,
+            },
+        },
+        {
+            "name": "Train the classifier",
+            "env_key": "PAPERLESS_TRAIN_TASK_CRON",
+            # Default hourly at 5 minutes past the hour
+            "env_default": "5 */1 * * *",
+            "task": "documents.tasks.train_classifier",
+            "options": {
+                # 1 minute before default schedule sends again
+                "expires": 59.0 * 60.0,
+            },
+        },
+        {
+            "name": "Optimize the index",
+            "env_key": "PAPERLESS_INDEX_TASK_CRON",
+            # Default daily at midnight
+            "env_default": "0 0 * * *",
+            "task": "documents.tasks.index_optimize",
+            "options": {
+                # 1 hour before default schedule sends again
+                "expires": 23.0 * 60.0 * 60.0,
+            },
+        },
+        {
+            "name": "Perform sanity check",
+            "env_key": "PAPERLESS_SANITY_TASK_CRON",
+            # Default Sunday at 00:30
+            "env_default": "30 0 * * sun",
+            "task": "documents.tasks.sanity_check",
+            "options": {
+                # 1 hour before default schedule sends again
+                "expires": ((7.0 * 24.0) - 1.0) * 60.0 * 60.0,
+            },
+        },
+        {
+            "name": "Empty trash",
+            "env_key": "PAPERLESS_EMPTY_TRASH_TASK_CRON",
+            # Default daily at 01:00
+            "env_default": "0 1 * * *",
+            "task": "documents.tasks.empty_trash",
+            "options": {
+                # 1 hour before default schedule sends again
+                "expires": 23.0 * 60.0 * 60.0,
+            },
+        },
+        {
+            "name": "Check and run scheduled workflows",
+            "env_key": "PAPERLESS_WORKFLOW_SCHEDULED_TASK_CRON",
+            # Default hourly at 5 minutes past the hour
+            "env_default": "5 */1 * * *",
+            "task": "documents.tasks.check_scheduled_workflows",
+            "options": {
+                # 1 minute before default schedule sends again
+                "expires": 59.0 * 60.0,
+            },
+        },
+        {
+            "name": "Rebuild LLM index",
+            "env_key": "PAPERLESS_LLM_INDEX_TASK_CRON",
+            # Default daily at 02:10
+            "env_default": "10 2 * * *",
+            "task": "documents.tasks.llmindex_index",
+            "options": {
+                # 1 hour before default schedule sends again
+                "expires": 23.0 * 60.0 * 60.0,
+            },
+        },
+        {
+            "name": "Cleanup expired share link bundles",
+            "env_key": "PAPERLESS_SHARE_LINK_BUNDLE_CLEANUP_CRON",
+            # Default daily at 02:00
+            "env_default": "0 2 * * *",
+            "task": "documents.tasks.cleanup_expired_share_link_bundles",
+            "options": {
+                # 1 hour before default schedule sends again
+                "expires": 23.0 * 60.0 * 60.0,
+            },
+        },
+    ]
+    for task in tasks:
+        # Either get the environment setting or use the default
+        value = os.getenv(task["env_key"], task["env_default"])
+        # Don't add disabled tasks to the schedule
+        if value == "disable":
+            continue
+        # I find https://crontab.guru/ super helpful
+        # crontab(5) format
+        #   - five time-and-date fields
+        #   - separated by at least one blank, so split on any whitespace run
+        minute, hour, day_month, month, day_week = value.split()
+
+        schedule[task["name"]] = {
+            "task": task["task"],
+            "schedule": crontab(minute, hour, day_week, day_month, month),
+            "options": task["options"],
+        }
+
+    return schedule
+

 def parse_db_settings(data_dir: Path) -> dict[str, dict[str, Any]]:
    """Parse database settings from environment variables.
@@ -120,3 +300,48 @@ def parse_db_settings(data_dir: Path) -> dict[str, dict[str, Any]]:
    )

    return {"default": db_config}
+
+
+def parse_dateparser_languages(languages: str | None) -> list[str]:
+    """
+    Convert a "+"-separated language string into dateparser locale names.
+
+    Args:
+        languages: Language codes joined by "+" (for example "en+zh-Hans"),
+            or None / an empty string for no languages.
+
+    Returns:
+        The entries produced by LocaleDataLoader().get_locale_map for the
+        requested languages. Plain "zh" is added to the request whenever a
+        "zh-" variant is present without it.
+    """
+    language_list = languages.split("+") if languages else []
+    # There is an unfixed issue in zh-Hant and zh-Hans locales in the dateparser lib.
+    # See: https://github.com/scrapinghub/dateparser/issues/875
+    if "zh" not in language_list:
+        # Find the first variant up front and append afterwards, so the list
+        # is never modified while it is being iterated.
+        chinese_variant = next(
+            (language for language in language_list if language.startswith("zh-")),
+            None,
+        )
+        if chinese_variant is not None:
+            logger.warning(
+                f"Chinese locale detected: {chinese_variant}. dateparser might fail to parse"
+                f' some dates with this locale, so Chinese ("zh") will be used as a fallback.',
+            )
+            language_list.append("zh")
+
+    return list(LocaleDataLoader().get_locale_map(locales=language_list))
+
+
+def parse_ignore_dates(
+    env_ignore: str,
+    date_order: str,
+) -> set[datetime.date]:
+    """
+    Turn the PAPERLESS_IGNORE_DATES value into a set of dates.
+
+    Args:
+        env_ignore (str): Comma separated date strings, as supplied by the user
+        date_order (str): Handed to dateparser as its DATE_ORDER setting
+
+    Returns:
+        set[datetime.date]: One date for every entry dateparser returned a
+            result for; entries without a result are left out
+    """
+    import dateparser
+
+    candidates = (
+        dateparser.parse(chunk, settings={"DATE_ORDER": date_order})
+        for chunk in env_ignore.split(",")
+    )
+    return {moment.date() for moment in candidates if moment}
--- a/src/paperless/settings/parsers.py
+++ b/src/paperless/settings/parsers.py
@@ -156,6 +156,108 @@ def parse_dict_from_str(
    return settings


+def get_bool_from_env(key: str, default: str = "NO") -> bool:
+    """
+    Read the environment variable `key`, falling back to `default` when it
+    is not set, and convert that text to a boolean with str_to_bool.
+    """
+    raw_value = os.getenv(key, default)
+    return str_to_bool(raw_value)
+
+
+@overload
+def get_float_from_env(key: str) -> float | None: ...
+
+
+@overload
+def get_float_from_env(key: str, default: None) -> float | None: ...
+
+
+@overload
+def get_float_from_env(key: str, default: float) -> float: ...
+
+
+def get_float_from_env(key: str, default: float | None = None) -> float | None:
+    """
+    Read the environment variable `key` as a float.
+
+    When the variable is not set, `default` is returned unchanged (which is
+    None unless the caller supplied a value). When it is set, its text is
+    passed to float(), so unparsable text raises ValueError.
+    """
+    raw_value = os.environ.get(key)
+    if raw_value is None:
+        return default
+    return float(raw_value)
+
+
+@overload
+def get_path_from_env(key: str) -> Path | None: ...
+
+
+@overload
+def get_path_from_env(key: str, default: None) -> Path | None: ...
+
+
+@overload
+def get_path_from_env(key: str, default: Path | str) -> Path: ...
+
+
+def get_path_from_env(key: str, default: Path | str | None = None) -> Path | None:
+    """
+    Read the environment variable `key` as a resolved Path.
+
+    When the variable is set, its value is turned into a Path and resolved.
+    When it is not set, a non-None `default` is converted and resolved the
+    same way, and a None `default` yields None.
+    """
+    raw_value = os.environ.get(key)
+    if raw_value is not None:
+        return Path(raw_value).resolve()
+
+    if default is None:
+        return None
+    return Path(default).resolve()
+
+
+def get_list_from_env(
+    key: str,
+    separator: str = ",",
+    default: list[T] | None = None,
+    *,
+    strip_whitespace: bool = True,
+    remove_empty: bool = True,
+    required: bool = False,
+) -> list[str] | list[T]:
+    """
+    Split an environment variable into a list, or fall back to a default.
+
+    Args:
+        key: Name of the environment variable
+        separator: Text to split the value on (default: ',')
+        default: Returned as-is when the variable is unset or empty
+        strip_whitespace: Strip surrounding whitespace from every element
+        remove_empty: Drop elements that are empty strings
+        required: Raise when the variable is unset and no default was given
+
+    Returns:
+        The split elements when the variable has a non-empty value; otherwise
+        `default` when it is not None, and an empty list when it is
+
+    Raises:
+        ValueError: If required=True, the variable is unset and default is None
+    """
+    env_value = os.environ.get(key)
+
+    # A required variable must either be set or come with a default
+    if env_value is None and default is None and required:
+        raise ValueError(f"Required environment variable '{key}' is not set")
+
+    # Unset and empty values both fall through to the default
+    if not env_value:
+        return [] if default is None else default
+
+    parts = env_value.split(separator)
+    if strip_whitespace:
+        parts = [part.strip() for part in parts]
+    if remove_empty:
+        parts = [part for part in parts if part]
+    return parts
+
+
 def get_choice_from_env(
    env_key: str,
    choices: set[str],
--- a/src/paperless/tests/settings/test_custom_parsers.py
+++ b/src/paperless/tests/settings/test_custom_parsers.py
@@ -1,10 +1,279 @@
+import datetime
 import os
 from pathlib import Path
+from typing import Any

 import pytest
+from celery.schedules import crontab
 from pytest_mock import MockerFixture

+from paperless.settings.custom import parse_beat_schedule
+from paperless.settings.custom import parse_dateparser_languages
 from paperless.settings.custom import parse_db_settings
+from paperless.settings.custom import parse_hosting_settings
+from paperless.settings.custom import parse_ignore_dates
+from paperless.settings.custom import parse_redis_url
+
+
+class TestRedisSocketConversion:
+    """Checks parse_redis_url against socket style and host style Redis URLs."""
+
+    # Each case pairs an input URL with the expected 2-tuple result. For the
+    # socket cases the first entry uses the redis+socket:// form and the second
+    # the unix:// form, whichever of the two forms was given as input.
+    @pytest.mark.parametrize(
+        ("input_url", "expected"),
+        [
+            pytest.param(
+                None,
+                ("redis://localhost:6379", "redis://localhost:6379"),
+                id="none_uses_default",
+            ),
+            pytest.param(
+                "redis+socket:///run/redis/redis.sock",
+                (
+                    "redis+socket:///run/redis/redis.sock",
+                    "unix:///run/redis/redis.sock",
+                ),
+                id="celery_style_socket",
+            ),
+            pytest.param(
+                "unix:///run/redis/redis.sock",
+                (
+                    "redis+socket:///run/redis/redis.sock",
+                    "unix:///run/redis/redis.sock",
+                ),
+                id="redis_py_style_socket",
+            ),
+            # The database number is spelled virtual_host= in the first entry
+            # and db= in the second
+            pytest.param(
+                "redis+socket:///run/redis/redis.sock?virtual_host=5",
+                (
+                    "redis+socket:///run/redis/redis.sock?virtual_host=5",
+                    "unix:///run/redis/redis.sock?db=5",
+                ),
+                id="celery_style_socket_with_db",
+            ),
+            pytest.param(
+                "unix:///run/redis/redis.sock?db=10",
+                (
+                    "redis+socket:///run/redis/redis.sock?virtual_host=10",
+                    "unix:///run/redis/redis.sock?db=10",
+                ),
+                id="redis_py_style_socket_with_db",
+            ),
+            pytest.param(
+                "redis://myredishost:6379",
+                ("redis://myredishost:6379", "redis://myredishost:6379"),
+                id="host_with_port_unchanged",
+            ),
+            # Credentials in unix:// URL contain multiple colons (user:password@)
+            # Regression test for https://github.com/paperless-ngx/paperless-ngx/pull/12239
+            pytest.param(
+                "unix://user:password@/run/redis/redis.sock",
+                (
+                    "redis+socket://user:password@/run/redis/redis.sock",
+                    "unix://user:password@/run/redis/redis.sock",
+                ),
+                id="redis_py_style_socket_with_credentials",
+            ),
+            pytest.param(
+                "redis+socket://user:password@/run/redis/redis.sock",
+                (
+                    "redis+socket://user:password@/run/redis/redis.sock",
+                    "unix://user:password@/run/redis/redis.sock",
+                ),
+                id="celery_style_socket_with_credentials",
+            ),
+        ],
+    )
+    def test_redis_socket_parsing(
+        self,
+        input_url: str | None,
+        expected: tuple[str, str],
+    ) -> None:
+        """
+        GIVEN:
+            - Various Redis connection URI formats
+        WHEN:
+            - The URI is parsed
+        THEN:
+            - Socket based URIs are translated
+            - Non-socket URIs are unchanged
+            - None provided uses default
+        """
+        result = parse_redis_url(input_url)
+        assert expected == result
+
+
+class TestParseHostingSettings:
+    """Checks the 5-tuple returned by parse_hosting_settings for several environments."""
+
+    # NOTE(review): the tuple positions look like (script name, base URL,
+    # login URL, login redirect URL, logout redirect URL) - confirm against
+    # parse_hosting_settings, which is not visible from this test module.
+    @pytest.mark.parametrize(
+        ("env", "expected"),
+        [
+            pytest.param(
+                {},
+                (
+                    None,
+                    "/",
+                    "/accounts/login/",
+                    "/dashboard",
+                    "/accounts/login/?loggedout=1",
+                ),
+                id="no_env_vars",
+            ),
+            pytest.param(
+                {"PAPERLESS_FORCE_SCRIPT_NAME": "/paperless"},
+                (
+                    "/paperless",
+                    "/paperless/",
+                    "/paperless/accounts/login/",
+                    "/paperless/dashboard",
+                    "/paperless/accounts/login/?loggedout=1",
+                ),
+                id="force_script_name_only",
+            ),
+            pytest.param(
+                {
+                    "PAPERLESS_FORCE_SCRIPT_NAME": "/docs",
+                    "PAPERLESS_LOGOUT_REDIRECT_URL": "/custom/logout",
+                },
+                (
+                    "/docs",
+                    "/docs/",
+                    "/docs/accounts/login/",
+                    "/docs/dashboard",
+                    "/custom/logout",
+                ),
+                id="force_script_name_and_logout_redirect",
+            ),
+        ],
+    )
+    def test_parse_hosting_settings(
+        self,
+        mocker: MockerFixture,
+        env: dict[str, str],
+        expected: tuple[str | None, str, str, str, str],
+    ) -> None:
+        """Test parse_hosting_settings with various env configurations."""
+        # clear=True removes every other variable, so only `env` is visible
+        mocker.patch.dict(os.environ, env, clear=True)
+
+        result = parse_hosting_settings()
+
+        assert result == expected
+
+
+def make_expected_schedule(
+    overrides: dict[str, dict[str, Any]] | None = None,
+    disabled: set[str] | None = None,
+) -> dict[str, Any]:
+    """
+    Assemble the schedule the tests expect from parse_beat_schedule.
+
+    Args:
+        overrides: Per-task dicts merged over the matching default entry
+        disabled: Task names removed from the result
+
+    Returns:
+        Mapping of task name to its "task", "schedule" and "options" entry
+    """
+    one_minute = 60.0
+    one_hour = 60.0 * one_minute
+    hourly_expire = 59.0 * one_minute
+    daily_expire = 23.0 * one_hour
+
+    # (name, task path, schedule, expires in seconds)
+    defaults = [
+        (
+            "Check all e-mail accounts",
+            "paperless_mail.tasks.process_mail_accounts",
+            crontab(minute="*/10"),
+            9.0 * one_minute,
+        ),
+        (
+            "Train the classifier",
+            "documents.tasks.train_classifier",
+            crontab(minute="5", hour="*/1"),
+            hourly_expire,
+        ),
+        (
+            "Optimize the index",
+            "documents.tasks.index_optimize",
+            crontab(minute=0, hour=0),
+            daily_expire,
+        ),
+        (
+            "Perform sanity check",
+            "documents.tasks.sanity_check",
+            crontab(minute=30, hour=0, day_of_week="sun"),
+            ((7.0 * 24.0) - 1.0) * one_hour,
+        ),
+        (
+            "Empty trash",
+            "documents.tasks.empty_trash",
+            crontab(minute=0, hour="1"),
+            daily_expire,
+        ),
+        (
+            "Check and run scheduled workflows",
+            "documents.tasks.check_scheduled_workflows",
+            crontab(minute="5", hour="*/1"),
+            hourly_expire,
+        ),
+        (
+            "Rebuild LLM index",
+            "documents.tasks.llmindex_index",
+            crontab(minute="10", hour="2"),
+            daily_expire,
+        ),
+        (
+            "Cleanup expired share link bundles",
+            "documents.tasks.cleanup_expired_share_link_bundles",
+            crontab(minute=0, hour="2"),
+            daily_expire,
+        ),
+    ]
+
+    schedule: dict[str, Any] = {
+        name: {
+            "task": task_path,
+            "schedule": when,
+            "options": {"expires": expires},
+        }
+        for name, task_path, when, expires in defaults
+    }
+
+    # Overrides replace individual keys of an entry, not the whole entry
+    for name, changes in (overrides or {}).items():
+        merged = dict(schedule.get(name, {}))
+        merged.update(changes)
+        schedule[name] = merged
+
+    for name in disabled or set():
+        schedule.pop(name, None)
+
+    return schedule
+
+
+class TestParseBeatSchedule:
+    """Checks parse_beat_schedule against the schedule built by make_expected_schedule."""
+
+    @pytest.mark.parametrize(
+        ("env", "expected"),
+        [
+            pytest.param({}, make_expected_schedule(), id="defaults"),
+            pytest.param(
+                {"PAPERLESS_EMAIL_TASK_CRON": "*/50 * * * mon"},
+                make_expected_schedule(
+                    overrides={
+                        "Check all e-mail accounts": {
+                            "schedule": crontab(minute="*/50", day_of_week="mon"),
+                        },
+                    },
+                ),
+                id="email-changed",
+            ),
+            pytest.param(
+                {"PAPERLESS_INDEX_TASK_CRON": "disable"},
+                make_expected_schedule(disabled={"Optimize the index"}),
+                id="index-disabled",
+            ),
+            # With every task set to "disable" the schedule is expected to be empty
+            pytest.param(
+                {
+                    "PAPERLESS_EMAIL_TASK_CRON": "disable",
+                    "PAPERLESS_TRAIN_TASK_CRON": "disable",
+                    "PAPERLESS_SANITY_TASK_CRON": "disable",
+                    "PAPERLESS_INDEX_TASK_CRON": "disable",
+                    "PAPERLESS_EMPTY_TRASH_TASK_CRON": "disable",
+                    "PAPERLESS_WORKFLOW_SCHEDULED_TASK_CRON": "disable",
+                    "PAPERLESS_LLM_INDEX_TASK_CRON": "disable",
+                    "PAPERLESS_SHARE_LINK_BUNDLE_CLEANUP_CRON": "disable",
+                },
+                {},
+                id="all-disabled",
+            ),
+        ],
+    )
+    def test_parse_beat_schedule(
+        self,
+        env: dict[str, str],
+        expected: dict[str, Any],
+        mocker: MockerFixture,
+    ) -> None:
+        """Patch the environment with `env` and compare the parsed schedule to `expected`."""
+        # clear=False keeps variables already present in os.environ in place
+        mocker.patch.dict(os.environ, env, clear=False)
+        schedule = parse_beat_schedule()
+        assert schedule == expected


 class TestParseDbSettings:
@@ -264,3 +533,85 @@ class TestParseDbSettings:
        settings = parse_db_settings(tmp_path)

        assert settings == expected_database_settings
+
+
+class TestParseIgnoreDates:
+    """Tests the parsing of the PAPERLESS_IGNORE_DATES setting value."""
+
+    def test_no_ignore_dates_set(self) -> None:
+        """
+        GIVEN:
+            - No ignore dates are set
+        THEN:
+            - No ignore dates are parsed
+        """
+        assert parse_ignore_dates("", "YMD") == set()
+
+    # Each case is (setting value, date order passed to the parser, expected dates)
+    @pytest.mark.parametrize(
+        ("env_str", "date_format", "expected"),
+        [
+            pytest.param(
+                "1985-05-01",
+                "YMD",
+                {datetime.date(1985, 5, 1)},
+                id="single-ymd",
+            ),
+            pytest.param(
+                "1985-05-01,1991-12-05",
+                "YMD",
+                {datetime.date(1985, 5, 1), datetime.date(1991, 12, 5)},
+                id="multiple-ymd",
+            ),
+            pytest.param(
+                "2010-12-13",
+                "YMD",
+                {datetime.date(2010, 12, 13)},
+                id="single-ymd-2",
+            ),
+            # Two-digit year: "10" is expected to be read as 2010
+            pytest.param(
+                "11.01.10",
+                "DMY",
+                {datetime.date(2010, 1, 11)},
+                id="single-dmy",
+            ),
+            # Entries in one value may use different separators
+            pytest.param(
+                "11.01.2001,15-06-1996",
+                "DMY",
+                {datetime.date(2001, 1, 11), datetime.date(1996, 6, 15)},
+                id="multiple-dmy",
+            ),
+        ],
+    )
+    def test_ignore_dates_parsed(
+        self,
+        env_str: str,
+        date_format: str,
+        expected: set[datetime.date],
+    ) -> None:
+        """
+        GIVEN:
+            - Ignore dates are set per certain inputs
+        THEN:
+            - All ignore dates are parsed
+        """
+        assert parse_ignore_dates(env_str, date_format) == expected
+
+
+@pytest.mark.parametrize(
+    ("languages", "expected"),
+    [
+        ("de", ["de"]),
+        ("zh", ["zh"]),
+        ("fr+en", ["fr", "en"]),
+        # Locales must be supported
+        ("en-001+fr-CA", ["en-001", "fr-CA"]),
+        ("en-001+fr", ["en-001", "fr"]),
+        # Special case for Chinese: variants seem to miss some dates,
+        # so we always add "zh" as a fallback.
+        ("en+zh-Hans-HK", ["en", "zh-Hans-HK", "zh"]),
+        ("en+zh-Hans", ["en", "zh-Hans", "zh"]),
+        ("en+zh-Hans+zh-Hant", ["en", "zh-Hans", "zh-Hant", "zh"]),
+    ],
+)
+def test_parse_dateparser_languages(languages: str, expected: list[str]) -> None:
+    """Check the locales returned for a "+"-separated language string."""
+    # Both sides are sorted, so the order of the returned locales is not checked
+    assert sorted(parse_dateparser_languages(languages)) == sorted(expected)
--- a/src/paperless/tests/settings/test_db_cache.py
+++ b/src/paperless/tests/settings/test_db_cache.py
--- a/src/paperless/tests/settings/test_environment_parsers.py
+++ b/src/paperless/tests/settings/test_environment_parsers.py
@@ -4,8 +4,12 @@ from pathlib import Path
 import pytest
 from pytest_mock import MockerFixture

+from paperless.settings.parsers import get_bool_from_env
 from paperless.settings.parsers import get_choice_from_env
+from paperless.settings.parsers import get_float_from_env
 from paperless.settings.parsers import get_int_from_env
+from paperless.settings.parsers import get_list_from_env
+from paperless.settings.parsers import get_path_from_env
 from paperless.settings.parsers import parse_dict_from_str
 from paperless.settings.parsers import str_to_bool

@@ -205,6 +209,29 @@ class TestParseDictFromString:
        assert isinstance(result["database"]["port"], int)


+class TestGetBoolFromEnv:
+    """Tests get_bool_from_env with set, missing and invalid variables."""
+
+    def test_existing_env_var(self, mocker: MockerFixture) -> None:
+        """Test that an existing environment variable is read and converted."""
+        mocker.patch.dict(os.environ, {"TEST_VAR": "true"})
+        assert get_bool_from_env("TEST_VAR") is True
+
+    def test_missing_env_var_uses_default_no(self, mocker: MockerFixture) -> None:
+        """Test that a missing environment variable uses default 'NO' and returns False."""
+        mocker.patch.dict(os.environ, {}, clear=True)
+        assert get_bool_from_env("MISSING_VAR") is False
+
+    def test_missing_env_var_with_explicit_default(self, mocker: MockerFixture) -> None:
+        """Test that a missing environment variable uses the provided default."""
+        mocker.patch.dict(os.environ, {}, clear=True)
+        assert get_bool_from_env("MISSING_VAR", default="yes") is True
+
+    def test_invalid_value_raises_error(self, mocker: MockerFixture) -> None:
+        """Test that an invalid value raises ValueError (delegates to str_to_bool)."""
+        mocker.patch.dict(os.environ, {"INVALID_VAR": "maybe"})
+        with pytest.raises(ValueError):
+            get_bool_from_env("INVALID_VAR")
+
+
 class TestGetIntFromEnv:
    @pytest.mark.parametrize(
        ("env_value", "expected"),
@@ -259,6 +286,199 @@ class TestGetIntFromEnv:
            get_int_from_env("INVALID_INT")


+class TestGetFloatFromEnv:
+    """Tests get_float_from_env with valid, missing and invalid variables."""
+
+    @pytest.mark.parametrize(
+        ("env_value", "expected"),
+        [
+            pytest.param("3.14", 3.14, id="pi"),
+            pytest.param("42", 42.0, id="int_as_float"),
+            pytest.param("-2.5", -2.5, id="negative"),
+            pytest.param("0.0", 0.0, id="zero_float"),
+            pytest.param("0", 0.0, id="zero_int"),
+            pytest.param("1.5e2", 150.0, id="sci_positive"),
+            pytest.param("1e-3", 0.001, id="sci_negative"),
+            pytest.param("-1.23e4", -12300.0, id="sci_large"),
+        ],
+    )
+    def test_existing_env_var_valid_floats(
+        self,
+        mocker: MockerFixture,
+        env_value: str,
+        expected: float,
+    ) -> None:
+        """Test that existing environment variables with valid floats return correct values."""
+        mocker.patch.dict(os.environ, {"FLOAT_VAR": env_value})
+        assert get_float_from_env("FLOAT_VAR") == expected
+
+    @pytest.mark.parametrize(
+        ("default", "expected"),
+        [
+            pytest.param(3.14, 3.14, id="pi_default"),
+            pytest.param(0.0, 0.0, id="zero_default"),
+            pytest.param(-2.5, -2.5, id="negative_default"),
+            pytest.param(None, None, id="none_default"),
+        ],
+    )
+    def test_missing_env_var_with_defaults(
+        self,
+        mocker: MockerFixture,
+        default: float | None,
+        expected: float | None,
+    ) -> None:
+        """Test that missing environment variables return provided defaults."""
+        mocker.patch.dict(os.environ, {}, clear=True)
+        assert get_float_from_env("MISSING_VAR", default=default) == expected
+
+    def test_missing_env_var_no_default(self, mocker: MockerFixture) -> None:
+        """Test that missing environment variable with no default returns None."""
+        mocker.patch.dict(os.environ, {}, clear=True)
+        assert get_float_from_env("MISSING_VAR") is None
+
+    @pytest.mark.parametrize(
+        "invalid_value",
+        [
+            pytest.param("not_a_number", id="text"),
+            pytest.param("42.5.0", id="double_decimal"),
+            pytest.param("42a", id="alpha_suffix"),
+            pytest.param("", id="empty"),
+            pytest.param(" ", id="whitespace"),
+            pytest.param("true", id="boolean"),
+            pytest.param("1.2.3", id="triple_decimal"),
+        ],
+    )
+    def test_invalid_float_values_raise_error(
+        self,
+        mocker: MockerFixture,
+        invalid_value: str,
+    ) -> None:
+        """Test that invalid float values raise ValueError."""
+        mocker.patch.dict(os.environ, {"INVALID_FLOAT": invalid_value})
+        with pytest.raises(ValueError):
+            get_float_from_env("INVALID_FLOAT")
+
+
+class TestGetPathFromEnv:
+    """Tests get_path_from_env with set and missing variables and defaults."""
+
+    @pytest.mark.parametrize(
+        "env_value",
+        [
+            pytest.param("/tmp/test", id="absolute"),
+            pytest.param("relative/path", id="relative"),
+            pytest.param("/path/with spaces/file.txt", id="spaces"),
+            pytest.param(".", id="current_dir"),
+            pytest.param("..", id="parent_dir"),
+            pytest.param("/", id="root"),
+        ],
+    )
+    def test_existing_env_var_paths(
+        self,
+        mocker: MockerFixture,
+        env_value: str,
+    ) -> None:
+        """Test that existing environment variables with paths return resolved Path objects."""
+        mocker.patch.dict(os.environ, {"PATH_VAR": env_value})
+        result = get_path_from_env("PATH_VAR")
+        assert isinstance(result, Path)
+        assert result == Path(env_value).resolve()
+
+    def test_missing_env_var_no_default(self, mocker: MockerFixture) -> None:
+        """Test that missing environment variable with no default returns None."""
+        mocker.patch.dict(os.environ, {}, clear=True)
+        assert get_path_from_env("MISSING_VAR") is None
+
+    def test_missing_env_var_with_none_default(self, mocker: MockerFixture) -> None:
+        """Test that missing environment variable with None default returns None."""
+        mocker.patch.dict(os.environ, {}, clear=True)
+        assert get_path_from_env("MISSING_VAR", default=None) is None
+
+    @pytest.mark.parametrize(
+        "default_path_str",
+        [
+            pytest.param("/default/path", id="absolute_default"),
+            pytest.param("relative/default", id="relative_default"),
+            pytest.param(".", id="current_default"),
+        ],
+    )
+    def test_missing_env_var_with_path_defaults(
+        self,
+        mocker: MockerFixture,
+        default_path_str: str,
+    ) -> None:
+        """Test that missing environment variables return resolved default Path objects."""
+        mocker.patch.dict(os.environ, {}, clear=True)
+        default_path = Path(default_path_str)
+        result = get_path_from_env("MISSING_VAR", default=default_path)
+        assert isinstance(result, Path)
+        assert result == default_path.resolve()
+
+    def test_relative_paths_are_resolved(self, mocker: MockerFixture) -> None:
+        """Test that relative paths are properly resolved to absolute paths."""
+        mocker.patch.dict(os.environ, {"REL_PATH": "relative/path"})
+        result = get_path_from_env("REL_PATH")
+        assert result is not None
+        assert result.is_absolute()
+
+
+class TestGetListFromEnv:
+    """Tests get_list_from_env parsing, defaults and the required flag."""
+
+    @pytest.mark.parametrize(
+        ("env_value", "expected"),
+        [
+            pytest.param("a,b,c", ["a", "b", "c"], id="basic_comma_separated"),
+            pytest.param("single", ["single"], id="single_element"),
+            pytest.param("", [], id="empty_string"),
+            pytest.param("a, b , c", ["a", "b", "c"], id="whitespace_trimmed"),
+            pytest.param("a,,b,c", ["a", "b", "c"], id="empty_elements_removed"),
+        ],
+    )
+    def test_existing_env_var_basic_parsing(
+        self,
+        mocker: MockerFixture,
+        env_value: str,
+        expected: list[str],
+    ) -> None:
+        """Test that existing environment variables are parsed correctly."""
+        mocker.patch.dict(os.environ, {"LIST_VAR": env_value})
+        result = get_list_from_env("LIST_VAR")
+        assert result == expected
+
+    @pytest.mark.parametrize(
+        ("separator", "env_value", "expected"),
+        [
+            pytest.param("|", "a|b|c", ["a", "b", "c"], id="pipe_separator"),
+            pytest.param(":", "a:b:c", ["a", "b", "c"], id="colon_separator"),
+            pytest.param(";", "a;b;c", ["a", "b", "c"], id="semicolon_separator"),
+        ],
+    )
+    def test_custom_separators(
+        self,
+        mocker: MockerFixture,
+        separator: str,
+        env_value: str,
+        expected: list[str],
+    ) -> None:
+        """Test that custom separators work correctly."""
+        mocker.patch.dict(os.environ, {"LIST_VAR": env_value})
+        result = get_list_from_env("LIST_VAR", separator=separator)
+        assert result == expected
+
+    @pytest.mark.parametrize(
+        ("default", "expected"),
+        [
+            pytest.param(
+                ["default1", "default2"],
+                ["default1", "default2"],
+                id="string_list_default",
+            ),
+            pytest.param([1, 2, 3], [1, 2, 3], id="int_list_default"),
+            pytest.param(None, [], id="none_default_returns_empty_list"),
+        ],
+    )
+    def test_missing_env_var_with_defaults(
+        self,
+        mocker: MockerFixture,
+        default: list[str] | list[int] | None,
+        expected: list[str] | list[int],
+    ) -> None:
+        """Test that missing environment variables return provided defaults."""
+        mocker.patch.dict(os.environ, {}, clear=True)
+        result = get_list_from_env("MISSING_VAR", default=default)
+        assert result == expected
+
+    def test_missing_env_var_no_default(self, mocker: MockerFixture) -> None:
+        """Test that missing environment variable with no default returns empty list."""
+        mocker.patch.dict(os.environ, {}, clear=True)
+        result = get_list_from_env("MISSING_VAR")
+        assert result == []
+
+    def test_required_env_var_missing_raises_error(self, mocker: MockerFixture) -> None:
+        """Test that missing required environment variable raises ValueError."""
+        mocker.patch.dict(os.environ, {}, clear=True)
+        with pytest.raises(
+            ValueError,
+            match="Required environment variable 'REQUIRED_VAR' is not set",
+        ):
+            get_list_from_env("REQUIRED_VAR", required=True)
+
+    def test_required_env_var_with_default_does_not_raise(
+        self,
+        mocker: MockerFixture,
+    ) -> None:
+        """Test that required environment variable with default does not raise error."""
+        mocker.patch.dict(os.environ, {}, clear=True)
+        result = get_list_from_env("REQUIRED_VAR", default=["default"], required=True)
+        assert result == ["default"]
+
+    def test_strip_whitespace_false(self, mocker: MockerFixture) -> None:
+        """Test that whitespace is preserved when strip_whitespace=False."""
+        mocker.patch.dict(os.environ, {"LIST_VAR": " a , b , c "})
+        result = get_list_from_env("LIST_VAR", strip_whitespace=False)
+        assert result == [" a ", " b ", " c "]
+
+    def test_remove_empty_false(self, mocker: MockerFixture) -> None:
+        """Test that empty elements are preserved when remove_empty=False."""
+        mocker.patch.dict(os.environ, {"LIST_VAR": "a,,b,,c"})
+        result = get_list_from_env("LIST_VAR", remove_empty=False)
+        assert result == ["a", "", "b", "", "c"]
+
+
 class TestGetEnvChoice:
    @pytest.fixture
    def valid_choices(self) -> set[str]:
@@ -394,21 +614,3 @@ class TestGetEnvChoice:
        result = get_choice_from_env("TEST_ENV", large_choices)

        assert result == "option_50"
-
-    def test_different_env_keys(
-        self,
-        mocker: MockerFixture,
-        valid_choices: set[str],
-    ) -> None:
-        """Test function works with different environment variable keys."""
-        test_cases = [
-            ("DJANGO_ENV", "development"),
-            ("DATABASE_BACKEND", "staging"),
-            ("LOG_LEVEL", "production"),
-            ("APP_MODE", "development"),
-        ]
-
-        for env_key, env_value in test_cases:
-            mocker.patch.dict("os.environ", {env_key: env_value})
-            result = get_choice_from_env(env_key, valid_choices)
-            assert result == env_value
--- a/src/paperless/tests/settings/test_remote_user.py
+++ b/src/paperless/tests/settings/test_remote_user.py
--- a/src/paperless/tests/settings/test_settings.py
+++ b/src/paperless/tests/settings/test_settings.py
@@ -0,0 +1,56 @@
+import os
+from unittest import TestCase
+from unittest import mock
+
+from paperless.settings import _parse_paperless_url
+from paperless.settings import default_threads_per_worker
+
+
+class TestThreadCalculation(TestCase):
+    def test_workers_threads(self) -> None:
+        """
+        GIVEN:
+            - CPU counts from 1 through 63
+        WHEN:
+            - Threads per worker is calculated for a single worker
+        THEN:
+            - At least 1 thread per worker
+            - Workers times threads is less than or equal to the CPU count
+        """
+        worker_count = 1
+        patch_target = "paperless.settings.multiprocessing.cpu_count"
+
+        for cpu_total in range(1, 64):
+            # Make cpu_count() report the CPU count under test
+            with mock.patch(patch_target, return_value=cpu_total):
+                threads = default_threads_per_worker(worker_count)
+
+                self.assertGreaterEqual(threads, 1)
+
+                self.assertLessEqual(worker_count * threads, cpu_total)
+
+
+class TestPaperlessURLSettings(TestCase):
+    def test_paperless_url(self) -> None:
+        """
+        GIVEN:
+            - PAPERLESS_URL is set in the environment
+        WHEN:
+            - The URL is parsed
+        THEN:
+            - The parsed URL equals the configured one
+            - The URL is listed in the CSRF and CORS origin settings
+        """
+        configured_url = "https://example.com"
+        patched_env = {"PAPERLESS_URL": configured_url}
+
+        with mock.patch.dict(os.environ, patched_env):
+            parsed_url = _parse_paperless_url()
+            self.assertEqual(configured_url, parsed_url)
+
+            # Imported here so the settings are looked up while the
+            # environment is patched
+            from django.conf import settings
+
+            self.assertIn(parsed_url, settings.CSRF_TRUSTED_ORIGINS)
+            self.assertIn(parsed_url, settings.CORS_ALLOWED_ORIGINS)
--- a/src/paperless/tests/test_settings.py
+++ b/src/paperless/tests/test_settings.py
@@ -1,482 +0,0 @@
-import datetime
-import os
-from unittest import TestCase
-from unittest import mock
-
-import pytest
-from celery.schedules import crontab
-
-from paperless.settings import _parse_base_paths
-from paperless.settings import _parse_beat_schedule
-from paperless.settings import _parse_dateparser_languages
-from paperless.settings import _parse_ignore_dates
-from paperless.settings import _parse_paperless_url
-from paperless.settings import _parse_redis_url
-from paperless.settings import default_threads_per_worker
-
-
-class TestIgnoreDateParsing(TestCase):
-    """
-    Tests the parsing of the PAPERLESS_IGNORE_DATES setting value
-    """
-
-    def _parse_checker(self, test_cases) -> None:
-        """
-        Helper function to check ignore date parsing
-
-        Args:
-            test_cases (_type_): _description_
-        """
-        for env_str, date_format, expected_date_set in test_cases:
-            self.assertSetEqual(
-                _parse_ignore_dates(env_str, date_format),
-                expected_date_set,
-            )
-
-    def test_no_ignore_dates_set(self) -> None:
-        """
-        GIVEN:
-            - No ignore dates are set
-        THEN:
-            - No ignore dates are parsed
-        """
-        self.assertSetEqual(_parse_ignore_dates(""), set())
-
-    def test_single_ignore_dates_set(self) -> None:
-        """
-        GIVEN:
-            - Ignore dates are set per certain inputs
-        THEN:
-            - All ignore dates are parsed
-        """
-        test_cases = [
-            ("1985-05-01", "YMD", {datetime.date(1985, 5, 1)}),
-            (
-                "1985-05-01,1991-12-05",
-                "YMD",
-                {datetime.date(1985, 5, 1), datetime.date(1991, 12, 5)},
-            ),
-            ("2010-12-13", "YMD", {datetime.date(2010, 12, 13)}),
-            ("11.01.10", "DMY", {datetime.date(2010, 1, 11)}),
-            (
-                "11.01.2001,15-06-1996",
-                "DMY",
-                {datetime.date(2001, 1, 11), datetime.date(1996, 6, 15)},
-            ),
-        ]
-
-        self._parse_checker(test_cases)
-
-
-class TestThreadCalculation(TestCase):
-    def test_workers_threads(self) -> None:
-        """
-        GIVEN:
-            - Certain CPU counts
-        WHEN:
-            - Threads per worker is calculated
-        THEN:
-            - Threads per worker less than or equal to CPU count
-            - At least 1 thread per worker
-        """
-        default_workers = 1
-
-        for i in range(1, 64):
-            with mock.patch(
-                "paperless.settings.multiprocessing.cpu_count",
-            ) as cpu_count:
-                cpu_count.return_value = i
-
-                default_threads = default_threads_per_worker(default_workers)
-
-                self.assertGreaterEqual(default_threads, 1)
-
-                self.assertLessEqual(default_workers * default_threads, i)
-
-
-class TestRedisSocketConversion(TestCase):
-    def test_redis_socket_parsing(self) -> None:
-        """
-        GIVEN:
-            - Various Redis connection URI formats
-        WHEN:
-            - The URI is parsed
-        THEN:
-            - Socket based URIs are translated
-            - Non-socket URIs are unchanged
-            - None provided uses default
-        """
-
-        for input, expected in [
-            # Nothing is set
-            (None, ("redis://localhost:6379", "redis://localhost:6379")),
-            # celery style
-            (
-                "redis+socket:///run/redis/redis.sock",
-                (
-                    "redis+socket:///run/redis/redis.sock",
-                    "unix:///run/redis/redis.sock",
-                ),
-            ),
-            # redis-py / channels-redis style
-            (
-                "unix:///run/redis/redis.sock",
-                (
-                    "redis+socket:///run/redis/redis.sock",
-                    "unix:///run/redis/redis.sock",
-                ),
-            ),
-            # celery style with db
-            (
-                "redis+socket:///run/redis/redis.sock?virtual_host=5",
-                (
-                    "redis+socket:///run/redis/redis.sock?virtual_host=5",
-                    "unix:///run/redis/redis.sock?db=5",
-                ),
-            ),
-            # redis-py / channels-redis style with db
-            (
-                "unix:///run/redis/redis.sock?db=10",
-                (
-                    "redis+socket:///run/redis/redis.sock?virtual_host=10",
-                    "unix:///run/redis/redis.sock?db=10",
-                ),
-            ),
-            # Just a host with a port
-            (
-                "redis://myredishost:6379",
-                ("redis://myredishost:6379", "redis://myredishost:6379"),
-            ),
-        ]:
-            result = _parse_redis_url(input)
-            self.assertTupleEqual(expected, result)
-
-
-class TestCeleryScheduleParsing(TestCase):
-    MAIL_EXPIRE_TIME = 9.0 * 60.0
-    CLASSIFIER_EXPIRE_TIME = 59.0 * 60.0
-    INDEX_EXPIRE_TIME = 23.0 * 60.0 * 60.0
-    SANITY_EXPIRE_TIME = ((7.0 * 24.0) - 1.0) * 60.0 * 60.0
-    EMPTY_TRASH_EXPIRE_TIME = 23.0 * 60.0 * 60.0
-    RUN_SCHEDULED_WORKFLOWS_EXPIRE_TIME = 59.0 * 60.0
-    LLM_INDEX_EXPIRE_TIME = 23.0 * 60.0 * 60.0
-    CLEANUP_EXPIRED_SHARE_BUNDLES_EXPIRE_TIME = 23.0 * 60.0 * 60.0
-
-    def test_schedule_configuration_default(self) -> None:
-        """
-        GIVEN:
-            - No configured task schedules
-        WHEN:
-            - The celery beat schedule is built
-        THEN:
-            - The default schedule is returned
-        """
-        schedule = _parse_beat_schedule()
-
-        self.assertDictEqual(
-            {
-                "Check all e-mail accounts": {
-                    "task": "paperless_mail.tasks.process_mail_accounts",
-                    "schedule": crontab(minute="*/10"),
-                    "options": {"expires": self.MAIL_EXPIRE_TIME},
-                },
-                "Train the classifier": {
-                    "task": "documents.tasks.train_classifier",
-                    "schedule": crontab(minute="5", hour="*/1"),
-                    "options": {"expires": self.CLASSIFIER_EXPIRE_TIME},
-                },
-                "Optimize the index": {
-                    "task": "documents.tasks.index_optimize",
-                    "schedule": crontab(minute=0, hour=0),
-                    "options": {"expires": self.INDEX_EXPIRE_TIME},
-                },
-                "Perform sanity check": {
-                    "task": "documents.tasks.sanity_check",
-                    "schedule": crontab(minute=30, hour=0, day_of_week="sun"),
-                    "options": {"expires": self.SANITY_EXPIRE_TIME},
-                },
-                "Empty trash": {
-                    "task": "documents.tasks.empty_trash",
-                    "schedule": crontab(minute=0, hour="1"),
-                    "options": {"expires": self.EMPTY_TRASH_EXPIRE_TIME},
-                },
-                "Check and run scheduled workflows": {
-                    "task": "documents.tasks.check_scheduled_workflows",
-                    "schedule": crontab(minute="5", hour="*/1"),
-                    "options": {"expires": self.RUN_SCHEDULED_WORKFLOWS_EXPIRE_TIME},
-                },
-                "Rebuild LLM index": {
-                    "task": "documents.tasks.llmindex_index",
-                    "schedule": crontab(minute=10, hour=2),
-                    "options": {
-                        "expires": self.LLM_INDEX_EXPIRE_TIME,
-                    },
-                },
-                "Cleanup expired share link bundles": {
-                    "task": "documents.tasks.cleanup_expired_share_link_bundles",
-                    "schedule": crontab(minute=0, hour=2),
-                    "options": {
-                        "expires": self.CLEANUP_EXPIRED_SHARE_BUNDLES_EXPIRE_TIME,
-                    },
-                },
-            },
-            schedule,
-        )
-
-    def test_schedule_configuration_changed(self) -> None:
-        """
-        GIVEN:
-            - Email task is configured non-default
-        WHEN:
-            - The celery beat schedule is built
-        THEN:
-            - The email task is configured per environment
-            - The default schedule is returned for other tasks
-        """
-        with mock.patch.dict(
-            os.environ,
-            {"PAPERLESS_EMAIL_TASK_CRON": "*/50 * * * mon"},
-        ):
-            schedule = _parse_beat_schedule()
-
-        self.assertDictEqual(
-            {
-                "Check all e-mail accounts": {
-                    "task": "paperless_mail.tasks.process_mail_accounts",
-                    "schedule": crontab(minute="*/50", day_of_week="mon"),
-                    "options": {"expires": self.MAIL_EXPIRE_TIME},
-                },
-                "Train the classifier": {
-                    "task": "documents.tasks.train_classifier",
-                    "schedule": crontab(minute="5", hour="*/1"),
-                    "options": {"expires": self.CLASSIFIER_EXPIRE_TIME},
-                },
-                "Optimize the index": {
-                    "task": "documents.tasks.index_optimize",
-                    "schedule": crontab(minute=0, hour=0),
-                    "options": {"expires": self.INDEX_EXPIRE_TIME},
-                },
-                "Perform sanity check": {
-                    "task": "documents.tasks.sanity_check",
-                    "schedule": crontab(minute=30, hour=0, day_of_week="sun"),
-                    "options": {"expires": self.SANITY_EXPIRE_TIME},
-                },
-                "Empty trash": {
-                    "task": "documents.tasks.empty_trash",
-                    "schedule": crontab(minute=0, hour="1"),
-                    "options": {"expires": self.EMPTY_TRASH_EXPIRE_TIME},
-                },
-                "Check and run scheduled workflows": {
-                    "task": "documents.tasks.check_scheduled_workflows",
-                    "schedule": crontab(minute="5", hour="*/1"),
-                    "options": {"expires": self.RUN_SCHEDULED_WORKFLOWS_EXPIRE_TIME},
-                },
-                "Rebuild LLM index": {
-                    "task": "documents.tasks.llmindex_index",
-                    "schedule": crontab(minute=10, hour=2),
-                    "options": {
-                        "expires": self.LLM_INDEX_EXPIRE_TIME,
-                    },
-                },
-                "Cleanup expired share link bundles": {
-                    "task": "documents.tasks.cleanup_expired_share_link_bundles",
-                    "schedule": crontab(minute=0, hour=2),
-                    "options": {
-                        "expires": self.CLEANUP_EXPIRED_SHARE_BUNDLES_EXPIRE_TIME,
-                    },
-                },
-            },
-            schedule,
-        )
-
-    def test_schedule_configuration_disabled(self) -> None:
-        """
-        GIVEN:
-            - Search index task is disabled
-        WHEN:
-            - The celery beat schedule is built
-        THEN:
-            - The search index task is not present
-            - The default schedule is returned for other tasks
-        """
-        with mock.patch.dict(os.environ, {"PAPERLESS_INDEX_TASK_CRON": "disable"}):
-            schedule = _parse_beat_schedule()
-
-        self.assertDictEqual(
-            {
-                "Check all e-mail accounts": {
-                    "task": "paperless_mail.tasks.process_mail_accounts",
-                    "schedule": crontab(minute="*/10"),
-                    "options": {"expires": self.MAIL_EXPIRE_TIME},
-                },
-                "Train the classifier": {
-                    "task": "documents.tasks.train_classifier",
-                    "schedule": crontab(minute="5", hour="*/1"),
-                    "options": {"expires": self.CLASSIFIER_EXPIRE_TIME},
-                },
-                "Perform sanity check": {
-                    "task": "documents.tasks.sanity_check",
-                    "schedule": crontab(minute=30, hour=0, day_of_week="sun"),
-                    "options": {"expires": self.SANITY_EXPIRE_TIME},
-                },
-                "Empty trash": {
-                    "task": "documents.tasks.empty_trash",
-                    "schedule": crontab(minute=0, hour="1"),
-                    "options": {"expires": self.EMPTY_TRASH_EXPIRE_TIME},
-                },
-                "Check and run scheduled workflows": {
-                    "task": "documents.tasks.check_scheduled_workflows",
-                    "schedule": crontab(minute="5", hour="*/1"),
-                    "options": {"expires": self.RUN_SCHEDULED_WORKFLOWS_EXPIRE_TIME},
-                },
-                "Rebuild LLM index": {
-                    "task": "documents.tasks.llmindex_index",
-                    "schedule": crontab(minute=10, hour=2),
-                    "options": {
-                        "expires": self.LLM_INDEX_EXPIRE_TIME,
-                    },
-                },
-                "Cleanup expired share link bundles": {
-                    "task": "documents.tasks.cleanup_expired_share_link_bundles",
-                    "schedule": crontab(minute=0, hour=2),
-                    "options": {
-                        "expires": self.CLEANUP_EXPIRED_SHARE_BUNDLES_EXPIRE_TIME,
-                    },
-                },
-            },
-            schedule,
-        )
-
-    def test_schedule_configuration_disabled_all(self) -> None:
-        """
-        GIVEN:
-            - All tasks are disabled
-        WHEN:
-            - The celery beat schedule is built
-        THEN:
-            - No tasks are scheduled
-        """
-        with mock.patch.dict(
-            os.environ,
-            {
-                "PAPERLESS_EMAIL_TASK_CRON": "disable",
-                "PAPERLESS_TRAIN_TASK_CRON": "disable",
-                "PAPERLESS_SANITY_TASK_CRON": "disable",
-                "PAPERLESS_INDEX_TASK_CRON": "disable",
-                "PAPERLESS_EMPTY_TRASH_TASK_CRON": "disable",
-                "PAPERLESS_WORKFLOW_SCHEDULED_TASK_CRON": "disable",
-                "PAPERLESS_LLM_INDEX_TASK_CRON": "disable",
-                "PAPERLESS_SHARE_LINK_BUNDLE_CLEANUP_CRON": "disable",
-            },
-        ):
-            schedule = _parse_beat_schedule()
-
-        self.assertDictEqual(
-            {},
-            schedule,
-        )
-
-
-class TestPaperlessURLSettings(TestCase):
-    def test_paperless_url(self) -> None:
-        """
-        GIVEN:
-            - PAPERLESS_URL is set
-        WHEN:
-            - The URL is parsed
-        THEN:
-            - The URL is returned and present in related settings
-        """
-        with mock.patch.dict(
-            os.environ,
-            {
-                "PAPERLESS_URL": "https://example.com",
-            },
-        ):
-            url = _parse_paperless_url()
-            self.assertEqual("https://example.com", url)
-            from django.conf import settings
-
-            self.assertIn(url, settings.CSRF_TRUSTED_ORIGINS)
-            self.assertIn(url, settings.CORS_ALLOWED_ORIGINS)
-
-
-class TestPathSettings(TestCase):
-    def test_default_paths(self) -> None:
-        """
-        GIVEN:
-            - PAPERLESS_FORCE_SCRIPT_NAME is not set
-        WHEN:
-            - Settings are parsed
-        THEN:
-            - Paths are as expected
-        """
-        base_paths = _parse_base_paths()
-        self.assertEqual(None, base_paths[0])  # FORCE_SCRIPT_NAME
-        self.assertEqual("/", base_paths[1])  # BASE_URL
-        self.assertEqual("/accounts/login/", base_paths[2])  # LOGIN_URL
-        self.assertEqual("/dashboard", base_paths[3])  # LOGIN_REDIRECT_URL
-        self.assertEqual(
-            "/accounts/login/?loggedout=1",
-            base_paths[4],
-        )  # LOGOUT_REDIRECT_URL
-
-    @mock.patch("os.environ", {"PAPERLESS_FORCE_SCRIPT_NAME": "/paperless"})
-    def test_subpath(self) -> None:
-        """
-        GIVEN:
-            - PAPERLESS_FORCE_SCRIPT_NAME is set
-        WHEN:
-            - Settings are parsed
-        THEN:
-            - The path is returned and present in related settings
-        """
-        base_paths = _parse_base_paths()
-        self.assertEqual("/paperless", base_paths[0])  # FORCE_SCRIPT_NAME
-        self.assertEqual("/paperless/", base_paths[1])  # BASE_URL
-        self.assertEqual("/paperless/accounts/login/", base_paths[2])  # LOGIN_URL
-        self.assertEqual("/paperless/dashboard", base_paths[3])  # LOGIN_REDIRECT_URL
-        self.assertEqual(
-            "/paperless/accounts/login/?loggedout=1",
-            base_paths[4],
-        )  # LOGOUT_REDIRECT_URL
-
-    @mock.patch(
-        "os.environ",
-        {
-            "PAPERLESS_FORCE_SCRIPT_NAME": "/paperless",
-            "PAPERLESS_LOGOUT_REDIRECT_URL": "/foobar/",
-        },
-    )
-    def test_subpath_with_explicit_logout_url(self) -> None:
-        """
-        GIVEN:
-            - PAPERLESS_FORCE_SCRIPT_NAME is set and so is PAPERLESS_LOGOUT_REDIRECT_URL
-        WHEN:
-            - Settings are parsed
-        THEN:
-            - The correct logout redirect URL is returned
-        """
-        base_paths = _parse_base_paths()
-        self.assertEqual("/paperless/", base_paths[1])  # BASE_URL
-        self.assertEqual("/foobar/", base_paths[4])  # LOGOUT_REDIRECT_URL
-
-
-@pytest.mark.parametrize(
-    ("languages", "expected"),
-    [
-        ("de", ["de"]),
-        ("zh", ["zh"]),
-        ("fr+en", ["fr", "en"]),
-        # Locales must be supported
-        ("en-001+fr-CA", ["en-001", "fr-CA"]),
-        ("en-001+fr", ["en-001", "fr"]),
-        # Special case for Chinese: variants seem to miss some dates,
-        # so we always add "zh" as a fallback.
-        ("en+zh-Hans-HK", ["en", "zh-Hans-HK", "zh"]),
-        ("en+zh-Hans", ["en", "zh-Hans", "zh"]),
-        ("en+zh-Hans+zh-Hant", ["en", "zh-Hans", "zh-Hant", "zh"]),
-    ],
-)
-def test_parser_date_parser_languages(languages, expected) -> None:
-    assert sorted(_parse_dateparser_languages(languages)) == sorted(expected)
--- a/uv.lock
+++ b/uv.lock
@@ -1748,6 +1748,73 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" },
 ]

+[[package]]
+name = "ijson"
+version = "3.5.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/f4/57/60d1a6a512f2f0508d0bc8b4f1cc5616fd3196619b66bd6a01f9155a1292/ijson-3.5.0.tar.gz", hash = "sha256:94688760720e3f5212731b3cb8d30267f9a045fb38fb3870254e7b9504246f31", size = 68658, upload-time = "2026-02-24T03:58:30.974Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/65/da/644343198abca5e0f6e2486063f8d8f3c443ca0ef5e5c890e51ef6032e33/ijson-3.5.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5616311404b858d32740b7ad8b9a799c62165f5ecb85d0a8ed16c21665a90533", size = 88964, upload-time = "2026-02-24T03:56:53.099Z" },
+    { url = "https://files.pythonhosted.org/packages/5b/63/8621190aa2baf96156dfd4c632b6aa9f1464411e50b98750c09acc0505ea/ijson-3.5.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e9733f94029dd41702d573ef64752e2556e72aea14623d6dbb7a44ca1ccf30fd", size = 60582, upload-time = "2026-02-24T03:56:54.261Z" },
+    { url = "https://files.pythonhosted.org/packages/20/31/6a3f041fdd17dacff33b7d7d3ba3df6dca48740108340c6042f974b2ad20/ijson-3.5.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:db8398c6721b98412a4f618da8022550c8b9c5d9214040646071b5deb4d4a393", size = 60632, upload-time = "2026-02-24T03:56:55.159Z" },
+    { url = "https://files.pythonhosted.org/packages/e4/68/474541998abbdecfd46a744536878335de89aceb9f085bff1aaf35575ceb/ijson-3.5.0-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:c061314845c08163b1784b6076ea5f075372461a32e6916f4e5f211fd4130b64", size = 131988, upload-time = "2026-02-24T03:56:56.35Z" },
+    { url = "https://files.pythonhosted.org/packages/cd/32/e05ff8b72a44fe9d192f41c5dcbc35cfa87efc280cdbfe539ffaf4a7535e/ijson-3.5.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1111a1c5ac79119c5d6e836f900c1a53844b50a18af38311baa6bb61e2645aca", size = 138669, upload-time = "2026-02-24T03:56:57.555Z" },
+    { url = "https://files.pythonhosted.org/packages/49/b5/955a83b031102c7a602e2c06d03aff0a0e584212f09edb94ccc754d203ac/ijson-3.5.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1e74aff8c681c24002b61b1822f9511d4c384f324f7dbc08c78538e01fdc9fcb", size = 135093, upload-time = "2026-02-24T03:56:59.267Z" },
+    { url = "https://files.pythonhosted.org/packages/e8/f2/30250cfcb4d2766669b31f6732689aab2bb91de426a15a3ebe482df7ee48/ijson-3.5.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:739a7229b1b0cc5f7e2785a6e7a5fc915e850d3fed9588d0e89a09f88a417253", size = 138715, upload-time = "2026-02-24T03:57:00.491Z" },
+    { url = "https://files.pythonhosted.org/packages/a2/05/785a145d7e75e04e04480d59b6323cd4b1d9013a6cd8643fa635fbc93490/ijson-3.5.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:ef88712160360cab3ca6471a4e5418243f8b267cf1fe1620879d1b5558babc71", size = 133194, upload-time = "2026-02-24T03:57:01.759Z" },
+    { url = "https://files.pythonhosted.org/packages/14/eb/80d6f8a748dead4034cea0939494a67d10ccf88d6413bf6e860393139676/ijson-3.5.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6ca0d1b6b5f8166a6248f4309497585fb8553b04bc8179a0260fad636cfdb798", size = 135588, upload-time = "2026-02-24T03:57:03.131Z" },
+    { url = "https://files.pythonhosted.org/packages/aa/17/9c63c7688025f3a8c47ea717b8306649c8c7244e49e20a2be4e3515dc75c/ijson-3.5.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:1ebefbe149a6106cc848a3eaf536af51a9b5ccc9082de801389f152dba6ab755", size = 88536, upload-time = "2026-02-24T03:57:06.809Z" },
+    { url = "https://files.pythonhosted.org/packages/6f/dd/e15c2400244c117b06585452ebc63ae254f5a6964f712306afd1422daae0/ijson-3.5.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:19e30d9f00f82e64de689c0b8651b9cfed879c184b139d7e1ea5030cec401c21", size = 60499, upload-time = "2026-02-24T03:57:09.155Z" },
+    { url = "https://files.pythonhosted.org/packages/77/a9/bf4fe3538a0c965f16b406f180a06105b875da83f0743e36246be64ef550/ijson-3.5.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a04a33ee78a6f27b9b8528c1ca3c207b1df3b8b867a4cf2fcc4109986f35c227", size = 60330, upload-time = "2026-02-24T03:57:10.574Z" },
+    { url = "https://files.pythonhosted.org/packages/31/76/6f91bdb019dd978fce1bc5ea1cd620cfc096d258126c91db2c03a20a7f34/ijson-3.5.0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:7d48dc2984af02eb3c56edfb3f13b3f62f2f3e4fe36f058c8cfc75d93adf4fed", size = 138977, upload-time = "2026-02-24T03:57:11.932Z" },
+    { url = "https://files.pythonhosted.org/packages/11/be/bbc983059e48a54b0121ee60042979faed7674490bbe7b2c41560db3f436/ijson-3.5.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f1e73a44844d9adbca9cf2c4132cd875933e83f3d4b23881fcaf82be83644c7d", size = 149785, upload-time = "2026-02-24T03:57:13.255Z" },
+    { url = "https://files.pythonhosted.org/packages/6d/81/2fee58f9024a3449aee83edfa7167fb5ccd7e1af2557300e28531bb68e16/ijson-3.5.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7389a56b8562a19948bdf1d7bae3a2edc8c7f86fb59834dcb1c4c722818e645a", size = 149729, upload-time = "2026-02-24T03:57:14.191Z" },
+    { url = "https://files.pythonhosted.org/packages/c7/56/f1706761fcc096c9d414b3dcd000b1e6e5c24364c21cfba429837f98ee8d/ijson-3.5.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3176f23f8ebec83f374ed0c3b4e5a0c4db7ede54c005864efebbed46da123608", size = 150697, upload-time = "2026-02-24T03:57:15.855Z" },
+    { url = "https://files.pythonhosted.org/packages/d9/6e/ee0d9c875a0193b632b3e9ccd1b22a50685fb510256ad57ba483b6529f77/ijson-3.5.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:6babd88e508630c6ef86c9bebaaf13bb2fb8ec1d8f8868773a03c20253f599bc", size = 142873, upload-time = "2026-02-24T03:57:16.831Z" },
+    { url = "https://files.pythonhosted.org/packages/d2/bf/f9d4399d0e6e3fd615035290a71e97c843f17f329b43638c0a01cf112d73/ijson-3.5.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:dc1b3836b174b6db2fa8319f1926fb5445abd195dc963368092103f8579cb8ed", size = 151583, upload-time = "2026-02-24T03:57:17.757Z" },
+    { url = "https://files.pythonhosted.org/packages/a2/71/d67e764a712c3590627480643a3b51efcc3afa4ef3cb54ee4c989073c97e/ijson-3.5.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e9cedc10e40dd6023c351ed8bfc7dcfce58204f15c321c3c1546b9c7b12562a4", size = 88544, upload-time = "2026-02-24T03:57:21.293Z" },
+    { url = "https://files.pythonhosted.org/packages/1a/39/f1c299371686153fa3cf5c0736b96247a87a1bee1b7145e6d21f359c505a/ijson-3.5.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:3647649f782ee06c97490b43680371186651f3f69bebe64c6083ee7615d185e5", size = 60495, upload-time = "2026-02-24T03:57:22.501Z" },
+    { url = "https://files.pythonhosted.org/packages/16/94/b1438e204d75e01541bebe3e668fe3e68612d210e9931ae1611062dd0a56/ijson-3.5.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:90e74be1dce05fce73451c62d1118671f78f47c9f6be3991c82b91063bf01fc9", size = 60325, upload-time = "2026-02-24T03:57:23.332Z" },
+    { url = "https://files.pythonhosted.org/packages/30/e2/4aa9c116fa86cc8b0f574f3c3a47409edc1cd4face05d0e589a5a176b05d/ijson-3.5.0-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:78e9ad73e7be2dd80627504bd5cbf512348c55ce2c06e362ed7683b5220e8568", size = 138774, upload-time = "2026-02-24T03:57:24.683Z" },
+    { url = "https://files.pythonhosted.org/packages/d2/d2/738b88752a70c3be1505faa4dcd7110668c2712e582a6a36488ed1e295d4/ijson-3.5.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9577449313cc94be89a4fe4b3e716c65f09cc19636d5a6b2861c4e80dddebd58", size = 149820, upload-time = "2026-02-24T03:57:26.062Z" },
+    { url = "https://files.pythonhosted.org/packages/ed/df/0b3ab9f393ca8f72ea03bc896ba9fdc987e90ae08cdb51c32a4ee0c14d5e/ijson-3.5.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3e4c1178fb50aff5f5701a30a5152ead82a14e189ce0f6102fa1b5f10b2f54ff", size = 149747, upload-time = "2026-02-24T03:57:27.308Z" },
+    { url = "https://files.pythonhosted.org/packages/cc/a3/b0037119f75131b78cb00acc2657b1a9d0435475f1f2c5f8f5a170b66b9c/ijson-3.5.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0eb402ab026ffb37a918d75af2b7260fe6cfbce13232cc83728a714dd30bd81d", size = 151027, upload-time = "2026-02-24T03:57:28.522Z" },
+    { url = "https://files.pythonhosted.org/packages/22/a0/cb344de1862bf09d8f769c9d25c944078c87dd59a1b496feec5ad96309a4/ijson-3.5.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:5b08ee08355f9f729612a8eb9bf69cc14f9310c3b2a487c6f1c3c65d85216ec4", size = 142996, upload-time = "2026-02-24T03:57:29.774Z" },
+    { url = "https://files.pythonhosted.org/packages/ca/32/a8ffd67182e02ea61f70f62daf43ded4fa8a830a2520a851d2782460aba8/ijson-3.5.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:bda62b6d48442903e7bf56152108afb7f0f1293c2b9bef2f2c369defea76ab18", size = 152068, upload-time = "2026-02-24T03:57:30.969Z" },
+    { url = "https://files.pythonhosted.org/packages/42/65/13e2492d17e19a2084523e18716dc2809159f2287fd2700c735f311e76c4/ijson-3.5.0-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:4d4b0cd676b8c842f7648c1a783448fac5cd3b98289abd83711b3e275e143524", size = 93019, upload-time = "2026-02-24T03:57:33.976Z" },
+    { url = "https://files.pythonhosted.org/packages/33/92/483fc97ece0c3f1cecabf48f6a7a36e89d19369eec462faaeaa34c788992/ijson-3.5.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:252dec3680a48bb82d475e36b4ae1b3a9d7eb690b951bb98a76c5fe519e30188", size = 62714, upload-time = "2026-02-24T03:57:34.819Z" },
+    { url = "https://files.pythonhosted.org/packages/4b/88/793fe020a0fe9d9eed4c285cf4a5cfdb0a935708b3bde0d72f35c794b513/ijson-3.5.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:aa1b5dca97d323931fde2501172337384c958914d81a9dac7f00f0d4bfc76bc7", size = 62460, upload-time = "2026-02-24T03:57:35.874Z" },
+    { url = "https://files.pythonhosted.org/packages/51/69/f1a2690aa8d4df1f4e262b385e65a933ffdc250b091531bac9a449c19e16/ijson-3.5.0-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:7a5ec7fd86d606094bba6f6f8f87494897102fa4584ef653f3005c51a784c320", size = 199273, upload-time = "2026-02-24T03:57:37.07Z" },
+    { url = "https://files.pythonhosted.org/packages/ea/a2/f1346d5299e79b988ab472dc773d5381ec2d57c23cb2f1af3ede4a810e62/ijson-3.5.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:009f41443e1521847701c6d87fa3923c0b1961be3c7e7de90947c8cb92ea7c44", size = 216884, upload-time = "2026-02-24T03:57:38.346Z" },
+    { url = "https://files.pythonhosted.org/packages/28/3c/8b637e869be87799e6c2c3c275a30a546f086b1aed77e2b7f11512168c5a/ijson-3.5.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e4c3651d1f9fe2839a93fdf8fd1d5ca3a54975349894249f3b1b572bcc4bd577", size = 207306, upload-time = "2026-02-24T03:57:39.718Z" },
+    { url = "https://files.pythonhosted.org/packages/7f/7c/18b1c1df6951ca056782d7580ec40cea4ff9a27a0947d92640d1cc8c4ae3/ijson-3.5.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:945b7abcfcfeae2cde17d8d900870f03536494245dda7ad4f8d056faa303256c", size = 211364, upload-time = "2026-02-24T03:57:40.953Z" },
+    { url = "https://files.pythonhosted.org/packages/f3/55/e795812e82851574a9dba8a53fde045378f531ef14110c6fb55dbd23b443/ijson-3.5.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:0574b0a841ff97495c13e9d7260fbf3d85358b061f540c52a123db9dbbaa2ed6", size = 200608, upload-time = "2026-02-24T03:57:42.272Z" },
+    { url = "https://files.pythonhosted.org/packages/5c/cd/013c85b4749b57a4cb4c2670014d1b32b8db4ab1a7be92ea7aeb5d7fe7b5/ijson-3.5.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f969ffb2b89c5cdf686652d7fb66252bc72126fa54d416317411497276056a18", size = 205127, upload-time = "2026-02-24T03:57:43.286Z" },
+    { url = "https://files.pythonhosted.org/packages/7a/93/0868efe753dc1df80cc405cf0c1f2527a6991643607c741bff8dcb899b3b/ijson-3.5.0-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:25a5a6b2045c90bb83061df27cfa43572afa43ba9408611d7bfe237c20a731a9", size = 89094, upload-time = "2026-02-24T03:57:46.115Z" },
+    { url = "https://files.pythonhosted.org/packages/24/94/fd5a832a0df52ef5e4e740f14ac8640725d61034a1b0c561e8b5fb424706/ijson-3.5.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:8976c54c0b864bc82b951bae06567566ac77ef63b90a773a69cd73aab47f4f4f", size = 60715, upload-time = "2026-02-24T03:57:47.552Z" },
+    { url = "https://files.pythonhosted.org/packages/70/79/1b9a90af5732491f9eec751ee211b86b11011e1158c555c06576d52c3919/ijson-3.5.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:859eb2038f7f1b0664df4241957694cc35e6295992d71c98659b22c69b3cbc10", size = 60638, upload-time = "2026-02-24T03:57:48.428Z" },
+    { url = "https://files.pythonhosted.org/packages/23/6f/2c551ea980fe56f68710a8d5389cfbd015fc45aaafd17c3c52c346db6aa1/ijson-3.5.0-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:c911aa02991c7c0d3639b6619b93a93210ff1e7f58bf7225d613abea10adc78e", size = 140667, upload-time = "2026-02-24T03:57:49.314Z" },
+    { url = "https://files.pythonhosted.org/packages/25/0e/27b887879ba6a5bc29766e3c5af4942638c952220fd63e1e442674f7883a/ijson-3.5.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:903cbdc350173605220edc19796fbea9b2203c8b3951fb7335abfa8ed37afda8", size = 149850, upload-time = "2026-02-24T03:57:50.329Z" },
+    { url = "https://files.pythonhosted.org/packages/da/1e/23e10e1bc04bf31193b21e2960dce14b17dbd5d0c62204e8401c59d62c08/ijson-3.5.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a4549d96ded5b8efa71639b2160235415f6bdb8c83367615e2dbabcb72755c33", size = 149206, upload-time = "2026-02-24T03:57:51.261Z" },
+    { url = "https://files.pythonhosted.org/packages/8e/90/e552f6495063b235cf7fa2c592f6597c057077195e517b842a0374fd470c/ijson-3.5.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:6b2dcf6349e6042d83f3f8c39ce84823cf7577eba25bac5aae5e39bbbbbe9c1c", size = 150438, upload-time = "2026-02-24T03:57:52.198Z" },
+    { url = "https://files.pythonhosted.org/packages/5c/18/45bf8f297c41b42a1c231d261141097babd953d2c28a07be57ae4c3a1a02/ijson-3.5.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:e44af39e6f8a17e5627dcd89715d8279bf3474153ff99aae031a936e5c5572e5", size = 144369, upload-time = "2026-02-24T03:57:53.22Z" },
+    { url = "https://files.pythonhosted.org/packages/9b/3a/deb9772bb2c0cead7ad64f00c3598eec9072bdf511818e70e2c512eeabbe/ijson-3.5.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:9260332304b7e7828db56d43f08fc970a3ab741bf84ff10189361ea1b60c395b", size = 151352, upload-time = "2026-02-24T03:57:54.375Z" },
+    { url = "https://files.pythonhosted.org/packages/9f/d9/86f7fac35e0835faa188085ae0579e813493d5261ce056484015ad533445/ijson-3.5.0-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:2ea4b676ec98e374c1df400a47929859e4fa1239274339024df4716e802aa7e4", size = 93069, upload-time = "2026-02-24T03:57:57.849Z" },
+    { url = "https://files.pythonhosted.org/packages/33/d2/e7366ed9c6e60228d35baf4404bac01a126e7775ea8ce57f560125ed190a/ijson-3.5.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:014586eec043e23c80be9a923c56c3a0920a0f1f7d17478ce7bc20ba443968ef", size = 62767, upload-time = "2026-02-24T03:57:58.758Z" },
+    { url = "https://files.pythonhosted.org/packages/35/8b/3e703e8cc4b3ada79f13b28070b51d9550c578f76d1968657905857b2ddd/ijson-3.5.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:d5b8b886b0248652d437f66e7c5ac318bbdcb2c7137a7e5327a68ca00b286f5f", size = 62467, upload-time = "2026-02-24T03:58:00.261Z" },
+    { url = "https://files.pythonhosted.org/packages/21/42/0c91af32c1ee8a957fdac2e051b5780756d05fd34e4b60d94a08d51bac1d/ijson-3.5.0-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:498fd46ae2349297e43acf97cdc421e711dbd7198418677259393d2acdc62d78", size = 200447, upload-time = "2026-02-24T03:58:01.591Z" },
+    { url = "https://files.pythonhosted.org/packages/f9/80/796ea0e391b7e2d45c5b1b451734bba03f81c2984cf955ea5eaa6c4920ad/ijson-3.5.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:22a51b4f9b81f12793731cf226266d1de2112c3c04ba4a04117ad4e466897e05", size = 217820, upload-time = "2026-02-24T03:58:02.598Z" },
+    { url = "https://files.pythonhosted.org/packages/38/14/52b6613fdda4078c62eb5b4fe3efc724ddc55a4ad524c93de51830107aa3/ijson-3.5.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9636c710dc4ac4a281baa266a64f323b4cc165cec26836af702c44328b59a515", size = 208310, upload-time = "2026-02-24T03:58:04.759Z" },
+    { url = "https://files.pythonhosted.org/packages/6a/ad/8b3105a78774fd4a65e534a21d975ef3a77e189489fe3029ebcaeba5e243/ijson-3.5.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:f7168a39e8211107666d71b25693fd1b2bac0b33735ef744114c403c6cac21e1", size = 211843, upload-time = "2026-02-24T03:58:05.836Z" },
+    { url = "https://files.pythonhosted.org/packages/36/ab/a2739f6072d6e1160581bc3ed32da614c8cced023dcd519d9c5fa66e0425/ijson-3.5.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:8696454245415bc617ab03b0dc3ae4c86987df5dc6a90bad378fe72c5409d89e", size = 200906, upload-time = "2026-02-24T03:58:07.788Z" },
+    { url = "https://files.pythonhosted.org/packages/6d/5e/e06c2de3c3d4a9cfb655c1ad08a68fb72838d271072cdd3196576ac4431a/ijson-3.5.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:c21bfb61f71f191565885bf1bc29e0a186292d866b4880637b833848360bdc1b", size = 205495, upload-time = "2026-02-24T03:58:09.163Z" },
+    { url = "https://files.pythonhosted.org/packages/d9/3b/d31ecfa63a218978617446159f3d77aab2417a5bd2885c425b176353ff78/ijson-3.5.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:d64c624da0e9d692d6eb0ff63a79656b59d76bf80773a17c5b0f835e4e8ef627", size = 57715, upload-time = "2026-02-24T03:58:24.545Z" },
+    { url = "https://files.pythonhosted.org/packages/30/51/b170e646d378e8cccf9637c05edb5419b00c2c4df64b0258c3af5355608e/ijson-3.5.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:876f7df73b7e0d6474f9caa729b9cdbfc8e76de9075a4887dfd689e29e85c4ca", size = 57205, upload-time = "2026-02-24T03:58:25.681Z" },
+    { url = "https://files.pythonhosted.org/packages/ef/83/44dbd0231b0a8c6c14d27473d10c4e27dfbce7d5d9a833c79e3e6c33eb40/ijson-3.5.0-pp311-pypy311_pp73-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:e7dbff2c8d9027809b0cde663df44f3210da10ea377121d42896fb6ee405dd31", size = 71229, upload-time = "2026-02-24T03:58:27.103Z" },
+    { url = "https://files.pythonhosted.org/packages/c8/98/cf84048b7c6cec888826e696a31f45bee7ebcac15e532b6be1fc4c2c9608/ijson-3.5.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4217a1edc278660679e1197c83a1a2a2d367792bfbb2a3279577f4b59b93730d", size = 71217, upload-time = "2026-02-24T03:58:28.021Z" },
+    { url = "https://files.pythonhosted.org/packages/3c/0a/e34c729a87ff67dc6540f6bcc896626158e691d433ab57db0086d73decd2/ijson-3.5.0-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:04f0fc740311388ee745ba55a12292b722d6f52000b11acbb913982ba5fbdf87", size = 68618, upload-time = "2026-02-24T03:58:28.918Z" },
+]
+
 [[package]]
 name = "imagehash"
 version = "4.3.2"
@@ -2751,6 +2818,7 @@ dependencies = [
    { name = "flower", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "gotenberg-client", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "httpx-oauth", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "ijson", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "imap-tools", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "jinja2", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "langdetect", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -2898,6 +2966,7 @@ requires-dist = [
    { name = "gotenberg-client", specifier = "~=0.13.1" },
    { name = "granian", extras = ["uvloop"], marker = "extra == 'webserver'", specifier = "~=2.7.0" },
    { name = "httpx-oauth", specifier = "~=0.16" },
+    { name = "ijson", specifier = ">=3.2" },
    { name = "imap-tools", specifier = "~=1.11.0" },
    { name = "jinja2", specifier = "~=3.1.5" },
    { name = "langdetect", specifier = "~=1.0.9" },
Author	SHA1	Message	Date
shamoon	d9628f7255	Update tests	2026-03-09 11:24:58 -07:00
shamoon	fcbe4b200c	Use effective_content for matching	2026-03-09 11:23:46 -07:00
shamoon	2b434916a0	Add an effective_content for the model	2026-03-09 11:23:32 -07:00
shamoon	d85ee29976	Fix ci gate base	2026-03-09 11:16:46 -07:00
GitHub Actions	0c7d56c5e7	Auto translate strings	2026-03-09 17:45:53 +00:00
Trenton H	0bcf904e3a	Chore: Finish settings refactor (#12263)	2026-03-09 17:43:51 +00:00
Trenton H	bcc2f11152	Performance: Stream JSON during import for memory improvements (#12276) * Perf: stream manifest parsing with ijson in document_importer Replace bulk json.load of the full manifest (which materializes the entire JSON array into memory) with incremental ijson streaming. Eliminates self.manifest entirely — records are never all in memory at once. - Add ijson>=3.2 dependency - New module-level iter_manifest_records() generator - load_manifest_files() collects paths only; no parsing at load time - check_manifest_validity() streams without accumulating records - decrypt_secret_fields() streams each manifest to a .decrypted.json temp file record-by-record; temp files cleaned up after file copy - _import_files_from_manifest() collects only document records (small fraction of manifest) for the tqdm progress bar Measured on 200 docs + 200 CustomFieldInstances: - Streaming validation: peak memory 3081 KiB -> 333 KiB (89% reduction) - Stream-decrypt to file: peak memory 3081 KiB -> 549 KiB (82% reduction) Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> * Perf: slim dict in _import_files_from_manifest, discard fields When collecting document records for the file-copy step, extract only the 4 keys the loop actually uses (pk + 3 exported filename keys) and discard the full fields dict (content, checksum, tags, etc.). Peak memory for the document-record list: 939 KiB -> 375 KiB (60% reduction). Wall time unchanged.	2026-03-09 10:20:48 -07:00