Disables the system checks for most of our management commands: the Docker image already runs them once at startup, so repeating them for every command is redundant

2026-03-13 12:41:23 +00:00 · 2026-03-12 13:38:01 -07:00
10 changed files with 54 additions and 78 deletions
--- a/docs/migration-v3.md
+++ b/docs/migration-v3.md
@@ -72,6 +72,30 @@ PAPERLESS_DBHOST: postgres

 See [`PAPERLESS_DBENGINE`](configuration.md#PAPERLESS_DBENGINE) for accepted values.

+## Management Command: `--skip-checks` Removed
+
+The `--skip-checks` flag has been removed from most Paperless-ngx management commands
+(`document_exporter`, `document_importer`, `document_retagger`, `document_archiver`,
+`document_thumbnails`, `document_index`, `document_renamer`, `document_sanity_checker`,
+`document_fuzzy_match`, and others).
+
+These commands now set `requires_system_checks = []` internally, which both skips
+redundant checks at runtime (they are already run as a dedicated step during Docker
+startup via `init-system-checks`) and removes `--skip-checks` from the argument parser.
+
+#### Action Required
+
+Remove `--skip-checks` from any scripts, cron jobs, or automation that invokes these
+commands; the flag is no longer recognized, so passing it makes the command fail:
+
+```bash
+# v2
+document_exporter /backup --skip-checks
+
+# v3
+document_exporter /backup
+```
+
 ## Database Advanced Options

 The individual SSL, timeout, and pooling variables have been removed in favor of a
--- a/src/documents/classifier.py
+++ b/src/documents/classifier.py
@@ -9,7 +9,6 @@ from pathlib import Path
 from typing import TYPE_CHECKING

 if TYPE_CHECKING:
-    from collections.abc import Callable
    from collections.abc import Iterator
    from datetime import datetime

@@ -192,12 +191,7 @@ class DocumentClassifier:

        target_file_temp.rename(target_file)

-    def train(
-        self,
-        status_callback: Callable[[str], None] | None = None,
-    ) -> bool:
-        notify = status_callback if status_callback is not None else lambda _: None
-
+    def train(self) -> bool:
        # Get non-inbox documents
        docs_queryset = (
            Document.objects.exclude(
@@ -219,7 +213,6 @@ class DocumentClassifier:

        # Step 1: Extract and preprocess training data from the database.
        logger.debug("Gathering data from database...")
-        notify(f"Gathering data from {docs_queryset.count()} document(s)...")
        hasher = sha256()
        for doc in docs_queryset:
            y = -1
@@ -297,7 +290,6 @@ class DocumentClassifier:

        # Step 2: vectorize data
        logger.debug("Vectorizing data...")
-        notify("Vectorizing document content...")

        def content_generator() -> Iterator[str]:
            """
@@ -324,7 +316,6 @@ class DocumentClassifier:
        # Step 3: train the classifiers
        if num_tags > 0:
            logger.debug("Training tags classifier...")
-            notify(f"Training tags classifier ({num_tags} tag(s))...")

            if num_tags == 1:
                # Special case where only one tag has auto:
@@ -348,9 +339,6 @@ class DocumentClassifier:

        if num_correspondents > 0:
            logger.debug("Training correspondent classifier...")
-            notify(
-                f"Training correspondent classifier ({num_correspondents} correspondent(s))...",
-            )
            self.correspondent_classifier = MLPClassifier(tol=0.01)
            self.correspondent_classifier.fit(data_vectorized, labels_correspondent)
        else:
@@ -361,9 +349,6 @@ class DocumentClassifier:

        if num_document_types > 0:
            logger.debug("Training document type classifier...")
-            notify(
-                f"Training document type classifier ({num_document_types} type(s))...",
-            )
            self.document_type_classifier = MLPClassifier(tol=0.01)
            self.document_type_classifier.fit(data_vectorized, labels_document_type)
        else:
@@ -376,7 +361,6 @@ class DocumentClassifier:
            logger.debug(
                "Training storage paths classifier...",
            )
-            notify(f"Training storage path classifier ({num_storage_paths} path(s))...")
            self.storage_path_classifier = MLPClassifier(tol=0.01)
            self.storage_path_classifier.fit(
                data_vectorized,
--- a/src/documents/management/commands/base.py
+++ b/src/documents/management/commands/base.py
@@ -151,6 +151,10 @@ class PaperlessCommand(RichCommand):
        supports_progress_bar: Adds --no-progress-bar argument (default: True)
        supports_multiprocessing: Adds --processes argument (default: False)

+    System checks are skipped by default (requires_system_checks = []) because
+    these commands run post-startup, after the checks have already been performed
+    as a dedicated startup step. Subclasses that genuinely need checks can override.
+
    Example usage:

        class Command(PaperlessCommand):
@@ -189,6 +193,8 @@ class PaperlessCommand(RichCommand):
                    stats.imported += 1
    """

+    requires_system_checks: ClassVar[list] = []
+
    supports_progress_bar: ClassVar[bool] = True
    supports_multiprocessing: ClassVar[bool] = False

--- a/src/documents/management/commands/document_create_classifier.py
+++ b/src/documents/management/commands/document_create_classifier.py
@@ -1,29 +1,13 @@
-from __future__ import annotations
+from django.core.management.base import BaseCommand

-import time
-
-from documents.management.commands.base import PaperlessCommand
 from documents.tasks import train_classifier


-class Command(PaperlessCommand):
+class Command(BaseCommand):
    help = (
        "Trains the classifier on your data and saves the resulting models to a "
        "file. The document consumer will then automatically use this new model."
    )
-    supports_progress_bar = False
-    supports_multiprocessing = False

-    def handle(self, *args, **options) -> None:
-        start = time.monotonic()
-
-        with self.buffered_logging("paperless.tasks"):
-            train_classifier(
-                scheduled=False,
-                status_callback=lambda msg: self.console.print(f"  {msg}"),
-            )
-
-        elapsed = time.monotonic() - start
-        self.console.print(
-            f"[green]✓[/green] Classifier training complete ({elapsed:.1f}s)",
-        )
+    def handle(self, *args, **options):
+        train_classifier(scheduled=False)
--- a/src/documents/management/commands/document_importer.py
+++ b/src/documents/management/commands/document_importer.py
@@ -205,7 +205,7 @@ class Command(CryptMixin, PaperlessCommand):
                ContentType.objects.all().delete()
                Permission.objects.all().delete()
                for manifest_path in self.manifest_paths:
-                    call_command("loaddata", manifest_path)
+                    call_command("loaddata", manifest_path, skip_checks=True)
        except (FieldDoesNotExist, DeserializationError, IntegrityError) as e:
            self.stdout.write(self.style.ERROR("Database import failed"))
            if (
--- a/src/documents/tasks.py
+++ b/src/documents/tasks.py
@@ -100,11 +100,7 @@ def index_reindex(*, iter_wrapper: IterWrapper[Document] = _identity) -> None:


@shared_task
-def train_classifier(
-    *,
-    scheduled=True,
-    status_callback: Callable[[str], None] | None = None,
-) -> None:
+def train_classifier(*, scheduled=True) -> None:
    task = PaperlessTask.objects.create(
        type=PaperlessTask.TaskType.SCHEDULED_TASK
        if scheduled
@@ -140,7 +136,7 @@ def train_classifier(
        classifier = DocumentClassifier()

    try:
-        if classifier.train(status_callback=status_callback):
+        if classifier.train():
            logger.info(
                f"Saving updated classifier model to {settings.MODEL_FILE}...",
            )
--- a/src/documents/tests/test_api_schema.py
+++ b/src/documents/tests/test_api_schema.py
@@ -12,7 +12,12 @@ class TestApiSchema(APITestCase):
        Test that the schema is valid
        """
        try:
-            call_command("spectacular", "--validate", "--fail-on-warn")
+            call_command(
+                "spectacular",
+                "--validate",
+                "--fail-on-warn",
+                skip_checks=True,
+            )
        except CommandError as e:
            self.fail(f"Schema validation failed: {e}")

--- a/src/documents/tests/test_management.py
+++ b/src/documents/tests/test_management.py
@@ -1,10 +1,7 @@
-from __future__ import annotations
-
 import filecmp
 import shutil
 from io import StringIO
 from pathlib import Path
-from typing import TYPE_CHECKING
 from unittest import mock

 import pytest
@@ -14,9 +11,6 @@ from django.core.management import call_command
 from django.test import TestCase
 from django.test import override_settings

-if TYPE_CHECKING:
-    from pytest_mock import MockerFixture
-
 from documents.file_handling import generate_filename
 from documents.models import Document
 from documents.tasks import update_document_content_maybe_archive_file
@@ -141,32 +135,14 @@ class TestRenamer(DirectoriesMixin, FileSystemAssertsMixin, TestCase):


@pytest.mark.management
-class TestCreateClassifier:
-    def test_create_classifier(self, mocker: MockerFixture) -> None:
-        m = mocker.patch(
-            "documents.management.commands.document_create_classifier.train_classifier",
-        )
+class TestCreateClassifier(TestCase):
+    @mock.patch(
+        "documents.management.commands.document_create_classifier.train_classifier",
+    )
+    def test_create_classifier(self, m) -> None:
+        call_command("document_create_classifier", skip_checks=True)

-        call_command("document_create_classifier", "--skip-checks")
-
-        m.assert_called_once_with(scheduled=False, status_callback=mocker.ANY)
-        assert callable(m.call_args.kwargs["status_callback"])
-
-    def test_create_classifier_callback_output(self, mocker: MockerFixture) -> None:
-        """Callback passed to train_classifier writes each phase message to the console."""
-        m = mocker.patch(
-            "documents.management.commands.document_create_classifier.train_classifier",
-        )
-
-        def invoke_callback(**kwargs):
-            kwargs["status_callback"]("Vectorizing document content...")
-
-        m.side_effect = invoke_callback
-
-        stdout = StringIO()
-        call_command("document_create_classifier", "--skip-checks", stdout=stdout)
-
-        assert "Vectorizing document content..." in stdout.getvalue()
+        m.assert_called_once()


@pytest.mark.management
@@ -176,7 +152,7 @@ class TestConvertMariaDBUUID(TestCase):
        m.alter_field.return_value = None

        stdout = StringIO()
-        call_command("convert_mariadb_uuid", stdout=stdout)
+        call_command("convert_mariadb_uuid", stdout=stdout, skip_checks=True)

        m.assert_called_once()

--- a/src/documents/tests/test_management_superuser.py
+++ b/src/documents/tests/test_management_superuser.py
@@ -20,6 +20,7 @@ class TestManageSuperUser(DirectoriesMixin, TestCase):
                "--no-color",
                stdout=out,
                stderr=StringIO(),
+                skip_checks=True,
            )
        return out.getvalue()

--- a/src/paperless_mail/tests/test_mail.py
+++ b/src/paperless_mail/tests/test_mail.py
@@ -1665,7 +1665,7 @@ class TestManagementCommand(TestCase):
        "paperless_mail.management.commands.mail_fetcher.tasks.process_mail_accounts",
    )
    def test_mail_fetcher(self, m) -> None:
-        call_command("mail_fetcher")
+        call_command("mail_fetcher", skip_checks=True)

        m.assert_called_once()