From 061099b064e945505b796332eadd44a71855bc45 Mon Sep 17 00:00:00 2001 From: Trenton H <797416+stumpylog@users.noreply.github.com> Date: Mon, 30 Mar 2026 14:41:25 -0700 Subject: [PATCH] Refactor: inline index_reindex into management command; promote needs_rebuild to public API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Rename _needs_rebuild -> needs_rebuild and export from documents.search - document_index command imports directly from documents.search, constructs the queryset and calls get_backend().rebuild() inline — no tasks.py indirection - Optimize subcommand logs deprecation directly; no longer calls index_optimize - Remove index_reindex from tasks.py - Convert TestMakeIndex to pytest class (no TestCase); use mocker fixtures - Simplify TestIndexReindex -> TestIndexOptimize (wrapper test removed) Co-Authored-By: Antoine Mérino <3023499+Merinorus@users.noreply.github.com> Co-Authored-By: Claude Sonnet 4.6 --- .../management/commands/document_index.py | 39 +++++++++++------ src/documents/search/__init__.py | 2 + src/documents/search/_schema.py | 4 +- src/documents/tasks.py | 14 ------ src/documents/tests/test_management.py | 43 +++++++++++-------- src/documents/tests/test_tasks.py | 23 +--------- 6 files changed, 58 insertions(+), 67 deletions(-) diff --git a/src/documents/management/commands/document_index.py b/src/documents/management/commands/document_index.py index 6b85e61b1..598719024 100644 --- a/src/documents/management/commands/document_index.py +++ b/src/documents/management/commands/document_index.py @@ -1,9 +1,16 @@ +import logging + from django.conf import settings from django.db import transaction from documents.management.commands.base import PaperlessCommand -from documents.tasks import index_optimize -from documents.tasks import index_reindex +from documents.models import Document +from documents.search import get_backend +from documents.search import needs_rebuild +from documents.search import reset_backend +from documents.search import wipe_index + +logger = logging.getLogger("paperless.management.document_index") class Command(PaperlessCommand): @@ -35,21 +42,29 @@ class Command(PaperlessCommand): def handle(self, *args, **options): with transaction.atomic(): if options["command"] == "reindex": - if options.get("if_needed"): - from documents.search._schema import _needs_rebuild - - if not _needs_rebuild(settings.INDEX_DIR): - self.stdout.write("Search index is up to date.") - return + if options.get("if_needed") and not needs_rebuild(settings.INDEX_DIR): + self.stdout.write("Search index is up to date.") + return if options.get("recreate"): - from documents.search import wipe_index - wipe_index(settings.INDEX_DIR) - index_reindex( + + documents = Document.objects.select_related( + "correspondent", + "document_type", + "storage_path", + "owner", + ).prefetch_related("tags", "notes", "custom_fields", "versions") + get_backend().rebuild( + documents, iter_wrapper=lambda docs: self.track( docs, description="Indexing documents...", ), ) + reset_backend() + elif options["command"] == "optimize": - index_optimize() + logger.info( + "document_index optimize is a no-op — Tantivy manages " + "segment merging automatically.", + ) diff --git a/src/documents/search/__init__.py b/src/documents/search/__init__.py index 5da0a91d4..b0a89f242 100644 --- a/src/documents/search/__init__.py +++ b/src/documents/search/__init__.py @@ -5,6 +5,7 @@ from documents.search._backend import TantivyRelevanceList from documents.search._backend import WriteBatch from documents.search._backend import get_backend from documents.search._backend import reset_backend +from documents.search._schema import needs_rebuild from documents.search._schema import wipe_index __all__ = [ @@ -14,6 +15,7 @@ __all__ = [ "TantivyRelevanceList", "WriteBatch", "get_backend", + "needs_rebuild", "reset_backend", "wipe_index", ] diff --git a/src/documents/search/_schema.py b/src/documents/search/_schema.py index cb5b85e3e..ef7e4a921 100644 --- a/src/documents/search/_schema.py +++ b/src/documents/search/_schema.py @@ -78,7 +78,7 @@ def build_schema() -> tantivy.Schema: return sb.build() -def _needs_rebuild(index_dir: Path) -> bool: +def needs_rebuild(index_dir: Path) -> bool: """Check if the search index needs rebuilding by comparing schema version and language sentinel files.""" version_file = index_dir / ".schema_version" if not version_file.exists(): @@ -124,7 +124,7 @@ def open_or_rebuild_index(index_dir: Path | None = None) -> tantivy.Index: """ if index_dir is None: index_dir = settings.INDEX_DIR - if _needs_rebuild(index_dir): + if needs_rebuild(index_dir): wipe_index(index_dir) idx = tantivy.Index(build_schema(), path=str(index_dir)) _write_sentinels(index_dir) diff --git a/src/documents/tasks.py b/src/documents/tasks.py index 87892dd7d..5c8eea45b 100644 --- a/src/documents/tasks.py +++ b/src/documents/tasks.py @@ -88,20 +88,6 @@ def index_optimize() -> None: ) -def index_reindex(*, iter_wrapper: IterWrapper[Document] = _identity) -> None: - from documents.search import get_backend - from documents.search import reset_backend - - documents = Document.objects.select_related( - "correspondent", - "document_type", - "storage_path", - "owner", - ).prefetch_related("tags", "notes", "custom_fields", "versions") - get_backend().rebuild(documents, iter_wrapper=iter_wrapper) - reset_backend() - - @shared_task def train_classifier( *, diff --git a/src/documents/tests/test_management.py b/src/documents/tests/test_management.py index afc589705..f6c8ba904 100644 --- a/src/documents/tests/test_management.py +++ b/src/documents/tests/test_management.py @@ -103,36 +103,45 @@ class TestArchiver(DirectoriesMixin, FileSystemAssertsMixin, TestCase): @pytest.mark.management -class TestMakeIndex(TestCase): - @mock.patch("documents.management.commands.document_index.index_reindex") - def test_reindex(self, m) -> None: +@pytest.mark.django_db +class TestMakeIndex: + def test_reindex(self, mocker: MockerFixture) -> None: + mock_get_backend = mocker.patch( + "documents.management.commands.document_index.get_backend", + ) call_command("document_index", "reindex", skip_checks=True) - m.assert_called_once() + mock_get_backend.return_value.rebuild.assert_called_once() - @mock.patch("documents.management.commands.document_index.index_optimize") - def test_optimize(self, m) -> None: + def test_optimize(self) -> None: call_command("document_index", "optimize", skip_checks=True) - m.assert_called_once() - @mock.patch("documents.management.commands.document_index.index_reindex") - @mock.patch("documents.search._schema._needs_rebuild", return_value=False) def test_reindex_if_needed_skips_when_up_to_date( self, - _needs_rebuild, - reindex, + mocker: MockerFixture, ) -> None: + mocker.patch( + "documents.management.commands.document_index.needs_rebuild", + return_value=False, + ) + mock_get_backend = mocker.patch( + "documents.management.commands.document_index.get_backend", + ) call_command("document_index", "reindex", if_needed=True, skip_checks=True) - reindex.assert_not_called() + mock_get_backend.return_value.rebuild.assert_not_called() - @mock.patch("documents.management.commands.document_index.index_reindex") - @mock.patch("documents.search._schema._needs_rebuild", return_value=True) def test_reindex_if_needed_runs_when_rebuild_needed( self, - _needs_rebuild, - reindex, + mocker: MockerFixture, ) -> None: + mocker.patch( + "documents.management.commands.document_index.needs_rebuild", + return_value=True, + ) + mock_get_backend = mocker.patch( + "documents.management.commands.document_index.get_backend", + ) call_command("document_index", "reindex", if_needed=True, skip_checks=True) - reindex.assert_called_once() + mock_get_backend.return_value.rebuild.assert_called_once() @pytest.mark.management diff --git a/src/documents/tests/test_tasks.py b/src/documents/tests/test_tasks.py index 37f1e6fed..d73481609 100644 --- a/src/documents/tests/test_tasks.py +++ b/src/documents/tests/test_tasks.py @@ -23,29 +23,8 @@ from documents.tests.utils import DirectoriesMixin from documents.tests.utils import FileSystemAssertsMixin -class TestIndexReindex(DirectoriesMixin, TestCase): - def test_index_reindex(self) -> None: - Document.objects.create( - title="test", - content="my document", - checksum="wow", - added=timezone.now(), - created=timezone.now(), - modified=timezone.now(), - ) - - tasks.index_reindex() - +class TestIndexOptimize(TestCase): def test_index_optimize(self) -> None: - Document.objects.create( - title="test", - content="my document", - checksum="wow", - added=timezone.now(), - created=timezone.now(), - modified=timezone.now(), - ) - tasks.index_optimize()