Refactor: inline index_reindex into management command; promote needs_rebuild to public API

- Rename _needs_rebuild -> needs_rebuild and export from documents.search
- document_index command imports directly from documents.search, constructs
  the queryset and calls get_backend().rebuild() inline — no tasks.py indirection
- Optimize subcommand logs a no-op notice directly (Tantivy merges segments
  automatically); no longer calls index_optimize
- Remove index_reindex from tasks.py
- Convert TestMakeIndex to pytest class (no TestCase); use mocker fixtures
- Simplify TestIndexReindex -> TestIndexOptimize (wrapper test removed)

Co-Authored-By: Antoine Mérino <3023499+Merinorus@users.noreply.github.com>
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Trenton H
2026-03-30 14:41:25 -07:00
parent 6699679c29
commit 061099b064
6 changed files with 58 additions and 67 deletions
@@ -1,9 +1,16 @@
import logging
from django.conf import settings
from django.db import transaction
from documents.management.commands.base import PaperlessCommand
from documents.tasks import index_optimize
from documents.tasks import index_reindex
from documents.models import Document
from documents.search import get_backend
from documents.search import needs_rebuild
from documents.search import reset_backend
from documents.search import wipe_index
logger = logging.getLogger("paperless.management.document_index")
class Command(PaperlessCommand):
@@ -35,21 +42,29 @@ class Command(PaperlessCommand):
def handle(self, *args, **options):
with transaction.atomic():
if options["command"] == "reindex":
if options.get("if_needed"):
from documents.search._schema import _needs_rebuild
if not _needs_rebuild(settings.INDEX_DIR):
self.stdout.write("Search index is up to date.")
return
if options.get("if_needed") and not needs_rebuild(settings.INDEX_DIR):
self.stdout.write("Search index is up to date.")
return
if options.get("recreate"):
from documents.search import wipe_index
wipe_index(settings.INDEX_DIR)
index_reindex(
documents = Document.objects.select_related(
"correspondent",
"document_type",
"storage_path",
"owner",
).prefetch_related("tags", "notes", "custom_fields", "versions")
get_backend().rebuild(
documents,
iter_wrapper=lambda docs: self.track(
docs,
description="Indexing documents...",
),
)
reset_backend()
elif options["command"] == "optimize":
index_optimize()
logger.info(
"document_index optimize is a no-op — Tantivy manages "
"segment merging automatically.",
)
+2
View File
@@ -5,6 +5,7 @@ from documents.search._backend import TantivyRelevanceList
from documents.search._backend import WriteBatch
from documents.search._backend import get_backend
from documents.search._backend import reset_backend
from documents.search._schema import needs_rebuild
from documents.search._schema import wipe_index
__all__ = [
@@ -14,6 +15,7 @@ __all__ = [
"TantivyRelevanceList",
"WriteBatch",
"get_backend",
"needs_rebuild",
"reset_backend",
"wipe_index",
]
+2 -2
View File
@@ -78,7 +78,7 @@ def build_schema() -> tantivy.Schema:
return sb.build()
def _needs_rebuild(index_dir: Path) -> bool:
def needs_rebuild(index_dir: Path) -> bool:
"""Check if the search index needs rebuilding by comparing schema version and language sentinel files."""
version_file = index_dir / ".schema_version"
if not version_file.exists():
@@ -124,7 +124,7 @@ def open_or_rebuild_index(index_dir: Path | None = None) -> tantivy.Index:
"""
if index_dir is None:
index_dir = settings.INDEX_DIR
if _needs_rebuild(index_dir):
if needs_rebuild(index_dir):
wipe_index(index_dir)
idx = tantivy.Index(build_schema(), path=str(index_dir))
_write_sentinels(index_dir)
-14
View File
@@ -88,20 +88,6 @@ def index_optimize() -> None:
)
def index_reindex(*, iter_wrapper: IterWrapper[Document] = _identity) -> None:
from documents.search import get_backend
from documents.search import reset_backend
documents = Document.objects.select_related(
"correspondent",
"document_type",
"storage_path",
"owner",
).prefetch_related("tags", "notes", "custom_fields", "versions")
get_backend().rebuild(documents, iter_wrapper=iter_wrapper)
reset_backend()
@shared_task
def train_classifier(
*,
+26 -17
View File
@@ -103,36 +103,45 @@ class TestArchiver(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
@pytest.mark.management
class TestMakeIndex(TestCase):
@mock.patch("documents.management.commands.document_index.index_reindex")
def test_reindex(self, m) -> None:
@pytest.mark.django_db
class TestMakeIndex:
def test_reindex(self, mocker: MockerFixture) -> None:
mock_get_backend = mocker.patch(
"documents.management.commands.document_index.get_backend",
)
call_command("document_index", "reindex", skip_checks=True)
m.assert_called_once()
mock_get_backend.return_value.rebuild.assert_called_once()
@mock.patch("documents.management.commands.document_index.index_optimize")
def test_optimize(self, m) -> None:
def test_optimize(self) -> None:
call_command("document_index", "optimize", skip_checks=True)
m.assert_called_once()
@mock.patch("documents.management.commands.document_index.index_reindex")
@mock.patch("documents.search._schema._needs_rebuild", return_value=False)
def test_reindex_if_needed_skips_when_up_to_date(
self,
_needs_rebuild,
reindex,
mocker: MockerFixture,
) -> None:
mocker.patch(
"documents.management.commands.document_index.needs_rebuild",
return_value=False,
)
mock_get_backend = mocker.patch(
"documents.management.commands.document_index.get_backend",
)
call_command("document_index", "reindex", if_needed=True, skip_checks=True)
reindex.assert_not_called()
mock_get_backend.return_value.rebuild.assert_not_called()
@mock.patch("documents.management.commands.document_index.index_reindex")
@mock.patch("documents.search._schema._needs_rebuild", return_value=True)
def test_reindex_if_needed_runs_when_rebuild_needed(
self,
_needs_rebuild,
reindex,
mocker: MockerFixture,
) -> None:
mocker.patch(
"documents.management.commands.document_index.needs_rebuild",
return_value=True,
)
mock_get_backend = mocker.patch(
"documents.management.commands.document_index.get_backend",
)
call_command("document_index", "reindex", if_needed=True, skip_checks=True)
reindex.assert_called_once()
mock_get_backend.return_value.rebuild.assert_called_once()
@pytest.mark.management
+1 -22
View File
@@ -23,29 +23,8 @@ from documents.tests.utils import DirectoriesMixin
from documents.tests.utils import FileSystemAssertsMixin
class TestIndexReindex(DirectoriesMixin, TestCase):
def test_index_reindex(self) -> None:
Document.objects.create(
title="test",
content="my document",
checksum="wow",
added=timezone.now(),
created=timezone.now(),
modified=timezone.now(),
)
tasks.index_reindex()
class TestIndexOptimize(TestCase):
def test_index_optimize(self) -> None:
Document.objects.create(
title="test",
content="my document",
checksum="wow",
added=timezone.now(),
created=timezone.now(),
modified=timezone.now(),
)
tasks.index_optimize()