mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-06-22 21:34:20 +00:00
Refactor: inline index_reindex into management command; promote needs_rebuild to public API
- Rename _needs_rebuild -> needs_rebuild and export from documents.search
- document_index command imports directly from documents.search, constructs the queryset and calls get_backend().rebuild() inline — no tasks.py indirection
- Optimize subcommand logs deprecation directly; no longer calls index_optimize
- Remove index_reindex from tasks.py
- Convert TestMakeIndex to pytest class (no TestCase); use mocker fixtures
- Simplify TestIndexReindex -> TestIndexOptimize (wrapper test removed)

Co-Authored-By: Antoine Mérino <3023499+Merinorus@users.noreply.github.com>
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,9 +1,16 @@
|
||||
import logging
|
||||
|
||||
from django.conf import settings
|
||||
from django.db import transaction
|
||||
|
||||
from documents.management.commands.base import PaperlessCommand
|
||||
from documents.tasks import index_optimize
|
||||
from documents.tasks import index_reindex
|
||||
from documents.models import Document
|
||||
from documents.search import get_backend
|
||||
from documents.search import needs_rebuild
|
||||
from documents.search import reset_backend
|
||||
from documents.search import wipe_index
|
||||
|
||||
logger = logging.getLogger("paperless.management.document_index")
|
||||
|
||||
|
||||
class Command(PaperlessCommand):
|
||||
@@ -35,21 +42,29 @@ class Command(PaperlessCommand):
|
||||
def handle(self, *args, **options):
|
||||
with transaction.atomic():
|
||||
if options["command"] == "reindex":
|
||||
if options.get("if_needed"):
|
||||
from documents.search._schema import _needs_rebuild
|
||||
|
||||
if not _needs_rebuild(settings.INDEX_DIR):
|
||||
self.stdout.write("Search index is up to date.")
|
||||
return
|
||||
if options.get("if_needed") and not needs_rebuild(settings.INDEX_DIR):
|
||||
self.stdout.write("Search index is up to date.")
|
||||
return
|
||||
if options.get("recreate"):
|
||||
from documents.search import wipe_index
|
||||
|
||||
wipe_index(settings.INDEX_DIR)
|
||||
index_reindex(
|
||||
|
||||
documents = Document.objects.select_related(
|
||||
"correspondent",
|
||||
"document_type",
|
||||
"storage_path",
|
||||
"owner",
|
||||
).prefetch_related("tags", "notes", "custom_fields", "versions")
|
||||
get_backend().rebuild(
|
||||
documents,
|
||||
iter_wrapper=lambda docs: self.track(
|
||||
docs,
|
||||
description="Indexing documents...",
|
||||
),
|
||||
)
|
||||
reset_backend()
|
||||
|
||||
elif options["command"] == "optimize":
|
||||
index_optimize()
|
||||
logger.info(
|
||||
"document_index optimize is a no-op — Tantivy manages "
|
||||
"segment merging automatically.",
|
||||
)
|
||||
|
||||
@@ -5,6 +5,7 @@ from documents.search._backend import TantivyRelevanceList
|
||||
from documents.search._backend import WriteBatch
|
||||
from documents.search._backend import get_backend
|
||||
from documents.search._backend import reset_backend
|
||||
from documents.search._schema import needs_rebuild
|
||||
from documents.search._schema import wipe_index
|
||||
|
||||
__all__ = [
|
||||
@@ -14,6 +15,7 @@ __all__ = [
|
||||
"TantivyRelevanceList",
|
||||
"WriteBatch",
|
||||
"get_backend",
|
||||
"needs_rebuild",
|
||||
"reset_backend",
|
||||
"wipe_index",
|
||||
]
|
||||
|
||||
@@ -78,7 +78,7 @@ def build_schema() -> tantivy.Schema:
|
||||
return sb.build()
|
||||
|
||||
|
||||
def _needs_rebuild(index_dir: Path) -> bool:
|
||||
def needs_rebuild(index_dir: Path) -> bool:
|
||||
"""Check if the search index needs rebuilding by comparing schema version and language sentinel files."""
|
||||
version_file = index_dir / ".schema_version"
|
||||
if not version_file.exists():
|
||||
@@ -124,7 +124,7 @@ def open_or_rebuild_index(index_dir: Path | None = None) -> tantivy.Index:
|
||||
"""
|
||||
if index_dir is None:
|
||||
index_dir = settings.INDEX_DIR
|
||||
if _needs_rebuild(index_dir):
|
||||
if needs_rebuild(index_dir):
|
||||
wipe_index(index_dir)
|
||||
idx = tantivy.Index(build_schema(), path=str(index_dir))
|
||||
_write_sentinels(index_dir)
|
||||
|
||||
@@ -88,20 +88,6 @@ def index_optimize() -> None:
|
||||
)
|
||||
|
||||
|
||||
def index_reindex(*, iter_wrapper: IterWrapper[Document] = _identity) -> None:
|
||||
from documents.search import get_backend
|
||||
from documents.search import reset_backend
|
||||
|
||||
documents = Document.objects.select_related(
|
||||
"correspondent",
|
||||
"document_type",
|
||||
"storage_path",
|
||||
"owner",
|
||||
).prefetch_related("tags", "notes", "custom_fields", "versions")
|
||||
get_backend().rebuild(documents, iter_wrapper=iter_wrapper)
|
||||
reset_backend()
|
||||
|
||||
|
||||
@shared_task
|
||||
def train_classifier(
|
||||
*,
|
||||
|
||||
@@ -103,36 +103,45 @@ class TestArchiver(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
|
||||
|
||||
@pytest.mark.management
|
||||
class TestMakeIndex(TestCase):
|
||||
@mock.patch("documents.management.commands.document_index.index_reindex")
|
||||
def test_reindex(self, m) -> None:
|
||||
@pytest.mark.django_db
|
||||
class TestMakeIndex:
|
||||
def test_reindex(self, mocker: MockerFixture) -> None:
|
||||
mock_get_backend = mocker.patch(
|
||||
"documents.management.commands.document_index.get_backend",
|
||||
)
|
||||
call_command("document_index", "reindex", skip_checks=True)
|
||||
m.assert_called_once()
|
||||
mock_get_backend.return_value.rebuild.assert_called_once()
|
||||
|
||||
@mock.patch("documents.management.commands.document_index.index_optimize")
|
||||
def test_optimize(self, m) -> None:
|
||||
def test_optimize(self) -> None:
|
||||
call_command("document_index", "optimize", skip_checks=True)
|
||||
m.assert_called_once()
|
||||
|
||||
@mock.patch("documents.management.commands.document_index.index_reindex")
|
||||
@mock.patch("documents.search._schema._needs_rebuild", return_value=False)
|
||||
def test_reindex_if_needed_skips_when_up_to_date(
|
||||
self,
|
||||
_needs_rebuild,
|
||||
reindex,
|
||||
mocker: MockerFixture,
|
||||
) -> None:
|
||||
mocker.patch(
|
||||
"documents.management.commands.document_index.needs_rebuild",
|
||||
return_value=False,
|
||||
)
|
||||
mock_get_backend = mocker.patch(
|
||||
"documents.management.commands.document_index.get_backend",
|
||||
)
|
||||
call_command("document_index", "reindex", if_needed=True, skip_checks=True)
|
||||
reindex.assert_not_called()
|
||||
mock_get_backend.return_value.rebuild.assert_not_called()
|
||||
|
||||
@mock.patch("documents.management.commands.document_index.index_reindex")
|
||||
@mock.patch("documents.search._schema._needs_rebuild", return_value=True)
|
||||
def test_reindex_if_needed_runs_when_rebuild_needed(
|
||||
self,
|
||||
_needs_rebuild,
|
||||
reindex,
|
||||
mocker: MockerFixture,
|
||||
) -> None:
|
||||
mocker.patch(
|
||||
"documents.management.commands.document_index.needs_rebuild",
|
||||
return_value=True,
|
||||
)
|
||||
mock_get_backend = mocker.patch(
|
||||
"documents.management.commands.document_index.get_backend",
|
||||
)
|
||||
call_command("document_index", "reindex", if_needed=True, skip_checks=True)
|
||||
reindex.assert_called_once()
|
||||
mock_get_backend.return_value.rebuild.assert_called_once()
|
||||
|
||||
|
||||
@pytest.mark.management
|
||||
|
||||
@@ -23,29 +23,8 @@ from documents.tests.utils import DirectoriesMixin
|
||||
from documents.tests.utils import FileSystemAssertsMixin
|
||||
|
||||
|
||||
class TestIndexReindex(DirectoriesMixin, TestCase):
|
||||
def test_index_reindex(self) -> None:
|
||||
Document.objects.create(
|
||||
title="test",
|
||||
content="my document",
|
||||
checksum="wow",
|
||||
added=timezone.now(),
|
||||
created=timezone.now(),
|
||||
modified=timezone.now(),
|
||||
)
|
||||
|
||||
tasks.index_reindex()
|
||||
|
||||
class TestIndexOptimize(TestCase):
|
||||
def test_index_optimize(self) -> None:
|
||||
Document.objects.create(
|
||||
title="test",
|
||||
content="my document",
|
||||
checksum="wow",
|
||||
added=timezone.now(),
|
||||
created=timezone.now(),
|
||||
modified=timezone.now(),
|
||||
)
|
||||
|
||||
tasks.index_optimize()
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user