diff --git a/src/documents/management/commands/document_llmindex.py b/src/documents/management/commands/document_llmindex.py index 9823b1b87..7b34ca9a8 100644 --- a/src/documents/management/commands/document_llmindex.py +++ b/src/documents/management/commands/document_llmindex.py @@ -2,6 +2,7 @@ from typing import Any from documents.management.commands.base import PaperlessCommand from documents.tasks import llmindex_index +from paperless_ai.indexing import llm_index_compact class Command(PaperlessCommand): @@ -12,9 +13,12 @@ class Command(PaperlessCommand): def add_arguments(self, parser: Any) -> None: super().add_arguments(parser) - parser.add_argument("command", choices=["rebuild", "update"]) + parser.add_argument("command", choices=["rebuild", "update", "compact"]) def handle(self, *args: Any, **options: Any) -> None: + if options["command"] == "compact": + llm_index_compact() + return llmindex_index( rebuild=options["command"] == "rebuild", iter_wrapper=lambda docs: self.track( diff --git a/src/paperless_ai/indexing.py b/src/paperless_ai/indexing.py index caae1cbb9..ede26766b 100644 --- a/src/paperless_ai/indexing.py +++ b/src/paperless_ai/indexing.py @@ -311,6 +311,12 @@ def llm_index_add_or_update_document(document: Document): store.ensure_document_id_scalar_index() +def llm_index_compact() -> None: + """Compact the index immediately, clearing all MVCC version history.""" + with write_store() as store: + store.compact(retention_seconds=0) + + def llm_index_remove_document(document: Document): """Remove a document's chunks from the LLM index.""" with write_store() as store: diff --git a/src/paperless_ai/tests/test_ai_indexing.py b/src/paperless_ai/tests/test_ai_indexing.py index 5333e70d9..4162eb01f 100644 --- a/src/paperless_ai/tests/test_ai_indexing.py +++ b/src/paperless_ai/tests/test_ai_indexing.py @@ -525,6 +525,26 @@ class TestLlmIndexAddOrUpdateDocumentEmptyContent: mock_load.assert_not_called() +@pytest.mark.django_db +def test_llm_index_compact_uses_zero_retention( + temp_llm_index_dir: Path, + mocker: pytest_mock.MockerFixture, +) -> None: + """compact must use retention_seconds=0 to clear all MVCC history immediately.""" + mock_store = mocker.MagicMock() + mocker.patch( + "paperless_ai.indexing.write_store", + return_value=mocker.MagicMock( + __enter__=mocker.MagicMock(return_value=mock_store), + __exit__=mocker.MagicMock(return_value=False), + ), + ) + + indexing.llm_index_compact() + + mock_store.compact.assert_called_once_with(retention_seconds=0) + + @pytest.mark.django_db class TestLlmIndexLocking: """Index mutation functions must go through write_store(), which holds the lock.