Adds a new `compact` sub-command and handler to force immediate compaction of the LanceDB index version history

This commit is contained in:
stumpylog
2026-06-05 10:36:49 -07:00
parent 3aa83c9e4c
commit ca6dca0efe
3 changed files with 31 additions and 1 deletions
@@ -2,6 +2,7 @@ from typing import Any
from documents.management.commands.base import PaperlessCommand
from documents.tasks import llmindex_index
from paperless_ai.indexing import llm_index_compact
class Command(PaperlessCommand):
@@ -12,9 +13,12 @@ class Command(PaperlessCommand):
def add_arguments(self, parser: Any) -> None:
    """Register the positional ``command`` argument for this management command.

    The accepted values are ``rebuild``, ``update`` and ``compact``.
    """
    super().add_arguments(parser)
    # The positional must be declared exactly once: argparse does not
    # de-duplicate positionals, so a second add_argument("command", ...)
    # would demand an additional argument on the command line.
    parser.add_argument("command", choices=["rebuild", "update", "compact"])
def handle(self, *args: Any, **options: Any) -> None:
if options["command"] == "compact":
llm_index_compact()
return
llmindex_index(
rebuild=options["command"] == "rebuild",
iter_wrapper=lambda docs: self.track(
+6
View File
@@ -311,6 +311,12 @@ def llm_index_add_or_update_document(document: Document):
store.ensure_document_id_scalar_index()
def llm_index_compact() -> None:
    """Force an immediate compaction of the LLM index.

    Compaction is requested with a retention window of zero seconds, so all
    MVCC version history is cleared right away instead of being kept around.
    """
    with write_store() as index_store:
        # retention_seconds=0: keep no older versions after compacting.
        index_store.compact(retention_seconds=0)
def llm_index_remove_document(document: Document):
"""Remove a document's chunks from the LLM index."""
with write_store() as store:
@@ -525,6 +525,26 @@ class TestLlmIndexAddOrUpdateDocumentEmptyContent:
mock_load.assert_not_called()
@pytest.mark.django_db
def test_llm_index_compact_uses_zero_retention(
    temp_llm_index_dir: Path,
    mocker: pytest_mock.MockerFixture,
) -> None:
    """llm_index_compact must call compact() with retention_seconds=0.

    A zero retention window is what clears all MVCC history immediately.
    """
    # Arrange: replace write_store() with a context manager yielding a fake store.
    fake_store = mocker.MagicMock()
    store_context = mocker.MagicMock(
        __enter__=mocker.MagicMock(return_value=fake_store),
        __exit__=mocker.MagicMock(return_value=False),
    )
    mocker.patch("paperless_ai.indexing.write_store", return_value=store_context)

    # Act
    indexing.llm_index_compact()

    # Assert: exactly one compaction, with no version retention.
    fake_store.compact.assert_called_once_with(retention_seconds=0)
@pytest.mark.django_db
class TestLlmIndexLocking:
"""Index mutation functions must go through write_store(), which holds the lock.