Don't always re-create the document_id index; create it only if it does not already exist

This commit is contained in:
stumpylog
2026-06-05 12:58:22 -07:00
parent a5f7a5561d
commit b54b8a23ce
2 changed files with 24 additions and 2 deletions
@@ -302,3 +302,19 @@ class TestPaperlessLanceVectorStoreMaintenance:
)
assert doc1 == ["1-0"]
assert table.count_rows() == 2
def test_ensure_scalar_index_is_idempotent(
    self,
    store: PaperlessLanceVectorStore,
) -> None:
    """Calling ``ensure_document_id_scalar_index`` twice keeps the index present."""
    seed_node = _node("1-0", "1", "text", 0.5)
    store.add([seed_node])
    # The first pass runs against the freshly populated table; the repeat
    # pass must not raise and is expected to leave the existing index
    # untouched rather than replace it.
    for _ in range(2):
        store.ensure_document_id_scalar_index()
    assert store._has_scalar_index()
def test_ensure_scalar_index_noop_on_empty_store(
    self,
    store: PaperlessLanceVectorStore,
) -> None:
    """Ensuring the scalar index on a store with no table yet must not raise."""
    # Nothing has been added to the store here, so there is no table yet;
    # the call only has to complete without an exception.
    store.ensure_document_id_scalar_index()
+8 -2
View File
@@ -239,6 +239,9 @@ class PaperlessLanceVectorStore(BasePydanticVectorStore):
def _has_vector_index(self) -> bool:
    """Return True when any index listed for the table includes the ``vector`` column."""
    for index in self._table.list_indices():
        if "vector" in index.columns:
            return True
    return False
def _has_scalar_index(self) -> bool:
    """Return True when any index listed for the table includes the ``document_id`` column."""
    for index in self._table.list_indices():
        if "document_id" in index.columns:
            return True
    return False
def maybe_create_ann_index(self, min_rows: int = ANN_INDEX_MIN_ROWS) -> None:
"""Best-effort: build an IVF index once the table is large enough.
@@ -273,11 +276,14 @@ class PaperlessLanceVectorStore(BasePydanticVectorStore):
def ensure_document_id_scalar_index(self) -> None:
    """Create a scalar index on the ``document_id`` filter column if missing.

    The index is never placed on the merge key ``id`` — see
    https://github.com/lancedb/lancedb/issues/3177.

    No-op when there is no table yet or when an index covering
    ``document_id`` already exists, so repeated calls do not rebuild it.
    Creation is best-effort: a failure is logged as a warning and is not
    re-raised.
    """
    if self._table is None:
        return
    if self._has_scalar_index():
        return
    try:
        self._table.create_scalar_index("document_id")
    except Exception as e:  # pragma: no cover
        logger.warning("Skipping document_id scalar index: %s", e)