Proper node deletion

This commit is contained in:
shamoon
2026-06-02 09:32:51 -07:00
parent a2d66a232e
commit c5bfe008d7
2 changed files with 14 additions and 14 deletions
+13 -14
View File
@@ -31,7 +31,7 @@ RAG_CHUNK_OVERLAP = 200
def _index_lock_path() -> Path:
"""Return the path used as the file lock for FAISS index mutations.
"""Return the path used as the file lock for LLM index mutations.
The lock file lives in DATA_DIR/locks/ (not inside LLM_INDEX_DIR) so that a
rebuild — which calls shutil.rmtree(LLM_INDEX_DIR) — cannot delete the lock
@@ -184,8 +184,7 @@ def load_or_build_index(nodes=None):
def remove_document_docstore_nodes(document: Document, index: "VectorStoreIndex"):
"""
Removes existing documents from docstore for a given document from the index.
This is necessary because FAISS IndexFlatL2 is append-only.
Removes existing nodes for a given document from the vector index and docstore.
"""
all_node_ids = list(index.docstore.docs.keys())
existing_nodes = [
@@ -193,16 +192,10 @@ def remove_document_docstore_nodes(document: Document, index: "VectorStoreIndex"
for node in index.docstore.get_nodes(all_node_ids)
if node.metadata.get("document_id") == str(document.id)
]
if existing_nodes:
index.vector_store.delete_nodes(existing_nodes)
for node_id in existing_nodes:
# Delete from docstore, FAISS IndexFlatL2 are append-only
index.docstore.delete_document(node_id)
# Also purge the FAISS position -> UUID mapping so subsequent similarity
# queries don't raise KeyError on ghost vector positions.
stale_keys = [
k for k, v in index.index_struct.nodes_dict.items() if v == node_id
]
for key in stale_keys:
del index.index_struct.nodes_dict[key]
# Re-sync the mutated index_struct so persist() writes the updated nodes_dict.
index.storage_context.index_store.add_index_struct(index.index_struct)
@@ -457,12 +450,18 @@ def query_similar_documents(
)
try:
results = retriever.retrieve(query_text)
except Warning as e:
logger.debug(
"Skipping LLM similarity query for document %s because the "
"vector store returned no results: %s",
document.pk,
e,
)
return []
except KeyError as e:
# Ghost FAISS positions remain after deletion because IndexFlatL2 is
# append-only. Treat them as absent and return no results.
logger.debug(
"Skipping LLM similarity query for document %s due to a stale "
"FAISS position with no docstore node: %s",
"vector mapping with no docstore node: %s",
document.pk,
e,
)
@@ -330,6 +330,7 @@ def test_remove_document_deletes_node_from_docstore(
indexing.llm_index_remove_document(real_document)
index = indexing.load_or_build_index()
assert len(index.docstore.docs) == 0
assert index.vector_store.table.count_rows() == 0
@pytest.mark.django_db