mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-06-06 13:49:44 +00:00
Proper node deletion
This commit is contained in:
@@ -31,7 +31,7 @@ RAG_CHUNK_OVERLAP = 200
|
||||
|
||||
|
||||
def _index_lock_path() -> Path:
|
||||
"""Return the path used as the file lock for FAISS index mutations.
|
||||
"""Return the path used as the file lock for LLM index mutations.
|
||||
|
||||
The lock file lives in DATA_DIR/locks/ (not inside LLM_INDEX_DIR) so that a
|
||||
rebuild — which calls shutil.rmtree(LLM_INDEX_DIR) — cannot delete the lock
|
||||
@@ -184,8 +184,7 @@ def load_or_build_index(nodes=None):
|
||||
|
||||
def remove_document_docstore_nodes(document: Document, index: "VectorStoreIndex"):
|
||||
"""
|
||||
Removes existing documents from docstore for a given document from the index.
|
||||
This is necessary because FAISS IndexFlatL2 is append-only.
|
||||
Removes existing nodes for a given document from the vector index and docstore.
|
||||
"""
|
||||
all_node_ids = list(index.docstore.docs.keys())
|
||||
existing_nodes = [
|
||||
@@ -193,16 +192,10 @@ def remove_document_docstore_nodes(document: Document, index: "VectorStoreIndex"
|
||||
for node in index.docstore.get_nodes(all_node_ids)
|
||||
if node.metadata.get("document_id") == str(document.id)
|
||||
]
|
||||
if existing_nodes:
|
||||
index.vector_store.delete_nodes(existing_nodes)
|
||||
for node_id in existing_nodes:
|
||||
# Delete from docstore, FAISS IndexFlatL2 are append-only
|
||||
index.docstore.delete_document(node_id)
|
||||
# Also purge the FAISS position -> UUID mapping so subsequent similarity
|
||||
# queries don't raise KeyError on ghost vector positions.
|
||||
stale_keys = [
|
||||
k for k, v in index.index_struct.nodes_dict.items() if v == node_id
|
||||
]
|
||||
for key in stale_keys:
|
||||
del index.index_struct.nodes_dict[key]
|
||||
# Re-sync the mutated index_struct so persist() writes the updated nodes_dict.
|
||||
index.storage_context.index_store.add_index_struct(index.index_struct)
|
||||
|
||||
@@ -457,12 +450,18 @@ def query_similar_documents(
|
||||
)
|
||||
try:
|
||||
results = retriever.retrieve(query_text)
|
||||
except Warning as e:
|
||||
logger.debug(
|
||||
"Skipping LLM similarity query for document %s because the "
|
||||
"vector store returned no results: %s",
|
||||
document.pk,
|
||||
e,
|
||||
)
|
||||
return []
|
||||
except KeyError as e:
|
||||
# Ghost FAISS positions remain after deletion because IndexFlatL2 is
|
||||
# append-only. Treat them as absent and return no results.
|
||||
logger.debug(
|
||||
"Skipping LLM similarity query for document %s due to a stale "
|
||||
"FAISS position with no docstore node: %s",
|
||||
"vector mapping with no docstore node: %s",
|
||||
document.pk,
|
||||
e,
|
||||
)
|
||||
|
||||
@@ -330,6 +330,7 @@ def test_remove_document_deletes_node_from_docstore(
|
||||
indexing.llm_index_remove_document(real_document)
|
||||
index = indexing.load_or_build_index()
|
||||
assert len(index.docstore.docs) == 0
|
||||
assert index.vector_store.table.count_rows() == 0
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
|
||||
Reference in New Issue
Block a user