diff --git a/src/paperless_ai/indexing.py b/src/paperless_ai/indexing.py index dd33dfce9..3e88a93f6 100644 --- a/src/paperless_ai/indexing.py +++ b/src/paperless_ai/indexing.py @@ -31,7 +31,7 @@ RAG_CHUNK_OVERLAP = 200 def _index_lock_path() -> Path: - """Return the path used as the file lock for FAISS index mutations. + """Return the path used as the file lock for LLM index mutations. The lock file lives in DATA_DIR/locks/ (not inside LLM_INDEX_DIR) so that a rebuild — which calls shutil.rmtree(LLM_INDEX_DIR) — cannot delete the lock @@ -184,8 +184,7 @@ def load_or_build_index(nodes=None): def remove_document_docstore_nodes(document: Document, index: "VectorStoreIndex"): """ - Removes existing documents from docstore for a given document from the index. - This is necessary because FAISS IndexFlatL2 is append-only. + Removes existing nodes for a given document from the vector index and docstore. """ all_node_ids = list(index.docstore.docs.keys()) existing_nodes = [ @@ -193,16 +192,10 @@ def remove_document_docstore_nodes(document: Document, index: "VectorStoreIndex" for node in index.docstore.get_nodes(all_node_ids) if node.metadata.get("document_id") == str(document.id) ] + if existing_nodes: + index.vector_store.delete_nodes(existing_nodes) for node_id in existing_nodes: - # Delete from docstore, FAISS IndexFlatL2 are append-only index.docstore.delete_document(node_id) - # Also purge the FAISS position -> UUID mapping so subsequent similarity - # queries don't raise KeyError on ghost vector positions. - stale_keys = [ - k for k, v in index.index_struct.nodes_dict.items() if v == node_id - ] - for key in stale_keys: - del index.index_struct.nodes_dict[key] # Re-sync the mutated index_struct so persist() writes the updated nodes_dict. index.storage_context.index_store.add_index_struct(index.index_struct) @@ -457,12 +450,18 @@ def query_similar_documents( ) try: results = retriever.retrieve(query_text) + except Warning as e: + logger.debug( + "Skipping LLM similarity query for document %s because the " + "vector store returned no results: %s", + document.pk, + e, + ) + return [] except KeyError as e: - # Ghost FAISS positions remain after deletion because IndexFlatL2 is - # append-only. Treat them as absent and return no results. logger.debug( "Skipping LLM similarity query for document %s due to a stale " - "FAISS position with no docstore node: %s", + "vector mapping with no docstore node: %s", document.pk, e, ) diff --git a/src/paperless_ai/tests/test_ai_indexing.py b/src/paperless_ai/tests/test_ai_indexing.py index 9855089c3..46e5c017c 100644 --- a/src/paperless_ai/tests/test_ai_indexing.py +++ b/src/paperless_ai/tests/test_ai_indexing.py @@ -330,6 +330,7 @@ def test_remove_document_deletes_node_from_docstore( indexing.llm_index_remove_document(real_document) index = indexing.load_or_build_index() assert len(index.docstore.docs) == 0 + assert index.vector_store.table.count_rows() == 0 @pytest.mark.django_db