mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-06-06 05:39:45 +00:00
fix(ai): upsert empty-nodes path deletes by document_id
When upsert_document receives an empty nodes list, delete the document's existing chunks by filtering on the document_id column directly (consistent with the merge_insert prune predicate) rather than calling delete(), which filters on doc_id. Guard against a missing table so that the empty-nodes path is a safe no-op when the table does not exist.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -145,7 +145,6 @@ class TestPaperlessLanceVectorStoreCrud:
|
||||
assert store.table_exists() is False
|
||||
|
||||
def test_build_where_or_condition(self) -> None:
|
||||
|
||||
from llama_index.core.vector_stores.types import FilterCondition
|
||||
|
||||
from paperless_ai.vector_store import _build_where
|
||||
@@ -208,3 +207,14 @@ class TestPaperlessLanceVectorStoreUpsert:
|
||||
before = table.version
|
||||
store.upsert_document("1", [_node("1-0", "1", "new0", 0.1)])
|
||||
assert store.client.open_table("documents").version == before + 1
|
||||
|
||||
def test_upsert_empty_nodes_removes_document(
|
||||
self,
|
||||
store: PaperlessLanceVectorStore,
|
||||
) -> None:
|
||||
store.upsert_document("1", [])
|
||||
|
||||
table = store.client.open_table("documents")
|
||||
remaining = sorted(r["document_id"] for r in table.search().to_list())
|
||||
assert "1" not in remaining
|
||||
assert "2" in remaining
|
||||
|
||||
@@ -139,8 +139,9 @@ class PaperlessLanceVectorStore(BasePydanticVectorStore):
|
||||
transient empty state for concurrent lock-free readers.
|
||||
"""
|
||||
if not nodes:
|
||||
# No indexable content: treat as a removal.
|
||||
self.delete(document_id)
|
||||
# No indexable content: remove any existing chunks for this document.
|
||||
if self._table is not None:
|
||||
self._table.delete(f"document_id = '{_escape(document_id)}'")
|
||||
return []
|
||||
rows = [self._row(node) for node in nodes]
|
||||
if self._table is None:
|
||||
|
||||
Reference in New Issue
Block a user