From fa74bb77b3db09a42312a6993e06c8cf48a74a44 Mon Sep 17 00:00:00 2001
From: stumpylog <797416+stumpylog@users.noreply.github.com>
Date: Tue, 2 Jun 2026 14:08:27 -0700
Subject: [PATCH] fix(ai): upsert empty-nodes path deletes by document_id

When upsert_document receives an empty nodes list, delete existing
chunks using the document_id column directly (consistent with the
merge_insert prune predicate) rather than calling delete(), which
filters on doc_id. Guard against a missing table so that the call is
a no-op instead of failing when the table does not exist.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/paperless_ai/tests/test_vector_store.py | 12 +++++++++++-
 src/paperless_ai/vector_store.py            |  5 +++--
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/src/paperless_ai/tests/test_vector_store.py b/src/paperless_ai/tests/test_vector_store.py
index 2633e513f..dacc5e327 100644
--- a/src/paperless_ai/tests/test_vector_store.py
+++ b/src/paperless_ai/tests/test_vector_store.py
@@ -145,7 +145,6 @@ class TestPaperlessLanceVectorStoreCrud:
         assert store.table_exists() is False
 
     def test_build_where_or_condition(self) -> None:
-
         from llama_index.core.vector_stores.types import FilterCondition
 
         from paperless_ai.vector_store import _build_where
@@ -208,3 +207,14 @@ class TestPaperlessLanceVectorStoreUpsert:
         before = table.version
         store.upsert_document("1", [_node("1-0", "1", "new0", 0.1)])
         assert store.client.open_table("documents").version == before + 1
+
+    def test_upsert_empty_nodes_removes_document(
+        self,
+        store: PaperlessLanceVectorStore,
+    ) -> None:
+        store.upsert_document("1", [])  # empty node list: upsert acts as a removal
+
+        table = store.client.open_table("documents")
+        remaining = sorted(r["document_id"] for r in table.search().to_list())
+        assert "1" not in remaining  # every chunk of document 1 must be gone
+        assert "2" in remaining  # assumes the store fixture seeds document 2 -- TODO confirm
diff --git a/src/paperless_ai/vector_store.py b/src/paperless_ai/vector_store.py
index 67b242176..59d28706d 100644
--- a/src/paperless_ai/vector_store.py
+++ b/src/paperless_ai/vector_store.py
@@ -139,8 +139,9 @@ class PaperlessLanceVectorStore(BasePydanticVectorStore):
         transient empty state for concurrent lock-free readers.
         """
         if not nodes:
-            # No indexable content: treat as a removal.
-            self.delete(document_id)
+            # No indexable content: remove any existing chunks for this document.
+            if self._table is not None:
+                self._table.delete(f"document_id = '{_escape(document_id)}'")
             return []
         rows = [self._row(node) for node in nodes]
         if self._table is None: