From 9b23eddeac775bc7fe9ace4059a5a21a42d7c6e5 Mon Sep 17 00:00:00 2001 From: stumpylog <797416+stumpylog@users.noreply.github.com> Date: Fri, 5 Jun 2026 13:38:10 -0700 Subject: [PATCH] Promote modified to a column so we don't need JSON parsing at all --- src/paperless_ai/vector_store.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/paperless_ai/vector_store.py b/src/paperless_ai/vector_store.py index 6b225bad8..cd9bf154d 100644 --- a/src/paperless_ai/vector_store.py +++ b/src/paperless_ai/vector_store.py @@ -108,6 +108,7 @@ class PaperlessLanceVectorStore(BasePydanticVectorStore): pa.field("id", pa.string()), pa.field("doc_id", pa.string()), pa.field("document_id", pa.string()), + pa.field("modified", pa.string()), pa.field("vector", pa.list_(pa.float32(), dim)), pa.field("node_content", pa.string()), ], @@ -123,6 +124,7 @@ class PaperlessLanceVectorStore(BasePydanticVectorStore): "id": node.node_id, "doc_id": node.ref_doc_id, "document_id": str(node.metadata.get("document_id")), + "modified": str(node.metadata.get("modified", "")), "vector": node.get_embedding(), "node_content": json.dumps(meta), } @@ -284,13 +286,10 @@ class PaperlessLanceVectorStore(BasePydanticVectorStore): if self._table is None: return {} result: dict[str, str] = {} - for row in ( - self._table.search().select(["document_id", "node_content"]).to_list() - ): + for row in self._table.search().select(["document_id", "modified"]).to_list(): doc_id = str(row["document_id"]) if doc_id not in result: - meta = json.loads(row["node_content"]) - result[doc_id] = meta.get("modified", "") + result[doc_id] = str(row["modified"] or "") return result def ensure_document_id_scalar_index(self) -> None: