Ok, first swap out storage stuff

This commit is contained in:
shamoon
2026-06-02 09:27:49 -07:00
parent ad2140ecd9
commit a2d66a232e
2 changed files with 24 additions and 21 deletions
+19 -18
View File
@@ -80,26 +80,24 @@ def get_or_create_storage_context(*, rebuild=False):
shutil.rmtree(settings.LLM_INDEX_DIR, ignore_errors=True)
settings.LLM_INDEX_DIR.mkdir(parents=True, exist_ok=True)
if rebuild or not settings.LLM_INDEX_DIR.exists():
import faiss
from llama_index.core import StorageContext
from llama_index.core.storage.docstore import SimpleDocumentStore
from llama_index.core.storage.index_store import SimpleIndexStore
from llama_index.vector_stores.faiss import FaissVectorStore
from llama_index.core import StorageContext
from llama_index.core.storage.docstore import SimpleDocumentStore
from llama_index.core.storage.index_store import SimpleIndexStore
from llama_index.vector_stores.lancedb import LanceDBVectorStore
settings.LLM_INDEX_DIR.mkdir(parents=True, exist_ok=True)
embedding_dim = get_embedding_dim()
faiss_index = faiss.IndexFlatL2(embedding_dim)
vector_store = FaissVectorStore(faiss_index=faiss_index)
settings.LLM_INDEX_DIR.mkdir(parents=True, exist_ok=True)
vector_store = LanceDBVectorStore(
uri=str(settings.LLM_INDEX_DIR / "lancedb"),
table_name="vectors",
mode="overwrite" if rebuild else "create",
stores_text=False,
flat_metadata=False,
)
if rebuild or not (settings.LLM_INDEX_DIR / "docstore.json").exists():
docstore = SimpleDocumentStore()
index_store = SimpleIndexStore()
else:
from llama_index.core import StorageContext
from llama_index.core.storage.docstore import SimpleDocumentStore
from llama_index.core.storage.index_store import SimpleIndexStore
from llama_index.vector_stores.faiss import FaissVectorStore
vector_store = FaissVectorStore.from_persist_dir(settings.LLM_INDEX_DIR)
docstore = SimpleDocumentStore.from_persist_dir(settings.LLM_INDEX_DIR)
index_store = SimpleIndexStore.from_persist_dir(settings.LLM_INDEX_DIR)
@@ -211,9 +209,11 @@ def remove_document_docstore_nodes(document: Document, index: "VectorStoreIndex"
def vector_store_file_exists():
"""
Check if the vector store file exists in the LLM index directory.
Check if the LanceDB vector store exists in the LLM index directory.
"""
return Path(settings.LLM_INDEX_DIR / "default__vector_store.json").exists()
return Path(
settings.LLM_INDEX_DIR / "lancedb" / "vectors.lance",
).exists()
def get_rag_chunk_size() -> int:
@@ -281,6 +281,7 @@ def update_llm_index(
embed_model = get_embedding_model()
llama_settings.Settings.embed_model = embed_model
storage_context = get_or_create_storage_context(rebuild=True)
get_embedding_dim()
for document in iter_wrapper(documents):
document_nodes = build_document_node(document, chunk_size=chunk_size)
nodes.extend(document_nodes)
+5 -3
View File
@@ -222,12 +222,14 @@ def test_update_llm_index_partial_update(
assert any(temp_llm_index_dir.glob("*.json"))
def test_get_or_create_storage_context_raises_exception(
def test_get_or_create_storage_context_creates_empty_context(
temp_llm_index_dir,
mock_embed_model,
) -> None:
with pytest.raises(Exception):
indexing.get_or_create_storage_context(rebuild=False)
storage_context = indexing.get_or_create_storage_context(rebuild=False)
assert storage_context.vector_store is not None
assert not indexing.vector_store_file_exists()
@override_settings(