mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-06-06 13:49:44 +00:00
Ok, first swap out storage stuff
This commit is contained in:
@@ -80,26 +80,24 @@ def get_or_create_storage_context(*, rebuild=False):
|
||||
shutil.rmtree(settings.LLM_INDEX_DIR, ignore_errors=True)
|
||||
settings.LLM_INDEX_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
if rebuild or not settings.LLM_INDEX_DIR.exists():
|
||||
import faiss
|
||||
from llama_index.core import StorageContext
|
||||
from llama_index.core.storage.docstore import SimpleDocumentStore
|
||||
from llama_index.core.storage.index_store import SimpleIndexStore
|
||||
from llama_index.vector_stores.faiss import FaissVectorStore
|
||||
from llama_index.core import StorageContext
|
||||
from llama_index.core.storage.docstore import SimpleDocumentStore
|
||||
from llama_index.core.storage.index_store import SimpleIndexStore
|
||||
from llama_index.vector_stores.lancedb import LanceDBVectorStore
|
||||
|
||||
settings.LLM_INDEX_DIR.mkdir(parents=True, exist_ok=True)
|
||||
embedding_dim = get_embedding_dim()
|
||||
faiss_index = faiss.IndexFlatL2(embedding_dim)
|
||||
vector_store = FaissVectorStore(faiss_index=faiss_index)
|
||||
settings.LLM_INDEX_DIR.mkdir(parents=True, exist_ok=True)
|
||||
vector_store = LanceDBVectorStore(
|
||||
uri=str(settings.LLM_INDEX_DIR / "lancedb"),
|
||||
table_name="vectors",
|
||||
mode="overwrite" if rebuild else "create",
|
||||
stores_text=False,
|
||||
flat_metadata=False,
|
||||
)
|
||||
|
||||
if rebuild or not (settings.LLM_INDEX_DIR / "docstore.json").exists():
|
||||
docstore = SimpleDocumentStore()
|
||||
index_store = SimpleIndexStore()
|
||||
else:
|
||||
from llama_index.core import StorageContext
|
||||
from llama_index.core.storage.docstore import SimpleDocumentStore
|
||||
from llama_index.core.storage.index_store import SimpleIndexStore
|
||||
from llama_index.vector_stores.faiss import FaissVectorStore
|
||||
|
||||
vector_store = FaissVectorStore.from_persist_dir(settings.LLM_INDEX_DIR)
|
||||
docstore = SimpleDocumentStore.from_persist_dir(settings.LLM_INDEX_DIR)
|
||||
index_store = SimpleIndexStore.from_persist_dir(settings.LLM_INDEX_DIR)
|
||||
|
||||
@@ -211,9 +209,11 @@ def remove_document_docstore_nodes(document: Document, index: "VectorStoreIndex"
|
||||
|
||||
def vector_store_file_exists():
|
||||
"""
|
||||
Check if the vector store file exists in the LLM index directory.
|
||||
Check if the LanceDB vector store exists in the LLM index directory.
|
||||
"""
|
||||
return Path(settings.LLM_INDEX_DIR / "default__vector_store.json").exists()
|
||||
return Path(
|
||||
settings.LLM_INDEX_DIR / "lancedb" / "vectors.lance",
|
||||
).exists()
|
||||
|
||||
|
||||
def get_rag_chunk_size() -> int:
|
||||
@@ -281,6 +281,7 @@ def update_llm_index(
|
||||
embed_model = get_embedding_model()
|
||||
llama_settings.Settings.embed_model = embed_model
|
||||
storage_context = get_or_create_storage_context(rebuild=True)
|
||||
get_embedding_dim()
|
||||
for document in iter_wrapper(documents):
|
||||
document_nodes = build_document_node(document, chunk_size=chunk_size)
|
||||
nodes.extend(document_nodes)
|
||||
|
||||
@@ -222,12 +222,14 @@ def test_update_llm_index_partial_update(
|
||||
assert any(temp_llm_index_dir.glob("*.json"))
|
||||
|
||||
|
||||
def test_get_or_create_storage_context_raises_exception(
|
||||
def test_get_or_create_storage_context_creates_empty_context(
|
||||
temp_llm_index_dir,
|
||||
mock_embed_model,
|
||||
) -> None:
|
||||
with pytest.raises(Exception):
|
||||
indexing.get_or_create_storage_context(rebuild=False)
|
||||
storage_context = indexing.get_or_create_storage_context(rebuild=False)
|
||||
|
||||
assert storage_context.vector_store is not None
|
||||
assert not indexing.vector_store_file_exists()
|
||||
|
||||
|
||||
@override_settings(
|
||||
|
||||
Reference in New Issue
Block a user