mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-06-06 13:49:44 +00:00
feat(ai): dimension guard and FAISS index migration
Drops migrate_stale_faiss_index; users must delete the llm_index/ directory manually on upgrade. Keeps embedding_dim_mismatch to force an LLM index rebuild when the embedding model's vector dimension changes.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,6 +1,5 @@
|
||||
import json
|
||||
import logging
|
||||
import shutil
|
||||
from collections.abc import Iterable
|
||||
from datetime import timedelta
|
||||
from pathlib import Path
|
||||
@@ -156,15 +155,6 @@ def vector_store_file_exists() -> bool:
|
||||
return get_vector_store().table_exists()
|
||||
|
||||
|
||||
def migrate_stale_faiss_index() -> None:
|
||||
"""Remove a pre-LanceDB FAISS index directory so it is rebuilt fresh."""
|
||||
stale_marker = settings.LLM_INDEX_DIR / "default__vector_store.json"
|
||||
if stale_marker.exists():
|
||||
logger.info("Removing stale FAISS LLM index; it will be rebuilt.")
|
||||
shutil.rmtree(settings.LLM_INDEX_DIR, ignore_errors=True)
|
||||
settings.LLM_INDEX_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
|
||||
def embedding_dim_mismatch() -> bool:
|
||||
"""True when the stored table's vector dim differs from the current model."""
|
||||
store = get_vector_store()
|
||||
@@ -232,7 +222,6 @@ def update_llm_index(
|
||||
"""Rebuild or incrementally update the LLM index."""
|
||||
from llama_index.core.schema import MetadataMode
|
||||
|
||||
migrate_stale_faiss_index()
|
||||
if not rebuild and vector_store_file_exists() and embedding_dim_mismatch():
|
||||
logger.warning("Embedding dimension changed; forcing LLM index rebuild.")
|
||||
rebuild = True
|
||||
|
||||
@@ -911,15 +911,14 @@ class TestLlmIndexLocking:
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
class TestFaissMigration:
|
||||
def test_migration_wipes_stale_faiss_files(
|
||||
class TestDimensionGuard:
|
||||
def test_embedding_dim_mismatch_false_when_no_table(
|
||||
self,
|
||||
temp_llm_index_dir: Path,
|
||||
mock_embed_model,
|
||||
) -> None:
|
||||
stale = temp_llm_index_dir / "default__vector_store.json"
|
||||
stale.write_text("{}")
|
||||
indexing.migrate_stale_faiss_index()
|
||||
assert not stale.exists()
|
||||
"""No table yet — dim mismatch must return False (nothing to compare)."""
|
||||
assert not indexing.embedding_dim_mismatch()
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
|
||||
Reference in New Issue
Block a user