mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-06-06 13:49:44 +00:00
788ae5d4e5
Delete _get_document_filtered_retriever (74-line custom FAISS retriever with expanding top_k loop) and rewrite _stream_chat_with_documents to use a stock VectorIndexRetriever with MetadataFilters(IN). The no-content pre-check now calls index.vector_store.get_nodes(filters=...) which returns [] cleanly for un-indexed documents. Move FakeEmbedding and mock_embed_model fixture to conftest.py so both test_chat.py and test_ai_indexing.py share them. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
41 lines
1.1 KiB
Python
41 lines
1.1 KiB
Python
from pathlib import Path
|
|
from unittest.mock import patch
|
|
|
|
import pytest
|
|
from llama_index.core.base.embeddings.base import BaseEmbedding
|
|
from pytest_django.fixtures import SettingsWrapper
|
|
|
|
|
|
@pytest.fixture
def temp_llm_index_dir(tmp_path: Path, settings: SettingsWrapper) -> Path:
    """Redirect ``settings.LLM_INDEX_DIR`` to the per-test temporary directory.

    Returns the same ``tmp_path`` that was assigned to the setting, so a test
    can inspect whatever gets written there.
    """
    index_dir = tmp_path
    settings.LLM_INDEX_DIR = index_dir
    return index_dir
|
|
|
|
|
|
class FakeEmbedding(BaseEmbedding):
    """Deterministic embedding stub for tests.

    Every query and every text is mapped to the same constant vector of
    ``get_query_embedding_dim()`` floats, so no real embedding model is
    needed.
    """

    async def _aget_query_embedding(self, query: str) -> list[float]:
        """Async variant of ``_get_query_embedding``; returns the same vector."""
        # This hook has to be a coroutine: BaseEmbedding declares it as
        # ``async def`` and awaits it from its async entry points, so a plain
        # function returning a list would fail with "object list can't be
        # used in 'await' expression".
        return self._get_query_embedding(query)

    def _get_query_embedding(self, query: str) -> list[float]:
        """Return the constant vector regardless of ``query``."""
        return [0.1] * self.get_query_embedding_dim()

    def _get_text_embedding(self, text: str) -> list[float]:
        """Return the constant vector regardless of ``text``."""
        return [0.1] * self.get_query_embedding_dim()

    def get_query_embedding_dim(self) -> int:
        """Return the length of the vectors produced by this stub."""
        return 384
|
|
|
|
|
|
@pytest.fixture
def mock_embed_model():
    """Patch ``get_embedding_model`` so it returns a shared ``FakeEmbedding``.

    Both ``paperless_ai.indexing.get_embedding_model`` and
    ``paperless_ai.embedding.get_embedding_model`` are replaced for the
    duration of the test, and both mocks return the same stub instance.

    Yields the mock that replaces ``paperless_ai.indexing.get_embedding_model``.
    """
    stub = FakeEmbedding()
    indexing_patch = patch(
        "paperless_ai.indexing.get_embedding_model",
        return_value=stub,
    )
    embedding_patch = patch(
        "paperless_ai.embedding.get_embedding_model",
        return_value=stub,
    )
    # Enter in the same order as the original: indexing first, then embedding.
    with indexing_patch as mock_index, embedding_patch:
        yield mock_index
|