Enhancement: support ollama embeddings (#12753)

This commit is contained in:
shamoon
2026-05-08 17:06:14 -07:00
committed by GitHub
parent 177d81c8d4
commit 79d0a04df0
12 changed files with 211 additions and 10 deletions
+11 -4
View File
@@ -2014,8 +2014,8 @@ suggestions. This setting is required to be set to true in order to use the AI f
#### [`PAPERLESS_AI_LLM_EMBEDDING_BACKEND=<str>`](#PAPERLESS_AI_LLM_EMBEDDING_BACKEND) {#PAPERLESS_AI_LLM_EMBEDDING_BACKEND}
: The embedding backend to use for RAG. This can be either "openai-like" or "huggingface". The
"openai-like" backend uses an OpenAI-compatible embeddings API.
: The embedding backend to use for RAG. This can be "openai-like", "huggingface", or
"ollama". The "openai-like" backend uses an OpenAI-compatible embeddings API.
Defaults to None.
@@ -2023,8 +2023,15 @@ suggestions. This setting is required to be set to true in order to use the AI f
: The model to use for the embedding backend for RAG. This can be set to any of the embedding
models supported by the current embedding backend. If not supplied, defaults to
"text-embedding-3-small" for the OpenAI-compatible backend and
"sentence-transformers/all-MiniLM-L6-v2" for Huggingface.
"text-embedding-3-small" for the OpenAI-compatible backend,
"sentence-transformers/all-MiniLM-L6-v2" for Huggingface, and "embeddinggemma" for Ollama.
Defaults to None.
#### [`PAPERLESS_AI_LLM_EMBEDDING_ENDPOINT=<str>`](#PAPERLESS_AI_LLM_EMBEDDING_ENDPOINT) {#PAPERLESS_AI_LLM_EMBEDDING_ENDPOINT}
: The endpoint / url to use for the embedding backend. If not supplied, embeddings use
`PAPERLESS_AI_LLM_ENDPOINT`.
Defaults to None.
+1
View File
@@ -53,6 +53,7 @@ dependencies = [
"langdetect~=1.0.9",
"llama-index-core>=0.14.21",
"llama-index-embeddings-huggingface>=0.6.1",
"llama-index-embeddings-ollama>=0.9",
"llama-index-embeddings-openai-like>=0.2.2",
"llama-index-llms-ollama>=0.9.1",
"llama-index-llms-openai-like>=0.7.1",
+9
View File
@@ -57,6 +57,7 @@ export const ConfigCategory = {
// Embedding backends selectable for RAG.
// Values mirror the backend's LLMEmbeddingBackend TextChoices
// ("openai-like" / "huggingface" / "ollama") and are persisted via
// PAPERLESS_AI_LLM_EMBEDDING_BACKEND — do not rename without a migration.
export const LLMEmbeddingBackendConfig = {
  OPENAI_LIKE: 'openai-like',
  HUGGINGFACE: 'huggingface',
  OLLAMA: 'ollama',
}
export const LLMBackendConfig = {
@@ -301,6 +302,13 @@ export const PaperlessConfigOptions: ConfigOption[] = [
config_key: 'PAPERLESS_AI_LLM_EMBEDDING_MODEL',
category: ConfigCategory.AI,
},
{
key: 'llm_embedding_endpoint',
title: $localize`LLM Embedding Endpoint`,
type: ConfigOptionType.String,
config_key: 'PAPERLESS_AI_LLM_EMBEDDING_ENDPOINT',
category: ConfigCategory.AI,
},
{
key: 'llm_backend',
title: $localize`LLM Backend`,
@@ -363,6 +371,7 @@ export interface PaperlessConfig extends ObjectWithId {
ai_enabled: boolean
llm_embedding_backend: string
llm_embedding_model: string
llm_embedding_endpoint: string
llm_backend: string
llm_model: string
llm_api_key: string
@@ -74,6 +74,7 @@ class TestApiAppConfig(DirectoriesMixin, APITestCase):
"ai_enabled": False,
"llm_embedding_backend": None,
"llm_embedding_model": None,
"llm_embedding_endpoint": None,
"llm_backend": None,
"llm_model": None,
"llm_api_key": None,
@@ -868,3 +869,19 @@ class TestApiAppConfig(DirectoriesMixin, APITestCase):
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn("non-public address", str(response.data).lower())
@override_settings(LLM_ALLOW_INTERNAL_ENDPOINTS=False)
def test_update_llm_embedding_endpoint_blocks_internal_endpoint_when_disallowed(
    self,
) -> None:
    """PATCHing a loopback llm_embedding_endpoint must be rejected (SSRF guard).

    With LLM_ALLOW_INTERNAL_ENDPOINTS disabled, the serializer's endpoint
    validation should answer 400 and mention "non-public address".
    """
    response = self.client.patch(
        f"{self.ENDPOINT}1/",
        json.dumps(
            {
                # 127.0.0.1:11434 is the Ollama default — internal, so blocked.
                "llm_embedding_endpoint": "http://127.0.0.1:11434",
            },
        ),
        content_type="application/json",
    )
    self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
    self.assertIn("non-public address", str(response.data).lower())
+4
View File
@@ -194,6 +194,7 @@ class AIConfig(BaseConfig):
ai_enabled: bool = dataclasses.field(init=False)
llm_embedding_backend: str = dataclasses.field(init=False)
llm_embedding_model: str = dataclasses.field(init=False)
llm_embedding_endpoint: str = dataclasses.field(init=False)
llm_backend: str = dataclasses.field(init=False)
llm_model: str = dataclasses.field(init=False)
llm_api_key: str = dataclasses.field(init=False)
@@ -210,6 +211,9 @@ class AIConfig(BaseConfig):
self.llm_embedding_model = (
app_config.llm_embedding_model or settings.LLM_EMBEDDING_MODEL
)
self.llm_embedding_endpoint = (
app_config.llm_embedding_endpoint or settings.LLM_EMBEDDING_ENDPOINT
)
self.llm_backend = app_config.llm_backend or settings.LLM_BACKEND
self.llm_model = app_config.llm_model or settings.LLM_MODEL
self.llm_api_key = app_config.llm_api_key or settings.LLM_API_KEY
@@ -0,0 +1,38 @@
# Generated by Django 5.2.6 on 2026-05-08 00:00
from django.db import migrations
from django.db import models


class Migration(migrations.Migration):
    """Support the Ollama embedding backend.

    Adds "ollama" to the llm_embedding_backend choices and introduces an
    optional llm_embedding_endpoint on ApplicationConfiguration.
    """

    dependencies = [
        ("paperless", "0009_alter_applicationconfiguration_options"),
    ]

    operations = [
        # Re-declare the field so the new "ollama" choice is recorded in
        # migration state (choices changes are metadata-only, no data change).
        migrations.AlterField(
            model_name="applicationconfiguration",
            name="llm_embedding_backend",
            field=models.CharField(
                blank=True,
                choices=[
                    ("openai-like", "OpenAI-compatible"),
                    ("huggingface", "Huggingface"),
                    ("ollama", "Ollama"),
                ],
                max_length=128,
                null=True,
                verbose_name="Sets the LLM embedding backend",
            ),
        ),
        # Optional dedicated endpoint for embeddings; nullable so existing
        # rows keep falling back to the general LLM endpoint.
        migrations.AddField(
            model_name="applicationconfiguration",
            name="llm_embedding_endpoint",
            field=models.CharField(
                blank=True,
                max_length=256,
                null=True,
                verbose_name="Sets the LLM embedding endpoint, optional",
            ),
        ),
    ]
+8
View File
@@ -77,6 +77,7 @@ class ColorConvertChoices(models.TextChoices):
class LLMEmbeddingBackend(models.TextChoices):
    """Embedding backends selectable for RAG.

    The stored value is the first tuple element; it mirrors the frontend's
    LLMEmbeddingBackendConfig constants, so keep both in sync.
    """

    OPENAI_LIKE = ("openai-like", _("OpenAI-compatible"))
    HUGGINGFACE = ("huggingface", _("Huggingface"))
    OLLAMA = ("ollama", _("Ollama"))
class LLMBackend(models.TextChoices):
@@ -310,6 +311,13 @@ class ApplicationConfiguration(AbstractSingletonModel):
max_length=128,
)
llm_embedding_endpoint = models.CharField(
verbose_name=_("Sets the LLM embedding endpoint, optional"),
blank=True,
null=True,
max_length=256,
)
llm_backend = models.CharField(
verbose_name=_("Sets the LLM backend"),
blank=True,
+2
View File
@@ -291,6 +291,8 @@ class ApplicationConfigurationSerializer(
return value
validate_llm_embedding_endpoint = validate_llm_endpoint
class Meta:
model = ApplicationConfiguration
fields = "__all__"
+2 -1
View File
@@ -1178,8 +1178,9 @@ REMOTE_OCR_ENDPOINT = os.getenv("PAPERLESS_REMOTE_OCR_ENDPOINT")
AI_ENABLED = get_bool_from_env("PAPERLESS_AI_ENABLED", "NO")
LLM_EMBEDDING_BACKEND = os.getenv(
"PAPERLESS_AI_LLM_EMBEDDING_BACKEND",
) # "huggingface" or "openai-like"
) # "huggingface", "openai-like", or "ollama"
LLM_EMBEDDING_MODEL = os.getenv("PAPERLESS_AI_LLM_EMBEDDING_MODEL")
LLM_EMBEDDING_ENDPOINT = os.getenv("PAPERLESS_AI_LLM_EMBEDDING_ENDPOINT")
LLM_BACKEND = os.getenv("PAPERLESS_AI_LLM_BACKEND") # "ollama" or "openai-like"
LLM_MODEL = os.getenv("PAPERLESS_AI_LLM_MODEL")
LLM_API_KEY = os.getenv("PAPERLESS_AI_LLM_API_KEY")
+25 -5
View File
@@ -22,7 +22,7 @@ def get_embedding_model() -> "BaseEmbedding":
case LLMEmbeddingBackend.OPENAI_LIKE:
from llama_index.embeddings.openai_like import OpenAILikeEmbedding
endpoint = config.llm_endpoint or None
endpoint = config.llm_embedding_endpoint or config.llm_endpoint or None
if endpoint:
validate_outbound_http_url(
endpoint,
@@ -40,6 +40,22 @@ def get_embedding_model() -> "BaseEmbedding":
model_name=config.llm_embedding_model
or "sentence-transformers/all-MiniLM-L6-v2",
)
case LLMEmbeddingBackend.OLLAMA:
from llama_index.embeddings.ollama import OllamaEmbedding
endpoint = (
config.llm_embedding_endpoint
or config.llm_endpoint
or "http://localhost:11434"
)
validate_outbound_http_url(
endpoint,
allow_internal=config.llm_allow_internal_endpoints,
)
return OllamaEmbedding(
model_name=config.llm_embedding_model or "embeddinggemma",
base_url=endpoint,
)
case _:
raise ValueError(
f"Unsupported embedding backend: {config.llm_embedding_backend}",
@@ -52,11 +68,15 @@ def get_embedding_dim() -> int:
from a dummy embedding and stores it for future use.
"""
config = AIConfig()
model = config.llm_embedding_model or (
"text-embedding-3-small"
if config.llm_embedding_backend == LLMEmbeddingBackend.OPENAI_LIKE
else "sentence-transformers/all-MiniLM-L6-v2"
default_model = {
LLMEmbeddingBackend.OPENAI_LIKE: "text-embedding-3-small",
LLMEmbeddingBackend.HUGGINGFACE: "sentence-transformers/all-MiniLM-L6-v2",
LLMEmbeddingBackend.OLLAMA: "embeddinggemma",
}.get(
config.llm_embedding_backend,
"sentence-transformers/all-MiniLM-L6-v2",
)
model = config.llm_embedding_model or default_model
meta_path: Path = settings.LLM_INDEX_DIR / "meta.json"
if meta_path.exists():
+65
View File
@@ -14,6 +14,7 @@ from paperless_ai.embedding import get_embedding_model
@pytest.fixture
def mock_ai_config():
    """Patch AIConfig as used by paperless_ai.embedding.

    Defaults are test-safe: no dedicated embedding endpoint and internal
    endpoints allowed; individual tests override attributes as needed.
    """
    with patch("paperless_ai.embedding.AIConfig") as mock_config_cls:
        defaults = mock_config_cls.return_value
        defaults.llm_embedding_endpoint = None
        defaults.llm_allow_internal_endpoints = True
        yield mock_config_cls
@@ -71,6 +72,25 @@ def test_get_embedding_model_openai(mock_ai_config):
assert model == MockOpenAIEmbedding.return_value
def test_get_embedding_model_openai_prefers_embedding_endpoint(mock_ai_config):
    """A dedicated embedding endpoint must win over the general LLM endpoint."""
    cfg = mock_ai_config.return_value
    cfg.llm_embedding_backend = LLMEmbeddingBackend.OPENAI_LIKE
    cfg.llm_embedding_model = "text-embedding-3-small"
    cfg.llm_api_key = "test_api_key"
    cfg.llm_embedding_endpoint = "http://embedding-url"
    cfg.llm_endpoint = "http://test-url"

    with patch(
        "llama_index.embeddings.openai_like.OpenAILikeEmbedding",
    ) as embedding_cls:
        result = get_embedding_model()

    # api_base must be the embedding endpoint, not llm_endpoint.
    embedding_cls.assert_called_once_with(
        model_name="text-embedding-3-small",
        api_key="test_api_key",
        api_base="http://embedding-url",
    )
    assert result == embedding_cls.return_value
def test_get_embedding_model_openai_blocks_internal_endpoint_when_disallowed(
mock_ai_config,
):
@@ -100,6 +120,51 @@ def test_get_embedding_model_huggingface(mock_ai_config):
assert model == MockHuggingFaceEmbedding.return_value
def test_get_embedding_model_ollama(mock_ai_config):
    """Without a dedicated embedding endpoint, Ollama falls back to llm_endpoint."""
    cfg = mock_ai_config.return_value
    cfg.llm_embedding_backend = LLMEmbeddingBackend.OLLAMA
    cfg.llm_embedding_model = "embeddinggemma"
    cfg.llm_endpoint = "http://test-url"

    with patch(
        "llama_index.embeddings.ollama.OllamaEmbedding",
    ) as ollama_cls:
        result = get_embedding_model()

    ollama_cls.assert_called_once_with(
        model_name="embeddinggemma",
        base_url="http://test-url",
    )
    assert result == ollama_cls.return_value
def test_get_embedding_model_ollama_prefers_embedding_endpoint(mock_ai_config):
    """A dedicated embedding endpoint must win over the general LLM endpoint."""
    cfg = mock_ai_config.return_value
    cfg.llm_embedding_backend = LLMEmbeddingBackend.OLLAMA
    cfg.llm_embedding_model = "embeddinggemma"
    cfg.llm_embedding_endpoint = "http://embedding-url"
    cfg.llm_endpoint = "http://test-url"

    with patch(
        "llama_index.embeddings.ollama.OllamaEmbedding",
    ) as ollama_cls:
        result = get_embedding_model()

    # base_url must be the embedding endpoint, not llm_endpoint.
    ollama_cls.assert_called_once_with(
        model_name="embeddinggemma",
        base_url="http://embedding-url",
    )
    assert result == ollama_cls.return_value
def test_get_embedding_model_ollama_blocks_internal_endpoint_when_disallowed(
    mock_ai_config,
):
    """A non-public Ollama endpoint is rejected when internal endpoints are off."""
    cfg = mock_ai_config.return_value
    cfg.llm_embedding_backend = LLMEmbeddingBackend.OLLAMA
    cfg.llm_embedding_model = "embeddinggemma"
    # Loopback endpoint resolves via the llm_endpoint fallback.
    cfg.llm_endpoint = "http://127.0.0.1:11434"
    cfg.llm_allow_internal_endpoints = False

    with pytest.raises(ValueError, match="non-public address"):
        get_embedding_model()
def test_get_embedding_model_invalid_backend(mock_ai_config):
mock_ai_config.return_value.llm_embedding_backend = "INVALID_BACKEND"
Generated
+29
View File
@@ -2213,6 +2213,20 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/01/8e/b9ea889f88318f2faa20b615989e12a15a133c9273630f9266fcf69f35a6/llama_index_embeddings_openai_like-0.3.1-py3-none-any.whl", hash = "sha256:167c7e462cde7d53ea907ceaffbbf10a750676c7c9f7bcc9bc9686a41921387a", size = 3631, upload-time = "2026-03-13T16:15:19.58Z" },
]
[[package]]
name = "llama-index-embeddings-ollama"
version = "0.9.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "llama-index-core", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "ollama", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "pytest-asyncio", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/8b/cd/2cff1feac66368a4c60ea7afbdbb3f3fdd49ee8c279fc105457e726a3ad2/llama_index_embeddings_ollama-0.9.0.tar.gz", hash = "sha256:19d2d2a0e3f0934480eae31243ac5f1ce171319578b9c0adad25cf1b6c35659e", size = 6575, upload-time = "2026-03-12T20:21:18.810Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/9a/36/53674403380483510a7f657c5d6f0bdac5b7f9ec5a1a8d06cdfdd6dc47f2/llama_index_embeddings_ollama-0.9.0-py3-none-any.whl", hash = "sha256:92e0ce481e60a9bcbddbe2c369d2f72c6fdd7158d03a34ab9b35d80869b673c3", size = 6250, upload-time = "2026-03-12T20:21:19.441Z" },
]
[[package]]
name = "llama-index-instrumentation"
version = "0.5.0"
@@ -2909,6 +2923,7 @@ dependencies = [
{ name = "llama-index-core", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "llama-index-embeddings-huggingface", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "llama-index-embeddings-ollama", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "llama-index-embeddings-openai-like", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "llama-index-llms-ollama", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "llama-index-llms-openai-like", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "llama-index-vector-stores-faiss", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -3059,6 +3074,7 @@ requires-dist = [
{ name = "llama-index-core", specifier = ">=0.14.21" },
{ name = "llama-index-embeddings-huggingface", specifier = ">=0.6.1" },
{ name = "llama-index-embeddings-ollama", specifier = ">=0.9" },
{ name = "llama-index-embeddings-openai-like", specifier = ">=0.2.2" },
{ name = "llama-index-llms-ollama", specifier = ">=0.9.1" },
{ name = "llama-index-llms-openai-like", specifier = ">=0.7.1" },
{ name = "llama-index-vector-stores-faiss", specifier = ">=0.5.2" },
@@ -3799,6 +3815,19 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/d4/24/a372aaf5c9b7208e7112038812994107bc65a84cd00e0354a88c2c77a617/pytest-9.0.3-py3-none-any.whl", hash = "sha256:2c5efc453d45394fdd706ade797c0a81091eccd1d6e4bccfcd476e2b8e0ab5d9", size = 375249, upload-time = "2026-04-07T17:16:16.13Z" },
]
[[package]]
name = "pytest-asyncio"
version = "1.3.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "pytest", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "typing-extensions", marker = "(python_full_version < '3.13' and sys_platform == 'darwin') or (python_full_version < '3.13' and sys_platform == 'linux')" },
]
sdist = { url = "https://files.pythonhosted.org/packages/90/2c/8af215c0f776415f3590cac4f9086ccefd6fd463befeae41cd4d3f193e5a/pytest_asyncio-1.3.0.tar.gz", hash = "sha256:d7f52f36d231b80ee124cd216ffb19369aa168fc10095013c6b014a34d3ee9e5", size = 50087, upload-time = "2025-11-10T16:07:47.256Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/e5/35/f8b19922b6a25bc0880171a2f1a003eaeb93657475193ab516fd87cac9da/pytest_asyncio-1.3.0-py3-none-any.whl", hash = "sha256:611e26147c7f77640e6d0a92a38ed17c3e9848063698d5c93d5aa7aa11cebff5", size = 15075, upload-time = "2025-11-10T16:07:45.537Z" },
]
[[package]]
name = "pytest-cov"
version = "7.1.0"