From 262183e848561438b1bfeba9cd804ef7acb2211d Mon Sep 17 00:00:00 2001 From: shamoon <4887959+shamoon@users.noreply.github.com> Date: Thu, 18 Jun 2026 08:35:11 -0700 Subject: [PATCH] Enhancement (beta): support LLM timeout config (#13002) --- docs/configuration.md | 7 ++ src-ui/src/app/data/paperless-config.ts | 9 +++ src/documents/tests/test_api_app_config.py | 1 + src/documents/tests/test_views.py | 28 +++++++ src/documents/views.py | 12 +++ src/paperless/config.py | 4 + ...cationconfiguration_llm_request_timeout.py | 23 ++++++ src/paperless/models.py | 6 ++ src/paperless/settings/__init__.py | 3 + src/paperless_ai/client.py | 78 ++++++++++++------- src/paperless_ai/embedding.py | 5 ++ src/paperless_ai/exceptions.py | 2 + src/paperless_ai/tests/test_client.py | 39 ++++++++-- src/paperless_ai/tests/test_embedding.py | 3 + 14 files changed, 184 insertions(+), 36 deletions(-) create mode 100644 src/paperless/migrations/0013_applicationconfiguration_llm_request_timeout.py create mode 100644 src/paperless_ai/exceptions.py diff --git a/docs/configuration.md b/docs/configuration.md index 9780aa94d..4f721fa36 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -2068,6 +2068,13 @@ context by default. Defaults to 8192. +#### [`PAPERLESS_AI_LLM_REQUEST_TIMEOUT=`](#PAPERLESS_AI_LLM_REQUEST_TIMEOUT) {#PAPERLESS_AI_LLM_REQUEST_TIMEOUT} + +: The timeout, in seconds, for requests to the configured AI backend. Increase this when using +local or slow inference servers that need more time to generate responses. + + Defaults to 120. + #### [`PAPERLESS_AI_LLM_BACKEND=`](#PAPERLESS_AI_LLM_BACKEND) {#PAPERLESS_AI_LLM_BACKEND} : The AI backend to use. This can be either "openai-like" or "ollama". 
If set to "ollama", the AI diff --git a/src-ui/src/app/data/paperless-config.ts b/src-ui/src/app/data/paperless-config.ts index 1c74d8c08..ad3f4c0c9 100644 --- a/src-ui/src/app/data/paperless-config.ts +++ b/src-ui/src/app/data/paperless-config.ts @@ -360,6 +360,14 @@ export const PaperlessConfigOptions: ConfigOption[] = [ category: ConfigCategory.AI, note: $localize`Language to use for generated AI suggestions. When unset, AI suggestions use the user's display language if explicitly set.`, }, + { + key: 'llm_request_timeout', + title: $localize`LLM Request Timeout`, + type: ConfigOptionType.Number, + config_key: 'PAPERLESS_AI_LLM_REQUEST_TIMEOUT', + category: ConfigCategory.AI, + note: $localize`Timeout in seconds for LLM requests.`, + }, ] export interface PaperlessConfig extends ObjectWithId { @@ -401,4 +409,5 @@ export interface PaperlessConfig extends ObjectWithId { llm_api_key: string llm_endpoint: string llm_output_language: string + llm_request_timeout: number } diff --git a/src/documents/tests/test_api_app_config.py b/src/documents/tests/test_api_app_config.py index 9b94fff17..214edbeee 100644 --- a/src/documents/tests/test_api_app_config.py +++ b/src/documents/tests/test_api_app_config.py @@ -82,6 +82,7 @@ class TestApiAppConfig(DirectoriesMixin, APITestCase): "llm_api_key": None, "llm_endpoint": None, "llm_output_language": None, + "llm_request_timeout": None, }, ) diff --git a/src/documents/tests/test_views.py b/src/documents/tests/test_views.py index a67590b81..376cb8e93 100644 --- a/src/documents/tests/test_views.py +++ b/src/documents/tests/test_views.py @@ -30,6 +30,7 @@ from documents.signals.handlers import update_llm_suggestions_cache from documents.tests.utils import DirectoriesMixin from documents.tests.utils import read_streaming_response from paperless.models import ApplicationConfiguration +from paperless_ai.exceptions import LLMTimeoutError class TestViews(DirectoriesMixin, TestCase): @@ -476,6 +477,33 @@ class 
TestAISuggestions(DirectoriesMixin, TestCase): get_llm_suggestion_cache(self.document.pk, backend="openai-like"), ) + @patch("documents.views.get_ai_document_classification") + @override_settings( + AI_ENABLED=True, + LLM_BACKEND="openai-like", + ) + def test_ai_suggestions_with_llm_timeout( + self, + mock_get_ai_classification, + ) -> None: + mock_get_ai_classification.side_effect = LLMTimeoutError() + + self.client.force_login(user=self.user) + response = self.client.get( + f"/api/documents/{self.document.pk}/ai_suggestions/", + ) + + self.assertEqual(response.status_code, status.HTTP_503_SERVICE_UNAVAILABLE) + self.assertEqual( + response.json(), + { + "ai": ["AI backend request timed out."], + }, + ) + self.assertIsNone( + get_llm_suggestion_cache(self.document.pk, backend="openai-like"), + ) + def test_invalidate_suggestions_cache(self) -> None: self.client.force_login(user=self.user) suggestions = { diff --git a/src/documents/views.py b/src/documents/views.py index 1bc459211..0b8ce0f2a 100644 --- a/src/documents/views.py +++ b/src/documents/views.py @@ -241,6 +241,7 @@ from paperless.serialisers import UserSerializer from paperless.views import StandardPagination from paperless_ai.ai_classifier import get_ai_document_classification from paperless_ai.chat import stream_chat_with_documents +from paperless_ai.exceptions import LLMTimeoutError from paperless_ai.matching import extract_unmatched_names from paperless_ai.matching import match_correspondents_by_name from paperless_ai.matching import match_document_types_by_name @@ -1510,6 +1511,17 @@ class DocumentViewSet( exc_info=True, ) raise ValidationError({"ai": [_("Invalid AI configuration.")]}) from exc + except LLMTimeoutError as exc: + logger.exception( + "AI backend timed out while generating suggestions for document %s: %s", + doc.pk, + exc, + exc_info=True, + ) + return Response( + {"ai": [_("AI backend request timed out.")]}, + status=status.HTTP_503_SERVICE_UNAVAILABLE, + ) matched_tags = 
match_tags_by_name( llm_suggestions.get("tags", []), diff --git a/src/paperless/config.py b/src/paperless/config.py index 40341b92e..0a1984de7 100644 --- a/src/paperless/config.py +++ b/src/paperless/config.py @@ -197,6 +197,7 @@ class AIConfig(BaseConfig): llm_embedding_endpoint: str = dataclasses.field(init=False) llm_embedding_chunk_size: int = dataclasses.field(init=False) llm_context_size: int = dataclasses.field(init=False) + llm_request_timeout: int = dataclasses.field(init=False) llm_backend: str = dataclasses.field(init=False) llm_model: str = dataclasses.field(init=False) llm_api_key: str = dataclasses.field(init=False) @@ -221,6 +222,9 @@ class AIConfig(BaseConfig): app_config.llm_embedding_chunk_size or settings.LLM_EMBEDDING_CHUNK_SIZE ) self.llm_context_size = app_config.llm_context_size or settings.LLM_CONTEXT_SIZE + self.llm_request_timeout = ( + app_config.llm_request_timeout or settings.LLM_REQUEST_TIMEOUT + ) self.llm_backend = app_config.llm_backend or settings.LLM_BACKEND self.llm_model = app_config.llm_model or settings.LLM_MODEL self.llm_api_key = app_config.llm_api_key or settings.LLM_API_KEY diff --git a/src/paperless/migrations/0013_applicationconfiguration_llm_request_timeout.py b/src/paperless/migrations/0013_applicationconfiguration_llm_request_timeout.py new file mode 100644 index 000000000..836c0696c --- /dev/null +++ b/src/paperless/migrations/0013_applicationconfiguration_llm_request_timeout.py @@ -0,0 +1,23 @@ +# Generated by Django 5.2.14 on 2026-06-14 14:22 + +import django.core.validators +from django.db import migrations +from django.db import models + + +class Migration(migrations.Migration): + dependencies = [ + ("paperless", "0012_applicationconfiguration_llm_output_language"), + ] + + operations = [ + migrations.AddField( + model_name="applicationconfiguration", + name="llm_request_timeout", + field=models.PositiveSmallIntegerField( + null=True, + validators=[django.core.validators.MinValueValidator(1)], + 
verbose_name="Sets the LLM request timeout in seconds", + ), + ), + ] diff --git a/src/paperless/models.py b/src/paperless/models.py index d246a546f..71920cb0b 100644 --- a/src/paperless/models.py +++ b/src/paperless/models.py @@ -366,6 +366,12 @@ class ApplicationConfiguration(AbstractSingletonModel): max_length=32, ) + llm_request_timeout = models.PositiveSmallIntegerField( + verbose_name=_("Sets the LLM request timeout in seconds"), + null=True, + validators=[MinValueValidator(1)], + ) + class Meta: verbose_name = _("paperless application settings") permissions = [ diff --git a/src/paperless/settings/__init__.py b/src/paperless/settings/__init__.py index 46433a490..546b09b80 100644 --- a/src/paperless/settings/__init__.py +++ b/src/paperless/settings/__init__.py @@ -1206,6 +1206,9 @@ if LLM_EMBEDDING_CHUNK_SIZE < 1: LLM_CONTEXT_SIZE = get_int_from_env("PAPERLESS_AI_LLM_CONTEXT_SIZE", 8192) if LLM_CONTEXT_SIZE < 1: raise ImproperlyConfigured("PAPERLESS_AI_LLM_CONTEXT_SIZE must be >= 1") +LLM_REQUEST_TIMEOUT = get_int_from_env("PAPERLESS_AI_LLM_REQUEST_TIMEOUT", 120) +if LLM_REQUEST_TIMEOUT < 1: + raise ImproperlyConfigured("PAPERLESS_AI_LLM_REQUEST_TIMEOUT must be >= 1") LLM_BACKEND = get_choice_from_env( "PAPERLESS_AI_LLM_BACKEND", {"ollama", "openai-like"}, diff --git a/src/paperless_ai/client.py b/src/paperless_ai/client.py index 4aa57c95a..fcc572f29 100644 --- a/src/paperless_ai/client.py +++ b/src/paperless_ai/client.py @@ -1,11 +1,14 @@ import json import logging +from collections.abc import Iterator +from contextlib import contextmanager from typing import TYPE_CHECKING +import httpx + from paperless.models import LLMBackend if TYPE_CHECKING: - from llama_index.core.llms import ChatMessage from llama_index.llms.ollama import Ollama from llama_index.llms.openai_like import OpenAILike @@ -16,6 +19,7 @@ from paperless.network import create_pinned_async_httpx_client from paperless.network import create_pinned_httpx_client from paperless.network import 
validate_outbound_http_url from paperless_ai.base_model import DocumentClassifierSchema +from paperless_ai.exceptions import LLMTimeoutError logger = logging.getLogger("paperless_ai.client") @@ -61,16 +65,16 @@ class AIClient: model=self.settings.llm_model or "llama3.1", base_url=endpoint, context_window=self.settings.llm_context_size, - request_timeout=120, + request_timeout=self.settings.llm_request_timeout, system_prompt=LLM_SYSTEM_PROMPT, client=Client( host=endpoint, - timeout=120, + timeout=self.settings.llm_request_timeout, transport=transport, ), async_client=AsyncClient( host=endpoint, - timeout=120, + timeout=self.settings.llm_request_timeout, transport=async_transport, ), ) @@ -84,15 +88,18 @@ class AIClient: http_client = create_pinned_httpx_client( endpoint, allow_internal=self.settings.llm_allow_internal_endpoints, + timeout=self.settings.llm_request_timeout, ) async_http_client = create_pinned_async_httpx_client( endpoint, allow_internal=self.settings.llm_allow_internal_endpoints, + timeout=self.settings.llm_request_timeout, ) return OpenAILike( model=self.settings.llm_model or "gpt-3.5-turbo", api_base=endpoint, api_key=self.settings.llm_api_key, + timeout=self.settings.llm_request_timeout, is_chat_model=True, is_function_calling_model=True, system_prompt=LLM_SYSTEM_PROMPT, @@ -113,11 +120,12 @@ class AIClient: user_msg = ChatMessage(role="user", content=prompt) if self.settings.llm_backend == LLMBackend.OLLAMA: - result = self.llm.chat( - [user_msg], - format=DocumentClassifierSchema.model_json_schema(), - think=False, - ) + with self._normalize_timeouts(): + result = self.llm.chat( + [user_msg], + format=DocumentClassifierSchema.model_json_schema(), + think=False, + ) logger.debug("LLM query result: %s", result) parsed = DocumentClassifierSchema(**json.loads(result.message.content)) return parsed.model_dump() @@ -125,27 +133,39 @@ class AIClient: from llama_index.core.program.function_program import get_function_tool tool = 
get_function_tool(DocumentClassifierSchema) - result = self.llm.chat_with_tools( - tools=[tool], - user_msg=user_msg, - chat_history=[], - allow_parallel_tool_calls=True, - tool_required=True, - ) - tool_calls = self.llm.get_tool_calls_from_response( - result, - error_on_no_tool_call=True, - ) + with self._normalize_timeouts(): + result = self.llm.chat_with_tools( + tools=[tool], + user_msg=user_msg, + chat_history=[], + allow_parallel_tool_calls=True, + tool_required=True, + ) + tool_calls = self.llm.get_tool_calls_from_response( + result, + error_on_no_tool_call=True, + ) logger.debug("LLM query result: %s", tool_calls) parsed = DocumentClassifierSchema(**tool_calls[0].tool_kwargs) return parsed.model_dump() - def run_chat(self, messages: list["ChatMessage"]) -> str: - logger.debug( - "Running chat query against %s with model %s", - self.settings.llm_backend, - self.settings.llm_model, - ) - result = self.llm.chat(messages) - logger.debug("Chat result: %s", result) - return result + @contextmanager + def _normalize_timeouts(self) -> Iterator[None]: + try: + yield + except httpx.TimeoutException as exc: + raise LLMTimeoutError from exc + except Exception as exc: + if self._is_openai_timeout(exc): + raise LLMTimeoutError from exc + raise + + def _is_openai_timeout(self, exc: Exception) -> bool: + if self.settings.llm_backend != LLMBackend.OPENAI_LIKE: + return False + + # Keep OpenAI imports out of module import paths and only load the SDK + # when translating an error from an OpenAI-backed request. 
+ from openai import APITimeoutError + + return isinstance(exc, APITimeoutError) diff --git a/src/paperless_ai/embedding.py b/src/paperless_ai/embedding.py index 0d11ea423..4044d7f08 100644 --- a/src/paperless_ai/embedding.py +++ b/src/paperless_ai/embedding.py @@ -32,15 +32,18 @@ def get_embedding_model(config: AIConfig) -> "BaseEmbedding": http_client = create_pinned_httpx_client( endpoint, allow_internal=config.llm_allow_internal_endpoints, + timeout=config.llm_request_timeout, ) async_http_client = create_pinned_async_httpx_client( endpoint, allow_internal=config.llm_allow_internal_endpoints, + timeout=config.llm_request_timeout, ) return OpenAILikeEmbedding( model_name=config.llm_embedding_model or "text-embedding-3-small", api_key=config.llm_api_key, api_base=endpoint, + timeout=config.llm_request_timeout, http_client=http_client, async_http_client=async_http_client, ) @@ -73,12 +76,14 @@ def get_embedding_model(config: AIConfig) -> "BaseEmbedding": ) embedding._client = Client( host=endpoint, + timeout=config.llm_request_timeout, transport=PinnedHostHTTPTransport( allow_internal=config.llm_allow_internal_endpoints, ), ) embedding._async_client = AsyncClient( host=endpoint, + timeout=config.llm_request_timeout, transport=PinnedHostAsyncHTTPTransport( allow_internal=config.llm_allow_internal_endpoints, ), diff --git a/src/paperless_ai/exceptions.py b/src/paperless_ai/exceptions.py new file mode 100644 index 000000000..48ac1db5d --- /dev/null +++ b/src/paperless_ai/exceptions.py @@ -0,0 +1,2 @@ +class LLMTimeoutError(Exception): + pass diff --git a/src/paperless_ai/tests/test_client.py b/src/paperless_ai/tests/test_client.py index be660274f..b51c22acb 100644 --- a/src/paperless_ai/tests/test_client.py +++ b/src/paperless_ai/tests/test_client.py @@ -3,12 +3,14 @@ from unittest.mock import ANY from unittest.mock import MagicMock from unittest.mock import patch +import httpx +import openai import pytest -from llama_index.core.llms import ChatMessage from 
llama_index.core.llms.llm import ToolSelection from paperless_ai.client import LLM_SYSTEM_PROMPT from paperless_ai.client import AIClient +from paperless_ai.exceptions import LLMTimeoutError @pytest.fixture @@ -17,6 +19,7 @@ def mock_ai_config(): mock_config = MagicMock() mock_config.llm_allow_internal_endpoints = True mock_config.llm_context_size = 8192 + mock_config.llm_request_timeout = 120 MockAIConfig.return_value = mock_config yield mock_config @@ -64,6 +67,7 @@ def test_get_llm_openai(mock_ai_config, mock_openai_llm): model="test_model", api_base="http://test-url", api_key="test_api_key", + timeout=120, is_chat_model=True, is_function_calling_model=True, system_prompt=LLM_SYSTEM_PROMPT, @@ -151,17 +155,38 @@ def test_run_llm_query_openai_uses_tools(mock_ai_config, mock_openai_llm): mock_llm_instance.chat_with_tools.assert_called_once() -def test_run_chat(mock_ai_config, mock_ollama_llm): +def test_run_llm_query_openai_timeout_raises_local_error( + mock_ai_config, + mock_openai_llm, +): + mock_ai_config.llm_backend = "openai-like" + mock_ai_config.llm_model = "test_model" + mock_ai_config.llm_api_key = "test_api_key" + mock_ai_config.llm_endpoint = "http://test-url" + + request = httpx.Request("POST", "http://test-url/v1/chat/completions") + mock_openai_llm.return_value.chat_with_tools.side_effect = openai.APITimeoutError( + request, + ) + + client = AIClient() + + with pytest.raises(LLMTimeoutError): + client.run_llm_query("test_prompt") + + +def test_run_llm_query_httpx_timeout_raises_local_error( + mock_ai_config, + mock_ollama_llm, +): mock_ai_config.llm_backend = "ollama" mock_ai_config.llm_model = "test_model" mock_ai_config.llm_endpoint = "http://test-url" mock_llm_instance = mock_ollama_llm.return_value - mock_llm_instance.chat.return_value = "test_chat_result" + mock_llm_instance.chat.side_effect = httpx.ReadTimeout("timed out") client = AIClient() - messages = [ChatMessage(role="user", content="Hello")] - result = client.run_chat(messages) - 
mock_llm_instance.chat.assert_called_once_with(messages) - assert result == "test_chat_result" + with pytest.raises(LLMTimeoutError): + client.run_llm_query("test_prompt") diff --git a/src/paperless_ai/tests/test_embedding.py b/src/paperless_ai/tests/test_embedding.py index 883b5172f..fa84d1acc 100644 --- a/src/paperless_ai/tests/test_embedding.py +++ b/src/paperless_ai/tests/test_embedding.py @@ -19,6 +19,7 @@ def mock_ai_config(): MockAIConfig.return_value.llm_embedding_endpoint = None MockAIConfig.return_value.llm_allow_internal_endpoints = True MockAIConfig.return_value.llm_context_size = 8192 + MockAIConfig.return_value.llm_request_timeout = 120 yield MockAIConfig @@ -71,6 +72,7 @@ def test_get_embedding_model_openai(mock_ai_config): model_name="text-embedding-3-small", api_key="test_api_key", api_base="http://test-url", + timeout=120, http_client=ANY, async_http_client=ANY, ) @@ -92,6 +94,7 @@ def test_get_embedding_model_openai_prefers_embedding_endpoint(mock_ai_config): model_name="text-embedding-3-small", api_key="test_api_key", api_base="http://embedding-url", + timeout=120, http_client=ANY, async_http_client=ANY, )