From 262183e848561438b1bfeba9cd804ef7acb2211d Mon Sep 17 00:00:00 2001
From: shamoon <4887959+shamoon@users.noreply.github.com>
Date: Thu, 18 Jun 2026 08:35:11 -0700
Subject: [PATCH] Enhancement (beta): support LLM timeout config (#13002)

---
 docs/configuration.md                         |  7 ++
 src-ui/src/app/data/paperless-config.ts       |  9 +++
 src/documents/tests/test_api_app_config.py    |  1 +
 src/documents/tests/test_views.py             | 28 +++++++
 src/documents/views.py                        | 12 +++
 src/paperless/config.py                       |  4 +
 ...cationconfiguration_llm_request_timeout.py | 23 ++++++
 src/paperless/models.py                       |  6 ++
 src/paperless/settings/__init__.py            |  3 +
 src/paperless_ai/client.py                    | 78 ++++++++++++-------
 src/paperless_ai/embedding.py                 |  5 ++
 src/paperless_ai/exceptions.py                |  2 +
 src/paperless_ai/tests/test_client.py         | 39 ++++++++--
 src/paperless_ai/tests/test_embedding.py      |  3 +
 14 files changed, 184 insertions(+), 36 deletions(-)
 create mode 100644 src/paperless/migrations/0013_applicationconfiguration_llm_request_timeout.py
 create mode 100644 src/paperless_ai/exceptions.py
diff --git a/docs/configuration.md b/docs/configuration.md
index 9780aa94d..4f721fa36 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -2068,6 +2068,13 @@ context by default.
 
     Defaults to 8192.
 
+#### [`PAPERLESS_AI_LLM_REQUEST_TIMEOUT=<int>`](#PAPERLESS_AI_LLM_REQUEST_TIMEOUT) {#PAPERLESS_AI_LLM_REQUEST_TIMEOUT}
+
+: The timeout, in seconds, for LLM and embedding requests to the configured AI backend. Increase this
+when using local or slow inference servers that need more time to generate responses.
+
+    Defaults to 120.
+
 #### [`PAPERLESS_AI_LLM_BACKEND=<str>`](#PAPERLESS_AI_LLM_BACKEND) {#PAPERLESS_AI_LLM_BACKEND}
 
 : The AI backend to use. This can be either "openai-like" or "ollama". If set to "ollama", the AI
diff --git a/src-ui/src/app/data/paperless-config.ts b/src-ui/src/app/data/paperless-config.ts
index 1c74d8c08..ad3f4c0c9 100644
--- a/src-ui/src/app/data/paperless-config.ts
+++ b/src-ui/src/app/data/paperless-config.ts
@@ -360,6 +360,14 @@ export const PaperlessConfigOptions: ConfigOption[] = [
     category: ConfigCategory.AI,
     note: $localize`Language to use for generated AI suggestions. When unset, AI suggestions use the user's display language if explicitly set.`,
   },
+  {
+    key: 'llm_request_timeout',
+    title: $localize`LLM Request Timeout`,
+    type: ConfigOptionType.Number,
+    config_key: 'PAPERLESS_AI_LLM_REQUEST_TIMEOUT',
+    category: ConfigCategory.AI,
+    note: $localize`Timeout in seconds for LLM requests.`,
+  },
 ]
 
 export interface PaperlessConfig extends ObjectWithId {
@@ -401,4 +409,5 @@ export interface PaperlessConfig extends ObjectWithId {
   llm_api_key: string
   llm_endpoint: string
   llm_output_language: string
+  llm_request_timeout: number
 }
diff --git a/src/documents/tests/test_api_app_config.py b/src/documents/tests/test_api_app_config.py
index 9b94fff17..214edbeee 100644
--- a/src/documents/tests/test_api_app_config.py
+++ b/src/documents/tests/test_api_app_config.py
@@ -82,6 +82,7 @@ class TestApiAppConfig(DirectoriesMixin, APITestCase):
                 "llm_api_key": None,
                 "llm_endpoint": None,
                 "llm_output_language": None,
+                "llm_request_timeout": None,
             },
         )
 
diff --git a/src/documents/tests/test_views.py b/src/documents/tests/test_views.py
index a67590b81..376cb8e93 100644
--- a/src/documents/tests/test_views.py
+++ b/src/documents/tests/test_views.py
@@ -30,6 +30,7 @@ from documents.signals.handlers import update_llm_suggestions_cache
 from documents.tests.utils import DirectoriesMixin
 from documents.tests.utils import read_streaming_response
 from paperless.models import ApplicationConfiguration
+from paperless_ai.exceptions import LLMTimeoutError
 
 
 class TestViews(DirectoriesMixin, TestCase):
@@ -476,6 +477,33 @@ class TestAISuggestions(DirectoriesMixin, TestCase):
             get_llm_suggestion_cache(self.document.pk, backend="openai-like"),
         )
 
+    @patch("documents.views.get_ai_document_classification")
+    @override_settings(
+        AI_ENABLED=True,
+        LLM_BACKEND="openai-like",
+    )
+    def test_ai_suggestions_with_llm_timeout(
+        self,
+        mock_get_ai_classification,
+    ) -> None:
+        mock_get_ai_classification.side_effect = LLMTimeoutError()
+
+        self.client.force_login(user=self.user)
+        response = self.client.get(
+            f"/api/documents/{self.document.pk}/ai_suggestions/",
+        )
+
+        self.assertEqual(response.status_code, status.HTTP_503_SERVICE_UNAVAILABLE)
+        self.assertEqual(
+            response.json(),
+            {
+                "ai": ["AI backend request timed out."],
+            },
+        )
+        self.assertIsNone(
+            get_llm_suggestion_cache(self.document.pk, backend="openai-like"),
+        )
+
     def test_invalidate_suggestions_cache(self) -> None:
         self.client.force_login(user=self.user)
         suggestions = {
diff --git a/src/documents/views.py b/src/documents/views.py
index 1bc459211..0b8ce0f2a 100644
--- a/src/documents/views.py
+++ b/src/documents/views.py
@@ -241,6 +241,7 @@ from paperless.serialisers import UserSerializer
 from paperless.views import StandardPagination
 from paperless_ai.ai_classifier import get_ai_document_classification
 from paperless_ai.chat import stream_chat_with_documents
+from paperless_ai.exceptions import LLMTimeoutError
 from paperless_ai.matching import extract_unmatched_names
 from paperless_ai.matching import match_correspondents_by_name
 from paperless_ai.matching import match_document_types_by_name
@@ -1510,6 +1511,17 @@ class DocumentViewSet(
                 exc_info=True,
             )
             raise ValidationError({"ai": [_("Invalid AI configuration.")]}) from exc
+        except LLMTimeoutError as exc:
+            logger.exception(
+                "AI backend timed out while generating suggestions for document %s: %s",
+                doc.pk,
+                exc,
+                exc_info=True,
+            )
+            return Response(
+                {"ai": [_("AI backend request timed out.")]},
+                status=status.HTTP_503_SERVICE_UNAVAILABLE,
+            )
 
         matched_tags = match_tags_by_name(
             llm_suggestions.get("tags", []),
diff --git a/src/paperless/config.py b/src/paperless/config.py
index 40341b92e..0a1984de7 100644
--- a/src/paperless/config.py
+++ b/src/paperless/config.py
@@ -197,6 +197,7 @@ class AIConfig(BaseConfig):
     llm_embedding_endpoint: str = dataclasses.field(init=False)
     llm_embedding_chunk_size: int = dataclasses.field(init=False)
     llm_context_size: int = dataclasses.field(init=False)
+    llm_request_timeout: int = dataclasses.field(init=False)
     llm_backend: str = dataclasses.field(init=False)
     llm_model: str = dataclasses.field(init=False)
     llm_api_key: str = dataclasses.field(init=False)
@@ -221,6 +222,9 @@ class AIConfig(BaseConfig):
             app_config.llm_embedding_chunk_size or settings.LLM_EMBEDDING_CHUNK_SIZE
         )
         self.llm_context_size = app_config.llm_context_size or settings.LLM_CONTEXT_SIZE
+        self.llm_request_timeout = (
+            app_config.llm_request_timeout or settings.LLM_REQUEST_TIMEOUT
+        )
         self.llm_backend = app_config.llm_backend or settings.LLM_BACKEND
         self.llm_model = app_config.llm_model or settings.LLM_MODEL
         self.llm_api_key = app_config.llm_api_key or settings.LLM_API_KEY
diff --git a/src/paperless/migrations/0013_applicationconfiguration_llm_request_timeout.py b/src/paperless/migrations/0013_applicationconfiguration_llm_request_timeout.py
new file mode 100644
index 000000000..836c0696c
--- /dev/null
+++ b/src/paperless/migrations/0013_applicationconfiguration_llm_request_timeout.py
@@ -0,0 +1,23 @@
+# Generated by Django 5.2.14 on 2026-06-14 14:22
+
+import django.core.validators
+from django.db import migrations
+from django.db import models
+
+
+class Migration(migrations.Migration):
+    dependencies = [
+        ("paperless", "0012_applicationconfiguration_llm_output_language"),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name="applicationconfiguration",
+            name="llm_request_timeout",
+            field=models.PositiveSmallIntegerField(
+                null=True,
+                validators=[django.core.validators.MinValueValidator(1)],
+                verbose_name="Sets the LLM request timeout in seconds",
+            ),
+        ),
+    ]
diff --git a/src/paperless/models.py b/src/paperless/models.py
index d246a546f..71920cb0b 100644
--- a/src/paperless/models.py
+++ b/src/paperless/models.py
@@ -366,6 +366,12 @@ class ApplicationConfiguration(AbstractSingletonModel):
         max_length=32,
     )
 
+    llm_request_timeout = models.PositiveSmallIntegerField(
+        verbose_name=_("Sets the LLM request timeout in seconds"),
+        null=True,
+        validators=[MinValueValidator(1)],
+    )
+
     class Meta:
         verbose_name = _("paperless application settings")
         permissions = [
diff --git a/src/paperless/settings/__init__.py b/src/paperless/settings/__init__.py
index 46433a490..546b09b80 100644
--- a/src/paperless/settings/__init__.py
+++ b/src/paperless/settings/__init__.py
@@ -1206,6 +1206,9 @@ if LLM_EMBEDDING_CHUNK_SIZE < 1:
 LLM_CONTEXT_SIZE = get_int_from_env("PAPERLESS_AI_LLM_CONTEXT_SIZE", 8192)
 if LLM_CONTEXT_SIZE < 1:
     raise ImproperlyConfigured("PAPERLESS_AI_LLM_CONTEXT_SIZE must be >= 1")
+LLM_REQUEST_TIMEOUT = get_int_from_env("PAPERLESS_AI_LLM_REQUEST_TIMEOUT", 120)
+if LLM_REQUEST_TIMEOUT < 1:
+    raise ImproperlyConfigured("PAPERLESS_AI_LLM_REQUEST_TIMEOUT must be >= 1")
 LLM_BACKEND = get_choice_from_env(
     "PAPERLESS_AI_LLM_BACKEND",
     {"ollama", "openai-like"},
diff --git a/src/paperless_ai/client.py b/src/paperless_ai/client.py
index 4aa57c95a..fcc572f29 100644
--- a/src/paperless_ai/client.py
+++ b/src/paperless_ai/client.py
@@ -1,11 +1,14 @@
 import json
 import logging
+from collections.abc import Iterator
+from contextlib import contextmanager
 from typing import TYPE_CHECKING
 
+import httpx
+
 from paperless.models import LLMBackend
 
 if TYPE_CHECKING:
-    from llama_index.core.llms import ChatMessage
     from llama_index.llms.ollama import Ollama
     from llama_index.llms.openai_like import OpenAILike
 
@@ -16,6 +19,7 @@ from paperless.network import create_pinned_async_httpx_client
 from paperless.network import create_pinned_httpx_client
 from paperless.network import validate_outbound_http_url
 from paperless_ai.base_model import DocumentClassifierSchema
+from paperless_ai.exceptions import LLMTimeoutError
 
 logger = logging.getLogger("paperless_ai.client")
 
@@ -61,16 +65,16 @@ class AIClient:
                 model=self.settings.llm_model or "llama3.1",
                 base_url=endpoint,
                 context_window=self.settings.llm_context_size,
-                request_timeout=120,
+                request_timeout=self.settings.llm_request_timeout,
                 system_prompt=LLM_SYSTEM_PROMPT,
                 client=Client(
                     host=endpoint,
-                    timeout=120,
+                    timeout=self.settings.llm_request_timeout,
                     transport=transport,
                 ),
                 async_client=AsyncClient(
                     host=endpoint,
-                    timeout=120,
+                    timeout=self.settings.llm_request_timeout,
                     transport=async_transport,
                 ),
             )
@@ -84,15 +88,18 @@ class AIClient:
                 http_client = create_pinned_httpx_client(
                     endpoint,
                     allow_internal=self.settings.llm_allow_internal_endpoints,
+                    timeout=self.settings.llm_request_timeout,
                 )
                 async_http_client = create_pinned_async_httpx_client(
                     endpoint,
                     allow_internal=self.settings.llm_allow_internal_endpoints,
+                    timeout=self.settings.llm_request_timeout,
                 )
             return OpenAILike(
                 model=self.settings.llm_model or "gpt-3.5-turbo",
                 api_base=endpoint,
                 api_key=self.settings.llm_api_key,
+                timeout=self.settings.llm_request_timeout,
                 is_chat_model=True,
                 is_function_calling_model=True,
                 system_prompt=LLM_SYSTEM_PROMPT,
@@ -113,11 +120,12 @@ class AIClient:
 
         user_msg = ChatMessage(role="user", content=prompt)
         if self.settings.llm_backend == LLMBackend.OLLAMA:
-            result = self.llm.chat(
-                [user_msg],
-                format=DocumentClassifierSchema.model_json_schema(),
-                think=False,
-            )
+            with self._normalize_timeouts():
+                result = self.llm.chat(
+                    [user_msg],
+                    format=DocumentClassifierSchema.model_json_schema(),
+                    think=False,
+                )
             logger.debug("LLM query result: %s", result)
             parsed = DocumentClassifierSchema(**json.loads(result.message.content))
             return parsed.model_dump()
@@ -125,27 +133,39 @@ class AIClient:
         from llama_index.core.program.function_program import get_function_tool
 
         tool = get_function_tool(DocumentClassifierSchema)
-        result = self.llm.chat_with_tools(
-            tools=[tool],
-            user_msg=user_msg,
-            chat_history=[],
-            allow_parallel_tool_calls=True,
-            tool_required=True,
-        )
-        tool_calls = self.llm.get_tool_calls_from_response(
-            result,
-            error_on_no_tool_call=True,
-        )
+        with self._normalize_timeouts():
+            result = self.llm.chat_with_tools(
+                tools=[tool],
+                user_msg=user_msg,
+                chat_history=[],
+                allow_parallel_tool_calls=True,
+                tool_required=True,
+            )
+            tool_calls = self.llm.get_tool_calls_from_response(
+                result,
+                error_on_no_tool_call=True,
+            )
         logger.debug("LLM query result: %s", tool_calls)
         parsed = DocumentClassifierSchema(**tool_calls[0].tool_kwargs)
         return parsed.model_dump()
 
-    def run_chat(self, messages: list["ChatMessage"]) -> str:
-        logger.debug(
-            "Running chat query against %s with model %s",
-            self.settings.llm_backend,
-            self.settings.llm_model,
-        )
-        result = self.llm.chat(messages)
-        logger.debug("Chat result: %s", result)
-        return result
+    @contextmanager
+    def _normalize_timeouts(self) -> Iterator[None]:
+        try:
+            yield
+        except httpx.TimeoutException as exc:
+            raise LLMTimeoutError from exc
+        except Exception as exc:
+            if self._is_openai_timeout(exc):
+                raise LLMTimeoutError from exc
+            raise
+
+    def _is_openai_timeout(self, exc: Exception) -> bool:
+        if self.settings.llm_backend != LLMBackend.OPENAI_LIKE:
+            return False
+
+        # Keep OpenAI imports out of module import paths and only load the SDK
+        # when translating an error from an OpenAI-backed request.
+        from openai import APITimeoutError
+
+        return isinstance(exc, APITimeoutError)
diff --git a/src/paperless_ai/embedding.py b/src/paperless_ai/embedding.py
index 0d11ea423..4044d7f08 100644
--- a/src/paperless_ai/embedding.py
+++ b/src/paperless_ai/embedding.py
@@ -32,15 +32,18 @@ def get_embedding_model(config: AIConfig) -> "BaseEmbedding":
                 http_client = create_pinned_httpx_client(
                     endpoint,
                     allow_internal=config.llm_allow_internal_endpoints,
+                    timeout=config.llm_request_timeout,
                 )
                 async_http_client = create_pinned_async_httpx_client(
                     endpoint,
                     allow_internal=config.llm_allow_internal_endpoints,
+                    timeout=config.llm_request_timeout,
                 )
             return OpenAILikeEmbedding(
                 model_name=config.llm_embedding_model or "text-embedding-3-small",
                 api_key=config.llm_api_key,
                 api_base=endpoint,
+                timeout=config.llm_request_timeout,
                 http_client=http_client,
                 async_http_client=async_http_client,
             )
@@ -73,12 +76,14 @@ def get_embedding_model(config: AIConfig) -> "BaseEmbedding":
             )
             embedding._client = Client(
                 host=endpoint,
+                timeout=config.llm_request_timeout,
                 transport=PinnedHostHTTPTransport(
                     allow_internal=config.llm_allow_internal_endpoints,
                 ),
             )
             embedding._async_client = AsyncClient(
                 host=endpoint,
+                timeout=config.llm_request_timeout,
                 transport=PinnedHostAsyncHTTPTransport(
                     allow_internal=config.llm_allow_internal_endpoints,
                 ),
diff --git a/src/paperless_ai/exceptions.py b/src/paperless_ai/exceptions.py
new file mode 100644
index 000000000..48ac1db5d
--- /dev/null
+++ b/src/paperless_ai/exceptions.py
@@ -0,0 +1,2 @@
+class LLMTimeoutError(Exception):
+    """Raised when a request to the configured AI backend times out."""
diff --git a/src/paperless_ai/tests/test_client.py b/src/paperless_ai/tests/test_client.py
index be660274f..b51c22acb 100644
--- a/src/paperless_ai/tests/test_client.py
+++ b/src/paperless_ai/tests/test_client.py
@@ -3,12 +3,14 @@ from unittest.mock import ANY
 from unittest.mock import MagicMock
 from unittest.mock import patch
 
+import httpx
+import openai
 import pytest
-from llama_index.core.llms import ChatMessage
 from llama_index.core.llms.llm import ToolSelection
 
 from paperless_ai.client import LLM_SYSTEM_PROMPT
 from paperless_ai.client import AIClient
+from paperless_ai.exceptions import LLMTimeoutError
 
 
 @pytest.fixture
@@ -17,6 +19,7 @@ def mock_ai_config():
         mock_config = MagicMock()
         mock_config.llm_allow_internal_endpoints = True
         mock_config.llm_context_size = 8192
+        mock_config.llm_request_timeout = 120
         MockAIConfig.return_value = mock_config
         yield mock_config
 
@@ -64,6 +67,7 @@ def test_get_llm_openai(mock_ai_config, mock_openai_llm):
         model="test_model",
         api_base="http://test-url",
         api_key="test_api_key",
+        timeout=120,
         is_chat_model=True,
         is_function_calling_model=True,
         system_prompt=LLM_SYSTEM_PROMPT,
@@ -151,17 +155,38 @@ def test_run_llm_query_openai_uses_tools(mock_ai_config, mock_openai_llm):
     mock_llm_instance.chat_with_tools.assert_called_once()
 
 
-def test_run_chat(mock_ai_config, mock_ollama_llm):
+def test_run_llm_query_openai_timeout_raises_local_error(
+    mock_ai_config,
+    mock_openai_llm,
+):
+    mock_ai_config.llm_backend = "openai-like"
+    mock_ai_config.llm_model = "test_model"
+    mock_ai_config.llm_api_key = "test_api_key"
+    mock_ai_config.llm_endpoint = "http://test-url"
+
+    request = httpx.Request("POST", "http://test-url/v1/chat/completions")
+    mock_openai_llm.return_value.chat_with_tools.side_effect = openai.APITimeoutError(
+        request,
+    )
+
+    client = AIClient()
+
+    with pytest.raises(LLMTimeoutError):
+        client.run_llm_query("test_prompt")
+
+
+def test_run_llm_query_httpx_timeout_raises_local_error(
+    mock_ai_config,
+    mock_ollama_llm,
+):
     mock_ai_config.llm_backend = "ollama"
     mock_ai_config.llm_model = "test_model"
     mock_ai_config.llm_endpoint = "http://test-url"
 
     mock_llm_instance = mock_ollama_llm.return_value
-    mock_llm_instance.chat.return_value = "test_chat_result"
+    mock_llm_instance.chat.side_effect = httpx.ReadTimeout("timed out")
 
     client = AIClient()
-    messages = [ChatMessage(role="user", content="Hello")]
-    result = client.run_chat(messages)
 
-    mock_llm_instance.chat.assert_called_once_with(messages)
-    assert result == "test_chat_result"
+    with pytest.raises(LLMTimeoutError):
+        client.run_llm_query("test_prompt")
diff --git a/src/paperless_ai/tests/test_embedding.py b/src/paperless_ai/tests/test_embedding.py
index 883b5172f..fa84d1acc 100644
--- a/src/paperless_ai/tests/test_embedding.py
+++ b/src/paperless_ai/tests/test_embedding.py
@@ -19,6 +19,7 @@ def mock_ai_config():
         MockAIConfig.return_value.llm_embedding_endpoint = None
         MockAIConfig.return_value.llm_allow_internal_endpoints = True
         MockAIConfig.return_value.llm_context_size = 8192
+        MockAIConfig.return_value.llm_request_timeout = 120
         yield MockAIConfig
 
 
@@ -71,6 +72,7 @@ def test_get_embedding_model_openai(mock_ai_config):
             model_name="text-embedding-3-small",
             api_key="test_api_key",
             api_base="http://test-url",
+            timeout=120,
             http_client=ANY,
             async_http_client=ANY,
         )
@@ -92,6 +94,7 @@ def test_get_embedding_model_openai_prefers_embedding_endpoint(mock_ai_config):
             model_name="text-embedding-3-small",
             api_key="test_api_key",
             api_base="http://embedding-url",
+            timeout=120,
             http_client=ANY,
             async_http_client=ANY,
         )