Fix: release pooled DB connection during AI LLM/embedding calls (#12983)

2026-08-02 17:12:18 +00:00 · 2026-06-11 13:07:31 -07:00
parent f188d308eb
commit 4394403beb
4 changed files with 70 additions and 24 deletions
@@ -8,6 +8,7 @@ from documents.models import Document
 from documents.permissions import get_objects_for_user_owner_aware
 from paperless.config import AIConfig
 from paperless_ai.client import AIClient
+from paperless_ai.db import db_connection_released
 from paperless_ai.indexing import query_similar_documents
 from paperless_ai.indexing import truncate_content

@@ -146,20 +147,23 @@ def get_ai_document_classification(
    )

    client = AIClient()
-    result = client.run_llm_query(prompt)
-    suggestions = parse_ai_response(result)
-    if output_language:
-        localized = client.run_llm_query(
-            build_localization_prompt(suggestions, output_language),
-        )
-        localized_suggestions = parse_ai_response(localized)
-        suggestions = {
-            **suggestions,
-            "title": localized_suggestions["title"] or suggestions["title"],
-            "tags": localized_suggestions["tags"] or suggestions["tags"],
-            "document_types": localized_suggestions["document_types"]
-            or suggestions["document_types"],
-            "storage_paths": localized_suggestions["storage_paths"]
-            or suggestions["storage_paths"],
-        }
+    # Hand the pooled DB connection back while the (slow) LLM query runs so it
+    # is not pinned for the call's duration; see paperless_ai.db and #12976.
+    with db_connection_released():
+        result = client.run_llm_query(prompt)
+        suggestions = parse_ai_response(result)
+        if output_language:
+            localized = client.run_llm_query(
+                build_localization_prompt(suggestions, output_language),
+            )
+            localized_suggestions = parse_ai_response(localized)
+            suggestions = {
+                **suggestions,
+                "title": localized_suggestions["title"] or suggestions["title"],
+                "tags": localized_suggestions["tags"] or suggestions["tags"],
+                "document_types": localized_suggestions["document_types"]
+                or suggestions["document_types"],
+                "storage_paths": localized_suggestions["storage_paths"]
+                or suggestions["storage_paths"],
+            }
    return suggestions
@@ -5,6 +5,7 @@ import sys
 from documents.models import Document
 from paperless.config import AIConfig
 from paperless_ai.client import AIClient
+from paperless_ai.db import db_connection_released
 from paperless_ai.indexing import _document_id_filters
 from paperless_ai.indexing import get_rag_prompt_helper
 from paperless_ai.indexing import load_or_build_index
@@ -105,7 +106,10 @@ def _stream_chat_with_documents(query_str: str, documents: list[Document]):
        filters=filters,
    )

-    top_nodes = retriever.retrieve(query_str)
+    # Slow query-embedding + vector search; no Django ORM access happens during
+    # it, so release the pooled DB connection for its duration. See #12976.
+    with db_connection_released():
+        top_nodes = retriever.retrieve(query_str)
    if not top_nodes:
        logger.warning("No nodes found for the given documents.")
        yield CHAT_NO_CONTENT_MESSAGE
@@ -133,10 +137,13 @@ def _stream_chat_with_documents(query_str: str, documents: list[Document]):
    )

    logger.debug("Document chat query: %s", query_str)
-    response_stream = query_engine.query(query_str)
-    for chunk in response_stream.response_gen:
-        yield chunk
-        sys.stdout.flush()
+    # Release the pooled DB connection for the slow streaming LLM response so it
+    # is not pinned for the whole stream; see paperless_ai.db and #12976.
+    with db_connection_released():
+        response_stream = query_engine.query(query_str)
+        for chunk in response_stream.response_gen:
+            yield chunk
+            sys.stdout.flush()

-    if references:
-        yield _format_chat_metadata_trailer(references)
+        if references:
+            yield _format_chat_metadata_trailer(references)
@@ -0,0 +1,43 @@
+from __future__ import annotations
+
+from contextlib import contextmanager
+
+from django.db import connections
+
+
+def _close_idle_connections():
+    """Close every initialized connection that is not inside a transaction."""
+    for conn in connections.all(initialized_only=True):
+        # Closing inside ``transaction.atomic()`` would mark the transaction for
+        # rollback and drop its pending ``on_commit`` hooks, so leave it open.
+        if conn.in_atomic_block:
+            continue
+        conn.close()
+
+
+@contextmanager
+def db_connection_released():
+    """
+    Return any checked-out DB connections to the pool for the duration of the
+    wrapped block.
+
+    The AI endpoints run inside a synchronous web request (``ai_suggestions``)
+    or a streaming response (``chat``). Django keeps the request's database
+    connection checked out for the entire request/response, so a blocking LLM
+    call - which can take many seconds - pins a pooled connection the whole
+    time. With connection pooling enabled, enough concurrent AI requests check
+    out every slot and all other requests then fail with
+    ``psycopg_pool.PoolTimeout`` (see issue #12976).
+
+    The wrapped block must not rely on an open connection; Django
+    transparently re-checks-out one on the next ORM use after the block.
+    Connections that are inside a ``transaction.atomic()`` block are left
+    open, because closing them would abort the surrounding transaction; for
+    those connections this context manager is a no-op.
+    """
+    _close_idle_connections()
+    try:
+        yield
+    finally:
+        # Also release anything the wrapped block itself opened.
+        _close_idle_connections()
@@ -13,6 +13,7 @@ from documents.models import PaperlessTask
 from documents.utils import IterWrapper
 from documents.utils import identity
 from paperless.config import AIConfig
+from paperless_ai.db import db_connection_released
 from paperless_ai.embedding import build_llm_index_text
 from paperless_ai.embedding import get_configured_model_name
 from paperless_ai.embedding import get_embedding_model
@@ -385,7 +386,11 @@ def query_similar_documents(
        chunk_size=config.llm_embedding_chunk_size,
        context_size=config.llm_context_size,
    )
-    results = retriever.retrieve(query_text)
+    # The retrieve() call generates a query embedding (a slow external request)
+    # and searches the vector store; no Django ORM access happens during it, so
+    # release the pooled DB connection for its duration. See #12976.
+    with db_connection_released():
+        results = retriever.retrieve(query_text)

    retrieved_document_ids: list[int] = []
    for node in results: