From 4394403bebfa18f5061160f2bb0210ce092ffe65 Mon Sep 17 00:00:00 2001
From: Trenton H <797416+stumpylog@users.noreply.github.com>
Date: Thu, 11 Jun 2026 13:07:31 -0700
Subject: [PATCH] Fix: release pooled DB connection during AI LLM/embedding
 calls (#12983)

---
 src/paperless_ai/ai_classifier.py | 36 +++++++++++++++++--------------
 src/paperless_ai/chat.py          | 21 ++++++++++++------
 src/paperless_ai/db.py            | 30 ++++++++++++++++++++++++++
 src/paperless_ai/indexing.py      |  7 +++++-
 4 files changed, 70 insertions(+), 24 deletions(-)
 create mode 100644 src/paperless_ai/db.py

diff --git a/src/paperless_ai/ai_classifier.py b/src/paperless_ai/ai_classifier.py
index 5420812eb..f9c2f1e06 100644
--- a/src/paperless_ai/ai_classifier.py
+++ b/src/paperless_ai/ai_classifier.py
@@ -8,6 +8,7 @@ from documents.models import Document
 from documents.permissions import get_objects_for_user_owner_aware
 from paperless.config import AIConfig
 from paperless_ai.client import AIClient
+from paperless_ai.db import db_connection_released
 from paperless_ai.indexing import query_similar_documents
 from paperless_ai.indexing import truncate_content
 
@@ -146,20 +147,23 @@ def get_ai_document_classification(
     )
 
     client = AIClient()
-    result = client.run_llm_query(prompt)
-    suggestions = parse_ai_response(result)
-    if output_language:
-        localized = client.run_llm_query(
-            build_localization_prompt(suggestions, output_language),
-        )
-        localized_suggestions = parse_ai_response(localized)
-        suggestions = {
-            **suggestions,
-            "title": localized_suggestions["title"] or suggestions["title"],
-            "tags": localized_suggestions["tags"] or suggestions["tags"],
-            "document_types": localized_suggestions["document_types"]
-            or suggestions["document_types"],
-            "storage_paths": localized_suggestions["storage_paths"]
-            or suggestions["storage_paths"],
-        }
+    # Hand the pooled DB connection back while the (slow) LLM query runs so it
+    # is not pinned for the call's duration; see paperless_ai.db and #12976.
+    with db_connection_released():
+        result = client.run_llm_query(prompt)
+        suggestions = parse_ai_response(result)
+        if output_language:
+            localized = client.run_llm_query(
+                build_localization_prompt(suggestions, output_language),
+            )
+            localized_suggestions = parse_ai_response(localized)
+            suggestions = {
+                **suggestions,
+                "title": localized_suggestions["title"] or suggestions["title"],
+                "tags": localized_suggestions["tags"] or suggestions["tags"],
+                "document_types": localized_suggestions["document_types"]
+                or suggestions["document_types"],
+                "storage_paths": localized_suggestions["storage_paths"]
+                or suggestions["storage_paths"],
+            }
     return suggestions
diff --git a/src/paperless_ai/chat.py b/src/paperless_ai/chat.py
index 123771c50..6102d94e4 100644
--- a/src/paperless_ai/chat.py
+++ b/src/paperless_ai/chat.py
@@ -5,6 +5,7 @@ import sys
 from documents.models import Document
 from paperless.config import AIConfig
 from paperless_ai.client import AIClient
+from paperless_ai.db import db_connection_released
 from paperless_ai.indexing import _document_id_filters
 from paperless_ai.indexing import get_rag_prompt_helper
 from paperless_ai.indexing import load_or_build_index
@@ -105,7 +106,10 @@ def _stream_chat_with_documents(query_str: str, documents: list[Document]):
         filters=filters,
     )
 
-    top_nodes = retriever.retrieve(query_str)
+    # Slow query-embedding + vector search; no Django ORM access happens during
+    # it, so release the pooled DB connection for its duration. See #12976.
+    with db_connection_released():
+        top_nodes = retriever.retrieve(query_str)
     if not top_nodes:
         logger.warning("No nodes found for the given documents.")
         yield CHAT_NO_CONTENT_MESSAGE
@@ -133,10 +137,13 @@ def _stream_chat_with_documents(query_str: str, documents: list[Document]):
     )
 
     logger.debug("Document chat query: %s", query_str)
-    response_stream = query_engine.query(query_str)
-    for chunk in response_stream.response_gen:
-        yield chunk
-        sys.stdout.flush()
+    # Release the pooled DB connection for the slow streaming LLM response so it
+    # is not pinned for the whole stream; see paperless_ai.db and #12976.
+    with db_connection_released():
+        response_stream = query_engine.query(query_str)
+        for chunk in response_stream.response_gen:
+            yield chunk
+            sys.stdout.flush()
 
-    if references:
-        yield _format_chat_metadata_trailer(references)
+        if references:
+            yield _format_chat_metadata_trailer(references)
diff --git a/src/paperless_ai/db.py b/src/paperless_ai/db.py
new file mode 100644
index 000000000..066689acf
--- /dev/null
+++ b/src/paperless_ai/db.py
@@ -0,0 +1,30 @@
+from __future__ import annotations
+
+from contextlib import contextmanager
+
+from django.db import connections
+
+
+@contextmanager
+def db_connection_released():
+    """
+    Return idle checked-out DB connections to the pool for the wrapped block.
+
+    The AI endpoints run inside a synchronous web request (``ai_suggestions``)
+    or a streaming response (``chat``). Django keeps the request's connection
+    checked out throughout, so a slow LLM call pins a pool slot; enough
+    concurrent AI requests exhaust the pool and all other requests fail with
+    ``psycopg_pool.PoolTimeout`` (see issue #12976). No ORM access happens
+    during the call; Django re-checks-out a connection on the next ORM use.
+
+    Connections in an atomic block are kept: closing would lose the transaction.
+    """
+    for conn in connections.all(initialized_only=True):
+        if not conn.in_atomic_block:
+            conn.close()
+    try:
+        yield
+    finally:
+        for conn in connections.all(initialized_only=True):
+            if not conn.in_atomic_block:
+                conn.close()
diff --git a/src/paperless_ai/indexing.py b/src/paperless_ai/indexing.py
index dd96106a6..5153c0baa 100644
--- a/src/paperless_ai/indexing.py
+++ b/src/paperless_ai/indexing.py
@@ -13,6 +13,7 @@ from documents.models import PaperlessTask
 from documents.utils import IterWrapper
 from documents.utils import identity
 from paperless.config import AIConfig
+from paperless_ai.db import db_connection_released
 from paperless_ai.embedding import build_llm_index_text
 from paperless_ai.embedding import get_configured_model_name
 from paperless_ai.embedding import get_embedding_model
@@ -385,7 +386,11 @@ def query_similar_documents(
         chunk_size=config.llm_embedding_chunk_size,
         context_size=config.llm_context_size,
     )
-    results = retriever.retrieve(query_text)
+    # The retrieve() call generates a query embedding (a slow external request)
+    # and searches the vector store; no Django ORM access happens during it, so
+    # release the pooled DB connection for its duration. See #12976.
+    with db_connection_released():
+        results = retriever.retrieve(query_text)
 
     retrieved_document_ids: list[int] = []
     for node in results: