From 75f0c4c92e487101583e1c39b30aa430c4040c7d Mon Sep 17 00:00:00 2001
From: shamoon <4887959+shamoon@users.noreply.github.com>
Date: Mon, 15 Jun 2026 15:05:43 -0700
Subject: [PATCH] Fix (beta): retry celery ping and report warning on no
response (#13012)
---
.../system-status-dialog.component.html | 4 +-
src/documents/tests/test_api_status.py | 71 +++++++++++++++++++
src/documents/views.py | 29 ++++++--
3 files changed, 98 insertions(+), 6 deletions(-)
diff --git a/src-ui/src/app/components/common/system-status-dialog/system-status-dialog.component.html b/src-ui/src/app/components/common/system-status-dialog/system-status-dialog.component.html
index d9194fd2c..5422c875e 100644
--- a/src-ui/src/app/components/common/system-status-dialog/system-status-dialog.component.html
+++ b/src-ui/src/app/components/common/system-status-dialog/system-status-dialog.component.html
@@ -131,7 +131,9 @@
@if (status.tasks.celery_status === 'OK') {
} @else {
-
+
}
diff --git a/src/documents/tests/test_api_status.py b/src/documents/tests/test_api_status.py
index bfe6cc9ee..ca6613573 100644
--- a/src/documents/tests/test_api_status.py
+++ b/src/documents/tests/test_api_status.py
@@ -216,6 +216,77 @@ class TestSystemStatus(APITestCase):
self.assertEqual(response.status_code, status.HTTP_200_OK)
self.assertEqual(response.data["tasks"]["celery_status"], "OK")
+    @mock.patch("documents.views.sleep")  # keep the retry back-off from really sleeping
+    @mock.patch("celery.app.control.Inspect.ping", return_value=None)
+    def test_system_status_celery_ping_none(self, mock_ping, mock_sleep) -> None:
+        """
+        GIVEN:
+            - Celery ping returns no worker responses on any retry attempt
+        WHEN:
+            - The user requests the system status
+        THEN:
+            - The response contains a warning celery status and an explanatory error
+        """
+        self.client.force_login(self.user)
+        response = self.client.get(self.ENDPOINT)
+        self.assertEqual(response.status_code, status.HTTP_200_OK)
+        self.assertEqual(response.data["tasks"]["celery_status"], "WARNING")
+        self.assertEqual(
+            response.data["tasks"]["celery_error"],
+            "No celery workers responded to ping. This may be temporary.",
+        )
+
+ @mock.patch("celery.app.control.Inspect.ping")
+ def test_system_status_celery_ping_unexpected_responses(self, mock_ping) -> None:
+ """
+ GIVEN:
+ - Celery ping returns a worker reply other than {"ok": "pong"}
+ WHEN:
+ - The user requests the system status
+ THEN:
+ - The response reports WARNING, the worker name and an explanatory error
+ """
+ self.client.force_login(self.user)
+ for ping_response in (
+ {"hostname": {"ok": "not-pong"}}, # dict reply with the wrong "ok" value
+ {"hostname": {}}, # dict reply missing the "ok" key
+ {"hostname": "pong"}, # reply payload is not a dict at all
+ ):
+ with self.subTest(ping_response=ping_response):
+ mock_ping.return_value = ping_response
+ response = self.client.get(self.ENDPOINT)
+ self.assertEqual(response.status_code, status.HTTP_200_OK)
+ self.assertEqual(response.data["tasks"]["celery_status"], "WARNING")
+ self.assertEqual(response.data["tasks"]["celery_url"], "hostname") # first key of the ping reply
+ self.assertEqual(
+ response.data["tasks"]["celery_error"],
+ "Celery worker responded unexpectedly.",
+ )
+
+ @mock.patch("documents.views.sleep") # replaces the view's sleep so the test does not wait
+ @mock.patch("celery.app.control.Inspect.ping")
+ def test_system_status_celery_ping_retry_success(
+ self,
+ mock_ping,
+ mock_sleep,
+ ) -> None:
+ """
+ GIVEN:
+ - The first celery ping gets no response and the second returns a pong
+ WHEN:
+ - The user requests the system status
+ THEN:
+ - The response contains an OK celery status and no celery error
+ """
+ mock_ping.side_effect = [None, {"hostname": {"ok": "pong"}}] # 1st call: no reply, 2nd call: pong
+ self.client.force_login(self.user)
+ response = self.client.get(self.ENDPOINT)
+ self.assertEqual(response.status_code, status.HTTP_200_OK)
+ self.assertEqual(response.data["tasks"]["celery_status"], "OK")
+ self.assertIsNone(response.data["tasks"]["celery_error"])
+ self.assertEqual(mock_ping.call_count, 2) # one empty attempt plus the successful retry
+ mock_sleep.assert_called_once_with(0.25) # a single pause between the two attempts
+
@mock.patch("documents.search.get_backend")
def test_system_status_index_ok(self, mock_get_backend) -> None:
"""
diff --git a/src/documents/views.py b/src/documents/views.py
index 5ed6fdaf5..8979113f7 100644
--- a/src/documents/views.py
+++ b/src/documents/views.py
@@ -12,6 +12,7 @@ from datetime import timedelta
from http import HTTPStatus
from pathlib import Path
from time import mktime
+from time import sleep
from typing import TYPE_CHECKING
from typing import Any
from typing import Literal
@@ -4990,11 +4991,29 @@ class SystemStatusView(PassUserMixin):
celery_error = None
celery_url = None
try:
- celery_ping = celery_app.control.inspect().ping()
- celery_url = next(iter(celery_ping.keys()))
- first_worker_ping = celery_ping[celery_url]
- if first_worker_ping["ok"] == "pong":
- celery_active = "OK"
+ celery_ping = None
+ for ping_attempt in range(3):
+ celery_ping = celery_app.control.inspect().ping()
+ if celery_ping:
+ break
+ if ping_attempt < 2:
+ sleep(0.25)
+
+ if not celery_ping:
+ celery_active = "WARNING"
+ celery_error = (
+ "No celery workers responded to ping. This may be temporary."
+ )
+ else:
+ celery_url, first_worker_ping = next(iter(celery_ping.items()))
+ if (
+ isinstance(first_worker_ping, dict)
+ and first_worker_ping.get("ok") == "pong"
+ ):
+ celery_active = "OK"
+ else:
+ celery_active = "WARNING"
+ celery_error = "Celery worker responded unexpectedly."
except Exception as e:
celery_active = "ERROR"
logger.exception(