From 75f0c4c92e487101583e1c39b30aa430c4040c7d Mon Sep 17 00:00:00 2001
From: shamoon <4887959+shamoon@users.noreply.github.com>
Date: Mon, 15 Jun 2026 15:05:43 -0700
Subject: [PATCH] Fix (beta): retry celery ping and report warning on no response (#13012)

---
 .../system-status-dialog.component.html       |  4 +-
 src/documents/tests/test_api_status.py        | 71 +++++++++++++++++++
 src/documents/views.py                        | 29 ++++++--
 3 files changed, 98 insertions(+), 6 deletions(-)

diff --git a/src-ui/src/app/components/common/system-status-dialog/system-status-dialog.component.html b/src-ui/src/app/components/common/system-status-dialog/system-status-dialog.component.html
index d9194fd2c..5422c875e 100644
--- a/src-ui/src/app/components/common/system-status-dialog/system-status-dialog.component.html
+++ b/src-ui/src/app/components/common/system-status-dialog/system-status-dialog.component.html
@@ -131,7 +131,9 @@
 @if (status.tasks.celery_status === 'OK') {
 } @else {
-
+
 }
diff --git a/src/documents/tests/test_api_status.py b/src/documents/tests/test_api_status.py
index bfe6cc9ee..ca6613573 100644
--- a/src/documents/tests/test_api_status.py
+++ b/src/documents/tests/test_api_status.py
@@ -216,6 +216,77 @@ class TestSystemStatus(APITestCase):
         self.assertEqual(response.status_code, status.HTTP_200_OK)
         self.assertEqual(response.data["tasks"]["celery_status"], "OK")
 
+    @mock.patch("celery.app.control.Inspect.ping")
+    def test_system_status_celery_ping_none(self, mock_ping) -> None:
+        """
+        GIVEN:
+            - Celery ping returns no worker responses
+        WHEN:
+            - The user requests the system status
+        THEN:
+            - The response contains a warning celery status
+        """
+        mock_ping.return_value = None
+        self.client.force_login(self.user)
+        response = self.client.get(self.ENDPOINT)
+        self.assertEqual(response.status_code, status.HTTP_200_OK)
+        self.assertEqual(response.data["tasks"]["celery_status"], "WARNING")
+        self.assertEqual(
+            response.data["tasks"]["celery_error"],
+            "No celery workers responded to ping. This may be temporary.",
+        )
+
+    @mock.patch("celery.app.control.Inspect.ping")
+    def test_system_status_celery_ping_unexpected_responses(self, mock_ping) -> None:
+        """
+        GIVEN:
+            - Celery ping returns an unexpected worker response
+        WHEN:
+            - The user requests the system status
+        THEN:
+            - The response contains a warning celery status
+        """
+        self.client.force_login(self.user)
+        for ping_response in (
+            {"hostname": {"ok": "not-pong"}},
+            {"hostname": {}},
+            {"hostname": "pong"},
+        ):
+            with self.subTest(ping_response=ping_response):
+                mock_ping.return_value = ping_response
+                response = self.client.get(self.ENDPOINT)
+                self.assertEqual(response.status_code, status.HTTP_200_OK)
+                self.assertEqual(response.data["tasks"]["celery_status"], "WARNING")
+                self.assertEqual(response.data["tasks"]["celery_url"], "hostname")
+                self.assertEqual(
+                    response.data["tasks"]["celery_error"],
+                    "Celery worker responded unexpectedly.",
+                )
+
+    @mock.patch("documents.views.sleep")
+    @mock.patch("celery.app.control.Inspect.ping")
+    def test_system_status_celery_ping_retry_success(
+        self,
+        mock_ping,
+        mock_sleep,
+    ) -> None:
+        """
+        GIVEN:
+            - Celery ping fails once but succeeds on retry
+        WHEN:
+            - The user requests the system status
+        THEN:
+            - The response contains an OK celery status
+        """
+        mock_ping.side_effect = [None, {"hostname": {"ok": "pong"}}]
+        self.client.force_login(self.user)
+        response = self.client.get(self.ENDPOINT)
+        self.assertEqual(response.status_code, status.HTTP_200_OK)
+        self.assertEqual(response.data["tasks"]["celery_status"], "OK")
+        self.assertIsNone(response.data["tasks"]["celery_error"])
+        self.assertEqual(mock_ping.call_count, 2)
+        mock_sleep.assert_called_once_with(0.25)
+
     @mock.patch("documents.search.get_backend")
     def test_system_status_index_ok(self, mock_get_backend) -> None:
         """
diff --git a/src/documents/views.py b/src/documents/views.py
index 5ed6fdaf5..8979113f7 100644
--- a/src/documents/views.py
+++ b/src/documents/views.py
@@ -12,6 +12,7 @@ from datetime import timedelta
 from http import HTTPStatus
 from pathlib import Path
 from time import mktime
+from time import sleep
 from typing import TYPE_CHECKING
 from typing import Any
 from typing import Literal
@@ -4990,11 +4991,29 @@ class SystemStatusView(PassUserMixin):
         celery_error = None
         celery_url = None
         try:
-            celery_ping = celery_app.control.inspect().ping()
-            celery_url = next(iter(celery_ping.keys()))
-            first_worker_ping = celery_ping[celery_url]
-            if first_worker_ping["ok"] == "pong":
-                celery_active = "OK"
+            celery_ping = None
+            for ping_attempt in range(3):
+                celery_ping = celery_app.control.inspect().ping()
+                if celery_ping:
+                    break
+                if ping_attempt < 2:
+                    sleep(0.25)
+
+            if not celery_ping:
+                celery_active = "WARNING"
+                celery_error = (
+                    "No celery workers responded to ping. This may be temporary."
+                )
+            else:
+                celery_url, first_worker_ping = next(iter(celery_ping.items()))
+                if (
+                    isinstance(first_worker_ping, dict)
+                    and first_worker_ping.get("ok") == "pong"
+                ):
+                    celery_active = "OK"
+                else:
+                    celery_active = "WARNING"
+                    celery_error = "Celery worker responded unexpectedly."
         except Exception as e:
             celery_active = "ERROR"
             logger.exception(