From 03c71c604fb03582fd1b02791f8324b1b9b05bf7 Mon Sep 17 00:00:00 2001 From: shamoon <4887959+shamoon@users.noreply.github.com> Date: Thu, 7 Nov 2024 17:31:28 -0800 Subject: [PATCH] Retry action, basic frontend, cleanup handler --- .../admin/tasks/tasks.component.html | 5 ++ .../admin/tasks/tasks.component.spec.ts | 18 ++++- .../components/admin/tasks/tasks.component.ts | 14 +++- src-ui/src/app/services/tasks.service.spec.ts | 29 ++++++++ src-ui/src/app/services/tasks.service.ts | 14 ++++ src/documents/signals/handlers.py | 13 ++++ src/documents/tasks.py | 10 +-- src/documents/tests/test_tasks.py | 71 +++++++++++++++++++ src/documents/views.py | 13 ++++ 9 files changed, 181 insertions(+), 6 deletions(-) diff --git a/src-ui/src/app/components/admin/tasks/tasks.component.html b/src-ui/src/app/components/admin/tasks/tasks.component.html index 418dfa8fa..d20d9045e 100644 --- a/src-ui/src/app/components/admin/tasks/tasks.component.html +++ b/src-ui/src/app/components/admin/tasks/tasks.component.html @@ -112,6 +112,11 @@
+ @if (task.status === PaperlessTaskStatus.Failed) { + + } diff --git a/src-ui/src/app/components/admin/tasks/tasks.component.spec.ts b/src-ui/src/app/components/admin/tasks/tasks.component.spec.ts index 1a085150e..6846e0f7c 100644 --- a/src-ui/src/app/components/admin/tasks/tasks.component.spec.ts +++ b/src-ui/src/app/components/admin/tasks/tasks.component.spec.ts @@ -16,7 +16,7 @@ import { NgbNavItem, } from '@ng-bootstrap/ng-bootstrap' import { allIcons, NgxBootstrapIconsModule } from 'ngx-bootstrap-icons' -import { throwError } from 'rxjs' +import { of, throwError } from 'rxjs' import { routes } from 'src/app/app-routing.module' import { PaperlessTask, @@ -389,4 +389,20 @@ describe('TasksComponent', () => { expect(component.filterText).toEqual('') expect(component.filterTargetID).toEqual(0) }) + + it('should retry a task, show toast on error or success', () => { + const retrySpy = jest.spyOn(tasksService, 'retryTask') + const toastInfoSpy = jest.spyOn(toastService, 'showInfo') + const toastErrorSpy = jest.spyOn(toastService, 'showError') + retrySpy.mockReturnValueOnce(of({ task_id: '123' })) + component.retryTask(tasks[0]) + expect(retrySpy).toHaveBeenCalledWith(tasks[0]) + expect(toastInfoSpy).toHaveBeenCalledWith('Retrying task...') + retrySpy.mockReturnValueOnce(throwError(() => new Error('test'))) + component.retryTask(tasks[0]) + expect(toastErrorSpy).toHaveBeenCalledWith( + 'Failed to retry task', + new Error('test') + ) + }) }) diff --git a/src-ui/src/app/components/admin/tasks/tasks.component.ts b/src-ui/src/app/components/admin/tasks/tasks.component.ts index 6f144c58c..a3fe50447 100644 --- a/src-ui/src/app/components/admin/tasks/tasks.component.ts +++ b/src-ui/src/app/components/admin/tasks/tasks.component.ts @@ -20,7 +20,7 @@ import { takeUntil, timer, } from 'rxjs' -import { PaperlessTask } from 'src/app/data/paperless-task' +import { PaperlessTask, PaperlessTaskStatus } from 'src/app/data/paperless-task' import { IfPermissionsDirective } from 'src/app/directives/if-permissions.directive' import { CustomDatePipe } from 'src/app/pipes/custom-date.pipe' import { TasksService } from 'src/app/services/tasks.service' @@ -75,6 +75,7 @@ export class TasksComponent private readonly router = inject(Router) private readonly toastService = inject(ToastService) + public PaperlessTaskStatus = PaperlessTaskStatus public activeTab: TaskTab public selectedTasks: Set = new Set() public togggleAll: boolean = false @@ -178,6 +179,17 @@ export class TasksComponent this.router.navigate(['documents', task.related_document]) } + retryTask(task: PaperlessTask) { + this.tasksService.retryTask(task).subscribe({ + next: () => { + this.toastService.showInfo($localize`Retrying task...`) + }, + error: (e) => { + this.toastService.showError($localize`Failed to retry task`, e) + }, + }) + } + expandTask(task: PaperlessTask) { this.expandedTask = this.expandedTask == task.id ? undefined : task.id } diff --git a/src-ui/src/app/services/tasks.service.spec.ts b/src-ui/src/app/services/tasks.service.spec.ts index 640f84587..ae066cc7d 100644 --- a/src-ui/src/app/services/tasks.service.spec.ts +++ b/src-ui/src/app/services/tasks.service.spec.ts @@ -147,4 +147,33 @@ describe('TasksService', () => { result: 'success', }) }) + + it('should call retry task api endpoint', () => { + const task = { + id: 1, + type: PaperlessTaskType.File, + status: PaperlessTaskStatus.Failed, + acknowledged: false, + task_id: '1234', + task_file_name: 'file1.pdf', + date_created: new Date(), + } + + tasksService.retryTask(task).subscribe() + const reloadSpy = jest.spyOn(tasksService, 'reload') + const req = httpTestingController.expectOne( + `${environment.apiBaseUrl}tasks/${task.id}/retry/` + ) + expect(req.request.method).toEqual('POST') + expect(req.request.body).toEqual({ + task_id: task.id, + }) + req.flush({ task_id: 12345 }) + expect(reloadSpy).toHaveBeenCalled() + httpTestingController + .expectOne( + `${environment.apiBaseUrl}tasks/?task_name=consume_file&acknowledged=false` + ) + .flush([]) + }) }) diff --git a/src-ui/src/app/services/tasks.service.ts b/src-ui/src/app/services/tasks.service.ts index 305258d7b..bdcd00d1c 100644 --- a/src-ui/src/app/services/tasks.service.ts +++ b/src-ui/src/app/services/tasks.service.ts @@ -81,6 +81,20 @@ export class TasksService { ) } + public retryTask(task: PaperlessTask): Observable { + return this.http + .post(`${this.baseUrl}tasks/${task.id}/retry/`, { + task_id: task.id, + }) + .pipe( + takeUntil(this.unsubscribeNotifer), + first(), + tap(() => { + this.reload() + }) + ) + } + public cancelPending(): void { this.unsubscribeNotifer.next(true) } diff --git a/src/documents/signals/handlers.py b/src/documents/signals/handlers.py index 97e4adaf3..48782b2d8 100644 --- a/src/documents/signals/handlers.py +++ b/src/documents/signals/handlers.py @@ -631,6 +631,19 @@ def update_filename_and_move_files( ) +@receiver(models.signals.post_save, sender=PaperlessTask) +def cleanup_failed_documents(sender, instance: PaperlessTask, **kwargs): + if instance.status != states.FAILURE or not instance.acknowledged: + return + + if instance.task_file_name: + try: + Path(settings.CONSUMPTION_FAILED_DIR / instance.task_file_name).unlink() + logger.debug(f"Cleaned up failed file {instance.task_file_name}") + except FileNotFoundError: + logger.warning(f"Failed to clean up failed file {instance.task_file_name}") + + @shared_task def process_cf_select_update(custom_field: CustomField) -> None: """ diff --git a/src/documents/tasks.py b/src/documents/tasks.py index e1942898f..5547028aa 100644 --- a/src/documents/tasks.py +++ b/src/documents/tasks.py @@ -247,8 +247,8 @@ def retry_failed_file(task_id: str, clean: bool = False, skip_ocr: bool = False) if task: failed_file = settings.CONSUMPTION_FAILED_DIR / task.task_file_name if not failed_file.exists(): - logger.error(f"Failed file {failed_file} not found") - return + logger.error(f"File {failed_file} not found") + raise FileNotFoundError(f"File {failed_file} not found") working_copy = settings.SCRATCH_DIR / failed_file.name copy_file_with_basic_stats(failed_file, working_copy) @@ -271,15 +271,17 @@ def retry_failed_file(task_id: str, clean: bool = False, skip_ocr: bool = False) logger.debug("PDF cleaned successfully") except Exception as e: logger.error(f"Error while cleaning PDF: {e}") - return + raise e - consume_file( + task = consume_file.delay( ConsumableDocument( source=DocumentSource.ConsumeFolder, original_file=working_copy, ), ) + return task.id + @shared_task def sanity_check(*, scheduled=True, raise_on_error=True): diff --git a/src/documents/tests/test_tasks.py b/src/documents/tests/test_tasks.py index 37f1e6fed..c0c238fef 100644 --- a/src/documents/tests/test_tasks.py +++ b/src/documents/tests/test_tasks.py @@ -1,4 +1,5 @@ import shutil +import uuid from datetime import timedelta from pathlib import Path from unittest import mock @@ -11,6 +12,8 @@ from django.test import override_settings from django.utils import timezone from documents import tasks +from documents.data_models import ConsumableDocument +from documents.data_models import DocumentSource from documents.models import Correspondent from documents.models import Document from documents.models import DocumentType @@ -18,9 +21,13 @@ from documents.models import PaperlessTask from documents.models import Tag from documents.sanity_checker import SanityCheckFailedException from documents.sanity_checker import SanityCheckMessages +from documents.signals.handlers import before_task_publish_handler +from documents.signals.handlers import task_failure_handler from documents.tests.test_classifier import dummy_preprocess from documents.tests.utils import DirectoriesMixin +from documents.tests.utils import DummyProgressManager from documents.tests.utils import FileSystemAssertsMixin +from documents.tests.utils import SampleDirMixin class TestIndexReindex(DirectoriesMixin, TestCase): @@ -232,6 +239,70 @@ class TestEmptyTrashTask(DirectoriesMixin, FileSystemAssertsMixin, TestCase): self.assertEqual(Document.global_objects.count(), 0) +class TestRetryConsumeTask( + DirectoriesMixin, + SampleDirMixin, + FileSystemAssertsMixin, + TestCase, +): + def do_failed_task(self, test_file: Path) -> PaperlessTask: + temp_copy = self.dirs.scratch_dir / test_file.name + shutil.copy(test_file, temp_copy) + + headers = { + "id": str(uuid.uuid4()), + "task": "documents.tasks.consume_file", + } + body = ( + # args + ( + ConsumableDocument( + source=DocumentSource.ConsumeFolder, + original_file=str(temp_copy), + ), + None, + ), + # kwargs + {}, + # celery metadata + {"callbacks": None, "errbacks": None, "chain": None, "chord": None}, + ) + before_task_publish_handler(headers=headers, body=body) + + with mock.patch("documents.tasks.ProgressManager", DummyProgressManager): + with self.assertRaises(Exception): + tasks.consume_file( + ConsumableDocument( + source=DocumentSource.ConsumeFolder, + original_file=temp_copy, + ), + ) + + task_failure_handler( + task_id=headers["id"], + exception="Example failure", + ) + + task = PaperlessTask.objects.first() + self.assertIsFile(settings.CONSUMPTION_FAILED_DIR / task.task_file_name) + return task + + @mock.patch("documents.tasks.consume_file.delay") + @mock.patch("documents.tasks.run_subprocess") + def test_retry_consume_clean(self, m_subprocess, m_consume_file) -> None: + task = self.do_failed_task(self.SAMPLE_DIR / "corrupted.pdf") + m_subprocess.return_value.returncode = 0 + task_id = tasks.retry_failed_file(task_id=task.task_id, clean=True) + self.assertIsNotNone(task_id) + m_consume_file.assert_called_once() + + def test_cleanup(self) -> None: + task = self.do_failed_task(self.SAMPLE_DIR / "corrupted.pdf") + task.acknowledged = True + task.save() + self.assertIsNotFile(settings.CONSUMPTION_FAILED_DIR / task.task_file_name) + + class TestUpdateContent(DirectoriesMixin, TestCase): def test_update_content_maybe_archive_file(self) -> None: """ diff --git a/src/documents/views.py b/src/documents/views.py index 6f9caa987..9c1743b61 100644 --- a/src/documents/views.py +++ b/src/documents/views.py @@ -211,6 +211,7 @@ from documents.tasks import consume_file from documents.tasks import empty_trash from documents.tasks import index_optimize from documents.tasks import llmindex_index +from documents.tasks import retry_failed_file from documents.tasks import sanity_check from documents.tasks import train_classifier from documents.tasks import update_document_parent_tags @@ -3467,6 +3468,18 @@ class TasksViewSet(ReadOnlyModelViewSet): queryset = PaperlessTask.objects.filter(task_id=task_id) return queryset + @action(methods=["post"], detail=True) + def retry(self, request, pk=None): + task = self.get_object() + try: + new_task_id = retry_failed_file(task.task_id, True) + return Response({"task_id": new_task_id}) + except Exception as e: + logger.warning(f"An error occurred retrying task: {e!s}") + return HttpResponseBadRequest( + "Error retrying task, check logs for more detail.", + ) + @action( methods=["post"], detail=False,