From 03c71c604fb03582fd1b02791f8324b1b9b05bf7 Mon Sep 17 00:00:00 2001
From: shamoon <4887959+shamoon@users.noreply.github.com>
Date: Thu, 7 Nov 2024 17:31:28 -0800
Subject: [PATCH] Retry action, basic frontend, cleanup handler
---
.../admin/tasks/tasks.component.html | 5 ++
.../admin/tasks/tasks.component.spec.ts | 18 ++++-
.../components/admin/tasks/tasks.component.ts | 14 +++-
src-ui/src/app/services/tasks.service.spec.ts | 29 ++++++++
src-ui/src/app/services/tasks.service.ts | 14 ++++
src/documents/signals/handlers.py | 13 ++++
src/documents/tasks.py | 10 +--
src/documents/tests/test_tasks.py | 71 +++++++++++++++++++
src/documents/views.py | 13 ++++
9 files changed, 181 insertions(+), 6 deletions(-)
diff --git a/src-ui/src/app/components/admin/tasks/tasks.component.html b/src-ui/src/app/components/admin/tasks/tasks.component.html
index 418dfa8fa..d20d9045e 100644
--- a/src-ui/src/app/components/admin/tasks/tasks.component.html
+++ b/src-ui/src/app/components/admin/tasks/tasks.component.html
@@ -112,6 +112,11 @@
+ @if (task.status === PaperlessTaskStatus.Failed) {
+
+ }
diff --git a/src-ui/src/app/components/admin/tasks/tasks.component.spec.ts b/src-ui/src/app/components/admin/tasks/tasks.component.spec.ts
index 1a085150e..6846e0f7c 100644
--- a/src-ui/src/app/components/admin/tasks/tasks.component.spec.ts
+++ b/src-ui/src/app/components/admin/tasks/tasks.component.spec.ts
@@ -16,7 +16,7 @@ import {
NgbNavItem,
} from '@ng-bootstrap/ng-bootstrap'
import { allIcons, NgxBootstrapIconsModule } from 'ngx-bootstrap-icons'
-import { throwError } from 'rxjs'
+import { of, throwError } from 'rxjs'
import { routes } from 'src/app/app-routing.module'
import {
PaperlessTask,
@@ -389,4 +389,20 @@ describe('TasksComponent', () => {
expect(component.filterText).toEqual('')
expect(component.filterTargetID).toEqual(0)
})
+
+ it('should retry a task, show toast on error or success', () => {
+ const retrySpy = jest.spyOn(tasksService, 'retryTask')
+ const toastInfoSpy = jest.spyOn(toastService, 'showInfo')
+ const toastErrorSpy = jest.spyOn(toastService, 'showError')
+ retrySpy.mockReturnValueOnce(of({ task_id: '123' }))
+ component.retryTask(tasks[0])
+ expect(retrySpy).toHaveBeenCalledWith(tasks[0])
+ expect(toastInfoSpy).toHaveBeenCalledWith('Retrying task...')
+ retrySpy.mockReturnValueOnce(throwError(() => new Error('test')))
+ component.retryTask(tasks[0])
+ expect(toastErrorSpy).toHaveBeenCalledWith(
+ 'Failed to retry task',
+ new Error('test')
+ )
+ })
})
diff --git a/src-ui/src/app/components/admin/tasks/tasks.component.ts b/src-ui/src/app/components/admin/tasks/tasks.component.ts
index 6f144c58c..a3fe50447 100644
--- a/src-ui/src/app/components/admin/tasks/tasks.component.ts
+++ b/src-ui/src/app/components/admin/tasks/tasks.component.ts
@@ -20,7 +20,7 @@ import {
takeUntil,
timer,
} from 'rxjs'
-import { PaperlessTask } from 'src/app/data/paperless-task'
+import { PaperlessTask, PaperlessTaskStatus } from 'src/app/data/paperless-task'
import { IfPermissionsDirective } from 'src/app/directives/if-permissions.directive'
import { CustomDatePipe } from 'src/app/pipes/custom-date.pipe'
import { TasksService } from 'src/app/services/tasks.service'
@@ -75,6 +75,7 @@ export class TasksComponent
private readonly router = inject(Router)
private readonly toastService = inject(ToastService)
+ public PaperlessTaskStatus = PaperlessTaskStatus
public activeTab: TaskTab
public selectedTasks: Set = new Set()
public togggleAll: boolean = false
@@ -178,6 +179,17 @@ export class TasksComponent
this.router.navigate(['documents', task.related_document])
}
+ retryTask(task: PaperlessTask) {
+ this.tasksService.retryTask(task).subscribe({
+ next: () => {
+ this.toastService.showInfo($localize`Retrying task...`)
+ },
+ error: (e) => {
+ this.toastService.showError($localize`Failed to retry task`, e)
+ },
+ })
+ }
+
expandTask(task: PaperlessTask) {
this.expandedTask = this.expandedTask == task.id ? undefined : task.id
}
diff --git a/src-ui/src/app/services/tasks.service.spec.ts b/src-ui/src/app/services/tasks.service.spec.ts
index 640f84587..ae066cc7d 100644
--- a/src-ui/src/app/services/tasks.service.spec.ts
+++ b/src-ui/src/app/services/tasks.service.spec.ts
@@ -147,4 +147,33 @@ describe('TasksService', () => {
result: 'success',
})
})
+
+ it('should call retry task api endpoint', () => {
+ const task = {
+ id: 1,
+ type: PaperlessTaskType.File,
+ status: PaperlessTaskStatus.Failed,
+ acknowledged: false,
+ task_id: '1234',
+ task_file_name: 'file1.pdf',
+ date_created: new Date(),
+ }
+
+ tasksService.retryTask(task).subscribe()
+ const reloadSpy = jest.spyOn(tasksService, 'reload')
+ const req = httpTestingController.expectOne(
+ `${environment.apiBaseUrl}tasks/${task.id}/retry/`
+ )
+ expect(req.request.method).toEqual('POST')
+ expect(req.request.body).toEqual({
+ task_id: task.id,
+ })
+ req.flush({ task_id: 12345 })
+ expect(reloadSpy).toHaveBeenCalled()
+ httpTestingController
+ .expectOne(
+ `${environment.apiBaseUrl}tasks/?task_name=consume_file&acknowledged=false`
+ )
+ .flush([])
+ })
})
diff --git a/src-ui/src/app/services/tasks.service.ts b/src-ui/src/app/services/tasks.service.ts
index 305258d7b..bdcd00d1c 100644
--- a/src-ui/src/app/services/tasks.service.ts
+++ b/src-ui/src/app/services/tasks.service.ts
@@ -81,6 +81,20 @@ export class TasksService {
)
}
+ public retryTask(task: PaperlessTask): Observable {
+ return this.http
+ .post(`${this.baseUrl}tasks/${task.id}/retry/`, {
+ task_id: task.id,
+ })
+ .pipe(
+ takeUntil(this.unsubscribeNotifer),
+ first(),
+ tap(() => {
+ this.reload()
+ })
+ )
+ }
+
public cancelPending(): void {
this.unsubscribeNotifer.next(true)
}
diff --git a/src/documents/signals/handlers.py b/src/documents/signals/handlers.py
index 97e4adaf3..48782b2d8 100644
--- a/src/documents/signals/handlers.py
+++ b/src/documents/signals/handlers.py
@@ -631,6 +631,19 @@ def update_filename_and_move_files(
)
+@receiver(models.signals.post_save, sender=PaperlessTask)
+def cleanup_failed_documents(sender, instance: PaperlessTask, **kwargs):
+ if instance.status != states.FAILURE or not instance.acknowledged:
+ return
+
+ if instance.task_file_name:
+ try:
+ Path(settings.CONSUMPTION_FAILED_DIR / instance.task_file_name).unlink()
+ logger.debug(f"Cleaned up failed file {instance.task_file_name}")
+ except FileNotFoundError:
+ logger.warning(f"Failed to clean up failed file {instance.task_file_name}")
+
+
@shared_task
def process_cf_select_update(custom_field: CustomField) -> None:
"""
diff --git a/src/documents/tasks.py b/src/documents/tasks.py
index e1942898f..5547028aa 100644
--- a/src/documents/tasks.py
+++ b/src/documents/tasks.py
@@ -247,8 +247,8 @@ def retry_failed_file(task_id: str, clean: bool = False, skip_ocr: bool = False)
if task:
failed_file = settings.CONSUMPTION_FAILED_DIR / task.task_file_name
if not failed_file.exists():
- logger.error(f"Failed file {failed_file} not found")
- return
+ logger.error(f"File {failed_file} not found")
+ raise FileNotFoundError(f"File {failed_file} not found")
working_copy = settings.SCRATCH_DIR / failed_file.name
copy_file_with_basic_stats(failed_file, working_copy)
@@ -271,15 +271,17 @@ def retry_failed_file(task_id: str, clean: bool = False, skip_ocr: bool = False)
logger.debug("PDF cleaned successfully")
except Exception as e:
logger.error(f"Error while cleaning PDF: {e}")
- return
+ raise e
- consume_file(
+ task = consume_file.delay(
ConsumableDocument(
source=DocumentSource.ConsumeFolder,
original_file=working_copy,
),
)
+ return task.id
+
@shared_task
def sanity_check(*, scheduled=True, raise_on_error=True):
diff --git a/src/documents/tests/test_tasks.py b/src/documents/tests/test_tasks.py
index 37f1e6fed..c0c238fef 100644
--- a/src/documents/tests/test_tasks.py
+++ b/src/documents/tests/test_tasks.py
@@ -1,4 +1,5 @@
import shutil
+import uuid
from datetime import timedelta
from pathlib import Path
from unittest import mock
@@ -11,6 +12,8 @@ from django.test import override_settings
from django.utils import timezone
from documents import tasks
+from documents.data_models import ConsumableDocument
+from documents.data_models import DocumentSource
from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
@@ -18,9 +21,13 @@ from documents.models import PaperlessTask
from documents.models import Tag
from documents.sanity_checker import SanityCheckFailedException
from documents.sanity_checker import SanityCheckMessages
+from documents.signals.handlers import before_task_publish_handler
+from documents.signals.handlers import task_failure_handler
from documents.tests.test_classifier import dummy_preprocess
from documents.tests.utils import DirectoriesMixin
+from documents.tests.utils import DummyProgressManager
from documents.tests.utils import FileSystemAssertsMixin
+from documents.tests.utils import SampleDirMixin
class TestIndexReindex(DirectoriesMixin, TestCase):
@@ -232,6 +239,70 @@ class TestEmptyTrashTask(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
self.assertEqual(Document.global_objects.count(), 0)
+class TestRetryConsumeTask(
+ DirectoriesMixin,
+ SampleDirMixin,
+ FileSystemAssertsMixin,
+ TestCase,
+):
+ def do_failed_task(self, test_file: Path) -> PaperlessTask:
+ temp_copy = self.dirs.scratch_dir / test_file.name
+ shutil.copy(test_file, temp_copy)
+
+ headers = {
+ "id": str(uuid.uuid4()),
+ "task": "documents.tasks.consume_file",
+ }
+ body = (
+ # args
+ (
+ ConsumableDocument(
+ source=DocumentSource.ConsumeFolder,
+ original_file=str(temp_copy),
+ ),
+ None,
+ ),
+ # kwargs
+ {},
+ # celery metadata
+ {"callbacks": None, "errbacks": None, "chain": None, "chord": None},
+ )
+ before_task_publish_handler(headers=headers, body=body)
+
+ with mock.patch("documents.tasks.ProgressManager", DummyProgressManager):
+ with self.assertRaises(Exception):
+ tasks.consume_file(
+ ConsumableDocument(
+ source=DocumentSource.ConsumeFolder,
+ original_file=temp_copy,
+ ),
+ )
+
+ task_failure_handler(
+ task_id=headers["id"],
+ exception="Example failure",
+ )
+
+ task = PaperlessTask.objects.first()
+ self.assertIsFile(settings.CONSUMPTION_FAILED_DIR / task.task_file_name)
+ return task
+
+ @mock.patch("documents.tasks.consume_file.delay")
+ @mock.patch("documents.tasks.run_subprocess")
+ def test_retry_consume_clean(self, m_subprocess, m_consume_file) -> None:
+ task = self.do_failed_task(self.SAMPLE_DIR / "corrupted.pdf")
+ m_subprocess.return_value.returncode = 0
+ task_id = tasks.retry_failed_file(task_id=task.task_id, clean=True)
+ self.assertIsNotNone(task_id)
+ m_consume_file.assert_called_once()
+
+ def test_cleanup(self) -> None:
+ task = self.do_failed_task(self.SAMPLE_DIR / "corrupted.pdf")
+ task.acknowledged = True
+ task.save()
+ self.assertIsNotFile(settings.CONSUMPTION_FAILED_DIR / task.task_file_name)
+
+
class TestUpdateContent(DirectoriesMixin, TestCase):
def test_update_content_maybe_archive_file(self) -> None:
"""
diff --git a/src/documents/views.py b/src/documents/views.py
index 6f9caa987..9c1743b61 100644
--- a/src/documents/views.py
+++ b/src/documents/views.py
@@ -211,6 +211,7 @@ from documents.tasks import consume_file
from documents.tasks import empty_trash
from documents.tasks import index_optimize
from documents.tasks import llmindex_index
+from documents.tasks import retry_failed_file
from documents.tasks import sanity_check
from documents.tasks import train_classifier
from documents.tasks import update_document_parent_tags
@@ -3467,6 +3468,18 @@ class TasksViewSet(ReadOnlyModelViewSet):
queryset = PaperlessTask.objects.filter(task_id=task_id)
return queryset
+ @action(methods=["post"], detail=True)
+ def retry(self, request, pk=None):
+ task = self.get_object()
+ try:
+ new_task_id = retry_failed_file(task.task_id, True)
+ return Response({"task_id": new_task_id})
+ except Exception as e:
+ logger.warning(f"An error occurred retrying task: {e!s}")
+ return HttpResponseBadRequest(
+ "Error retrying task, check logs for more detail.",
+ )
+
@action(
methods=["post"],
detail=False,
|