Retry action, basic frontend, cleanup handler

This commit is contained in:
shamoon
2024-11-07 17:31:28 -08:00
parent a2e4977201
commit 62b470f691
10 changed files with 168 additions and 38 deletions
+14
View File
@@ -1,6 +1,7 @@
import logging
import os
import shutil
from pathlib import Path
from celery import states
from celery.signals import before_task_publish
@@ -520,6 +521,19 @@ def update_filename_and_move_files(
)
@receiver(models.signals.post_save, sender=PaperlessTask)
def cleanup_failed_documents(sender, instance: PaperlessTask, **kwargs):
    """
    Delete the file kept in the failed directory once its task is acknowledged.

    Connected to ``post_save`` of ``PaperlessTask``. The handler does nothing
    unless the saved task has status ``FAILURE``, is acknowledged and has a
    ``task_file_name`` recorded.

    Removal is best-effort: a missing file or any other filesystem error is
    logged as a warning and swallowed, so a cleanup problem cannot propagate
    out of the signal handler into the ``save()`` call that triggered it.
    """
    if instance.status != states.FAILURE or not instance.acknowledged:
        return
    if not instance.task_file_name:
        return

    failed_file = settings.CONSUMPTION_FAILED_DIR / instance.task_file_name
    try:
        failed_file.unlink()
    except FileNotFoundError:
        logger.warning(f"Failed to clean up failed file {instance.task_file_name}")
    except OSError as e:
        # e.g. PermissionError or IsADirectoryError: still only a cleanup
        # failure, so report it instead of aborting the task save.
        logger.warning(
            f"Failed to clean up failed file {instance.task_file_name}: {e}",
        )
    else:
        logger.debug(f"Cleaned up failed file {instance.task_file_name}")
def set_log_entry(sender, document: Document, logging_group=None, **kwargs):
ct = ContentType.objects.get(model="document")
user = User.objects.get(username="consumer")
+6 -4
View File
@@ -180,8 +180,8 @@ def retry_failed_file(task_id: str, clean: bool = False, skip_ocr: bool = False)
if task:
failed_file = settings.CONSUMPTION_FAILED_DIR / task.task_file_name
if not failed_file.exists():
logger.error(f"Failed file {failed_file} not found")
return
logger.error(f"File {failed_file} not found")
raise FileNotFoundError(f"File {failed_file} not found")
working_copy = settings.SCRATCH_DIR / failed_file.name
copy_file_with_basic_stats(failed_file, working_copy)
@@ -204,15 +204,17 @@ def retry_failed_file(task_id: str, clean: bool = False, skip_ocr: bool = False)
logger.debug("PDF cleaned successfully")
except Exception as e:
logger.error(f"Error while cleaning PDF: {e}")
return
raise e
consume_file(
task = consume_file.delay(
ConsumableDocument(
source=DocumentSource.ConsumeFolder,
original_file=working_copy,
),
)
return task.id
@shared_task
def sanity_check():
+16 -14
View File
@@ -7,7 +7,6 @@ from unittest import mock
from django.conf import settings
from django.test import TestCase
from django.test import override_settings
from django.utils import timezone
from documents import tasks
@@ -203,9 +202,7 @@ class TestRetryConsumeTask(
FileSystemAssertsMixin,
TestCase,
):
@override_settings(CONSUMPTION_FAILED_DIR=Path(__file__).parent / "samples")
def test_retry_consume_clean(self):
test_file = self.SAMPLE_DIR / "corrupted.pdf"
def do_failed_task(self, test_file: Path) -> PaperlessTask:
temp_copy = self.dirs.scratch_dir / test_file.name
shutil.copy(test_file, temp_copy)
@@ -246,14 +243,19 @@ class TestRetryConsumeTask(
task = PaperlessTask.objects.first()
# Ensure the file is moved to the failed dir
self.assertIsFile(settings.CONSUMPTION_FAILED_DIR / task.task_file_name)
return task
with mock.patch("documents.tasks.ProgressManager", DummyProgressManager):
with self.assertLogs() as cm:
tasks.retry_failed_file(task_id=task.task_id, clean=True)
# on ci, the message is different because qpdf is not installed
msg = (
"No such file or directory: 'qpdf'"
if "PAPERLESS_CI_TEST" in os.environ
else "New document id 1 created"
)
self.assertIn(msg, cm.output[-1])
@mock.patch("documents.tasks.consume_file.delay")
@mock.patch("documents.tasks.run_subprocess")
def test_retry_consume_clean(self, m_subprocess, m_consume_file):
    """
    Retrying a failed task with ``clean=True`` queues a new consume task.

    Both ``run_subprocess`` and ``consume_file.delay`` are patched, so no
    external command is executed and nothing is actually consumed.
    """
    sample = self.SAMPLE_DIR / "corrupted.pdf"
    failed_task = self.do_failed_task(sample)

    # Report success from the patched subprocess call
    # (presumably the PDF cleaning command - confirm against retry_failed_file)
    m_subprocess.return_value.returncode = 0

    new_task_id = tasks.retry_failed_file(
        task_id=failed_task.task_id,
        clean=True,
    )

    self.assertIsNotNone(new_task_id)
    m_consume_file.assert_called_once()
def test_cleanup(self):
    """
    Acknowledging a failed task removes its file from the failed directory.
    """
    failed_task = self.do_failed_task(self.SAMPLE_DIR / "corrupted.pdf")

    # Simulate the task being acknowledged
    failed_task.acknowledged = True
    failed_task.save()

    leftover = settings.CONSUMPTION_FAILED_DIR / failed_task.task_file_name
    self.assertIsNotFile(leftover)
+13
View File
@@ -152,6 +152,7 @@ from documents.serialisers import WorkflowTriggerSerializer
from documents.signals import document_updated
from documents.tasks import consume_file
from documents.tasks import empty_trash
from documents.tasks import retry_failed_file
from documents.templating.filepath import validate_filepath_template_and_render
from paperless import version
from paperless.celery import app as celery_app
@@ -1718,6 +1719,18 @@ class TasksViewSet(ReadOnlyModelViewSet):
queryset = PaperlessTask.objects.filter(task_id=task_id)
return queryset
@action(methods=["post"], detail=True)
def retry(self, request, pk=None):
    """
    Retry the failed file belonging to the selected task.

    Calls ``retry_failed_file`` with cleaning enabled. On success the
    response body is ``{"task_id": <value returned by retry_failed_file>}``.
    Any exception raised while retrying is logged as a warning and answered
    with a generic HTTP 400 response.
    """
    failed_task = self.get_object()
    try:
        retried_id = retry_failed_file(failed_task.task_id, clean=True)
        payload = {"task_id": retried_id}
        return Response(payload)
    except Exception as e:
        logger.warning(f"An error occurred retrying task: {e!s}")
        return HttpResponseBadRequest(
            "Error retrying task, check logs for more detail.",
        )
class AcknowledgeTasksView(GenericAPIView):
permission_classes = (IsAuthenticated,)