Compare commits

...

10 Commits

Author SHA1 Message Date
shamoon
e5afbccffc Lint 2026-03-17 11:48:53 -07:00
shamoon
b8faae72ab Update tests 2026-03-17 11:46:44 -07:00
shamoon
8cff99bef3 Update __init__.py 2026-03-17 11:43:22 -07:00
shamoon
b2bbc2c0ac Basic option selection 2026-03-17 11:42:17 -07:00
shamoon
03c71c604f Retry action, basic frontend, cleanup handler 2026-03-17 11:39:53 -07:00
shamoon
fe89ff760b Move it out of consumer 2026-03-17 11:35:52 -07:00
shamoon
83eabbdf63 Try this 2026-03-17 11:35:11 -07:00
shamoon
24da26959d Update consumer.py 2026-03-17 11:34:12 -07:00
shamoon
220267099a Fix tests 2026-03-17 11:34:11 -07:00
shamoon
0f1a529b51 Messing around 2026-03-17 11:33:01 -07:00
18 changed files with 275 additions and 8 deletions

View File

@@ -14,6 +14,7 @@
# Paths and folders
#PAPERLESS_CONSUMPTION_DIR=../consume
#PAPERLESS_CONSUMPTION_FAILED_DIR=../consume/failed
#PAPERLESS_DATA_DIR=../data
#PAPERLESS_EMPTY_TRASH_DIR=
#PAPERLESS_MEDIA_ROOT=../media

View File

@@ -112,6 +112,9 @@
</td>
<td scope="row">
<div class="btn-group" role="group">
@if (task.status === PaperlessTaskStatus.Failed) {
<ng-container *ngTemplateOutlet="retryDropdown; context: { task: task }"></ng-container>
}
<button class="btn btn-sm btn-outline-secondary" (click)="dismissTask(task); $event.stopPropagation();" *pngxIfPermissions="{ action: PermissionAction.Change, type: PermissionType.PaperlessTask }">
<i-bs name="check" class="me-1"></i-bs><ng-container i18n>Dismiss</ng-container>
</button>
@@ -184,3 +187,25 @@
</li>
</ul>
<div [ngbNavOutlet]="nav"></div>
<ng-template #retryDropdown let-task="task">
<div ngbDropdown>
<button class="btn btn-sm btn-outline-primary" (click)="$event.stopImmediatePropagation()" ngbDropdownToggle>
<i-bs name="arrow-repeat"></i-bs>&nbsp;<ng-container i18n>Retry</ng-container>
</button>
<div ngbDropdownMenu class="shadow retry-dropdown">
<div class="p-2">
<ul class="list-group list-group-flush">
<li class="list-group-item small" i18n>
<pngx-input-check [(ngModel)]="retryClean" i18n-title title="Attempt to clean pdf"></pngx-input-check>
</li>
</ul>
<div class="d-flex justify-content-end">
<button class="btn btn-sm btn-outline-primary" (click)="retryTask(task); $event.stopPropagation();">
<ng-container i18n>Proceed</ng-container>
</button>
</div>
</div>
</div>
</div>
</ng-template>

View File

@@ -37,3 +37,7 @@ pre {
.z-10 {
z-index: 10;
}
.retry-dropdown {
width: 300px;
}

View File

@@ -16,7 +16,7 @@ import {
NgbNavItem,
} from '@ng-bootstrap/ng-bootstrap'
import { allIcons, NgxBootstrapIconsModule } from 'ngx-bootstrap-icons'
import { throwError } from 'rxjs'
import { of, throwError } from 'rxjs'
import { routes } from 'src/app/app-routing.module'
import {
PaperlessTask,
@@ -32,6 +32,7 @@ import { TasksService } from 'src/app/services/tasks.service'
import { ToastService } from 'src/app/services/toast.service'
import { environment } from 'src/environments/environment'
import { ConfirmDialogComponent } from '../../common/confirm-dialog/confirm-dialog.component'
import { CheckComponent } from '../../common/input/check/check.component'
import { PageHeaderComponent } from '../../common/page-header/page-header.component'
import { TasksComponent, TaskTab } from './tasks.component'
@@ -138,6 +139,7 @@ describe('TasksComponent', () => {
PageHeaderComponent,
IfPermissionsDirective,
CustomDatePipe,
CheckComponent,
ConfirmDialogComponent,
],
providers: [
@@ -184,8 +186,10 @@ describe('TasksComponent', () => {
`Failed${currentTasksLength}`
)
expect(
fixture.debugElement.queryAll(By.css('table input[type="checkbox"]'))
).toHaveLength(currentTasksLength + 1)
fixture.debugElement.queryAll(
By.css('table td > .form-check input[type="checkbox"]')
)
).toHaveLength(currentTasksLength)
currentTasksLength = tasks.filter(
(t) => t.status === PaperlessTaskStatus.Complete
@@ -389,4 +393,20 @@ describe('TasksComponent', () => {
expect(component.filterText).toEqual('')
expect(component.filterTargetID).toEqual(0)
})
it('should retry a task, show toast on error or success', () => {
const retrySpy = jest.spyOn(tasksService, 'retryTask')
const toastInfoSpy = jest.spyOn(toastService, 'showInfo')
const toastErrorSpy = jest.spyOn(toastService, 'showError')
retrySpy.mockReturnValueOnce(of({ task_id: '123' }))
component.retryTask(tasks[0])
expect(retrySpy).toHaveBeenCalledWith(tasks[0], false)
expect(toastInfoSpy).toHaveBeenCalledWith('Retrying task...')
retrySpy.mockReturnValueOnce(throwError(() => new Error('test')))
component.retryTask(tasks[0])
expect(toastErrorSpy).toHaveBeenCalledWith(
'Failed to retry task',
new Error('test')
)
})
})

View File

@@ -20,12 +20,13 @@ import {
takeUntil,
timer,
} from 'rxjs'
import { PaperlessTask } from 'src/app/data/paperless-task'
import { PaperlessTask, PaperlessTaskStatus } from 'src/app/data/paperless-task'
import { IfPermissionsDirective } from 'src/app/directives/if-permissions.directive'
import { CustomDatePipe } from 'src/app/pipes/custom-date.pipe'
import { TasksService } from 'src/app/services/tasks.service'
import { ToastService } from 'src/app/services/toast.service'
import { ConfirmDialogComponent } from '../../common/confirm-dialog/confirm-dialog.component'
import { CheckComponent } from '../../common/input/check/check.component'
import { PageHeaderComponent } from '../../common/page-header/page-header.component'
import { LoadingComponentWithPermissions } from '../../loading-component/loading.component'
@@ -54,6 +55,7 @@ const FILTER_TARGETS = [
PageHeaderComponent,
IfPermissionsDirective,
CustomDatePipe,
CheckComponent,
SlicePipe,
FormsModule,
ReactiveFormsModule,
@@ -75,6 +77,7 @@ export class TasksComponent
private readonly router = inject(Router)
private readonly toastService = inject(ToastService)
public PaperlessTaskStatus = PaperlessTaskStatus
public activeTab: TaskTab
public selectedTasks: Set<number> = new Set()
public togggleAll: boolean = false
@@ -105,6 +108,8 @@ export class TasksComponent
: FILTER_TARGETS.slice(0, 1)
}
public retryClean: boolean = false
get dismissButtonText(): string {
return this.selectedTasks.size > 0
? $localize`Dismiss selected`
@@ -178,6 +183,17 @@ export class TasksComponent
this.router.navigate(['documents', task.related_document])
}
retryTask(task: PaperlessTask) {
this.tasksService.retryTask(task, this.retryClean).subscribe({
next: () => {
this.toastService.showInfo($localize`Retrying task...`)
},
error: (e) => {
this.toastService.showError($localize`Failed to retry task`, e)
},
})
}
expandTask(task: PaperlessTask) {
this.expandedTask = this.expandedTask == task.id ? undefined : task.id
}

View File

@@ -147,4 +147,33 @@ describe('TasksService', () => {
result: 'success',
})
})
it('should call retry task api endpoint', () => {
const task = {
id: 1,
type: PaperlessTaskType.File,
status: PaperlessTaskStatus.Failed,
acknowledged: false,
task_id: '1234',
task_file_name: 'file1.pdf',
date_created: new Date(),
}
tasksService.retryTask(task, true).subscribe()
const reloadSpy = jest.spyOn(tasksService, 'reload')
const req = httpTestingController.expectOne(
`${environment.apiBaseUrl}tasks/${task.id}/retry/`
)
expect(req.request.method).toEqual('POST')
expect(req.request.body).toEqual({
clean: true,
})
req.flush({ task_id: 12345 })
expect(reloadSpy).toHaveBeenCalled()
httpTestingController
.expectOne(
`${environment.apiBaseUrl}tasks/?task_name=consume_file&acknowledged=false`
)
.flush([])
})
})

View File

@@ -81,6 +81,20 @@ export class TasksService {
)
}
public retryTask(task: PaperlessTask, clean: boolean): Observable<any> {
return this.http
.post(`${this.baseUrl}tasks/${task.id}/retry/`, {
clean,
})
.pipe(
takeUntil(this.unsubscribeNotifer),
first(),
tap(() => {
this.reload()
})
)
}
public cancelPending(): void {
this.unsubscribeNotifer.next(true)
}

View File

@@ -174,6 +174,17 @@ class ConsumerPluginMixin:
):
self._send_progress(100, 100, ProgressStatusOptions.FAILED, message)
self.log.error(log_message or message, exc_info=exc_info)
# Move the file to the failed directory
if (
self.input_doc.original_file.exists()
and not Path(
settings.CONSUMPTION_FAILED_DIR / self.input_doc.original_file.name,
).exists()
):
copy_file_with_basic_stats(
self.input_doc.original_file,
settings.CONSUMPTION_FAILED_DIR / self.input_doc.original_file.name,
)
raise ConsumerError(f"{self.filename}: {log_message or message}") from exception

View File

@@ -2411,6 +2411,14 @@ class TasksViewSerializer(OwnedObjectSerializer):
return list(duplicates.values("id", "title", "deleted_at"))
class RetryTaskSerializer(serializers.Serializer):
clean = serializers.BooleanField(
default=False,
write_only=True,
required=False,
)
class RunTaskViewSerializer(serializers.Serializer[dict[str, Any]]):
task_name = serializers.ChoiceField(
choices=PaperlessTask.TaskName.choices,

View File

@@ -631,6 +631,19 @@ def update_filename_and_move_files(
)
@receiver(models.signals.post_save, sender=PaperlessTask)
def cleanup_failed_documents(sender, instance: PaperlessTask, **kwargs):
if instance.status != states.FAILURE or not instance.acknowledged:
return
if instance.task_file_name:
try:
Path(settings.CONSUMPTION_FAILED_DIR / instance.task_file_name).unlink()
logger.debug(f"Cleaned up failed file {instance.task_file_name}")
except FileNotFoundError:
logger.warning(f"Failed to clean up failed file {instance.task_file_name}")
@shared_task
def process_cf_select_update(custom_field: CustomField) -> None:
"""

View File

@@ -37,6 +37,7 @@ from documents.consumer import ConsumerPreflightPlugin
from documents.consumer import WorkflowTriggerPlugin
from documents.data_models import ConsumableDocument
from documents.data_models import DocumentMetadataOverrides
from documents.data_models import DocumentSource
from documents.double_sided import CollatePlugin
from documents.file_handling import create_source_path_directory
from documents.file_handling import generate_unique_filename
@@ -63,6 +64,8 @@ from documents.signals import document_updated
from documents.signals.handlers import cleanup_document_deletion
from documents.signals.handlers import run_workflows
from documents.signals.handlers import send_websocket_document_updated
from documents.utils import copy_file_with_basic_stats
from documents.utils import run_subprocess
from documents.workflows.utils import get_workflows_for_trigger
from paperless.config import AIConfig
from paperless_ai.indexing import llm_index_add_or_update_document
@@ -72,7 +75,6 @@ from paperless_ai.indexing import update_llm_index
_T = TypeVar("_T")
IterWrapper = Callable[[Iterable[_T]], Iterable[_T]]
if settings.AUDIT_LOG_ENABLED:
from auditlog.models import LogEntry
logger = logging.getLogger("paperless.tasks")
@@ -239,6 +241,48 @@ def consume_file(
return msg
@shared_task
def retry_failed_file(task_id: str, clean: bool = False, skip_ocr: bool = False):
task = PaperlessTask.objects.get(task_id=task_id, status=states.FAILURE)
if task:
failed_file = settings.CONSUMPTION_FAILED_DIR / task.task_file_name
if not failed_file.exists():
logger.error(f"File {failed_file} not found")
raise FileNotFoundError(f"File {failed_file} not found")
working_copy = settings.SCRATCH_DIR / failed_file.name
copy_file_with_basic_stats(failed_file, working_copy)
if clean:
try:
result = run_subprocess(
[
"qpdf",
"--replace-input",
"--warning-exit-0",
working_copy,
],
logger=logger,
)
if result.returncode != 0:
raise Exception(
f"qpdf failed with exit code {result.returncode}, error: {result.stderr}",
)
else:
logger.debug("PDF cleaned successfully")
except Exception as e:
logger.error(f"Error while cleaning PDF: {e}")
raise e
task = consume_file.delay(
ConsumableDocument(
source=DocumentSource.ConsumeFolder,
original_file=working_copy,
),
)
return task.id
@shared_task
def sanity_check(*, scheduled=True, raise_on_error=True):
messages = sanity_checker.check_sanity(scheduled=scheduled)

Binary file not shown.

View File

@@ -1,4 +1,5 @@
import shutil
import uuid
from datetime import timedelta
from pathlib import Path
from unittest import mock
@@ -21,6 +22,7 @@ from documents.sanity_checker import SanityCheckMessages
from documents.tests.test_classifier import dummy_preprocess
from documents.tests.utils import DirectoriesMixin
from documents.tests.utils import FileSystemAssertsMixin
from documents.tests.utils import SampleDirMixin
class TestIndexReindex(DirectoriesMixin, TestCase):
@@ -232,6 +234,44 @@ class TestEmptyTrashTask(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
self.assertEqual(Document.global_objects.count(), 0)
class TestRetryConsumeTask(
DirectoriesMixin,
SampleDirMixin,
FileSystemAssertsMixin,
TestCase,
):
def do_failed_task(self, test_file: Path) -> PaperlessTask:
failed_file = settings.CONSUMPTION_FAILED_DIR / test_file.name
shutil.copy(test_file, failed_file)
task = PaperlessTask.objects.create(
type=PaperlessTask.TaskType.AUTO,
task_id=str(uuid.uuid4()),
task_file_name=failed_file.name,
task_name=PaperlessTask.TaskName.CONSUME_FILE,
status=states.FAILURE,
date_created=timezone.now(),
date_done=timezone.now(),
)
self.assertIsFile(settings.CONSUMPTION_FAILED_DIR / task.task_file_name)
return task
@mock.patch("documents.tasks.consume_file.delay")
@mock.patch("documents.tasks.run_subprocess")
def test_retry_consume_clean(self, m_subprocess, m_consume_file) -> None:
task = self.do_failed_task(self.SAMPLE_DIR / "corrupted.pdf")
m_subprocess.return_value.returncode = 0
task_id = tasks.retry_failed_file(task_id=task.task_id, clean=True)
self.assertIsNotNone(task_id)
m_consume_file.assert_called_once()
def test_cleanup(self) -> None:
task = self.do_failed_task(self.SAMPLE_DIR / "corrupted.pdf")
task.acknowledged = True
task.save()
self.assertIsNotFile(settings.CONSUMPTION_FAILED_DIR / task.task_file_name)
class TestUpdateContent(DirectoriesMixin, TestCase):
def test_update_content_maybe_archive_file(self) -> None:
"""

View File

@@ -37,6 +37,7 @@ def setup_directories():
dirs.scratch_dir = Path(tempfile.mkdtemp()).resolve()
dirs.media_dir = Path(tempfile.mkdtemp()).resolve()
dirs.consumption_dir = Path(tempfile.mkdtemp()).resolve()
dirs.consumption_failed_dir = Path(tempfile.mkdtemp("failed")).resolve()
dirs.static_dir = Path(tempfile.mkdtemp()).resolve()
dirs.index_dir = dirs.data_dir / "index"
dirs.originals_dir = dirs.media_dir / "documents" / "originals"
@@ -58,6 +59,7 @@ def setup_directories():
THUMBNAIL_DIR=dirs.thumbnail_dir,
ARCHIVE_DIR=dirs.archive_dir,
CONSUMPTION_DIR=dirs.consumption_dir,
CONSUMPTION_FAILED_DIR=dirs.consumption_failed_dir,
LOGGING_DIR=dirs.logging_dir,
INDEX_DIR=dirs.index_dir,
STATIC_ROOT=dirs.static_dir,
@@ -74,6 +76,7 @@ def remove_dirs(dirs) -> None:
shutil.rmtree(dirs.data_dir, ignore_errors=True)
shutil.rmtree(dirs.scratch_dir, ignore_errors=True)
shutil.rmtree(dirs.consumption_dir, ignore_errors=True)
shutil.rmtree(dirs.consumption_failed_dir, ignore_errors=True)
shutil.rmtree(dirs.static_dir, ignore_errors=True)
dirs.settings_override.disable()

View File

@@ -189,6 +189,7 @@ from documents.serialisers import NotesSerializer
from documents.serialisers import PostDocumentSerializer
from documents.serialisers import RemovePasswordDocumentsSerializer
from documents.serialisers import ReprocessDocumentsSerializer
from documents.serialisers import RetryTaskSerializer
from documents.serialisers import RotateDocumentsSerializer
from documents.serialisers import RunTaskViewSerializer
from documents.serialisers import SavedViewSerializer
@@ -211,6 +212,7 @@ from documents.tasks import consume_file
from documents.tasks import empty_trash
from documents.tasks import index_optimize
from documents.tasks import llmindex_index
from documents.tasks import retry_failed_file
from documents.tasks import sanity_check
from documents.tasks import train_classifier
from documents.tasks import update_document_parent_tags
@@ -3467,6 +3469,25 @@ class TasksViewSet(ReadOnlyModelViewSet):
queryset = PaperlessTask.objects.filter(task_id=task_id)
return queryset
@action(methods=["post"], detail=True)
def retry(self, request, pk=None):
task = self.get_object()
serializer = RetryTaskSerializer(data=request.data)
serializer.is_valid(raise_exception=True)
clean = serializer.validated_data.get("clean")
try:
new_task_id = retry_failed_file(task.task_id, clean)
return Response({"task_id": new_task_id})
except FileNotFoundError:
return HttpResponseBadRequest("Original file not found")
except Exception as e:
logger.warning(f"An error occurred retrying task: {e!s}")
return HttpResponseBadRequest(
"Error retrying task, check logs for more detail.",
)
@action(
methods=["post"],
detail=False,

View File

@@ -68,6 +68,10 @@ def paths_check(app_configs, **kwargs) -> list[Error]:
+ path_check("PAPERLESS_EMPTY_TRASH_DIR", settings.EMPTY_TRASH_DIR)
+ path_check("PAPERLESS_MEDIA_ROOT", settings.MEDIA_ROOT)
+ path_check("PAPERLESS_CONSUMPTION_DIR", settings.CONSUMPTION_DIR)
+ path_check(
"PAPERLESS_CONSUMPTION_FAILED_DIR",
settings.CONSUMPTION_FAILED_DIR,
)
)

View File

@@ -98,6 +98,11 @@ CONSUMPTION_DIR = get_path_from_env(
BASE_DIR.parent / "consume",
)
CONSUMPTION_FAILED_DIR = get_path_from_env(
"PAPERLESS_CONSUMPTION_FAILED_DIR",
CONSUMPTION_DIR / "failed",
)
# This will be created if it doesn't exist
SCRATCH_DIR = get_path_from_env(
"PAPERLESS_SCRATCH_DIR",
@@ -782,6 +787,8 @@ CONSUMER_IGNORE_PATTERNS = list(
),
),
)
if CONSUMPTION_DIR in CONSUMPTION_FAILED_DIR.parents:
CONSUMER_IGNORE_PATTERNS.append(CONSUMPTION_FAILED_DIR.name)
# Directories to always ignore. These are matched by directory name, not full path
CONSUMER_IGNORE_DIRS = list(

View File

@@ -24,6 +24,7 @@ class PaperlessTestDirs:
data_dir: Path
media_dir: Path
consumption_dir: Path
consumption_failed_dir: Path
# TODO: consolidate with documents/tests/conftest.py PaperlessDirs/paperless_dirs
@@ -33,18 +34,21 @@ def directories(tmp_path: Path, settings: SettingsWrapper) -> PaperlessTestDirs:
data_dir = tmp_path / "data"
media_dir = tmp_path / "media"
consumption_dir = tmp_path / "consumption"
consumption_failed_dir = tmp_path / "consumption_failed"
for d in (data_dir, media_dir, consumption_dir):
for d in (data_dir, media_dir, consumption_dir, consumption_failed_dir):
d.mkdir()
settings.DATA_DIR = data_dir
settings.MEDIA_ROOT = media_dir
settings.CONSUMPTION_DIR = consumption_dir
settings.CONSUMPTION_FAILED_DIR = consumption_failed_dir
return PaperlessTestDirs(
data_dir=data_dir,
media_dir=media_dir,
consumption_dir=consumption_dir,
consumption_failed_dir=consumption_failed_dir,
)
@@ -64,10 +68,11 @@ class TestChecks:
settings.MEDIA_ROOT = Path("uuh")
settings.DATA_DIR = Path("whatever")
settings.CONSUMPTION_DIR = Path("idontcare")
settings.CONSUMPTION_FAILED_DIR = Path("nope")
msgs = paths_check(None)
assert len(msgs) == 3, str(msgs)
assert len(msgs) == 4, str(msgs)
for msg in msgs:
assert msg.msg.endswith("is set but doesn't exist.")
@@ -75,6 +80,7 @@ class TestChecks:
directories.data_dir.chmod(0o000)
directories.media_dir.chmod(0o000)
directories.consumption_dir.chmod(0o000)
directories.consumption_failed_dir.chmod(0o000)
try:
msgs = paths_check(None)
@@ -82,8 +88,9 @@ class TestChecks:
directories.data_dir.chmod(0o777)
directories.media_dir.chmod(0o777)
directories.consumption_dir.chmod(0o777)
directories.consumption_failed_dir.chmod(0o777)
assert len(msgs) == 3
assert len(msgs) == 4
for msg in msgs:
assert msg.msg.endswith("is not writeable")