diff --git a/src-ui/src/app/components/admin/tasks/tasks.component.html b/src-ui/src/app/components/admin/tasks/tasks.component.html
index 418dfa8fa..934e9007f 100644
--- a/src-ui/src/app/components/admin/tasks/tasks.component.html
+++ b/src-ui/src/app/components/admin/tasks/tasks.component.html
@@ -76,33 +76,27 @@
- | {{ task.task_file_name }} |
+ {{ task.input_data?.filename }} |
{{ task.date_created | customDate:'short' }} |
@if (activeTab !== 'started' && activeTab !== 'queued') {
- @if (task.result?.length > 50) {
+ @if (task.result_message?.length > 50) {
- {{ task.result | slice:0:50 }}…
+ {{ task.result_message | slice:0:50 }}…
}
- @if (task.result?.length <= 50) {
- {{ task.result }}
+ @if (task.result_message?.length <= 50) {
+ {{ task.result_message }}
}
- {{ task.result | slice:0:300 }}@if (task.result.length > 300) {
+ {{ task.result_message | slice:0:300 }}@if (task.result_message.length > 300) {
…
}
- @if (task.result?.length > 300) {
+ @if (task.result_message?.length > 300) {
(click for full output)
}
- @if (task.duplicate_documents?.length > 0) {
-
-
- Duplicate(s) detected
-
- }
|
}
@@ -116,7 +110,7 @@
Dismiss
- @if (task.related_document) {
+ @if (task.related_document_ids?.[0]) {
@@ -127,7 +121,7 @@
- {{ task.result }}
+ {{ task.result_message }}
|
}
diff --git a/src-ui/src/app/components/admin/tasks/tasks.component.spec.ts b/src-ui/src/app/components/admin/tasks/tasks.component.spec.ts
index 1a085150e..4c85c939e 100644
--- a/src-ui/src/app/components/admin/tasks/tasks.component.spec.ts
+++ b/src-ui/src/app/components/admin/tasks/tasks.component.spec.ts
@@ -20,8 +20,8 @@ import { throwError } from 'rxjs'
import { routes } from 'src/app/app-routing.module'
import {
PaperlessTask,
- PaperlessTaskName,
PaperlessTaskStatus,
+ PaperlessTaskTriggerSource,
PaperlessTaskType,
} from 'src/app/data/paperless-task'
import { IfPermissionsDirective } from 'src/app/directives/if-permissions.directive'
@@ -39,81 +39,100 @@ const tasks: PaperlessTask[] = [
{
id: 467,
task_id: '11ca1a5b-9f81-442c-b2c8-7e4ae53657f1',
- task_file_name: 'test.pdf',
+ input_data: { filename: 'test.pdf' },
date_created: new Date('2023-03-01T10:26:03.093116Z'),
date_done: new Date('2023-03-01T10:26:07.223048Z'),
- type: PaperlessTaskType.Auto,
- task_name: PaperlessTaskName.ConsumeFile,
- status: PaperlessTaskStatus.Failed,
- result: 'test.pd: Not consuming test.pdf: It is a duplicate of test (#100)',
+ task_type: PaperlessTaskType.ConsumeFile,
+ task_type_display: 'Consume File',
+ trigger_source: PaperlessTaskTriggerSource.FolderConsume,
+ trigger_source_display: 'Folder Consume',
+ status: PaperlessTaskStatus.Failure,
+ status_display: 'Failure',
+ result_message:
+ 'test.pd: Not consuming test.pdf: It is a duplicate of test (#100)',
acknowledged: false,
- related_document: null,
+ related_document_ids: [],
},
{
id: 466,
task_id: '10ca1a5b-3c08-442c-b2c8-7e4ae53657f1',
- task_file_name: '191092.pdf',
+ input_data: { filename: '191092.pdf' },
date_created: new Date('2023-03-01T09:26:03.093116Z'),
date_done: new Date('2023-03-01T09:26:07.223048Z'),
- type: PaperlessTaskType.Auto,
- task_name: PaperlessTaskName.ConsumeFile,
- status: PaperlessTaskStatus.Failed,
- result:
+ task_type: PaperlessTaskType.ConsumeFile,
+ task_type_display: 'Consume File',
+ trigger_source: PaperlessTaskTriggerSource.FolderConsume,
+ trigger_source_display: 'Folder Consume',
+ status: PaperlessTaskStatus.Failure,
+ status_display: 'Failure',
+ result_message:
'191092.pd: Not consuming 191092.pdf: It is a duplicate of 191092 (#311)',
acknowledged: false,
- related_document: null,
+ related_document_ids: [],
},
{
id: 465,
task_id: '3612d477-bb04-44e3-985b-ac580dd496d8',
- task_file_name: 'Scan Jun 6, 2023 at 3.19 PM.pdf',
+ input_data: { filename: 'Scan Jun 6, 2023 at 3.19 PM.pdf' },
date_created: new Date('2023-06-06T15:22:05.722323-07:00'),
date_done: new Date('2023-06-06T15:22:14.564305-07:00'),
- type: PaperlessTaskType.Auto,
- task_name: PaperlessTaskName.ConsumeFile,
+ task_type: PaperlessTaskType.ConsumeFile,
+ task_type_display: 'Consume File',
+ trigger_source: PaperlessTaskTriggerSource.FolderConsume,
+ trigger_source_display: 'Folder Consume',
status: PaperlessTaskStatus.Pending,
- result: null,
+ status_display: 'Pending',
+ result_message: null,
acknowledged: false,
- related_document: null,
+ related_document_ids: [],
},
{
id: 464,
task_id: '2eac4716-2aa6-4dcd-9953-264e11656d7e',
- task_file_name: 'paperless-mail-l4dkg8ir',
+ input_data: { filename: 'paperless-mail-l4dkg8ir' },
date_created: new Date('2023-06-04T11:24:32.898089-07:00'),
date_done: new Date('2023-06-04T11:24:44.678605-07:00'),
- type: PaperlessTaskType.Auto,
- task_name: PaperlessTaskName.ConsumeFile,
- status: PaperlessTaskStatus.Complete,
- result: 'Success. New document id 422 created',
+ task_type: PaperlessTaskType.ConsumeFile,
+ task_type_display: 'Consume File',
+ trigger_source: PaperlessTaskTriggerSource.EmailConsume,
+ trigger_source_display: 'Email Consume',
+ status: PaperlessTaskStatus.Success,
+ status_display: 'Success',
+ result_message: 'Success. New document id 422 created',
acknowledged: false,
- related_document: 422,
+ related_document_ids: [422],
},
{
id: 463,
task_id: '28125528-1575-4d6b-99e6-168906e8fa5c',
- task_file_name: 'onlinePaymentSummary.pdf',
+ input_data: { filename: 'onlinePaymentSummary.pdf' },
date_created: new Date('2023-06-01T13:49:51.631305-07:00'),
date_done: new Date('2023-06-01T13:49:54.190220-07:00'),
- type: PaperlessTaskType.Auto,
- task_name: PaperlessTaskName.ConsumeFile,
- status: PaperlessTaskStatus.Complete,
- result: 'Success. New document id 421 created',
+ task_type: PaperlessTaskType.ConsumeFile,
+ task_type_display: 'Consume File',
+ trigger_source: PaperlessTaskTriggerSource.FolderConsume,
+ trigger_source_display: 'Folder Consume',
+ status: PaperlessTaskStatus.Success,
+ status_display: 'Success',
+ result_message: 'Success. New document id 421 created',
acknowledged: false,
- related_document: 421,
+ related_document_ids: [421],
},
{
id: 462,
task_id: 'a5b9ca47-0c8e-490f-a04c-6db5d5fc09e5',
- task_file_name: 'paperless-mail-_rrpmqk6',
+ input_data: { filename: 'paperless-mail-_rrpmqk6' },
date_created: new Date('2023-06-07T02:54:35.694916Z'),
date_done: null,
- type: PaperlessTaskType.Auto,
- task_name: PaperlessTaskName.ConsumeFile,
+ task_type: PaperlessTaskType.ConsumeFile,
+ task_type_display: 'Consume File',
+ trigger_source: PaperlessTaskTriggerSource.EmailConsume,
+ trigger_source_display: 'Email Consume',
status: PaperlessTaskStatus.Started,
- result: null,
+ status_display: 'Started',
+ result_message: null,
acknowledged: false,
- related_document: null,
+ related_document_ids: [],
},
]
@@ -167,7 +186,7 @@ describe('TasksComponent', () => {
fixture.detectChanges()
httpTestingController
.expectOne(
- `${environment.apiBaseUrl}tasks/?task_name=consume_file&acknowledged=false`
+ `${environment.apiBaseUrl}tasks/?task_type=consume_file&acknowledged=false`
)
.flush(tasks)
})
@@ -176,7 +195,7 @@ describe('TasksComponent', () => {
const tabButtons = fixture.debugElement.queryAll(By.directive(NgbNavItem))
let currentTasksLength = tasks.filter(
- (t) => t.status === PaperlessTaskStatus.Failed
+ (t) => t.status === PaperlessTaskStatus.Failure
).length
component.activeTab = TaskTab.Failed
fixture.detectChanges()
@@ -188,7 +207,7 @@ describe('TasksComponent', () => {
).toHaveLength(currentTasksLength + 1)
currentTasksLength = tasks.filter(
- (t) => t.status === PaperlessTaskStatus.Complete
+ (t) => t.status === PaperlessTaskStatus.Success
).length
component.activeTab = TaskTab.Completed
fixture.detectChanges()
@@ -308,7 +327,7 @@ describe('TasksComponent', () => {
expect(component.selectedTasks).toEqual(
new Set(
tasks
- .filter((t) => t.status === PaperlessTaskStatus.Failed)
+ .filter((t) => t.status === PaperlessTaskStatus.Failure)
.map((t) => t.id)
)
)
@@ -322,7 +341,7 @@ describe('TasksComponent', () => {
component.dismissAndGo(tasks[3])
expect(routerSpy).toHaveBeenCalledWith([
'documents',
- tasks[3].related_document,
+ tasks[3].related_document_ids?.[0],
])
})
diff --git a/src-ui/src/app/components/admin/tasks/tasks.component.ts b/src-ui/src/app/components/admin/tasks/tasks.component.ts
index 6f144c58c..decba1eb8 100644
--- a/src-ui/src/app/components/admin/tasks/tasks.component.ts
+++ b/src-ui/src/app/components/admin/tasks/tasks.component.ts
@@ -175,7 +175,7 @@ export class TasksComponent
dismissAndGo(task: PaperlessTask) {
this.dismissTask(task)
- this.router.navigate(['documents', task.related_document])
+ this.router.navigate(['documents', task.related_document_ids?.[0]])
}
expandTask(task: PaperlessTask) {
@@ -207,11 +207,13 @@ export class TasksComponent
if (this._filterText.length) {
tasks = tasks.filter((t) => {
if (this.filterTargetID == TaskFilterTargetID.Name) {
- return t.task_file_name
- .toLowerCase()
+ return (t.input_data?.filename as string)
+ ?.toLowerCase()
.includes(this._filterText.toLowerCase())
} else if (this.filterTargetID == TaskFilterTargetID.Result) {
- return t.result.toLowerCase().includes(this._filterText.toLowerCase())
+ return t.result_message
+ ?.toLowerCase()
+ .includes(this._filterText.toLowerCase())
}
})
}
diff --git a/src-ui/src/app/components/common/dates-dropdown/dates-dropdown.component.html b/src-ui/src/app/components/common/dates-dropdown/dates-dropdown.component.html
index 7b1963bf3..fb4df4ea1 100644
--- a/src-ui/src/app/components/common/dates-dropdown/dates-dropdown.component.html
+++ b/src-ui/src/app/components/common/dates-dropdown/dates-dropdown.component.html
@@ -86,7 +86,7 @@
@if (addedRelativeDate) {
-
+
}
diff --git a/src-ui/src/app/components/common/system-status-dialog/system-status-dialog.component.html b/src-ui/src/app/components/common/system-status-dialog/system-status-dialog.component.html
index 636e574f5..2e30a5a24 100644
--- a/src-ui/src/app/components/common/system-status-dialog/system-status-dialog.component.html
+++ b/src-ui/src/app/components/common/system-status-dialog/system-status-dialog.component.html
@@ -168,16 +168,6 @@
}
- @if (currentUserIsSuperUser) {
- @if (isRunning(PaperlessTaskName.IndexOptimize)) {
-
- } @else {
-
- }
- }
@if (status.tasks.index_status === 'OK') {
@@ -203,10 +193,10 @@
}
@if (currentUserIsSuperUser) {
- @if (isRunning(PaperlessTaskName.TrainClassifier)) {
+ @if (isRunning(PaperlessTaskType.TrainClassifier)) {
} @else {
-
@if (currentUserIsSuperUser) {
- @if (isRunning(PaperlessTaskName.SanityCheck)) {
+ @if (isRunning(PaperlessTaskType.SanityCheck)) {
} @else {
-
+
Run Task
@@ -285,10 +275,10 @@
}
@if (currentUserIsSuperUser) {
- @if (isRunning(PaperlessTaskName.LLMIndexUpdate)) {
+ @if (isRunning(PaperlessTaskType.LlmIndex)) {
} @else {
-
+
Run Task
diff --git a/src-ui/src/app/components/common/system-status-dialog/system-status-dialog.component.spec.ts b/src-ui/src/app/components/common/system-status-dialog/system-status-dialog.component.spec.ts
index 0fd331b10..29bad431e 100644
--- a/src-ui/src/app/components/common/system-status-dialog/system-status-dialog.component.spec.ts
+++ b/src-ui/src/app/components/common/system-status-dialog/system-status-dialog.component.spec.ts
@@ -25,7 +25,7 @@ import {
import { NgbActiveModal } from '@ng-bootstrap/ng-bootstrap'
import { NgxBootstrapIconsModule, allIcons } from 'ngx-bootstrap-icons'
import { Subject, of, throwError } from 'rxjs'
-import { PaperlessTaskName } from 'src/app/data/paperless-task'
+import { PaperlessTaskType } from 'src/app/data/paperless-task'
import {
InstallType,
SystemStatus,
@@ -138,9 +138,9 @@ describe('SystemStatusDialogComponent', () => {
})
it('should check if task is running', () => {
- component.runTask(PaperlessTaskName.IndexOptimize)
- expect(component.isRunning(PaperlessTaskName.IndexOptimize)).toBeTruthy()
- expect(component.isRunning(PaperlessTaskName.SanityCheck)).toBeFalsy()
+ component.runTask(PaperlessTaskType.SanityCheck)
+ expect(component.isRunning(PaperlessTaskType.SanityCheck)).toBeTruthy()
+ expect(component.isRunning(PaperlessTaskType.TrainClassifier)).toBeFalsy()
})
it('should support running tasks, refresh status and show toasts', () => {
@@ -151,22 +151,22 @@ describe('SystemStatusDialogComponent', () => {
// fail first
runSpy.mockReturnValue(throwError(() => new Error('error')))
- component.runTask(PaperlessTaskName.IndexOptimize)
- expect(runSpy).toHaveBeenCalledWith(PaperlessTaskName.IndexOptimize)
+ component.runTask(PaperlessTaskType.SanityCheck)
+ expect(runSpy).toHaveBeenCalledWith(PaperlessTaskType.SanityCheck)
expect(toastErrorSpy).toHaveBeenCalledWith(
- `Failed to start task ${PaperlessTaskName.IndexOptimize}, see the logs for more details`,
+ `Failed to start task ${PaperlessTaskType.SanityCheck}, see the logs for more details`,
expect.any(Error)
)
// succeed
runSpy.mockReturnValue(of({}))
getStatusSpy.mockReturnValue(of(status))
- component.runTask(PaperlessTaskName.IndexOptimize)
- expect(runSpy).toHaveBeenCalledWith(PaperlessTaskName.IndexOptimize)
+ component.runTask(PaperlessTaskType.SanityCheck)
+ expect(runSpy).toHaveBeenCalledWith(PaperlessTaskType.SanityCheck)
expect(getStatusSpy).toHaveBeenCalled()
expect(toastSpy).toHaveBeenCalledWith(
- `Task ${PaperlessTaskName.IndexOptimize} started`
+ `Task ${PaperlessTaskType.SanityCheck} started`
)
})
diff --git a/src-ui/src/app/components/common/system-status-dialog/system-status-dialog.component.ts b/src-ui/src/app/components/common/system-status-dialog/system-status-dialog.component.ts
index d53bb74bf..88426186c 100644
--- a/src-ui/src/app/components/common/system-status-dialog/system-status-dialog.component.ts
+++ b/src-ui/src/app/components/common/system-status-dialog/system-status-dialog.component.ts
@@ -8,7 +8,7 @@ import {
} from '@ng-bootstrap/ng-bootstrap'
import { NgxBootstrapIconsModule } from 'ngx-bootstrap-icons'
import { Subject, takeUntil } from 'rxjs'
-import { PaperlessTaskName } from 'src/app/data/paperless-task'
+import { PaperlessTaskType } from 'src/app/data/paperless-task'
import {
SystemStatus,
SystemStatusItemStatus,
@@ -49,14 +49,14 @@ export class SystemStatusDialogComponent implements OnInit, OnDestroy {
private settingsService = inject(SettingsService)
public SystemStatusItemStatus = SystemStatusItemStatus
- public PaperlessTaskName = PaperlessTaskName
+ public PaperlessTaskType = PaperlessTaskType
public status: SystemStatus
public frontendVersion: string = environment.version
public versionMismatch: boolean = false
public copied: boolean = false
-  private runningTasks: Set<PaperlessTaskName> = new Set()
+  private runningTasks: Set<PaperlessTaskType> = new Set()
private unsubscribeNotifier: Subject = new Subject()
get currentUserIsSuperUser(): boolean {
@@ -107,11 +107,11 @@ export class SystemStatusDialogComponent implements OnInit, OnDestroy {
return now.getTime() - date.getTime() > hours * 60 * 60 * 1000
}
- public isRunning(taskName: PaperlessTaskName): boolean {
+ public isRunning(taskName: PaperlessTaskType): boolean {
return this.runningTasks.has(taskName)
}
- public runTask(taskName: PaperlessTaskName) {
+ public runTask(taskName: PaperlessTaskType) {
this.runningTasks.add(taskName)
this.toastService.showInfo(`Task ${taskName} started`)
this.tasksService.run(taskName).subscribe({
diff --git a/src-ui/src/app/data/paperless-task.ts b/src-ui/src/app/data/paperless-task.ts
index 19dd3921e..aa3390c96 100644
--- a/src-ui/src/app/data/paperless-task.ts
+++ b/src-ui/src/app/data/paperless-task.ts
@@ -1,49 +1,67 @@
-import { Document } from './document'
import { ObjectWithId } from './object-with-id'
export enum PaperlessTaskType {
- Auto = 'auto_task',
- ScheduledTask = 'scheduled_task',
- ManualTask = 'manual_task',
-}
-
-export enum PaperlessTaskName {
ConsumeFile = 'consume_file',
TrainClassifier = 'train_classifier',
- SanityCheck = 'check_sanity',
- IndexOptimize = 'index_optimize',
- LLMIndexUpdate = 'llmindex_update',
+ SanityCheck = 'sanity_check',
+ MailFetch = 'mail_fetch',
+ LlmIndex = 'llm_index',
+ EmptyTrash = 'empty_trash',
+ CheckWorkflows = 'check_workflows',
+ BulkUpdate = 'bulk_update',
+ ReprocessDocument = 'reprocess_document',
+ BuildShareLink = 'build_share_link',
+ BulkDelete = 'bulk_delete',
+}
+
+export enum PaperlessTaskTriggerSource {
+ Scheduled = 'scheduled',
+ WebUI = 'web_ui',
+ ApiUpload = 'api_upload',
+ FolderConsume = 'folder_consume',
+ EmailConsume = 'email_consume',
+ System = 'system',
+ Manual = 'manual',
}
export enum PaperlessTaskStatus {
- Pending = 'PENDING',
- Started = 'STARTED',
- Complete = 'SUCCESS',
- Failed = 'FAILURE',
+ Pending = 'pending',
+ Started = 'started',
+ Success = 'success',
+ Failure = 'failure',
+ Revoked = 'revoked',
}
export interface PaperlessTask extends ObjectWithId {
- type: PaperlessTaskType
-
- status: PaperlessTaskStatus
-
- acknowledged: boolean
-
task_id: string
-
- task_file_name: string
-
- task_name: PaperlessTaskName
-
+ task_type: PaperlessTaskType
+ task_type_display: string
+ trigger_source: PaperlessTaskTriggerSource
+ trigger_source_display: string
+ status: PaperlessTaskStatus
+ status_display: string
date_created: Date
-
+ date_started?: Date
date_done?: Date
-
- result?: string
-
- related_document?: number
-
- duplicate_documents?: Document[]
-
+ duration_seconds?: number
+ wait_time_seconds?: number
+  input_data: Record<string, unknown>
+  result_data?: Record<string, unknown>
+ result_message?: string
+ related_document_ids: number[]
+ acknowledged: boolean
owner?: number
}
+
+export interface PaperlessTaskSummary {
+ task_type: PaperlessTaskType
+ total_count: number
+ pending_count: number
+ success_count: number
+ failure_count: number
+ avg_duration_seconds: number | null
+ avg_wait_time_seconds: number | null
+ last_run: Date | null
+ last_success: Date | null
+ last_failure: Date | null
+}
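For reference, a single task under the new interface serializes to roughly the following payload. This is a sketch mirroring the mock data in the component spec above, not captured API output:

    # Illustrative shape of one task record under the new schema (values taken
    # from the spec's mock data above; field set matches the PaperlessTask interface).
    example_task = {
        "id": 464,
        "task_id": "2eac4716-2aa6-4dcd-9953-264e11656d7e",
        "task_type": "consume_file",
        "task_type_display": "Consume File",
        "trigger_source": "email_consume",
        "trigger_source_display": "Email Consume",
        "status": "success",
        "status_display": "Success",
        "date_created": "2023-06-04T11:24:32.898089-07:00",
        "date_done": "2023-06-04T11:24:44.678605-07:00",
        "input_data": {"filename": "paperless-mail-l4dkg8ir"},
        "result_message": "Success. New document id 422 created",
        "related_document_ids": [422],
        "acknowledged": False,
    }

The old single-purpose fields (task_file_name, result, related_document) are replaced by input_data, result_message and related_document_ids respectively.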
diff --git a/src-ui/src/app/services/tasks.service.spec.ts b/src-ui/src/app/services/tasks.service.spec.ts
index 640f84587..09bd29441 100644
--- a/src-ui/src/app/services/tasks.service.spec.ts
+++ b/src-ui/src/app/services/tasks.service.spec.ts
@@ -5,11 +5,7 @@ import {
} from '@angular/common/http/testing'
import { TestBed } from '@angular/core/testing'
import { environment } from 'src/environments/environment'
-import {
- PaperlessTaskName,
- PaperlessTaskStatus,
- PaperlessTaskType,
-} from '../data/paperless-task'
+import { PaperlessTaskStatus, PaperlessTaskType } from '../data/paperless-task'
import { TasksService } from './tasks.service'
describe('TasksService', () => {
@@ -37,7 +33,7 @@ describe('TasksService', () => {
it('calls tasks api endpoint on reload', () => {
tasksService.reload()
const req = httpTestingController.expectOne(
- `${environment.apiBaseUrl}tasks/?task_name=consume_file&acknowledged=false`
+ `${environment.apiBaseUrl}tasks/?task_type=consume_file&acknowledged=false`
)
expect(req.request.method).toEqual('GET')
})
@@ -46,7 +42,7 @@ describe('TasksService', () => {
tasksService.loading = true
tasksService.reload()
httpTestingController.expectNone(
- `${environment.apiBaseUrl}tasks/?task_name=consume_file&acknowledged=false`
+ `${environment.apiBaseUrl}tasks/?task_type=consume_file&acknowledged=false`
)
})
@@ -63,7 +59,7 @@ describe('TasksService', () => {
// reload is then called
httpTestingController
.expectOne(
- `${environment.apiBaseUrl}tasks/?task_name=consume_file&acknowledged=false`
+ `${environment.apiBaseUrl}tasks/?task_type=consume_file&acknowledged=false`
)
.flush([])
})
@@ -72,56 +68,56 @@ describe('TasksService', () => {
expect(tasksService.total).toEqual(0)
const mockTasks = [
{
- type: PaperlessTaskType.Auto,
- task_name: PaperlessTaskName.ConsumeFile,
- status: PaperlessTaskStatus.Complete,
+ task_type: PaperlessTaskType.ConsumeFile,
+ status: PaperlessTaskStatus.Success,
acknowledged: false,
task_id: '1234',
- task_file_name: 'file1.pdf',
+ input_data: { filename: 'file1.pdf' },
date_created: new Date(),
+ related_document_ids: [],
},
{
- type: PaperlessTaskType.Auto,
- task_name: PaperlessTaskName.ConsumeFile,
- status: PaperlessTaskStatus.Failed,
+ task_type: PaperlessTaskType.ConsumeFile,
+ status: PaperlessTaskStatus.Failure,
acknowledged: false,
task_id: '1235',
- task_file_name: 'file2.pdf',
+ input_data: { filename: 'file2.pdf' },
date_created: new Date(),
+ related_document_ids: [],
},
{
- type: PaperlessTaskType.Auto,
- task_name: PaperlessTaskName.ConsumeFile,
+ task_type: PaperlessTaskType.ConsumeFile,
status: PaperlessTaskStatus.Pending,
acknowledged: false,
task_id: '1236',
- task_file_name: 'file3.pdf',
+ input_data: { filename: 'file3.pdf' },
date_created: new Date(),
+ related_document_ids: [],
},
{
- type: PaperlessTaskType.Auto,
- task_name: PaperlessTaskName.ConsumeFile,
+ task_type: PaperlessTaskType.ConsumeFile,
status: PaperlessTaskStatus.Started,
acknowledged: false,
task_id: '1237',
- task_file_name: 'file4.pdf',
+ input_data: { filename: 'file4.pdf' },
date_created: new Date(),
+ related_document_ids: [],
},
{
- type: PaperlessTaskType.Auto,
- task_name: PaperlessTaskName.ConsumeFile,
- status: PaperlessTaskStatus.Complete,
+ task_type: PaperlessTaskType.ConsumeFile,
+ status: PaperlessTaskStatus.Success,
acknowledged: false,
task_id: '1238',
- task_file_name: 'file5.pdf',
+ input_data: { filename: 'file5.pdf' },
date_created: new Date(),
+ related_document_ids: [],
},
]
tasksService.reload()
const req = httpTestingController.expectOne(
- `${environment.apiBaseUrl}tasks/?task_name=consume_file&acknowledged=false`
+ `${environment.apiBaseUrl}tasks/?task_type=consume_file&acknowledged=false`
)
req.flush(mockTasks)
@@ -134,9 +130,9 @@ describe('TasksService', () => {
})
it('supports running tasks', () => {
- tasksService.run(PaperlessTaskName.SanityCheck).subscribe((res) => {
+ tasksService.run(PaperlessTaskType.SanityCheck).subscribe((res) => {
expect(res).toEqual({
- result: 'success',
+ task_id: 'abc-123',
})
})
const req = httpTestingController.expectOne(
@@ -144,7 +140,7 @@ describe('TasksService', () => {
)
expect(req.request.method).toEqual('POST')
req.flush({
- result: 'success',
+ task_id: 'abc-123',
})
})
})
diff --git a/src-ui/src/app/services/tasks.service.ts b/src-ui/src/app/services/tasks.service.ts
index 305258d7b..bdfdf0eb1 100644
--- a/src-ui/src/app/services/tasks.service.ts
+++ b/src-ui/src/app/services/tasks.service.ts
@@ -4,8 +4,8 @@ import { Observable, Subject } from 'rxjs'
import { first, takeUntil, tap } from 'rxjs/operators'
import {
PaperlessTask,
- PaperlessTaskName,
PaperlessTaskStatus,
+ PaperlessTaskType,
} from 'src/app/data/paperless-task'
import { environment } from 'src/environments/environment'
@@ -18,7 +18,7 @@ export class TasksService {
private baseUrl: string = environment.apiBaseUrl
private endpoint: string = 'tasks'
- public loading: boolean
+ public loading: boolean = false
private fileTasks: PaperlessTask[] = []
@@ -33,21 +33,27 @@ export class TasksService {
}
public get queuedFileTasks(): PaperlessTask[] {
- return this.fileTasks.filter((t) => t.status == PaperlessTaskStatus.Pending)
+ return this.fileTasks.filter(
+ (t) => t.status === PaperlessTaskStatus.Pending
+ )
}
public get startedFileTasks(): PaperlessTask[] {
- return this.fileTasks.filter((t) => t.status == PaperlessTaskStatus.Started)
+ return this.fileTasks.filter(
+ (t) => t.status === PaperlessTaskStatus.Started
+ )
}
public get completedFileTasks(): PaperlessTask[] {
return this.fileTasks.filter(
- (t) => t.status == PaperlessTaskStatus.Complete
+ (t) => t.status === PaperlessTaskStatus.Success
)
}
public get failedFileTasks(): PaperlessTask[] {
- return this.fileTasks.filter((t) => t.status == PaperlessTaskStatus.Failed)
+ return this.fileTasks.filter(
+ (t) => t.status === PaperlessTaskStatus.Failure
+ )
}
public reload() {
@@ -56,18 +62,16 @@ export class TasksService {
this.http
      .get<PaperlessTask[]>(
- `${this.baseUrl}${this.endpoint}/?task_name=consume_file&acknowledged=false`
+ `${this.baseUrl}${this.endpoint}/?task_type=${PaperlessTaskType.ConsumeFile}&acknowledged=false`
)
.pipe(takeUntil(this.unsubscribeNotifer), first())
.subscribe((r) => {
- this.fileTasks = r.filter(
- (t) => t.task_name == PaperlessTaskName.ConsumeFile
- )
+ this.fileTasks = r
this.loading = false
})
}
- public dismissTasks(task_ids: Set) {
+ public dismissTasks(task_ids: Set): Observable {
return this.http
.post(`${this.baseUrl}tasks/acknowledge/`, {
tasks: [...task_ids],
@@ -85,12 +89,10 @@ export class TasksService {
this.unsubscribeNotifer.next(true)
}
- public run(taskName: PaperlessTaskName): Observable {
- return this.http.post(
+ public run(taskType: PaperlessTaskType): Observable<{ task_id: string }> {
+ return this.http.post<{ task_id: string }>(
`${environment.apiBaseUrl}${this.endpoint}/run/`,
- {
- task_name: taskName,
- }
+ { task_type: taskType }
)
}
}
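The run() change above maps onto a small HTTP exchange: POST the task type, get back the Celery task id. A minimal sketch of that exchange; the base URL and token are placeholder assumptions, the endpoint path and payload follow the service code and spec above:

    # Sketch of the request behind TasksService.run(); URL and token are assumptions.
    import requests

    BASE_URL = "http://localhost:8000/api"  # assumption: local paperless-ngx instance
    TOKEN = "changeme"                      # assumption: token auth

    resp = requests.post(
        f"{BASE_URL}/tasks/run/",
        json={"task_type": "sanity_check"},
        headers={"Authorization": f"Token {TOKEN}"},
    )
    resp.raise_for_status()
    print(resp.json())  # e.g. {"task_id": "abc-123"}, per the service spec above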
diff --git a/src/documents/admin.py b/src/documents/admin.py
index f0e5ccd25..3730160fb 100644
--- a/src/documents/admin.py
+++ b/src/documents/admin.py
@@ -144,18 +144,30 @@ class StoragePathAdmin(GuardedModelAdmin):
class TaskAdmin(admin.ModelAdmin):
- list_display = ("task_id", "task_file_name", "task_name", "date_done", "status")
- list_filter = ("status", "date_done", "task_name")
- search_fields = ("task_name", "task_id", "status", "task_file_name")
+ list_display = (
+ "task_id",
+ "task_type",
+ "trigger_source",
+ "status",
+ "date_created",
+ "date_done",
+ "duration_seconds",
+ )
+ list_filter = ("status", "task_type", "trigger_source", "date_done")
+ search_fields = ("task_id", "task_type", "status")
readonly_fields = (
"task_id",
- "task_file_name",
- "task_name",
+ "task_type",
+ "trigger_source",
"status",
"date_created",
"date_started",
"date_done",
- "result",
+ "duration_seconds",
+ "wait_time_seconds",
+ "input_data",
+ "result_data",
+ "result_message",
)
diff --git a/src/documents/filters.py b/src/documents/filters.py
index 9ee829fc9..75501357f 100644
--- a/src/documents/filters.py
+++ b/src/documents/filters.py
@@ -26,8 +26,10 @@ from django.db.models.functions import Cast
from django.utils.translation import gettext_lazy as _
from django_filters import DateFilter
from django_filters.rest_framework import BooleanFilter
+from django_filters.rest_framework import DateTimeFilter
from django_filters.rest_framework import Filter
from django_filters.rest_framework import FilterSet
+from django_filters.rest_framework import MultipleChoiceFilter
from drf_spectacular.utils import extend_schema_field
from guardian.utils import get_group_obj_perms_model
from guardian.utils import get_user_obj_perms_model
@@ -861,18 +863,51 @@ class ShareLinkBundleFilterSet(FilterSet):
class PaperlessTaskFilterSet(FilterSet):
+ task_type = MultipleChoiceFilter(
+ choices=PaperlessTask.TaskType.choices,
+ label="Task Type",
+ )
+
+ trigger_source = MultipleChoiceFilter(
+ choices=PaperlessTask.TriggerSource.choices,
+ label="Trigger Source",
+ )
+
+ status = MultipleChoiceFilter(
+ choices=PaperlessTask.Status.choices,
+ label="Status",
+ )
+
+ is_complete = BooleanFilter(
+ method="filter_is_complete",
+ label="Is Complete",
+ )
+
acknowledged = BooleanFilter(
label="Acknowledged",
field_name="acknowledged",
)
+ date_created_after = DateTimeFilter(
+ field_name="date_created",
+ lookup_expr="gte",
+ label="Created After",
+ )
+
+ date_created_before = DateTimeFilter(
+ field_name="date_created",
+ lookup_expr="lte",
+ label="Created Before",
+ )
+
class Meta:
model = PaperlessTask
- fields = {
- "type": ["exact"],
- "task_name": ["exact"],
- "status": ["exact"],
- }
+ fields = ["task_type", "trigger_source", "status", "acknowledged", "owner"]
+
+ def filter_is_complete(self, queryset, name, value):
+ if value:
+ return queryset.filter(status__in=PaperlessTask.COMPLETE_STATUSES)
+ return queryset.exclude(status__in=PaperlessTask.COMPLETE_STATUSES)
class ObjectOwnedOrGrantedPermissionsFilter(ObjectPermissionsFilter):
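The reworked filterset accepts repeated values for the MultipleChoiceFilter fields and adds is_complete plus created-date bounds. A small sketch of a query string it would accept (endpoint path and values are illustrative):

    # Building an illustrative /api/tasks/ query against the new PaperlessTaskFilterSet.
    from urllib.parse import urlencode

    params = [
        ("task_type", "consume_file"),   # MultipleChoiceFilter: repeat the key
        ("task_type", "mail_fetch"),     # to OR several task types together
        ("status", "failure"),
        ("is_complete", "true"),         # success / failure / revoked only
        ("date_created_after", "2024-01-01T00:00:00Z"),
        ("acknowledged", "false"),
    ]
    print(f"/api/tasks/?{urlencode(params)}")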
diff --git a/src/documents/management/commands/document_create_classifier.py b/src/documents/management/commands/document_create_classifier.py
index b662195a7..3fa7bdb29 100644
--- a/src/documents/management/commands/document_create_classifier.py
+++ b/src/documents/management/commands/document_create_classifier.py
@@ -22,7 +22,6 @@ class Command(PaperlessCommand):
self.buffered_logging("paperless.classifier"),
):
train_classifier(
- scheduled=False,
status_callback=lambda msg: self.console.print(f" {msg}"),
)
diff --git a/src/documents/management/commands/document_llmindex.py b/src/documents/management/commands/document_llmindex.py
index 3b9e3440b..9823b1b87 100644
--- a/src/documents/management/commands/document_llmindex.py
+++ b/src/documents/management/commands/document_llmindex.py
@@ -17,7 +17,6 @@ class Command(PaperlessCommand):
def handle(self, *args: Any, **options: Any) -> None:
llmindex_index(
rebuild=options["command"] == "rebuild",
- scheduled=False,
iter_wrapper=lambda docs: self.track(
docs,
description="Indexing documents...",
diff --git a/src/documents/management/commands/document_sanity_checker.py b/src/documents/management/commands/document_sanity_checker.py
index 598ddf7bb..c8921d064 100644
--- a/src/documents/management/commands/document_sanity_checker.py
+++ b/src/documents/management/commands/document_sanity_checker.py
@@ -111,7 +111,6 @@ class Command(PaperlessCommand):
def handle(self, *args: Any, **options: Any) -> None:
messages = check_sanity(
- scheduled=False,
iter_wrapper=lambda docs: self.track(
docs,
description="Checking documents...",
diff --git a/src/documents/migrations/0019_task_system_redesign.py b/src/documents/migrations/0019_task_system_redesign.py
new file mode 100644
index 000000000..85ba489f1
--- /dev/null
+++ b/src/documents/migrations/0019_task_system_redesign.py
@@ -0,0 +1,218 @@
+"""
+Drop and recreate the PaperlessTask table with the new structured schema.
+
+We intentionally drop all existing task data -- the old schema was
+string-based and incompatible with the new JSONField result storage.
+"""
+
+import django.db.models.deletion
+import django.utils.timezone
+from django.conf import settings
+from django.db import migrations
+from django.db import models
+
+
+class Migration(migrations.Migration):
+ dependencies = [
+ ("documents", "0018_saved_view_simple_search_rules"),
+ migrations.swappable_dependency(settings.AUTH_USER_MODEL),
+ ]
+
+ operations = [
+ migrations.DeleteModel(name="PaperlessTask"),
+ migrations.CreateModel(
+ name="PaperlessTask",
+ fields=[
+ (
+ "id",
+ models.AutoField(
+ auto_created=True,
+ primary_key=True,
+ serialize=False,
+ verbose_name="ID",
+ ),
+ ),
+ (
+ "owner",
+ models.ForeignKey(
+ blank=True,
+ default=None,
+ null=True,
+ on_delete=django.db.models.deletion.SET_NULL,
+ to=settings.AUTH_USER_MODEL,
+ verbose_name="owner",
+ ),
+ ),
+ (
+ "task_id",
+ models.CharField(
+ help_text="Celery task ID",
+ max_length=72,
+ unique=True,
+ verbose_name="Task ID",
+ ),
+ ),
+ (
+ "task_type",
+ models.CharField(
+ choices=[
+ ("consume_file", "Consume File"),
+ ("train_classifier", "Train Classifier"),
+ ("sanity_check", "Sanity Check"),
+ ("index_optimize", "Index Optimize"),
+ ("mail_fetch", "Mail Fetch"),
+ ("llm_index", "LLM Index"),
+ ("empty_trash", "Empty Trash"),
+ ("check_workflows", "Check Workflows"),
+ ("bulk_update", "Bulk Update"),
+ ("reprocess_document", "Reprocess Document"),
+ ("build_share_link", "Build Share Link"),
+ ("bulk_delete", "Bulk Delete"),
+ ],
+ db_index=True,
+ help_text="The kind of work being performed",
+ max_length=50,
+ verbose_name="Task Type",
+ ),
+ ),
+ (
+ "trigger_source",
+ models.CharField(
+ choices=[
+ ("scheduled", "Scheduled"),
+ ("web_ui", "Web UI"),
+ ("api_upload", "API Upload"),
+ ("folder_consume", "Folder Consume"),
+ ("email_consume", "Email Consume"),
+ ("system", "System"),
+ ("manual", "Manual"),
+ ],
+ db_index=True,
+ help_text="What initiated this task",
+ max_length=50,
+ verbose_name="Trigger Source",
+ ),
+ ),
+ (
+ "status",
+ models.CharField(
+ choices=[
+ ("pending", "Pending"),
+ ("started", "Started"),
+ ("success", "Success"),
+ ("failure", "Failure"),
+ ("revoked", "Revoked"),
+ ],
+ db_index=True,
+ default="pending",
+ max_length=30,
+ verbose_name="Status",
+ ),
+ ),
+ (
+ "date_created",
+ models.DateTimeField(
+ db_index=True,
+ default=django.utils.timezone.now,
+ verbose_name="Created",
+ ),
+ ),
+ (
+ "date_started",
+ models.DateTimeField(
+ blank=True,
+ null=True,
+ verbose_name="Started",
+ ),
+ ),
+ (
+ "date_done",
+ models.DateTimeField(
+ blank=True,
+ db_index=True,
+ null=True,
+ verbose_name="Completed",
+ ),
+ ),
+ (
+ "duration_seconds",
+ models.FloatField(
+ blank=True,
+ help_text="Elapsed time from start to completion",
+ null=True,
+ verbose_name="Duration (seconds)",
+ ),
+ ),
+ (
+ "wait_time_seconds",
+ models.FloatField(
+ blank=True,
+ help_text="Time from task creation to worker pickup",
+ null=True,
+ verbose_name="Wait Time (seconds)",
+ ),
+ ),
+ (
+ "input_data",
+ models.JSONField(
+ blank=True,
+ default=dict,
+ help_text="Structured input parameters for the task",
+ verbose_name="Input Data",
+ ),
+ ),
+ (
+ "result_data",
+ models.JSONField(
+ blank=True,
+ help_text="Structured result data from task execution",
+ null=True,
+ verbose_name="Result Data",
+ ),
+ ),
+ (
+ "result_message",
+ models.TextField(
+ blank=True,
+ help_text="Human-readable result message",
+ null=True,
+ verbose_name="Result Message",
+ ),
+ ),
+ (
+ "acknowledged",
+ models.BooleanField(
+ db_index=True,
+ default=False,
+ verbose_name="Acknowledged",
+ ),
+ ),
+ ],
+ options={
+ "verbose_name": "Task",
+ "verbose_name_plural": "Tasks",
+ "ordering": ["-date_created"],
+ },
+ ),
+ migrations.AddIndex(
+ model_name="paperlesstask",
+ index=models.Index(
+ fields=["status", "date_created"],
+ name="documents_p_status_8aa687_idx",
+ ),
+ ),
+ migrations.AddIndex(
+ model_name="paperlesstask",
+ index=models.Index(
+ fields=["task_type", "status"],
+ name="documents_p_task_ty_e4a93f_idx",
+ ),
+ ),
+ migrations.AddIndex(
+ model_name="paperlesstask",
+ index=models.Index(
+ fields=["owner", "acknowledged", "date_created"],
+ name="documents_p_owner_i_62c545_idx",
+ ),
+ ),
+ ]
diff --git a/src/documents/migrations/0020_drop_celery_results.py b/src/documents/migrations/0020_drop_celery_results.py
new file mode 100644
index 000000000..7d256db33
--- /dev/null
+++ b/src/documents/migrations/0020_drop_celery_results.py
@@ -0,0 +1,26 @@
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+ dependencies = [
+ ("documents", "0019_task_system_redesign"),
+ ]
+
+ operations = [
+ migrations.RunSQL(
+ sql="DROP TABLE IF EXISTS django_celery_results_taskresult;",
+ reverse_sql=migrations.RunSQL.noop,
+ ),
+ migrations.RunSQL(
+ sql="DROP TABLE IF EXISTS django_celery_results_groupresult;",
+ reverse_sql=migrations.RunSQL.noop,
+ ),
+ migrations.RunSQL(
+ sql="DROP TABLE IF EXISTS django_celery_results_chordcounter;",
+ reverse_sql=migrations.RunSQL.noop,
+ ),
+ migrations.RunSQL(
+ sql="DELETE FROM django_migrations WHERE app = 'django_celery_results';",
+ reverse_sql=migrations.RunSQL.noop,
+ ),
+ ]
diff --git a/src/documents/models.py b/src/documents/models.py
index e2f5bb3dc..da51823ae 100644
--- a/src/documents/models.py
+++ b/src/documents/models.py
@@ -3,7 +3,6 @@ from pathlib import Path
from typing import Final
import pathvalidate
-from celery import states
from django.conf import settings
from django.contrib.auth.models import Group
from django.contrib.auth.models import User
@@ -663,97 +662,174 @@ class UiSettings(models.Model):
class PaperlessTask(ModelWithOwner):
- ALL_STATES = sorted(states.ALL_STATES)
- TASK_STATE_CHOICES = sorted(zip(ALL_STATES, ALL_STATES))
+ """
+ Tracks background task execution for user visibility and debugging.
+
+ State transitions:
+ PENDING -> STARTED -> SUCCESS
+ PENDING -> STARTED -> FAILURE
+ PENDING -> REVOKED (if cancelled before starting)
+ """
+
+ class Status(models.TextChoices):
+ PENDING = "pending", _("Pending")
+ STARTED = "started", _("Started")
+ SUCCESS = "success", _("Success")
+ FAILURE = "failure", _("Failure")
+ REVOKED = "revoked", _("Revoked")
class TaskType(models.TextChoices):
- AUTO = ("auto_task", _("Auto Task"))
- SCHEDULED_TASK = ("scheduled_task", _("Scheduled Task"))
- MANUAL_TASK = ("manual_task", _("Manual Task"))
+ CONSUME_FILE = "consume_file", _("Consume File")
+ TRAIN_CLASSIFIER = "train_classifier", _("Train Classifier")
+ SANITY_CHECK = "sanity_check", _("Sanity Check")
+ INDEX_OPTIMIZE = "index_optimize", _("Index Optimize")
+ MAIL_FETCH = "mail_fetch", _("Mail Fetch")
+ LLM_INDEX = "llm_index", _("LLM Index")
+ EMPTY_TRASH = "empty_trash", _("Empty Trash")
+ CHECK_WORKFLOWS = "check_workflows", _("Check Workflows")
+ BULK_UPDATE = "bulk_update", _("Bulk Update")
+ REPROCESS_DOCUMENT = "reprocess_document", _("Reprocess Document")
+ BUILD_SHARE_LINK = "build_share_link", _("Build Share Link")
+ BULK_DELETE = "bulk_delete", _("Bulk Delete")
- class TaskName(models.TextChoices):
- CONSUME_FILE = ("consume_file", _("Consume File"))
- TRAIN_CLASSIFIER = ("train_classifier", _("Train Classifier"))
- CHECK_SANITY = ("check_sanity", _("Check Sanity"))
- INDEX_OPTIMIZE = ("index_optimize", _("Index Optimize"))
- LLMINDEX_UPDATE = ("llmindex_update", _("LLM Index Update"))
+ COMPLETE_STATUSES = (
+ Status.SUCCESS,
+ Status.FAILURE,
+ Status.REVOKED,
+ )
+ class TriggerSource(models.TextChoices):
+ SCHEDULED = "scheduled", _("Scheduled") # Celery beat
+ WEB_UI = "web_ui", _("Web UI") # Document uploaded via web
+ API_UPLOAD = "api_upload", _("API Upload") # Document uploaded via API
+ FOLDER_CONSUME = "folder_consume", _("Folder Consume") # Consume folder
+ EMAIL_CONSUME = "email_consume", _("Email Consume") # Email attachment
+ SYSTEM = "system", _("System") # Auto-triggered (self-heal, config side-effect)
+ MANUAL = "manual", _("Manual") # User explicitly ran via /api/tasks/run/
+
+ # Identification
task_id = models.CharField(
- max_length=255,
+ max_length=72,
unique=True,
verbose_name=_("Task ID"),
- help_text=_("Celery ID for the Task that was run"),
+ help_text=_("Celery task ID"),
)
- acknowledged = models.BooleanField(
- default=False,
- verbose_name=_("Acknowledged"),
- help_text=_("If the task is acknowledged via the frontend or API"),
+ task_type = models.CharField(
+ max_length=50,
+ choices=TaskType.choices,
+ verbose_name=_("Task Type"),
+ help_text=_("The kind of work being performed"),
+ db_index=True,
)
- task_file_name = models.CharField(
- null=True,
- max_length=255,
- verbose_name=_("Task Filename"),
- help_text=_("Name of the file which the Task was run for"),
- )
-
- task_name = models.CharField(
- null=True,
- max_length=255,
- choices=TaskName.choices,
- verbose_name=_("Task Name"),
- help_text=_("Name of the task that was run"),
+ trigger_source = models.CharField(
+ max_length=50,
+ choices=TriggerSource.choices,
+ verbose_name=_("Trigger Source"),
+ help_text=_("What initiated this task"),
+ db_index=True,
)
+ # State tracking
status = models.CharField(
max_length=30,
- default=states.PENDING,
- choices=TASK_STATE_CHOICES,
- verbose_name=_("Task State"),
- help_text=_("Current state of the task being run"),
+ choices=Status.choices,
+ default=Status.PENDING,
+ verbose_name=_("Status"),
+ db_index=True,
)
+ # Timestamps
date_created = models.DateTimeField(
- null=True,
default=timezone.now,
- verbose_name=_("Created DateTime"),
- help_text=_("Datetime field when the task result was created in UTC"),
+ verbose_name=_("Created"),
+ db_index=True,
)
date_started = models.DateTimeField(
null=True,
- default=None,
- verbose_name=_("Started DateTime"),
- help_text=_("Datetime field when the task was started in UTC"),
+ blank=True,
+ verbose_name=_("Started"),
)
date_done = models.DateTimeField(
null=True,
- default=None,
- verbose_name=_("Completed DateTime"),
- help_text=_("Datetime field when the task was completed in UTC"),
+ blank=True,
+ verbose_name=_("Completed"),
+ db_index=True,
)
- result = models.TextField(
+ # Duration fields -- populated by task_postrun signal handler
+ duration_seconds = models.FloatField(
null=True,
- default=None,
+ blank=True,
+ verbose_name=_("Duration (seconds)"),
+ help_text=_("Elapsed time from start to completion"),
+ )
+
+ wait_time_seconds = models.FloatField(
+ null=True,
+ blank=True,
+ verbose_name=_("Wait Time (seconds)"),
+ help_text=_("Time from task creation to worker pickup"),
+ )
+
+ # Input/Output data
+ input_data = models.JSONField(
+ default=dict,
+ blank=True,
+ verbose_name=_("Input Data"),
+ help_text=_("Structured input parameters for the task"),
+ )
+
+ result_data = models.JSONField(
+ null=True,
+ blank=True,
verbose_name=_("Result Data"),
- help_text=_(
- "The data returned by the task",
- ),
+ help_text=_("Structured result data from task execution"),
)
- type = models.CharField(
- max_length=30,
- choices=TaskType.choices,
- default=TaskType.AUTO,
- verbose_name=_("Task Type"),
- help_text=_("The type of task that was run"),
+ result_message = models.TextField(
+ null=True,
+ blank=True,
+ verbose_name=_("Result Message"),
+ help_text=_("Human-readable result message"),
)
- def __str__(self) -> str:
- return f"Task {self.task_id}"
+ # Acknowledgment
+ acknowledged = models.BooleanField(
+ default=False,
+ verbose_name=_("Acknowledged"),
+ db_index=True,
+ )
+
+ class Meta:
+ verbose_name = _("Task")
+ verbose_name_plural = _("Tasks")
+ ordering = ["-date_created"]
+ indexes = [
+ models.Index(fields=["status", "date_created"]),
+ models.Index(fields=["task_type", "status"]),
+ models.Index(fields=["owner", "acknowledged", "date_created"]),
+ ]
+
+ def __str__(self) -> str: # pragma: no cover
+ return f"{self.get_task_type_display()} [{self.task_id[:8]}]"
+
+ @property
+ def is_complete(self) -> bool: # pragma: no cover
+ return self.status in self.COMPLETE_STATUSES
+
+ @property
+ def related_document_ids(self) -> list[int]: # pragma: no cover
+ if not self.result_data:
+ return []
+ if doc_id := self.result_data.get("document_id"):
+ return [doc_id]
+ if dup_id := self.result_data.get("duplicate_of"):
+ return [dup_id]
+ return []
class Note(SoftDeleteModel):
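To make the state transitions in the model docstring concrete, here is a sketch of how one consume task record moves through the lifecycle and which fields change at each step. In practice the Celery signal handlers (see handlers.py below) do these writes; this is only an illustration run in a Django shell:

    # Illustrative lifecycle of one PaperlessTask record (signal handlers are the
    # real writers; values are examples).
    from django.utils import timezone

    from documents.models import PaperlessTask

    task = PaperlessTask.objects.create(
        task_id="11ca1a5b-9f81-442c-b2c8-7e4ae53657f1",
        task_type=PaperlessTask.TaskType.CONSUME_FILE,
        trigger_source=PaperlessTask.TriggerSource.FOLDER_CONSUME,
        input_data={"filename": "test.pdf"},
    )  # status defaults to PENDING

    # Worker picks the task up.
    task.status = PaperlessTask.Status.STARTED
    task.date_started = timezone.now()
    task.wait_time_seconds = (task.date_started - task.date_created).total_seconds()
    task.save()

    # Worker finishes successfully.
    task.status = PaperlessTask.Status.SUCCESS
    task.date_done = timezone.now()
    task.duration_seconds = (task.date_done - task.date_started).total_seconds()
    task.result_data = {"document_id": 422}
    task.result_message = "Success. New document id 422 created"
    task.save()

    assert task.is_complete
    assert task.related_document_ids == [422]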
diff --git a/src/documents/sanity_checker.py b/src/documents/sanity_checker.py
index a6d00cd3a..b824e6683 100644
--- a/src/documents/sanity_checker.py
+++ b/src/documents/sanity_checker.py
@@ -10,7 +10,6 @@ is an identity function that adds no overhead.
"""
import logging
-import uuid
from collections import defaultdict
from collections.abc import Iterator
from pathlib import Path
@@ -18,12 +17,9 @@ from typing import TYPE_CHECKING
from typing import Final
from typing import TypedDict
-from celery import states
from django.conf import settings
-from django.utils import timezone
from documents.models import Document
-from documents.models import PaperlessTask
from documents.utils import IterWrapper
from documents.utils import compute_checksum
from documents.utils import identity
@@ -287,33 +283,17 @@ def _check_document(
def check_sanity(
*,
- scheduled: bool = True,
iter_wrapper: IterWrapper[Document] = identity,
) -> SanityCheckMessages:
"""Run a full sanity check on the document archive.
Args:
- scheduled: Whether this is a scheduled (automatic) or manual check.
- Controls the task type recorded in the database.
iter_wrapper: A callable that wraps the document iterable, e.g.,
for progress bar display. Defaults to identity (no wrapping).
Returns:
A SanityCheckMessages instance containing all detected issues.
"""
- paperless_task = PaperlessTask.objects.create(
- task_id=uuid.uuid4(),
- type=(
- PaperlessTask.TaskType.SCHEDULED_TASK
- if scheduled
- else PaperlessTask.TaskType.MANUAL_TASK
- ),
- task_name=PaperlessTask.TaskName.CHECK_SANITY,
- status=states.STARTED,
- date_created=timezone.now(),
- date_started=timezone.now(),
- )
-
messages = SanityCheckMessages()
present_files = _build_present_files()
@@ -332,22 +312,4 @@ def check_sanity(
for extra_file in present_files:
messages.warning(None, f"Orphaned file in media dir: {extra_file}")
- paperless_task.status = states.SUCCESS if not messages.has_error else states.FAILURE
- if messages.total_issue_count == 0:
- paperless_task.result = "No issues found."
- else:
- parts: list[str] = []
- if messages.document_error_count:
- parts.append(f"{messages.document_error_count} document(s) with errors")
- if messages.document_warning_count:
- parts.append(f"{messages.document_warning_count} document(s) with warnings")
- if messages.global_warning_count:
- parts.append(f"{messages.global_warning_count} global warning(s)")
- paperless_task.result = ", ".join(parts) + " found."
- if messages.has_error:
- paperless_task.result += " Check logs for details."
-
- paperless_task.date_done = timezone.now()
- paperless_task.save(update_fields=["status", "result", "date_done"])
-
return messages
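With the task bookkeeping removed, check_sanity() is a plain function again; callers that want a progress bar pass iter_wrapper, everything else just calls it and inspects the returned messages. A minimal call-site sketch, using the message attributes referenced by the removed summary code above:

    # Sketch of calling check_sanity() after this change (run inside a Django context).
    from documents.sanity_checker import check_sanity

    messages = check_sanity()
    if messages.total_issue_count == 0:
        print("No issues found.")
    elif messages.has_error:
        print(f"{messages.total_issue_count} issue(s) found, see logs for details")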
diff --git a/src/documents/search/_query.py b/src/documents/search/_query.py
index ed0bb4c15..1bd31b804 100644
--- a/src/documents/search/_query.py
+++ b/src/documents/search/_query.py
@@ -25,21 +25,39 @@ _REGEX_TIMEOUT: Final[float] = 1.0
_DATE_ONLY_FIELDS = frozenset({"created"})
+_TODAY: Final[str] = "today"
+_YESTERDAY: Final[str] = "yesterday"
+_PREVIOUS_WEEK: Final[str] = "previous week"
+_THIS_MONTH: Final[str] = "this month"
+_PREVIOUS_MONTH: Final[str] = "previous month"
+_THIS_YEAR: Final[str] = "this year"
+_PREVIOUS_YEAR: Final[str] = "previous year"
+_PREVIOUS_QUARTER: Final[str] = "previous quarter"
+
_DATE_KEYWORDS = frozenset(
{
- "today",
- "yesterday",
- "this_week",
- "last_week",
- "this_month",
- "last_month",
- "this_year",
- "last_year",
+ _TODAY,
+ _YESTERDAY,
+ _PREVIOUS_WEEK,
+ _THIS_MONTH,
+ _PREVIOUS_MONTH,
+ _THIS_YEAR,
+ _PREVIOUS_YEAR,
+ _PREVIOUS_QUARTER,
},
)
+_DATE_KEYWORD_PATTERN = "|".join(
+ sorted((regex.escape(k) for k in _DATE_KEYWORDS), key=len, reverse=True),
+)
+
_FIELD_DATE_RE = regex.compile(
- r"(\w+):(" + "|".join(_DATE_KEYWORDS) + r")\b",
+    rf"""(?P<field>\w+)\s*:\s*(?:
+    (?P<quote>["'])(?P<quoted>{_DATE_KEYWORD_PATTERN})(?P=quote)
+    |
+    (?P<bare>{_DATE_KEYWORD_PATTERN})(?![\w-])
+)""",
+ regex.IGNORECASE | regex.VERBOSE,
)
_COMPACT_DATE_RE = regex.compile(r"\b(\d{14})\b")
_RELATIVE_RANGE_RE = regex.compile(
@@ -74,44 +92,59 @@ def _date_only_range(keyword: str, tz: tzinfo) -> str:
today = datetime.now(tz).date()
- if keyword == "today":
+ def _quarter_start(d: date) -> date:
+ return date(d.year, ((d.month - 1) // 3) * 3 + 1, 1)
+
+ if keyword == _TODAY:
lo = datetime(today.year, today.month, today.day, tzinfo=UTC)
return _iso_range(lo, lo + timedelta(days=1))
- if keyword == "yesterday":
+ if keyword == _YESTERDAY:
y = today - timedelta(days=1)
lo = datetime(y.year, y.month, y.day, tzinfo=UTC)
hi = datetime(today.year, today.month, today.day, tzinfo=UTC)
return _iso_range(lo, hi)
- if keyword == "this_week":
- mon = today - timedelta(days=today.weekday())
- lo = datetime(mon.year, mon.month, mon.day, tzinfo=UTC)
- return _iso_range(lo, lo + timedelta(weeks=1))
- if keyword == "last_week":
+ if keyword == _PREVIOUS_WEEK:
this_mon = today - timedelta(days=today.weekday())
last_mon = this_mon - timedelta(weeks=1)
lo = datetime(last_mon.year, last_mon.month, last_mon.day, tzinfo=UTC)
hi = datetime(this_mon.year, this_mon.month, this_mon.day, tzinfo=UTC)
return _iso_range(lo, hi)
- if keyword == "this_month":
+ if keyword == _THIS_MONTH:
lo = datetime(today.year, today.month, 1, tzinfo=UTC)
if today.month == 12:
hi = datetime(today.year + 1, 1, 1, tzinfo=UTC)
else:
hi = datetime(today.year, today.month + 1, 1, tzinfo=UTC)
return _iso_range(lo, hi)
- if keyword == "last_month":
+ if keyword == _PREVIOUS_MONTH:
if today.month == 1:
lo = datetime(today.year - 1, 12, 1, tzinfo=UTC)
else:
lo = datetime(today.year, today.month - 1, 1, tzinfo=UTC)
hi = datetime(today.year, today.month, 1, tzinfo=UTC)
return _iso_range(lo, hi)
- if keyword == "this_year":
+ if keyword == _THIS_YEAR:
lo = datetime(today.year, 1, 1, tzinfo=UTC)
return _iso_range(lo, datetime(today.year + 1, 1, 1, tzinfo=UTC))
- if keyword == "last_year":
+ if keyword == _PREVIOUS_YEAR:
lo = datetime(today.year - 1, 1, 1, tzinfo=UTC)
return _iso_range(lo, datetime(today.year, 1, 1, tzinfo=UTC))
+ if keyword == _PREVIOUS_QUARTER:
+ this_quarter = _quarter_start(today)
+ last_quarter = this_quarter - relativedelta(months=3)
+ lo = datetime(
+ last_quarter.year,
+ last_quarter.month,
+ last_quarter.day,
+ tzinfo=UTC,
+ )
+ hi = datetime(
+ this_quarter.year,
+ this_quarter.month,
+ this_quarter.day,
+ tzinfo=UTC,
+ )
+ return _iso_range(lo, hi)
raise ValueError(f"Unknown keyword: {keyword}")
@@ -127,42 +160,46 @@ def _datetime_range(keyword: str, tz: tzinfo) -> str:
def _midnight(d: date) -> datetime:
return datetime(d.year, d.month, d.day, tzinfo=tz).astimezone(UTC)
- if keyword == "today":
+ def _quarter_start(d: date) -> date:
+ return date(d.year, ((d.month - 1) // 3) * 3 + 1, 1)
+
+ if keyword == _TODAY:
return _iso_range(_midnight(today), _midnight(today + timedelta(days=1)))
- if keyword == "yesterday":
+ if keyword == _YESTERDAY:
y = today - timedelta(days=1)
return _iso_range(_midnight(y), _midnight(today))
- if keyword == "this_week":
- mon = today - timedelta(days=today.weekday())
- return _iso_range(_midnight(mon), _midnight(mon + timedelta(weeks=1)))
- if keyword == "last_week":
+ if keyword == _PREVIOUS_WEEK:
this_mon = today - timedelta(days=today.weekday())
last_mon = this_mon - timedelta(weeks=1)
return _iso_range(_midnight(last_mon), _midnight(this_mon))
- if keyword == "this_month":
+ if keyword == _THIS_MONTH:
first = today.replace(day=1)
if today.month == 12:
next_first = date(today.year + 1, 1, 1)
else:
next_first = date(today.year, today.month + 1, 1)
return _iso_range(_midnight(first), _midnight(next_first))
- if keyword == "last_month":
+ if keyword == _PREVIOUS_MONTH:
this_first = today.replace(day=1)
if today.month == 1:
last_first = date(today.year - 1, 12, 1)
else:
last_first = date(today.year, today.month - 1, 1)
return _iso_range(_midnight(last_first), _midnight(this_first))
- if keyword == "this_year":
+ if keyword == _THIS_YEAR:
return _iso_range(
_midnight(date(today.year, 1, 1)),
_midnight(date(today.year + 1, 1, 1)),
)
- if keyword == "last_year":
+ if keyword == _PREVIOUS_YEAR:
return _iso_range(
_midnight(date(today.year - 1, 1, 1)),
_midnight(date(today.year, 1, 1)),
)
+ if keyword == _PREVIOUS_QUARTER:
+ this_quarter = _quarter_start(today)
+ last_quarter = this_quarter - relativedelta(months=3)
+ return _iso_range(_midnight(last_quarter), _midnight(this_quarter))
raise ValueError(f"Unknown keyword: {keyword}")
@@ -308,7 +345,7 @@ def rewrite_natural_date_keywords(query: str, tz: tzinfo) -> str:
- Compact 14-digit dates (YYYYMMDDHHmmss)
- Whoosh relative ranges ([-7 days to now], [now-1h TO now+2h])
- 8-digit dates with field awareness (created:20240115)
- - Natural keywords (field:today, field:last_week, etc.)
+ - Natural keywords (field:today, field:"previous quarter", etc.)
Args:
query: Raw user query string
@@ -326,7 +363,8 @@ def rewrite_natural_date_keywords(query: str, tz: tzinfo) -> str:
query = _rewrite_relative_range(query)
def _replace(m: regex.Match[str]) -> str:
- field, keyword = m.group(1), m.group(2)
+ field = m.group("field")
+ keyword = (m.group("quoted") or m.group("bare")).lower()
if field in _DATE_ONLY_FIELDS:
return f"{field}:{_date_only_range(keyword, tz)}"
return f"{field}:{_datetime_range(keyword, tz)}"
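As a worked example of the new "previous quarter" keyword (written as created:"previous quarter" since multi-word keywords need quoting), the quarter arithmetic resolves like this. A standalone sketch of the same computation; _query.py then turns the two bounds into an ISO range:

    # Worked example of the previous-quarter bounds for an assumed mid-Q2 date.
    from datetime import date

    from dateutil.relativedelta import relativedelta


    def quarter_start(d: date) -> date:
        return date(d.year, ((d.month - 1) // 3) * 3 + 1, 1)


    today = date(2024, 5, 15)                             # assumption: a mid-Q2 date
    this_quarter = quarter_start(today)                   # 2024-04-01
    last_quarter = this_quarter - relativedelta(months=3) # 2024-01-01
    print(last_quarter, "->", this_quarter)               # previous quarter: [2024-01-01, 2024-04-01)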
diff --git a/src/documents/serialisers.py b/src/documents/serialisers.py
index 1efbf6b7b..986fdf720 100644
--- a/src/documents/serialisers.py
+++ b/src/documents/serialisers.py
@@ -12,7 +12,6 @@ from typing import Literal
from typing import TypedDict
import magic
-from celery import states
from django.conf import settings
from django.contrib.auth.models import Group
from django.contrib.auth.models import User
@@ -40,6 +39,7 @@ from drf_spectacular.utils import extend_schema_field
from drf_spectacular.utils import extend_schema_serializer
from drf_writable_nested.serializers import NestedUpdateMixin
from guardian.core import ObjectPermissionChecker
+from guardian.shortcuts import get_objects_for_user
from guardian.shortcuts import get_users_with_perms
from guardian.utils import get_group_obj_perms_model
from guardian.utils import get_user_obj_perms_model
@@ -2431,7 +2431,84 @@ class UiSettingsViewSerializer(serializers.ModelSerializer[UiSettings]):
return ui_settings
-class TasksViewSerializer(OwnedObjectSerializer):
+class TaskSerializerV10(OwnedObjectSerializer):
+ """Task serializer for API v10+ using new field names."""
+
+ related_document_ids = serializers.ListField(
+ child=serializers.IntegerField(),
+ read_only=True,
+ )
+ task_type_display = serializers.CharField(
+ source="get_task_type_display",
+ read_only=True,
+ )
+ trigger_source_display = serializers.CharField(
+ source="get_trigger_source_display",
+ read_only=True,
+ )
+ status_display = serializers.CharField(
+ source="get_status_display",
+ read_only=True,
+ )
+
+ class Meta:
+ model = PaperlessTask
+ fields = (
+ "id",
+ "task_id",
+ "task_type",
+ "task_type_display",
+ "trigger_source",
+ "trigger_source_display",
+ "status",
+ "status_display",
+ "date_created",
+ "date_started",
+ "date_done",
+ "duration_seconds",
+ "wait_time_seconds",
+ "input_data",
+ "result_data",
+ "result_message",
+ "related_document_ids",
+ "acknowledged",
+ "owner",
+ )
+ read_only_fields = fields
+
+
+class TaskSerializerV9(serializers.ModelSerializer):
+ """Task serializer for API v9 backwards compatibility.
+
+ Maps old field names to the new model fields so existing clients continue
+ to work unchanged.
+ """
+
+ # v9 field: task_name -> task_type (with value remapping for renamed tasks)
+ task_name = serializers.SerializerMethodField()
+
+ # v9 field: task_file_name -> input_data.filename
+ task_file_name = serializers.SerializerMethodField()
+
+ # v9 field: type -> trigger_source (mapped to old enum labels)
+ type = serializers.SerializerMethodField()
+
+ # v9 field: status -> uppercase Celery state strings
+ status = serializers.SerializerMethodField()
+
+ # v9 field: result -> result_message (with legacy format fallback)
+ result = serializers.CharField(
+ source="result_message",
+ read_only=True,
+ allow_null=True,
+ )
+
+ # v9 field: related_document -> first document ID from result_data
+ related_document = serializers.SerializerMethodField()
+
+ # v9 field: duplicate_documents -> list of duplicate IDs from result_data
+ duplicate_documents = serializers.SerializerMethodField()
+
class Meta:
model = PaperlessTask
fields = (
@@ -2439,59 +2516,99 @@ class TasksViewSerializer(OwnedObjectSerializer):
"task_id",
"task_name",
"task_file_name",
- "date_created",
- "date_done",
"type",
"status",
+ "date_created",
+ "date_done",
"result",
"acknowledged",
"related_document",
"duplicate_documents",
"owner",
)
+ read_only_fields = fields
- related_document = serializers.SerializerMethodField()
- duplicate_documents = serializers.SerializerMethodField()
- created_doc_re = re.compile(r"New document id (\d+) created")
- duplicate_doc_re = re.compile(r"It is a duplicate of .* \(#(\d+)\)")
+ _TASK_TYPE_TO_V9_NAME = {
+ PaperlessTask.TaskType.SANITY_CHECK: "check_sanity",
+ PaperlessTask.TaskType.LLM_INDEX: "llmindex_update",
+ }
- def get_related_document(self, obj) -> str | None:
- result = None
- re = None
- if obj.result:
- match obj.status:
- case states.SUCCESS:
- re = self.created_doc_re
- case states.FAILURE:
- re = (
- self.duplicate_doc_re
- if "existing document is in the trash" not in obj.result
- else None
- )
- if re is not None:
- try:
- result = re.search(obj.result).group(1)
- except Exception:
- pass
+ def get_task_name(self, obj: PaperlessTask) -> str:
+ return self._TASK_TYPE_TO_V9_NAME.get(obj.task_type, obj.task_type)
- return result
+ def get_task_file_name(self, obj: PaperlessTask) -> str | None:
+ if not obj.input_data:
+ return None
+ return obj.input_data.get("filename")
- @extend_schema_field(DuplicateDocumentSummarySerializer(many=True))
- def get_duplicate_documents(self, obj):
- related_document = self.get_related_document(obj)
- request = self.context.get("request")
- user = request.user if request else None
- document = Document.global_objects.filter(pk=related_document).first()
- if not related_document or not user or not document:
+ _STATUS_TO_V9 = {
+ PaperlessTask.Status.PENDING: "PENDING",
+ PaperlessTask.Status.STARTED: "STARTED",
+ PaperlessTask.Status.SUCCESS: "SUCCESS",
+ PaperlessTask.Status.FAILURE: "FAILURE",
+ PaperlessTask.Status.REVOKED: "REVOKED",
+ }
+
+ def get_status(self, obj: PaperlessTask) -> str:
+ return self._STATUS_TO_V9.get(obj.status, obj.status.upper())
+
+ _TRIGGER_SOURCE_TO_V9_TYPE = {
+ PaperlessTask.TriggerSource.SCHEDULED: "scheduled_task",
+ PaperlessTask.TriggerSource.SYSTEM: "auto_task",
+ # Email and folder-consumer documents are system-initiated, not manually triggered
+ PaperlessTask.TriggerSource.EMAIL_CONSUME: "auto_task",
+ PaperlessTask.TriggerSource.FOLDER_CONSUME: "auto_task",
+ }
+
+ def get_type(self, obj: PaperlessTask) -> str:
+ return self._TRIGGER_SOURCE_TO_V9_TYPE.get(obj.trigger_source, "manual_task")
+
+ def get_related_document(self, obj: PaperlessTask) -> int | None:
+ ids = obj.related_document_ids
+ return ids[0] if ids else None
+
+ def get_duplicate_documents(
+ self,
+ obj: PaperlessTask,
+ ) -> list[dict[str, Any]]:
+ if not obj.result_data:
return []
- duplicates = _get_viewable_duplicates(document, user)
- return list(duplicates.values("id", "title", "deleted_at"))
+ dup_of = obj.result_data.get("duplicate_of")
+ if dup_of is None:
+ return []
+ request = self.context.get("request")
+ if request is None:
+ return []
+ user = request.user
+ qs = Document.global_objects.filter(pk=dup_of)
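+ # Non-staff users only see the duplicate if they hold view permission on it,
+ # own it, or it has no owner; staff see it unconditionally.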
+ if not user.is_staff:
+ with_perms = get_objects_for_user(
+ user,
+ "documents.view_document",
+ qs,
+ accept_global_perms=False,
+ )
+ qs = with_perms | qs.filter(owner=user) | qs.filter(owner__isnull=True)
+ return list(qs.values("id", "title", "deleted_at"))
-class RunTaskViewSerializer(serializers.Serializer[dict[str, Any]]):
- task_name = serializers.ChoiceField(
- choices=PaperlessTask.TaskName.choices,
- label="Task Name",
+class TaskSummarySerializer(serializers.Serializer):
+ task_type = serializers.CharField()
+ total_count = serializers.IntegerField()
+ pending_count = serializers.IntegerField()
+ success_count = serializers.IntegerField()
+ failure_count = serializers.IntegerField()
+ avg_duration_seconds = serializers.FloatField(allow_null=True)
+ avg_wait_time_seconds = serializers.FloatField(allow_null=True)
+ last_run = serializers.DateTimeField(allow_null=True)
+ last_success = serializers.DateTimeField(allow_null=True)
+ last_failure = serializers.DateTimeField(allow_null=True)
+
+
+class RunTaskSerializer(serializers.Serializer):
+ task_type = serializers.ChoiceField(
+ choices=PaperlessTask.TaskType.choices,
+ label="Task Type",
write_only=True,
)
diff --git a/src/documents/signals/handlers.py b/src/documents/signals/handlers.py
index dd49a718c..3e04bc52a 100644
--- a/src/documents/signals/handlers.py
+++ b/src/documents/signals/handlers.py
@@ -1,18 +1,21 @@
from __future__ import annotations
+import datetime
import hashlib
import logging
+import re as _re
import shutil
+import traceback as _tb
from pathlib import Path
from typing import TYPE_CHECKING
from typing import Any
from celery import shared_task
-from celery import states
from celery.signals import before_task_publish
from celery.signals import task_failure
from celery.signals import task_postrun
from celery.signals import task_prerun
+from celery.signals import task_revoked
from celery.signals import worker_process_init
from django.conf import settings
from django.contrib.auth.models import Group
@@ -31,6 +34,7 @@ from documents import matching
from documents.caching import clear_document_caches
from documents.caching import invalidate_llm_suggestions_cache
from documents.data_models import ConsumableDocument
+from documents.data_models import DocumentSource
from documents.file_handling import create_source_path_directory
from documents.file_handling import delete_empty_directories
from documents.file_handling import generate_filename
@@ -996,68 +1000,225 @@ def run_workflows(
return overrides, "\n".join(messages)
-@before_task_publish.connect
-def before_task_publish_handler(sender=None, headers=None, body=None, **kwargs) -> None:
+# ---------------------------------------------------------------------------
+# Task tracking -- Celery signal handlers
+# ---------------------------------------------------------------------------
+
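+# Dotted Celery task names that are recorded as PaperlessTask rows; the signal
+# handlers below ignore any task not listed here.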
+TRACKED_TASKS: dict[str, PaperlessTask.TaskType] = {
+ "documents.tasks.consume_file": PaperlessTask.TaskType.CONSUME_FILE,
+ "documents.tasks.train_classifier": PaperlessTask.TaskType.TRAIN_CLASSIFIER,
+ "documents.tasks.sanity_check": PaperlessTask.TaskType.SANITY_CHECK,
+ "documents.tasks.llmindex_index": PaperlessTask.TaskType.LLM_INDEX,
+ "documents.tasks.empty_trash": PaperlessTask.TaskType.EMPTY_TRASH,
+ "documents.tasks.check_scheduled_workflows": PaperlessTask.TaskType.CHECK_WORKFLOWS,
+ "paperless_mail.tasks.process_mail_accounts": PaperlessTask.TaskType.MAIL_FETCH,
+ "documents.tasks.bulk_update_documents": PaperlessTask.TaskType.BULK_UPDATE,
+ "documents.tasks.update_document_content_maybe_archive_file": PaperlessTask.TaskType.REPROCESS_DOCUMENT,
+ "documents.tasks.build_share_link_bundle": PaperlessTask.TaskType.BUILD_SHARE_LINK,
+ "documents.bulk_edit.delete": PaperlessTask.TaskType.BULK_DELETE,
+}
+
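+# Terminal Celery states handled in task_postrun_handler; any other state there is
+# treated as FAILURE (and FAILUREs themselves are left to task_failure_handler).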
+_CELERY_STATE_TO_STATUS: dict[str, PaperlessTask.Status] = {
+ "SUCCESS": PaperlessTask.Status.SUCCESS,
+ "FAILURE": PaperlessTask.Status.FAILURE,
+ "REVOKED": PaperlessTask.Status.REVOKED,
+}
+
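+# consume_file carries a ConsumableDocument whose DocumentSource records how the
+# file arrived; _determine_trigger_source() uses this mapping for those tasks.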
+_DOCUMENT_SOURCE_TO_TRIGGER: dict[DocumentSource, PaperlessTask.TriggerSource] = {
+ DocumentSource.ConsumeFolder: PaperlessTask.TriggerSource.FOLDER_CONSUME,
+ DocumentSource.ApiUpload: PaperlessTask.TriggerSource.API_UPLOAD,
+ DocumentSource.MailFetch: PaperlessTask.TriggerSource.EMAIL_CONSUME,
+ DocumentSource.WebUI: PaperlessTask.TriggerSource.WEB_UI,
+}
+
+
+def _get_consume_args(
+ args: tuple,
+ task_kwargs: dict,
+) -> tuple[Any | None, Any | None]:
+ """Extract (input_doc, overrides) from consume_file task arguments."""
+ input_doc = args[0] if args else task_kwargs.get("input_doc")
+ overrides = args[1] if len(args) >= 2 else task_kwargs.get("overrides")
+ return input_doc, overrides
+
+
+def _extract_input_data(
+ task_type: PaperlessTask.TaskType,
+ args: tuple,
+ task_kwargs: dict,
+) -> dict:
+ """Build the input_data dict stored on the PaperlessTask record.
+
+ For consume_file tasks this includes the filename, MIME type, and any
+ non-null overrides from the DocumentMetadataOverrides object. For
+ mail_fetch tasks it captures the account_ids list. For all other task
+ types no input data is stored and an empty dict is returned.
"""
- Creates the PaperlessTask object in a pending state. This is sent before
- the task reaches the broker, but before it begins executing on a worker.
+ if task_type == PaperlessTask.TaskType.CONSUME_FILE:
+ input_doc, overrides = _get_consume_args(args, task_kwargs)
+ if input_doc is None: # pragma: no cover
+ return {}
+ data: dict = {
+ "filename": input_doc.original_file.name,
+ "mime_type": input_doc.mime_type,
+ }
+ if input_doc.original_path: # pragma: no cover
+ data["source_path"] = str(input_doc.original_path)
+ if input_doc.mailrule_id: # pragma: no cover
+ data["mailrule_id"] = input_doc.mailrule_id
+ if overrides:
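+ # Keep only non-null, non-private override attributes; dates and Paths are
+ # converted to strings so the stored dict stays JSON-serializable.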
+ override_dict = {}
+ for k, v in vars(overrides).items():
+ if v is None or k.startswith("_"):
+ continue
+ if isinstance(v, datetime.date):
+ v = v.isoformat()
+ elif isinstance(v, Path):
+ v = str(v)
+ override_dict[k] = v
+ if override_dict:
+ data["overrides"] = override_dict
+ return data
+
+ if task_type == PaperlessTask.TaskType.MAIL_FETCH:
+ account_ids = args[0] if args else task_kwargs.get("account_ids")
+ if account_ids is not None:
+ return {"account_ids": account_ids}
+ return {}
+
+ return {}
+
+
+def _determine_trigger_source(
+ task_type: PaperlessTask.TaskType,
+ args: tuple,
+ task_kwargs: dict,
+ headers: dict,
+) -> PaperlessTask.TriggerSource:
+ """Resolve the TriggerSource for a task being published to the broker.
+
+ Priority order:
+ 1. Explicit trigger_source header (set by beat schedule or apply_async callers).
+ 2. For consume_file tasks, the DocumentSource on the input document.
+ 3. MANUAL as the catch-all for all other cases.
+ """
+ # Explicit header takes priority -- callers pass a TriggerSource DB value directly.
+ header_source = headers.get("trigger_source")
+ if header_source is not None:
+ try:
+ return PaperlessTask.TriggerSource(header_source)
+ except ValueError:
+ pass
+
+ if task_type == PaperlessTask.TaskType.CONSUME_FILE:
+ input_doc, _ = _get_consume_args(args, task_kwargs)
+ if input_doc is not None:
+ return _DOCUMENT_SOURCE_TO_TRIGGER.get(
+ input_doc.source,
+ PaperlessTask.TriggerSource.API_UPLOAD,
+ )
+
+ return PaperlessTask.TriggerSource.MANUAL
+
+
+def _extract_owner_id(
+ task_type: PaperlessTask.TaskType,
+ args: tuple,
+ task_kwargs: dict,
+) -> int | None:
+ """Return the owner_id from consume_file overrides, or None for all other task types."""
+ if task_type != PaperlessTask.TaskType.CONSUME_FILE:
+ return None
+ _, overrides = _get_consume_args(args, task_kwargs)
+ if overrides and hasattr(overrides, "owner_id"):
+ return overrides.owner_id
+ return None # pragma: no cover
+
+
+def _parse_consume_result(result: str) -> dict | None:
+ """Parse a consume_file string result into a structured dict.
+
+ consume_file returns human-readable strings rather than dicts (e.g.
+ "Success. New document id 42 created" or "It is a duplicate of foo (#7)").
+ This function extracts the document ID or duplicate reference so the
+ result can be stored as structured data on the PaperlessTask record.
+ Returns None when the string does not match any known pattern.
+ """
+ if match := _re.search(r"New document id (\d+) created", result):
+ return {"document_id": int(match.group(1))}
+ if match := _re.search(r"It is a duplicate of .* \(#(\d+)\)", result):
+ return {
+ "duplicate_of": int(match.group(1)),
+ "duplicate_in_trash": "existing document is in the trash" in result,
+ }
+ return None # pragma: no cover
+
+
+@before_task_publish.connect
+def before_task_publish_handler(
+ sender=None,
+ headers=None,
+ body=None,
+ **kwargs,
+) -> None:
+ """
+ Creates the PaperlessTask record when the task is published to the broker.
https://docs.celeryq.dev/en/stable/userguide/signals.html#before-task-publish
-
https://docs.celeryq.dev/en/stable/internals/protocol.html#version-2
-
"""
- if "task" not in headers or headers["task"] != "documents.tasks.consume_file":
- # Assumption: this is only ever a v2 message
+ if headers is None or body is None:
+ return
+
+ task_name = headers.get("task", "")
+ task_type = TRACKED_TASKS.get(task_name)
+ if task_type is None:
return
try:
close_old_connections()
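+ # Celery message protocol v2: the body is the tuple (args, kwargs, embed).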
+ args, task_kwargs, _ = body
+ task_id = headers["id"]
- task_args = body[0]
- input_doc, overrides = task_args
-
- task_file_name = input_doc.original_file.name
- user_id = overrides.owner_id if overrides else None
+ input_data = _extract_input_data(task_type, args, task_kwargs)
+ trigger_source = _determine_trigger_source(
+ task_type,
+ args,
+ task_kwargs,
+ headers,
+ )
+ owner_id = _extract_owner_id(task_type, args, task_kwargs)
PaperlessTask.objects.create(
- type=PaperlessTask.TaskType.AUTO,
- task_id=headers["id"],
- status=states.PENDING,
- task_file_name=task_file_name,
- task_name=PaperlessTask.TaskName.CONSUME_FILE,
- result=None,
- date_created=timezone.now(),
- date_started=None,
- date_done=None,
- owner_id=user_id,
+ task_id=task_id,
+ task_type=task_type,
+ trigger_source=trigger_source,
+ status=PaperlessTask.Status.PENDING,
+ input_data=input_data,
+ owner_id=owner_id,
)
except Exception: # pragma: no cover
- # Don't let an exception in the signal handlers prevent
- # a document from being consumed.
logger.exception("Creating PaperlessTask failed")
@task_prerun.connect
def task_prerun_handler(sender=None, task_id=None, task=None, **kwargs) -> None:
"""
-
- Updates the PaperlessTask to be started. Sent before the task begins execution
- on a worker.
+ Marks the task STARTED when execution begins on a worker.
https://docs.celeryq.dev/en/stable/userguide/signals.html#task-prerun
"""
+ if task_id is None: # pragma: no cover
+ return
+ if task and task.name not in TRACKED_TASKS:
+ return
try:
close_old_connections()
- task_instance = PaperlessTask.objects.filter(task_id=task_id).first()
-
- if task_instance is not None:
- task_instance.status = states.STARTED
- task_instance.date_started = timezone.now()
- task_instance.save()
+ PaperlessTask.objects.filter(task_id=task_id).update(
+ status=PaperlessTask.Status.STARTED,
+ date_started=timezone.now(),
+ )
except Exception: # pragma: no cover
- # Don't let an exception in the signal handlers prevent
- # a document from being consumed.
logger.exception("Setting PaperlessTask started failed")
@@ -1071,22 +1232,56 @@ def task_postrun_handler(
**kwargs,
) -> None:
"""
- Updates the result of the PaperlessTask.
+ Records task completion and result data for non-failure outcomes.
+
+ Skips FAILURE states entirely, since task_failure_handler fires first
+ and fully owns the failure path (status, date_done, duration,
+ result_data, result_message).
https://docs.celeryq.dev/en/stable/userguide/signals.html#task-postrun
"""
+ if task_id is None: # pragma: no cover
+ return
+ if task and task.name not in TRACKED_TASKS:
+ return
try:
close_old_connections()
- task_instance = PaperlessTask.objects.filter(task_id=task_id).first()
- if task_instance is not None:
- task_instance.status = state or states.FAILURE
- task_instance.result = retval
- task_instance.date_done = timezone.now()
- task_instance.save()
+ new_status = _CELERY_STATE_TO_STATUS.get(state, PaperlessTask.Status.FAILURE)
+ if new_status == PaperlessTask.Status.FAILURE:
+ return
+
+ now = timezone.now()
+ try:
+ task_instance = PaperlessTask.objects.get(task_id=task_id)
+ except PaperlessTask.DoesNotExist:
+ return
+
+ task_instance.status = new_status
+ task_instance.date_done = now
+ changed_fields = ["status", "date_done"]
+
+ if task_instance.date_started:
+ task_instance.duration_seconds = (
+ now - task_instance.date_started
+ ).total_seconds()
+ changed_fields.append("duration_seconds")
+ if task_instance.date_started and task_instance.date_created:
+ task_instance.wait_time_seconds = (
+ task_instance.date_started - task_instance.date_created
+ ).total_seconds()
+ changed_fields.append("wait_time_seconds")
+
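+ # Dict results are stored directly in result_data; string results are kept as
+ # result_message and run through _parse_consume_result() in case they contain a
+ # document or duplicate reference.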
+ if isinstance(retval, dict):
+ task_instance.result_data = retval
+ changed_fields.append("result_data")
+ elif isinstance(retval, str):
+ task_instance.result_message = retval
+ task_instance.result_data = _parse_consume_result(retval)
+ changed_fields.extend(["result_message", "result_data"])
+
+ task_instance.save(update_fields=changed_fields)
except Exception: # pragma: no cover
- # Don't let an exception in the signal handlers prevent
- # a document from being consumed.
logger.exception("Updating PaperlessTask failed")
@@ -1100,21 +1295,85 @@ def task_failure_handler(
**kwargs,
) -> None:
"""
- Updates the result of a failed PaperlessTask.
+ Records failure details when a task raises an exception.
+
+ Fully owns the FAILURE path. task_postrun_handler skips FAILURE
+ states so there is no overlap.
https://docs.celeryq.dev/en/stable/userguide/signals.html#task-failure
"""
+ if task_id is None: # pragma: no cover
+ return
+ if sender and sender.name not in TRACKED_TASKS: # pragma: no cover
+ return
try:
close_old_connections()
- task_instance = PaperlessTask.objects.filter(task_id=task_id).first()
- if task_instance is not None and task_instance.result is None:
- task_instance.status = states.FAILURE
- task_instance.result = traceback
- task_instance.date_done = timezone.now()
- task_instance.save()
+ result_data: dict = {
+ "error_type": type(exception).__name__ if exception else "Unknown",
+ "error_message": str(exception) if exception else "Unknown error",
+ }
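+ # Cap the stored traceback so result_data stays a reasonably sized JSON value.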
+ if traceback:
+ tb_str = "".join(_tb.format_tb(traceback))
+ result_data["traceback"] = tb_str[:5000]
+
+ now = timezone.now()
+ update_fields: dict = {
+ "status": PaperlessTask.Status.FAILURE,
+ "result_data": result_data,
+ "result_message": str(exception) if exception else None,
+ "date_done": now,
+ }
+
+ task_qs = PaperlessTask.objects.filter(task_id=task_id)
+ task_instance = task_qs.values("date_started", "date_created").first()
+ if task_instance:
+ date_started = task_instance["date_started"]
+ if date_started:
+ update_fields["duration_seconds"] = (now - date_started).total_seconds()
+ date_created = task_instance["date_created"]
+ if date_started and date_created:
+ update_fields["wait_time_seconds"] = (
+ date_started - date_created
+ ).total_seconds()
+ task_qs.update(**update_fields)
except Exception: # pragma: no cover
- logger.exception("Updating PaperlessTask failed")
+ logger.exception("Updating PaperlessTask on failure failed")
+
+
+@task_revoked.connect
+def task_revoked_handler(
+ sender=None,
+ request=None,
+ *,
+ terminated: bool = False,
+ signum=None,
+ expired: bool = False,
+ **kwargs,
+) -> None:
+ """
+ Marks the task REVOKED when it is cancelled before or during execution.
+
+ This fires for tasks revoked while still queued (before task_prerun) as
+ well as for tasks terminated mid-run. task_postrun does NOT fire for
+ pre-start revocations, so this handler is the only way to move those
+ records out of PENDING.
+
+ https://docs.celeryq.dev/en/stable/userguide/signals.html#task-revoked
+ """
+ task_id = request.id if request else None
+ if task_id is None: # pragma: no cover
+ return
+ if sender and sender.name not in TRACKED_TASKS: # pragma: no cover
+ return
+ try:
+ close_old_connections()
+ PaperlessTask.objects.filter(task_id=task_id).update(
+ status=PaperlessTask.Status.REVOKED,
+ date_done=timezone.now(),
+ )
+ except Exception: # pragma: no cover
+ logger.exception("Updating PaperlessTask on revocation failed")
@worker_process_init.connect
diff --git a/src/documents/tasks.py b/src/documents/tasks.py
index 57c819492..86a8047bc 100644
--- a/src/documents/tasks.py
+++ b/src/documents/tasks.py
@@ -10,7 +10,6 @@ from tempfile import mkstemp
from celery import Task
from celery import shared_task
-from celery import states
from django.conf import settings
from django.contrib.contenttypes.models import ContentType
from django.db import models
@@ -41,7 +40,6 @@ from documents.models import Correspondent
from documents.models import CustomFieldInstance
from documents.models import Document
from documents.models import DocumentType
-from documents.models import PaperlessTask
from documents.models import ShareLink
from documents.models import ShareLinkBundle
from documents.models import StoragePath
@@ -84,19 +82,8 @@ def index_optimize() -> None:
@shared_task
def train_classifier(
*,
- scheduled=True,
status_callback: Callable[[str], None] | None = None,
-) -> None:
- task = PaperlessTask.objects.create(
- type=PaperlessTask.TaskType.SCHEDULED_TASK
- if scheduled
- else PaperlessTask.TaskType.MANUAL_TASK,
- task_id=uuid.uuid4(),
- task_name=PaperlessTask.TaskName.TRAIN_CLASSIFIER,
- status=states.STARTED,
- date_created=timezone.now(),
- date_started=timezone.now(),
- )
+) -> str:
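+ # The returned string is recorded as the task's result_message by the
+ # task_postrun signal handler.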
if (
not Tag.objects.filter(matching_algorithm=Tag.MATCH_AUTO).exists()
and not DocumentType.objects.filter(matching_algorithm=Tag.MATCH_AUTO).exists()
@@ -107,40 +94,25 @@ def train_classifier(
logger.info(result)
# Special case, items were once auto and trained, so remove the model
# and prevent its use again
- if settings.MODEL_FILE.exists():
+ if settings.MODEL_FILE.exists(): # pragma: no cover
logger.info(f"Removing {settings.MODEL_FILE} so it won't be used")
settings.MODEL_FILE.unlink()
- task.status = states.SUCCESS
- task.result = result
- task.date_done = timezone.now()
- task.save()
- return
+ return result
classifier = load_classifier()
if not classifier:
classifier = DocumentClassifier()
- try:
- if classifier.train(status_callback=status_callback):
- logger.info(
- f"Saving updated classifier model to {settings.MODEL_FILE}...",
- )
- classifier.save()
- task.result = "Training completed successfully"
- else:
- logger.debug("Training data unchanged.")
- task.result = "Training data unchanged"
-
- task.status = states.SUCCESS
-
- except Exception as e:
- logger.warning("Classifier error: " + str(e))
- task.status = states.FAILURE
- task.result = str(e)
-
- task.date_done = timezone.now()
- task.save(update_fields=["status", "result", "date_done"])
+ if classifier.train(status_callback=status_callback):
+ logger.info(
+ f"Saving updated classifier model to {settings.MODEL_FILE}...",
+ )
+ classifier.save()
+ return "Training completed successfully"
+ else:
+ logger.debug("Training data unchanged.")
+ return "Training data unchanged"
@shared_task(bind=True)
@@ -231,8 +203,8 @@ def consume_file(
@shared_task
-def sanity_check(*, scheduled=True, raise_on_error=True):
- messages = sanity_checker.check_sanity(scheduled=scheduled)
+def sanity_check(*, raise_on_error: bool = True) -> str:
+ messages = sanity_checker.check_sanity()
messages.log_messages()
if not messages.has_error and not messages.has_warning and not messages.has_info:
@@ -635,42 +607,19 @@ def update_document_parent_tags(tag: Tag, new_parent: Tag) -> None:
def llmindex_index(
*,
iter_wrapper: IterWrapper[Document] = identity,
- rebuild=False,
- scheduled=True,
- auto=False,
-) -> None:
+ rebuild: bool = False,
+) -> str | None:
ai_config = AIConfig()
- if ai_config.llm_index_enabled:
- task = PaperlessTask.objects.create(
- type=PaperlessTask.TaskType.SCHEDULED_TASK
- if scheduled
- else PaperlessTask.TaskType.AUTO
- if auto
- else PaperlessTask.TaskType.MANUAL_TASK,
- task_id=uuid.uuid4(),
- task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,
- status=states.STARTED,
- date_created=timezone.now(),
- date_started=timezone.now(),
- )
- from paperless_ai.indexing import update_llm_index
-
- try:
- result = update_llm_index(
- iter_wrapper=iter_wrapper,
- rebuild=rebuild,
- )
- task.status = states.SUCCESS
- task.result = result
- except Exception as e:
- logger.error("LLM index error: " + str(e))
- task.status = states.FAILURE
- task.result = str(e)
-
- task.date_done = timezone.now()
- task.save(update_fields=["status", "result", "date_done"])
- else:
+ if not ai_config.llm_index_enabled: # pragma: no cover
logger.info("LLM index is disabled, skipping update.")
+ return None
+
+ from paperless_ai.indexing import update_llm_index
+
+ return update_llm_index(
+ iter_wrapper=iter_wrapper,
+ rebuild=rebuild,
+ )
@shared_task
diff --git a/src/documents/tests/conftest.py b/src/documents/tests/conftest.py
index 5cde34768..88f0e9d76 100644
--- a/src/documents/tests/conftest.py
+++ b/src/documents/tests/conftest.py
@@ -13,6 +13,8 @@ from rest_framework.test import APIClient
from documents.tests.factories import DocumentFactory
+UserModelT = get_user_model()
+
if TYPE_CHECKING:
from documents.models import Document
@@ -126,15 +128,34 @@ def rest_api_client():
yield APIClient()
-@pytest.fixture
-def authenticated_rest_api_client(rest_api_client: APIClient):
- """
- The basic DRF ApiClient which has been authenticated
- """
- UserModel = get_user_model()
- user = UserModel.objects.create_user(username="testuser", password="password")
- rest_api_client.force_authenticate(user=user)
- yield rest_api_client
+@pytest.fixture()
+def regular_user(django_user_model: type[UserModelT]) -> UserModelT:
+ """Unprivileged authenticated user for permission boundary tests."""
+ return django_user_model.objects.create_user(username="regular", password="regular")
+
+
+@pytest.fixture()
+def admin_client(rest_api_client: APIClient, admin_user: UserModelT) -> APIClient:
+ """Admin client pre-authenticated and sending the v10 Accept header."""
+ rest_api_client.force_authenticate(user=admin_user)
+ rest_api_client.credentials(HTTP_ACCEPT="application/json; version=10")
+ return rest_api_client
+
+
+@pytest.fixture()
+def v9_client(rest_api_client: APIClient, admin_user: UserModelT) -> APIClient:
+ """Admin client pre-authenticated and sending the v9 Accept header."""
+ rest_api_client.force_authenticate(user=admin_user)
+ rest_api_client.credentials(HTTP_ACCEPT="application/json; version=9")
+ return rest_api_client
+
+
+@pytest.fixture()
+def user_client(rest_api_client: APIClient, regular_user: UserModelT) -> APIClient:
+ """Regular-user client pre-authenticated and sending the v10 Accept header."""
+ rest_api_client.force_authenticate(user=regular_user)
+ rest_api_client.credentials(HTTP_ACCEPT="application/json; version=10")
+ return rest_api_client
@pytest.fixture(scope="session", autouse=True)
diff --git a/src/documents/tests/factories.py b/src/documents/tests/factories.py
index b0fd68428..21d8bcb37 100644
--- a/src/documents/tests/factories.py
+++ b/src/documents/tests/factories.py
@@ -11,6 +11,7 @@ from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
from documents.models import MatchingModel
+from documents.models import PaperlessTask
from documents.models import StoragePath
from documents.models import Tag
@@ -65,3 +66,17 @@ class DocumentFactory(DjangoModelFactory):
correspondent = None
document_type = None
storage_path = None
+
+
+class PaperlessTaskFactory(DjangoModelFactory):
+ class Meta:
+ model = PaperlessTask
+
+ task_id = factory.Faker("uuid4")
+ task_type = PaperlessTask.TaskType.CONSUME_FILE
+ trigger_source = PaperlessTask.TriggerSource.WEB_UI
+ status = PaperlessTask.Status.PENDING
+ input_data = factory.LazyFunction(dict)
+ result_data = None
+ result_message = None
+ acknowledged = False
diff --git a/src/documents/tests/search/test_query.py b/src/documents/tests/search/test_query.py
index 74a064dbb..e47d6b7df 100644
--- a/src/documents/tests/search/test_query.py
+++ b/src/documents/tests/search/test_query.py
@@ -81,45 +81,38 @@ class TestCreatedDateField:
),
pytest.param(
"created",
- "this_week",
- "2026-03-23T00:00:00Z",
- "2026-03-30T00:00:00Z",
- id="this_week_mon_sun",
- ),
- pytest.param(
- "created",
- "last_week",
+ "previous week",
"2026-03-16T00:00:00Z",
"2026-03-23T00:00:00Z",
- id="last_week",
+ id="previous_week",
),
pytest.param(
"created",
- "this_month",
+ "this month",
"2026-03-01T00:00:00Z",
"2026-04-01T00:00:00Z",
id="this_month",
),
pytest.param(
"created",
- "last_month",
+ "previous month",
"2026-02-01T00:00:00Z",
"2026-03-01T00:00:00Z",
- id="last_month",
+ id="previous_month",
),
pytest.param(
"created",
- "this_year",
+ "this year",
"2026-01-01T00:00:00Z",
"2027-01-01T00:00:00Z",
id="this_year",
),
pytest.param(
"created",
- "last_year",
+ "previous year",
"2025-01-01T00:00:00Z",
"2026-01-01T00:00:00Z",
- id="last_year",
+ id="previous_year",
),
],
)
@@ -141,7 +134,7 @@ class TestCreatedDateField:
def test_this_month_december_wraps_to_next_year(self) -> None:
# December: next month must roll over to January 1 of next year
lo, hi = _range(
- rewrite_natural_date_keywords("created:this_month", UTC),
+ rewrite_natural_date_keywords("created:this month", UTC),
"created",
)
assert lo == "2026-12-01T00:00:00Z"
@@ -151,12 +144,21 @@ class TestCreatedDateField:
def test_last_month_january_wraps_to_previous_year(self) -> None:
# January: last month must roll back to December 1 of previous year
lo, hi = _range(
- rewrite_natural_date_keywords("created:last_month", UTC),
+ rewrite_natural_date_keywords("created:previous month", UTC),
"created",
)
assert lo == "2025-12-01T00:00:00Z"
assert hi == "2026-01-01T00:00:00Z"
+ @time_machine.travel(datetime(2026, 7, 15, 12, 0, tzinfo=UTC), tick=False)
+ def test_previous_quarter(self) -> None:
+ lo, hi = _range(
+ rewrite_natural_date_keywords('created:"previous quarter"', UTC),
+ "created",
+ )
+ assert lo == "2026-04-01T00:00:00Z"
+ assert hi == "2026-07-01T00:00:00Z"
+
def test_unknown_keyword_raises(self) -> None:
with pytest.raises(ValueError, match="Unknown keyword"):
_date_only_range("bogus_keyword", UTC)
@@ -202,40 +204,34 @@ class TestDateTimeFields:
id="yesterday",
),
pytest.param(
- "this_week",
- "2026-03-23T00:00:00Z",
- "2026-03-30T00:00:00Z",
- id="this_week",
- ),
- pytest.param(
- "last_week",
+ "previous week",
"2026-03-16T00:00:00Z",
"2026-03-23T00:00:00Z",
- id="last_week",
+ id="previous_week",
),
pytest.param(
- "this_month",
+ "this month",
"2026-03-01T00:00:00Z",
"2026-04-01T00:00:00Z",
id="this_month",
),
pytest.param(
- "last_month",
+ "previous month",
"2026-02-01T00:00:00Z",
"2026-03-01T00:00:00Z",
- id="last_month",
+ id="previous_month",
),
pytest.param(
- "this_year",
+ "this year",
"2026-01-01T00:00:00Z",
"2027-01-01T00:00:00Z",
id="this_year",
),
pytest.param(
- "last_year",
+ "previous year",
"2025-01-01T00:00:00Z",
"2026-01-01T00:00:00Z",
- id="last_year",
+ id="previous_year",
),
],
)
@@ -254,17 +250,54 @@ class TestDateTimeFields:
@time_machine.travel(datetime(2026, 12, 15, 12, 0, tzinfo=UTC), tick=False)
def test_this_month_december_wraps_to_next_year(self) -> None:
# December: next month wraps to January of next year
- lo, hi = _range(rewrite_natural_date_keywords("added:this_month", UTC), "added")
+ lo, hi = _range(rewrite_natural_date_keywords("added:this month", UTC), "added")
assert lo == "2026-12-01T00:00:00Z"
assert hi == "2027-01-01T00:00:00Z"
@time_machine.travel(datetime(2026, 1, 15, 12, 0, tzinfo=UTC), tick=False)
def test_last_month_january_wraps_to_previous_year(self) -> None:
# January: last month wraps back to December of previous year
- lo, hi = _range(rewrite_natural_date_keywords("added:last_month", UTC), "added")
+ lo, hi = _range(
+ rewrite_natural_date_keywords("added:previous month", UTC),
+ "added",
+ )
assert lo == "2025-12-01T00:00:00Z"
assert hi == "2026-01-01T00:00:00Z"
+ @pytest.mark.parametrize(
+ ("query", "expected_lo", "expected_hi"),
+ [
+ pytest.param(
+ 'added:"previous quarter"',
+ "2026-04-01T00:00:00Z",
+ "2026-07-01T00:00:00Z",
+ id="quoted_previous_quarter",
+ ),
+ pytest.param(
+ "added:previous month",
+ "2026-06-01T00:00:00Z",
+ "2026-07-01T00:00:00Z",
+ id="bare_previous_month",
+ ),
+ pytest.param(
+ "added:this month",
+ "2026-07-01T00:00:00Z",
+ "2026-08-01T00:00:00Z",
+ id="bare_this_month",
+ ),
+ ],
+ )
+ @time_machine.travel(datetime(2026, 7, 15, 12, 0, tzinfo=UTC), tick=False)
+ def test_legacy_natural_language_aliases(
+ self,
+ query: str,
+ expected_lo: str,
+ expected_hi: str,
+ ) -> None:
+ lo, hi = _range(rewrite_natural_date_keywords(query, UTC), "added")
+ assert lo == expected_lo
+ assert hi == expected_hi
+
def test_unknown_keyword_raises(self) -> None:
with pytest.raises(ValueError, match="Unknown keyword"):
_datetime_range("bogus_keyword", UTC)
diff --git a/src/documents/tests/test_api_app_config.py b/src/documents/tests/test_api_app_config.py
index d1241b38a..ccefde1ad 100644
--- a/src/documents/tests/test_api_app_config.py
+++ b/src/documents/tests/test_api_app_config.py
@@ -831,7 +831,7 @@ class TestApiAppConfig(DirectoriesMixin, APITestCase):
config.save()
with (
- patch("documents.tasks.llmindex_index.delay") as mock_update,
+ patch("documents.tasks.llmindex_index.apply_async") as mock_update,
patch("paperless_ai.indexing.vector_store_file_exists") as mock_exists,
):
mock_exists.return_value = False
diff --git a/src/documents/tests/test_api_schema.py b/src/documents/tests/test_api_schema.py
index d8b023e6a..78e6e3a6d 100644
--- a/src/documents/tests/test_api_schema.py
+++ b/src/documents/tests/test_api_schema.py
@@ -1,5 +1,7 @@
+import pytest
from django.core.management import call_command
from django.core.management.base import CommandError
+from drf_spectacular.generators import SchemaGenerator
from rest_framework import status
from rest_framework.test import APITestCase
@@ -66,3 +68,57 @@ class TestApiSchema(APITestCase):
"delete_pages",
]:
self.assertIn(action_method, advertised_methods)
+
+
+# ---- session-scoped fixture: generate schema once for all TestXxx classes ----
+
+
+@pytest.fixture(scope="session")
+def api_schema():
+ generator = SchemaGenerator()
+ return generator.get_schema(request=None, public=True)
+
+
+class TestTasksSummarySchema:
+ """tasks_summary_retrieve: response must be an array of TaskSummarySerializer."""
+
+ def test_summary_response_is_array(self, api_schema):
+ op = api_schema["paths"]["/api/tasks/summary/"]["get"]
+ resp_200 = op["responses"]["200"]["content"]["application/json"]["schema"]
+ assert resp_200["type"] == "array", (
+ "tasks_summary_retrieve response must be type:array"
+ )
+
+ def test_summary_items_have_total_count(self, api_schema):
+ op = api_schema["paths"]["/api/tasks/summary/"]["get"]
+ resp_200 = op["responses"]["200"]["content"]["application/json"]["schema"]
+ items = resp_200.get("items", {})
+ ref = items.get("$ref", "")
+ component_name = ref.split("/")[-1] if ref else ""
+ if component_name:
+ props = api_schema["components"]["schemas"][component_name]["properties"]
+ else:
+ props = items.get("properties", {})
+ assert "total_count" in props, (
+ "summary items must have 'total_count' (TaskSummarySerializer)"
+ )
+
+
+class TestTasksActiveSchema:
+ """tasks_active_retrieve: response must be an array of TaskSerializerV10."""
+
+ def test_active_response_is_array(self, api_schema):
+ op = api_schema["paths"]["/api/tasks/active/"]["get"]
+ resp_200 = op["responses"]["200"]["content"]["application/json"]["schema"]
+ assert resp_200["type"] == "array", (
+ "tasks_active_retrieve response must be type:array"
+ )
+
+ def test_active_items_ref_named_schema(self, api_schema):
+ op = api_schema["paths"]["/api/tasks/active/"]["get"]
+ resp_200 = op["responses"]["200"]["content"]["application/json"]["schema"]
+ items = resp_200.get("items", {})
+ ref = items.get("$ref", "")
+ component_name = ref.split("/")[-1] if ref else ""
+ assert component_name, "items should be a $ref to a named schema"
+ assert component_name in api_schema["components"]["schemas"]
diff --git a/src/documents/tests/test_api_search.py b/src/documents/tests/test_api_search.py
index 85f479010..50fff3dbb 100644
--- a/src/documents/tests/test_api_search.py
+++ b/src/documents/tests/test_api_search.py
@@ -3,6 +3,7 @@ from datetime import timedelta
from unittest import mock
import pytest
+import time_machine
from dateutil.relativedelta import relativedelta
from django.contrib.auth.models import Group
from django.contrib.auth.models import Permission
@@ -26,6 +27,7 @@ from documents.models import Tag
from documents.models import Workflow
from documents.search import get_backend
from documents.search import reset_backend
+from documents.tests.factories import DocumentFactory
from documents.tests.utils import DirectoriesMixin
from paperless_mail.models import MailAccount
from paperless_mail.models import MailRule
@@ -741,6 +743,49 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
# Tantivy rejects unparsable field queries with a 400
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
+ @override_settings(
+ TIME_ZONE="UTC",
+ )
+ @time_machine.travel(
+ datetime.datetime(2026, 7, 15, 12, 0, tzinfo=datetime.UTC),
+ tick=False,
+ )
+ def test_search_added_previous_quarter(self) -> None:
+ """
+ GIVEN:
+ - Documents inside and outside the previous quarter
+ WHEN:
+ - Query with the legacy natural-language phrase used by the UI
+ THEN:
+ - Previous-quarter documents are returned
+ """
+ d1 = DocumentFactory.create(
+ title="quarterly statement april",
+ content="bank statement",
+ added=datetime.datetime(2026, 4, 10, 12, 0, tzinfo=datetime.UTC),
+ )
+ d2 = DocumentFactory.create(
+ title="quarterly statement june",
+ content="bank statement",
+ added=datetime.datetime(2026, 6, 20, 12, 0, tzinfo=datetime.UTC),
+ )
+ d3 = DocumentFactory.create(
+ title="quarterly statement july",
+ content="bank statement",
+ added=datetime.datetime(2026, 7, 10, 12, 0, tzinfo=datetime.UTC),
+ )
+
+ backend = get_backend()
+ backend.add_or_update(d1)
+ backend.add_or_update(d2)
+ backend.add_or_update(d3)
+
+ response = self.client.get('/api/documents/?query=added:"previous quarter"')
+ self.assertEqual(response.status_code, status.HTTP_200_OK)
+
+ results = response.data["results"]
+ self.assertEqual({r["id"] for r in results}, {d1.id, d2.id})
+
@mock.patch("documents.search._backend.TantivyBackend.autocomplete")
def test_search_autocomplete_limits(self, m) -> None:
"""
diff --git a/src/documents/tests/test_api_status.py b/src/documents/tests/test_api_status.py
index 4f4511c14..69cfe2c34 100644
--- a/src/documents/tests/test_api_status.py
+++ b/src/documents/tests/test_api_status.py
@@ -4,7 +4,6 @@ import tempfile
from pathlib import Path
from unittest import mock
-from celery import states
from django.contrib.auth.models import Permission
from django.contrib.auth.models import User
from django.test import override_settings
@@ -13,6 +12,7 @@ from rest_framework.test import APITestCase
from documents.models import PaperlessTask
from documents.permissions import has_system_status_permission
+from documents.tests.factories import PaperlessTaskFactory
from paperless import version
@@ -258,10 +258,10 @@ class TestSystemStatus(APITestCase):
THEN:
- The response contains an OK classifier status
"""
- PaperlessTask.objects.create(
- type=PaperlessTask.TaskType.SCHEDULED_TASK,
- status=states.SUCCESS,
- task_name=PaperlessTask.TaskName.TRAIN_CLASSIFIER,
+ PaperlessTaskFactory(
+ task_type=PaperlessTask.TaskType.TRAIN_CLASSIFIER,
+ trigger_source=PaperlessTask.TriggerSource.SCHEDULED,
+ status=PaperlessTask.Status.SUCCESS,
)
self.client.force_login(self.user)
response = self.client.get(self.ENDPOINT)
@@ -295,11 +295,11 @@ class TestSystemStatus(APITestCase):
THEN:
- The response contains an ERROR classifier status
"""
- PaperlessTask.objects.create(
- type=PaperlessTask.TaskType.SCHEDULED_TASK,
- status=states.FAILURE,
- task_name=PaperlessTask.TaskName.TRAIN_CLASSIFIER,
- result="Classifier training failed",
+ PaperlessTaskFactory(
+ task_type=PaperlessTask.TaskType.TRAIN_CLASSIFIER,
+ trigger_source=PaperlessTask.TriggerSource.SCHEDULED,
+ status=PaperlessTask.Status.FAILURE,
+ result_message="Classifier training failed",
)
self.client.force_login(self.user)
response = self.client.get(self.ENDPOINT)
@@ -319,10 +319,10 @@ class TestSystemStatus(APITestCase):
THEN:
- The response contains an OK sanity check status
"""
- PaperlessTask.objects.create(
- type=PaperlessTask.TaskType.SCHEDULED_TASK,
- status=states.SUCCESS,
- task_name=PaperlessTask.TaskName.CHECK_SANITY,
+ PaperlessTaskFactory(
+ task_type=PaperlessTask.TaskType.SANITY_CHECK,
+ trigger_source=PaperlessTask.TriggerSource.SCHEDULED,
+ status=PaperlessTask.Status.SUCCESS,
)
self.client.force_login(self.user)
response = self.client.get(self.ENDPOINT)
@@ -356,11 +356,11 @@ class TestSystemStatus(APITestCase):
THEN:
- The response contains an ERROR sanity check status
"""
- PaperlessTask.objects.create(
- type=PaperlessTask.TaskType.SCHEDULED_TASK,
- status=states.FAILURE,
- task_name=PaperlessTask.TaskName.CHECK_SANITY,
- result="5 issues found.",
+ PaperlessTaskFactory(
+ task_type=PaperlessTask.TaskType.SANITY_CHECK,
+ trigger_source=PaperlessTask.TriggerSource.SCHEDULED,
+ status=PaperlessTask.Status.FAILURE,
+ result_message="5 issues found.",
)
self.client.force_login(self.user)
response = self.client.get(self.ENDPOINT)
@@ -405,10 +405,10 @@ class TestSystemStatus(APITestCase):
self.assertEqual(response.status_code, status.HTTP_200_OK)
self.assertEqual(response.data["tasks"]["llmindex_status"], "WARNING")
- PaperlessTask.objects.create(
- type=PaperlessTask.TaskType.SCHEDULED_TASK,
- status=states.SUCCESS,
- task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,
+ PaperlessTaskFactory(
+ task_type=PaperlessTask.TaskType.LLM_INDEX,
+ trigger_source=PaperlessTask.TriggerSource.SCHEDULED,
+ status=PaperlessTask.Status.SUCCESS,
)
response = self.client.get(self.ENDPOINT)
self.assertEqual(response.status_code, status.HTTP_200_OK)
@@ -425,11 +425,11 @@ class TestSystemStatus(APITestCase):
- The response contains the correct AI status
"""
with override_settings(AI_ENABLED=True, LLM_EMBEDDING_BACKEND="openai"):
- PaperlessTask.objects.create(
- type=PaperlessTask.TaskType.SCHEDULED_TASK,
- status=states.FAILURE,
- task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,
- result="AI index update failed",
+ PaperlessTaskFactory(
+ task_type=PaperlessTask.TaskType.LLM_INDEX,
+ trigger_source=PaperlessTask.TriggerSource.SCHEDULED,
+ status=PaperlessTask.Status.FAILURE,
+ result_message="AI index update failed",
)
self.client.force_login(self.user)
response = self.client.get(self.ENDPOINT)
diff --git a/src/documents/tests/test_api_tasks.py b/src/documents/tests/test_api_tasks.py
index 5dd003565..aee080900 100644
--- a/src/documents/tests/test_api_tasks.py
+++ b/src/documents/tests/test_api_tasks.py
@@ -1,425 +1,798 @@
+"""Tests for the /api/tasks/ endpoint.
+
+Covers:
+- v10 serializer (new field names)
+- v9 serializer (backwards-compatible field names)
+- Filtering, ordering, acknowledge, summary, active, run
+"""
+
import uuid
+from datetime import timedelta
from unittest import mock
-import celery
+import pytest
from django.contrib.auth.models import Permission
from django.contrib.auth.models import User
+from django.utils import timezone
+from guardian.shortcuts import assign_perm
from rest_framework import status
-from rest_framework.test import APITestCase
+from rest_framework.test import APIClient
-from documents.models import Document
from documents.models import PaperlessTask
-from documents.tests.utils import DirectoriesMixin
-from documents.views import TasksViewSet
+from documents.tests.factories import DocumentFactory
+from documents.tests.factories import PaperlessTaskFactory
+
+pytestmark = pytest.mark.api
+
+ENDPOINT = "/api/tasks/"
+ACCEPT_V10 = "application/json; version=10"
+ACCEPT_V9 = "application/json; version=9"
-class TestTasks(DirectoriesMixin, APITestCase):
- ENDPOINT = "/api/tasks/"
+@pytest.mark.django_db()
+class TestGetTasksV10:
+ def test_list_returns_tasks(self, admin_client: APIClient) -> None:
+ """GET /api/tasks/ returns all tasks visible to the admin."""
+ PaperlessTaskFactory.create_batch(2)
- def setUp(self) -> None:
- super().setUp()
+ response = admin_client.get(ENDPOINT)
- self.user = User.objects.create_superuser(username="temp_admin")
- self.client.force_authenticate(user=self.user)
+ assert response.status_code == status.HTTP_200_OK
+ assert len(response.data) == 2
- def test_get_tasks(self) -> None:
- """
- GIVEN:
- - Attempted celery tasks
- WHEN:
- - API call is made to get tasks
- THEN:
- - Attempting and pending tasks are serialized and provided
- """
-
- task1 = PaperlessTask.objects.create(
- task_id=str(uuid.uuid4()),
- task_file_name="task_one.pdf",
+ def test_related_document_ids_populated_from_result_data(
+ self,
+ admin_client: APIClient,
+ ) -> None:
+ """related_document_ids includes the consumed document_id from result_data."""
+ PaperlessTaskFactory(
+ status=PaperlessTask.Status.SUCCESS,
+ result_data={"document_id": 7},
)
- task2 = PaperlessTask.objects.create(
- task_id=str(uuid.uuid4()),
- task_file_name="task_two.pdf",
+ response = admin_client.get(ENDPOINT)
+
+ assert response.status_code == status.HTTP_200_OK
+ assert response.data[0]["related_document_ids"] == [7]
+
+ def test_related_document_ids_includes_duplicate_of(
+ self,
+ admin_client: APIClient,
+ ) -> None:
+ """related_document_ids includes duplicate_of when the file was already archived."""
+ PaperlessTaskFactory(
+ status=PaperlessTask.Status.SUCCESS,
+ result_data={"duplicate_of": 12},
)
- response = self.client.get(self.ENDPOINT)
+ response = admin_client.get(ENDPOINT)
- self.assertEqual(response.status_code, status.HTTP_200_OK)
- self.assertEqual(len(response.data), 2)
- returned_task1 = response.data[1]
- returned_task2 = response.data[0]
+ assert response.status_code == status.HTTP_200_OK
+ assert response.data[0]["related_document_ids"] == [12]
- self.assertEqual(returned_task1["task_id"], task1.task_id)
- self.assertEqual(returned_task1["status"], celery.states.PENDING)
- self.assertEqual(returned_task1["task_file_name"], task1.task_file_name)
+ def test_filter_by_task_type(self, admin_client: APIClient) -> None:
+ """?task_type= filters results to tasks of that type only."""
+ PaperlessTaskFactory(task_type=PaperlessTask.TaskType.CONSUME_FILE)
+ PaperlessTaskFactory(task_type=PaperlessTask.TaskType.TRAIN_CLASSIFIER)
- self.assertEqual(returned_task2["task_id"], task2.task_id)
- self.assertEqual(returned_task2["status"], celery.states.PENDING)
- self.assertEqual(returned_task2["task_file_name"], task2.task_file_name)
-
- def test_get_single_task_status(self) -> None:
- """
- GIVEN
- - Query parameter for a valid task ID
- WHEN:
- - API call is made to get task status
- THEN:
- - Single task data is returned
- """
-
- id1 = str(uuid.uuid4())
- task1 = PaperlessTask.objects.create(
- task_id=id1,
- task_file_name="task_one.pdf",
+ response = admin_client.get(
+ ENDPOINT,
+ {"task_type": PaperlessTask.TaskType.TRAIN_CLASSIFIER},
)
- _ = PaperlessTask.objects.create(
- task_id=str(uuid.uuid4()),
- task_file_name="task_two.pdf",
+ assert response.status_code == status.HTTP_200_OK
+ assert len(response.data) == 1
+ assert response.data[0]["task_type"] == PaperlessTask.TaskType.TRAIN_CLASSIFIER
+
+ def test_filter_by_status(self, admin_client: APIClient) -> None:
+ """?status= filters results to tasks with that status only."""
+ PaperlessTaskFactory(status=PaperlessTask.Status.PENDING)
+ PaperlessTaskFactory(status=PaperlessTask.Status.SUCCESS)
+
+ response = admin_client.get(
+ ENDPOINT,
+ {"status": PaperlessTask.Status.SUCCESS},
)
- response = self.client.get(self.ENDPOINT + f"?task_id={id1}")
+ assert response.status_code == status.HTTP_200_OK
+ assert len(response.data) == 1
+ assert response.data[0]["status"] == PaperlessTask.Status.SUCCESS
- self.assertEqual(response.status_code, status.HTTP_200_OK)
- self.assertEqual(len(response.data), 1)
- returned_task1 = response.data[0]
+ def test_filter_by_task_id(self, admin_client: APIClient) -> None:
+ """?task_id= returns only the task with that UUID."""
+ task = PaperlessTaskFactory()
+ PaperlessTaskFactory() # unrelated task that should not appear
- self.assertEqual(returned_task1["task_id"], task1.task_id)
+ response = admin_client.get(ENDPOINT, {"task_id": task.task_id})
- def test_get_single_task_status_not_valid(self) -> None:
- """
- GIVEN
- - Query parameter for a non-existent task ID
- WHEN:
- - API call is made to get task status
- THEN:
- - No task data is returned
- """
- PaperlessTask.objects.create(
- task_id=str(uuid.uuid4()),
- task_file_name="task_one.pdf",
+ assert response.status_code == status.HTTP_200_OK
+ assert len(response.data) == 1
+ assert response.data[0]["task_id"] == task.task_id
+
+ def test_filter_by_acknowledged(self, admin_client: APIClient) -> None:
+ """?acknowledged=false returns only tasks that have not been acknowledged."""
+ PaperlessTaskFactory(acknowledged=False)
+ PaperlessTaskFactory(acknowledged=True)
+
+ response = admin_client.get(ENDPOINT, {"acknowledged": "false"})
+
+ assert response.status_code == status.HTTP_200_OK
+ assert len(response.data) == 1
+ assert response.data[0]["acknowledged"] is False
+
+ def test_filter_is_complete_true(self, admin_client: APIClient) -> None:
+ """?is_complete=true returns only SUCCESS and FAILURE tasks."""
+ PaperlessTaskFactory(status=PaperlessTask.Status.PENDING)
+ PaperlessTaskFactory(status=PaperlessTask.Status.SUCCESS)
+ PaperlessTaskFactory(status=PaperlessTask.Status.FAILURE)
+
+ response = admin_client.get(ENDPOINT, {"is_complete": "true"})
+
+ assert response.status_code == status.HTTP_200_OK
+ assert len(response.data) == 2
+ returned_statuses = {t["status"] for t in response.data}
+ assert returned_statuses == {
+ PaperlessTask.Status.SUCCESS,
+ PaperlessTask.Status.FAILURE,
+ }
+
+ def test_filter_is_complete_false(self, admin_client: APIClient) -> None:
+ """?is_complete=false returns only PENDING and STARTED tasks."""
+ PaperlessTaskFactory(status=PaperlessTask.Status.PENDING)
+ PaperlessTaskFactory(status=PaperlessTask.Status.STARTED)
+ PaperlessTaskFactory(status=PaperlessTask.Status.SUCCESS)
+
+ response = admin_client.get(ENDPOINT, {"is_complete": "false"})
+
+ assert response.status_code == status.HTTP_200_OK
+ assert len(response.data) == 2
+ returned_statuses = {t["status"] for t in response.data}
+ assert returned_statuses == {
+ PaperlessTask.Status.PENDING,
+ PaperlessTask.Status.STARTED,
+ }
+
+ def test_default_ordering_is_newest_first(self, admin_client: APIClient) -> None:
+ """Tasks are returned in descending date_created order (newest first)."""
+ base = timezone.now()
+ t1 = PaperlessTaskFactory(date_created=base)
+ t2 = PaperlessTaskFactory(date_created=base + timedelta(seconds=1))
+ t3 = PaperlessTaskFactory(date_created=base + timedelta(seconds=2))
+
+ response = admin_client.get(ENDPOINT)
+
+ assert response.status_code == status.HTTP_200_OK
+ ids = [t["task_id"] for t in response.data]
+ assert ids == [t3.task_id, t2.task_id, t1.task_id]
+
+ def test_list_scoped_to_own_and_unowned_tasks_for_regular_user(
+ self,
+ admin_user: User,
+ regular_user: User,
+ ) -> None:
+ """Regular users see their own tasks and unowned (system) tasks; other users' tasks are hidden."""
+ regular_user.user_permissions.add(
+ Permission.objects.get(codename="view_paperlesstask"),
)
- _ = PaperlessTask.objects.create(
- task_id=str(uuid.uuid4()),
- task_file_name="task_two.pdf",
+ client = APIClient()
+ client.force_authenticate(user=regular_user)
+ client.credentials(HTTP_ACCEPT=ACCEPT_V10)
+
+ PaperlessTaskFactory(owner=admin_user) # other user, not visible
+ unowned_task = PaperlessTaskFactory() # unowned (system task), visible
+ own_task = PaperlessTaskFactory(owner=regular_user)
+
+ response = client.get(ENDPOINT)
+
+ assert response.status_code == status.HTTP_200_OK
+ assert len(response.data) == 2
+ visible_ids = {t["task_id"] for t in response.data}
+ assert visible_ids == {own_task.task_id, unowned_task.task_id}
+
+ def test_list_admin_sees_all_tasks(
+ self,
+ admin_client: APIClient,
+ admin_user: User,
+ regular_user: User,
+ ) -> None:
+ """Admin users see all tasks regardless of owner."""
+ PaperlessTaskFactory(owner=admin_user)
+ PaperlessTaskFactory() # unowned system task
+ PaperlessTaskFactory(owner=regular_user)
+
+ response = admin_client.get(ENDPOINT)
+
+ assert response.status_code == status.HTTP_200_OK
+ assert len(response.data) == 3
+
+
+@pytest.mark.django_db()
+class TestGetTasksV9:
+ @pytest.mark.parametrize(
+ ("task_type", "expected_task_name"),
+ [
+ pytest.param(
+ PaperlessTask.TaskType.CONSUME_FILE,
+ "consume_file",
+ id="consume_file-passthrough",
+ ),
+ pytest.param(
+ PaperlessTask.TaskType.SANITY_CHECK,
+ "check_sanity",
+ id="sanity_check-remapped",
+ ),
+ pytest.param(
+ PaperlessTask.TaskType.LLM_INDEX,
+ "llmindex_update",
+ id="llm_index-remapped",
+ ),
+ ],
+ )
+ def test_task_name_mapping(
+ self,
+ v9_client: APIClient,
+ task_type: PaperlessTask.TaskType,
+ expected_task_name: str,
+ ) -> None:
+ """v9 task_name is either a direct pass-through or a legacy remap of task_type."""
+ PaperlessTaskFactory(task_type=task_type)
+
+ response = v9_client.get(ENDPOINT)
+
+ assert response.status_code == status.HTTP_200_OK
+ assert response.data[0]["task_name"] == expected_task_name
+
+ @pytest.mark.parametrize(
+ ("trigger_source", "expected_type"),
+ [
+ pytest.param(
+ PaperlessTask.TriggerSource.SCHEDULED,
+ "scheduled_task",
+ id="scheduled",
+ ),
+ pytest.param(
+ PaperlessTask.TriggerSource.SYSTEM,
+ "auto_task",
+ id="system",
+ ),
+ pytest.param(
+ PaperlessTask.TriggerSource.EMAIL_CONSUME,
+ "auto_task",
+ id="email_consume",
+ ),
+ pytest.param(
+ PaperlessTask.TriggerSource.FOLDER_CONSUME,
+ "auto_task",
+ id="folder_consume",
+ ),
+ pytest.param(
+ PaperlessTask.TriggerSource.WEB_UI,
+ "manual_task",
+ id="web_ui",
+ ),
+ pytest.param(
+ PaperlessTask.TriggerSource.MANUAL,
+ "manual_task",
+ id="manual",
+ ),
+ pytest.param(
+ PaperlessTask.TriggerSource.API_UPLOAD,
+ "manual_task",
+ id="api_upload",
+ ),
+ ],
+ )
+ def test_trigger_source_maps_to_v9_type(
+ self,
+ v9_client: APIClient,
+ trigger_source: PaperlessTask.TriggerSource,
+ expected_type: str,
+ ) -> None:
+ """Every TriggerSource value maps to the correct v9 type string."""
+ PaperlessTaskFactory(trigger_source=trigger_source)
+
+ response = v9_client.get(ENDPOINT)
+
+ assert response.status_code == status.HTTP_200_OK
+ assert response.data[0]["type"] == expected_type
+
+ def test_task_file_name_from_input_data(self, v9_client: APIClient) -> None:
+ """task_file_name is read from input_data['filename']."""
+ PaperlessTaskFactory(input_data={"filename": "report.pdf"})
+
+ response = v9_client.get(ENDPOINT)
+
+ assert response.status_code == status.HTTP_200_OK
+ assert response.data[0]["task_file_name"] == "report.pdf"
+
+ def test_task_file_name_none_when_no_filename_key(
+ self,
+ v9_client: APIClient,
+ ) -> None:
+ """task_file_name is None when filename is absent from input_data."""
+ PaperlessTaskFactory(input_data={})
+
+ response = v9_client.get(ENDPOINT)
+
+ assert response.status_code == status.HTTP_200_OK
+ assert response.data[0]["task_file_name"] is None
+
+ def test_related_document_from_result_data_document_id(
+ self,
+ v9_client: APIClient,
+ ) -> None:
+ """related_document is taken from result_data['document_id'] in v9."""
+ PaperlessTaskFactory(
+ status=PaperlessTask.Status.SUCCESS,
+ result_data={"document_id": 99},
)
- response = self.client.get(self.ENDPOINT + "?task_id=bad-task-id")
+ response = v9_client.get(ENDPOINT)
- self.assertEqual(response.status_code, status.HTTP_200_OK)
- self.assertEqual(len(response.data), 0)
+ assert response.status_code == status.HTTP_200_OK
+ assert response.data[0]["related_document"] == 99
- def test_acknowledge_tasks(self) -> None:
- """
- GIVEN:
- - Attempted celery tasks
- WHEN:
- - API call is made to get mark task as acknowledged
- THEN:
- - Task is marked as acknowledged
- """
- task = PaperlessTask.objects.create(
- task_id=str(uuid.uuid4()),
- task_file_name="task_one.pdf",
+ def test_related_document_none_when_no_result_data(
+ self,
+ v9_client: APIClient,
+ ) -> None:
+ """related_document is None when result_data is absent in v9."""
+ PaperlessTaskFactory(result_data=None)
+
+ response = v9_client.get(ENDPOINT)
+
+ assert response.status_code == status.HTTP_200_OK
+ assert response.data[0]["related_document"] is None
+
+ def test_duplicate_documents_from_result_data(self, v9_client: APIClient) -> None:
+ """duplicate_documents includes duplicate_of from result_data in v9."""
+ doc = DocumentFactory.create(title="Duplicate Target")
+ PaperlessTaskFactory(
+ status=PaperlessTask.Status.SUCCESS,
+ result_data={"duplicate_of": doc.pk},
)
- response = self.client.get(self.ENDPOINT)
- self.assertEqual(len(response.data), 1)
+ response = v9_client.get(ENDPOINT)
- response = self.client.post(
- self.ENDPOINT + "acknowledge/",
+ assert response.status_code == status.HTTP_200_OK
+ dupes = response.data[0]["duplicate_documents"]
+ assert len(dupes) == 1
+ assert dupes[0]["id"] == doc.pk
+ assert dupes[0]["title"] == doc.title
+ assert "deleted_at" in dupes[0]
+
+ def test_duplicate_documents_empty_when_no_result_data(
+ self,
+ v9_client: APIClient,
+ ) -> None:
+ """duplicate_documents is an empty list when result_data is absent in v9."""
+ PaperlessTaskFactory(result_data=None)
+
+ response = v9_client.get(ENDPOINT)
+
+ assert response.status_code == status.HTTP_200_OK
+ assert response.data[0]["duplicate_documents"] == []
+
+ def test_status_remapped_to_uppercase(self, v9_client: APIClient) -> None:
+ """v9 status values are uppercase Celery state strings."""
+ PaperlessTaskFactory(status=PaperlessTask.Status.SUCCESS)
+ PaperlessTaskFactory(status=PaperlessTask.Status.PENDING)
+ PaperlessTaskFactory(status=PaperlessTask.Status.FAILURE)
+
+ response = v9_client.get(ENDPOINT)
+
+ assert response.status_code == status.HTTP_200_OK
+ statuses = {t["status"] for t in response.data}
+ assert statuses == {"SUCCESS", "PENDING", "FAILURE"}
+
+ def test_filter_by_task_name_maps_old_value(self, v9_client: APIClient) -> None:
+ """?task_name=check_sanity maps to task_type=sanity_check in v9."""
+ PaperlessTaskFactory(task_type=PaperlessTask.TaskType.SANITY_CHECK)
+ PaperlessTaskFactory(task_type=PaperlessTask.TaskType.CONSUME_FILE)
+
+ response = v9_client.get(ENDPOINT, {"task_name": "check_sanity"})
+
+ assert response.status_code == status.HTTP_200_OK
+ assert len(response.data) == 1
+ assert response.data[0]["task_name"] == "check_sanity"
+
+ def test_v9_non_staff_sees_own_and_unowned_tasks(
+ self,
+ admin_user: User,
+ regular_user: User,
+ ) -> None:
+ """Non-staff users see their own tasks plus unowned tasks via v9 API."""
+ regular_user.user_permissions.add(
+ Permission.objects.get(codename="view_paperlesstask"),
+ )
+
+ client = APIClient()
+ client.force_authenticate(user=regular_user)
+ client.credentials(HTTP_ACCEPT=ACCEPT_V9)
+
+ PaperlessTaskFactory(owner=admin_user) # other user, not visible
+ PaperlessTaskFactory(owner=None) # unowned, visible in v9
+ PaperlessTaskFactory(owner=regular_user) # own task, visible
+
+ response = client.get(ENDPOINT)
+
+ assert response.status_code == status.HTTP_200_OK
+ assert len(response.data) == 2
+
+ def test_filter_by_task_name_maps_to_task_type(self, v9_client: APIClient) -> None:
+ """?task_name=consume_file filter maps to the task_type field for v9 compatibility."""
+ PaperlessTaskFactory(task_type=PaperlessTask.TaskType.CONSUME_FILE)
+ PaperlessTaskFactory(task_type=PaperlessTask.TaskType.TRAIN_CLASSIFIER)
+
+ response = v9_client.get(ENDPOINT, {"task_name": "consume_file"})
+
+ assert response.status_code == status.HTTP_200_OK
+ assert len(response.data) == 1
+ assert response.data[0]["task_name"] == "consume_file"
+
+ def test_filter_by_type_scheduled_task(self, v9_client: APIClient) -> None:
+ """?type=scheduled_task matches trigger_source=scheduled only."""
+ PaperlessTaskFactory(trigger_source=PaperlessTask.TriggerSource.SCHEDULED)
+ PaperlessTaskFactory(trigger_source=PaperlessTask.TriggerSource.WEB_UI)
+
+ response = v9_client.get(ENDPOINT, {"type": "scheduled_task"})
+
+ assert response.status_code == status.HTTP_200_OK
+ assert len(response.data) == 1
+ assert response.data[0]["type"] == "scheduled_task"
+
+ def test_filter_by_type_auto_task_includes_all_auto_sources(
+ self,
+ v9_client: APIClient,
+ ) -> None:
+ """?type=auto_task matches system, email_consume, and folder_consume tasks."""
+ PaperlessTaskFactory(trigger_source=PaperlessTask.TriggerSource.SYSTEM)
+ PaperlessTaskFactory(trigger_source=PaperlessTask.TriggerSource.EMAIL_CONSUME)
+ PaperlessTaskFactory(trigger_source=PaperlessTask.TriggerSource.FOLDER_CONSUME)
+ PaperlessTaskFactory(
+ trigger_source=PaperlessTask.TriggerSource.MANUAL,
+ ) # excluded
+
+ response = v9_client.get(ENDPOINT, {"type": "auto_task"})
+
+ assert response.status_code == status.HTTP_200_OK
+ assert len(response.data) == 3
+ assert all(t["type"] == "auto_task" for t in response.data)
+
+ def test_filter_by_type_manual_task_includes_all_manual_sources(
+ self,
+ v9_client: APIClient,
+ ) -> None:
+ """?type=manual_task matches manual, web_ui, and api_upload tasks."""
+ PaperlessTaskFactory(trigger_source=PaperlessTask.TriggerSource.MANUAL)
+ PaperlessTaskFactory(trigger_source=PaperlessTask.TriggerSource.WEB_UI)
+ PaperlessTaskFactory(trigger_source=PaperlessTask.TriggerSource.API_UPLOAD)
+ PaperlessTaskFactory(
+ trigger_source=PaperlessTask.TriggerSource.SCHEDULED,
+ ) # excluded
+
+ response = v9_client.get(ENDPOINT, {"type": "manual_task"})
+
+ assert response.status_code == status.HTTP_200_OK
+ assert len(response.data) == 3
+ assert all(t["type"] == "manual_task" for t in response.data)
+
+
+@pytest.mark.django_db()
+class TestAcknowledge:
+ def test_returns_count(self, admin_client: APIClient) -> None:
+ """POST acknowledge/ returns the count of tasks that were acknowledged."""
+ task1 = PaperlessTaskFactory()
+ task2 = PaperlessTaskFactory()
+
+ response = admin_client.post(
+ ENDPOINT + "acknowledge/",
+ {"tasks": [task1.id, task2.id]},
+ format="json",
+ )
+
+ assert response.status_code == status.HTTP_200_OK
+ assert response.data == {"result": 2}
+
+ def test_acknowledged_tasks_excluded_from_unacked_filter(
+ self,
+ admin_client: APIClient,
+ ) -> None:
+ """Acknowledged tasks no longer appear when filtering with ?acknowledged=false."""
+ task = PaperlessTaskFactory()
+ admin_client.post(
+ ENDPOINT + "acknowledge/",
{"tasks": [task.id]},
- )
- self.assertEqual(response.status_code, status.HTTP_200_OK)
-
- response = self.client.get(self.ENDPOINT + "?acknowledged=false")
- self.assertEqual(len(response.data), 0)
-
- def test_acknowledge_tasks_requires_change_permission(self) -> None:
- """
- GIVEN:
- - A regular user initially without change permissions
- - A regular user with change permissions
- WHEN:
- - API call is made to acknowledge tasks
- THEN:
- - The first user is forbidden from acknowledging tasks
- - The second user is allowed to acknowledge tasks
- """
- regular_user = User.objects.create_user(username="test")
- self.client.force_authenticate(user=regular_user)
-
- task = PaperlessTask.objects.create(
- task_id=str(uuid.uuid4()),
- task_file_name="task_one.pdf",
+ format="json",
)
- response = self.client.post(
- self.ENDPOINT + "acknowledge/",
+ response = admin_client.get(ENDPOINT, {"acknowledged": "false"})
+
+ assert response.status_code == status.HTTP_200_OK
+ assert len(response.data) == 0
+
+ def test_requires_change_permission(self, user_client: APIClient) -> None:
+ """Regular users without change_paperlesstask permission receive 403."""
+ task = PaperlessTaskFactory()
+
+ response = user_client.post(
+ ENDPOINT + "acknowledge/",
{"tasks": [task.id]},
+ format="json",
)
- self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
- regular_user2 = User.objects.create_user(username="test2")
- regular_user2.user_permissions.add(
+ assert response.status_code == status.HTTP_403_FORBIDDEN
+
+ def test_succeeds_with_change_permission(self, regular_user: User) -> None:
+ """Users granted change_paperlesstask permission can acknowledge tasks."""
+ regular_user.user_permissions.add(
Permission.objects.get(codename="change_paperlesstask"),
)
- regular_user2.save()
- self.client.force_authenticate(user=regular_user2)
+ regular_user.save()
- response = self.client.post(
- self.ENDPOINT + "acknowledge/",
+ client = APIClient()
+ client.force_authenticate(user=regular_user)
+ client.credentials(HTTP_ACCEPT=ACCEPT_V10)
+
+ task = PaperlessTaskFactory()
+ response = client.post(
+ ENDPOINT + "acknowledge/",
{"tasks": [task.id]},
- )
- self.assertEqual(response.status_code, status.HTTP_200_OK)
-
- def test_tasks_owner_aware(self) -> None:
- """
- GIVEN:
- - Existing PaperlessTasks with owner and with no owner
- WHEN:
- - API call is made to get tasks
- THEN:
- - Only tasks with no owner or request user are returned
- """
-
- regular_user = User.objects.create_user(username="test")
- regular_user.user_permissions.add(*Permission.objects.all())
- self.client.logout()
- self.client.force_authenticate(user=regular_user)
-
- task1 = PaperlessTask.objects.create(
- task_id=str(uuid.uuid4()),
- task_file_name="task_one.pdf",
- owner=self.user,
+ format="json",
)
- task2 = PaperlessTask.objects.create(
- task_id=str(uuid.uuid4()),
- task_file_name="task_two.pdf",
+ assert response.status_code == status.HTTP_200_OK
+
+
+@pytest.mark.django_db()
+class TestSummary:
+ def test_returns_per_type_totals(self, admin_client: APIClient) -> None:
+ """summary/ returns per-type counts of total, success, and failure tasks."""
+ PaperlessTaskFactory(
+ task_type=PaperlessTask.TaskType.CONSUME_FILE,
+ status=PaperlessTask.Status.SUCCESS,
+ )
+ PaperlessTaskFactory(
+ task_type=PaperlessTask.TaskType.CONSUME_FILE,
+ status=PaperlessTask.Status.FAILURE,
+ )
+ PaperlessTaskFactory(
+ task_type=PaperlessTask.TaskType.TRAIN_CLASSIFIER,
+ status=PaperlessTask.Status.SUCCESS,
)
- task3 = PaperlessTask.objects.create(
- task_id=str(uuid.uuid4()),
- task_file_name="task_three.pdf",
- owner=regular_user,
- )
+ response = admin_client.get(ENDPOINT + "summary/")
- response = self.client.get(self.ENDPOINT)
+ assert response.status_code == status.HTTP_200_OK
+ by_type = {item["task_type"]: item for item in response.data}
+ assert by_type["consume_file"]["total_count"] == 2
+ assert by_type["consume_file"]["success_count"] == 1
+ assert by_type["consume_file"]["failure_count"] == 1
+ assert by_type["train_classifier"]["total_count"] == 1
- self.assertEqual(response.status_code, status.HTTP_200_OK)
- self.assertEqual(len(response.data), 2)
- self.assertEqual(response.data[0]["task_id"], task3.task_id)
- self.assertEqual(response.data[1]["task_id"], task2.task_id)
+ def test_rejects_invalid_days_param(self, admin_client: APIClient) -> None:
+ """?days=invalid returns 400 with an error message."""
+ response = admin_client.get(ENDPOINT + "summary/", {"days": "invalid"})
- acknowledge_response = self.client.post(
- self.ENDPOINT + "acknowledge/",
- {"tasks": [task1.id, task2.id, task3.id]},
- )
- self.assertEqual(acknowledge_response.status_code, status.HTTP_200_OK)
- self.assertEqual(acknowledge_response.data, {"result": 2})
+ assert response.status_code == status.HTTP_400_BAD_REQUEST
+ assert "days" in response.data
- def test_task_result_no_error(self) -> None:
- """
- GIVEN:
- - A celery task completed without error
- WHEN:
- - API call is made to get tasks
- THEN:
- - The returned data includes the task result
- """
- PaperlessTask.objects.create(
- task_id=str(uuid.uuid4()),
- task_file_name="task_one.pdf",
- status=celery.states.SUCCESS,
- result="Success. New document id 1 created",
- )
- response = self.client.get(self.ENDPOINT)
+@pytest.mark.django_db()
+class TestActive:
+ def test_returns_pending_and_started_only(self, admin_client: APIClient) -> None:
+ """active/ returns only tasks in PENDING or STARTED status."""
+ PaperlessTaskFactory(status=PaperlessTask.Status.PENDING)
+ PaperlessTaskFactory(status=PaperlessTask.Status.STARTED)
+ PaperlessTaskFactory(status=PaperlessTask.Status.SUCCESS)
+ PaperlessTaskFactory(status=PaperlessTask.Status.FAILURE)
- self.assertEqual(response.status_code, status.HTTP_200_OK)
- self.assertEqual(len(response.data), 1)
+ response = admin_client.get(ENDPOINT + "active/")
- returned_data = response.data[0]
-
- self.assertEqual(returned_data["result"], "Success. New document id 1 created")
- self.assertEqual(returned_data["related_document"], "1")
-
- def test_task_result_with_error(self) -> None:
- """
- GIVEN:
- - A celery task completed with an exception
- WHEN:
- - API call is made to get tasks
- THEN:
- - The returned result is the exception info
- """
- PaperlessTask.objects.create(
- task_id=str(uuid.uuid4()),
- task_file_name="task_one.pdf",
- status=celery.states.FAILURE,
- result="test.pdf: Unexpected error during ingestion.",
- )
-
- response = self.client.get(self.ENDPOINT)
-
- self.assertEqual(response.status_code, status.HTTP_200_OK)
- self.assertEqual(len(response.data), 1)
-
- returned_data = response.data[0]
-
- self.assertEqual(
- returned_data["result"],
- "test.pdf: Unexpected error during ingestion.",
- )
-
- def test_task_name_webui(self) -> None:
- """
- GIVEN:
- - Attempted celery task
- - Task was created through the webui
- WHEN:
- - API call is made to get tasks
- THEN:
- - Returned data include the filename
- """
- PaperlessTask.objects.create(
- task_id=str(uuid.uuid4()),
- task_file_name="test.pdf",
- task_name=PaperlessTask.TaskName.CONSUME_FILE,
- status=celery.states.SUCCESS,
- )
-
- response = self.client.get(self.ENDPOINT)
-
- self.assertEqual(response.status_code, status.HTTP_200_OK)
- self.assertEqual(len(response.data), 1)
-
- returned_data = response.data[0]
-
- self.assertEqual(returned_data["task_file_name"], "test.pdf")
-
- def test_task_name_consume_folder(self) -> None:
- """
- GIVEN:
- - Attempted celery task
- - Task was created through the consume folder
- WHEN:
- - API call is made to get tasks
- THEN:
- - Returned data include the filename
- """
- PaperlessTask.objects.create(
- task_id=str(uuid.uuid4()),
- task_file_name="anothertest.pdf",
- task_name=PaperlessTask.TaskName.CONSUME_FILE,
- status=celery.states.SUCCESS,
- )
-
- response = self.client.get(self.ENDPOINT)
-
- self.assertEqual(response.status_code, status.HTTP_200_OK)
- self.assertEqual(len(response.data), 1)
-
- returned_data = response.data[0]
-
- self.assertEqual(returned_data["task_file_name"], "anothertest.pdf")
-
- def test_task_result_duplicate_warning_includes_count(self) -> None:
- """
- GIVEN:
- - A celery task succeeds, but a duplicate exists
- WHEN:
- - API call is made to get tasks
- THEN:
- - The returned data includes duplicate warning metadata
- """
- checksum = "duplicate-checksum"
- Document.objects.create(
- title="Existing",
- content="",
- mime_type="application/pdf",
- checksum=checksum,
- )
- created_doc = Document.objects.create(
- title="Created",
- content="",
- mime_type="application/pdf",
- checksum=checksum,
- archive_checksum="another-checksum",
- )
- PaperlessTask.objects.create(
- task_id=str(uuid.uuid4()),
- task_file_name="task_one.pdf",
- status=celery.states.SUCCESS,
- result=f"Success. New document id {created_doc.pk} created",
- )
-
- response = self.client.get(self.ENDPOINT)
-
- self.assertEqual(response.status_code, status.HTTP_200_OK)
- self.assertEqual(len(response.data), 1)
-
- returned_data = response.data[0]
-
- self.assertEqual(returned_data["related_document"], str(created_doc.pk))
-
- def test_run_train_classifier_task(self) -> None:
- """
- GIVEN:
- - A superuser
- WHEN:
- - API call is made to run the train classifier task
- THEN:
- - The task is run
- """
- mock_train_classifier = mock.Mock(return_value="Task started")
- TasksViewSet.TASK_AND_ARGS_BY_NAME = {
- PaperlessTask.TaskName.TRAIN_CLASSIFIER: (
- mock_train_classifier,
- {"scheduled": False},
- ),
+ assert response.status_code == status.HTTP_200_OK
+ assert len(response.data) == 2
+ active_statuses = {t["status"] for t in response.data}
+ assert active_statuses == {
+ PaperlessTask.Status.PENDING,
+ PaperlessTask.Status.STARTED,
}
- response = self.client.post(
- self.ENDPOINT + "run/",
- {"task_name": PaperlessTask.TaskName.TRAIN_CLASSIFIER},
+
+ def test_excludes_revoked_tasks_from_active(self, admin_client: APIClient) -> None:
+ """active/ excludes REVOKED tasks."""
+ PaperlessTaskFactory(status=PaperlessTask.Status.REVOKED)
+
+ response = admin_client.get(ENDPOINT + "active/")
+
+ assert response.status_code == status.HTTP_200_OK
+ assert len(response.data) == 0
+
+
+@pytest.mark.django_db()
+class TestRun:
+ def test_forbidden_for_regular_user(self, user_client: APIClient) -> None:
+ """Regular users without add_paperlesstask permission receive 403 from run/."""
+ response = user_client.post(
+ ENDPOINT + "run/",
+ {"task_type": PaperlessTask.TaskType.TRAIN_CLASSIFIER},
+ format="json",
)
- self.assertEqual(response.status_code, status.HTTP_200_OK)
- self.assertEqual(response.data, {"result": "Task started"})
- mock_train_classifier.assert_called_once_with(scheduled=False)
+ assert response.status_code == status.HTTP_403_FORBIDDEN
- # mock error
- mock_train_classifier.reset_mock()
- mock_train_classifier.side_effect = Exception("Error")
- response = self.client.post(
- self.ENDPOINT + "run/",
- {"task_name": PaperlessTask.TaskName.TRAIN_CLASSIFIER},
+ def test_dispatches_via_apply_async_with_manual_trigger_header(
+ self,
+ admin_client: APIClient,
+ ) -> None:
+ """run/ dispatches the task via apply_async with trigger_source=manual in headers."""
+ fake_task_id = str(uuid.uuid4())
+ mock_async_result = mock.Mock()
+ mock_async_result.id = fake_task_id
+
+ mock_apply_async = mock.Mock(return_value=mock_async_result)
+
+ with mock.patch(
+ "documents.views.train_classifier.apply_async",
+ mock_apply_async,
+ ):
+ response = admin_client.post(
+ ENDPOINT + "run/",
+ {"task_type": PaperlessTask.TaskType.TRAIN_CLASSIFIER},
+ format="json",
+ )
+
+ assert response.status_code == status.HTTP_200_OK
+ assert response.data == {"task_id": fake_task_id}
+ mock_apply_async.assert_called_once_with(
+ kwargs={},
+ headers={"trigger_source": PaperlessTask.TriggerSource.MANUAL},
)
- self.assertEqual(response.status_code, status.HTTP_500_INTERNAL_SERVER_ERROR)
- mock_train_classifier.assert_called_once_with(scheduled=False)
-
- @mock.patch("documents.tasks.sanity_check")
- def test_run_task_requires_superuser(self, mock_check_sanity) -> None:
- """
- GIVEN:
- - A regular user
- WHEN:
- - API call is made to run a task
- THEN:
- - The task is not run
- """
- regular_user = User.objects.create_user(username="test")
- regular_user.user_permissions.add(*Permission.objects.all())
- self.client.logout()
- self.client.force_authenticate(user=regular_user)
-
- response = self.client.post(
- self.ENDPOINT + "run/",
- {"task_name": PaperlessTask.TaskName.CHECK_SANITY},
+ @pytest.mark.parametrize(
+ "task_type",
+ [
+ pytest.param(
+ PaperlessTask.TaskType.CONSUME_FILE,
+ id="consume_file-not-runnable",
+ ),
+ pytest.param(
+ "not_a_real_type",
+ id="invalid-task-type",
+ ),
+ ],
+ )
+ def test_returns_400_for_non_runnable_task_type(
+ self,
+ admin_client: APIClient,
+ task_type: str,
+ ) -> None:
+ """run/ returns 400 for task types that cannot be manually triggered."""
+ response = admin_client.post(
+ ENDPOINT + "run/",
+ {"task_type": task_type},
+ format="json",
)
- self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
- mock_check_sanity.assert_not_called()
+ assert response.status_code == status.HTTP_400_BAD_REQUEST
+
+ def test_sanity_check_dispatched_with_correct_kwargs(
+ self,
+ admin_client: APIClient,
+ ) -> None:
+ """run/ dispatches sanity_check with raise_on_error=False and manual trigger header."""
+ fake_task_id = str(uuid.uuid4())
+ mock_async_result = mock.Mock()
+ mock_async_result.id = fake_task_id
+
+ mock_apply_async = mock.Mock(return_value=mock_async_result)
+
+ with mock.patch(
+ "documents.views.sanity_check.apply_async",
+ mock_apply_async,
+ ):
+ response = admin_client.post(
+ ENDPOINT + "run/",
+ {"task_type": PaperlessTask.TaskType.SANITY_CHECK},
+ format="json",
+ )
+
+ assert response.status_code == status.HTTP_200_OK
+ assert response.data == {"task_id": fake_task_id}
+ mock_apply_async.assert_called_once_with(
+ kwargs={"raise_on_error": False},
+ headers={"trigger_source": PaperlessTask.TriggerSource.MANUAL},
+ )
+
+
+@pytest.mark.django_db()
+class TestDuplicateDocumentsPermissions:
+ """duplicate_documents in the v9 response must respect document-level permissions."""
+
+ @pytest.fixture()
+ def user_v9_client(self, regular_user: User) -> APIClient:
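+        """Authenticated non-staff client with view permission that requests the v9 API via the Accept header."""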
+ regular_user.user_permissions.add(
+ Permission.objects.get(codename="view_paperlesstask"),
+ )
+ client = APIClient()
+ client.force_authenticate(user=regular_user)
+ client.credentials(HTTP_ACCEPT=ACCEPT_V9)
+ return client
+
+ def test_owner_sees_duplicate_document(
+ self,
+ user_v9_client: APIClient,
+ regular_user: User,
+ ) -> None:
+ """A non-staff user sees a duplicate_of document they own."""
+ doc = DocumentFactory(owner=regular_user, title="My Doc")
+ PaperlessTaskFactory(
+ owner=regular_user,
+ status=PaperlessTask.Status.SUCCESS,
+ result_data={"duplicate_of": doc.pk},
+ )
+
+ response = user_v9_client.get(ENDPOINT)
+
+ assert response.status_code == status.HTTP_200_OK
+ dupes = response.data[0]["duplicate_documents"]
+ assert len(dupes) == 1
+ assert dupes[0]["id"] == doc.pk
+
+ def test_unowned_duplicate_document_is_visible(
+ self,
+ user_v9_client: APIClient,
+ regular_user: User,
+ ) -> None:
+ """An unowned duplicate_of document is visible to any authenticated user."""
+ doc = DocumentFactory(owner=None, title="Shared Doc")
+ PaperlessTaskFactory(
+ owner=regular_user,
+ status=PaperlessTask.Status.SUCCESS,
+ result_data={"duplicate_of": doc.pk},
+ )
+
+ response = user_v9_client.get(ENDPOINT)
+
+ assert response.status_code == status.HTTP_200_OK
+ assert len(response.data[0]["duplicate_documents"]) == 1
+
+ def test_other_users_duplicate_document_is_hidden(
+ self,
+ user_v9_client: APIClient,
+ regular_user: User,
+ admin_user: User,
+ ) -> None:
+ """A non-staff user cannot see a duplicate_of document owned by another user."""
+ doc = DocumentFactory(owner=admin_user, title="Admin Doc")
+ PaperlessTaskFactory(
+ owner=regular_user,
+ status=PaperlessTask.Status.SUCCESS,
+ result_data={"duplicate_of": doc.pk},
+ )
+
+ response = user_v9_client.get(ENDPOINT)
+
+ assert response.status_code == status.HTTP_200_OK
+ assert response.data[0]["duplicate_documents"] == []
+
+ def test_explicit_permission_grants_visibility(
+ self,
+ user_v9_client: APIClient,
+ regular_user: User,
+ admin_user: User,
+ ) -> None:
+ """A user with explicit guardian view_document permission sees the duplicate_of document."""
+ doc = DocumentFactory(owner=admin_user, title="Granted Doc")
+ assign_perm("view_document", regular_user, doc)
+ PaperlessTaskFactory(
+ owner=regular_user,
+ status=PaperlessTask.Status.SUCCESS,
+ result_data={"duplicate_of": doc.pk},
+ )
+
+ response = user_v9_client.get(ENDPOINT)
+
+ assert response.status_code == status.HTTP_200_OK
+ dupes = response.data[0]["duplicate_documents"]
+ assert len(dupes) == 1
+ assert dupes[0]["id"] == doc.pk
diff --git a/src/documents/tests/test_management.py b/src/documents/tests/test_management.py
index 72476d403..276da942d 100644
--- a/src/documents/tests/test_management.py
+++ b/src/documents/tests/test_management.py
@@ -211,7 +211,7 @@ class TestCreateClassifier:
call_command("document_create_classifier", skip_checks=True)
- m.assert_called_once_with(scheduled=False, status_callback=mocker.ANY)
+ m.assert_called_once_with(status_callback=mocker.ANY)
assert callable(m.call_args.kwargs["status_callback"])
def test_create_classifier_callback_output(self, mocker: MockerFixture) -> None:
diff --git a/src/documents/tests/test_sanity_check.py b/src/documents/tests/test_sanity_check.py
index e62c17303..568e3e444 100644
--- a/src/documents/tests/test_sanity_check.py
+++ b/src/documents/tests/test_sanity_check.py
@@ -1,7 +1,7 @@
"""Tests for the sanity checker module.
Tests exercise ``check_sanity`` as a whole, verifying document validation,
-orphan detection, task recording, and the iter_wrapper contract.
+orphan detection, and the iter_wrapper contract.
"""
from __future__ import annotations
@@ -12,13 +12,12 @@ from typing import TYPE_CHECKING
import pytest
-from documents.models import Document
-from documents.models import PaperlessTask
from documents.sanity_checker import check_sanity
if TYPE_CHECKING:
from collections.abc import Iterable
+ from documents.models import Document
from documents.tests.conftest import PaperlessDirs
@@ -229,35 +228,6 @@ class TestCheckSanityIterWrapper:
assert not messages.has_error
-@pytest.mark.django_db
-class TestCheckSanityTaskRecording:
- @pytest.mark.parametrize(
- ("expected_type", "scheduled"),
- [
- pytest.param(PaperlessTask.TaskType.SCHEDULED_TASK, True, id="scheduled"),
- pytest.param(PaperlessTask.TaskType.MANUAL_TASK, False, id="manual"),
- ],
- )
- @pytest.mark.usefixtures("_media_settings")
- def test_task_type(self, expected_type: str, *, scheduled: bool) -> None:
- check_sanity(scheduled=scheduled)
- task = PaperlessTask.objects.latest("date_created")
- assert task.task_name == PaperlessTask.TaskName.CHECK_SANITY
- assert task.type == expected_type
-
- def test_success_status(self, sample_doc: Document) -> None:
- check_sanity()
- task = PaperlessTask.objects.latest("date_created")
- assert task.status == "SUCCESS"
-
- def test_failure_status(self, sample_doc: Document) -> None:
- Path(sample_doc.source_path).unlink()
- check_sanity()
- task = PaperlessTask.objects.latest("date_created")
- assert task.status == "FAILURE"
- assert "Check logs for details" in task.result
-
-
@pytest.mark.django_db
class TestCheckSanityLogMessages:
def test_logs_doc_issues(
diff --git a/src/documents/tests/test_task_signals.py b/src/documents/tests/test_task_signals.py
index 3dcbbeaff..80b5e5075 100644
--- a/src/documents/tests/test_task_signals.py
+++ b/src/documents/tests/test_task_signals.py
@@ -1,250 +1,390 @@
+import datetime
+import sys
import uuid
+from pathlib import Path
from unittest import mock
-import celery
-from django.contrib.auth import get_user_model
-from django.test import TestCase
+import pytest
+import pytest_mock
+from django.utils import timezone
from documents.data_models import ConsumableDocument
from documents.data_models import DocumentMetadataOverrides
from documents.data_models import DocumentSource
-from documents.models import Document
from documents.models import PaperlessTask
-from documents.signals.handlers import add_to_index
from documents.signals.handlers import before_task_publish_handler
from documents.signals.handlers import task_failure_handler
from documents.signals.handlers import task_postrun_handler
from documents.signals.handlers import task_prerun_handler
-from documents.tests.test_consumer import fake_magic_from_file
-from documents.tests.utils import DirectoriesMixin
+from documents.signals.handlers import task_revoked_handler
+from documents.tests.factories import PaperlessTaskFactory
-@mock.patch("documents.consumer.magic.from_file", fake_magic_from_file)
-class TestTaskSignalHandler(DirectoriesMixin, TestCase):
- @classmethod
- def setUpTestData(cls) -> None:
- super().setUpTestData()
- cls.user = get_user_model().objects.create_user(username="testuser")
+@pytest.fixture
+def consume_input_doc():
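+    """Mocked ConsumableDocument representing a web-UI upload named invoice.pdf."""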
+ doc = mock.MagicMock(spec=ConsumableDocument)
+ # original_file is a Path; configure the nested mock so .name works
+ doc.original_file = mock.MagicMock()
+ doc.original_file.name = "invoice.pdf"
+ doc.original_path = None
+ doc.mime_type = "application/pdf"
+ doc.mailrule_id = None
+ doc.source = DocumentSource.WebUI
+ return doc
- def util_call_before_task_publish_handler(
+
+@pytest.fixture
+def consume_overrides(django_user_model):
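+    """Mocked DocumentMetadataOverrides whose owner_id points at a freshly created user."""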
+ user = django_user_model.objects.create_user(username="testuser")
+ overrides = mock.MagicMock(spec=DocumentMetadataOverrides)
+ overrides.owner_id = user.id
+ return overrides
+
+
+def send_publish(
+ task_name: str,
+ args: tuple,
+ kwargs: dict,
+ headers: dict | None = None,
+) -> str:
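+    """Invoke before_task_publish_handler the way Celery would and return the generated task id."""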
+
+ task_id = str(uuid.uuid4())
+ hdrs = {"task": task_name, "id": task_id, **(headers or {})}
+ before_task_publish_handler(sender=task_name, headers=hdrs, body=(args, kwargs, {}))
+ return task_id
+
+
+@pytest.mark.django_db
+class TestBeforeTaskPublishHandler:
+ def test_creates_task_for_consume_file(self, consume_input_doc, consume_overrides):
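+        """A consume_file publish creates a PENDING task with the web-UI trigger, owned by the uploader."""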
+ task_id = send_publish(
+ "documents.tasks.consume_file",
+ (consume_input_doc, consume_overrides),
+ {},
+ )
+ task = PaperlessTask.objects.get(task_id=task_id)
+ assert task.task_type == PaperlessTask.TaskType.CONSUME_FILE
+ assert task.status == PaperlessTask.Status.PENDING
+ assert task.trigger_source == PaperlessTask.TriggerSource.WEB_UI
+ assert task.input_data["filename"] == "invoice.pdf"
+ assert task.owner_id == consume_overrides.owner_id
+
+ def test_creates_task_for_train_classifier(self):
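+        """A train_classifier publish records a TRAIN_CLASSIFIER task; with no header the trigger defaults to manual."""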
+ task_id = send_publish("documents.tasks.train_classifier", (), {})
+ task = PaperlessTask.objects.get(task_id=task_id)
+ assert task.task_type == PaperlessTask.TaskType.TRAIN_CLASSIFIER
+ assert task.trigger_source == PaperlessTask.TriggerSource.MANUAL
+
+ def test_creates_task_for_sanity_check(self):
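+        """A sanity_check publish records a SANITY_CHECK task."""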
+ task_id = send_publish("documents.tasks.sanity_check", (), {})
+ task = PaperlessTask.objects.get(task_id=task_id)
+ assert task.task_type == PaperlessTask.TaskType.SANITY_CHECK
+
+ def test_creates_task_for_process_mail_accounts(self):
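+        """A process_mail_accounts publish records a MAIL_FETCH task and stores the account ids."""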
+ task_id = send_publish(
+ "paperless_mail.tasks.process_mail_accounts",
+ (),
+ {"account_ids": [1, 2]},
+ )
+ task = PaperlessTask.objects.get(task_id=task_id)
+ assert task.task_type == PaperlessTask.TaskType.MAIL_FETCH
+ assert task.input_data["account_ids"] == [1, 2]
+
+ def test_mail_fetch_no_account_ids_stores_empty_input(self):
+ """Beat-scheduled mail checks pass no account_ids; input_data should be {} not {"account_ids": None}."""
+ task_id = send_publish("paperless_mail.tasks.process_mail_accounts", (), {})
+ task = PaperlessTask.objects.get(task_id=task_id)
+ assert task.input_data == {}
+
+ def test_overrides_date_serialized_as_iso_string(self, consume_input_doc):
+ """A datetime.date in overrides is stored as an ISO string so input_data is JSON-safe."""
+ overrides = DocumentMetadataOverrides(created=datetime.date(2024, 1, 15))
+
+ task_id = send_publish(
+ "documents.tasks.consume_file",
+ (consume_input_doc, overrides),
+ {},
+ )
+
+ task = PaperlessTask.objects.get(task_id=task_id)
+ assert task.input_data["overrides"]["created"] == "2024-01-15"
+
+ def test_overrides_path_serialized_as_string(self, consume_input_doc):
+ """A Path value in overrides is stored as a plain string so input_data is JSON-safe."""
+ overrides = DocumentMetadataOverrides()
+ overrides.filename = Path("/uploads/invoice.pdf") # type: ignore[assignment]
+
+ task_id = send_publish(
+ "documents.tasks.consume_file",
+ (consume_input_doc, overrides),
+ {},
+ )
+
+ task = PaperlessTask.objects.get(task_id=task_id)
+ assert task.input_data["overrides"]["filename"] == "/uploads/invoice.pdf"
+
+ @pytest.mark.parametrize(
+ ("header_value", "expected_trigger_source"),
+ [
+ pytest.param(
+ PaperlessTask.TriggerSource.SCHEDULED,
+ PaperlessTask.TriggerSource.SCHEDULED,
+ id="scheduled",
+ ),
+ pytest.param(
+ PaperlessTask.TriggerSource.SYSTEM,
+ PaperlessTask.TriggerSource.SYSTEM,
+ id="system",
+ ),
+ pytest.param(
+ "bogus_value",
+ PaperlessTask.TriggerSource.MANUAL,
+ id="invalid-falls-back-to-manual",
+ ),
+ ],
+ )
+ def test_trigger_source_header_resolution(
self,
- headers_to_use,
- body_to_use,
+ header_value: str,
+ expected_trigger_source: PaperlessTask.TriggerSource,
) -> None:
- """
- Simple utility to call the pre-run handle and ensure it created a single task
- instance
- """
- self.assertEqual(PaperlessTask.objects.all().count(), 0)
-
- before_task_publish_handler(headers=headers_to_use, body=body_to_use)
-
- self.assertEqual(PaperlessTask.objects.all().count(), 1)
-
- def test_before_task_publish_handler_consume(self) -> None:
- """
- GIVEN:
- - A celery task is started via the consume folder
- WHEN:
- - Task before publish handler is called
- THEN:
- - The task is created and marked as pending
- """
- headers = {
- "id": str(uuid.uuid4()),
- "task": "documents.tasks.consume_file",
- }
- body = (
- # args
- (
- ConsumableDocument(
- source=DocumentSource.ConsumeFolder,
- original_file="/consume/hello-999.pdf",
- ),
- DocumentMetadataOverrides(
- title="Hello world",
- owner_id=self.user.id,
- ),
- ),
- # kwargs
+ """trigger_source header maps to the expected TriggerSource; invalid values fall back to MANUAL."""
+ task_id = send_publish(
+ "documents.tasks.train_classifier",
+ (),
{},
- # celery stuff
- {"callbacks": None, "errbacks": None, "chain": None, "chord": None},
- )
- self.util_call_before_task_publish_handler(
- headers_to_use=headers,
- body_to_use=body,
+ headers={"trigger_source": header_value},
)
+ task = PaperlessTask.objects.get(task_id=task_id)
+ assert task.trigger_source == expected_trigger_source
- task = PaperlessTask.objects.get()
- self.assertIsNotNone(task)
- self.assertEqual(headers["id"], task.task_id)
- self.assertEqual("hello-999.pdf", task.task_file_name)
- self.assertEqual(PaperlessTask.TaskName.CONSUME_FILE, task.task_name)
- self.assertEqual(self.user.id, task.owner_id)
- self.assertEqual(celery.states.PENDING, task.status)
+ def test_ignores_untracked_task(self):
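+        """Publishing an untracked task name creates no PaperlessTask row."""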
+ send_publish("documents.tasks.some_untracked_task", (), {})
+ assert PaperlessTask.objects.count() == 0
- def test_task_prerun_handler(self) -> None:
- """
- GIVEN:
- - A celery task is started via the consume folder
- WHEN:
- - Task starts execution
- THEN:
- - The task is marked as started
- """
+ def test_ignores_none_headers(self):
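+        """The handler tolerates a publish signal with no headers and creates nothing."""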
- headers = {
- "id": str(uuid.uuid4()),
- "task": "documents.tasks.consume_file",
- }
- body = (
- # args
- (
- ConsumableDocument(
- source=DocumentSource.ConsumeFolder,
- original_file="/consume/hello-99.pdf",
- ),
- None,
+ before_task_publish_handler(sender=None, headers=None, body=None)
+ assert PaperlessTask.objects.count() == 0
+
+ @pytest.mark.parametrize(
+ ("document_source", "expected_trigger_source"),
+ [
+ pytest.param(
+ DocumentSource.ConsumeFolder,
+ PaperlessTask.TriggerSource.FOLDER_CONSUME,
+ id="folder_consume",
),
- # kwargs
- {},
- # celery stuff
- {"callbacks": None, "errbacks": None, "chain": None, "chord": None},
- )
-
- self.util_call_before_task_publish_handler(
- headers_to_use=headers,
- body_to_use=body,
- )
-
- task_prerun_handler(task_id=headers["id"])
-
- task = PaperlessTask.objects.get()
-
- self.assertEqual(celery.states.STARTED, task.status)
-
- def test_task_postrun_handler(self) -> None:
- """
- GIVEN:
- - A celery task is started via the consume folder
- WHEN:
- - Task finished execution
- THEN:
- - The task is marked as started
- """
- headers = {
- "id": str(uuid.uuid4()),
- "task": "documents.tasks.consume_file",
- }
- body = (
- # args
- (
- ConsumableDocument(
- source=DocumentSource.ConsumeFolder,
- original_file="/consume/hello-9.pdf",
- ),
- None,
+ pytest.param(
+ DocumentSource.MailFetch,
+ PaperlessTask.TriggerSource.EMAIL_CONSUME,
+ id="email_consume",
),
- # kwargs
+ ],
+ )
+ def test_consume_document_source_maps_to_trigger_source(
+ self,
+ consume_input_doc,
+ consume_overrides,
+ document_source: DocumentSource,
+ expected_trigger_source: PaperlessTask.TriggerSource,
+ ) -> None:
+ """DocumentSource on the input doc maps to the correct TriggerSource."""
+ consume_input_doc.source = document_source
+ task_id = send_publish(
+ "documents.tasks.consume_file",
+ (consume_input_doc, consume_overrides),
{},
- # celery stuff
- {"callbacks": None, "errbacks": None, "chain": None, "chord": None},
)
- self.util_call_before_task_publish_handler(
- headers_to_use=headers,
- body_to_use=body,
+ task = PaperlessTask.objects.get(task_id=task_id)
+ assert task.trigger_source == expected_trigger_source
+
+
+@pytest.mark.django_db
+class TestTaskPrerunHandler:
+ def test_marks_task_started(self):
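+        """prerun moves a PENDING task to STARTED and stamps date_started."""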
+ task = PaperlessTaskFactory(status=PaperlessTask.Status.PENDING)
+
+ task_prerun_handler(task_id=task.task_id)
+ task.refresh_from_db()
+ assert task.status == PaperlessTask.Status.STARTED
+ assert task.date_started is not None
+
+ @pytest.mark.parametrize(
+ "task_id",
+ [
+ pytest.param("nonexistent-id", id="unknown"),
+ pytest.param(None, id="none"),
+ ],
+ )
+ def test_ignores_invalid_task_id(self, task_id: str | None) -> None:
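+        """prerun must not raise for an unknown or missing task id."""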
+
+ task_prerun_handler(task_id=task_id) # must not raise
+
+
+@pytest.mark.django_db
+class TestTaskPostrunHandler:
+ def _started_task(self) -> PaperlessTask:
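+        """Create a STARTED train_classifier task with date_started set to now."""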
+
+ return PaperlessTaskFactory(
+ task_type=PaperlessTask.TaskType.TRAIN_CLASSIFIER,
+ status=PaperlessTask.Status.STARTED,
+ date_started=timezone.now(),
)
+ def test_records_success_with_dict_result(self):
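+        """postrun stores a dict retval in result_data and fills in the timing fields."""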
+ task = self._started_task()
+
task_postrun_handler(
- task_id=headers["id"],
- retval="Success. New document id 1 created",
- state=celery.states.SUCCESS,
+ task_id=task.task_id,
+ retval={"document_id": 42},
+ state="SUCCESS",
)
+ task.refresh_from_db()
+ assert task.status == PaperlessTask.Status.SUCCESS
+ assert task.result_data == {"document_id": 42}
+ assert task.date_done is not None
+ assert task.duration_seconds is not None
+ assert task.wait_time_seconds is not None
- task = PaperlessTask.objects.get()
+ def test_skips_failure_state(self):
+ """postrun skips FAILURE; task_failure_handler owns that path."""
+ task = self._started_task()
- self.assertEqual(celery.states.SUCCESS, task.status)
+ task_postrun_handler(task_id=task.task_id, retval="some error", state="FAILURE")
+ task.refresh_from_db()
+ assert task.status == PaperlessTask.Status.STARTED
- def test_task_failure_handler(self) -> None:
- """
- GIVEN:
- - A celery task is started via the consume folder
- WHEN:
- - Task failed execution
- THEN:
- - The task is marked as failed
- """
- headers = {
- "id": str(uuid.uuid4()),
- "task": "documents.tasks.consume_file",
- }
- body = (
- # args
- (
- ConsumableDocument(
- source=DocumentSource.ConsumeFolder,
- original_file="/consume/hello-9.pdf",
- ),
- None,
- ),
- # kwargs
- {},
- # celery stuff
- {"callbacks": None, "errbacks": None, "chain": None, "chord": None},
+ def test_parses_legacy_new_document_string(self):
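+        """Legacy 'New document id N created' strings populate result_data["document_id"]."""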
+ task = self._started_task()
+
+ task_postrun_handler(
+ task_id=task.task_id,
+ retval="New document id 42 created",
+ state="SUCCESS",
)
- self.util_call_before_task_publish_handler(
- headers_to_use=headers,
- body_to_use=body,
+ task.refresh_from_db()
+ assert task.result_data["document_id"] == 42
+ assert task.result_message == "New document id 42 created"
+
+ def test_parses_duplicate_string(self):
+ """Duplicate detection returns a string with SUCCESS state (StopConsumeTaskError is caught and returned, not raised)."""
+ task = self._started_task()
+
+ task_postrun_handler(
+ task_id=task.task_id,
+ retval="It is a duplicate of some document (#99).",
+ state="SUCCESS",
+ )
+ task.refresh_from_db()
+ assert task.result_data["duplicate_of"] == 99
+ assert task.result_data["duplicate_in_trash"] is False
+
+ def test_ignores_unknown_task_id(self):
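+        """postrun must not raise for a task_id with no matching row."""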
+
+ task_postrun_handler(
+ task_id="nonexistent",
+ retval=None,
+ state="SUCCESS",
+ ) # must not raise
+
+ def test_records_revoked_state(self):
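+        """postrun records the REVOKED state on the task."""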
+ task = self._started_task()
+
+ task_postrun_handler(task_id=task.task_id, retval=None, state="REVOKED")
+ task.refresh_from_db()
+ assert task.status == PaperlessTask.Status.REVOKED
+
+
+@pytest.mark.django_db
+class TestTaskFailureHandler:
+ def test_records_failure_with_exception(self):
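+        """The failure handler marks the task FAILURE and stores the exception type and message."""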
+
+ task = PaperlessTaskFactory(
+ task_type=PaperlessTask.TaskType.CONSUME_FILE,
+ status=PaperlessTask.Status.STARTED,
+ date_started=timezone.now(),
)
task_failure_handler(
- task_id=headers["id"],
- exception="Example failure",
+ task_id=task.task_id,
+ exception=ValueError("PDF parse failed"),
+ traceback=None,
+ )
+ task.refresh_from_db()
+ assert task.status == PaperlessTask.Status.FAILURE
+ assert task.result_data["error_type"] == "ValueError"
+ assert task.result_data["error_message"] == "PDF parse failed"
+ assert task.date_done is not None
+
+ def test_records_traceback_when_provided(self):
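+        """The failure handler stores a traceback string of at most 5000 characters."""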
+
+ task = PaperlessTaskFactory(
+ task_type=PaperlessTask.TaskType.CONSUME_FILE,
+ status=PaperlessTask.Status.STARTED,
+ date_started=timezone.now(),
+ )
+ try:
+ raise ValueError("test error")
+ except ValueError:
+ tb = sys.exc_info()[2]
+
+ task_failure_handler(
+ task_id=task.task_id,
+ exception=ValueError("test error"),
+ traceback=tb,
+ )
+ task.refresh_from_db()
+ assert "traceback" in task.result_data
+ assert len(task.result_data["traceback"]) <= 5000
+
+ def test_computes_duration_and_wait_time(self):
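+        """The failure handler derives duration and wait time from the stored timestamps."""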
+
+ now = timezone.now()
+ task = PaperlessTaskFactory(
+ task_type=PaperlessTask.TaskType.CONSUME_FILE,
+ status=PaperlessTask.Status.STARTED,
+ date_created=now - timezone.timedelta(seconds=10),
+ date_started=now - timezone.timedelta(seconds=5),
)
- task = PaperlessTask.objects.get()
-
- self.assertEqual(celery.states.FAILURE, task.status)
-
- def test_add_to_index_indexes_root_once_for_root_documents(self) -> None:
- root = Document.objects.create(
- title="root",
- checksum="root",
- mime_type="application/pdf",
+ task_failure_handler(
+ task_id=task.task_id,
+ exception=ValueError("boom"),
+ traceback=None,
)
+ task.refresh_from_db()
+ assert task.duration_seconds == pytest.approx(5.0, abs=1.0)
+ assert task.wait_time_seconds == pytest.approx(5.0, abs=1.0)
- with mock.patch("documents.search.get_backend") as mock_get_backend:
- mock_backend = mock.MagicMock()
- mock_get_backend.return_value = mock_backend
- add_to_index(sender=None, document=root)
+ def test_ignores_none_task_id(self):
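+        """The failure handler must not raise when task_id is None."""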
- mock_backend.add_or_update.assert_called_once_with(root, effective_content="")
+ task_failure_handler(task_id=None, exception=ValueError("x"), traceback=None)
- def test_add_to_index_reindexes_root_for_version_documents(self) -> None:
- root = Document.objects.create(
- title="root",
- checksum="root",
- mime_type="application/pdf",
- )
- version = Document.objects.create(
- title="version",
- checksum="version",
- mime_type="application/pdf",
- root_document=root,
- )
- with mock.patch("documents.search.get_backend") as mock_get_backend:
- mock_backend = mock.MagicMock()
- mock_get_backend.return_value = mock_backend
- add_to_index(sender=None, document=version)
+@pytest.mark.django_db
+class TestTaskRevokedHandler:
+ def test_marks_task_revoked(self, mocker: pytest_mock.MockerFixture):
+ """task_revoked_handler moves a queued task to REVOKED and stamps date_done."""
+ task = PaperlessTaskFactory(status=PaperlessTask.Status.PENDING)
+ request = mocker.MagicMock()
+ request.id = task.task_id
- self.assertEqual(mock_backend.add_or_update.call_count, 1)
- self.assertEqual(
- mock_backend.add_or_update.call_args_list[0].args[0].id,
- version.id,
- )
- self.assertEqual(
- mock_backend.add_or_update.call_args_list[0].kwargs,
- {"effective_content": version.content},
- )
+ task_revoked_handler(request=request)
+ task.refresh_from_db()
+ assert task.status == PaperlessTask.Status.REVOKED
+ assert task.date_done is not None
+
+ def test_ignores_none_request(self):
+ """task_revoked_handler must not raise when request is None."""
+
+ task_revoked_handler(request=None) # must not raise
+
+ def test_ignores_unknown_task_id(self, mocker: pytest_mock.MockerFixture):
+ """task_revoked_handler must not raise for a task_id not in the database."""
+ request = mocker.MagicMock()
+ request.id = "nonexistent-id"
+
+ task_revoked_handler(request=request) # must not raise
diff --git a/src/documents/tests/test_tasks.py b/src/documents/tests/test_tasks.py
index 4502423b3..fb06f7d60 100644
--- a/src/documents/tests/test_tasks.py
+++ b/src/documents/tests/test_tasks.py
@@ -4,7 +4,6 @@ from pathlib import Path
from unittest import mock
import pytest
-from celery import states
from django.conf import settings
from django.test import TestCase
from django.test import override_settings
@@ -14,7 +13,6 @@ from documents import tasks
from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
-from documents.models import PaperlessTask
from documents.models import Tag
from documents.sanity_checker import SanityCheckFailedException
from documents.sanity_checker import SanityCheckMessages
@@ -40,7 +38,8 @@ class TestClassifier(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
def test_train_classifier_with_auto_tag(self, load_classifier) -> None:
load_classifier.return_value = None
Tag.objects.create(matching_algorithm=Tag.MATCH_AUTO, name="test")
- tasks.train_classifier()
+ with self.assertRaises(ValueError):
+ tasks.train_classifier()
load_classifier.assert_called_once()
self.assertIsNotFile(settings.MODEL_FILE)
@@ -48,7 +47,8 @@ class TestClassifier(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
def test_train_classifier_with_auto_type(self, load_classifier) -> None:
load_classifier.return_value = None
DocumentType.objects.create(matching_algorithm=Tag.MATCH_AUTO, name="test")
- tasks.train_classifier()
+ with self.assertRaises(ValueError):
+ tasks.train_classifier()
load_classifier.assert_called_once()
self.assertIsNotFile(settings.MODEL_FILE)
@@ -56,7 +56,8 @@ class TestClassifier(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
def test_train_classifier_with_auto_correspondent(self, load_classifier) -> None:
load_classifier.return_value = None
Correspondent.objects.create(matching_algorithm=Tag.MATCH_AUTO, name="test")
- tasks.train_classifier()
+ with self.assertRaises(ValueError):
+ tasks.train_classifier()
load_classifier.assert_called_once()
self.assertIsNotFile(settings.MODEL_FILE)
@@ -298,7 +299,7 @@ class TestAIIndex(DirectoriesMixin, TestCase):
WHEN:
- llmindex_index task is called
THEN:
- - update_llm_index is called, and the task is marked as success
+ - update_llm_index is called and its result is returned
"""
Document.objects.create(
title="test",
@@ -308,13 +309,9 @@ class TestAIIndex(DirectoriesMixin, TestCase):
# lazy-loaded so mock the actual function
with mock.patch("paperless_ai.indexing.update_llm_index") as update_llm_index:
update_llm_index.return_value = "LLM index updated successfully."
- tasks.llmindex_index()
+ result = tasks.llmindex_index()
update_llm_index.assert_called_once()
- task = PaperlessTask.objects.get(
- task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,
- )
- self.assertEqual(task.status, states.SUCCESS)
- self.assertEqual(task.result, "LLM index updated successfully.")
+ self.assertEqual(result, "LLM index updated successfully.")
@override_settings(
AI_ENABLED=True,
@@ -325,9 +322,9 @@ class TestAIIndex(DirectoriesMixin, TestCase):
GIVEN:
- Document exists, AI is enabled, llm index backend is set
WHEN:
- - llmindex_index task is called
+ - llmindex_index task is called and update_llm_index raises an exception
THEN:
- - update_llm_index raises an exception, and the task is marked as failure
+ - the exception propagates to the caller
"""
Document.objects.create(
title="test",
@@ -337,13 +334,9 @@ class TestAIIndex(DirectoriesMixin, TestCase):
# lazy-loaded so mock the actual function
with mock.patch("paperless_ai.indexing.update_llm_index") as update_llm_index:
update_llm_index.side_effect = Exception("LLM index update failed.")
- tasks.llmindex_index()
+ with self.assertRaisesRegex(Exception, "LLM index update failed."):
+ tasks.llmindex_index()
update_llm_index.assert_called_once()
- task = PaperlessTask.objects.get(
- task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,
- )
- self.assertEqual(task.status, states.FAILURE)
- self.assertIn("LLM index update failed.", task.result)
def test_update_document_in_llm_index(self) -> None:
"""
diff --git a/src/documents/views.py b/src/documents/views.py
index c57e43b35..d13760d38 100644
--- a/src/documents/views.py
+++ b/src/documents/views.py
@@ -8,6 +8,7 @@ import zipfile
from collections import defaultdict
from collections import deque
from datetime import datetime
+from datetime import timedelta
from pathlib import Path
from time import mktime
from typing import TYPE_CHECKING
@@ -20,7 +21,6 @@ from urllib.parse import urlparse
import httpx
import magic
import pathvalidate
-from celery import states
from django.conf import settings
from django.contrib.auth.models import Group
from django.contrib.auth.models import User
@@ -29,6 +29,7 @@ from django.core.cache import cache
from django.db import connections
from django.db.migrations.loader import MigrationLoader
from django.db.migrations.recorder import MigrationRecorder
+from django.db.models import Avg
from django.db.models import Case
from django.db.models import Count
from django.db.models import F
@@ -193,7 +194,7 @@ from documents.serialisers import PostDocumentSerializer
from documents.serialisers import RemovePasswordDocumentsSerializer
from documents.serialisers import ReprocessDocumentsSerializer
from documents.serialisers import RotateDocumentsSerializer
-from documents.serialisers import RunTaskViewSerializer
+from documents.serialisers import RunTaskSerializer
from documents.serialisers import SavedViewSerializer
from documents.serialisers import SearchResultSerializer
from documents.serialisers import SerializerWithPerms
@@ -202,7 +203,9 @@ from documents.serialisers import ShareLinkSerializer
from documents.serialisers import StoragePathSerializer
from documents.serialisers import StoragePathTestSerializer
from documents.serialisers import TagSerializer
-from documents.serialisers import TasksViewSerializer
+from documents.serialisers import TaskSerializerV9
+from documents.serialisers import TaskSerializerV10
+from documents.serialisers import TaskSummarySerializer
from documents.serialisers import TrashSerializer
from documents.serialisers import UiSettingsViewSerializer
from documents.serialisers import WorkflowActionSerializer
@@ -212,7 +215,6 @@ from documents.signals import document_updated
from documents.tasks import build_share_link_bundle
from documents.tasks import consume_file
from documents.tasks import empty_trash
-from documents.tasks import index_optimize
from documents.tasks import llmindex_index
from documents.tasks import sanity_check
from documents.tasks import train_classifier
@@ -3729,21 +3731,21 @@ class RemoteVersionView(GenericAPIView[Any]):
@extend_schema_view(
+ list=extend_schema(
+ parameters=[
+ OpenApiParameter(
+ name="task_id",
+ type=str,
+ location=OpenApiParameter.QUERY,
+ required=False,
+ description="Filter tasks by Celery UUID",
+ ),
+ ],
+ ),
acknowledge=extend_schema(
operation_id="acknowledge_tasks",
description="Acknowledge a list of tasks",
- request={
- "application/json": {
- "type": "object",
- "properties": {
- "tasks": {
- "type": "array",
- "items": {"type": "integer"},
- },
- },
- "required": ["tasks"],
- },
- },
+ request=AcknowledgeTasksViewSerializer,
responses={
(200, "application/json"): inline_serializer(
name="AcknowledgeTasks",
@@ -3751,52 +3753,118 @@ class RemoteVersionView(GenericAPIView[Any]):
"result": serializers.IntegerField(),
},
),
- (400, "application/json"): None,
},
),
-)
-@extend_schema(
- parameters=[
- OpenApiParameter(
- name="task_id",
- type=str,
- location=OpenApiParameter.QUERY,
- required=False,
- description="Filter tasks by Celery UUID",
- ),
- ],
+ run=extend_schema(
+ operation_id="run_task",
+ description="Manually dispatch a background task. Superuser only.",
+ request=RunTaskSerializer,
+ responses={
+ (200, "application/json"): inline_serializer(
+ name="RunTask",
+ fields={"task_id": serializers.CharField()},
+ ),
+ (400, "application/json"): inline_serializer(
+ name="RunTaskError",
+ fields={"error": serializers.CharField()},
+ ),
+ },
+ ),
+ summary=extend_schema(
+ responses={200: TaskSummarySerializer(many=True)},
+ parameters=[
+ OpenApiParameter(
+ name="days",
+ type=int,
+ location=OpenApiParameter.QUERY,
+ required=False,
+ description="Number of days to include in aggregation (default 30)",
+ ),
+ ],
+ ),
+ active=extend_schema(
+ description="Currently pending and running tasks (capped at 50).",
+ responses={200: TaskSerializerV10(many=True)},
+ ),
)
class TasksViewSet(ReadOnlyModelViewSet[PaperlessTask]):
permission_classes = (IsAuthenticated, PaperlessObjectPermissions)
- serializer_class = TasksViewSerializer
filter_backends = (
DjangoFilterBackend,
OrderingFilter,
- ObjectOwnedOrGrantedPermissionsFilter,
)
filterset_class = PaperlessTaskFilterSet
+ ordering_fields = [
+ "date_created",
+ "date_done",
+ "status",
+ "task_type",
+ "duration_seconds",
+ "wait_time_seconds",
+ ]
+ ordering = ["-date_created"]
+ # Needed for drf-spectacular schema generation (get_queryset touches request.user)
+ queryset = PaperlessTask.objects.none()
- TASK_AND_ARGS_BY_NAME = {
- PaperlessTask.TaskName.INDEX_OPTIMIZE: (index_optimize, {}),
- PaperlessTask.TaskName.TRAIN_CLASSIFIER: (
- train_classifier,
- {"scheduled": False},
- ),
- PaperlessTask.TaskName.CHECK_SANITY: (
- sanity_check,
- {"scheduled": False, "raise_on_error": False},
- ),
- PaperlessTask.TaskName.LLMINDEX_UPDATE: (
- llmindex_index,
- {"scheduled": False, "rebuild": False},
- ),
+ # v9 backwards compat: maps old task_name values to new task_type values
+ _V9_TASK_NAME_TO_TYPE = {
+ "check_sanity": PaperlessTask.TaskType.SANITY_CHECK,
+ "llmindex_update": PaperlessTask.TaskType.LLM_INDEX,
}
+ # v9 backwards compat: maps old "type" query param values to new TriggerSource.
+ # Must match the reverse of TaskSerializerV9._TRIGGER_SOURCE_TO_V9_TYPE.
+ _V9_TYPE_TO_TRIGGER_SOURCES = {
+ "auto_task": [
+ PaperlessTask.TriggerSource.SYSTEM,
+ PaperlessTask.TriggerSource.EMAIL_CONSUME,
+ PaperlessTask.TriggerSource.FOLDER_CONSUME,
+ ],
+ "scheduled_task": [PaperlessTask.TriggerSource.SCHEDULED],
+ "manual_task": [
+ PaperlessTask.TriggerSource.MANUAL,
+ PaperlessTask.TriggerSource.WEB_UI,
+ PaperlessTask.TriggerSource.API_UPLOAD,
+ ],
+ }
+
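+    # Task types a superuser may dispatch manually via the run/ action, with their default kwargs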
+ _RUNNABLE_TASKS = {
+ PaperlessTask.TaskType.TRAIN_CLASSIFIER: (train_classifier, {}),
+ PaperlessTask.TaskType.SANITY_CHECK: (sanity_check, {"raise_on_error": False}),
+ PaperlessTask.TaskType.LLM_INDEX: (llmindex_index, {"rebuild": False}),
+ }
+
+ def get_serializer_class(self):
+ # v9: use backwards-compatible serializer with old field names
+ if self.request.version and int(self.request.version) < 10:
+ return TaskSerializerV9
+ return TaskSerializerV10
+
def get_queryset(self):
- queryset = PaperlessTask.objects.all().order_by("-date_created")
+ is_v9 = self.request.version and int(self.request.version) < 10
+ if self.request.user.is_staff:
+ queryset = PaperlessTask.objects.all()
+ else:
+ # Own tasks + unowned (system/scheduled) tasks. Tasks owned by other
+ # users are never visible to non-staff regardless of API version.
+ queryset = PaperlessTask.objects.filter(
+ Q(owner=self.request.user) | Q(owner__isnull=True),
+ )
+ # v9 backwards compat: map old query params to new field names
+ if is_v9:
+ task_name = self.request.query_params.get("task_name")
+ if task_name is not None:
+ mapped = self._V9_TASK_NAME_TO_TYPE.get(task_name, task_name)
+ queryset = queryset.filter(task_type=mapped)
+ task_type_old = self.request.query_params.get("type")
+ if task_type_old is not None:
+ sources = self._V9_TYPE_TO_TRIGGER_SOURCES.get(task_type_old)
+ if sources:
+ queryset = queryset.filter(trigger_source__in=sources)
+        # Direct task_id query param, kept for backwards compatibility on all versions
task_id = self.request.query_params.get("task_id")
if task_id is not None:
- queryset = PaperlessTask.objects.filter(task_id=task_id)
+ queryset = queryset.filter(task_id=task_id)
return queryset
@action(
@@ -3808,33 +3876,86 @@ class TasksViewSet(ReadOnlyModelViewSet[PaperlessTask]):
serializer = AcknowledgeTasksViewSerializer(data=request.data)
serializer.is_valid(raise_exception=True)
task_ids = serializer.validated_data.get("tasks")
+ tasks = self.get_queryset().filter(id__in=task_ids)
+ count = tasks.update(acknowledged=True)
+ return Response({"result": count})
+ @action(methods=["get"], detail=False)
+ def summary(self, request):
+ """Aggregated task statistics per task_type over the last N days (default 30)."""
try:
- tasks = PaperlessTask.objects.filter(id__in=task_ids)
- if request.user is not None and not request.user.is_superuser:
- tasks = tasks.filter(owner=request.user) | tasks.filter(owner=None)
- result = tasks.update(
- acknowledged=True,
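+            # Non-positive values are clamped to 1; non-numeric input is rejected below with a 400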
+ days = max(1, int(request.query_params.get("days", 30)))
+ except (TypeError, ValueError):
+ return Response(
+ {"days": "Must be a positive integer."},
+ status=status.HTTP_400_BAD_REQUEST,
)
- return Response({"result": result})
- except Exception:
- return HttpResponseBadRequest()
+ cutoff = timezone.now() - timedelta(days=days)
+ queryset = self.get_queryset().filter(date_created__gte=cutoff)
+
+ data = queryset.values("task_type").annotate(
+ total_count=Count("id"),
+ pending_count=Count("id", filter=Q(status=PaperlessTask.Status.PENDING)),
+ success_count=Count("id", filter=Q(status=PaperlessTask.Status.SUCCESS)),
+ failure_count=Count("id", filter=Q(status=PaperlessTask.Status.FAILURE)),
+ avg_duration_seconds=Avg(
+ "duration_seconds",
+ filter=Q(duration_seconds__isnull=False),
+ ),
+ avg_wait_time_seconds=Avg(
+ "wait_time_seconds",
+ filter=Q(wait_time_seconds__isnull=False),
+ ),
+ last_run=Max("date_created"),
+ last_success=Max(
+ "date_done",
+ filter=Q(status=PaperlessTask.Status.SUCCESS),
+ ),
+ last_failure=Max(
+ "date_done",
+ filter=Q(status=PaperlessTask.Status.FAILURE),
+ ),
+ )
+ serializer = TaskSummarySerializer(data, many=True)
+ return Response(serializer.data)
+
+ @action(methods=["get"], detail=False)
+ def active(self, request):
+ """Currently pending and running tasks (capped at 50)."""
+ queryset = (
+ self.get_queryset()
+ .filter(
+ status__in=[PaperlessTask.Status.PENDING, PaperlessTask.Status.STARTED],
+ )
+ .order_by("-date_created")[:50]
+ )
+ serializer = self.get_serializer(queryset, many=True)
+ return Response(serializer.data)
@action(methods=["post"], detail=False)
def run(self, request):
- serializer = RunTaskViewSerializer(data=request.data)
- serializer.is_valid(raise_exception=True)
- task_name = serializer.validated_data.get("task_name")
-
+ """Manually dispatch a background task. Superuser only."""
if not request.user.is_superuser:
return HttpResponseForbidden("Insufficient permissions")
+ serializer = RunTaskSerializer(data=request.data)
+ serializer.is_valid(raise_exception=True)
+ task_type = serializer.validated_data.get("task_type")
+
+ if task_type not in self._RUNNABLE_TASKS:
+ return Response(
+ {"error": f"Task type '{task_type}' cannot be manually triggered"},
+ status=status.HTTP_400_BAD_REQUEST,
+ )
try:
- task_func, task_args = self.TASK_AND_ARGS_BY_NAME[task_name]
- result = task_func(**task_args)
- return Response({"result": result})
+ task_func, task_kwargs = self._RUNNABLE_TASKS[task_type]
+ async_result = task_func.apply_async(
+ kwargs=task_kwargs,
+ headers={"trigger_source": PaperlessTask.TriggerSource.MANUAL},
+ )
+ return Response({"task_id": async_result.id})
except Exception as e:
- logger.warning(f"An error occurred running task: {e!s}")
+ logger.warning(f"Error running task: {e!s}")
return HttpResponseServerError(
"Error running task, check logs for more detail.",
)
@@ -4466,12 +4587,8 @@ class SystemStatusView(PassUserMixin):
last_trained_task = (
PaperlessTask.objects.filter(
- task_name=PaperlessTask.TaskName.TRAIN_CLASSIFIER,
- status__in=[
- states.SUCCESS,
- states.FAILURE,
- states.REVOKED,
- ], # ignore running tasks
+ task_type=PaperlessTask.TaskType.TRAIN_CLASSIFIER,
+ status__in=PaperlessTask.COMPLETE_STATUSES, # ignore running tasks
)
.order_by("-date_done")
.first()
@@ -4481,21 +4598,17 @@ class SystemStatusView(PassUserMixin):
if last_trained_task is None:
classifier_status = "WARNING"
classifier_error = "No classifier training tasks found"
- elif last_trained_task and last_trained_task.status != states.SUCCESS:
+ elif last_trained_task.status != PaperlessTask.Status.SUCCESS:
classifier_status = "ERROR"
- classifier_error = last_trained_task.result
+ classifier_error = last_trained_task.result_message
classifier_last_trained = (
last_trained_task.date_done if last_trained_task else None
)
last_sanity_check = (
PaperlessTask.objects.filter(
- task_name=PaperlessTask.TaskName.CHECK_SANITY,
- status__in=[
- states.SUCCESS,
- states.FAILURE,
- states.REVOKED,
- ], # ignore running tasks
+ task_type=PaperlessTask.TaskType.SANITY_CHECK,
+ status__in=PaperlessTask.COMPLETE_STATUSES, # ignore running tasks
)
.order_by("-date_done")
.first()
@@ -4505,9 +4618,9 @@ class SystemStatusView(PassUserMixin):
if last_sanity_check is None:
sanity_check_status = "WARNING"
sanity_check_error = "No sanity check tasks found"
- elif last_sanity_check and last_sanity_check.status != states.SUCCESS:
+ elif last_sanity_check.status != PaperlessTask.Status.SUCCESS:
sanity_check_status = "ERROR"
- sanity_check_error = last_sanity_check.result
+ sanity_check_error = last_sanity_check.result_message
sanity_check_last_run = (
last_sanity_check.date_done if last_sanity_check else None
)
@@ -4520,7 +4633,7 @@ class SystemStatusView(PassUserMixin):
else:
last_llmindex_update = (
PaperlessTask.objects.filter(
- task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,
+ task_type=PaperlessTask.TaskType.LLM_INDEX,
)
.order_by("-date_done")
.first()
@@ -4530,9 +4643,9 @@ class SystemStatusView(PassUserMixin):
if last_llmindex_update is None:
llmindex_status = "WARNING"
llmindex_error = "No LLM index update tasks found"
- elif last_llmindex_update and last_llmindex_update.status == states.FAILURE:
+ elif last_llmindex_update.status == PaperlessTask.Status.FAILURE:
llmindex_status = "ERROR"
- llmindex_error = last_llmindex_update.result
+ llmindex_error = last_llmindex_update.result_message
llmindex_last_modified = (
last_llmindex_update.date_done if last_llmindex_update else None
)
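
For reference, a minimal client-side sketch of the reworked task endpoints above. It assumes the TasksViewSet stays mounted at /api/tasks/ with token auth; the route prefix, the "days" query parameter, and the "train_classifier" task_type value are assumptions for illustration, not taken from this diff.

import requests

BASE = "http://localhost:8000/api/tasks"   # assumed mount point of TasksViewSet
HEADERS = {"Authorization": "Token <api-token>"}  # placeholder credential

# Per-type aggregates over a recent window (the `summary` action above);
# the `days` query parameter is assumed to be how the cutoff is supplied.
print(requests.get(f"{BASE}/summary/", params={"days": 7}, headers=HEADERS).json())

# Pending/started tasks, newest first, capped at 50 (the `active` action above).
print(requests.get(f"{BASE}/active/", headers=HEADERS).json())

# Manually dispatch a runnable task type (superuser token required).
resp = requests.post(f"{BASE}/run/", json={"task_type": "train_classifier"}, headers=HEADERS)
print(resp.json())  # e.g. {"task_id": "..."}
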
diff --git a/src/locale/en_US/LC_MESSAGES/django.po b/src/locale/en_US/LC_MESSAGES/django.po
index 35a06fa8e..99042fbc1 100644
--- a/src/locale/en_US/LC_MESSAGES/django.po
+++ b/src/locale/en_US/LC_MESSAGES/django.po
@@ -2,7 +2,7 @@ msgid ""
msgstr ""
"Project-Id-Version: paperless-ngx\n"
"Report-Msgid-Bugs-To: \n"
-"POT-Creation-Date: 2026-04-15 23:21+0000\n"
+"POT-Creation-Date: 2026-04-20 16:29+0000\n"
"PO-Revision-Date: 2022-02-17 04:17\n"
"Last-Translator: \n"
"Language-Team: English\n"
@@ -21,1295 +21,1347 @@ msgstr ""
msgid "Documents"
msgstr ""
-#: documents/filters.py:431
+#: documents/filters.py:433
msgid "Value must be valid JSON."
msgstr ""
-#: documents/filters.py:450
+#: documents/filters.py:452
msgid "Invalid custom field query expression"
msgstr ""
-#: documents/filters.py:460
+#: documents/filters.py:462
msgid "Invalid expression list. Must be nonempty."
msgstr ""
-#: documents/filters.py:481
+#: documents/filters.py:483
msgid "Invalid logical operator {op!r}"
msgstr ""
-#: documents/filters.py:495
+#: documents/filters.py:497
msgid "Maximum number of query conditions exceeded."
msgstr ""
-#: documents/filters.py:560
+#: documents/filters.py:562
msgid "{name!r} is not a valid custom field."
msgstr ""
-#: documents/filters.py:597
+#: documents/filters.py:599
msgid "{data_type} does not support query expr {expr!r}."
msgstr ""
-#: documents/filters.py:705 documents/models.py:137
+#: documents/filters.py:707 documents/models.py:136
msgid "Maximum nesting depth exceeded."
msgstr ""
-#: documents/filters.py:919
+#: documents/filters.py:954
msgid "Custom field not found"
msgstr ""
-#: documents/models.py:40 documents/models.py:847 documents/models.py:895
+#: documents/models.py:39 documents/models.py:923 documents/models.py:971
msgid "owner"
msgstr ""
-#: documents/models.py:57 documents/models.py:1177
+#: documents/models.py:56 documents/models.py:1253
msgid "None"
msgstr ""
-#: documents/models.py:58 documents/models.py:1178
+#: documents/models.py:57 documents/models.py:1254
msgid "Any word"
msgstr ""
-#: documents/models.py:59 documents/models.py:1179
+#: documents/models.py:58 documents/models.py:1255
msgid "All words"
msgstr ""
-#: documents/models.py:60 documents/models.py:1180
+#: documents/models.py:59 documents/models.py:1256
msgid "Exact match"
msgstr ""
-#: documents/models.py:61 documents/models.py:1181
+#: documents/models.py:60 documents/models.py:1257
msgid "Regular expression"
msgstr ""
-#: documents/models.py:62 documents/models.py:1182
+#: documents/models.py:61 documents/models.py:1258
msgid "Fuzzy word"
msgstr ""
-#: documents/models.py:63
+#: documents/models.py:62
msgid "Automatic"
msgstr ""
-#: documents/models.py:66 documents/models.py:539 documents/models.py:1760
+#: documents/models.py:65 documents/models.py:538 documents/models.py:1836
#: paperless_mail/models.py:23 paperless_mail/models.py:143
msgid "name"
msgstr ""
-#: documents/models.py:68 documents/models.py:1246
+#: documents/models.py:67 documents/models.py:1322
msgid "match"
msgstr ""
-#: documents/models.py:71 documents/models.py:1249
+#: documents/models.py:70 documents/models.py:1325
msgid "matching algorithm"
msgstr ""
-#: documents/models.py:76 documents/models.py:1254
+#: documents/models.py:75 documents/models.py:1330
msgid "is insensitive"
msgstr ""
-#: documents/models.py:99 documents/models.py:167
+#: documents/models.py:98 documents/models.py:166
msgid "correspondent"
msgstr ""
-#: documents/models.py:100
+#: documents/models.py:99
msgid "correspondents"
msgstr ""
-#: documents/models.py:104
+#: documents/models.py:103
msgid "color"
msgstr ""
-#: documents/models.py:109
+#: documents/models.py:108
msgid "is inbox tag"
msgstr ""
-#: documents/models.py:112
+#: documents/models.py:111
msgid ""
"Marks this tag as an inbox tag: All newly consumed documents will be tagged "
"with inbox tags."
msgstr ""
-#: documents/models.py:118
+#: documents/models.py:117
msgid "tag"
msgstr ""
-#: documents/models.py:119 documents/models.py:214
+#: documents/models.py:118 documents/models.py:213
msgid "tags"
msgstr ""
-#: documents/models.py:125
+#: documents/models.py:124
msgid "Cannot set itself as parent."
msgstr ""
-#: documents/models.py:127
+#: documents/models.py:126
msgid "Cannot set parent to a descendant."
msgstr ""
-#: documents/models.py:144 documents/models.py:187
+#: documents/models.py:143 documents/models.py:186
msgid "document type"
msgstr ""
-#: documents/models.py:145
+#: documents/models.py:144
msgid "document types"
msgstr ""
-#: documents/models.py:150
+#: documents/models.py:149
msgid "path"
msgstr ""
-#: documents/models.py:154 documents/models.py:176
+#: documents/models.py:153 documents/models.py:175
msgid "storage path"
msgstr ""
-#: documents/models.py:155
+#: documents/models.py:154
msgid "storage paths"
msgstr ""
-#: documents/models.py:179
+#: documents/models.py:178
msgid "title"
msgstr ""
-#: documents/models.py:191 documents/models.py:761
+#: documents/models.py:190 documents/models.py:837
msgid "content"
msgstr ""
-#: documents/models.py:194
+#: documents/models.py:193
msgid ""
"The raw, text-only data of the document. This field is primarily used for "
"searching."
msgstr ""
-#: documents/models.py:208
+#: documents/models.py:207
msgid "mime type"
msgstr ""
-#: documents/models.py:218
+#: documents/models.py:217
msgid "checksum"
msgstr ""
-#: documents/models.py:221
+#: documents/models.py:220
msgid "The checksum of the original document."
msgstr ""
-#: documents/models.py:225
+#: documents/models.py:224
msgid "archive checksum"
msgstr ""
-#: documents/models.py:230
+#: documents/models.py:229
msgid "The checksum of the archived document."
msgstr ""
-#: documents/models.py:234
+#: documents/models.py:233
msgid "page count"
msgstr ""
-#: documents/models.py:241
+#: documents/models.py:240
msgid "The number of pages of the document."
msgstr ""
-#: documents/models.py:246 documents/models.py:767 documents/models.py:805
-#: documents/models.py:867 documents/models.py:985 documents/models.py:1044
+#: documents/models.py:245 documents/models.py:843 documents/models.py:881
+#: documents/models.py:943 documents/models.py:1061 documents/models.py:1120
msgid "created"
msgstr ""
-#: documents/models.py:252
+#: documents/models.py:251
msgid "modified"
msgstr ""
-#: documents/models.py:259
+#: documents/models.py:258
msgid "added"
msgstr ""
-#: documents/models.py:266
+#: documents/models.py:265
msgid "filename"
msgstr ""
-#: documents/models.py:272
+#: documents/models.py:271
msgid "Current filename in storage"
msgstr ""
-#: documents/models.py:276
+#: documents/models.py:275
msgid "archive filename"
msgstr ""
-#: documents/models.py:282
+#: documents/models.py:281
msgid "Current archive filename in storage"
msgstr ""
-#: documents/models.py:286
+#: documents/models.py:285
msgid "original filename"
msgstr ""
-#: documents/models.py:292
+#: documents/models.py:291
msgid "The original name of the file when it was uploaded"
msgstr ""
-#: documents/models.py:299
+#: documents/models.py:298
msgid "archive serial number"
msgstr ""
-#: documents/models.py:309
+#: documents/models.py:308
msgid "The position of this document in your physical document archive."
msgstr ""
-#: documents/models.py:319
+#: documents/models.py:318
msgid "root document for this version"
msgstr ""
-#: documents/models.py:323
+#: documents/models.py:322
msgid "version index"
msgstr ""
-#: documents/models.py:327
+#: documents/models.py:326
msgid "Index of this version within the root document."
msgstr ""
-#: documents/models.py:331
+#: documents/models.py:330
msgid "version label"
msgstr ""
-#: documents/models.py:335
+#: documents/models.py:334
msgid "Optional short label for a document version."
msgstr ""
-#: documents/models.py:340 documents/models.py:778 documents/models.py:832
-#: documents/models.py:1803
+#: documents/models.py:339 documents/models.py:854 documents/models.py:908
+#: documents/models.py:1879
msgid "document"
msgstr ""
-#: documents/models.py:341 documents/models.py:938
+#: documents/models.py:340 documents/models.py:1014
msgid "documents"
msgstr ""
-#: documents/models.py:520
+#: documents/models.py:519
msgid "Table"
msgstr ""
-#: documents/models.py:521
+#: documents/models.py:520
msgid "Small Cards"
msgstr ""
-#: documents/models.py:522
+#: documents/models.py:521
msgid "Large Cards"
msgstr ""
-#: documents/models.py:525
+#: documents/models.py:524
msgid "Title"
msgstr ""
-#: documents/models.py:526 documents/models.py:1198
+#: documents/models.py:525 documents/models.py:746 documents/models.py:1274
msgid "Created"
msgstr ""
-#: documents/models.py:527 documents/models.py:1197
+#: documents/models.py:526 documents/models.py:1273
msgid "Added"
msgstr ""
-#: documents/models.py:528
+#: documents/models.py:527
msgid "Tags"
msgstr ""
-#: documents/models.py:529
+#: documents/models.py:528
msgid "Correspondent"
msgstr ""
-#: documents/models.py:530
+#: documents/models.py:529
msgid "Document Type"
msgstr ""
-#: documents/models.py:531
+#: documents/models.py:530
msgid "Storage Path"
msgstr ""
-#: documents/models.py:532
+#: documents/models.py:531
msgid "Note"
msgstr ""
-#: documents/models.py:533
+#: documents/models.py:532
msgid "Owner"
msgstr ""
-#: documents/models.py:534
+#: documents/models.py:533
msgid "Shared"
msgstr ""
-#: documents/models.py:535
+#: documents/models.py:534
msgid "ASN"
msgstr ""
-#: documents/models.py:536
+#: documents/models.py:535
msgid "Pages"
msgstr ""
-#: documents/models.py:542
+#: documents/models.py:541
msgid "sort field"
msgstr ""
-#: documents/models.py:547
+#: documents/models.py:546
msgid "sort reverse"
msgstr ""
-#: documents/models.py:550
+#: documents/models.py:549
msgid "View page size"
msgstr ""
-#: documents/models.py:558
+#: documents/models.py:557
msgid "View display mode"
msgstr ""
-#: documents/models.py:565
+#: documents/models.py:564
msgid "Document display fields"
msgstr ""
-#: documents/models.py:572 documents/models.py:637
+#: documents/models.py:571 documents/models.py:636
msgid "saved view"
msgstr ""
-#: documents/models.py:573
+#: documents/models.py:572
msgid "saved views"
msgstr ""
-#: documents/models.py:581
+#: documents/models.py:580
msgid "title contains"
msgstr ""
-#: documents/models.py:582
+#: documents/models.py:581
msgid "content contains"
msgstr ""
-#: documents/models.py:583
+#: documents/models.py:582
msgid "ASN is"
msgstr ""
-#: documents/models.py:584
+#: documents/models.py:583
msgid "correspondent is"
msgstr ""
-#: documents/models.py:585
+#: documents/models.py:584
msgid "document type is"
msgstr ""
-#: documents/models.py:586
+#: documents/models.py:585
msgid "is in inbox"
msgstr ""
-#: documents/models.py:587
+#: documents/models.py:586
msgid "has tag"
msgstr ""
-#: documents/models.py:588
+#: documents/models.py:587
msgid "has any tag"
msgstr ""
-#: documents/models.py:589
+#: documents/models.py:588
msgid "created before"
msgstr ""
-#: documents/models.py:590
+#: documents/models.py:589
msgid "created after"
msgstr ""
-#: documents/models.py:591
+#: documents/models.py:590
msgid "created year is"
msgstr ""
-#: documents/models.py:592
+#: documents/models.py:591
msgid "created month is"
msgstr ""
-#: documents/models.py:593
+#: documents/models.py:592
msgid "created day is"
msgstr ""
-#: documents/models.py:594
+#: documents/models.py:593
msgid "added before"
msgstr ""
-#: documents/models.py:595
+#: documents/models.py:594
msgid "added after"
msgstr ""
-#: documents/models.py:596
+#: documents/models.py:595
msgid "modified before"
msgstr ""
-#: documents/models.py:597
+#: documents/models.py:596
msgid "modified after"
msgstr ""
-#: documents/models.py:598
+#: documents/models.py:597
msgid "does not have tag"
msgstr ""
-#: documents/models.py:599
+#: documents/models.py:598
msgid "does not have ASN"
msgstr ""
-#: documents/models.py:600
+#: documents/models.py:599
msgid "title or content contains"
msgstr ""
-#: documents/models.py:601
+#: documents/models.py:600
msgid "fulltext query"
msgstr ""
-#: documents/models.py:602
+#: documents/models.py:601
msgid "more like this"
msgstr ""
-#: documents/models.py:603
+#: documents/models.py:602
msgid "has tags in"
msgstr ""
-#: documents/models.py:604
+#: documents/models.py:603
msgid "ASN greater than"
msgstr ""
-#: documents/models.py:605
+#: documents/models.py:604
msgid "ASN less than"
msgstr ""
-#: documents/models.py:606
+#: documents/models.py:605
msgid "storage path is"
msgstr ""
-#: documents/models.py:607
+#: documents/models.py:606
msgid "has correspondent in"
msgstr ""
-#: documents/models.py:608
+#: documents/models.py:607
msgid "does not have correspondent in"
msgstr ""
-#: documents/models.py:609
+#: documents/models.py:608
msgid "has document type in"
msgstr ""
-#: documents/models.py:610
+#: documents/models.py:609
msgid "does not have document type in"
msgstr ""
-#: documents/models.py:611
+#: documents/models.py:610
msgid "has storage path in"
msgstr ""
-#: documents/models.py:612
+#: documents/models.py:611
msgid "does not have storage path in"
msgstr ""
-#: documents/models.py:613
+#: documents/models.py:612
msgid "owner is"
msgstr ""
-#: documents/models.py:614
+#: documents/models.py:613
msgid "has owner in"
msgstr ""
-#: documents/models.py:615
+#: documents/models.py:614
msgid "does not have owner"
msgstr ""
-#: documents/models.py:616
+#: documents/models.py:615
msgid "does not have owner in"
msgstr ""
-#: documents/models.py:617
+#: documents/models.py:616
msgid "has custom field value"
msgstr ""
-#: documents/models.py:618
+#: documents/models.py:617
msgid "is shared by me"
msgstr ""
-#: documents/models.py:619
+#: documents/models.py:618
msgid "has custom fields"
msgstr ""
-#: documents/models.py:620
+#: documents/models.py:619
msgid "has custom field in"
msgstr ""
-#: documents/models.py:621
+#: documents/models.py:620
msgid "does not have custom field in"
msgstr ""
-#: documents/models.py:622
+#: documents/models.py:621
msgid "does not have custom field"
msgstr ""
-#: documents/models.py:623
+#: documents/models.py:622
msgid "custom fields query"
msgstr ""
-#: documents/models.py:624
+#: documents/models.py:623
msgid "created to"
msgstr ""
-#: documents/models.py:625
+#: documents/models.py:624
msgid "created from"
msgstr ""
-#: documents/models.py:626
+#: documents/models.py:625
msgid "added to"
msgstr ""
-#: documents/models.py:627
+#: documents/models.py:626
msgid "added from"
msgstr ""
-#: documents/models.py:628
+#: documents/models.py:627
msgid "mime type is"
msgstr ""
-#: documents/models.py:629
+#: documents/models.py:628
msgid "simple title search"
msgstr ""
-#: documents/models.py:630
+#: documents/models.py:629
msgid "simple text search"
msgstr ""
-#: documents/models.py:640
+#: documents/models.py:639
msgid "rule type"
msgstr ""
-#: documents/models.py:642
+#: documents/models.py:641
msgid "value"
msgstr ""
-#: documents/models.py:645
+#: documents/models.py:644
msgid "filter rule"
msgstr ""
-#: documents/models.py:646
+#: documents/models.py:645
msgid "filter rules"
msgstr ""
-#: documents/models.py:670
-msgid "Auto Task"
-msgstr ""
-
-#: documents/models.py:671
-msgid "Scheduled Task"
-msgstr ""
-
-#: documents/models.py:672
-msgid "Manual Task"
-msgstr ""
-
-#: documents/models.py:675
-msgid "Consume File"
-msgstr ""
-
-#: documents/models.py:676
-msgid "Train Classifier"
-msgstr ""
-
-#: documents/models.py:677
-msgid "Check Sanity"
-msgstr ""
-
-#: documents/models.py:678
-msgid "Index Optimize"
-msgstr ""
-
-#: documents/models.py:679
-msgid "LLM Index Update"
-msgstr ""
-
-#: documents/models.py:684
-msgid "Task ID"
-msgstr ""
-
-#: documents/models.py:685
-msgid "Celery ID for the Task that was run"
-msgstr ""
-
-#: documents/models.py:690
-msgid "Acknowledged"
-msgstr ""
-
-#: documents/models.py:691
-msgid "If the task is acknowledged via the frontend or API"
-msgstr ""
-
-#: documents/models.py:697
-msgid "Task Filename"
-msgstr ""
-
-#: documents/models.py:698
-msgid "Name of the file which the Task was run for"
-msgstr ""
-
-#: documents/models.py:705
-msgid "Task Name"
-msgstr ""
-
-#: documents/models.py:706
-msgid "Name of the task that was run"
-msgstr ""
-
-#: documents/models.py:713
-msgid "Task State"
-msgstr ""
-
-#: documents/models.py:714
-msgid "Current state of the task being run"
-msgstr ""
-
-#: documents/models.py:720
-msgid "Created DateTime"
-msgstr ""
-
-#: documents/models.py:721
-msgid "Datetime field when the task result was created in UTC"
-msgstr ""
-
-#: documents/models.py:727
-msgid "Started DateTime"
-msgstr ""
-
-#: documents/models.py:728
-msgid "Datetime field when the task was started in UTC"
-msgstr ""
-
-#: documents/models.py:734
-msgid "Completed DateTime"
-msgstr ""
-
-#: documents/models.py:735
-msgid "Datetime field when the task was completed in UTC"
-msgstr ""
-
-#: documents/models.py:741
-msgid "Result Data"
-msgstr ""
-
-#: documents/models.py:743
-msgid "The data returned by the task"
-msgstr ""
-
-#: documents/models.py:751
-msgid "Task Type"
-msgstr ""
-
-#: documents/models.py:752
-msgid "The type of task that was run"
-msgstr ""
-
-#: documents/models.py:763
-msgid "Note for the document"
-msgstr ""
-
-#: documents/models.py:787
-msgid "user"
-msgstr ""
-
-#: documents/models.py:792
-msgid "note"
-msgstr ""
-
-#: documents/models.py:793
-msgid "notes"
-msgstr ""
-
-#: documents/models.py:801
-msgid "Archive"
-msgstr ""
-
-#: documents/models.py:802
-msgid "Original"
-msgstr ""
-
-#: documents/models.py:813 documents/models.py:875 paperless_mail/models.py:75
-msgid "expiration"
-msgstr ""
-
-#: documents/models.py:820 documents/models.py:882
-msgid "slug"
-msgstr ""
-
-#: documents/models.py:852
-msgid "share link"
-msgstr ""
-
-#: documents/models.py:853
-msgid "share links"
-msgstr ""
-
-#: documents/models.py:861
+#: documents/models.py:675 documents/models.py:937
msgid "Pending"
msgstr ""
-#: documents/models.py:862
-msgid "Processing"
+#: documents/models.py:676 documents/models.py:753
+msgid "Started"
+msgstr ""
+
+#: documents/models.py:677
+msgid "Success"
+msgstr ""
+
+#: documents/models.py:678
+msgid "Failure"
+msgstr ""
+
+#: documents/models.py:679
+msgid "Revoked"
+msgstr ""
+
+#: documents/models.py:682
+msgid "Consume File"
+msgstr ""
+
+#: documents/models.py:683
+msgid "Train Classifier"
+msgstr ""
+
+#: documents/models.py:684
+msgid "Sanity Check"
+msgstr ""
+
+#: documents/models.py:685
+msgid "Index Optimize"
+msgstr ""
+
+#: documents/models.py:686 documents/models.py:1269
+msgid "Mail Fetch"
+msgstr ""
+
+#: documents/models.py:687
+msgid "LLM Index"
+msgstr ""
+
+#: documents/models.py:688
+msgid "Empty Trash"
+msgstr ""
+
+#: documents/models.py:689
+msgid "Check Workflows"
+msgstr ""
+
+#: documents/models.py:690
+msgid "Bulk Update"
+msgstr ""
+
+#: documents/models.py:691
+msgid "Reprocess Document"
+msgstr ""
+
+#: documents/models.py:692
+msgid "Build Share Link"
+msgstr ""
+
+#: documents/models.py:693
+msgid "Bulk Delete"
+msgstr ""
+
+#: documents/models.py:702 documents/models.py:1264
+msgid "Scheduled"
+msgstr ""
+
+#: documents/models.py:703 documents/models.py:1270
+msgid "Web UI"
+msgstr ""
+
+#: documents/models.py:704
+msgid "API Upload"
+msgstr ""
+
+#: documents/models.py:705
+msgid "Folder Consume"
+msgstr ""
+
+#: documents/models.py:706
+msgid "Email Consume"
+msgstr ""
+
+#: documents/models.py:707
+msgid "System"
+msgstr ""
+
+#: documents/models.py:708
+msgid "Manual"
+msgstr ""
+
+#: documents/models.py:714
+msgid "Task ID"
+msgstr ""
+
+#: documents/models.py:715
+msgid "Celery task ID"
+msgstr ""
+
+#: documents/models.py:721
+msgid "Task Type"
+msgstr ""
+
+#: documents/models.py:722
+msgid "The kind of work being performed"
+msgstr ""
+
+#: documents/models.py:729
+msgid "Trigger Source"
+msgstr ""
+
+#: documents/models.py:730
+msgid "What initiated this task"
+msgstr ""
+
+#: documents/models.py:739
+msgid "Status"
+msgstr ""
+
+#: documents/models.py:759
+msgid "Completed"
+msgstr ""
+
+#: documents/models.py:767
+msgid "Duration (seconds)"
+msgstr ""
+
+#: documents/models.py:768
+msgid "Elapsed time from start to completion"
+msgstr ""
+
+#: documents/models.py:774
+msgid "Wait Time (seconds)"
+msgstr ""
+
+#: documents/models.py:775
+msgid "Time from task creation to worker pickup"
+msgstr ""
+
+#: documents/models.py:782
+msgid "Input Data"
+msgstr ""
+
+#: documents/models.py:783
+msgid "Structured input parameters for the task"
+msgstr ""
+
+#: documents/models.py:789
+msgid "Result Data"
+msgstr ""
+
+#: documents/models.py:790
+msgid "Structured result data from task execution"
+msgstr ""
+
+#: documents/models.py:796
+msgid "Result Message"
+msgstr ""
+
+#: documents/models.py:797
+msgid "Human-readable result message"
+msgstr ""
+
+#: documents/models.py:803
+msgid "Acknowledged"
+msgstr ""
+
+#: documents/models.py:808
+msgid "Task"
+msgstr ""
+
+#: documents/models.py:809
+msgid "Tasks"
+msgstr ""
+
+#: documents/models.py:839
+msgid "Note for the document"
msgstr ""
#: documents/models.py:863
+msgid "user"
+msgstr ""
+
+#: documents/models.py:868
+msgid "note"
+msgstr ""
+
+#: documents/models.py:869
+msgid "notes"
+msgstr ""
+
+#: documents/models.py:877
+msgid "Archive"
+msgstr ""
+
+#: documents/models.py:878
+msgid "Original"
+msgstr ""
+
+#: documents/models.py:889 documents/models.py:951 paperless_mail/models.py:75
+msgid "expiration"
+msgstr ""
+
+#: documents/models.py:896 documents/models.py:958
+msgid "slug"
+msgstr ""
+
+#: documents/models.py:928
+msgid "share link"
+msgstr ""
+
+#: documents/models.py:929
+msgid "share links"
+msgstr ""
+
+#: documents/models.py:938
+msgid "Processing"
+msgstr ""
+
+#: documents/models.py:939
msgid "Ready"
msgstr ""
-#: documents/models.py:864
+#: documents/models.py:940
msgid "Failed"
msgstr ""
-#: documents/models.py:911
+#: documents/models.py:987
msgid "size (bytes)"
msgstr ""
-#: documents/models.py:917
+#: documents/models.py:993
msgid "last error"
msgstr ""
-#: documents/models.py:924
+#: documents/models.py:1000
msgid "file path"
msgstr ""
-#: documents/models.py:930
+#: documents/models.py:1006
msgid "built at"
msgstr ""
-#: documents/models.py:943
+#: documents/models.py:1019
msgid "share link bundle"
msgstr ""
-#: documents/models.py:944
+#: documents/models.py:1020
msgid "share link bundles"
msgstr ""
-#: documents/models.py:947
+#: documents/models.py:1023
#, python-format
msgid "Share link bundle %(slug)s"
msgstr ""
-#: documents/models.py:973
+#: documents/models.py:1049
msgid "String"
msgstr ""
-#: documents/models.py:974
+#: documents/models.py:1050
msgid "URL"
msgstr ""
-#: documents/models.py:975
+#: documents/models.py:1051
msgid "Date"
msgstr ""
-#: documents/models.py:976
+#: documents/models.py:1052
msgid "Boolean"
msgstr ""
-#: documents/models.py:977
+#: documents/models.py:1053
msgid "Integer"
msgstr ""
-#: documents/models.py:978
+#: documents/models.py:1054
msgid "Float"
msgstr ""
-#: documents/models.py:979
+#: documents/models.py:1055
msgid "Monetary"
msgstr ""
-#: documents/models.py:980
+#: documents/models.py:1056
msgid "Document Link"
msgstr ""
-#: documents/models.py:981
+#: documents/models.py:1057
msgid "Select"
msgstr ""
-#: documents/models.py:982
+#: documents/models.py:1058
msgid "Long Text"
msgstr ""
-#: documents/models.py:994
+#: documents/models.py:1070
msgid "data type"
msgstr ""
-#: documents/models.py:1001
+#: documents/models.py:1077
msgid "extra data"
msgstr ""
-#: documents/models.py:1005
+#: documents/models.py:1081
msgid "Extra data for the custom field, such as select options"
msgstr ""
-#: documents/models.py:1011
+#: documents/models.py:1087
msgid "custom field"
msgstr ""
-#: documents/models.py:1012
+#: documents/models.py:1088
msgid "custom fields"
msgstr ""
-#: documents/models.py:1112
+#: documents/models.py:1188
msgid "custom field instance"
msgstr ""
-#: documents/models.py:1113
+#: documents/models.py:1189
msgid "custom field instances"
msgstr ""
-#: documents/models.py:1185
+#: documents/models.py:1261
msgid "Consumption Started"
msgstr ""
-#: documents/models.py:1186
+#: documents/models.py:1262
msgid "Document Added"
msgstr ""
-#: documents/models.py:1187
+#: documents/models.py:1263
msgid "Document Updated"
msgstr ""
-#: documents/models.py:1188
-msgid "Scheduled"
-msgstr ""
-
-#: documents/models.py:1191
+#: documents/models.py:1267
msgid "Consume Folder"
msgstr ""
-#: documents/models.py:1192
+#: documents/models.py:1268
msgid "Api Upload"
msgstr ""
-#: documents/models.py:1193
-msgid "Mail Fetch"
-msgstr ""
-
-#: documents/models.py:1194
-msgid "Web UI"
-msgstr ""
-
-#: documents/models.py:1199
+#: documents/models.py:1275
msgid "Modified"
msgstr ""
-#: documents/models.py:1200
+#: documents/models.py:1276
msgid "Custom Field"
msgstr ""
-#: documents/models.py:1203
+#: documents/models.py:1279
msgid "Workflow Trigger Type"
msgstr ""
-#: documents/models.py:1215
+#: documents/models.py:1291
msgid "filter path"
msgstr ""
-#: documents/models.py:1220
+#: documents/models.py:1296
msgid ""
"Only consume documents with a path that matches this if specified. Wildcards "
"specified as * are allowed. Case insensitive."
msgstr ""
-#: documents/models.py:1227
+#: documents/models.py:1303
msgid "filter filename"
msgstr ""
-#: documents/models.py:1232 paperless_mail/models.py:200
+#: documents/models.py:1308 paperless_mail/models.py:200
msgid ""
"Only consume documents which entirely match this filename if specified. "
"Wildcards such as *.pdf or *invoice* are allowed. Case insensitive."
msgstr ""
-#: documents/models.py:1243
+#: documents/models.py:1319
msgid "filter documents from this mail rule"
msgstr ""
-#: documents/models.py:1259
+#: documents/models.py:1335
msgid "has these tag(s)"
msgstr ""
-#: documents/models.py:1266
+#: documents/models.py:1342
msgid "has all of these tag(s)"
msgstr ""
-#: documents/models.py:1273
+#: documents/models.py:1349
msgid "does not have these tag(s)"
msgstr ""
-#: documents/models.py:1281
+#: documents/models.py:1357
msgid "has this document type"
msgstr ""
-#: documents/models.py:1288
+#: documents/models.py:1364
msgid "has one of these document types"
msgstr ""
-#: documents/models.py:1295
+#: documents/models.py:1371
msgid "does not have these document type(s)"
msgstr ""
-#: documents/models.py:1303
+#: documents/models.py:1379
msgid "has this correspondent"
msgstr ""
-#: documents/models.py:1310
+#: documents/models.py:1386
msgid "does not have these correspondent(s)"
msgstr ""
-#: documents/models.py:1317
+#: documents/models.py:1393
msgid "has one of these correspondents"
msgstr ""
-#: documents/models.py:1325
+#: documents/models.py:1401
msgid "has this storage path"
msgstr ""
-#: documents/models.py:1332
+#: documents/models.py:1408
msgid "has one of these storage paths"
msgstr ""
-#: documents/models.py:1339
+#: documents/models.py:1415
msgid "does not have these storage path(s)"
msgstr ""
-#: documents/models.py:1343
+#: documents/models.py:1419
msgid "filter custom field query"
msgstr ""
-#: documents/models.py:1346
+#: documents/models.py:1422
msgid "JSON-encoded custom field query expression."
msgstr ""
-#: documents/models.py:1350
+#: documents/models.py:1426
msgid "schedule offset days"
msgstr ""
-#: documents/models.py:1353
+#: documents/models.py:1429
msgid "The number of days to offset the schedule trigger by."
msgstr ""
-#: documents/models.py:1358
+#: documents/models.py:1434
msgid "schedule is recurring"
msgstr ""
-#: documents/models.py:1361
+#: documents/models.py:1437
msgid "If the schedule should be recurring."
msgstr ""
-#: documents/models.py:1366
+#: documents/models.py:1442
msgid "schedule recurring delay in days"
msgstr ""
-#: documents/models.py:1370
+#: documents/models.py:1446
msgid "The number of days between recurring schedule triggers."
msgstr ""
-#: documents/models.py:1375
+#: documents/models.py:1451
msgid "schedule date field"
msgstr ""
-#: documents/models.py:1380
+#: documents/models.py:1456
msgid "The field to check for a schedule trigger."
msgstr ""
-#: documents/models.py:1389
+#: documents/models.py:1465
msgid "schedule date custom field"
msgstr ""
-#: documents/models.py:1393
+#: documents/models.py:1469
msgid "workflow trigger"
msgstr ""
-#: documents/models.py:1394
+#: documents/models.py:1470
msgid "workflow triggers"
msgstr ""
-#: documents/models.py:1402
+#: documents/models.py:1478
msgid "email subject"
msgstr ""
-#: documents/models.py:1406
+#: documents/models.py:1482
msgid ""
"The subject of the email, can include some placeholders, see documentation."
msgstr ""
-#: documents/models.py:1412
+#: documents/models.py:1488
msgid "email body"
msgstr ""
-#: documents/models.py:1415
+#: documents/models.py:1491
msgid ""
"The body (message) of the email, can include some placeholders, see "
"documentation."
msgstr ""
-#: documents/models.py:1421
+#: documents/models.py:1497
msgid "emails to"
msgstr ""
-#: documents/models.py:1424
+#: documents/models.py:1500
msgid "The destination email addresses, comma separated."
msgstr ""
-#: documents/models.py:1430
+#: documents/models.py:1506
msgid "include document in email"
msgstr ""
-#: documents/models.py:1441
+#: documents/models.py:1517
msgid "webhook url"
msgstr ""
-#: documents/models.py:1444
+#: documents/models.py:1520
msgid "The destination URL for the notification."
msgstr ""
-#: documents/models.py:1449
+#: documents/models.py:1525
msgid "use parameters"
msgstr ""
-#: documents/models.py:1454
+#: documents/models.py:1530
msgid "send as JSON"
msgstr ""
-#: documents/models.py:1458
+#: documents/models.py:1534
msgid "webhook parameters"
msgstr ""
-#: documents/models.py:1461
+#: documents/models.py:1537
msgid "The parameters to send with the webhook URL if body not used."
msgstr ""
-#: documents/models.py:1465
+#: documents/models.py:1541
msgid "webhook body"
msgstr ""
-#: documents/models.py:1468
+#: documents/models.py:1544
msgid "The body to send with the webhook URL if parameters not used."
msgstr ""
-#: documents/models.py:1472
+#: documents/models.py:1548
msgid "webhook headers"
msgstr ""
-#: documents/models.py:1475
+#: documents/models.py:1551
msgid "The headers to send with the webhook URL."
msgstr ""
-#: documents/models.py:1480
+#: documents/models.py:1556
msgid "include document in webhook"
msgstr ""
-#: documents/models.py:1491
+#: documents/models.py:1567
msgid "Assignment"
msgstr ""
-#: documents/models.py:1495
+#: documents/models.py:1571
msgid "Removal"
msgstr ""
-#: documents/models.py:1499 documents/templates/account/password_reset.html:15
+#: documents/models.py:1575 documents/templates/account/password_reset.html:15
msgid "Email"
msgstr ""
-#: documents/models.py:1503
+#: documents/models.py:1579
msgid "Webhook"
msgstr ""
-#: documents/models.py:1507
+#: documents/models.py:1583
msgid "Password removal"
msgstr ""
-#: documents/models.py:1511
+#: documents/models.py:1587
msgid "Move to trash"
msgstr ""
-#: documents/models.py:1515
+#: documents/models.py:1591
msgid "Workflow Action Type"
msgstr ""
-#: documents/models.py:1520 documents/models.py:1762
+#: documents/models.py:1596 documents/models.py:1838
#: paperless_mail/models.py:145
msgid "order"
msgstr ""
-#: documents/models.py:1523
+#: documents/models.py:1599
msgid "assign title"
msgstr ""
-#: documents/models.py:1527
+#: documents/models.py:1603
msgid "Assign a document title, must be a Jinja2 template, see documentation."
msgstr ""
-#: documents/models.py:1535 paperless_mail/models.py:274
+#: documents/models.py:1611 paperless_mail/models.py:274
msgid "assign this tag"
msgstr ""
-#: documents/models.py:1544 paperless_mail/models.py:282
+#: documents/models.py:1620 paperless_mail/models.py:282
msgid "assign this document type"
msgstr ""
-#: documents/models.py:1553 paperless_mail/models.py:296
+#: documents/models.py:1629 paperless_mail/models.py:296
msgid "assign this correspondent"
msgstr ""
-#: documents/models.py:1562
+#: documents/models.py:1638
msgid "assign this storage path"
msgstr ""
-#: documents/models.py:1571
+#: documents/models.py:1647
msgid "assign this owner"
msgstr ""
-#: documents/models.py:1578
+#: documents/models.py:1654
msgid "grant view permissions to these users"
msgstr ""
-#: documents/models.py:1585
+#: documents/models.py:1661
msgid "grant view permissions to these groups"
msgstr ""
-#: documents/models.py:1592
+#: documents/models.py:1668
msgid "grant change permissions to these users"
msgstr ""
-#: documents/models.py:1599
+#: documents/models.py:1675
msgid "grant change permissions to these groups"
msgstr ""
-#: documents/models.py:1606
+#: documents/models.py:1682
msgid "assign these custom fields"
msgstr ""
-#: documents/models.py:1610
+#: documents/models.py:1686
msgid "custom field values"
msgstr ""
-#: documents/models.py:1614
+#: documents/models.py:1690
msgid "Optional values to assign to the custom fields."
msgstr ""
-#: documents/models.py:1623
+#: documents/models.py:1699
msgid "remove these tag(s)"
msgstr ""
-#: documents/models.py:1628
+#: documents/models.py:1704
msgid "remove all tags"
msgstr ""
-#: documents/models.py:1635
+#: documents/models.py:1711
msgid "remove these document type(s)"
msgstr ""
-#: documents/models.py:1640
+#: documents/models.py:1716
msgid "remove all document types"
msgstr ""
-#: documents/models.py:1647
+#: documents/models.py:1723
msgid "remove these correspondent(s)"
msgstr ""
-#: documents/models.py:1652
+#: documents/models.py:1728
msgid "remove all correspondents"
msgstr ""
-#: documents/models.py:1659
+#: documents/models.py:1735
msgid "remove these storage path(s)"
msgstr ""
-#: documents/models.py:1664
+#: documents/models.py:1740
msgid "remove all storage paths"
msgstr ""
-#: documents/models.py:1671
+#: documents/models.py:1747
msgid "remove these owner(s)"
msgstr ""
-#: documents/models.py:1676
+#: documents/models.py:1752
msgid "remove all owners"
msgstr ""
-#: documents/models.py:1683
+#: documents/models.py:1759
msgid "remove view permissions for these users"
msgstr ""
-#: documents/models.py:1690
+#: documents/models.py:1766
msgid "remove view permissions for these groups"
msgstr ""
-#: documents/models.py:1697
+#: documents/models.py:1773
msgid "remove change permissions for these users"
msgstr ""
-#: documents/models.py:1704
+#: documents/models.py:1780
msgid "remove change permissions for these groups"
msgstr ""
-#: documents/models.py:1709
+#: documents/models.py:1785
msgid "remove all permissions"
msgstr ""
-#: documents/models.py:1716
+#: documents/models.py:1792
msgid "remove these custom fields"
msgstr ""
-#: documents/models.py:1721
+#: documents/models.py:1797
msgid "remove all custom fields"
msgstr ""
-#: documents/models.py:1730
+#: documents/models.py:1806
msgid "email"
msgstr ""
-#: documents/models.py:1739
+#: documents/models.py:1815
msgid "webhook"
msgstr ""
-#: documents/models.py:1743
+#: documents/models.py:1819
msgid "passwords"
msgstr ""
-#: documents/models.py:1747
+#: documents/models.py:1823
msgid ""
"Passwords to try when removing PDF protection. Separate with commas or new "
"lines."
msgstr ""
-#: documents/models.py:1752
+#: documents/models.py:1828
msgid "workflow action"
msgstr ""
-#: documents/models.py:1753
+#: documents/models.py:1829
msgid "workflow actions"
msgstr ""
-#: documents/models.py:1768
+#: documents/models.py:1844
msgid "triggers"
msgstr ""
-#: documents/models.py:1775
+#: documents/models.py:1851
msgid "actions"
msgstr ""
-#: documents/models.py:1778 paperless_mail/models.py:154
+#: documents/models.py:1854 paperless_mail/models.py:154
msgid "enabled"
msgstr ""
-#: documents/models.py:1789
+#: documents/models.py:1865
msgid "workflow"
msgstr ""
-#: documents/models.py:1793
+#: documents/models.py:1869
msgid "workflow trigger type"
msgstr ""
-#: documents/models.py:1807
+#: documents/models.py:1883
msgid "date run"
msgstr ""
-#: documents/models.py:1813
+#: documents/models.py:1889
msgid "workflow run"
msgstr ""
-#: documents/models.py:1814
+#: documents/models.py:1890
msgid "workflow runs"
msgstr ""
#: documents/serialisers.py:463 documents/serialisers.py:815
-#: documents/serialisers.py:2547 documents/views.py:2221
-#: documents/views.py:2290 paperless_mail/serialisers.py:143
+#: documents/serialisers.py:2664 documents/views.py:2223
+#: documents/views.py:2292 paperless_mail/serialisers.py:143
msgid "Insufficient permissions."
msgstr ""
@@ -1345,11 +1397,11 @@ msgstr ""
msgid "Invalid variable detected."
msgstr ""
-#: documents/serialisers.py:2603
+#: documents/serialisers.py:2720
msgid "Duplicate document identifiers are not allowed."
msgstr ""
-#: documents/serialisers.py:2633 documents/views.py:3904
+#: documents/serialisers.py:2750 documents/views.py:4025
#, python-format
msgid "Documents not found: %(ids)s"
msgstr ""
@@ -1617,28 +1669,28 @@ msgstr ""
msgid "Unable to parse URI {value}"
msgstr ""
-#: documents/views.py:2101
+#: documents/views.py:2103
msgid "Specify only one of text, title_search, query, or more_like_id."
msgstr ""
-#: documents/views.py:2214 documents/views.py:2287
+#: documents/views.py:2216 documents/views.py:2289
msgid "Invalid more_like_id"
msgstr ""
-#: documents/views.py:3916
+#: documents/views.py:4037
#, python-format
msgid "Insufficient permissions to share document %(id)s."
msgstr ""
-#: documents/views.py:3959
+#: documents/views.py:4080
msgid "Bundle is already being processed."
msgstr ""
-#: documents/views.py:4016
+#: documents/views.py:4137
msgid "The share link bundle is still being prepared. Please try again later."
msgstr ""
-#: documents/views.py:4026
+#: documents/views.py:4147
msgid "The share link bundle is unavailable."
msgstr ""
@@ -1874,151 +1926,151 @@ msgstr ""
msgid "paperless application settings"
msgstr ""
-#: paperless/settings/__init__.py:532
+#: paperless/settings/__init__.py:531
msgid "English (US)"
msgstr ""
-#: paperless/settings/__init__.py:533
+#: paperless/settings/__init__.py:532
msgid "Arabic"
msgstr ""
-#: paperless/settings/__init__.py:534
+#: paperless/settings/__init__.py:533
msgid "Afrikaans"
msgstr ""
-#: paperless/settings/__init__.py:535
+#: paperless/settings/__init__.py:534
msgid "Belarusian"
msgstr ""
-#: paperless/settings/__init__.py:536
+#: paperless/settings/__init__.py:535
msgid "Bulgarian"
msgstr ""
-#: paperless/settings/__init__.py:537
+#: paperless/settings/__init__.py:536
msgid "Catalan"
msgstr ""
-#: paperless/settings/__init__.py:538
+#: paperless/settings/__init__.py:537
msgid "Czech"
msgstr ""
-#: paperless/settings/__init__.py:539
+#: paperless/settings/__init__.py:538
msgid "Danish"
msgstr ""
-#: paperless/settings/__init__.py:540
+#: paperless/settings/__init__.py:539
msgid "German"
msgstr ""
-#: paperless/settings/__init__.py:541
+#: paperless/settings/__init__.py:540
msgid "Greek"
msgstr ""
-#: paperless/settings/__init__.py:542
+#: paperless/settings/__init__.py:541
msgid "English (GB)"
msgstr ""
-#: paperless/settings/__init__.py:543
+#: paperless/settings/__init__.py:542
msgid "Spanish"
msgstr ""
-#: paperless/settings/__init__.py:544
+#: paperless/settings/__init__.py:543
msgid "Persian"
msgstr ""
-#: paperless/settings/__init__.py:545
+#: paperless/settings/__init__.py:544
msgid "Finnish"
msgstr ""
-#: paperless/settings/__init__.py:546
+#: paperless/settings/__init__.py:545
msgid "French"
msgstr ""
-#: paperless/settings/__init__.py:547
+#: paperless/settings/__init__.py:546
msgid "Hungarian"
msgstr ""
-#: paperless/settings/__init__.py:548
+#: paperless/settings/__init__.py:547
msgid "Indonesian"
msgstr ""
-#: paperless/settings/__init__.py:549
+#: paperless/settings/__init__.py:548
msgid "Italian"
msgstr ""
-#: paperless/settings/__init__.py:550
+#: paperless/settings/__init__.py:549
msgid "Japanese"
msgstr ""
-#: paperless/settings/__init__.py:551
+#: paperless/settings/__init__.py:550
msgid "Korean"
msgstr ""
-#: paperless/settings/__init__.py:552
+#: paperless/settings/__init__.py:551
msgid "Luxembourgish"
msgstr ""
-#: paperless/settings/__init__.py:553
+#: paperless/settings/__init__.py:552
msgid "Norwegian"
msgstr ""
-#: paperless/settings/__init__.py:554
+#: paperless/settings/__init__.py:553
msgid "Dutch"
msgstr ""
-#: paperless/settings/__init__.py:555
+#: paperless/settings/__init__.py:554
msgid "Polish"
msgstr ""
-#: paperless/settings/__init__.py:556
+#: paperless/settings/__init__.py:555
msgid "Portuguese (Brazil)"
msgstr ""
-#: paperless/settings/__init__.py:557
+#: paperless/settings/__init__.py:556
msgid "Portuguese"
msgstr ""
-#: paperless/settings/__init__.py:558
+#: paperless/settings/__init__.py:557
msgid "Romanian"
msgstr ""
-#: paperless/settings/__init__.py:559
+#: paperless/settings/__init__.py:558
msgid "Russian"
msgstr ""
-#: paperless/settings/__init__.py:560
+#: paperless/settings/__init__.py:559
msgid "Slovak"
msgstr ""
-#: paperless/settings/__init__.py:561
+#: paperless/settings/__init__.py:560
msgid "Slovenian"
msgstr ""
-#: paperless/settings/__init__.py:562
+#: paperless/settings/__init__.py:561
msgid "Serbian"
msgstr ""
-#: paperless/settings/__init__.py:563
+#: paperless/settings/__init__.py:562
msgid "Swedish"
msgstr ""
-#: paperless/settings/__init__.py:564
+#: paperless/settings/__init__.py:563
msgid "Turkish"
msgstr ""
-#: paperless/settings/__init__.py:565
+#: paperless/settings/__init__.py:564
msgid "Ukrainian"
msgstr ""
-#: paperless/settings/__init__.py:566
+#: paperless/settings/__init__.py:565
msgid "Vietnamese"
msgstr ""
-#: paperless/settings/__init__.py:567
+#: paperless/settings/__init__.py:566
msgid "Chinese Simplified"
msgstr ""
-#: paperless/settings/__init__.py:568
+#: paperless/settings/__init__.py:567
msgid "Chinese Traditional"
msgstr ""
diff --git a/src/paperless/settings/__init__.py b/src/paperless/settings/__init__.py
index bace016cf..6f76d3499 100644
--- a/src/paperless/settings/__init__.py
+++ b/src/paperless/settings/__init__.py
@@ -133,7 +133,6 @@ INSTALLED_APPS = [
"rest_framework",
"rest_framework.authtoken",
"django_filters",
- "django_celery_results",
"guardian",
"allauth",
"allauth.account",
@@ -669,8 +668,6 @@ CELERY_BROKER_TRANSPORT_OPTIONS = {
CELERY_TASK_TRACK_STARTED = True
CELERY_TASK_TIME_LIMIT: Final[int] = get_int_from_env("PAPERLESS_WORKER_TIMEOUT", 1800)
-CELERY_RESULT_EXTENDED = True
-CELERY_RESULT_BACKEND = "django-db"
CELERY_CACHE_BACKEND = "default"
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#task-serializer
diff --git a/src/paperless/settings/custom.py b/src/paperless/settings/custom.py
index 9e9468e21..1c66376a4 100644
--- a/src/paperless/settings/custom.py
+++ b/src/paperless/settings/custom.py
@@ -181,7 +181,11 @@ def parse_beat_schedule() -> dict:
schedule[task["name"]] = {
"task": task["task"],
"schedule": crontab(minute, hour, day_week, day_month, month),
- "options": task["options"],
+ "options": {
+ **task["options"],
+ # PaperlessTask.TriggerSource.SCHEDULED -- models can't be imported here
+ "headers": {"trigger_source": "scheduled"},
+ },
}
return schedule
diff --git a/src/paperless/tests/settings/test_custom_parsers.py b/src/paperless/tests/settings/test_custom_parsers.py
index 0443c5e99..107d57bb2 100644
--- a/src/paperless/tests/settings/test_custom_parsers.py
+++ b/src/paperless/tests/settings/test_custom_parsers.py
@@ -186,42 +186,66 @@ def make_expected_schedule(
"Check all e-mail accounts": {
"task": "paperless_mail.tasks.process_mail_accounts",
"schedule": crontab(minute="*/10"),
- "options": {"expires": mail_expire},
+ "options": {
+ "expires": mail_expire,
+ "headers": {"trigger_source": "scheduled"},
+ },
},
"Train the classifier": {
"task": "documents.tasks.train_classifier",
"schedule": crontab(minute="5", hour="*/1"),
- "options": {"expires": classifier_expire},
+ "options": {
+ "expires": classifier_expire,
+ "headers": {"trigger_source": "scheduled"},
+ },
},
"Optimize the index": {
"task": "documents.tasks.index_optimize",
"schedule": crontab(minute=0, hour=0),
- "options": {"expires": index_expire},
+ "options": {
+ "expires": index_expire,
+ "headers": {"trigger_source": "scheduled"},
+ },
},
"Perform sanity check": {
"task": "documents.tasks.sanity_check",
"schedule": crontab(minute=30, hour=0, day_of_week="sun"),
- "options": {"expires": sanity_expire},
+ "options": {
+ "expires": sanity_expire,
+ "headers": {"trigger_source": "scheduled"},
+ },
},
"Empty trash": {
"task": "documents.tasks.empty_trash",
"schedule": crontab(minute=0, hour="1"),
- "options": {"expires": empty_trash_expire},
+ "options": {
+ "expires": empty_trash_expire,
+ "headers": {"trigger_source": "scheduled"},
+ },
},
"Check and run scheduled workflows": {
"task": "documents.tasks.check_scheduled_workflows",
"schedule": crontab(minute="5", hour="*/1"),
- "options": {"expires": workflow_expire},
+ "options": {
+ "expires": workflow_expire,
+ "headers": {"trigger_source": "scheduled"},
+ },
},
"Rebuild LLM index": {
"task": "documents.tasks.llmindex_index",
"schedule": crontab(minute="10", hour="2"),
- "options": {"expires": llm_index_expire},
+ "options": {
+ "expires": llm_index_expire,
+ "headers": {"trigger_source": "scheduled"},
+ },
},
"Cleanup expired share link bundles": {
"task": "documents.tasks.cleanup_expired_share_link_bundles",
"schedule": crontab(minute=0, hour="2"),
- "options": {"expires": share_link_cleanup_expire},
+ "options": {
+ "expires": share_link_cleanup_expire,
+ "headers": {"trigger_source": "scheduled"},
+ },
},
}
@@ -284,6 +308,16 @@ class TestParseBeatSchedule:
schedule = parse_beat_schedule()
assert schedule == expected
+ def test_parse_beat_schedule_all_entries_have_trigger_source_header(self) -> None:
+ """Every beat entry must carry trigger_source=scheduled so the task signal
+ handler can identify scheduler-originated tasks."""
+ schedule = parse_beat_schedule()
+ for name, entry in schedule.items():
+ headers = entry.get("options", {}).get("headers", {})
+ assert headers.get("trigger_source") == "scheduled", (
+ f"Beat entry '{name}' is missing trigger_source header"
+ )
+
class TestParseDbSettings:
"""Test suite for parse_db_settings function."""
diff --git a/src/paperless/views.py b/src/paperless/views.py
index 9c0c99e5a..4d056ba68 100644
--- a/src/paperless/views.py
+++ b/src/paperless/views.py
@@ -38,6 +38,7 @@ from rest_framework.response import Response
from rest_framework.throttling import ScopedRateThrottle
from rest_framework.viewsets import ModelViewSet
+from documents.models import PaperlessTask
from documents.permissions import PaperlessObjectPermissions
from documents.tasks import llmindex_index
from paperless.filters import GroupFilterSet
@@ -427,10 +428,9 @@ class ApplicationConfigurationViewSet(ModelViewSet[ApplicationConfiguration]):
and not vector_store_file_exists()
):
# AI index was just enabled and vector store file does not exist
- llmindex_index.delay(
- rebuild=True,
- scheduled=False,
- auto=True,
+ llmindex_index.apply_async(
+ kwargs={"rebuild": True},
+ headers={"trigger_source": PaperlessTask.TriggerSource.SYSTEM},
)
diff --git a/src/paperless_ai/indexing.py b/src/paperless_ai/indexing.py
index a54492f1f..b8c865214 100644
--- a/src/paperless_ai/indexing.py
+++ b/src/paperless_ai/indexing.py
@@ -4,7 +4,6 @@ from datetime import timedelta
from pathlib import Path
from typing import TYPE_CHECKING
-from celery import states
from django.conf import settings
from django.utils import timezone
@@ -28,17 +27,20 @@ def queue_llm_index_update_if_needed(*, rebuild: bool, reason: str) -> bool:
from documents.tasks import llmindex_index
has_running = PaperlessTask.objects.filter(
- task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,
- status__in=[states.PENDING, states.STARTED],
+ task_type=PaperlessTask.TaskType.LLM_INDEX,
+ status__in=[PaperlessTask.Status.PENDING, PaperlessTask.Status.STARTED],
).exists()
has_recent = PaperlessTask.objects.filter(
- task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,
+ task_type=PaperlessTask.TaskType.LLM_INDEX,
date_created__gte=(timezone.now() - timedelta(minutes=5)),
).exists()
if has_running or has_recent:
return False
- llmindex_index.delay(rebuild=rebuild, scheduled=False, auto=True)
+ llmindex_index.apply_async(
+ kwargs={"rebuild": rebuild},
+ headers={"trigger_source": PaperlessTask.TriggerSource.SYSTEM},
+ )
logger.warning(
"Queued LLM index update%s: %s",
" (rebuild)" if rebuild else "",
diff --git a/src/paperless_ai/tests/test_ai_indexing.py b/src/paperless_ai/tests/test_ai_indexing.py
index c1e3b64d8..7d9f3cdd5 100644
--- a/src/paperless_ai/tests/test_ai_indexing.py
+++ b/src/paperless_ai/tests/test_ai_indexing.py
@@ -3,13 +3,13 @@ from unittest.mock import MagicMock
from unittest.mock import patch
import pytest
-from celery import states
from django.test import override_settings
from django.utils import timezone
from llama_index.core.base.embeddings.base import BaseEmbedding
from documents.models import Document
from documents.models import PaperlessTask
+from documents.tests.factories import PaperlessTaskFactory
from paperless_ai import indexing
@@ -292,13 +292,15 @@ def test_queue_llm_index_update_if_needed_enqueues_when_idle_or_skips_recent() -
)
assert result is True
- mock_task.delay.assert_called_once_with(rebuild=True, scheduled=False, auto=True)
+ mock_task.apply_async.assert_called_once_with(
+ kwargs={"rebuild": True},
+ headers={"trigger_source": "system"},
+ )
- PaperlessTask.objects.create(
- task_id="task-1",
- task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,
- status=states.STARTED,
- date_created=timezone.now(),
+ PaperlessTaskFactory(
+ task_type=PaperlessTask.TaskType.LLM_INDEX,
+ trigger_source=PaperlessTask.TriggerSource.SYSTEM,
+ status=PaperlessTask.Status.STARTED,
)
# Existing running task
@@ -309,7 +311,7 @@ def test_queue_llm_index_update_if_needed_enqueues_when_idle_or_skips_recent() -
)
assert result is False
- mock_task.delay.assert_not_called()
+ mock_task.apply_async.assert_not_called()
@override_settings(
diff --git a/uv.lock b/uv.lock
index d4bb929cc..5d5a931bc 100644
--- a/uv.lock
+++ b/uv.lock
@@ -935,19 +935,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/12/bf/af8ad2aa5a402f278b444ca70729fb12ee96ddb89c19c32a2d7c5189358f/django_cachalot-2.9.0-py3-none-any.whl", hash = "sha256:b80ac4930613a7849988ea772a53598d262a15eaf55e5ec8c78accae7fdd99ff", size = 57814, upload-time = "2026-01-28T05:23:28.741Z" },
]
-[[package]]
-name = "django-celery-results"
-version = "2.6.0"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
- { name = "celery", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
- { name = "django", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/a6/b5/9966c28e31014c228305e09d48b19b35522a8f941fe5af5f81f40dc8fa80/django_celery_results-2.6.0.tar.gz", hash = "sha256:9abcd836ae6b61063779244d8887a88fe80bbfaba143df36d3cb07034671277c", size = 83985, upload-time = "2025-04-10T08:23:52.677Z" }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/2c/da/70f0f3c5364735344c4bc89e53413bcaae95b4fc1de4e98a7a3b9fb70c88/django_celery_results-2.6.0-py3-none-any.whl", hash = "sha256:b9ccdca2695b98c7cbbb8dea742311ba9a92773d71d7b4944a676e69a7df1c73", size = 38351, upload-time = "2025-04-10T08:23:49.965Z" },
-]
-
[[package]]
name = "django-compression-middleware"
version = "0.5.0"
@@ -2869,7 +2856,6 @@ dependencies = [
{ name = "django-allauth", extra = ["mfa", "socialaccount"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "django-auditlog", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "django-cachalot", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
- { name = "django-celery-results", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "django-compression-middleware", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "django-cors-headers", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "django-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -3018,7 +3004,6 @@ requires-dist = [
{ name = "django-allauth", extras = ["mfa", "socialaccount"], specifier = "~=65.15.0" },
{ name = "django-auditlog", specifier = "~=3.4.1" },
{ name = "django-cachalot", specifier = "~=2.9.0" },
- { name = "django-celery-results", specifier = "~=2.6.0" },
{ name = "django-compression-middleware", specifier = "~=0.5.0" },
{ name = "django-cors-headers", specifier = "~=4.9.0" },
{ name = "django-extensions", specifier = "~=4.1" },
|