mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-04-15 20:48:53 +00:00
Compare commits
15 Commits
feature-fu
...
feature-ta
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
71c8077ca2 | ||
|
|
fca565a169 | ||
|
|
9cbca02050 | ||
|
|
e00b8e7afa | ||
|
|
8e4ad33702 | ||
|
|
48db462c2c | ||
|
|
e4f11d6cfa | ||
|
|
bedb965b84 | ||
|
|
901237962b | ||
|
|
c4794c0fe7 | ||
|
|
425c32122a | ||
|
|
f5c0834ce3 | ||
|
|
bc561135e5 | ||
|
|
c306a4ffe7 | ||
|
|
0780b7e506 |
@@ -28,7 +28,6 @@ dependencies = [
|
||||
"django-allauth[mfa,socialaccount]~=65.15.0",
|
||||
"django-auditlog~=3.4.1",
|
||||
"django-cachalot~=2.9.0",
|
||||
"django-celery-results~=2.6.0",
|
||||
"django-compression-middleware~=0.5.0",
|
||||
"django-cors-headers~=4.9.0",
|
||||
"django-extensions~=4.1",
|
||||
@@ -312,6 +311,7 @@ markers = [
|
||||
"date_parsing: Tests which cover date parsing from content or filename",
|
||||
"management: Tests which cover management commands/functionality",
|
||||
"search: Tests for the Tantivy search backend",
|
||||
"api: Tests for REST API endpoints",
|
||||
]
|
||||
|
||||
[tool.pytest_env]
|
||||
|
||||
@@ -76,33 +76,27 @@
|
||||
<label class="form-check-label" for="task{{task.id}}"></label>
|
||||
</div>
|
||||
</td>
|
||||
<td class="overflow-auto name-col">{{ task.task_file_name }}</td>
|
||||
<td class="overflow-auto name-col">{{ task.input_data?.filename }}</td>
|
||||
<td class="d-none d-lg-table-cell">{{ task.date_created | customDate:'short' }}</td>
|
||||
@if (activeTab !== 'started' && activeTab !== 'queued') {
|
||||
<td class="d-none d-lg-table-cell">
|
||||
@if (task.result?.length > 50) {
|
||||
@if (task.result_message?.length > 50) {
|
||||
<div class="result" (click)="expandTask(task); $event.stopPropagation();"
|
||||
[ngbPopover]="resultPopover" popoverClass="shadow small mobile" triggers="mouseenter:mouseleave" container="body">
|
||||
<span class="small d-none d-md-inline-block font-monospace text-muted">{{ task.result | slice:0:50 }}…</span>
|
||||
<span class="small d-none d-md-inline-block font-monospace text-muted">{{ task.result_message | slice:0:50 }}…</span>
|
||||
</div>
|
||||
}
|
||||
@if (task.result?.length <= 50) {
|
||||
<span class="small d-none d-md-inline-block font-monospace text-muted">{{ task.result }}</span>
|
||||
@if (task.result_message?.length <= 50) {
|
||||
<span class="small d-none d-md-inline-block font-monospace text-muted">{{ task.result_message }}</span>
|
||||
}
|
||||
<ng-template #resultPopover>
|
||||
<pre class="small mb-0">{{ task.result | slice:0:300 }}@if (task.result.length > 300) {
|
||||
<pre class="small mb-0">{{ task.result_message | slice:0:300 }}@if (task.result_message.length > 300) {
|
||||
…
|
||||
}</pre>
|
||||
@if (task.result?.length > 300) {
|
||||
@if (task.result_message?.length > 300) {
|
||||
<br/><em>(<ng-container i18n>click for full output</ng-container>)</em>
|
||||
}
|
||||
</ng-template>
|
||||
@if (task.duplicate_documents?.length > 0) {
|
||||
<div class="small text-warning-emphasis d-flex align-items-center gap-1">
|
||||
<i-bs class="lh-1" width="1em" height="1em" name="exclamation-triangle"></i-bs>
|
||||
<span i18n>Duplicate(s) detected</span>
|
||||
</div>
|
||||
}
|
||||
</td>
|
||||
}
|
||||
<td class="d-lg-none">
|
||||
@@ -116,7 +110,7 @@
|
||||
<i-bs name="check" class="me-1"></i-bs><ng-container i18n>Dismiss</ng-container>
|
||||
</button>
|
||||
<ng-container *pngxIfPermissions="{ action: PermissionAction.View, type: PermissionType.Document }">
|
||||
@if (task.related_document) {
|
||||
@if (task.related_document_ids?.[0]) {
|
||||
<button class="btn btn-sm btn-outline-primary" (click)="dismissAndGo(task); $event.stopPropagation();">
|
||||
<i-bs name="file-text" class="me-1"></i-bs><ng-container i18n>Open Document</ng-container>
|
||||
</button>
|
||||
@@ -127,7 +121,7 @@
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="p-0" [class.border-0]="expandedTask !== task.id" colspan="5">
|
||||
<pre #collapse="ngbCollapse" [ngbCollapse]="expandedTask !== task.id" class="small mb-0"><div class="small p-1 p-lg-3 ms-lg-3">{{ task.result }}</div></pre>
|
||||
<pre #collapse="ngbCollapse" [ngbCollapse]="expandedTask !== task.id" class="small mb-0"><div class="small p-1 p-lg-3 ms-lg-3">{{ task.result_message }}</div></pre>
|
||||
</td>
|
||||
</tr>
|
||||
}
|
||||
|
||||
@@ -20,8 +20,8 @@ import { throwError } from 'rxjs'
|
||||
import { routes } from 'src/app/app-routing.module'
|
||||
import {
|
||||
PaperlessTask,
|
||||
PaperlessTaskName,
|
||||
PaperlessTaskStatus,
|
||||
PaperlessTaskTriggerSource,
|
||||
PaperlessTaskType,
|
||||
} from 'src/app/data/paperless-task'
|
||||
import { IfPermissionsDirective } from 'src/app/directives/if-permissions.directive'
|
||||
@@ -39,81 +39,100 @@ const tasks: PaperlessTask[] = [
|
||||
{
|
||||
id: 467,
|
||||
task_id: '11ca1a5b-9f81-442c-b2c8-7e4ae53657f1',
|
||||
task_file_name: 'test.pdf',
|
||||
input_data: { filename: 'test.pdf' },
|
||||
date_created: new Date('2023-03-01T10:26:03.093116Z'),
|
||||
date_done: new Date('2023-03-01T10:26:07.223048Z'),
|
||||
type: PaperlessTaskType.Auto,
|
||||
task_name: PaperlessTaskName.ConsumeFile,
|
||||
status: PaperlessTaskStatus.Failed,
|
||||
result: 'test.pd: Not consuming test.pdf: It is a duplicate of test (#100)',
|
||||
task_type: PaperlessTaskType.ConsumeFile,
|
||||
task_type_display: 'Consume File',
|
||||
trigger_source: PaperlessTaskTriggerSource.FolderConsume,
|
||||
trigger_source_display: 'Folder Consume',
|
||||
status: PaperlessTaskStatus.Failure,
|
||||
status_display: 'Failure',
|
||||
result_message:
|
||||
'test.pd: Not consuming test.pdf: It is a duplicate of test (#100)',
|
||||
acknowledged: false,
|
||||
related_document: null,
|
||||
related_document_ids: [],
|
||||
},
|
||||
{
|
||||
id: 466,
|
||||
task_id: '10ca1a5b-3c08-442c-b2c8-7e4ae53657f1',
|
||||
task_file_name: '191092.pdf',
|
||||
input_data: { filename: '191092.pdf' },
|
||||
date_created: new Date('2023-03-01T09:26:03.093116Z'),
|
||||
date_done: new Date('2023-03-01T09:26:07.223048Z'),
|
||||
type: PaperlessTaskType.Auto,
|
||||
task_name: PaperlessTaskName.ConsumeFile,
|
||||
status: PaperlessTaskStatus.Failed,
|
||||
result:
|
||||
task_type: PaperlessTaskType.ConsumeFile,
|
||||
task_type_display: 'Consume File',
|
||||
trigger_source: PaperlessTaskTriggerSource.FolderConsume,
|
||||
trigger_source_display: 'Folder Consume',
|
||||
status: PaperlessTaskStatus.Failure,
|
||||
status_display: 'Failure',
|
||||
result_message:
|
||||
'191092.pd: Not consuming 191092.pdf: It is a duplicate of 191092 (#311)',
|
||||
acknowledged: false,
|
||||
related_document: null,
|
||||
related_document_ids: [],
|
||||
},
|
||||
{
|
||||
id: 465,
|
||||
task_id: '3612d477-bb04-44e3-985b-ac580dd496d8',
|
||||
task_file_name: 'Scan Jun 6, 2023 at 3.19 PM.pdf',
|
||||
input_data: { filename: 'Scan Jun 6, 2023 at 3.19 PM.pdf' },
|
||||
date_created: new Date('2023-06-06T15:22:05.722323-07:00'),
|
||||
date_done: new Date('2023-06-06T15:22:14.564305-07:00'),
|
||||
type: PaperlessTaskType.Auto,
|
||||
task_name: PaperlessTaskName.ConsumeFile,
|
||||
task_type: PaperlessTaskType.ConsumeFile,
|
||||
task_type_display: 'Consume File',
|
||||
trigger_source: PaperlessTaskTriggerSource.FolderConsume,
|
||||
trigger_source_display: 'Folder Consume',
|
||||
status: PaperlessTaskStatus.Pending,
|
||||
result: null,
|
||||
status_display: 'Pending',
|
||||
result_message: null,
|
||||
acknowledged: false,
|
||||
related_document: null,
|
||||
related_document_ids: [],
|
||||
},
|
||||
{
|
||||
id: 464,
|
||||
task_id: '2eac4716-2aa6-4dcd-9953-264e11656d7e',
|
||||
task_file_name: 'paperless-mail-l4dkg8ir',
|
||||
input_data: { filename: 'paperless-mail-l4dkg8ir' },
|
||||
date_created: new Date('2023-06-04T11:24:32.898089-07:00'),
|
||||
date_done: new Date('2023-06-04T11:24:44.678605-07:00'),
|
||||
type: PaperlessTaskType.Auto,
|
||||
task_name: PaperlessTaskName.ConsumeFile,
|
||||
status: PaperlessTaskStatus.Complete,
|
||||
result: 'Success. New document id 422 created',
|
||||
task_type: PaperlessTaskType.ConsumeFile,
|
||||
task_type_display: 'Consume File',
|
||||
trigger_source: PaperlessTaskTriggerSource.EmailConsume,
|
||||
trigger_source_display: 'Email Consume',
|
||||
status: PaperlessTaskStatus.Success,
|
||||
status_display: 'Success',
|
||||
result_message: 'Success. New document id 422 created',
|
||||
acknowledged: false,
|
||||
related_document: 422,
|
||||
related_document_ids: [422],
|
||||
},
|
||||
{
|
||||
id: 463,
|
||||
task_id: '28125528-1575-4d6b-99e6-168906e8fa5c',
|
||||
task_file_name: 'onlinePaymentSummary.pdf',
|
||||
input_data: { filename: 'onlinePaymentSummary.pdf' },
|
||||
date_created: new Date('2023-06-01T13:49:51.631305-07:00'),
|
||||
date_done: new Date('2023-06-01T13:49:54.190220-07:00'),
|
||||
type: PaperlessTaskType.Auto,
|
||||
task_name: PaperlessTaskName.ConsumeFile,
|
||||
status: PaperlessTaskStatus.Complete,
|
||||
result: 'Success. New document id 421 created',
|
||||
task_type: PaperlessTaskType.ConsumeFile,
|
||||
task_type_display: 'Consume File',
|
||||
trigger_source: PaperlessTaskTriggerSource.FolderConsume,
|
||||
trigger_source_display: 'Folder Consume',
|
||||
status: PaperlessTaskStatus.Success,
|
||||
status_display: 'Success',
|
||||
result_message: 'Success. New document id 421 created',
|
||||
acknowledged: false,
|
||||
related_document: 421,
|
||||
related_document_ids: [421],
|
||||
},
|
||||
{
|
||||
id: 462,
|
||||
task_id: 'a5b9ca47-0c8e-490f-a04c-6db5d5fc09e5',
|
||||
task_file_name: 'paperless-mail-_rrpmqk6',
|
||||
input_data: { filename: 'paperless-mail-_rrpmqk6' },
|
||||
date_created: new Date('2023-06-07T02:54:35.694916Z'),
|
||||
date_done: null,
|
||||
type: PaperlessTaskType.Auto,
|
||||
task_name: PaperlessTaskName.ConsumeFile,
|
||||
task_type: PaperlessTaskType.ConsumeFile,
|
||||
task_type_display: 'Consume File',
|
||||
trigger_source: PaperlessTaskTriggerSource.EmailConsume,
|
||||
trigger_source_display: 'Email Consume',
|
||||
status: PaperlessTaskStatus.Started,
|
||||
result: null,
|
||||
status_display: 'Started',
|
||||
result_message: null,
|
||||
acknowledged: false,
|
||||
related_document: null,
|
||||
related_document_ids: [],
|
||||
},
|
||||
]
|
||||
|
||||
@@ -167,7 +186,7 @@ describe('TasksComponent', () => {
|
||||
fixture.detectChanges()
|
||||
httpTestingController
|
||||
.expectOne(
|
||||
`${environment.apiBaseUrl}tasks/?task_name=consume_file&acknowledged=false`
|
||||
`${environment.apiBaseUrl}tasks/?task_type=consume_file&acknowledged=false`
|
||||
)
|
||||
.flush(tasks)
|
||||
})
|
||||
@@ -176,7 +195,7 @@ describe('TasksComponent', () => {
|
||||
const tabButtons = fixture.debugElement.queryAll(By.directive(NgbNavItem))
|
||||
|
||||
let currentTasksLength = tasks.filter(
|
||||
(t) => t.status === PaperlessTaskStatus.Failed
|
||||
(t) => t.status === PaperlessTaskStatus.Failure
|
||||
).length
|
||||
component.activeTab = TaskTab.Failed
|
||||
fixture.detectChanges()
|
||||
@@ -188,7 +207,7 @@ describe('TasksComponent', () => {
|
||||
).toHaveLength(currentTasksLength + 1)
|
||||
|
||||
currentTasksLength = tasks.filter(
|
||||
(t) => t.status === PaperlessTaskStatus.Complete
|
||||
(t) => t.status === PaperlessTaskStatus.Success
|
||||
).length
|
||||
component.activeTab = TaskTab.Completed
|
||||
fixture.detectChanges()
|
||||
@@ -308,7 +327,7 @@ describe('TasksComponent', () => {
|
||||
expect(component.selectedTasks).toEqual(
|
||||
new Set(
|
||||
tasks
|
||||
.filter((t) => t.status === PaperlessTaskStatus.Failed)
|
||||
.filter((t) => t.status === PaperlessTaskStatus.Failure)
|
||||
.map((t) => t.id)
|
||||
)
|
||||
)
|
||||
@@ -322,7 +341,7 @@ describe('TasksComponent', () => {
|
||||
component.dismissAndGo(tasks[3])
|
||||
expect(routerSpy).toHaveBeenCalledWith([
|
||||
'documents',
|
||||
tasks[3].related_document,
|
||||
tasks[3].related_document_ids?.[0],
|
||||
])
|
||||
})
|
||||
|
||||
|
||||
@@ -175,7 +175,7 @@ export class TasksComponent
|
||||
|
||||
dismissAndGo(task: PaperlessTask) {
|
||||
this.dismissTask(task)
|
||||
this.router.navigate(['documents', task.related_document])
|
||||
this.router.navigate(['documents', task.related_document_ids?.[0]])
|
||||
}
|
||||
|
||||
expandTask(task: PaperlessTask) {
|
||||
@@ -207,11 +207,13 @@ export class TasksComponent
|
||||
if (this._filterText.length) {
|
||||
tasks = tasks.filter((t) => {
|
||||
if (this.filterTargetID == TaskFilterTargetID.Name) {
|
||||
return t.task_file_name
|
||||
.toLowerCase()
|
||||
return (t.input_data?.filename as string)
|
||||
?.toLowerCase()
|
||||
.includes(this._filterText.toLowerCase())
|
||||
} else if (this.filterTargetID == TaskFilterTargetID.Result) {
|
||||
return t.result.toLowerCase().includes(this._filterText.toLowerCase())
|
||||
return t.result_message
|
||||
?.toLowerCase()
|
||||
.includes(this._filterText.toLowerCase())
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
@@ -169,10 +169,10 @@
|
||||
}
|
||||
</button>
|
||||
@if (currentUserIsSuperUser) {
|
||||
@if (isRunning(PaperlessTaskName.IndexOptimize)) {
|
||||
@if (isRunning(PaperlessTaskType.IndexOptimize)) {
|
||||
<div class="spinner-border spinner-border-sm ms-2" role="status"></div>
|
||||
} @else {
|
||||
<button class="btn btn-sm d-flex align-items-center btn-dark small ms-2" (click)="runTask(PaperlessTaskName.IndexOptimize)">
|
||||
<button class="btn btn-sm d-flex align-items-center btn-dark small ms-2" (click)="runTask(PaperlessTaskType.IndexOptimize)">
|
||||
<i-bs name="play-fill" class="me-1"></i-bs>
|
||||
<ng-container i18n>Run Task</ng-container>
|
||||
</button>
|
||||
@@ -203,10 +203,10 @@
|
||||
}
|
||||
</button>
|
||||
@if (currentUserIsSuperUser) {
|
||||
@if (isRunning(PaperlessTaskName.TrainClassifier)) {
|
||||
@if (isRunning(PaperlessTaskType.TrainClassifier)) {
|
||||
<div class="spinner-border spinner-border-sm ms-2" role="status"></div>
|
||||
} @else {
|
||||
<button class="btn btn-sm d-flex align-items-center btn-dark small ms-2" (click)="runTask(PaperlessTaskName.TrainClassifier)">
|
||||
<button class="btn btn-sm d-flex align-items-center btn-dark small ms-2" (click)="runTask(PaperlessTaskType.TrainClassifier)">
|
||||
<i-bs name="play-fill" class="me-1"></i-bs>
|
||||
<ng-container i18n>Run Task</ng-container>
|
||||
</button>
|
||||
@@ -237,10 +237,10 @@
|
||||
}
|
||||
</button>
|
||||
@if (currentUserIsSuperUser) {
|
||||
@if (isRunning(PaperlessTaskName.SanityCheck)) {
|
||||
@if (isRunning(PaperlessTaskType.SanityCheck)) {
|
||||
<div class="spinner-border spinner-border-sm ms-2" role="status"></div>
|
||||
} @else {
|
||||
<button class="btn btn-sm d-flex align-items-center btn-dark small ms-2" (click)="runTask(PaperlessTaskName.SanityCheck)">
|
||||
<button class="btn btn-sm d-flex align-items-center btn-dark small ms-2" (click)="runTask(PaperlessTaskType.SanityCheck)">
|
||||
<i-bs name="play-fill" class="me-1"></i-bs>
|
||||
<ng-container i18n>Run Task</ng-container>
|
||||
</button>
|
||||
@@ -285,10 +285,10 @@
|
||||
}
|
||||
</button>
|
||||
@if (currentUserIsSuperUser) {
|
||||
@if (isRunning(PaperlessTaskName.LLMIndexUpdate)) {
|
||||
@if (isRunning(PaperlessTaskType.LlmIndex)) {
|
||||
<div class="spinner-border spinner-border-sm ms-2" role="status"></div>
|
||||
} @else {
|
||||
<button class="btn btn-sm d-flex align-items-center btn-dark small ms-2" (click)="runTask(PaperlessTaskName.LLMIndexUpdate)">
|
||||
<button class="btn btn-sm d-flex align-items-center btn-dark small ms-2" (click)="runTask(PaperlessTaskType.LlmIndex)">
|
||||
<i-bs name="play-fill" class="me-1"></i-bs>
|
||||
<ng-container i18n>Run Task</ng-container>
|
||||
</button>
|
||||
|
||||
@@ -25,7 +25,7 @@ import {
|
||||
import { NgbActiveModal } from '@ng-bootstrap/ng-bootstrap'
|
||||
import { NgxBootstrapIconsModule, allIcons } from 'ngx-bootstrap-icons'
|
||||
import { Subject, of, throwError } from 'rxjs'
|
||||
import { PaperlessTaskName } from 'src/app/data/paperless-task'
|
||||
import { PaperlessTaskType } from 'src/app/data/paperless-task'
|
||||
import {
|
||||
InstallType,
|
||||
SystemStatus,
|
||||
@@ -138,9 +138,9 @@ describe('SystemStatusDialogComponent', () => {
|
||||
})
|
||||
|
||||
it('should check if task is running', () => {
|
||||
component.runTask(PaperlessTaskName.IndexOptimize)
|
||||
expect(component.isRunning(PaperlessTaskName.IndexOptimize)).toBeTruthy()
|
||||
expect(component.isRunning(PaperlessTaskName.SanityCheck)).toBeFalsy()
|
||||
component.runTask(PaperlessTaskType.IndexOptimize)
|
||||
expect(component.isRunning(PaperlessTaskType.IndexOptimize)).toBeTruthy()
|
||||
expect(component.isRunning(PaperlessTaskType.SanityCheck)).toBeFalsy()
|
||||
})
|
||||
|
||||
it('should support running tasks, refresh status and show toasts', () => {
|
||||
@@ -151,22 +151,22 @@ describe('SystemStatusDialogComponent', () => {
|
||||
|
||||
// fail first
|
||||
runSpy.mockReturnValue(throwError(() => new Error('error')))
|
||||
component.runTask(PaperlessTaskName.IndexOptimize)
|
||||
expect(runSpy).toHaveBeenCalledWith(PaperlessTaskName.IndexOptimize)
|
||||
component.runTask(PaperlessTaskType.IndexOptimize)
|
||||
expect(runSpy).toHaveBeenCalledWith(PaperlessTaskType.IndexOptimize)
|
||||
expect(toastErrorSpy).toHaveBeenCalledWith(
|
||||
`Failed to start task ${PaperlessTaskName.IndexOptimize}, see the logs for more details`,
|
||||
`Failed to start task ${PaperlessTaskType.IndexOptimize}, see the logs for more details`,
|
||||
expect.any(Error)
|
||||
)
|
||||
|
||||
// succeed
|
||||
runSpy.mockReturnValue(of({}))
|
||||
getStatusSpy.mockReturnValue(of(status))
|
||||
component.runTask(PaperlessTaskName.IndexOptimize)
|
||||
expect(runSpy).toHaveBeenCalledWith(PaperlessTaskName.IndexOptimize)
|
||||
component.runTask(PaperlessTaskType.IndexOptimize)
|
||||
expect(runSpy).toHaveBeenCalledWith(PaperlessTaskType.IndexOptimize)
|
||||
|
||||
expect(getStatusSpy).toHaveBeenCalled()
|
||||
expect(toastSpy).toHaveBeenCalledWith(
|
||||
`Task ${PaperlessTaskName.IndexOptimize} started`
|
||||
`Task ${PaperlessTaskType.IndexOptimize} started`
|
||||
)
|
||||
})
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@ import {
|
||||
} from '@ng-bootstrap/ng-bootstrap'
|
||||
import { NgxBootstrapIconsModule } from 'ngx-bootstrap-icons'
|
||||
import { Subject, takeUntil } from 'rxjs'
|
||||
import { PaperlessTaskName } from 'src/app/data/paperless-task'
|
||||
import { PaperlessTaskType } from 'src/app/data/paperless-task'
|
||||
import {
|
||||
SystemStatus,
|
||||
SystemStatusItemStatus,
|
||||
@@ -49,14 +49,14 @@ export class SystemStatusDialogComponent implements OnInit, OnDestroy {
|
||||
private settingsService = inject(SettingsService)
|
||||
|
||||
public SystemStatusItemStatus = SystemStatusItemStatus
|
||||
public PaperlessTaskName = PaperlessTaskName
|
||||
public PaperlessTaskType = PaperlessTaskType
|
||||
public status: SystemStatus
|
||||
public frontendVersion: string = environment.version
|
||||
public versionMismatch: boolean = false
|
||||
|
||||
public copied: boolean = false
|
||||
|
||||
private runningTasks: Set<PaperlessTaskName> = new Set()
|
||||
private runningTasks: Set<PaperlessTaskType> = new Set()
|
||||
private unsubscribeNotifier: Subject<any> = new Subject()
|
||||
|
||||
get currentUserIsSuperUser(): boolean {
|
||||
@@ -107,11 +107,11 @@ export class SystemStatusDialogComponent implements OnInit, OnDestroy {
|
||||
return now.getTime() - date.getTime() > hours * 60 * 60 * 1000
|
||||
}
|
||||
|
||||
public isRunning(taskName: PaperlessTaskName): boolean {
|
||||
public isRunning(taskName: PaperlessTaskType): boolean {
|
||||
return this.runningTasks.has(taskName)
|
||||
}
|
||||
|
||||
public runTask(taskName: PaperlessTaskName) {
|
||||
public runTask(taskName: PaperlessTaskType) {
|
||||
this.runningTasks.add(taskName)
|
||||
this.toastService.showInfo(`Task ${taskName} started`)
|
||||
this.tasksService.run(taskName).subscribe({
|
||||
|
||||
@@ -1,49 +1,63 @@
|
||||
import { Document } from './document'
|
||||
import { ObjectWithId } from './object-with-id'
|
||||
|
||||
export enum PaperlessTaskType {
|
||||
Auto = 'auto_task',
|
||||
ScheduledTask = 'scheduled_task',
|
||||
ManualTask = 'manual_task',
|
||||
}
|
||||
|
||||
export enum PaperlessTaskName {
|
||||
ConsumeFile = 'consume_file',
|
||||
TrainClassifier = 'train_classifier',
|
||||
SanityCheck = 'check_sanity',
|
||||
SanityCheck = 'sanity_check',
|
||||
IndexOptimize = 'index_optimize',
|
||||
LLMIndexUpdate = 'llmindex_update',
|
||||
IndexRebuild = 'index_rebuild',
|
||||
MailFetch = 'mail_fetch',
|
||||
LlmIndex = 'llm_index',
|
||||
}
|
||||
|
||||
export enum PaperlessTaskTriggerSource {
|
||||
Scheduled = 'scheduled',
|
||||
WebUI = 'web_ui',
|
||||
ApiUpload = 'api_upload',
|
||||
FolderConsume = 'folder_consume',
|
||||
EmailConsume = 'email_consume',
|
||||
System = 'system',
|
||||
Manual = 'manual',
|
||||
}
|
||||
|
||||
export enum PaperlessTaskStatus {
|
||||
Pending = 'PENDING',
|
||||
Started = 'STARTED',
|
||||
Complete = 'SUCCESS',
|
||||
Failed = 'FAILURE',
|
||||
Pending = 'pending',
|
||||
Started = 'started',
|
||||
Success = 'success',
|
||||
Failure = 'failure',
|
||||
Revoked = 'revoked',
|
||||
}
|
||||
|
||||
export interface PaperlessTask extends ObjectWithId {
|
||||
type: PaperlessTaskType
|
||||
|
||||
status: PaperlessTaskStatus
|
||||
|
||||
acknowledged: boolean
|
||||
|
||||
task_id: string
|
||||
|
||||
task_file_name: string
|
||||
|
||||
task_name: PaperlessTaskName
|
||||
|
||||
task_type: PaperlessTaskType
|
||||
task_type_display: string
|
||||
trigger_source: PaperlessTaskTriggerSource
|
||||
trigger_source_display: string
|
||||
status: PaperlessTaskStatus
|
||||
status_display: string
|
||||
date_created: Date
|
||||
|
||||
date_started?: Date
|
||||
date_done?: Date
|
||||
|
||||
result?: string
|
||||
|
||||
related_document?: number
|
||||
|
||||
duplicate_documents?: Document[]
|
||||
|
||||
duration_seconds?: number
|
||||
wait_time_seconds?: number
|
||||
input_data: Record<string, unknown>
|
||||
result_data?: Record<string, unknown>
|
||||
result_message?: string
|
||||
related_document_ids: number[]
|
||||
acknowledged: boolean
|
||||
owner?: number
|
||||
}
|
||||
|
||||
export interface PaperlessTaskSummary {
|
||||
task_type: PaperlessTaskType
|
||||
total_count: number
|
||||
pending_count: number
|
||||
success_count: number
|
||||
failure_count: number
|
||||
avg_duration_seconds: number | null
|
||||
avg_wait_time_seconds: number | null
|
||||
last_run: Date | null
|
||||
last_success: Date | null
|
||||
last_failure: Date | null
|
||||
}
|
||||
|
||||
@@ -5,11 +5,7 @@ import {
|
||||
} from '@angular/common/http/testing'
|
||||
import { TestBed } from '@angular/core/testing'
|
||||
import { environment } from 'src/environments/environment'
|
||||
import {
|
||||
PaperlessTaskName,
|
||||
PaperlessTaskStatus,
|
||||
PaperlessTaskType,
|
||||
} from '../data/paperless-task'
|
||||
import { PaperlessTaskStatus, PaperlessTaskType } from '../data/paperless-task'
|
||||
import { TasksService } from './tasks.service'
|
||||
|
||||
describe('TasksService', () => {
|
||||
@@ -37,7 +33,7 @@ describe('TasksService', () => {
|
||||
it('calls tasks api endpoint on reload', () => {
|
||||
tasksService.reload()
|
||||
const req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}tasks/?task_name=consume_file&acknowledged=false`
|
||||
`${environment.apiBaseUrl}tasks/?task_type=consume_file&acknowledged=false`
|
||||
)
|
||||
expect(req.request.method).toEqual('GET')
|
||||
})
|
||||
@@ -46,7 +42,7 @@ describe('TasksService', () => {
|
||||
tasksService.loading = true
|
||||
tasksService.reload()
|
||||
httpTestingController.expectNone(
|
||||
`${environment.apiBaseUrl}tasks/?task_name=consume_file&acknowledged=false`
|
||||
`${environment.apiBaseUrl}tasks/?task_type=consume_file&acknowledged=false`
|
||||
)
|
||||
})
|
||||
|
||||
@@ -63,7 +59,7 @@ describe('TasksService', () => {
|
||||
// reload is then called
|
||||
httpTestingController
|
||||
.expectOne(
|
||||
`${environment.apiBaseUrl}tasks/?task_name=consume_file&acknowledged=false`
|
||||
`${environment.apiBaseUrl}tasks/?task_type=consume_file&acknowledged=false`
|
||||
)
|
||||
.flush([])
|
||||
})
|
||||
@@ -72,56 +68,56 @@ describe('TasksService', () => {
|
||||
expect(tasksService.total).toEqual(0)
|
||||
const mockTasks = [
|
||||
{
|
||||
type: PaperlessTaskType.Auto,
|
||||
task_name: PaperlessTaskName.ConsumeFile,
|
||||
status: PaperlessTaskStatus.Complete,
|
||||
task_type: PaperlessTaskType.ConsumeFile,
|
||||
status: PaperlessTaskStatus.Success,
|
||||
acknowledged: false,
|
||||
task_id: '1234',
|
||||
task_file_name: 'file1.pdf',
|
||||
input_data: { filename: 'file1.pdf' },
|
||||
date_created: new Date(),
|
||||
related_document_ids: [],
|
||||
},
|
||||
{
|
||||
type: PaperlessTaskType.Auto,
|
||||
task_name: PaperlessTaskName.ConsumeFile,
|
||||
status: PaperlessTaskStatus.Failed,
|
||||
task_type: PaperlessTaskType.ConsumeFile,
|
||||
status: PaperlessTaskStatus.Failure,
|
||||
acknowledged: false,
|
||||
task_id: '1235',
|
||||
task_file_name: 'file2.pdf',
|
||||
input_data: { filename: 'file2.pdf' },
|
||||
date_created: new Date(),
|
||||
related_document_ids: [],
|
||||
},
|
||||
{
|
||||
type: PaperlessTaskType.Auto,
|
||||
task_name: PaperlessTaskName.ConsumeFile,
|
||||
task_type: PaperlessTaskType.ConsumeFile,
|
||||
status: PaperlessTaskStatus.Pending,
|
||||
acknowledged: false,
|
||||
task_id: '1236',
|
||||
task_file_name: 'file3.pdf',
|
||||
input_data: { filename: 'file3.pdf' },
|
||||
date_created: new Date(),
|
||||
related_document_ids: [],
|
||||
},
|
||||
{
|
||||
type: PaperlessTaskType.Auto,
|
||||
task_name: PaperlessTaskName.ConsumeFile,
|
||||
task_type: PaperlessTaskType.ConsumeFile,
|
||||
status: PaperlessTaskStatus.Started,
|
||||
acknowledged: false,
|
||||
task_id: '1237',
|
||||
task_file_name: 'file4.pdf',
|
||||
input_data: { filename: 'file4.pdf' },
|
||||
date_created: new Date(),
|
||||
related_document_ids: [],
|
||||
},
|
||||
{
|
||||
type: PaperlessTaskType.Auto,
|
||||
task_name: PaperlessTaskName.ConsumeFile,
|
||||
status: PaperlessTaskStatus.Complete,
|
||||
task_type: PaperlessTaskType.ConsumeFile,
|
||||
status: PaperlessTaskStatus.Success,
|
||||
acknowledged: false,
|
||||
task_id: '1238',
|
||||
task_file_name: 'file5.pdf',
|
||||
input_data: { filename: 'file5.pdf' },
|
||||
date_created: new Date(),
|
||||
related_document_ids: [],
|
||||
},
|
||||
]
|
||||
|
||||
tasksService.reload()
|
||||
|
||||
const req = httpTestingController.expectOne(
|
||||
`${environment.apiBaseUrl}tasks/?task_name=consume_file&acknowledged=false`
|
||||
`${environment.apiBaseUrl}tasks/?task_type=consume_file&acknowledged=false`
|
||||
)
|
||||
|
||||
req.flush(mockTasks)
|
||||
@@ -134,9 +130,9 @@ describe('TasksService', () => {
|
||||
})
|
||||
|
||||
it('supports running tasks', () => {
|
||||
tasksService.run(PaperlessTaskName.SanityCheck).subscribe((res) => {
|
||||
tasksService.run(PaperlessTaskType.SanityCheck).subscribe((res) => {
|
||||
expect(res).toEqual({
|
||||
result: 'success',
|
||||
task_id: 'abc-123',
|
||||
})
|
||||
})
|
||||
const req = httpTestingController.expectOne(
|
||||
@@ -144,7 +140,7 @@ describe('TasksService', () => {
|
||||
)
|
||||
expect(req.request.method).toEqual('POST')
|
||||
req.flush({
|
||||
result: 'success',
|
||||
task_id: 'abc-123',
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
@@ -4,8 +4,8 @@ import { Observable, Subject } from 'rxjs'
|
||||
import { first, takeUntil, tap } from 'rxjs/operators'
|
||||
import {
|
||||
PaperlessTask,
|
||||
PaperlessTaskName,
|
||||
PaperlessTaskStatus,
|
||||
PaperlessTaskType,
|
||||
} from 'src/app/data/paperless-task'
|
||||
import { environment } from 'src/environments/environment'
|
||||
|
||||
@@ -18,7 +18,7 @@ export class TasksService {
|
||||
private baseUrl: string = environment.apiBaseUrl
|
||||
private endpoint: string = 'tasks'
|
||||
|
||||
public loading: boolean
|
||||
public loading: boolean = false
|
||||
|
||||
private fileTasks: PaperlessTask[] = []
|
||||
|
||||
@@ -33,21 +33,27 @@ export class TasksService {
|
||||
}
|
||||
|
||||
public get queuedFileTasks(): PaperlessTask[] {
|
||||
return this.fileTasks.filter((t) => t.status == PaperlessTaskStatus.Pending)
|
||||
return this.fileTasks.filter(
|
||||
(t) => t.status === PaperlessTaskStatus.Pending
|
||||
)
|
||||
}
|
||||
|
||||
public get startedFileTasks(): PaperlessTask[] {
|
||||
return this.fileTasks.filter((t) => t.status == PaperlessTaskStatus.Started)
|
||||
return this.fileTasks.filter(
|
||||
(t) => t.status === PaperlessTaskStatus.Started
|
||||
)
|
||||
}
|
||||
|
||||
public get completedFileTasks(): PaperlessTask[] {
|
||||
return this.fileTasks.filter(
|
||||
(t) => t.status == PaperlessTaskStatus.Complete
|
||||
(t) => t.status === PaperlessTaskStatus.Success
|
||||
)
|
||||
}
|
||||
|
||||
public get failedFileTasks(): PaperlessTask[] {
|
||||
return this.fileTasks.filter((t) => t.status == PaperlessTaskStatus.Failed)
|
||||
return this.fileTasks.filter(
|
||||
(t) => t.status === PaperlessTaskStatus.Failure
|
||||
)
|
||||
}
|
||||
|
||||
public reload() {
|
||||
@@ -56,18 +62,16 @@ export class TasksService {
|
||||
|
||||
this.http
|
||||
.get<PaperlessTask[]>(
|
||||
`${this.baseUrl}${this.endpoint}/?task_name=consume_file&acknowledged=false`
|
||||
`${this.baseUrl}${this.endpoint}/?task_type=${PaperlessTaskType.ConsumeFile}&acknowledged=false`
|
||||
)
|
||||
.pipe(takeUntil(this.unsubscribeNotifer), first())
|
||||
.subscribe((r) => {
|
||||
this.fileTasks = r.filter(
|
||||
(t) => t.task_name == PaperlessTaskName.ConsumeFile
|
||||
)
|
||||
this.fileTasks = r
|
||||
this.loading = false
|
||||
})
|
||||
}
|
||||
|
||||
public dismissTasks(task_ids: Set<number>) {
|
||||
public dismissTasks(task_ids: Set<number>): Observable<any> {
|
||||
return this.http
|
||||
.post(`${this.baseUrl}tasks/acknowledge/`, {
|
||||
tasks: [...task_ids],
|
||||
@@ -81,16 +85,24 @@ export class TasksService {
|
||||
)
|
||||
}
|
||||
|
||||
public dismissAllTasks(): Observable<any> {
|
||||
return this.http.post(`${this.baseUrl}tasks/acknowledge_all/`, {}).pipe(
|
||||
first(),
|
||||
takeUntil(this.unsubscribeNotifer),
|
||||
tap(() => {
|
||||
this.reload()
|
||||
})
|
||||
)
|
||||
}
|
||||
|
||||
public cancelPending(): void {
|
||||
this.unsubscribeNotifer.next(true)
|
||||
}
|
||||
|
||||
public run(taskName: PaperlessTaskName): Observable<any> {
|
||||
return this.http.post<any>(
|
||||
public run(taskType: PaperlessTaskType): Observable<{ task_id: string }> {
|
||||
return this.http.post<{ task_id: string }>(
|
||||
`${environment.apiBaseUrl}${this.endpoint}/run/`,
|
||||
{
|
||||
task_name: taskName,
|
||||
}
|
||||
{ task_type: taskType }
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -144,18 +144,30 @@ class StoragePathAdmin(GuardedModelAdmin):
|
||||
|
||||
|
||||
class TaskAdmin(admin.ModelAdmin):
|
||||
list_display = ("task_id", "task_file_name", "task_name", "date_done", "status")
|
||||
list_filter = ("status", "date_done", "task_name")
|
||||
search_fields = ("task_name", "task_id", "status", "task_file_name")
|
||||
list_display = (
|
||||
"task_id",
|
||||
"task_type",
|
||||
"trigger_source",
|
||||
"status",
|
||||
"date_created",
|
||||
"date_done",
|
||||
"duration_seconds",
|
||||
)
|
||||
list_filter = ("status", "task_type", "trigger_source", "date_done")
|
||||
search_fields = ("task_id", "task_type", "status")
|
||||
readonly_fields = (
|
||||
"task_id",
|
||||
"task_file_name",
|
||||
"task_name",
|
||||
"task_type",
|
||||
"trigger_source",
|
||||
"status",
|
||||
"date_created",
|
||||
"date_started",
|
||||
"date_done",
|
||||
"result",
|
||||
"duration_seconds",
|
||||
"wait_time_seconds",
|
||||
"input_data",
|
||||
"result_data",
|
||||
"result_message",
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -26,8 +26,10 @@ from django.db.models.functions import Cast
|
||||
from django.utils.translation import gettext_lazy as _
|
||||
from django_filters import DateFilter
|
||||
from django_filters.rest_framework import BooleanFilter
|
||||
from django_filters.rest_framework import DateTimeFilter
|
||||
from django_filters.rest_framework import Filter
|
||||
from django_filters.rest_framework import FilterSet
|
||||
from django_filters.rest_framework import MultipleChoiceFilter
|
||||
from drf_spectacular.utils import extend_schema_field
|
||||
from guardian.utils import get_group_obj_perms_model
|
||||
from guardian.utils import get_user_obj_perms_model
|
||||
@@ -862,18 +864,56 @@ class ShareLinkBundleFilterSet(FilterSet):
|
||||
|
||||
|
||||
class PaperlessTaskFilterSet(FilterSet):
|
||||
task_type = MultipleChoiceFilter(
|
||||
choices=PaperlessTask.TaskType.choices,
|
||||
label="Task Type",
|
||||
)
|
||||
|
||||
trigger_source = MultipleChoiceFilter(
|
||||
choices=PaperlessTask.TriggerSource.choices,
|
||||
label="Trigger Source",
|
||||
)
|
||||
|
||||
status = MultipleChoiceFilter(
|
||||
choices=PaperlessTask.Status.choices,
|
||||
label="Status",
|
||||
)
|
||||
|
||||
is_complete = BooleanFilter(
|
||||
method="filter_is_complete",
|
||||
label="Is Complete",
|
||||
)
|
||||
|
||||
acknowledged = BooleanFilter(
|
||||
label="Acknowledged",
|
||||
field_name="acknowledged",
|
||||
)
|
||||
|
||||
date_created_after = DateTimeFilter(
|
||||
field_name="date_created",
|
||||
lookup_expr="gte",
|
||||
label="Created After",
|
||||
)
|
||||
|
||||
date_created_before = DateTimeFilter(
|
||||
field_name="date_created",
|
||||
lookup_expr="lte",
|
||||
label="Created Before",
|
||||
)
|
||||
|
||||
class Meta:
|
||||
model = PaperlessTask
|
||||
fields = {
|
||||
"type": ["exact"],
|
||||
"task_name": ["exact"],
|
||||
"status": ["exact"],
|
||||
}
|
||||
fields = ["task_type", "trigger_source", "status", "acknowledged", "owner"]
|
||||
|
||||
def filter_is_complete(self, queryset, name, value):
|
||||
complete = [
|
||||
PaperlessTask.Status.SUCCESS,
|
||||
PaperlessTask.Status.FAILURE,
|
||||
PaperlessTask.Status.REVOKED,
|
||||
]
|
||||
if value:
|
||||
return queryset.filter(status__in=complete)
|
||||
return queryset.exclude(status__in=complete)
|
||||
|
||||
|
||||
class ObjectOwnedOrGrantedPermissionsFilter(ObjectPermissionsFilter):
|
||||
|
||||
@@ -22,7 +22,6 @@ class Command(PaperlessCommand):
|
||||
self.buffered_logging("paperless.classifier"),
|
||||
):
|
||||
train_classifier(
|
||||
scheduled=False,
|
||||
status_callback=lambda msg: self.console.print(f" {msg}"),
|
||||
)
|
||||
|
||||
|
||||
@@ -17,7 +17,6 @@ class Command(PaperlessCommand):
|
||||
def handle(self, *args: Any, **options: Any) -> None:
|
||||
llmindex_index(
|
||||
rebuild=options["command"] == "rebuild",
|
||||
scheduled=False,
|
||||
iter_wrapper=lambda docs: self.track(
|
||||
docs,
|
||||
description="Indexing documents...",
|
||||
|
||||
@@ -111,7 +111,6 @@ class Command(PaperlessCommand):
|
||||
|
||||
def handle(self, *args: Any, **options: Any) -> None:
|
||||
messages = check_sanity(
|
||||
scheduled=False,
|
||||
iter_wrapper=lambda docs: self.track(
|
||||
docs,
|
||||
description="Checking documents...",
|
||||
|
||||
213
src/documents/migrations/0019_task_system_redesign.py
Normal file
213
src/documents/migrations/0019_task_system_redesign.py
Normal file
@@ -0,0 +1,213 @@
|
||||
"""
|
||||
Drop and recreate the PaperlessTask table with the new structured schema.
|
||||
|
||||
We intentionally drop all existing task data -- the old schema was
|
||||
string-based and incompatible with the new JSONField result storage.
|
||||
"""
|
||||
|
||||
import django.db.models.deletion
|
||||
import django.utils.timezone
|
||||
from django.conf import settings
|
||||
from django.db import migrations
|
||||
from django.db import models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
dependencies = [
|
||||
("documents", "0018_saved_view_simple_search_rules"),
|
||||
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.DeleteModel(name="PaperlessTask"),
|
||||
migrations.CreateModel(
|
||||
name="PaperlessTask",
|
||||
fields=[
|
||||
(
|
||||
"id",
|
||||
models.AutoField(
|
||||
auto_created=True,
|
||||
primary_key=True,
|
||||
serialize=False,
|
||||
verbose_name="ID",
|
||||
),
|
||||
),
|
||||
(
|
||||
"owner",
|
||||
models.ForeignKey(
|
||||
blank=True,
|
||||
default=None,
|
||||
null=True,
|
||||
on_delete=django.db.models.deletion.SET_NULL,
|
||||
to=settings.AUTH_USER_MODEL,
|
||||
verbose_name="owner",
|
||||
),
|
||||
),
|
||||
(
|
||||
"task_id",
|
||||
models.CharField(
|
||||
help_text="Celery task ID",
|
||||
max_length=255,
|
||||
unique=True,
|
||||
verbose_name="Task ID",
|
||||
),
|
||||
),
|
||||
(
|
||||
"task_type",
|
||||
models.CharField(
|
||||
choices=[
|
||||
("consume_file", "Consume File"),
|
||||
("train_classifier", "Train Classifier"),
|
||||
("sanity_check", "Sanity Check"),
|
||||
("index_optimize", "Index Optimize"),
|
||||
("index_rebuild", "Index Rebuild"),
|
||||
("mail_fetch", "Mail Fetch"),
|
||||
("llm_index", "LLM Index"),
|
||||
],
|
||||
db_index=True,
|
||||
help_text="The kind of work being performed",
|
||||
max_length=50,
|
||||
verbose_name="Task Type",
|
||||
),
|
||||
),
|
||||
(
|
||||
"trigger_source",
|
||||
models.CharField(
|
||||
choices=[
|
||||
("scheduled", "Scheduled"),
|
||||
("web_ui", "Web UI"),
|
||||
("api_upload", "API Upload"),
|
||||
("folder_consume", "Folder Consume"),
|
||||
("email_consume", "Email Consume"),
|
||||
("system", "System"),
|
||||
("manual", "Manual"),
|
||||
],
|
||||
db_index=True,
|
||||
help_text="What initiated this task",
|
||||
max_length=50,
|
||||
verbose_name="Trigger Source",
|
||||
),
|
||||
),
|
||||
(
|
||||
"status",
|
||||
models.CharField(
|
||||
choices=[
|
||||
("pending", "Pending"),
|
||||
("started", "Started"),
|
||||
("success", "Success"),
|
||||
("failure", "Failure"),
|
||||
("revoked", "Revoked"),
|
||||
],
|
||||
db_index=True,
|
||||
default="pending",
|
||||
max_length=30,
|
||||
verbose_name="Status",
|
||||
),
|
||||
),
|
||||
(
|
||||
"date_created",
|
||||
models.DateTimeField(
|
||||
db_index=True,
|
||||
default=django.utils.timezone.now,
|
||||
verbose_name="Created",
|
||||
),
|
||||
),
|
||||
(
|
||||
"date_started",
|
||||
models.DateTimeField(
|
||||
blank=True,
|
||||
null=True,
|
||||
verbose_name="Started",
|
||||
),
|
||||
),
|
||||
(
|
||||
"date_done",
|
||||
models.DateTimeField(
|
||||
blank=True,
|
||||
db_index=True,
|
||||
null=True,
|
||||
verbose_name="Completed",
|
||||
),
|
||||
),
|
||||
(
|
||||
"duration_seconds",
|
||||
models.FloatField(
|
||||
blank=True,
|
||||
help_text="Elapsed time from start to completion",
|
||||
null=True,
|
||||
verbose_name="Duration (seconds)",
|
||||
),
|
||||
),
|
||||
(
|
||||
"wait_time_seconds",
|
||||
models.FloatField(
|
||||
blank=True,
|
||||
help_text="Time from task creation to worker pickup",
|
||||
null=True,
|
||||
verbose_name="Wait Time (seconds)",
|
||||
),
|
||||
),
|
||||
(
|
||||
"input_data",
|
||||
models.JSONField(
|
||||
blank=True,
|
||||
default=dict,
|
||||
help_text="Structured input parameters for the task",
|
||||
verbose_name="Input Data",
|
||||
),
|
||||
),
|
||||
(
|
||||
"result_data",
|
||||
models.JSONField(
|
||||
blank=True,
|
||||
help_text="Structured result data from task execution",
|
||||
null=True,
|
||||
verbose_name="Result Data",
|
||||
),
|
||||
),
|
||||
(
|
||||
"result_message",
|
||||
models.TextField(
|
||||
blank=True,
|
||||
help_text="Human-readable result message",
|
||||
null=True,
|
||||
verbose_name="Result Message",
|
||||
),
|
||||
),
|
||||
(
|
||||
"acknowledged",
|
||||
models.BooleanField(
|
||||
db_index=True,
|
||||
default=False,
|
||||
verbose_name="Acknowledged",
|
||||
),
|
||||
),
|
||||
],
|
||||
options={
|
||||
"verbose_name": "Task",
|
||||
"verbose_name_plural": "Tasks",
|
||||
"ordering": ["-date_created"],
|
||||
},
|
||||
),
|
||||
migrations.AddIndex(
|
||||
model_name="paperlesstask",
|
||||
index=models.Index(
|
||||
fields=["status", "date_created"],
|
||||
name="documents_p_status_8aa687_idx",
|
||||
),
|
||||
),
|
||||
migrations.AddIndex(
|
||||
model_name="paperlesstask",
|
||||
index=models.Index(
|
||||
fields=["task_type", "status"],
|
||||
name="documents_p_task_ty_e4a93f_idx",
|
||||
),
|
||||
),
|
||||
migrations.AddIndex(
|
||||
model_name="paperlesstask",
|
||||
index=models.Index(
|
||||
fields=["owner", "acknowledged", "date_created"],
|
||||
name="documents_p_owner_i_62c545_idx",
|
||||
),
|
||||
),
|
||||
]
|
||||
22
src/documents/migrations/0020_drop_celery_results.py
Normal file
22
src/documents/migrations/0020_drop_celery_results.py
Normal file
@@ -0,0 +1,22 @@
|
||||
from django.db import migrations
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
dependencies = [
|
||||
("documents", "0019_task_system_redesign"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.RunSQL(
|
||||
sql="DROP TABLE IF EXISTS django_celery_results_taskresult;",
|
||||
reverse_sql=migrations.RunSQL.noop,
|
||||
),
|
||||
migrations.RunSQL(
|
||||
sql="DROP TABLE IF EXISTS django_celery_results_groupresult;",
|
||||
reverse_sql=migrations.RunSQL.noop,
|
||||
),
|
||||
migrations.RunSQL(
|
||||
sql="DROP TABLE IF EXISTS django_celery_results_chordcounter;",
|
||||
reverse_sql=migrations.RunSQL.noop,
|
||||
),
|
||||
]
|
||||
@@ -3,7 +3,6 @@ from pathlib import Path
|
||||
from typing import Final
|
||||
|
||||
import pathvalidate
|
||||
from celery import states
|
||||
from django.conf import settings
|
||||
from django.contrib.auth.models import Group
|
||||
from django.contrib.auth.models import User
|
||||
@@ -663,97 +662,170 @@ class UiSettings(models.Model):
|
||||
|
||||
|
||||
class PaperlessTask(ModelWithOwner):
|
||||
ALL_STATES = sorted(states.ALL_STATES)
|
||||
TASK_STATE_CHOICES = sorted(zip(ALL_STATES, ALL_STATES))
|
||||
"""
|
||||
Tracks background task execution for user visibility and debugging.
|
||||
|
||||
State transitions:
|
||||
PENDING -> STARTED -> SUCCESS
|
||||
PENDING -> STARTED -> FAILURE
|
||||
PENDING -> REVOKED (if cancelled before starting)
|
||||
"""
|
||||
|
||||
class Status(models.TextChoices):
|
||||
PENDING = "pending", _("Pending")
|
||||
STARTED = "started", _("Started")
|
||||
SUCCESS = "success", _("Success")
|
||||
FAILURE = "failure", _("Failure")
|
||||
REVOKED = "revoked", _("Revoked")
|
||||
|
||||
class TaskType(models.TextChoices):
|
||||
AUTO = ("auto_task", _("Auto Task"))
|
||||
SCHEDULED_TASK = ("scheduled_task", _("Scheduled Task"))
|
||||
MANUAL_TASK = ("manual_task", _("Manual Task"))
|
||||
CONSUME_FILE = "consume_file", _("Consume File")
|
||||
TRAIN_CLASSIFIER = "train_classifier", _("Train Classifier")
|
||||
SANITY_CHECK = "sanity_check", _("Sanity Check")
|
||||
INDEX_OPTIMIZE = "index_optimize", _("Index Optimize")
|
||||
INDEX_REBUILD = "index_rebuild", _("Index Rebuild")
|
||||
MAIL_FETCH = "mail_fetch", _("Mail Fetch")
|
||||
LLM_INDEX = "llm_index", _("LLM Index")
|
||||
|
||||
class TaskName(models.TextChoices):
|
||||
CONSUME_FILE = ("consume_file", _("Consume File"))
|
||||
TRAIN_CLASSIFIER = ("train_classifier", _("Train Classifier"))
|
||||
CHECK_SANITY = ("check_sanity", _("Check Sanity"))
|
||||
INDEX_OPTIMIZE = ("index_optimize", _("Index Optimize"))
|
||||
LLMINDEX_UPDATE = ("llmindex_update", _("LLM Index Update"))
|
||||
class TriggerSource(models.TextChoices):
|
||||
SCHEDULED = "scheduled", _("Scheduled") # Celery beat
|
||||
WEB_UI = "web_ui", _("Web UI") # Document uploaded via web
|
||||
API_UPLOAD = "api_upload", _("API Upload") # Document uploaded via API
|
||||
FOLDER_CONSUME = "folder_consume", _("Folder Consume") # Consume folder
|
||||
EMAIL_CONSUME = "email_consume", _("Email Consume") # Email attachment
|
||||
SYSTEM = (
|
||||
"system",
|
||||
_("System"),
|
||||
) # Auto-triggered by system (self-heal, config side-effect)
|
||||
MANUAL = "manual", _("Manual") # User explicitly ran via /api/tasks/run/
|
||||
|
||||
# Identification
|
||||
task_id = models.CharField(
|
||||
max_length=255,
|
||||
unique=True,
|
||||
verbose_name=_("Task ID"),
|
||||
help_text=_("Celery ID for the Task that was run"),
|
||||
help_text=_("Celery task ID"),
|
||||
)
|
||||
|
||||
acknowledged = models.BooleanField(
|
||||
default=False,
|
||||
verbose_name=_("Acknowledged"),
|
||||
help_text=_("If the task is acknowledged via the frontend or API"),
|
||||
task_type = models.CharField(
|
||||
max_length=50,
|
||||
choices=TaskType.choices,
|
||||
verbose_name=_("Task Type"),
|
||||
help_text=_("The kind of work being performed"),
|
||||
db_index=True,
|
||||
)
|
||||
|
||||
task_file_name = models.CharField(
|
||||
null=True,
|
||||
max_length=255,
|
||||
verbose_name=_("Task Filename"),
|
||||
help_text=_("Name of the file which the Task was run for"),
|
||||
)
|
||||
|
||||
task_name = models.CharField(
|
||||
null=True,
|
||||
max_length=255,
|
||||
choices=TaskName.choices,
|
||||
verbose_name=_("Task Name"),
|
||||
help_text=_("Name of the task that was run"),
|
||||
trigger_source = models.CharField(
|
||||
max_length=50,
|
||||
choices=TriggerSource.choices,
|
||||
verbose_name=_("Trigger Source"),
|
||||
help_text=_("What initiated this task"),
|
||||
db_index=True,
|
||||
)
|
||||
|
||||
# State tracking
|
||||
status = models.CharField(
|
||||
max_length=30,
|
||||
default=states.PENDING,
|
||||
choices=TASK_STATE_CHOICES,
|
||||
verbose_name=_("Task State"),
|
||||
help_text=_("Current state of the task being run"),
|
||||
choices=Status.choices,
|
||||
default=Status.PENDING,
|
||||
verbose_name=_("Status"),
|
||||
db_index=True,
|
||||
)
|
||||
|
||||
# Timestamps
|
||||
date_created = models.DateTimeField(
|
||||
null=True,
|
||||
default=timezone.now,
|
||||
verbose_name=_("Created DateTime"),
|
||||
help_text=_("Datetime field when the task result was created in UTC"),
|
||||
verbose_name=_("Created"),
|
||||
db_index=True,
|
||||
)
|
||||
|
||||
date_started = models.DateTimeField(
|
||||
null=True,
|
||||
default=None,
|
||||
verbose_name=_("Started DateTime"),
|
||||
help_text=_("Datetime field when the task was started in UTC"),
|
||||
blank=True,
|
||||
verbose_name=_("Started"),
|
||||
)
|
||||
|
||||
date_done = models.DateTimeField(
|
||||
null=True,
|
||||
default=None,
|
||||
verbose_name=_("Completed DateTime"),
|
||||
help_text=_("Datetime field when the task was completed in UTC"),
|
||||
blank=True,
|
||||
verbose_name=_("Completed"),
|
||||
db_index=True,
|
||||
)
|
||||
|
||||
result = models.TextField(
|
||||
# Duration fields -- populated by task_postrun signal handler
|
||||
duration_seconds = models.FloatField(
|
||||
null=True,
|
||||
default=None,
|
||||
verbose_name=_("Result Data"),
|
||||
help_text=_(
|
||||
"The data returned by the task",
|
||||
),
|
||||
blank=True,
|
||||
verbose_name=_("Duration (seconds)"),
|
||||
help_text=_("Elapsed time from start to completion"),
|
||||
)
|
||||
|
||||
type = models.CharField(
|
||||
max_length=30,
|
||||
choices=TaskType.choices,
|
||||
default=TaskType.AUTO,
|
||||
verbose_name=_("Task Type"),
|
||||
help_text=_("The type of task that was run"),
|
||||
wait_time_seconds = models.FloatField(
|
||||
null=True,
|
||||
blank=True,
|
||||
verbose_name=_("Wait Time (seconds)"),
|
||||
help_text=_("Time from task creation to worker pickup"),
|
||||
)
|
||||
|
||||
# Input/Output data
|
||||
input_data = models.JSONField(
|
||||
default=dict,
|
||||
blank=True,
|
||||
verbose_name=_("Input Data"),
|
||||
help_text=_("Structured input parameters for the task"),
|
||||
)
|
||||
|
||||
result_data = models.JSONField(
|
||||
null=True,
|
||||
blank=True,
|
||||
verbose_name=_("Result Data"),
|
||||
help_text=_("Structured result data from task execution"),
|
||||
)
|
||||
|
||||
result_message = models.TextField(
|
||||
null=True,
|
||||
blank=True,
|
||||
verbose_name=_("Result Message"),
|
||||
help_text=_("Human-readable result message"),
|
||||
)
|
||||
|
||||
# Acknowledgment
|
||||
acknowledged = models.BooleanField(
|
||||
default=False,
|
||||
verbose_name=_("Acknowledged"),
|
||||
db_index=True,
|
||||
)
|
||||
|
||||
class Meta:
|
||||
verbose_name = _("Task")
|
||||
verbose_name_plural = _("Tasks")
|
||||
ordering = ["-date_created"]
|
||||
indexes = [
|
||||
models.Index(fields=["status", "date_created"]),
|
||||
models.Index(fields=["task_type", "status"]),
|
||||
models.Index(fields=["owner", "acknowledged", "date_created"]),
|
||||
]
|
||||
|
||||
def __str__(self) -> str:
|
||||
return f"Task {self.task_id}"
|
||||
return f"{self.get_task_type_display()} [{self.task_id[:8]}]"
|
||||
|
||||
@property
|
||||
def is_complete(self) -> bool:
|
||||
return self.status in (
|
||||
self.Status.SUCCESS,
|
||||
self.Status.FAILURE,
|
||||
self.Status.REVOKED,
|
||||
)
|
||||
|
||||
@property
|
||||
def related_document_ids(self) -> list[int]:
|
||||
if not self.result_data:
|
||||
return []
|
||||
if doc_id := self.result_data.get("document_id"):
|
||||
return [doc_id]
|
||||
if dup_id := self.result_data.get("duplicate_of"):
|
||||
return [dup_id]
|
||||
return []
|
||||
|
||||
|
||||
class Note(SoftDeleteModel):
|
||||
|
||||
@@ -10,7 +10,6 @@ is an identity function that adds no overhead.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import uuid
|
||||
from collections import defaultdict
|
||||
from collections.abc import Iterator
|
||||
from pathlib import Path
|
||||
@@ -18,12 +17,9 @@ from typing import TYPE_CHECKING
|
||||
from typing import Final
|
||||
from typing import TypedDict
|
||||
|
||||
from celery import states
|
||||
from django.conf import settings
|
||||
from django.utils import timezone
|
||||
|
||||
from documents.models import Document
|
||||
from documents.models import PaperlessTask
|
||||
from documents.utils import IterWrapper
|
||||
from documents.utils import compute_checksum
|
||||
from documents.utils import identity
|
||||
@@ -287,33 +283,17 @@ def _check_document(
|
||||
|
||||
def check_sanity(
|
||||
*,
|
||||
scheduled: bool = True,
|
||||
iter_wrapper: IterWrapper[Document] = identity,
|
||||
) -> SanityCheckMessages:
|
||||
"""Run a full sanity check on the document archive.
|
||||
|
||||
Args:
|
||||
scheduled: Whether this is a scheduled (automatic) or manual check.
|
||||
Controls the task type recorded in the database.
|
||||
iter_wrapper: A callable that wraps the document iterable, e.g.,
|
||||
for progress bar display. Defaults to identity (no wrapping).
|
||||
|
||||
Returns:
|
||||
A SanityCheckMessages instance containing all detected issues.
|
||||
"""
|
||||
paperless_task = PaperlessTask.objects.create(
|
||||
task_id=uuid.uuid4(),
|
||||
type=(
|
||||
PaperlessTask.TaskType.SCHEDULED_TASK
|
||||
if scheduled
|
||||
else PaperlessTask.TaskType.MANUAL_TASK
|
||||
),
|
||||
task_name=PaperlessTask.TaskName.CHECK_SANITY,
|
||||
status=states.STARTED,
|
||||
date_created=timezone.now(),
|
||||
date_started=timezone.now(),
|
||||
)
|
||||
|
||||
messages = SanityCheckMessages()
|
||||
present_files = _build_present_files()
|
||||
|
||||
@@ -332,22 +312,4 @@ def check_sanity(
|
||||
for extra_file in present_files:
|
||||
messages.warning(None, f"Orphaned file in media dir: {extra_file}")
|
||||
|
||||
paperless_task.status = states.SUCCESS if not messages.has_error else states.FAILURE
|
||||
if messages.total_issue_count == 0:
|
||||
paperless_task.result = "No issues found."
|
||||
else:
|
||||
parts: list[str] = []
|
||||
if messages.document_error_count:
|
||||
parts.append(f"{messages.document_error_count} document(s) with errors")
|
||||
if messages.document_warning_count:
|
||||
parts.append(f"{messages.document_warning_count} document(s) with warnings")
|
||||
if messages.global_warning_count:
|
||||
parts.append(f"{messages.global_warning_count} global warning(s)")
|
||||
paperless_task.result = ", ".join(parts) + " found."
|
||||
if messages.has_error:
|
||||
paperless_task.result += " Check logs for details."
|
||||
|
||||
paperless_task.date_done = timezone.now()
|
||||
paperless_task.save(update_fields=["status", "result", "date_done"])
|
||||
|
||||
return messages
|
||||
|
||||
@@ -12,7 +12,6 @@ from typing import Literal
|
||||
from typing import TypedDict
|
||||
|
||||
import magic
|
||||
from celery import states
|
||||
from django.conf import settings
|
||||
from django.contrib.auth.models import Group
|
||||
from django.contrib.auth.models import User
|
||||
@@ -2429,7 +2428,81 @@ class UiSettingsViewSerializer(serializers.ModelSerializer[UiSettings]):
|
||||
return ui_settings
|
||||
|
||||
|
||||
class TasksViewSerializer(OwnedObjectSerializer):
|
||||
class TaskSerializerV10(OwnedObjectSerializer):
|
||||
"""Task serializer for API v10+ using new field names."""
|
||||
|
||||
related_document_ids = serializers.ListField(
|
||||
child=serializers.IntegerField(),
|
||||
read_only=True,
|
||||
)
|
||||
task_type_display = serializers.CharField(
|
||||
source="get_task_type_display",
|
||||
read_only=True,
|
||||
)
|
||||
trigger_source_display = serializers.CharField(
|
||||
source="get_trigger_source_display",
|
||||
read_only=True,
|
||||
)
|
||||
status_display = serializers.CharField(
|
||||
source="get_status_display",
|
||||
read_only=True,
|
||||
)
|
||||
|
||||
class Meta:
|
||||
model = PaperlessTask
|
||||
fields = (
|
||||
"id",
|
||||
"task_id",
|
||||
"task_type",
|
||||
"task_type_display",
|
||||
"trigger_source",
|
||||
"trigger_source_display",
|
||||
"status",
|
||||
"status_display",
|
||||
"date_created",
|
||||
"date_started",
|
||||
"date_done",
|
||||
"duration_seconds",
|
||||
"wait_time_seconds",
|
||||
"input_data",
|
||||
"result_data",
|
||||
"result_message",
|
||||
"related_document_ids",
|
||||
"acknowledged",
|
||||
"owner",
|
||||
)
|
||||
read_only_fields = fields
|
||||
|
||||
|
||||
class TaskSerializerV9(serializers.ModelSerializer):
|
||||
"""Task serializer for API v9 backwards compatibility.
|
||||
|
||||
Maps old field names to the new model fields so existing clients continue
|
||||
to work unchanged.
|
||||
"""
|
||||
|
||||
# v9 field: task_name -> task_type
|
||||
task_name = serializers.CharField(source="task_type", read_only=True)
|
||||
|
||||
# v9 field: task_file_name -> input_data.filename
|
||||
task_file_name = serializers.SerializerMethodField()
|
||||
|
||||
# v9 field: type -> trigger_source (mapped to old enum labels)
|
||||
type = serializers.SerializerMethodField()
|
||||
|
||||
# v9 field: result -> result_message (with legacy format fallback)
|
||||
result = serializers.CharField(
|
||||
source="result_message",
|
||||
read_only=True,
|
||||
allow_null=True,
|
||||
)
|
||||
|
||||
# v9 field: related_document -> first document ID from result_data
|
||||
related_document = serializers.SerializerMethodField()
|
||||
|
||||
# v9 field: duplicate_documents -> list of duplicate IDs from result_data
|
||||
duplicate_documents = serializers.SerializerMethodField()
|
||||
|
||||
class Meta:
|
||||
model = PaperlessTask
|
||||
fields = (
|
||||
@@ -2437,59 +2510,59 @@ class TasksViewSerializer(OwnedObjectSerializer):
|
||||
"task_id",
|
||||
"task_name",
|
||||
"task_file_name",
|
||||
"date_created",
|
||||
"date_done",
|
||||
"type",
|
||||
"status",
|
||||
"date_created",
|
||||
"date_done",
|
||||
"result",
|
||||
"acknowledged",
|
||||
"related_document",
|
||||
"duplicate_documents",
|
||||
"owner",
|
||||
)
|
||||
read_only_fields = fields
|
||||
|
||||
related_document = serializers.SerializerMethodField()
|
||||
duplicate_documents = serializers.SerializerMethodField()
|
||||
created_doc_re = re.compile(r"New document id (\d+) created")
|
||||
duplicate_doc_re = re.compile(r"It is a duplicate of .* \(#(\d+)\)")
|
||||
def get_task_file_name(self, obj: PaperlessTask) -> str | None:
|
||||
if not obj.input_data:
|
||||
return None
|
||||
return obj.input_data.get("filename")
|
||||
|
||||
def get_related_document(self, obj) -> str | None:
|
||||
result = None
|
||||
re = None
|
||||
if obj.result:
|
||||
match obj.status:
|
||||
case states.SUCCESS:
|
||||
re = self.created_doc_re
|
||||
case states.FAILURE:
|
||||
re = (
|
||||
self.duplicate_doc_re
|
||||
if "existing document is in the trash" not in obj.result
|
||||
else None
|
||||
)
|
||||
if re is not None:
|
||||
try:
|
||||
result = re.search(obj.result).group(1)
|
||||
except Exception:
|
||||
pass
|
||||
def get_type(self, obj: PaperlessTask) -> str:
|
||||
# Old type values: AUTO_TASK, SCHEDULED_TASK, MANUAL_TASK
|
||||
source_to_old_type = {
|
||||
PaperlessTask.TriggerSource.SCHEDULED: "SCHEDULED_TASK",
|
||||
PaperlessTask.TriggerSource.SYSTEM: "AUTO_TASK",
|
||||
}
|
||||
return source_to_old_type.get(obj.trigger_source, "MANUAL_TASK")
|
||||
|
||||
return result
|
||||
def get_related_document(self, obj: PaperlessTask) -> int | None:
|
||||
ids = obj.related_document_ids
|
||||
return ids[0] if ids else None
|
||||
|
||||
@extend_schema_field(DuplicateDocumentSummarySerializer(many=True))
|
||||
def get_duplicate_documents(self, obj):
|
||||
related_document = self.get_related_document(obj)
|
||||
request = self.context.get("request")
|
||||
user = request.user if request else None
|
||||
document = Document.global_objects.filter(pk=related_document).first()
|
||||
if not related_document or not user or not document:
|
||||
def get_duplicate_documents(self, obj: PaperlessTask) -> list[int]:
|
||||
if not obj.result_data:
|
||||
return []
|
||||
duplicates = _get_viewable_duplicates(document, user)
|
||||
return list(duplicates.values("id", "title", "deleted_at"))
|
||||
dup_of = obj.result_data.get("duplicate_of")
|
||||
return [dup_of] if dup_of is not None else []
|
||||
|
||||
|
||||
class RunTaskViewSerializer(serializers.Serializer[dict[str, Any]]):
|
||||
task_name = serializers.ChoiceField(
|
||||
choices=PaperlessTask.TaskName.choices,
|
||||
label="Task Name",
|
||||
class TaskSummarySerializer(serializers.Serializer):
|
||||
task_type = serializers.CharField()
|
||||
total_count = serializers.IntegerField()
|
||||
pending_count = serializers.IntegerField()
|
||||
success_count = serializers.IntegerField()
|
||||
failure_count = serializers.IntegerField()
|
||||
avg_duration_seconds = serializers.FloatField(allow_null=True)
|
||||
avg_wait_time_seconds = serializers.FloatField(allow_null=True)
|
||||
last_run = serializers.DateTimeField(allow_null=True)
|
||||
last_success = serializers.DateTimeField(allow_null=True)
|
||||
last_failure = serializers.DateTimeField(allow_null=True)
|
||||
|
||||
|
||||
class RunTaskSerializer(serializers.Serializer):
|
||||
task_type = serializers.ChoiceField(
|
||||
choices=PaperlessTask.TaskType.choices,
|
||||
label="Task Type",
|
||||
write_only=True,
|
||||
)
|
||||
|
||||
|
||||
@@ -8,7 +8,6 @@ from typing import TYPE_CHECKING
|
||||
from typing import Any
|
||||
|
||||
from celery import shared_task
|
||||
from celery import states
|
||||
from celery.signals import before_task_publish
|
||||
from celery.signals import task_failure
|
||||
from celery.signals import task_postrun
|
||||
@@ -31,6 +30,7 @@ from documents import matching
|
||||
from documents.caching import clear_document_caches
|
||||
from documents.caching import invalidate_llm_suggestions_cache
|
||||
from documents.data_models import ConsumableDocument
|
||||
from documents.data_models import DocumentSource
|
||||
from documents.file_handling import create_source_path_directory
|
||||
from documents.file_handling import delete_empty_directories
|
||||
from documents.file_handling import generate_filename
|
||||
@@ -999,68 +999,175 @@ def run_workflows(
|
||||
return overrides, "\n".join(messages)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Task tracking -- Celery signal handlers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
TRACKED_TASKS: dict[str, PaperlessTask.TaskType] = {
|
||||
"documents.tasks.consume_file": PaperlessTask.TaskType.CONSUME_FILE,
|
||||
"documents.tasks.train_classifier": PaperlessTask.TaskType.TRAIN_CLASSIFIER,
|
||||
"documents.tasks.sanity_check": PaperlessTask.TaskType.SANITY_CHECK,
|
||||
"documents.tasks.index_optimize": PaperlessTask.TaskType.INDEX_OPTIMIZE,
|
||||
"documents.tasks.llmindex_index": PaperlessTask.TaskType.LLM_INDEX,
|
||||
"paperless_mail.tasks.process_mail_accounts": PaperlessTask.TaskType.MAIL_FETCH,
|
||||
}
|
||||
|
||||
_DOCUMENT_SOURCE_TO_TRIGGER: dict[Any, PaperlessTask.TriggerSource] = {
|
||||
DocumentSource.ConsumeFolder: PaperlessTask.TriggerSource.FOLDER_CONSUME,
|
||||
DocumentSource.ApiUpload: PaperlessTask.TriggerSource.API_UPLOAD,
|
||||
DocumentSource.MailFetch: PaperlessTask.TriggerSource.EMAIL_CONSUME,
|
||||
DocumentSource.WebUI: PaperlessTask.TriggerSource.WEB_UI,
|
||||
}
|
||||
|
||||
|
||||
def _extract_input_data(
|
||||
task_type: PaperlessTask.TaskType,
|
||||
args: tuple,
|
||||
task_kwargs: dict,
|
||||
) -> dict:
|
||||
if task_type == PaperlessTask.TaskType.CONSUME_FILE:
|
||||
input_doc = args[0] if args else task_kwargs.get("input_doc")
|
||||
overrides = args[1] if len(args) >= 2 else task_kwargs.get("overrides")
|
||||
if input_doc is None:
|
||||
return {}
|
||||
data: dict = {
|
||||
"filename": input_doc.original_file.name,
|
||||
"mime_type": input_doc.mime_type,
|
||||
}
|
||||
if input_doc.original_path:
|
||||
data["source_path"] = str(input_doc.original_path)
|
||||
if input_doc.mailrule_id:
|
||||
data["mailrule_id"] = input_doc.mailrule_id
|
||||
if overrides:
|
||||
override_dict = {
|
||||
k: v
|
||||
for k, v in vars(overrides).items()
|
||||
if v is not None and not k.startswith("_")
|
||||
}
|
||||
if override_dict:
|
||||
data["overrides"] = override_dict
|
||||
return data
|
||||
|
||||
if task_type == PaperlessTask.TaskType.MAIL_FETCH:
|
||||
account_ids = args[0] if args else task_kwargs.get("account_ids")
|
||||
return {"account_ids": account_ids}
|
||||
|
||||
return {}
|
||||
|
||||
|
||||
def _determine_trigger_source(
|
||||
task_type: PaperlessTask.TaskType,
|
||||
args: tuple,
|
||||
task_kwargs: dict,
|
||||
headers: dict,
|
||||
) -> PaperlessTask.TriggerSource:
|
||||
# Explicit header takes priority -- covers beat ("scheduled") and system auto-runs ("system")
|
||||
header_source = headers.get("trigger_source")
|
||||
if header_source == "scheduled":
|
||||
return PaperlessTask.TriggerSource.SCHEDULED
|
||||
if header_source == "system":
|
||||
return PaperlessTask.TriggerSource.SYSTEM
|
||||
|
||||
if task_type == PaperlessTask.TaskType.CONSUME_FILE:
|
||||
input_doc = args[0] if args else task_kwargs.get("input_doc")
|
||||
if input_doc is not None:
|
||||
return _DOCUMENT_SOURCE_TO_TRIGGER.get(
|
||||
input_doc.source,
|
||||
PaperlessTask.TriggerSource.API_UPLOAD,
|
||||
)
|
||||
|
||||
return PaperlessTask.TriggerSource.MANUAL
|
||||
|
||||
|
||||
def _extract_owner_id(
|
||||
task_type: PaperlessTask.TaskType,
|
||||
args: tuple,
|
||||
task_kwargs: dict,
|
||||
) -> int | None:
|
||||
if task_type != PaperlessTask.TaskType.CONSUME_FILE:
|
||||
return None
|
||||
overrides = args[1] if len(args) >= 2 else task_kwargs.get("overrides")
|
||||
if overrides and hasattr(overrides, "owner_id"):
|
||||
return overrides.owner_id
|
||||
return None
|
||||
|
||||
|
||||
def _parse_legacy_result(result: str) -> dict | None:
|
||||
import re as _re
|
||||
|
||||
if match := _re.search(r"New document id (\d+) created", result):
|
||||
return {"document_id": int(match.group(1))}
|
||||
if match := _re.search(r"It is a duplicate of .* \(#(\d+)\)", result):
|
||||
return {
|
||||
"duplicate_of": int(match.group(1)),
|
||||
"duplicate_in_trash": "existing document is in the trash" in result,
|
||||
}
|
||||
return None
|
||||
|
||||
|
||||
@before_task_publish.connect
|
||||
def before_task_publish_handler(sender=None, headers=None, body=None, **kwargs) -> None:
|
||||
def before_task_publish_handler(
|
||||
sender=None,
|
||||
headers=None,
|
||||
body=None,
|
||||
**kwargs,
|
||||
) -> None:
|
||||
"""
|
||||
Creates the PaperlessTask object in a pending state. This is sent before
|
||||
the task reaches the broker, but before it begins executing on a worker.
|
||||
Creates the PaperlessTask record when the task is published to broker.
|
||||
|
||||
https://docs.celeryq.dev/en/stable/userguide/signals.html#before-task-publish
|
||||
|
||||
https://docs.celeryq.dev/en/stable/internals/protocol.html#version-2
|
||||
|
||||
"""
|
||||
if "task" not in headers or headers["task"] != "documents.tasks.consume_file":
|
||||
# Assumption: this is only ever a v2 message
|
||||
if headers is None or body is None:
|
||||
return
|
||||
|
||||
task_name = headers.get("task", "")
|
||||
task_type = TRACKED_TASKS.get(task_name)
|
||||
if task_type is None:
|
||||
return
|
||||
|
||||
try:
|
||||
close_old_connections()
|
||||
args, task_kwargs, _ = body
|
||||
task_id = headers["id"]
|
||||
|
||||
task_args = body[0]
|
||||
input_doc, overrides = task_args
|
||||
|
||||
task_file_name = input_doc.original_file.name
|
||||
user_id = overrides.owner_id if overrides else None
|
||||
input_data = _extract_input_data(task_type, args, task_kwargs)
|
||||
trigger_source = _determine_trigger_source(
|
||||
task_type,
|
||||
args,
|
||||
task_kwargs,
|
||||
headers,
|
||||
)
|
||||
owner_id = _extract_owner_id(task_type, args, task_kwargs)
|
||||
|
||||
PaperlessTask.objects.create(
|
||||
type=PaperlessTask.TaskType.AUTO,
|
||||
task_id=headers["id"],
|
||||
status=states.PENDING,
|
||||
task_file_name=task_file_name,
|
||||
task_name=PaperlessTask.TaskName.CONSUME_FILE,
|
||||
result=None,
|
||||
date_created=timezone.now(),
|
||||
date_started=None,
|
||||
date_done=None,
|
||||
owner_id=user_id,
|
||||
task_id=task_id,
|
||||
task_type=task_type,
|
||||
trigger_source=trigger_source,
|
||||
status=PaperlessTask.Status.PENDING,
|
||||
input_data=input_data,
|
||||
owner_id=owner_id,
|
||||
)
|
||||
except Exception: # pragma: no cover
|
||||
# Don't let an exception in the signal handlers prevent
|
||||
# a document from being consumed.
|
||||
except Exception:
|
||||
logger.exception("Creating PaperlessTask failed")
|
||||
|
||||
|
||||
@task_prerun.connect
|
||||
def task_prerun_handler(sender=None, task_id=None, task=None, **kwargs) -> None:
|
||||
"""
|
||||
|
||||
Updates the PaperlessTask to be started. Sent before the task begins execution
|
||||
on a worker.
|
||||
Marks the task STARTED when execution begins on a worker.
|
||||
|
||||
https://docs.celeryq.dev/en/stable/userguide/signals.html#task-prerun
|
||||
"""
|
||||
if task_id is None:
|
||||
return
|
||||
try:
|
||||
close_old_connections()
|
||||
task_instance = PaperlessTask.objects.filter(task_id=task_id).first()
|
||||
|
||||
if task_instance is not None:
|
||||
task_instance.status = states.STARTED
|
||||
task_instance.date_started = timezone.now()
|
||||
task_instance.save()
|
||||
except Exception: # pragma: no cover
|
||||
# Don't let an exception in the signal handlers prevent
|
||||
# a document from being consumed.
|
||||
PaperlessTask.objects.filter(task_id=task_id).update(
|
||||
status=PaperlessTask.Status.STARTED,
|
||||
date_started=timezone.now(),
|
||||
)
|
||||
except Exception:
|
||||
logger.exception("Setting PaperlessTask started failed")
|
||||
|
||||
|
||||
@@ -1074,22 +1181,53 @@ def task_postrun_handler(
|
||||
**kwargs,
|
||||
) -> None:
|
||||
"""
|
||||
Updates the result of the PaperlessTask.
|
||||
Records task completion and result data.
|
||||
|
||||
https://docs.celeryq.dev/en/stable/userguide/signals.html#task-postrun
|
||||
"""
|
||||
if task_id is None:
|
||||
return
|
||||
try:
|
||||
close_old_connections()
|
||||
task_instance = PaperlessTask.objects.filter(task_id=task_id).first()
|
||||
|
||||
if task_instance is not None:
|
||||
task_instance.status = state or states.FAILURE
|
||||
task_instance.result = retval
|
||||
task_instance.date_done = timezone.now()
|
||||
task_instance.save()
|
||||
except Exception: # pragma: no cover
|
||||
# Don't let an exception in the signal handlers prevent
|
||||
# a document from being consumed.
|
||||
status_map = {
|
||||
"SUCCESS": PaperlessTask.Status.SUCCESS,
|
||||
"FAILURE": PaperlessTask.Status.FAILURE,
|
||||
"REVOKED": PaperlessTask.Status.REVOKED,
|
||||
}
|
||||
new_status = status_map.get(state, PaperlessTask.Status.FAILURE)
|
||||
|
||||
result_data: dict | None = None
|
||||
result_message: str | None = None
|
||||
if isinstance(retval, dict):
|
||||
result_data = retval
|
||||
elif isinstance(retval, str):
|
||||
result_message = retval
|
||||
result_data = _parse_legacy_result(retval)
|
||||
|
||||
now = timezone.now()
|
||||
task_instance = PaperlessTask.objects.filter(task_id=task_id).first()
|
||||
if task_instance is None:
|
||||
return
|
||||
|
||||
duration_seconds: float | None = None
|
||||
wait_time_seconds: float | None = None
|
||||
if task_instance.date_started:
|
||||
duration_seconds = (now - task_instance.date_started).total_seconds()
|
||||
if task_instance.date_started and task_instance.date_created:
|
||||
wait_time_seconds = (
|
||||
task_instance.date_started - task_instance.date_created
|
||||
).total_seconds()
|
||||
|
||||
PaperlessTask.objects.filter(task_id=task_id).update(
|
||||
status=new_status,
|
||||
result_data=result_data,
|
||||
result_message=result_message,
|
||||
date_done=now,
|
||||
duration_seconds=duration_seconds,
|
||||
wait_time_seconds=wait_time_seconds,
|
||||
)
|
||||
except Exception:
|
||||
logger.exception("Updating PaperlessTask failed")
|
||||
|
||||
|
||||
@@ -1103,21 +1241,33 @@ def task_failure_handler(
|
||||
**kwargs,
|
||||
) -> None:
|
||||
"""
|
||||
Updates the result of a failed PaperlessTask.
|
||||
Records failure details when a task raises an exception.
|
||||
|
||||
https://docs.celeryq.dev/en/stable/userguide/signals.html#task-failure
|
||||
"""
|
||||
if task_id is None:
|
||||
return
|
||||
try:
|
||||
close_old_connections()
|
||||
task_instance = PaperlessTask.objects.filter(task_id=task_id).first()
|
||||
|
||||
if task_instance is not None and task_instance.result is None:
|
||||
task_instance.status = states.FAILURE
|
||||
task_instance.result = traceback
|
||||
task_instance.date_done = timezone.now()
|
||||
task_instance.save()
|
||||
except Exception: # pragma: no cover
|
||||
logger.exception("Updating PaperlessTask failed")
|
||||
result_data: dict = {
|
||||
"error_type": type(exception).__name__ if exception else "Unknown",
|
||||
"error_message": str(exception) if exception else "Unknown error",
|
||||
}
|
||||
if traceback:
|
||||
import traceback as _tb
|
||||
|
||||
tb_str = "".join(_tb.format_tb(traceback))
|
||||
result_data["traceback"] = tb_str[:5000]
|
||||
|
||||
PaperlessTask.objects.filter(task_id=task_id).update(
|
||||
status=PaperlessTask.Status.FAILURE,
|
||||
result_data=result_data,
|
||||
result_message=str(exception) if exception else None,
|
||||
date_done=timezone.now(),
|
||||
)
|
||||
except Exception:
|
||||
logger.exception("Updating PaperlessTask on failure failed")
|
||||
|
||||
|
||||
@worker_process_init.connect
|
||||
|
||||
@@ -10,7 +10,6 @@ from tempfile import mkstemp
|
||||
|
||||
from celery import Task
|
||||
from celery import shared_task
|
||||
from celery import states
|
||||
from django.conf import settings
|
||||
from django.contrib.contenttypes.models import ContentType
|
||||
from django.db import models
|
||||
@@ -41,7 +40,6 @@ from documents.models import Correspondent
|
||||
from documents.models import CustomFieldInstance
|
||||
from documents.models import Document
|
||||
from documents.models import DocumentType
|
||||
from documents.models import PaperlessTask
|
||||
from documents.models import ShareLink
|
||||
from documents.models import ShareLinkBundle
|
||||
from documents.models import StoragePath
|
||||
@@ -84,19 +82,8 @@ def index_optimize() -> None:
|
||||
@shared_task
|
||||
def train_classifier(
|
||||
*,
|
||||
scheduled=True,
|
||||
status_callback: Callable[[str], None] | None = None,
|
||||
) -> None:
|
||||
task = PaperlessTask.objects.create(
|
||||
type=PaperlessTask.TaskType.SCHEDULED_TASK
|
||||
if scheduled
|
||||
else PaperlessTask.TaskType.MANUAL_TASK,
|
||||
task_id=uuid.uuid4(),
|
||||
task_name=PaperlessTask.TaskName.TRAIN_CLASSIFIER,
|
||||
status=states.STARTED,
|
||||
date_created=timezone.now(),
|
||||
date_started=timezone.now(),
|
||||
)
|
||||
) -> str:
|
||||
if (
|
||||
not Tag.objects.filter(matching_algorithm=Tag.MATCH_AUTO).exists()
|
||||
and not DocumentType.objects.filter(matching_algorithm=Tag.MATCH_AUTO).exists()
|
||||
@@ -110,37 +97,22 @@ def train_classifier(
|
||||
if settings.MODEL_FILE.exists():
|
||||
logger.info(f"Removing {settings.MODEL_FILE} so it won't be used")
|
||||
settings.MODEL_FILE.unlink()
|
||||
task.status = states.SUCCESS
|
||||
task.result = result
|
||||
task.date_done = timezone.now()
|
||||
task.save()
|
||||
return
|
||||
return result
|
||||
|
||||
classifier = load_classifier()
|
||||
|
||||
if not classifier:
|
||||
classifier = DocumentClassifier()
|
||||
|
||||
try:
|
||||
if classifier.train(status_callback=status_callback):
|
||||
logger.info(
|
||||
f"Saving updated classifier model to {settings.MODEL_FILE}...",
|
||||
)
|
||||
classifier.save()
|
||||
task.result = "Training completed successfully"
|
||||
else:
|
||||
logger.debug("Training data unchanged.")
|
||||
task.result = "Training data unchanged"
|
||||
|
||||
task.status = states.SUCCESS
|
||||
|
||||
except Exception as e:
|
||||
logger.warning("Classifier error: " + str(e))
|
||||
task.status = states.FAILURE
|
||||
task.result = str(e)
|
||||
|
||||
task.date_done = timezone.now()
|
||||
task.save(update_fields=["status", "result", "date_done"])
|
||||
if classifier.train(status_callback=status_callback):
|
||||
logger.info(
|
||||
f"Saving updated classifier model to {settings.MODEL_FILE}...",
|
||||
)
|
||||
classifier.save()
|
||||
return "Training completed successfully"
|
||||
else:
|
||||
logger.debug("Training data unchanged.")
|
||||
return "Training data unchanged"
|
||||
|
||||
|
||||
@shared_task(bind=True)
|
||||
@@ -231,8 +203,8 @@ def consume_file(
|
||||
|
||||
|
||||
@shared_task
|
||||
def sanity_check(*, scheduled=True, raise_on_error=True):
|
||||
messages = sanity_checker.check_sanity(scheduled=scheduled)
|
||||
def sanity_check(*, raise_on_error: bool = True) -> str:
|
||||
messages = sanity_checker.check_sanity()
|
||||
messages.log_messages()
|
||||
|
||||
if not messages.has_error and not messages.has_warning and not messages.has_info:
|
||||
@@ -635,42 +607,19 @@ def update_document_parent_tags(tag: Tag, new_parent: Tag) -> None:
|
||||
def llmindex_index(
|
||||
*,
|
||||
iter_wrapper: IterWrapper[Document] = identity,
|
||||
rebuild=False,
|
||||
scheduled=True,
|
||||
auto=False,
|
||||
) -> None:
|
||||
rebuild: bool = False,
|
||||
) -> str | None:
|
||||
ai_config = AIConfig()
|
||||
if ai_config.llm_index_enabled:
|
||||
task = PaperlessTask.objects.create(
|
||||
type=PaperlessTask.TaskType.SCHEDULED_TASK
|
||||
if scheduled
|
||||
else PaperlessTask.TaskType.AUTO
|
||||
if auto
|
||||
else PaperlessTask.TaskType.MANUAL_TASK,
|
||||
task_id=uuid.uuid4(),
|
||||
task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,
|
||||
status=states.STARTED,
|
||||
date_created=timezone.now(),
|
||||
date_started=timezone.now(),
|
||||
)
|
||||
from paperless_ai.indexing import update_llm_index
|
||||
|
||||
try:
|
||||
result = update_llm_index(
|
||||
iter_wrapper=iter_wrapper,
|
||||
rebuild=rebuild,
|
||||
)
|
||||
task.status = states.SUCCESS
|
||||
task.result = result
|
||||
except Exception as e:
|
||||
logger.error("LLM index error: " + str(e))
|
||||
task.status = states.FAILURE
|
||||
task.result = str(e)
|
||||
|
||||
task.date_done = timezone.now()
|
||||
task.save(update_fields=["status", "result", "date_done"])
|
||||
else:
|
||||
if not ai_config.llm_index_enabled:
|
||||
logger.info("LLM index is disabled, skipping update.")
|
||||
return None
|
||||
|
||||
from paperless_ai.indexing import update_llm_index
|
||||
|
||||
return update_llm_index(
|
||||
iter_wrapper=iter_wrapper,
|
||||
rebuild=rebuild,
|
||||
)
|
||||
|
||||
|
||||
@shared_task
|
||||
|
||||
@@ -13,6 +13,8 @@ from rest_framework.test import APIClient
|
||||
|
||||
from documents.tests.factories import DocumentFactory
|
||||
|
||||
UserModelT = get_user_model()
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from documents.models import Document
|
||||
|
||||
@@ -126,15 +128,34 @@ def rest_api_client():
|
||||
yield APIClient()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def authenticated_rest_api_client(rest_api_client: APIClient):
|
||||
"""
|
||||
The basic DRF ApiClient which has been authenticated
|
||||
"""
|
||||
UserModel = get_user_model()
|
||||
user = UserModel.objects.create_user(username="testuser", password="password")
|
||||
rest_api_client.force_authenticate(user=user)
|
||||
yield rest_api_client
|
||||
@pytest.fixture()
|
||||
def regular_user(django_user_model: type[UserModelT]) -> UserModelT:
|
||||
"""Unprivileged authenticated user for permission boundary tests."""
|
||||
return django_user_model.objects.create_user(username="regular", password="regular")
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def admin_client(rest_api_client: APIClient, admin_user: UserModelT) -> APIClient:
|
||||
"""Admin client pre-authenticated and sending the v10 Accept header."""
|
||||
rest_api_client.force_authenticate(user=admin_user)
|
||||
rest_api_client.credentials(HTTP_ACCEPT="application/json; version=10")
|
||||
return rest_api_client
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def v9_client(rest_api_client: APIClient, admin_user: UserModelT) -> APIClient:
|
||||
"""Admin client pre-authenticated and sending the v9 Accept header."""
|
||||
rest_api_client.force_authenticate(user=admin_user)
|
||||
rest_api_client.credentials(HTTP_ACCEPT="application/json; version=9")
|
||||
return rest_api_client
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def user_client(rest_api_client: APIClient, regular_user: UserModelT) -> APIClient:
|
||||
"""Regular-user client pre-authenticated and sending the v10 Accept header."""
|
||||
rest_api_client.force_authenticate(user=regular_user)
|
||||
rest_api_client.credentials(HTTP_ACCEPT="application/json; version=10")
|
||||
return rest_api_client
|
||||
|
||||
|
||||
@pytest.fixture(scope="session", autouse=True)
|
||||
|
||||
@@ -11,6 +11,7 @@ from documents.models import Correspondent
|
||||
from documents.models import Document
|
||||
from documents.models import DocumentType
|
||||
from documents.models import MatchingModel
|
||||
from documents.models import PaperlessTask
|
||||
from documents.models import StoragePath
|
||||
from documents.models import Tag
|
||||
|
||||
@@ -65,3 +66,17 @@ class DocumentFactory(DjangoModelFactory):
|
||||
correspondent = None
|
||||
document_type = None
|
||||
storage_path = None
|
||||
|
||||
|
||||
class PaperlessTaskFactory(DjangoModelFactory):
|
||||
class Meta:
|
||||
model = PaperlessTask
|
||||
|
||||
task_id = factory.LazyFunction(lambda: str(__import__("uuid").uuid4()))
|
||||
task_type = PaperlessTask.TaskType.CONSUME_FILE
|
||||
trigger_source = PaperlessTask.TriggerSource.WEB_UI
|
||||
status = PaperlessTask.Status.PENDING
|
||||
input_data = factory.LazyFunction(dict)
|
||||
result_data = None
|
||||
result_message = None
|
||||
acknowledged = False
|
||||
|
||||
@@ -831,7 +831,7 @@ class TestApiAppConfig(DirectoriesMixin, APITestCase):
|
||||
config.save()
|
||||
|
||||
with (
|
||||
patch("documents.tasks.llmindex_index.delay") as mock_update,
|
||||
patch("documents.tasks.llmindex_index.apply_async") as mock_update,
|
||||
patch("paperless_ai.indexing.vector_store_file_exists") as mock_exists,
|
||||
):
|
||||
mock_exists.return_value = False
|
||||
|
||||
@@ -4,7 +4,6 @@ import tempfile
|
||||
from pathlib import Path
|
||||
from unittest import mock
|
||||
|
||||
from celery import states
|
||||
from django.contrib.auth.models import Permission
|
||||
from django.contrib.auth.models import User
|
||||
from django.test import override_settings
|
||||
@@ -13,6 +12,7 @@ from rest_framework.test import APITestCase
|
||||
|
||||
from documents.models import PaperlessTask
|
||||
from documents.permissions import has_system_status_permission
|
||||
from documents.tests.factories import PaperlessTaskFactory
|
||||
from paperless import version
|
||||
|
||||
|
||||
@@ -258,10 +258,10 @@ class TestSystemStatus(APITestCase):
|
||||
THEN:
|
||||
- The response contains an OK classifier status
|
||||
"""
|
||||
PaperlessTask.objects.create(
|
||||
type=PaperlessTask.TaskType.SCHEDULED_TASK,
|
||||
status=states.SUCCESS,
|
||||
task_name=PaperlessTask.TaskName.TRAIN_CLASSIFIER,
|
||||
PaperlessTaskFactory(
|
||||
task_type=PaperlessTask.TaskType.TRAIN_CLASSIFIER,
|
||||
trigger_source=PaperlessTask.TriggerSource.SCHEDULED,
|
||||
status=PaperlessTask.Status.SUCCESS,
|
||||
)
|
||||
self.client.force_login(self.user)
|
||||
response = self.client.get(self.ENDPOINT)
|
||||
@@ -295,11 +295,11 @@ class TestSystemStatus(APITestCase):
|
||||
THEN:
|
||||
- The response contains an ERROR classifier status
|
||||
"""
|
||||
PaperlessTask.objects.create(
|
||||
type=PaperlessTask.TaskType.SCHEDULED_TASK,
|
||||
status=states.FAILURE,
|
||||
task_name=PaperlessTask.TaskName.TRAIN_CLASSIFIER,
|
||||
result="Classifier training failed",
|
||||
PaperlessTaskFactory(
|
||||
task_type=PaperlessTask.TaskType.TRAIN_CLASSIFIER,
|
||||
trigger_source=PaperlessTask.TriggerSource.SCHEDULED,
|
||||
status=PaperlessTask.Status.FAILURE,
|
||||
result_message="Classifier training failed",
|
||||
)
|
||||
self.client.force_login(self.user)
|
||||
response = self.client.get(self.ENDPOINT)
|
||||
@@ -319,10 +319,10 @@ class TestSystemStatus(APITestCase):
|
||||
THEN:
|
||||
- The response contains an OK sanity check status
|
||||
"""
|
||||
PaperlessTask.objects.create(
|
||||
type=PaperlessTask.TaskType.SCHEDULED_TASK,
|
||||
status=states.SUCCESS,
|
||||
task_name=PaperlessTask.TaskName.CHECK_SANITY,
|
||||
PaperlessTaskFactory(
|
||||
task_type=PaperlessTask.TaskType.SANITY_CHECK,
|
||||
trigger_source=PaperlessTask.TriggerSource.SCHEDULED,
|
||||
status=PaperlessTask.Status.SUCCESS,
|
||||
)
|
||||
self.client.force_login(self.user)
|
||||
response = self.client.get(self.ENDPOINT)
|
||||
@@ -356,11 +356,11 @@ class TestSystemStatus(APITestCase):
|
||||
THEN:
|
||||
- The response contains an ERROR sanity check status
|
||||
"""
|
||||
PaperlessTask.objects.create(
|
||||
type=PaperlessTask.TaskType.SCHEDULED_TASK,
|
||||
status=states.FAILURE,
|
||||
task_name=PaperlessTask.TaskName.CHECK_SANITY,
|
||||
result="5 issues found.",
|
||||
PaperlessTaskFactory(
|
||||
task_type=PaperlessTask.TaskType.SANITY_CHECK,
|
||||
trigger_source=PaperlessTask.TriggerSource.SCHEDULED,
|
||||
status=PaperlessTask.Status.FAILURE,
|
||||
result_message="5 issues found.",
|
||||
)
|
||||
self.client.force_login(self.user)
|
||||
response = self.client.get(self.ENDPOINT)
|
||||
@@ -405,10 +405,10 @@ class TestSystemStatus(APITestCase):
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
self.assertEqual(response.data["tasks"]["llmindex_status"], "WARNING")
|
||||
|
||||
PaperlessTask.objects.create(
|
||||
type=PaperlessTask.TaskType.SCHEDULED_TASK,
|
||||
status=states.SUCCESS,
|
||||
task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,
|
||||
PaperlessTaskFactory(
|
||||
task_type=PaperlessTask.TaskType.LLM_INDEX,
|
||||
trigger_source=PaperlessTask.TriggerSource.SCHEDULED,
|
||||
status=PaperlessTask.Status.SUCCESS,
|
||||
)
|
||||
response = self.client.get(self.ENDPOINT)
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
@@ -425,11 +425,11 @@ class TestSystemStatus(APITestCase):
|
||||
- The response contains the correct AI status
|
||||
"""
|
||||
with override_settings(AI_ENABLED=True, LLM_EMBEDDING_BACKEND="openai"):
|
||||
PaperlessTask.objects.create(
|
||||
type=PaperlessTask.TaskType.SCHEDULED_TASK,
|
||||
status=states.FAILURE,
|
||||
task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,
|
||||
result="AI index update failed",
|
||||
PaperlessTaskFactory(
|
||||
task_type=PaperlessTask.TaskType.LLM_INDEX,
|
||||
trigger_source=PaperlessTask.TriggerSource.SCHEDULED,
|
||||
status=PaperlessTask.Status.FAILURE,
|
||||
result_message="AI index update failed",
|
||||
)
|
||||
self.client.force_login(self.user)
|
||||
response = self.client.get(self.ENDPOINT)
|
||||
|
||||
@@ -1,425 +1,586 @@
|
||||
"""Tests for the /api/tasks/ endpoint.
|
||||
|
||||
Covers:
|
||||
- v10 serializer (new field names)
|
||||
- v9 serializer (backwards-compatible field names)
|
||||
- Filtering, ordering, acknowledge, acknowledge_all, summary, active, run
|
||||
"""
|
||||
|
||||
import uuid
|
||||
from datetime import timedelta
|
||||
from unittest import mock
|
||||
|
||||
import celery
|
||||
import pytest
|
||||
from django.contrib.auth.models import Permission
|
||||
from django.contrib.auth.models import User
|
||||
from django.utils import timezone
|
||||
from rest_framework import status
|
||||
from rest_framework.test import APITestCase
|
||||
from rest_framework.test import APIClient
|
||||
|
||||
from documents.models import Document
|
||||
from documents.models import PaperlessTask
|
||||
from documents.tests.utils import DirectoriesMixin
|
||||
from documents.views import TasksViewSet
|
||||
from documents.tests.factories import PaperlessTaskFactory
|
||||
|
||||
pytestmark = pytest.mark.api
|
||||
|
||||
ENDPOINT = "/api/tasks/"
|
||||
ACCEPT_V10 = "application/json; version=10"
|
||||
ACCEPT_V9 = "application/json; version=9"
|
||||
|
||||
|
||||
class TestTasks(DirectoriesMixin, APITestCase):
|
||||
ENDPOINT = "/api/tasks/"
|
||||
@pytest.mark.django_db()
|
||||
class TestGetTasksV10:
|
||||
def test_list_returns_tasks(self, admin_client: APIClient) -> None:
|
||||
"""GET /api/tasks/ returns all tasks visible to the admin."""
|
||||
PaperlessTaskFactory.create_batch(2)
|
||||
|
||||
def setUp(self) -> None:
|
||||
super().setUp()
|
||||
response = admin_client.get(ENDPOINT)
|
||||
|
||||
self.user = User.objects.create_superuser(username="temp_admin")
|
||||
self.client.force_authenticate(user=self.user)
|
||||
assert response.status_code == status.HTTP_200_OK
|
||||
assert len(response.data) == 2
|
||||
|
||||
def test_get_tasks(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- Attempted celery tasks
|
||||
WHEN:
|
||||
- API call is made to get tasks
|
||||
THEN:
|
||||
- Attempting and pending tasks are serialized and provided
|
||||
"""
|
||||
|
||||
task1 = PaperlessTask.objects.create(
|
||||
task_id=str(uuid.uuid4()),
|
||||
task_file_name="task_one.pdf",
|
||||
def test_related_document_ids_populated_from_result_data(
|
||||
self,
|
||||
admin_client: APIClient,
|
||||
) -> None:
|
||||
"""related_document_ids includes the consumed document_id from result_data."""
|
||||
PaperlessTaskFactory(
|
||||
status=PaperlessTask.Status.SUCCESS,
|
||||
result_data={"document_id": 7},
|
||||
)
|
||||
|
||||
task2 = PaperlessTask.objects.create(
|
||||
task_id=str(uuid.uuid4()),
|
||||
task_file_name="task_two.pdf",
|
||||
response = admin_client.get(ENDPOINT)
|
||||
|
||||
assert response.status_code == status.HTTP_200_OK
|
||||
assert response.data[0]["related_document_ids"] == [7]
|
||||
|
||||
def test_related_document_ids_includes_duplicate_of(
|
||||
self,
|
||||
admin_client: APIClient,
|
||||
) -> None:
|
||||
"""related_document_ids includes duplicate_of when the file was already archived."""
|
||||
PaperlessTaskFactory(
|
||||
status=PaperlessTask.Status.SUCCESS,
|
||||
result_data={"duplicate_of": 12},
|
||||
)
|
||||
|
||||
response = self.client.get(self.ENDPOINT)
|
||||
response = admin_client.get(ENDPOINT)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
self.assertEqual(len(response.data), 2)
|
||||
returned_task1 = response.data[1]
|
||||
returned_task2 = response.data[0]
|
||||
assert response.status_code == status.HTTP_200_OK
|
||||
assert response.data[0]["related_document_ids"] == [12]
|
||||
|
||||
self.assertEqual(returned_task1["task_id"], task1.task_id)
|
||||
self.assertEqual(returned_task1["status"], celery.states.PENDING)
|
||||
self.assertEqual(returned_task1["task_file_name"], task1.task_file_name)
|
||||
def test_filter_by_task_type(self, admin_client: APIClient) -> None:
|
||||
"""?task_type= filters results to tasks of that type only."""
|
||||
PaperlessTaskFactory(task_type=PaperlessTask.TaskType.CONSUME_FILE)
|
||||
PaperlessTaskFactory(task_type=PaperlessTask.TaskType.TRAIN_CLASSIFIER)
|
||||
|
||||
self.assertEqual(returned_task2["task_id"], task2.task_id)
|
||||
self.assertEqual(returned_task2["status"], celery.states.PENDING)
|
||||
self.assertEqual(returned_task2["task_file_name"], task2.task_file_name)
|
||||
|
||||
def test_get_single_task_status(self) -> None:
|
||||
"""
|
||||
GIVEN
|
||||
- Query parameter for a valid task ID
|
||||
WHEN:
|
||||
- API call is made to get task status
|
||||
THEN:
|
||||
- Single task data is returned
|
||||
"""
|
||||
|
||||
id1 = str(uuid.uuid4())
|
||||
task1 = PaperlessTask.objects.create(
|
||||
task_id=id1,
|
||||
task_file_name="task_one.pdf",
|
||||
response = admin_client.get(
|
||||
ENDPOINT,
|
||||
{"task_type": PaperlessTask.TaskType.TRAIN_CLASSIFIER},
|
||||
)
|
||||
|
||||
_ = PaperlessTask.objects.create(
|
||||
task_id=str(uuid.uuid4()),
|
||||
task_file_name="task_two.pdf",
|
||||
assert response.status_code == status.HTTP_200_OK
|
||||
assert len(response.data) == 1
|
||||
assert response.data[0]["task_type"] == PaperlessTask.TaskType.TRAIN_CLASSIFIER
|
||||
|
||||
def test_filter_by_status(self, admin_client: APIClient) -> None:
|
||||
"""?status= filters results to tasks with that status only."""
|
||||
PaperlessTaskFactory(status=PaperlessTask.Status.PENDING)
|
||||
PaperlessTaskFactory(status=PaperlessTask.Status.SUCCESS)
|
||||
|
||||
response = admin_client.get(
|
||||
ENDPOINT,
|
||||
{"status": PaperlessTask.Status.SUCCESS},
|
||||
)
|
||||
|
||||
response = self.client.get(self.ENDPOINT + f"?task_id={id1}")
|
||||
assert response.status_code == status.HTTP_200_OK
|
||||
assert len(response.data) == 1
|
||||
assert response.data[0]["status"] == PaperlessTask.Status.SUCCESS
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
self.assertEqual(len(response.data), 1)
|
||||
returned_task1 = response.data[0]
|
||||
def test_filter_by_task_id(self, admin_client: APIClient) -> None:
|
||||
"""?task_id= returns only the task with that UUID."""
|
||||
task = PaperlessTaskFactory()
|
||||
PaperlessTaskFactory() # unrelated task that should not appear
|
||||
|
||||
self.assertEqual(returned_task1["task_id"], task1.task_id)
|
||||
response = admin_client.get(ENDPOINT, {"task_id": task.task_id})
|
||||
|
||||
def test_get_single_task_status_not_valid(self) -> None:
|
||||
"""
|
||||
GIVEN
|
||||
- Query parameter for a non-existent task ID
|
||||
WHEN:
|
||||
- API call is made to get task status
|
||||
THEN:
|
||||
- No task data is returned
|
||||
"""
|
||||
PaperlessTask.objects.create(
|
||||
task_id=str(uuid.uuid4()),
|
||||
task_file_name="task_one.pdf",
|
||||
assert response.status_code == status.HTTP_200_OK
|
||||
assert len(response.data) == 1
|
||||
assert response.data[0]["task_id"] == task.task_id
|
||||
|
||||
def test_filter_by_acknowledged(self, admin_client: APIClient) -> None:
|
||||
"""?acknowledged=false returns only tasks that have not been acknowledged."""
|
||||
PaperlessTaskFactory(acknowledged=False)
|
||||
PaperlessTaskFactory(acknowledged=True)
|
||||
|
||||
response = admin_client.get(ENDPOINT, {"acknowledged": "false"})
|
||||
|
||||
assert response.status_code == status.HTTP_200_OK
|
||||
assert len(response.data) == 1
|
||||
assert response.data[0]["acknowledged"] is False
|
||||
|
||||
def test_filter_is_complete_true(self, admin_client: APIClient) -> None:
|
||||
"""?is_complete=true returns only SUCCESS and FAILURE tasks."""
|
||||
PaperlessTaskFactory(status=PaperlessTask.Status.PENDING)
|
||||
PaperlessTaskFactory(status=PaperlessTask.Status.SUCCESS)
|
||||
PaperlessTaskFactory(status=PaperlessTask.Status.FAILURE)
|
||||
|
||||
response = admin_client.get(ENDPOINT, {"is_complete": "true"})
|
||||
|
||||
assert response.status_code == status.HTTP_200_OK
|
||||
assert len(response.data) == 2
|
||||
returned_statuses = {t["status"] for t in response.data}
|
||||
assert returned_statuses == {
|
||||
PaperlessTask.Status.SUCCESS,
|
||||
PaperlessTask.Status.FAILURE,
|
||||
}
|
||||
|
||||
def test_filter_is_complete_false(self, admin_client: APIClient) -> None:
|
||||
"""?is_complete=false returns only PENDING and STARTED tasks."""
|
||||
PaperlessTaskFactory(status=PaperlessTask.Status.PENDING)
|
||||
PaperlessTaskFactory(status=PaperlessTask.Status.STARTED)
|
||||
PaperlessTaskFactory(status=PaperlessTask.Status.SUCCESS)
|
||||
|
||||
response = admin_client.get(ENDPOINT, {"is_complete": "false"})
|
||||
|
||||
assert response.status_code == status.HTTP_200_OK
|
||||
assert len(response.data) == 2
|
||||
returned_statuses = {t["status"] for t in response.data}
|
||||
assert returned_statuses == {
|
||||
PaperlessTask.Status.PENDING,
|
||||
PaperlessTask.Status.STARTED,
|
||||
}
|
||||
|
||||
def test_default_ordering_is_newest_first(self, admin_client: APIClient) -> None:
|
||||
"""Tasks are returned in descending date_created order (newest first)."""
|
||||
base = timezone.now()
|
||||
t1 = PaperlessTaskFactory(date_created=base)
|
||||
t2 = PaperlessTaskFactory(date_created=base + timedelta(seconds=1))
|
||||
t3 = PaperlessTaskFactory(date_created=base + timedelta(seconds=2))
|
||||
|
||||
response = admin_client.get(ENDPOINT)
|
||||
|
||||
assert response.status_code == status.HTTP_200_OK
|
||||
ids = [t["task_id"] for t in response.data]
|
||||
assert ids == [t3.task_id, t2.task_id, t1.task_id]
|
||||
|
||||
def test_list_is_owner_aware(
|
||||
self,
|
||||
admin_user: User,
|
||||
regular_user: User,
|
||||
) -> None:
|
||||
"""The task list only shows tasks the user owns or that are unowned."""
|
||||
regular_user.user_permissions.add(
|
||||
Permission.objects.get(codename="view_paperlesstask"),
|
||||
)
|
||||
|
||||
_ = PaperlessTask.objects.create(
|
||||
task_id=str(uuid.uuid4()),
|
||||
task_file_name="task_two.pdf",
|
||||
client = APIClient()
|
||||
client.force_authenticate(user=regular_user)
|
||||
client.credentials(HTTP_ACCEPT=ACCEPT_V10)
|
||||
|
||||
PaperlessTaskFactory(owner=admin_user)
|
||||
shared_task = PaperlessTaskFactory()
|
||||
own_task = PaperlessTaskFactory(owner=regular_user)
|
||||
|
||||
response = client.get(ENDPOINT)
|
||||
|
||||
assert response.status_code == status.HTTP_200_OK
|
||||
assert len(response.data) == 2
|
||||
returned_task_ids = {t["task_id"] for t in response.data}
|
||||
assert shared_task.task_id in returned_task_ids
|
||||
assert own_task.task_id in returned_task_ids
|
||||
|
||||
|
||||
@pytest.mark.django_db()
|
||||
class TestGetTasksV9:
|
||||
def test_task_name_equals_task_type_value(self, v9_client: APIClient) -> None:
|
||||
"""task_name mirrors the task_type value for v9 backwards compatibility."""
|
||||
PaperlessTaskFactory(task_type=PaperlessTask.TaskType.CONSUME_FILE)
|
||||
|
||||
response = v9_client.get(ENDPOINT)
|
||||
|
||||
assert response.status_code == status.HTTP_200_OK
|
||||
assert response.data[0]["task_name"] == "consume_file"
|
||||
|
||||
def test_task_file_name_from_input_data(self, v9_client: APIClient) -> None:
|
||||
"""task_file_name is read from input_data['filename']."""
|
||||
PaperlessTaskFactory(input_data={"filename": "report.pdf"})
|
||||
|
||||
response = v9_client.get(ENDPOINT)
|
||||
|
||||
assert response.status_code == status.HTTP_200_OK
|
||||
assert response.data[0]["task_file_name"] == "report.pdf"
|
||||
|
||||
def test_task_file_name_none_when_no_filename_key(
|
||||
self,
|
||||
v9_client: APIClient,
|
||||
) -> None:
|
||||
"""task_file_name is None when filename is absent from input_data."""
|
||||
PaperlessTaskFactory(input_data={})
|
||||
|
||||
response = v9_client.get(ENDPOINT)
|
||||
|
||||
assert response.status_code == status.HTTP_200_OK
|
||||
assert response.data[0]["task_file_name"] is None
|
||||
|
||||
def test_type_scheduled_maps_to_scheduled_task(self, v9_client: APIClient) -> None:
|
||||
"""trigger_source=scheduled maps to type='SCHEDULED_TASK' in v9."""
|
||||
PaperlessTaskFactory(trigger_source=PaperlessTask.TriggerSource.SCHEDULED)
|
||||
|
||||
response = v9_client.get(ENDPOINT)
|
||||
|
||||
assert response.status_code == status.HTTP_200_OK
|
||||
assert response.data[0]["type"] == "SCHEDULED_TASK"
|
||||
|
||||
def test_type_system_maps_to_auto_task(self, v9_client: APIClient) -> None:
|
||||
"""trigger_source=system maps to type='AUTO_TASK' in v9."""
|
||||
PaperlessTaskFactory(trigger_source=PaperlessTask.TriggerSource.SYSTEM)
|
||||
|
||||
response = v9_client.get(ENDPOINT)
|
||||
|
||||
assert response.status_code == status.HTTP_200_OK
|
||||
assert response.data[0]["type"] == "AUTO_TASK"
|
||||
|
||||
def test_type_web_ui_maps_to_manual_task(self, v9_client: APIClient) -> None:
|
||||
"""trigger_source=web_ui maps to type='MANUAL_TASK' in v9."""
|
||||
PaperlessTaskFactory(trigger_source=PaperlessTask.TriggerSource.WEB_UI)
|
||||
|
||||
response = v9_client.get(ENDPOINT)
|
||||
|
||||
assert response.status_code == status.HTTP_200_OK
|
||||
assert response.data[0]["type"] == "MANUAL_TASK"
|
||||
|
||||
def test_type_manual_maps_to_manual_task(self, v9_client: APIClient) -> None:
|
||||
"""trigger_source=manual maps to type='MANUAL_TASK' in v9."""
|
||||
PaperlessTaskFactory(trigger_source=PaperlessTask.TriggerSource.MANUAL)
|
||||
|
||||
response = v9_client.get(ENDPOINT)
|
||||
|
||||
assert response.status_code == status.HTTP_200_OK
|
||||
assert response.data[0]["type"] == "MANUAL_TASK"
|
||||
|
||||
def test_related_document_from_result_data_document_id(
|
||||
self,
|
||||
v9_client: APIClient,
|
||||
) -> None:
|
||||
"""related_document is taken from result_data['document_id'] in v9."""
|
||||
PaperlessTaskFactory(
|
||||
status=PaperlessTask.Status.SUCCESS,
|
||||
result_data={"document_id": 99},
|
||||
)
|
||||
|
||||
response = self.client.get(self.ENDPOINT + "?task_id=bad-task-id")
|
||||
response = v9_client.get(ENDPOINT)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
self.assertEqual(len(response.data), 0)
|
||||
assert response.status_code == status.HTTP_200_OK
|
||||
assert response.data[0]["related_document"] == 99
|
||||
|
||||
def test_acknowledge_tasks(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- Attempted celery tasks
|
||||
WHEN:
|
||||
- API call is made to get mark task as acknowledged
|
||||
THEN:
|
||||
- Task is marked as acknowledged
|
||||
"""
|
||||
task = PaperlessTask.objects.create(
|
||||
task_id=str(uuid.uuid4()),
|
||||
task_file_name="task_one.pdf",
|
||||
def test_related_document_none_when_no_result_data(
|
||||
self,
|
||||
v9_client: APIClient,
|
||||
) -> None:
|
||||
"""related_document is None when result_data is absent in v9."""
|
||||
PaperlessTaskFactory(result_data=None)
|
||||
|
||||
response = v9_client.get(ENDPOINT)
|
||||
|
||||
assert response.status_code == status.HTTP_200_OK
|
||||
assert response.data[0]["related_document"] is None
|
||||
|
||||
def test_duplicate_documents_from_result_data(self, v9_client: APIClient) -> None:
|
||||
"""duplicate_documents includes duplicate_of from result_data in v9."""
|
||||
PaperlessTaskFactory(
|
||||
status=PaperlessTask.Status.SUCCESS,
|
||||
result_data={"duplicate_of": 55},
|
||||
)
|
||||
|
||||
response = self.client.get(self.ENDPOINT)
|
||||
self.assertEqual(len(response.data), 1)
|
||||
response = v9_client.get(ENDPOINT)
|
||||
|
||||
response = self.client.post(
|
||||
self.ENDPOINT + "acknowledge/",
|
||||
assert response.status_code == status.HTTP_200_OK
|
||||
assert response.data[0]["duplicate_documents"] == [55]
|
||||
|
||||
def test_duplicate_documents_empty_when_no_result_data(
|
||||
self,
|
||||
v9_client: APIClient,
|
||||
) -> None:
|
||||
"""duplicate_documents is an empty list when result_data is absent in v9."""
|
||||
PaperlessTaskFactory(result_data=None)
|
||||
|
||||
response = v9_client.get(ENDPOINT)
|
||||
|
||||
assert response.status_code == status.HTTP_200_OK
|
||||
assert response.data[0]["duplicate_documents"] == []
|
||||
|
||||
def test_filter_by_task_name_maps_to_task_type(self, v9_client: APIClient) -> None:
|
||||
"""?task_name=consume_file filter maps to the task_type field for v9 compatibility."""
|
||||
PaperlessTaskFactory(task_type=PaperlessTask.TaskType.CONSUME_FILE)
|
||||
PaperlessTaskFactory(task_type=PaperlessTask.TaskType.TRAIN_CLASSIFIER)
|
||||
|
||||
response = v9_client.get(ENDPOINT, {"task_name": "consume_file"})
|
||||
|
||||
assert response.status_code == status.HTTP_200_OK
|
||||
assert len(response.data) == 1
|
||||
assert response.data[0]["task_name"] == "consume_file"
|
||||
|
||||
def test_filter_by_type_maps_to_trigger_source(self, v9_client: APIClient) -> None:
|
||||
"""?type=SCHEDULED_TASK filter maps to trigger_source=scheduled for v9 compatibility."""
|
||||
PaperlessTaskFactory(trigger_source=PaperlessTask.TriggerSource.SCHEDULED)
|
||||
PaperlessTaskFactory(trigger_source=PaperlessTask.TriggerSource.WEB_UI)
|
||||
|
||||
response = v9_client.get(ENDPOINT, {"type": "SCHEDULED_TASK"})
|
||||
|
||||
assert response.status_code == status.HTTP_200_OK
|
||||
assert len(response.data) == 1
|
||||
assert response.data[0]["type"] == "SCHEDULED_TASK"
|
||||
|
||||
|
||||
@pytest.mark.django_db()
|
||||
class TestAcknowledge:
|
||||
def test_returns_count(self, admin_client: APIClient) -> None:
|
||||
"""POST acknowledge/ returns the count of tasks that were acknowledged."""
|
||||
task1 = PaperlessTaskFactory()
|
||||
task2 = PaperlessTaskFactory()
|
||||
|
||||
response = admin_client.post(
|
||||
ENDPOINT + "acknowledge/",
|
||||
{"tasks": [task1.id, task2.id]},
|
||||
format="json",
|
||||
)
|
||||
|
||||
assert response.status_code == status.HTTP_200_OK
|
||||
assert response.data == {"result": 2}
|
||||
|
||||
def test_acknowledged_tasks_excluded_from_unacked_filter(
|
||||
self,
|
||||
admin_client: APIClient,
|
||||
) -> None:
|
||||
"""Acknowledged tasks no longer appear when filtering with ?acknowledged=false."""
|
||||
task = PaperlessTaskFactory()
|
||||
admin_client.post(
|
||||
ENDPOINT + "acknowledge/",
|
||||
{"tasks": [task.id]},
|
||||
)
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
|
||||
response = self.client.get(self.ENDPOINT + "?acknowledged=false")
|
||||
self.assertEqual(len(response.data), 0)
|
||||
|
||||
def test_acknowledge_tasks_requires_change_permission(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- A regular user initially without change permissions
|
||||
- A regular user with change permissions
|
||||
WHEN:
|
||||
- API call is made to acknowledge tasks
|
||||
THEN:
|
||||
- The first user is forbidden from acknowledging tasks
|
||||
- The second user is allowed to acknowledge tasks
|
||||
"""
|
||||
regular_user = User.objects.create_user(username="test")
|
||||
self.client.force_authenticate(user=regular_user)
|
||||
|
||||
task = PaperlessTask.objects.create(
|
||||
task_id=str(uuid.uuid4()),
|
||||
task_file_name="task_one.pdf",
|
||||
format="json",
|
||||
)
|
||||
|
||||
response = self.client.post(
|
||||
self.ENDPOINT + "acknowledge/",
|
||||
response = admin_client.get(ENDPOINT, {"acknowledged": "false"})
|
||||
|
||||
assert response.status_code == status.HTTP_200_OK
|
||||
assert len(response.data) == 0
|
||||
|
||||
def test_requires_change_permission(self, user_client: APIClient) -> None:
|
||||
"""Regular users without change_paperlesstask permission receive 403."""
|
||||
task = PaperlessTaskFactory()
|
||||
|
||||
response = user_client.post(
|
||||
ENDPOINT + "acknowledge/",
|
||||
{"tasks": [task.id]},
|
||||
format="json",
|
||||
)
|
||||
self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
|
||||
|
||||
regular_user2 = User.objects.create_user(username="test2")
|
||||
regular_user2.user_permissions.add(
|
||||
assert response.status_code == status.HTTP_403_FORBIDDEN
|
||||
|
||||
def test_succeeds_with_change_permission(self, regular_user: User) -> None:
|
||||
"""Users granted change_paperlesstask permission can acknowledge tasks."""
|
||||
regular_user.user_permissions.add(
|
||||
Permission.objects.get(codename="change_paperlesstask"),
|
||||
)
|
||||
regular_user2.save()
|
||||
self.client.force_authenticate(user=regular_user2)
|
||||
regular_user.save()
|
||||
|
||||
response = self.client.post(
|
||||
self.ENDPOINT + "acknowledge/",
|
||||
client = APIClient()
|
||||
client.force_authenticate(user=regular_user)
|
||||
client.credentials(HTTP_ACCEPT=ACCEPT_V10)
|
||||
|
||||
task = PaperlessTaskFactory()
|
||||
response = client.post(
|
||||
ENDPOINT + "acknowledge/",
|
||||
{"tasks": [task.id]},
|
||||
)
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
|
||||
def test_tasks_owner_aware(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- Existing PaperlessTasks with owner and with no owner
|
||||
WHEN:
|
||||
- API call is made to get tasks
|
||||
THEN:
|
||||
- Only tasks with no owner or request user are returned
|
||||
"""
|
||||
|
||||
regular_user = User.objects.create_user(username="test")
|
||||
regular_user.user_permissions.add(*Permission.objects.all())
|
||||
self.client.logout()
|
||||
self.client.force_authenticate(user=regular_user)
|
||||
|
||||
task1 = PaperlessTask.objects.create(
|
||||
task_id=str(uuid.uuid4()),
|
||||
task_file_name="task_one.pdf",
|
||||
owner=self.user,
|
||||
format="json",
|
||||
)
|
||||
|
||||
task2 = PaperlessTask.objects.create(
|
||||
task_id=str(uuid.uuid4()),
|
||||
task_file_name="task_two.pdf",
|
||||
assert response.status_code == status.HTTP_200_OK
|
||||
|
||||
|
||||
@pytest.mark.django_db()
|
||||
class TestAcknowledgeAll:
|
||||
def test_marks_only_completed_tasks(self, admin_client: APIClient) -> None:
|
||||
"""acknowledge_all/ marks only SUCCESS and FAILURE tasks as acknowledged."""
|
||||
PaperlessTaskFactory(status=PaperlessTask.Status.SUCCESS, acknowledged=False)
|
||||
PaperlessTaskFactory(status=PaperlessTask.Status.FAILURE, acknowledged=False)
|
||||
PaperlessTaskFactory(status=PaperlessTask.Status.PENDING, acknowledged=False)
|
||||
|
||||
response = admin_client.post(ENDPOINT + "acknowledge_all/")
|
||||
|
||||
assert response.status_code == status.HTTP_200_OK
|
||||
assert response.data == {"result": 2}
|
||||
|
||||
def test_skips_already_acknowledged(self, admin_client: APIClient) -> None:
|
||||
"""acknowledge_all/ does not re-acknowledge tasks that are already acknowledged."""
|
||||
PaperlessTaskFactory(status=PaperlessTask.Status.SUCCESS, acknowledged=True)
|
||||
PaperlessTaskFactory(status=PaperlessTask.Status.SUCCESS, acknowledged=False)
|
||||
|
||||
response = admin_client.post(ENDPOINT + "acknowledge_all/")
|
||||
|
||||
assert response.status_code == status.HTTP_200_OK
|
||||
assert response.data == {"result": 1}
|
||||
|
||||
def test_skips_pending_and_started(self, admin_client: APIClient) -> None:
|
||||
"""acknowledge_all/ does not touch PENDING or STARTED tasks."""
|
||||
PaperlessTaskFactory(status=PaperlessTask.Status.PENDING)
|
||||
PaperlessTaskFactory(status=PaperlessTask.Status.STARTED)
|
||||
|
||||
response = admin_client.post(ENDPOINT + "acknowledge_all/")
|
||||
|
||||
assert response.status_code == status.HTTP_200_OK
|
||||
assert response.data == {"result": 0}
|
||||
|
||||
def test_includes_revoked(self, admin_client: APIClient) -> None:
|
||||
"""acknowledge_all/ marks REVOKED tasks as acknowledged."""
|
||||
PaperlessTaskFactory(status=PaperlessTask.Status.REVOKED, acknowledged=False)
|
||||
|
||||
response = admin_client.post(ENDPOINT + "acknowledge_all/")
|
||||
|
||||
assert response.status_code == status.HTTP_200_OK
|
||||
assert response.data == {"result": 1}
|
||||
|
||||
|
||||
@pytest.mark.django_db()
|
||||
class TestSummary:
|
||||
def test_returns_per_type_totals(self, admin_client: APIClient) -> None:
|
||||
"""summary/ returns per-type counts of total, success, and failure tasks."""
|
||||
PaperlessTaskFactory(
|
||||
task_type=PaperlessTask.TaskType.CONSUME_FILE,
|
||||
status=PaperlessTask.Status.SUCCESS,
|
||||
)
|
||||
PaperlessTaskFactory(
|
||||
task_type=PaperlessTask.TaskType.CONSUME_FILE,
|
||||
status=PaperlessTask.Status.FAILURE,
|
||||
)
|
||||
PaperlessTaskFactory(
|
||||
task_type=PaperlessTask.TaskType.TRAIN_CLASSIFIER,
|
||||
status=PaperlessTask.Status.SUCCESS,
|
||||
)
|
||||
|
||||
task3 = PaperlessTask.objects.create(
|
||||
task_id=str(uuid.uuid4()),
|
||||
task_file_name="task_three.pdf",
|
||||
owner=regular_user,
|
||||
)
|
||||
response = admin_client.get(ENDPOINT + "summary/")
|
||||
|
||||
response = self.client.get(self.ENDPOINT)
|
||||
assert response.status_code == status.HTTP_200_OK
|
||||
by_type = {item["task_type"]: item for item in response.data}
|
||||
assert by_type["consume_file"]["total_count"] == 2
|
||||
assert by_type["consume_file"]["success_count"] == 1
|
||||
assert by_type["consume_file"]["failure_count"] == 1
|
||||
assert by_type["train_classifier"]["total_count"] == 1
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
self.assertEqual(len(response.data), 2)
|
||||
self.assertEqual(response.data[0]["task_id"], task3.task_id)
|
||||
self.assertEqual(response.data[1]["task_id"], task2.task_id)
|
||||
|
||||
acknowledge_response = self.client.post(
|
||||
self.ENDPOINT + "acknowledge/",
|
||||
{"tasks": [task1.id, task2.id, task3.id]},
|
||||
)
|
||||
self.assertEqual(acknowledge_response.status_code, status.HTTP_200_OK)
|
||||
self.assertEqual(acknowledge_response.data, {"result": 2})
|
||||
@pytest.mark.django_db()
|
||||
class TestActive:
|
||||
def test_returns_pending_and_started_only(self, admin_client: APIClient) -> None:
|
||||
"""active/ returns only tasks in PENDING or STARTED status."""
|
||||
PaperlessTaskFactory(status=PaperlessTask.Status.PENDING)
|
||||
PaperlessTaskFactory(status=PaperlessTask.Status.STARTED)
|
||||
PaperlessTaskFactory(status=PaperlessTask.Status.SUCCESS)
|
||||
PaperlessTaskFactory(status=PaperlessTask.Status.FAILURE)
|
||||
|
||||
def test_task_result_no_error(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- A celery task completed without error
|
||||
WHEN:
|
||||
- API call is made to get tasks
|
||||
THEN:
|
||||
- The returned data includes the task result
|
||||
"""
|
||||
PaperlessTask.objects.create(
|
||||
task_id=str(uuid.uuid4()),
|
||||
task_file_name="task_one.pdf",
|
||||
status=celery.states.SUCCESS,
|
||||
result="Success. New document id 1 created",
|
||||
)
|
||||
response = admin_client.get(ENDPOINT + "active/")
|
||||
|
||||
response = self.client.get(self.ENDPOINT)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
self.assertEqual(len(response.data), 1)
|
||||
|
||||
returned_data = response.data[0]
|
||||
|
||||
self.assertEqual(returned_data["result"], "Success. New document id 1 created")
|
||||
self.assertEqual(returned_data["related_document"], "1")
|
||||
|
||||
def test_task_result_with_error(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- A celery task completed with an exception
|
||||
WHEN:
|
||||
- API call is made to get tasks
|
||||
THEN:
|
||||
- The returned result is the exception info
|
||||
"""
|
||||
PaperlessTask.objects.create(
|
||||
task_id=str(uuid.uuid4()),
|
||||
task_file_name="task_one.pdf",
|
||||
status=celery.states.FAILURE,
|
||||
result="test.pdf: Unexpected error during ingestion.",
|
||||
)
|
||||
|
||||
response = self.client.get(self.ENDPOINT)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
self.assertEqual(len(response.data), 1)
|
||||
|
||||
returned_data = response.data[0]
|
||||
|
||||
self.assertEqual(
|
||||
returned_data["result"],
|
||||
"test.pdf: Unexpected error during ingestion.",
|
||||
)
|
||||
|
||||
def test_task_name_webui(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- Attempted celery task
|
||||
- Task was created through the webui
|
||||
WHEN:
|
||||
- API call is made to get tasks
|
||||
THEN:
|
||||
- Returned data include the filename
|
||||
"""
|
||||
PaperlessTask.objects.create(
|
||||
task_id=str(uuid.uuid4()),
|
||||
task_file_name="test.pdf",
|
||||
task_name=PaperlessTask.TaskName.CONSUME_FILE,
|
||||
status=celery.states.SUCCESS,
|
||||
)
|
||||
|
||||
response = self.client.get(self.ENDPOINT)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
self.assertEqual(len(response.data), 1)
|
||||
|
||||
returned_data = response.data[0]
|
||||
|
||||
self.assertEqual(returned_data["task_file_name"], "test.pdf")
|
||||
|
||||
def test_task_name_consume_folder(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- Attempted celery task
|
||||
- Task was created through the consume folder
|
||||
WHEN:
|
||||
- API call is made to get tasks
|
||||
THEN:
|
||||
- Returned data include the filename
|
||||
"""
|
||||
PaperlessTask.objects.create(
|
||||
task_id=str(uuid.uuid4()),
|
||||
task_file_name="anothertest.pdf",
|
||||
task_name=PaperlessTask.TaskName.CONSUME_FILE,
|
||||
status=celery.states.SUCCESS,
|
||||
)
|
||||
|
||||
response = self.client.get(self.ENDPOINT)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
self.assertEqual(len(response.data), 1)
|
||||
|
||||
returned_data = response.data[0]
|
||||
|
||||
self.assertEqual(returned_data["task_file_name"], "anothertest.pdf")
|
||||
|
||||
def test_task_result_duplicate_warning_includes_count(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- A celery task succeeds, but a duplicate exists
|
||||
WHEN:
|
||||
- API call is made to get tasks
|
||||
THEN:
|
||||
- The returned data includes duplicate warning metadata
|
||||
"""
|
||||
checksum = "duplicate-checksum"
|
||||
Document.objects.create(
|
||||
title="Existing",
|
||||
content="",
|
||||
mime_type="application/pdf",
|
||||
checksum=checksum,
|
||||
)
|
||||
created_doc = Document.objects.create(
|
||||
title="Created",
|
||||
content="",
|
||||
mime_type="application/pdf",
|
||||
checksum=checksum,
|
||||
archive_checksum="another-checksum",
|
||||
)
|
||||
PaperlessTask.objects.create(
|
||||
task_id=str(uuid.uuid4()),
|
||||
task_file_name="task_one.pdf",
|
||||
status=celery.states.SUCCESS,
|
||||
result=f"Success. New document id {created_doc.pk} created",
|
||||
)
|
||||
|
||||
response = self.client.get(self.ENDPOINT)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
self.assertEqual(len(response.data), 1)
|
||||
|
||||
returned_data = response.data[0]
|
||||
|
||||
self.assertEqual(returned_data["related_document"], str(created_doc.pk))
|
||||
|
||||
def test_run_train_classifier_task(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- A superuser
|
||||
WHEN:
|
||||
- API call is made to run the train classifier task
|
||||
THEN:
|
||||
- The task is run
|
||||
"""
|
||||
mock_train_classifier = mock.Mock(return_value="Task started")
|
||||
TasksViewSet.TASK_AND_ARGS_BY_NAME = {
|
||||
PaperlessTask.TaskName.TRAIN_CLASSIFIER: (
|
||||
mock_train_classifier,
|
||||
{"scheduled": False},
|
||||
),
|
||||
assert response.status_code == status.HTTP_200_OK
|
||||
assert len(response.data) == 2
|
||||
active_statuses = {t["status"] for t in response.data}
|
||||
assert active_statuses == {
|
||||
PaperlessTask.Status.PENDING,
|
||||
PaperlessTask.Status.STARTED,
|
||||
}
|
||||
response = self.client.post(
|
||||
self.ENDPOINT + "run/",
|
||||
{"task_name": PaperlessTask.TaskName.TRAIN_CLASSIFIER},
|
||||
|
||||
def test_excludes_revoked_tasks_from_active(self, admin_client: APIClient) -> None:
|
||||
"""active/ excludes REVOKED tasks."""
|
||||
PaperlessTaskFactory(status=PaperlessTask.Status.REVOKED)
|
||||
|
||||
response = admin_client.get(ENDPOINT + "active/")
|
||||
|
||||
assert response.status_code == status.HTTP_200_OK
|
||||
assert len(response.data) == 0
|
||||
|
||||
|
||||
@pytest.mark.django_db()
|
||||
class TestRun:
|
||||
def test_forbidden_for_regular_user(self, user_client: APIClient) -> None:
|
||||
"""Regular users without add_paperlesstask permission receive 403 from run/."""
|
||||
response = user_client.post(
|
||||
ENDPOINT + "run/",
|
||||
{"task_type": PaperlessTask.TaskType.TRAIN_CLASSIFIER},
|
||||
format="json",
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
self.assertEqual(response.data, {"result": "Task started"})
|
||||
mock_train_classifier.assert_called_once_with(scheduled=False)
|
||||
assert response.status_code == status.HTTP_403_FORBIDDEN
|
||||
|
||||
# mock error
|
||||
mock_train_classifier.reset_mock()
|
||||
mock_train_classifier.side_effect = Exception("Error")
|
||||
response = self.client.post(
|
||||
self.ENDPOINT + "run/",
|
||||
{"task_name": PaperlessTask.TaskName.TRAIN_CLASSIFIER},
|
||||
def test_dispatches_via_apply_async_with_manual_trigger_header(
|
||||
self,
|
||||
admin_client: APIClient,
|
||||
) -> None:
|
||||
"""run/ dispatches the task via apply_async with trigger_source=manual in headers."""
|
||||
fake_task_id = str(uuid.uuid4())
|
||||
mock_async_result = mock.Mock()
|
||||
mock_async_result.id = fake_task_id
|
||||
|
||||
mock_apply_async = mock.Mock(return_value=mock_async_result)
|
||||
|
||||
with mock.patch(
|
||||
"documents.views.train_classifier.apply_async",
|
||||
mock_apply_async,
|
||||
):
|
||||
response = admin_client.post(
|
||||
ENDPOINT + "run/",
|
||||
{"task_type": PaperlessTask.TaskType.TRAIN_CLASSIFIER},
|
||||
format="json",
|
||||
)
|
||||
|
||||
assert response.status_code == status.HTTP_200_OK
|
||||
assert response.data == {"task_id": fake_task_id}
|
||||
mock_apply_async.assert_called_once_with(
|
||||
kwargs={},
|
||||
headers={"trigger_source": "manual"},
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_500_INTERNAL_SERVER_ERROR)
|
||||
mock_train_classifier.assert_called_once_with(scheduled=False)
|
||||
|
||||
@mock.patch("documents.tasks.sanity_check")
|
||||
def test_run_task_requires_superuser(self, mock_check_sanity) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- A regular user
|
||||
WHEN:
|
||||
- API call is made to run a task
|
||||
THEN:
|
||||
- The task is not run
|
||||
"""
|
||||
regular_user = User.objects.create_user(username="test")
|
||||
regular_user.user_permissions.add(*Permission.objects.all())
|
||||
self.client.logout()
|
||||
self.client.force_authenticate(user=regular_user)
|
||||
|
||||
response = self.client.post(
|
||||
self.ENDPOINT + "run/",
|
||||
{"task_name": PaperlessTask.TaskName.CHECK_SANITY},
|
||||
def test_returns_400_for_consume_file(self, admin_client: APIClient) -> None:
|
||||
"""consume_file cannot be manually triggered via the run endpoint."""
|
||||
response = admin_client.post(
|
||||
ENDPOINT + "run/",
|
||||
{"task_type": PaperlessTask.TaskType.CONSUME_FILE},
|
||||
format="json",
|
||||
)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
|
||||
mock_check_sanity.assert_not_called()
|
||||
assert response.status_code == status.HTTP_400_BAD_REQUEST
|
||||
|
||||
def test_returns_400_for_invalid_task_type(self, admin_client: APIClient) -> None:
|
||||
"""run/ returns 400 for an unrecognized task_type value."""
|
||||
response = admin_client.post(
|
||||
ENDPOINT + "run/",
|
||||
{"task_type": "not_a_real_type"},
|
||||
format="json",
|
||||
)
|
||||
|
||||
assert response.status_code == status.HTTP_400_BAD_REQUEST
|
||||
|
||||
def test_sanity_check_dispatched_with_correct_kwargs(
|
||||
self,
|
||||
admin_client: APIClient,
|
||||
) -> None:
|
||||
"""run/ dispatches sanity_check with raise_on_error=False and manual trigger header."""
|
||||
fake_task_id = str(uuid.uuid4())
|
||||
mock_async_result = mock.Mock()
|
||||
mock_async_result.id = fake_task_id
|
||||
|
||||
mock_apply_async = mock.Mock(return_value=mock_async_result)
|
||||
|
||||
with mock.patch(
|
||||
"documents.views.sanity_check.apply_async",
|
||||
mock_apply_async,
|
||||
):
|
||||
response = admin_client.post(
|
||||
ENDPOINT + "run/",
|
||||
{"task_type": PaperlessTask.TaskType.SANITY_CHECK},
|
||||
format="json",
|
||||
)
|
||||
|
||||
assert response.status_code == status.HTTP_200_OK
|
||||
assert response.data == {"task_id": fake_task_id}
|
||||
mock_apply_async.assert_called_once_with(
|
||||
kwargs={"raise_on_error": False},
|
||||
headers={"trigger_source": "manual"},
|
||||
)
|
||||
|
||||
@@ -211,7 +211,7 @@ class TestCreateClassifier:
|
||||
|
||||
call_command("document_create_classifier", skip_checks=True)
|
||||
|
||||
m.assert_called_once_with(scheduled=False, status_callback=mocker.ANY)
|
||||
m.assert_called_once_with(status_callback=mocker.ANY)
|
||||
assert callable(m.call_args.kwargs["status_callback"])
|
||||
|
||||
def test_create_classifier_callback_output(self, mocker: MockerFixture) -> None:
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
"""Tests for the sanity checker module.
|
||||
|
||||
Tests exercise ``check_sanity`` as a whole, verifying document validation,
|
||||
orphan detection, task recording, and the iter_wrapper contract.
|
||||
orphan detection, and the iter_wrapper contract.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -12,13 +12,12 @@ from typing import TYPE_CHECKING
|
||||
|
||||
import pytest
|
||||
|
||||
from documents.models import Document
|
||||
from documents.models import PaperlessTask
|
||||
from documents.sanity_checker import check_sanity
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Iterable
|
||||
|
||||
from documents.models import Document
|
||||
from documents.tests.conftest import PaperlessDirs
|
||||
|
||||
|
||||
@@ -229,35 +228,6 @@ class TestCheckSanityIterWrapper:
|
||||
assert not messages.has_error
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
class TestCheckSanityTaskRecording:
|
||||
@pytest.mark.parametrize(
|
||||
("expected_type", "scheduled"),
|
||||
[
|
||||
pytest.param(PaperlessTask.TaskType.SCHEDULED_TASK, True, id="scheduled"),
|
||||
pytest.param(PaperlessTask.TaskType.MANUAL_TASK, False, id="manual"),
|
||||
],
|
||||
)
|
||||
@pytest.mark.usefixtures("_media_settings")
|
||||
def test_task_type(self, expected_type: str, *, scheduled: bool) -> None:
|
||||
check_sanity(scheduled=scheduled)
|
||||
task = PaperlessTask.objects.latest("date_created")
|
||||
assert task.task_name == PaperlessTask.TaskName.CHECK_SANITY
|
||||
assert task.type == expected_type
|
||||
|
||||
def test_success_status(self, sample_doc: Document) -> None:
|
||||
check_sanity()
|
||||
task = PaperlessTask.objects.latest("date_created")
|
||||
assert task.status == "SUCCESS"
|
||||
|
||||
def test_failure_status(self, sample_doc: Document) -> None:
|
||||
Path(sample_doc.source_path).unlink()
|
||||
check_sanity()
|
||||
task = PaperlessTask.objects.latest("date_created")
|
||||
assert task.status == "FAILURE"
|
||||
assert "Check logs for details" in task.result
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
class TestCheckSanityLogMessages:
|
||||
def test_logs_doc_issues(
|
||||
|
||||
@@ -1,250 +1,302 @@
|
||||
import uuid
|
||||
from unittest import mock
|
||||
|
||||
import celery
|
||||
from django.contrib.auth import get_user_model
|
||||
from django.test import TestCase
|
||||
import pytest
|
||||
|
||||
from documents.data_models import ConsumableDocument
|
||||
from documents.data_models import DocumentMetadataOverrides
|
||||
from documents.data_models import DocumentSource
|
||||
from documents.models import Document
|
||||
from documents.models import PaperlessTask
|
||||
from documents.signals.handlers import add_to_index
|
||||
from documents.signals.handlers import before_task_publish_handler
|
||||
from documents.signals.handlers import task_failure_handler
|
||||
from documents.signals.handlers import task_postrun_handler
|
||||
from documents.signals.handlers import task_prerun_handler
|
||||
from documents.tests.test_consumer import fake_magic_from_file
|
||||
from documents.tests.utils import DirectoriesMixin
|
||||
|
||||
|
||||
@mock.patch("documents.consumer.magic.from_file", fake_magic_from_file)
|
||||
class TestTaskSignalHandler(DirectoriesMixin, TestCase):
|
||||
@classmethod
|
||||
def setUpTestData(cls) -> None:
|
||||
super().setUpTestData()
|
||||
cls.user = get_user_model().objects.create_user(username="testuser")
|
||||
@pytest.fixture
|
||||
def consume_input_doc():
|
||||
doc = mock.MagicMock(spec=ConsumableDocument)
|
||||
# original_file is a Path; configure the nested mock so .name works
|
||||
doc.original_file = mock.MagicMock()
|
||||
doc.original_file.name = "invoice.pdf"
|
||||
doc.original_path = None
|
||||
doc.mime_type = "application/pdf"
|
||||
doc.mailrule_id = None
|
||||
doc.source = DocumentSource.WebUI
|
||||
return doc
|
||||
|
||||
def util_call_before_task_publish_handler(
|
||||
|
||||
@pytest.fixture
|
||||
def consume_overrides(django_user_model):
|
||||
user = django_user_model.objects.create_user(username="testuser")
|
||||
overrides = mock.MagicMock(spec=DocumentMetadataOverrides)
|
||||
overrides.owner_id = user.id
|
||||
return overrides
|
||||
|
||||
|
||||
def send_publish(
|
||||
task_name: str,
|
||||
args: tuple,
|
||||
kwargs: dict,
|
||||
headers: dict | None = None,
|
||||
) -> str:
|
||||
from documents.signals.handlers import before_task_publish_handler
|
||||
|
||||
task_id = str(uuid.uuid4())
|
||||
hdrs = {"task": task_name, "id": task_id, **(headers or {})}
|
||||
before_task_publish_handler(sender=task_name, headers=hdrs, body=(args, kwargs, {}))
|
||||
return task_id
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
class TestBeforeTaskPublishHandler:
|
||||
def test_creates_task_for_consume_file(self, consume_input_doc, consume_overrides):
|
||||
task_id = send_publish(
|
||||
"documents.tasks.consume_file",
|
||||
(consume_input_doc, consume_overrides),
|
||||
{},
|
||||
)
|
||||
task = PaperlessTask.objects.get(task_id=task_id)
|
||||
assert task.task_type == PaperlessTask.TaskType.CONSUME_FILE
|
||||
assert task.status == PaperlessTask.Status.PENDING
|
||||
assert task.trigger_source == PaperlessTask.TriggerSource.WEB_UI
|
||||
assert task.input_data["filename"] == "invoice.pdf"
|
||||
assert task.owner_id == consume_overrides.owner_id
|
||||
|
||||
def test_creates_task_for_train_classifier(self):
|
||||
task_id = send_publish("documents.tasks.train_classifier", (), {})
|
||||
task = PaperlessTask.objects.get(task_id=task_id)
|
||||
assert task.task_type == PaperlessTask.TaskType.TRAIN_CLASSIFIER
|
||||
assert task.trigger_source == PaperlessTask.TriggerSource.MANUAL
|
||||
|
||||
def test_creates_task_for_sanity_check(self):
|
||||
task_id = send_publish("documents.tasks.sanity_check", (), {})
|
||||
task = PaperlessTask.objects.get(task_id=task_id)
|
||||
assert task.task_type == PaperlessTask.TaskType.SANITY_CHECK
|
||||
|
||||
def test_creates_task_for_process_mail_accounts(self):
|
||||
task_id = send_publish(
|
||||
"paperless_mail.tasks.process_mail_accounts",
|
||||
(),
|
||||
{"account_ids": [1, 2]},
|
||||
)
|
||||
task = PaperlessTask.objects.get(task_id=task_id)
|
||||
assert task.task_type == PaperlessTask.TaskType.MAIL_FETCH
|
||||
assert task.input_data["account_ids"] == [1, 2]
|
||||
|
||||
def test_scheduled_header_sets_trigger_source(self):
|
||||
task_id = send_publish(
|
||||
"documents.tasks.train_classifier",
|
||||
(),
|
||||
{},
|
||||
headers={"trigger_source": "scheduled"},
|
||||
)
|
||||
task = PaperlessTask.objects.get(task_id=task_id)
|
||||
assert task.trigger_source == PaperlessTask.TriggerSource.SCHEDULED
|
||||
|
||||
def test_system_header_sets_trigger_source(self):
|
||||
task_id = send_publish(
|
||||
"documents.tasks.llmindex_index",
|
||||
(),
|
||||
{"rebuild": True},
|
||||
headers={"trigger_source": "system"},
|
||||
)
|
||||
task = PaperlessTask.objects.get(task_id=task_id)
|
||||
assert task.trigger_source == PaperlessTask.TriggerSource.SYSTEM
|
||||
|
||||
def test_ignores_untracked_task(self):
|
||||
send_publish("documents.tasks.bulk_update_documents", ([1, 2],), {})
|
||||
assert PaperlessTask.objects.count() == 0
|
||||
|
||||
def test_ignores_none_headers(self):
|
||||
from documents.signals.handlers import before_task_publish_handler
|
||||
|
||||
before_task_publish_handler(sender=None, headers=None, body=None)
|
||||
assert PaperlessTask.objects.count() == 0
|
||||
|
||||
def test_consume_folder_source_maps_correctly(
|
||||
self,
|
||||
headers_to_use,
|
||||
body_to_use,
|
||||
) -> None:
|
||||
"""
|
||||
Simple utility to call the pre-run handle and ensure it created a single task
|
||||
instance
|
||||
"""
|
||||
self.assertEqual(PaperlessTask.objects.all().count(), 0)
|
||||
|
||||
before_task_publish_handler(headers=headers_to_use, body=body_to_use)
|
||||
|
||||
self.assertEqual(PaperlessTask.objects.all().count(), 1)
|
||||
|
||||
def test_before_task_publish_handler_consume(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- A celery task is started via the consume folder
|
||||
WHEN:
|
||||
- Task before publish handler is called
|
||||
THEN:
|
||||
- The task is created and marked as pending
|
||||
"""
|
||||
headers = {
|
||||
"id": str(uuid.uuid4()),
|
||||
"task": "documents.tasks.consume_file",
|
||||
}
|
||||
body = (
|
||||
# args
|
||||
(
|
||||
ConsumableDocument(
|
||||
source=DocumentSource.ConsumeFolder,
|
||||
original_file="/consume/hello-999.pdf",
|
||||
),
|
||||
DocumentMetadataOverrides(
|
||||
title="Hello world",
|
||||
owner_id=self.user.id,
|
||||
),
|
||||
),
|
||||
# kwargs
|
||||
consume_input_doc,
|
||||
consume_overrides,
|
||||
):
|
||||
consume_input_doc.source = DocumentSource.ConsumeFolder
|
||||
task_id = send_publish(
|
||||
"documents.tasks.consume_file",
|
||||
(consume_input_doc, consume_overrides),
|
||||
{},
|
||||
# celery stuff
|
||||
{"callbacks": None, "errbacks": None, "chain": None, "chord": None},
|
||||
)
|
||||
self.util_call_before_task_publish_handler(
|
||||
headers_to_use=headers,
|
||||
body_to_use=body,
|
||||
)
|
||||
task = PaperlessTask.objects.get(task_id=task_id)
|
||||
assert task.trigger_source == PaperlessTask.TriggerSource.FOLDER_CONSUME
|
||||
|
||||
task = PaperlessTask.objects.get()
|
||||
self.assertIsNotNone(task)
|
||||
self.assertEqual(headers["id"], task.task_id)
|
||||
self.assertEqual("hello-999.pdf", task.task_file_name)
|
||||
self.assertEqual(PaperlessTask.TaskName.CONSUME_FILE, task.task_name)
|
||||
self.assertEqual(self.user.id, task.owner_id)
|
||||
self.assertEqual(celery.states.PENDING, task.status)
|
||||
|
||||
def test_task_prerun_handler(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- A celery task is started via the consume folder
|
||||
WHEN:
|
||||
- Task starts execution
|
||||
THEN:
|
||||
- The task is marked as started
|
||||
"""
|
||||
|
||||
headers = {
|
||||
"id": str(uuid.uuid4()),
|
||||
"task": "documents.tasks.consume_file",
|
||||
}
|
||||
body = (
|
||||
# args
|
||||
(
|
||||
ConsumableDocument(
|
||||
source=DocumentSource.ConsumeFolder,
|
||||
original_file="/consume/hello-99.pdf",
|
||||
),
|
||||
None,
|
||||
),
|
||||
# kwargs
|
||||
def test_email_source_maps_correctly(self, consume_input_doc, consume_overrides):
|
||||
consume_input_doc.source = DocumentSource.MailFetch
|
||||
task_id = send_publish(
|
||||
"documents.tasks.consume_file",
|
||||
(consume_input_doc, consume_overrides),
|
||||
{},
|
||||
# celery stuff
|
||||
{"callbacks": None, "errbacks": None, "chain": None, "chord": None},
|
||||
)
|
||||
task = PaperlessTask.objects.get(task_id=task_id)
|
||||
assert task.trigger_source == PaperlessTask.TriggerSource.EMAIL_CONSUME
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
class TestTaskPrerunHandler:
|
||||
def test_marks_task_started(self):
|
||||
task = PaperlessTask.objects.create(
|
||||
task_id=str(uuid.uuid4()),
|
||||
task_type=PaperlessTask.TaskType.CONSUME_FILE,
|
||||
trigger_source=PaperlessTask.TriggerSource.MANUAL,
|
||||
status=PaperlessTask.Status.PENDING,
|
||||
)
|
||||
from documents.signals.handlers import task_prerun_handler
|
||||
|
||||
task_prerun_handler(task_id=task.task_id)
|
||||
task.refresh_from_db()
|
||||
assert task.status == PaperlessTask.Status.STARTED
|
||||
assert task.date_started is not None
|
||||
|
||||
def test_ignores_unknown_task_id(self):
|
||||
from documents.signals.handlers import task_prerun_handler
|
||||
|
||||
task_prerun_handler(task_id="nonexistent-id") # must not raise
|
||||
|
||||
def test_ignores_none_task_id(self):
|
||||
from documents.signals.handlers import task_prerun_handler
|
||||
|
||||
task_prerun_handler(task_id=None) # must not raise
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
class TestTaskPostrunHandler:
|
||||
def _started_task(self) -> PaperlessTask:
|
||||
from django.utils import timezone
|
||||
|
||||
return PaperlessTask.objects.create(
|
||||
task_id=str(uuid.uuid4()),
|
||||
task_type=PaperlessTask.TaskType.TRAIN_CLASSIFIER,
|
||||
trigger_source=PaperlessTask.TriggerSource.MANUAL,
|
||||
status=PaperlessTask.Status.STARTED,
|
||||
date_started=timezone.now(),
|
||||
)
|
||||
|
||||
self.util_call_before_task_publish_handler(
|
||||
headers_to_use=headers,
|
||||
body_to_use=body,
|
||||
)
|
||||
|
||||
task_prerun_handler(task_id=headers["id"])
|
||||
|
||||
task = PaperlessTask.objects.get()
|
||||
|
||||
self.assertEqual(celery.states.STARTED, task.status)
|
||||
|
||||
def test_task_postrun_handler(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- A celery task is started via the consume folder
|
||||
WHEN:
|
||||
- Task finished execution
|
||||
THEN:
|
||||
- The task is marked as started
|
||||
"""
|
||||
headers = {
|
||||
"id": str(uuid.uuid4()),
|
||||
"task": "documents.tasks.consume_file",
|
||||
}
|
||||
body = (
|
||||
# args
|
||||
(
|
||||
ConsumableDocument(
|
||||
source=DocumentSource.ConsumeFolder,
|
||||
original_file="/consume/hello-9.pdf",
|
||||
),
|
||||
None,
|
||||
),
|
||||
# kwargs
|
||||
{},
|
||||
# celery stuff
|
||||
{"callbacks": None, "errbacks": None, "chain": None, "chord": None},
|
||||
)
|
||||
self.util_call_before_task_publish_handler(
|
||||
headers_to_use=headers,
|
||||
body_to_use=body,
|
||||
)
|
||||
def test_records_success_with_dict_result(self):
|
||||
task = self._started_task()
|
||||
from documents.signals.handlers import task_postrun_handler
|
||||
|
||||
task_postrun_handler(
|
||||
task_id=headers["id"],
|
||||
retval="Success. New document id 1 created",
|
||||
state=celery.states.SUCCESS,
|
||||
task_id=task.task_id,
|
||||
retval={"document_id": 42},
|
||||
state="SUCCESS",
|
||||
)
|
||||
task.refresh_from_db()
|
||||
assert task.status == PaperlessTask.Status.SUCCESS
|
||||
assert task.result_data == {"document_id": 42}
|
||||
assert task.date_done is not None
|
||||
assert task.duration_seconds is not None
|
||||
assert task.wait_time_seconds is not None
|
||||
|
||||
task = PaperlessTask.objects.get()
|
||||
def test_records_failure_state(self):
|
||||
task = self._started_task()
|
||||
from documents.signals.handlers import task_postrun_handler
|
||||
|
||||
self.assertEqual(celery.states.SUCCESS, task.status)
|
||||
task_postrun_handler(task_id=task.task_id, retval="some error", state="FAILURE")
|
||||
task.refresh_from_db()
|
||||
assert task.status == PaperlessTask.Status.FAILURE
|
||||
|
||||
def test_task_failure_handler(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- A celery task is started via the consume folder
|
||||
WHEN:
|
||||
- Task failed execution
|
||||
THEN:
|
||||
- The task is marked as failed
|
||||
"""
|
||||
headers = {
|
||||
"id": str(uuid.uuid4()),
|
||||
"task": "documents.tasks.consume_file",
|
||||
}
|
||||
body = (
|
||||
# args
|
||||
(
|
||||
ConsumableDocument(
|
||||
source=DocumentSource.ConsumeFolder,
|
||||
original_file="/consume/hello-9.pdf",
|
||||
),
|
||||
None,
|
||||
),
|
||||
# kwargs
|
||||
{},
|
||||
# celery stuff
|
||||
{"callbacks": None, "errbacks": None, "chain": None, "chord": None},
|
||||
def test_parses_legacy_new_document_string(self):
|
||||
task = self._started_task()
|
||||
from documents.signals.handlers import task_postrun_handler
|
||||
|
||||
task_postrun_handler(
|
||||
task_id=task.task_id,
|
||||
retval="New document id 42 created",
|
||||
state="SUCCESS",
|
||||
)
|
||||
self.util_call_before_task_publish_handler(
|
||||
headers_to_use=headers,
|
||||
body_to_use=body,
|
||||
task.refresh_from_db()
|
||||
assert task.result_data["document_id"] == 42
|
||||
assert task.result_message == "New document id 42 created"
|
||||
|
||||
def test_parses_legacy_duplicate_string(self):
|
||||
task = self._started_task()
|
||||
from documents.signals.handlers import task_postrun_handler
|
||||
|
||||
task_postrun_handler(
|
||||
task_id=task.task_id,
|
||||
retval="It is a duplicate of some document (#99).",
|
||||
state="FAILURE",
|
||||
)
|
||||
task.refresh_from_db()
|
||||
assert task.result_data["duplicate_of"] == 99
|
||||
assert task.result_data["duplicate_in_trash"] is False
|
||||
|
||||
def test_ignores_unknown_task_id(self):
|
||||
from documents.signals.handlers import task_postrun_handler
|
||||
|
||||
task_postrun_handler(
|
||||
task_id="nonexistent",
|
||||
retval=None,
|
||||
state="SUCCESS",
|
||||
) # must not raise
|
||||
|
||||
def test_records_revoked_state(self):
|
||||
task = self._started_task()
|
||||
from documents.signals.handlers import task_postrun_handler
|
||||
|
||||
task_postrun_handler(task_id=task.task_id, retval=None, state="REVOKED")
|
||||
task.refresh_from_db()
|
||||
assert task.status == PaperlessTask.Status.REVOKED
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
class TestTaskFailureHandler:
|
||||
def test_records_failure_with_exception(self):
|
||||
from django.utils import timezone
|
||||
|
||||
task = PaperlessTask.objects.create(
|
||||
task_id=str(uuid.uuid4()),
|
||||
task_type=PaperlessTask.TaskType.CONSUME_FILE,
|
||||
trigger_source=PaperlessTask.TriggerSource.WEB_UI,
|
||||
status=PaperlessTask.Status.STARTED,
|
||||
date_started=timezone.now(),
|
||||
)
|
||||
from documents.signals.handlers import task_failure_handler
|
||||
|
||||
task_failure_handler(
|
||||
task_id=headers["id"],
|
||||
exception="Example failure",
|
||||
task_id=task.task_id,
|
||||
exception=ValueError("PDF parse failed"),
|
||||
traceback=None,
|
||||
)
|
||||
task.refresh_from_db()
|
||||
assert task.status == PaperlessTask.Status.FAILURE
|
||||
assert task.result_data["error_type"] == "ValueError"
|
||||
assert task.result_data["error_message"] == "PDF parse failed"
|
||||
assert task.date_done is not None
|
||||
|
||||
task = PaperlessTask.objects.get()
|
||||
def test_records_traceback_when_provided(self):
|
||||
import sys
|
||||
|
||||
self.assertEqual(celery.states.FAILURE, task.status)
|
||||
from django.utils import timezone
|
||||
|
||||
def test_add_to_index_indexes_root_once_for_root_documents(self) -> None:
|
||||
root = Document.objects.create(
|
||||
title="root",
|
||||
checksum="root",
|
||||
mime_type="application/pdf",
|
||||
task = PaperlessTask.objects.create(
|
||||
task_id=str(uuid.uuid4()),
|
||||
task_type=PaperlessTask.TaskType.CONSUME_FILE,
|
||||
trigger_source=PaperlessTask.TriggerSource.WEB_UI,
|
||||
status=PaperlessTask.Status.STARTED,
|
||||
date_started=timezone.now(),
|
||||
)
|
||||
try:
|
||||
raise ValueError("test error")
|
||||
except ValueError:
|
||||
tb = sys.exc_info()[2]
|
||||
|
||||
with mock.patch("documents.search.get_backend") as mock_get_backend:
|
||||
mock_backend = mock.MagicMock()
|
||||
mock_get_backend.return_value = mock_backend
|
||||
add_to_index(sender=None, document=root)
|
||||
from documents.signals.handlers import task_failure_handler
|
||||
|
||||
mock_backend.add_or_update.assert_called_once_with(root, effective_content="")
|
||||
|
||||
def test_add_to_index_reindexes_root_for_version_documents(self) -> None:
|
||||
root = Document.objects.create(
|
||||
title="root",
|
||||
checksum="root",
|
||||
mime_type="application/pdf",
|
||||
)
|
||||
version = Document.objects.create(
|
||||
title="version",
|
||||
checksum="version",
|
||||
mime_type="application/pdf",
|
||||
root_document=root,
|
||||
task_failure_handler(
|
||||
task_id=task.task_id,
|
||||
exception=ValueError("test error"),
|
||||
traceback=tb,
|
||||
)
|
||||
task.refresh_from_db()
|
||||
assert "traceback" in task.result_data
|
||||
assert len(task.result_data["traceback"]) <= 5000
|
||||
|
||||
with mock.patch("documents.search.get_backend") as mock_get_backend:
|
||||
mock_backend = mock.MagicMock()
|
||||
mock_get_backend.return_value = mock_backend
|
||||
add_to_index(sender=None, document=version)
|
||||
def test_ignores_none_task_id(self):
|
||||
from documents.signals.handlers import task_failure_handler
|
||||
|
||||
self.assertEqual(mock_backend.add_or_update.call_count, 1)
|
||||
self.assertEqual(
|
||||
mock_backend.add_or_update.call_args_list[0].args[0].id,
|
||||
version.id,
|
||||
)
|
||||
self.assertEqual(
|
||||
mock_backend.add_or_update.call_args_list[0].kwargs,
|
||||
{"effective_content": version.content},
|
||||
)
|
||||
task_failure_handler(task_id=None, exception=ValueError("x"), traceback=None)
|
||||
|
||||
@@ -4,7 +4,6 @@ from pathlib import Path
|
||||
from unittest import mock
|
||||
|
||||
import pytest
|
||||
from celery import states
|
||||
from django.conf import settings
|
||||
from django.test import TestCase
|
||||
from django.test import override_settings
|
||||
@@ -14,7 +13,6 @@ from documents import tasks
|
||||
from documents.models import Correspondent
|
||||
from documents.models import Document
|
||||
from documents.models import DocumentType
|
||||
from documents.models import PaperlessTask
|
||||
from documents.models import Tag
|
||||
from documents.sanity_checker import SanityCheckFailedException
|
||||
from documents.sanity_checker import SanityCheckMessages
|
||||
@@ -40,7 +38,8 @@ class TestClassifier(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
def test_train_classifier_with_auto_tag(self, load_classifier) -> None:
|
||||
load_classifier.return_value = None
|
||||
Tag.objects.create(matching_algorithm=Tag.MATCH_AUTO, name="test")
|
||||
tasks.train_classifier()
|
||||
with self.assertRaises(ValueError):
|
||||
tasks.train_classifier()
|
||||
load_classifier.assert_called_once()
|
||||
self.assertIsNotFile(settings.MODEL_FILE)
|
||||
|
||||
@@ -48,7 +47,8 @@ class TestClassifier(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
def test_train_classifier_with_auto_type(self, load_classifier) -> None:
|
||||
load_classifier.return_value = None
|
||||
DocumentType.objects.create(matching_algorithm=Tag.MATCH_AUTO, name="test")
|
||||
tasks.train_classifier()
|
||||
with self.assertRaises(ValueError):
|
||||
tasks.train_classifier()
|
||||
load_classifier.assert_called_once()
|
||||
self.assertIsNotFile(settings.MODEL_FILE)
|
||||
|
||||
@@ -56,7 +56,8 @@ class TestClassifier(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
def test_train_classifier_with_auto_correspondent(self, load_classifier) -> None:
|
||||
load_classifier.return_value = None
|
||||
Correspondent.objects.create(matching_algorithm=Tag.MATCH_AUTO, name="test")
|
||||
tasks.train_classifier()
|
||||
with self.assertRaises(ValueError):
|
||||
tasks.train_classifier()
|
||||
load_classifier.assert_called_once()
|
||||
self.assertIsNotFile(settings.MODEL_FILE)
|
||||
|
||||
@@ -298,7 +299,7 @@ class TestAIIndex(DirectoriesMixin, TestCase):
|
||||
WHEN:
|
||||
- llmindex_index task is called
|
||||
THEN:
|
||||
- update_llm_index is called, and the task is marked as success
|
||||
- update_llm_index is called and its result is returned
|
||||
"""
|
||||
Document.objects.create(
|
||||
title="test",
|
||||
@@ -308,13 +309,9 @@ class TestAIIndex(DirectoriesMixin, TestCase):
|
||||
# lazy-loaded so mock the actual function
|
||||
with mock.patch("paperless_ai.indexing.update_llm_index") as update_llm_index:
|
||||
update_llm_index.return_value = "LLM index updated successfully."
|
||||
tasks.llmindex_index()
|
||||
result = tasks.llmindex_index()
|
||||
update_llm_index.assert_called_once()
|
||||
task = PaperlessTask.objects.get(
|
||||
task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,
|
||||
)
|
||||
self.assertEqual(task.status, states.SUCCESS)
|
||||
self.assertEqual(task.result, "LLM index updated successfully.")
|
||||
self.assertEqual(result, "LLM index updated successfully.")
|
||||
|
||||
@override_settings(
|
||||
AI_ENABLED=True,
|
||||
@@ -325,9 +322,9 @@ class TestAIIndex(DirectoriesMixin, TestCase):
|
||||
GIVEN:
|
||||
- Document exists, AI is enabled, llm index backend is set
|
||||
WHEN:
|
||||
- llmindex_index task is called
|
||||
- llmindex_index task is called and update_llm_index raises an exception
|
||||
THEN:
|
||||
- update_llm_index raises an exception, and the task is marked as failure
|
||||
- the exception propagates to the caller
|
||||
"""
|
||||
Document.objects.create(
|
||||
title="test",
|
||||
@@ -337,13 +334,9 @@ class TestAIIndex(DirectoriesMixin, TestCase):
|
||||
# lazy-loaded so mock the actual function
|
||||
with mock.patch("paperless_ai.indexing.update_llm_index") as update_llm_index:
|
||||
update_llm_index.side_effect = Exception("LLM index update failed.")
|
||||
tasks.llmindex_index()
|
||||
with self.assertRaises(Exception, msg="LLM index update failed."):
|
||||
tasks.llmindex_index()
|
||||
update_llm_index.assert_called_once()
|
||||
task = PaperlessTask.objects.get(
|
||||
task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,
|
||||
)
|
||||
self.assertEqual(task.status, states.FAILURE)
|
||||
self.assertIn("LLM index update failed.", task.result)
|
||||
|
||||
def test_update_document_in_llm_index(self) -> None:
|
||||
"""
|
||||
|
||||
@@ -8,6 +8,7 @@ import zipfile
|
||||
from collections import defaultdict
|
||||
from collections import deque
|
||||
from datetime import datetime
|
||||
from datetime import timedelta
|
||||
from pathlib import Path
|
||||
from time import mktime
|
||||
from typing import TYPE_CHECKING
|
||||
@@ -20,7 +21,6 @@ from urllib.parse import urlparse
|
||||
import httpx
|
||||
import magic
|
||||
import pathvalidate
|
||||
from celery import states
|
||||
from django.conf import settings
|
||||
from django.contrib.auth.models import Group
|
||||
from django.contrib.auth.models import User
|
||||
@@ -91,6 +91,7 @@ from rest_framework.mixins import DestroyModelMixin
|
||||
from rest_framework.mixins import ListModelMixin
|
||||
from rest_framework.mixins import RetrieveModelMixin
|
||||
from rest_framework.mixins import UpdateModelMixin
|
||||
from rest_framework.permissions import IsAdminUser
|
||||
from rest_framework.permissions import IsAuthenticated
|
||||
from rest_framework.request import Request
|
||||
from rest_framework.response import Response
|
||||
@@ -191,7 +192,7 @@ from documents.serialisers import PostDocumentSerializer
|
||||
from documents.serialisers import RemovePasswordDocumentsSerializer
|
||||
from documents.serialisers import ReprocessDocumentsSerializer
|
||||
from documents.serialisers import RotateDocumentsSerializer
|
||||
from documents.serialisers import RunTaskViewSerializer
|
||||
from documents.serialisers import RunTaskSerializer
|
||||
from documents.serialisers import SavedViewSerializer
|
||||
from documents.serialisers import SearchResultSerializer
|
||||
from documents.serialisers import SerializerWithPerms
|
||||
@@ -200,7 +201,9 @@ from documents.serialisers import ShareLinkSerializer
|
||||
from documents.serialisers import StoragePathSerializer
|
||||
from documents.serialisers import StoragePathTestSerializer
|
||||
from documents.serialisers import TagSerializer
|
||||
from documents.serialisers import TasksViewSerializer
|
||||
from documents.serialisers import TaskSerializerV9
|
||||
from documents.serialisers import TaskSerializerV10
|
||||
from documents.serialisers import TaskSummarySerializer
|
||||
from documents.serialisers import TrashSerializer
|
||||
from documents.serialisers import UiSettingsViewSerializer
|
||||
from documents.serialisers import WorkflowActionSerializer
|
||||
@@ -3667,35 +3670,50 @@ class RemoteVersionView(GenericAPIView[Any]):
|
||||
)
|
||||
class TasksViewSet(ReadOnlyModelViewSet[PaperlessTask]):
|
||||
permission_classes = (IsAuthenticated, PaperlessObjectPermissions)
|
||||
serializer_class = TasksViewSerializer
|
||||
filter_backends = (
|
||||
DjangoFilterBackend,
|
||||
OrderingFilter,
|
||||
ObjectOwnedOrGrantedPermissionsFilter,
|
||||
)
|
||||
filterset_class = PaperlessTaskFilterSet
|
||||
ordering_fields = [
|
||||
"date_created",
|
||||
"date_done",
|
||||
"status",
|
||||
"task_type",
|
||||
"duration_seconds",
|
||||
"wait_time_seconds",
|
||||
]
|
||||
ordering = ["-date_created"]
|
||||
|
||||
TASK_AND_ARGS_BY_NAME = {
|
||||
PaperlessTask.TaskName.INDEX_OPTIMIZE: (index_optimize, {}),
|
||||
PaperlessTask.TaskName.TRAIN_CLASSIFIER: (
|
||||
train_classifier,
|
||||
{"scheduled": False},
|
||||
),
|
||||
PaperlessTask.TaskName.CHECK_SANITY: (
|
||||
sanity_check,
|
||||
{"scheduled": False, "raise_on_error": False},
|
||||
),
|
||||
PaperlessTask.TaskName.LLMINDEX_UPDATE: (
|
||||
llmindex_index,
|
||||
{"scheduled": False, "rebuild": False},
|
||||
),
|
||||
}
|
||||
def get_serializer_class(self):
|
||||
# v9: use backwards-compatible serializer with old field names
|
||||
if self.request.version and int(self.request.version) < 10:
|
||||
return TaskSerializerV9
|
||||
return TaskSerializerV10
|
||||
|
||||
def get_queryset(self):
|
||||
queryset = PaperlessTask.objects.all().order_by("-date_created")
|
||||
queryset = PaperlessTask.objects.all()
|
||||
# v9 backwards compat: map old query params to new field names
|
||||
if self.request.version and int(self.request.version) < 10:
|
||||
task_name = self.request.query_params.get("task_name")
|
||||
if task_name is not None:
|
||||
queryset = queryset.filter(task_type=task_name)
|
||||
task_type_old = self.request.query_params.get("type")
|
||||
if task_type_old is not None:
|
||||
# Old type values: AUTO_TASK -> SYSTEM, SCHEDULED_TASK -> SCHEDULED, MANUAL_TASK -> MANUAL
|
||||
old_to_new = {
|
||||
"AUTO_TASK": PaperlessTask.TriggerSource.SYSTEM,
|
||||
"SCHEDULED_TASK": PaperlessTask.TriggerSource.SCHEDULED,
|
||||
"MANUAL_TASK": PaperlessTask.TriggerSource.MANUAL,
|
||||
}
|
||||
new_source = old_to_new.get(task_type_old)
|
||||
if new_source:
|
||||
queryset = queryset.filter(trigger_source=new_source)
|
||||
# v10+: direct task_id param for backwards compat
|
||||
task_id = self.request.query_params.get("task_id")
|
||||
if task_id is not None:
|
||||
queryset = PaperlessTask.objects.filter(task_id=task_id)
|
||||
queryset = queryset.filter(task_id=task_id)
|
||||
return queryset
|
||||
|
||||
@action(
|
||||
@@ -3707,33 +3725,117 @@ class TasksViewSet(ReadOnlyModelViewSet[PaperlessTask]):
|
||||
serializer = AcknowledgeTasksViewSerializer(data=request.data)
|
||||
serializer.is_valid(raise_exception=True)
|
||||
task_ids = serializer.validated_data.get("tasks")
|
||||
tasks = self.get_queryset().filter(id__in=task_ids)
|
||||
count = tasks.update(acknowledged=True)
|
||||
return Response({"result": count})
|
||||
|
||||
try:
|
||||
tasks = PaperlessTask.objects.filter(id__in=task_ids)
|
||||
if request.user is not None and not request.user.is_superuser:
|
||||
tasks = tasks.filter(owner=request.user) | tasks.filter(owner=None)
|
||||
result = tasks.update(
|
||||
acknowledged=True,
|
||||
@action(
|
||||
methods=["post"],
|
||||
detail=False,
|
||||
permission_classes=[IsAuthenticated, AcknowledgeTasksPermissions],
|
||||
)
|
||||
def acknowledge_all(self, request):
|
||||
"""Acknowledge all completed tasks visible to the requesting user."""
|
||||
count = (
|
||||
self.get_queryset()
|
||||
.filter(
|
||||
acknowledged=False,
|
||||
status__in=[
|
||||
PaperlessTask.Status.SUCCESS,
|
||||
PaperlessTask.Status.FAILURE,
|
||||
PaperlessTask.Status.REVOKED,
|
||||
],
|
||||
)
|
||||
return Response({"result": result})
|
||||
except Exception:
|
||||
return HttpResponseBadRequest()
|
||||
.update(acknowledged=True)
|
||||
)
|
||||
return Response({"result": count})
|
||||
|
||||
@action(methods=["post"], detail=False)
|
||||
@action(methods=["get"], detail=False)
|
||||
def summary(self, request):
|
||||
"""Aggregated task statistics per task_type over the last N days (default 30)."""
|
||||
from django.db.models import Avg
|
||||
from django.db.models import Count
|
||||
from django.db.models import Max
|
||||
from django.db.models import Q
|
||||
|
||||
days = int(request.query_params.get("days", 30))
|
||||
cutoff = timezone.now() - timedelta(days=days)
|
||||
queryset = self.get_queryset().filter(date_created__gte=cutoff)
|
||||
|
||||
data = queryset.values("task_type").annotate(
|
||||
total_count=Count("id"),
|
||||
pending_count=Count("id", filter=Q(status=PaperlessTask.Status.PENDING)),
|
||||
success_count=Count("id", filter=Q(status=PaperlessTask.Status.SUCCESS)),
|
||||
failure_count=Count("id", filter=Q(status=PaperlessTask.Status.FAILURE)),
|
||||
avg_duration_seconds=Avg(
|
||||
"duration_seconds",
|
||||
filter=Q(duration_seconds__isnull=False),
|
||||
),
|
||||
avg_wait_time_seconds=Avg(
|
||||
"wait_time_seconds",
|
||||
filter=Q(wait_time_seconds__isnull=False),
|
||||
),
|
||||
last_run=Max("date_created"),
|
||||
last_success=Max(
|
||||
"date_done",
|
||||
filter=Q(status=PaperlessTask.Status.SUCCESS),
|
||||
),
|
||||
last_failure=Max(
|
||||
"date_done",
|
||||
filter=Q(status=PaperlessTask.Status.FAILURE),
|
||||
),
|
||||
)
|
||||
serializer = TaskSummarySerializer(data, many=True)
|
||||
return Response(serializer.data)
|
||||
|
||||
@action(methods=["get"], detail=False)
|
||||
def active(self, request):
|
||||
"""Currently pending and running tasks (capped at 50)."""
|
||||
queryset = (
|
||||
self.get_queryset()
|
||||
.filter(
|
||||
status__in=[PaperlessTask.Status.PENDING, PaperlessTask.Status.STARTED],
|
||||
)
|
||||
.order_by("-date_created")[:50]
|
||||
)
|
||||
serializer = self.get_serializer(queryset, many=True)
|
||||
return Response(serializer.data)
|
||||
|
||||
@action(methods=["post"], detail=False, permission_classes=[IsAdminUser])
|
||||
def run(self, request):
|
||||
serializer = RunTaskViewSerializer(data=request.data)
|
||||
"""Manually dispatch a background task. Superuser (admin) only."""
|
||||
serializer = RunTaskSerializer(data=request.data)
|
||||
serializer.is_valid(raise_exception=True)
|
||||
task_name = serializer.validated_data.get("task_name")
|
||||
task_type = serializer.validated_data.get("task_type")
|
||||
|
||||
if not request.user.is_superuser:
|
||||
return HttpResponseForbidden("Insufficient permissions")
|
||||
task_func_map = {
|
||||
PaperlessTask.TaskType.INDEX_OPTIMIZE: (index_optimize, {}),
|
||||
PaperlessTask.TaskType.TRAIN_CLASSIFIER: (train_classifier, {}),
|
||||
PaperlessTask.TaskType.SANITY_CHECK: (
|
||||
sanity_check,
|
||||
{"raise_on_error": False},
|
||||
),
|
||||
PaperlessTask.TaskType.LLM_INDEX: (
|
||||
llmindex_index,
|
||||
{"rebuild": False},
|
||||
),
|
||||
}
|
||||
|
||||
if task_type not in task_func_map:
|
||||
return Response(
|
||||
{"error": f"Task type '{task_type}' cannot be manually triggered"},
|
||||
status=status.HTTP_400_BAD_REQUEST,
|
||||
)
|
||||
|
||||
try:
|
||||
task_func, task_args = self.TASK_AND_ARGS_BY_NAME[task_name]
|
||||
result = task_func(**task_args)
|
||||
return Response({"result": result})
|
||||
task_func, task_kwargs = task_func_map[task_type]
|
||||
async_result = task_func.apply_async(
|
||||
kwargs=task_kwargs,
|
||||
headers={"trigger_source": "manual"},
|
||||
)
|
||||
return Response({"task_id": async_result.id})
|
||||
except Exception as e:
|
||||
logger.warning(f"An error occurred running task: {e!s}")
|
||||
logger.warning(f"Error running task: {e!s}")
|
||||
return HttpResponseServerError(
|
||||
"Error running task, check logs for more detail.",
|
||||
)
|
||||
@@ -4358,11 +4460,11 @@ class SystemStatusView(PassUserMixin):
|
||||
|
||||
last_trained_task = (
|
||||
PaperlessTask.objects.filter(
|
||||
task_name=PaperlessTask.TaskName.TRAIN_CLASSIFIER,
|
||||
task_type=PaperlessTask.TaskType.TRAIN_CLASSIFIER,
|
||||
status__in=[
|
||||
states.SUCCESS,
|
||||
states.FAILURE,
|
||||
states.REVOKED,
|
||||
PaperlessTask.Status.SUCCESS,
|
||||
PaperlessTask.Status.FAILURE,
|
||||
PaperlessTask.Status.REVOKED,
|
||||
], # ignore running tasks
|
||||
)
|
||||
.order_by("-date_done")
|
||||
@@ -4373,20 +4475,23 @@ class SystemStatusView(PassUserMixin):
|
||||
if last_trained_task is None:
|
||||
classifier_status = "WARNING"
|
||||
classifier_error = "No classifier training tasks found"
|
||||
elif last_trained_task and last_trained_task.status != states.SUCCESS:
|
||||
elif (
|
||||
last_trained_task
|
||||
and last_trained_task.status != PaperlessTask.Status.SUCCESS
|
||||
):
|
||||
classifier_status = "ERROR"
|
||||
classifier_error = last_trained_task.result
|
||||
classifier_error = last_trained_task.result_message
|
||||
classifier_last_trained = (
|
||||
last_trained_task.date_done if last_trained_task else None
|
||||
)
|
||||
|
||||
last_sanity_check = (
|
||||
PaperlessTask.objects.filter(
|
||||
task_name=PaperlessTask.TaskName.CHECK_SANITY,
|
||||
task_type=PaperlessTask.TaskType.SANITY_CHECK,
|
||||
status__in=[
|
||||
states.SUCCESS,
|
||||
states.FAILURE,
|
||||
states.REVOKED,
|
||||
PaperlessTask.Status.SUCCESS,
|
||||
PaperlessTask.Status.FAILURE,
|
||||
PaperlessTask.Status.REVOKED,
|
||||
], # ignore running tasks
|
||||
)
|
||||
.order_by("-date_done")
|
||||
@@ -4397,9 +4502,12 @@ class SystemStatusView(PassUserMixin):
|
||||
if last_sanity_check is None:
|
||||
sanity_check_status = "WARNING"
|
||||
sanity_check_error = "No sanity check tasks found"
|
||||
elif last_sanity_check and last_sanity_check.status != states.SUCCESS:
|
||||
elif (
|
||||
last_sanity_check
|
||||
and last_sanity_check.status != PaperlessTask.Status.SUCCESS
|
||||
):
|
||||
sanity_check_status = "ERROR"
|
||||
sanity_check_error = last_sanity_check.result
|
||||
sanity_check_error = last_sanity_check.result_message
|
||||
sanity_check_last_run = (
|
||||
last_sanity_check.date_done if last_sanity_check else None
|
||||
)
|
||||
@@ -4412,7 +4520,7 @@ class SystemStatusView(PassUserMixin):
|
||||
else:
|
||||
last_llmindex_update = (
|
||||
PaperlessTask.objects.filter(
|
||||
task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,
|
||||
task_type=PaperlessTask.TaskType.LLM_INDEX,
|
||||
)
|
||||
.order_by("-date_done")
|
||||
.first()
|
||||
@@ -4422,9 +4530,12 @@ class SystemStatusView(PassUserMixin):
|
||||
if last_llmindex_update is None:
|
||||
llmindex_status = "WARNING"
|
||||
llmindex_error = "No LLM index update tasks found"
|
||||
elif last_llmindex_update and last_llmindex_update.status == states.FAILURE:
|
||||
elif (
|
||||
last_llmindex_update
|
||||
and last_llmindex_update.status == PaperlessTask.Status.FAILURE
|
||||
):
|
||||
llmindex_status = "ERROR"
|
||||
llmindex_error = last_llmindex_update.result
|
||||
llmindex_error = last_llmindex_update.result_message
|
||||
llmindex_last_modified = (
|
||||
last_llmindex_update.date_done if last_llmindex_update else None
|
||||
)
|
||||
|
||||
@@ -133,7 +133,6 @@ INSTALLED_APPS = [
|
||||
"rest_framework",
|
||||
"rest_framework.authtoken",
|
||||
"django_filters",
|
||||
"django_celery_results",
|
||||
"guardian",
|
||||
"allauth",
|
||||
"allauth.account",
|
||||
@@ -669,8 +668,6 @@ CELERY_BROKER_TRANSPORT_OPTIONS = {
|
||||
CELERY_TASK_TRACK_STARTED = True
|
||||
CELERY_TASK_TIME_LIMIT: Final[int] = get_int_from_env("PAPERLESS_WORKER_TIMEOUT", 1800)
|
||||
|
||||
CELERY_RESULT_EXTENDED = True
|
||||
CELERY_RESULT_BACKEND = "django-db"
|
||||
CELERY_CACHE_BACKEND = "default"
|
||||
|
||||
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#task-serializer
|
||||
|
||||
@@ -181,7 +181,10 @@ def parse_beat_schedule() -> dict:
|
||||
schedule[task["name"]] = {
|
||||
"task": task["task"],
|
||||
"schedule": crontab(minute, hour, day_week, day_month, month),
|
||||
"options": task["options"],
|
||||
"options": {
|
||||
**task["options"],
|
||||
"headers": {"trigger_source": "scheduled"},
|
||||
},
|
||||
}
|
||||
|
||||
return schedule
|
||||
|
||||
@@ -186,42 +186,66 @@ def make_expected_schedule(
|
||||
"Check all e-mail accounts": {
|
||||
"task": "paperless_mail.tasks.process_mail_accounts",
|
||||
"schedule": crontab(minute="*/10"),
|
||||
"options": {"expires": mail_expire},
|
||||
"options": {
|
||||
"expires": mail_expire,
|
||||
"headers": {"trigger_source": "scheduled"},
|
||||
},
|
||||
},
|
||||
"Train the classifier": {
|
||||
"task": "documents.tasks.train_classifier",
|
||||
"schedule": crontab(minute="5", hour="*/1"),
|
||||
"options": {"expires": classifier_expire},
|
||||
"options": {
|
||||
"expires": classifier_expire,
|
||||
"headers": {"trigger_source": "scheduled"},
|
||||
},
|
||||
},
|
||||
"Optimize the index": {
|
||||
"task": "documents.tasks.index_optimize",
|
||||
"schedule": crontab(minute=0, hour=0),
|
||||
"options": {"expires": index_expire},
|
||||
"options": {
|
||||
"expires": index_expire,
|
||||
"headers": {"trigger_source": "scheduled"},
|
||||
},
|
||||
},
|
||||
"Perform sanity check": {
|
||||
"task": "documents.tasks.sanity_check",
|
||||
"schedule": crontab(minute=30, hour=0, day_of_week="sun"),
|
||||
"options": {"expires": sanity_expire},
|
||||
"options": {
|
||||
"expires": sanity_expire,
|
||||
"headers": {"trigger_source": "scheduled"},
|
||||
},
|
||||
},
|
||||
"Empty trash": {
|
||||
"task": "documents.tasks.empty_trash",
|
||||
"schedule": crontab(minute=0, hour="1"),
|
||||
"options": {"expires": empty_trash_expire},
|
||||
"options": {
|
||||
"expires": empty_trash_expire,
|
||||
"headers": {"trigger_source": "scheduled"},
|
||||
},
|
||||
},
|
||||
"Check and run scheduled workflows": {
|
||||
"task": "documents.tasks.check_scheduled_workflows",
|
||||
"schedule": crontab(minute="5", hour="*/1"),
|
||||
"options": {"expires": workflow_expire},
|
||||
"options": {
|
||||
"expires": workflow_expire,
|
||||
"headers": {"trigger_source": "scheduled"},
|
||||
},
|
||||
},
|
||||
"Rebuild LLM index": {
|
||||
"task": "documents.tasks.llmindex_index",
|
||||
"schedule": crontab(minute="10", hour="2"),
|
||||
"options": {"expires": llm_index_expire},
|
||||
"options": {
|
||||
"expires": llm_index_expire,
|
||||
"headers": {"trigger_source": "scheduled"},
|
||||
},
|
||||
},
|
||||
"Cleanup expired share link bundles": {
|
||||
"task": "documents.tasks.cleanup_expired_share_link_bundles",
|
||||
"schedule": crontab(minute=0, hour="2"),
|
||||
"options": {"expires": share_link_cleanup_expire},
|
||||
"options": {
|
||||
"expires": share_link_cleanup_expire,
|
||||
"headers": {"trigger_source": "scheduled"},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
@@ -284,6 +308,16 @@ class TestParseBeatSchedule:
|
||||
schedule = parse_beat_schedule()
|
||||
assert schedule == expected
|
||||
|
||||
def test_parse_beat_schedule_all_entries_have_trigger_source_header(self) -> None:
|
||||
"""Every beat entry must carry trigger_source=scheduled so the task signal
|
||||
handler can identify scheduler-originated tasks."""
|
||||
schedule = parse_beat_schedule()
|
||||
for name, entry in schedule.items():
|
||||
headers = entry.get("options", {}).get("headers", {})
|
||||
assert headers.get("trigger_source") == "scheduled", (
|
||||
f"Beat entry '{name}' is missing trigger_source header"
|
||||
)
|
||||
|
||||
|
||||
class TestParseDbSettings:
|
||||
"""Test suite for parse_db_settings function."""
|
||||
|
||||
@@ -427,10 +427,9 @@ class ApplicationConfigurationViewSet(ModelViewSet[ApplicationConfiguration]):
|
||||
and not vector_store_file_exists()
|
||||
):
|
||||
# AI index was just enabled and vector store file does not exist
|
||||
llmindex_index.delay(
|
||||
rebuild=True,
|
||||
scheduled=False,
|
||||
auto=True,
|
||||
llmindex_index.apply_async(
|
||||
kwargs={"rebuild": True},
|
||||
headers={"trigger_source": "system"},
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -4,7 +4,6 @@ from datetime import timedelta
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from celery import states
|
||||
from django.conf import settings
|
||||
from django.utils import timezone
|
||||
|
||||
@@ -28,17 +27,20 @@ def queue_llm_index_update_if_needed(*, rebuild: bool, reason: str) -> bool:
|
||||
from documents.tasks import llmindex_index
|
||||
|
||||
has_running = PaperlessTask.objects.filter(
|
||||
task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,
|
||||
status__in=[states.PENDING, states.STARTED],
|
||||
task_type=PaperlessTask.TaskType.LLM_INDEX,
|
||||
status__in=[PaperlessTask.Status.PENDING, PaperlessTask.Status.STARTED],
|
||||
).exists()
|
||||
has_recent = PaperlessTask.objects.filter(
|
||||
task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,
|
||||
task_type=PaperlessTask.TaskType.LLM_INDEX,
|
||||
date_created__gte=(timezone.now() - timedelta(minutes=5)),
|
||||
).exists()
|
||||
if has_running or has_recent:
|
||||
return False
|
||||
|
||||
llmindex_index.delay(rebuild=rebuild, scheduled=False, auto=True)
|
||||
llmindex_index.apply_async(
|
||||
kwargs={"rebuild": rebuild},
|
||||
headers={"trigger_source": "system"},
|
||||
)
|
||||
logger.warning(
|
||||
"Queued LLM index update%s: %s",
|
||||
" (rebuild)" if rebuild else "",
|
||||
|
||||
@@ -3,13 +3,13 @@ from unittest.mock import MagicMock
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
from celery import states
|
||||
from django.test import override_settings
|
||||
from django.utils import timezone
|
||||
from llama_index.core.base.embeddings.base import BaseEmbedding
|
||||
|
||||
from documents.models import Document
|
||||
from documents.models import PaperlessTask
|
||||
from documents.tests.factories import PaperlessTaskFactory
|
||||
from paperless_ai import indexing
|
||||
|
||||
|
||||
@@ -292,13 +292,15 @@ def test_queue_llm_index_update_if_needed_enqueues_when_idle_or_skips_recent() -
|
||||
)
|
||||
|
||||
assert result is True
|
||||
mock_task.delay.assert_called_once_with(rebuild=True, scheduled=False, auto=True)
|
||||
mock_task.apply_async.assert_called_once_with(
|
||||
kwargs={"rebuild": True},
|
||||
headers={"trigger_source": "system"},
|
||||
)
|
||||
|
||||
PaperlessTask.objects.create(
|
||||
task_id="task-1",
|
||||
task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,
|
||||
status=states.STARTED,
|
||||
date_created=timezone.now(),
|
||||
PaperlessTaskFactory(
|
||||
task_type=PaperlessTask.TaskType.LLM_INDEX,
|
||||
trigger_source=PaperlessTask.TriggerSource.SYSTEM,
|
||||
status=PaperlessTask.Status.STARTED,
|
||||
)
|
||||
|
||||
# Existing running task
|
||||
@@ -309,7 +311,7 @@ def test_queue_llm_index_update_if_needed_enqueues_when_idle_or_skips_recent() -
|
||||
)
|
||||
|
||||
assert result is False
|
||||
mock_task.delay.assert_not_called()
|
||||
mock_task.apply_async.assert_not_called()
|
||||
|
||||
|
||||
@override_settings(
|
||||
|
||||
15
uv.lock
generated
15
uv.lock
generated
@@ -935,19 +935,6 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/12/bf/af8ad2aa5a402f278b444ca70729fb12ee96ddb89c19c32a2d7c5189358f/django_cachalot-2.9.0-py3-none-any.whl", hash = "sha256:b80ac4930613a7849988ea772a53598d262a15eaf55e5ec8c78accae7fdd99ff", size = 57814, upload-time = "2026-01-28T05:23:28.741Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "django-celery-results"
|
||||
version = "2.6.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "celery", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "django", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/a6/b5/9966c28e31014c228305e09d48b19b35522a8f941fe5af5f81f40dc8fa80/django_celery_results-2.6.0.tar.gz", hash = "sha256:9abcd836ae6b61063779244d8887a88fe80bbfaba143df36d3cb07034671277c", size = 83985, upload-time = "2025-04-10T08:23:52.677Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/2c/da/70f0f3c5364735344c4bc89e53413bcaae95b4fc1de4e98a7a3b9fb70c88/django_celery_results-2.6.0-py3-none-any.whl", hash = "sha256:b9ccdca2695b98c7cbbb8dea742311ba9a92773d71d7b4944a676e69a7df1c73", size = 38351, upload-time = "2025-04-10T08:23:49.965Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "django-compression-middleware"
|
||||
version = "0.5.0"
|
||||
@@ -2869,7 +2856,6 @@ dependencies = [
|
||||
{ name = "django-allauth", extra = ["mfa", "socialaccount"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "django-auditlog", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "django-cachalot", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "django-celery-results", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "django-compression-middleware", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "django-cors-headers", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "django-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
@@ -3018,7 +3004,6 @@ requires-dist = [
|
||||
{ name = "django-allauth", extras = ["mfa", "socialaccount"], specifier = "~=65.15.0" },
|
||||
{ name = "django-auditlog", specifier = "~=3.4.1" },
|
||||
{ name = "django-cachalot", specifier = "~=2.9.0" },
|
||||
{ name = "django-celery-results", specifier = "~=2.6.0" },
|
||||
{ name = "django-compression-middleware", specifier = "~=0.5.0" },
|
||||
{ name = "django-cors-headers", specifier = "~=4.9.0" },
|
||||
{ name = "django-extensions", specifier = "~=4.1" },
|
||||
|
||||
Reference in New Issue
Block a user