Merge branch 'dev' into fix/monetary-exact-filter

.github/workflows/pr-bot.yml (vendored, 3 changes)
@@ -3,7 +3,7 @@ on:
   pull_request_target:
     types: [opened]
 jobs:
-  anti-slop:
+  Anti-slop:
     runs-on: ubuntu-latest
     permissions:
       contents: read
@@ -14,6 +14,7 @@ jobs:
         with:
           max-failures: 4
          failure-add-pr-labels: 'ai'
+          require-pr-template: true
   pr-bot:
     name: Automated PR Bot
     runs-on: ubuntu-latest

@@ -242,6 +242,12 @@ For example:
 }
 ```
 
+## Task History Cleared on Upgrade
+
+The task tracking system has been redesigned in this release. All existing task history records are dropped from the database during the upgrade. Previously completed, failed, or acknowledged tasks will no longer appear in the task list after upgrading.
+
+No user action is required.
+
 ## Consume Script Positional Arguments Removed
 
 Pre- and post-consumption scripts no longer receive positional arguments. All information is
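The hunk above is cut off mid-sentence at the hunk boundary, but the direction of the change is clear: consume scripts now receive their context through the environment rather than `argv`. As a hedged sketch only (the variable names such as `DOCUMENT_ID` and `DOCUMENT_SOURCE_PATH` follow long-standing paperless-ngx consume-script conventions; this diff does not confirm the exact set), a post-consume script might read:

```python
#!/usr/bin/env python3
"""Post-consume script sketch: read context from env vars, not argv."""
import os

# Assumed variable names, per existing paperless-ngx conventions;
# not confirmed by this changeset.
doc_id = os.environ.get("DOCUMENT_ID")
source_path = os.environ.get("DOCUMENT_SOURCE_PATH")

if doc_id is None:
    raise SystemExit("DOCUMENT_ID not set; not invoked by paperless?")

print(f"post-processing document {doc_id} from {source_path}")
```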
@@ -855,13 +855,14 @@ Matching natural date keywords:
 ```
 added:today
 modified:yesterday
 created:this_week
 added:last_month
 modified:this_year
 created:"previous week"
 added:"previous month"
 modified:"this year"
 ```
 
-Supported date keywords: `today`, `yesterday`, `this_week`, `last_week`,
-`this_month`, `last_month`, `this_year`, `last_year`.
+Supported date keywords: `today`, `yesterday`, `previous week`,
+`this month`, `previous month`, `this year`, `previous year`,
+`previous quarter`.
 
 #### Searching custom fields
 
@@ -28,7 +28,6 @@ dependencies = [
   "django-allauth[mfa,socialaccount]~=65.15.0",
   "django-auditlog~=3.4.1",
   "django-cachalot~=2.9.0",
-  "django-celery-results~=2.6.0",
   "django-compression-middleware~=0.5.0",
   "django-cors-headers~=4.9.0",
   "django-extensions~=4.1",
@@ -312,6 +311,7 @@ markers = [
   "date_parsing: Tests which cover date parsing from content or filename",
   "management: Tests which cover management commands/functionality",
   "search: Tests for the Tantivy search backend",
+  "api: Tests for REST API endpoints",
 ]
 
 [tool.pytest_env]

@@ -2064,18 +2064,11 @@
        <context context-type="linenumber">97</context>
      </context-group>
    </trans-unit>
-    <trans-unit id="1418249006616536391" datatype="html">
-      <source>Duplicate(s) detected</source>
-      <context-group purpose="location">
-        <context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.html</context>
-        <context context-type="linenumber">103</context>
-      </context-group>
-    </trans-unit>
    <trans-unit id="1536087519743707362" datatype="html">
      <source>Dismiss</source>
      <context-group purpose="location">
        <context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.html</context>
-        <context context-type="linenumber">116</context>
+        <context context-type="linenumber">110</context>
      </context-group>
      <context-group purpose="location">
        <context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.ts</context>
@@ -2094,49 +2087,49 @@
      <source>Open Document</source>
      <context-group purpose="location">
        <context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.html</context>
-        <context context-type="linenumber">121</context>
+        <context context-type="linenumber">115</context>
      </context-group>
    </trans-unit>
    <trans-unit id="428536141871853903" datatype="html">
      <source>{VAR_PLURAL, plural, =1 {One <x id="INTERPOLATION"/> task} other {<x id="INTERPOLATION_1"/> total <x id="INTERPOLATION"/> tasks}}</source>
      <context-group purpose="location">
        <context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.html</context>
-        <context context-type="linenumber">140</context>
+        <context context-type="linenumber">134</context>
      </context-group>
    </trans-unit>
    <trans-unit id="1943508481059904274" datatype="html">
      <source> (<x id="INTERPOLATION" equiv-text="{{selectedTasks.size}}"/> selected)</source>
      <context-group purpose="location">
        <context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.html</context>
-        <context context-type="linenumber">142</context>
+        <context context-type="linenumber">136</context>
      </context-group>
    </trans-unit>
    <trans-unit id="5639839509673911668" datatype="html">
      <source>Failed<x id="START_BLOCK_IF" equiv-text="@if (tasksService.failedFileTasks.length > 0) {"/><x id="START_TAG_SPAN" ctype="x-span" equiv-text="<span class="badge bg-danger ms-2">"/><x id="INTERPOLATION" equiv-text="{{tasksService.failedFileTasks.length}}"/><x id="CLOSE_TAG_SPAN" ctype="x-span" equiv-text="</span>"/><x id="CLOSE_BLOCK_IF" equiv-text="}"/></source>
      <context-group purpose="location">
        <context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.html</context>
-        <context context-type="linenumber">154,156</context>
+        <context context-type="linenumber">148,150</context>
      </context-group>
    </trans-unit>
    <trans-unit id="8210778930307085868" datatype="html">
      <source>Complete<x id="START_BLOCK_IF" equiv-text="@if (tasksService.completedFileTasks.length > 0) {"/><x id="START_TAG_SPAN" ctype="x-span" equiv-text="<span class="badge bg-secondary ms-2">"/><x id="INTERPOLATION" equiv-text="{{tasksService.completedFileTasks.length}}"/><x id="CLOSE_TAG_SPAN" ctype="x-span" equiv-text="</span>"/><x id="CLOSE_BLOCK_IF" equiv-text="}"/></source>
      <context-group purpose="location">
        <context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.html</context>
-        <context context-type="linenumber">162,164</context>
+        <context context-type="linenumber">156,158</context>
      </context-group>
    </trans-unit>
    <trans-unit id="3522801015717851360" datatype="html">
      <source>Started<x id="START_BLOCK_IF" equiv-text="@if (tasksService.startedFileTasks.length > 0) {"/><x id="START_TAG_SPAN" ctype="x-span" equiv-text="<span class="badge bg-secondary ms-2">"/><x id="INTERPOLATION" equiv-text="{{tasksService.startedFileTasks.length}}"/><x id="CLOSE_TAG_SPAN" ctype="x-span" equiv-text="</span>"/><x id="CLOSE_BLOCK_IF" equiv-text="}"/></source>
      <context-group purpose="location">
        <context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.html</context>
-        <context context-type="linenumber">170,172</context>
+        <context context-type="linenumber">164,166</context>
      </context-group>
    </trans-unit>
    <trans-unit id="2341807459308874922" datatype="html">
      <source>Queued<x id="START_BLOCK_IF" equiv-text="@if (tasksService.queuedFileTasks.length > 0) {"/><x id="START_TAG_SPAN" ctype="x-span" equiv-text="<span class="badge bg-secondary ms-2">"/><x id="INTERPOLATION" equiv-text="{{tasksService.queuedFileTasks.length}}"/><x id="CLOSE_TAG_SPAN" ctype="x-span" equiv-text="</span>"/><x id="CLOSE_BLOCK_IF" equiv-text="}"/></source>
      <context-group purpose="location">
        <context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.html</context>
-        <context context-type="linenumber">178,180</context>
+        <context context-type="linenumber">172,174</context>
      </context-group>
    </trans-unit>
    <trans-unit id="2525230676386818985" datatype="html">
@@ -2192,28 +2185,28 @@
      <source>queued</source>
      <context-group purpose="location">
        <context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.ts</context>
-        <context context-type="linenumber">246</context>
+        <context context-type="linenumber">248</context>
      </context-group>
    </trans-unit>
    <trans-unit id="6415892379431855826" datatype="html">
      <source>started</source>
      <context-group purpose="location">
        <context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.ts</context>
-        <context context-type="linenumber">248</context>
+        <context context-type="linenumber">250</context>
      </context-group>
    </trans-unit>
    <trans-unit id="7510279840486540181" datatype="html">
      <source>completed</source>
      <context-group purpose="location">
        <context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.ts</context>
-        <context context-type="linenumber">250</context>
+        <context context-type="linenumber">252</context>
      </context-group>
    </trans-unit>
    <trans-unit id="4083337005045748464" datatype="html">
      <source>failed</source>
      <context-group purpose="location">
        <context context-type="sourcefile">src/app/components/admin/tasks/tasks.component.ts</context>
-        <context context-type="linenumber">252</context>
+        <context context-type="linenumber">254</context>
      </context-group>
    </trans-unit>
    <trans-unit id="3418677553313974490" datatype="html">
@@ -4435,11 +4428,15 @@
      </context-group>
      <context-group purpose="location">
        <context context-type="sourcefile">src/app/components/common/system-status-dialog/system-status-dialog.component.html</context>
-        <context context-type="linenumber">186</context>
+        <context context-type="linenumber">176</context>
      </context-group>
      <context-group purpose="location">
        <context context-type="sourcefile">src/app/components/common/system-status-dialog/system-status-dialog.component.html</context>
-        <context context-type="linenumber">220</context>
+        <context context-type="linenumber">210</context>
      </context-group>
+      <context-group purpose="location">
+        <context context-type="sourcefile">src/app/components/common/system-status-dialog/system-status-dialog.component.html</context>
+        <context context-type="linenumber">244</context>
+      </context-group>
      <context-group purpose="location">
        <context context-type="sourcefile">src/app/components/common/system-status-dialog/system-status-dialog.component.html</context>
@@ -4447,11 +4444,7 @@
      </context-group>
-      <context-group purpose="location">
-        <context context-type="sourcefile">src/app/components/common/system-status-dialog/system-status-dialog.component.html</context>
-        <context context-type="linenumber">264</context>
-      </context-group>
      <context-group purpose="location">
        <context context-type="sourcefile">src/app/components/common/system-status-dialog/system-status-dialog.component.html</context>
-        <context context-type="linenumber">302</context>
+        <context context-type="linenumber">292</context>
      </context-group>
      <context-group purpose="location">
        <context context-type="sourcefile">src/app/components/common/toast/toast.component.html</context>
@@ -6263,7 +6256,7 @@
      </context-group>
      <context-group purpose="location">
        <context context-type="sourcefile">src/app/components/common/system-status-dialog/system-status-dialog.component.html</context>
-        <context context-type="linenumber">321</context>
+        <context context-type="linenumber">311</context>
      </context-group>
      <context-group purpose="location">
        <context context-type="sourcefile">src/app/components/manage/mail/mail.component.html</context>
@@ -6986,83 +6979,79 @@
        <context context-type="linenumber">157</context>
      </context-group>
    </trans-unit>
-    <trans-unit id="9127131074422113272" datatype="html">
-      <source>Run Task</source>
-      <context-group purpose="location">
-        <context context-type="sourcefile">src/app/components/common/system-status-dialog/system-status-dialog.component.html</context>
-        <context context-type="linenumber">177</context>
-      </context-group>
-      <context-group purpose="location">
-        <context context-type="sourcefile">src/app/components/common/system-status-dialog/system-status-dialog.component.html</context>
-        <context context-type="linenumber">211</context>
-      </context-group>
-      <context-group purpose="location">
-        <context context-type="sourcefile">src/app/components/common/system-status-dialog/system-status-dialog.component.html</context>
-        <context context-type="linenumber">245</context>
-      </context-group>
-      <context-group purpose="location">
-        <context context-type="sourcefile">src/app/components/common/system-status-dialog/system-status-dialog.component.html</context>
-        <context context-type="linenumber">293</context>
-      </context-group>
-    </trans-unit>
    <trans-unit id="4089509911694721896" datatype="html">
      <source>Last Updated</source>
      <context-group purpose="location">
        <context context-type="sourcefile">src/app/components/common/system-status-dialog/system-status-dialog.component.html</context>
-        <context context-type="linenumber">184</context>
+        <context context-type="linenumber">174</context>
      </context-group>
    </trans-unit>
    <trans-unit id="46628344485199198" datatype="html">
      <source>Classifier</source>
      <context-group purpose="location">
        <context context-type="sourcefile">src/app/components/common/system-status-dialog/system-status-dialog.component.html</context>
-        <context context-type="linenumber">189</context>
+        <context context-type="linenumber">179</context>
      </context-group>
    </trans-unit>
+    <trans-unit id="9127131074422113272" datatype="html">
+      <source>Run Task</source>
+      <context-group purpose="location">
+        <context context-type="sourcefile">src/app/components/common/system-status-dialog/system-status-dialog.component.html</context>
+        <context context-type="linenumber">201</context>
+      </context-group>
+      <context-group purpose="location">
+        <context context-type="sourcefile">src/app/components/common/system-status-dialog/system-status-dialog.component.html</context>
+        <context context-type="linenumber">235</context>
+      </context-group>
+      <context-group purpose="location">
+        <context context-type="sourcefile">src/app/components/common/system-status-dialog/system-status-dialog.component.html</context>
+        <context context-type="linenumber">283</context>
+      </context-group>
+    </trans-unit>
    <trans-unit id="6096684179126491743" datatype="html">
      <source>Last Trained</source>
      <context-group purpose="location">
        <context context-type="sourcefile">src/app/components/common/system-status-dialog/system-status-dialog.component.html</context>
-        <context context-type="linenumber">218</context>
+        <context context-type="linenumber">208</context>
      </context-group>
    </trans-unit>
    <trans-unit id="6427836860962380759" datatype="html">
      <source>Sanity Checker</source>
      <context-group purpose="location">
        <context context-type="sourcefile">src/app/components/common/system-status-dialog/system-status-dialog.component.html</context>
-        <context context-type="linenumber">223</context>
+        <context context-type="linenumber">213</context>
      </context-group>
    </trans-unit>
    <trans-unit id="6578747070254776938" datatype="html">
      <source>Last Run</source>
      <context-group purpose="location">
        <context context-type="sourcefile">src/app/components/common/system-status-dialog/system-status-dialog.component.html</context>
-        <context context-type="linenumber">252</context>
+        <context context-type="linenumber">242</context>
      </context-group>
      <context-group purpose="location">
        <context context-type="sourcefile">src/app/components/common/system-status-dialog/system-status-dialog.component.html</context>
-        <context context-type="linenumber">300</context>
+        <context context-type="linenumber">290</context>
      </context-group>
    </trans-unit>
    <trans-unit id="5921685253729220446" datatype="html">
      <source>WebSocket Connection</source>
      <context-group purpose="location">
        <context context-type="sourcefile">src/app/components/common/system-status-dialog/system-status-dialog.component.html</context>
-        <context context-type="linenumber">257</context>
+        <context context-type="linenumber">247</context>
      </context-group>
    </trans-unit>
    <trans-unit id="8998179362936748717" datatype="html">
      <source>OK</source>
      <context-group purpose="location">
        <context context-type="sourcefile">src/app/components/common/system-status-dialog/system-status-dialog.component.html</context>
-        <context context-type="linenumber">261</context>
+        <context context-type="linenumber">251</context>
      </context-group>
    </trans-unit>
    <trans-unit id="3804349597565969872" datatype="html">
      <source>AI Index</source>
      <context-group purpose="location">
        <context context-type="sourcefile">src/app/components/common/system-status-dialog/system-status-dialog.component.html</context>
-        <context context-type="linenumber">270</context>
+        <context context-type="linenumber">260</context>
      </context-group>
    </trans-unit>
    <trans-unit id="6732151329960766506" datatype="html">

@@ -76,33 +76,27 @@
           <label class="form-check-label" for="task{{task.id}}"></label>
         </div>
       </td>
-      <td class="overflow-auto name-col">{{ task.task_file_name }}</td>
+      <td class="overflow-auto name-col">{{ task.input_data?.filename }}</td>
       <td class="d-none d-lg-table-cell">{{ task.date_created | customDate:'short' }}</td>
       @if (activeTab !== 'started' && activeTab !== 'queued') {
         <td class="d-none d-lg-table-cell">
-          @if (task.result?.length > 50) {
+          @if (task.result_message?.length > 50) {
            <div class="result" (click)="expandTask(task); $event.stopPropagation();"
              [ngbPopover]="resultPopover" popoverClass="shadow small mobile" triggers="mouseenter:mouseleave" container="body">
-              <span class="small d-none d-md-inline-block font-monospace text-muted">{{ task.result | slice:0:50 }}…</span>
+              <span class="small d-none d-md-inline-block font-monospace text-muted">{{ task.result_message | slice:0:50 }}…</span>
            </div>
          }
-          @if (task.result?.length <= 50) {
-            <span class="small d-none d-md-inline-block font-monospace text-muted">{{ task.result }}</span>
+          @if (task.result_message?.length <= 50) {
+            <span class="small d-none d-md-inline-block font-monospace text-muted">{{ task.result_message }}</span>
          }
          <ng-template #resultPopover>
-            <pre class="small mb-0">{{ task.result | slice:0:300 }}@if (task.result.length > 300) {
+            <pre class="small mb-0">{{ task.result_message | slice:0:300 }}@if (task.result_message.length > 300) {
              …
            }</pre>
-            @if (task.result?.length > 300) {
+            @if (task.result_message?.length > 300) {
              <br/><em>(<ng-container i18n>click for full output</ng-container>)</em>
            }
          </ng-template>
-          @if (task.duplicate_documents?.length > 0) {
-            <div class="small text-warning-emphasis d-flex align-items-center gap-1">
-              <i-bs class="lh-1" width="1em" height="1em" name="exclamation-triangle"></i-bs>
-              <span i18n>Duplicate(s) detected</span>
-            </div>
-          }
        </td>
      }
      <td class="d-lg-none">
@@ -116,7 +110,7 @@
            <i-bs name="check" class="me-1"></i-bs><ng-container i18n>Dismiss</ng-container>
          </button>
          <ng-container *pngxIfPermissions="{ action: PermissionAction.View, type: PermissionType.Document }">
-            @if (task.related_document) {
+            @if (task.related_document_ids?.[0]) {
              <button class="btn btn-sm btn-outline-primary" (click)="dismissAndGo(task); $event.stopPropagation();">
                <i-bs name="file-text" class="me-1"></i-bs><ng-container i18n>Open Document</ng-container>
              </button>
@@ -127,7 +121,7 @@
        </tr>
        <tr>
          <td class="p-0" [class.border-0]="expandedTask !== task.id" colspan="5">
-            <pre #collapse="ngbCollapse" [ngbCollapse]="expandedTask !== task.id" class="small mb-0"><div class="small p-1 p-lg-3 ms-lg-3">{{ task.result }}</div></pre>
+            <pre #collapse="ngbCollapse" [ngbCollapse]="expandedTask !== task.id" class="small mb-0"><div class="small p-1 p-lg-3 ms-lg-3">{{ task.result_message }}</div></pre>
          </td>
        </tr>
      }

@@ -20,8 +20,8 @@ import { throwError } from 'rxjs'
 import { routes } from 'src/app/app-routing.module'
 import {
   PaperlessTask,
-  PaperlessTaskName,
   PaperlessTaskStatus,
+  PaperlessTaskTriggerSource,
   PaperlessTaskType,
 } from 'src/app/data/paperless-task'
 import { IfPermissionsDirective } from 'src/app/directives/if-permissions.directive'
@@ -39,81 +39,100 @@ const tasks: PaperlessTask[] = [
   {
     id: 467,
     task_id: '11ca1a5b-9f81-442c-b2c8-7e4ae53657f1',
-    task_file_name: 'test.pdf',
+    input_data: { filename: 'test.pdf' },
     date_created: new Date('2023-03-01T10:26:03.093116Z'),
     date_done: new Date('2023-03-01T10:26:07.223048Z'),
-    type: PaperlessTaskType.Auto,
-    task_name: PaperlessTaskName.ConsumeFile,
-    status: PaperlessTaskStatus.Failed,
-    result: 'test.pd: Not consuming test.pdf: It is a duplicate of test (#100)',
+    task_type: PaperlessTaskType.ConsumeFile,
+    task_type_display: 'Consume File',
+    trigger_source: PaperlessTaskTriggerSource.FolderConsume,
+    trigger_source_display: 'Folder Consume',
+    status: PaperlessTaskStatus.Failure,
+    status_display: 'Failure',
+    result_message:
+      'test.pd: Not consuming test.pdf: It is a duplicate of test (#100)',
     acknowledged: false,
-    related_document: null,
+    related_document_ids: [],
   },
   {
     id: 466,
     task_id: '10ca1a5b-3c08-442c-b2c8-7e4ae53657f1',
-    task_file_name: '191092.pdf',
+    input_data: { filename: '191092.pdf' },
     date_created: new Date('2023-03-01T09:26:03.093116Z'),
     date_done: new Date('2023-03-01T09:26:07.223048Z'),
-    type: PaperlessTaskType.Auto,
-    task_name: PaperlessTaskName.ConsumeFile,
-    status: PaperlessTaskStatus.Failed,
-    result:
+    task_type: PaperlessTaskType.ConsumeFile,
+    task_type_display: 'Consume File',
+    trigger_source: PaperlessTaskTriggerSource.FolderConsume,
+    trigger_source_display: 'Folder Consume',
+    status: PaperlessTaskStatus.Failure,
+    status_display: 'Failure',
+    result_message:
       '191092.pd: Not consuming 191092.pdf: It is a duplicate of 191092 (#311)',
     acknowledged: false,
-    related_document: null,
+    related_document_ids: [],
   },
   {
     id: 465,
     task_id: '3612d477-bb04-44e3-985b-ac580dd496d8',
-    task_file_name: 'Scan Jun 6, 2023 at 3.19 PM.pdf',
+    input_data: { filename: 'Scan Jun 6, 2023 at 3.19 PM.pdf' },
     date_created: new Date('2023-06-06T15:22:05.722323-07:00'),
     date_done: new Date('2023-06-06T15:22:14.564305-07:00'),
-    type: PaperlessTaskType.Auto,
-    task_name: PaperlessTaskName.ConsumeFile,
+    task_type: PaperlessTaskType.ConsumeFile,
+    task_type_display: 'Consume File',
+    trigger_source: PaperlessTaskTriggerSource.FolderConsume,
+    trigger_source_display: 'Folder Consume',
     status: PaperlessTaskStatus.Pending,
-    result: null,
+    status_display: 'Pending',
+    result_message: null,
     acknowledged: false,
-    related_document: null,
+    related_document_ids: [],
   },
   {
     id: 464,
     task_id: '2eac4716-2aa6-4dcd-9953-264e11656d7e',
-    task_file_name: 'paperless-mail-l4dkg8ir',
+    input_data: { filename: 'paperless-mail-l4dkg8ir' },
     date_created: new Date('2023-06-04T11:24:32.898089-07:00'),
     date_done: new Date('2023-06-04T11:24:44.678605-07:00'),
-    type: PaperlessTaskType.Auto,
-    task_name: PaperlessTaskName.ConsumeFile,
-    status: PaperlessTaskStatus.Complete,
-    result: 'Success. New document id 422 created',
+    task_type: PaperlessTaskType.ConsumeFile,
+    task_type_display: 'Consume File',
+    trigger_source: PaperlessTaskTriggerSource.EmailConsume,
+    trigger_source_display: 'Email Consume',
+    status: PaperlessTaskStatus.Success,
+    status_display: 'Success',
+    result_message: 'Success. New document id 422 created',
     acknowledged: false,
-    related_document: 422,
+    related_document_ids: [422],
   },
   {
     id: 463,
     task_id: '28125528-1575-4d6b-99e6-168906e8fa5c',
-    task_file_name: 'onlinePaymentSummary.pdf',
+    input_data: { filename: 'onlinePaymentSummary.pdf' },
     date_created: new Date('2023-06-01T13:49:51.631305-07:00'),
     date_done: new Date('2023-06-01T13:49:54.190220-07:00'),
-    type: PaperlessTaskType.Auto,
-    task_name: PaperlessTaskName.ConsumeFile,
-    status: PaperlessTaskStatus.Complete,
-    result: 'Success. New document id 421 created',
+    task_type: PaperlessTaskType.ConsumeFile,
+    task_type_display: 'Consume File',
+    trigger_source: PaperlessTaskTriggerSource.FolderConsume,
+    trigger_source_display: 'Folder Consume',
+    status: PaperlessTaskStatus.Success,
+    status_display: 'Success',
+    result_message: 'Success. New document id 421 created',
    acknowledged: false,
-    related_document: 421,
+    related_document_ids: [421],
   },
   {
     id: 462,
     task_id: 'a5b9ca47-0c8e-490f-a04c-6db5d5fc09e5',
-    task_file_name: 'paperless-mail-_rrpmqk6',
+    input_data: { filename: 'paperless-mail-_rrpmqk6' },
     date_created: new Date('2023-06-07T02:54:35.694916Z'),
     date_done: null,
-    type: PaperlessTaskType.Auto,
-    task_name: PaperlessTaskName.ConsumeFile,
+    task_type: PaperlessTaskType.ConsumeFile,
+    task_type_display: 'Consume File',
+    trigger_source: PaperlessTaskTriggerSource.EmailConsume,
+    trigger_source_display: 'Email Consume',
     status: PaperlessTaskStatus.Started,
-    result: null,
+    status_display: 'Started',
+    result_message: null,
     acknowledged: false,
-    related_document: null,
+    related_document_ids: [],
   },
 ]
@@ -167,7 +186,7 @@ describe('TasksComponent', () => {
     fixture.detectChanges()
     httpTestingController
       .expectOne(
-        `${environment.apiBaseUrl}tasks/?task_name=consume_file&acknowledged=false`
+        `${environment.apiBaseUrl}tasks/?task_type=consume_file&acknowledged=false`
       )
       .flush(tasks)
   })
@@ -176,7 +195,7 @@ describe('TasksComponent', () => {
    const tabButtons = fixture.debugElement.queryAll(By.directive(NgbNavItem))
 
    let currentTasksLength = tasks.filter(
-      (t) => t.status === PaperlessTaskStatus.Failed
+      (t) => t.status === PaperlessTaskStatus.Failure
    ).length
    component.activeTab = TaskTab.Failed
    fixture.detectChanges()
@@ -188,7 +207,7 @@ describe('TasksComponent', () => {
    ).toHaveLength(currentTasksLength + 1)
 
    currentTasksLength = tasks.filter(
-      (t) => t.status === PaperlessTaskStatus.Complete
+      (t) => t.status === PaperlessTaskStatus.Success
    ).length
    component.activeTab = TaskTab.Completed
    fixture.detectChanges()
@@ -308,7 +327,7 @@ describe('TasksComponent', () => {
    expect(component.selectedTasks).toEqual(
      new Set(
        tasks
-          .filter((t) => t.status === PaperlessTaskStatus.Failed)
+          .filter((t) => t.status === PaperlessTaskStatus.Failure)
          .map((t) => t.id)
      )
    )
@@ -322,7 +341,7 @@ describe('TasksComponent', () => {
    component.dismissAndGo(tasks[3])
    expect(routerSpy).toHaveBeenCalledWith([
      'documents',
-      tasks[3].related_document,
+      tasks[3].related_document_ids?.[0],
    ])
  })

@@ -175,7 +175,7 @@ export class TasksComponent
 
   dismissAndGo(task: PaperlessTask) {
     this.dismissTask(task)
-    this.router.navigate(['documents', task.related_document])
+    this.router.navigate(['documents', task.related_document_ids?.[0]])
   }
 
   expandTask(task: PaperlessTask) {
@@ -207,11 +207,13 @@ export class TasksComponent
     if (this._filterText.length) {
       tasks = tasks.filter((t) => {
         if (this.filterTargetID == TaskFilterTargetID.Name) {
-          return t.task_file_name
-            .toLowerCase()
+          return (t.input_data?.filename as string)
+            ?.toLowerCase()
             .includes(this._filterText.toLowerCase())
         } else if (this.filterTargetID == TaskFilterTargetID.Result) {
-          return t.result.toLowerCase().includes(this._filterText.toLowerCase())
+          return t.result_message
+            ?.toLowerCase()
+            .includes(this._filterText.toLowerCase())
         }
       })
     }

@@ -86,7 +86,7 @@
 <div class="selected-icon">
   @if (addedRelativeDate) {
     <a class="text-light focus-variants" href="javascript:void(0)" (click)="clearAddedRelativeDate()">
-      <i-bs width="1em" height="1em" name="check" class="variant-unfocused text-primary"></i-bs>
+      <i-bs width="1em" height="1em" name="check" class="variant-unfocused text-dark"></i-bs>
       <i-bs width="1em" height="1em" name="x" class="variant-focused text-primary"></i-bs>
     </a>
   }

@@ -168,16 +168,6 @@
           <i-bs name="exclamation-triangle-fill" class="text-danger ms-2 lh-1"></i-bs>
         }
       </button>
-      @if (currentUserIsSuperUser) {
-        @if (isRunning(PaperlessTaskName.IndexOptimize)) {
-          <div class="spinner-border spinner-border-sm ms-2" role="status"></div>
-        } @else {
-          <button class="btn btn-sm d-flex align-items-center btn-dark small ms-2" (click)="runTask(PaperlessTaskName.IndexOptimize)">
-            <i-bs name="play-fill" class="me-1"></i-bs>
-            <ng-container i18n>Run Task</ng-container>
-          </button>
-        }
-      }
     </dd>
     <ng-template #indexStatus>
       @if (status.tasks.index_status === 'OK') {
@@ -203,10 +193,10 @@
        }
      </button>
      @if (currentUserIsSuperUser) {
-        @if (isRunning(PaperlessTaskName.TrainClassifier)) {
+        @if (isRunning(PaperlessTaskType.TrainClassifier)) {
          <div class="spinner-border spinner-border-sm ms-2" role="status"></div>
        } @else {
-          <button class="btn btn-sm d-flex align-items-center btn-dark small ms-2" (click)="runTask(PaperlessTaskName.TrainClassifier)">
+          <button class="btn btn-sm d-flex align-items-center btn-dark small ms-2" (click)="runTask(PaperlessTaskType.TrainClassifier)">
            <i-bs name="play-fill" class="me-1"></i-bs>
            <ng-container i18n>Run Task</ng-container>
          </button>
@@ -237,10 +227,10 @@
        }
      </button>
      @if (currentUserIsSuperUser) {
-        @if (isRunning(PaperlessTaskName.SanityCheck)) {
+        @if (isRunning(PaperlessTaskType.SanityCheck)) {
          <div class="spinner-border spinner-border-sm ms-2" role="status"></div>
        } @else {
-          <button class="btn btn-sm d-flex align-items-center btn-dark small ms-2" (click)="runTask(PaperlessTaskName.SanityCheck)">
+          <button class="btn btn-sm d-flex align-items-center btn-dark small ms-2" (click)="runTask(PaperlessTaskType.SanityCheck)">
            <i-bs name="play-fill" class="me-1"></i-bs>
            <ng-container i18n>Run Task</ng-container>
          </button>
@@ -285,10 +275,10 @@
        }
      </button>
      @if (currentUserIsSuperUser) {
-        @if (isRunning(PaperlessTaskName.LLMIndexUpdate)) {
+        @if (isRunning(PaperlessTaskType.LlmIndex)) {
          <div class="spinner-border spinner-border-sm ms-2" role="status"></div>
        } @else {
-          <button class="btn btn-sm d-flex align-items-center btn-dark small ms-2" (click)="runTask(PaperlessTaskName.LLMIndexUpdate)">
+          <button class="btn btn-sm d-flex align-items-center btn-dark small ms-2" (click)="runTask(PaperlessTaskType.LlmIndex)">
            <i-bs name="play-fill" class="me-1"></i-bs>
            <ng-container i18n>Run Task</ng-container>
          </button>

@@ -25,7 +25,7 @@ import {
 import { NgbActiveModal } from '@ng-bootstrap/ng-bootstrap'
 import { NgxBootstrapIconsModule, allIcons } from 'ngx-bootstrap-icons'
 import { Subject, of, throwError } from 'rxjs'
-import { PaperlessTaskName } from 'src/app/data/paperless-task'
+import { PaperlessTaskType } from 'src/app/data/paperless-task'
 import {
   InstallType,
   SystemStatus,
@@ -138,9 +138,9 @@ describe('SystemStatusDialogComponent', () => {
   })
 
   it('should check if task is running', () => {
-    component.runTask(PaperlessTaskName.IndexOptimize)
-    expect(component.isRunning(PaperlessTaskName.IndexOptimize)).toBeTruthy()
-    expect(component.isRunning(PaperlessTaskName.SanityCheck)).toBeFalsy()
+    component.runTask(PaperlessTaskType.SanityCheck)
+    expect(component.isRunning(PaperlessTaskType.SanityCheck)).toBeTruthy()
+    expect(component.isRunning(PaperlessTaskType.TrainClassifier)).toBeFalsy()
   })
 
   it('should support running tasks, refresh status and show toasts', () => {
@@ -151,22 +151,22 @@ describe('SystemStatusDialogComponent', () => {
 
    // fail first
    runSpy.mockReturnValue(throwError(() => new Error('error')))
-    component.runTask(PaperlessTaskName.IndexOptimize)
-    expect(runSpy).toHaveBeenCalledWith(PaperlessTaskName.IndexOptimize)
+    component.runTask(PaperlessTaskType.SanityCheck)
+    expect(runSpy).toHaveBeenCalledWith(PaperlessTaskType.SanityCheck)
    expect(toastErrorSpy).toHaveBeenCalledWith(
-      `Failed to start task ${PaperlessTaskName.IndexOptimize}, see the logs for more details`,
+      `Failed to start task ${PaperlessTaskType.SanityCheck}, see the logs for more details`,
      expect.any(Error)
    )
 
    // succeed
    runSpy.mockReturnValue(of({}))
    getStatusSpy.mockReturnValue(of(status))
-    component.runTask(PaperlessTaskName.IndexOptimize)
-    expect(runSpy).toHaveBeenCalledWith(PaperlessTaskName.IndexOptimize)
+    component.runTask(PaperlessTaskType.SanityCheck)
+    expect(runSpy).toHaveBeenCalledWith(PaperlessTaskType.SanityCheck)
 
    expect(getStatusSpy).toHaveBeenCalled()
    expect(toastSpy).toHaveBeenCalledWith(
-      `Task ${PaperlessTaskName.IndexOptimize} started`
+      `Task ${PaperlessTaskType.SanityCheck} started`
    )
  })

@@ -8,7 +8,7 @@ import {
 } from '@ng-bootstrap/ng-bootstrap'
 import { NgxBootstrapIconsModule } from 'ngx-bootstrap-icons'
 import { Subject, takeUntil } from 'rxjs'
-import { PaperlessTaskName } from 'src/app/data/paperless-task'
+import { PaperlessTaskType } from 'src/app/data/paperless-task'
 import {
   SystemStatus,
   SystemStatusItemStatus,
@@ -49,14 +49,14 @@ export class SystemStatusDialogComponent implements OnInit, OnDestroy {
   private settingsService = inject(SettingsService)
 
   public SystemStatusItemStatus = SystemStatusItemStatus
-  public PaperlessTaskName = PaperlessTaskName
+  public PaperlessTaskType = PaperlessTaskType
   public status: SystemStatus
   public frontendVersion: string = environment.version
   public versionMismatch: boolean = false
 
   public copied: boolean = false
 
-  private runningTasks: Set<PaperlessTaskName> = new Set()
+  private runningTasks: Set<PaperlessTaskType> = new Set()
   private unsubscribeNotifier: Subject<any> = new Subject()
 
   get currentUserIsSuperUser(): boolean {
@@ -107,11 +107,11 @@ export class SystemStatusDialogComponent implements OnInit, OnDestroy {
     return now.getTime() - date.getTime() > hours * 60 * 60 * 1000
   }
 
-  public isRunning(taskName: PaperlessTaskName): boolean {
+  public isRunning(taskName: PaperlessTaskType): boolean {
     return this.runningTasks.has(taskName)
   }
 
-  public runTask(taskName: PaperlessTaskName) {
+  public runTask(taskName: PaperlessTaskType) {
     this.runningTasks.add(taskName)
     this.toastService.showInfo(`Task ${taskName} started`)
     this.tasksService.run(taskName).subscribe({

@@ -1,49 +1,67 @@
-import { Document } from './document'
 import { ObjectWithId } from './object-with-id'
 
 export enum PaperlessTaskType {
-  Auto = 'auto_task',
-  ScheduledTask = 'scheduled_task',
-  ManualTask = 'manual_task',
-}
-
-export enum PaperlessTaskName {
   ConsumeFile = 'consume_file',
   TrainClassifier = 'train_classifier',
-  SanityCheck = 'check_sanity',
-  IndexOptimize = 'index_optimize',
-  LLMIndexUpdate = 'llmindex_update',
+  SanityCheck = 'sanity_check',
+  MailFetch = 'mail_fetch',
+  LlmIndex = 'llm_index',
+  EmptyTrash = 'empty_trash',
+  CheckWorkflows = 'check_workflows',
+  BulkUpdate = 'bulk_update',
+  ReprocessDocument = 'reprocess_document',
+  BuildShareLink = 'build_share_link',
+  BulkDelete = 'bulk_delete',
 }
 
+export enum PaperlessTaskTriggerSource {
+  Scheduled = 'scheduled',
+  WebUI = 'web_ui',
+  ApiUpload = 'api_upload',
+  FolderConsume = 'folder_consume',
+  EmailConsume = 'email_consume',
+  System = 'system',
+  Manual = 'manual',
+}
+
 export enum PaperlessTaskStatus {
-  Pending = 'PENDING',
-  Started = 'STARTED',
-  Complete = 'SUCCESS',
-  Failed = 'FAILURE',
+  Pending = 'pending',
+  Started = 'started',
+  Success = 'success',
+  Failure = 'failure',
+  Revoked = 'revoked',
 }
 
 export interface PaperlessTask extends ObjectWithId {
-  type: PaperlessTaskType
-
-  status: PaperlessTaskStatus
-
-  acknowledged: boolean
-
   task_id: string
-
-  task_file_name: string
-
-  task_name: PaperlessTaskName
-
+  task_type: PaperlessTaskType
+  task_type_display: string
+  trigger_source: PaperlessTaskTriggerSource
+  trigger_source_display: string
+  status: PaperlessTaskStatus
+  status_display: string
   date_created: Date
-
   date_started?: Date
   date_done?: Date
-
-  result?: string
-
-  related_document?: number
-
-  duplicate_documents?: Document[]
+  duration_seconds?: number
+  wait_time_seconds?: number
+  input_data: Record<string, unknown>
+  result_data?: Record<string, unknown>
+  result_message?: string
+  related_document_ids: number[]
+  acknowledged: boolean
+  owner?: number
 }
+
+export interface PaperlessTaskSummary {
+  task_type: PaperlessTaskType
+  total_count: number
+  pending_count: number
+  success_count: number
+  failure_count: number
+  avg_duration_seconds: number | null
+  avg_wait_time_seconds: number | null
+  last_run: Date | null
+  last_success: Date | null
+  last_failure: Date | null
+}

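A note for API consumers following this model diff: `PaperlessTaskStatus` switches from Celery's uppercase state strings to lowercase values, gains `Revoked`, and renames `Complete`/`Failed` to `Success`/`Failure`. A minimal translation sketch for clients that stored the old strings (the mapping is inferred from the enum diff above, not from a documented migration path):

```python
# Inferred from the enum diff: old Celery-style states -> new values.
OLD_TO_NEW_STATUS = {
    "PENDING": "pending",
    "STARTED": "started",
    "SUCCESS": "success",   # previously exposed as PaperlessTaskStatus.Complete
    "FAILURE": "failure",   # previously exposed as PaperlessTaskStatus.Failed
}

def translate_status(old: str) -> str:
    # 'revoked' is new in this release and has no old counterpart.
    return OLD_TO_NEW_STATUS.get(old, old.lower())
```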
@@ -5,11 +5,7 @@ import {
 } from '@angular/common/http/testing'
 import { TestBed } from '@angular/core/testing'
 import { environment } from 'src/environments/environment'
-import {
-  PaperlessTaskName,
-  PaperlessTaskStatus,
-  PaperlessTaskType,
-} from '../data/paperless-task'
+import { PaperlessTaskStatus, PaperlessTaskType } from '../data/paperless-task'
 import { TasksService } from './tasks.service'
 
 describe('TasksService', () => {
@@ -37,7 +33,7 @@ describe('TasksService', () => {
   it('calls tasks api endpoint on reload', () => {
     tasksService.reload()
     const req = httpTestingController.expectOne(
-      `${environment.apiBaseUrl}tasks/?task_name=consume_file&acknowledged=false`
+      `${environment.apiBaseUrl}tasks/?task_type=consume_file&acknowledged=false`
     )
     expect(req.request.method).toEqual('GET')
   })
@@ -46,7 +42,7 @@ describe('TasksService', () => {
    tasksService.loading = true
    tasksService.reload()
    httpTestingController.expectNone(
-      `${environment.apiBaseUrl}tasks/?task_name=consume_file&acknowledged=false`
+      `${environment.apiBaseUrl}tasks/?task_type=consume_file&acknowledged=false`
    )
  })
@@ -63,7 +59,7 @@ describe('TasksService', () => {
    // reload is then called
    httpTestingController
      .expectOne(
-        `${environment.apiBaseUrl}tasks/?task_name=consume_file&acknowledged=false`
+        `${environment.apiBaseUrl}tasks/?task_type=consume_file&acknowledged=false`
      )
      .flush([])
  })
@@ -72,56 +68,56 @@ describe('TasksService', () => {
    expect(tasksService.total).toEqual(0)
    const mockTasks = [
      {
-        type: PaperlessTaskType.Auto,
-        task_name: PaperlessTaskName.ConsumeFile,
-        status: PaperlessTaskStatus.Complete,
+        task_type: PaperlessTaskType.ConsumeFile,
+        status: PaperlessTaskStatus.Success,
        acknowledged: false,
        task_id: '1234',
-        task_file_name: 'file1.pdf',
+        input_data: { filename: 'file1.pdf' },
        date_created: new Date(),
+        related_document_ids: [],
      },
      {
-        type: PaperlessTaskType.Auto,
-        task_name: PaperlessTaskName.ConsumeFile,
-        status: PaperlessTaskStatus.Failed,
+        task_type: PaperlessTaskType.ConsumeFile,
+        status: PaperlessTaskStatus.Failure,
        acknowledged: false,
        task_id: '1235',
-        task_file_name: 'file2.pdf',
+        input_data: { filename: 'file2.pdf' },
        date_created: new Date(),
+        related_document_ids: [],
      },
      {
-        type: PaperlessTaskType.Auto,
-        task_name: PaperlessTaskName.ConsumeFile,
+        task_type: PaperlessTaskType.ConsumeFile,
        status: PaperlessTaskStatus.Pending,
        acknowledged: false,
        task_id: '1236',
-        task_file_name: 'file3.pdf',
+        input_data: { filename: 'file3.pdf' },
        date_created: new Date(),
+        related_document_ids: [],
      },
      {
-        type: PaperlessTaskType.Auto,
-        task_name: PaperlessTaskName.ConsumeFile,
+        task_type: PaperlessTaskType.ConsumeFile,
        status: PaperlessTaskStatus.Started,
        acknowledged: false,
        task_id: '1237',
-        task_file_name: 'file4.pdf',
+        input_data: { filename: 'file4.pdf' },
        date_created: new Date(),
+        related_document_ids: [],
      },
      {
-        type: PaperlessTaskType.Auto,
-        task_name: PaperlessTaskName.ConsumeFile,
-        status: PaperlessTaskStatus.Complete,
+        task_type: PaperlessTaskType.ConsumeFile,
+        status: PaperlessTaskStatus.Success,
        acknowledged: false,
        task_id: '1238',
-        task_file_name: 'file5.pdf',
+        input_data: { filename: 'file5.pdf' },
        date_created: new Date(),
+        related_document_ids: [],
      },
    ]
 
    tasksService.reload()
 
    const req = httpTestingController.expectOne(
-      `${environment.apiBaseUrl}tasks/?task_name=consume_file&acknowledged=false`
+      `${environment.apiBaseUrl}tasks/?task_type=consume_file&acknowledged=false`
    )
 
    req.flush(mockTasks)
@@ -134,9 +130,9 @@ describe('TasksService', () => {
  })
 
  it('supports running tasks', () => {
-    tasksService.run(PaperlessTaskName.SanityCheck).subscribe((res) => {
+    tasksService.run(PaperlessTaskType.SanityCheck).subscribe((res) => {
      expect(res).toEqual({
        result: 'success',
        task_id: 'abc-123',
      })
    })
    const req = httpTestingController.expectOne(
@@ -144,7 +140,7 @@ describe('TasksService', () => {
    )
    expect(req.request.method).toEqual('POST')
    req.flush({
      result: 'success',
      task_id: 'abc-123',
    })
  })
 })

@@ -4,8 +4,8 @@ import { Observable, Subject } from 'rxjs'
 import { first, takeUntil, tap } from 'rxjs/operators'
 import {
   PaperlessTask,
-  PaperlessTaskName,
   PaperlessTaskStatus,
+  PaperlessTaskType,
 } from 'src/app/data/paperless-task'
 import { environment } from 'src/environments/environment'
 
@@ -18,7 +18,7 @@ export class TasksService {
   private baseUrl: string = environment.apiBaseUrl
   private endpoint: string = 'tasks'
 
-  public loading: boolean
+  public loading: boolean = false
 
   private fileTasks: PaperlessTask[] = []
 
@@ -33,21 +33,27 @@ export class TasksService {
   }
 
   public get queuedFileTasks(): PaperlessTask[] {
-    return this.fileTasks.filter((t) => t.status == PaperlessTaskStatus.Pending)
+    return this.fileTasks.filter(
+      (t) => t.status === PaperlessTaskStatus.Pending
+    )
   }
 
   public get startedFileTasks(): PaperlessTask[] {
-    return this.fileTasks.filter((t) => t.status == PaperlessTaskStatus.Started)
+    return this.fileTasks.filter(
+      (t) => t.status === PaperlessTaskStatus.Started
+    )
   }
 
   public get completedFileTasks(): PaperlessTask[] {
     return this.fileTasks.filter(
-      (t) => t.status == PaperlessTaskStatus.Complete
+      (t) => t.status === PaperlessTaskStatus.Success
     )
   }
 
   public get failedFileTasks(): PaperlessTask[] {
-    return this.fileTasks.filter((t) => t.status == PaperlessTaskStatus.Failed)
+    return this.fileTasks.filter(
+      (t) => t.status === PaperlessTaskStatus.Failure
+    )
   }
 
   public reload() {
@@ -56,18 +62,16 @@ export class TasksService {
 
     this.http
       .get<PaperlessTask[]>(
-        `${this.baseUrl}${this.endpoint}/?task_name=consume_file&acknowledged=false`
+        `${this.baseUrl}${this.endpoint}/?task_type=${PaperlessTaskType.ConsumeFile}&acknowledged=false`
       )
       .pipe(takeUntil(this.unsubscribeNotifer), first())
       .subscribe((r) => {
-        this.fileTasks = r.filter(
-          (t) => t.task_name == PaperlessTaskName.ConsumeFile
-        )
+        this.fileTasks = r
        this.loading = false
      })
  }
 
-  public dismissTasks(task_ids: Set<number>) {
+  public dismissTasks(task_ids: Set<number>): Observable<any> {
    return this.http
      .post(`${this.baseUrl}tasks/acknowledge/`, {
        tasks: [...task_ids],
@@ -85,12 +89,10 @@ export class TasksService {
     this.unsubscribeNotifer.next(true)
   }
 
-  public run(taskName: PaperlessTaskName): Observable<any> {
-    return this.http.post<any>(
+  public run(taskType: PaperlessTaskType): Observable<{ task_id: string }> {
+    return this.http.post<{ task_id: string }>(
       `${environment.apiBaseUrl}${this.endpoint}/run/`,
-      {
-        task_name: taskName,
-      }
+      { task_type: taskType }
     )
   }
 }

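`run()` now posts a `task_type` instead of a `task_name` and is typed to return `{ task_id }`. A hedged sketch of the same call from an external client; only the `tasks/run/` path and the payload shape come from the diff above, while the base URL and token are placeholders:

```python
import requests

resp = requests.post(
    "http://localhost:8000/api/tasks/run/",          # assumed base URL
    json={"task_type": "sanity_check"},              # new-style payload
    headers={"Authorization": "Token <api-token>"},  # placeholder token
    timeout=30,
)
resp.raise_for_status()
print(resp.json()["task_id"])  # ID of the queued task
```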
@@ -144,18 +144,30 @@ class StoragePathAdmin(GuardedModelAdmin):
 
 
 class TaskAdmin(admin.ModelAdmin):
-    list_display = ("task_id", "task_file_name", "task_name", "date_done", "status")
-    list_filter = ("status", "date_done", "task_name")
-    search_fields = ("task_name", "task_id", "status", "task_file_name")
+    list_display = (
+        "task_id",
+        "task_type",
+        "trigger_source",
+        "status",
+        "date_created",
+        "date_done",
+        "duration_seconds",
+    )
+    list_filter = ("status", "task_type", "trigger_source", "date_done")
+    search_fields = ("task_id", "task_type", "status")
     readonly_fields = (
         "task_id",
-        "task_file_name",
-        "task_name",
+        "task_type",
+        "trigger_source",
         "status",
         "date_created",
         "date_started",
         "date_done",
-        "result",
+        "duration_seconds",
+        "wait_time_seconds",
+        "input_data",
+        "result_data",
+        "result_message",
     )

@@ -26,8 +26,10 @@ from django.db.models.functions import Cast
 from django.utils.translation import gettext_lazy as _
 from django_filters import DateFilter
+from django_filters.rest_framework import BooleanFilter
+from django_filters.rest_framework import DateTimeFilter
 from django_filters.rest_framework import Filter
 from django_filters.rest_framework import FilterSet
 from django_filters.rest_framework import MultipleChoiceFilter
 from drf_spectacular.utils import extend_schema_field
 from guardian.utils import get_group_obj_perms_model
 from guardian.utils import get_user_obj_perms_model
@@ -861,18 +863,51 @@ class ShareLinkBundleFilterSet(FilterSet):
 
 
 class PaperlessTaskFilterSet(FilterSet):
+    task_type = MultipleChoiceFilter(
+        choices=PaperlessTask.TaskType.choices,
+        label="Task Type",
+    )
+
+    trigger_source = MultipleChoiceFilter(
+        choices=PaperlessTask.TriggerSource.choices,
+        label="Trigger Source",
+    )
+
+    status = MultipleChoiceFilter(
+        choices=PaperlessTask.Status.choices,
+        label="Status",
+    )
+
+    is_complete = BooleanFilter(
+        method="filter_is_complete",
+        label="Is Complete",
+    )
+
+    acknowledged = BooleanFilter(
+        label="Acknowledged",
+        field_name="acknowledged",
+    )
+
+    date_created_after = DateTimeFilter(
+        field_name="date_created",
+        lookup_expr="gte",
+        label="Created After",
+    )
+
+    date_created_before = DateTimeFilter(
+        field_name="date_created",
+        lookup_expr="lte",
+        label="Created Before",
+    )
+
     class Meta:
         model = PaperlessTask
-        fields = {
-            "type": ["exact"],
-            "task_name": ["exact"],
-            "status": ["exact"],
-        }
+        fields = ["task_type", "trigger_source", "status", "acknowledged", "owner"]
+
+    def filter_is_complete(self, queryset, name, value):
+        if value:
+            return queryset.filter(status__in=PaperlessTask.COMPLETE_STATUSES)
+        return queryset.exclude(status__in=PaperlessTask.COMPLETE_STATUSES)
 
 
 class ObjectOwnedOrGrantedPermissionsFilter(ObjectPermissionsFilter):

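The reworked `PaperlessTaskFilterSet` turns the task list into a properly queryable endpoint. A hedged sketch of the resulting query surface; the parameter names come from the FilterSet above, while the URL and auth header are assumptions:

```python
import requests

params = {
    "task_type": "consume_file",                   # MultipleChoiceFilter
    "status": "failure",                           # MultipleChoiceFilter
    "is_complete": "true",                         # routed to filter_is_complete
    "date_created_after": "2026-01-01T00:00:00Z",  # DateTimeFilter, gte lookup
}
resp = requests.get(
    "http://localhost:8000/api/tasks/",              # assumed base URL
    params=params,
    headers={"Authorization": "Token <api-token>"},  # placeholder token
    timeout=30,
)
for task in resp.json():
    print(task["task_id"], task["status_display"], task["result_message"])
```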
@@ -22,7 +22,6 @@ class Command(PaperlessCommand):
             self.buffered_logging("paperless.classifier"),
         ):
             train_classifier(
-                scheduled=False,
                 status_callback=lambda msg: self.console.print(f"  {msg}"),
             )
 
@@ -17,7 +17,6 @@ class Command(PaperlessCommand):
     def handle(self, *args: Any, **options: Any) -> None:
         llmindex_index(
             rebuild=options["command"] == "rebuild",
-            scheduled=False,
             iter_wrapper=lambda docs: self.track(
                 docs,
                 description="Indexing documents...",
@@ -111,7 +111,6 @@ class Command(PaperlessCommand):
 
     def handle(self, *args: Any, **options: Any) -> None:
         messages = check_sanity(
-            scheduled=False,
             iter_wrapper=lambda docs: self.track(
                 docs,
                 description="Checking documents...",

src/documents/migrations/0019_task_system_redesign.py (new file, 218 lines)
@@ -0,0 +1,218 @@
|
||||
"""
|
||||
Drop and recreate the PaperlessTask table with the new structured schema.
|
||||
|
||||
We intentionally drop all existing task data -- the old schema was
|
||||
string-based and incompatible with the new JSONField result storage.
|
||||
"""
|
||||
|
||||
import django.db.models.deletion
|
||||
import django.utils.timezone
|
||||
from django.conf import settings
|
||||
from django.db import migrations
|
||||
from django.db import models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
dependencies = [
|
||||
("documents", "0018_saved_view_simple_search_rules"),
|
||||
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
|
||||
]
|
||||
|
||||
operations = [
|
||||
        migrations.DeleteModel(name="PaperlessTask"),
        migrations.CreateModel(
            name="PaperlessTask",
            fields=[
                (
                    "id",
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                (
                    "owner",
                    models.ForeignKey(
                        blank=True,
                        default=None,
                        null=True,
                        on_delete=django.db.models.deletion.SET_NULL,
                        to=settings.AUTH_USER_MODEL,
                        verbose_name="owner",
                    ),
                ),
                (
                    "task_id",
                    models.CharField(
                        help_text="Celery task ID",
                        max_length=72,
                        unique=True,
                        verbose_name="Task ID",
                    ),
                ),
                (
                    "task_type",
                    models.CharField(
                        choices=[
                            ("consume_file", "Consume File"),
                            ("train_classifier", "Train Classifier"),
                            ("sanity_check", "Sanity Check"),
                            ("index_optimize", "Index Optimize"),
                            ("mail_fetch", "Mail Fetch"),
                            ("llm_index", "LLM Index"),
                            ("empty_trash", "Empty Trash"),
                            ("check_workflows", "Check Workflows"),
                            ("bulk_update", "Bulk Update"),
                            ("reprocess_document", "Reprocess Document"),
                            ("build_share_link", "Build Share Link"),
                            ("bulk_delete", "Bulk Delete"),
                        ],
                        db_index=True,
                        help_text="The kind of work being performed",
                        max_length=50,
                        verbose_name="Task Type",
                    ),
                ),
                (
                    "trigger_source",
                    models.CharField(
                        choices=[
                            ("scheduled", "Scheduled"),
                            ("web_ui", "Web UI"),
                            ("api_upload", "API Upload"),
                            ("folder_consume", "Folder Consume"),
                            ("email_consume", "Email Consume"),
                            ("system", "System"),
                            ("manual", "Manual"),
                        ],
                        db_index=True,
                        help_text="What initiated this task",
                        max_length=50,
                        verbose_name="Trigger Source",
                    ),
                ),
                (
                    "status",
                    models.CharField(
                        choices=[
                            ("pending", "Pending"),
                            ("started", "Started"),
                            ("success", "Success"),
                            ("failure", "Failure"),
                            ("revoked", "Revoked"),
                        ],
                        db_index=True,
                        default="pending",
                        max_length=30,
                        verbose_name="Status",
                    ),
                ),
                (
                    "date_created",
                    models.DateTimeField(
                        db_index=True,
                        default=django.utils.timezone.now,
                        verbose_name="Created",
                    ),
                ),
                (
                    "date_started",
                    models.DateTimeField(
                        blank=True,
                        null=True,
                        verbose_name="Started",
                    ),
                ),
                (
                    "date_done",
                    models.DateTimeField(
                        blank=True,
                        db_index=True,
                        null=True,
                        verbose_name="Completed",
                    ),
                ),
                (
                    "duration_seconds",
                    models.FloatField(
                        blank=True,
                        help_text="Elapsed time from start to completion",
                        null=True,
                        verbose_name="Duration (seconds)",
                    ),
                ),
                (
                    "wait_time_seconds",
                    models.FloatField(
                        blank=True,
                        help_text="Time from task creation to worker pickup",
                        null=True,
                        verbose_name="Wait Time (seconds)",
                    ),
                ),
                (
                    "input_data",
                    models.JSONField(
                        blank=True,
                        default=dict,
                        help_text="Structured input parameters for the task",
                        verbose_name="Input Data",
                    ),
                ),
                (
                    "result_data",
                    models.JSONField(
                        blank=True,
                        help_text="Structured result data from task execution",
                        null=True,
                        verbose_name="Result Data",
                    ),
                ),
                (
                    "result_message",
                    models.TextField(
                        blank=True,
                        help_text="Human-readable result message",
                        null=True,
                        verbose_name="Result Message",
                    ),
                ),
                (
                    "acknowledged",
                    models.BooleanField(
                        db_index=True,
                        default=False,
                        verbose_name="Acknowledged",
                    ),
                ),
            ],
            options={
                "verbose_name": "Task",
                "verbose_name_plural": "Tasks",
                "ordering": ["-date_created"],
            },
        ),
        migrations.AddIndex(
            model_name="paperlesstask",
            index=models.Index(
                fields=["status", "date_created"],
                name="documents_p_status_8aa687_idx",
            ),
        ),
        migrations.AddIndex(
            model_name="paperlesstask",
            index=models.Index(
                fields=["task_type", "status"],
                name="documents_p_task_ty_e4a93f_idx",
            ),
        ),
        migrations.AddIndex(
            model_name="paperlesstask",
            index=models.Index(
                fields=["owner", "acknowledged", "date_created"],
                name="documents_p_owner_i_62c545_idx",
            ),
        ),
    ]
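
For orientation (not part of the patch): the three composite indexes above line up with the task-list queries the UI issues, roughly of this shape, assuming only names from the migration (`user` is a placeholder variable):

    # Served by the (status, date_created) index
    PaperlessTask.objects.filter(status="pending").order_by("-date_created")
    # Served by the (task_type, status) index
    PaperlessTask.objects.filter(task_type="consume_file", status="failure")
    # Served by the (owner, acknowledged, date_created) index
    PaperlessTask.objects.filter(owner=user, acknowledged=False).order_by("-date_created")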
26 src/documents/migrations/0020_drop_celery_results.py Normal file
@@ -0,0 +1,26 @@
from django.db import migrations


class Migration(migrations.Migration):
    dependencies = [
        ("documents", "0019_task_system_redesign"),
    ]

    operations = [
        migrations.RunSQL(
            sql="DROP TABLE IF EXISTS django_celery_results_taskresult;",
            reverse_sql=migrations.RunSQL.noop,
        ),
        migrations.RunSQL(
            sql="DROP TABLE IF EXISTS django_celery_results_groupresult;",
            reverse_sql=migrations.RunSQL.noop,
        ),
        migrations.RunSQL(
            sql="DROP TABLE IF EXISTS django_celery_results_chordcounter;",
            reverse_sql=migrations.RunSQL.noop,
        ),
        migrations.RunSQL(
            sql="DELETE FROM django_migrations WHERE app = 'django_celery_results';",
            reverse_sql=migrations.RunSQL.noop,
        ),
    ]
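
A quick way to confirm the drop took effect after migrating (illustrative only; this uses stock Django introspection, not code from this patch):

    from django.db import connection

    # None of the django_celery_results_* tables should remain.
    leftover = [
        name
        for name in connection.introspection.table_names()
        if name.startswith("django_celery_results_")
    ]
    assert leftover == []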
@@ -3,7 +3,6 @@ from pathlib import Path
from typing import Final

import pathvalidate
from celery import states
from django.conf import settings
from django.contrib.auth.models import Group
from django.contrib.auth.models import User
@@ -663,97 +662,174 @@ class UiSettings(models.Model):


class PaperlessTask(ModelWithOwner):
    ALL_STATES = sorted(states.ALL_STATES)
    TASK_STATE_CHOICES = sorted(zip(ALL_STATES, ALL_STATES))
    """
    Tracks background task execution for user visibility and debugging.

    State transitions:
        PENDING -> STARTED -> SUCCESS
        PENDING -> STARTED -> FAILURE
        PENDING -> REVOKED (if cancelled before starting)
    """

    class Status(models.TextChoices):
        PENDING = "pending", _("Pending")
        STARTED = "started", _("Started")
        SUCCESS = "success", _("Success")
        FAILURE = "failure", _("Failure")
        REVOKED = "revoked", _("Revoked")

    class TaskType(models.TextChoices):
        AUTO = ("auto_task", _("Auto Task"))
        SCHEDULED_TASK = ("scheduled_task", _("Scheduled Task"))
        MANUAL_TASK = ("manual_task", _("Manual Task"))
        CONSUME_FILE = "consume_file", _("Consume File")
        TRAIN_CLASSIFIER = "train_classifier", _("Train Classifier")
        SANITY_CHECK = "sanity_check", _("Sanity Check")
        INDEX_OPTIMIZE = "index_optimize", _("Index Optimize")
        MAIL_FETCH = "mail_fetch", _("Mail Fetch")
        LLM_INDEX = "llm_index", _("LLM Index")
        EMPTY_TRASH = "empty_trash", _("Empty Trash")
        CHECK_WORKFLOWS = "check_workflows", _("Check Workflows")
        BULK_UPDATE = "bulk_update", _("Bulk Update")
        REPROCESS_DOCUMENT = "reprocess_document", _("Reprocess Document")
        BUILD_SHARE_LINK = "build_share_link", _("Build Share Link")
        BULK_DELETE = "bulk_delete", _("Bulk Delete")

    class TaskName(models.TextChoices):
        CONSUME_FILE = ("consume_file", _("Consume File"))
        TRAIN_CLASSIFIER = ("train_classifier", _("Train Classifier"))
        CHECK_SANITY = ("check_sanity", _("Check Sanity"))
        INDEX_OPTIMIZE = ("index_optimize", _("Index Optimize"))
        LLMINDEX_UPDATE = ("llmindex_update", _("LLM Index Update"))
    COMPLETE_STATUSES = (
        Status.SUCCESS,
        Status.FAILURE,
        Status.REVOKED,
    )

    class TriggerSource(models.TextChoices):
        SCHEDULED = "scheduled", _("Scheduled")  # Celery beat
        WEB_UI = "web_ui", _("Web UI")  # Document uploaded via web
        API_UPLOAD = "api_upload", _("API Upload")  # Document uploaded via API
        FOLDER_CONSUME = "folder_consume", _("Folder Consume")  # Consume folder
        EMAIL_CONSUME = "email_consume", _("Email Consume")  # Email attachment
        SYSTEM = "system", _("System")  # Auto-triggered (self-heal, config side-effect)
        MANUAL = "manual", _("Manual")  # User explicitly ran via /api/tasks/run/

    # Identification
    task_id = models.CharField(
        max_length=255,
        max_length=72,
        unique=True,
        verbose_name=_("Task ID"),
        help_text=_("Celery ID for the Task that was run"),
        help_text=_("Celery task ID"),
    )

    acknowledged = models.BooleanField(
        default=False,
        verbose_name=_("Acknowledged"),
        help_text=_("If the task is acknowledged via the frontend or API"),
    task_type = models.CharField(
        max_length=50,
        choices=TaskType.choices,
        verbose_name=_("Task Type"),
        help_text=_("The kind of work being performed"),
        db_index=True,
    )

    task_file_name = models.CharField(
        null=True,
        max_length=255,
        verbose_name=_("Task Filename"),
        help_text=_("Name of the file which the Task was run for"),
    )

    task_name = models.CharField(
        null=True,
        max_length=255,
        choices=TaskName.choices,
        verbose_name=_("Task Name"),
        help_text=_("Name of the task that was run"),
    trigger_source = models.CharField(
        max_length=50,
        choices=TriggerSource.choices,
        verbose_name=_("Trigger Source"),
        help_text=_("What initiated this task"),
        db_index=True,
    )

    # State tracking
    status = models.CharField(
        max_length=30,
        default=states.PENDING,
        choices=TASK_STATE_CHOICES,
        verbose_name=_("Task State"),
        help_text=_("Current state of the task being run"),
        choices=Status.choices,
        default=Status.PENDING,
        verbose_name=_("Status"),
        db_index=True,
    )

    # Timestamps
    date_created = models.DateTimeField(
        null=True,
        default=timezone.now,
        verbose_name=_("Created DateTime"),
        help_text=_("Datetime field when the task result was created in UTC"),
        verbose_name=_("Created"),
        db_index=True,
    )

    date_started = models.DateTimeField(
        null=True,
        default=None,
        verbose_name=_("Started DateTime"),
        help_text=_("Datetime field when the task was started in UTC"),
        blank=True,
        verbose_name=_("Started"),
    )

    date_done = models.DateTimeField(
        null=True,
        default=None,
        verbose_name=_("Completed DateTime"),
        help_text=_("Datetime field when the task was completed in UTC"),
        blank=True,
        verbose_name=_("Completed"),
        db_index=True,
    )

    result = models.TextField(
    # Duration fields -- populated by task_postrun signal handler
    duration_seconds = models.FloatField(
        null=True,
        default=None,
        blank=True,
        verbose_name=_("Duration (seconds)"),
        help_text=_("Elapsed time from start to completion"),
    )

    wait_time_seconds = models.FloatField(
        null=True,
        blank=True,
        verbose_name=_("Wait Time (seconds)"),
        help_text=_("Time from task creation to worker pickup"),
    )

    # Input/Output data
    input_data = models.JSONField(
        default=dict,
        blank=True,
        verbose_name=_("Input Data"),
        help_text=_("Structured input parameters for the task"),
    )

    result_data = models.JSONField(
        null=True,
        blank=True,
        verbose_name=_("Result Data"),
        help_text=_(
            "The data returned by the task",
        ),
        help_text=_("Structured result data from task execution"),
    )

    type = models.CharField(
        max_length=30,
        choices=TaskType.choices,
        default=TaskType.AUTO,
        verbose_name=_("Task Type"),
        help_text=_("The type of task that was run"),
    result_message = models.TextField(
        null=True,
        blank=True,
        verbose_name=_("Result Message"),
        help_text=_("Human-readable result message"),
    )

    def __str__(self) -> str:
        return f"Task {self.task_id}"
    # Acknowledgment
    acknowledged = models.BooleanField(
        default=False,
        verbose_name=_("Acknowledged"),
        db_index=True,
    )

    class Meta:
        verbose_name = _("Task")
        verbose_name_plural = _("Tasks")
        ordering = ["-date_created"]
        indexes = [
            models.Index(fields=["status", "date_created"]),
            models.Index(fields=["task_type", "status"]),
            models.Index(fields=["owner", "acknowledged", "date_created"]),
        ]

    def __str__(self) -> str:  # pragma: no cover
        return f"{self.get_task_type_display()} [{self.task_id[:8]}]"

    @property
    def is_complete(self) -> bool:  # pragma: no cover
        return self.status in self.COMPLETE_STATUSES

    @property
    def related_document_ids(self) -> list[int]:  # pragma: no cover
        if not self.result_data:
            return []
        if doc_id := self.result_data.get("document_id"):
            return [doc_id]
        if dup_id := self.result_data.get("duplicate_of"):
            return [dup_id]
        return []


class Note(SoftDeleteModel):

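A sketch of how the redesigned model is meant to be read (hypothetical usage; all names are taken from the model above):

    # Recent failed consumes, newest first (ordering comes from Meta).
    failed = PaperlessTask.objects.filter(
        task_type=PaperlessTask.TaskType.CONSUME_FILE,
        status=PaperlessTask.Status.FAILURE,
    )
    for task in failed:
        # related_document_ids reads document_id / duplicate_of out of result_data.
        print(task, task.is_complete, task.related_document_ids)
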
@@ -10,7 +10,6 @@ is an identity function that adds no overhead.
"""

import logging
import uuid
from collections import defaultdict
from collections.abc import Iterator
from pathlib import Path
@@ -18,12 +17,9 @@ from typing import TYPE_CHECKING
from typing import Final
from typing import TypedDict

from celery import states
from django.conf import settings
from django.utils import timezone

from documents.models import Document
from documents.models import PaperlessTask
from documents.utils import IterWrapper
from documents.utils import compute_checksum
from documents.utils import identity
@@ -287,33 +283,17 @@ def _check_document(

def check_sanity(
    *,
    scheduled: bool = True,
    iter_wrapper: IterWrapper[Document] = identity,
) -> SanityCheckMessages:
    """Run a full sanity check on the document archive.

    Args:
        scheduled: Whether this is a scheduled (automatic) or manual check.
            Controls the task type recorded in the database.
        iter_wrapper: A callable that wraps the document iterable, e.g.,
            for progress bar display. Defaults to identity (no wrapping).

    Returns:
        A SanityCheckMessages instance containing all detected issues.
    """
    paperless_task = PaperlessTask.objects.create(
        task_id=uuid.uuid4(),
        type=(
            PaperlessTask.TaskType.SCHEDULED_TASK
            if scheduled
            else PaperlessTask.TaskType.MANUAL_TASK
        ),
        task_name=PaperlessTask.TaskName.CHECK_SANITY,
        status=states.STARTED,
        date_created=timezone.now(),
        date_started=timezone.now(),
    )

    messages = SanityCheckMessages()
    present_files = _build_present_files()

@@ -332,22 +312,4 @@ def check_sanity(
    for extra_file in present_files:
        messages.warning(None, f"Orphaned file in media dir: {extra_file}")

    paperless_task.status = states.SUCCESS if not messages.has_error else states.FAILURE
    if messages.total_issue_count == 0:
        paperless_task.result = "No issues found."
    else:
        parts: list[str] = []
        if messages.document_error_count:
            parts.append(f"{messages.document_error_count} document(s) with errors")
        if messages.document_warning_count:
            parts.append(f"{messages.document_warning_count} document(s) with warnings")
        if messages.global_warning_count:
            parts.append(f"{messages.global_warning_count} global warning(s)")
        paperless_task.result = ", ".join(parts) + " found."
        if messages.has_error:
            paperless_task.result += " Check logs for details."

    paperless_task.date_done = timezone.now()
    paperless_task.save(update_fields=["status", "result", "date_done"])

    return messages

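With the bookkeeping removed, a caller now only deals with the messages object; a minimal sketch using only names visible in this hunk (task state is persisted by the Celery signal handlers elsewhere in this patch):

    # check_sanity() just computes and returns messages.
    messages = check_sanity(iter_wrapper=identity)
    if messages.has_error:
        messages.log_messages()
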
@@ -25,21 +25,39 @@ _REGEX_TIMEOUT: Final[float] = 1.0

_DATE_ONLY_FIELDS = frozenset({"created"})

_TODAY: Final[str] = "today"
_YESTERDAY: Final[str] = "yesterday"
_PREVIOUS_WEEK: Final[str] = "previous week"
_THIS_MONTH: Final[str] = "this month"
_PREVIOUS_MONTH: Final[str] = "previous month"
_THIS_YEAR: Final[str] = "this year"
_PREVIOUS_YEAR: Final[str] = "previous year"
_PREVIOUS_QUARTER: Final[str] = "previous quarter"

_DATE_KEYWORDS = frozenset(
    {
        "today",
        "yesterday",
        "this_week",
        "last_week",
        "this_month",
        "last_month",
        "this_year",
        "last_year",
        _TODAY,
        _YESTERDAY,
        _PREVIOUS_WEEK,
        _THIS_MONTH,
        _PREVIOUS_MONTH,
        _THIS_YEAR,
        _PREVIOUS_YEAR,
        _PREVIOUS_QUARTER,
    },
)

_DATE_KEYWORD_PATTERN = "|".join(
    sorted((regex.escape(k) for k in _DATE_KEYWORDS), key=len, reverse=True),
)

_FIELD_DATE_RE = regex.compile(
    r"(\w+):(" + "|".join(_DATE_KEYWORDS) + r")\b",
    rf"""(?P<field>\w+)\s*:\s*(?:
    (?P<quote>["'])(?P<quoted>{_DATE_KEYWORD_PATTERN})(?P=quote)
    |
    (?P<bare>{_DATE_KEYWORD_PATTERN})(?![\w-])
    )""",
    regex.IGNORECASE | regex.VERBOSE,
)
_COMPACT_DATE_RE = regex.compile(r"\b(\d{14})\b")
_RELATIVE_RANGE_RE = regex.compile(
@@ -74,44 +92,59 @@ def _date_only_range(keyword: str, tz: tzinfo) -> str:

    today = datetime.now(tz).date()

    if keyword == "today":
    def _quarter_start(d: date) -> date:
        return date(d.year, ((d.month - 1) // 3) * 3 + 1, 1)

    if keyword == _TODAY:
        lo = datetime(today.year, today.month, today.day, tzinfo=UTC)
        return _iso_range(lo, lo + timedelta(days=1))
    if keyword == "yesterday":
    if keyword == _YESTERDAY:
        y = today - timedelta(days=1)
        lo = datetime(y.year, y.month, y.day, tzinfo=UTC)
        hi = datetime(today.year, today.month, today.day, tzinfo=UTC)
        return _iso_range(lo, hi)
    if keyword == "this_week":
        mon = today - timedelta(days=today.weekday())
        lo = datetime(mon.year, mon.month, mon.day, tzinfo=UTC)
        return _iso_range(lo, lo + timedelta(weeks=1))
    if keyword == "last_week":
    if keyword == _PREVIOUS_WEEK:
        this_mon = today - timedelta(days=today.weekday())
        last_mon = this_mon - timedelta(weeks=1)
        lo = datetime(last_mon.year, last_mon.month, last_mon.day, tzinfo=UTC)
        hi = datetime(this_mon.year, this_mon.month, this_mon.day, tzinfo=UTC)
        return _iso_range(lo, hi)
    if keyword == "this_month":
    if keyword == _THIS_MONTH:
        lo = datetime(today.year, today.month, 1, tzinfo=UTC)
        if today.month == 12:
            hi = datetime(today.year + 1, 1, 1, tzinfo=UTC)
        else:
            hi = datetime(today.year, today.month + 1, 1, tzinfo=UTC)
        return _iso_range(lo, hi)
    if keyword == "last_month":
    if keyword == _PREVIOUS_MONTH:
        if today.month == 1:
            lo = datetime(today.year - 1, 12, 1, tzinfo=UTC)
        else:
            lo = datetime(today.year, today.month - 1, 1, tzinfo=UTC)
        hi = datetime(today.year, today.month, 1, tzinfo=UTC)
        return _iso_range(lo, hi)
    if keyword == "this_year":
    if keyword == _THIS_YEAR:
        lo = datetime(today.year, 1, 1, tzinfo=UTC)
        return _iso_range(lo, datetime(today.year + 1, 1, 1, tzinfo=UTC))
    if keyword == "last_year":
    if keyword == _PREVIOUS_YEAR:
        lo = datetime(today.year - 1, 1, 1, tzinfo=UTC)
        return _iso_range(lo, datetime(today.year, 1, 1, tzinfo=UTC))
    if keyword == _PREVIOUS_QUARTER:
        this_quarter = _quarter_start(today)
        last_quarter = this_quarter - relativedelta(months=3)
        lo = datetime(
            last_quarter.year,
            last_quarter.month,
            last_quarter.day,
            tzinfo=UTC,
        )
        hi = datetime(
            this_quarter.year,
            this_quarter.month,
            this_quarter.day,
            tzinfo=UTC,
        )
        return _iso_range(lo, hi)
    raise ValueError(f"Unknown keyword: {keyword}")

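Worked example of the quarter arithmetic above (pure stdlib, using only the helper shown):

    from datetime import date

    # ((month - 1) // 3) * 3 + 1 maps months 1-3 -> 1, 4-6 -> 4, 7-9 -> 7, 10-12 -> 10.
    d = date(2026, 7, 15)
    quarter_start = date(d.year, ((d.month - 1) // 3) * 3 + 1, 1)  # 2026-07-01
    # "previous quarter" is then [quarter_start - 3 months, quarter_start),
    # i.e. 2026-04-01 up to (but excluding) 2026-07-01.
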
@@ -127,42 +160,46 @@ def _datetime_range(keyword: str, tz: tzinfo) -> str:
    def _midnight(d: date) -> datetime:
        return datetime(d.year, d.month, d.day, tzinfo=tz).astimezone(UTC)

    if keyword == "today":
    def _quarter_start(d: date) -> date:
        return date(d.year, ((d.month - 1) // 3) * 3 + 1, 1)

    if keyword == _TODAY:
        return _iso_range(_midnight(today), _midnight(today + timedelta(days=1)))
    if keyword == "yesterday":
    if keyword == _YESTERDAY:
        y = today - timedelta(days=1)
        return _iso_range(_midnight(y), _midnight(today))
    if keyword == "this_week":
        mon = today - timedelta(days=today.weekday())
        return _iso_range(_midnight(mon), _midnight(mon + timedelta(weeks=1)))
    if keyword == "last_week":
    if keyword == _PREVIOUS_WEEK:
        this_mon = today - timedelta(days=today.weekday())
        last_mon = this_mon - timedelta(weeks=1)
        return _iso_range(_midnight(last_mon), _midnight(this_mon))
    if keyword == "this_month":
    if keyword == _THIS_MONTH:
        first = today.replace(day=1)
        if today.month == 12:
            next_first = date(today.year + 1, 1, 1)
        else:
            next_first = date(today.year, today.month + 1, 1)
        return _iso_range(_midnight(first), _midnight(next_first))
    if keyword == "last_month":
    if keyword == _PREVIOUS_MONTH:
        this_first = today.replace(day=1)
        if today.month == 1:
            last_first = date(today.year - 1, 12, 1)
        else:
            last_first = date(today.year, today.month - 1, 1)
        return _iso_range(_midnight(last_first), _midnight(this_first))
    if keyword == "this_year":
    if keyword == _THIS_YEAR:
        return _iso_range(
            _midnight(date(today.year, 1, 1)),
            _midnight(date(today.year + 1, 1, 1)),
        )
    if keyword == "last_year":
    if keyword == _PREVIOUS_YEAR:
        return _iso_range(
            _midnight(date(today.year - 1, 1, 1)),
            _midnight(date(today.year, 1, 1)),
        )
    if keyword == _PREVIOUS_QUARTER:
        this_quarter = _quarter_start(today)
        last_quarter = this_quarter - relativedelta(months=3)
        return _iso_range(_midnight(last_quarter), _midnight(this_quarter))
    raise ValueError(f"Unknown keyword: {keyword}")


@@ -308,7 +345,7 @@ def rewrite_natural_date_keywords(query: str, tz: tzinfo) -> str:
    - Compact 14-digit dates (YYYYMMDDHHmmss)
    - Whoosh relative ranges ([-7 days to now], [now-1h TO now+2h])
    - 8-digit dates with field awareness (created:20240115)
    - Natural keywords (field:today, field:last_week, etc.)
    - Natural keywords (field:today, field:"previous quarter", etc.)

    Args:
        query: Raw user query string
@@ -326,7 +363,8 @@ def rewrite_natural_date_keywords(query: str, tz: tzinfo) -> str:
    query = _rewrite_relative_range(query)

    def _replace(m: regex.Match[str]) -> str:
        field, keyword = m.group(1), m.group(2)
        field = m.group("field")
        keyword = (m.group("quoted") or m.group("bare")).lower()
        if field in _DATE_ONLY_FIELDS:
            return f"{field}:{_date_only_range(keyword, tz)}"
        return f"{field}:{_datetime_range(keyword, tz)}"

@@ -12,7 +12,6 @@ from typing import Literal
from typing import TypedDict

import magic
from celery import states
from django.conf import settings
from django.contrib.auth.models import Group
from django.contrib.auth.models import User
@@ -40,6 +39,7 @@ from drf_spectacular.utils import extend_schema_field
from drf_spectacular.utils import extend_schema_serializer
from drf_writable_nested.serializers import NestedUpdateMixin
from guardian.core import ObjectPermissionChecker
from guardian.shortcuts import get_objects_for_user
from guardian.shortcuts import get_users_with_perms
from guardian.utils import get_group_obj_perms_model
from guardian.utils import get_user_obj_perms_model
@@ -2431,7 +2431,84 @@ class UiSettingsViewSerializer(serializers.ModelSerializer[UiSettings]):
        return ui_settings


class TasksViewSerializer(OwnedObjectSerializer):
class TaskSerializerV10(OwnedObjectSerializer):
    """Task serializer for API v10+ using new field names."""

    related_document_ids = serializers.ListField(
        child=serializers.IntegerField(),
        read_only=True,
    )
    task_type_display = serializers.CharField(
        source="get_task_type_display",
        read_only=True,
    )
    trigger_source_display = serializers.CharField(
        source="get_trigger_source_display",
        read_only=True,
    )
    status_display = serializers.CharField(
        source="get_status_display",
        read_only=True,
    )

    class Meta:
        model = PaperlessTask
        fields = (
            "id",
            "task_id",
            "task_type",
            "task_type_display",
            "trigger_source",
            "trigger_source_display",
            "status",
            "status_display",
            "date_created",
            "date_started",
            "date_done",
            "duration_seconds",
            "wait_time_seconds",
            "input_data",
            "result_data",
            "result_message",
            "related_document_ids",
            "acknowledged",
            "owner",
        )
        read_only_fields = fields


class TaskSerializerV9(serializers.ModelSerializer):
    """Task serializer for API v9 backwards compatibility.

    Maps old field names to the new model fields so existing clients continue
    to work unchanged.
    """

    # v9 field: task_name -> task_type (with value remapping for renamed tasks)
    task_name = serializers.SerializerMethodField()

    # v9 field: task_file_name -> input_data.filename
    task_file_name = serializers.SerializerMethodField()

    # v9 field: type -> trigger_source (mapped to old enum labels)
    type = serializers.SerializerMethodField()

    # v9 field: status -> uppercase Celery state strings
    status = serializers.SerializerMethodField()

    # v9 field: result -> result_message (with legacy format fallback)
    result = serializers.CharField(
        source="result_message",
        read_only=True,
        allow_null=True,
    )

    # v9 field: related_document -> first document ID from result_data
    related_document = serializers.SerializerMethodField()

    # v9 field: duplicate_documents -> list of duplicate IDs from result_data
    duplicate_documents = serializers.SerializerMethodField()

    class Meta:
        model = PaperlessTask
        fields = (
@@ -2439,59 +2516,99 @@ class TasksViewSerializer(OwnedObjectSerializer):
            "task_id",
            "task_name",
            "task_file_name",
            "date_created",
            "date_done",
            "type",
            "status",
            "date_created",
            "date_done",
            "result",
            "acknowledged",
            "related_document",
            "duplicate_documents",
            "owner",
        )
        read_only_fields = fields

    related_document = serializers.SerializerMethodField()
    duplicate_documents = serializers.SerializerMethodField()
    created_doc_re = re.compile(r"New document id (\d+) created")
    duplicate_doc_re = re.compile(r"It is a duplicate of .* \(#(\d+)\)")
    _TASK_TYPE_TO_V9_NAME = {
        PaperlessTask.TaskType.SANITY_CHECK: "check_sanity",
        PaperlessTask.TaskType.LLM_INDEX: "llmindex_update",
    }

    def get_related_document(self, obj) -> str | None:
        result = None
        re = None
        if obj.result:
            match obj.status:
                case states.SUCCESS:
                    re = self.created_doc_re
                case states.FAILURE:
                    re = (
                        self.duplicate_doc_re
                        if "existing document is in the trash" not in obj.result
                        else None
                    )
            if re is not None:
                try:
                    result = re.search(obj.result).group(1)
                except Exception:
                    pass
    def get_task_name(self, obj: PaperlessTask) -> str:
        return self._TASK_TYPE_TO_V9_NAME.get(obj.task_type, obj.task_type)

        return result
    def get_task_file_name(self, obj: PaperlessTask) -> str | None:
        if not obj.input_data:
            return None
        return obj.input_data.get("filename")

    @extend_schema_field(DuplicateDocumentSummarySerializer(many=True))
    def get_duplicate_documents(self, obj):
        related_document = self.get_related_document(obj)
        request = self.context.get("request")
        user = request.user if request else None
        document = Document.global_objects.filter(pk=related_document).first()
        if not related_document or not user or not document:
    _STATUS_TO_V9 = {
        PaperlessTask.Status.PENDING: "PENDING",
        PaperlessTask.Status.STARTED: "STARTED",
        PaperlessTask.Status.SUCCESS: "SUCCESS",
        PaperlessTask.Status.FAILURE: "FAILURE",
        PaperlessTask.Status.REVOKED: "REVOKED",
    }

    def get_status(self, obj: PaperlessTask) -> str:
        return self._STATUS_TO_V9.get(obj.status, obj.status.upper())

    _TRIGGER_SOURCE_TO_V9_TYPE = {
        PaperlessTask.TriggerSource.SCHEDULED: "scheduled_task",
        PaperlessTask.TriggerSource.SYSTEM: "auto_task",
        # Email and folder-consumer documents are system-initiated, not manually triggered
        PaperlessTask.TriggerSource.EMAIL_CONSUME: "auto_task",
        PaperlessTask.TriggerSource.FOLDER_CONSUME: "auto_task",
    }

    def get_type(self, obj: PaperlessTask) -> str:
        return self._TRIGGER_SOURCE_TO_V9_TYPE.get(obj.trigger_source, "manual_task")

    def get_related_document(self, obj: PaperlessTask) -> int | None:
        ids = obj.related_document_ids
        return ids[0] if ids else None

    def get_duplicate_documents(
        self,
        obj: PaperlessTask,
    ) -> list[dict[str, Any]]:
        if not obj.result_data:
            return []
        duplicates = _get_viewable_duplicates(document, user)
        return list(duplicates.values("id", "title", "deleted_at"))
        dup_of = obj.result_data.get("duplicate_of")
        if dup_of is None:
            return []
        request = self.context.get("request")
        if request is None:
            return []
        user = request.user
        qs = Document.global_objects.filter(pk=dup_of)
        if not user.is_staff:
            with_perms = get_objects_for_user(
                user,
                "documents.view_document",
                qs,
                accept_global_perms=False,
            )
            qs = with_perms | qs.filter(owner=user) | qs.filter(owner__isnull=True)
        return list(qs.values("id", "title", "deleted_at"))


class RunTaskViewSerializer(serializers.Serializer[dict[str, Any]]):
    task_name = serializers.ChoiceField(
        choices=PaperlessTask.TaskName.choices,
        label="Task Name",
class TaskSummarySerializer(serializers.Serializer):
    task_type = serializers.CharField()
    total_count = serializers.IntegerField()
    pending_count = serializers.IntegerField()
    success_count = serializers.IntegerField()
    failure_count = serializers.IntegerField()
    avg_duration_seconds = serializers.FloatField(allow_null=True)
    avg_wait_time_seconds = serializers.FloatField(allow_null=True)
    last_run = serializers.DateTimeField(allow_null=True)
    last_success = serializers.DateTimeField(allow_null=True)
    last_failure = serializers.DateTimeField(allow_null=True)


class RunTaskSerializer(serializers.Serializer):
    task_type = serializers.ChoiceField(
        choices=PaperlessTask.TaskType.choices,
        label="Task Type",
        write_only=True,
    )

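A client-side sketch of how the two serializers are selected (the host and token are placeholders; the Accept header format matches the test fixtures later in this diff):

    import requests

    # version=9 keeps the legacy task_name/type/result field names;
    # version=10 returns the new task_type/trigger_source/result_data shape.
    resp = requests.get(
        "https://paperless.example.com/api/tasks/",
        headers={
            "Accept": "application/json; version=10",
            "Authorization": "Token <api-token>",
        },
    )
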
@@ -1,18 +1,21 @@
from __future__ import annotations

import datetime
import hashlib
import logging
import re as _re
import shutil
import traceback as _tb
from pathlib import Path
from typing import TYPE_CHECKING
from typing import Any

from celery import shared_task
from celery import states
from celery.signals import before_task_publish
from celery.signals import task_failure
from celery.signals import task_postrun
from celery.signals import task_prerun
from celery.signals import task_revoked
from celery.signals import worker_process_init
from django.conf import settings
from django.contrib.auth.models import Group
@@ -31,6 +34,7 @@ from documents import matching
from documents.caching import clear_document_caches
from documents.caching import invalidate_llm_suggestions_cache
from documents.data_models import ConsumableDocument
from documents.data_models import DocumentSource
from documents.file_handling import create_source_path_directory
from documents.file_handling import delete_empty_directories
from documents.file_handling import generate_filename
@@ -996,68 +1000,225 @@ def run_workflows(
    return overrides, "\n".join(messages)


@before_task_publish.connect
def before_task_publish_handler(sender=None, headers=None, body=None, **kwargs) -> None:
# ---------------------------------------------------------------------------
# Task tracking -- Celery signal handlers
# ---------------------------------------------------------------------------

TRACKED_TASKS: dict[str, PaperlessTask.TaskType] = {
    "documents.tasks.consume_file": PaperlessTask.TaskType.CONSUME_FILE,
    "documents.tasks.train_classifier": PaperlessTask.TaskType.TRAIN_CLASSIFIER,
    "documents.tasks.sanity_check": PaperlessTask.TaskType.SANITY_CHECK,
    "documents.tasks.llmindex_index": PaperlessTask.TaskType.LLM_INDEX,
    "documents.tasks.empty_trash": PaperlessTask.TaskType.EMPTY_TRASH,
    "documents.tasks.check_scheduled_workflows": PaperlessTask.TaskType.CHECK_WORKFLOWS,
    "paperless_mail.tasks.process_mail_accounts": PaperlessTask.TaskType.MAIL_FETCH,
    "documents.tasks.bulk_update_documents": PaperlessTask.TaskType.BULK_UPDATE,
    "documents.tasks.update_document_content_maybe_archive_file": PaperlessTask.TaskType.REPROCESS_DOCUMENT,
    "documents.tasks.build_share_link_bundle": PaperlessTask.TaskType.BUILD_SHARE_LINK,
    "documents.bulk_edit.delete": PaperlessTask.TaskType.BULK_DELETE,
}

_CELERY_STATE_TO_STATUS: dict[str, PaperlessTask.Status] = {
    "SUCCESS": PaperlessTask.Status.SUCCESS,
    "FAILURE": PaperlessTask.Status.FAILURE,
    "REVOKED": PaperlessTask.Status.REVOKED,
}

_DOCUMENT_SOURCE_TO_TRIGGER: dict[DocumentSource, PaperlessTask.TriggerSource] = {
    DocumentSource.ConsumeFolder: PaperlessTask.TriggerSource.FOLDER_CONSUME,
    DocumentSource.ApiUpload: PaperlessTask.TriggerSource.API_UPLOAD,
    DocumentSource.MailFetch: PaperlessTask.TriggerSource.EMAIL_CONSUME,
    DocumentSource.WebUI: PaperlessTask.TriggerSource.WEB_UI,
}


def _get_consume_args(
    args: tuple,
    task_kwargs: dict,
) -> tuple[Any | None, Any | None]:
    """Extract (input_doc, overrides) from consume_file task arguments."""
    input_doc = args[0] if args else task_kwargs.get("input_doc")
    overrides = args[1] if len(args) >= 2 else task_kwargs.get("overrides")
    return input_doc, overrides


def _extract_input_data(
    task_type: PaperlessTask.TaskType,
    args: tuple,
    task_kwargs: dict,
) -> dict:
    """Build the input_data dict stored on the PaperlessTask record.

    For consume_file tasks this includes the filename, MIME type, and any
    non-null overrides from the DocumentMetadataOverrides object. For
    mail_fetch tasks it captures the account_ids list. All other task
    types store no input data and return {}.
    """
    Creates the PaperlessTask object in a pending state. This is sent before
    the task reaches the broker, but before it begins executing on a worker.
    if task_type == PaperlessTask.TaskType.CONSUME_FILE:
        input_doc, overrides = _get_consume_args(args, task_kwargs)
        if input_doc is None:  # pragma: no cover
            return {}
        data: dict = {
            "filename": input_doc.original_file.name,
            "mime_type": input_doc.mime_type,
        }
        if input_doc.original_path:  # pragma: no cover
            data["source_path"] = str(input_doc.original_path)
        if input_doc.mailrule_id:  # pragma: no cover
            data["mailrule_id"] = input_doc.mailrule_id
        if overrides:
            override_dict = {}
            for k, v in vars(overrides).items():
                if v is None or k.startswith("_"):
                    continue
                if isinstance(v, datetime.date):
                    v = v.isoformat()
                elif isinstance(v, Path):
                    v = str(v)
                override_dict[k] = v
            if override_dict:
                data["overrides"] = override_dict
        return data

    if task_type == PaperlessTask.TaskType.MAIL_FETCH:
        account_ids = args[0] if args else task_kwargs.get("account_ids")
        if account_ids is not None:
            return {"account_ids": account_ids}
        return {}

    return {}


def _determine_trigger_source(
    task_type: PaperlessTask.TaskType,
    args: tuple,
    task_kwargs: dict,
    headers: dict,
) -> PaperlessTask.TriggerSource:
    """Resolve the TriggerSource for a task being published to the broker.

    Priority order:
    1. Explicit trigger_source header (set by beat schedule or apply_async callers).
    2. For consume_file tasks, the DocumentSource on the input document.
    3. MANUAL as the catch-all for all other cases.
    """
    # Explicit header takes priority -- callers pass a TriggerSource DB value directly.
    header_source = headers.get("trigger_source")
    if header_source is not None:
        try:
            return PaperlessTask.TriggerSource(header_source)
        except ValueError:
            pass

    if task_type == PaperlessTask.TaskType.CONSUME_FILE:
        input_doc, _ = _get_consume_args(args, task_kwargs)
        if input_doc is not None:
            return _DOCUMENT_SOURCE_TO_TRIGGER.get(
                input_doc.source,
                PaperlessTask.TriggerSource.API_UPLOAD,
            )

    return PaperlessTask.TriggerSource.MANUAL


def _extract_owner_id(
    task_type: PaperlessTask.TaskType,
    args: tuple,
    task_kwargs: dict,
) -> int | None:
    """Return the owner_id from consume_file overrides, or None for all other task types."""
    if task_type != PaperlessTask.TaskType.CONSUME_FILE:
        return None
    _, overrides = _get_consume_args(args, task_kwargs)
    if overrides and hasattr(overrides, "owner_id"):
        return overrides.owner_id
    return None  # pragma: no cover


def _parse_consume_result(result: str) -> dict | None:
    """Parse a consume_file string result into a structured dict.

    consume_file returns human-readable strings rather than dicts (e.g.
    "Success. New document id 42 created" or "It is a duplicate of foo (#7)").
    This function extracts the document ID or duplicate reference so the
    result can be stored as structured data on the PaperlessTask record.
    Returns None when the string does not match any known pattern.
    """
    if match := _re.search(r"New document id (\d+) created", result):
        return {"document_id": int(match.group(1))}
    if match := _re.search(r"It is a duplicate of .* \(#(\d+)\)", result):
        return {
            "duplicate_of": int(match.group(1)),
            "duplicate_in_trash": "existing document is in the trash" in result,
        }
    return None  # pragma: no cover


@before_task_publish.connect
def before_task_publish_handler(
    sender=None,
    headers=None,
    body=None,
    **kwargs,
) -> None:
    """
    Creates the PaperlessTask record when the task is published to broker.

    https://docs.celeryq.dev/en/stable/userguide/signals.html#before-task-publish

    https://docs.celeryq.dev/en/stable/internals/protocol.html#version-2

    """
    if "task" not in headers or headers["task"] != "documents.tasks.consume_file":
        # Assumption: this is only ever a v2 message
    if headers is None or body is None:
        return

    task_name = headers.get("task", "")
    task_type = TRACKED_TASKS.get(task_name)
    if task_type is None:
        return

    try:
        close_old_connections()
        args, task_kwargs, _ = body
        task_id = headers["id"]

        task_args = body[0]
        input_doc, overrides = task_args

        task_file_name = input_doc.original_file.name
        user_id = overrides.owner_id if overrides else None
        input_data = _extract_input_data(task_type, args, task_kwargs)
        trigger_source = _determine_trigger_source(
            task_type,
            args,
            task_kwargs,
            headers,
        )
        owner_id = _extract_owner_id(task_type, args, task_kwargs)

        PaperlessTask.objects.create(
            type=PaperlessTask.TaskType.AUTO,
            task_id=headers["id"],
            status=states.PENDING,
            task_file_name=task_file_name,
            task_name=PaperlessTask.TaskName.CONSUME_FILE,
            result=None,
            date_created=timezone.now(),
            date_started=None,
            date_done=None,
            owner_id=user_id,
            task_id=task_id,
            task_type=task_type,
            trigger_source=trigger_source,
            status=PaperlessTask.Status.PENDING,
            input_data=input_data,
            owner_id=owner_id,
        )
    except Exception:  # pragma: no cover
        # Don't let an exception in the signal handlers prevent
        # a document from being consumed.
        logger.exception("Creating PaperlessTask failed")


@task_prerun.connect
def task_prerun_handler(sender=None, task_id=None, task=None, **kwargs) -> None:
    """

    Updates the PaperlessTask to be started. Sent before the task begins execution
    on a worker.
    Marks the task STARTED when execution begins on a worker.

    https://docs.celeryq.dev/en/stable/userguide/signals.html#task-prerun
    """
    if task_id is None:  # pragma: no cover
        return
    if task and task.name not in TRACKED_TASKS:
        return
    try:
        close_old_connections()
        task_instance = PaperlessTask.objects.filter(task_id=task_id).first()

        if task_instance is not None:
            task_instance.status = states.STARTED
            task_instance.date_started = timezone.now()
            task_instance.save()
        PaperlessTask.objects.filter(task_id=task_id).update(
            status=PaperlessTask.Status.STARTED,
            date_started=timezone.now(),
        )
    except Exception:  # pragma: no cover
        # Don't let an exception in the signal handlers prevent
        # a document from being consumed.
        logger.exception("Setting PaperlessTask started failed")

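Worked examples for the _parse_consume_result helper above, using the exact strings its docstring and regexes cover:

    _parse_consume_result("Success. New document id 42 created")
    # -> {"document_id": 42}
    _parse_consume_result("Not consuming file.pdf: It is a duplicate of foo (#7)")
    # -> {"duplicate_of": 7, "duplicate_in_trash": False}
    _parse_consume_result("something else")  # -> None
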
@@ -1071,22 +1232,56 @@ def task_postrun_handler(
    **kwargs,
) -> None:
    """
    Updates the result of the PaperlessTask.
    Records task completion and result data for non-failure outcomes.

    Skips FAILURE states entirely, since task_failure_handler fires first
    and fully owns the failure path (status, date_done, duration,
    result_data, result_message).

    https://docs.celeryq.dev/en/stable/userguide/signals.html#task-postrun
    """
    if task_id is None:  # pragma: no cover
        return
    if task and task.name not in TRACKED_TASKS:
        return
    try:
        close_old_connections()
        task_instance = PaperlessTask.objects.filter(task_id=task_id).first()

        if task_instance is not None:
            task_instance.status = state or states.FAILURE
            task_instance.result = retval
            task_instance.date_done = timezone.now()
            task_instance.save()
        new_status = _CELERY_STATE_TO_STATUS.get(state, PaperlessTask.Status.FAILURE)
        if new_status == PaperlessTask.Status.FAILURE:
            return

        now = timezone.now()
        try:
            task_instance = PaperlessTask.objects.get(task_id=task_id)
        except PaperlessTask.DoesNotExist:
            return

        task_instance.status = new_status
        task_instance.date_done = now
        changed_fields = ["status", "date_done"]

        if task_instance.date_started:
            task_instance.duration_seconds = (
                now - task_instance.date_started
            ).total_seconds()
            changed_fields.append("duration_seconds")
        if task_instance.date_started and task_instance.date_created:
            task_instance.wait_time_seconds = (
                task_instance.date_started - task_instance.date_created
            ).total_seconds()
            changed_fields.append("wait_time_seconds")

        if isinstance(retval, dict):
            task_instance.result_data = retval
            changed_fields.append("result_data")
        elif isinstance(retval, str):
            task_instance.result_message = retval
            task_instance.result_data = _parse_consume_result(retval)
            changed_fields.extend(["result_message", "result_data"])

        task_instance.save(update_fields=changed_fields)
    except Exception:  # pragma: no cover
        # Don't let an exception in the signal handlers prevent
        # a document from being consumed.
        logger.exception("Updating PaperlessTask failed")

@@ -1100,21 +1295,85 @@ def task_failure_handler(
    **kwargs,
) -> None:
    """
    Updates the result of a failed PaperlessTask.
    Records failure details when a task raises an exception.

    Fully owns the FAILURE path. task_postrun_handler skips FAILURE
    states so there is no overlap.

    https://docs.celeryq.dev/en/stable/userguide/signals.html#task-failure
    """
    if task_id is None:  # pragma: no cover
        return
    if sender and sender.name not in TRACKED_TASKS:  # pragma: no cover
        return
    try:
        close_old_connections()
        task_instance = PaperlessTask.objects.filter(task_id=task_id).first()

        if task_instance is not None and task_instance.result is None:
            task_instance.status = states.FAILURE
            task_instance.result = traceback
            task_instance.date_done = timezone.now()
            task_instance.save()
        result_data: dict = {
            "error_type": type(exception).__name__ if exception else "Unknown",
            "error_message": str(exception) if exception else "Unknown error",
        }
        if traceback:
            tb_str = "".join(_tb.format_tb(traceback))
            result_data["traceback"] = tb_str[:5000]

        now = timezone.now()
        update_fields: dict = {
            "status": PaperlessTask.Status.FAILURE,
            "result_data": result_data,
            "result_message": str(exception) if exception else None,
            "date_done": now,
        }

        task_qs = PaperlessTask.objects.filter(task_id=task_id)
        task_instance = task_qs.values("date_started", "date_created").first()
        if task_instance:
            date_started = task_instance["date_started"]
            if date_started:
                update_fields["duration_seconds"] = (now - date_started).total_seconds()
            date_created = task_instance["date_created"]
            if date_started and date_created:
                update_fields["wait_time_seconds"] = (
                    date_started - date_created
                ).total_seconds()
        task_qs.update(**update_fields)
    except Exception:  # pragma: no cover
        logger.exception("Updating PaperlessTask failed")
        logger.exception("Updating PaperlessTask on failure failed")


@task_revoked.connect
def task_revoked_handler(
    sender=None,
    request=None,
    *,
    terminated: bool = False,
    signum=None,
    expired: bool = False,
    **kwargs,
) -> None:
    """
    Marks the task REVOKED when it is cancelled before or during execution.

    This fires for tasks revoked while still queued (before task_prerun) as
    well as for tasks terminated mid-run. task_postrun does NOT fire for
    pre-start revocations, so this handler is the only way to move those
    records out of PENDING.

    https://docs.celeryq.dev/en/stable/userguide/signals.html#task-revoked
    """
    task_id = request.id if request else None
    if task_id is None:  # pragma: no cover
        return
    if sender and sender.name not in TRACKED_TASKS:  # pragma: no cover
        return
    try:
        close_old_connections()
        PaperlessTask.objects.filter(task_id=task_id).update(
            status=PaperlessTask.Status.REVOKED,
            date_done=timezone.now(),
        )
    except Exception:  # pragma: no cover
        logger.exception("Updating PaperlessTask on revocation failed")


@worker_process_init.connect

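Caller-side sketch of tagging a task explicitly (hypothetical call, not from this patch; Celery's apply_async accepts a headers dict under message protocol v2, which _determine_trigger_source reads first):

    # The header value must be a TriggerSource DB value; unknown values
    # fall through to the DocumentSource / MANUAL defaults.
    train_classifier.apply_async(
        headers={"trigger_source": PaperlessTask.TriggerSource.SCHEDULED},
    )
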
@@ -10,7 +10,6 @@ from tempfile import mkstemp

from celery import Task
from celery import shared_task
from celery import states
from django.conf import settings
from django.contrib.contenttypes.models import ContentType
from django.db import models
@@ -41,7 +40,6 @@ from documents.models import Correspondent
from documents.models import CustomFieldInstance
from documents.models import Document
from documents.models import DocumentType
from documents.models import PaperlessTask
from documents.models import ShareLink
from documents.models import ShareLinkBundle
from documents.models import StoragePath
@@ -84,19 +82,8 @@ def index_optimize() -> None:
@shared_task
def train_classifier(
    *,
    scheduled=True,
    status_callback: Callable[[str], None] | None = None,
) -> None:
    task = PaperlessTask.objects.create(
        type=PaperlessTask.TaskType.SCHEDULED_TASK
        if scheduled
        else PaperlessTask.TaskType.MANUAL_TASK,
        task_id=uuid.uuid4(),
        task_name=PaperlessTask.TaskName.TRAIN_CLASSIFIER,
        status=states.STARTED,
        date_created=timezone.now(),
        date_started=timezone.now(),
    )
) -> str:
    if (
        not Tag.objects.filter(matching_algorithm=Tag.MATCH_AUTO).exists()
        and not DocumentType.objects.filter(matching_algorithm=Tag.MATCH_AUTO).exists()
@@ -107,40 +94,25 @@ def train_classifier(
        logger.info(result)
        # Special case, items were once auto and trained, so remove the model
        # and prevent its use again
        if settings.MODEL_FILE.exists():
        if settings.MODEL_FILE.exists():  # pragma: no cover
            logger.info(f"Removing {settings.MODEL_FILE} so it won't be used")
            settings.MODEL_FILE.unlink()
        task.status = states.SUCCESS
        task.result = result
        task.date_done = timezone.now()
        task.save()
        return
        return result

    classifier = load_classifier()

    if not classifier:
        classifier = DocumentClassifier()

    try:
        if classifier.train(status_callback=status_callback):
            logger.info(
                f"Saving updated classifier model to {settings.MODEL_FILE}...",
            )
            classifier.save()
            task.result = "Training completed successfully"
        else:
            logger.debug("Training data unchanged.")
            task.result = "Training data unchanged"

        task.status = states.SUCCESS

    except Exception as e:
        logger.warning("Classifier error: " + str(e))
        task.status = states.FAILURE
        task.result = str(e)

    task.date_done = timezone.now()
    task.save(update_fields=["status", "result", "date_done"])
    if classifier.train(status_callback=status_callback):
        logger.info(
            f"Saving updated classifier model to {settings.MODEL_FILE}...",
        )
        classifier.save()
        return "Training completed successfully"
    else:
        logger.debug("Training data unchanged.")
        return "Training data unchanged"


@shared_task(bind=True)
@@ -231,8 +203,8 @@ def consume_file(


@shared_task
def sanity_check(*, scheduled=True, raise_on_error=True):
    messages = sanity_checker.check_sanity(scheduled=scheduled)
def sanity_check(*, raise_on_error: bool = True) -> str:
    messages = sanity_checker.check_sanity()
    messages.log_messages()

    if not messages.has_error and not messages.has_warning and not messages.has_info:
@@ -635,42 +607,19 @@ def update_document_parent_tags(tag: Tag, new_parent: Tag) -> None:
def llmindex_index(
    *,
    iter_wrapper: IterWrapper[Document] = identity,
    rebuild=False,
    scheduled=True,
    auto=False,
) -> None:
    rebuild: bool = False,
) -> str | None:
    ai_config = AIConfig()
    if ai_config.llm_index_enabled:
        task = PaperlessTask.objects.create(
            type=PaperlessTask.TaskType.SCHEDULED_TASK
            if scheduled
            else PaperlessTask.TaskType.AUTO
            if auto
            else PaperlessTask.TaskType.MANUAL_TASK,
            task_id=uuid.uuid4(),
            task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,
            status=states.STARTED,
            date_created=timezone.now(),
            date_started=timezone.now(),
        )
        from paperless_ai.indexing import update_llm_index

        try:
            result = update_llm_index(
                iter_wrapper=iter_wrapper,
                rebuild=rebuild,
            )
            task.status = states.SUCCESS
            task.result = result
        except Exception as e:
            logger.error("LLM index error: " + str(e))
            task.status = states.FAILURE
            task.result = str(e)

        task.date_done = timezone.now()
        task.save(update_fields=["status", "result", "date_done"])
    else:
    if not ai_config.llm_index_enabled:  # pragma: no cover
        logger.info("LLM index is disabled, skipping update.")
        return None

    from paperless_ai.indexing import update_llm_index

    return update_llm_index(
        iter_wrapper=iter_wrapper,
        rebuild=rebuild,
    )


@shared_task

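Net effect of the task refactors above, as a sketch (illustrative, not from the patch): task bodies now return plain values and the signal layer persists them.

    # The worker runs the task; its string return value is written to
    # PaperlessTask.result_message by task_postrun_handler, so the outcome is
    # read from the PaperlessTask row rather than a Celery result backend.
    train_classifier.delay()
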
@@ -13,6 +13,8 @@ from rest_framework.test import APIClient

from documents.tests.factories import DocumentFactory

UserModelT = get_user_model()

if TYPE_CHECKING:
    from documents.models import Document

@@ -126,15 +128,34 @@ def rest_api_client():
    yield APIClient()


@pytest.fixture
def authenticated_rest_api_client(rest_api_client: APIClient):
    """
    The basic DRF ApiClient which has been authenticated
    """
    UserModel = get_user_model()
    user = UserModel.objects.create_user(username="testuser", password="password")
    rest_api_client.force_authenticate(user=user)
    yield rest_api_client
@pytest.fixture()
def regular_user(django_user_model: type[UserModelT]) -> UserModelT:
    """Unprivileged authenticated user for permission boundary tests."""
    return django_user_model.objects.create_user(username="regular", password="regular")


@pytest.fixture()
def admin_client(rest_api_client: APIClient, admin_user: UserModelT) -> APIClient:
    """Admin client pre-authenticated and sending the v10 Accept header."""
    rest_api_client.force_authenticate(user=admin_user)
    rest_api_client.credentials(HTTP_ACCEPT="application/json; version=10")
    return rest_api_client


@pytest.fixture()
def v9_client(rest_api_client: APIClient, admin_user: UserModelT) -> APIClient:
    """Admin client pre-authenticated and sending the v9 Accept header."""
    rest_api_client.force_authenticate(user=admin_user)
    rest_api_client.credentials(HTTP_ACCEPT="application/json; version=9")
    return rest_api_client


@pytest.fixture()
def user_client(rest_api_client: APIClient, regular_user: UserModelT) -> APIClient:
    """Regular-user client pre-authenticated and sending the v10 Accept header."""
    rest_api_client.force_authenticate(user=regular_user)
    rest_api_client.credentials(HTTP_ACCEPT="application/json; version=10")
    return rest_api_client


@pytest.fixture(scope="session", autouse=True)

@@ -11,6 +11,7 @@ from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
from documents.models import MatchingModel
from documents.models import PaperlessTask
from documents.models import StoragePath
from documents.models import Tag

@@ -65,3 +66,17 @@ class DocumentFactory(DjangoModelFactory):
    correspondent = None
    document_type = None
    storage_path = None


class PaperlessTaskFactory(DjangoModelFactory):
    class Meta:
        model = PaperlessTask

    task_id = factory.Faker("uuid4")
    task_type = PaperlessTask.TaskType.CONSUME_FILE
    trigger_source = PaperlessTask.TriggerSource.WEB_UI
    status = PaperlessTask.Status.PENDING
    input_data = factory.LazyFunction(dict)
    result_data = None
    result_message = None
    acknowledged = False

@@ -81,45 +81,38 @@ class TestCreatedDateField:
            ),
            pytest.param(
                "created",
                "this_week",
                "2026-03-23T00:00:00Z",
                "2026-03-30T00:00:00Z",
                id="this_week_mon_sun",
            ),
            pytest.param(
                "created",
                "last_week",
                "previous week",
                "2026-03-16T00:00:00Z",
                "2026-03-23T00:00:00Z",
                id="last_week",
                id="previous_week",
            ),
            pytest.param(
                "created",
                "this_month",
                "this month",
                "2026-03-01T00:00:00Z",
                "2026-04-01T00:00:00Z",
                id="this_month",
            ),
            pytest.param(
                "created",
                "last_month",
                "previous month",
                "2026-02-01T00:00:00Z",
                "2026-03-01T00:00:00Z",
                id="last_month",
                id="previous_month",
            ),
            pytest.param(
                "created",
                "this_year",
                "this year",
                "2026-01-01T00:00:00Z",
                "2027-01-01T00:00:00Z",
                id="this_year",
            ),
            pytest.param(
                "created",
                "last_year",
                "previous year",
                "2025-01-01T00:00:00Z",
                "2026-01-01T00:00:00Z",
                id="last_year",
                id="previous_year",
            ),
        ],
    )
@@ -141,7 +134,7 @@ class TestCreatedDateField:
    def test_this_month_december_wraps_to_next_year(self) -> None:
        # December: next month must roll over to January 1 of next year
        lo, hi = _range(
            rewrite_natural_date_keywords("created:this_month", UTC),
            rewrite_natural_date_keywords("created:this month", UTC),
            "created",
        )
        assert lo == "2026-12-01T00:00:00Z"
@@ -151,12 +144,21 @@ class TestCreatedDateField:
    def test_last_month_january_wraps_to_previous_year(self) -> None:
        # January: last month must roll back to December 1 of previous year
        lo, hi = _range(
            rewrite_natural_date_keywords("created:last_month", UTC),
            rewrite_natural_date_keywords("created:previous month", UTC),
            "created",
        )
        assert lo == "2025-12-01T00:00:00Z"
        assert hi == "2026-01-01T00:00:00Z"

    @time_machine.travel(datetime(2026, 7, 15, 12, 0, tzinfo=UTC), tick=False)
    def test_previous_quarter(self) -> None:
        lo, hi = _range(
            rewrite_natural_date_keywords('created:"previous quarter"', UTC),
            "created",
        )
        assert lo == "2026-04-01T00:00:00Z"
        assert hi == "2026-07-01T00:00:00Z"

    def test_unknown_keyword_raises(self) -> None:
        with pytest.raises(ValueError, match="Unknown keyword"):
            _date_only_range("bogus_keyword", UTC)
@@ -202,40 +204,34 @@ class TestDateTimeFields:
                id="yesterday",
            ),
            pytest.param(
                "this_week",
                "2026-03-23T00:00:00Z",
                "2026-03-30T00:00:00Z",
                id="this_week",
            ),
            pytest.param(
                "last_week",
                "previous week",
                "2026-03-16T00:00:00Z",
                "2026-03-23T00:00:00Z",
                id="last_week",
                id="previous_week",
            ),
            pytest.param(
                "this_month",
                "this month",
                "2026-03-01T00:00:00Z",
                "2026-04-01T00:00:00Z",
                id="this_month",
            ),
            pytest.param(
                "last_month",
                "previous month",
                "2026-02-01T00:00:00Z",
                "2026-03-01T00:00:00Z",
                id="last_month",
                id="previous_month",
            ),
            pytest.param(
                "this_year",
                "this year",
                "2026-01-01T00:00:00Z",
                "2027-01-01T00:00:00Z",
                id="this_year",
            ),
            pytest.param(
                "last_year",
                "previous year",
                "2025-01-01T00:00:00Z",
                "2026-01-01T00:00:00Z",
                id="last_year",
                id="previous_year",
            ),
        ],
    )
@@ -254,17 +250,54 @@ class TestDateTimeFields:
    @time_machine.travel(datetime(2026, 12, 15, 12, 0, tzinfo=UTC), tick=False)
    def test_this_month_december_wraps_to_next_year(self) -> None:
        # December: next month wraps to January of next year
        lo, hi = _range(rewrite_natural_date_keywords("added:this_month", UTC), "added")
        lo, hi = _range(rewrite_natural_date_keywords("added:this month", UTC), "added")
        assert lo == "2026-12-01T00:00:00Z"
        assert hi == "2027-01-01T00:00:00Z"

    @time_machine.travel(datetime(2026, 1, 15, 12, 0, tzinfo=UTC), tick=False)
    def test_last_month_january_wraps_to_previous_year(self) -> None:
        # January: last month wraps back to December of previous year
        lo, hi = _range(rewrite_natural_date_keywords("added:last_month", UTC), "added")
        lo, hi = _range(
            rewrite_natural_date_keywords("added:previous month", UTC),
            "added",
        )
        assert lo == "2025-12-01T00:00:00Z"
        assert hi == "2026-01-01T00:00:00Z"
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
("query", "expected_lo", "expected_hi"),
|
||||
[
|
||||
pytest.param(
|
||||
'added:"previous quarter"',
|
||||
"2026-04-01T00:00:00Z",
|
||||
"2026-07-01T00:00:00Z",
|
||||
id="quoted_previous_quarter",
|
||||
),
|
||||
pytest.param(
|
||||
"added:previous month",
|
||||
"2026-06-01T00:00:00Z",
|
||||
"2026-07-01T00:00:00Z",
|
||||
id="bare_previous_month",
|
||||
),
|
||||
pytest.param(
|
||||
"added:this month",
|
||||
"2026-07-01T00:00:00Z",
|
||||
"2026-08-01T00:00:00Z",
|
||||
id="bare_this_month",
|
||||
),
|
||||
],
|
||||
)
|
||||
@time_machine.travel(datetime(2026, 7, 15, 12, 0, tzinfo=UTC), tick=False)
|
||||
def test_legacy_natural_language_aliases(
|
||||
self,
|
||||
query: str,
|
||||
expected_lo: str,
|
||||
expected_hi: str,
|
||||
) -> None:
|
||||
lo, hi = _range(rewrite_natural_date_keywords(query, UTC), "added")
|
||||
assert lo == expected_lo
|
||||
assert hi == expected_hi
|
||||
|
||||
def test_unknown_keyword_raises(self) -> None:
|
||||
with pytest.raises(ValueError, match="Unknown keyword"):
|
||||
_datetime_range("bogus_keyword", UTC)
|
||||
|
||||
@@ -831,7 +831,7 @@ class TestApiAppConfig(DirectoriesMixin, APITestCase):
|
||||
config.save()
|
||||
|
||||
with (
|
||||
patch("documents.tasks.llmindex_index.delay") as mock_update,
|
||||
patch("documents.tasks.llmindex_index.apply_async") as mock_update,
|
||||
patch("paperless_ai.indexing.vector_store_file_exists") as mock_exists,
|
||||
):
|
||||
mock_exists.return_value = False
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
import pytest
|
||||
from django.core.management import call_command
|
||||
from django.core.management.base import CommandError
|
||||
from drf_spectacular.generators import SchemaGenerator
|
||||
from rest_framework import status
|
||||
from rest_framework.test import APITestCase
|
||||
|
||||
@@ -66,3 +68,57 @@ class TestApiSchema(APITestCase):
|
||||
"delete_pages",
|
||||
]:
|
||||
self.assertIn(action_method, advertised_methods)
|
||||
|
||||
|
||||
# ---- session-scoped fixture: generate schema once for all TestXxx classes ----
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def api_schema():
|
||||
generator = SchemaGenerator()
|
||||
return generator.get_schema(request=None, public=True)
|
||||
|
||||
|
||||
class TestTasksSummarySchema:
|
||||
"""tasks_summary_retrieve: response must be an array of TaskSummarySerializer."""
|
||||
|
||||
def test_summary_response_is_array(self, api_schema):
|
||||
op = api_schema["paths"]["/api/tasks/summary/"]["get"]
|
||||
resp_200 = op["responses"]["200"]["content"]["application/json"]["schema"]
|
||||
assert resp_200["type"] == "array", (
|
||||
"tasks_summary_retrieve response must be type:array"
|
||||
)
|
||||
|
||||
def test_summary_items_have_total_count(self, api_schema):
|
||||
op = api_schema["paths"]["/api/tasks/summary/"]["get"]
|
||||
resp_200 = op["responses"]["200"]["content"]["application/json"]["schema"]
|
||||
items = resp_200.get("items", {})
|
||||
ref = items.get("$ref", "")
|
||||
component_name = ref.split("/")[-1] if ref else ""
|
||||
if component_name:
|
||||
props = api_schema["components"]["schemas"][component_name]["properties"]
|
||||
else:
|
||||
props = items.get("properties", {})
|
||||
assert "total_count" in props, (
|
||||
"summary items must have 'total_count' (TaskSummarySerializer)"
|
||||
)
|
||||
|
||||
|
||||
class TestTasksActiveSchema:
|
||||
"""tasks_active_retrieve: response must be an array of TaskSerializerV10."""
|
||||
|
||||
def test_active_response_is_array(self, api_schema):
|
||||
op = api_schema["paths"]["/api/tasks/active/"]["get"]
|
||||
resp_200 = op["responses"]["200"]["content"]["application/json"]["schema"]
|
||||
assert resp_200["type"] == "array", (
|
||||
"tasks_active_retrieve response must be type:array"
|
||||
)
|
||||
|
||||
def test_active_items_ref_named_schema(self, api_schema):
|
||||
op = api_schema["paths"]["/api/tasks/active/"]["get"]
|
||||
resp_200 = op["responses"]["200"]["content"]["application/json"]["schema"]
|
||||
items = resp_200.get("items", {})
|
||||
ref = items.get("$ref", "")
|
||||
component_name = ref.split("/")[-1] if ref else ""
|
||||
assert component_name, "items should be a $ref to a named schema"
|
||||
assert component_name in api_schema["components"]["schemas"]
|
||||
|
||||
@@ -3,6 +3,7 @@ from datetime import timedelta
|
||||
from unittest import mock
|
||||
|
||||
import pytest
|
||||
import time_machine
|
||||
from dateutil.relativedelta import relativedelta
|
||||
from django.contrib.auth.models import Group
|
||||
from django.contrib.auth.models import Permission
|
||||
@@ -26,6 +27,7 @@ from documents.models import Tag
|
||||
from documents.models import Workflow
|
||||
from documents.search import get_backend
|
||||
from documents.search import reset_backend
|
||||
from documents.tests.factories import DocumentFactory
|
||||
from documents.tests.utils import DirectoriesMixin
|
||||
from paperless_mail.models import MailAccount
|
||||
from paperless_mail.models import MailRule
|
||||
@@ -741,6 +743,49 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
|
||||
# Tantivy rejects unparsable field queries with a 400
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
|
||||
@override_settings(
|
||||
TIME_ZONE="UTC",
|
||||
)
|
||||
@time_machine.travel(
|
||||
datetime.datetime(2026, 7, 15, 12, 0, tzinfo=datetime.UTC),
|
||||
tick=False,
|
||||
)
|
||||
def test_search_added_previous_quarter(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- Documents inside and outside the previous quarter
|
||||
WHEN:
|
||||
- Query with the legacy natural-language phrase used by the UI
|
||||
THEN:
|
||||
- Previous-quarter documents are returned
|
||||
"""
|
||||
d1 = DocumentFactory.create(
|
||||
title="quarterly statement april",
|
||||
content="bank statement",
|
||||
added=datetime.datetime(2026, 4, 10, 12, 0, tzinfo=datetime.UTC),
|
||||
)
|
||||
d2 = DocumentFactory.create(
|
||||
title="quarterly statement june",
|
||||
content="bank statement",
|
||||
added=datetime.datetime(2026, 6, 20, 12, 0, tzinfo=datetime.UTC),
|
||||
)
|
||||
d3 = DocumentFactory.create(
|
||||
title="quarterly statement july",
|
||||
content="bank statement",
|
||||
added=datetime.datetime(2026, 7, 10, 12, 0, tzinfo=datetime.UTC),
|
||||
)
|
||||
|
||||
backend = get_backend()
|
||||
backend.add_or_update(d1)
|
||||
backend.add_or_update(d2)
|
||||
backend.add_or_update(d3)
|
||||
|
||||
response = self.client.get('/api/documents/?query=added:"previous quarter"')
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
|
||||
results = response.data["results"]
|
||||
self.assertEqual({r["id"] for r in results}, {1, 2})
|
||||
|
||||
@mock.patch("documents.search._backend.TantivyBackend.autocomplete")
|
||||
def test_search_autocomplete_limits(self, m) -> None:
|
||||
"""
|
||||
|
||||
@@ -4,7 +4,6 @@ import tempfile
|
||||
from pathlib import Path
|
||||
from unittest import mock
|
||||
|
||||
from celery import states
|
||||
from django.contrib.auth.models import Permission
|
||||
from django.contrib.auth.models import User
|
||||
from django.test import override_settings
|
||||
@@ -13,6 +12,7 @@ from rest_framework.test import APITestCase
|
||||
|
||||
from documents.models import PaperlessTask
|
||||
from documents.permissions import has_system_status_permission
|
||||
from documents.tests.factories import PaperlessTaskFactory
|
||||
from paperless import version
|
||||
|
||||
|
||||
@@ -258,10 +258,10 @@ class TestSystemStatus(APITestCase):
|
||||
THEN:
|
||||
- The response contains an OK classifier status
|
||||
"""
|
||||
PaperlessTask.objects.create(
|
||||
type=PaperlessTask.TaskType.SCHEDULED_TASK,
|
||||
status=states.SUCCESS,
|
||||
task_name=PaperlessTask.TaskName.TRAIN_CLASSIFIER,
|
||||
PaperlessTaskFactory(
|
||||
task_type=PaperlessTask.TaskType.TRAIN_CLASSIFIER,
|
||||
trigger_source=PaperlessTask.TriggerSource.SCHEDULED,
|
||||
status=PaperlessTask.Status.SUCCESS,
|
||||
)
|
||||
self.client.force_login(self.user)
|
||||
response = self.client.get(self.ENDPOINT)
|
||||
@@ -295,11 +295,11 @@ class TestSystemStatus(APITestCase):
|
||||
THEN:
|
||||
- The response contains an ERROR classifier status
|
||||
"""
|
||||
PaperlessTask.objects.create(
|
||||
type=PaperlessTask.TaskType.SCHEDULED_TASK,
|
||||
status=states.FAILURE,
|
||||
task_name=PaperlessTask.TaskName.TRAIN_CLASSIFIER,
|
||||
result="Classifier training failed",
|
||||
PaperlessTaskFactory(
|
||||
task_type=PaperlessTask.TaskType.TRAIN_CLASSIFIER,
|
||||
trigger_source=PaperlessTask.TriggerSource.SCHEDULED,
|
||||
status=PaperlessTask.Status.FAILURE,
|
||||
result_message="Classifier training failed",
|
||||
)
|
||||
self.client.force_login(self.user)
|
||||
response = self.client.get(self.ENDPOINT)
|
||||
@@ -319,10 +319,10 @@ class TestSystemStatus(APITestCase):
|
||||
THEN:
|
||||
- The response contains an OK sanity check status
|
||||
"""
|
||||
PaperlessTask.objects.create(
|
||||
type=PaperlessTask.TaskType.SCHEDULED_TASK,
|
||||
status=states.SUCCESS,
|
||||
task_name=PaperlessTask.TaskName.CHECK_SANITY,
|
||||
PaperlessTaskFactory(
|
||||
task_type=PaperlessTask.TaskType.SANITY_CHECK,
|
||||
trigger_source=PaperlessTask.TriggerSource.SCHEDULED,
|
||||
status=PaperlessTask.Status.SUCCESS,
|
||||
)
|
||||
self.client.force_login(self.user)
|
||||
response = self.client.get(self.ENDPOINT)
|
||||
@@ -356,11 +356,11 @@ class TestSystemStatus(APITestCase):
|
||||
THEN:
|
||||
- The response contains an ERROR sanity check status
|
||||
"""
|
||||
PaperlessTask.objects.create(
|
||||
type=PaperlessTask.TaskType.SCHEDULED_TASK,
|
||||
status=states.FAILURE,
|
||||
task_name=PaperlessTask.TaskName.CHECK_SANITY,
|
||||
result="5 issues found.",
|
||||
PaperlessTaskFactory(
|
||||
task_type=PaperlessTask.TaskType.SANITY_CHECK,
|
||||
trigger_source=PaperlessTask.TriggerSource.SCHEDULED,
|
||||
status=PaperlessTask.Status.FAILURE,
|
||||
result_message="5 issues found.",
|
||||
)
|
||||
self.client.force_login(self.user)
|
||||
response = self.client.get(self.ENDPOINT)
|
||||
@@ -405,10 +405,10 @@ class TestSystemStatus(APITestCase):
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
self.assertEqual(response.data["tasks"]["llmindex_status"], "WARNING")
|
||||
|
||||
PaperlessTask.objects.create(
|
||||
type=PaperlessTask.TaskType.SCHEDULED_TASK,
|
||||
status=states.SUCCESS,
|
||||
task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,
|
||||
PaperlessTaskFactory(
|
||||
task_type=PaperlessTask.TaskType.LLM_INDEX,
|
||||
trigger_source=PaperlessTask.TriggerSource.SCHEDULED,
|
||||
status=PaperlessTask.Status.SUCCESS,
|
||||
)
|
||||
response = self.client.get(self.ENDPOINT)
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
@@ -425,11 +425,11 @@ class TestSystemStatus(APITestCase):
|
||||
- The response contains the correct AI status
|
||||
"""
|
||||
with override_settings(AI_ENABLED=True, LLM_EMBEDDING_BACKEND="openai"):
|
||||
PaperlessTask.objects.create(
|
||||
type=PaperlessTask.TaskType.SCHEDULED_TASK,
|
||||
status=states.FAILURE,
|
||||
task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,
|
||||
result="AI index update failed",
|
||||
PaperlessTaskFactory(
|
||||
task_type=PaperlessTask.TaskType.LLM_INDEX,
|
||||
trigger_source=PaperlessTask.TriggerSource.SCHEDULED,
|
||||
status=PaperlessTask.Status.FAILURE,
|
||||
result_message="AI index update failed",
|
||||
)
|
||||
self.client.force_login(self.user)
|
||||
response = self.client.get(self.ENDPOINT)
|
||||
|
||||
File diff suppressed because it is too large
@@ -211,7 +211,7 @@ class TestCreateClassifier:
|
||||
|
||||
call_command("document_create_classifier", skip_checks=True)
|
||||
|
||||
m.assert_called_once_with(scheduled=False, status_callback=mocker.ANY)
|
||||
m.assert_called_once_with(status_callback=mocker.ANY)
|
||||
assert callable(m.call_args.kwargs["status_callback"])
|
||||
|
||||
def test_create_classifier_callback_output(self, mocker: MockerFixture) -> None:
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
"""Tests for the sanity checker module.
|
||||
|
||||
Tests exercise ``check_sanity`` as a whole, verifying document validation,
|
||||
orphan detection, task recording, and the iter_wrapper contract.
|
||||
orphan detection, and the iter_wrapper contract.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -12,13 +12,12 @@ from typing import TYPE_CHECKING
|
||||
|
||||
import pytest
|
||||
|
||||
from documents.models import Document
|
||||
from documents.models import PaperlessTask
|
||||
from documents.sanity_checker import check_sanity
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Iterable
|
||||
|
||||
from documents.models import Document
|
||||
from documents.tests.conftest import PaperlessDirs
|
||||
|
||||
|
||||
@@ -229,35 +228,6 @@ class TestCheckSanityIterWrapper:
|
||||
assert not messages.has_error
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
class TestCheckSanityTaskRecording:
|
||||
@pytest.mark.parametrize(
|
||||
("expected_type", "scheduled"),
|
||||
[
|
||||
pytest.param(PaperlessTask.TaskType.SCHEDULED_TASK, True, id="scheduled"),
|
||||
pytest.param(PaperlessTask.TaskType.MANUAL_TASK, False, id="manual"),
|
||||
],
|
||||
)
|
||||
@pytest.mark.usefixtures("_media_settings")
|
||||
def test_task_type(self, expected_type: str, *, scheduled: bool) -> None:
|
||||
check_sanity(scheduled=scheduled)
|
||||
task = PaperlessTask.objects.latest("date_created")
|
||||
assert task.task_name == PaperlessTask.TaskName.CHECK_SANITY
|
||||
assert task.type == expected_type
|
||||
|
||||
def test_success_status(self, sample_doc: Document) -> None:
|
||||
check_sanity()
|
||||
task = PaperlessTask.objects.latest("date_created")
|
||||
assert task.status == "SUCCESS"
|
||||
|
||||
def test_failure_status(self, sample_doc: Document) -> None:
|
||||
Path(sample_doc.source_path).unlink()
|
||||
check_sanity()
|
||||
task = PaperlessTask.objects.latest("date_created")
|
||||
assert task.status == "FAILURE"
|
||||
assert "Check logs for details" in task.result
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
class TestCheckSanityLogMessages:
|
||||
def test_logs_doc_issues(
|
||||
|
||||
@@ -1,250 +1,390 @@
|
||||
import datetime
|
||||
import sys
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
from unittest import mock
|
||||
|
||||
import celery
|
||||
from django.contrib.auth import get_user_model
|
||||
from django.test import TestCase
|
||||
import pytest
|
||||
import pytest_mock
|
||||
from django.utils import timezone
|
||||
|
||||
from documents.data_models import ConsumableDocument
|
||||
from documents.data_models import DocumentMetadataOverrides
|
||||
from documents.data_models import DocumentSource
|
||||
from documents.models import Document
|
||||
from documents.models import PaperlessTask
|
||||
from documents.signals.handlers import add_to_index
|
||||
from documents.signals.handlers import before_task_publish_handler
|
||||
from documents.signals.handlers import task_failure_handler
|
||||
from documents.signals.handlers import task_postrun_handler
|
||||
from documents.signals.handlers import task_prerun_handler
|
||||
from documents.tests.test_consumer import fake_magic_from_file
|
||||
from documents.tests.utils import DirectoriesMixin
|
||||
from documents.signals.handlers import task_revoked_handler
|
||||
from documents.tests.factories import PaperlessTaskFactory
|
||||
|
||||
|
||||
@mock.patch("documents.consumer.magic.from_file", fake_magic_from_file)
|
||||
class TestTaskSignalHandler(DirectoriesMixin, TestCase):
|
||||
@classmethod
|
||||
def setUpTestData(cls) -> None:
|
||||
super().setUpTestData()
|
||||
cls.user = get_user_model().objects.create_user(username="testuser")
|
||||
@pytest.fixture
|
||||
def consume_input_doc():
|
||||
doc = mock.MagicMock(spec=ConsumableDocument)
|
||||
# original_file is a Path; configure the nested mock so .name works
|
||||
doc.original_file = mock.MagicMock()
|
||||
doc.original_file.name = "invoice.pdf"
|
||||
doc.original_path = None
|
||||
doc.mime_type = "application/pdf"
|
||||
doc.mailrule_id = None
|
||||
doc.source = DocumentSource.WebUI
|
||||
return doc
|
||||
|
||||
def util_call_before_task_publish_handler(
|
||||
|
||||
@pytest.fixture
|
||||
def consume_overrides(django_user_model):
|
||||
user = django_user_model.objects.create_user(username="testuser")
|
||||
overrides = mock.MagicMock(spec=DocumentMetadataOverrides)
|
||||
overrides.owner_id = user.id
|
||||
return overrides
|
||||
|
||||
|
||||
def send_publish(
|
||||
task_name: str,
|
||||
args: tuple,
|
||||
kwargs: dict,
|
||||
headers: dict | None = None,
|
||||
) -> str:
|
||||
|
||||
task_id = str(uuid.uuid4())
|
||||
hdrs = {"task": task_name, "id": task_id, **(headers or {})}
|
||||
before_task_publish_handler(sender=task_name, headers=hdrs, body=(args, kwargs, {}))
|
||||
return task_id
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
class TestBeforeTaskPublishHandler:
|
||||
def test_creates_task_for_consume_file(self, consume_input_doc, consume_overrides):
|
||||
task_id = send_publish(
|
||||
"documents.tasks.consume_file",
|
||||
(consume_input_doc, consume_overrides),
|
||||
{},
|
||||
)
|
||||
task = PaperlessTask.objects.get(task_id=task_id)
|
||||
assert task.task_type == PaperlessTask.TaskType.CONSUME_FILE
|
||||
assert task.status == PaperlessTask.Status.PENDING
|
||||
assert task.trigger_source == PaperlessTask.TriggerSource.WEB_UI
|
||||
assert task.input_data["filename"] == "invoice.pdf"
|
||||
assert task.owner_id == consume_overrides.owner_id
|
||||
|
||||
def test_creates_task_for_train_classifier(self):
|
||||
task_id = send_publish("documents.tasks.train_classifier", (), {})
|
||||
task = PaperlessTask.objects.get(task_id=task_id)
|
||||
assert task.task_type == PaperlessTask.TaskType.TRAIN_CLASSIFIER
|
||||
assert task.trigger_source == PaperlessTask.TriggerSource.MANUAL
|
||||
|
||||
def test_creates_task_for_sanity_check(self):
|
||||
task_id = send_publish("documents.tasks.sanity_check", (), {})
|
||||
task = PaperlessTask.objects.get(task_id=task_id)
|
||||
assert task.task_type == PaperlessTask.TaskType.SANITY_CHECK
|
||||
|
||||
def test_creates_task_for_process_mail_accounts(self):
|
||||
task_id = send_publish(
|
||||
"paperless_mail.tasks.process_mail_accounts",
|
||||
(),
|
||||
{"account_ids": [1, 2]},
|
||||
)
|
||||
task = PaperlessTask.objects.get(task_id=task_id)
|
||||
assert task.task_type == PaperlessTask.TaskType.MAIL_FETCH
|
||||
assert task.input_data["account_ids"] == [1, 2]
|
||||
|
||||
def test_mail_fetch_no_account_ids_stores_empty_input(self):
|
||||
"""Beat-scheduled mail checks pass no account_ids; input_data should be {} not {"account_ids": None}."""
|
||||
task_id = send_publish("paperless_mail.tasks.process_mail_accounts", (), {})
|
||||
task = PaperlessTask.objects.get(task_id=task_id)
|
||||
assert task.input_data == {}
|
||||
|
||||
def test_overrides_date_serialized_as_iso_string(self, consume_input_doc):
|
||||
"""A datetime.date in overrides is stored as an ISO string so input_data is JSON-safe."""
|
||||
overrides = DocumentMetadataOverrides(created=datetime.date(2024, 1, 15))
|
||||
|
||||
task_id = send_publish(
|
||||
"documents.tasks.consume_file",
|
||||
(consume_input_doc, overrides),
|
||||
{},
|
||||
)
|
||||
|
||||
task = PaperlessTask.objects.get(task_id=task_id)
|
||||
assert task.input_data["overrides"]["created"] == "2024-01-15"
|
||||
|
||||
def test_overrides_path_serialized_as_string(self, consume_input_doc):
|
||||
"""A Path value in overrides is stored as a plain string so input_data is JSON-safe."""
|
||||
overrides = DocumentMetadataOverrides()
|
||||
overrides.filename = Path("/uploads/invoice.pdf") # type: ignore[assignment]
|
||||
|
||||
task_id = send_publish(
|
||||
"documents.tasks.consume_file",
|
||||
(consume_input_doc, overrides),
|
||||
{},
|
||||
)
|
||||
|
||||
task = PaperlessTask.objects.get(task_id=task_id)
|
||||
assert task.input_data["overrides"]["filename"] == "/uploads/invoice.pdf"
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
("header_value", "expected_trigger_source"),
|
||||
[
|
||||
pytest.param(
|
||||
PaperlessTask.TriggerSource.SCHEDULED,
|
||||
PaperlessTask.TriggerSource.SCHEDULED,
|
||||
id="scheduled",
|
||||
),
|
||||
pytest.param(
|
||||
PaperlessTask.TriggerSource.SYSTEM,
|
||||
PaperlessTask.TriggerSource.SYSTEM,
|
||||
id="system",
|
||||
),
|
||||
pytest.param(
|
||||
"bogus_value",
|
||||
PaperlessTask.TriggerSource.MANUAL,
|
||||
id="invalid-falls-back-to-manual",
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_trigger_source_header_resolution(
|
||||
self,
|
||||
headers_to_use,
|
||||
body_to_use,
|
||||
header_value: str,
|
||||
expected_trigger_source: PaperlessTask.TriggerSource,
|
||||
) -> None:
|
||||
"""
|
||||
Simple utility to call the pre-run handle and ensure it created a single task
|
||||
instance
|
||||
"""
|
||||
self.assertEqual(PaperlessTask.objects.all().count(), 0)
|
||||
|
||||
before_task_publish_handler(headers=headers_to_use, body=body_to_use)
|
||||
|
||||
self.assertEqual(PaperlessTask.objects.all().count(), 1)
|
||||
|
||||
def test_before_task_publish_handler_consume(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- A celery task is started via the consume folder
|
||||
WHEN:
|
||||
- Task before publish handler is called
|
||||
THEN:
|
||||
- The task is created and marked as pending
|
||||
"""
|
||||
headers = {
|
||||
"id": str(uuid.uuid4()),
|
||||
"task": "documents.tasks.consume_file",
|
||||
}
|
||||
body = (
|
||||
# args
|
||||
(
|
||||
ConsumableDocument(
|
||||
source=DocumentSource.ConsumeFolder,
|
||||
original_file="/consume/hello-999.pdf",
|
||||
),
|
||||
DocumentMetadataOverrides(
|
||||
title="Hello world",
|
||||
owner_id=self.user.id,
|
||||
),
|
||||
),
|
||||
# kwargs
|
||||
"""trigger_source header maps to the expected TriggerSource; invalid values fall back to MANUAL."""
|
||||
task_id = send_publish(
|
||||
"documents.tasks.train_classifier",
|
||||
(),
|
||||
{},
|
||||
# celery stuff
|
||||
{"callbacks": None, "errbacks": None, "chain": None, "chord": None},
|
||||
)
|
||||
self.util_call_before_task_publish_handler(
|
||||
headers_to_use=headers,
|
||||
body_to_use=body,
|
||||
headers={"trigger_source": header_value},
|
||||
)
|
||||
task = PaperlessTask.objects.get(task_id=task_id)
|
||||
assert task.trigger_source == expected_trigger_source
|
||||
|
||||
task = PaperlessTask.objects.get()
|
||||
self.assertIsNotNone(task)
|
||||
self.assertEqual(headers["id"], task.task_id)
|
||||
self.assertEqual("hello-999.pdf", task.task_file_name)
|
||||
self.assertEqual(PaperlessTask.TaskName.CONSUME_FILE, task.task_name)
|
||||
self.assertEqual(self.user.id, task.owner_id)
|
||||
self.assertEqual(celery.states.PENDING, task.status)
|
||||
def test_ignores_untracked_task(self):
|
||||
send_publish("documents.tasks.some_untracked_task", (), {})
|
||||
assert PaperlessTask.objects.count() == 0
|
||||
|
||||
def test_task_prerun_handler(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- A celery task is started via the consume folder
|
||||
WHEN:
|
||||
- Task starts execution
|
||||
THEN:
|
||||
- The task is marked as started
|
||||
"""
|
||||
def test_ignores_none_headers(self):
|
||||
|
||||
headers = {
|
||||
"id": str(uuid.uuid4()),
|
||||
"task": "documents.tasks.consume_file",
|
||||
}
|
||||
body = (
|
||||
# args
|
||||
(
|
||||
ConsumableDocument(
|
||||
source=DocumentSource.ConsumeFolder,
|
||||
original_file="/consume/hello-99.pdf",
|
||||
),
|
||||
None,
|
||||
before_task_publish_handler(sender=None, headers=None, body=None)
|
||||
assert PaperlessTask.objects.count() == 0
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
("document_source", "expected_trigger_source"),
|
||||
[
|
||||
pytest.param(
|
||||
DocumentSource.ConsumeFolder,
|
||||
PaperlessTask.TriggerSource.FOLDER_CONSUME,
|
||||
id="folder_consume",
|
||||
),
|
||||
# kwargs
|
||||
{},
|
||||
# celery stuff
|
||||
{"callbacks": None, "errbacks": None, "chain": None, "chord": None},
|
||||
)
|
||||
|
||||
self.util_call_before_task_publish_handler(
|
||||
headers_to_use=headers,
|
||||
body_to_use=body,
|
||||
)
|
||||
|
||||
task_prerun_handler(task_id=headers["id"])
|
||||
|
||||
task = PaperlessTask.objects.get()
|
||||
|
||||
self.assertEqual(celery.states.STARTED, task.status)
|
||||
|
||||
def test_task_postrun_handler(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- A celery task is started via the consume folder
|
||||
WHEN:
|
||||
- Task finished execution
|
||||
THEN:
|
||||
- The task is marked as started
|
||||
"""
|
||||
headers = {
|
||||
"id": str(uuid.uuid4()),
|
||||
"task": "documents.tasks.consume_file",
|
||||
}
|
||||
body = (
|
||||
# args
|
||||
(
|
||||
ConsumableDocument(
|
||||
source=DocumentSource.ConsumeFolder,
|
||||
original_file="/consume/hello-9.pdf",
|
||||
),
|
||||
None,
|
||||
pytest.param(
|
||||
DocumentSource.MailFetch,
|
||||
PaperlessTask.TriggerSource.EMAIL_CONSUME,
|
||||
id="email_consume",
|
||||
),
|
||||
# kwargs
|
||||
],
|
||||
)
|
||||
def test_consume_document_source_maps_to_trigger_source(
|
||||
self,
|
||||
consume_input_doc,
|
||||
consume_overrides,
|
||||
document_source: DocumentSource,
|
||||
expected_trigger_source: PaperlessTask.TriggerSource,
|
||||
) -> None:
|
||||
"""DocumentSource on the input doc maps to the correct TriggerSource."""
|
||||
consume_input_doc.source = document_source
|
||||
task_id = send_publish(
|
||||
"documents.tasks.consume_file",
|
||||
(consume_input_doc, consume_overrides),
|
||||
{},
|
||||
# celery stuff
|
||||
{"callbacks": None, "errbacks": None, "chain": None, "chord": None},
|
||||
)
|
||||
self.util_call_before_task_publish_handler(
|
||||
headers_to_use=headers,
|
||||
body_to_use=body,
|
||||
task = PaperlessTask.objects.get(task_id=task_id)
|
||||
assert task.trigger_source == expected_trigger_source
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
class TestTaskPrerunHandler:
|
||||
def test_marks_task_started(self):
|
||||
task = PaperlessTaskFactory(status=PaperlessTask.Status.PENDING)
|
||||
|
||||
task_prerun_handler(task_id=task.task_id)
|
||||
task.refresh_from_db()
|
||||
assert task.status == PaperlessTask.Status.STARTED
|
||||
assert task.date_started is not None
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"task_id",
|
||||
[
|
||||
pytest.param("nonexistent-id", id="unknown"),
|
||||
pytest.param(None, id="none"),
|
||||
],
|
||||
)
|
||||
def test_ignores_invalid_task_id(self, task_id: str | None) -> None:
|
||||
|
||||
task_prerun_handler(task_id=task_id) # must not raise
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
class TestTaskPostrunHandler:
|
||||
def _started_task(self) -> PaperlessTask:
|
||||
|
||||
return PaperlessTaskFactory(
|
||||
task_type=PaperlessTask.TaskType.TRAIN_CLASSIFIER,
|
||||
status=PaperlessTask.Status.STARTED,
|
||||
date_started=timezone.now(),
|
||||
)
|
||||
|
||||
def test_records_success_with_dict_result(self):
|
||||
task = self._started_task()
|
||||
|
||||
task_postrun_handler(
|
||||
task_id=headers["id"],
|
||||
retval="Success. New document id 1 created",
|
||||
state=celery.states.SUCCESS,
|
||||
task_id=task.task_id,
|
||||
retval={"document_id": 42},
|
||||
state="SUCCESS",
|
||||
)
|
||||
task.refresh_from_db()
|
||||
assert task.status == PaperlessTask.Status.SUCCESS
|
||||
assert task.result_data == {"document_id": 42}
|
||||
assert task.date_done is not None
|
||||
assert task.duration_seconds is not None
|
||||
assert task.wait_time_seconds is not None
|
||||
|
||||
task = PaperlessTask.objects.get()
|
||||
def test_skips_failure_state(self):
|
||||
"""postrun skips FAILURE; task_failure_handler owns that path."""
|
||||
task = self._started_task()
|
||||
|
||||
self.assertEqual(celery.states.SUCCESS, task.status)
|
||||
task_postrun_handler(task_id=task.task_id, retval="some error", state="FAILURE")
|
||||
task.refresh_from_db()
|
||||
assert task.status == PaperlessTask.Status.STARTED
|
||||
|
||||
def test_task_failure_handler(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- A celery task is started via the consume folder
|
||||
WHEN:
|
||||
- Task failed execution
|
||||
THEN:
|
||||
- The task is marked as failed
|
||||
"""
|
||||
headers = {
|
||||
"id": str(uuid.uuid4()),
|
||||
"task": "documents.tasks.consume_file",
|
||||
}
|
||||
body = (
|
||||
# args
|
||||
(
|
||||
ConsumableDocument(
|
||||
source=DocumentSource.ConsumeFolder,
|
||||
original_file="/consume/hello-9.pdf",
|
||||
),
|
||||
None,
|
||||
),
|
||||
# kwargs
|
||||
{},
|
||||
# celery stuff
|
||||
{"callbacks": None, "errbacks": None, "chain": None, "chord": None},
|
||||
def test_parses_legacy_new_document_string(self):
|
||||
task = self._started_task()
|
||||
|
||||
task_postrun_handler(
|
||||
task_id=task.task_id,
|
||||
retval="New document id 42 created",
|
||||
state="SUCCESS",
|
||||
)
|
||||
self.util_call_before_task_publish_handler(
|
||||
headers_to_use=headers,
|
||||
body_to_use=body,
|
||||
task.refresh_from_db()
|
||||
assert task.result_data["document_id"] == 42
|
||||
assert task.result_message == "New document id 42 created"
|
||||
|
||||
def test_parses_duplicate_string(self):
|
||||
"""Duplicate detection returns a string with SUCCESS state (StopConsumeTaskError is caught and returned, not raised)."""
|
||||
task = self._started_task()
|
||||
|
||||
task_postrun_handler(
|
||||
task_id=task.task_id,
|
||||
retval="It is a duplicate of some document (#99).",
|
||||
state="SUCCESS",
|
||||
)
|
||||
task.refresh_from_db()
|
||||
assert task.result_data["duplicate_of"] == 99
|
||||
assert task.result_data["duplicate_in_trash"] is False
|
||||
|
||||
def test_ignores_unknown_task_id(self):
|
||||
|
||||
task_postrun_handler(
|
||||
task_id="nonexistent",
|
||||
retval=None,
|
||||
state="SUCCESS",
|
||||
) # must not raise
|
||||
|
||||
def test_records_revoked_state(self):
|
||||
task = self._started_task()
|
||||
|
||||
task_postrun_handler(task_id=task.task_id, retval=None, state="REVOKED")
|
||||
task.refresh_from_db()
|
||||
assert task.status == PaperlessTask.Status.REVOKED
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
class TestTaskFailureHandler:
|
||||
def test_records_failure_with_exception(self):
|
||||
|
||||
task = PaperlessTaskFactory(
|
||||
task_type=PaperlessTask.TaskType.CONSUME_FILE,
|
||||
status=PaperlessTask.Status.STARTED,
|
||||
date_started=timezone.now(),
|
||||
)
|
||||
|
||||
task_failure_handler(
|
||||
task_id=headers["id"],
|
||||
exception="Example failure",
|
||||
task_id=task.task_id,
|
||||
exception=ValueError("PDF parse failed"),
|
||||
traceback=None,
|
||||
)
|
||||
task.refresh_from_db()
|
||||
assert task.status == PaperlessTask.Status.FAILURE
|
||||
assert task.result_data["error_type"] == "ValueError"
|
||||
assert task.result_data["error_message"] == "PDF parse failed"
|
||||
assert task.date_done is not None
|
||||
|
||||
def test_records_traceback_when_provided(self):
|
||||
|
||||
task = PaperlessTaskFactory(
|
||||
task_type=PaperlessTask.TaskType.CONSUME_FILE,
|
||||
status=PaperlessTask.Status.STARTED,
|
||||
date_started=timezone.now(),
|
||||
)
|
||||
try:
|
||||
raise ValueError("test error")
|
||||
except ValueError:
|
||||
tb = sys.exc_info()[2]
|
||||
|
||||
from documents.signals.handlers import task_failure_handler
|
||||
|
||||
task_failure_handler(
|
||||
task_id=task.task_id,
|
||||
exception=ValueError("test error"),
|
||||
traceback=tb,
|
||||
)
|
||||
task.refresh_from_db()
|
||||
assert "traceback" in task.result_data
|
||||
assert len(task.result_data["traceback"]) <= 5000
|
||||
|
||||
def test_computes_duration_and_wait_time(self):
|
||||
|
||||
now = timezone.now()
|
||||
task = PaperlessTaskFactory(
|
||||
task_type=PaperlessTask.TaskType.CONSUME_FILE,
|
||||
status=PaperlessTask.Status.STARTED,
|
||||
date_created=now - timezone.timedelta(seconds=10),
|
||||
date_started=now - timezone.timedelta(seconds=5),
|
||||
)
|
||||
|
||||
task = PaperlessTask.objects.get()
|
||||
|
||||
self.assertEqual(celery.states.FAILURE, task.status)
|
||||
|
||||
def test_add_to_index_indexes_root_once_for_root_documents(self) -> None:
|
||||
root = Document.objects.create(
|
||||
title="root",
|
||||
checksum="root",
|
||||
mime_type="application/pdf",
|
||||
task_failure_handler(
|
||||
task_id=task.task_id,
|
||||
exception=ValueError("boom"),
|
||||
traceback=None,
|
||||
)
|
||||
task.refresh_from_db()
|
||||
assert task.duration_seconds == pytest.approx(5.0, abs=1.0)
|
||||
assert task.wait_time_seconds == pytest.approx(5.0, abs=1.0)
|
||||
|
||||
with mock.patch("documents.search.get_backend") as mock_get_backend:
|
||||
mock_backend = mock.MagicMock()
|
||||
mock_get_backend.return_value = mock_backend
|
||||
add_to_index(sender=None, document=root)
|
||||
def test_ignores_none_task_id(self):
|
||||
|
||||
mock_backend.add_or_update.assert_called_once_with(root, effective_content="")
|
||||
task_failure_handler(task_id=None, exception=ValueError("x"), traceback=None)
|
||||
|
||||
def test_add_to_index_reindexes_root_for_version_documents(self) -> None:
|
||||
root = Document.objects.create(
|
||||
title="root",
|
||||
checksum="root",
|
||||
mime_type="application/pdf",
|
||||
)
|
||||
version = Document.objects.create(
|
||||
title="version",
|
||||
checksum="version",
|
||||
mime_type="application/pdf",
|
||||
root_document=root,
|
||||
)
|
||||
|
||||
with mock.patch("documents.search.get_backend") as mock_get_backend:
|
||||
mock_backend = mock.MagicMock()
|
||||
mock_get_backend.return_value = mock_backend
|
||||
add_to_index(sender=None, document=version)
|
||||
@pytest.mark.django_db
|
||||
class TestTaskRevokedHandler:
|
||||
def test_marks_task_revoked(self, mocker: pytest_mock.MockerFixture):
|
||||
"""task_revoked_handler moves a queued task to REVOKED and stamps date_done."""
|
||||
task = PaperlessTaskFactory(status=PaperlessTask.Status.PENDING)
|
||||
request = mocker.MagicMock()
|
||||
request.id = task.task_id
|
||||
|
||||
self.assertEqual(mock_backend.add_or_update.call_count, 1)
|
||||
self.assertEqual(
|
||||
mock_backend.add_or_update.call_args_list[0].args[0].id,
|
||||
version.id,
|
||||
)
|
||||
self.assertEqual(
|
||||
mock_backend.add_or_update.call_args_list[0].kwargs,
|
||||
{"effective_content": version.content},
|
||||
)
|
||||
task_revoked_handler(request=request)
|
||||
task.refresh_from_db()
|
||||
assert task.status == PaperlessTask.Status.REVOKED
|
||||
assert task.date_done is not None
|
||||
|
||||
def test_ignores_none_request(self):
|
||||
"""task_revoked_handler must not raise when request is None."""
|
||||
|
||||
task_revoked_handler(request=None) # must not raise
|
||||
|
||||
def test_ignores_unknown_task_id(self, mocker: pytest_mock.MockerFixture):
|
||||
"""task_revoked_handler must not raise for a task_id not in the database."""
|
||||
request = mocker.MagicMock()
|
||||
request.id = "nonexistent-id"
|
||||
|
||||
task_revoked_handler(request=request) # must not raise
|
||||
|
||||
@@ -4,7 +4,6 @@ from pathlib import Path
|
||||
from unittest import mock
|
||||
|
||||
import pytest
|
||||
from celery import states
|
||||
from django.conf import settings
|
||||
from django.test import TestCase
|
||||
from django.test import override_settings
|
||||
@@ -14,7 +13,6 @@ from documents import tasks
|
||||
from documents.models import Correspondent
|
||||
from documents.models import Document
|
||||
from documents.models import DocumentType
|
||||
from documents.models import PaperlessTask
|
||||
from documents.models import Tag
|
||||
from documents.sanity_checker import SanityCheckFailedException
|
||||
from documents.sanity_checker import SanityCheckMessages
|
||||
@@ -40,7 +38,8 @@ class TestClassifier(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
def test_train_classifier_with_auto_tag(self, load_classifier) -> None:
|
||||
load_classifier.return_value = None
|
||||
Tag.objects.create(matching_algorithm=Tag.MATCH_AUTO, name="test")
|
||||
tasks.train_classifier()
|
||||
with self.assertRaises(ValueError):
|
||||
tasks.train_classifier()
|
||||
load_classifier.assert_called_once()
|
||||
self.assertIsNotFile(settings.MODEL_FILE)
|
||||
|
||||
@@ -48,7 +47,8 @@ class TestClassifier(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
def test_train_classifier_with_auto_type(self, load_classifier) -> None:
|
||||
load_classifier.return_value = None
|
||||
DocumentType.objects.create(matching_algorithm=Tag.MATCH_AUTO, name="test")
|
||||
tasks.train_classifier()
|
||||
with self.assertRaises(ValueError):
|
||||
tasks.train_classifier()
|
||||
load_classifier.assert_called_once()
|
||||
self.assertIsNotFile(settings.MODEL_FILE)
|
||||
|
||||
@@ -56,7 +56,8 @@ class TestClassifier(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
def test_train_classifier_with_auto_correspondent(self, load_classifier) -> None:
|
||||
load_classifier.return_value = None
|
||||
Correspondent.objects.create(matching_algorithm=Tag.MATCH_AUTO, name="test")
|
||||
tasks.train_classifier()
|
||||
with self.assertRaises(ValueError):
|
||||
tasks.train_classifier()
|
||||
load_classifier.assert_called_once()
|
||||
self.assertIsNotFile(settings.MODEL_FILE)
|
||||
|
||||
@@ -298,7 +299,7 @@ class TestAIIndex(DirectoriesMixin, TestCase):
|
||||
WHEN:
|
||||
- llmindex_index task is called
|
||||
THEN:
|
||||
- update_llm_index is called, and the task is marked as success
|
||||
- update_llm_index is called and its result is returned
|
||||
"""
|
||||
Document.objects.create(
|
||||
title="test",
|
||||
@@ -308,13 +309,9 @@ class TestAIIndex(DirectoriesMixin, TestCase):
|
||||
# lazy-loaded so mock the actual function
|
||||
with mock.patch("paperless_ai.indexing.update_llm_index") as update_llm_index:
|
||||
update_llm_index.return_value = "LLM index updated successfully."
|
||||
tasks.llmindex_index()
|
||||
result = tasks.llmindex_index()
|
||||
update_llm_index.assert_called_once()
|
||||
task = PaperlessTask.objects.get(
|
||||
task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,
|
||||
)
|
||||
self.assertEqual(task.status, states.SUCCESS)
|
||||
self.assertEqual(task.result, "LLM index updated successfully.")
|
||||
self.assertEqual(result, "LLM index updated successfully.")
|
||||
|
||||
@override_settings(
|
||||
AI_ENABLED=True,
|
||||
@@ -325,9 +322,9 @@ class TestAIIndex(DirectoriesMixin, TestCase):
|
||||
GIVEN:
|
||||
- Document exists, AI is enabled, llm index backend is set
|
||||
WHEN:
|
||||
- llmindex_index task is called
|
||||
- llmindex_index task is called and update_llm_index raises an exception
|
||||
THEN:
|
||||
- update_llm_index raises an exception, and the task is marked as failure
|
||||
- the exception propagates to the caller
|
||||
"""
|
||||
Document.objects.create(
|
||||
title="test",
|
||||
@@ -337,13 +334,9 @@ class TestAIIndex(DirectoriesMixin, TestCase):
|
||||
# lazy-loaded so mock the actual function
|
||||
with mock.patch("paperless_ai.indexing.update_llm_index") as update_llm_index:
|
||||
update_llm_index.side_effect = Exception("LLM index update failed.")
|
||||
tasks.llmindex_index()
|
||||
with self.assertRaisesRegex(Exception, "LLM index update failed."):
|
||||
tasks.llmindex_index()
|
||||
update_llm_index.assert_called_once()
|
||||
task = PaperlessTask.objects.get(
|
||||
task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,
|
||||
)
|
||||
self.assertEqual(task.status, states.FAILURE)
|
||||
self.assertIn("LLM index update failed.", task.result)
|
||||
|
||||
def test_update_document_in_llm_index(self) -> None:
|
||||
"""
|
||||
|
||||
@@ -8,6 +8,7 @@ import zipfile
|
||||
from collections import defaultdict
|
||||
from collections import deque
|
||||
from datetime import datetime
|
||||
from datetime import timedelta
|
||||
from pathlib import Path
|
||||
from time import mktime
|
||||
from typing import TYPE_CHECKING
|
||||
@@ -20,7 +21,6 @@ from urllib.parse import urlparse
|
||||
import httpx
|
||||
import magic
|
||||
import pathvalidate
|
||||
from celery import states
|
||||
from django.conf import settings
|
||||
from django.contrib.auth.models import Group
|
||||
from django.contrib.auth.models import User
|
||||
@@ -29,6 +29,7 @@ from django.core.cache import cache
|
||||
from django.db import connections
|
||||
from django.db.migrations.loader import MigrationLoader
|
||||
from django.db.migrations.recorder import MigrationRecorder
|
||||
from django.db.models import Avg
|
||||
from django.db.models import Case
|
||||
from django.db.models import Count
|
||||
from django.db.models import F
|
||||
@@ -193,7 +194,7 @@ from documents.serialisers import PostDocumentSerializer
|
||||
from documents.serialisers import RemovePasswordDocumentsSerializer
|
||||
from documents.serialisers import ReprocessDocumentsSerializer
|
||||
from documents.serialisers import RotateDocumentsSerializer
|
||||
from documents.serialisers import RunTaskViewSerializer
|
||||
from documents.serialisers import RunTaskSerializer
|
||||
from documents.serialisers import SavedViewSerializer
|
||||
from documents.serialisers import SearchResultSerializer
|
||||
from documents.serialisers import SerializerWithPerms
|
||||
@@ -202,7 +203,9 @@ from documents.serialisers import ShareLinkSerializer
|
||||
from documents.serialisers import StoragePathSerializer
|
||||
from documents.serialisers import StoragePathTestSerializer
|
||||
from documents.serialisers import TagSerializer
|
||||
from documents.serialisers import TasksViewSerializer
|
||||
from documents.serialisers import TaskSerializerV9
|
||||
from documents.serialisers import TaskSerializerV10
|
||||
from documents.serialisers import TaskSummarySerializer
|
||||
from documents.serialisers import TrashSerializer
|
||||
from documents.serialisers import UiSettingsViewSerializer
|
||||
from documents.serialisers import WorkflowActionSerializer
|
||||
@@ -212,7 +215,6 @@ from documents.signals import document_updated
|
||||
from documents.tasks import build_share_link_bundle
|
||||
from documents.tasks import consume_file
|
||||
from documents.tasks import empty_trash
|
||||
from documents.tasks import index_optimize
|
||||
from documents.tasks import llmindex_index
|
||||
from documents.tasks import sanity_check
|
||||
from documents.tasks import train_classifier
|
||||
@@ -3729,21 +3731,21 @@ class RemoteVersionView(GenericAPIView[Any]):
|
||||
|
||||
|
||||
@extend_schema_view(
|
||||
list=extend_schema(
|
||||
parameters=[
|
||||
OpenApiParameter(
|
||||
name="task_id",
|
||||
type=str,
|
||||
location=OpenApiParameter.QUERY,
|
||||
required=False,
|
||||
description="Filter tasks by Celery UUID",
|
||||
),
|
||||
],
|
||||
),
|
||||
acknowledge=extend_schema(
|
||||
operation_id="acknowledge_tasks",
|
||||
description="Acknowledge a list of tasks",
|
||||
request={
|
||||
"application/json": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"tasks": {
|
||||
"type": "array",
|
||||
"items": {"type": "integer"},
|
||||
},
|
||||
},
|
||||
"required": ["tasks"],
|
||||
},
|
||||
},
|
||||
request=AcknowledgeTasksViewSerializer,
|
||||
responses={
|
||||
(200, "application/json"): inline_serializer(
|
||||
name="AcknowledgeTasks",
|
||||
@@ -3751,52 +3753,118 @@ class RemoteVersionView(GenericAPIView[Any]):
|
||||
"result": serializers.IntegerField(),
|
||||
},
|
||||
),
|
||||
(400, "application/json"): None,
|
||||
},
|
||||
),
|
||||
)
|
||||
@extend_schema(
|
||||
parameters=[
|
||||
OpenApiParameter(
|
||||
name="task_id",
|
||||
type=str,
|
||||
location=OpenApiParameter.QUERY,
|
||||
required=False,
|
||||
description="Filter tasks by Celery UUID",
|
||||
),
|
||||
],
|
||||
run=extend_schema(
|
||||
operation_id="run_task",
|
||||
description="Manually dispatch a background task. Superuser only.",
|
||||
request=RunTaskSerializer,
|
||||
responses={
|
||||
(200, "application/json"): inline_serializer(
|
||||
name="RunTask",
|
||||
fields={"task_id": serializers.CharField()},
|
||||
),
|
||||
(400, "application/json"): inline_serializer(
|
||||
name="RunTaskError",
|
||||
fields={"error": serializers.CharField()},
|
||||
),
|
||||
},
|
||||
),
|
||||
summary=extend_schema(
|
||||
responses={200: TaskSummarySerializer(many=True)},
|
||||
parameters=[
|
||||
OpenApiParameter(
|
||||
name="days",
|
||||
type=int,
|
||||
location=OpenApiParameter.QUERY,
|
||||
required=False,
|
||||
description="Number of days to include in aggregation (default 30)",
|
||||
),
|
||||
],
|
||||
),
|
||||
active=extend_schema(
|
||||
description="Currently pending and running tasks (capped at 50).",
|
||||
responses={200: TaskSerializerV10(many=True)},
|
||||
),
|
||||
)
|
||||
class TasksViewSet(ReadOnlyModelViewSet[PaperlessTask]):
|
||||
permission_classes = (IsAuthenticated, PaperlessObjectPermissions)
|
||||
serializer_class = TasksViewSerializer
|
||||
filter_backends = (
|
||||
DjangoFilterBackend,
|
||||
OrderingFilter,
|
||||
ObjectOwnedOrGrantedPermissionsFilter,
|
||||
)
|
||||
filterset_class = PaperlessTaskFilterSet
|
||||
ordering_fields = [
|
||||
"date_created",
|
||||
"date_done",
|
||||
"status",
|
||||
"task_type",
|
||||
"duration_seconds",
|
||||
"wait_time_seconds",
|
||||
]
|
||||
ordering = ["-date_created"]
|
||||
# Needed for drf-spectacular schema generation (get_queryset touches request.user)
|
||||
queryset = PaperlessTask.objects.none()
|
||||
|
||||
TASK_AND_ARGS_BY_NAME = {
|
||||
PaperlessTask.TaskName.INDEX_OPTIMIZE: (index_optimize, {}),
|
||||
PaperlessTask.TaskName.TRAIN_CLASSIFIER: (
|
||||
train_classifier,
|
||||
{"scheduled": False},
|
||||
),
|
||||
PaperlessTask.TaskName.CHECK_SANITY: (
|
||||
sanity_check,
|
||||
{"scheduled": False, "raise_on_error": False},
|
||||
),
|
||||
PaperlessTask.TaskName.LLMINDEX_UPDATE: (
|
||||
llmindex_index,
|
||||
{"scheduled": False, "rebuild": False},
|
||||
),
|
||||
# v9 backwards compat: maps old task_name values to new task_type values
|
||||
_V9_TASK_NAME_TO_TYPE = {
|
||||
"check_sanity": PaperlessTask.TaskType.SANITY_CHECK,
|
||||
"llmindex_update": PaperlessTask.TaskType.LLM_INDEX,
|
||||
}
|
||||
|
||||
# v9 backwards compat: maps old "type" query param values to new TriggerSource.
|
||||
# Must match the reverse of TaskSerializerV9._TRIGGER_SOURCE_TO_V9_TYPE.
|
||||
_V9_TYPE_TO_TRIGGER_SOURCES = {
|
||||
"auto_task": [
|
||||
PaperlessTask.TriggerSource.SYSTEM,
|
||||
PaperlessTask.TriggerSource.EMAIL_CONSUME,
|
||||
PaperlessTask.TriggerSource.FOLDER_CONSUME,
|
||||
],
|
||||
"scheduled_task": [PaperlessTask.TriggerSource.SCHEDULED],
|
||||
"manual_task": [
|
||||
PaperlessTask.TriggerSource.MANUAL,
|
||||
PaperlessTask.TriggerSource.WEB_UI,
|
||||
PaperlessTask.TriggerSource.API_UPLOAD,
|
||||
],
|
||||
}
|
||||
|
||||
_RUNNABLE_TASKS = {
|
||||
PaperlessTask.TaskType.TRAIN_CLASSIFIER: (train_classifier, {}),
|
||||
PaperlessTask.TaskType.SANITY_CHECK: (sanity_check, {"raise_on_error": False}),
|
||||
PaperlessTask.TaskType.LLM_INDEX: (llmindex_index, {"rebuild": False}),
|
||||
}
|
||||
|
||||
def get_serializer_class(self):
|
||||
# v9: use backwards-compatible serializer with old field names
|
||||
if self.request.version and int(self.request.version) < 10:
|
||||
return TaskSerializerV9
|
||||
return TaskSerializerV10
|
||||
|
||||
def get_queryset(self):
|
||||
queryset = PaperlessTask.objects.all().order_by("-date_created")
|
||||
is_v9 = self.request.version and int(self.request.version) < 10
|
||||
if self.request.user.is_staff:
|
||||
queryset = PaperlessTask.objects.all()
|
||||
else:
|
||||
# Own tasks + unowned (system/scheduled) tasks. Tasks owned by other
|
||||
# users are never visible to non-staff regardless of API version.
|
||||
queryset = PaperlessTask.objects.filter(
|
||||
Q(owner=self.request.user) | Q(owner__isnull=True),
|
||||
)
|
||||
# v9 backwards compat: map old query params to new field names
|
||||
if is_v9:
|
||||
task_name = self.request.query_params.get("task_name")
|
||||
if task_name is not None:
|
||||
mapped = self._V9_TASK_NAME_TO_TYPE.get(task_name, task_name)
|
||||
queryset = queryset.filter(task_type=mapped)
|
||||
task_type_old = self.request.query_params.get("type")
|
||||
if task_type_old is not None:
|
||||
sources = self._V9_TYPE_TO_TRIGGER_SOURCES.get(task_type_old)
|
||||
if sources:
|
||||
queryset = queryset.filter(trigger_source__in=sources)
|
||||
# v10+: direct task_id param for backwards compat
|
||||
task_id = self.request.query_params.get("task_id")
|
||||
if task_id is not None:
|
||||
queryset = PaperlessTask.objects.filter(task_id=task_id)
|
||||
queryset = queryset.filter(task_id=task_id)
|
||||
return queryset
|
||||
|
||||
@action(
|
||||
@@ -3808,33 +3876,86 @@ class TasksViewSet(ReadOnlyModelViewSet[PaperlessTask]):
|
||||
serializer = AcknowledgeTasksViewSerializer(data=request.data)
|
||||
serializer.is_valid(raise_exception=True)
|
||||
task_ids = serializer.validated_data.get("tasks")
|
||||
tasks = self.get_queryset().filter(id__in=task_ids)
|
||||
count = tasks.update(acknowledged=True)
|
||||
return Response({"result": count})
|
||||
|
||||
@action(methods=["get"], detail=False)
|
||||
def summary(self, request):
|
||||
"""Aggregated task statistics per task_type over the last N days (default 30)."""
|
||||
try:
|
||||
tasks = PaperlessTask.objects.filter(id__in=task_ids)
|
||||
if request.user is not None and not request.user.is_superuser:
|
||||
tasks = tasks.filter(owner=request.user) | tasks.filter(owner=None)
|
||||
result = tasks.update(
|
||||
acknowledged=True,
|
||||
days = max(1, int(request.query_params.get("days", 30)))
|
||||
except (TypeError, ValueError):
|
||||
return Response(
|
||||
{"days": "Must be a positive integer."},
|
||||
status=status.HTTP_400_BAD_REQUEST,
|
||||
)
|
||||
return Response({"result": result})
|
||||
except Exception:
|
||||
return HttpResponseBadRequest()
|
||||
cutoff = timezone.now() - timedelta(days=days)
|
||||
queryset = self.get_queryset().filter(date_created__gte=cutoff)
|
||||
|
||||
data = queryset.values("task_type").annotate(
|
||||
total_count=Count("id"),
|
||||
pending_count=Count("id", filter=Q(status=PaperlessTask.Status.PENDING)),
|
||||
success_count=Count("id", filter=Q(status=PaperlessTask.Status.SUCCESS)),
|
||||
failure_count=Count("id", filter=Q(status=PaperlessTask.Status.FAILURE)),
|
||||
avg_duration_seconds=Avg(
|
||||
"duration_seconds",
|
||||
filter=Q(duration_seconds__isnull=False),
|
||||
),
|
||||
avg_wait_time_seconds=Avg(
|
||||
"wait_time_seconds",
|
||||
filter=Q(wait_time_seconds__isnull=False),
|
||||
),
|
||||
last_run=Max("date_created"),
|
||||
last_success=Max(
|
||||
"date_done",
|
||||
filter=Q(status=PaperlessTask.Status.SUCCESS),
|
||||
),
|
||||
last_failure=Max(
|
||||
"date_done",
|
||||
filter=Q(status=PaperlessTask.Status.FAILURE),
|
||||
),
|
||||
)
|
||||
serializer = TaskSummarySerializer(data, many=True)
|
||||
return Response(serializer.data)
|
||||
|
||||
@action(methods=["get"], detail=False)
|
||||
def active(self, request):
|
||||
"""Currently pending and running tasks (capped at 50)."""
|
||||
queryset = (
|
||||
self.get_queryset()
|
||||
.filter(
|
||||
status__in=[PaperlessTask.Status.PENDING, PaperlessTask.Status.STARTED],
|
||||
)
|
||||
.order_by("-date_created")[:50]
|
||||
)
|
||||
serializer = self.get_serializer(queryset, many=True)
|
||||
return Response(serializer.data)
|
||||
|
||||
@action(methods=["post"], detail=False)
|
||||
def run(self, request):
|
||||
serializer = RunTaskViewSerializer(data=request.data)
|
||||
serializer.is_valid(raise_exception=True)
|
||||
task_name = serializer.validated_data.get("task_name")
|
||||
|
||||
"""Manually dispatch a background task. Superuser only."""
|
||||
if not request.user.is_superuser:
|
||||
return HttpResponseForbidden("Insufficient permissions")
|
||||
serializer = RunTaskSerializer(data=request.data)
|
||||
serializer.is_valid(raise_exception=True)
|
||||
task_type = serializer.validated_data.get("task_type")
|
||||
|
||||
if task_type not in self._RUNNABLE_TASKS:
|
||||
return Response(
|
||||
{"error": f"Task type '{task_type}' cannot be manually triggered"},
|
||||
status=status.HTTP_400_BAD_REQUEST,
|
||||
)
|
||||
|
||||
try:
|
||||
task_func, task_args = self.TASK_AND_ARGS_BY_NAME[task_name]
|
||||
result = task_func(**task_args)
|
||||
return Response({"result": result})
|
||||
task_func, task_kwargs = self._RUNNABLE_TASKS[task_type]
|
||||
async_result = task_func.apply_async(
|
||||
kwargs=task_kwargs,
|
||||
headers={"trigger_source": PaperlessTask.TriggerSource.MANUAL},
|
||||
)
|
||||
return Response({"task_id": async_result.id})
|
||||
except Exception as e:
|
||||
logger.warning(f"An error occurred running task: {e!s}")
|
||||
logger.warning(f"Error running task: {e!s}")
|
||||
return HttpResponseServerError(
|
||||
"Error running task, check logs for more detail.",
|
||||
)
|
||||
@@ -4466,12 +4587,8 @@ class SystemStatusView(PassUserMixin):

last_trained_task = (
PaperlessTask.objects.filter(
task_name=PaperlessTask.TaskName.TRAIN_CLASSIFIER,
status__in=[
states.SUCCESS,
states.FAILURE,
states.REVOKED,
], # ignore running tasks
task_type=PaperlessTask.TaskType.TRAIN_CLASSIFIER,
status__in=PaperlessTask.COMPLETE_STATUSES, # ignore running tasks
)
.order_by("-date_done")
.first()
@@ -4481,21 +4598,17 @@ class SystemStatusView(PassUserMixin):
if last_trained_task is None:
classifier_status = "WARNING"
classifier_error = "No classifier training tasks found"
elif last_trained_task and last_trained_task.status != states.SUCCESS:
elif last_trained_task.status != PaperlessTask.Status.SUCCESS:
classifier_status = "ERROR"
classifier_error = last_trained_task.result
classifier_error = last_trained_task.result_message
classifier_last_trained = (
last_trained_task.date_done if last_trained_task else None
)

last_sanity_check = (
PaperlessTask.objects.filter(
task_name=PaperlessTask.TaskName.CHECK_SANITY,
status__in=[
states.SUCCESS,
states.FAILURE,
states.REVOKED,
], # ignore running tasks
task_type=PaperlessTask.TaskType.SANITY_CHECK,
status__in=PaperlessTask.COMPLETE_STATUSES, # ignore running tasks
)
.order_by("-date_done")
.first()
@@ -4505,9 +4618,9 @@ class SystemStatusView(PassUserMixin):
if last_sanity_check is None:
sanity_check_status = "WARNING"
sanity_check_error = "No sanity check tasks found"
elif last_sanity_check and last_sanity_check.status != states.SUCCESS:
elif last_sanity_check.status != PaperlessTask.Status.SUCCESS:
sanity_check_status = "ERROR"
sanity_check_error = last_sanity_check.result
sanity_check_error = last_sanity_check.result_message
sanity_check_last_run = (
last_sanity_check.date_done if last_sanity_check else None
)
@@ -4520,7 +4633,7 @@ class SystemStatusView(PassUserMixin):
else:
last_llmindex_update = (
PaperlessTask.objects.filter(
task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,
task_type=PaperlessTask.TaskType.LLM_INDEX,
)
.order_by("-date_done")
.first()
@@ -4530,9 +4643,9 @@ class SystemStatusView(PassUserMixin):
if last_llmindex_update is None:
llmindex_status = "WARNING"
llmindex_error = "No LLM index update tasks found"
elif last_llmindex_update and last_llmindex_update.status == states.FAILURE:
elif last_llmindex_update.status == PaperlessTask.Status.FAILURE:
llmindex_status = "ERROR"
llmindex_error = last_llmindex_update.result
llmindex_error = last_llmindex_update.result_message
llmindex_last_modified = (
last_llmindex_update.date_done if last_llmindex_update else None
)

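The three status blocks above share one shape: the newest completed task of a given type yields WARNING when absent, ERROR on a non-success, otherwise OK. A hedged consolidation sketch (hypothetical helper, not part of the diff; note the LLM index block treats only FAILURE as an error, so a real version would take a predicate):

```python
# Hypothetical helper, not in this diff; assumes the paperless-ngx
# Django context. PaperlessTask.Status and result_message are taken
# from the changes above.
from documents.models import PaperlessTask


def derive_status(last_task: PaperlessTask | None, missing_msg: str):
    """Return (status, error, finished_at) for the newest completed task."""
    if last_task is None:
        return "WARNING", missing_msg, None
    if last_task.status != PaperlessTask.Status.SUCCESS:
        return "ERROR", last_task.result_message, last_task.date_done
    return "OK", None, last_task.date_done
```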
File diff suppressed because it is too large
@@ -133,7 +133,6 @@ INSTALLED_APPS = [
"rest_framework",
"rest_framework.authtoken",
"django_filters",
"django_celery_results",
"guardian",
"allauth",
"allauth.account",
@@ -669,8 +668,6 @@ CELERY_BROKER_TRANSPORT_OPTIONS = {
CELERY_TASK_TRACK_STARTED = True
CELERY_TASK_TIME_LIMIT: Final[int] = get_int_from_env("PAPERLESS_WORKER_TIMEOUT", 1800)

CELERY_RESULT_EXTENDED = True
CELERY_RESULT_BACKEND = "django-db"
CELERY_CACHE_BACKEND = "default"

# https://docs.celeryq.dev/en/stable/userguide/configuration.html#task-serializer

@@ -181,7 +181,11 @@ def parse_beat_schedule() -> dict:
schedule[task["name"]] = {
"task": task["task"],
"schedule": crontab(minute, hour, day_week, day_month, month),
"options": task["options"],
"options": {
**task["options"],
# PaperlessTask.TriggerSource.SCHEDULED -- models can't be imported here
"headers": {"trigger_source": "scheduled"},
},
}

return schedule

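The merged `options` dict keeps any per-task settings such as `expires` intact while stamping every beat entry with the scheduled trigger header. A minimal, self-contained illustration of that merge (the `expires` value is illustrative):

```python
# Minimal sketch of the merge above; 600 is an arbitrary example value.
task = {"options": {"expires": 600}}
options = {
    **task["options"],
    "headers": {"trigger_source": "scheduled"},
}
assert options == {"expires": 600, "headers": {"trigger_source": "scheduled"}}
```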
@@ -186,42 +186,66 @@ def make_expected_schedule(
"Check all e-mail accounts": {
"task": "paperless_mail.tasks.process_mail_accounts",
"schedule": crontab(minute="*/10"),
"options": {"expires": mail_expire},
"options": {
"expires": mail_expire,
"headers": {"trigger_source": "scheduled"},
},
},
"Train the classifier": {
"task": "documents.tasks.train_classifier",
"schedule": crontab(minute="5", hour="*/1"),
"options": {"expires": classifier_expire},
"options": {
"expires": classifier_expire,
"headers": {"trigger_source": "scheduled"},
},
},
"Optimize the index": {
"task": "documents.tasks.index_optimize",
"schedule": crontab(minute=0, hour=0),
"options": {"expires": index_expire},
"options": {
"expires": index_expire,
"headers": {"trigger_source": "scheduled"},
},
},
"Perform sanity check": {
"task": "documents.tasks.sanity_check",
"schedule": crontab(minute=30, hour=0, day_of_week="sun"),
"options": {"expires": sanity_expire},
"options": {
"expires": sanity_expire,
"headers": {"trigger_source": "scheduled"},
},
},
"Empty trash": {
"task": "documents.tasks.empty_trash",
"schedule": crontab(minute=0, hour="1"),
"options": {"expires": empty_trash_expire},
"options": {
"expires": empty_trash_expire,
"headers": {"trigger_source": "scheduled"},
},
},
"Check and run scheduled workflows": {
"task": "documents.tasks.check_scheduled_workflows",
"schedule": crontab(minute="5", hour="*/1"),
"options": {"expires": workflow_expire},
"options": {
"expires": workflow_expire,
"headers": {"trigger_source": "scheduled"},
},
},
"Rebuild LLM index": {
"task": "documents.tasks.llmindex_index",
"schedule": crontab(minute="10", hour="2"),
"options": {"expires": llm_index_expire},
"options": {
"expires": llm_index_expire,
"headers": {"trigger_source": "scheduled"},
},
},
"Cleanup expired share link bundles": {
"task": "documents.tasks.cleanup_expired_share_link_bundles",
"schedule": crontab(minute=0, hour="2"),
"options": {"expires": share_link_cleanup_expire},
"options": {
"expires": share_link_cleanup_expire,
"headers": {"trigger_source": "scheduled"},
},
},
}

@@ -284,6 +308,16 @@ class TestParseBeatSchedule:
schedule = parse_beat_schedule()
assert schedule == expected

def test_parse_beat_schedule_all_entries_have_trigger_source_header(self) -> None:
"""Every beat entry must carry trigger_source=scheduled so the task signal
handler can identify scheduler-originated tasks."""
schedule = parse_beat_schedule()
for name, entry in schedule.items():
headers = entry.get("options", {}).get("headers", {})
assert headers.get("trigger_source") == "scheduled", (
f"Beat entry '{name}' is missing trigger_source header"
)


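The docstring above refers to a task signal handler; a hedged sketch of how such a handler could pick the stamped header up at publish time (illustrative handler, not the one paperless-ngx ships; `before_task_publish` is standard Celery API and receives the outgoing message headers):

```python
# Illustrative only: the project's actual handler is not shown in this
# diff. With Celery task protocol v2, custom headers passed via
# apply_async(headers=...) arrive in this signal's `headers` argument.
from celery.signals import before_task_publish


@before_task_publish.connect
def note_trigger_source(sender=None, headers=None, **kwargs):
    trigger_source = (headers or {}).get("trigger_source", "unknown")
    print(f"Task {sender} published (trigger_source={trigger_source})")
```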
class TestParseDbSettings:
"""Test suite for parse_db_settings function."""

@@ -38,6 +38,7 @@ from rest_framework.response import Response
from rest_framework.throttling import ScopedRateThrottle
from rest_framework.viewsets import ModelViewSet

from documents.models import PaperlessTask
from documents.permissions import PaperlessObjectPermissions
from documents.tasks import llmindex_index
from paperless.filters import GroupFilterSet
@@ -427,10 +428,9 @@ class ApplicationConfigurationViewSet(ModelViewSet[ApplicationConfiguration]):
and not vector_store_file_exists()
):
# AI index was just enabled and vector store file does not exist
llmindex_index.delay(
rebuild=True,
scheduled=False,
auto=True,
llmindex_index.apply_async(
kwargs={"rebuild": True},
headers={"trigger_source": PaperlessTask.TriggerSource.SYSTEM},
)


@@ -4,7 +4,6 @@ from datetime import timedelta
from pathlib import Path
from typing import TYPE_CHECKING

from celery import states
from django.conf import settings
from django.utils import timezone

@@ -28,17 +27,20 @@ def queue_llm_index_update_if_needed(*, rebuild: bool, reason: str) -> bool:
from documents.tasks import llmindex_index

has_running = PaperlessTask.objects.filter(
task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,
status__in=[states.PENDING, states.STARTED],
task_type=PaperlessTask.TaskType.LLM_INDEX,
status__in=[PaperlessTask.Status.PENDING, PaperlessTask.Status.STARTED],
).exists()
has_recent = PaperlessTask.objects.filter(
task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,
task_type=PaperlessTask.TaskType.LLM_INDEX,
date_created__gte=(timezone.now() - timedelta(minutes=5)),
).exists()
if has_running or has_recent:
return False

llmindex_index.delay(rebuild=rebuild, scheduled=False, auto=True)
llmindex_index.apply_async(
kwargs={"rebuild": rebuild},
headers={"trigger_source": PaperlessTask.TriggerSource.SYSTEM},
)
logger.warning(
"Queued LLM index update%s: %s",
" (rebuild)" if rebuild else "",

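A hedged usage sketch of the guard above (the `reason` string is illustrative): the function returns `False` and skips enqueueing when a matching task is already pending/started or was created within the last five minutes, which debounces repeated triggers.

```python
# Sketch only; the keyword-only signature comes from the function above,
# the reason value is an arbitrary example.
from paperless_ai.indexing import queue_llm_index_update_if_needed

queued = queue_llm_index_update_if_needed(rebuild=False, reason="documents changed")
print("queued" if queued else "debounced: task running or ran within 5 minutes")
```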
@@ -3,13 +3,13 @@ from unittest.mock import MagicMock
from unittest.mock import patch

import pytest
from celery import states
from django.test import override_settings
from django.utils import timezone
from llama_index.core.base.embeddings.base import BaseEmbedding

from documents.models import Document
from documents.models import PaperlessTask
from documents.tests.factories import PaperlessTaskFactory
from paperless_ai import indexing


@@ -292,13 +292,15 @@ def test_queue_llm_index_update_if_needed_enqueues_when_idle_or_skips_recent() -
)

assert result is True
mock_task.delay.assert_called_once_with(rebuild=True, scheduled=False, auto=True)
mock_task.apply_async.assert_called_once_with(
kwargs={"rebuild": True},
headers={"trigger_source": "system"},
)

PaperlessTask.objects.create(
task_id="task-1",
task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,
status=states.STARTED,
date_created=timezone.now(),
PaperlessTaskFactory(
task_type=PaperlessTask.TaskType.LLM_INDEX,
trigger_source=PaperlessTask.TriggerSource.SYSTEM,
status=PaperlessTask.Status.STARTED,
)

# Existing running task
@@ -309,7 +311,7 @@ def test_queue_llm_index_update_if_needed_enqueues_when_idle_or_skips_recent() -
)

assert result is False
mock_task.delay.assert_not_called()
mock_task.apply_async.assert_not_called()


@override_settings(

15
uv.lock
generated
@@ -935,19 +935,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/12/bf/af8ad2aa5a402f278b444ca70729fb12ee96ddb89c19c32a2d7c5189358f/django_cachalot-2.9.0-py3-none-any.whl", hash = "sha256:b80ac4930613a7849988ea772a53598d262a15eaf55e5ec8c78accae7fdd99ff", size = 57814, upload-time = "2026-01-28T05:23:28.741Z" },
]

[[package]]
name = "django-celery-results"
version = "2.6.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "celery", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "django", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/a6/b5/9966c28e31014c228305e09d48b19b35522a8f941fe5af5f81f40dc8fa80/django_celery_results-2.6.0.tar.gz", hash = "sha256:9abcd836ae6b61063779244d8887a88fe80bbfaba143df36d3cb07034671277c", size = 83985, upload-time = "2025-04-10T08:23:52.677Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/2c/da/70f0f3c5364735344c4bc89e53413bcaae95b4fc1de4e98a7a3b9fb70c88/django_celery_results-2.6.0-py3-none-any.whl", hash = "sha256:b9ccdca2695b98c7cbbb8dea742311ba9a92773d71d7b4944a676e69a7df1c73", size = 38351, upload-time = "2025-04-10T08:23:49.965Z" },
]

[[package]]
name = "django-compression-middleware"
version = "0.5.0"
@@ -2869,7 +2856,6 @@ dependencies = [
{ name = "django-allauth", extra = ["mfa", "socialaccount"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "django-auditlog", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "django-cachalot", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "django-celery-results", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "django-compression-middleware", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "django-cors-headers", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "django-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -3018,7 +3004,6 @@ requires-dist = [
{ name = "django-allauth", extras = ["mfa", "socialaccount"], specifier = "~=65.15.0" },
{ name = "django-auditlog", specifier = "~=3.4.1" },
{ name = "django-cachalot", specifier = "~=2.9.0" },
{ name = "django-celery-results", specifier = "~=2.6.0" },
{ name = "django-compression-middleware", specifier = "~=0.5.0" },
{ name = "django-cors-headers", specifier = "~=4.9.0" },
{ name = "django-extensions", specifier = "~=4.1" },