Compare commits

..

2 Commits

Author SHA1 Message Date
Trenton H
c92abb8bb7 Handles the linter updates now 2026-02-27 15:06:10 -08:00
Trenton H
d8c01c78e6 Working to improve the retagger output 2026-02-27 14:57:10 -08:00
20 changed files with 970 additions and 1133 deletions

View File

@@ -440,6 +440,9 @@ src/documents/permissions.py:0: error: Item "list[str]" of "Any | list[str] | Qu
src/documents/permissions.py:0: error: Item "list[str]" of "Any | list[str] | QuerySet[User, User]" has no attribute "exists" [union-attr]
src/documents/permissions.py:0: error: Missing type parameters for generic type "QuerySet" [type-arg]
src/documents/permissions.py:0: error: Missing type parameters for generic type "dict" [type-arg]
src/documents/plugins/helpers.py:0: error: "Collection[str]" has no attribute "update" [attr-defined]
src/documents/plugins/helpers.py:0: error: Argument 1 to "send" of "BaseStatusManager" has incompatible type "dict[str, Collection[str]]"; expected "dict[str, str | int | None]" [arg-type]
src/documents/plugins/helpers.py:0: error: Argument 1 to "send" of "BaseStatusManager" has incompatible type "dict[str, Collection[str]]"; expected "dict[str, str | int | None]" [arg-type]
src/documents/plugins/helpers.py:0: error: Function is missing a type annotation [no-untyped-def]
src/documents/plugins/helpers.py:0: error: Function is missing a type annotation [no-untyped-def]
src/documents/plugins/helpers.py:0: error: Skipping analyzing "channels_redis.pubsub": module is installed, but missing library stubs or py.typed marker [import-untyped]
@@ -664,6 +667,7 @@ src/documents/signals/handlers.py:0: error: Argument 3 to "validate_move" has in
src/documents/signals/handlers.py:0: error: Argument 5 to "_suggestion_printer" has incompatible type "Any | None"; expected "MatchingModel" [arg-type]
src/documents/signals/handlers.py:0: error: Argument 5 to "_suggestion_printer" has incompatible type "Any | None"; expected "MatchingModel" [arg-type]
src/documents/signals/handlers.py:0: error: Argument 5 to "_suggestion_printer" has incompatible type "Any | None"; expected "MatchingModel" [arg-type]
src/documents/signals/handlers.py:0: error: Function is missing a return type annotation [no-untyped-def]
src/documents/signals/handlers.py:0: error: Function is missing a type annotation [no-untyped-def]
src/documents/signals/handlers.py:0: error: Function is missing a type annotation [no-untyped-def]
src/documents/signals/handlers.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def]
@@ -1924,7 +1928,6 @@ src/paperless/tests/test_websockets.py:0: error: Item "None" of "BaseChannelLaye
src/paperless/tests/test_websockets.py:0: error: Item "None" of "BaseChannelLayer | None" has no attribute "group_send" [union-attr]
src/paperless/tests/test_websockets.py:0: error: Item "None" of "BaseChannelLayer | None" has no attribute "group_send" [union-attr]
src/paperless/tests/test_websockets.py:0: error: Item "None" of "BaseChannelLayer | None" has no attribute "group_send" [union-attr]
src/paperless/tests/test_websockets.py:0: error: Item "None" of "BaseChannelLayer | None" has no attribute "group_send" [union-attr]
src/paperless/tests/test_websockets.py:0: error: TypedDict "_WebsocketTestScope" has no key "user" [typeddict-item]
src/paperless/tests/test_websockets.py:0: error: TypedDict "_WebsocketTestScope" has no key "user" [typeddict-item]
src/paperless/tests/test_websockets.py:0: error: TypedDict "_WebsocketTestScope" has no key "user" [typeddict-item]

View File

@@ -65,7 +65,6 @@ import { TagService } from 'src/app/services/rest/tag.service'
import { UserService } from 'src/app/services/rest/user.service'
import { SettingsService } from 'src/app/services/settings.service'
import { ToastService } from 'src/app/services/toast.service'
import { WebsocketStatusService } from 'src/app/services/websocket-status.service'
import { environment } from 'src/environments/environment'
import { ConfirmDialogComponent } from '../common/confirm-dialog/confirm-dialog.component'
import { PasswordRemovalConfirmDialogComponent } from '../common/confirm-dialog/password-removal-confirm-dialog/password-removal-confirm-dialog.component'
@@ -84,9 +83,9 @@ const doc: Document = {
storage_path: 31,
tags: [41, 42, 43],
content: 'text content',
added: new Date('May 4, 2014 03:24:00').toISOString(),
created: new Date('May 4, 2014 03:24:00').toISOString(),
modified: new Date('May 4, 2014 03:24:00').toISOString(),
added: new Date('May 4, 2014 03:24:00'),
created: new Date('May 4, 2014 03:24:00'),
modified: new Date('May 4, 2014 03:24:00'),
archive_serial_number: null,
original_file_name: 'file.pdf',
owner: null,
@@ -328,29 +327,6 @@ describe('DocumentDetailComponent', () => {
expect(component.activeNavID).toEqual(component.DocumentDetailNavIDs.Notes)
})
it('should switch from preview to details when pdf preview enters the DOM', fakeAsync(() => {
component.nav = {
activeId: component.DocumentDetailNavIDs.Preview,
select: jest.fn(),
} as any
;(component as any).pdfPreview = {
nativeElement: { offsetParent: {} },
}
tick()
expect(component.nav.select).toHaveBeenCalledWith(
component.DocumentDetailNavIDs.Details
)
}))
it('should forward title key up value to titleSubject', () => {
const subjectSpy = jest.spyOn(component.titleSubject, 'next')
component.titleKeyUp({ target: { value: 'Updated title' } })
expect(subjectSpy).toHaveBeenCalledWith('Updated title')
})
it('should change url on tab switch', () => {
initNormally()
const navigateSpy = jest.spyOn(router, 'navigate')
@@ -548,7 +524,7 @@ describe('DocumentDetailComponent', () => {
jest.spyOn(documentService, 'get').mockReturnValue(
of({
...doc,
modified: '2024-01-02T00:00:00Z',
modified: new Date('2024-01-02T00:00:00Z'),
duplicate_documents: updatedDuplicates,
})
)
@@ -1410,21 +1386,17 @@ describe('DocumentDetailComponent', () => {
expect(errorSpy).toHaveBeenCalled()
})
it('should show incoming update modal when open local draft is older than backend on init', () => {
it('should warn when open document does not match doc retrieved from backend on init', () => {
let openModal: NgbModalRef
modalService.activeInstances.subscribe((modals) => (openModal = modals[0]))
const modalSpy = jest.spyOn(modalService, 'open')
const openDoc = Object.assign({}, doc, {
__changedFields: ['title'],
})
const openDoc = Object.assign({}, doc)
// simulate a document being modified elsewhere and db updated
const remoteDoc = Object.assign({}, doc, {
modified: new Date(new Date(doc.modified).getTime() + 1000).toISOString(),
})
doc.modified = new Date()
jest
.spyOn(activatedRoute, 'paramMap', 'get')
.mockReturnValue(of(convertToParamMap({ id: 3, section: 'details' })))
jest.spyOn(documentService, 'get').mockReturnValueOnce(of(remoteDoc))
jest.spyOn(documentService, 'get').mockReturnValueOnce(of(doc))
jest.spyOn(openDocumentsService, 'getOpenDocument').mockReturnValue(openDoc)
jest.spyOn(customFieldsService, 'listAll').mockReturnValue(
of({
@@ -1434,185 +1406,11 @@ describe('DocumentDetailComponent', () => {
})
)
fixture.detectChanges() // calls ngOnInit
expect(modalSpy).toHaveBeenCalledWith(ConfirmDialogComponent, {
backdrop: 'static',
})
expect(modalSpy).toHaveBeenCalledWith(ConfirmDialogComponent)
const closeSpy = jest.spyOn(openModal, 'close')
const confirmDialog = openModal.componentInstance as ConfirmDialogComponent
expect(confirmDialog.messageBold).toContain('Document was updated at')
})
it('should react to websocket document updated notifications', () => {
initNormally()
const updateMessage = {
document_id: component.documentId,
modified: '2026-02-17T00:00:00Z',
owner_id: 1,
}
const handleSpy = jest
.spyOn(component as any, 'handleIncomingDocumentUpdated')
.mockImplementation(() => {})
const websocketStatusService = TestBed.inject(WebsocketStatusService)
websocketStatusService.handleDocumentUpdated(updateMessage)
expect(handleSpy).toHaveBeenCalledWith(updateMessage)
})
it('should queue incoming update while network is active and flush after', () => {
initNormally()
const loadSpy = jest.spyOn(component as any, 'loadDocument')
const toastSpy = jest.spyOn(toastService, 'showInfo')
component.networkActive = true
;(component as any).handleIncomingDocumentUpdated({
document_id: component.documentId,
modified: '2026-02-17T00:00:00Z',
})
expect(loadSpy).not.toHaveBeenCalled()
component.networkActive = false
;(component as any).flushPendingIncomingUpdate()
expect(loadSpy).toHaveBeenCalledWith(component.documentId, true)
expect(toastSpy).toHaveBeenCalledWith(
'Document reloaded with latest changes.'
)
})
it('should ignore queued incoming update matching local save modified', () => {
initNormally()
const loadSpy = jest.spyOn(component as any, 'loadDocument')
const toastSpy = jest.spyOn(toastService, 'showInfo')
component.networkActive = true
;(component as any).lastLocalSaveModified = '2026-02-17T00:00:00+00:00'
;(component as any).handleIncomingDocumentUpdated({
document_id: component.documentId,
modified: '2026-02-17T00:00:00+00:00',
})
component.networkActive = false
;(component as any).flushPendingIncomingUpdate()
expect(loadSpy).not.toHaveBeenCalled()
expect(toastSpy).not.toHaveBeenCalled()
})
it('should clear pdf source if preview URL is empty', () => {
component.pdfSource = { url: '/preview', password: 'secret' } as any
component.previewUrl = null
;(component as any).updatePdfSource()
expect(component.pdfSource).toEqual({ url: null, password: undefined })
})
it('should close incoming update modal if one is open', () => {
const modalRef = { close: jest.fn() } as unknown as NgbModalRef
;(component as any).incomingUpdateModal = modalRef
;(component as any).closeIncomingUpdateModal()
expect(modalRef.close).toHaveBeenCalled()
expect((component as any).incomingUpdateModal).toBeNull()
})
it('should reload remote version when incoming update modal is confirmed', async () => {
let openModal: NgbModalRef
modalService.activeInstances.subscribe((modals) => (openModal = modals[0]))
const reloadSpy = jest
.spyOn(component as any, 'reloadRemoteVersion')
.mockImplementation(() => {})
;(component as any).showIncomingUpdateModal('2026-02-17T00:00:00Z')
const dialog = openModal.componentInstance as ConfirmDialogComponent
dialog.confirmClicked.next()
await openModal.result
expect(dialog.buttonsEnabled).toBe(false)
expect(reloadSpy).toHaveBeenCalled()
expect((component as any).incomingUpdateModal).toBeNull()
})
it('should overwrite open document state when loading remote version with force', () => {
const openDoc = Object.assign({}, doc, {
title: 'Locally edited title',
__changedFields: ['title'],
})
const remoteDoc = Object.assign({}, doc, {
title: 'Remote title',
modified: '2026-02-17T00:00:00Z',
})
jest.spyOn(documentService, 'get').mockReturnValue(of(remoteDoc))
jest.spyOn(documentService, 'getMetadata').mockReturnValue(
of({
has_archive_version: false,
original_mime_type: 'application/pdf',
})
)
jest.spyOn(documentService, 'getSuggestions').mockReturnValue(
of({
suggested_tags: [],
suggested_document_types: [],
suggested_correspondents: [],
})
)
jest.spyOn(openDocumentsService, 'getOpenDocument').mockReturnValue(openDoc)
const setDirtySpy = jest.spyOn(openDocumentsService, 'setDirty')
const saveSpy = jest.spyOn(openDocumentsService, 'save')
;(component as any).loadDocument(doc.id, true)
expect(openDoc.title).toEqual('Remote title')
expect(openDoc.__changedFields).toEqual([])
expect(setDirtySpy).toHaveBeenCalledWith(openDoc, false)
expect(saveSpy).toHaveBeenCalled()
})
it('should ignore incoming update for a different document id', () => {
initNormally()
const loadSpy = jest.spyOn(component as any, 'loadDocument')
;(component as any).handleIncomingDocumentUpdated({
document_id: component.documentId + 1,
modified: '2026-02-17T00:00:00Z',
})
expect(loadSpy).not.toHaveBeenCalled()
})
it('should show incoming update modal when local document has unsaved edits', () => {
initNormally()
jest.spyOn(openDocumentsService, 'isDirty').mockReturnValue(true)
const modalSpy = jest
.spyOn(component as any, 'showIncomingUpdateModal')
.mockImplementation(() => {})
;(component as any).handleIncomingDocumentUpdated({
document_id: component.documentId,
modified: '2026-02-17T00:00:00Z',
})
expect(modalSpy).toHaveBeenCalledWith('2026-02-17T00:00:00Z')
})
it('should reload current document and show toast when reloading remote version', () => {
component.documentId = doc.id
const closeModalSpy = jest
.spyOn(component as any, 'closeIncomingUpdateModal')
.mockImplementation(() => {})
const loadSpy = jest
.spyOn(component as any, 'loadDocument')
.mockImplementation(() => {})
const notifySpy = jest.spyOn(component.docChangeNotifier, 'next')
const toastSpy = jest.spyOn(toastService, 'showInfo')
;(component as any).reloadRemoteVersion()
expect(closeModalSpy).toHaveBeenCalled()
expect(notifySpy).toHaveBeenCalledWith(doc.id)
expect(loadSpy).toHaveBeenCalledWith(doc.id, true)
expect(toastSpy).toHaveBeenCalledWith('Document reloaded.')
confirmDialog.confirmClicked.next(confirmDialog)
expect(closeSpy).toHaveBeenCalled()
})
it('should change preview element by render type', () => {
@@ -1923,14 +1721,6 @@ describe('DocumentDetailComponent', () => {
expect(component.createDisabled(DataType.Tag)).toBeFalsy()
})
it('should expose add permission via userCanAdd getter', () => {
currentUserCan = true
expect(component.userCanAdd).toBeTruthy()
currentUserCan = false
expect(component.userCanAdd).toBeFalsy()
})
it('should call tryRenderTiff when no archive and file is tiff', () => {
initNormally()
const tiffRenderSpy = jest.spyOn(

View File

@@ -13,7 +13,6 @@ import {
NgbDateStruct,
NgbDropdownModule,
NgbModal,
NgbModalRef,
NgbNav,
NgbNavChangeEvent,
NgbNavModule,
@@ -81,7 +80,6 @@ import { TagService } from 'src/app/services/rest/tag.service'
import { UserService } from 'src/app/services/rest/user.service'
import { SettingsService } from 'src/app/services/settings.service'
import { ToastService } from 'src/app/services/toast.service'
import { WebsocketStatusService } from 'src/app/services/websocket-status.service'
import { getFilenameFromContentDisposition } from 'src/app/utils/http'
import { ISODateAdapter } from 'src/app/utils/ngb-iso-date-adapter'
import * as UTIF from 'utif'
@@ -145,11 +143,6 @@ enum ContentRenderType {
TIFF = 'tiff',
}
interface IncomingDocumentUpdate {
document_id: number
modified: string
}
@Component({
selector: 'pngx-document-detail',
templateUrl: './document-detail.component.html',
@@ -215,7 +208,6 @@ export class DocumentDetailComponent
private componentRouterService = inject(ComponentRouterService)
private deviceDetectorService = inject(DeviceDetectorService)
private savedViewService = inject(SavedViewService)
private readonly websocketStatusService = inject(WebsocketStatusService)
@ViewChild('inputTitle')
titleInput: TextComponent
@@ -275,9 +267,6 @@ export class DocumentDetailComponent
isDirty$: Observable<boolean>
unsubscribeNotifier: Subject<any> = new Subject()
docChangeNotifier: Subject<any> = new Subject()
private incomingUpdateModal: NgbModalRef
private pendingIncomingUpdate: IncomingDocumentUpdate
private lastLocalSaveModified: string | null = null
requiresPassword: boolean = false
password: string
@@ -486,12 +475,9 @@ export class DocumentDetailComponent
)
}
private loadDocument(documentId: number, forceRemote: boolean = false): void {
private loadDocument(documentId: number): void {
let redirectedToRoot = false
this.closeIncomingUpdateModal()
this.pendingIncomingUpdate = null
this.selectedVersionId = documentId
this.lastLocalSaveModified = null
this.previewUrl = this.documentsService.getPreviewUrl(
this.selectedVersionId
)
@@ -559,25 +545,21 @@ export class DocumentDetailComponent
openDocument.duplicate_documents = doc.duplicate_documents
this.openDocumentService.save()
}
let useDoc = openDocument || doc
if (openDocument && forceRemote) {
Object.assign(openDocument, doc)
openDocument.__changedFields = []
this.openDocumentService.setDirty(openDocument, false)
this.openDocumentService.save()
useDoc = openDocument
} else if (openDocument) {
if (new Date(doc.modified) > new Date(openDocument.modified)) {
if (this.hasLocalEdits(openDocument)) {
this.showIncomingUpdateModal(doc.modified)
} else {
// No local edits to preserve, so keep the tab in sync automatically.
Object.assign(openDocument, doc)
openDocument.__changedFields = []
this.openDocumentService.setDirty(openDocument, false)
this.openDocumentService.save()
useDoc = openDocument
}
const useDoc = openDocument || doc
if (openDocument) {
if (
new Date(doc.modified) > new Date(openDocument.modified) &&
!this.modalService.hasOpenModals()
) {
const modal = this.modalService.open(ConfirmDialogComponent)
modal.componentInstance.title = $localize`Document changes detected`
modal.componentInstance.messageBold = $localize`The version of this document in your browser session appears older than the existing version.`
modal.componentInstance.message = $localize`Saving the document here may overwrite other changes that were made. To restore the existing version, discard your changes or close the document.`
modal.componentInstance.cancelBtnClass = 'visually-hidden'
modal.componentInstance.btnCaption = $localize`Ok`
modal.componentInstance.confirmClicked.subscribe(() =>
modal.close()
)
}
} else {
this.openDocumentService
@@ -608,98 +590,6 @@ export class DocumentDetailComponent
})
}
private hasLocalEdits(doc: Document): boolean {
return (
this.openDocumentService.isDirty(doc) || !!doc.__changedFields?.length
)
}
private showIncomingUpdateModal(modified: string): void {
if (this.incomingUpdateModal) return
const modal = this.modalService.open(ConfirmDialogComponent, {
backdrop: 'static',
})
this.incomingUpdateModal = modal
let formattedModified = null
const parsed = new Date(modified)
formattedModified = parsed.toLocaleString()
modal.componentInstance.title = $localize`Document was updated`
modal.componentInstance.messageBold = $localize`Document was updated at ${formattedModified}.`
modal.componentInstance.message = $localize`Reload to discard your local unsaved edits and load the latest remote version.`
modal.componentInstance.btnClass = 'btn-warning'
modal.componentInstance.btnCaption = $localize`Reload`
modal.componentInstance.cancelBtnCaption = $localize`Dismiss`
modal.componentInstance.confirmClicked.pipe(first()).subscribe(() => {
modal.componentInstance.buttonsEnabled = false
modal.close()
this.reloadRemoteVersion()
})
modal.result.finally(() => {
this.incomingUpdateModal = null
})
}
private closeIncomingUpdateModal() {
if (!this.incomingUpdateModal) return
this.incomingUpdateModal.close()
this.incomingUpdateModal = null
}
private flushPendingIncomingUpdate() {
if (!this.pendingIncomingUpdate || this.networkActive) return
const pendingUpdate = this.pendingIncomingUpdate
this.pendingIncomingUpdate = null
this.handleIncomingDocumentUpdated(pendingUpdate)
}
private handleIncomingDocumentUpdated(data: IncomingDocumentUpdate): void {
if (
!this.documentId ||
!this.document ||
data.document_id !== this.documentId
)
return
if (this.networkActive) {
this.pendingIncomingUpdate = data
return
}
// If modified timestamp of the incoming update is the same as the last local save,
// we assume this update is from our own save and dont notify
const incomingModified = data.modified
if (
incomingModified &&
this.lastLocalSaveModified &&
incomingModified === this.lastLocalSaveModified
) {
this.lastLocalSaveModified = null
return
}
this.lastLocalSaveModified = null
if (this.openDocumentService.isDirty(this.document)) {
this.showIncomingUpdateModal(data.modified)
} else {
this.docChangeNotifier.next(this.documentId)
this.loadDocument(this.documentId, true)
this.toastService.showInfo(
$localize`Document reloaded with latest changes.`
)
}
}
private reloadRemoteVersion() {
if (!this.documentId) return
this.closeIncomingUpdateModal()
this.docChangeNotifier.next(this.documentId)
this.loadDocument(this.documentId, true)
this.toastService.showInfo($localize`Document reloaded.`)
}
ngOnInit(): void {
this.setZoom(
this.settings.get(SETTINGS_KEYS.PDF_VIEWER_ZOOM_SETTING) as PdfZoomScale
@@ -758,11 +648,6 @@ export class DocumentDetailComponent
this.getCustomFields()
this.websocketStatusService
.onDocumentUpdated()
.pipe(takeUntil(this.unsubscribeNotifier))
.subscribe((data) => this.handleIncomingDocumentUpdated(data))
this.route.paramMap
.pipe(
filter(
@@ -1148,7 +1033,6 @@ export class DocumentDetailComponent
)
.subscribe({
next: (doc) => {
this.closeIncomingUpdateModal()
Object.assign(this.document, doc)
doc['permissions_form'] = {
owner: doc.owner,
@@ -1195,8 +1079,6 @@ export class DocumentDetailComponent
.pipe(first())
.subscribe({
next: (docValues) => {
this.closeIncomingUpdateModal()
this.lastLocalSaveModified = docValues.modified ?? null
// in case data changed while saving eg removing inbox_tags
this.documentForm.patchValue(docValues)
const newValues = Object.assign({}, this.documentForm.value)
@@ -1211,19 +1093,16 @@ export class DocumentDetailComponent
this.networkActive = false
this.error = null
if (close) {
this.pendingIncomingUpdate = null
this.close(() =>
this.openDocumentService.refreshDocument(this.documentId)
)
} else {
this.openDocumentService.refreshDocument(this.documentId)
this.flushPendingIncomingUpdate()
}
this.savedViewService.maybeRefreshDocumentCounts()
},
error: (error) => {
this.networkActive = false
this.lastLocalSaveModified = null
const canEdit =
this.permissionsService.currentUserHasObjectPermissions(
PermissionAction.Change,
@@ -1243,7 +1122,6 @@ export class DocumentDetailComponent
error
)
}
this.flushPendingIncomingUpdate()
},
})
}
@@ -1280,11 +1158,8 @@ export class DocumentDetailComponent
.pipe(first())
.subscribe({
next: ({ updateResult, nextDocId, closeResult }) => {
this.closeIncomingUpdateModal()
this.error = null
this.networkActive = false
this.pendingIncomingUpdate = null
this.lastLocalSaveModified = null
if (closeResult && updateResult && nextDocId) {
this.router.navigate(['documents', nextDocId])
this.titleInput?.focus()
@@ -1292,10 +1167,8 @@ export class DocumentDetailComponent
},
error: (error) => {
this.networkActive = false
this.lastLocalSaveModified = null
this.error = error.error
this.toastService.showError($localize`Error saving document`, error)
this.flushPendingIncomingUpdate()
},
})
}
@@ -1381,7 +1254,7 @@ export class DocumentDetailComponent
.subscribe({
next: () => {
this.toastService.showInfo(
$localize`Reprocess operation for "${this.document.title}" will begin in the background.`
$localize`Reprocess operation for "${this.document.title}" will begin in the background. Close and re-open or reload this document after the operation has completed to see new content.`
)
if (modal) {
modal.close()

View File

@@ -128,15 +128,15 @@ export interface Document extends ObjectWithPermissions {
checksum?: string
// UTC
created?: string // ISO string
created?: Date
modified?: string // ISO string
modified?: Date
added?: string // ISO string
added?: Date
mime_type?: string
deleted_at?: string // ISO string
deleted_at?: Date
original_file_name?: string

View File

@@ -1,7 +0,0 @@
export interface WebsocketDocumentUpdatedMessage {
document_id: number
modified: string
owner_id?: number
users_can_view?: number[]
groups_can_view?: number[]
}

View File

@@ -416,42 +416,4 @@ describe('ConsumerStatusService', () => {
websocketStatusService.disconnect()
expect(deleted).toBeTruthy()
})
it('should trigger updated subject on document updated', () => {
let updated = false
websocketStatusService.onDocumentUpdated().subscribe((data) => {
updated = true
expect(data.document_id).toEqual(12)
})
websocketStatusService.connect()
server.send({
type: WebsocketStatusType.DOCUMENT_UPDATED,
data: {
document_id: 12,
modified: '2026-02-17T00:00:00Z',
owner_id: 1,
},
})
websocketStatusService.disconnect()
expect(updated).toBeTruthy()
})
it('should ignore document updated events the user cannot view', () => {
let updated = false
websocketStatusService.onDocumentUpdated().subscribe(() => {
updated = true
})
websocketStatusService.handleDocumentUpdated({
document_id: 12,
modified: '2026-02-17T00:00:00Z',
owner_id: 2,
users_can_view: [],
groups_can_view: [],
})
expect(updated).toBeFalsy()
})
})

View File

@@ -2,7 +2,6 @@ import { Injectable, inject } from '@angular/core'
import { Subject } from 'rxjs'
import { environment } from 'src/environments/environment'
import { User } from '../data/user'
import { WebsocketDocumentUpdatedMessage } from '../data/websocket-document-updated-message'
import { WebsocketDocumentsDeletedMessage } from '../data/websocket-documents-deleted-message'
import { WebsocketProgressMessage } from '../data/websocket-progress-message'
import { SettingsService } from './settings.service'
@@ -10,7 +9,6 @@ import { SettingsService } from './settings.service'
export enum WebsocketStatusType {
STATUS_UPDATE = 'status_update',
DOCUMENTS_DELETED = 'documents_deleted',
DOCUMENT_UPDATED = 'document_updated',
}
// see ProgressStatusOptions in src/documents/plugins/helpers.py
@@ -102,20 +100,17 @@ export enum UploadState {
providedIn: 'root',
})
export class WebsocketStatusService {
private readonly settingsService = inject(SettingsService)
private settingsService = inject(SettingsService)
private statusWebSocket: WebSocket
private consumerStatus: FileStatus[] = []
private readonly documentDetectedSubject = new Subject<FileStatus>()
private readonly documentConsumptionFinishedSubject =
new Subject<FileStatus>()
private readonly documentConsumptionFailedSubject = new Subject<FileStatus>()
private readonly documentDeletedSubject = new Subject<boolean>()
private readonly documentUpdatedSubject =
new Subject<WebsocketDocumentUpdatedMessage>()
private readonly connectionStatusSubject = new Subject<boolean>()
private documentDetectedSubject = new Subject<FileStatus>()
private documentConsumptionFinishedSubject = new Subject<FileStatus>()
private documentConsumptionFailedSubject = new Subject<FileStatus>()
private documentDeletedSubject = new Subject<boolean>()
private connectionStatusSubject = new Subject<boolean>()
private get(taskId: string, filename?: string) {
let status =
@@ -181,10 +176,7 @@ export class WebsocketStatusService {
data: messageData,
}: {
type: WebsocketStatusType
data:
| WebsocketProgressMessage
| WebsocketDocumentsDeletedMessage
| WebsocketDocumentUpdatedMessage
data: WebsocketProgressMessage | WebsocketDocumentsDeletedMessage
} = JSON.parse(ev.data)
switch (type) {
@@ -192,12 +184,6 @@ export class WebsocketStatusService {
this.documentDeletedSubject.next(true)
break
case WebsocketStatusType.DOCUMENT_UPDATED:
this.handleDocumentUpdated(
messageData as WebsocketDocumentUpdatedMessage
)
break
case WebsocketStatusType.STATUS_UPDATE:
this.handleProgressUpdate(messageData as WebsocketProgressMessage)
break
@@ -205,11 +191,7 @@ export class WebsocketStatusService {
}
}
private canViewMessage(messageData: {
owner_id?: number
users_can_view?: number[]
groups_can_view?: number[]
}): boolean {
private canViewMessage(messageData: WebsocketProgressMessage): boolean {
// see paperless.consumers.StatusConsumer._can_view
const user: User = this.settingsService.currentUser
return (
@@ -269,15 +251,6 @@ export class WebsocketStatusService {
}
}
handleDocumentUpdated(messageData: WebsocketDocumentUpdatedMessage) {
// fallback if backend didn't restrict message
if (!this.canViewMessage(messageData)) {
return
}
this.documentUpdatedSubject.next(messageData)
}
fail(status: FileStatus, message: string) {
status.message = message
status.phase = FileStatusPhase.FAILED
@@ -331,10 +304,6 @@ export class WebsocketStatusService {
return this.documentDeletedSubject
}
onDocumentUpdated() {
return this.documentUpdatedSubject
}
onConnectionStatus() {
return this.connectionStatusSubject.asObservable()
}

View File

@@ -1,11 +0,0 @@
import pytest
from pytest_django.fixtures import SettingsWrapper
@pytest.fixture(autouse=True)
def in_memory_channel_layers(settings: SettingsWrapper) -> None:
settings.CHANNEL_LAYERS = {
"default": {
"BACKEND": "channels.layers.InMemoryChannelLayer",
},
}

View File

@@ -15,7 +15,6 @@ class DocumentsConfig(AppConfig):
from documents.signals.handlers import add_to_index
from documents.signals.handlers import run_workflows_added
from documents.signals.handlers import run_workflows_updated
from documents.signals.handlers import send_websocket_document_updated
from documents.signals.handlers import set_correspondent
from documents.signals.handlers import set_document_type
from documents.signals.handlers import set_storage_path
@@ -30,7 +29,6 @@ class DocumentsConfig(AppConfig):
document_consumption_finished.connect(run_workflows_added)
document_consumption_finished.connect(add_or_update_document_in_llm_index)
document_updated.connect(run_workflows_updated)
document_updated.connect(send_websocket_document_updated)
import documents.schema # noqa: F401

View File

@@ -7,6 +7,7 @@ Provides automatic progress bar and multiprocessing support with minimal boilerp
from __future__ import annotations
import os
from collections.abc import Callable
from collections.abc import Iterable
from collections.abc import Sized
from concurrent.futures import ProcessPoolExecutor
@@ -23,6 +24,9 @@ from django.core.management import CommandError
from django.db.models import QuerySet
from django_rich.management import RichCommand
from rich.console import Console
from rich.console import Group
from rich.console import RenderableType
from rich.live import Live
from rich.progress import BarColumn
from rich.progress import MofNCompleteColumn
from rich.progress import Progress
@@ -32,9 +36,7 @@ from rich.progress import TimeElapsedColumn
from rich.progress import TimeRemainingColumn
if TYPE_CHECKING:
from collections.abc import Callable
from collections.abc import Generator
from collections.abc import Iterable
from collections.abc import Sequence
from django.core.management import CommandParser
@@ -91,6 +93,23 @@ class PaperlessCommand(RichCommand):
for result in self.process_parallel(process_doc, ids):
if result.error:
self.console.print(f"[red]Failed: {result.error}[/red]")
class Command(PaperlessCommand):
help = "Import documents with live stats"
def handle(self, *args, **options):
stats = ImportStats()
def render_stats() -> Table:
... # build Rich Table from stats
for item in self.track_with_stats(
items,
description="Importing...",
stats_renderer=render_stats,
):
result = import_item(item)
stats.imported += 1
"""
supports_progress_bar: ClassVar[bool] = True
@@ -128,13 +147,11 @@ class PaperlessCommand(RichCommand):
This is called by Django's command infrastructure after argument parsing
but before handle(). We use it to set instance attributes from options.
"""
# Set progress bar state
if self.supports_progress_bar:
self.no_progress_bar = options.get("no_progress_bar", False)
else:
self.no_progress_bar = True
# Set multiprocessing state
if self.supports_multiprocessing:
self.process_count = options.get("processes", 1)
if self.process_count < 1:
@@ -144,9 +161,29 @@ class PaperlessCommand(RichCommand):
return super().execute(*args, **options)
@staticmethod
def _progress_columns() -> tuple[Any, ...]:
"""
Return the standard set of progress bar columns.
Extracted so both _create_progress (standalone) and track_with_stats
(inside Live) use identical column configuration without duplication.
"""
return (
SpinnerColumn(),
TextColumn("[progress.description]{task.description}"),
BarColumn(),
MofNCompleteColumn(),
TimeElapsedColumn(),
TimeRemainingColumn(),
)
def _create_progress(self, description: str) -> Progress:
"""
Create a configured Progress instance.
Create a standalone Progress instance with its own stderr Console.
Use this for track(). For track_with_stats(), Progress is created
directly inside a Live context instead.
Progress output is directed to stderr to match the convention that
progress bars are transient UI feedback, not command output. This
@@ -161,12 +198,7 @@ class PaperlessCommand(RichCommand):
A Progress instance configured with appropriate columns.
"""
return Progress(
SpinnerColumn(),
TextColumn("[progress.description]{task.description}"),
BarColumn(),
MofNCompleteColumn(),
TimeElapsedColumn(),
TimeRemainingColumn(),
*self._progress_columns(),
console=Console(stderr=True),
transient=False,
)
@@ -222,7 +254,6 @@ class PaperlessCommand(RichCommand):
yield from iterable
return
# Attempt to determine total if not provided
if total is None:
total = self._get_iterable_length(iterable)
@@ -232,6 +263,87 @@ class PaperlessCommand(RichCommand):
yield item
progress.advance(task_id)
def track_with_stats(
self,
iterable: Iterable[T],
*,
description: str = "Processing...",
stats_renderer: Callable[[], RenderableType],
total: int | None = None,
) -> Generator[T, None, None]:
"""
Iterate over items with a progress bar and a live-updating stats display.
The progress bar and stats renderable are combined in a single Live
context, so the stats panel re-renders in place below the progress bar
after each item is processed.
Respects --no-progress-bar flag. When disabled, yields items without
any display (stats are still updated by the caller's loop body, so
they will be accurate for any post-loop summary the caller prints).
Args:
iterable: The items to iterate over.
description: Text to display alongside the progress bar.
stats_renderer: Zero-argument callable that returns a Rich
renderable. Called after each item to refresh the display.
The caller typically closes over a mutable dataclass and
rebuilds a Table from it on each call.
total: Total number of items. If None, attempts to determine
automatically via .count() (for querysets) or len().
Yields:
Items from the iterable.
Example:
@dataclass
class Stats:
processed: int = 0
failed: int = 0
stats = Stats()
def render_stats() -> Table:
table = Table(box=None)
table.add_column("Processed")
table.add_column("Failed")
table.add_row(str(stats.processed), str(stats.failed))
return table
for item in self.track_with_stats(
items,
description="Importing...",
stats_renderer=render_stats,
):
try:
import_item(item)
stats.processed += 1
except Exception:
stats.failed += 1
"""
if self.no_progress_bar:
yield from iterable
return
if total is None:
total = self._get_iterable_length(iterable)
stderr_console = Console(stderr=True)
# Progress is created without its own console so Live controls rendering.
progress = Progress(*self._progress_columns())
task_id = progress.add_task(description, total=total)
with Live(
Group(progress, stats_renderer()),
console=stderr_console,
refresh_per_second=4,
) as live:
for item in iterable:
yield item
progress.advance(task_id)
live.update(Group(progress, stats_renderer()))
def process_parallel(
self,
fn: Callable[[T], R],
@@ -269,10 +381,8 @@ class PaperlessCommand(RichCommand):
total = len(items)
if self.process_count == 1:
# Sequential execution in main process - critical for testing
yield from self._process_sequential(fn, items, description, total)
else:
# Parallel execution with ProcessPoolExecutor
yield from self._process_parallel(fn, items, description, total)
def _process_sequential(
@@ -298,17 +408,14 @@ class PaperlessCommand(RichCommand):
total: int,
) -> Generator[ProcessResult[T, R], None, None]:
"""Process items in parallel using ProcessPoolExecutor."""
# Close database connections before forking - required for PostgreSQL
db.connections.close_all()
with self._create_progress(description) as progress:
task_id = progress.add_task(description, total=total)
with ProcessPoolExecutor(max_workers=self.process_count) as executor:
# Submit all tasks and map futures back to items
future_to_item = {executor.submit(fn, item): item for item in items}
# Yield results as they complete
for future in as_completed(future_to_item):
item = future_to_item[future]
try:

View File

@@ -1,4 +1,12 @@
from __future__ import annotations
import logging
from dataclasses import dataclass
from dataclasses import field
from typing import TYPE_CHECKING
from rich.table import Table
from rich.text import Text
from documents.classifier import load_classifier
from documents.management.commands.base import PaperlessCommand
@@ -8,9 +16,162 @@ from documents.signals.handlers import set_document_type
from documents.signals.handlers import set_storage_path
from documents.signals.handlers import set_tags
if TYPE_CHECKING:
from rich.console import RenderableType
from documents.models import Correspondent
from documents.models import DocumentType
from documents.models import StoragePath
from documents.models import Tag
logger = logging.getLogger("paperless.management.retagger")
@dataclass(slots=True)
class RetaggerStats:
"""Cumulative counters updated as the retagger processes documents.
Mutable by design -- fields are incremented in the processing loop.
slots=True reduces per-instance memory overhead and speeds attribute access.
"""
correspondents: int = 0
document_types: int = 0
tags_added: int = 0
tags_removed: int = 0
storage_paths: int = 0
documents_processed: int = 0
@dataclass(slots=True)
class DocumentSuggestion:
"""Buffered classifier suggestions for a single document (suggest mode only).
Mutable by design -- fields are assigned incrementally as each setter runs.
"""
document: Document
correspondent: Correspondent | None = None
document_type: DocumentType | None = None
tags_to_add: frozenset[Tag] = field(default_factory=frozenset)
tags_to_remove: frozenset[Tag] = field(default_factory=frozenset)
storage_path: StoragePath | None = None
@property
def has_suggestions(self) -> bool:
return bool(
self.correspondent is not None
or self.document_type is not None
or self.tags_to_add
or self.tags_to_remove
or self.storage_path is not None,
)
def _build_stats_table(stats: RetaggerStats, *, suggest: bool) -> Table:
"""
Build the live-updating stats table shown below the progress bar.
In suggest mode the labels read "would set / would add" to make clear
that nothing has been written to the database.
"""
table = Table(box=None, padding=(0, 2), show_header=True, header_style="bold")
table.add_column("Documents")
table.add_column("Correspondents")
table.add_column("Doc Types")
table.add_column("Tags (+)")
table.add_column("Tags (-)")
table.add_column("Storage Paths")
verb = "would set" if suggest else "set"
table.add_row(
str(stats.documents_processed),
f"{stats.correspondents} {verb}",
f"{stats.document_types} {verb}",
f"+{stats.tags_added}",
f"-{stats.tags_removed}",
f"{stats.storage_paths} {verb}",
)
return table
def _build_suggestion_table(
suggestions: list[DocumentSuggestion],
base_url: str | None,
) -> Table:
"""
Build the final suggestion table printed after the progress bar completes.
Only documents with at least one suggestion are included.
"""
table = Table(
title="Suggested Changes",
show_header=True,
header_style="bold cyan",
show_lines=True,
)
table.add_column("Document", style="bold", no_wrap=False, min_width=20)
table.add_column("Correspondent")
table.add_column("Doc Type")
table.add_column("Tags")
table.add_column("Storage Path")
for suggestion in suggestions:
if not suggestion.has_suggestions:
continue
doc = suggestion.document
if base_url:
doc_cell = Text()
doc_cell.append(str(doc))
doc_cell.append(f"\n{base_url}/documents/{doc.pk}", style="dim")
else:
doc_cell = Text(f"{doc} [{doc.pk}]")
tag_parts: list[str] = []
for tag in sorted(suggestion.tags_to_add, key=lambda t: t.name):
tag_parts.append(f"[green]+{tag.name}[/green]")
for tag in sorted(suggestion.tags_to_remove, key=lambda t: t.name):
tag_parts.append(f"[red]-{tag.name}[/red]")
tag_cell = Text.from_markup(", ".join(tag_parts)) if tag_parts else Text("-")
table.add_row(
doc_cell,
str(suggestion.correspondent) if suggestion.correspondent else "-",
str(suggestion.document_type) if suggestion.document_type else "-",
tag_cell,
str(suggestion.storage_path) if suggestion.storage_path else "-",
)
return table
def _build_summary_table(stats: RetaggerStats) -> Table:
"""Build the final applied-changes summary table."""
table = Table(
title="Retagger Summary",
show_header=True,
header_style="bold cyan",
)
table.add_column("Metric", style="bold")
table.add_column("Count", justify="right")
table.add_row("Documents processed", str(stats.documents_processed))
table.add_row("Correspondents set", str(stats.correspondents))
table.add_row("Document types set", str(stats.document_types))
table.add_row("Tags added", str(stats.tags_added))
table.add_row("Tags removed", str(stats.tags_removed))
table.add_row("Storage paths set", str(stats.storage_paths))
return table
class Command(PaperlessCommand):
help = (
"Using the current classification model, assigns correspondents, tags "
@@ -19,7 +180,7 @@ class Command(PaperlessCommand):
"modified) after their initial import."
)
def add_arguments(self, parser):
def add_arguments(self, parser) -> None:
super().add_arguments(parser)
parser.add_argument("-c", "--correspondent", default=False, action="store_true")
parser.add_argument("-T", "--tags", default=False, action="store_true")
@@ -31,9 +192,9 @@ class Command(PaperlessCommand):
default=False,
action="store_true",
help=(
"By default this command won't try to assign a correspondent "
"if more than one matches the document. Use this flag if "
"you'd rather it just pick the first one it finds."
"By default this command will not try to assign a correspondent "
"if more than one matches the document. Use this flag to pick "
"the first match instead."
),
)
parser.add_argument(
@@ -42,91 +203,133 @@ class Command(PaperlessCommand):
default=False,
action="store_true",
help=(
"If set, the document retagger will overwrite any previously "
"set correspondent, document and remove correspondents, types "
"and tags that do not match anymore due to changed rules."
"Overwrite any previously set correspondent, document type, and "
"remove tags that no longer match due to changed rules."
),
)
parser.add_argument(
"--suggest",
default=False,
action="store_true",
help="Return the suggestion, don't change anything.",
help="Show what would be changed without applying anything.",
)
parser.add_argument(
"--base-url",
help="The base URL to use to build the link to the documents.",
help="Base URL used to build document links in suggest output.",
)
parser.add_argument(
"--id-range",
help="A range of document ids on which the retagging should be applied.",
help="Restrict retagging to documents within this ID range (inclusive).",
nargs=2,
type=int,
)
def handle(self, *args, **options):
def handle(self, *args, **options) -> None:
suggest: bool = options["suggest"]
overwrite: bool = options["overwrite"]
use_first: bool = options["use_first"]
base_url: str | None = options["base_url"]
do_correspondent: bool = options["correspondent"]
do_document_type: bool = options["document_type"]
do_tags: bool = options["tags"]
do_storage_path: bool = options["storage_path"]
if not any([do_correspondent, do_document_type, do_tags, do_storage_path]):
self.console.print(
"[yellow]No classifier targets specified. "
"Use -c, -T, -t, or -s to select what to retag.[/yellow]",
)
return
if options["inbox_only"]:
queryset = Document.objects.filter(tags__is_inbox_tag=True)
else:
queryset = Document.objects.all()
if options["id_range"]:
queryset = queryset.filter(
id__range=(options["id_range"][0], options["id_range"][1]),
)
lo, hi = options["id_range"]
queryset = queryset.filter(id__range=(lo, hi))
documents = queryset.distinct()
classifier = load_classifier()
for document in self.track(documents, description="Retagging..."):
if options["correspondent"]:
set_correspondent(
sender=None,
document=document,
classifier=classifier,
replace=options["overwrite"],
use_first=options["use_first"],
suggest=options["suggest"],
base_url=options["base_url"],
stdout=self.stdout,
style_func=self.style,
)
stats = RetaggerStats()
suggestions: list[DocumentSuggestion] = []
if options["document_type"]:
set_document_type(
sender=None,
document=document,
classifier=classifier,
replace=options["overwrite"],
use_first=options["use_first"],
suggest=options["suggest"],
base_url=options["base_url"],
stdout=self.stdout,
style_func=self.style,
)
def render_stats() -> RenderableType:
return _build_stats_table(stats, suggest=suggest)
if options["tags"]:
set_tags(
sender=None,
document=document,
classifier=classifier,
replace=options["overwrite"],
suggest=options["suggest"],
base_url=options["base_url"],
stdout=self.stdout,
style_func=self.style,
)
for document in self.track_with_stats(
documents,
description="Retagging...",
stats_renderer=render_stats,
):
suggestion = DocumentSuggestion(document=document)
if options["storage_path"]:
set_storage_path(
sender=None,
document=document,
if do_correspondent:
correspondent = set_correspondent(
None,
document,
classifier=classifier,
replace=options["overwrite"],
use_first=options["use_first"],
suggest=options["suggest"],
base_url=options["base_url"],
stdout=self.stdout,
style_func=self.style,
replace=overwrite,
use_first=use_first,
dry_run=suggest,
)
if correspondent is not None:
stats.correspondents += 1
suggestion.correspondent = correspondent
if do_document_type:
document_type = set_document_type(
None,
document,
classifier=classifier,
replace=overwrite,
use_first=use_first,
dry_run=suggest,
)
if document_type is not None:
stats.document_types += 1
suggestion.document_type = document_type
if do_tags:
tags_to_add, tags_to_remove = set_tags(
None,
document,
classifier=classifier,
replace=overwrite,
dry_run=suggest,
)
stats.tags_added += len(tags_to_add)
stats.tags_removed += len(tags_to_remove)
suggestion.tags_to_add = frozenset(tags_to_add)
suggestion.tags_to_remove = frozenset(tags_to_remove)
if do_storage_path:
storage_path = set_storage_path(
None,
document,
classifier=classifier,
replace=overwrite,
use_first=use_first,
dry_run=suggest,
)
if storage_path is not None:
stats.storage_paths += 1
suggestion.storage_path = storage_path
stats.documents_processed += 1
if suggest:
suggestions.append(suggestion)
# Post-loop output
if suggest:
visible = [s for s in suggestions if s.has_suggestions]
if visible:
self.console.print(_build_suggestion_table(visible, base_url))
else:
self.console.print("[green]No changes suggested.[/green]")
else:
self.console.print(_build_summary_table(stats))

View File

@@ -1,5 +1,4 @@
import enum
from collections.abc import Mapping
from typing import TYPE_CHECKING
from asgiref.sync import async_to_sync
@@ -48,7 +47,7 @@ class BaseStatusManager:
async_to_sync(self._channel.flush)
self._channel = None
def send(self, payload: Mapping[str, object]) -> None:
def send(self, payload: dict[str, str | int | None]) -> None:
# Ensure the layer is open
self.open()
@@ -74,28 +73,26 @@ class ProgressManager(BaseStatusManager):
max_progress: int,
extra_args: dict[str, str | int | None] | None = None,
) -> None:
data: dict[str, object] = {
"filename": self.filename,
"task_id": self.task_id,
"current_progress": current_progress,
"max_progress": max_progress,
"status": status,
"message": message,
payload = {
"type": "status_update",
"data": {
"filename": self.filename,
"task_id": self.task_id,
"current_progress": current_progress,
"max_progress": max_progress,
"status": status,
"message": message,
},
}
if extra_args is not None:
data.update(extra_args)
payload: dict[str, object] = {
"type": "status_update",
"data": data,
}
payload["data"].update(extra_args)
self.send(payload)
class DocumentsStatusManager(BaseStatusManager):
def send_documents_deleted(self, documents: list[int]) -> None:
payload: dict[str, object] = {
payload = {
"type": "documents_deleted",
"data": {
"documents": documents,
@@ -103,25 +100,3 @@ class DocumentsStatusManager(BaseStatusManager):
}
self.send(payload)
def send_document_updated(
self,
*,
document_id: int,
modified: str,
owner_id: int | None = None,
users_can_view: list[int] | None = None,
groups_can_view: list[int] | None = None,
) -> None:
payload: dict[str, object] = {
"type": "document_updated",
"data": {
"document_id": document_id,
"modified": modified,
"owner_id": owner_id,
"users_can_view": users_can_view or [],
"groups_can_view": groups_can_view or [],
},
}
self.send(payload)

View File

@@ -24,7 +24,6 @@ from django.db.models import Q
from django.dispatch import receiver
from django.utils import timezone
from filelock import FileLock
from rest_framework import serializers
from documents import matching
from documents.caching import clear_document_caches
@@ -34,12 +33,14 @@ from documents.file_handling import create_source_path_directory
from documents.file_handling import delete_empty_directories
from documents.file_handling import generate_filename
from documents.file_handling import generate_unique_filename
from documents.models import Correspondent
from documents.models import CustomField
from documents.models import CustomFieldInstance
from documents.models import Document
from documents.models import MatchingModel
from documents.models import DocumentType
from documents.models import PaperlessTask
from documents.models import SavedView
from documents.models import StoragePath
from documents.models import Tag
from documents.models import UiSettings
from documents.models import Workflow
@@ -47,7 +48,6 @@ from documents.models import WorkflowAction
from documents.models import WorkflowRun
from documents.models import WorkflowTrigger
from documents.permissions import get_objects_for_user_owner_aware
from documents.plugins.helpers import DocumentsStatusManager
from documents.templating.utils import convert_format_str_to_template_format
from documents.workflows.actions import build_workflow_action_context
from documents.workflows.actions import execute_email_action
@@ -69,7 +69,6 @@ if TYPE_CHECKING:
from documents.data_models import DocumentMetadataOverrides
logger = logging.getLogger("paperless.handlers")
DRF_DATETIME_FIELD = serializers.DateTimeField()
def add_inbox_tags(sender, document: Document, logging_group=None, **kwargs) -> None:
@@ -85,47 +84,41 @@ def add_inbox_tags(sender, document: Document, logging_group=None, **kwargs) ->
document.add_nested_tags(inbox_tags)
def _suggestion_printer(
stdout,
style_func,
suggestion_type: str,
document: Document,
selected: MatchingModel,
base_url: str | None = None,
) -> None:
"""
Smaller helper to reduce duplication when just outputting suggestions to the console
"""
doc_str = str(document)
if base_url is not None:
stdout.write(style_func.SUCCESS(doc_str))
stdout.write(style_func.SUCCESS(f"{base_url}/documents/{document.pk}"))
else:
stdout.write(style_func.SUCCESS(f"{doc_str} [{document.pk}]"))
stdout.write(f"Suggest {suggestion_type}: {selected}")
def set_correspondent(
sender,
sender: object,
document: Document,
*,
logging_group=None,
logging_group: object = None,
classifier: DocumentClassifier | None = None,
replace=False,
use_first=True,
suggest=False,
base_url=None,
stdout=None,
style_func=None,
**kwargs,
) -> None:
replace: bool = False,
use_first: bool = True,
dry_run: bool = False,
**kwargs: Any,
) -> Correspondent | None:
"""
Assign a correspondent to a document based on classifier results.
Args:
document: The document to classify.
logging_group: Optional logging group for structured log output.
classifier: The trained classifier. If None, only rule-based matching runs.
replace: If True, overwrite an existing correspondent assignment.
use_first: If True, pick the first match when multiple correspondents
match. If False, skip assignment when multiple match.
dry_run: If True, compute and return the selection without saving.
**kwargs: Absorbed for Django signal compatibility (e.g. sender, signal).
Returns:
The correspondent that was (or would be) assigned, or None if no match
was found or assignment was skipped.
"""
if document.correspondent and not replace:
return
return None
potential_correspondents = matching.match_correspondents(document, classifier)
potential_count = len(potential_correspondents)
selected = potential_correspondents[0] if potential_correspondents else None
if potential_count > 1:
if use_first:
logger.debug(
@@ -139,49 +132,53 @@ def set_correspondent(
f"not assigning any correspondent",
extra={"group": logging_group},
)
return
return None
if selected or replace:
if suggest:
_suggestion_printer(
stdout,
style_func,
"correspondent",
document,
selected,
base_url,
)
else:
logger.info(
f"Assigning correspondent {selected} to {document}",
extra={"group": logging_group},
)
if (selected or replace) and not dry_run:
logger.info(
f"Assigning correspondent {selected} to {document}",
extra={"group": logging_group},
)
document.correspondent = selected
document.save(update_fields=("correspondent",))
document.correspondent = selected
document.save(update_fields=("correspondent",))
return selected
def set_document_type(
sender,
sender: object,
document: Document,
*,
logging_group=None,
logging_group: object = None,
classifier: DocumentClassifier | None = None,
replace=False,
use_first=True,
suggest=False,
base_url=None,
stdout=None,
style_func=None,
**kwargs,
) -> None:
replace: bool = False,
use_first: bool = True,
dry_run: bool = False,
**kwargs: Any,
) -> DocumentType | None:
"""
Assign a document type to a document based on classifier results.
Args:
document: The document to classify.
logging_group: Optional logging group for structured log output.
classifier: The trained classifier. If None, only rule-based matching runs.
replace: If True, overwrite an existing document type assignment.
use_first: If True, pick the first match when multiple types match.
If False, skip assignment when multiple match.
dry_run: If True, compute and return the selection without saving.
**kwargs: Absorbed for Django signal compatibility (e.g. sender, signal).
Returns:
The document type that was (or would be) assigned, or None if no match
was found or assignment was skipped.
"""
if document.document_type and not replace:
return
return None
potential_document_type = matching.match_document_types(document, classifier)
potential_count = len(potential_document_type)
selected = potential_document_type[0] if potential_document_type else None
potential_document_types = matching.match_document_types(document, classifier)
potential_count = len(potential_document_types)
selected = potential_document_types[0] if potential_document_types else None
if potential_count > 1:
if use_first:
@@ -196,42 +193,64 @@ def set_document_type(
f"not assigning any document type",
extra={"group": logging_group},
)
return
return None
if selected or replace:
if suggest:
_suggestion_printer(
stdout,
style_func,
"document type",
document,
selected,
base_url,
)
else:
logger.info(
f"Assigning document type {selected} to {document}",
extra={"group": logging_group},
)
if (selected or replace) and not dry_run:
logger.info(
f"Assigning document type {selected} to {document}",
extra={"group": logging_group},
)
document.document_type = selected
document.save(update_fields=("document_type",))
document.document_type = selected
document.save(update_fields=("document_type",))
return selected
def set_tags(
sender,
sender: object,
document: Document,
*,
logging_group=None,
logging_group: object = None,
classifier: DocumentClassifier | None = None,
replace=False,
suggest=False,
base_url=None,
stdout=None,
style_func=None,
**kwargs,
) -> None:
replace: bool = False,
dry_run: bool = False,
**kwargs: Any,
) -> tuple[set[Tag], set[Tag]]:
"""
Assign tags to a document based on classifier results.
When replace=True, existing auto-matched and rule-matched tags are removed
before applying the new set (inbox tags and manually-added tags are preserved).
Args:
document: The document to classify.
logging_group: Optional logging group for structured log output.
classifier: The trained classifier. If None, only rule-based matching runs.
replace: If True, remove existing classifier-managed tags before applying
new ones. Inbox tags and manually-added tags are always preserved.
dry_run: If True, compute what would change without saving anything.
**kwargs: Absorbed for Django signal compatibility (e.g. sender, signal).
Returns:
A two-tuple of (tags_added, tags_removed). In non-replace mode,
tags_removed is always an empty set. In dry_run mode, neither set
is applied to the database.
"""
# Compute which tags would be removed under replace mode.
# The filter mirrors the .delete() call below: keep inbox tags and
# manually-added tags (match="" and not auto-matched).
if replace:
tags_to_remove: set[Tag] = set(
document.tags.exclude(
is_inbox_tag=True,
).exclude(
Q(match="") & ~Q(matching_algorithm=Tag.MATCH_AUTO),
),
)
else:
tags_to_remove = set()
if replace and not dry_run:
Document.tags.through.objects.filter(document=document).exclude(
Q(tag__is_inbox_tag=True),
).exclude(
@@ -239,65 +258,53 @@ def set_tags(
).delete()
current_tags = set(document.tags.all())
matched_tags = matching.match_tags(document, classifier)
tags_to_add = set(matched_tags) - current_tags
relevant_tags = set(matched_tags) - current_tags
if suggest:
extra_tags = current_tags - set(matched_tags)
extra_tags = [
t for t in extra_tags if t.matching_algorithm == MatchingModel.MATCH_AUTO
]
if not relevant_tags and not extra_tags:
return
doc_str = style_func.SUCCESS(str(document))
if base_url:
stdout.write(doc_str)
stdout.write(f"{base_url}/documents/{document.pk}")
else:
stdout.write(doc_str + style_func.SUCCESS(f" [{document.pk}]"))
if relevant_tags:
stdout.write("Suggest tags: " + ", ".join([t.name for t in relevant_tags]))
if extra_tags:
stdout.write("Extra tags: " + ", ".join([t.name for t in extra_tags]))
else:
if not relevant_tags:
return
message = 'Tagging "{}" with "{}"'
if tags_to_add and not dry_run:
logger.info(
message.format(document, ", ".join([t.name for t in relevant_tags])),
f'Tagging "{document}" with "{", ".join(t.name for t in tags_to_add)}"',
extra={"group": logging_group},
)
document.add_nested_tags(tags_to_add)
document.add_nested_tags(relevant_tags)
return tags_to_add, tags_to_remove
def set_storage_path(
sender,
sender: object,
document: Document,
*,
logging_group=None,
logging_group: object = None,
classifier: DocumentClassifier | None = None,
replace=False,
use_first=True,
suggest=False,
base_url=None,
stdout=None,
style_func=None,
**kwargs,
) -> None:
replace: bool = False,
use_first: bool = True,
dry_run: bool = False,
**kwargs: Any,
) -> StoragePath | None:
"""
Assign a storage path to a document based on classifier results.
Args:
document: The document to classify.
logging_group: Optional logging group for structured log output.
classifier: The trained classifier. If None, only rule-based matching runs.
replace: If True, overwrite an existing storage path assignment.
use_first: If True, pick the first match when multiple paths match.
If False, skip assignment when multiple match.
dry_run: If True, compute and return the selection without saving.
**kwargs: Absorbed for Django signal compatibility (e.g. sender, signal).
Returns:
The storage path that was (or would be) assigned, or None if no match
was found or assignment was skipped.
"""
if document.storage_path and not replace:
return
return None
potential_storage_path = matching.match_storage_paths(
document,
classifier,
)
potential_count = len(potential_storage_path)
selected = potential_storage_path[0] if potential_storage_path else None
potential_storage_paths = matching.match_storage_paths(document, classifier)
potential_count = len(potential_storage_paths)
selected = potential_storage_paths[0] if potential_storage_paths else None
if potential_count > 1:
if use_first:
@@ -312,26 +319,17 @@ def set_storage_path(
f"not assigning any storage directory",
extra={"group": logging_group},
)
return
return None
if selected or replace:
if suggest:
_suggestion_printer(
stdout,
style_func,
"storage directory",
document,
selected,
base_url,
)
else:
logger.info(
f"Assigning storage path {selected} to {document}",
extra={"group": logging_group},
)
if (selected or replace) and not dry_run:
logger.info(
f"Assigning storage path {selected} to {document}",
extra={"group": logging_group},
)
document.storage_path = selected
document.save(update_fields=("storage_path",))
document.storage_path = selected
document.save(update_fields=("storage_path",))
return selected
# see empty_trash in documents/tasks.py for signal handling
@@ -766,28 +764,6 @@ def run_workflows_updated(
)
def send_websocket_document_updated(
sender,
document: Document,
**kwargs,
) -> None:
# At this point, workflows may already have applied additional changes.
document.refresh_from_db()
from documents.data_models import DocumentMetadataOverrides
doc_overrides = DocumentMetadataOverrides.from_document(document)
with DocumentsStatusManager() as status_mgr:
status_mgr.send_document_updated(
document_id=document.id,
modified=DRF_DATETIME_FIELD.to_representation(document.modified),
owner_id=doc_overrides.owner_id,
users_can_view=doc_overrides.view_users,
groups_can_view=doc_overrides.view_groups,
)
def run_workflows(
trigger_type: WorkflowTrigger.WorkflowTriggerType,
document: Document | ConsumableDocument,
@@ -1061,11 +1037,7 @@ def add_or_update_document_in_llm_index(sender, document, **kwargs):
@receiver(models.signals.post_delete, sender=Document)
def delete_document_from_llm_index(
sender: Any,
instance: Document,
**kwargs: Any,
) -> None:
def delete_document_from_llm_index(sender, instance: Document, **kwargs):
"""
Delete a document from the LLM index when it is deleted.
"""

View File

@@ -60,7 +60,6 @@ from documents.sanity_checker import SanityCheckFailedException
from documents.signals import document_updated
from documents.signals.handlers import cleanup_document_deletion
from documents.signals.handlers import run_workflows
from documents.signals.handlers import send_websocket_document_updated
from documents.workflows.utils import get_workflows_for_trigger
from paperless.config import AIConfig
from paperless_ai.indexing import llm_index_add_or_update_document
@@ -542,11 +541,6 @@ def check_scheduled_workflows() -> None:
workflow_to_run=workflow,
document=document,
)
# Scheduled workflows dont send document_updated signal, so send a websocket update here to ensure clients are updated
send_websocket_document_updated(
sender=None,
document=document,
)
def update_document_parent_tags(tag: Tag, new_parent: Tag) -> None:

View File

@@ -1,17 +1,65 @@
from factory import Faker
"""
Factory-boy factories for documents app models.
"""
from __future__ import annotations
import factory
from factory.django import DjangoModelFactory
from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
from documents.models import MatchingModel
from documents.models import StoragePath
from documents.models import Tag
class CorrespondentFactory(DjangoModelFactory):
class Meta:
model = Correspondent
name = Faker("name")
name = factory.Faker("company")
match = ""
matching_algorithm = MatchingModel.MATCH_NONE
class DocumentTypeFactory(DjangoModelFactory):
class Meta:
model = DocumentType
name = factory.Faker("bs")
match = ""
matching_algorithm = MatchingModel.MATCH_NONE
class TagFactory(DjangoModelFactory):
class Meta:
model = Tag
name = factory.Faker("word")
match = ""
matching_algorithm = MatchingModel.MATCH_NONE
is_inbox_tag = False
class StoragePathFactory(DjangoModelFactory):
class Meta:
model = StoragePath
name = factory.Faker("file_path", depth=2, extension="")
path = factory.LazyAttribute(lambda o: f"{o.name}/{{title}}")
match = ""
matching_algorithm = MatchingModel.MATCH_NONE
class DocumentFactory(DjangoModelFactory):
class Meta:
model = Document
title = factory.Faker("sentence", nb_words=4)
checksum = factory.Faker("md5")
content = factory.Faker("paragraph")
correspondent = None
document_type = None
storage_path = None

View File

@@ -1270,11 +1270,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
input_doc, overrides = self.get_last_consume_delay_call_args()
self.assertEqual(input_doc.original_file.name, "simple.pdf")
self.assertTrue(
input_doc.original_file.resolve(strict=False).is_relative_to(
Path(settings.SCRATCH_DIR).resolve(strict=False),
),
)
self.assertIn(Path(settings.SCRATCH_DIR), input_doc.original_file.parents)
self.assertIsNone(overrides.title)
self.assertIsNone(overrides.correspondent_id)
self.assertIsNone(overrides.document_type_id)
@@ -1355,11 +1351,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
input_doc, overrides = self.get_last_consume_delay_call_args()
self.assertEqual(input_doc.original_file.name, "simple.pdf")
self.assertTrue(
input_doc.original_file.resolve(strict=False).is_relative_to(
Path(settings.SCRATCH_DIR).resolve(strict=False),
),
)
self.assertIn(Path(settings.SCRATCH_DIR), input_doc.original_file.parents)
self.assertIsNone(overrides.title)
self.assertIsNone(overrides.correspondent_id)
self.assertIsNone(overrides.document_type_id)

View File

@@ -1,298 +1,358 @@
"""
Tests for the document_retagger management command.
"""
from __future__ import annotations
import pytest
from django.core.management import call_command
from django.core.management.base import CommandError
from django.test import TestCase
from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
from documents.models import MatchingModel
from documents.models import StoragePath
from documents.models import Tag
from documents.tests.factories import CorrespondentFactory
from documents.tests.factories import DocumentFactory
from documents.tests.factories import DocumentTypeFactory
from documents.tests.factories import StoragePathFactory
from documents.tests.factories import TagFactory
from documents.tests.utils import DirectoriesMixin
# ---------------------------------------------------------------------------
# Module-level type aliases
# ---------------------------------------------------------------------------
StoragePathTuple = tuple[StoragePath, StoragePath, StoragePath]
TagTuple = tuple[Tag, Tag, Tag, Tag, Tag]
CorrespondentTuple = tuple[Correspondent, Correspondent]
DocumentTypeTuple = tuple[DocumentType, DocumentType]
DocumentTuple = tuple[Document, Document, Document, Document]
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------
@pytest.fixture()
def storage_paths(db) -> StoragePathTuple:
"""Three storage paths with varying match rules."""
sp1 = StoragePathFactory(
path="{created_data}/{title}",
match="auto document",
matching_algorithm=MatchingModel.MATCH_LITERAL,
)
sp2 = StoragePathFactory(
path="{title}",
match="^first|^unrelated",
matching_algorithm=MatchingModel.MATCH_REGEX,
)
sp3 = StoragePathFactory(
path="{title}",
match="^blah",
matching_algorithm=MatchingModel.MATCH_REGEX,
)
return sp1, sp2, sp3
@pytest.fixture()
def tags(db) -> TagTuple:
"""Tags covering the common matching scenarios."""
tag_first = TagFactory(match="first", matching_algorithm=Tag.MATCH_ANY)
tag_second = TagFactory(match="second", matching_algorithm=Tag.MATCH_ANY)
tag_inbox = TagFactory(is_inbox_tag=True)
tag_no_match = TagFactory()
tag_auto = TagFactory(matching_algorithm=Tag.MATCH_AUTO)
return tag_first, tag_second, tag_inbox, tag_no_match, tag_auto
@pytest.fixture()
def correspondents(db) -> CorrespondentTuple:
"""Two correspondents matching 'first' and 'second' content."""
c_first = CorrespondentFactory(
match="first",
matching_algorithm=MatchingModel.MATCH_ANY,
)
c_second = CorrespondentFactory(
match="second",
matching_algorithm=MatchingModel.MATCH_ANY,
)
return c_first, c_second
@pytest.fixture()
def document_types(db) -> DocumentTypeTuple:
"""Two document types matching 'first' and 'second' content."""
dt_first = DocumentTypeFactory(
match="first",
matching_algorithm=MatchingModel.MATCH_ANY,
)
dt_second = DocumentTypeFactory(
match="second",
matching_algorithm=MatchingModel.MATCH_ANY,
)
return dt_first, dt_second
@pytest.fixture()
def documents(storage_paths: StoragePathTuple, tags: TagTuple) -> DocumentTuple:
"""Four documents with varied content used across most retagger tests."""
_, _, sp3 = storage_paths
_, _, tag_inbox, tag_no_match, tag_auto = tags
d1 = DocumentFactory(checksum="A", title="A", content="first document")
d2 = DocumentFactory(checksum="B", title="B", content="second document")
d3 = DocumentFactory(
checksum="C",
title="C",
content="unrelated document",
storage_path=sp3,
)
d4 = DocumentFactory(checksum="D", title="D", content="auto document")
d3.tags.add(tag_inbox, tag_no_match)
d4.tags.add(tag_auto)
return d1, d2, d3, d4
def _get_docs() -> DocumentTuple:
return (
Document.objects.get(title="A"),
Document.objects.get(title="B"),
Document.objects.get(title="C"),
Document.objects.get(title="D"),
)
# ---------------------------------------------------------------------------
# Tag assignment
# ---------------------------------------------------------------------------
@pytest.mark.management
class TestRetagger(DirectoriesMixin, TestCase):
def make_models(self) -> None:
self.sp1 = StoragePath.objects.create(
name="dummy a",
path="{created_data}/{title}",
match="auto document",
matching_algorithm=StoragePath.MATCH_LITERAL,
)
self.sp2 = StoragePath.objects.create(
name="dummy b",
path="{title}",
match="^first|^unrelated",
matching_algorithm=StoragePath.MATCH_REGEX,
)
self.sp3 = StoragePath.objects.create(
name="dummy c",
path="{title}",
match="^blah",
matching_algorithm=StoragePath.MATCH_REGEX,
)
self.d1 = Document.objects.create(
checksum="A",
title="A",
content="first document",
)
self.d2 = Document.objects.create(
checksum="B",
title="B",
content="second document",
)
self.d3 = Document.objects.create(
checksum="C",
title="C",
content="unrelated document",
storage_path=self.sp3,
)
self.d4 = Document.objects.create(
checksum="D",
title="D",
content="auto document",
)
self.tag_first = Tag.objects.create(
name="tag1",
match="first",
matching_algorithm=Tag.MATCH_ANY,
)
self.tag_second = Tag.objects.create(
name="tag2",
match="second",
matching_algorithm=Tag.MATCH_ANY,
)
self.tag_inbox = Tag.objects.create(name="test", is_inbox_tag=True)
self.tag_no_match = Tag.objects.create(name="test2")
self.tag_auto = Tag.objects.create(
name="tagauto",
matching_algorithm=Tag.MATCH_AUTO,
)
self.d3.tags.add(self.tag_inbox)
self.d3.tags.add(self.tag_no_match)
self.d4.tags.add(self.tag_auto)
self.correspondent_first = Correspondent.objects.create(
name="c1",
match="first",
matching_algorithm=Correspondent.MATCH_ANY,
)
self.correspondent_second = Correspondent.objects.create(
name="c2",
match="second",
matching_algorithm=Correspondent.MATCH_ANY,
)
self.doctype_first = DocumentType.objects.create(
name="dt1",
match="first",
matching_algorithm=DocumentType.MATCH_ANY,
)
self.doctype_second = DocumentType.objects.create(
name="dt2",
match="second",
matching_algorithm=DocumentType.MATCH_ANY,
)
def get_updated_docs(self):
return (
Document.objects.get(title="A"),
Document.objects.get(title="B"),
Document.objects.get(title="C"),
Document.objects.get(title="D"),
)
def setUp(self) -> None:
super().setUp()
self.make_models()
def test_add_tags(self) -> None:
@pytest.mark.django_db
class TestRetaggerTags(DirectoriesMixin):
@pytest.mark.usefixtures("documents")
def test_add_tags(self, tags: TagTuple) -> None:
tag_first, tag_second, *_ = tags
call_command("document_retagger", "--tags")
d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
d_first, d_second, d_unrelated, d_auto = _get_docs()
self.assertEqual(d_first.tags.count(), 1)
self.assertEqual(d_second.tags.count(), 1)
self.assertEqual(d_unrelated.tags.count(), 2)
self.assertEqual(d_auto.tags.count(), 1)
assert d_first.tags.count() == 1
assert d_second.tags.count() == 1
assert d_unrelated.tags.count() == 2
assert d_auto.tags.count() == 1
assert d_first.tags.first() == tag_first
assert d_second.tags.first() == tag_second
self.assertEqual(d_first.tags.first(), self.tag_first)
self.assertEqual(d_second.tags.first(), self.tag_second)
def test_add_type(self) -> None:
call_command("document_retagger", "--document_type")
d_first, d_second, _, _ = self.get_updated_docs()
self.assertEqual(d_first.document_type, self.doctype_first)
self.assertEqual(d_second.document_type, self.doctype_second)
def test_add_correspondent(self) -> None:
call_command("document_retagger", "--correspondent")
d_first, d_second, _, _ = self.get_updated_docs()
self.assertEqual(d_first.correspondent, self.correspondent_first)
self.assertEqual(d_second.correspondent, self.correspondent_second)
def test_overwrite_preserve_inbox(self) -> None:
self.d1.tags.add(self.tag_second)
def test_overwrite_removes_stale_tags_and_preserves_inbox(
self,
documents: DocumentTuple,
tags: TagTuple,
) -> None:
d1, *_ = documents
tag_first, tag_second, tag_inbox, tag_no_match, _ = tags
d1.tags.add(tag_second)
call_command("document_retagger", "--tags", "--overwrite")
d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
d_first, d_second, d_unrelated, d_auto = _get_docs()
self.assertIsNotNone(Tag.objects.get(id=self.tag_second.id))
assert Tag.objects.filter(id=tag_second.id).exists()
assert list(d_first.tags.values_list("id", flat=True)) == [tag_first.id]
assert list(d_second.tags.values_list("id", flat=True)) == [tag_second.id]
assert set(d_unrelated.tags.values_list("id", flat=True)) == {
tag_inbox.id,
tag_no_match.id,
}
assert d_auto.tags.count() == 0
self.assertCountEqual(
[tag.id for tag in d_first.tags.all()],
[self.tag_first.id],
)
self.assertCountEqual(
[tag.id for tag in d_second.tags.all()],
[self.tag_second.id],
)
self.assertCountEqual(
[tag.id for tag in d_unrelated.tags.all()],
[self.tag_inbox.id, self.tag_no_match.id],
)
self.assertEqual(d_auto.tags.count(), 0)
@pytest.mark.usefixtures("documents")
@pytest.mark.parametrize(
"extra_args",
[
pytest.param([], id="no_base_url"),
pytest.param(["--base-url=http://localhost"], id="with_base_url"),
],
)
def test_suggest_does_not_apply_tags(self, extra_args: list[str]) -> None:
call_command("document_retagger", "--tags", "--suggest", *extra_args)
d_first, d_second, _, d_auto = _get_docs()
def test_add_tags_suggest(self) -> None:
call_command("document_retagger", "--tags", "--suggest")
d_first, d_second, _, d_auto = self.get_updated_docs()
assert d_first.tags.count() == 0
assert d_second.tags.count() == 0
assert d_auto.tags.count() == 1
self.assertEqual(d_first.tags.count(), 0)
self.assertEqual(d_second.tags.count(), 0)
self.assertEqual(d_auto.tags.count(), 1)
def test_add_type_suggest(self) -> None:
call_command("document_retagger", "--document_type", "--suggest")
d_first, d_second, _, _ = self.get_updated_docs()
# ---------------------------------------------------------------------------
# Document type assignment
# ---------------------------------------------------------------------------
self.assertIsNone(d_first.document_type)
self.assertIsNone(d_second.document_type)
def test_add_correspondent_suggest(self) -> None:
call_command("document_retagger", "--correspondent", "--suggest")
d_first, d_second, _, _ = self.get_updated_docs()
@pytest.mark.management
@pytest.mark.django_db
class TestRetaggerDocumentType(DirectoriesMixin):
@pytest.mark.usefixtures("documents")
def test_add_type(self, document_types: DocumentTypeTuple) -> None:
dt_first, dt_second = document_types
call_command("document_retagger", "--document_type")
d_first, d_second, _, _ = _get_docs()
self.assertIsNone(d_first.correspondent)
self.assertIsNone(d_second.correspondent)
assert d_first.document_type == dt_first
assert d_second.document_type == dt_second
def test_add_tags_suggest_url(self) -> None:
call_command(
"document_retagger",
"--tags",
"--suggest",
"--base-url=http://localhost",
)
d_first, d_second, _, d_auto = self.get_updated_docs()
@pytest.mark.usefixtures("documents", "document_types")
@pytest.mark.parametrize(
"extra_args",
[
pytest.param([], id="no_base_url"),
pytest.param(["--base-url=http://localhost"], id="with_base_url"),
],
)
def test_suggest_does_not_apply_document_type(self, extra_args: list[str]) -> None:
call_command("document_retagger", "--document_type", "--suggest", *extra_args)
d_first, d_second, _, _ = _get_docs()
self.assertEqual(d_first.tags.count(), 0)
self.assertEqual(d_second.tags.count(), 0)
self.assertEqual(d_auto.tags.count(), 1)
assert d_first.document_type is None
assert d_second.document_type is None
def test_add_type_suggest_url(self) -> None:
call_command(
"document_retagger",
"--document_type",
"--suggest",
"--base-url=http://localhost",
)
d_first, d_second, _, _ = self.get_updated_docs()
self.assertIsNone(d_first.document_type)
self.assertIsNone(d_second.document_type)
# ---------------------------------------------------------------------------
# Correspondent assignment
# ---------------------------------------------------------------------------
def test_add_correspondent_suggest_url(self) -> None:
call_command(
"document_retagger",
"--correspondent",
"--suggest",
"--base-url=http://localhost",
)
d_first, d_second, _, _ = self.get_updated_docs()
self.assertIsNone(d_first.correspondent)
self.assertIsNone(d_second.correspondent)
@pytest.mark.management
@pytest.mark.django_db
class TestRetaggerCorrespondent(DirectoriesMixin):
@pytest.mark.usefixtures("documents")
def test_add_correspondent(self, correspondents: CorrespondentTuple) -> None:
c_first, c_second = correspondents
call_command("document_retagger", "--correspondent")
d_first, d_second, _, _ = _get_docs()
def test_add_storage_path(self) -> None:
assert d_first.correspondent == c_first
assert d_second.correspondent == c_second
@pytest.mark.usefixtures("documents", "correspondents")
@pytest.mark.parametrize(
"extra_args",
[
pytest.param([], id="no_base_url"),
pytest.param(["--base-url=http://localhost"], id="with_base_url"),
],
)
def test_suggest_does_not_apply_correspondent(self, extra_args: list[str]) -> None:
call_command("document_retagger", "--correspondent", "--suggest", *extra_args)
d_first, d_second, _, _ = _get_docs()
assert d_first.correspondent is None
assert d_second.correspondent is None
# ---------------------------------------------------------------------------
# Storage path assignment
# ---------------------------------------------------------------------------
@pytest.mark.management
@pytest.mark.django_db
class TestRetaggerStoragePath(DirectoriesMixin):
@pytest.mark.usefixtures("documents")
def test_add_storage_path(self, storage_paths: StoragePathTuple) -> None:
"""
GIVEN:
- 2 storage paths with documents which match them
- 1 document which matches but has a storage path
WHEN:
- document retagger is called
THEN:
- Matching document's storage paths updated
- Non-matching documents have no storage path
- Existing storage patch left unchanged
GIVEN documents matching various storage path rules
WHEN document_retagger --storage_path is called
THEN matching documents get the correct path; existing path is unchanged
"""
call_command(
"document_retagger",
"--storage_path",
)
d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
sp1, sp2, sp3 = storage_paths
call_command("document_retagger", "--storage_path")
d_first, d_second, d_unrelated, d_auto = _get_docs()
self.assertEqual(d_first.storage_path, self.sp2)
self.assertEqual(d_auto.storage_path, self.sp1)
self.assertIsNone(d_second.storage_path)
self.assertEqual(d_unrelated.storage_path, self.sp3)
assert d_first.storage_path == sp2
assert d_auto.storage_path == sp1
assert d_second.storage_path is None
assert d_unrelated.storage_path == sp3
def test_overwrite_storage_path(self) -> None:
@pytest.mark.usefixtures("documents")
def test_overwrite_storage_path(self, storage_paths: StoragePathTuple) -> None:
"""
GIVEN:
- 2 storage paths with documents which match them
- 1 document which matches but has a storage path
WHEN:
- document retagger is called with overwrite
THEN:
- Matching document's storage paths updated
- Non-matching documents have no storage path
- Existing storage patch overwritten
GIVEN a document with an existing storage path that matches a different rule
WHEN document_retagger --storage_path --overwrite is called
THEN the existing path is replaced by the newly matched path
"""
sp1, sp2, _ = storage_paths
call_command("document_retagger", "--storage_path", "--overwrite")
d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
d_first, d_second, d_unrelated, d_auto = _get_docs()
self.assertEqual(d_first.storage_path, self.sp2)
self.assertEqual(d_auto.storage_path, self.sp1)
self.assertIsNone(d_second.storage_path)
self.assertEqual(d_unrelated.storage_path, self.sp2)
assert d_first.storage_path == sp2
assert d_auto.storage_path == sp1
assert d_second.storage_path is None
assert d_unrelated.storage_path == sp2
def test_id_range_parameter(self) -> None:
commandOutput = ""
Document.objects.create(
checksum="E",
title="E",
content="NOT the first document",
)
call_command("document_retagger", "--tags", "--id-range", "1", "2")
# The retagger shouldn`t apply the 'first' tag to our new document
self.assertEqual(Document.objects.filter(tags__id=self.tag_first.id).count(), 1)
try:
commandOutput = call_command("document_retagger", "--tags", "--id-range")
except CommandError:
# Just ignore the error
None
self.assertIn(commandOutput, "Error: argument --id-range: expected 2 arguments")
# ---------------------------------------------------------------------------
# ID range filtering
# ---------------------------------------------------------------------------
try:
commandOutput = call_command(
"document_retagger",
"--tags",
"--id-range",
"a",
"b",
)
except CommandError:
# Just ignore the error
None
self.assertIn(commandOutput, "error: argument --id-range: invalid int value:")
call_command("document_retagger", "--tags", "--id-range", "1", "9999")
# Now we should have 2 documents
self.assertEqual(Document.objects.filter(tags__id=self.tag_first.id).count(), 2)
@pytest.mark.management
@pytest.mark.django_db
class TestRetaggerIdRange(DirectoriesMixin):
@pytest.mark.usefixtures("documents")
@pytest.mark.parametrize(
("id_range_args", "expected_count"),
[
pytest.param(["1", "2"], 1, id="narrow_range_limits_scope"),
pytest.param(["1", "9999"], 2, id="wide_range_tags_all_matches"),
],
)
def test_id_range_limits_scope(
self,
tags: TagTuple,
id_range_args: list[str],
expected_count: int,
) -> None:
DocumentFactory(content="NOT the first document")
call_command("document_retagger", "--tags", "--id-range", *id_range_args)
tag_first, *_ = tags
assert Document.objects.filter(tags__id=tag_first.id).count() == expected_count
@pytest.mark.usefixtures("documents")
@pytest.mark.parametrize(
"args",
[
pytest.param(["--tags", "--id-range"], id="missing_both_values"),
pytest.param(["--tags", "--id-range", "a", "b"], id="non_integer_values"),
],
)
def test_id_range_invalid_arguments_raise(self, args: list[str]) -> None:
with pytest.raises((CommandError, SystemExit)):
call_command("document_retagger", *args)
# ---------------------------------------------------------------------------
# Edge cases
# ---------------------------------------------------------------------------
@pytest.mark.management
@pytest.mark.django_db
class TestRetaggerEdgeCases(DirectoriesMixin):
@pytest.mark.usefixtures("documents")
def test_no_targets_exits_cleanly(self) -> None:
"""Calling the retagger with no classifier targets should not raise."""
call_command("document_retagger")
@pytest.mark.usefixtures("documents")
def test_inbox_only_skips_non_inbox_documents(self) -> None:
"""--inbox-only must restrict processing to documents with an inbox tag."""
call_command("document_retagger", "--tags", "--inbox-only")
d_first, _, d_unrelated, _ = _get_docs()
assert d_first.tags.count() == 0
assert d_unrelated.tags.count() == 2

View File

@@ -643,9 +643,7 @@ class TestWorkflows(
expected_str = f"Document did not match {w}"
self.assertIn(expected_str, cm.output[0])
expected_str = (
f"Document path {Path(test_file).resolve(strict=False)} does not match"
)
expected_str = f"Document path {test_file} does not match"
self.assertIn(expected_str, cm.output[1])
def test_workflow_no_match_mail_rule(self) -> None:
@@ -1970,36 +1968,6 @@ class TestWorkflows(
doc.refresh_from_db()
self.assertEqual(doc.owner, self.user2)
@mock.patch("documents.tasks.send_websocket_document_updated")
def test_workflow_scheduled_trigger_sends_websocket_update(
self,
mock_send_websocket_document_updated,
) -> None:
trigger = WorkflowTrigger.objects.create(
type=WorkflowTrigger.WorkflowTriggerType.SCHEDULED,
schedule_offset_days=1,
schedule_date_field=WorkflowTrigger.ScheduleDateField.CREATED,
)
action = WorkflowAction.objects.create(assign_owner=self.user2)
workflow = Workflow.objects.create(name="Workflow 1", order=0)
workflow.triggers.add(trigger)
workflow.actions.add(action)
doc = Document.objects.create(
title="sample test",
correspondent=self.c,
original_filename="sample.pdf",
created=timezone.now() - timedelta(days=2),
)
tasks.check_scheduled_workflows()
self.assertEqual(mock_send_websocket_document_updated.call_count, 1)
self.assertEqual(
mock_send_websocket_document_updated.call_args.kwargs["document"].pk,
doc.pk,
)
def test_workflow_scheduled_trigger_added(self) -> None:
"""
GIVEN:

View File

@@ -1,5 +1,4 @@
import json
from typing import Any
from asgiref.sync import async_to_sync
from channels.exceptions import AcceptConnection
@@ -53,10 +52,3 @@ class StatusConsumer(WebsocketConsumer):
self.close()
else:
self.send(json.dumps(event))
def document_updated(self, event: Any) -> None:
if not self._authenticated():
self.close()
else:
if self._can_view(event["data"]):
self.send(json.dumps(event))

View File

@@ -48,20 +48,6 @@ class TestWebSockets(TestCase):
mock_close.assert_called_once()
mock_close.reset_mock()
message = {
"type": "document_updated",
"data": {"document_id": 10, "modified": "2026-02-17T00:00:00Z"},
}
await channel_layer.group_send(
"status_updates",
message,
)
await communicator.receive_nothing()
mock_close.assert_called_once()
mock_close.reset_mock()
message = {"type": "documents_deleted", "data": {"documents": [1, 2, 3]}}
await channel_layer.group_send(
@@ -172,40 +158,6 @@ class TestWebSockets(TestCase):
await communicator.disconnect()
@mock.patch("paperless.consumers.StatusConsumer._can_view")
@mock.patch("paperless.consumers.StatusConsumer._authenticated")
async def test_receive_document_updated(self, _authenticated, _can_view) -> None:
_authenticated.return_value = True
_can_view.return_value = True
communicator = WebsocketCommunicator(application, "/ws/status/")
connected, _ = await communicator.connect()
self.assertTrue(connected)
message = {
"type": "document_updated",
"data": {
"document_id": 10,
"modified": "2026-02-17T00:00:00Z",
"owner_id": 1,
"users_can_view": [1],
"groups_can_view": [],
},
}
channel_layer = get_channel_layer()
assert channel_layer is not None
await channel_layer.group_send(
"status_updates",
message,
)
response = await communicator.receive_json_from()
self.assertEqual(response, message)
await communicator.disconnect()
@mock.patch("channels.layers.InMemoryChannelLayer.group_send")
def test_manager_send_progress(self, mock_group_send) -> None:
with ProgressManager(task_id="test") as manager:
@@ -238,10 +190,7 @@ class TestWebSockets(TestCase):
)
@mock.patch("channels.layers.InMemoryChannelLayer.group_send")
def test_manager_send_documents_deleted(
self,
mock_group_send: mock.MagicMock,
) -> None:
def test_manager_send_documents_deleted(self, mock_group_send) -> None:
with DocumentsStatusManager() as manager:
manager.send_documents_deleted([1, 2, 3])