mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-02-28 06:16:23 +00:00
Compare commits
21 Commits
feature-im
...
feature-li
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
027e7717fc | ||
|
|
4c47b6768e | ||
|
|
ab55d1d4bf | ||
|
|
763d352ec8 | ||
|
|
99e2f9c30c | ||
|
|
1088e28d9a | ||
|
|
29cb7262bc | ||
|
|
4ec58f7e85 | ||
|
|
56eb093819 | ||
|
|
d20bf32ef0 | ||
|
|
ab4cd99e61 | ||
|
|
292c41f961 | ||
|
|
f853f424e0 | ||
|
|
30149c8bd4 | ||
|
|
600614cf5a | ||
|
|
b49200e060 | ||
|
|
a536dd14c5 | ||
|
|
34033029ba | ||
|
|
77f7f437bb | ||
|
|
081bed32d0 | ||
|
|
4032d81cac |
@@ -440,9 +440,6 @@ src/documents/permissions.py:0: error: Item "list[str]" of "Any | list[str] | Qu
|
||||
src/documents/permissions.py:0: error: Item "list[str]" of "Any | list[str] | QuerySet[User, User]" has no attribute "exists" [union-attr]
|
||||
src/documents/permissions.py:0: error: Missing type parameters for generic type "QuerySet" [type-arg]
|
||||
src/documents/permissions.py:0: error: Missing type parameters for generic type "dict" [type-arg]
|
||||
src/documents/plugins/helpers.py:0: error: "Collection[str]" has no attribute "update" [attr-defined]
|
||||
src/documents/plugins/helpers.py:0: error: Argument 1 to "send" of "BaseStatusManager" has incompatible type "dict[str, Collection[str]]"; expected "dict[str, str | int | None]" [arg-type]
|
||||
src/documents/plugins/helpers.py:0: error: Argument 1 to "send" of "BaseStatusManager" has incompatible type "dict[str, Collection[str]]"; expected "dict[str, str | int | None]" [arg-type]
|
||||
src/documents/plugins/helpers.py:0: error: Function is missing a type annotation [no-untyped-def]
|
||||
src/documents/plugins/helpers.py:0: error: Function is missing a type annotation [no-untyped-def]
|
||||
src/documents/plugins/helpers.py:0: error: Skipping analyzing "channels_redis.pubsub": module is installed, but missing library stubs or py.typed marker [import-untyped]
|
||||
@@ -667,7 +664,6 @@ src/documents/signals/handlers.py:0: error: Argument 3 to "validate_move" has in
|
||||
src/documents/signals/handlers.py:0: error: Argument 5 to "_suggestion_printer" has incompatible type "Any | None"; expected "MatchingModel" [arg-type]
|
||||
src/documents/signals/handlers.py:0: error: Argument 5 to "_suggestion_printer" has incompatible type "Any | None"; expected "MatchingModel" [arg-type]
|
||||
src/documents/signals/handlers.py:0: error: Argument 5 to "_suggestion_printer" has incompatible type "Any | None"; expected "MatchingModel" [arg-type]
|
||||
src/documents/signals/handlers.py:0: error: Function is missing a return type annotation [no-untyped-def]
|
||||
src/documents/signals/handlers.py:0: error: Function is missing a type annotation [no-untyped-def]
|
||||
src/documents/signals/handlers.py:0: error: Function is missing a type annotation [no-untyped-def]
|
||||
src/documents/signals/handlers.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def]
|
||||
@@ -1928,6 +1924,7 @@ src/paperless/tests/test_websockets.py:0: error: Item "None" of "BaseChannelLaye
|
||||
src/paperless/tests/test_websockets.py:0: error: Item "None" of "BaseChannelLayer | None" has no attribute "group_send" [union-attr]
|
||||
src/paperless/tests/test_websockets.py:0: error: Item "None" of "BaseChannelLayer | None" has no attribute "group_send" [union-attr]
|
||||
src/paperless/tests/test_websockets.py:0: error: Item "None" of "BaseChannelLayer | None" has no attribute "group_send" [union-attr]
|
||||
src/paperless/tests/test_websockets.py:0: error: Item "None" of "BaseChannelLayer | None" has no attribute "group_send" [union-attr]
|
||||
src/paperless/tests/test_websockets.py:0: error: TypedDict "_WebsocketTestScope" has no key "user" [typeddict-item]
|
||||
src/paperless/tests/test_websockets.py:0: error: TypedDict "_WebsocketTestScope" has no key "user" [typeddict-item]
|
||||
src/paperless/tests/test_websockets.py:0: error: TypedDict "_WebsocketTestScope" has no key "user" [typeddict-item]
|
||||
|
||||
@@ -65,6 +65,7 @@ import { TagService } from 'src/app/services/rest/tag.service'
|
||||
import { UserService } from 'src/app/services/rest/user.service'
|
||||
import { SettingsService } from 'src/app/services/settings.service'
|
||||
import { ToastService } from 'src/app/services/toast.service'
|
||||
import { WebsocketStatusService } from 'src/app/services/websocket-status.service'
|
||||
import { environment } from 'src/environments/environment'
|
||||
import { ConfirmDialogComponent } from '../common/confirm-dialog/confirm-dialog.component'
|
||||
import { PasswordRemovalConfirmDialogComponent } from '../common/confirm-dialog/password-removal-confirm-dialog/password-removal-confirm-dialog.component'
|
||||
@@ -83,9 +84,9 @@ const doc: Document = {
|
||||
storage_path: 31,
|
||||
tags: [41, 42, 43],
|
||||
content: 'text content',
|
||||
added: new Date('May 4, 2014 03:24:00'),
|
||||
created: new Date('May 4, 2014 03:24:00'),
|
||||
modified: new Date('May 4, 2014 03:24:00'),
|
||||
added: new Date('May 4, 2014 03:24:00').toISOString(),
|
||||
created: new Date('May 4, 2014 03:24:00').toISOString(),
|
||||
modified: new Date('May 4, 2014 03:24:00').toISOString(),
|
||||
archive_serial_number: null,
|
||||
original_file_name: 'file.pdf',
|
||||
owner: null,
|
||||
@@ -327,6 +328,29 @@ describe('DocumentDetailComponent', () => {
|
||||
expect(component.activeNavID).toEqual(component.DocumentDetailNavIDs.Notes)
|
||||
})
|
||||
|
||||
it('should switch from preview to details when pdf preview enters the DOM', fakeAsync(() => {
|
||||
component.nav = {
|
||||
activeId: component.DocumentDetailNavIDs.Preview,
|
||||
select: jest.fn(),
|
||||
} as any
|
||||
;(component as any).pdfPreview = {
|
||||
nativeElement: { offsetParent: {} },
|
||||
}
|
||||
|
||||
tick()
|
||||
expect(component.nav.select).toHaveBeenCalledWith(
|
||||
component.DocumentDetailNavIDs.Details
|
||||
)
|
||||
}))
|
||||
|
||||
it('should forward title key up value to titleSubject', () => {
|
||||
const subjectSpy = jest.spyOn(component.titleSubject, 'next')
|
||||
|
||||
component.titleKeyUp({ target: { value: 'Updated title' } })
|
||||
|
||||
expect(subjectSpy).toHaveBeenCalledWith('Updated title')
|
||||
})
|
||||
|
||||
it('should change url on tab switch', () => {
|
||||
initNormally()
|
||||
const navigateSpy = jest.spyOn(router, 'navigate')
|
||||
@@ -524,7 +548,7 @@ describe('DocumentDetailComponent', () => {
|
||||
jest.spyOn(documentService, 'get').mockReturnValue(
|
||||
of({
|
||||
...doc,
|
||||
modified: new Date('2024-01-02T00:00:00Z'),
|
||||
modified: '2024-01-02T00:00:00Z',
|
||||
duplicate_documents: updatedDuplicates,
|
||||
})
|
||||
)
|
||||
@@ -1386,17 +1410,21 @@ describe('DocumentDetailComponent', () => {
|
||||
expect(errorSpy).toHaveBeenCalled()
|
||||
})
|
||||
|
||||
it('should warn when open document does not match doc retrieved from backend on init', () => {
|
||||
it('should show incoming update modal when open local draft is older than backend on init', () => {
|
||||
let openModal: NgbModalRef
|
||||
modalService.activeInstances.subscribe((modals) => (openModal = modals[0]))
|
||||
const modalSpy = jest.spyOn(modalService, 'open')
|
||||
const openDoc = Object.assign({}, doc)
|
||||
const openDoc = Object.assign({}, doc, {
|
||||
__changedFields: ['title'],
|
||||
})
|
||||
// simulate a document being modified elsewhere and db updated
|
||||
doc.modified = new Date()
|
||||
const remoteDoc = Object.assign({}, doc, {
|
||||
modified: new Date(new Date(doc.modified).getTime() + 1000).toISOString(),
|
||||
})
|
||||
jest
|
||||
.spyOn(activatedRoute, 'paramMap', 'get')
|
||||
.mockReturnValue(of(convertToParamMap({ id: 3, section: 'details' })))
|
||||
jest.spyOn(documentService, 'get').mockReturnValueOnce(of(doc))
|
||||
jest.spyOn(documentService, 'get').mockReturnValueOnce(of(remoteDoc))
|
||||
jest.spyOn(openDocumentsService, 'getOpenDocument').mockReturnValue(openDoc)
|
||||
jest.spyOn(customFieldsService, 'listAll').mockReturnValue(
|
||||
of({
|
||||
@@ -1406,11 +1434,185 @@ describe('DocumentDetailComponent', () => {
|
||||
})
|
||||
)
|
||||
fixture.detectChanges() // calls ngOnInit
|
||||
expect(modalSpy).toHaveBeenCalledWith(ConfirmDialogComponent)
|
||||
const closeSpy = jest.spyOn(openModal, 'close')
|
||||
expect(modalSpy).toHaveBeenCalledWith(ConfirmDialogComponent, {
|
||||
backdrop: 'static',
|
||||
})
|
||||
const confirmDialog = openModal.componentInstance as ConfirmDialogComponent
|
||||
confirmDialog.confirmClicked.next(confirmDialog)
|
||||
expect(closeSpy).toHaveBeenCalled()
|
||||
expect(confirmDialog.messageBold).toContain('Document was updated at')
|
||||
})
|
||||
|
||||
it('should react to websocket document updated notifications', () => {
|
||||
initNormally()
|
||||
const updateMessage = {
|
||||
document_id: component.documentId,
|
||||
modified: '2026-02-17T00:00:00Z',
|
||||
owner_id: 1,
|
||||
}
|
||||
const handleSpy = jest
|
||||
.spyOn(component as any, 'handleIncomingDocumentUpdated')
|
||||
.mockImplementation(() => {})
|
||||
const websocketStatusService = TestBed.inject(WebsocketStatusService)
|
||||
|
||||
websocketStatusService.handleDocumentUpdated(updateMessage)
|
||||
|
||||
expect(handleSpy).toHaveBeenCalledWith(updateMessage)
|
||||
})
|
||||
|
||||
it('should queue incoming update while network is active and flush after', () => {
|
||||
initNormally()
|
||||
const loadSpy = jest.spyOn(component as any, 'loadDocument')
|
||||
const toastSpy = jest.spyOn(toastService, 'showInfo')
|
||||
|
||||
component.networkActive = true
|
||||
;(component as any).handleIncomingDocumentUpdated({
|
||||
document_id: component.documentId,
|
||||
modified: '2026-02-17T00:00:00Z',
|
||||
})
|
||||
|
||||
expect(loadSpy).not.toHaveBeenCalled()
|
||||
|
||||
component.networkActive = false
|
||||
;(component as any).flushPendingIncomingUpdate()
|
||||
|
||||
expect(loadSpy).toHaveBeenCalledWith(component.documentId, true)
|
||||
expect(toastSpy).toHaveBeenCalledWith(
|
||||
'Document reloaded with latest changes.'
|
||||
)
|
||||
})
|
||||
|
||||
it('should ignore queued incoming update matching local save modified', () => {
|
||||
initNormally()
|
||||
const loadSpy = jest.spyOn(component as any, 'loadDocument')
|
||||
const toastSpy = jest.spyOn(toastService, 'showInfo')
|
||||
|
||||
component.networkActive = true
|
||||
;(component as any).lastLocalSaveModified = '2026-02-17T00:00:00+00:00'
|
||||
;(component as any).handleIncomingDocumentUpdated({
|
||||
document_id: component.documentId,
|
||||
modified: '2026-02-17T00:00:00+00:00',
|
||||
})
|
||||
|
||||
component.networkActive = false
|
||||
;(component as any).flushPendingIncomingUpdate()
|
||||
|
||||
expect(loadSpy).not.toHaveBeenCalled()
|
||||
expect(toastSpy).not.toHaveBeenCalled()
|
||||
})
|
||||
|
||||
it('should clear pdf source if preview URL is empty', () => {
|
||||
component.pdfSource = { url: '/preview', password: 'secret' } as any
|
||||
component.previewUrl = null
|
||||
;(component as any).updatePdfSource()
|
||||
|
||||
expect(component.pdfSource).toEqual({ url: null, password: undefined })
|
||||
})
|
||||
|
||||
it('should close incoming update modal if one is open', () => {
|
||||
const modalRef = { close: jest.fn() } as unknown as NgbModalRef
|
||||
;(component as any).incomingUpdateModal = modalRef
|
||||
;(component as any).closeIncomingUpdateModal()
|
||||
|
||||
expect(modalRef.close).toHaveBeenCalled()
|
||||
expect((component as any).incomingUpdateModal).toBeNull()
|
||||
})
|
||||
|
||||
it('should reload remote version when incoming update modal is confirmed', async () => {
|
||||
let openModal: NgbModalRef
|
||||
modalService.activeInstances.subscribe((modals) => (openModal = modals[0]))
|
||||
const reloadSpy = jest
|
||||
.spyOn(component as any, 'reloadRemoteVersion')
|
||||
.mockImplementation(() => {})
|
||||
|
||||
;(component as any).showIncomingUpdateModal('2026-02-17T00:00:00Z')
|
||||
|
||||
const dialog = openModal.componentInstance as ConfirmDialogComponent
|
||||
dialog.confirmClicked.next()
|
||||
await openModal.result
|
||||
|
||||
expect(dialog.buttonsEnabled).toBe(false)
|
||||
expect(reloadSpy).toHaveBeenCalled()
|
||||
expect((component as any).incomingUpdateModal).toBeNull()
|
||||
})
|
||||
|
||||
it('should overwrite open document state when loading remote version with force', () => {
|
||||
const openDoc = Object.assign({}, doc, {
|
||||
title: 'Locally edited title',
|
||||
__changedFields: ['title'],
|
||||
})
|
||||
const remoteDoc = Object.assign({}, doc, {
|
||||
title: 'Remote title',
|
||||
modified: '2026-02-17T00:00:00Z',
|
||||
})
|
||||
jest.spyOn(documentService, 'get').mockReturnValue(of(remoteDoc))
|
||||
jest.spyOn(documentService, 'getMetadata').mockReturnValue(
|
||||
of({
|
||||
has_archive_version: false,
|
||||
original_mime_type: 'application/pdf',
|
||||
})
|
||||
)
|
||||
jest.spyOn(documentService, 'getSuggestions').mockReturnValue(
|
||||
of({
|
||||
suggested_tags: [],
|
||||
suggested_document_types: [],
|
||||
suggested_correspondents: [],
|
||||
})
|
||||
)
|
||||
jest.spyOn(openDocumentsService, 'getOpenDocument').mockReturnValue(openDoc)
|
||||
const setDirtySpy = jest.spyOn(openDocumentsService, 'setDirty')
|
||||
const saveSpy = jest.spyOn(openDocumentsService, 'save')
|
||||
|
||||
;(component as any).loadDocument(doc.id, true)
|
||||
|
||||
expect(openDoc.title).toEqual('Remote title')
|
||||
expect(openDoc.__changedFields).toEqual([])
|
||||
expect(setDirtySpy).toHaveBeenCalledWith(openDoc, false)
|
||||
expect(saveSpy).toHaveBeenCalled()
|
||||
})
|
||||
|
||||
it('should ignore incoming update for a different document id', () => {
|
||||
initNormally()
|
||||
const loadSpy = jest.spyOn(component as any, 'loadDocument')
|
||||
|
||||
;(component as any).handleIncomingDocumentUpdated({
|
||||
document_id: component.documentId + 1,
|
||||
modified: '2026-02-17T00:00:00Z',
|
||||
})
|
||||
|
||||
expect(loadSpy).not.toHaveBeenCalled()
|
||||
})
|
||||
|
||||
it('should show incoming update modal when local document has unsaved edits', () => {
|
||||
initNormally()
|
||||
jest.spyOn(openDocumentsService, 'isDirty').mockReturnValue(true)
|
||||
const modalSpy = jest
|
||||
.spyOn(component as any, 'showIncomingUpdateModal')
|
||||
.mockImplementation(() => {})
|
||||
|
||||
;(component as any).handleIncomingDocumentUpdated({
|
||||
document_id: component.documentId,
|
||||
modified: '2026-02-17T00:00:00Z',
|
||||
})
|
||||
|
||||
expect(modalSpy).toHaveBeenCalledWith('2026-02-17T00:00:00Z')
|
||||
})
|
||||
|
||||
it('should reload current document and show toast when reloading remote version', () => {
|
||||
component.documentId = doc.id
|
||||
const closeModalSpy = jest
|
||||
.spyOn(component as any, 'closeIncomingUpdateModal')
|
||||
.mockImplementation(() => {})
|
||||
const loadSpy = jest
|
||||
.spyOn(component as any, 'loadDocument')
|
||||
.mockImplementation(() => {})
|
||||
const notifySpy = jest.spyOn(component.docChangeNotifier, 'next')
|
||||
const toastSpy = jest.spyOn(toastService, 'showInfo')
|
||||
|
||||
;(component as any).reloadRemoteVersion()
|
||||
|
||||
expect(closeModalSpy).toHaveBeenCalled()
|
||||
expect(notifySpy).toHaveBeenCalledWith(doc.id)
|
||||
expect(loadSpy).toHaveBeenCalledWith(doc.id, true)
|
||||
expect(toastSpy).toHaveBeenCalledWith('Document reloaded.')
|
||||
})
|
||||
|
||||
it('should change preview element by render type', () => {
|
||||
@@ -1721,6 +1923,14 @@ describe('DocumentDetailComponent', () => {
|
||||
expect(component.createDisabled(DataType.Tag)).toBeFalsy()
|
||||
})
|
||||
|
||||
it('should expose add permission via userCanAdd getter', () => {
|
||||
currentUserCan = true
|
||||
expect(component.userCanAdd).toBeTruthy()
|
||||
|
||||
currentUserCan = false
|
||||
expect(component.userCanAdd).toBeFalsy()
|
||||
})
|
||||
|
||||
it('should call tryRenderTiff when no archive and file is tiff', () => {
|
||||
initNormally()
|
||||
const tiffRenderSpy = jest.spyOn(
|
||||
|
||||
@@ -13,6 +13,7 @@ import {
|
||||
NgbDateStruct,
|
||||
NgbDropdownModule,
|
||||
NgbModal,
|
||||
NgbModalRef,
|
||||
NgbNav,
|
||||
NgbNavChangeEvent,
|
||||
NgbNavModule,
|
||||
@@ -80,6 +81,7 @@ import { TagService } from 'src/app/services/rest/tag.service'
|
||||
import { UserService } from 'src/app/services/rest/user.service'
|
||||
import { SettingsService } from 'src/app/services/settings.service'
|
||||
import { ToastService } from 'src/app/services/toast.service'
|
||||
import { WebsocketStatusService } from 'src/app/services/websocket-status.service'
|
||||
import { getFilenameFromContentDisposition } from 'src/app/utils/http'
|
||||
import { ISODateAdapter } from 'src/app/utils/ngb-iso-date-adapter'
|
||||
import * as UTIF from 'utif'
|
||||
@@ -143,6 +145,11 @@ enum ContentRenderType {
|
||||
TIFF = 'tiff',
|
||||
}
|
||||
|
||||
interface IncomingDocumentUpdate {
|
||||
document_id: number
|
||||
modified: string
|
||||
}
|
||||
|
||||
@Component({
|
||||
selector: 'pngx-document-detail',
|
||||
templateUrl: './document-detail.component.html',
|
||||
@@ -208,6 +215,7 @@ export class DocumentDetailComponent
|
||||
private componentRouterService = inject(ComponentRouterService)
|
||||
private deviceDetectorService = inject(DeviceDetectorService)
|
||||
private savedViewService = inject(SavedViewService)
|
||||
private readonly websocketStatusService = inject(WebsocketStatusService)
|
||||
|
||||
@ViewChild('inputTitle')
|
||||
titleInput: TextComponent
|
||||
@@ -267,6 +275,9 @@ export class DocumentDetailComponent
|
||||
isDirty$: Observable<boolean>
|
||||
unsubscribeNotifier: Subject<any> = new Subject()
|
||||
docChangeNotifier: Subject<any> = new Subject()
|
||||
private incomingUpdateModal: NgbModalRef
|
||||
private pendingIncomingUpdate: IncomingDocumentUpdate
|
||||
private lastLocalSaveModified: string | null = null
|
||||
|
||||
requiresPassword: boolean = false
|
||||
password: string
|
||||
@@ -475,9 +486,12 @@ export class DocumentDetailComponent
|
||||
)
|
||||
}
|
||||
|
||||
private loadDocument(documentId: number): void {
|
||||
private loadDocument(documentId: number, forceRemote: boolean = false): void {
|
||||
let redirectedToRoot = false
|
||||
this.closeIncomingUpdateModal()
|
||||
this.pendingIncomingUpdate = null
|
||||
this.selectedVersionId = documentId
|
||||
this.lastLocalSaveModified = null
|
||||
this.previewUrl = this.documentsService.getPreviewUrl(
|
||||
this.selectedVersionId
|
||||
)
|
||||
@@ -545,21 +559,25 @@ export class DocumentDetailComponent
|
||||
openDocument.duplicate_documents = doc.duplicate_documents
|
||||
this.openDocumentService.save()
|
||||
}
|
||||
const useDoc = openDocument || doc
|
||||
if (openDocument) {
|
||||
if (
|
||||
new Date(doc.modified) > new Date(openDocument.modified) &&
|
||||
!this.modalService.hasOpenModals()
|
||||
) {
|
||||
const modal = this.modalService.open(ConfirmDialogComponent)
|
||||
modal.componentInstance.title = $localize`Document changes detected`
|
||||
modal.componentInstance.messageBold = $localize`The version of this document in your browser session appears older than the existing version.`
|
||||
modal.componentInstance.message = $localize`Saving the document here may overwrite other changes that were made. To restore the existing version, discard your changes or close the document.`
|
||||
modal.componentInstance.cancelBtnClass = 'visually-hidden'
|
||||
modal.componentInstance.btnCaption = $localize`Ok`
|
||||
modal.componentInstance.confirmClicked.subscribe(() =>
|
||||
modal.close()
|
||||
)
|
||||
let useDoc = openDocument || doc
|
||||
if (openDocument && forceRemote) {
|
||||
Object.assign(openDocument, doc)
|
||||
openDocument.__changedFields = []
|
||||
this.openDocumentService.setDirty(openDocument, false)
|
||||
this.openDocumentService.save()
|
||||
useDoc = openDocument
|
||||
} else if (openDocument) {
|
||||
if (new Date(doc.modified) > new Date(openDocument.modified)) {
|
||||
if (this.hasLocalEdits(openDocument)) {
|
||||
this.showIncomingUpdateModal(doc.modified)
|
||||
} else {
|
||||
// No local edits to preserve, so keep the tab in sync automatically.
|
||||
Object.assign(openDocument, doc)
|
||||
openDocument.__changedFields = []
|
||||
this.openDocumentService.setDirty(openDocument, false)
|
||||
this.openDocumentService.save()
|
||||
useDoc = openDocument
|
||||
}
|
||||
}
|
||||
} else {
|
||||
this.openDocumentService
|
||||
@@ -590,6 +608,98 @@ export class DocumentDetailComponent
|
||||
})
|
||||
}
|
||||
|
||||
private hasLocalEdits(doc: Document): boolean {
|
||||
return (
|
||||
this.openDocumentService.isDirty(doc) || !!doc.__changedFields?.length
|
||||
)
|
||||
}
|
||||
|
||||
private showIncomingUpdateModal(modified: string): void {
|
||||
if (this.incomingUpdateModal) return
|
||||
|
||||
const modal = this.modalService.open(ConfirmDialogComponent, {
|
||||
backdrop: 'static',
|
||||
})
|
||||
this.incomingUpdateModal = modal
|
||||
|
||||
let formattedModified = null
|
||||
const parsed = new Date(modified)
|
||||
formattedModified = parsed.toLocaleString()
|
||||
|
||||
modal.componentInstance.title = $localize`Document was updated`
|
||||
modal.componentInstance.messageBold = $localize`Document was updated at ${formattedModified}.`
|
||||
modal.componentInstance.message = $localize`Reload to discard your local unsaved edits and load the latest remote version.`
|
||||
modal.componentInstance.btnClass = 'btn-warning'
|
||||
modal.componentInstance.btnCaption = $localize`Reload`
|
||||
modal.componentInstance.cancelBtnCaption = $localize`Dismiss`
|
||||
|
||||
modal.componentInstance.confirmClicked.pipe(first()).subscribe(() => {
|
||||
modal.componentInstance.buttonsEnabled = false
|
||||
modal.close()
|
||||
this.reloadRemoteVersion()
|
||||
})
|
||||
modal.result.finally(() => {
|
||||
this.incomingUpdateModal = null
|
||||
})
|
||||
}
|
||||
|
||||
private closeIncomingUpdateModal() {
|
||||
if (!this.incomingUpdateModal) return
|
||||
this.incomingUpdateModal.close()
|
||||
this.incomingUpdateModal = null
|
||||
}
|
||||
|
||||
private flushPendingIncomingUpdate() {
|
||||
if (!this.pendingIncomingUpdate || this.networkActive) return
|
||||
const pendingUpdate = this.pendingIncomingUpdate
|
||||
this.pendingIncomingUpdate = null
|
||||
this.handleIncomingDocumentUpdated(pendingUpdate)
|
||||
}
|
||||
|
||||
private handleIncomingDocumentUpdated(data: IncomingDocumentUpdate): void {
|
||||
if (
|
||||
!this.documentId ||
|
||||
!this.document ||
|
||||
data.document_id !== this.documentId
|
||||
)
|
||||
return
|
||||
if (this.networkActive) {
|
||||
this.pendingIncomingUpdate = data
|
||||
return
|
||||
}
|
||||
// If modified timestamp of the incoming update is the same as the last local save,
|
||||
// we assume this update is from our own save and dont notify
|
||||
const incomingModified = data.modified
|
||||
if (
|
||||
incomingModified &&
|
||||
this.lastLocalSaveModified &&
|
||||
incomingModified === this.lastLocalSaveModified
|
||||
) {
|
||||
this.lastLocalSaveModified = null
|
||||
return
|
||||
}
|
||||
this.lastLocalSaveModified = null
|
||||
|
||||
if (this.openDocumentService.isDirty(this.document)) {
|
||||
this.showIncomingUpdateModal(data.modified)
|
||||
} else {
|
||||
this.docChangeNotifier.next(this.documentId)
|
||||
this.loadDocument(this.documentId, true)
|
||||
this.toastService.showInfo(
|
||||
$localize`Document reloaded with latest changes.`
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
private reloadRemoteVersion() {
|
||||
if (!this.documentId) return
|
||||
|
||||
this.closeIncomingUpdateModal()
|
||||
this.docChangeNotifier.next(this.documentId)
|
||||
this.loadDocument(this.documentId, true)
|
||||
this.toastService.showInfo($localize`Document reloaded.`)
|
||||
}
|
||||
|
||||
ngOnInit(): void {
|
||||
this.setZoom(
|
||||
this.settings.get(SETTINGS_KEYS.PDF_VIEWER_ZOOM_SETTING) as PdfZoomScale
|
||||
@@ -648,6 +758,11 @@ export class DocumentDetailComponent
|
||||
|
||||
this.getCustomFields()
|
||||
|
||||
this.websocketStatusService
|
||||
.onDocumentUpdated()
|
||||
.pipe(takeUntil(this.unsubscribeNotifier))
|
||||
.subscribe((data) => this.handleIncomingDocumentUpdated(data))
|
||||
|
||||
this.route.paramMap
|
||||
.pipe(
|
||||
filter(
|
||||
@@ -1033,6 +1148,7 @@ export class DocumentDetailComponent
|
||||
)
|
||||
.subscribe({
|
||||
next: (doc) => {
|
||||
this.closeIncomingUpdateModal()
|
||||
Object.assign(this.document, doc)
|
||||
doc['permissions_form'] = {
|
||||
owner: doc.owner,
|
||||
@@ -1079,6 +1195,8 @@ export class DocumentDetailComponent
|
||||
.pipe(first())
|
||||
.subscribe({
|
||||
next: (docValues) => {
|
||||
this.closeIncomingUpdateModal()
|
||||
this.lastLocalSaveModified = docValues.modified ?? null
|
||||
// in case data changed while saving eg removing inbox_tags
|
||||
this.documentForm.patchValue(docValues)
|
||||
const newValues = Object.assign({}, this.documentForm.value)
|
||||
@@ -1093,16 +1211,19 @@ export class DocumentDetailComponent
|
||||
this.networkActive = false
|
||||
this.error = null
|
||||
if (close) {
|
||||
this.pendingIncomingUpdate = null
|
||||
this.close(() =>
|
||||
this.openDocumentService.refreshDocument(this.documentId)
|
||||
)
|
||||
} else {
|
||||
this.openDocumentService.refreshDocument(this.documentId)
|
||||
this.flushPendingIncomingUpdate()
|
||||
}
|
||||
this.savedViewService.maybeRefreshDocumentCounts()
|
||||
},
|
||||
error: (error) => {
|
||||
this.networkActive = false
|
||||
this.lastLocalSaveModified = null
|
||||
const canEdit =
|
||||
this.permissionsService.currentUserHasObjectPermissions(
|
||||
PermissionAction.Change,
|
||||
@@ -1122,6 +1243,7 @@ export class DocumentDetailComponent
|
||||
error
|
||||
)
|
||||
}
|
||||
this.flushPendingIncomingUpdate()
|
||||
},
|
||||
})
|
||||
}
|
||||
@@ -1158,8 +1280,11 @@ export class DocumentDetailComponent
|
||||
.pipe(first())
|
||||
.subscribe({
|
||||
next: ({ updateResult, nextDocId, closeResult }) => {
|
||||
this.closeIncomingUpdateModal()
|
||||
this.error = null
|
||||
this.networkActive = false
|
||||
this.pendingIncomingUpdate = null
|
||||
this.lastLocalSaveModified = null
|
||||
if (closeResult && updateResult && nextDocId) {
|
||||
this.router.navigate(['documents', nextDocId])
|
||||
this.titleInput?.focus()
|
||||
@@ -1167,8 +1292,10 @@ export class DocumentDetailComponent
|
||||
},
|
||||
error: (error) => {
|
||||
this.networkActive = false
|
||||
this.lastLocalSaveModified = null
|
||||
this.error = error.error
|
||||
this.toastService.showError($localize`Error saving document`, error)
|
||||
this.flushPendingIncomingUpdate()
|
||||
},
|
||||
})
|
||||
}
|
||||
@@ -1254,7 +1381,7 @@ export class DocumentDetailComponent
|
||||
.subscribe({
|
||||
next: () => {
|
||||
this.toastService.showInfo(
|
||||
$localize`Reprocess operation for "${this.document.title}" will begin in the background. Close and re-open or reload this document after the operation has completed to see new content.`
|
||||
$localize`Reprocess operation for "${this.document.title}" will begin in the background.`
|
||||
)
|
||||
if (modal) {
|
||||
modal.close()
|
||||
|
||||
@@ -128,15 +128,15 @@ export interface Document extends ObjectWithPermissions {
|
||||
checksum?: string
|
||||
|
||||
// UTC
|
||||
created?: Date
|
||||
created?: string // ISO string
|
||||
|
||||
modified?: Date
|
||||
modified?: string // ISO string
|
||||
|
||||
added?: Date
|
||||
added?: string // ISO string
|
||||
|
||||
mime_type?: string
|
||||
|
||||
deleted_at?: Date
|
||||
deleted_at?: string // ISO string
|
||||
|
||||
original_file_name?: string
|
||||
|
||||
|
||||
@@ -0,0 +1,7 @@
|
||||
export interface WebsocketDocumentUpdatedMessage {
|
||||
document_id: number
|
||||
modified: string
|
||||
owner_id?: number
|
||||
users_can_view?: number[]
|
||||
groups_can_view?: number[]
|
||||
}
|
||||
@@ -416,4 +416,42 @@ describe('ConsumerStatusService', () => {
|
||||
websocketStatusService.disconnect()
|
||||
expect(deleted).toBeTruthy()
|
||||
})
|
||||
|
||||
it('should trigger updated subject on document updated', () => {
|
||||
let updated = false
|
||||
websocketStatusService.onDocumentUpdated().subscribe((data) => {
|
||||
updated = true
|
||||
expect(data.document_id).toEqual(12)
|
||||
})
|
||||
|
||||
websocketStatusService.connect()
|
||||
server.send({
|
||||
type: WebsocketStatusType.DOCUMENT_UPDATED,
|
||||
data: {
|
||||
document_id: 12,
|
||||
modified: '2026-02-17T00:00:00Z',
|
||||
owner_id: 1,
|
||||
},
|
||||
})
|
||||
|
||||
websocketStatusService.disconnect()
|
||||
expect(updated).toBeTruthy()
|
||||
})
|
||||
|
||||
it('should ignore document updated events the user cannot view', () => {
|
||||
let updated = false
|
||||
websocketStatusService.onDocumentUpdated().subscribe(() => {
|
||||
updated = true
|
||||
})
|
||||
|
||||
websocketStatusService.handleDocumentUpdated({
|
||||
document_id: 12,
|
||||
modified: '2026-02-17T00:00:00Z',
|
||||
owner_id: 2,
|
||||
users_can_view: [],
|
||||
groups_can_view: [],
|
||||
})
|
||||
|
||||
expect(updated).toBeFalsy()
|
||||
})
|
||||
})
|
||||
|
||||
@@ -2,6 +2,7 @@ import { Injectable, inject } from '@angular/core'
|
||||
import { Subject } from 'rxjs'
|
||||
import { environment } from 'src/environments/environment'
|
||||
import { User } from '../data/user'
|
||||
import { WebsocketDocumentUpdatedMessage } from '../data/websocket-document-updated-message'
|
||||
import { WebsocketDocumentsDeletedMessage } from '../data/websocket-documents-deleted-message'
|
||||
import { WebsocketProgressMessage } from '../data/websocket-progress-message'
|
||||
import { SettingsService } from './settings.service'
|
||||
@@ -9,6 +10,7 @@ import { SettingsService } from './settings.service'
|
||||
export enum WebsocketStatusType {
|
||||
STATUS_UPDATE = 'status_update',
|
||||
DOCUMENTS_DELETED = 'documents_deleted',
|
||||
DOCUMENT_UPDATED = 'document_updated',
|
||||
}
|
||||
|
||||
// see ProgressStatusOptions in src/documents/plugins/helpers.py
|
||||
@@ -100,17 +102,20 @@ export enum UploadState {
|
||||
providedIn: 'root',
|
||||
})
|
||||
export class WebsocketStatusService {
|
||||
private settingsService = inject(SettingsService)
|
||||
private readonly settingsService = inject(SettingsService)
|
||||
|
||||
private statusWebSocket: WebSocket
|
||||
|
||||
private consumerStatus: FileStatus[] = []
|
||||
|
||||
private documentDetectedSubject = new Subject<FileStatus>()
|
||||
private documentConsumptionFinishedSubject = new Subject<FileStatus>()
|
||||
private documentConsumptionFailedSubject = new Subject<FileStatus>()
|
||||
private documentDeletedSubject = new Subject<boolean>()
|
||||
private connectionStatusSubject = new Subject<boolean>()
|
||||
private readonly documentDetectedSubject = new Subject<FileStatus>()
|
||||
private readonly documentConsumptionFinishedSubject =
|
||||
new Subject<FileStatus>()
|
||||
private readonly documentConsumptionFailedSubject = new Subject<FileStatus>()
|
||||
private readonly documentDeletedSubject = new Subject<boolean>()
|
||||
private readonly documentUpdatedSubject =
|
||||
new Subject<WebsocketDocumentUpdatedMessage>()
|
||||
private readonly connectionStatusSubject = new Subject<boolean>()
|
||||
|
||||
private get(taskId: string, filename?: string) {
|
||||
let status =
|
||||
@@ -176,7 +181,10 @@ export class WebsocketStatusService {
|
||||
data: messageData,
|
||||
}: {
|
||||
type: WebsocketStatusType
|
||||
data: WebsocketProgressMessage | WebsocketDocumentsDeletedMessage
|
||||
data:
|
||||
| WebsocketProgressMessage
|
||||
| WebsocketDocumentsDeletedMessage
|
||||
| WebsocketDocumentUpdatedMessage
|
||||
} = JSON.parse(ev.data)
|
||||
|
||||
switch (type) {
|
||||
@@ -184,6 +192,12 @@ export class WebsocketStatusService {
|
||||
this.documentDeletedSubject.next(true)
|
||||
break
|
||||
|
||||
case WebsocketStatusType.DOCUMENT_UPDATED:
|
||||
this.handleDocumentUpdated(
|
||||
messageData as WebsocketDocumentUpdatedMessage
|
||||
)
|
||||
break
|
||||
|
||||
case WebsocketStatusType.STATUS_UPDATE:
|
||||
this.handleProgressUpdate(messageData as WebsocketProgressMessage)
|
||||
break
|
||||
@@ -191,7 +205,11 @@ export class WebsocketStatusService {
|
||||
}
|
||||
}
|
||||
|
||||
private canViewMessage(messageData: WebsocketProgressMessage): boolean {
|
||||
private canViewMessage(messageData: {
|
||||
owner_id?: number
|
||||
users_can_view?: number[]
|
||||
groups_can_view?: number[]
|
||||
}): boolean {
|
||||
// see paperless.consumers.StatusConsumer._can_view
|
||||
const user: User = this.settingsService.currentUser
|
||||
return (
|
||||
@@ -251,6 +269,15 @@ export class WebsocketStatusService {
|
||||
}
|
||||
}
|
||||
|
||||
handleDocumentUpdated(messageData: WebsocketDocumentUpdatedMessage) {
|
||||
// fallback if backend didn't restrict message
|
||||
if (!this.canViewMessage(messageData)) {
|
||||
return
|
||||
}
|
||||
|
||||
this.documentUpdatedSubject.next(messageData)
|
||||
}
|
||||
|
||||
fail(status: FileStatus, message: string) {
|
||||
status.message = message
|
||||
status.phase = FileStatusPhase.FAILED
|
||||
@@ -304,6 +331,10 @@ export class WebsocketStatusService {
|
||||
return this.documentDeletedSubject
|
||||
}
|
||||
|
||||
onDocumentUpdated() {
|
||||
return this.documentUpdatedSubject
|
||||
}
|
||||
|
||||
onConnectionStatus() {
|
||||
return this.connectionStatusSubject.asObservable()
|
||||
}
|
||||
|
||||
11
src/conftest.py
Normal file
11
src/conftest.py
Normal file
@@ -0,0 +1,11 @@
|
||||
import pytest
|
||||
from pytest_django.fixtures import SettingsWrapper
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def in_memory_channel_layers(settings: SettingsWrapper) -> None:
|
||||
settings.CHANNEL_LAYERS = {
|
||||
"default": {
|
||||
"BACKEND": "channels.layers.InMemoryChannelLayer",
|
||||
},
|
||||
}
|
||||
@@ -15,6 +15,7 @@ class DocumentsConfig(AppConfig):
|
||||
from documents.signals.handlers import add_to_index
|
||||
from documents.signals.handlers import run_workflows_added
|
||||
from documents.signals.handlers import run_workflows_updated
|
||||
from documents.signals.handlers import send_websocket_document_updated
|
||||
from documents.signals.handlers import set_correspondent
|
||||
from documents.signals.handlers import set_document_type
|
||||
from documents.signals.handlers import set_storage_path
|
||||
@@ -29,6 +30,7 @@ class DocumentsConfig(AppConfig):
|
||||
document_consumption_finished.connect(run_workflows_added)
|
||||
document_consumption_finished.connect(add_or_update_document_in_llm_index)
|
||||
document_updated.connect(run_workflows_updated)
|
||||
document_updated.connect(send_websocket_document_updated)
|
||||
|
||||
import documents.schema # noqa: F401
|
||||
|
||||
|
||||
@@ -7,7 +7,6 @@ Provides automatic progress bar and multiprocessing support with minimal boilerp
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from collections.abc import Callable
|
||||
from collections.abc import Iterable
|
||||
from collections.abc import Sized
|
||||
from concurrent.futures import ProcessPoolExecutor
|
||||
@@ -24,9 +23,6 @@ from django.core.management import CommandError
|
||||
from django.db.models import QuerySet
|
||||
from django_rich.management import RichCommand
|
||||
from rich.console import Console
|
||||
from rich.console import Group
|
||||
from rich.console import RenderableType
|
||||
from rich.live import Live
|
||||
from rich.progress import BarColumn
|
||||
from rich.progress import MofNCompleteColumn
|
||||
from rich.progress import Progress
|
||||
@@ -36,7 +32,9 @@ from rich.progress import TimeElapsedColumn
|
||||
from rich.progress import TimeRemainingColumn
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Callable
|
||||
from collections.abc import Generator
|
||||
from collections.abc import Iterable
|
||||
from collections.abc import Sequence
|
||||
|
||||
from django.core.management import CommandParser
|
||||
@@ -93,23 +91,6 @@ class PaperlessCommand(RichCommand):
|
||||
for result in self.process_parallel(process_doc, ids):
|
||||
if result.error:
|
||||
self.console.print(f"[red]Failed: {result.error}[/red]")
|
||||
|
||||
class Command(PaperlessCommand):
|
||||
help = "Import documents with live stats"
|
||||
|
||||
def handle(self, *args, **options):
|
||||
stats = ImportStats()
|
||||
|
||||
def render_stats() -> Table:
|
||||
... # build Rich Table from stats
|
||||
|
||||
for item in self.track_with_stats(
|
||||
items,
|
||||
description="Importing...",
|
||||
stats_renderer=render_stats,
|
||||
):
|
||||
result = import_item(item)
|
||||
stats.imported += 1
|
||||
"""
|
||||
|
||||
supports_progress_bar: ClassVar[bool] = True
|
||||
@@ -147,11 +128,13 @@ class PaperlessCommand(RichCommand):
|
||||
This is called by Django's command infrastructure after argument parsing
|
||||
but before handle(). We use it to set instance attributes from options.
|
||||
"""
|
||||
# Set progress bar state
|
||||
if self.supports_progress_bar:
|
||||
self.no_progress_bar = options.get("no_progress_bar", False)
|
||||
else:
|
||||
self.no_progress_bar = True
|
||||
|
||||
# Set multiprocessing state
|
||||
if self.supports_multiprocessing:
|
||||
self.process_count = options.get("processes", 1)
|
||||
if self.process_count < 1:
|
||||
@@ -161,29 +144,9 @@ class PaperlessCommand(RichCommand):
|
||||
|
||||
return super().execute(*args, **options)
|
||||
|
||||
@staticmethod
|
||||
def _progress_columns() -> tuple[Any, ...]:
|
||||
"""
|
||||
Return the standard set of progress bar columns.
|
||||
|
||||
Extracted so both _create_progress (standalone) and track_with_stats
|
||||
(inside Live) use identical column configuration without duplication.
|
||||
"""
|
||||
return (
|
||||
SpinnerColumn(),
|
||||
TextColumn("[progress.description]{task.description}"),
|
||||
BarColumn(),
|
||||
MofNCompleteColumn(),
|
||||
TimeElapsedColumn(),
|
||||
TimeRemainingColumn(),
|
||||
)
|
||||
|
||||
def _create_progress(self, description: str) -> Progress:
|
||||
"""
|
||||
Create a standalone Progress instance with its own stderr Console.
|
||||
|
||||
Use this for track(). For track_with_stats(), Progress is created
|
||||
directly inside a Live context instead.
|
||||
Create a configured Progress instance.
|
||||
|
||||
Progress output is directed to stderr to match the convention that
|
||||
progress bars are transient UI feedback, not command output. This
|
||||
@@ -198,7 +161,12 @@ class PaperlessCommand(RichCommand):
|
||||
A Progress instance configured with appropriate columns.
|
||||
"""
|
||||
return Progress(
|
||||
*self._progress_columns(),
|
||||
SpinnerColumn(),
|
||||
TextColumn("[progress.description]{task.description}"),
|
||||
BarColumn(),
|
||||
MofNCompleteColumn(),
|
||||
TimeElapsedColumn(),
|
||||
TimeRemainingColumn(),
|
||||
console=Console(stderr=True),
|
||||
transient=False,
|
||||
)
|
||||
@@ -254,6 +222,7 @@ class PaperlessCommand(RichCommand):
|
||||
yield from iterable
|
||||
return
|
||||
|
||||
# Attempt to determine total if not provided
|
||||
if total is None:
|
||||
total = self._get_iterable_length(iterable)
|
||||
|
||||
@@ -263,87 +232,6 @@ class PaperlessCommand(RichCommand):
|
||||
yield item
|
||||
progress.advance(task_id)
|
||||
|
||||
def track_with_stats(
|
||||
self,
|
||||
iterable: Iterable[T],
|
||||
*,
|
||||
description: str = "Processing...",
|
||||
stats_renderer: Callable[[], RenderableType],
|
||||
total: int | None = None,
|
||||
) -> Generator[T, None, None]:
|
||||
"""
|
||||
Iterate over items with a progress bar and a live-updating stats display.
|
||||
|
||||
The progress bar and stats renderable are combined in a single Live
|
||||
context, so the stats panel re-renders in place below the progress bar
|
||||
after each item is processed.
|
||||
|
||||
Respects --no-progress-bar flag. When disabled, yields items without
|
||||
any display (stats are still updated by the caller's loop body, so
|
||||
they will be accurate for any post-loop summary the caller prints).
|
||||
|
||||
Args:
|
||||
iterable: The items to iterate over.
|
||||
description: Text to display alongside the progress bar.
|
||||
stats_renderer: Zero-argument callable that returns a Rich
|
||||
renderable. Called after each item to refresh the display.
|
||||
The caller typically closes over a mutable dataclass and
|
||||
rebuilds a Table from it on each call.
|
||||
total: Total number of items. If None, attempts to determine
|
||||
automatically via .count() (for querysets) or len().
|
||||
|
||||
Yields:
|
||||
Items from the iterable.
|
||||
|
||||
Example:
|
||||
@dataclass
|
||||
class Stats:
|
||||
processed: int = 0
|
||||
failed: int = 0
|
||||
|
||||
stats = Stats()
|
||||
|
||||
def render_stats() -> Table:
|
||||
table = Table(box=None)
|
||||
table.add_column("Processed")
|
||||
table.add_column("Failed")
|
||||
table.add_row(str(stats.processed), str(stats.failed))
|
||||
return table
|
||||
|
||||
for item in self.track_with_stats(
|
||||
items,
|
||||
description="Importing...",
|
||||
stats_renderer=render_stats,
|
||||
):
|
||||
try:
|
||||
import_item(item)
|
||||
stats.processed += 1
|
||||
except Exception:
|
||||
stats.failed += 1
|
||||
"""
|
||||
if self.no_progress_bar:
|
||||
yield from iterable
|
||||
return
|
||||
|
||||
if total is None:
|
||||
total = self._get_iterable_length(iterable)
|
||||
|
||||
stderr_console = Console(stderr=True)
|
||||
|
||||
# Progress is created without its own console so Live controls rendering.
|
||||
progress = Progress(*self._progress_columns())
|
||||
task_id = progress.add_task(description, total=total)
|
||||
|
||||
with Live(
|
||||
Group(progress, stats_renderer()),
|
||||
console=stderr_console,
|
||||
refresh_per_second=4,
|
||||
) as live:
|
||||
for item in iterable:
|
||||
yield item
|
||||
progress.advance(task_id)
|
||||
live.update(Group(progress, stats_renderer()))
|
||||
|
||||
def process_parallel(
|
||||
self,
|
||||
fn: Callable[[T], R],
|
||||
@@ -381,8 +269,10 @@ class PaperlessCommand(RichCommand):
|
||||
total = len(items)
|
||||
|
||||
if self.process_count == 1:
|
||||
# Sequential execution in main process - critical for testing
|
||||
yield from self._process_sequential(fn, items, description, total)
|
||||
else:
|
||||
# Parallel execution with ProcessPoolExecutor
|
||||
yield from self._process_parallel(fn, items, description, total)
|
||||
|
||||
def _process_sequential(
|
||||
@@ -408,14 +298,17 @@ class PaperlessCommand(RichCommand):
|
||||
total: int,
|
||||
) -> Generator[ProcessResult[T, R], None, None]:
|
||||
"""Process items in parallel using ProcessPoolExecutor."""
|
||||
# Close database connections before forking - required for PostgreSQL
|
||||
db.connections.close_all()
|
||||
|
||||
with self._create_progress(description) as progress:
|
||||
task_id = progress.add_task(description, total=total)
|
||||
|
||||
with ProcessPoolExecutor(max_workers=self.process_count) as executor:
|
||||
# Submit all tasks and map futures back to items
|
||||
future_to_item = {executor.submit(fn, item): item for item in items}
|
||||
|
||||
# Yield results as they complete
|
||||
for future in as_completed(future_to_item):
|
||||
item = future_to_item[future]
|
||||
try:
|
||||
|
||||
@@ -1,12 +1,4 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
from dataclasses import field
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from rich.table import Table
|
||||
from rich.text import Text
|
||||
|
||||
from documents.classifier import load_classifier
|
||||
from documents.management.commands.base import PaperlessCommand
|
||||
@@ -16,162 +8,9 @@ from documents.signals.handlers import set_document_type
|
||||
from documents.signals.handlers import set_storage_path
|
||||
from documents.signals.handlers import set_tags
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from rich.console import RenderableType
|
||||
|
||||
from documents.models import Correspondent
|
||||
from documents.models import DocumentType
|
||||
from documents.models import StoragePath
|
||||
from documents.models import Tag
|
||||
|
||||
logger = logging.getLogger("paperless.management.retagger")
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class RetaggerStats:
|
||||
"""Cumulative counters updated as the retagger processes documents.
|
||||
|
||||
Mutable by design -- fields are incremented in the processing loop.
|
||||
slots=True reduces per-instance memory overhead and speeds attribute access.
|
||||
"""
|
||||
|
||||
correspondents: int = 0
|
||||
document_types: int = 0
|
||||
tags_added: int = 0
|
||||
tags_removed: int = 0
|
||||
storage_paths: int = 0
|
||||
documents_processed: int = 0
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class DocumentSuggestion:
|
||||
"""Buffered classifier suggestions for a single document (suggest mode only).
|
||||
|
||||
Mutable by design -- fields are assigned incrementally as each setter runs.
|
||||
"""
|
||||
|
||||
document: Document
|
||||
correspondent: Correspondent | None = None
|
||||
document_type: DocumentType | None = None
|
||||
tags_to_add: frozenset[Tag] = field(default_factory=frozenset)
|
||||
tags_to_remove: frozenset[Tag] = field(default_factory=frozenset)
|
||||
storage_path: StoragePath | None = None
|
||||
|
||||
@property
|
||||
def has_suggestions(self) -> bool:
|
||||
return bool(
|
||||
self.correspondent is not None
|
||||
or self.document_type is not None
|
||||
or self.tags_to_add
|
||||
or self.tags_to_remove
|
||||
or self.storage_path is not None,
|
||||
)
|
||||
|
||||
|
||||
def _build_stats_table(stats: RetaggerStats, *, suggest: bool) -> Table:
|
||||
"""
|
||||
Build the live-updating stats table shown below the progress bar.
|
||||
|
||||
In suggest mode the labels read "would set / would add" to make clear
|
||||
that nothing has been written to the database.
|
||||
"""
|
||||
table = Table(box=None, padding=(0, 2), show_header=True, header_style="bold")
|
||||
|
||||
table.add_column("Documents")
|
||||
table.add_column("Correspondents")
|
||||
table.add_column("Doc Types")
|
||||
table.add_column("Tags (+)")
|
||||
table.add_column("Tags (-)")
|
||||
table.add_column("Storage Paths")
|
||||
|
||||
verb = "would set" if suggest else "set"
|
||||
|
||||
table.add_row(
|
||||
str(stats.documents_processed),
|
||||
f"{stats.correspondents} {verb}",
|
||||
f"{stats.document_types} {verb}",
|
||||
f"+{stats.tags_added}",
|
||||
f"-{stats.tags_removed}",
|
||||
f"{stats.storage_paths} {verb}",
|
||||
)
|
||||
|
||||
return table
|
||||
|
||||
|
||||
def _build_suggestion_table(
|
||||
suggestions: list[DocumentSuggestion],
|
||||
base_url: str | None,
|
||||
) -> Table:
|
||||
"""
|
||||
Build the final suggestion table printed after the progress bar completes.
|
||||
|
||||
Only documents with at least one suggestion are included.
|
||||
"""
|
||||
table = Table(
|
||||
title="Suggested Changes",
|
||||
show_header=True,
|
||||
header_style="bold cyan",
|
||||
show_lines=True,
|
||||
)
|
||||
|
||||
table.add_column("Document", style="bold", no_wrap=False, min_width=20)
|
||||
table.add_column("Correspondent")
|
||||
table.add_column("Doc Type")
|
||||
table.add_column("Tags")
|
||||
table.add_column("Storage Path")
|
||||
|
||||
for suggestion in suggestions:
|
||||
if not suggestion.has_suggestions:
|
||||
continue
|
||||
|
||||
doc = suggestion.document
|
||||
|
||||
if base_url:
|
||||
doc_cell = Text()
|
||||
doc_cell.append(str(doc))
|
||||
doc_cell.append(f"\n{base_url}/documents/{doc.pk}", style="dim")
|
||||
else:
|
||||
doc_cell = Text(f"{doc} [{doc.pk}]")
|
||||
|
||||
tag_parts: list[str] = []
|
||||
for tag in sorted(suggestion.tags_to_add, key=lambda t: t.name):
|
||||
tag_parts.append(f"[green]+{tag.name}[/green]")
|
||||
for tag in sorted(suggestion.tags_to_remove, key=lambda t: t.name):
|
||||
tag_parts.append(f"[red]-{tag.name}[/red]")
|
||||
tag_cell = Text.from_markup(", ".join(tag_parts)) if tag_parts else Text("-")
|
||||
|
||||
table.add_row(
|
||||
doc_cell,
|
||||
str(suggestion.correspondent) if suggestion.correspondent else "-",
|
||||
str(suggestion.document_type) if suggestion.document_type else "-",
|
||||
tag_cell,
|
||||
str(suggestion.storage_path) if suggestion.storage_path else "-",
|
||||
)
|
||||
|
||||
return table
|
||||
|
||||
|
||||
def _build_summary_table(stats: RetaggerStats) -> Table:
|
||||
"""Build the final applied-changes summary table."""
|
||||
table = Table(
|
||||
title="Retagger Summary",
|
||||
show_header=True,
|
||||
header_style="bold cyan",
|
||||
)
|
||||
|
||||
table.add_column("Metric", style="bold")
|
||||
table.add_column("Count", justify="right")
|
||||
|
||||
table.add_row("Documents processed", str(stats.documents_processed))
|
||||
table.add_row("Correspondents set", str(stats.correspondents))
|
||||
table.add_row("Document types set", str(stats.document_types))
|
||||
table.add_row("Tags added", str(stats.tags_added))
|
||||
table.add_row("Tags removed", str(stats.tags_removed))
|
||||
table.add_row("Storage paths set", str(stats.storage_paths))
|
||||
|
||||
return table
|
||||
|
||||
|
||||
class Command(PaperlessCommand):
|
||||
help = (
|
||||
"Using the current classification model, assigns correspondents, tags "
|
||||
@@ -180,7 +19,7 @@ class Command(PaperlessCommand):
|
||||
"modified) after their initial import."
|
||||
)
|
||||
|
||||
def add_arguments(self, parser) -> None:
|
||||
def add_arguments(self, parser):
|
||||
super().add_arguments(parser)
|
||||
parser.add_argument("-c", "--correspondent", default=False, action="store_true")
|
||||
parser.add_argument("-T", "--tags", default=False, action="store_true")
|
||||
@@ -192,9 +31,9 @@ class Command(PaperlessCommand):
|
||||
default=False,
|
||||
action="store_true",
|
||||
help=(
|
||||
"By default this command will not try to assign a correspondent "
|
||||
"if more than one matches the document. Use this flag to pick "
|
||||
"the first match instead."
|
||||
"By default this command won't try to assign a correspondent "
|
||||
"if more than one matches the document. Use this flag if "
|
||||
"you'd rather it just pick the first one it finds."
|
||||
),
|
||||
)
|
||||
parser.add_argument(
|
||||
@@ -203,133 +42,91 @@ class Command(PaperlessCommand):
|
||||
default=False,
|
||||
action="store_true",
|
||||
help=(
|
||||
"Overwrite any previously set correspondent, document type, and "
|
||||
"remove tags that no longer match due to changed rules."
|
||||
"If set, the document retagger will overwrite any previously "
|
||||
"set correspondent, document and remove correspondents, types "
|
||||
"and tags that do not match anymore due to changed rules."
|
||||
),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--suggest",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="Show what would be changed without applying anything.",
|
||||
help="Return the suggestion, don't change anything.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--base-url",
|
||||
help="Base URL used to build document links in suggest output.",
|
||||
help="The base URL to use to build the link to the documents.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--id-range",
|
||||
help="Restrict retagging to documents within this ID range (inclusive).",
|
||||
help="A range of document ids on which the retagging should be applied.",
|
||||
nargs=2,
|
||||
type=int,
|
||||
)
|
||||
|
||||
def handle(self, *args, **options) -> None:
|
||||
suggest: bool = options["suggest"]
|
||||
overwrite: bool = options["overwrite"]
|
||||
use_first: bool = options["use_first"]
|
||||
base_url: str | None = options["base_url"]
|
||||
|
||||
do_correspondent: bool = options["correspondent"]
|
||||
do_document_type: bool = options["document_type"]
|
||||
do_tags: bool = options["tags"]
|
||||
do_storage_path: bool = options["storage_path"]
|
||||
|
||||
if not any([do_correspondent, do_document_type, do_tags, do_storage_path]):
|
||||
self.console.print(
|
||||
"[yellow]No classifier targets specified. "
|
||||
"Use -c, -T, -t, or -s to select what to retag.[/yellow]",
|
||||
)
|
||||
return
|
||||
|
||||
def handle(self, *args, **options):
|
||||
if options["inbox_only"]:
|
||||
queryset = Document.objects.filter(tags__is_inbox_tag=True)
|
||||
else:
|
||||
queryset = Document.objects.all()
|
||||
|
||||
if options["id_range"]:
|
||||
lo, hi = options["id_range"]
|
||||
queryset = queryset.filter(id__range=(lo, hi))
|
||||
queryset = queryset.filter(
|
||||
id__range=(options["id_range"][0], options["id_range"][1]),
|
||||
)
|
||||
|
||||
documents = queryset.distinct()
|
||||
|
||||
classifier = load_classifier()
|
||||
|
||||
stats = RetaggerStats()
|
||||
suggestions: list[DocumentSuggestion] = []
|
||||
|
||||
def render_stats() -> RenderableType:
|
||||
return _build_stats_table(stats, suggest=suggest)
|
||||
|
||||
for document in self.track_with_stats(
|
||||
documents,
|
||||
description="Retagging...",
|
||||
stats_renderer=render_stats,
|
||||
):
|
||||
suggestion = DocumentSuggestion(document=document)
|
||||
|
||||
if do_correspondent:
|
||||
correspondent = set_correspondent(
|
||||
None,
|
||||
document,
|
||||
for document in self.track(documents, description="Retagging..."):
|
||||
if options["correspondent"]:
|
||||
set_correspondent(
|
||||
sender=None,
|
||||
document=document,
|
||||
classifier=classifier,
|
||||
replace=overwrite,
|
||||
use_first=use_first,
|
||||
dry_run=suggest,
|
||||
replace=options["overwrite"],
|
||||
use_first=options["use_first"],
|
||||
suggest=options["suggest"],
|
||||
base_url=options["base_url"],
|
||||
stdout=self.stdout,
|
||||
style_func=self.style,
|
||||
)
|
||||
if correspondent is not None:
|
||||
stats.correspondents += 1
|
||||
suggestion.correspondent = correspondent
|
||||
|
||||
if do_document_type:
|
||||
document_type = set_document_type(
|
||||
None,
|
||||
document,
|
||||
if options["document_type"]:
|
||||
set_document_type(
|
||||
sender=None,
|
||||
document=document,
|
||||
classifier=classifier,
|
||||
replace=overwrite,
|
||||
use_first=use_first,
|
||||
dry_run=suggest,
|
||||
replace=options["overwrite"],
|
||||
use_first=options["use_first"],
|
||||
suggest=options["suggest"],
|
||||
base_url=options["base_url"],
|
||||
stdout=self.stdout,
|
||||
style_func=self.style,
|
||||
)
|
||||
if document_type is not None:
|
||||
stats.document_types += 1
|
||||
suggestion.document_type = document_type
|
||||
|
||||
if do_tags:
|
||||
tags_to_add, tags_to_remove = set_tags(
|
||||
None,
|
||||
document,
|
||||
if options["tags"]:
|
||||
set_tags(
|
||||
sender=None,
|
||||
document=document,
|
||||
classifier=classifier,
|
||||
replace=overwrite,
|
||||
dry_run=suggest,
|
||||
replace=options["overwrite"],
|
||||
suggest=options["suggest"],
|
||||
base_url=options["base_url"],
|
||||
stdout=self.stdout,
|
||||
style_func=self.style,
|
||||
)
|
||||
stats.tags_added += len(tags_to_add)
|
||||
stats.tags_removed += len(tags_to_remove)
|
||||
suggestion.tags_to_add = frozenset(tags_to_add)
|
||||
suggestion.tags_to_remove = frozenset(tags_to_remove)
|
||||
|
||||
if do_storage_path:
|
||||
storage_path = set_storage_path(
|
||||
None,
|
||||
document,
|
||||
if options["storage_path"]:
|
||||
set_storage_path(
|
||||
sender=None,
|
||||
document=document,
|
||||
classifier=classifier,
|
||||
replace=overwrite,
|
||||
use_first=use_first,
|
||||
dry_run=suggest,
|
||||
replace=options["overwrite"],
|
||||
use_first=options["use_first"],
|
||||
suggest=options["suggest"],
|
||||
base_url=options["base_url"],
|
||||
stdout=self.stdout,
|
||||
style_func=self.style,
|
||||
)
|
||||
if storage_path is not None:
|
||||
stats.storage_paths += 1
|
||||
suggestion.storage_path = storage_path
|
||||
|
||||
stats.documents_processed += 1
|
||||
|
||||
if suggest:
|
||||
suggestions.append(suggestion)
|
||||
|
||||
# Post-loop output
|
||||
if suggest:
|
||||
visible = [s for s in suggestions if s.has_suggestions]
|
||||
if visible:
|
||||
self.console.print(_build_suggestion_table(visible, base_url))
|
||||
else:
|
||||
self.console.print("[green]No changes suggested.[/green]")
|
||||
else:
|
||||
self.console.print(_build_summary_table(stats))
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import enum
|
||||
from collections.abc import Mapping
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from asgiref.sync import async_to_sync
|
||||
@@ -47,7 +48,7 @@ class BaseStatusManager:
|
||||
async_to_sync(self._channel.flush)
|
||||
self._channel = None
|
||||
|
||||
def send(self, payload: dict[str, str | int | None]) -> None:
|
||||
def send(self, payload: Mapping[str, object]) -> None:
|
||||
# Ensure the layer is open
|
||||
self.open()
|
||||
|
||||
@@ -73,26 +74,28 @@ class ProgressManager(BaseStatusManager):
|
||||
max_progress: int,
|
||||
extra_args: dict[str, str | int | None] | None = None,
|
||||
) -> None:
|
||||
payload = {
|
||||
"type": "status_update",
|
||||
"data": {
|
||||
"filename": self.filename,
|
||||
"task_id": self.task_id,
|
||||
"current_progress": current_progress,
|
||||
"max_progress": max_progress,
|
||||
"status": status,
|
||||
"message": message,
|
||||
},
|
||||
data: dict[str, object] = {
|
||||
"filename": self.filename,
|
||||
"task_id": self.task_id,
|
||||
"current_progress": current_progress,
|
||||
"max_progress": max_progress,
|
||||
"status": status,
|
||||
"message": message,
|
||||
}
|
||||
if extra_args is not None:
|
||||
payload["data"].update(extra_args)
|
||||
data.update(extra_args)
|
||||
|
||||
payload: dict[str, object] = {
|
||||
"type": "status_update",
|
||||
"data": data,
|
||||
}
|
||||
|
||||
self.send(payload)
|
||||
|
||||
|
||||
class DocumentsStatusManager(BaseStatusManager):
|
||||
def send_documents_deleted(self, documents: list[int]) -> None:
|
||||
payload = {
|
||||
payload: dict[str, object] = {
|
||||
"type": "documents_deleted",
|
||||
"data": {
|
||||
"documents": documents,
|
||||
@@ -100,3 +103,25 @@ class DocumentsStatusManager(BaseStatusManager):
|
||||
}
|
||||
|
||||
self.send(payload)
|
||||
|
||||
def send_document_updated(
|
||||
self,
|
||||
*,
|
||||
document_id: int,
|
||||
modified: str,
|
||||
owner_id: int | None = None,
|
||||
users_can_view: list[int] | None = None,
|
||||
groups_can_view: list[int] | None = None,
|
||||
) -> None:
|
||||
payload: dict[str, object] = {
|
||||
"type": "document_updated",
|
||||
"data": {
|
||||
"document_id": document_id,
|
||||
"modified": modified,
|
||||
"owner_id": owner_id,
|
||||
"users_can_view": users_can_view or [],
|
||||
"groups_can_view": groups_can_view or [],
|
||||
},
|
||||
}
|
||||
|
||||
self.send(payload)
|
||||
|
||||
@@ -24,6 +24,7 @@ from django.db.models import Q
|
||||
from django.dispatch import receiver
|
||||
from django.utils import timezone
|
||||
from filelock import FileLock
|
||||
from rest_framework import serializers
|
||||
|
||||
from documents import matching
|
||||
from documents.caching import clear_document_caches
|
||||
@@ -33,14 +34,12 @@ from documents.file_handling import create_source_path_directory
|
||||
from documents.file_handling import delete_empty_directories
|
||||
from documents.file_handling import generate_filename
|
||||
from documents.file_handling import generate_unique_filename
|
||||
from documents.models import Correspondent
|
||||
from documents.models import CustomField
|
||||
from documents.models import CustomFieldInstance
|
||||
from documents.models import Document
|
||||
from documents.models import DocumentType
|
||||
from documents.models import MatchingModel
|
||||
from documents.models import PaperlessTask
|
||||
from documents.models import SavedView
|
||||
from documents.models import StoragePath
|
||||
from documents.models import Tag
|
||||
from documents.models import UiSettings
|
||||
from documents.models import Workflow
|
||||
@@ -48,6 +47,7 @@ from documents.models import WorkflowAction
|
||||
from documents.models import WorkflowRun
|
||||
from documents.models import WorkflowTrigger
|
||||
from documents.permissions import get_objects_for_user_owner_aware
|
||||
from documents.plugins.helpers import DocumentsStatusManager
|
||||
from documents.templating.utils import convert_format_str_to_template_format
|
||||
from documents.workflows.actions import build_workflow_action_context
|
||||
from documents.workflows.actions import execute_email_action
|
||||
@@ -69,6 +69,7 @@ if TYPE_CHECKING:
|
||||
from documents.data_models import DocumentMetadataOverrides
|
||||
|
||||
logger = logging.getLogger("paperless.handlers")
|
||||
DRF_DATETIME_FIELD = serializers.DateTimeField()
|
||||
|
||||
|
||||
def add_inbox_tags(sender, document: Document, logging_group=None, **kwargs) -> None:
|
||||
@@ -84,41 +85,47 @@ def add_inbox_tags(sender, document: Document, logging_group=None, **kwargs) ->
|
||||
document.add_nested_tags(inbox_tags)
|
||||
|
||||
|
||||
def _suggestion_printer(
|
||||
stdout,
|
||||
style_func,
|
||||
suggestion_type: str,
|
||||
document: Document,
|
||||
selected: MatchingModel,
|
||||
base_url: str | None = None,
|
||||
) -> None:
|
||||
"""
|
||||
Smaller helper to reduce duplication when just outputting suggestions to the console
|
||||
"""
|
||||
doc_str = str(document)
|
||||
if base_url is not None:
|
||||
stdout.write(style_func.SUCCESS(doc_str))
|
||||
stdout.write(style_func.SUCCESS(f"{base_url}/documents/{document.pk}"))
|
||||
else:
|
||||
stdout.write(style_func.SUCCESS(f"{doc_str} [{document.pk}]"))
|
||||
stdout.write(f"Suggest {suggestion_type}: {selected}")
|
||||
|
||||
|
||||
def set_correspondent(
|
||||
sender: object,
|
||||
sender,
|
||||
document: Document,
|
||||
*,
|
||||
logging_group: object = None,
|
||||
logging_group=None,
|
||||
classifier: DocumentClassifier | None = None,
|
||||
replace: bool = False,
|
||||
use_first: bool = True,
|
||||
dry_run: bool = False,
|
||||
**kwargs: Any,
|
||||
) -> Correspondent | None:
|
||||
"""
|
||||
Assign a correspondent to a document based on classifier results.
|
||||
|
||||
Args:
|
||||
document: The document to classify.
|
||||
logging_group: Optional logging group for structured log output.
|
||||
classifier: The trained classifier. If None, only rule-based matching runs.
|
||||
replace: If True, overwrite an existing correspondent assignment.
|
||||
use_first: If True, pick the first match when multiple correspondents
|
||||
match. If False, skip assignment when multiple match.
|
||||
dry_run: If True, compute and return the selection without saving.
|
||||
**kwargs: Absorbed for Django signal compatibility (e.g. sender, signal).
|
||||
|
||||
Returns:
|
||||
The correspondent that was (or would be) assigned, or None if no match
|
||||
was found or assignment was skipped.
|
||||
"""
|
||||
replace=False,
|
||||
use_first=True,
|
||||
suggest=False,
|
||||
base_url=None,
|
||||
stdout=None,
|
||||
style_func=None,
|
||||
**kwargs,
|
||||
) -> None:
|
||||
if document.correspondent and not replace:
|
||||
return None
|
||||
return
|
||||
|
||||
potential_correspondents = matching.match_correspondents(document, classifier)
|
||||
|
||||
potential_count = len(potential_correspondents)
|
||||
selected = potential_correspondents[0] if potential_correspondents else None
|
||||
|
||||
if potential_count > 1:
|
||||
if use_first:
|
||||
logger.debug(
|
||||
@@ -132,53 +139,49 @@ def set_correspondent(
|
||||
f"not assigning any correspondent",
|
||||
extra={"group": logging_group},
|
||||
)
|
||||
return None
|
||||
return
|
||||
|
||||
if (selected or replace) and not dry_run:
|
||||
logger.info(
|
||||
f"Assigning correspondent {selected} to {document}",
|
||||
extra={"group": logging_group},
|
||||
)
|
||||
document.correspondent = selected
|
||||
document.save(update_fields=("correspondent",))
|
||||
if selected or replace:
|
||||
if suggest:
|
||||
_suggestion_printer(
|
||||
stdout,
|
||||
style_func,
|
||||
"correspondent",
|
||||
document,
|
||||
selected,
|
||||
base_url,
|
||||
)
|
||||
else:
|
||||
logger.info(
|
||||
f"Assigning correspondent {selected} to {document}",
|
||||
extra={"group": logging_group},
|
||||
)
|
||||
|
||||
return selected
|
||||
document.correspondent = selected
|
||||
document.save(update_fields=("correspondent",))
|
||||
|
||||
|
||||
def set_document_type(
|
||||
sender: object,
|
||||
sender,
|
||||
document: Document,
|
||||
*,
|
||||
logging_group: object = None,
|
||||
logging_group=None,
|
||||
classifier: DocumentClassifier | None = None,
|
||||
replace: bool = False,
|
||||
use_first: bool = True,
|
||||
dry_run: bool = False,
|
||||
**kwargs: Any,
|
||||
) -> DocumentType | None:
|
||||
"""
|
||||
Assign a document type to a document based on classifier results.
|
||||
|
||||
Args:
|
||||
document: The document to classify.
|
||||
logging_group: Optional logging group for structured log output.
|
||||
classifier: The trained classifier. If None, only rule-based matching runs.
|
||||
replace: If True, overwrite an existing document type assignment.
|
||||
use_first: If True, pick the first match when multiple types match.
|
||||
If False, skip assignment when multiple match.
|
||||
dry_run: If True, compute and return the selection without saving.
|
||||
**kwargs: Absorbed for Django signal compatibility (e.g. sender, signal).
|
||||
|
||||
Returns:
|
||||
The document type that was (or would be) assigned, or None if no match
|
||||
was found or assignment was skipped.
|
||||
"""
|
||||
replace=False,
|
||||
use_first=True,
|
||||
suggest=False,
|
||||
base_url=None,
|
||||
stdout=None,
|
||||
style_func=None,
|
||||
**kwargs,
|
||||
) -> None:
|
||||
if document.document_type and not replace:
|
||||
return None
|
||||
return
|
||||
|
||||
potential_document_types = matching.match_document_types(document, classifier)
|
||||
potential_count = len(potential_document_types)
|
||||
selected = potential_document_types[0] if potential_document_types else None
|
||||
potential_document_type = matching.match_document_types(document, classifier)
|
||||
|
||||
potential_count = len(potential_document_type)
|
||||
selected = potential_document_type[0] if potential_document_type else None
|
||||
|
||||
if potential_count > 1:
|
||||
if use_first:
|
||||
@@ -193,64 +196,42 @@ def set_document_type(
|
||||
f"not assigning any document type",
|
||||
extra={"group": logging_group},
|
||||
)
|
||||
return None
|
||||
return
|
||||
|
||||
if (selected or replace) and not dry_run:
|
||||
logger.info(
|
||||
f"Assigning document type {selected} to {document}",
|
||||
extra={"group": logging_group},
|
||||
)
|
||||
document.document_type = selected
|
||||
document.save(update_fields=("document_type",))
|
||||
if selected or replace:
|
||||
if suggest:
|
||||
_suggestion_printer(
|
||||
stdout,
|
||||
style_func,
|
||||
"document type",
|
||||
document,
|
||||
selected,
|
||||
base_url,
|
||||
)
|
||||
else:
|
||||
logger.info(
|
||||
f"Assigning document type {selected} to {document}",
|
||||
extra={"group": logging_group},
|
||||
)
|
||||
|
||||
return selected
|
||||
document.document_type = selected
|
||||
document.save(update_fields=("document_type",))
|
||||
|
||||
|
||||
def set_tags(
|
||||
sender: object,
|
||||
sender,
|
||||
document: Document,
|
||||
*,
|
||||
logging_group: object = None,
|
||||
logging_group=None,
|
||||
classifier: DocumentClassifier | None = None,
|
||||
replace: bool = False,
|
||||
dry_run: bool = False,
|
||||
**kwargs: Any,
|
||||
) -> tuple[set[Tag], set[Tag]]:
|
||||
"""
|
||||
Assign tags to a document based on classifier results.
|
||||
|
||||
When replace=True, existing auto-matched and rule-matched tags are removed
|
||||
before applying the new set (inbox tags and manually-added tags are preserved).
|
||||
|
||||
Args:
|
||||
document: The document to classify.
|
||||
logging_group: Optional logging group for structured log output.
|
||||
classifier: The trained classifier. If None, only rule-based matching runs.
|
||||
replace: If True, remove existing classifier-managed tags before applying
|
||||
new ones. Inbox tags and manually-added tags are always preserved.
|
||||
dry_run: If True, compute what would change without saving anything.
|
||||
**kwargs: Absorbed for Django signal compatibility (e.g. sender, signal).
|
||||
|
||||
Returns:
|
||||
A two-tuple of (tags_added, tags_removed). In non-replace mode,
|
||||
tags_removed is always an empty set. In dry_run mode, neither set
|
||||
is applied to the database.
|
||||
"""
|
||||
# Compute which tags would be removed under replace mode.
|
||||
# The filter mirrors the .delete() call below: keep inbox tags and
|
||||
# manually-added tags (match="" and not auto-matched).
|
||||
replace=False,
|
||||
suggest=False,
|
||||
base_url=None,
|
||||
stdout=None,
|
||||
style_func=None,
|
||||
**kwargs,
|
||||
) -> None:
|
||||
if replace:
|
||||
tags_to_remove: set[Tag] = set(
|
||||
document.tags.exclude(
|
||||
is_inbox_tag=True,
|
||||
).exclude(
|
||||
Q(match="") & ~Q(matching_algorithm=Tag.MATCH_AUTO),
|
||||
),
|
||||
)
|
||||
else:
|
||||
tags_to_remove = set()
|
||||
|
||||
if replace and not dry_run:
|
||||
Document.tags.through.objects.filter(document=document).exclude(
|
||||
Q(tag__is_inbox_tag=True),
|
||||
).exclude(
|
||||
@@ -258,53 +239,65 @@ def set_tags(
|
||||
).delete()
|
||||
|
||||
current_tags = set(document.tags.all())
|
||||
matched_tags = matching.match_tags(document, classifier)
|
||||
tags_to_add = set(matched_tags) - current_tags
|
||||
|
||||
if tags_to_add and not dry_run:
|
||||
matched_tags = matching.match_tags(document, classifier)
|
||||
|
||||
relevant_tags = set(matched_tags) - current_tags
|
||||
|
||||
if suggest:
|
||||
extra_tags = current_tags - set(matched_tags)
|
||||
extra_tags = [
|
||||
t for t in extra_tags if t.matching_algorithm == MatchingModel.MATCH_AUTO
|
||||
]
|
||||
if not relevant_tags and not extra_tags:
|
||||
return
|
||||
doc_str = style_func.SUCCESS(str(document))
|
||||
if base_url:
|
||||
stdout.write(doc_str)
|
||||
stdout.write(f"{base_url}/documents/{document.pk}")
|
||||
else:
|
||||
stdout.write(doc_str + style_func.SUCCESS(f" [{document.pk}]"))
|
||||
if relevant_tags:
|
||||
stdout.write("Suggest tags: " + ", ".join([t.name for t in relevant_tags]))
|
||||
if extra_tags:
|
||||
stdout.write("Extra tags: " + ", ".join([t.name for t in extra_tags]))
|
||||
else:
|
||||
if not relevant_tags:
|
||||
return
|
||||
|
||||
message = 'Tagging "{}" with "{}"'
|
||||
logger.info(
|
||||
f'Tagging "{document}" with "{", ".join(t.name for t in tags_to_add)}"',
|
||||
message.format(document, ", ".join([t.name for t in relevant_tags])),
|
||||
extra={"group": logging_group},
|
||||
)
|
||||
document.add_nested_tags(tags_to_add)
|
||||
|
||||
return tags_to_add, tags_to_remove
|
||||
document.add_nested_tags(relevant_tags)
|
||||
|
||||
|
||||
def set_storage_path(
|
||||
sender: object,
|
||||
sender,
|
||||
document: Document,
|
||||
*,
|
||||
logging_group: object = None,
|
||||
logging_group=None,
|
||||
classifier: DocumentClassifier | None = None,
|
||||
replace: bool = False,
|
||||
use_first: bool = True,
|
||||
dry_run: bool = False,
|
||||
**kwargs: Any,
|
||||
) -> StoragePath | None:
|
||||
"""
|
||||
Assign a storage path to a document based on classifier results.
|
||||
|
||||
Args:
|
||||
document: The document to classify.
|
||||
logging_group: Optional logging group for structured log output.
|
||||
classifier: The trained classifier. If None, only rule-based matching runs.
|
||||
replace: If True, overwrite an existing storage path assignment.
|
||||
use_first: If True, pick the first match when multiple paths match.
|
||||
If False, skip assignment when multiple match.
|
||||
dry_run: If True, compute and return the selection without saving.
|
||||
**kwargs: Absorbed for Django signal compatibility (e.g. sender, signal).
|
||||
|
||||
Returns:
|
||||
The storage path that was (or would be) assigned, or None if no match
|
||||
was found or assignment was skipped.
|
||||
"""
|
||||
replace=False,
|
||||
use_first=True,
|
||||
suggest=False,
|
||||
base_url=None,
|
||||
stdout=None,
|
||||
style_func=None,
|
||||
**kwargs,
|
||||
) -> None:
|
||||
if document.storage_path and not replace:
|
||||
return None
|
||||
return
|
||||
|
||||
potential_storage_paths = matching.match_storage_paths(document, classifier)
|
||||
potential_count = len(potential_storage_paths)
|
||||
selected = potential_storage_paths[0] if potential_storage_paths else None
|
||||
potential_storage_path = matching.match_storage_paths(
|
||||
document,
|
||||
classifier,
|
||||
)
|
||||
|
||||
potential_count = len(potential_storage_path)
|
||||
selected = potential_storage_path[0] if potential_storage_path else None
|
||||
|
||||
if potential_count > 1:
|
||||
if use_first:
|
||||
@@ -319,17 +312,26 @@ def set_storage_path(
|
||||
f"not assigning any storage directory",
|
||||
extra={"group": logging_group},
|
||||
)
|
||||
return None
|
||||
return
|
||||
|
||||
if (selected or replace) and not dry_run:
|
||||
logger.info(
|
||||
f"Assigning storage path {selected} to {document}",
|
||||
extra={"group": logging_group},
|
||||
)
|
||||
document.storage_path = selected
|
||||
document.save(update_fields=("storage_path",))
|
||||
if selected or replace:
|
||||
if suggest:
|
||||
_suggestion_printer(
|
||||
stdout,
|
||||
style_func,
|
||||
"storage directory",
|
||||
document,
|
||||
selected,
|
||||
base_url,
|
||||
)
|
||||
else:
|
||||
logger.info(
|
||||
f"Assigning storage path {selected} to {document}",
|
||||
extra={"group": logging_group},
|
||||
)
|
||||
|
||||
return selected
|
||||
document.storage_path = selected
|
||||
document.save(update_fields=("storage_path",))
|
||||
|
||||
|
||||
# see empty_trash in documents/tasks.py for signal handling
|
||||
@@ -764,6 +766,28 @@ def run_workflows_updated(
|
||||
)
|
||||
|
||||
|
||||
def send_websocket_document_updated(
|
||||
sender,
|
||||
document: Document,
|
||||
**kwargs,
|
||||
) -> None:
|
||||
# At this point, workflows may already have applied additional changes.
|
||||
document.refresh_from_db()
|
||||
|
||||
from documents.data_models import DocumentMetadataOverrides
|
||||
|
||||
doc_overrides = DocumentMetadataOverrides.from_document(document)
|
||||
|
||||
with DocumentsStatusManager() as status_mgr:
|
||||
status_mgr.send_document_updated(
|
||||
document_id=document.id,
|
||||
modified=DRF_DATETIME_FIELD.to_representation(document.modified),
|
||||
owner_id=doc_overrides.owner_id,
|
||||
users_can_view=doc_overrides.view_users,
|
||||
groups_can_view=doc_overrides.view_groups,
|
||||
)
|
||||
|
||||
|
||||
def run_workflows(
|
||||
trigger_type: WorkflowTrigger.WorkflowTriggerType,
|
||||
document: Document | ConsumableDocument,
|
||||
@@ -1037,7 +1061,11 @@ def add_or_update_document_in_llm_index(sender, document, **kwargs):
|
||||
|
||||
|
||||
@receiver(models.signals.post_delete, sender=Document)
|
||||
def delete_document_from_llm_index(sender, instance: Document, **kwargs):
|
||||
def delete_document_from_llm_index(
|
||||
sender: Any,
|
||||
instance: Document,
|
||||
**kwargs: Any,
|
||||
) -> None:
|
||||
"""
|
||||
Delete a document from the LLM index when it is deleted.
|
||||
"""
|
||||
|
||||
@@ -60,6 +60,7 @@ from documents.sanity_checker import SanityCheckFailedException
|
||||
from documents.signals import document_updated
|
||||
from documents.signals.handlers import cleanup_document_deletion
|
||||
from documents.signals.handlers import run_workflows
|
||||
from documents.signals.handlers import send_websocket_document_updated
|
||||
from documents.workflows.utils import get_workflows_for_trigger
|
||||
from paperless.config import AIConfig
|
||||
from paperless_ai.indexing import llm_index_add_or_update_document
|
||||
@@ -541,6 +542,11 @@ def check_scheduled_workflows() -> None:
|
||||
workflow_to_run=workflow,
|
||||
document=document,
|
||||
)
|
||||
# Scheduled workflows dont send document_updated signal, so send a websocket update here to ensure clients are updated
|
||||
send_websocket_document_updated(
|
||||
sender=None,
|
||||
document=document,
|
||||
)
|
||||
|
||||
|
||||
def update_document_parent_tags(tag: Tag, new_parent: Tag) -> None:
|
||||
|
||||
@@ -1,65 +1,17 @@
|
||||
"""
|
||||
Factory-boy factories for documents app models.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import factory
|
||||
from factory import Faker
|
||||
from factory.django import DjangoModelFactory
|
||||
|
||||
from documents.models import Correspondent
|
||||
from documents.models import Document
|
||||
from documents.models import DocumentType
|
||||
from documents.models import MatchingModel
|
||||
from documents.models import StoragePath
|
||||
from documents.models import Tag
|
||||
|
||||
|
||||
class CorrespondentFactory(DjangoModelFactory):
|
||||
class Meta:
|
||||
model = Correspondent
|
||||
|
||||
name = factory.Faker("company")
|
||||
match = ""
|
||||
matching_algorithm = MatchingModel.MATCH_NONE
|
||||
|
||||
|
||||
class DocumentTypeFactory(DjangoModelFactory):
|
||||
class Meta:
|
||||
model = DocumentType
|
||||
|
||||
name = factory.Faker("bs")
|
||||
match = ""
|
||||
matching_algorithm = MatchingModel.MATCH_NONE
|
||||
|
||||
|
||||
class TagFactory(DjangoModelFactory):
|
||||
class Meta:
|
||||
model = Tag
|
||||
|
||||
name = factory.Faker("word")
|
||||
match = ""
|
||||
matching_algorithm = MatchingModel.MATCH_NONE
|
||||
is_inbox_tag = False
|
||||
|
||||
|
||||
class StoragePathFactory(DjangoModelFactory):
|
||||
class Meta:
|
||||
model = StoragePath
|
||||
|
||||
name = factory.Faker("file_path", depth=2, extension="")
|
||||
path = factory.LazyAttribute(lambda o: f"{o.name}/{{title}}")
|
||||
match = ""
|
||||
matching_algorithm = MatchingModel.MATCH_NONE
|
||||
name = Faker("name")
|
||||
|
||||
|
||||
class DocumentFactory(DjangoModelFactory):
|
||||
class Meta:
|
||||
model = Document
|
||||
|
||||
title = factory.Faker("sentence", nb_words=4)
|
||||
checksum = factory.Faker("md5")
|
||||
content = factory.Faker("paragraph")
|
||||
correspondent = None
|
||||
document_type = None
|
||||
storage_path = None
|
||||
|
||||
@@ -1270,7 +1270,11 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
|
||||
input_doc, overrides = self.get_last_consume_delay_call_args()
|
||||
|
||||
self.assertEqual(input_doc.original_file.name, "simple.pdf")
|
||||
self.assertIn(Path(settings.SCRATCH_DIR), input_doc.original_file.parents)
|
||||
self.assertTrue(
|
||||
input_doc.original_file.resolve(strict=False).is_relative_to(
|
||||
Path(settings.SCRATCH_DIR).resolve(strict=False),
|
||||
),
|
||||
)
|
||||
self.assertIsNone(overrides.title)
|
||||
self.assertIsNone(overrides.correspondent_id)
|
||||
self.assertIsNone(overrides.document_type_id)
|
||||
@@ -1351,7 +1355,11 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
|
||||
input_doc, overrides = self.get_last_consume_delay_call_args()
|
||||
|
||||
self.assertEqual(input_doc.original_file.name, "simple.pdf")
|
||||
self.assertIn(Path(settings.SCRATCH_DIR), input_doc.original_file.parents)
|
||||
self.assertTrue(
|
||||
input_doc.original_file.resolve(strict=False).is_relative_to(
|
||||
Path(settings.SCRATCH_DIR).resolve(strict=False),
|
||||
),
|
||||
)
|
||||
self.assertIsNone(overrides.title)
|
||||
self.assertIsNone(overrides.correspondent_id)
|
||||
self.assertIsNone(overrides.document_type_id)
|
||||
|
||||
@@ -1,358 +1,298 @@
|
||||
"""
|
||||
Tests for the document_retagger management command.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
from django.core.management import call_command
|
||||
from django.core.management.base import CommandError
|
||||
from django.test import TestCase
|
||||
|
||||
from documents.models import Correspondent
|
||||
from documents.models import Document
|
||||
from documents.models import DocumentType
|
||||
from documents.models import MatchingModel
|
||||
from documents.models import StoragePath
|
||||
from documents.models import Tag
|
||||
from documents.tests.factories import CorrespondentFactory
|
||||
from documents.tests.factories import DocumentFactory
|
||||
from documents.tests.factories import DocumentTypeFactory
|
||||
from documents.tests.factories import StoragePathFactory
|
||||
from documents.tests.factories import TagFactory
|
||||
from documents.tests.utils import DirectoriesMixin
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Module-level type aliases
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
StoragePathTuple = tuple[StoragePath, StoragePath, StoragePath]
|
||||
TagTuple = tuple[Tag, Tag, Tag, Tag, Tag]
|
||||
CorrespondentTuple = tuple[Correspondent, Correspondent]
|
||||
DocumentTypeTuple = tuple[DocumentType, DocumentType]
|
||||
DocumentTuple = tuple[Document, Document, Document, Document]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fixtures
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def storage_paths(db) -> StoragePathTuple:
|
||||
"""Three storage paths with varying match rules."""
|
||||
sp1 = StoragePathFactory(
|
||||
path="{created_data}/{title}",
|
||||
match="auto document",
|
||||
matching_algorithm=MatchingModel.MATCH_LITERAL,
|
||||
)
|
||||
sp2 = StoragePathFactory(
|
||||
path="{title}",
|
||||
match="^first|^unrelated",
|
||||
matching_algorithm=MatchingModel.MATCH_REGEX,
|
||||
)
|
||||
sp3 = StoragePathFactory(
|
||||
path="{title}",
|
||||
match="^blah",
|
||||
matching_algorithm=MatchingModel.MATCH_REGEX,
|
||||
)
|
||||
return sp1, sp2, sp3
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def tags(db) -> TagTuple:
|
||||
"""Tags covering the common matching scenarios."""
|
||||
tag_first = TagFactory(match="first", matching_algorithm=Tag.MATCH_ANY)
|
||||
tag_second = TagFactory(match="second", matching_algorithm=Tag.MATCH_ANY)
|
||||
tag_inbox = TagFactory(is_inbox_tag=True)
|
||||
tag_no_match = TagFactory()
|
||||
tag_auto = TagFactory(matching_algorithm=Tag.MATCH_AUTO)
|
||||
return tag_first, tag_second, tag_inbox, tag_no_match, tag_auto
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def correspondents(db) -> CorrespondentTuple:
|
||||
"""Two correspondents matching 'first' and 'second' content."""
|
||||
c_first = CorrespondentFactory(
|
||||
match="first",
|
||||
matching_algorithm=MatchingModel.MATCH_ANY,
|
||||
)
|
||||
c_second = CorrespondentFactory(
|
||||
match="second",
|
||||
matching_algorithm=MatchingModel.MATCH_ANY,
|
||||
)
|
||||
return c_first, c_second
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def document_types(db) -> DocumentTypeTuple:
|
||||
"""Two document types matching 'first' and 'second' content."""
|
||||
dt_first = DocumentTypeFactory(
|
||||
match="first",
|
||||
matching_algorithm=MatchingModel.MATCH_ANY,
|
||||
)
|
||||
dt_second = DocumentTypeFactory(
|
||||
match="second",
|
||||
matching_algorithm=MatchingModel.MATCH_ANY,
|
||||
)
|
||||
return dt_first, dt_second
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def documents(storage_paths: StoragePathTuple, tags: TagTuple) -> DocumentTuple:
|
||||
"""Four documents with varied content used across most retagger tests."""
|
||||
_, _, sp3 = storage_paths
|
||||
_, _, tag_inbox, tag_no_match, tag_auto = tags
|
||||
|
||||
d1 = DocumentFactory(checksum="A", title="A", content="first document")
|
||||
d2 = DocumentFactory(checksum="B", title="B", content="second document")
|
||||
d3 = DocumentFactory(
|
||||
checksum="C",
|
||||
title="C",
|
||||
content="unrelated document",
|
||||
storage_path=sp3,
|
||||
)
|
||||
d4 = DocumentFactory(checksum="D", title="D", content="auto document")
|
||||
|
||||
d3.tags.add(tag_inbox, tag_no_match)
|
||||
d4.tags.add(tag_auto)
|
||||
|
||||
return d1, d2, d3, d4
|
||||
|
||||
|
||||
def _get_docs() -> DocumentTuple:
|
||||
return (
|
||||
Document.objects.get(title="A"),
|
||||
Document.objects.get(title="B"),
|
||||
Document.objects.get(title="C"),
|
||||
Document.objects.get(title="D"),
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tag assignment
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.management
|
||||
@pytest.mark.django_db
|
||||
class TestRetaggerTags(DirectoriesMixin):
|
||||
@pytest.mark.usefixtures("documents")
|
||||
def test_add_tags(self, tags: TagTuple) -> None:
|
||||
tag_first, tag_second, *_ = tags
|
||||
class TestRetagger(DirectoriesMixin, TestCase):
|
||||
def make_models(self) -> None:
|
||||
self.sp1 = StoragePath.objects.create(
|
||||
name="dummy a",
|
||||
path="{created_data}/{title}",
|
||||
match="auto document",
|
||||
matching_algorithm=StoragePath.MATCH_LITERAL,
|
||||
)
|
||||
self.sp2 = StoragePath.objects.create(
|
||||
name="dummy b",
|
||||
path="{title}",
|
||||
match="^first|^unrelated",
|
||||
matching_algorithm=StoragePath.MATCH_REGEX,
|
||||
)
|
||||
|
||||
self.sp3 = StoragePath.objects.create(
|
||||
name="dummy c",
|
||||
path="{title}",
|
||||
match="^blah",
|
||||
matching_algorithm=StoragePath.MATCH_REGEX,
|
||||
)
|
||||
|
||||
self.d1 = Document.objects.create(
|
||||
checksum="A",
|
||||
title="A",
|
||||
content="first document",
|
||||
)
|
||||
self.d2 = Document.objects.create(
|
||||
checksum="B",
|
||||
title="B",
|
||||
content="second document",
|
||||
)
|
||||
self.d3 = Document.objects.create(
|
||||
checksum="C",
|
||||
title="C",
|
||||
content="unrelated document",
|
||||
storage_path=self.sp3,
|
||||
)
|
||||
self.d4 = Document.objects.create(
|
||||
checksum="D",
|
||||
title="D",
|
||||
content="auto document",
|
||||
)
|
||||
|
||||
self.tag_first = Tag.objects.create(
|
||||
name="tag1",
|
||||
match="first",
|
||||
matching_algorithm=Tag.MATCH_ANY,
|
||||
)
|
||||
self.tag_second = Tag.objects.create(
|
||||
name="tag2",
|
||||
match="second",
|
||||
matching_algorithm=Tag.MATCH_ANY,
|
||||
)
|
||||
self.tag_inbox = Tag.objects.create(name="test", is_inbox_tag=True)
|
||||
self.tag_no_match = Tag.objects.create(name="test2")
|
||||
self.tag_auto = Tag.objects.create(
|
||||
name="tagauto",
|
||||
matching_algorithm=Tag.MATCH_AUTO,
|
||||
)
|
||||
|
||||
self.d3.tags.add(self.tag_inbox)
|
||||
self.d3.tags.add(self.tag_no_match)
|
||||
self.d4.tags.add(self.tag_auto)
|
||||
|
||||
self.correspondent_first = Correspondent.objects.create(
|
||||
name="c1",
|
||||
match="first",
|
||||
matching_algorithm=Correspondent.MATCH_ANY,
|
||||
)
|
||||
self.correspondent_second = Correspondent.objects.create(
|
||||
name="c2",
|
||||
match="second",
|
||||
matching_algorithm=Correspondent.MATCH_ANY,
|
||||
)
|
||||
|
||||
self.doctype_first = DocumentType.objects.create(
|
||||
name="dt1",
|
||||
match="first",
|
||||
matching_algorithm=DocumentType.MATCH_ANY,
|
||||
)
|
||||
self.doctype_second = DocumentType.objects.create(
|
||||
name="dt2",
|
||||
match="second",
|
||||
matching_algorithm=DocumentType.MATCH_ANY,
|
||||
)
|
||||
|
||||
def get_updated_docs(self):
|
||||
return (
|
||||
Document.objects.get(title="A"),
|
||||
Document.objects.get(title="B"),
|
||||
Document.objects.get(title="C"),
|
||||
Document.objects.get(title="D"),
|
||||
)
|
||||
|
||||
def setUp(self) -> None:
|
||||
super().setUp()
|
||||
self.make_models()
|
||||
|
||||
def test_add_tags(self) -> None:
|
||||
call_command("document_retagger", "--tags")
|
||||
d_first, d_second, d_unrelated, d_auto = _get_docs()
|
||||
d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
|
||||
|
||||
assert d_first.tags.count() == 1
|
||||
assert d_second.tags.count() == 1
|
||||
assert d_unrelated.tags.count() == 2
|
||||
assert d_auto.tags.count() == 1
|
||||
assert d_first.tags.first() == tag_first
|
||||
assert d_second.tags.first() == tag_second
|
||||
self.assertEqual(d_first.tags.count(), 1)
|
||||
self.assertEqual(d_second.tags.count(), 1)
|
||||
self.assertEqual(d_unrelated.tags.count(), 2)
|
||||
self.assertEqual(d_auto.tags.count(), 1)
|
||||
|
||||
def test_overwrite_removes_stale_tags_and_preserves_inbox(
|
||||
self,
|
||||
documents: DocumentTuple,
|
||||
tags: TagTuple,
|
||||
) -> None:
|
||||
d1, *_ = documents
|
||||
tag_first, tag_second, tag_inbox, tag_no_match, _ = tags
|
||||
d1.tags.add(tag_second)
|
||||
self.assertEqual(d_first.tags.first(), self.tag_first)
|
||||
self.assertEqual(d_second.tags.first(), self.tag_second)
|
||||
|
||||
def test_add_type(self) -> None:
|
||||
call_command("document_retagger", "--document_type")
|
||||
d_first, d_second, _, _ = self.get_updated_docs()
|
||||
|
||||
self.assertEqual(d_first.document_type, self.doctype_first)
|
||||
self.assertEqual(d_second.document_type, self.doctype_second)
|
||||
|
||||
def test_add_correspondent(self) -> None:
|
||||
call_command("document_retagger", "--correspondent")
|
||||
d_first, d_second, _, _ = self.get_updated_docs()
|
||||
|
||||
self.assertEqual(d_first.correspondent, self.correspondent_first)
|
||||
self.assertEqual(d_second.correspondent, self.correspondent_second)
|
||||
|
||||
def test_overwrite_preserve_inbox(self) -> None:
|
||||
self.d1.tags.add(self.tag_second)
|
||||
|
||||
call_command("document_retagger", "--tags", "--overwrite")
|
||||
|
||||
d_first, d_second, d_unrelated, d_auto = _get_docs()
|
||||
d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
|
||||
|
||||
assert Tag.objects.filter(id=tag_second.id).exists()
|
||||
assert list(d_first.tags.values_list("id", flat=True)) == [tag_first.id]
|
||||
assert list(d_second.tags.values_list("id", flat=True)) == [tag_second.id]
|
||||
assert set(d_unrelated.tags.values_list("id", flat=True)) == {
|
||||
tag_inbox.id,
|
||||
tag_no_match.id,
|
||||
}
|
||||
assert d_auto.tags.count() == 0
|
||||
self.assertIsNotNone(Tag.objects.get(id=self.tag_second.id))
|
||||
|
||||
@pytest.mark.usefixtures("documents")
|
||||
@pytest.mark.parametrize(
|
||||
"extra_args",
|
||||
[
|
||||
pytest.param([], id="no_base_url"),
|
||||
pytest.param(["--base-url=http://localhost"], id="with_base_url"),
|
||||
],
|
||||
)
|
||||
def test_suggest_does_not_apply_tags(self, extra_args: list[str]) -> None:
|
||||
call_command("document_retagger", "--tags", "--suggest", *extra_args)
|
||||
d_first, d_second, _, d_auto = _get_docs()
|
||||
self.assertCountEqual(
|
||||
[tag.id for tag in d_first.tags.all()],
|
||||
[self.tag_first.id],
|
||||
)
|
||||
self.assertCountEqual(
|
||||
[tag.id for tag in d_second.tags.all()],
|
||||
[self.tag_second.id],
|
||||
)
|
||||
self.assertCountEqual(
|
||||
[tag.id for tag in d_unrelated.tags.all()],
|
||||
[self.tag_inbox.id, self.tag_no_match.id],
|
||||
)
|
||||
self.assertEqual(d_auto.tags.count(), 0)
|
||||
|
||||
assert d_first.tags.count() == 0
|
||||
assert d_second.tags.count() == 0
|
||||
assert d_auto.tags.count() == 1
|
||||
def test_add_tags_suggest(self) -> None:
|
||||
call_command("document_retagger", "--tags", "--suggest")
|
||||
d_first, d_second, _, d_auto = self.get_updated_docs()
|
||||
|
||||
self.assertEqual(d_first.tags.count(), 0)
|
||||
self.assertEqual(d_second.tags.count(), 0)
|
||||
self.assertEqual(d_auto.tags.count(), 1)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Document type assignment
|
||||
# ---------------------------------------------------------------------------
|
||||
def test_add_type_suggest(self) -> None:
|
||||
call_command("document_retagger", "--document_type", "--suggest")
|
||||
d_first, d_second, _, _ = self.get_updated_docs()
|
||||
|
||||
self.assertIsNone(d_first.document_type)
|
||||
self.assertIsNone(d_second.document_type)
|
||||
|
||||
@pytest.mark.management
|
||||
@pytest.mark.django_db
|
||||
class TestRetaggerDocumentType(DirectoriesMixin):
|
||||
@pytest.mark.usefixtures("documents")
|
||||
def test_add_type(self, document_types: DocumentTypeTuple) -> None:
|
||||
dt_first, dt_second = document_types
|
||||
call_command("document_retagger", "--document_type")
|
||||
d_first, d_second, _, _ = _get_docs()
|
||||
def test_add_correspondent_suggest(self) -> None:
|
||||
call_command("document_retagger", "--correspondent", "--suggest")
|
||||
d_first, d_second, _, _ = self.get_updated_docs()
|
||||
|
||||
assert d_first.document_type == dt_first
|
||||
assert d_second.document_type == dt_second
|
||||
self.assertIsNone(d_first.correspondent)
|
||||
self.assertIsNone(d_second.correspondent)
|
||||
|
||||
@pytest.mark.usefixtures("documents", "document_types")
|
||||
@pytest.mark.parametrize(
|
||||
"extra_args",
|
||||
[
|
||||
pytest.param([], id="no_base_url"),
|
||||
pytest.param(["--base-url=http://localhost"], id="with_base_url"),
|
||||
],
|
||||
)
|
||||
def test_suggest_does_not_apply_document_type(self, extra_args: list[str]) -> None:
|
||||
call_command("document_retagger", "--document_type", "--suggest", *extra_args)
|
||||
d_first, d_second, _, _ = _get_docs()
|
||||
def test_add_tags_suggest_url(self) -> None:
|
||||
call_command(
|
||||
"document_retagger",
|
||||
"--tags",
|
||||
"--suggest",
|
||||
"--base-url=http://localhost",
|
||||
)
|
||||
d_first, d_second, _, d_auto = self.get_updated_docs()
|
||||
|
||||
assert d_first.document_type is None
|
||||
assert d_second.document_type is None
|
||||
self.assertEqual(d_first.tags.count(), 0)
|
||||
self.assertEqual(d_second.tags.count(), 0)
|
||||
self.assertEqual(d_auto.tags.count(), 1)
|
||||
|
||||
def test_add_type_suggest_url(self) -> None:
|
||||
call_command(
|
||||
"document_retagger",
|
||||
"--document_type",
|
||||
"--suggest",
|
||||
"--base-url=http://localhost",
|
||||
)
|
||||
d_first, d_second, _, _ = self.get_updated_docs()
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Correspondent assignment
|
||||
# ---------------------------------------------------------------------------
|
||||
self.assertIsNone(d_first.document_type)
|
||||
self.assertIsNone(d_second.document_type)
|
||||
|
||||
def test_add_correspondent_suggest_url(self) -> None:
|
||||
call_command(
|
||||
"document_retagger",
|
||||
"--correspondent",
|
||||
"--suggest",
|
||||
"--base-url=http://localhost",
|
||||
)
|
||||
d_first, d_second, _, _ = self.get_updated_docs()
|
||||
|
||||
@pytest.mark.management
|
||||
@pytest.mark.django_db
|
||||
class TestRetaggerCorrespondent(DirectoriesMixin):
|
||||
@pytest.mark.usefixtures("documents")
|
||||
def test_add_correspondent(self, correspondents: CorrespondentTuple) -> None:
|
||||
c_first, c_second = correspondents
|
||||
call_command("document_retagger", "--correspondent")
|
||||
d_first, d_second, _, _ = _get_docs()
|
||||
self.assertIsNone(d_first.correspondent)
|
||||
self.assertIsNone(d_second.correspondent)
|
||||
|
||||
assert d_first.correspondent == c_first
|
||||
assert d_second.correspondent == c_second
|
||||
|
||||
@pytest.mark.usefixtures("documents", "correspondents")
|
||||
@pytest.mark.parametrize(
|
||||
"extra_args",
|
||||
[
|
||||
pytest.param([], id="no_base_url"),
|
||||
pytest.param(["--base-url=http://localhost"], id="with_base_url"),
|
||||
],
|
||||
)
|
||||
def test_suggest_does_not_apply_correspondent(self, extra_args: list[str]) -> None:
|
||||
call_command("document_retagger", "--correspondent", "--suggest", *extra_args)
|
||||
d_first, d_second, _, _ = _get_docs()
|
||||
|
||||
assert d_first.correspondent is None
|
||||
assert d_second.correspondent is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Storage path assignment
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.management
|
||||
@pytest.mark.django_db
|
||||
class TestRetaggerStoragePath(DirectoriesMixin):
|
||||
@pytest.mark.usefixtures("documents")
|
||||
def test_add_storage_path(self, storage_paths: StoragePathTuple) -> None:
|
||||
def test_add_storage_path(self) -> None:
|
||||
"""
|
||||
GIVEN documents matching various storage path rules
|
||||
WHEN document_retagger --storage_path is called
|
||||
THEN matching documents get the correct path; existing path is unchanged
|
||||
GIVEN:
|
||||
- 2 storage paths with documents which match them
|
||||
- 1 document which matches but has a storage path
|
||||
WHEN:
|
||||
- document retagger is called
|
||||
THEN:
|
||||
- Matching document's storage paths updated
|
||||
- Non-matching documents have no storage path
|
||||
- Existing storage patch left unchanged
|
||||
"""
|
||||
sp1, sp2, sp3 = storage_paths
|
||||
call_command("document_retagger", "--storage_path")
|
||||
d_first, d_second, d_unrelated, d_auto = _get_docs()
|
||||
call_command(
|
||||
"document_retagger",
|
||||
"--storage_path",
|
||||
)
|
||||
d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
|
||||
|
||||
assert d_first.storage_path == sp2
|
||||
assert d_auto.storage_path == sp1
|
||||
assert d_second.storage_path is None
|
||||
assert d_unrelated.storage_path == sp3
|
||||
self.assertEqual(d_first.storage_path, self.sp2)
|
||||
self.assertEqual(d_auto.storage_path, self.sp1)
|
||||
self.assertIsNone(d_second.storage_path)
|
||||
self.assertEqual(d_unrelated.storage_path, self.sp3)
|
||||
|
||||
@pytest.mark.usefixtures("documents")
|
||||
def test_overwrite_storage_path(self, storage_paths: StoragePathTuple) -> None:
|
||||
def test_overwrite_storage_path(self) -> None:
|
||||
"""
|
||||
GIVEN a document with an existing storage path that matches a different rule
|
||||
WHEN document_retagger --storage_path --overwrite is called
|
||||
THEN the existing path is replaced by the newly matched path
|
||||
GIVEN:
|
||||
- 2 storage paths with documents which match them
|
||||
- 1 document which matches but has a storage path
|
||||
WHEN:
|
||||
- document retagger is called with overwrite
|
||||
THEN:
|
||||
- Matching document's storage paths updated
|
||||
- Non-matching documents have no storage path
|
||||
- Existing storage patch overwritten
|
||||
"""
|
||||
sp1, sp2, _ = storage_paths
|
||||
call_command("document_retagger", "--storage_path", "--overwrite")
|
||||
d_first, d_second, d_unrelated, d_auto = _get_docs()
|
||||
d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
|
||||
|
||||
assert d_first.storage_path == sp2
|
||||
assert d_auto.storage_path == sp1
|
||||
assert d_second.storage_path is None
|
||||
assert d_unrelated.storage_path == sp2
|
||||
self.assertEqual(d_first.storage_path, self.sp2)
|
||||
self.assertEqual(d_auto.storage_path, self.sp1)
|
||||
self.assertIsNone(d_second.storage_path)
|
||||
self.assertEqual(d_unrelated.storage_path, self.sp2)
|
||||
|
||||
def test_id_range_parameter(self) -> None:
|
||||
commandOutput = ""
|
||||
Document.objects.create(
|
||||
checksum="E",
|
||||
title="E",
|
||||
content="NOT the first document",
|
||||
)
|
||||
call_command("document_retagger", "--tags", "--id-range", "1", "2")
|
||||
# The retagger shouldn`t apply the 'first' tag to our new document
|
||||
self.assertEqual(Document.objects.filter(tags__id=self.tag_first.id).count(), 1)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ID range filtering
|
||||
# ---------------------------------------------------------------------------
|
||||
try:
|
||||
commandOutput = call_command("document_retagger", "--tags", "--id-range")
|
||||
except CommandError:
|
||||
# Just ignore the error
|
||||
None
|
||||
self.assertIn(commandOutput, "Error: argument --id-range: expected 2 arguments")
|
||||
|
||||
try:
|
||||
commandOutput = call_command(
|
||||
"document_retagger",
|
||||
"--tags",
|
||||
"--id-range",
|
||||
"a",
|
||||
"b",
|
||||
)
|
||||
except CommandError:
|
||||
# Just ignore the error
|
||||
None
|
||||
self.assertIn(commandOutput, "error: argument --id-range: invalid int value:")
|
||||
|
||||
@pytest.mark.management
|
||||
@pytest.mark.django_db
|
||||
class TestRetaggerIdRange(DirectoriesMixin):
|
||||
@pytest.mark.usefixtures("documents")
|
||||
@pytest.mark.parametrize(
|
||||
("id_range_args", "expected_count"),
|
||||
[
|
||||
pytest.param(["1", "2"], 1, id="narrow_range_limits_scope"),
|
||||
pytest.param(["1", "9999"], 2, id="wide_range_tags_all_matches"),
|
||||
],
|
||||
)
|
||||
def test_id_range_limits_scope(
|
||||
self,
|
||||
tags: TagTuple,
|
||||
id_range_args: list[str],
|
||||
expected_count: int,
|
||||
) -> None:
|
||||
DocumentFactory(content="NOT the first document")
|
||||
call_command("document_retagger", "--tags", "--id-range", *id_range_args)
|
||||
tag_first, *_ = tags
|
||||
assert Document.objects.filter(tags__id=tag_first.id).count() == expected_count
|
||||
|
||||
@pytest.mark.usefixtures("documents")
|
||||
@pytest.mark.parametrize(
|
||||
"args",
|
||||
[
|
||||
pytest.param(["--tags", "--id-range"], id="missing_both_values"),
|
||||
pytest.param(["--tags", "--id-range", "a", "b"], id="non_integer_values"),
|
||||
],
|
||||
)
|
||||
def test_id_range_invalid_arguments_raise(self, args: list[str]) -> None:
|
||||
with pytest.raises((CommandError, SystemExit)):
|
||||
call_command("document_retagger", *args)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Edge cases
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.management
|
||||
@pytest.mark.django_db
|
||||
class TestRetaggerEdgeCases(DirectoriesMixin):
|
||||
@pytest.mark.usefixtures("documents")
|
||||
def test_no_targets_exits_cleanly(self) -> None:
|
||||
"""Calling the retagger with no classifier targets should not raise."""
|
||||
call_command("document_retagger")
|
||||
|
||||
@pytest.mark.usefixtures("documents")
|
||||
def test_inbox_only_skips_non_inbox_documents(self) -> None:
|
||||
"""--inbox-only must restrict processing to documents with an inbox tag."""
|
||||
call_command("document_retagger", "--tags", "--inbox-only")
|
||||
d_first, _, d_unrelated, _ = _get_docs()
|
||||
|
||||
assert d_first.tags.count() == 0
|
||||
assert d_unrelated.tags.count() == 2
|
||||
call_command("document_retagger", "--tags", "--id-range", "1", "9999")
|
||||
# Now we should have 2 documents
|
||||
self.assertEqual(Document.objects.filter(tags__id=self.tag_first.id).count(), 2)
|
||||
|
||||
@@ -643,7 +643,9 @@ class TestWorkflows(
|
||||
|
||||
expected_str = f"Document did not match {w}"
|
||||
self.assertIn(expected_str, cm.output[0])
|
||||
expected_str = f"Document path {test_file} does not match"
|
||||
expected_str = (
|
||||
f"Document path {Path(test_file).resolve(strict=False)} does not match"
|
||||
)
|
||||
self.assertIn(expected_str, cm.output[1])
|
||||
|
||||
def test_workflow_no_match_mail_rule(self) -> None:
|
||||
@@ -1968,6 +1970,36 @@ class TestWorkflows(
|
||||
doc.refresh_from_db()
|
||||
self.assertEqual(doc.owner, self.user2)
|
||||
|
||||
@mock.patch("documents.tasks.send_websocket_document_updated")
|
||||
def test_workflow_scheduled_trigger_sends_websocket_update(
|
||||
self,
|
||||
mock_send_websocket_document_updated,
|
||||
) -> None:
|
||||
trigger = WorkflowTrigger.objects.create(
|
||||
type=WorkflowTrigger.WorkflowTriggerType.SCHEDULED,
|
||||
schedule_offset_days=1,
|
||||
schedule_date_field=WorkflowTrigger.ScheduleDateField.CREATED,
|
||||
)
|
||||
action = WorkflowAction.objects.create(assign_owner=self.user2)
|
||||
workflow = Workflow.objects.create(name="Workflow 1", order=0)
|
||||
workflow.triggers.add(trigger)
|
||||
workflow.actions.add(action)
|
||||
|
||||
doc = Document.objects.create(
|
||||
title="sample test",
|
||||
correspondent=self.c,
|
||||
original_filename="sample.pdf",
|
||||
created=timezone.now() - timedelta(days=2),
|
||||
)
|
||||
|
||||
tasks.check_scheduled_workflows()
|
||||
|
||||
self.assertEqual(mock_send_websocket_document_updated.call_count, 1)
|
||||
self.assertEqual(
|
||||
mock_send_websocket_document_updated.call_args.kwargs["document"].pk,
|
||||
doc.pk,
|
||||
)
|
||||
|
||||
def test_workflow_scheduled_trigger_added(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import json
|
||||
from typing import Any
|
||||
|
||||
from asgiref.sync import async_to_sync
|
||||
from channels.exceptions import AcceptConnection
|
||||
@@ -52,3 +53,10 @@ class StatusConsumer(WebsocketConsumer):
|
||||
self.close()
|
||||
else:
|
||||
self.send(json.dumps(event))
|
||||
|
||||
def document_updated(self, event: Any) -> None:
|
||||
if not self._authenticated():
|
||||
self.close()
|
||||
else:
|
||||
if self._can_view(event["data"]):
|
||||
self.send(json.dumps(event))
|
||||
|
||||
@@ -48,6 +48,20 @@ class TestWebSockets(TestCase):
|
||||
mock_close.assert_called_once()
|
||||
mock_close.reset_mock()
|
||||
|
||||
message = {
|
||||
"type": "document_updated",
|
||||
"data": {"document_id": 10, "modified": "2026-02-17T00:00:00Z"},
|
||||
}
|
||||
|
||||
await channel_layer.group_send(
|
||||
"status_updates",
|
||||
message,
|
||||
)
|
||||
await communicator.receive_nothing()
|
||||
|
||||
mock_close.assert_called_once()
|
||||
mock_close.reset_mock()
|
||||
|
||||
message = {"type": "documents_deleted", "data": {"documents": [1, 2, 3]}}
|
||||
|
||||
await channel_layer.group_send(
|
||||
@@ -158,6 +172,40 @@ class TestWebSockets(TestCase):
|
||||
|
||||
await communicator.disconnect()
|
||||
|
||||
@mock.patch("paperless.consumers.StatusConsumer._can_view")
|
||||
@mock.patch("paperless.consumers.StatusConsumer._authenticated")
|
||||
async def test_receive_document_updated(self, _authenticated, _can_view) -> None:
|
||||
_authenticated.return_value = True
|
||||
_can_view.return_value = True
|
||||
|
||||
communicator = WebsocketCommunicator(application, "/ws/status/")
|
||||
connected, _ = await communicator.connect()
|
||||
self.assertTrue(connected)
|
||||
|
||||
message = {
|
||||
"type": "document_updated",
|
||||
"data": {
|
||||
"document_id": 10,
|
||||
"modified": "2026-02-17T00:00:00Z",
|
||||
"owner_id": 1,
|
||||
"users_can_view": [1],
|
||||
"groups_can_view": [],
|
||||
},
|
||||
}
|
||||
|
||||
channel_layer = get_channel_layer()
|
||||
assert channel_layer is not None
|
||||
await channel_layer.group_send(
|
||||
"status_updates",
|
||||
message,
|
||||
)
|
||||
|
||||
response = await communicator.receive_json_from()
|
||||
|
||||
self.assertEqual(response, message)
|
||||
|
||||
await communicator.disconnect()
|
||||
|
||||
@mock.patch("channels.layers.InMemoryChannelLayer.group_send")
|
||||
def test_manager_send_progress(self, mock_group_send) -> None:
|
||||
with ProgressManager(task_id="test") as manager:
|
||||
@@ -190,7 +238,10 @@ class TestWebSockets(TestCase):
|
||||
)
|
||||
|
||||
@mock.patch("channels.layers.InMemoryChannelLayer.group_send")
|
||||
def test_manager_send_documents_deleted(self, mock_group_send) -> None:
|
||||
def test_manager_send_documents_deleted(
|
||||
self,
|
||||
mock_group_send: mock.MagicMock,
|
||||
) -> None:
|
||||
with DocumentsStatusManager() as manager:
|
||||
manager.send_documents_deleted([1, 2, 3])
|
||||
|
||||
|
||||
Reference in New Issue
Block a user