mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-03-04 16:26:24 +00:00
Compare commits
7 Commits
fix-doc-ve
...
feature-ex
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
77754e6780 | ||
|
|
49d1e773d9 | ||
|
|
5498503d60 | ||
|
|
16b58c2de5 | ||
|
|
c724fbb5d9 | ||
|
|
9c0f112e94 | ||
|
|
43406f44f2 |
@@ -30,7 +30,7 @@ RUN set -eux \
|
||||
# Purpose: Installs s6-overlay and rootfs
|
||||
# Comments:
|
||||
# - Don't leave anything extra in here either
|
||||
FROM ghcr.io/astral-sh/uv:0.10.5-python3.12-trixie-slim AS s6-overlay-base
|
||||
FROM ghcr.io/astral-sh/uv:0.10.7-python3.12-trixie-slim AS s6-overlay-base
|
||||
|
||||
WORKDIR /usr/src/s6
|
||||
|
||||
|
||||
@@ -262,10 +262,6 @@ your files differently, you can do that by adjusting the
|
||||
or using [storage paths (see below)](#storage-paths). Paperless adds the
|
||||
correct file extension e.g. `.pdf`, `.jpg` automatically.
|
||||
|
||||
When a document has file versions, each version uses the same naming rules and
|
||||
storage path resolution as any other document file, with an added version suffix
|
||||
such as `_v1`, `_v2`, etc.
|
||||
|
||||
This variable allows you to configure the filename (folders are allowed)
|
||||
using placeholders. For example, configuring this to
|
||||
|
||||
@@ -357,8 +353,6 @@ If paperless detects that two documents share the same filename,
|
||||
paperless will automatically append `_01`, `_02`, etc. to the filename.
|
||||
This happens if all the placeholders in a filename evaluate to the same
|
||||
value.
|
||||
For versioned files, this counter is appended after the version suffix
|
||||
(for example `statement_v2_01.pdf`).
|
||||
|
||||
If there are any errors in the placeholders included in `PAPERLESS_FILENAME_FORMAT`,
|
||||
paperless will fall back to using the default naming scheme instead.
|
||||
|
||||
@@ -95,7 +95,6 @@ Think of versions as **file history** for a document.
|
||||
|
||||
- Versions track the underlying file and extracted text content (OCR/text).
|
||||
- Metadata such as tags, correspondent, document type, storage path and custom fields stay on the "root" document.
|
||||
- Version files follow normal filename formatting (including storage paths) and add a `_vN` suffix (for example `_v1`, `_v2`).
|
||||
- By default, search and document content use the latest version.
|
||||
- In the document detail view, selecting a version switches the preview, file metadata and content (as well as the download and other action buttons) to that version.
|
||||
- Deleting a non-root version keeps metadata and falls back to the latest remaining version.
|
||||
|
||||
@@ -111,6 +111,7 @@ docs = [
|
||||
testing = [
|
||||
"daphne",
|
||||
"factory-boy~=3.3.1",
|
||||
"faker~=40.5.1",
|
||||
"imagehash",
|
||||
"pytest~=9.0.0",
|
||||
"pytest-cov~=7.0.0",
|
||||
@@ -304,6 +305,7 @@ markers = [
|
||||
"greenmail: Tests requiring Greenmail service",
|
||||
"date_parsing: Tests which cover date parsing from content or filename",
|
||||
"management: Tests which cover management commands/functionality",
|
||||
"profiling: Benchmarks that profile and compare implementation performance",
|
||||
]
|
||||
|
||||
[tool.pytest_env]
|
||||
|
||||
@@ -1238,8 +1238,8 @@
|
||||
<context context-type="linenumber">82</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="8035757452478567832" datatype="html">
|
||||
<source>Update existing document</source>
|
||||
<trans-unit id="7860582931776068318" datatype="html">
|
||||
<source>Add document version</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/admin/settings/settings.component.html</context>
|
||||
<context context-type="linenumber">280</context>
|
||||
@@ -8411,8 +8411,8 @@
|
||||
<context context-type="linenumber">832</context>
|
||||
</context-group>
|
||||
</trans-unit>
|
||||
<trans-unit id="6390006284731990222" datatype="html">
|
||||
<source>This operation will permanently rotate the original version of <x id="PH" equiv-text="this.list.selected.size"/> document(s).</source>
|
||||
<trans-unit id="5203024009814367559" datatype="html">
|
||||
<source>This operation will add rotated versions of the <x id="PH" equiv-text="this.list.selected.size"/> document(s).</source>
|
||||
<context-group purpose="location">
|
||||
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
|
||||
<context context-type="linenumber">833</context>
|
||||
|
||||
@@ -277,7 +277,7 @@
|
||||
<div class="col">
|
||||
<select class="form-select" formControlName="pdfEditorDefaultEditMode">
|
||||
<option [ngValue]="PdfEditorEditMode.Create" i18n>Create new document(s)</option>
|
||||
<option [ngValue]="PdfEditorEditMode.Update" i18n>Update existing document</option>
|
||||
<option [ngValue]="PdfEditorEditMode.Update" i18n>Add document version</option>
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -84,7 +84,7 @@
|
||||
<input type="radio" class="btn-check" [(ngModel)]="editMode" [value]="PdfEditorEditMode.Update" id="editModeUpdate" name="editmode" [disabled]="hasSplit()">
|
||||
<label for="editModeUpdate" class="btn btn-outline-primary btn-sm">
|
||||
<i-bs name="pencil"></i-bs>
|
||||
<span class="form-check-label ms-2" i18n>Update existing document</span>
|
||||
<span class="form-check-label ms-2" i18n>Add document version</span>
|
||||
</label>
|
||||
</div>
|
||||
@if (editMode === PdfEditorEditMode.Create) {
|
||||
|
||||
@@ -830,7 +830,7 @@ export class BulkEditorComponent
|
||||
})
|
||||
const rotateDialog = modal.componentInstance as RotateConfirmDialogComponent
|
||||
rotateDialog.title = $localize`Rotate confirm`
|
||||
rotateDialog.messageBold = $localize`This operation will permanently rotate the original version of ${this.list.selected.size} document(s).`
|
||||
rotateDialog.messageBold = $localize`This operation will add rotated versions of the ${this.list.selected.size} document(s).`
|
||||
rotateDialog.btnClass = 'btn-danger'
|
||||
rotateDialog.btnCaption = $localize`Proceed`
|
||||
rotateDialog.documentID = Array.from(this.list.selected)[0]
|
||||
|
||||
@@ -11,7 +11,6 @@ import magic
|
||||
from django.conf import settings
|
||||
from django.contrib.auth.models import User
|
||||
from django.db import transaction
|
||||
from django.db.models import Max
|
||||
from django.db.models import Q
|
||||
from django.utils import timezone
|
||||
from filelock import FileLock
|
||||
@@ -124,6 +123,22 @@ class ConsumerPluginMixin:
|
||||
|
||||
self.filename = self.metadata.filename or self.input_doc.original_file.name
|
||||
|
||||
if input_doc.root_document_id:
|
||||
self.log.debug(
|
||||
f"Document root document id: {input_doc.root_document_id}",
|
||||
)
|
||||
root_document = Document.objects.get(pk=input_doc.root_document_id)
|
||||
version_index = Document.objects.filter(root_document=root_document).count()
|
||||
filename_path = Path(self.filename)
|
||||
if filename_path.suffix:
|
||||
self.filename = str(
|
||||
filename_path.with_name(
|
||||
f"{filename_path.stem}_v{version_index}{filename_path.suffix}",
|
||||
),
|
||||
)
|
||||
else:
|
||||
self.filename = f"{self.filename}_v{version_index}"
|
||||
|
||||
def _send_progress(
|
||||
self,
|
||||
current_progress: int,
|
||||
@@ -178,19 +193,9 @@ class ConsumerPlugin(
|
||||
mime_type: str,
|
||||
) -> Document:
|
||||
self.log.debug("Saving record for updated version to database")
|
||||
root_doc_frozen = Document.objects.select_for_update().get(pk=root_doc.pk)
|
||||
next_version_index = (
|
||||
Document.global_objects.filter(
|
||||
root_document_id=root_doc_frozen.pk,
|
||||
).aggregate(
|
||||
max_index=Max("version_index"),
|
||||
)["max_index"]
|
||||
or 0
|
||||
)
|
||||
version_doc = Document.objects.get(pk=root_doc_frozen.pk)
|
||||
version_doc = Document.objects.get(pk=root_doc.pk)
|
||||
setattr(version_doc, "pk", None)
|
||||
version_doc.root_document = root_doc_frozen
|
||||
version_doc.version_index = next_version_index + 1
|
||||
version_doc.root_document = root_doc
|
||||
file_for_checksum = (
|
||||
self.unmodified_original
|
||||
if self.unmodified_original is not None
|
||||
@@ -203,6 +208,7 @@ class ConsumerPlugin(
|
||||
version_doc.page_count = page_count
|
||||
version_doc.mime_type = mime_type
|
||||
version_doc.original_filename = self.filename
|
||||
version_doc.storage_path = root_doc.storage_path
|
||||
# Clear unique file path fields so they can be generated uniquely later
|
||||
version_doc.filename = None
|
||||
version_doc.archive_filename = None
|
||||
|
||||
@@ -128,18 +128,11 @@ def generate_filename(
|
||||
counter=0,
|
||||
archive_filename=False,
|
||||
) -> Path:
|
||||
# version docs use the root document for formatting, just with a suffix
|
||||
context_doc = doc if doc.root_document_id is None else doc.root_document
|
||||
version_suffix = (
|
||||
f"_v{doc.version_index}"
|
||||
if doc.root_document_id is not None and doc.version_index is not None
|
||||
else ""
|
||||
)
|
||||
base_path: Path | None = None
|
||||
|
||||
# Determine the source of the format string
|
||||
if context_doc.storage_path is not None:
|
||||
filename_format = context_doc.storage_path.path
|
||||
if doc.storage_path is not None:
|
||||
filename_format = doc.storage_path.path
|
||||
elif settings.FILENAME_FORMAT is not None:
|
||||
# Maybe convert old to new style
|
||||
filename_format = convert_format_str_to_template_format(
|
||||
@@ -150,7 +143,7 @@ def generate_filename(
|
||||
|
||||
# If we have one, render it
|
||||
if filename_format is not None:
|
||||
rendered_path: str | None = format_filename(context_doc, filename_format)
|
||||
rendered_path: str | None = format_filename(doc, filename_format)
|
||||
if rendered_path:
|
||||
base_path = Path(rendered_path)
|
||||
|
||||
@@ -164,7 +157,7 @@ def generate_filename(
|
||||
base_filename = base_path.name
|
||||
|
||||
# Build the final filename with counter and filetype
|
||||
final_filename = f"{base_filename}{version_suffix}{counter_str}{filetype_str}"
|
||||
final_filename = f"{base_filename}{counter_str}{filetype_str}"
|
||||
|
||||
# If we have a directory component, include it
|
||||
if str(directory) != ".":
|
||||
@@ -173,9 +166,7 @@ def generate_filename(
|
||||
full_path = Path(final_filename)
|
||||
else:
|
||||
# No template, use document ID
|
||||
final_filename = (
|
||||
f"{context_doc.pk:07}{version_suffix}{counter_str}{filetype_str}"
|
||||
)
|
||||
final_filename = f"{doc.pk:07}{counter_str}{filetype_str}"
|
||||
full_path = Path(final_filename)
|
||||
|
||||
return full_path
|
||||
|
||||
@@ -6,11 +6,14 @@ Provides automatic progress bar and multiprocessing support with minimal boilerp
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
from collections.abc import Callable
|
||||
from collections.abc import Iterable
|
||||
from collections.abc import Sized
|
||||
from concurrent.futures import ProcessPoolExecutor
|
||||
from concurrent.futures import as_completed
|
||||
from contextlib import contextmanager
|
||||
from dataclasses import dataclass
|
||||
from typing import TYPE_CHECKING
|
||||
from typing import Any
|
||||
@@ -22,7 +25,11 @@ from django import db
|
||||
from django.core.management import CommandError
|
||||
from django.db.models import QuerySet
|
||||
from django_rich.management import RichCommand
|
||||
from rich import box
|
||||
from rich.console import Console
|
||||
from rich.console import Group
|
||||
from rich.console import RenderableType
|
||||
from rich.live import Live
|
||||
from rich.progress import BarColumn
|
||||
from rich.progress import MofNCompleteColumn
|
||||
from rich.progress import Progress
|
||||
@@ -30,11 +37,11 @@ from rich.progress import SpinnerColumn
|
||||
from rich.progress import TextColumn
|
||||
from rich.progress import TimeElapsedColumn
|
||||
from rich.progress import TimeRemainingColumn
|
||||
from rich.table import Table
|
||||
from rich.text import Text
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Callable
|
||||
from collections.abc import Generator
|
||||
from collections.abc import Iterable
|
||||
from collections.abc import Sequence
|
||||
|
||||
from django.core.management import CommandParser
|
||||
@@ -43,6 +50,78 @@ T = TypeVar("T")
|
||||
R = TypeVar("R")
|
||||
|
||||
|
||||
@dataclass(slots=True, frozen=True)
|
||||
class _BufferedRecord:
|
||||
level: int
|
||||
name: str
|
||||
message: str
|
||||
|
||||
|
||||
class BufferingLogHandler(logging.Handler):
    """Captures log records during a command run for deferred rendering.

    Attach to a logger before a long operation and call ``render()``
    afterwards to emit the buffered records via Rich, optionally filtered
    by minimum level.
    """

    def __init__(self) -> None:
        super().__init__()
        self._records: list[_BufferedRecord] = []

    def emit(self, record: logging.LogRecord) -> None:
        """Format ``record`` and append a snapshot of it to the buffer."""
        try:
            message = self.format(record)
        except Exception:
            # logging.Handler convention: a record that cannot be formatted
            # is reported through handleError() instead of raising into the
            # code that issued the log call.
            self.handleError(record)
            return

        self._records.append(
            _BufferedRecord(
                level=record.levelno,
                name=record.name,
                message=message,
            ),
        )

    def render(
        self,
        console: Console,
        *,
        min_level: int = logging.DEBUG,
        title: str = "Log Output",
    ) -> None:
        """Print the buffered records as a table on ``console``.

        Args:
            console: Rich console the table is printed to.
            min_level: Records with a level below this value are omitted.
            title: Title shown above the table.

        Nothing is printed when no buffered record passes the filter.
        """
        records = [r for r in self._records if r.level >= min_level]
        if not records:
            return

        table = Table(
            title=title,
            show_header=True,
            header_style="bold",
            show_lines=False,
            box=box.SIMPLE,
        )
        table.add_column("Level", style="bold", width=8)
        table.add_column("Logger", style="dim")
        table.add_column("Message", no_wrap=False)

        # Levels without an entry here are rendered unstyled.
        _level_styles: dict[int, str] = {
            logging.DEBUG: "dim",
            logging.INFO: "cyan",
            logging.WARNING: "yellow",
            logging.ERROR: "red",
            logging.CRITICAL: "bold red",
        }

        for record in records:
            style = _level_styles.get(record.level, "")
            table.add_row(
                Text(logging.getLevelName(record.level), style=style),
                # Plain ``str`` cells are parsed as Rich console markup, so
                # a log message containing square brackets (for example
                # "[/some/path]") would be restyled, swallowed, or raise
                # MarkupError. Text instances are rendered literally.
                Text(record.name),
                Text(record.message),
            )

        console.print(table)

    def clear(self) -> None:
        """Discard every buffered record."""
        self._records.clear()
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class ProcessResult(Generic[T, R]):
|
||||
"""
|
||||
@@ -91,6 +170,23 @@ class PaperlessCommand(RichCommand):
|
||||
for result in self.process_parallel(process_doc, ids):
|
||||
if result.error:
|
||||
self.console.print(f"[red]Failed: {result.error}[/red]")
|
||||
|
||||
class Command(PaperlessCommand):
|
||||
help = "Import documents with live stats"
|
||||
|
||||
def handle(self, *args, **options):
|
||||
stats = ImportStats()
|
||||
|
||||
def render_stats() -> Table:
|
||||
... # build Rich Table from stats
|
||||
|
||||
for item in self.track_with_stats(
|
||||
items,
|
||||
description="Importing...",
|
||||
stats_renderer=render_stats,
|
||||
):
|
||||
result = import_item(item)
|
||||
stats.imported += 1
|
||||
"""
|
||||
|
||||
supports_progress_bar: ClassVar[bool] = True
|
||||
@@ -128,13 +224,11 @@ class PaperlessCommand(RichCommand):
|
||||
This is called by Django's command infrastructure after argument parsing
|
||||
but before handle(). We use it to set instance attributes from options.
|
||||
"""
|
||||
# Set progress bar state
|
||||
if self.supports_progress_bar:
|
||||
self.no_progress_bar = options.get("no_progress_bar", False)
|
||||
else:
|
||||
self.no_progress_bar = True
|
||||
|
||||
# Set multiprocessing state
|
||||
if self.supports_multiprocessing:
|
||||
self.process_count = options.get("processes", 1)
|
||||
if self.process_count < 1:
|
||||
@@ -144,9 +238,69 @@ class PaperlessCommand(RichCommand):
|
||||
|
||||
return super().execute(*args, **options)
|
||||
|
||||
@contextmanager
def buffered_logging(
    self,
    *logger_names: str,
    level: int = logging.DEBUG,
) -> Generator[BufferingLogHandler, None, None]:
    """Context manager that captures log output from named loggers.

    Installs a single ``BufferingLogHandler`` on each named logger for the
    duration of the block and turns off propagation on those loggers, to
    avoid interleaving with the Rich live display. On exit the handler is
    removed and every logger's original ``propagate`` flag is restored,
    regardless of whether an exception occurred.

    Args:
        *logger_names: Names passed to ``logging.getLogger``. A name that
            resolves to a logger already handled in this call is ignored.
        level: Minimum level the buffering handler accepts; records below
            it are not captured.

    Yields:
        The handler holding the captured records.

    Usage::

        with self.buffered_logging("paperless", "documents") as log_buf:
            # ... run progress loop ...
        if options["verbose"]:
            log_buf.render(self.console)
    """
    handler = BufferingLogHandler()
    handler.setFormatter(logging.Formatter("%(message)s"))
    # Previously ``level`` was accepted but never applied.
    handler.setLevel(level)

    # Each entry pairs a logger with the propagate flag it had before this
    # context manager touched it. Keeping the logger object (rather than
    # the requested name) matters because ``getLogger("")`` returns the
    # root logger, whose ``.name`` is "root", and because a repeated name
    # would otherwise overwrite the saved flag with the patched value.
    patched: list[tuple[logging.Logger, bool]] = []

    try:
        for name in logger_names:
            log = logging.getLogger(name)
            if any(log is seen for seen, _ in patched):
                continue
            patched.append((log, log.propagate))
            log.addHandler(handler)
            log.propagate = False

        yield handler
    finally:
        for log, propagate in reversed(patched):
            log.removeHandler(handler)
            log.propagate = propagate
|
||||
|
||||
@staticmethod
def _progress_columns() -> tuple[Any, ...]:
    """
    Build the shared column layout for progress bars.

    Both _create_progress (standalone) and track_with_stats (inside Live)
    take their columns from here so the two displays stay identical.
    """
    spinner = SpinnerColumn()
    label = TextColumn("[progress.description]{task.description}")
    bar = BarColumn()
    counter = MofNCompleteColumn()
    elapsed = TimeElapsedColumn()
    remaining = TimeRemainingColumn()
    return (spinner, label, bar, counter, elapsed, remaining)
|
||||
|
||||
def _create_progress(self, description: str) -> Progress:
|
||||
"""
|
||||
Create a configured Progress instance.
|
||||
Create a standalone Progress instance with its own stderr Console.
|
||||
|
||||
Use this for track(). For track_with_stats(), Progress is created
|
||||
directly inside a Live context instead.
|
||||
|
||||
Progress output is directed to stderr to match the convention that
|
||||
progress bars are transient UI feedback, not command output. This
|
||||
@@ -161,12 +315,7 @@ class PaperlessCommand(RichCommand):
|
||||
A Progress instance configured with appropriate columns.
|
||||
"""
|
||||
return Progress(
|
||||
SpinnerColumn(),
|
||||
TextColumn("[progress.description]{task.description}"),
|
||||
BarColumn(),
|
||||
MofNCompleteColumn(),
|
||||
TimeElapsedColumn(),
|
||||
TimeRemainingColumn(),
|
||||
*self._progress_columns(),
|
||||
console=Console(stderr=True),
|
||||
transient=False,
|
||||
)
|
||||
@@ -222,7 +371,6 @@ class PaperlessCommand(RichCommand):
|
||||
yield from iterable
|
||||
return
|
||||
|
||||
# Attempt to determine total if not provided
|
||||
if total is None:
|
||||
total = self._get_iterable_length(iterable)
|
||||
|
||||
@@ -232,6 +380,87 @@ class PaperlessCommand(RichCommand):
|
||||
yield item
|
||||
progress.advance(task_id)
|
||||
|
||||
def track_with_stats(
    self,
    iterable: Iterable[T],
    *,
    description: str = "Processing...",
    stats_renderer: Callable[[], RenderableType],
    total: int | None = None,
) -> Generator[T, None, None]:
    """
    Yield items while showing a progress bar with a stats panel beneath it.

    The progress bar and the renderable returned by ``stats_renderer`` are
    grouped inside one Live display on stderr, which is refreshed after
    every item the caller finishes processing.

    When ``self.no_progress_bar`` is set, items are passed through without
    any display and ``stats_renderer`` is never called.

    Args:
        iterable: The items to iterate over.
        description: Text to display alongside the progress bar.
        stats_renderer: Zero-argument callable returning a Rich renderable.
            It is called once up front and again after each item.
        total: Total number of items. If None, it is obtained from
            ``self._get_iterable_length(iterable)``.

    Yields:
        Items from the iterable, unchanged and in order.

    Example:
        stats = Stats()

        def render_stats() -> Table:
            table = Table(box=None)
            table.add_column("Processed")
            table.add_row(str(stats.processed))
            return table

        for item in self.track_with_stats(
            items,
            description="Importing...",
            stats_renderer=render_stats,
        ):
            import_item(item)
            stats.processed += 1
    """
    if self.no_progress_bar:
        # Display disabled: behave as a transparent pass-through.
        yield from iterable
        return

    item_total = self._get_iterable_length(iterable) if total is None else total

    stderr_console = Console(stderr=True)

    # Progress gets no console of its own; the Live below drives rendering.
    bar = Progress(*self._progress_columns())
    task = bar.add_task(description, total=item_total)

    def frame() -> Group:
        # Rebuilt on every refresh so the stats reflect the latest counters.
        return Group(bar, stats_renderer())

    with Live(frame(), console=stderr_console, refresh_per_second=4) as display:
        for element in iterable:
            yield element
            bar.advance(task)
            display.update(frame())
|
||||
|
||||
def process_parallel(
|
||||
self,
|
||||
fn: Callable[[T], R],
|
||||
@@ -269,7 +498,7 @@ class PaperlessCommand(RichCommand):
|
||||
total = len(items)
|
||||
|
||||
if self.process_count == 1:
|
||||
# Sequential execution in main process - critical for testing
|
||||
# Sequential execution in main process - critical for testing, so we don't fork in fork, etc
|
||||
yield from self._process_sequential(fn, items, description, total)
|
||||
else:
|
||||
# Parallel execution with ProcessPoolExecutor
|
||||
@@ -298,6 +527,7 @@ class PaperlessCommand(RichCommand):
|
||||
total: int,
|
||||
) -> Generator[ProcessResult[T, R], None, None]:
|
||||
"""Process items in parallel using ProcessPoolExecutor."""
|
||||
|
||||
# Close database connections before forking - required for PostgreSQL
|
||||
db.connections.close_all()
|
||||
|
||||
|
||||
@@ -3,6 +3,8 @@ import json
|
||||
import os
|
||||
import shutil
|
||||
import tempfile
|
||||
from itertools import chain
|
||||
from itertools import islice
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
@@ -19,6 +21,7 @@ from django.contrib.contenttypes.models import ContentType
|
||||
from django.core import serializers
|
||||
from django.core.management.base import BaseCommand
|
||||
from django.core.management.base import CommandError
|
||||
from django.core.serializers.json import DjangoJSONEncoder
|
||||
from django.db import transaction
|
||||
from django.utils import timezone
|
||||
from filelock import FileLock
|
||||
@@ -26,6 +29,8 @@ from guardian.models import GroupObjectPermission
|
||||
from guardian.models import UserObjectPermission
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Generator
|
||||
|
||||
from django.db.models import QuerySet
|
||||
|
||||
if settings.AUDIT_LOG_ENABLED:
|
||||
@@ -60,6 +65,22 @@ from paperless_mail.models import MailAccount
|
||||
from paperless_mail.models import MailRule
|
||||
|
||||
|
||||
def serialize_queryset_batched(
    queryset: "QuerySet",
    *,
    batch_size: int = 500,
) -> "Generator[list[dict], None, None]":
    """Lazily serialize a QuerySet in fixed-size batches.

    Rows are pulled from ``queryset.iterator(chunk_size=batch_size)`` and
    grouped with ``islice`` into lists of at most ``batch_size`` objects.
    Each list is serialized with Django's "python" serializer in one call
    and the result is yielded. Nothing is yielded for an empty queryset.
    """
    rows = queryset.iterator(chunk_size=batch_size)
    while True:
        batch = list(islice(rows, batch_size))
        if not batch:
            return
        yield serializers.serialize("python", batch)
|
||||
|
||||
|
||||
class Command(CryptMixin, BaseCommand):
|
||||
help = (
|
||||
"Decrypt and rename all files in our collection into a given target "
|
||||
@@ -186,6 +207,17 @@ class Command(CryptMixin, BaseCommand):
|
||||
help="If provided, is used to encrypt sensitive data in the export",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--batch-size",
|
||||
type=int,
|
||||
default=500,
|
||||
help=(
|
||||
"Number of records to process per batch during serialization. "
|
||||
"Lower values reduce peak memory usage; higher values improve "
|
||||
"throughput. Default: 500."
|
||||
),
|
||||
)
|
||||
|
||||
def handle(self, *args, **options) -> None:
|
||||
self.target = Path(options["target"]).resolve()
|
||||
self.split_manifest: bool = options["split_manifest"]
|
||||
@@ -200,6 +232,7 @@ class Command(CryptMixin, BaseCommand):
|
||||
self.data_only: bool = options["data_only"]
|
||||
self.no_progress_bar: bool = options["no_progress_bar"]
|
||||
self.passphrase: str | None = options.get("passphrase")
|
||||
self.batch_size: int = options["batch_size"]
|
||||
|
||||
self.files_in_export_dir: set[Path] = set()
|
||||
self.exported_files: set[str] = set()
|
||||
@@ -294,8 +327,13 @@ class Command(CryptMixin, BaseCommand):
|
||||
|
||||
# Build an overall manifest
|
||||
for key, object_query in manifest_key_to_object_query.items():
|
||||
manifest_dict[key] = json.loads(
|
||||
serializers.serialize("json", object_query),
|
||||
manifest_dict[key] = list(
|
||||
chain.from_iterable(
|
||||
serialize_queryset_batched(
|
||||
object_query,
|
||||
batch_size=self.batch_size,
|
||||
),
|
||||
),
|
||||
)
|
||||
|
||||
self.encrypt_secret_fields(manifest_dict)
|
||||
@@ -512,14 +550,24 @@ class Command(CryptMixin, BaseCommand):
|
||||
self.files_in_export_dir.remove(target)
|
||||
if self.compare_json:
|
||||
target_checksum = hashlib.md5(target.read_bytes()).hexdigest()
|
||||
src_str = json.dumps(content, indent=2, ensure_ascii=False)
|
||||
src_str = json.dumps(
|
||||
content,
|
||||
cls=DjangoJSONEncoder,
|
||||
indent=2,
|
||||
ensure_ascii=False,
|
||||
)
|
||||
src_checksum = hashlib.md5(src_str.encode("utf-8")).hexdigest()
|
||||
if src_checksum == target_checksum:
|
||||
perform_write = False
|
||||
|
||||
if perform_write:
|
||||
target.write_text(
|
||||
json.dumps(content, indent=2, ensure_ascii=False),
|
||||
json.dumps(
|
||||
content,
|
||||
cls=DjangoJSONEncoder,
|
||||
indent=2,
|
||||
ensure_ascii=False,
|
||||
),
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
|
||||
@@ -1,4 +1,12 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
from dataclasses import field
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from rich.table import Table
|
||||
from rich.text import Text
|
||||
|
||||
from documents.classifier import load_classifier
|
||||
from documents.management.commands.base import PaperlessCommand
|
||||
@@ -8,9 +16,162 @@ from documents.signals.handlers import set_document_type
|
||||
from documents.signals.handlers import set_storage_path
|
||||
from documents.signals.handlers import set_tags
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from rich.console import RenderableType
|
||||
|
||||
from documents.models import Correspondent
|
||||
from documents.models import DocumentType
|
||||
from documents.models import StoragePath
|
||||
from documents.models import Tag
|
||||
|
||||
logger = logging.getLogger("paperless.management.retagger")
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class RetaggerStats:
|
||||
"""Cumulative counters updated as the retagger processes documents.
|
||||
|
||||
Mutable by design -- fields are incremented in the processing loop.
|
||||
slots=True reduces per-instance memory overhead and speeds attribute access.
|
||||
"""
|
||||
|
||||
correspondents: int = 0
|
||||
document_types: int = 0
|
||||
tags_added: int = 0
|
||||
tags_removed: int = 0
|
||||
storage_paths: int = 0
|
||||
documents_processed: int = 0
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class DocumentSuggestion:
|
||||
"""Buffered classifier suggestions for a single document (suggest mode only).
|
||||
|
||||
Mutable by design -- fields are assigned incrementally as each setter runs.
|
||||
"""
|
||||
|
||||
document: Document
|
||||
correspondent: Correspondent | None = None
|
||||
document_type: DocumentType | None = None
|
||||
tags_to_add: frozenset[Tag] = field(default_factory=frozenset)
|
||||
tags_to_remove: frozenset[Tag] = field(default_factory=frozenset)
|
||||
storage_path: StoragePath | None = None
|
||||
|
||||
@property
|
||||
def has_suggestions(self) -> bool:
|
||||
return bool(
|
||||
self.correspondent is not None
|
||||
or self.document_type is not None
|
||||
or self.tags_to_add
|
||||
or self.tags_to_remove
|
||||
or self.storage_path is not None,
|
||||
)
|
||||
|
||||
|
||||
def _build_stats_table(stats: RetaggerStats, *, suggest: bool) -> Table:
    """
    Build the single-row stats table displayed under the progress bar.

    In suggest mode the correspondent, document type and storage path
    cells read "would set" instead of "set", making it clear that nothing
    has been written to the database.
    """
    verb = "would set" if suggest else "set"

    # Column heading -> cell text, in display order.
    cells = {
        "Documents": str(stats.documents_processed),
        "Correspondents": f"{stats.correspondents} {verb}",
        "Doc Types": f"{stats.document_types} {verb}",
        "Tags (+)": f"+{stats.tags_added}",
        "Tags (-)": f"-{stats.tags_removed}",
        "Storage Paths": f"{stats.storage_paths} {verb}",
    }

    table = Table(box=None, padding=(0, 2), show_header=True, header_style="bold")
    for heading in cells:
        table.add_column(heading)
    table.add_row(*cells.values())

    return table
|
||||
|
||||
|
||||
def _build_suggestion_table(
    suggestions: list[DocumentSuggestion],
    base_url: str | None,
) -> Table:
    """
    Build the final suggestion table printed after the progress bar completes.

    Only documents with at least one suggestion are included.

    Args:
        suggestions: Buffered suggestions, one entry per document.
        base_url: When truthy, a ``{base_url}/documents/{pk}`` line is shown
            under the document title; otherwise the primary key is shown in
            square brackets after the title.

    Returns:
        A table with one row per document that has suggestions.
    """

    def _name_cell(value: object) -> Text:
        # User-chosen names are wrapped in Text so they are rendered
        # literally. A plain ``str`` cell would be parsed as Rich console
        # markup, and a name containing square brackets would be restyled,
        # dropped, or raise MarkupError.
        return Text(str(value)) if value else Text("-")

    table = Table(
        title="Suggested Changes",
        show_header=True,
        header_style="bold cyan",
        show_lines=True,
    )

    table.add_column("Document", style="bold", no_wrap=False, min_width=20)
    table.add_column("Correspondent")
    table.add_column("Doc Type")
    table.add_column("Tags")
    table.add_column("Storage Path")

    for suggestion in suggestions:
        if not suggestion.has_suggestions:
            continue

        doc = suggestion.document

        if base_url:
            doc_cell = Text()
            doc_cell.append(str(doc))
            doc_cell.append(f"\n{base_url}/documents/{doc.pk}", style="dim")
        else:
            doc_cell = Text(f"{doc} [{doc.pk}]")

        # Additions first (green), then removals (red), each sorted by name.
        # The colour is attached as a style rather than through markup tags
        # so that tag names are never interpreted as markup.
        tag_labels = [
            Text(f"+{tag.name}", style="green")
            for tag in sorted(suggestion.tags_to_add, key=lambda t: t.name)
        ]
        tag_labels.extend(
            Text(f"-{tag.name}", style="red")
            for tag in sorted(suggestion.tags_to_remove, key=lambda t: t.name)
        )
        tag_cell = Text(", ").join(tag_labels) if tag_labels else Text("-")

        table.add_row(
            doc_cell,
            _name_cell(suggestion.correspondent),
            _name_cell(suggestion.document_type),
            tag_cell,
            _name_cell(suggestion.storage_path),
        )

    return table
|
||||
|
||||
|
||||
def _build_summary_table(stats: RetaggerStats) -> Table:
    """
    Assemble the two-column "Retagger Summary" table of applied changes.

    Each row pairs a metric label with the matching counter read from
    ``stats``, rendered as a string.
    """
    summary = Table(
        title="Retagger Summary",
        show_header=True,
        header_style="bold cyan",
    )
    summary.add_column("Metric", style="bold")
    summary.add_column("Count", justify="right")

    # (label, counter) pairs in display order.
    metrics = (
        ("Documents processed", stats.documents_processed),
        ("Correspondents set", stats.correspondents),
        ("Document types set", stats.document_types),
        ("Tags added", stats.tags_added),
        ("Tags removed", stats.tags_removed),
        ("Storage paths set", stats.storage_paths),
    )
    for label, counter in metrics:
        summary.add_row(label, str(counter))

    return summary
|
||||
|
||||
|
||||
class Command(PaperlessCommand):
|
||||
help = (
|
||||
"Using the current classification model, assigns correspondents, tags "
|
||||
@@ -19,7 +180,7 @@ class Command(PaperlessCommand):
|
||||
"modified) after their initial import."
|
||||
)
|
||||
|
||||
def add_arguments(self, parser):
|
||||
def add_arguments(self, parser) -> None:
|
||||
super().add_arguments(parser)
|
||||
parser.add_argument("-c", "--correspondent", default=False, action="store_true")
|
||||
parser.add_argument("-T", "--tags", default=False, action="store_true")
|
||||
@@ -31,9 +192,9 @@ class Command(PaperlessCommand):
|
||||
default=False,
|
||||
action="store_true",
|
||||
help=(
|
||||
"By default this command won't try to assign a correspondent "
|
||||
"if more than one matches the document. Use this flag if "
|
||||
"you'd rather it just pick the first one it finds."
|
||||
"By default this command will not try to assign a correspondent "
|
||||
"if more than one matches the document. Use this flag to pick "
|
||||
"the first match instead."
|
||||
),
|
||||
)
|
||||
parser.add_argument(
|
||||
@@ -42,91 +203,140 @@ class Command(PaperlessCommand):
|
||||
default=False,
|
||||
action="store_true",
|
||||
help=(
|
||||
"If set, the document retagger will overwrite any previously "
|
||||
"set correspondent, document and remove correspondents, types "
|
||||
"and tags that do not match anymore due to changed rules."
|
||||
"Overwrite any previously set correspondent, document type, and "
|
||||
"remove tags that no longer match due to changed rules."
|
||||
),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--suggest",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="Return the suggestion, don't change anything.",
|
||||
help="Show what would be changed without applying anything.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--base-url",
|
||||
help="The base URL to use to build the link to the documents.",
|
||||
help="Base URL used to build document links in suggest output.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--id-range",
|
||||
help="A range of document ids on which the retagging should be applied.",
|
||||
help="Restrict retagging to documents within this ID range (inclusive).",
|
||||
nargs=2,
|
||||
type=int,
|
||||
)
|
||||
|
||||
def handle(self, *args, **options):
|
||||
def handle(self, *args, **options) -> None:
|
||||
suggest: bool = options["suggest"]
|
||||
overwrite: bool = options["overwrite"]
|
||||
use_first: bool = options["use_first"]
|
||||
base_url: str | None = options["base_url"]
|
||||
|
||||
do_correspondent: bool = options["correspondent"]
|
||||
do_document_type: bool = options["document_type"]
|
||||
do_tags: bool = options["tags"]
|
||||
do_storage_path: bool = options["storage_path"]
|
||||
|
||||
if not any([do_correspondent, do_document_type, do_tags, do_storage_path]):
|
||||
self.console.print(
|
||||
"[yellow]No classifier targets specified. "
|
||||
"Use -c, -T, -t, or -s to select what to retag.[/yellow]",
|
||||
)
|
||||
return
|
||||
|
||||
if options["inbox_only"]:
|
||||
queryset = Document.objects.filter(tags__is_inbox_tag=True)
|
||||
else:
|
||||
queryset = Document.objects.all()
|
||||
|
||||
if options["id_range"]:
|
||||
queryset = queryset.filter(
|
||||
id__range=(options["id_range"][0], options["id_range"][1]),
|
||||
)
|
||||
lo, hi = options["id_range"]
|
||||
queryset = queryset.filter(id__range=(lo, hi))
|
||||
|
||||
documents = queryset.distinct()
|
||||
|
||||
classifier = load_classifier()
|
||||
|
||||
for document in self.track(documents, description="Retagging..."):
|
||||
if options["correspondent"]:
|
||||
set_correspondent(
|
||||
sender=None,
|
||||
document=document,
|
||||
classifier=classifier,
|
||||
replace=options["overwrite"],
|
||||
use_first=options["use_first"],
|
||||
suggest=options["suggest"],
|
||||
base_url=options["base_url"],
|
||||
stdout=self.stdout,
|
||||
style_func=self.style,
|
||||
)
|
||||
stats = RetaggerStats()
|
||||
suggestions: list[DocumentSuggestion] = []
|
||||
|
||||
if options["document_type"]:
|
||||
set_document_type(
|
||||
sender=None,
|
||||
document=document,
|
||||
classifier=classifier,
|
||||
replace=options["overwrite"],
|
||||
use_first=options["use_first"],
|
||||
suggest=options["suggest"],
|
||||
base_url=options["base_url"],
|
||||
stdout=self.stdout,
|
||||
style_func=self.style,
|
||||
)
|
||||
def render_stats() -> RenderableType:
|
||||
return _build_stats_table(stats, suggest=suggest)
|
||||
|
||||
if options["tags"]:
|
||||
set_tags(
|
||||
sender=None,
|
||||
document=document,
|
||||
classifier=classifier,
|
||||
replace=options["overwrite"],
|
||||
suggest=options["suggest"],
|
||||
base_url=options["base_url"],
|
||||
stdout=self.stdout,
|
||||
style_func=self.style,
|
||||
)
|
||||
with self.buffered_logging(
|
||||
"paperless",
|
||||
"paperless.handlers",
|
||||
"documents",
|
||||
) as log_buf:
|
||||
for document in self.track_with_stats(
|
||||
documents,
|
||||
description="Retagging...",
|
||||
stats_renderer=render_stats,
|
||||
):
|
||||
suggestion = DocumentSuggestion(document=document)
|
||||
|
||||
if options["storage_path"]:
|
||||
set_storage_path(
|
||||
sender=None,
|
||||
document=document,
|
||||
classifier=classifier,
|
||||
replace=options["overwrite"],
|
||||
use_first=options["use_first"],
|
||||
suggest=options["suggest"],
|
||||
base_url=options["base_url"],
|
||||
stdout=self.stdout,
|
||||
style_func=self.style,
|
||||
)
|
||||
if do_correspondent:
|
||||
correspondent = set_correspondent(
|
||||
None,
|
||||
document,
|
||||
classifier=classifier,
|
||||
replace=overwrite,
|
||||
use_first=use_first,
|
||||
dry_run=suggest,
|
||||
)
|
||||
if correspondent is not None:
|
||||
stats.correspondents += 1
|
||||
suggestion.correspondent = correspondent
|
||||
|
||||
if do_document_type:
|
||||
document_type = set_document_type(
|
||||
None,
|
||||
document,
|
||||
classifier=classifier,
|
||||
replace=overwrite,
|
||||
use_first=use_first,
|
||||
dry_run=suggest,
|
||||
)
|
||||
if document_type is not None:
|
||||
stats.document_types += 1
|
||||
suggestion.document_type = document_type
|
||||
|
||||
if do_tags:
|
||||
tags_to_add, tags_to_remove = set_tags(
|
||||
None,
|
||||
document,
|
||||
classifier=classifier,
|
||||
replace=overwrite,
|
||||
dry_run=suggest,
|
||||
)
|
||||
stats.tags_added += len(tags_to_add)
|
||||
stats.tags_removed += len(tags_to_remove)
|
||||
suggestion.tags_to_add = frozenset(tags_to_add)
|
||||
suggestion.tags_to_remove = frozenset(tags_to_remove)
|
||||
|
||||
if do_storage_path:
|
||||
storage_path = set_storage_path(
|
||||
None,
|
||||
document,
|
||||
classifier=classifier,
|
||||
replace=overwrite,
|
||||
use_first=use_first,
|
||||
dry_run=suggest,
|
||||
)
|
||||
if storage_path is not None:
|
||||
stats.storage_paths += 1
|
||||
suggestion.storage_path = storage_path
|
||||
|
||||
stats.documents_processed += 1
|
||||
|
||||
if suggest:
|
||||
suggestions.append(suggestion)
|
||||
|
||||
# Post-loop output
|
||||
if suggest:
|
||||
visible = [s for s in suggestions if s.has_suggestions]
|
||||
if visible:
|
||||
self.console.print(_build_suggestion_table(visible, base_url))
|
||||
else:
|
||||
self.console.print("[green]No changes suggested.[/green]")
|
||||
else:
|
||||
self.console.print(_build_summary_table(stats))
|
||||
|
||||
log_buf.render(self.console, min_level=logging.INFO, title="Retagger Log")
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
# Generated by Django 5.2.7 on 2026-01-15 22:08
|
||||
# Generated by Django 5.2.11 on 2026-03-03 16:27
|
||||
|
||||
import datetime
|
||||
|
||||
@@ -21,6 +21,207 @@ class Migration(migrations.Migration):
|
||||
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
|
||||
]
|
||||
|
||||
replaces = [
|
||||
("documents", "0001_initial"),
|
||||
("documents", "0002_auto_20151226_1316"),
|
||||
("documents", "0003_sender"),
|
||||
("documents", "0004_auto_20160114_1844"),
|
||||
(
|
||||
"documents",
|
||||
"0004_auto_20160114_1844_squashed_0011_auto_20160303_1929",
|
||||
),
|
||||
("documents", "0005_auto_20160123_0313"),
|
||||
("documents", "0006_auto_20160123_0430"),
|
||||
("documents", "0007_auto_20160126_2114"),
|
||||
("documents", "0008_document_file_type"),
|
||||
("documents", "0009_auto_20160214_0040"),
|
||||
("documents", "0010_log"),
|
||||
("documents", "0011_auto_20160303_1929"),
|
||||
("documents", "0012_auto_20160305_0040"),
|
||||
("documents", "0013_auto_20160325_2111"),
|
||||
("documents", "0014_document_checksum"),
|
||||
("documents", "0015_add_insensitive_to_match"),
|
||||
(
|
||||
"documents",
|
||||
"0015_add_insensitive_to_match_squashed_0018_auto_20170715_1712",
|
||||
),
|
||||
("documents", "0016_auto_20170325_1558"),
|
||||
("documents", "0017_auto_20170512_0507"),
|
||||
("documents", "0018_auto_20170715_1712"),
|
||||
("documents", "0019_add_consumer_user"),
|
||||
("documents", "0020_document_added"),
|
||||
("documents", "0021_document_storage_type"),
|
||||
("documents", "0022_auto_20181007_1420"),
|
||||
("documents", "0023_document_current_filename"),
|
||||
("documents", "1000_update_paperless_all"),
|
||||
("documents", "1001_auto_20201109_1636"),
|
||||
("documents", "1002_auto_20201111_1105"),
|
||||
("documents", "1003_mime_types"),
|
||||
("documents", "1004_sanity_check_schedule"),
|
||||
("documents", "1005_checksums"),
|
||||
("documents", "1006_auto_20201208_2209"),
|
||||
(
|
||||
"documents",
|
||||
"1006_auto_20201208_2209_squashed_1011_auto_20210101_2340",
|
||||
),
|
||||
("documents", "1007_savedview_savedviewfilterrule"),
|
||||
("documents", "1008_auto_20201216_1736"),
|
||||
("documents", "1009_auto_20201216_2005"),
|
||||
("documents", "1010_auto_20210101_2159"),
|
||||
("documents", "1011_auto_20210101_2340"),
|
||||
("documents", "1012_fix_archive_files"),
|
||||
("documents", "1013_migrate_tag_colour"),
|
||||
("documents", "1014_auto_20210228_1614"),
|
||||
("documents", "1015_remove_null_characters"),
|
||||
("documents", "1016_auto_20210317_1351"),
|
||||
(
|
||||
"documents",
|
||||
"1016_auto_20210317_1351_squashed_1020_merge_20220518_1839",
|
||||
),
|
||||
("documents", "1017_alter_savedviewfilterrule_rule_type"),
|
||||
("documents", "1018_alter_savedviewfilterrule_value"),
|
||||
("documents", "1019_storagepath_document_storage_path"),
|
||||
("documents", "1019_uisettings"),
|
||||
("documents", "1020_merge_20220518_1839"),
|
||||
("documents", "1021_webp_thumbnail_conversion"),
|
||||
("documents", "1022_paperlesstask"),
|
||||
(
|
||||
"documents",
|
||||
"1022_paperlesstask_squashed_1036_alter_savedviewfilterrule_rule_type",
|
||||
),
|
||||
("documents", "1023_add_comments"),
|
||||
("documents", "1024_document_original_filename"),
|
||||
("documents", "1025_alter_savedviewfilterrule_rule_type"),
|
||||
("documents", "1026_transition_to_celery"),
|
||||
(
|
||||
"documents",
|
||||
"1027_remove_paperlesstask_attempted_task_and_more",
|
||||
),
|
||||
(
|
||||
"documents",
|
||||
"1028_remove_paperlesstask_task_args_and_more",
|
||||
),
|
||||
("documents", "1029_alter_document_archive_serial_number"),
|
||||
("documents", "1030_alter_paperlesstask_task_file_name"),
|
||||
(
|
||||
"documents",
|
||||
"1031_remove_savedview_user_correspondent_owner_and_more",
|
||||
),
|
||||
(
|
||||
"documents",
|
||||
"1032_alter_correspondent_matching_algorithm_and_more",
|
||||
),
|
||||
(
|
||||
"documents",
|
||||
"1033_alter_documenttype_options_alter_tag_options_and_more",
|
||||
),
|
||||
("documents", "1034_alter_savedviewfilterrule_rule_type"),
|
||||
("documents", "1035_rename_comment_note"),
|
||||
("documents", "1036_alter_savedviewfilterrule_rule_type"),
|
||||
("documents", "1037_webp_encrypted_thumbnail_conversion"),
|
||||
("documents", "1038_sharelink"),
|
||||
("documents", "1039_consumptiontemplate"),
|
||||
(
|
||||
"documents",
|
||||
"1040_customfield_customfieldinstance_and_more",
|
||||
),
|
||||
("documents", "1041_alter_consumptiontemplate_sources"),
|
||||
(
|
||||
"documents",
|
||||
"1042_consumptiontemplate_assign_custom_fields_and_more",
|
||||
),
|
||||
("documents", "1043_alter_savedviewfilterrule_rule_type"),
|
||||
(
|
||||
"documents",
|
||||
"1044_workflow_workflowaction_workflowtrigger_and_more",
|
||||
),
|
||||
(
|
||||
"documents",
|
||||
"1045_alter_customfieldinstance_value_monetary",
|
||||
),
|
||||
(
|
||||
"documents",
|
||||
"1045_alter_customfieldinstance_value_monetary_squashed_1049_document_deleted_at_document_restored_at",
|
||||
),
|
||||
(
|
||||
"documents",
|
||||
"1046_workflowaction_remove_all_correspondents_and_more",
|
||||
),
|
||||
("documents", "1047_savedview_display_mode_and_more"),
|
||||
("documents", "1048_alter_savedviewfilterrule_rule_type"),
|
||||
(
|
||||
"documents",
|
||||
"1049_document_deleted_at_document_restored_at",
|
||||
),
|
||||
("documents", "1050_customfield_extra_data_and_more"),
|
||||
(
|
||||
"documents",
|
||||
"1051_alter_correspondent_owner_alter_document_owner_and_more",
|
||||
),
|
||||
("documents", "1052_document_transaction_id"),
|
||||
("documents", "1053_document_page_count"),
|
||||
(
|
||||
"documents",
|
||||
"1054_customfieldinstance_value_monetary_amount_and_more",
|
||||
),
|
||||
("documents", "1055_alter_storagepath_path"),
|
||||
(
|
||||
"documents",
|
||||
"1056_customfieldinstance_deleted_at_and_more",
|
||||
),
|
||||
("documents", "1057_paperlesstask_owner"),
|
||||
(
|
||||
"documents",
|
||||
"1058_workflowtrigger_schedule_date_custom_field_and_more",
|
||||
),
|
||||
(
|
||||
"documents",
|
||||
"1059_workflowactionemail_workflowactionwebhook_and_more",
|
||||
),
|
||||
(
|
||||
"documents",
|
||||
"1060_alter_customfieldinstance_value_select",
|
||||
),
|
||||
("documents", "1061_workflowactionwebhook_as_json"),
|
||||
("documents", "1062_alter_savedviewfilterrule_rule_type"),
|
||||
(
|
||||
"documents",
|
||||
"1063_paperlesstask_type_alter_paperlesstask_task_name_and_more",
|
||||
),
|
||||
("documents", "1064_delete_log"),
|
||||
(
|
||||
"documents",
|
||||
"1065_workflowaction_assign_custom_fields_values",
|
||||
),
|
||||
(
|
||||
"documents",
|
||||
"1066_alter_workflowtrigger_schedule_offset_days",
|
||||
),
|
||||
("documents", "1067_alter_document_created"),
|
||||
("documents", "1068_alter_document_created"),
|
||||
(
|
||||
"documents",
|
||||
"1069_workflowtrigger_filter_has_storage_path_and_more",
|
||||
),
|
||||
(
|
||||
"documents",
|
||||
"1070_customfieldinstance_value_long_text_and_more",
|
||||
),
|
||||
(
|
||||
"documents",
|
||||
"1071_tag_tn_ancestors_count_tag_tn_ancestors_pks_and_more",
|
||||
),
|
||||
(
|
||||
"documents",
|
||||
"1072_workflowtrigger_filter_custom_field_query_and_more",
|
||||
),
|
||||
("documents", "1073_migrate_workflow_title_jinja"),
|
||||
(
|
||||
"documents",
|
||||
"1074_workflowrun_deleted_at_workflowrun_restored_at_and_more",
|
||||
),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.CreateModel(
|
||||
name="WorkflowActionEmail",
|
||||
@@ -185,70 +386,6 @@ class Migration(migrations.Migration):
|
||||
"abstract": False,
|
||||
},
|
||||
),
|
||||
migrations.CreateModel(
|
||||
name="CustomField",
|
||||
fields=[
|
||||
(
|
||||
"id",
|
||||
models.AutoField(
|
||||
auto_created=True,
|
||||
primary_key=True,
|
||||
serialize=False,
|
||||
verbose_name="ID",
|
||||
),
|
||||
),
|
||||
(
|
||||
"created",
|
||||
models.DateTimeField(
|
||||
db_index=True,
|
||||
default=django.utils.timezone.now,
|
||||
editable=False,
|
||||
verbose_name="created",
|
||||
),
|
||||
),
|
||||
("name", models.CharField(max_length=128)),
|
||||
(
|
||||
"data_type",
|
||||
models.CharField(
|
||||
choices=[
|
||||
("string", "String"),
|
||||
("url", "URL"),
|
||||
("date", "Date"),
|
||||
("boolean", "Boolean"),
|
||||
("integer", "Integer"),
|
||||
("float", "Float"),
|
||||
("monetary", "Monetary"),
|
||||
("documentlink", "Document Link"),
|
||||
("select", "Select"),
|
||||
("longtext", "Long Text"),
|
||||
],
|
||||
editable=False,
|
||||
max_length=50,
|
||||
verbose_name="data type",
|
||||
),
|
||||
),
|
||||
(
|
||||
"extra_data",
|
||||
models.JSONField(
|
||||
blank=True,
|
||||
help_text="Extra data for the custom field, such as select options",
|
||||
null=True,
|
||||
verbose_name="extra data",
|
||||
),
|
||||
),
|
||||
],
|
||||
options={
|
||||
"verbose_name": "custom field",
|
||||
"verbose_name_plural": "custom fields",
|
||||
"ordering": ("created",),
|
||||
"constraints": [
|
||||
models.UniqueConstraint(
|
||||
fields=("name",),
|
||||
name="documents_customfield_unique_name",
|
||||
),
|
||||
],
|
||||
},
|
||||
),
|
||||
migrations.CreateModel(
|
||||
name="DocumentType",
|
||||
fields=[
|
||||
@@ -733,17 +870,6 @@ class Migration(migrations.Migration):
|
||||
verbose_name="correspondent",
|
||||
),
|
||||
),
|
||||
(
|
||||
"owner",
|
||||
models.ForeignKey(
|
||||
blank=True,
|
||||
default=None,
|
||||
null=True,
|
||||
on_delete=django.db.models.deletion.SET_NULL,
|
||||
to=settings.AUTH_USER_MODEL,
|
||||
verbose_name="owner",
|
||||
),
|
||||
),
|
||||
(
|
||||
"document_type",
|
||||
models.ForeignKey(
|
||||
@@ -767,12 +893,14 @@ class Migration(migrations.Migration):
|
||||
),
|
||||
),
|
||||
(
|
||||
"tags",
|
||||
models.ManyToManyField(
|
||||
"owner",
|
||||
models.ForeignKey(
|
||||
blank=True,
|
||||
related_name="documents",
|
||||
to="documents.tag",
|
||||
verbose_name="tags",
|
||||
default=None,
|
||||
null=True,
|
||||
on_delete=django.db.models.deletion.SET_NULL,
|
||||
to=settings.AUTH_USER_MODEL,
|
||||
verbose_name="owner",
|
||||
),
|
||||
),
|
||||
],
|
||||
@@ -782,6 +910,140 @@ class Migration(migrations.Migration):
|
||||
"ordering": ("-created",),
|
||||
},
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name="document",
|
||||
name="tags",
|
||||
field=models.ManyToManyField(
|
||||
blank=True,
|
||||
related_name="documents",
|
||||
to="documents.tag",
|
||||
verbose_name="tags",
|
||||
),
|
||||
),
|
||||
migrations.CreateModel(
|
||||
name="Note",
|
||||
fields=[
|
||||
(
|
||||
"id",
|
||||
models.AutoField(
|
||||
auto_created=True,
|
||||
primary_key=True,
|
||||
serialize=False,
|
||||
verbose_name="ID",
|
||||
),
|
||||
),
|
||||
("deleted_at", models.DateTimeField(blank=True, null=True)),
|
||||
("restored_at", models.DateTimeField(blank=True, null=True)),
|
||||
("transaction_id", models.UUIDField(blank=True, null=True)),
|
||||
(
|
||||
"note",
|
||||
models.TextField(
|
||||
blank=True,
|
||||
help_text="Note for the document",
|
||||
verbose_name="content",
|
||||
),
|
||||
),
|
||||
(
|
||||
"created",
|
||||
models.DateTimeField(
|
||||
db_index=True,
|
||||
default=django.utils.timezone.now,
|
||||
verbose_name="created",
|
||||
),
|
||||
),
|
||||
(
|
||||
"document",
|
||||
models.ForeignKey(
|
||||
blank=True,
|
||||
null=True,
|
||||
on_delete=django.db.models.deletion.CASCADE,
|
||||
related_name="notes",
|
||||
to="documents.document",
|
||||
verbose_name="document",
|
||||
),
|
||||
),
|
||||
(
|
||||
"user",
|
||||
models.ForeignKey(
|
||||
blank=True,
|
||||
null=True,
|
||||
on_delete=django.db.models.deletion.SET_NULL,
|
||||
related_name="notes",
|
||||
to=settings.AUTH_USER_MODEL,
|
||||
verbose_name="user",
|
||||
),
|
||||
),
|
||||
],
|
||||
options={
|
||||
"verbose_name": "note",
|
||||
"verbose_name_plural": "notes",
|
||||
"ordering": ("created",),
|
||||
},
|
||||
),
|
||||
migrations.CreateModel(
|
||||
name="CustomField",
|
||||
fields=[
|
||||
(
|
||||
"id",
|
||||
models.AutoField(
|
||||
auto_created=True,
|
||||
primary_key=True,
|
||||
serialize=False,
|
||||
verbose_name="ID",
|
||||
),
|
||||
),
|
||||
(
|
||||
"created",
|
||||
models.DateTimeField(
|
||||
db_index=True,
|
||||
default=django.utils.timezone.now,
|
||||
editable=False,
|
||||
verbose_name="created",
|
||||
),
|
||||
),
|
||||
("name", models.CharField(max_length=128)),
|
||||
(
|
||||
"data_type",
|
||||
models.CharField(
|
||||
choices=[
|
||||
("string", "String"),
|
||||
("url", "URL"),
|
||||
("date", "Date"),
|
||||
("boolean", "Boolean"),
|
||||
("integer", "Integer"),
|
||||
("float", "Float"),
|
||||
("monetary", "Monetary"),
|
||||
("documentlink", "Document Link"),
|
||||
("select", "Select"),
|
||||
("longtext", "Long Text"),
|
||||
],
|
||||
editable=False,
|
||||
max_length=50,
|
||||
verbose_name="data type",
|
||||
),
|
||||
),
|
||||
(
|
||||
"extra_data",
|
||||
models.JSONField(
|
||||
blank=True,
|
||||
help_text="Extra data for the custom field, such as select options",
|
||||
null=True,
|
||||
verbose_name="extra data",
|
||||
),
|
||||
),
|
||||
],
|
||||
options={
|
||||
"verbose_name": "custom field",
|
||||
"verbose_name_plural": "custom fields",
|
||||
"ordering": ("created",),
|
||||
"constraints": [
|
||||
models.UniqueConstraint(
|
||||
fields=("name",),
|
||||
name="documents_customfield_unique_name",
|
||||
),
|
||||
],
|
||||
},
|
||||
),
|
||||
migrations.CreateModel(
|
||||
name="CustomFieldInstance",
|
||||
fields=[
|
||||
@@ -880,66 +1142,6 @@ class Migration(migrations.Migration):
|
||||
"ordering": ("created",),
|
||||
},
|
||||
),
|
||||
migrations.CreateModel(
|
||||
name="Note",
|
||||
fields=[
|
||||
(
|
||||
"id",
|
||||
models.AutoField(
|
||||
auto_created=True,
|
||||
primary_key=True,
|
||||
serialize=False,
|
||||
verbose_name="ID",
|
||||
),
|
||||
),
|
||||
("deleted_at", models.DateTimeField(blank=True, null=True)),
|
||||
("restored_at", models.DateTimeField(blank=True, null=True)),
|
||||
("transaction_id", models.UUIDField(blank=True, null=True)),
|
||||
(
|
||||
"note",
|
||||
models.TextField(
|
||||
blank=True,
|
||||
help_text="Note for the document",
|
||||
verbose_name="content",
|
||||
),
|
||||
),
|
||||
(
|
||||
"created",
|
||||
models.DateTimeField(
|
||||
db_index=True,
|
||||
default=django.utils.timezone.now,
|
||||
verbose_name="created",
|
||||
),
|
||||
),
|
||||
(
|
||||
"document",
|
||||
models.ForeignKey(
|
||||
blank=True,
|
||||
null=True,
|
||||
on_delete=django.db.models.deletion.CASCADE,
|
||||
related_name="notes",
|
||||
to="documents.document",
|
||||
verbose_name="document",
|
||||
),
|
||||
),
|
||||
(
|
||||
"user",
|
||||
models.ForeignKey(
|
||||
blank=True,
|
||||
null=True,
|
||||
on_delete=django.db.models.deletion.SET_NULL,
|
||||
related_name="notes",
|
||||
to=settings.AUTH_USER_MODEL,
|
||||
verbose_name="user",
|
||||
),
|
||||
),
|
||||
],
|
||||
options={
|
||||
"verbose_name": "note",
|
||||
"verbose_name_plural": "notes",
|
||||
"ordering": ("created",),
|
||||
},
|
||||
),
|
||||
migrations.CreateModel(
|
||||
name="PaperlessTask",
|
||||
fields=[
|
||||
@@ -986,7 +1188,6 @@ class Migration(migrations.Migration):
|
||||
("train_classifier", "Train Classifier"),
|
||||
("check_sanity", "Check Sanity"),
|
||||
("index_optimize", "Index Optimize"),
|
||||
("llmindex_update", "LLM Index Update"),
|
||||
],
|
||||
help_text="Name of the task that was run",
|
||||
max_length=255,
|
||||
@@ -1380,6 +1581,7 @@ class Migration(migrations.Migration):
|
||||
verbose_name="Workflow Action Type",
|
||||
),
|
||||
),
|
||||
("order", models.PositiveIntegerField(default=0, verbose_name="order")),
|
||||
(
|
||||
"assign_title",
|
||||
models.TextField(
|
||||
@@ -1,4 +1,4 @@
|
||||
# Generated by Django 5.2.9 on 2026-01-20 18:46
|
||||
# Generated by Django 5.2.11 on 2026-03-03 16:27
|
||||
|
||||
import django.db.models.deletion
|
||||
from django.db import migrations
|
||||
@@ -9,8 +9,14 @@ class Migration(migrations.Migration):
|
||||
initial = True
|
||||
|
||||
dependencies = [
|
||||
("documents", "0001_initial"),
|
||||
("paperless_mail", "0001_initial"),
|
||||
("documents", "0001_squashed"),
|
||||
("paperless_mail", "0001_squashed"),
|
||||
]
|
||||
|
||||
# This migration needs a "replaces", but it doesn't matter which.
|
||||
# Chose the last 2.20.x migration
|
||||
replaces = [
|
||||
("documents", "1075_workflowaction_order"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
@@ -6,7 +6,7 @@ from django.db import models
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
dependencies = [
|
||||
("documents", "0002_initial"),
|
||||
("documents", "0002_squashed"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
|
||||
@@ -0,0 +1,30 @@
|
||||
# Generated by Django 5.2.11 on 2026-03-03 16:42
|
||||
|
||||
from django.db import migrations
|
||||
from django.db import models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """
    Alter the ``task_name`` field on the ``paperlesstask`` model.

    The field is redefined as a nullable ``CharField`` (``max_length=255``)
    whose choices are the five task names listed below.
    """

    # Must run after the migration that introduced the root document field.
    dependencies = [
        ("documents", "0013_document_root_document"),
    ]

    operations = [
        migrations.AlterField(
            model_name="paperlesstask",
            name="task_name",
            field=models.CharField(
                choices=[
                    ("consume_file", "Consume File"),
                    ("train_classifier", "Train Classifier"),
                    ("check_sanity", "Check Sanity"),
                    ("index_optimize", "Index Optimize"),
                    ("llmindex_update", "LLM Index Update"),
                ],
                help_text="Name of the task that was run",
                max_length=255,
                null=True,
                verbose_name="Task Name",
            ),
        ),
    ]
|
||||
@@ -1,37 +0,0 @@
|
||||
# Generated by Django 5.2.11 on 2026-03-02 17:48
|
||||
|
||||
from django.conf import settings
|
||||
from django.db import migrations
|
||||
from django.db import models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
dependencies = [
|
||||
("documents", "0013_document_root_document"),
|
||||
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AddField(
|
||||
model_name="document",
|
||||
name="version_index",
|
||||
field=models.PositiveIntegerField(
|
||||
blank=True,
|
||||
db_index=True,
|
||||
help_text="Index of this version within the root document.",
|
||||
null=True,
|
||||
verbose_name="version index",
|
||||
),
|
||||
),
|
||||
migrations.AddConstraint(
|
||||
model_name="document",
|
||||
constraint=models.UniqueConstraint(
|
||||
condition=models.Q(
|
||||
("root_document__isnull", False),
|
||||
("version_index__isnull", False),
|
||||
),
|
||||
fields=("root_document", "version_index"),
|
||||
name="documents_document_root_version_index_uniq",
|
||||
),
|
||||
),
|
||||
]
|
||||
@@ -75,7 +75,7 @@ class MatchingModel(ModelWithOwner):
|
||||
|
||||
is_insensitive = models.BooleanField(_("is insensitive"), default=True)
|
||||
|
||||
class Meta:
|
||||
class Meta(ModelWithOwner.Meta):
|
||||
abstract = True
|
||||
ordering = ("name",)
|
||||
constraints = [
|
||||
@@ -317,14 +317,6 @@ class Document(SoftDeleteModel, ModelWithOwner): # type: ignore[django-manager-
|
||||
verbose_name=_("root document for this version"),
|
||||
)
|
||||
|
||||
version_index = models.PositiveIntegerField(
|
||||
_("version index"),
|
||||
blank=True,
|
||||
null=True,
|
||||
db_index=True,
|
||||
help_text=_("Index of this version within the root document."),
|
||||
)
|
||||
|
||||
version_label = models.CharField(
|
||||
_("version label"),
|
||||
max_length=64,
|
||||
@@ -337,16 +329,6 @@ class Document(SoftDeleteModel, ModelWithOwner): # type: ignore[django-manager-
|
||||
ordering = ("-created",)
|
||||
verbose_name = _("document")
|
||||
verbose_name_plural = _("documents")
|
||||
constraints = [
|
||||
models.UniqueConstraint(
|
||||
fields=["root_document", "version_index"],
|
||||
condition=models.Q(
|
||||
root_document__isnull=False,
|
||||
version_index__isnull=False,
|
||||
),
|
||||
name="documents_document_root_version_index_uniq",
|
||||
),
|
||||
]
|
||||
|
||||
def __str__(self) -> str:
|
||||
created = self.created.isoformat()
|
||||
|
||||
@@ -4,6 +4,7 @@ import logging
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
from typing import Any
|
||||
|
||||
from celery import shared_task
|
||||
from celery import states
|
||||
@@ -32,12 +33,14 @@ from documents.file_handling import create_source_path_directory
|
||||
from documents.file_handling import delete_empty_directories
|
||||
from documents.file_handling import generate_filename
|
||||
from documents.file_handling import generate_unique_filename
|
||||
from documents.models import Correspondent
|
||||
from documents.models import CustomField
|
||||
from documents.models import CustomFieldInstance
|
||||
from documents.models import Document
|
||||
from documents.models import MatchingModel
|
||||
from documents.models import DocumentType
|
||||
from documents.models import PaperlessTask
|
||||
from documents.models import SavedView
|
||||
from documents.models import StoragePath
|
||||
from documents.models import Tag
|
||||
from documents.models import UiSettings
|
||||
from documents.models import Workflow
|
||||
@@ -81,47 +84,41 @@ def add_inbox_tags(sender, document: Document, logging_group=None, **kwargs) ->
|
||||
document.add_nested_tags(inbox_tags)
|
||||
|
||||
|
||||
def _suggestion_printer(
|
||||
stdout,
|
||||
style_func,
|
||||
suggestion_type: str,
|
||||
document: Document,
|
||||
selected: MatchingModel,
|
||||
base_url: str | None = None,
|
||||
) -> None:
|
||||
"""
|
||||
Smaller helper to reduce duplication when just outputting suggestions to the console
|
||||
"""
|
||||
doc_str = str(document)
|
||||
if base_url is not None:
|
||||
stdout.write(style_func.SUCCESS(doc_str))
|
||||
stdout.write(style_func.SUCCESS(f"{base_url}/documents/{document.pk}"))
|
||||
else:
|
||||
stdout.write(style_func.SUCCESS(f"{doc_str} [{document.pk}]"))
|
||||
stdout.write(f"Suggest {suggestion_type}: {selected}")
|
||||
|
||||
|
||||
def set_correspondent(
|
||||
sender,
|
||||
sender: object,
|
||||
document: Document,
|
||||
*,
|
||||
logging_group=None,
|
||||
logging_group: object = None,
|
||||
classifier: DocumentClassifier | None = None,
|
||||
replace=False,
|
||||
use_first=True,
|
||||
suggest=False,
|
||||
base_url=None,
|
||||
stdout=None,
|
||||
style_func=None,
|
||||
**kwargs,
|
||||
) -> None:
|
||||
replace: bool = False,
|
||||
use_first: bool = True,
|
||||
dry_run: bool = False,
|
||||
**kwargs: Any,
|
||||
) -> Correspondent | None:
|
||||
"""
|
||||
Assign a correspondent to a document based on classifier results.
|
||||
|
||||
Args:
|
||||
document: The document to classify.
|
||||
logging_group: Optional logging group for structured log output.
|
||||
classifier: The trained classifier. If None, only rule-based matching runs.
|
||||
replace: If True, overwrite an existing correspondent assignment.
|
||||
use_first: If True, pick the first match when multiple correspondents
|
||||
match. If False, skip assignment when multiple match.
|
||||
dry_run: If True, compute and return the selection without saving.
|
||||
**kwargs: Absorbed for Django signal compatibility (e.g. sender, signal).
|
||||
|
||||
Returns:
|
||||
The correspondent that was (or would be) assigned, or None if no match
|
||||
was found or assignment was skipped.
|
||||
"""
|
||||
if document.correspondent and not replace:
|
||||
return
|
||||
return None
|
||||
|
||||
potential_correspondents = matching.match_correspondents(document, classifier)
|
||||
|
||||
potential_count = len(potential_correspondents)
|
||||
selected = potential_correspondents[0] if potential_correspondents else None
|
||||
|
||||
if potential_count > 1:
|
||||
if use_first:
|
||||
logger.debug(
|
||||
@@ -135,49 +132,53 @@ def set_correspondent(
|
||||
f"not assigning any correspondent",
|
||||
extra={"group": logging_group},
|
||||
)
|
||||
return
|
||||
return None
|
||||
|
||||
if selected or replace:
|
||||
if suggest:
|
||||
_suggestion_printer(
|
||||
stdout,
|
||||
style_func,
|
||||
"correspondent",
|
||||
document,
|
||||
selected,
|
||||
base_url,
|
||||
)
|
||||
else:
|
||||
logger.info(
|
||||
f"Assigning correspondent {selected} to {document}",
|
||||
extra={"group": logging_group},
|
||||
)
|
||||
if (selected or replace) and not dry_run:
|
||||
logger.info(
|
||||
f"Assigning correspondent {selected} to {document}",
|
||||
extra={"group": logging_group},
|
||||
)
|
||||
document.correspondent = selected
|
||||
document.save(update_fields=("correspondent",))
|
||||
|
||||
document.correspondent = selected
|
||||
document.save(update_fields=("correspondent",))
|
||||
return selected
|
||||
|
||||
|
||||
def set_document_type(
|
||||
sender,
|
||||
sender: object,
|
||||
document: Document,
|
||||
*,
|
||||
logging_group=None,
|
||||
logging_group: object = None,
|
||||
classifier: DocumentClassifier | None = None,
|
||||
replace=False,
|
||||
use_first=True,
|
||||
suggest=False,
|
||||
base_url=None,
|
||||
stdout=None,
|
||||
style_func=None,
|
||||
**kwargs,
|
||||
) -> None:
|
||||
replace: bool = False,
|
||||
use_first: bool = True,
|
||||
dry_run: bool = False,
|
||||
**kwargs: Any,
|
||||
) -> DocumentType | None:
|
||||
"""
|
||||
Assign a document type to a document based on classifier results.
|
||||
|
||||
Args:
|
||||
document: The document to classify.
|
||||
logging_group: Optional logging group for structured log output.
|
||||
classifier: The trained classifier. If None, only rule-based matching runs.
|
||||
replace: If True, overwrite an existing document type assignment.
|
||||
use_first: If True, pick the first match when multiple types match.
|
||||
If False, skip assignment when multiple match.
|
||||
dry_run: If True, compute and return the selection without saving.
|
||||
**kwargs: Absorbed for Django signal compatibility (e.g. sender, signal).
|
||||
|
||||
Returns:
|
||||
The document type that was (or would be) assigned, or None if no match
|
||||
was found or assignment was skipped.
|
||||
"""
|
||||
if document.document_type and not replace:
|
||||
return
|
||||
return None
|
||||
|
||||
potential_document_type = matching.match_document_types(document, classifier)
|
||||
|
||||
potential_count = len(potential_document_type)
|
||||
selected = potential_document_type[0] if potential_document_type else None
|
||||
potential_document_types = matching.match_document_types(document, classifier)
|
||||
potential_count = len(potential_document_types)
|
||||
selected = potential_document_types[0] if potential_document_types else None
|
||||
|
||||
if potential_count > 1:
|
||||
if use_first:
|
||||
@@ -192,42 +193,64 @@ def set_document_type(
|
||||
f"not assigning any document type",
|
||||
extra={"group": logging_group},
|
||||
)
|
||||
return
|
||||
return None
|
||||
|
||||
if selected or replace:
|
||||
if suggest:
|
||||
_suggestion_printer(
|
||||
stdout,
|
||||
style_func,
|
||||
"document type",
|
||||
document,
|
||||
selected,
|
||||
base_url,
|
||||
)
|
||||
else:
|
||||
logger.info(
|
||||
f"Assigning document type {selected} to {document}",
|
||||
extra={"group": logging_group},
|
||||
)
|
||||
if (selected or replace) and not dry_run:
|
||||
logger.info(
|
||||
f"Assigning document type {selected} to {document}",
|
||||
extra={"group": logging_group},
|
||||
)
|
||||
document.document_type = selected
|
||||
document.save(update_fields=("document_type",))
|
||||
|
||||
document.document_type = selected
|
||||
document.save(update_fields=("document_type",))
|
||||
return selected
|
||||
|
||||
|
||||
def set_tags(
|
||||
sender,
|
||||
sender: object,
|
||||
document: Document,
|
||||
*,
|
||||
logging_group=None,
|
||||
logging_group: object = None,
|
||||
classifier: DocumentClassifier | None = None,
|
||||
replace=False,
|
||||
suggest=False,
|
||||
base_url=None,
|
||||
stdout=None,
|
||||
style_func=None,
|
||||
**kwargs,
|
||||
) -> None:
|
||||
replace: bool = False,
|
||||
dry_run: bool = False,
|
||||
**kwargs: Any,
|
||||
) -> tuple[set[Tag], set[Tag]]:
|
||||
"""
|
||||
Assign tags to a document based on classifier results.
|
||||
|
||||
When replace=True, existing auto-matched and rule-matched tags are removed
|
||||
before applying the new set (inbox tags and manually-added tags are preserved).
|
||||
|
||||
Args:
|
||||
document: The document to classify.
|
||||
logging_group: Optional logging group for structured log output.
|
||||
classifier: The trained classifier. If None, only rule-based matching runs.
|
||||
replace: If True, remove existing classifier-managed tags before applying
|
||||
new ones. Inbox tags and manually-added tags are always preserved.
|
||||
dry_run: If True, compute what would change without saving anything.
|
||||
**kwargs: Absorbed for Django signal compatibility (e.g. sender, signal).
|
||||
|
||||
Returns:
|
||||
A two-tuple of (tags_added, tags_removed). In non-replace mode,
|
||||
tags_removed is always an empty set. In dry_run mode, neither set
|
||||
is applied to the database.
|
||||
"""
|
||||
# Compute which tags would be removed under replace mode.
|
||||
# The filter mirrors the .delete() call below: keep inbox tags and
|
||||
# manually-added tags (match="" and not auto-matched).
|
||||
if replace:
|
||||
tags_to_remove: set[Tag] = set(
|
||||
document.tags.exclude(
|
||||
is_inbox_tag=True,
|
||||
).exclude(
|
||||
Q(match="") & ~Q(matching_algorithm=Tag.MATCH_AUTO),
|
||||
),
|
||||
)
|
||||
else:
|
||||
tags_to_remove = set()
|
||||
|
||||
if replace and not dry_run:
|
||||
Document.tags.through.objects.filter(document=document).exclude(
|
||||
Q(tag__is_inbox_tag=True),
|
||||
).exclude(
|
||||
@@ -235,65 +258,53 @@ def set_tags(
|
||||
).delete()
|
||||
|
||||
current_tags = set(document.tags.all())
|
||||
|
||||
matched_tags = matching.match_tags(document, classifier)
|
||||
tags_to_add = set(matched_tags) - current_tags
|
||||
|
||||
relevant_tags = set(matched_tags) - current_tags
|
||||
|
||||
if suggest:
|
||||
extra_tags = current_tags - set(matched_tags)
|
||||
extra_tags = [
|
||||
t for t in extra_tags if t.matching_algorithm == MatchingModel.MATCH_AUTO
|
||||
]
|
||||
if not relevant_tags and not extra_tags:
|
||||
return
|
||||
doc_str = style_func.SUCCESS(str(document))
|
||||
if base_url:
|
||||
stdout.write(doc_str)
|
||||
stdout.write(f"{base_url}/documents/{document.pk}")
|
||||
else:
|
||||
stdout.write(doc_str + style_func.SUCCESS(f" [{document.pk}]"))
|
||||
if relevant_tags:
|
||||
stdout.write("Suggest tags: " + ", ".join([t.name for t in relevant_tags]))
|
||||
if extra_tags:
|
||||
stdout.write("Extra tags: " + ", ".join([t.name for t in extra_tags]))
|
||||
else:
|
||||
if not relevant_tags:
|
||||
return
|
||||
|
||||
message = 'Tagging "{}" with "{}"'
|
||||
if tags_to_add and not dry_run:
|
||||
logger.info(
|
||||
message.format(document, ", ".join([t.name for t in relevant_tags])),
|
||||
f'Tagging "{document}" with "{", ".join(t.name for t in tags_to_add)}"',
|
||||
extra={"group": logging_group},
|
||||
)
|
||||
document.add_nested_tags(tags_to_add)
|
||||
|
||||
document.add_nested_tags(relevant_tags)
|
||||
return tags_to_add, tags_to_remove
|
||||
|
||||
|
||||
def set_storage_path(
|
||||
sender,
|
||||
sender: object,
|
||||
document: Document,
|
||||
*,
|
||||
logging_group=None,
|
||||
logging_group: object = None,
|
||||
classifier: DocumentClassifier | None = None,
|
||||
replace=False,
|
||||
use_first=True,
|
||||
suggest=False,
|
||||
base_url=None,
|
||||
stdout=None,
|
||||
style_func=None,
|
||||
**kwargs,
|
||||
) -> None:
|
||||
replace: bool = False,
|
||||
use_first: bool = True,
|
||||
dry_run: bool = False,
|
||||
**kwargs: Any,
|
||||
) -> StoragePath | None:
|
||||
"""
|
||||
Assign a storage path to a document based on classifier results.
|
||||
|
||||
Args:
|
||||
document: The document to classify.
|
||||
logging_group: Optional logging group for structured log output.
|
||||
classifier: The trained classifier. If None, only rule-based matching runs.
|
||||
replace: If True, overwrite an existing storage path assignment.
|
||||
use_first: If True, pick the first match when multiple paths match.
|
||||
If False, skip assignment when multiple match.
|
||||
dry_run: If True, compute and return the selection without saving.
|
||||
**kwargs: Absorbed for Django signal compatibility (e.g. sender, signal).
|
||||
|
||||
Returns:
|
||||
The storage path that was (or would be) assigned, or None if no match
|
||||
was found or assignment was skipped.
|
||||
"""
|
||||
if document.storage_path and not replace:
|
||||
return
|
||||
return None
|
||||
|
||||
potential_storage_path = matching.match_storage_paths(
|
||||
document,
|
||||
classifier,
|
||||
)
|
||||
|
||||
potential_count = len(potential_storage_path)
|
||||
selected = potential_storage_path[0] if potential_storage_path else None
|
||||
potential_storage_paths = matching.match_storage_paths(document, classifier)
|
||||
potential_count = len(potential_storage_paths)
|
||||
selected = potential_storage_paths[0] if potential_storage_paths else None
|
||||
|
||||
if potential_count > 1:
|
||||
if use_first:
|
||||
@@ -308,26 +319,17 @@ def set_storage_path(
|
||||
f"not assigning any storage directory",
|
||||
extra={"group": logging_group},
|
||||
)
|
||||
return
|
||||
return None
|
||||
|
||||
if selected or replace:
|
||||
if suggest:
|
||||
_suggestion_printer(
|
||||
stdout,
|
||||
style_func,
|
||||
"storage directory",
|
||||
document,
|
||||
selected,
|
||||
base_url,
|
||||
)
|
||||
else:
|
||||
logger.info(
|
||||
f"Assigning storage path {selected} to {document}",
|
||||
extra={"group": logging_group},
|
||||
)
|
||||
if (selected or replace) and not dry_run:
|
||||
logger.info(
|
||||
f"Assigning storage path {selected} to {document}",
|
||||
extra={"group": logging_group},
|
||||
)
|
||||
document.storage_path = selected
|
||||
document.save(update_fields=("storage_path",))
|
||||
|
||||
document.storage_path = selected
|
||||
document.save(update_fields=("storage_path",))
|
||||
return selected
|
||||
|
||||
|
||||
# see empty_trash in documents/tasks.py for signal handling
|
||||
@@ -596,16 +598,6 @@ def update_filename_and_move_files(
|
||||
root=settings.ARCHIVE_DIR,
|
||||
)
|
||||
|
||||
# Keep version files in sync with root
|
||||
if instance.root_document_id is None:
|
||||
for version_doc in Document.objects.filter(root_document_id=instance.pk).only(
|
||||
"pk",
|
||||
):
|
||||
update_filename_and_move_files(
|
||||
Document,
|
||||
version_doc,
|
||||
)
|
||||
|
||||
|
||||
@shared_task
|
||||
def process_cf_select_update(custom_field: CustomField) -> None:
|
||||
|
||||
@@ -114,3 +114,14 @@ def authenticated_rest_api_client(rest_api_client: APIClient):
|
||||
user = UserModel.objects.create_user(username="testuser", password="password")
|
||||
rest_api_client.force_authenticate(user=user)
|
||||
yield rest_api_client
|
||||
|
||||
|
||||
@pytest.fixture(scope="session", autouse=True)
|
||||
def faker_session_locale():
|
||||
"""Set Faker locale for reproducibility."""
|
||||
return "en_US"
|
||||
|
||||
|
||||
@pytest.fixture(scope="session", autouse=True)
|
||||
def faker_seed():
|
||||
return 12345
|
||||
|
||||
@@ -1,17 +1,67 @@
|
||||
from factory import Faker
|
||||
"""
|
||||
Factory-boy factories for documents app models.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import factory
|
||||
from factory.django import DjangoModelFactory
|
||||
|
||||
from documents.models import Correspondent
|
||||
from documents.models import Document
|
||||
from documents.models import DocumentType
|
||||
from documents.models import MatchingModel
|
||||
from documents.models import StoragePath
|
||||
from documents.models import Tag
|
||||
|
||||
|
||||
class CorrespondentFactory(DjangoModelFactory):
|
||||
class Meta:
|
||||
model = Correspondent
|
||||
|
||||
name = Faker("name")
|
||||
name = factory.Sequence(lambda n: f"{factory.Faker('company')} {n}")
|
||||
match = ""
|
||||
matching_algorithm = MatchingModel.MATCH_NONE
|
||||
|
||||
|
||||
class DocumentTypeFactory(DjangoModelFactory):
|
||||
class Meta:
|
||||
model = DocumentType
|
||||
|
||||
name = factory.Sequence(lambda n: f"{factory.Faker('bs')} {n}")
|
||||
match = ""
|
||||
matching_algorithm = MatchingModel.MATCH_NONE
|
||||
|
||||
|
||||
class TagFactory(DjangoModelFactory):
|
||||
class Meta:
|
||||
model = Tag
|
||||
|
||||
name = factory.Sequence(lambda n: f"{factory.Faker('word')} {n}")
|
||||
match = ""
|
||||
matching_algorithm = MatchingModel.MATCH_NONE
|
||||
is_inbox_tag = False
|
||||
|
||||
|
||||
class StoragePathFactory(DjangoModelFactory):
|
||||
class Meta:
|
||||
model = StoragePath
|
||||
|
||||
name = factory.Sequence(
|
||||
lambda n: f"{factory.Faker('file_path', depth=2, extension='')} {n}",
|
||||
)
|
||||
path = factory.LazyAttribute(lambda o: f"{o.name}/{{title}}")
|
||||
match = ""
|
||||
matching_algorithm = MatchingModel.MATCH_NONE
|
||||
|
||||
|
||||
class DocumentFactory(DjangoModelFactory):
|
||||
class Meta:
|
||||
model = Document
|
||||
|
||||
title = factory.Faker("sentence", nb_words=4)
|
||||
checksum = factory.Faker("md5")
|
||||
content = factory.Faker("paragraph")
|
||||
correspondent = None
|
||||
document_type = None
|
||||
storage_path = None
|
||||
|
||||
@@ -699,13 +699,6 @@ class TestConsumer(
|
||||
self.assertIsNotNone(root_doc)
|
||||
assert root_doc is not None
|
||||
|
||||
root_storage_path = StoragePath.objects.create(
|
||||
name="version-root-path",
|
||||
path="root/{{title}}",
|
||||
)
|
||||
root_doc.storage_path = root_storage_path
|
||||
root_doc.save()
|
||||
|
||||
actor = User.objects.create_user(
|
||||
username="actor",
|
||||
email="actor@example.com",
|
||||
@@ -742,7 +735,7 @@ class TestConsumer(
|
||||
)
|
||||
consumer.setup()
|
||||
try:
|
||||
self.assertEqual(consumer.filename, version_file.name)
|
||||
self.assertTrue(consumer.filename.endswith("_v0.pdf"))
|
||||
consumer.run()
|
||||
finally:
|
||||
consumer.cleanup()
|
||||
@@ -752,9 +745,8 @@ class TestConsumer(
|
||||
version = versions.first()
|
||||
assert version is not None
|
||||
assert version.original_filename is not None
|
||||
self.assertEqual(version.version_index, 1)
|
||||
self.assertEqual(version.version_label, "v2")
|
||||
self.assertEqual(version.original_filename, version_file.name)
|
||||
self.assertTrue(version.original_filename.endswith("_v0.pdf"))
|
||||
self.assertTrue(bool(version.content))
|
||||
|
||||
@override_settings(AUDIT_LOG_ENABLED=True)
|
||||
@@ -803,7 +795,7 @@ class TestConsumer(
|
||||
)
|
||||
consumer.setup()
|
||||
try:
|
||||
self.assertEqual(consumer.filename, "valid_pdf_version-upload")
|
||||
self.assertEqual(consumer.filename, "valid_pdf_version-upload_v0")
|
||||
consumer.run()
|
||||
finally:
|
||||
consumer.cleanup()
|
||||
@@ -813,67 +805,9 @@ class TestConsumer(
|
||||
)
|
||||
self.assertIsNotNone(version)
|
||||
assert version is not None
|
||||
self.assertEqual(version.version_index, 1)
|
||||
self.assertEqual(version.original_filename, "valid_pdf_version-upload")
|
||||
self.assertEqual(version.original_filename, "valid_pdf_version-upload_v0")
|
||||
self.assertTrue(bool(version.content))
|
||||
|
||||
@override_settings(AUDIT_LOG_ENABLED=True)
|
||||
@mock.patch("documents.consumer.load_classifier")
|
||||
def test_consume_version_index_monotonic_after_version_deletion(self, m) -> None:
|
||||
m.return_value = MagicMock()
|
||||
|
||||
with self.get_consumer(self.get_test_file()) as consumer:
|
||||
consumer.run()
|
||||
|
||||
root_doc = Document.objects.first()
|
||||
self.assertIsNotNone(root_doc)
|
||||
assert root_doc is not None
|
||||
|
||||
def consume_version(version_file: Path) -> Document:
|
||||
status = DummyProgressManager(version_file.name, None)
|
||||
overrides = DocumentMetadataOverrides()
|
||||
doc = ConsumableDocument(
|
||||
DocumentSource.ApiUpload,
|
||||
original_file=version_file,
|
||||
root_document_id=root_doc.pk,
|
||||
)
|
||||
preflight = ConsumerPreflightPlugin(
|
||||
doc,
|
||||
overrides,
|
||||
status, # type: ignore[arg-type]
|
||||
self.dirs.scratch_dir,
|
||||
"task-id",
|
||||
)
|
||||
preflight.setup()
|
||||
preflight.run()
|
||||
|
||||
consumer = ConsumerPlugin(
|
||||
doc,
|
||||
overrides,
|
||||
status, # type: ignore[arg-type]
|
||||
self.dirs.scratch_dir,
|
||||
"task-id",
|
||||
)
|
||||
consumer.setup()
|
||||
try:
|
||||
consumer.run()
|
||||
finally:
|
||||
consumer.cleanup()
|
||||
|
||||
version = (
|
||||
Document.objects.filter(root_document=root_doc).order_by("-id").first()
|
||||
)
|
||||
assert version is not None
|
||||
return version
|
||||
|
||||
v1 = consume_version(self.get_test_file2())
|
||||
self.assertEqual(v1.version_index, 1)
|
||||
v1.delete()
|
||||
|
||||
# The next version should have version_index 2, even though version_index 1 was deleted
|
||||
v2 = consume_version(self.get_test_file())
|
||||
self.assertEqual(v2.version_index, 2)
|
||||
|
||||
@mock.patch("documents.consumer.load_classifier")
|
||||
def testClassifyDocument(self, m) -> None:
|
||||
correspondent = Correspondent.objects.create(
|
||||
|
||||
@@ -77,58 +77,6 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
settings.ORIGINALS_DIR / "test" / "test.pdf",
|
||||
)
|
||||
|
||||
@override_settings(FILENAME_FORMAT=None)
|
||||
def test_root_storage_path_change_updates_version_files(self) -> None:
|
||||
old_storage_path = StoragePath.objects.create(
|
||||
name="old-path",
|
||||
path="old/{{title}}",
|
||||
)
|
||||
new_storage_path = StoragePath.objects.create(
|
||||
name="new-path",
|
||||
path="new/{{title}}",
|
||||
)
|
||||
|
||||
root_doc = Document.objects.create(
|
||||
title="rootdoc",
|
||||
mime_type="application/pdf",
|
||||
checksum="root-checksum",
|
||||
storage_path=old_storage_path,
|
||||
)
|
||||
version_doc = Document.objects.create(
|
||||
title="version-title",
|
||||
mime_type="application/pdf",
|
||||
checksum="version-checksum",
|
||||
root_document=root_doc,
|
||||
version_index=1,
|
||||
)
|
||||
|
||||
Document.objects.filter(pk=root_doc.pk).update(
|
||||
filename=generate_filename(root_doc),
|
||||
)
|
||||
Document.objects.filter(pk=version_doc.pk).update(
|
||||
filename=generate_filename(version_doc),
|
||||
)
|
||||
root_doc.refresh_from_db()
|
||||
version_doc.refresh_from_db()
|
||||
|
||||
create_source_path_directory(root_doc.source_path)
|
||||
Path(root_doc.source_path).touch()
|
||||
create_source_path_directory(version_doc.source_path)
|
||||
Path(version_doc.source_path).touch()
|
||||
|
||||
root_doc.storage_path = new_storage_path
|
||||
root_doc.save()
|
||||
|
||||
root_doc.refresh_from_db()
|
||||
version_doc.refresh_from_db()
|
||||
|
||||
self.assertEqual(root_doc.filename, "new/rootdoc.pdf")
|
||||
self.assertEqual(version_doc.filename, "new/rootdoc_v1.pdf")
|
||||
self.assertIsFile(root_doc.source_path)
|
||||
self.assertIsFile(version_doc.source_path)
|
||||
self.assertIsNotFile(settings.ORIGINALS_DIR / "old" / "rootdoc.pdf")
|
||||
self.assertIsNotFile(settings.ORIGINALS_DIR / "old" / "rootdoc_v1.pdf")
|
||||
|
||||
@override_settings(FILENAME_FORMAT="{correspondent}/{correspondent}")
|
||||
def test_file_renaming_missing_permissions(self) -> None:
|
||||
document = Document()
|
||||
@@ -1274,94 +1222,6 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
|
||||
Path("logs.pdf"),
|
||||
)
|
||||
|
||||
@override_settings(FILENAME_FORMAT="{title}")
|
||||
def test_version_index_suffix_for_template_filename(self) -> None:
|
||||
root_doc = Document.objects.create(
|
||||
title="the_doc",
|
||||
mime_type="application/pdf",
|
||||
checksum="root-checksum",
|
||||
)
|
||||
version_doc = Document.objects.create(
|
||||
title="the_doc",
|
||||
mime_type="application/pdf",
|
||||
checksum="version-checksum",
|
||||
root_document=root_doc,
|
||||
version_index=1,
|
||||
)
|
||||
|
||||
self.assertEqual(generate_filename(version_doc), Path("the_doc_v1.pdf"))
|
||||
self.assertEqual(
|
||||
generate_filename(version_doc, counter=1),
|
||||
Path("the_doc_v1_01.pdf"),
|
||||
)
|
||||
|
||||
@override_settings(FILENAME_FORMAT=None)
|
||||
def test_version_index_suffix_for_default_filename(self) -> None:
|
||||
root_doc = Document.objects.create(
|
||||
title="root",
|
||||
mime_type="text/plain",
|
||||
checksum="root-checksum",
|
||||
)
|
||||
version_doc = Document.objects.create(
|
||||
title="root",
|
||||
mime_type="text/plain",
|
||||
checksum="version-checksum",
|
||||
root_document=root_doc,
|
||||
version_index=2,
|
||||
)
|
||||
|
||||
self.assertEqual(
|
||||
generate_filename(version_doc),
|
||||
Path(f"{root_doc.pk:07d}_v2.txt"),
|
||||
)
|
||||
self.assertEqual(
|
||||
generate_filename(version_doc, archive_filename=True),
|
||||
Path(f"{root_doc.pk:07d}_v2.pdf"),
|
||||
)
|
||||
|
||||
@override_settings(FILENAME_FORMAT="{original_name}")
|
||||
def test_version_index_suffix_with_original_name_placeholder(self) -> None:
|
||||
root_doc = Document.objects.create(
|
||||
title="root",
|
||||
mime_type="application/pdf",
|
||||
checksum="root-checksum",
|
||||
original_filename="root-upload.pdf",
|
||||
)
|
||||
version_doc = Document.objects.create(
|
||||
title="root",
|
||||
mime_type="application/pdf",
|
||||
checksum="version-checksum",
|
||||
root_document=root_doc,
|
||||
version_index=1,
|
||||
original_filename="version-upload.pdf",
|
||||
)
|
||||
|
||||
self.assertEqual(generate_filename(version_doc), Path("root-upload_v1.pdf"))
|
||||
|
||||
def test_version_index_suffix_with_storage_path(self) -> None:
|
||||
storage_path = StoragePath.objects.create(
|
||||
name="vtest",
|
||||
path="folder/{{title}}",
|
||||
)
|
||||
root_doc = Document.objects.create(
|
||||
title="storage_doc",
|
||||
mime_type="application/pdf",
|
||||
checksum="root-checksum",
|
||||
storage_path=storage_path,
|
||||
)
|
||||
version_doc = Document.objects.create(
|
||||
title="version_title_should_not_be_used",
|
||||
mime_type="application/pdf",
|
||||
checksum="version-checksum",
|
||||
root_document=root_doc,
|
||||
version_index=3,
|
||||
)
|
||||
|
||||
self.assertEqual(
|
||||
generate_filename(version_doc),
|
||||
Path("folder/storage_doc_v3.pdf"),
|
||||
)
|
||||
|
||||
@override_settings(
|
||||
FILENAME_FORMAT="XX{correspondent}/{title}",
|
||||
FILENAME_FORMAT_REMOVE_NONE=True,
|
||||
|
||||
@@ -1,298 +1,442 @@
|
||||
"""
|
||||
Tests for the document_retagger management command.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
from django.core.management import call_command
|
||||
from django.core.management.base import CommandError
|
||||
from django.test import TestCase
|
||||
|
||||
from documents.models import Correspondent
|
||||
from documents.models import Document
|
||||
from documents.models import DocumentType
|
||||
from documents.models import MatchingModel
|
||||
from documents.models import StoragePath
|
||||
from documents.models import Tag
|
||||
from documents.tests.factories import CorrespondentFactory
|
||||
from documents.tests.factories import DocumentFactory
|
||||
from documents.tests.factories import DocumentTypeFactory
|
||||
from documents.tests.factories import StoragePathFactory
|
||||
from documents.tests.factories import TagFactory
|
||||
from documents.tests.utils import DirectoriesMixin
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Module-level type aliases
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
StoragePathTuple = tuple[StoragePath, StoragePath, StoragePath]
|
||||
TagTuple = tuple[Tag, Tag, Tag, Tag, Tag]
|
||||
CorrespondentTuple = tuple[Correspondent, Correspondent]
|
||||
DocumentTypeTuple = tuple[DocumentType, DocumentType]
|
||||
DocumentTuple = tuple[Document, Document, Document, Document]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fixtures
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def storage_paths(db) -> StoragePathTuple:
|
||||
"""Three storage paths with varying match rules."""
|
||||
sp1 = StoragePathFactory(
|
||||
path="{created_data}/{title}",
|
||||
match="auto document",
|
||||
matching_algorithm=MatchingModel.MATCH_LITERAL,
|
||||
)
|
||||
sp2 = StoragePathFactory(
|
||||
path="{title}",
|
||||
match="^first|^unrelated",
|
||||
matching_algorithm=MatchingModel.MATCH_REGEX,
|
||||
)
|
||||
sp3 = StoragePathFactory(
|
||||
path="{title}",
|
||||
match="^blah",
|
||||
matching_algorithm=MatchingModel.MATCH_REGEX,
|
||||
)
|
||||
return sp1, sp2, sp3
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def tags(db) -> TagTuple:
|
||||
"""Tags covering the common matching scenarios."""
|
||||
tag_first = TagFactory(match="first", matching_algorithm=Tag.MATCH_ANY)
|
||||
tag_second = TagFactory(match="second", matching_algorithm=Tag.MATCH_ANY)
|
||||
tag_inbox = TagFactory(is_inbox_tag=True)
|
||||
tag_no_match = TagFactory()
|
||||
tag_auto = TagFactory(matching_algorithm=Tag.MATCH_AUTO)
|
||||
return tag_first, tag_second, tag_inbox, tag_no_match, tag_auto
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def correspondents(db) -> CorrespondentTuple:
|
||||
"""Two correspondents matching 'first' and 'second' content."""
|
||||
c_first = CorrespondentFactory(
|
||||
match="first",
|
||||
matching_algorithm=MatchingModel.MATCH_ANY,
|
||||
)
|
||||
c_second = CorrespondentFactory(
|
||||
match="second",
|
||||
matching_algorithm=MatchingModel.MATCH_ANY,
|
||||
)
|
||||
return c_first, c_second
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def document_types(db) -> DocumentTypeTuple:
|
||||
"""Two document types matching 'first' and 'second' content."""
|
||||
dt_first = DocumentTypeFactory(
|
||||
match="first",
|
||||
matching_algorithm=MatchingModel.MATCH_ANY,
|
||||
)
|
||||
dt_second = DocumentTypeFactory(
|
||||
match="second",
|
||||
matching_algorithm=MatchingModel.MATCH_ANY,
|
||||
)
|
||||
return dt_first, dt_second
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def documents(storage_paths: StoragePathTuple, tags: TagTuple) -> DocumentTuple:
|
||||
"""Four documents with varied content used across most retagger tests."""
|
||||
_, _, sp3 = storage_paths
|
||||
_, _, tag_inbox, tag_no_match, tag_auto = tags
|
||||
|
||||
d1 = DocumentFactory(checksum="A", title="A", content="first document")
|
||||
d2 = DocumentFactory(checksum="B", title="B", content="second document")
|
||||
d3 = DocumentFactory(
|
||||
checksum="C",
|
||||
title="C",
|
||||
content="unrelated document",
|
||||
storage_path=sp3,
|
||||
)
|
||||
d4 = DocumentFactory(checksum="D", title="D", content="auto document")
|
||||
|
||||
d3.tags.add(tag_inbox, tag_no_match)
|
||||
d4.tags.add(tag_auto)
|
||||
|
||||
return d1, d2, d3, d4
|
||||
|
||||
|
||||
def _get_docs() -> DocumentTuple:
|
||||
return (
|
||||
Document.objects.get(title="A"),
|
||||
Document.objects.get(title="B"),
|
||||
Document.objects.get(title="C"),
|
||||
Document.objects.get(title="D"),
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tag assignment
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.management
|
||||
class TestRetagger(DirectoriesMixin, TestCase):
|
||||
def make_models(self) -> None:
|
||||
self.sp1 = StoragePath.objects.create(
|
||||
name="dummy a",
|
||||
path="{created_data}/{title}",
|
||||
match="auto document",
|
||||
matching_algorithm=StoragePath.MATCH_LITERAL,
|
||||
)
|
||||
self.sp2 = StoragePath.objects.create(
|
||||
name="dummy b",
|
||||
path="{title}",
|
||||
match="^first|^unrelated",
|
||||
matching_algorithm=StoragePath.MATCH_REGEX,
|
||||
)
|
||||
|
||||
self.sp3 = StoragePath.objects.create(
|
||||
name="dummy c",
|
||||
path="{title}",
|
||||
match="^blah",
|
||||
matching_algorithm=StoragePath.MATCH_REGEX,
|
||||
)
|
||||
|
||||
self.d1 = Document.objects.create(
|
||||
checksum="A",
|
||||
title="A",
|
||||
content="first document",
|
||||
)
|
||||
self.d2 = Document.objects.create(
|
||||
checksum="B",
|
||||
title="B",
|
||||
content="second document",
|
||||
)
|
||||
self.d3 = Document.objects.create(
|
||||
checksum="C",
|
||||
title="C",
|
||||
content="unrelated document",
|
||||
storage_path=self.sp3,
|
||||
)
|
||||
self.d4 = Document.objects.create(
|
||||
checksum="D",
|
||||
title="D",
|
||||
content="auto document",
|
||||
)
|
||||
|
||||
self.tag_first = Tag.objects.create(
|
||||
name="tag1",
|
||||
match="first",
|
||||
matching_algorithm=Tag.MATCH_ANY,
|
||||
)
|
||||
self.tag_second = Tag.objects.create(
|
||||
name="tag2",
|
||||
match="second",
|
||||
matching_algorithm=Tag.MATCH_ANY,
|
||||
)
|
||||
self.tag_inbox = Tag.objects.create(name="test", is_inbox_tag=True)
|
||||
self.tag_no_match = Tag.objects.create(name="test2")
|
||||
self.tag_auto = Tag.objects.create(
|
||||
name="tagauto",
|
||||
matching_algorithm=Tag.MATCH_AUTO,
|
||||
)
|
||||
|
||||
self.d3.tags.add(self.tag_inbox)
|
||||
self.d3.tags.add(self.tag_no_match)
|
||||
self.d4.tags.add(self.tag_auto)
|
||||
|
||||
self.correspondent_first = Correspondent.objects.create(
|
||||
name="c1",
|
||||
match="first",
|
||||
matching_algorithm=Correspondent.MATCH_ANY,
|
||||
)
|
||||
self.correspondent_second = Correspondent.objects.create(
|
||||
name="c2",
|
||||
match="second",
|
||||
matching_algorithm=Correspondent.MATCH_ANY,
|
||||
)
|
||||
|
||||
self.doctype_first = DocumentType.objects.create(
|
||||
name="dt1",
|
||||
match="first",
|
||||
matching_algorithm=DocumentType.MATCH_ANY,
|
||||
)
|
||||
self.doctype_second = DocumentType.objects.create(
|
||||
name="dt2",
|
||||
match="second",
|
||||
matching_algorithm=DocumentType.MATCH_ANY,
|
||||
)
|
||||
|
||||
def get_updated_docs(self):
|
||||
return (
|
||||
Document.objects.get(title="A"),
|
||||
Document.objects.get(title="B"),
|
||||
Document.objects.get(title="C"),
|
||||
Document.objects.get(title="D"),
|
||||
)
|
||||
|
||||
def setUp(self) -> None:
|
||||
super().setUp()
|
||||
self.make_models()
|
||||
|
||||
def test_add_tags(self) -> None:
|
||||
@pytest.mark.django_db
|
||||
class TestRetaggerTags(DirectoriesMixin):
|
||||
@pytest.mark.usefixtures("documents")
|
||||
def test_add_tags(self, tags: TagTuple) -> None:
|
||||
tag_first, tag_second, *_ = tags
|
||||
call_command("document_retagger", "--tags")
|
||||
d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
|
||||
d_first, d_second, d_unrelated, d_auto = _get_docs()
|
||||
|
||||
self.assertEqual(d_first.tags.count(), 1)
|
||||
self.assertEqual(d_second.tags.count(), 1)
|
||||
self.assertEqual(d_unrelated.tags.count(), 2)
|
||||
self.assertEqual(d_auto.tags.count(), 1)
|
||||
assert d_first.tags.count() == 1
|
||||
assert d_second.tags.count() == 1
|
||||
assert d_unrelated.tags.count() == 2
|
||||
assert d_auto.tags.count() == 1
|
||||
assert d_first.tags.first() == tag_first
|
||||
assert d_second.tags.first() == tag_second
|
||||
|
||||
self.assertEqual(d_first.tags.first(), self.tag_first)
|
||||
self.assertEqual(d_second.tags.first(), self.tag_second)
|
||||
|
||||
def test_add_type(self) -> None:
|
||||
call_command("document_retagger", "--document_type")
|
||||
d_first, d_second, _, _ = self.get_updated_docs()
|
||||
|
||||
self.assertEqual(d_first.document_type, self.doctype_first)
|
||||
self.assertEqual(d_second.document_type, self.doctype_second)
|
||||
|
||||
def test_add_correspondent(self) -> None:
|
||||
call_command("document_retagger", "--correspondent")
|
||||
d_first, d_second, _, _ = self.get_updated_docs()
|
||||
|
||||
self.assertEqual(d_first.correspondent, self.correspondent_first)
|
||||
self.assertEqual(d_second.correspondent, self.correspondent_second)
|
||||
|
||||
def test_overwrite_preserve_inbox(self) -> None:
|
||||
self.d1.tags.add(self.tag_second)
|
||||
def test_overwrite_removes_stale_tags_and_preserves_inbox(
|
||||
self,
|
||||
documents: DocumentTuple,
|
||||
tags: TagTuple,
|
||||
) -> None:
|
||||
d1, *_ = documents
|
||||
tag_first, tag_second, tag_inbox, tag_no_match, _ = tags
|
||||
d1.tags.add(tag_second)
|
||||
|
||||
call_command("document_retagger", "--tags", "--overwrite")
|
||||
|
||||
d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
|
||||
d_first, d_second, d_unrelated, d_auto = _get_docs()
|
||||
|
||||
self.assertIsNotNone(Tag.objects.get(id=self.tag_second.id))
|
||||
assert Tag.objects.filter(id=tag_second.id).exists()
|
||||
assert list(d_first.tags.values_list("id", flat=True)) == [tag_first.id]
|
||||
assert list(d_second.tags.values_list("id", flat=True)) == [tag_second.id]
|
||||
assert set(d_unrelated.tags.values_list("id", flat=True)) == {
|
||||
tag_inbox.id,
|
||||
tag_no_match.id,
|
||||
}
|
||||
assert d_auto.tags.count() == 0
|
||||
|
||||
self.assertCountEqual(
|
||||
[tag.id for tag in d_first.tags.all()],
|
||||
[self.tag_first.id],
|
||||
@pytest.mark.usefixtures("documents")
|
||||
@pytest.mark.parametrize(
|
||||
"extra_args",
|
||||
[
|
||||
pytest.param([], id="no_base_url"),
|
||||
pytest.param(["--base-url=http://localhost"], id="with_base_url"),
|
||||
],
|
||||
)
|
||||
def test_suggest_does_not_apply_tags(self, extra_args: list[str]) -> None:
|
||||
call_command("document_retagger", "--tags", "--suggest", *extra_args)
|
||||
d_first, d_second, _, d_auto = _get_docs()
|
||||
|
||||
assert d_first.tags.count() == 0
|
||||
assert d_second.tags.count() == 0
|
||||
assert d_auto.tags.count() == 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Document type assignment
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.management
|
||||
@pytest.mark.django_db
|
||||
class TestRetaggerDocumentType(DirectoriesMixin):
|
||||
@pytest.mark.usefixtures("documents")
|
||||
def test_add_type(self, document_types: DocumentTypeTuple) -> None:
|
||||
dt_first, dt_second = document_types
|
||||
call_command("document_retagger", "--document_type")
|
||||
d_first, d_second, _, _ = _get_docs()
|
||||
|
||||
assert d_first.document_type == dt_first
|
||||
assert d_second.document_type == dt_second
|
||||
|
||||
@pytest.mark.usefixtures("documents", "document_types")
|
||||
@pytest.mark.parametrize(
|
||||
"extra_args",
|
||||
[
|
||||
pytest.param([], id="no_base_url"),
|
||||
pytest.param(["--base-url=http://localhost"], id="with_base_url"),
|
||||
],
|
||||
)
|
||||
def test_suggest_does_not_apply_document_type(self, extra_args: list[str]) -> None:
|
||||
call_command("document_retagger", "--document_type", "--suggest", *extra_args)
|
||||
d_first, d_second, _, _ = _get_docs()
|
||||
|
||||
assert d_first.document_type is None
|
||||
assert d_second.document_type is None
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
("use_first_flag", "expects_assignment"),
|
||||
[
|
||||
pytest.param(["--use-first"], True, id="use_first_assigns_first_match"),
|
||||
pytest.param([], False, id="no_use_first_skips_ambiguous_match"),
|
||||
],
|
||||
)
|
||||
def test_use_first_with_multiple_matches(
|
||||
self,
|
||||
use_first_flag: list[str],
|
||||
*,
|
||||
expects_assignment: bool,
|
||||
) -> None:
|
||||
DocumentTypeFactory(
|
||||
match="ambiguous",
|
||||
matching_algorithm=MatchingModel.MATCH_ANY,
|
||||
)
|
||||
self.assertCountEqual(
|
||||
[tag.id for tag in d_second.tags.all()],
|
||||
[self.tag_second.id],
|
||||
DocumentTypeFactory(
|
||||
match="ambiguous",
|
||||
matching_algorithm=MatchingModel.MATCH_ANY,
|
||||
)
|
||||
self.assertCountEqual(
|
||||
[tag.id for tag in d_unrelated.tags.all()],
|
||||
[self.tag_inbox.id, self.tag_no_match.id],
|
||||
doc = DocumentFactory(content="ambiguous content")
|
||||
|
||||
call_command("document_retagger", "--document_type", *use_first_flag)
|
||||
|
||||
doc.refresh_from_db()
|
||||
assert (doc.document_type is not None) is expects_assignment
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Correspondent assignment
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.management
|
||||
@pytest.mark.django_db
|
||||
class TestRetaggerCorrespondent(DirectoriesMixin):
|
||||
@pytest.mark.usefixtures("documents")
|
||||
def test_add_correspondent(self, correspondents: CorrespondentTuple) -> None:
|
||||
c_first, c_second = correspondents
|
||||
call_command("document_retagger", "--correspondent")
|
||||
d_first, d_second, _, _ = _get_docs()
|
||||
|
||||
assert d_first.correspondent == c_first
|
||||
assert d_second.correspondent == c_second
|
||||
|
||||
@pytest.mark.usefixtures("documents", "correspondents")
|
||||
@pytest.mark.parametrize(
|
||||
"extra_args",
|
||||
[
|
||||
pytest.param([], id="no_base_url"),
|
||||
pytest.param(["--base-url=http://localhost"], id="with_base_url"),
|
||||
],
|
||||
)
|
||||
def test_suggest_does_not_apply_correspondent(self, extra_args: list[str]) -> None:
|
||||
call_command("document_retagger", "--correspondent", "--suggest", *extra_args)
|
||||
d_first, d_second, _, _ = _get_docs()
|
||||
|
||||
assert d_first.correspondent is None
|
||||
assert d_second.correspondent is None
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
("use_first_flag", "expects_assignment"),
|
||||
[
|
||||
pytest.param(["--use-first"], True, id="use_first_assigns_first_match"),
|
||||
pytest.param([], False, id="no_use_first_skips_ambiguous_match"),
|
||||
],
|
||||
)
|
||||
def test_use_first_with_multiple_matches(
|
||||
self,
|
||||
use_first_flag: list[str],
|
||||
*,
|
||||
expects_assignment: bool,
|
||||
) -> None:
|
||||
CorrespondentFactory(
|
||||
match="ambiguous",
|
||||
matching_algorithm=MatchingModel.MATCH_ANY,
|
||||
)
|
||||
self.assertEqual(d_auto.tags.count(), 0)
|
||||
|
||||
def test_add_tags_suggest(self) -> None:
|
||||
call_command("document_retagger", "--tags", "--suggest")
|
||||
d_first, d_second, _, d_auto = self.get_updated_docs()
|
||||
|
||||
self.assertEqual(d_first.tags.count(), 0)
|
||||
self.assertEqual(d_second.tags.count(), 0)
|
||||
self.assertEqual(d_auto.tags.count(), 1)
|
||||
|
||||
def test_add_type_suggest(self) -> None:
|
||||
call_command("document_retagger", "--document_type", "--suggest")
|
||||
d_first, d_second, _, _ = self.get_updated_docs()
|
||||
|
||||
self.assertIsNone(d_first.document_type)
|
||||
self.assertIsNone(d_second.document_type)
|
||||
|
||||
def test_add_correspondent_suggest(self) -> None:
|
||||
call_command("document_retagger", "--correspondent", "--suggest")
|
||||
d_first, d_second, _, _ = self.get_updated_docs()
|
||||
|
||||
self.assertIsNone(d_first.correspondent)
|
||||
self.assertIsNone(d_second.correspondent)
|
||||
|
||||
def test_add_tags_suggest_url(self) -> None:
|
||||
call_command(
|
||||
"document_retagger",
|
||||
"--tags",
|
||||
"--suggest",
|
||||
"--base-url=http://localhost",
|
||||
CorrespondentFactory(
|
||||
match="ambiguous",
|
||||
matching_algorithm=MatchingModel.MATCH_ANY,
|
||||
)
|
||||
d_first, d_second, _, d_auto = self.get_updated_docs()
|
||||
doc = DocumentFactory(content="ambiguous content")
|
||||
|
||||
self.assertEqual(d_first.tags.count(), 0)
|
||||
self.assertEqual(d_second.tags.count(), 0)
|
||||
self.assertEqual(d_auto.tags.count(), 1)
|
||||
call_command("document_retagger", "--correspondent", *use_first_flag)
|
||||
|
||||
def test_add_type_suggest_url(self) -> None:
|
||||
call_command(
|
||||
"document_retagger",
|
||||
"--document_type",
|
||||
"--suggest",
|
||||
"--base-url=http://localhost",
|
||||
)
|
||||
d_first, d_second, _, _ = self.get_updated_docs()
|
||||
doc.refresh_from_db()
|
||||
assert (doc.correspondent is not None) is expects_assignment
|
||||
|
||||
self.assertIsNone(d_first.document_type)
|
||||
self.assertIsNone(d_second.document_type)
|
||||
|
||||
def test_add_correspondent_suggest_url(self) -> None:
|
||||
call_command(
|
||||
"document_retagger",
|
||||
"--correspondent",
|
||||
"--suggest",
|
||||
"--base-url=http://localhost",
|
||||
)
|
||||
d_first, d_second, _, _ = self.get_updated_docs()
|
||||
# ---------------------------------------------------------------------------
|
||||
# Storage path assignment
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
self.assertIsNone(d_first.correspondent)
|
||||
self.assertIsNone(d_second.correspondent)
|
||||
|
||||
def test_add_storage_path(self) -> None:
|
||||
@pytest.mark.management
|
||||
@pytest.mark.django_db
|
||||
class TestRetaggerStoragePath(DirectoriesMixin):
|
||||
@pytest.mark.usefixtures("documents")
|
||||
def test_add_storage_path(self, storage_paths: StoragePathTuple) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- 2 storage paths with documents which match them
|
||||
- 1 document which matches but has a storage path
|
||||
WHEN:
|
||||
- document retagger is called
|
||||
THEN:
|
||||
- Matching document's storage paths updated
|
||||
- Non-matching documents have no storage path
|
||||
- Existing storage patch left unchanged
|
||||
GIVEN documents matching various storage path rules
|
||||
WHEN document_retagger --storage_path is called
|
||||
THEN matching documents get the correct path; existing path is unchanged
|
||||
"""
|
||||
call_command(
|
||||
"document_retagger",
|
||||
"--storage_path",
|
||||
)
|
||||
d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
|
||||
sp1, sp2, sp3 = storage_paths
|
||||
call_command("document_retagger", "--storage_path")
|
||||
d_first, d_second, d_unrelated, d_auto = _get_docs()
|
||||
|
||||
self.assertEqual(d_first.storage_path, self.sp2)
|
||||
self.assertEqual(d_auto.storage_path, self.sp1)
|
||||
self.assertIsNone(d_second.storage_path)
|
||||
self.assertEqual(d_unrelated.storage_path, self.sp3)
|
||||
assert d_first.storage_path == sp2
|
||||
assert d_auto.storage_path == sp1
|
||||
assert d_second.storage_path is None
|
||||
assert d_unrelated.storage_path == sp3
|
||||
|
||||
def test_overwrite_storage_path(self) -> None:
|
||||
@pytest.mark.usefixtures("documents")
|
||||
def test_overwrite_storage_path(self, storage_paths: StoragePathTuple) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- 2 storage paths with documents which match them
|
||||
- 1 document which matches but has a storage path
|
||||
WHEN:
|
||||
- document retagger is called with overwrite
|
||||
THEN:
|
||||
- Matching document's storage paths updated
|
||||
- Non-matching documents have no storage path
|
||||
- Existing storage patch overwritten
|
||||
GIVEN a document with an existing storage path that matches a different rule
|
||||
WHEN document_retagger --storage_path --overwrite is called
|
||||
THEN the existing path is replaced by the newly matched path
|
||||
"""
|
||||
sp1, sp2, _ = storage_paths
|
||||
call_command("document_retagger", "--storage_path", "--overwrite")
|
||||
d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
|
||||
d_first, d_second, d_unrelated, d_auto = _get_docs()
|
||||
|
||||
self.assertEqual(d_first.storage_path, self.sp2)
|
||||
self.assertEqual(d_auto.storage_path, self.sp1)
|
||||
self.assertIsNone(d_second.storage_path)
|
||||
self.assertEqual(d_unrelated.storage_path, self.sp2)
|
||||
assert d_first.storage_path == sp2
|
||||
assert d_auto.storage_path == sp1
|
||||
assert d_second.storage_path is None
|
||||
assert d_unrelated.storage_path == sp2
|
||||
|
||||
def test_id_range_parameter(self) -> None:
|
||||
commandOutput = ""
|
||||
Document.objects.create(
|
||||
checksum="E",
|
||||
title="E",
|
||||
content="NOT the first document",
|
||||
@pytest.mark.parametrize(
|
||||
("use_first_flag", "expects_assignment"),
|
||||
[
|
||||
pytest.param(["--use-first"], True, id="use_first_assigns_first_match"),
|
||||
pytest.param([], False, id="no_use_first_skips_ambiguous_match"),
|
||||
],
|
||||
)
|
||||
def test_use_first_with_multiple_matches(
|
||||
self,
|
||||
use_first_flag: list[str],
|
||||
*,
|
||||
expects_assignment: bool,
|
||||
) -> None:
|
||||
StoragePathFactory(
|
||||
match="ambiguous",
|
||||
matching_algorithm=MatchingModel.MATCH_ANY,
|
||||
)
|
||||
call_command("document_retagger", "--tags", "--id-range", "1", "2")
|
||||
# The retagger shouldn`t apply the 'first' tag to our new document
|
||||
self.assertEqual(Document.objects.filter(tags__id=self.tag_first.id).count(), 1)
|
||||
StoragePathFactory(
|
||||
match="ambiguous",
|
||||
matching_algorithm=MatchingModel.MATCH_ANY,
|
||||
)
|
||||
doc = DocumentFactory(content="ambiguous content")
|
||||
|
||||
try:
|
||||
commandOutput = call_command("document_retagger", "--tags", "--id-range")
|
||||
except CommandError:
|
||||
# Just ignore the error
|
||||
None
|
||||
self.assertIn(commandOutput, "Error: argument --id-range: expected 2 arguments")
|
||||
call_command("document_retagger", "--storage_path", *use_first_flag)
|
||||
|
||||
try:
|
||||
commandOutput = call_command(
|
||||
"document_retagger",
|
||||
"--tags",
|
||||
"--id-range",
|
||||
"a",
|
||||
"b",
|
||||
)
|
||||
except CommandError:
|
||||
# Just ignore the error
|
||||
None
|
||||
self.assertIn(commandOutput, "error: argument --id-range: invalid int value:")
|
||||
doc.refresh_from_db()
|
||||
assert (doc.storage_path is not None) is expects_assignment
|
||||
|
||||
call_command("document_retagger", "--tags", "--id-range", "1", "9999")
|
||||
# Now we should have 2 documents
|
||||
self.assertEqual(Document.objects.filter(tags__id=self.tag_first.id).count(), 2)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ID range filtering
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.management
@pytest.mark.django_db
class TestRetaggerIdRange(DirectoriesMixin):
    """Tests for the ``--id-range`` option of the ``document_retagger`` command."""

    @pytest.mark.usefixtures("documents")
    @pytest.mark.parametrize(
        ("id_range_args", "expected_count"),
        [
            pytest.param(["1", "2"], 1, id="narrow_range_limits_scope"),
            pytest.param(["1", "9999"], 2, id="wide_range_tags_all_matches"),
        ],
    )
    def test_id_range_limits_scope(
        self,
        tags: TagTuple,
        id_range_args: list[str],
        expected_count: int,
    ) -> None:
        """
        GIVEN:
            - The standard fixture documents plus one extra document
        WHEN:
            - The retagger runs with --tags and the parametrized --id-range
        THEN:
            - The number of documents carrying the first tag equals
              ``expected_count``
        """
        # NOTE(review): the literal ranges presumably rely on the fixture
        # documents receiving the lowest primary keys - confirm.
        first_tag, *_others = tags

        DocumentFactory(content="NOT the first document")
        call_command("document_retagger", "--tags", "--id-range", *id_range_args)

        tagged_with_first = Document.objects.filter(tags__id=first_tag.id)
        assert tagged_with_first.count() == expected_count

    @pytest.mark.usefixtures("documents")
    @pytest.mark.parametrize(
        "args",
        [
            pytest.param(["--tags", "--id-range"], id="missing_both_values"),
            pytest.param(["--tags", "--id-range", "a", "b"], id="non_integer_values"),
        ],
    )
    def test_id_range_invalid_arguments_raise(self, args: list[str]) -> None:
        """
        GIVEN:
            - An --id-range option with missing or non-integer values
        WHEN:
            - The retagger command is invoked
        THEN:
            - Either CommandError or SystemExit is raised
        """
        command_line = ("document_retagger", *args)
        with pytest.raises((CommandError, SystemExit)):
            call_command(*command_line)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Edge cases
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.management
@pytest.mark.django_db
class TestRetaggerEdgeCases(DirectoriesMixin):
    """Edge-case invocations of the ``document_retagger`` command."""

    @pytest.mark.usefixtures("documents")
    def test_no_targets_exits_cleanly(self) -> None:
        """Running the retagger without any classifier target must not raise."""
        retagger_argv = ("document_retagger",)
        call_command(*retagger_argv)

    @pytest.mark.usefixtures("documents")
    def test_inbox_only_skips_non_inbox_documents(self) -> None:
        """With --inbox-only, only documents that carry an inbox tag are processed."""
        call_command("document_retagger", "--tags", "--inbox-only")

        doc_first, _doc_second, doc_unrelated, _doc_auto = _get_docs()

        # The first document ends up with no tags at all ...
        first_tag_total = doc_first.tags.count()
        assert first_tag_total == 0
        # ... while the unrelated document still has its two tags.
        unrelated_tag_total = doc_unrelated.tags.count()
        assert unrelated_tag_total == 2
|
||||
|
||||
@@ -204,6 +204,61 @@ def audit_log_check(app_configs, **kwargs):
|
||||
return result
|
||||
|
||||
|
||||
@register()
def check_v3_minimum_upgrade_version(
    app_configs: object,
    **kwargs: object,
) -> list[Error]:
    """Enforce that upgrades to v3 must start from v2.20.9.

    v3 squashes all prior migrations into 0001_squashed and 0002_squashed.
    If a user skips v2.20.9, the data migration in 1075_workflowaction_order
    never runs and the squash may apply schema changes against an incomplete
    database state.

    The check reads the ``documents`` rows of ``django_migrations`` on the
    ``default`` connection.

    Args:
        app_configs: Unused; part of the system-check signature.
        **kwargs: Unused; part of the system-check signature.

    Returns:
        An empty list when there is no ``django_migrations`` table, no
        ``documents`` migrations are recorded, a squashed v3 migration or
        ``1075_workflowaction_order`` is already applied, or the database
        raises an error while being inspected. Otherwise a single ``Error``
        with id ``paperless.E002``.
    """
    from django.db import DatabaseError
    from django.db import OperationalError

    try:
        all_tables = connections["default"].introspection.table_names()

        # No migrations table at all: fresh install, nothing to enforce.
        if "django_migrations" not in all_tables:
            return []

        with connections["default"].cursor() as cursor:
            cursor.execute(
                "SELECT name FROM django_migrations WHERE app = %s",
                ["documents"],
            )
            applied: set[str] = {row[0] for row in cursor.fetchall()}

        # Table exists but the documents app was never migrated.
        if not applied:
            return []

        # Already in a valid v3 state
        if {"0001_squashed", "0002_squashed"} & applied:
            return []

        # 1075 is applied (v2.20.9) - squash will pick up cleanly from here
        if "1075_workflowaction_order" in applied:
            return []

    except (DatabaseError, OperationalError):
        # Best effort: an unreachable or broken database is not reported here.
        return []

    return [
        Error(
            "Cannot upgrade to Paperless-ngx v3 from this version.",
            # Adjacent literals are concatenated verbatim, so each sentence
            # needs its own trailing space to keep the hint readable.
            hint=(
                "Upgrading to v3 can only be performed from v2.20.9. "
                "Please upgrade to v2.20.9, run migrations, then upgrade to v3. "
                "See https://docs.paperless-ngx.com/setup/#upgrading for details."
            ),
            id="paperless.E002",
        ),
    ]
|
||||
|
||||
|
||||
@register()
|
||||
def check_deprecated_db_settings(
|
||||
app_configs: object,
|
||||
|
||||
@@ -3,6 +3,7 @@ from pathlib import Path
|
||||
from unittest import mock
|
||||
|
||||
import pytest
|
||||
from django.core.checks import Error
|
||||
from django.core.checks import Warning
|
||||
from django.test import TestCase
|
||||
from django.test import override_settings
|
||||
@@ -13,6 +14,7 @@ from documents.tests.utils import FileSystemAssertsMixin
|
||||
from paperless.checks import audit_log_check
|
||||
from paperless.checks import binaries_check
|
||||
from paperless.checks import check_deprecated_db_settings
|
||||
from paperless.checks import check_v3_minimum_upgrade_version
|
||||
from paperless.checks import debug_mode_check
|
||||
from paperless.checks import paths_check
|
||||
from paperless.checks import settings_values_check
|
||||
@@ -395,3 +397,240 @@ class TestDeprecatedDbSettings:
|
||||
|
||||
assert len(result) == 1
|
||||
assert "PAPERLESS_DBSSLCERT" in result[0].msg
|
||||
|
||||
|
||||
class TestV3MinimumUpgradeVersionCheck:
    """Tests for the check_v3_minimum_upgrade_version system check.

    Every test replaces ``connections['default']`` inside ``paperless.checks``
    with a mock, so no real database state is consulted.
    """

    @staticmethod
    def _use_connection(mocker: MockerFixture, conn: mock.MagicMock) -> None:
        """Patch ``conn`` in as the ``default`` entry of paperless.checks.connections."""
        mocker.patch.dict("paperless.checks.connections", {"default": conn})

    @pytest.fixture
    def build_conn_mock(self, mocker: MockerFixture):
        """Factory fixture producing a mocked ``connections['default']``.

        Usage::

            conn = build_conn_mock(tables=["django_migrations"], applied=["1075_..."])
        """

        def _build(tables: list[str], applied: list[str]) -> mock.MagicMock:
            fake_conn = mocker.MagicMock()
            # Table names reported by introspection
            fake_conn.introspection.table_names.return_value = tables
            # Rows handed back by the cursor used as a context manager
            ctx_cursor = fake_conn.cursor.return_value.__enter__.return_value
            rows = [(migration_name,) for migration_name in applied]
            ctx_cursor.fetchall.return_value = rows
            return fake_conn

        return _build

    def test_no_migrations_table_fresh_install(
        self,
        mocker: MockerFixture,
        build_conn_mock,
    ) -> None:
        """
        GIVEN:
            - The database has no django_migrations table
        WHEN:
            - The v3 upgrade check is executed
        THEN:
            - It returns no errors, as for a fresh install
        """
        self._use_connection(mocker, build_conn_mock([], []))

        assert check_v3_minimum_upgrade_version(None) == []

    def test_no_documents_migrations_fresh_install(
        self,
        mocker: MockerFixture,
        build_conn_mock,
    ) -> None:
        """
        GIVEN:
            - django_migrations exists but holds no rows for the documents app
        WHEN:
            - The v3 upgrade check is executed
        THEN:
            - It returns no errors, as for a fresh install
        """
        self._use_connection(mocker, build_conn_mock(["django_migrations"], []))

        assert check_v3_minimum_upgrade_version(None) == []

    def test_v3_state_with_0001_squashed(
        self,
        mocker: MockerFixture,
        build_conn_mock,
    ) -> None:
        """
        GIVEN:
            - 0001_squashed is among the recorded migrations
        WHEN:
            - The v3 upgrade check is executed
        THEN:
            - It returns no errors because the DB is already on v3
        """
        recorded = ["0001_squashed", "0002_squashed", "0003_workflowaction_order"]
        self._use_connection(
            mocker,
            build_conn_mock(["django_migrations"], recorded),
        )

        assert check_v3_minimum_upgrade_version(None) == []

    def test_v3_state_with_0002_squashed_only(
        self,
        mocker: MockerFixture,
        build_conn_mock,
    ) -> None:
        """
        GIVEN:
            - 0002_squashed is the only recorded migration
        WHEN:
            - The v3 upgrade check is executed
        THEN:
            - It returns no errors; 0002_squashed alone marks a valid v3 state
        """
        self._use_connection(
            mocker,
            build_conn_mock(["django_migrations"], ["0002_squashed"]),
        )

        assert check_v3_minimum_upgrade_version(None) == []

    def test_v2_20_9_state_ready_to_upgrade(
        self,
        mocker: MockerFixture,
        build_conn_mock,
    ) -> None:
        """
        GIVEN:
            - 1075_workflowaction_order (last v2.20.9 migration) is recorded
        WHEN:
            - The v3 upgrade check is executed
        THEN:
            - It returns no errors; the squash can continue from this state
        """
        recorded = [
            "1074_workflowrun_deleted_at_workflowrun_restored_at_and_more",
            "1075_workflowaction_order",
        ]
        self._use_connection(
            mocker,
            build_conn_mock(["django_migrations"], recorded),
        )

        assert check_v3_minimum_upgrade_version(None) == []

    def test_v2_20_8_raises_error(
        self,
        mocker: MockerFixture,
        build_conn_mock,
    ) -> None:
        """
        GIVEN:
            - 1074 (last v2.20.8 migration) is recorded, 1075 is missing
        WHEN:
            - The v3 upgrade check is executed
        THEN:
            - Exactly one Error with id paperless.E002 comes back
        """
        recorded = ["1074_workflowrun_deleted_at_workflowrun_restored_at_and_more"]
        self._use_connection(
            mocker,
            build_conn_mock(["django_migrations"], recorded),
        )

        errors = check_v3_minimum_upgrade_version(None)

        assert len(errors) == 1
        assert isinstance(errors[0], Error)
        assert errors[0].id == "paperless.E002"

    def test_very_old_version_raises_error(
        self,
        mocker: MockerFixture,
        build_conn_mock,
    ) -> None:
        """
        GIVEN:
            - Only migrations far older than v2.20.9 are recorded
        WHEN:
            - The v3 upgrade check is executed
        THEN:
            - Exactly one Error with id paperless.E002 comes back
        """
        recorded = ["1000_update_paperless_all", "1022_paperlesstask"]
        self._use_connection(
            mocker,
            build_conn_mock(["django_migrations"], recorded),
        )

        errors = check_v3_minimum_upgrade_version(None)

        assert len(errors) == 1
        assert isinstance(errors[0], Error)
        assert errors[0].id == "paperless.E002"

    def test_error_hint_mentions_v2_20_9(
        self,
        mocker: MockerFixture,
        build_conn_mock,
    ) -> None:
        """
        GIVEN:
            - The DB is on a v2 release older than v2.20.9
        WHEN:
            - The v3 upgrade check is executed
        THEN:
            - The hint names v2.20.9 so the user knows the required step
        """
        self._use_connection(
            mocker,
            build_conn_mock(["django_migrations"], ["1022_paperlesstask"]),
        )

        errors = check_v3_minimum_upgrade_version(None)

        assert len(errors) == 1
        assert "v2.20.9" in errors[0].hint

    def test_db_error_is_swallowed(self, mocker: MockerFixture) -> None:
        """
        GIVEN:
            - Listing tables raises a DatabaseError
        WHEN:
            - The v3 upgrade check is executed
        THEN:
            - The exception is contained and an empty list is returned
        """
        from django.db import DatabaseError

        broken_conn = mocker.MagicMock()
        broken_conn.introspection.table_names.side_effect = DatabaseError(
            "connection refused",
        )
        self._use_connection(mocker, broken_conn)

        assert check_v3_minimum_upgrade_version(None) == []

    def test_operational_error_is_swallowed(self, mocker: MockerFixture) -> None:
        """
        GIVEN:
            - Listing tables raises an OperationalError
        WHEN:
            - The v3 upgrade check is executed
        THEN:
            - The exception is contained and an empty list is returned
        """
        from django.db import OperationalError

        broken_conn = mocker.MagicMock()
        broken_conn.introspection.table_names.side_effect = OperationalError(
            "DB unavailable",
        )
        self._use_connection(mocker, broken_conn)

        assert check_v3_minimum_upgrade_version(None) == []
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
# Generated by Django 5.2.9 on 2026-01-20 18:46
|
||||
# Generated by Django 5.2.11 on 2026-03-03 16:27
|
||||
|
||||
import django.db.models.deletion
|
||||
import django.utils.timezone
|
||||
@@ -15,6 +15,50 @@ class Migration(migrations.Migration):
|
||||
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
|
||||
]
|
||||
|
||||
replaces = [
|
||||
("paperless_mail", "0001_initial"),
|
||||
("paperless_mail", "0001_initial_squashed_0009_mailrule_assign_tags"),
|
||||
("paperless_mail", "0002_auto_20201117_1334"),
|
||||
("paperless_mail", "0003_auto_20201118_1940"),
|
||||
("paperless_mail", "0004_mailrule_order"),
|
||||
("paperless_mail", "0005_help_texts"),
|
||||
("paperless_mail", "0006_auto_20210101_2340"),
|
||||
("paperless_mail", "0007_auto_20210106_0138"),
|
||||
("paperless_mail", "0008_auto_20210516_0940"),
|
||||
("paperless_mail", "0009_alter_mailrule_action_alter_mailrule_folder"),
|
||||
("paperless_mail", "0009_mailrule_assign_tags"),
|
||||
("paperless_mail", "0010_auto_20220311_1602"),
|
||||
("paperless_mail", "0011_remove_mailrule_assign_tag"),
|
||||
(
|
||||
"paperless_mail",
|
||||
"0011_remove_mailrule_assign_tag_squashed_0024_alter_mailrule_name_and_more",
|
||||
),
|
||||
("paperless_mail", "0012_alter_mailrule_assign_tags"),
|
||||
("paperless_mail", "0013_merge_20220412_1051"),
|
||||
("paperless_mail", "0014_alter_mailrule_action"),
|
||||
("paperless_mail", "0015_alter_mailrule_action"),
|
||||
("paperless_mail", "0016_mailrule_consumption_scope"),
|
||||
("paperless_mail", "0017_mailaccount_owner_mailrule_owner"),
|
||||
("paperless_mail", "0018_processedmail"),
|
||||
("paperless_mail", "0019_mailrule_filter_to"),
|
||||
("paperless_mail", "0020_mailaccount_is_token"),
|
||||
("paperless_mail", "0021_alter_mailaccount_password"),
|
||||
("paperless_mail", "0022_mailrule_assign_owner_from_rule_and_more"),
|
||||
("paperless_mail", "0023_remove_mailrule_filter_attachment_filename_and_more"),
|
||||
("paperless_mail", "0024_alter_mailrule_name_and_more"),
|
||||
(
|
||||
"paperless_mail",
|
||||
"0025_alter_mailaccount_owner_alter_mailrule_owner_and_more",
|
||||
),
|
||||
("paperless_mail", "0026_mailrule_enabled"),
|
||||
(
|
||||
"paperless_mail",
|
||||
"0027_mailaccount_expiration_mailaccount_account_type_and_more",
|
||||
),
|
||||
("paperless_mail", "0028_alter_mailaccount_password_and_more"),
|
||||
("paperless_mail", "0029_mailrule_pdf_layout"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.CreateModel(
|
||||
name="MailAccount",
|
||||
@@ -6,7 +6,7 @@ from django.db import models
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
dependencies = [
|
||||
("paperless_mail", "0001_initial"),
|
||||
("paperless_mail", "0001_squashed"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
|
||||
10
uv.lock
generated
10
uv.lock
generated
@@ -1342,11 +1342,11 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "faker"
|
||||
version = "40.1.2"
|
||||
version = "40.5.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/5e/77/1c3ff07b6739b9a1d23ca01ec0a90a309a33b78e345a3eb52f9ce9240e36/faker-40.1.2.tar.gz", hash = "sha256:b76a68163aa5f171d260fc24827a8349bc1db672f6a665359e8d0095e8135d30", size = 1949802, upload-time = "2026-01-13T20:51:49.917Z" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/03/2a/96fff3edcb10f6505143448a4b91535f77b74865cec45be52690ee280443/faker-40.5.1.tar.gz", hash = "sha256:70222361cd82aa10cb86066d1a4e8f47f2bcdc919615c412045a69c4e6da0cd3", size = 1952684, upload-time = "2026-02-23T21:34:38.362Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/46/ec/91a434c8a53d40c3598966621dea9c50512bec6ce8e76fa1751015e74cef/faker-40.1.2-py3-none-any.whl", hash = "sha256:93503165c165d330260e4379fd6dc07c94da90c611ed3191a0174d2ab9966a42", size = 1985633, upload-time = "2026-01-13T20:51:47.982Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/4d/a9/1eed4db92d0aec2f9bfdf1faae0ab0418b5e121dda5701f118a7a4f0cd6a/faker-40.5.1-py3-none-any.whl", hash = "sha256:c69640c1e13bad49b4bcebcbf1b52f9f1a872b6ea186c248ada34d798f1661bf", size = 1987053, upload-time = "2026-02-23T21:34:36.418Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -3121,6 +3121,7 @@ webserver = [
|
||||
dev = [
|
||||
{ name = "daphne", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "factory-boy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "faker", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "imagehash", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "prek", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "pytest", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
@@ -3145,6 +3146,7 @@ lint = [
|
||||
testing = [
|
||||
{ name = "daphne", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "factory-boy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "faker", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "imagehash", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "pytest", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
{ name = "pytest-cov", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||
@@ -3257,6 +3259,7 @@ provides-extras = ["mariadb", "postgres", "webserver"]
|
||||
dev = [
|
||||
{ name = "daphne" },
|
||||
{ name = "factory-boy", specifier = "~=3.3.1" },
|
||||
{ name = "faker", specifier = "~=40.5.1" },
|
||||
{ name = "imagehash" },
|
||||
{ name = "prek", specifier = "~=0.3.0" },
|
||||
{ name = "pytest", specifier = "~=9.0.0" },
|
||||
@@ -3279,6 +3282,7 @@ lint = [
|
||||
testing = [
|
||||
{ name = "daphne" },
|
||||
{ name = "factory-boy", specifier = "~=3.3.1" },
|
||||
{ name = "faker", specifier = "~=40.5.1" },
|
||||
{ name = "imagehash" },
|
||||
{ name = "pytest", specifier = "~=9.0.0" },
|
||||
{ name = "pytest-cov", specifier = "~=7.0.0" },
|
||||
|
||||
Reference in New Issue
Block a user