mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-02-26 21:36:26 +00:00
Compare commits
3 Commits
dependabot
...
feature-py
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
98d5d665f0 | ||
|
|
3fa9e75fa0 | ||
|
|
c94b6ce792 |
2
.github/workflows/ci-backend.yml
vendored
2
.github/workflows/ci-backend.yml
vendored
@@ -31,7 +31,7 @@ jobs:
|
||||
runs-on: ubuntu-24.04
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ['3.10', '3.11', '3.12']
|
||||
python-version: ['3.11', '3.12', '3.13', '3.14']
|
||||
fail-fast: false
|
||||
steps:
|
||||
- name: Checkout
|
||||
|
||||
@@ -3,10 +3,9 @@ name = "paperless-ngx"
|
||||
version = "2.20.8"
|
||||
description = "A community-supported supercharged document management system: scan, index and archive all your physical documents"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.10"
|
||||
requires-python = ">=3.11"
|
||||
classifiers = [
|
||||
"Programming Language :: Python :: 3 :: Only",
|
||||
"Programming Language :: Python :: 3.10",
|
||||
"Programming Language :: Python :: 3.11",
|
||||
"Programming Language :: Python :: 3.12",
|
||||
"Programming Language :: Python :: 3.13",
|
||||
@@ -37,7 +36,6 @@ dependencies = [
|
||||
"django-filter~=25.1",
|
||||
"django-guardian~=3.3.0",
|
||||
"django-multiselectfield~=1.0.1",
|
||||
"django-rich~=2.2.0",
|
||||
"django-soft-delete~=1.0.18",
|
||||
"django-treenode>=0.23.2",
|
||||
"djangorestframework~=3.16",
|
||||
@@ -77,6 +75,7 @@ dependencies = [
|
||||
"setproctitle~=1.3.4",
|
||||
"tika-client~=0.10.0",
|
||||
"torch~=2.10.0",
|
||||
"tqdm~=4.67.1",
|
||||
"watchfiles>=1.1.1",
|
||||
"whitenoise~=6.11",
|
||||
"whoosh-reloaded>=2.7.5",
|
||||
@@ -149,6 +148,7 @@ typing = [
|
||||
"types-pytz",
|
||||
"types-redis",
|
||||
"types-setuptools",
|
||||
"types-tqdm",
|
||||
]
|
||||
|
||||
[tool.uv]
|
||||
@@ -176,7 +176,7 @@ torch = [
|
||||
]
|
||||
|
||||
[tool.ruff]
|
||||
target-version = "py310"
|
||||
target-version = "py311"
|
||||
line-length = 88
|
||||
src = [
|
||||
"src",
|
||||
@@ -303,7 +303,6 @@ markers = [
|
||||
"tika: Tests requiring Tika service",
|
||||
"greenmail: Tests requiring Greenmail service",
|
||||
"date_parsing: Tests which cover date parsing from content or filename",
|
||||
"management: Tests which cover management commands/functionality",
|
||||
]
|
||||
|
||||
[tool.pytest_env]
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from datetime import UTC
|
||||
from datetime import datetime
|
||||
from datetime import timezone
|
||||
from typing import Any
|
||||
|
||||
from django.conf import settings
|
||||
@@ -139,7 +139,7 @@ def thumbnail_last_modified(request: Any, pk: int) -> datetime | None:
|
||||
# No cache, get the timestamp and cache the datetime
|
||||
last_modified = datetime.fromtimestamp(
|
||||
doc.thumbnail_path.stat().st_mtime,
|
||||
tz=timezone.utc,
|
||||
tz=UTC,
|
||||
)
|
||||
cache.set(doc_key, last_modified, CACHE_50_MINUTES)
|
||||
return last_modified
|
||||
|
||||
@@ -2,7 +2,7 @@ import datetime
|
||||
import hashlib
|
||||
import os
|
||||
import tempfile
|
||||
from enum import Enum
|
||||
from enum import StrEnum
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
from typing import Final
|
||||
@@ -80,7 +80,7 @@ class ConsumerError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class ConsumerStatusShortMessage(str, Enum):
|
||||
class ConsumerStatusShortMessage(StrEnum):
|
||||
DOCUMENT_ALREADY_EXISTS = "document_already_exists"
|
||||
DOCUMENT_ALREADY_EXISTS_IN_TRASH = "document_already_exists_in_trash"
|
||||
ASN_ALREADY_EXISTS = "asn_already_exists"
|
||||
|
||||
@@ -5,10 +5,10 @@ import math
|
||||
import re
|
||||
from collections import Counter
|
||||
from contextlib import contextmanager
|
||||
from datetime import UTC
|
||||
from datetime import datetime
|
||||
from datetime import time
|
||||
from datetime import timedelta
|
||||
from datetime import timezone
|
||||
from shutil import rmtree
|
||||
from time import sleep
|
||||
from typing import TYPE_CHECKING
|
||||
@@ -437,7 +437,7 @@ class ManualResults:
|
||||
class LocalDateParser(English):
|
||||
def reverse_timezone_offset(self, d):
|
||||
return (d.replace(tzinfo=django_timezone.get_current_timezone())).astimezone(
|
||||
timezone.utc,
|
||||
UTC,
|
||||
)
|
||||
|
||||
def date_from(self, *args, **kwargs):
|
||||
@@ -641,8 +641,8 @@ def rewrite_natural_date_keywords(query_string: str) -> str:
|
||||
end = datetime(local_now.year - 1, 12, 31, 23, 59, 59, tzinfo=tz)
|
||||
|
||||
# Convert to UTC and format
|
||||
start_str = start.astimezone(timezone.utc).strftime("%Y%m%d%H%M%S")
|
||||
end_str = end.astimezone(timezone.utc).strftime("%Y%m%d%H%M%S")
|
||||
start_str = start.astimezone(UTC).strftime("%Y%m%d%H%M%S")
|
||||
end_str = end.astimezone(UTC).strftime("%Y%m%d%H%M%S")
|
||||
return f"{field}:[{start_str} TO {end_str}]"
|
||||
|
||||
return re.sub(pattern, repl, query_string, flags=re.IGNORECASE)
|
||||
|
||||
@@ -1,320 +0,0 @@
|
||||
"""
|
||||
Base command class for Paperless-ngx management commands.
|
||||
|
||||
Provides automatic progress bar and multiprocessing support with minimal boilerplate.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from collections.abc import Iterable
|
||||
from collections.abc import Sized
|
||||
from concurrent.futures import ProcessPoolExecutor
|
||||
from concurrent.futures import as_completed
|
||||
from dataclasses import dataclass
|
||||
from typing import TYPE_CHECKING
|
||||
from typing import Any
|
||||
from typing import ClassVar
|
||||
from typing import Generic
|
||||
from typing import TypeVar
|
||||
|
||||
from django import db
|
||||
from django.core.management import CommandError
|
||||
from django.db.models import QuerySet
|
||||
from django_rich.management import RichCommand
|
||||
from rich.console import Console
|
||||
from rich.progress import BarColumn
|
||||
from rich.progress import MofNCompleteColumn
|
||||
from rich.progress import Progress
|
||||
from rich.progress import SpinnerColumn
|
||||
from rich.progress import TextColumn
|
||||
from rich.progress import TimeElapsedColumn
|
||||
from rich.progress import TimeRemainingColumn
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Callable
|
||||
from collections.abc import Generator
|
||||
from collections.abc import Iterable
|
||||
from collections.abc import Sequence
|
||||
|
||||
from django.core.management import CommandParser
|
||||
|
||||
T = TypeVar("T")
|
||||
R = TypeVar("R")
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class ProcessResult(Generic[T, R]):
|
||||
"""
|
||||
Result of processing a single item in parallel.
|
||||
|
||||
Attributes:
|
||||
item: The input item that was processed.
|
||||
result: The return value from the processing function, or None if an error occurred.
|
||||
error: The exception if processing failed, or None on success.
|
||||
"""
|
||||
|
||||
item: T
|
||||
result: R | None
|
||||
error: BaseException | None
|
||||
|
||||
@property
|
||||
def success(self) -> bool:
|
||||
"""Return True if the item was processed successfully."""
|
||||
return self.error is None
|
||||
|
||||
|
||||
class PaperlessCommand(RichCommand):
|
||||
"""
|
||||
Base command class with automatic progress bar and multiprocessing support.
|
||||
|
||||
Features are opt-in via class attributes:
|
||||
supports_progress_bar: Adds --no-progress-bar argument (default: True)
|
||||
supports_multiprocessing: Adds --processes argument (default: False)
|
||||
|
||||
Example usage:
|
||||
|
||||
class Command(PaperlessCommand):
|
||||
help = "Process all documents"
|
||||
|
||||
def handle(self, *args, **options):
|
||||
documents = Document.objects.all()
|
||||
for doc in self.track(documents, description="Processing..."):
|
||||
process_document(doc)
|
||||
|
||||
class Command(PaperlessCommand):
|
||||
help = "Regenerate thumbnails"
|
||||
supports_multiprocessing = True
|
||||
|
||||
def handle(self, *args, **options):
|
||||
ids = list(Document.objects.values_list("id", flat=True))
|
||||
for result in self.process_parallel(process_doc, ids):
|
||||
if result.error:
|
||||
self.console.print(f"[red]Failed: {result.error}[/red]")
|
||||
"""
|
||||
|
||||
supports_progress_bar: ClassVar[bool] = True
|
||||
supports_multiprocessing: ClassVar[bool] = False
|
||||
|
||||
# Instance attributes set by execute() before handle() runs
|
||||
no_progress_bar: bool
|
||||
process_count: int
|
||||
|
||||
def add_arguments(self, parser: CommandParser) -> None:
|
||||
"""Add arguments based on supported features."""
|
||||
super().add_arguments(parser)
|
||||
|
||||
if self.supports_progress_bar:
|
||||
parser.add_argument(
|
||||
"--no-progress-bar",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="Disable the progress bar",
|
||||
)
|
||||
|
||||
if self.supports_multiprocessing:
|
||||
default_processes = max(1, (os.cpu_count() or 1) // 4)
|
||||
parser.add_argument(
|
||||
"--processes",
|
||||
default=default_processes,
|
||||
type=int,
|
||||
help=f"Number of processes to use (default: {default_processes})",
|
||||
)
|
||||
|
||||
def execute(self, *args: Any, **options: Any) -> str | None:
|
||||
"""
|
||||
Set up instance state before handle() is called.
|
||||
|
||||
This is called by Django's command infrastructure after argument parsing
|
||||
but before handle(). We use it to set instance attributes from options.
|
||||
"""
|
||||
# Set progress bar state
|
||||
if self.supports_progress_bar:
|
||||
self.no_progress_bar = options.get("no_progress_bar", False)
|
||||
else:
|
||||
self.no_progress_bar = True
|
||||
|
||||
# Set multiprocessing state
|
||||
if self.supports_multiprocessing:
|
||||
self.process_count = options.get("processes", 1)
|
||||
if self.process_count < 1:
|
||||
raise CommandError("--processes must be at least 1")
|
||||
else:
|
||||
self.process_count = 1
|
||||
|
||||
return super().execute(*args, **options)
|
||||
|
||||
def _create_progress(self, description: str) -> Progress:
|
||||
"""
|
||||
Create a configured Progress instance.
|
||||
|
||||
Progress output is directed to stderr to match the convention that
|
||||
progress bars are transient UI feedback, not command output. This
|
||||
mirrors tqdm's default behavior and prevents progress bar rendering
|
||||
from interfering with stdout-based assertions in tests or piped
|
||||
command output.
|
||||
|
||||
Args:
|
||||
description: Text to display alongside the progress bar.
|
||||
|
||||
Returns:
|
||||
A Progress instance configured with appropriate columns.
|
||||
"""
|
||||
return Progress(
|
||||
SpinnerColumn(),
|
||||
TextColumn("[progress.description]{task.description}"),
|
||||
BarColumn(),
|
||||
MofNCompleteColumn(),
|
||||
TimeElapsedColumn(),
|
||||
TimeRemainingColumn(),
|
||||
console=Console(stderr=True),
|
||||
transient=False,
|
||||
)
|
||||
|
||||
def _get_iterable_length(self, iterable: Iterable[object]) -> int | None:
|
||||
"""
|
||||
Attempt to determine the length of an iterable without consuming it.
|
||||
|
||||
Tries .count() first (for Django querysets - executes SELECT COUNT(*)),
|
||||
then falls back to len() for sequences.
|
||||
|
||||
Args:
|
||||
iterable: The iterable to measure.
|
||||
|
||||
Returns:
|
||||
The length if determinable, None otherwise.
|
||||
"""
|
||||
if isinstance(iterable, QuerySet):
|
||||
return iterable.count()
|
||||
|
||||
if isinstance(iterable, Sized):
|
||||
return len(iterable)
|
||||
|
||||
return None
|
||||
|
||||
def track(
|
||||
self,
|
||||
iterable: Iterable[T],
|
||||
*,
|
||||
description: str = "Processing...",
|
||||
total: int | None = None,
|
||||
) -> Generator[T, None, None]:
|
||||
"""
|
||||
Iterate over items with an optional progress bar.
|
||||
|
||||
Respects --no-progress-bar flag. When disabled, simply yields items
|
||||
without any progress display.
|
||||
|
||||
Args:
|
||||
iterable: The items to iterate over.
|
||||
description: Text to display alongside the progress bar.
|
||||
total: Total number of items. If None, attempts to determine
|
||||
automatically via .count() (for querysets) or len().
|
||||
|
||||
Yields:
|
||||
Items from the iterable.
|
||||
|
||||
Example:
|
||||
for doc in self.track(documents, description="Renaming..."):
|
||||
process(doc)
|
||||
"""
|
||||
if self.no_progress_bar:
|
||||
yield from iterable
|
||||
return
|
||||
|
||||
# Attempt to determine total if not provided
|
||||
if total is None:
|
||||
total = self._get_iterable_length(iterable)
|
||||
|
||||
with self._create_progress(description) as progress:
|
||||
task_id = progress.add_task(description, total=total)
|
||||
for item in iterable:
|
||||
yield item
|
||||
progress.advance(task_id)
|
||||
|
||||
def process_parallel(
|
||||
self,
|
||||
fn: Callable[[T], R],
|
||||
items: Sequence[T],
|
||||
*,
|
||||
description: str = "Processing...",
|
||||
) -> Generator[ProcessResult[T, R], None, None]:
|
||||
"""
|
||||
Process items in parallel with progress tracking.
|
||||
|
||||
When --processes=1, runs sequentially in the main process without
|
||||
spawning subprocesses. This is critical for testing, as multiprocessing
|
||||
breaks fixtures, mocks, and database transactions.
|
||||
|
||||
When --processes > 1, uses ProcessPoolExecutor and automatically closes
|
||||
database connections before spawning workers (required for PostgreSQL).
|
||||
|
||||
Args:
|
||||
fn: Function to apply to each item. Must be picklable for parallel
|
||||
execution (i.e., defined at module level, not a lambda or closure).
|
||||
items: Sequence of items to process.
|
||||
description: Text to display alongside the progress bar.
|
||||
|
||||
Yields:
|
||||
ProcessResult for each item, containing the item, result, and any error.
|
||||
|
||||
Example:
|
||||
def regenerate_thumbnail(doc_id: int) -> Path:
|
||||
...
|
||||
|
||||
for result in self.process_parallel(regenerate_thumbnail, doc_ids):
|
||||
if result.error:
|
||||
self.console.print(f"[red]Failed {result.item}[/red]")
|
||||
"""
|
||||
total = len(items)
|
||||
|
||||
if self.process_count == 1:
|
||||
# Sequential execution in main process - critical for testing
|
||||
yield from self._process_sequential(fn, items, description, total)
|
||||
else:
|
||||
# Parallel execution with ProcessPoolExecutor
|
||||
yield from self._process_parallel(fn, items, description, total)
|
||||
|
||||
def _process_sequential(
|
||||
self,
|
||||
fn: Callable[[T], R],
|
||||
items: Sequence[T],
|
||||
description: str,
|
||||
total: int,
|
||||
) -> Generator[ProcessResult[T, R], None, None]:
|
||||
"""Process items sequentially in the main process."""
|
||||
for item in self.track(items, description=description, total=total):
|
||||
try:
|
||||
result = fn(item)
|
||||
yield ProcessResult(item=item, result=result, error=None)
|
||||
except Exception as e:
|
||||
yield ProcessResult(item=item, result=None, error=e)
|
||||
|
||||
def _process_parallel(
|
||||
self,
|
||||
fn: Callable[[T], R],
|
||||
items: Sequence[T],
|
||||
description: str,
|
||||
total: int,
|
||||
) -> Generator[ProcessResult[T, R], None, None]:
|
||||
"""Process items in parallel using ProcessPoolExecutor."""
|
||||
# Close database connections before forking - required for PostgreSQL
|
||||
db.connections.close_all()
|
||||
|
||||
with self._create_progress(description) as progress:
|
||||
task_id = progress.add_task(description, total=total)
|
||||
|
||||
with ProcessPoolExecutor(max_workers=self.process_count) as executor:
|
||||
# Submit all tasks and map futures back to items
|
||||
future_to_item = {executor.submit(fn, item): item for item in items}
|
||||
|
||||
# Yield results as they complete
|
||||
for future in as_completed(future_to_item):
|
||||
item = future_to_item[future]
|
||||
try:
|
||||
result = future.result()
|
||||
yield ProcessResult(item=item, result=result, error=None)
|
||||
except Exception as e:
|
||||
yield ProcessResult(item=item, result=None, error=e)
|
||||
finally:
|
||||
progress.advance(task_id)
|
||||
@@ -1,15 +1,20 @@
|
||||
import logging
|
||||
import multiprocessing
|
||||
|
||||
import tqdm
|
||||
from django import db
|
||||
from django.conf import settings
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
from documents.management.commands.base import PaperlessCommand
|
||||
from documents.management.commands.mixins import MultiProcessMixin
|
||||
from documents.management.commands.mixins import ProgressBarMixin
|
||||
from documents.models import Document
|
||||
from documents.tasks import update_document_content_maybe_archive_file
|
||||
|
||||
logger = logging.getLogger("paperless.management.archiver")
|
||||
|
||||
|
||||
class Command(PaperlessCommand):
|
||||
class Command(MultiProcessMixin, ProgressBarMixin, BaseCommand):
|
||||
help = (
|
||||
"Using the current classification model, assigns correspondents, tags "
|
||||
"and document types to all documents, effectively allowing you to "
|
||||
@@ -17,10 +22,7 @@ class Command(PaperlessCommand):
|
||||
"modified) after their initial import."
|
||||
)
|
||||
|
||||
supports_multiprocessing = True
|
||||
|
||||
def add_arguments(self, parser):
|
||||
super().add_arguments(parser)
|
||||
parser.add_argument(
|
||||
"-f",
|
||||
"--overwrite",
|
||||
@@ -42,8 +44,13 @@ class Command(PaperlessCommand):
|
||||
"run on this specific document."
|
||||
),
|
||||
)
|
||||
self.add_argument_progress_bar_mixin(parser)
|
||||
self.add_argument_processes_mixin(parser)
|
||||
|
||||
def handle(self, *args, **options):
|
||||
self.handle_processes_mixin(**options)
|
||||
self.handle_progress_bar_mixin(**options)
|
||||
|
||||
settings.SCRATCH_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
overwrite = options["overwrite"]
|
||||
@@ -53,21 +60,35 @@ class Command(PaperlessCommand):
|
||||
else:
|
||||
documents = Document.objects.all()
|
||||
|
||||
document_ids = [
|
||||
doc.id for doc in documents if overwrite or not doc.has_archive_version
|
||||
]
|
||||
document_ids = list(
|
||||
map(
|
||||
lambda doc: doc.id,
|
||||
filter(lambda d: overwrite or not d.has_archive_version, documents),
|
||||
),
|
||||
)
|
||||
|
||||
# Note to future self: this prevents django from reusing database
|
||||
# connections between processes, which is bad and does not work
|
||||
# with postgres.
|
||||
db.connections.close_all()
|
||||
|
||||
try:
|
||||
logging.getLogger().handlers[0].level = logging.ERROR
|
||||
|
||||
for result in self.process_parallel(
|
||||
update_document_content_maybe_archive_file,
|
||||
document_ids,
|
||||
description="Archiving...",
|
||||
):
|
||||
if result.error:
|
||||
self.console.print(
|
||||
f"[red]Failed document {result.item}: {result.error}[/red]",
|
||||
if self.process_count == 1:
|
||||
for doc_id in document_ids:
|
||||
update_document_content_maybe_archive_file(doc_id)
|
||||
else: # pragma: no cover
|
||||
with multiprocessing.Pool(self.process_count) as pool:
|
||||
list(
|
||||
tqdm.tqdm(
|
||||
pool.imap_unordered(
|
||||
update_document_content_maybe_archive_file,
|
||||
document_ids,
|
||||
),
|
||||
total=len(document_ids),
|
||||
disable=self.no_progress_bar,
|
||||
),
|
||||
)
|
||||
except KeyboardInterrupt: # pragma: no cover
|
||||
self.console.print("[yellow]Aborting...[/yellow]")
|
||||
except KeyboardInterrupt:
|
||||
self.stdout.write(self.style.NOTICE("Aborting..."))
|
||||
|
||||
@@ -1,20 +1,24 @@
|
||||
import dataclasses
|
||||
import multiprocessing
|
||||
from typing import Final
|
||||
|
||||
import rapidfuzz
|
||||
import tqdm
|
||||
from django.core.management import BaseCommand
|
||||
from django.core.management import CommandError
|
||||
|
||||
from documents.management.commands.base import PaperlessCommand
|
||||
from documents.management.commands.mixins import MultiProcessMixin
|
||||
from documents.management.commands.mixins import ProgressBarMixin
|
||||
from documents.models import Document
|
||||
|
||||
|
||||
@dataclasses.dataclass(frozen=True, slots=True)
|
||||
@dataclasses.dataclass(frozen=True)
|
||||
class _WorkPackage:
|
||||
first_doc: Document
|
||||
second_doc: Document
|
||||
|
||||
|
||||
@dataclasses.dataclass(frozen=True, slots=True)
|
||||
@dataclasses.dataclass(frozen=True)
|
||||
class _WorkResult:
|
||||
doc_one_pk: int
|
||||
doc_two_pk: int
|
||||
@@ -27,23 +31,22 @@ class _WorkResult:
|
||||
def _process_and_match(work: _WorkPackage) -> _WorkResult:
|
||||
"""
|
||||
Does basic processing of document content, gets the basic ratio
|
||||
and returns the result package.
|
||||
and returns the result package
|
||||
"""
|
||||
# Normalize the string some, lower case, whitespace, etc
|
||||
first_string = rapidfuzz.utils.default_process(work.first_doc.content)
|
||||
second_string = rapidfuzz.utils.default_process(work.second_doc.content)
|
||||
|
||||
# Basic matching ratio
|
||||
match = rapidfuzz.fuzz.ratio(first_string, second_string)
|
||||
|
||||
return _WorkResult(work.first_doc.pk, work.second_doc.pk, match)
|
||||
|
||||
|
||||
class Command(PaperlessCommand):
|
||||
class Command(MultiProcessMixin, ProgressBarMixin, BaseCommand):
|
||||
help = "Searches for documents where the content almost matches"
|
||||
|
||||
supports_multiprocessing = True
|
||||
|
||||
def add_arguments(self, parser):
|
||||
super().add_arguments(parser)
|
||||
parser.add_argument(
|
||||
"--ratio",
|
||||
default=85.0,
|
||||
@@ -56,11 +59,16 @@ class Command(PaperlessCommand):
|
||||
action="store_true",
|
||||
help="If set, one document of matches above the ratio WILL BE DELETED",
|
||||
)
|
||||
self.add_argument_progress_bar_mixin(parser)
|
||||
self.add_argument_processes_mixin(parser)
|
||||
|
||||
def handle(self, *args, **options):
|
||||
RATIO_MIN: Final[float] = 0.0
|
||||
RATIO_MAX: Final[float] = 100.0
|
||||
|
||||
self.handle_processes_mixin(**options)
|
||||
self.handle_progress_bar_mixin(**options)
|
||||
|
||||
if options["delete"]:
|
||||
self.stdout.write(
|
||||
self.style.WARNING(
|
||||
@@ -72,58 +80,66 @@ class Command(PaperlessCommand):
|
||||
checked_pairs: set[tuple[int, int]] = set()
|
||||
work_pkgs: list[_WorkPackage] = []
|
||||
|
||||
# Ratio is a float from 0.0 to 100.0
|
||||
if opt_ratio < RATIO_MIN or opt_ratio > RATIO_MAX:
|
||||
raise CommandError("The ratio must be between 0 and 100")
|
||||
|
||||
all_docs = Document.objects.all().order_by("id")
|
||||
|
||||
# Build work packages for processing
|
||||
for first_doc in all_docs:
|
||||
for second_doc in all_docs:
|
||||
# doc to doc is obviously not useful
|
||||
if first_doc.pk == second_doc.pk:
|
||||
continue
|
||||
# Skip empty documents (e.g. password-protected)
|
||||
if first_doc.content.strip() == "" or second_doc.content.strip() == "":
|
||||
continue
|
||||
# Skip matching which have already been matched together
|
||||
# doc 1 to doc 2 is the same as doc 2 to doc 1
|
||||
doc_1_to_doc_2 = (first_doc.pk, second_doc.pk)
|
||||
doc_2_to_doc_1 = doc_1_to_doc_2[::-1]
|
||||
if doc_1_to_doc_2 in checked_pairs or doc_2_to_doc_1 in checked_pairs:
|
||||
continue
|
||||
checked_pairs.update([doc_1_to_doc_2, doc_2_to_doc_1])
|
||||
# Actually something useful to work on now
|
||||
work_pkgs.append(_WorkPackage(first_doc, second_doc))
|
||||
|
||||
results: list[_WorkResult] = []
|
||||
# Don't spin up a pool of 1 process
|
||||
if self.process_count == 1:
|
||||
for work in self.track(work_pkgs, description="Matching..."):
|
||||
results = []
|
||||
for work in tqdm.tqdm(work_pkgs, disable=self.no_progress_bar):
|
||||
results.append(_process_and_match(work))
|
||||
else: # pragma: no cover
|
||||
for proc_result in self.process_parallel(
|
||||
_process_and_match,
|
||||
work_pkgs,
|
||||
description="Matching...",
|
||||
):
|
||||
if proc_result.error:
|
||||
self.console.print(
|
||||
f"[red]Failed: {proc_result.error}[/red]",
|
||||
)
|
||||
elif proc_result.result is not None:
|
||||
results.append(proc_result.result)
|
||||
|
||||
messages: list[str] = []
|
||||
maybe_delete_ids: list[int] = []
|
||||
for match_result in sorted(results):
|
||||
if match_result.ratio >= opt_ratio:
|
||||
messages.append(
|
||||
self.style.NOTICE(
|
||||
f"Document {match_result.doc_one_pk} fuzzy match"
|
||||
f" to {match_result.doc_two_pk}"
|
||||
f" (confidence {match_result.ratio:.3f})\n",
|
||||
with multiprocessing.Pool(processes=self.process_count) as pool:
|
||||
results = list(
|
||||
tqdm.tqdm(
|
||||
pool.imap_unordered(_process_and_match, work_pkgs),
|
||||
total=len(work_pkgs),
|
||||
disable=self.no_progress_bar,
|
||||
),
|
||||
)
|
||||
maybe_delete_ids.append(match_result.doc_two_pk)
|
||||
|
||||
# Check results
|
||||
messages = []
|
||||
maybe_delete_ids = []
|
||||
for result in sorted(results):
|
||||
if result.ratio >= opt_ratio:
|
||||
messages.append(
|
||||
self.style.NOTICE(
|
||||
f"Document {result.doc_one_pk} fuzzy match"
|
||||
f" to {result.doc_two_pk} (confidence {result.ratio:.3f})\n",
|
||||
),
|
||||
)
|
||||
maybe_delete_ids.append(result.doc_two_pk)
|
||||
|
||||
if len(messages) == 0:
|
||||
messages.append(self.style.SUCCESS("No matches found\n"))
|
||||
self.stdout.writelines(messages)
|
||||
|
||||
messages.append(
|
||||
self.style.SUCCESS("No matches found\n"),
|
||||
)
|
||||
self.stdout.writelines(
|
||||
messages,
|
||||
)
|
||||
if options["delete"]:
|
||||
self.stdout.write(
|
||||
self.style.NOTICE(
|
||||
|
||||
@@ -1,12 +1,25 @@
|
||||
import logging
|
||||
|
||||
import tqdm
|
||||
from django.core.management.base import BaseCommand
|
||||
from django.db.models.signals import post_save
|
||||
|
||||
from documents.management.commands.base import PaperlessCommand
|
||||
from documents.management.commands.mixins import ProgressBarMixin
|
||||
from documents.models import Document
|
||||
|
||||
|
||||
class Command(PaperlessCommand):
|
||||
help = "Rename all documents"
|
||||
class Command(ProgressBarMixin, BaseCommand):
|
||||
help = "This will rename all documents to match the latest filename format."
|
||||
|
||||
def add_arguments(self, parser):
|
||||
self.add_argument_progress_bar_mixin(parser)
|
||||
|
||||
def handle(self, *args, **options):
|
||||
for document in self.track(Document.objects.all(), description="Renaming..."):
|
||||
self.handle_progress_bar_mixin(**options)
|
||||
logging.getLogger().handlers[0].level = logging.ERROR
|
||||
|
||||
for document in tqdm.tqdm(
|
||||
Document.objects.all(),
|
||||
disable=self.no_progress_bar,
|
||||
):
|
||||
post_save.send(Document, instance=document, created=False)
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
import logging
|
||||
|
||||
import tqdm
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
from documents.classifier import load_classifier
|
||||
from documents.management.commands.base import PaperlessCommand
|
||||
from documents.management.commands.mixins import ProgressBarMixin
|
||||
from documents.models import Document
|
||||
from documents.signals.handlers import set_correspondent
|
||||
from documents.signals.handlers import set_document_type
|
||||
@@ -11,7 +14,7 @@ from documents.signals.handlers import set_tags
|
||||
logger = logging.getLogger("paperless.management.retagger")
|
||||
|
||||
|
||||
class Command(PaperlessCommand):
|
||||
class Command(ProgressBarMixin, BaseCommand):
|
||||
help = (
|
||||
"Using the current classification model, assigns correspondents, tags "
|
||||
"and document types to all documents, effectively allowing you to "
|
||||
@@ -20,7 +23,6 @@ class Command(PaperlessCommand):
|
||||
)
|
||||
|
||||
def add_arguments(self, parser):
|
||||
super().add_arguments(parser)
|
||||
parser.add_argument("-c", "--correspondent", default=False, action="store_true")
|
||||
parser.add_argument("-T", "--tags", default=False, action="store_true")
|
||||
parser.add_argument("-t", "--document_type", default=False, action="store_true")
|
||||
@@ -32,7 +34,7 @@ class Command(PaperlessCommand):
|
||||
action="store_true",
|
||||
help=(
|
||||
"By default this command won't try to assign a correspondent "
|
||||
"if more than one matches the document. Use this flag if "
|
||||
"if more than one matches the document. Use this flag if "
|
||||
"you'd rather it just pick the first one it finds."
|
||||
),
|
||||
)
|
||||
@@ -47,6 +49,7 @@ class Command(PaperlessCommand):
|
||||
"and tags that do not match anymore due to changed rules."
|
||||
),
|
||||
)
|
||||
self.add_argument_progress_bar_mixin(parser)
|
||||
parser.add_argument(
|
||||
"--suggest",
|
||||
default=False,
|
||||
@@ -65,6 +68,8 @@ class Command(PaperlessCommand):
|
||||
)
|
||||
|
||||
def handle(self, *args, **options):
|
||||
self.handle_progress_bar_mixin(**options)
|
||||
|
||||
if options["inbox_only"]:
|
||||
queryset = Document.objects.filter(tags__is_inbox_tag=True)
|
||||
else:
|
||||
@@ -79,7 +84,7 @@ class Command(PaperlessCommand):
|
||||
|
||||
classifier = load_classifier()
|
||||
|
||||
for document in self.track(documents, description="Retagging..."):
|
||||
for document in tqdm.tqdm(documents, disable=self.no_progress_bar):
|
||||
if options["correspondent"]:
|
||||
set_correspondent(
|
||||
sender=None,
|
||||
@@ -117,7 +122,6 @@ class Command(PaperlessCommand):
|
||||
stdout=self.stdout,
|
||||
style_func=self.style,
|
||||
)
|
||||
|
||||
if options["storage_path"]:
|
||||
set_storage_path(
|
||||
sender=None,
|
||||
|
||||
@@ -1,45 +1,43 @@
|
||||
import logging
|
||||
import multiprocessing
|
||||
import shutil
|
||||
|
||||
from documents.management.commands.base import PaperlessCommand
|
||||
import tqdm
|
||||
from django import db
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
from documents.management.commands.mixins import MultiProcessMixin
|
||||
from documents.management.commands.mixins import ProgressBarMixin
|
||||
from documents.models import Document
|
||||
from documents.parsers import get_parser_class_for_mime_type
|
||||
|
||||
logger = logging.getLogger("paperless.management.thumbnails")
|
||||
|
||||
|
||||
def _process_document(doc_id: int) -> None:
|
||||
def _process_document(doc_id) -> None:
|
||||
document: Document = Document.objects.get(id=doc_id)
|
||||
parser_class = get_parser_class_for_mime_type(document.mime_type)
|
||||
|
||||
if parser_class is None:
|
||||
logger.warning(
|
||||
"%s: No parser for mime type %s",
|
||||
document,
|
||||
document.mime_type,
|
||||
)
|
||||
if parser_class:
|
||||
parser = parser_class(logging_group=None)
|
||||
else:
|
||||
print(f"{document} No parser for mime type {document.mime_type}") # noqa: T201
|
||||
return
|
||||
|
||||
parser = parser_class(logging_group=None)
|
||||
|
||||
try:
|
||||
thumb = parser.get_thumbnail(
|
||||
document.source_path,
|
||||
document.mime_type,
|
||||
document.get_public_filename(),
|
||||
)
|
||||
|
||||
shutil.move(thumb, document.thumbnail_path)
|
||||
finally:
|
||||
parser.cleanup()
|
||||
|
||||
|
||||
class Command(PaperlessCommand):
|
||||
class Command(MultiProcessMixin, ProgressBarMixin, BaseCommand):
|
||||
help = "This will regenerate the thumbnails for all documents."
|
||||
|
||||
supports_multiprocessing = True
|
||||
|
||||
def add_arguments(self, parser) -> None:
|
||||
super().add_arguments(parser)
|
||||
parser.add_argument(
|
||||
"-d",
|
||||
"--document",
|
||||
@@ -51,23 +49,36 @@ class Command(PaperlessCommand):
|
||||
"run on this specific document."
|
||||
),
|
||||
)
|
||||
self.add_argument_progress_bar_mixin(parser)
|
||||
self.add_argument_processes_mixin(parser)
|
||||
|
||||
def handle(self, *args, **options):
|
||||
logging.getLogger().handlers[0].level = logging.ERROR
|
||||
|
||||
self.handle_processes_mixin(**options)
|
||||
self.handle_progress_bar_mixin(**options)
|
||||
|
||||
if options["document"]:
|
||||
documents = Document.objects.filter(pk=options["document"])
|
||||
else:
|
||||
documents = Document.objects.all()
|
||||
|
||||
ids = list(documents.values_list("id", flat=True))
|
||||
ids = [doc.id for doc in documents]
|
||||
|
||||
for result in self.process_parallel(
|
||||
_process_document,
|
||||
ids,
|
||||
description="Regenerating thumbnails...",
|
||||
):
|
||||
if result.error: # pragma: no cover
|
||||
self.console.print(
|
||||
f"[red]Failed document {result.item}: {result.error}[/red]",
|
||||
# Note to future self: this prevents django from reusing database
|
||||
# connections between processes, which is bad and does not work
|
||||
# with postgres.
|
||||
db.connections.close_all()
|
||||
|
||||
if self.process_count == 1:
|
||||
for doc_id in ids:
|
||||
_process_document(doc_id)
|
||||
else: # pragma: no cover
|
||||
with multiprocessing.Pool(processes=self.process_count) as pool:
|
||||
list(
|
||||
tqdm.tqdm(
|
||||
pool.imap_unordered(_process_document, ids),
|
||||
total=len(ids),
|
||||
disable=self.no_progress_bar,
|
||||
),
|
||||
)
|
||||
|
||||
@@ -21,6 +21,26 @@ class CryptFields(TypedDict):
|
||||
fields: list[str]
|
||||
|
||||
|
||||
class MultiProcessMixin:
|
||||
"""
|
||||
Small class to handle adding an argument and validating it
|
||||
for the use of multiple processes
|
||||
"""
|
||||
|
||||
def add_argument_processes_mixin(self, parser: ArgumentParser) -> None:
|
||||
parser.add_argument(
|
||||
"--processes",
|
||||
default=max(1, os.cpu_count() // 4),
|
||||
type=int,
|
||||
help="Number of processes to distribute work amongst",
|
||||
)
|
||||
|
||||
def handle_processes_mixin(self, *args, **options) -> None:
|
||||
self.process_count = options["processes"]
|
||||
if self.process_count < 1:
|
||||
raise CommandError("There must be at least 1 process")
|
||||
|
||||
|
||||
class ProgressBarMixin:
|
||||
"""
|
||||
Many commands use a progress bar, which can be disabled
|
||||
|
||||
@@ -1,21 +1,27 @@
|
||||
from auditlog.models import LogEntry
|
||||
from django.core.management.base import BaseCommand
|
||||
from django.db import transaction
|
||||
from tqdm import tqdm
|
||||
|
||||
from documents.management.commands.base import PaperlessCommand
|
||||
from documents.management.commands.mixins import ProgressBarMixin
|
||||
|
||||
|
||||
class Command(PaperlessCommand):
|
||||
"""Prune the audit logs of objects that no longer exist."""
|
||||
class Command(BaseCommand, ProgressBarMixin):
|
||||
"""
|
||||
Prune the audit logs of objects that no longer exist.
|
||||
"""
|
||||
|
||||
help = "Prunes the audit logs of objects that no longer exist."
|
||||
|
||||
def handle(self, *args, **options):
|
||||
def add_arguments(self, parser):
|
||||
self.add_argument_progress_bar_mixin(parser)
|
||||
|
||||
def handle(self, **options):
|
||||
self.handle_progress_bar_mixin(**options)
|
||||
with transaction.atomic():
|
||||
for log_entry in self.track(
|
||||
LogEntry.objects.all(),
|
||||
description="Pruning audit logs...",
|
||||
):
|
||||
for log_entry in tqdm(LogEntry.objects.all(), disable=self.no_progress_bar):
|
||||
model_class = log_entry.content_type.model_class()
|
||||
# use global_objects for SoftDeleteModel
|
||||
objects = (
|
||||
model_class.global_objects
|
||||
if hasattr(model_class, "global_objects")
|
||||
@@ -26,8 +32,8 @@ class Command(PaperlessCommand):
|
||||
and not objects.filter(pk=log_entry.object_id).exists()
|
||||
):
|
||||
log_entry.delete()
|
||||
self.console.print(
|
||||
f"Deleted audit log entry for "
|
||||
f"{model_class.__name__} #{log_entry.object_id}",
|
||||
style="yellow",
|
||||
tqdm.write(
|
||||
self.style.NOTICE(
|
||||
f"Deleted audit log entry for {model_class.__name__} #{log_entry.object_id}",
|
||||
),
|
||||
)
|
||||
|
||||
@@ -9,7 +9,7 @@ from types import TracebackType
|
||||
try:
|
||||
from typing import Self
|
||||
except ImportError:
|
||||
from typing_extensions import Self
|
||||
from typing import Self
|
||||
|
||||
import dateparser
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@ if TYPE_CHECKING:
|
||||
from channels_redis.pubsub import RedisPubSubChannelLayer
|
||||
|
||||
|
||||
class ProgressStatusOptions(str, enum.Enum):
|
||||
class ProgressStatusOptions(enum.StrEnum):
|
||||
STARTED = "STARTED"
|
||||
WORKING = "WORKING"
|
||||
SUCCESS = "SUCCESS"
|
||||
|
||||
@@ -24,7 +24,7 @@ def base_config() -> DateParserConfig:
|
||||
12,
|
||||
0,
|
||||
0,
|
||||
tzinfo=datetime.timezone.utc,
|
||||
tzinfo=datetime.UTC,
|
||||
),
|
||||
filename_date_order="YMD",
|
||||
content_date_order="DMY",
|
||||
@@ -45,7 +45,7 @@ def config_with_ignore_dates() -> DateParserConfig:
|
||||
12,
|
||||
0,
|
||||
0,
|
||||
tzinfo=datetime.timezone.utc,
|
||||
tzinfo=datetime.UTC,
|
||||
),
|
||||
filename_date_order="DMY",
|
||||
content_date_order="MDY",
|
||||
|
||||
@@ -101,50 +101,50 @@ class TestFilterDate:
|
||||
[
|
||||
# Valid Dates
|
||||
pytest.param(
|
||||
datetime.datetime(2024, 1, 10, tzinfo=datetime.timezone.utc),
|
||||
datetime.datetime(2024, 1, 10, tzinfo=datetime.timezone.utc),
|
||||
datetime.datetime(2024, 1, 10, tzinfo=datetime.UTC),
|
||||
datetime.datetime(2024, 1, 10, tzinfo=datetime.UTC),
|
||||
id="valid_past_date",
|
||||
),
|
||||
pytest.param(
|
||||
datetime.datetime(2024, 1, 15, 12, 0, 0, tzinfo=datetime.timezone.utc),
|
||||
datetime.datetime(2024, 1, 15, 12, 0, 0, tzinfo=datetime.timezone.utc),
|
||||
datetime.datetime(2024, 1, 15, 12, 0, 0, tzinfo=datetime.UTC),
|
||||
datetime.datetime(2024, 1, 15, 12, 0, 0, tzinfo=datetime.UTC),
|
||||
id="exactly_at_reference",
|
||||
),
|
||||
pytest.param(
|
||||
datetime.datetime(1901, 1, 1, tzinfo=datetime.timezone.utc),
|
||||
datetime.datetime(1901, 1, 1, tzinfo=datetime.timezone.utc),
|
||||
datetime.datetime(1901, 1, 1, tzinfo=datetime.UTC),
|
||||
datetime.datetime(1901, 1, 1, tzinfo=datetime.UTC),
|
||||
id="year_1901_valid",
|
||||
),
|
||||
# Date is > reference_time
|
||||
pytest.param(
|
||||
datetime.datetime(2024, 1, 16, tzinfo=datetime.timezone.utc),
|
||||
datetime.datetime(2024, 1, 16, tzinfo=datetime.UTC),
|
||||
None,
|
||||
id="future_date_day_after",
|
||||
),
|
||||
# date.date() in ignore_dates
|
||||
pytest.param(
|
||||
datetime.datetime(2024, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc),
|
||||
datetime.datetime(2024, 1, 1, 0, 0, 0, tzinfo=datetime.UTC),
|
||||
None,
|
||||
id="ignored_date_midnight_jan1",
|
||||
),
|
||||
pytest.param(
|
||||
datetime.datetime(2024, 1, 1, 10, 30, 0, tzinfo=datetime.timezone.utc),
|
||||
datetime.datetime(2024, 1, 1, 10, 30, 0, tzinfo=datetime.UTC),
|
||||
None,
|
||||
id="ignored_date_midday_jan1",
|
||||
),
|
||||
pytest.param(
|
||||
datetime.datetime(2024, 12, 25, 15, 0, 0, tzinfo=datetime.timezone.utc),
|
||||
datetime.datetime(2024, 12, 25, 15, 0, 0, tzinfo=datetime.UTC),
|
||||
None,
|
||||
id="ignored_date_dec25_future",
|
||||
),
|
||||
# date.year <= 1900
|
||||
pytest.param(
|
||||
datetime.datetime(1899, 12, 31, tzinfo=datetime.timezone.utc),
|
||||
datetime.datetime(1899, 12, 31, tzinfo=datetime.UTC),
|
||||
None,
|
||||
id="year_1899",
|
||||
),
|
||||
pytest.param(
|
||||
datetime.datetime(1900, 1, 1, tzinfo=datetime.timezone.utc),
|
||||
datetime.datetime(1900, 1, 1, tzinfo=datetime.UTC),
|
||||
None,
|
||||
id="year_1900_boundary",
|
||||
),
|
||||
@@ -176,7 +176,7 @@ class TestFilterDate:
|
||||
1,
|
||||
12,
|
||||
0,
|
||||
tzinfo=datetime.timezone.utc,
|
||||
tzinfo=datetime.UTC,
|
||||
)
|
||||
another_ignored = datetime.datetime(
|
||||
2024,
|
||||
@@ -184,7 +184,7 @@ class TestFilterDate:
|
||||
25,
|
||||
15,
|
||||
30,
|
||||
tzinfo=datetime.timezone.utc,
|
||||
tzinfo=datetime.UTC,
|
||||
)
|
||||
allowed_date = datetime.datetime(
|
||||
2024,
|
||||
@@ -192,7 +192,7 @@ class TestFilterDate:
|
||||
2,
|
||||
12,
|
||||
0,
|
||||
tzinfo=datetime.timezone.utc,
|
||||
tzinfo=datetime.UTC,
|
||||
)
|
||||
|
||||
assert parser._filter_date(ignored_date) is None
|
||||
@@ -204,7 +204,7 @@ class TestFilterDate:
|
||||
regex_parser: RegexDateParserPlugin,
|
||||
) -> None:
|
||||
"""Should work with timezone-aware datetimes."""
|
||||
date_utc = datetime.datetime(2024, 1, 10, 12, 0, tzinfo=datetime.timezone.utc)
|
||||
date_utc = datetime.datetime(2024, 1, 10, 12, 0, tzinfo=datetime.UTC)
|
||||
|
||||
result = regex_parser._filter_date(date_utc)
|
||||
|
||||
@@ -221,8 +221,8 @@ class TestRegexDateParser:
|
||||
"report-2023-12-25.txt",
|
||||
"Event recorded on 25/12/2022.",
|
||||
[
|
||||
datetime.datetime(2023, 12, 25, tzinfo=datetime.timezone.utc),
|
||||
datetime.datetime(2022, 12, 25, tzinfo=datetime.timezone.utc),
|
||||
datetime.datetime(2023, 12, 25, tzinfo=datetime.UTC),
|
||||
datetime.datetime(2022, 12, 25, tzinfo=datetime.UTC),
|
||||
],
|
||||
id="filename-y-m-d_and_content-d-m-y",
|
||||
),
|
||||
@@ -230,8 +230,8 @@ class TestRegexDateParser:
|
||||
"img_2023.01.02.jpg",
|
||||
"Taken on 01/02/2023",
|
||||
[
|
||||
datetime.datetime(2023, 1, 2, tzinfo=datetime.timezone.utc),
|
||||
datetime.datetime(2023, 2, 1, tzinfo=datetime.timezone.utc),
|
||||
datetime.datetime(2023, 1, 2, tzinfo=datetime.UTC),
|
||||
datetime.datetime(2023, 2, 1, tzinfo=datetime.UTC),
|
||||
],
|
||||
id="ambiguous-dates-respect-orders",
|
||||
),
|
||||
@@ -239,7 +239,7 @@ class TestRegexDateParser:
|
||||
"notes.txt",
|
||||
"bad date 99/99/9999 and 25/12/2022",
|
||||
[
|
||||
datetime.datetime(2022, 12, 25, tzinfo=datetime.timezone.utc),
|
||||
datetime.datetime(2022, 12, 25, tzinfo=datetime.UTC),
|
||||
],
|
||||
id="parse-exception-skips-bad-and-yields-good",
|
||||
),
|
||||
@@ -275,24 +275,24 @@ class TestRegexDateParser:
|
||||
or "2023.12.25" in date_string
|
||||
or "2023-12-25" in date_string
|
||||
):
|
||||
return datetime.datetime(2023, 12, 25, tzinfo=datetime.timezone.utc)
|
||||
return datetime.datetime(2023, 12, 25, tzinfo=datetime.UTC)
|
||||
|
||||
# content DMY 25/12/2022
|
||||
if "25/12/2022" in date_string or "25-12-2022" in date_string:
|
||||
return datetime.datetime(2022, 12, 25, tzinfo=datetime.timezone.utc)
|
||||
return datetime.datetime(2022, 12, 25, tzinfo=datetime.UTC)
|
||||
|
||||
# filename YMD 2023.01.02
|
||||
if "2023.01.02" in date_string or "2023-01-02" in date_string:
|
||||
return datetime.datetime(2023, 1, 2, tzinfo=datetime.timezone.utc)
|
||||
return datetime.datetime(2023, 1, 2, tzinfo=datetime.UTC)
|
||||
|
||||
# ambiguous 01/02/2023 -> respect DATE_ORDER setting
|
||||
if "01/02/2023" in date_string:
|
||||
if date_order == "DMY":
|
||||
return datetime.datetime(2023, 2, 1, tzinfo=datetime.timezone.utc)
|
||||
return datetime.datetime(2023, 2, 1, tzinfo=datetime.UTC)
|
||||
if date_order == "YMD":
|
||||
return datetime.datetime(2023, 1, 2, tzinfo=datetime.timezone.utc)
|
||||
return datetime.datetime(2023, 1, 2, tzinfo=datetime.UTC)
|
||||
# fallback
|
||||
return datetime.datetime(2023, 2, 1, tzinfo=datetime.timezone.utc)
|
||||
return datetime.datetime(2023, 2, 1, tzinfo=datetime.UTC)
|
||||
|
||||
# simulate parse failure for malformed input
|
||||
if "99/99/9999" in date_string or "bad date" in date_string:
|
||||
@@ -328,7 +328,7 @@ class TestRegexDateParser:
|
||||
12,
|
||||
0,
|
||||
0,
|
||||
tzinfo=datetime.timezone.utc,
|
||||
tzinfo=datetime.UTC,
|
||||
),
|
||||
filename_date_order="YMD",
|
||||
content_date_order="DMY",
|
||||
@@ -344,13 +344,13 @@ class TestRegexDateParser:
|
||||
) -> datetime.datetime | None:
|
||||
if "10/12/2023" in date_string or "10-12-2023" in date_string:
|
||||
# ignored date
|
||||
return datetime.datetime(2023, 12, 10, tzinfo=datetime.timezone.utc)
|
||||
return datetime.datetime(2023, 12, 10, tzinfo=datetime.UTC)
|
||||
if "01/02/2024" in date_string or "01-02-2024" in date_string:
|
||||
# future relative to reference_time -> filtered
|
||||
return datetime.datetime(2024, 2, 1, tzinfo=datetime.timezone.utc)
|
||||
return datetime.datetime(2024, 2, 1, tzinfo=datetime.UTC)
|
||||
if "05/01/2023" in date_string or "05-01-2023" in date_string:
|
||||
# valid
|
||||
return datetime.datetime(2023, 1, 5, tzinfo=datetime.timezone.utc)
|
||||
return datetime.datetime(2023, 1, 5, tzinfo=datetime.UTC)
|
||||
return None
|
||||
|
||||
mocker.patch(target, side_effect=fake_parse)
|
||||
@@ -358,7 +358,7 @@ class TestRegexDateParser:
|
||||
content = "Ignored: 10/12/2023, Future: 01/02/2024, Keep: 05/01/2023"
|
||||
results = list(parser.parse("whatever.txt", content))
|
||||
|
||||
assert results == [datetime.datetime(2023, 1, 5, tzinfo=datetime.timezone.utc)]
|
||||
assert results == [datetime.datetime(2023, 1, 5, tzinfo=datetime.UTC)]
|
||||
|
||||
def test_parse_handles_no_matches_and_returns_empty_list(
|
||||
self,
|
||||
@@ -392,7 +392,7 @@ class TestRegexDateParser:
|
||||
12,
|
||||
0,
|
||||
0,
|
||||
tzinfo=datetime.timezone.utc,
|
||||
tzinfo=datetime.UTC,
|
||||
),
|
||||
filename_date_order=None,
|
||||
content_date_order="DMY",
|
||||
@@ -409,9 +409,9 @@ class TestRegexDateParser:
|
||||
) -> datetime.datetime | None:
|
||||
# return distinct datetimes so we can tell which source was parsed
|
||||
if "25/12/2022" in date_string:
|
||||
return datetime.datetime(2022, 12, 25, tzinfo=datetime.timezone.utc)
|
||||
return datetime.datetime(2022, 12, 25, tzinfo=datetime.UTC)
|
||||
if "2023-12-25" in date_string:
|
||||
return datetime.datetime(2023, 12, 25, tzinfo=datetime.timezone.utc)
|
||||
return datetime.datetime(2023, 12, 25, tzinfo=datetime.UTC)
|
||||
return None
|
||||
|
||||
mock = mocker.patch(target, side_effect=fake_parse)
|
||||
@@ -429,5 +429,5 @@ class TestRegexDateParser:
|
||||
assert "25/12/2022" in called_date_string
|
||||
# And the parser should have yielded the corresponding datetime
|
||||
assert results == [
|
||||
datetime.datetime(2022, 12, 25, tzinfo=datetime.timezone.utc),
|
||||
datetime.datetime(2022, 12, 25, tzinfo=datetime.UTC),
|
||||
]
|
||||
|
||||
@@ -1,518 +0,0 @@
|
||||
"""Tests for PaperlessCommand base class."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import pytest
|
||||
from django.core.management import CommandError
|
||||
from django.db.models import QuerySet
|
||||
from rich.console import Console
|
||||
|
||||
from documents.management.commands.base import PaperlessCommand
|
||||
from documents.management.commands.base import ProcessResult
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pytest_mock import MockerFixture
|
||||
|
||||
|
||||
# --- Test Commands ---
|
||||
# These simulate real command implementations for testing
|
||||
|
||||
|
||||
class SimpleCommand(PaperlessCommand):
|
||||
"""Command with default settings (progress bar, no multiprocessing)."""
|
||||
|
||||
help = "Simple test command"
|
||||
|
||||
def handle(self, *args, **options):
|
||||
items = list(range(5))
|
||||
results = []
|
||||
for item in self.track(items, description="Processing..."):
|
||||
results.append(item * 2)
|
||||
self.stdout.write(f"Results: {results}")
|
||||
|
||||
|
||||
class NoProgressBarCommand(PaperlessCommand):
|
||||
"""Command with progress bar disabled."""
|
||||
|
||||
help = "No progress bar command"
|
||||
supports_progress_bar = False
|
||||
|
||||
def handle(self, *args, **options):
|
||||
items = list(range(3))
|
||||
for _ in self.track(items):
|
||||
# We don't need to actually work
|
||||
pass
|
||||
self.stdout.write("Done")
|
||||
|
||||
|
||||
class MultiprocessCommand(PaperlessCommand):
|
||||
"""Command with multiprocessing support."""
|
||||
|
||||
help = "Multiprocess test command"
|
||||
supports_multiprocessing = True
|
||||
|
||||
def handle(self, *args, **options):
|
||||
items = list(range(5))
|
||||
results = []
|
||||
for result in self.process_parallel(
|
||||
_double_value,
|
||||
items,
|
||||
description="Processing...",
|
||||
):
|
||||
results.append(result)
|
||||
successes = sum(1 for r in results if r.success)
|
||||
self.stdout.write(f"Successes: {successes}")
|
||||
|
||||
|
||||
# --- Helper Functions for Multiprocessing ---
|
||||
# Must be at module level to be picklable
|
||||
|
||||
|
||||
def _double_value(x: int) -> int:
|
||||
"""Double the input value."""
|
||||
return x * 2
|
||||
|
||||
|
||||
def _divide_ten_by(x: int) -> float:
|
||||
"""Divide 10 by x. Raises ZeroDivisionError if x is 0."""
|
||||
return 10 / x
|
||||
|
||||
|
||||
# --- Fixtures ---
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def console() -> Console:
|
||||
"""Create a non-interactive console for testing."""
|
||||
return Console(force_terminal=False, force_interactive=False)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def simple_command(console: Console) -> SimpleCommand:
|
||||
"""Create a SimpleCommand instance configured for testing."""
|
||||
command = SimpleCommand()
|
||||
command.stdout = io.StringIO()
|
||||
command.stderr = io.StringIO()
|
||||
command.console = console
|
||||
command.no_progress_bar = True
|
||||
command.process_count = 1
|
||||
return command
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def multiprocess_command(console: Console) -> MultiprocessCommand:
|
||||
"""Create a MultiprocessCommand instance configured for testing."""
|
||||
command = MultiprocessCommand()
|
||||
command.stdout = io.StringIO()
|
||||
command.stderr = io.StringIO()
|
||||
command.console = console
|
||||
command.no_progress_bar = True
|
||||
command.process_count = 1
|
||||
return command
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_queryset():
|
||||
"""
|
||||
Create a mock Django QuerySet that tracks method calls.
|
||||
|
||||
This verifies we use .count() instead of len() for querysets.
|
||||
"""
|
||||
|
||||
class MockQuerySet(QuerySet):
|
||||
def __init__(self, items: list):
|
||||
self._items = items
|
||||
self.count_called = False
|
||||
|
||||
def count(self) -> int:
|
||||
self.count_called = True
|
||||
return len(self._items)
|
||||
|
||||
def __iter__(self):
|
||||
return iter(self._items)
|
||||
|
||||
def __len__(self):
|
||||
raise AssertionError("len() should not be called on querysets")
|
||||
|
||||
return MockQuerySet
|
||||
|
||||
|
||||
# --- Test Classes ---
|
||||
|
||||
|
||||
@pytest.mark.management
|
||||
class TestProcessResult:
|
||||
"""Tests for the ProcessResult dataclass."""
|
||||
|
||||
def test_success_result(self):
|
||||
result = ProcessResult(item=1, result=2, error=None)
|
||||
|
||||
assert result.item == 1
|
||||
assert result.result == 2
|
||||
assert result.error is None
|
||||
assert result.success is True
|
||||
|
||||
def test_error_result(self):
|
||||
error = ValueError("test error")
|
||||
result = ProcessResult(item=1, result=None, error=error)
|
||||
|
||||
assert result.item == 1
|
||||
assert result.result is None
|
||||
assert result.error is error
|
||||
assert result.success is False
|
||||
|
||||
|
||||
@pytest.mark.management
|
||||
class TestPaperlessCommandArguments:
|
||||
"""Tests for argument parsing behavior."""
|
||||
|
||||
def test_progress_bar_argument_added_by_default(self):
|
||||
command = SimpleCommand()
|
||||
parser = command.create_parser("manage.py", "simple")
|
||||
|
||||
options = parser.parse_args(["--no-progress-bar"])
|
||||
assert options.no_progress_bar is True
|
||||
|
||||
options = parser.parse_args([])
|
||||
assert options.no_progress_bar is False
|
||||
|
||||
def test_progress_bar_argument_not_added_when_disabled(self):
|
||||
command = NoProgressBarCommand()
|
||||
parser = command.create_parser("manage.py", "noprogress")
|
||||
|
||||
options = parser.parse_args([])
|
||||
assert not hasattr(options, "no_progress_bar")
|
||||
|
||||
def test_processes_argument_added_when_multiprocessing_enabled(self):
|
||||
command = MultiprocessCommand()
|
||||
parser = command.create_parser("manage.py", "multiprocess")
|
||||
|
||||
options = parser.parse_args(["--processes", "4"])
|
||||
assert options.processes == 4
|
||||
|
||||
options = parser.parse_args([])
|
||||
assert options.processes >= 1
|
||||
|
||||
def test_processes_argument_not_added_when_multiprocessing_disabled(self):
|
||||
command = SimpleCommand()
|
||||
parser = command.create_parser("manage.py", "simple")
|
||||
|
||||
options = parser.parse_args([])
|
||||
assert not hasattr(options, "processes")
|
||||
|
||||
|
||||
@pytest.mark.management
|
||||
class TestPaperlessCommandExecute:
|
||||
"""Tests for the execute() setup behavior."""
|
||||
|
||||
@pytest.fixture
|
||||
def base_options(self) -> dict:
|
||||
"""Base options required for execute()."""
|
||||
return {
|
||||
"verbosity": 1,
|
||||
"no_color": True,
|
||||
"force_color": False,
|
||||
"skip_checks": True,
|
||||
}
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
("no_progress_bar_flag", "expected"),
|
||||
[
|
||||
pytest.param(False, False, id="progress-bar-enabled"),
|
||||
pytest.param(True, True, id="progress-bar-disabled"),
|
||||
],
|
||||
)
|
||||
def test_no_progress_bar_state_set(
|
||||
self,
|
||||
base_options: dict,
|
||||
*,
|
||||
no_progress_bar_flag: bool,
|
||||
expected: bool,
|
||||
):
|
||||
command = SimpleCommand()
|
||||
command.stdout = io.StringIO()
|
||||
command.stderr = io.StringIO()
|
||||
|
||||
options = {**base_options, "no_progress_bar": no_progress_bar_flag}
|
||||
command.execute(**options)
|
||||
|
||||
assert command.no_progress_bar is expected
|
||||
|
||||
def test_no_progress_bar_always_true_when_not_supported(self, base_options: dict):
|
||||
command = NoProgressBarCommand()
|
||||
command.stdout = io.StringIO()
|
||||
command.stderr = io.StringIO()
|
||||
|
||||
command.execute(**base_options)
|
||||
|
||||
assert command.no_progress_bar is True
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
("processes", "expected"),
|
||||
[
|
||||
pytest.param(1, 1, id="single-process"),
|
||||
pytest.param(4, 4, id="four-processes"),
|
||||
],
|
||||
)
|
||||
def test_process_count_set(
|
||||
self,
|
||||
base_options: dict,
|
||||
processes: int,
|
||||
expected: int,
|
||||
):
|
||||
command = MultiprocessCommand()
|
||||
command.stdout = io.StringIO()
|
||||
command.stderr = io.StringIO()
|
||||
|
||||
options = {**base_options, "processes": processes, "no_progress_bar": True}
|
||||
command.execute(**options)
|
||||
|
||||
assert command.process_count == expected
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"invalid_count",
|
||||
[
|
||||
pytest.param(0, id="zero"),
|
||||
pytest.param(-1, id="negative"),
|
||||
],
|
||||
)
|
||||
def test_process_count_validation_rejects_invalid(
|
||||
self,
|
||||
base_options: dict,
|
||||
invalid_count: int,
|
||||
):
|
||||
command = MultiprocessCommand()
|
||||
command.stdout = io.StringIO()
|
||||
command.stderr = io.StringIO()
|
||||
|
||||
options = {**base_options, "processes": invalid_count, "no_progress_bar": True}
|
||||
|
||||
with pytest.raises(CommandError, match="--processes must be at least 1"):
|
||||
command.execute(**options)
|
||||
|
||||
def test_process_count_defaults_to_one_when_not_supported(self, base_options: dict):
|
||||
command = SimpleCommand()
|
||||
command.stdout = io.StringIO()
|
||||
command.stderr = io.StringIO()
|
||||
|
||||
options = {**base_options, "no_progress_bar": True}
|
||||
command.execute(**options)
|
||||
|
||||
assert command.process_count == 1
|
||||
|
||||
|
||||
@pytest.mark.management
|
||||
class TestGetIterableLength:
|
||||
"""Tests for the _get_iterable_length() method."""
|
||||
|
||||
def test_uses_count_for_querysets(
|
||||
self,
|
||||
simple_command: SimpleCommand,
|
||||
mock_queryset,
|
||||
):
|
||||
"""Should call .count() on Django querysets rather than len()."""
|
||||
queryset = mock_queryset([1, 2, 3, 4, 5])
|
||||
|
||||
result = simple_command._get_iterable_length(queryset)
|
||||
|
||||
assert result == 5
|
||||
assert queryset.count_called is True
|
||||
|
||||
def test_uses_len_for_sized(self, simple_command: SimpleCommand):
|
||||
"""Should use len() for sequences and other Sized types."""
|
||||
result = simple_command._get_iterable_length([1, 2, 3, 4])
|
||||
|
||||
assert result == 4
|
||||
|
||||
def test_returns_none_for_unsized_iterables(self, simple_command: SimpleCommand):
|
||||
"""Should return None for generators and other iterables without len()."""
|
||||
result = simple_command._get_iterable_length(x for x in [1, 2, 3])
|
||||
|
||||
assert result is None
|
||||
|
||||
|
||||
@pytest.mark.management
|
||||
class TestTrack:
|
||||
"""Tests for the track() method."""
|
||||
|
||||
def test_with_progress_bar_disabled(self, simple_command: SimpleCommand):
|
||||
simple_command.no_progress_bar = True
|
||||
items = ["a", "b", "c"]
|
||||
|
||||
result = list(simple_command.track(items, description="Test..."))
|
||||
|
||||
assert result == items
|
||||
|
||||
def test_with_progress_bar_enabled(self, simple_command: SimpleCommand):
|
||||
simple_command.no_progress_bar = False
|
||||
items = [1, 2, 3]
|
||||
|
||||
result = list(simple_command.track(items, description="Processing..."))
|
||||
|
||||
assert result == items
|
||||
|
||||
def test_with_explicit_total(self, simple_command: SimpleCommand):
|
||||
simple_command.no_progress_bar = False
|
||||
|
||||
def gen():
|
||||
yield from [1, 2, 3]
|
||||
|
||||
result = list(simple_command.track(gen(), total=3))
|
||||
|
||||
assert result == [1, 2, 3]
|
||||
|
||||
def test_with_generator_no_total(self, simple_command: SimpleCommand):
|
||||
def gen():
|
||||
yield from [1, 2, 3]
|
||||
|
||||
result = list(simple_command.track(gen()))
|
||||
|
||||
assert result == [1, 2, 3]
|
||||
|
||||
def test_empty_iterable(self, simple_command: SimpleCommand):
|
||||
result = list(simple_command.track([]))
|
||||
|
||||
assert result == []
|
||||
|
||||
def test_uses_queryset_count(
|
||||
self,
|
||||
simple_command: SimpleCommand,
|
||||
mock_queryset,
|
||||
mocker: MockerFixture,
|
||||
):
|
||||
"""Verify track() uses .count() for querysets."""
|
||||
simple_command.no_progress_bar = False
|
||||
queryset = mock_queryset([1, 2, 3])
|
||||
|
||||
spy = mocker.spy(simple_command, "_get_iterable_length")
|
||||
|
||||
result = list(simple_command.track(queryset))
|
||||
|
||||
assert result == [1, 2, 3]
|
||||
spy.assert_called_once_with(queryset)
|
||||
assert queryset.count_called is True
|
||||
|
||||
|
||||
@pytest.mark.management
|
||||
class TestProcessParallel:
|
||||
"""Tests for the process_parallel() method."""
|
||||
|
||||
def test_sequential_processing_single_process(
|
||||
self,
|
||||
multiprocess_command: MultiprocessCommand,
|
||||
):
|
||||
multiprocess_command.process_count = 1
|
||||
items = [1, 2, 3, 4, 5]
|
||||
|
||||
results = list(multiprocess_command.process_parallel(_double_value, items))
|
||||
|
||||
assert len(results) == 5
|
||||
assert all(r.success for r in results)
|
||||
|
||||
result_map = {r.item: r.result for r in results}
|
||||
assert result_map == {1: 2, 2: 4, 3: 6, 4: 8, 5: 10}
|
||||
|
||||
def test_sequential_processing_handles_errors(
|
||||
self,
|
||||
multiprocess_command: MultiprocessCommand,
|
||||
):
|
||||
multiprocess_command.process_count = 1
|
||||
items = [1, 2, 0, 4] # 0 causes ZeroDivisionError
|
||||
|
||||
results = list(multiprocess_command.process_parallel(_divide_ten_by, items))
|
||||
|
||||
assert len(results) == 4
|
||||
|
||||
successes = [r for r in results if r.success]
|
||||
failures = [r for r in results if not r.success]
|
||||
|
||||
assert len(successes) == 3
|
||||
assert len(failures) == 1
|
||||
assert failures[0].item == 0
|
||||
assert isinstance(failures[0].error, ZeroDivisionError)
|
||||
|
||||
def test_parallel_closes_db_connections(
|
||||
self,
|
||||
multiprocess_command: MultiprocessCommand,
|
||||
mocker: MockerFixture,
|
||||
):
|
||||
multiprocess_command.process_count = 2
|
||||
items = [1, 2, 3]
|
||||
|
||||
mock_connections = mocker.patch(
|
||||
"documents.management.commands.base.db.connections",
|
||||
)
|
||||
|
||||
results = list(multiprocess_command.process_parallel(_double_value, items))
|
||||
|
||||
mock_connections.close_all.assert_called_once()
|
||||
assert len(results) == 3
|
||||
|
||||
def test_parallel_processing_handles_errors(
|
||||
self,
|
||||
multiprocess_command: MultiprocessCommand,
|
||||
mocker: MockerFixture,
|
||||
):
|
||||
multiprocess_command.process_count = 2
|
||||
items = [1, 2, 0, 4]
|
||||
|
||||
mocker.patch("documents.management.commands.base.db.connections")
|
||||
|
||||
results = list(multiprocess_command.process_parallel(_divide_ten_by, items))
|
||||
|
||||
failures = [r for r in results if not r.success]
|
||||
assert len(failures) == 1
|
||||
assert failures[0].item == 0
|
||||
|
||||
def test_empty_items(self, multiprocess_command: MultiprocessCommand):
|
||||
results = list(multiprocess_command.process_parallel(_double_value, []))
|
||||
|
||||
assert results == []
|
||||
|
||||
def test_result_contains_original_item(
|
||||
self,
|
||||
multiprocess_command: MultiprocessCommand,
|
||||
):
|
||||
items = [10, 20, 30]
|
||||
|
||||
results = list(multiprocess_command.process_parallel(_double_value, items))
|
||||
|
||||
for result in results:
|
||||
assert result.item in items
|
||||
assert result.result == result.item * 2
|
||||
|
||||
def test_sequential_path_used_for_single_process(
|
||||
self,
|
||||
multiprocess_command: MultiprocessCommand,
|
||||
mocker: MockerFixture,
|
||||
):
|
||||
"""Verify single process uses sequential path (important for testing)."""
|
||||
multiprocess_command.process_count = 1
|
||||
|
||||
spy_sequential = mocker.spy(multiprocess_command, "_process_sequential")
|
||||
spy_parallel = mocker.spy(multiprocess_command, "_process_parallel")
|
||||
|
||||
list(multiprocess_command.process_parallel(_double_value, [1, 2, 3]))
|
||||
|
||||
spy_sequential.assert_called_once()
|
||||
spy_parallel.assert_not_called()
|
||||
|
||||
def test_parallel_path_used_for_multiple_processes(
|
||||
self,
|
||||
multiprocess_command: MultiprocessCommand,
|
||||
mocker: MockerFixture,
|
||||
):
|
||||
"""Verify multiple processes uses parallel path."""
|
||||
multiprocess_command.process_count = 2
|
||||
|
||||
mocker.patch("documents.management.commands.base.db.connections")
|
||||
spy_sequential = mocker.spy(multiprocess_command, "_process_sequential")
|
||||
spy_parallel = mocker.spy(multiprocess_command, "_process_parallel")
|
||||
|
||||
list(multiprocess_command.process_parallel(_double_value, [1, 2, 3]))
|
||||
|
||||
spy_parallel.assert_called_once()
|
||||
spy_sequential.assert_not_called()
|
||||
@@ -336,7 +336,11 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
added=d1,
|
||||
)
|
||||
|
||||
self.assertEqual(generate_filename(doc1), Path("232-01-09.pdf"))
|
||||
# Account for 3.14 padding changes
|
||||
expected_year: str = d1.strftime("%Y")
|
||||
expected_filename: Path = Path(f"{expected_year}-01-09.pdf")
|
||||
|
||||
self.assertEqual(generate_filename(doc1), expected_filename)
|
||||
|
||||
doc1.added = timezone.make_aware(datetime.datetime(2020, 11, 16, 1, 1, 1))
|
||||
|
||||
|
||||
@@ -21,7 +21,7 @@ class TestDateLocalization:
|
||||
14,
|
||||
30,
|
||||
5,
|
||||
tzinfo=datetime.timezone.utc,
|
||||
tzinfo=datetime.UTC,
|
||||
)
|
||||
|
||||
TEST_DATETIME_STRING: str = "2023-10-26T14:30:05+00:00"
|
||||
|
||||
@@ -4,7 +4,6 @@ from io import StringIO
|
||||
from pathlib import Path
|
||||
from unittest import mock
|
||||
|
||||
import pytest
|
||||
from auditlog.models import LogEntry
|
||||
from django.contrib.contenttypes.models import ContentType
|
||||
from django.core.management import call_command
|
||||
@@ -20,7 +19,6 @@ from documents.tests.utils import FileSystemAssertsMixin
|
||||
sample_file: Path = Path(__file__).parent / "samples" / "simple.pdf"
|
||||
|
||||
|
||||
@pytest.mark.management
|
||||
@override_settings(FILENAME_FORMAT="{correspondent}/{title}")
|
||||
class TestArchiver(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
def make_models(self):
|
||||
@@ -96,7 +94,6 @@ class TestArchiver(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
self.assertEqual(doc2.archive_filename, "document_01.pdf")
|
||||
|
||||
|
||||
@pytest.mark.management
|
||||
class TestMakeIndex(TestCase):
|
||||
@mock.patch("documents.management.commands.document_index.index_reindex")
|
||||
def test_reindex(self, m) -> None:
|
||||
@@ -109,7 +106,6 @@ class TestMakeIndex(TestCase):
|
||||
m.assert_called_once()
|
||||
|
||||
|
||||
@pytest.mark.management
|
||||
class TestRenamer(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
@override_settings(FILENAME_FORMAT="")
|
||||
def test_rename(self) -> None:
|
||||
@@ -144,7 +140,6 @@ class TestCreateClassifier(TestCase):
|
||||
m.assert_called_once()
|
||||
|
||||
|
||||
@pytest.mark.management
|
||||
class TestSanityChecker(DirectoriesMixin, TestCase):
|
||||
def test_no_issues(self) -> None:
|
||||
with self.assertLogs() as capture:
|
||||
@@ -170,7 +165,6 @@ class TestSanityChecker(DirectoriesMixin, TestCase):
|
||||
self.assertIn("Checksum mismatch. Stored: abc, actual:", capture.output[1])
|
||||
|
||||
|
||||
@pytest.mark.management
|
||||
class TestConvertMariaDBUUID(TestCase):
|
||||
@mock.patch("django.db.connection.schema_editor")
|
||||
def test_convert(self, m) -> None:
|
||||
@@ -184,7 +178,6 @@ class TestConvertMariaDBUUID(TestCase):
|
||||
self.assertIn("Successfully converted", stdout.getvalue())
|
||||
|
||||
|
||||
@pytest.mark.management
|
||||
class TestPruneAuditLogs(TestCase):
|
||||
def test_prune_audit_logs(self) -> None:
|
||||
LogEntry.objects.create(
|
||||
|
||||
@@ -577,7 +577,6 @@ class TestTagsFromPath:
|
||||
assert len(tag_ids) == 0
|
||||
|
||||
|
||||
@pytest.mark.management
|
||||
class TestCommandValidation:
|
||||
"""Tests for command argument validation."""
|
||||
|
||||
@@ -606,7 +605,6 @@ class TestCommandValidation:
|
||||
cmd.handle(directory=str(sample_pdf), oneshot=True, testing=False)
|
||||
|
||||
|
||||
@pytest.mark.management
|
||||
@pytest.mark.usefixtures("mock_supported_extensions")
|
||||
class TestCommandOneshot:
|
||||
"""Tests for oneshot mode."""
|
||||
@@ -777,7 +775,6 @@ def start_consumer(
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.management
|
||||
@pytest.mark.django_db
|
||||
class TestCommandWatch:
|
||||
"""Integration tests for the watch loop."""
|
||||
@@ -899,7 +896,6 @@ class TestCommandWatch:
|
||||
assert not thread.is_alive()
|
||||
|
||||
|
||||
@pytest.mark.management
|
||||
@pytest.mark.django_db
|
||||
class TestCommandWatchPolling:
|
||||
"""Tests for polling mode."""
|
||||
@@ -932,7 +928,6 @@ class TestCommandWatchPolling:
|
||||
mock_consume_file_delay.delay.assert_called()
|
||||
|
||||
|
||||
@pytest.mark.management
|
||||
@pytest.mark.django_db
|
||||
class TestCommandWatchRecursive:
|
||||
"""Tests for recursive watching."""
|
||||
@@ -996,7 +991,6 @@ class TestCommandWatchRecursive:
|
||||
assert len(overrides.tag_ids) == 2
|
||||
|
||||
|
||||
@pytest.mark.management
|
||||
@pytest.mark.django_db
|
||||
class TestCommandWatchEdgeCases:
|
||||
"""Tests for edge cases and error handling."""
|
||||
|
||||
@@ -7,7 +7,6 @@ from pathlib import Path
|
||||
from unittest import mock
|
||||
from zipfile import ZipFile
|
||||
|
||||
import pytest
|
||||
from allauth.socialaccount.models import SocialAccount
|
||||
from allauth.socialaccount.models import SocialApp
|
||||
from allauth.socialaccount.models import SocialToken
|
||||
@@ -46,7 +45,6 @@ from documents.tests.utils import paperless_environment
|
||||
from paperless_mail.models import MailAccount
|
||||
|
||||
|
||||
@pytest.mark.management
|
||||
class TestExportImport(
|
||||
DirectoriesMixin,
|
||||
FileSystemAssertsMixin,
|
||||
@@ -848,7 +846,6 @@ class TestExportImport(
|
||||
self.assertEqual(Document.objects.all().count(), 4)
|
||||
|
||||
|
||||
@pytest.mark.management
|
||||
class TestCryptExportImport(
|
||||
DirectoriesMixin,
|
||||
FileSystemAssertsMixin,
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
from io import StringIO
|
||||
|
||||
import pytest
|
||||
from django.core.management import CommandError
|
||||
from django.core.management import call_command
|
||||
from django.test import TestCase
|
||||
@@ -8,7 +7,6 @@ from django.test import TestCase
|
||||
from documents.models import Document
|
||||
|
||||
|
||||
@pytest.mark.management
|
||||
class TestFuzzyMatchCommand(TestCase):
|
||||
MSG_REGEX = r"Document \d fuzzy match to \d \(confidence \d\d\.\d\d\d\)"
|
||||
|
||||
@@ -51,6 +49,19 @@ class TestFuzzyMatchCommand(TestCase):
|
||||
self.call_command("--ratio", "101")
|
||||
self.assertIn("The ratio must be between 0 and 100", str(e.exception))
|
||||
|
||||
def test_invalid_process_count(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- Invalid process count less than 0 above upper
|
||||
WHEN:
|
||||
- Command is called
|
||||
THEN:
|
||||
- Error is raised indicating issue
|
||||
"""
|
||||
with self.assertRaises(CommandError) as e:
|
||||
self.call_command("--processes", "0")
|
||||
self.assertIn("There must be at least 1 process", str(e.exception))
|
||||
|
||||
def test_no_matches(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
@@ -140,7 +151,7 @@ class TestFuzzyMatchCommand(TestCase):
|
||||
mime_type="application/pdf",
|
||||
filename="final_test.pdf",
|
||||
)
|
||||
stdout, _ = self.call_command("--no-progress-bar")
|
||||
stdout, _ = self.call_command()
|
||||
lines = [x.strip() for x in stdout.splitlines() if x.strip()]
|
||||
self.assertEqual(len(lines), 3)
|
||||
for line in lines:
|
||||
@@ -183,7 +194,7 @@ class TestFuzzyMatchCommand(TestCase):
|
||||
|
||||
self.assertEqual(Document.objects.count(), 3)
|
||||
|
||||
stdout, _ = self.call_command("--delete", "--no-progress-bar")
|
||||
stdout, _ = self.call_command("--delete")
|
||||
|
||||
self.assertIn(
|
||||
"The command is configured to delete documents. Use with caution",
|
||||
|
||||
@@ -4,7 +4,6 @@ from io import StringIO
|
||||
from pathlib import Path
|
||||
from zipfile import ZipFile
|
||||
|
||||
import pytest
|
||||
from django.contrib.auth.models import User
|
||||
from django.core.management import call_command
|
||||
from django.core.management.base import CommandError
|
||||
@@ -19,7 +18,6 @@ from documents.tests.utils import FileSystemAssertsMixin
|
||||
from documents.tests.utils import SampleDirMixin
|
||||
|
||||
|
||||
@pytest.mark.management
|
||||
class TestCommandImport(
|
||||
DirectoriesMixin,
|
||||
FileSystemAssertsMixin,
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
import pytest
|
||||
from django.core.management import call_command
|
||||
from django.core.management.base import CommandError
|
||||
from django.test import TestCase
|
||||
@@ -11,7 +10,6 @@ from documents.models import Tag
|
||||
from documents.tests.utils import DirectoriesMixin
|
||||
|
||||
|
||||
@pytest.mark.management
|
||||
class TestRetagger(DirectoriesMixin, TestCase):
|
||||
def make_models(self) -> None:
|
||||
self.sp1 = StoragePath.objects.create(
|
||||
|
||||
@@ -2,7 +2,6 @@ import os
|
||||
from io import StringIO
|
||||
from unittest import mock
|
||||
|
||||
import pytest
|
||||
from django.contrib.auth.models import User
|
||||
from django.core.management import call_command
|
||||
from django.test import TestCase
|
||||
@@ -10,7 +9,6 @@ from django.test import TestCase
|
||||
from documents.tests.utils import DirectoriesMixin
|
||||
|
||||
|
||||
@pytest.mark.management
|
||||
class TestManageSuperUser(DirectoriesMixin, TestCase):
|
||||
def call_command(self, environ):
|
||||
out = StringIO()
|
||||
|
||||
@@ -2,7 +2,6 @@ import shutil
|
||||
from pathlib import Path
|
||||
from unittest import mock
|
||||
|
||||
import pytest
|
||||
from django.core.management import call_command
|
||||
from django.test import TestCase
|
||||
|
||||
@@ -13,7 +12,6 @@ from documents.tests.utils import DirectoriesMixin
|
||||
from documents.tests.utils import FileSystemAssertsMixin
|
||||
|
||||
|
||||
@pytest.mark.management
|
||||
class TestMakeThumbnails(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
def make_models(self) -> None:
|
||||
self.d1 = Document.objects.create(
|
||||
|
||||
@@ -4570,7 +4570,7 @@ class TestDateWorkflowLocalization(
|
||||
14,
|
||||
30,
|
||||
5,
|
||||
tzinfo=datetime.timezone.utc,
|
||||
tzinfo=datetime.UTC,
|
||||
)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from enum import StrEnum
|
||||
from typing import TYPE_CHECKING
|
||||
from typing import Any
|
||||
|
||||
@@ -11,7 +11,7 @@ if TYPE_CHECKING:
|
||||
from django.http import HttpRequest
|
||||
|
||||
|
||||
class VersionResolutionError(str, Enum):
|
||||
class VersionResolutionError(StrEnum):
|
||||
INVALID = "invalid"
|
||||
NOT_FOUND = "not_found"
|
||||
|
||||
|
||||
Reference in New Issue
Block a user