mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-03-06 01:06:25 +00:00
Compare commits
6 Commits
feature-mi
...
dev
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a5a267fe49 | ||
|
|
24a2cfd957 | ||
|
|
7cf2ef6398 | ||
|
|
df03207eef | ||
|
|
fa998ecd49 | ||
|
|
1e21bcd26e |
18
.codecov.yml
18
.codecov.yml
@@ -14,10 +14,6 @@ component_management:
|
||||
# https://docs.codecov.com/docs/carryforward-flags
|
||||
flags:
|
||||
# Backend Python versions
|
||||
backend-python-3.10:
|
||||
paths:
|
||||
- src/**
|
||||
carryforward: true
|
||||
backend-python-3.11:
|
||||
paths:
|
||||
- src/**
|
||||
@@ -26,6 +22,14 @@ flags:
|
||||
paths:
|
||||
- src/**
|
||||
carryforward: true
|
||||
backend-python-3.13:
|
||||
paths:
|
||||
- src/**
|
||||
carryforward: true
|
||||
backend-python-3.14:
|
||||
paths:
|
||||
- src/**
|
||||
carryforward: true
|
||||
# Frontend (shards merge into single flag)
|
||||
frontend-node-24.x:
|
||||
paths:
|
||||
@@ -41,9 +45,10 @@ coverage:
|
||||
project:
|
||||
backend:
|
||||
flags:
|
||||
- backend-python-3.10
|
||||
- backend-python-3.11
|
||||
- backend-python-3.12
|
||||
- backend-python-3.13
|
||||
- backend-python-3.14
|
||||
paths:
|
||||
- src/**
|
||||
# https://docs.codecov.com/docs/commit-status#threshold
|
||||
@@ -59,9 +64,10 @@ coverage:
|
||||
patch:
|
||||
backend:
|
||||
flags:
|
||||
- backend-python-3.10
|
||||
- backend-python-3.11
|
||||
- backend-python-3.12
|
||||
- backend-python-3.13
|
||||
- backend-python-3.14
|
||||
paths:
|
||||
- src/**
|
||||
target: 100%
|
||||
|
||||
2
.github/workflows/ci-backend.yml
vendored
2
.github/workflows/ci-backend.yml
vendored
@@ -31,7 +31,7 @@ jobs:
|
||||
runs-on: ubuntu-24.04
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ['3.10', '3.11', '3.12']
|
||||
python-version: ['3.11', '3.12', '3.13', '3.14']
|
||||
fail-fast: false
|
||||
steps:
|
||||
- name: Checkout
|
||||
|
||||
@@ -13,7 +13,9 @@ If you want to implement something big:
|
||||
|
||||
## Python
|
||||
|
||||
Paperless supports python 3.10 - 3.12 at this time. We format Python code with [ruff](https://docs.astral.sh/ruff/formatter/).
|
||||
Paperless-ngx currently supports Python 3.11, 3.12, 3.13, and 3.14. As a policy, we aim to support at least the three most recent Python versions, and drop support for versions as they reach end-of-life. Older versions may be supported if dependencies permit, but this is not guaranteed.
|
||||
|
||||
We format Python code with [ruff](https://docs.astral.sh/ruff/formatter/).
|
||||
|
||||
## Branches
|
||||
|
||||
|
||||
@@ -262,6 +262,10 @@ your files differently, you can do that by adjusting the
|
||||
or using [storage paths (see below)](#storage-paths). Paperless adds the
|
||||
correct file extension e.g. `.pdf`, `.jpg` automatically.
|
||||
|
||||
When a document has file versions, each version uses the same naming rules and
|
||||
storage path resolution as any other document file, with an added version suffix
|
||||
such as `_v1`, `_v2`, etc.
|
||||
|
||||
This variable allows you to configure the filename (folders are allowed)
|
||||
using placeholders. For example, configuring this to
|
||||
|
||||
@@ -353,6 +357,8 @@ If paperless detects that two documents share the same filename,
|
||||
paperless will automatically append `_01`, `_02`, etc. to the filename.
|
||||
This happens if all the placeholders in a filename evaluate to the same
|
||||
value.
|
||||
For versioned files, this counter is appended after the version suffix
|
||||
(for example `statement_v2_01.pdf`).
|
||||
|
||||
If there are any errors in the placeholders included in `PAPERLESS_FILENAME_FORMAT`,
|
||||
paperless will fall back to using the default naming scheme instead.
|
||||
|
||||
@@ -172,7 +172,7 @@ to enable polling and disable inotify. See [here](configuration.md#polling).
|
||||
#### Prerequisites
|
||||
|
||||
- Paperless runs on Linux only; Windows is not supported.
|
||||
- Python 3 is required with versions 3.10 - 3.12 currently supported. Newer versions may work, but some dependencies may not be fully compatible.
|
||||
- Python 3.11, 3.12, 3.13, or 3.14 is required. As a policy, Paperless-ngx aims to support at least the three most recent Python versions and drops support for versions as they reach end-of-life. Newer versions may work, but some dependencies may not be fully compatible.
|
||||
|
||||
#### Installation
|
||||
|
||||
|
||||
@@ -95,6 +95,7 @@ Think of versions as **file history** for a document.
|
||||
|
||||
- Versions track the underlying file and extracted text content (OCR/text).
|
||||
- Metadata such as tags, correspondent, document type, storage path and custom fields stay on the "root" document.
|
||||
- Version files follow normal filename formatting (including storage paths) and add a `_vN` suffix (for example `_v1`, `_v2`).
|
||||
- By default, search and document content use the latest version.
|
||||
- In document detail, selecting a version switches the preview, file metadata and content (as well as the download buttons, etc.) to that version.
|
||||
- Deleting a non-root version keeps metadata and falls back to the latest remaining version.
|
||||
|
||||
@@ -3,10 +3,9 @@ name = "paperless-ngx"
|
||||
version = "2.20.10"
|
||||
description = "A community-supported supercharged document management system: scan, index and archive all your physical documents"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.10"
|
||||
requires-python = ">=3.11"
|
||||
classifiers = [
|
||||
"Programming Language :: Python :: 3 :: Only",
|
||||
"Programming Language :: Python :: 3.10",
|
||||
"Programming Language :: Python :: 3.11",
|
||||
"Programming Language :: Python :: 3.12",
|
||||
"Programming Language :: Python :: 3.13",
|
||||
@@ -177,7 +176,7 @@ torch = [
|
||||
]
|
||||
|
||||
[tool.ruff]
|
||||
target-version = "py310"
|
||||
target-version = "py311"
|
||||
line-length = 88
|
||||
src = [
|
||||
"src",
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from datetime import UTC
|
||||
from datetime import datetime
|
||||
from datetime import timezone
|
||||
from typing import Any
|
||||
|
||||
from django.conf import settings
|
||||
@@ -139,7 +139,7 @@ def thumbnail_last_modified(request: Any, pk: int) -> datetime | None:
|
||||
# No cache, get the timestamp and cache the datetime
|
||||
last_modified = datetime.fromtimestamp(
|
||||
doc.thumbnail_path.stat().st_mtime,
|
||||
tz=timezone.utc,
|
||||
tz=UTC,
|
||||
)
|
||||
cache.set(doc_key, last_modified, CACHE_50_MINUTES)
|
||||
return last_modified
|
||||
|
||||
@@ -2,7 +2,7 @@ import datetime
|
||||
import hashlib
|
||||
import os
|
||||
import tempfile
|
||||
from enum import Enum
|
||||
from enum import StrEnum
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
from typing import Final
|
||||
@@ -11,6 +11,7 @@ import magic
|
||||
from django.conf import settings
|
||||
from django.contrib.auth.models import User
|
||||
from django.db import transaction
|
||||
from django.db.models import Max
|
||||
from django.db.models import Q
|
||||
from django.utils import timezone
|
||||
from filelock import FileLock
|
||||
@@ -82,7 +83,7 @@ class ConsumerError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class ConsumerStatusShortMessage(str, Enum):
|
||||
class ConsumerStatusShortMessage(StrEnum):
|
||||
DOCUMENT_ALREADY_EXISTS = "document_already_exists"
|
||||
DOCUMENT_ALREADY_EXISTS_IN_TRASH = "document_already_exists_in_trash"
|
||||
ASN_ALREADY_EXISTS = "asn_already_exists"
|
||||
@@ -124,22 +125,6 @@ class ConsumerPluginMixin:
|
||||
|
||||
self.filename = self.metadata.filename or self.input_doc.original_file.name
|
||||
|
||||
if input_doc.root_document_id:
|
||||
self.log.debug(
|
||||
f"Document root document id: {input_doc.root_document_id}",
|
||||
)
|
||||
root_document = Document.objects.get(pk=input_doc.root_document_id)
|
||||
version_index = Document.objects.filter(root_document=root_document).count()
|
||||
filename_path = Path(self.filename)
|
||||
if filename_path.suffix:
|
||||
self.filename = str(
|
||||
filename_path.with_name(
|
||||
f"{filename_path.stem}_v{version_index}{filename_path.suffix}",
|
||||
),
|
||||
)
|
||||
else:
|
||||
self.filename = f"{self.filename}_v{version_index}"
|
||||
|
||||
def _send_progress(
|
||||
self,
|
||||
current_progress: int,
|
||||
@@ -185,7 +170,7 @@ class ConsumerPlugin(
|
||||
):
|
||||
logging_name = LOGGING_NAME
|
||||
|
||||
def _clone_root_into_version(
|
||||
def _create_version_from_root(
|
||||
self,
|
||||
root_doc: Document,
|
||||
*,
|
||||
@@ -194,30 +179,38 @@ class ConsumerPlugin(
|
||||
mime_type: str,
|
||||
) -> Document:
|
||||
self.log.debug("Saving record for updated version to database")
|
||||
version_doc = Document.objects.get(pk=root_doc.pk)
|
||||
setattr(version_doc, "pk", None)
|
||||
version_doc.root_document = root_doc
|
||||
root_doc_frozen = Document.objects.select_for_update().get(pk=root_doc.pk)
|
||||
next_version_index = (
|
||||
Document.global_objects.filter(
|
||||
root_document_id=root_doc_frozen.pk,
|
||||
).aggregate(
|
||||
max_index=Max("version_index"),
|
||||
)["max_index"]
|
||||
or 0
|
||||
)
|
||||
file_for_checksum = (
|
||||
self.unmodified_original
|
||||
if self.unmodified_original is not None
|
||||
else self.working_copy
|
||||
)
|
||||
version_doc.checksum = hashlib.md5(
|
||||
file_for_checksum.read_bytes(),
|
||||
).hexdigest()
|
||||
version_doc.content = text or ""
|
||||
version_doc.page_count = page_count
|
||||
version_doc.mime_type = mime_type
|
||||
version_doc.original_filename = self.filename
|
||||
version_doc.storage_path = root_doc.storage_path
|
||||
# Clear unique file path fields so they can be generated uniquely later
|
||||
version_doc.filename = None
|
||||
version_doc.archive_filename = None
|
||||
version_doc.archive_checksum = None
|
||||
version_doc = Document(
|
||||
root_document=root_doc_frozen,
|
||||
version_index=next_version_index + 1,
|
||||
checksum=hashlib.md5(
|
||||
file_for_checksum.read_bytes(),
|
||||
).hexdigest(),
|
||||
content=text or "",
|
||||
page_count=page_count,
|
||||
mime_type=mime_type,
|
||||
original_filename=self.filename,
|
||||
owner_id=root_doc_frozen.owner_id,
|
||||
created=root_doc_frozen.created,
|
||||
title=root_doc_frozen.title,
|
||||
added=timezone.now(),
|
||||
modified=timezone.now(),
|
||||
)
|
||||
if self.metadata.version_label is not None:
|
||||
version_doc.version_label = self.metadata.version_label
|
||||
version_doc.added = timezone.now()
|
||||
version_doc.modified = timezone.now()
|
||||
return version_doc
|
||||
|
||||
def run_pre_consume_script(self) -> None:
|
||||
@@ -543,7 +536,7 @@ class ConsumerPlugin(
|
||||
root_doc = Document.objects.get(
|
||||
pk=self.input_doc.root_document_id,
|
||||
)
|
||||
original_document = self._clone_root_into_version(
|
||||
original_document = self._create_version_from_root(
|
||||
root_doc,
|
||||
text=text,
|
||||
page_count=page_count,
|
||||
|
||||
@@ -129,12 +129,19 @@ def generate_filename(
|
||||
archive_filename=False,
|
||||
use_format=True,
|
||||
) -> Path:
|
||||
# version docs use the root document for formatting, just with a suffix
|
||||
context_doc = doc if doc.root_document_id is None else doc.root_document
|
||||
version_suffix = (
|
||||
f"_v{doc.version_index}"
|
||||
if doc.root_document_id is not None and doc.version_index is not None
|
||||
else ""
|
||||
)
|
||||
base_path: Path | None = None
|
||||
|
||||
# Determine the source of the format string
|
||||
if use_format:
|
||||
if doc.storage_path is not None:
|
||||
filename_format = doc.storage_path.path
|
||||
if context_doc.storage_path is not None:
|
||||
filename_format = context_doc.storage_path.path
|
||||
elif settings.FILENAME_FORMAT is not None:
|
||||
# Maybe convert old to new style
|
||||
filename_format = convert_format_str_to_template_format(
|
||||
@@ -147,7 +154,7 @@ def generate_filename(
|
||||
|
||||
# If we have one, render it
|
||||
if filename_format is not None:
|
||||
rendered_path: str | None = format_filename(doc, filename_format)
|
||||
rendered_path: str | None = format_filename(context_doc, filename_format)
|
||||
if rendered_path:
|
||||
base_path = Path(rendered_path)
|
||||
|
||||
@@ -161,7 +168,7 @@ def generate_filename(
|
||||
base_filename = base_path.name
|
||||
|
||||
# Build the final filename with counter and filetype
|
||||
final_filename = f"{base_filename}{counter_str}{filetype_str}"
|
||||
final_filename = f"{base_filename}{version_suffix}{counter_str}{filetype_str}"
|
||||
|
||||
# If we have a directory component, include it
|
||||
if str(directory) != ".":
|
||||
@@ -170,7 +177,9 @@ def generate_filename(
|
||||
full_path = Path(final_filename)
|
||||
else:
|
||||
# No template, use document ID
|
||||
final_filename = f"{doc.pk:07}{counter_str}{filetype_str}"
|
||||
final_filename = (
|
||||
f"{context_doc.pk:07}{version_suffix}{counter_str}{filetype_str}"
|
||||
)
|
||||
full_path = Path(final_filename)
|
||||
|
||||
return full_path
|
||||
|
||||
@@ -5,10 +5,10 @@ import math
|
||||
import re
|
||||
from collections import Counter
|
||||
from contextlib import contextmanager
|
||||
from datetime import UTC
|
||||
from datetime import datetime
|
||||
from datetime import time
|
||||
from datetime import timedelta
|
||||
from datetime import timezone
|
||||
from shutil import rmtree
|
||||
from time import sleep
|
||||
from typing import TYPE_CHECKING
|
||||
@@ -437,7 +437,7 @@ class ManualResults:
|
||||
class LocalDateParser(English):
|
||||
def reverse_timezone_offset(self, d):
|
||||
return (d.replace(tzinfo=django_timezone.get_current_timezone())).astimezone(
|
||||
timezone.utc,
|
||||
UTC,
|
||||
)
|
||||
|
||||
def date_from(self, *args, **kwargs):
|
||||
@@ -641,8 +641,8 @@ def rewrite_natural_date_keywords(query_string: str) -> str:
|
||||
end = datetime(local_now.year - 1, 12, 31, 23, 59, 59, tzinfo=tz)
|
||||
|
||||
# Convert to UTC and format
|
||||
start_str = start.astimezone(timezone.utc).strftime("%Y%m%d%H%M%S")
|
||||
end_str = end.astimezone(timezone.utc).strftime("%Y%m%d%H%M%S")
|
||||
start_str = start.astimezone(UTC).strftime("%Y%m%d%H%M%S")
|
||||
end_str = end.astimezone(UTC).strftime("%Y%m%d%H%M%S")
|
||||
return f"{field}:[{start_str} TO {end_str}]"
|
||||
|
||||
return re.sub(pattern, repl, query_string, flags=re.IGNORECASE)
|
||||
|
||||
@@ -0,0 +1,37 @@
|
||||
# Generated by Django 5.2.11 on 2026-03-02 17:48
|
||||
|
||||
from django.conf import settings
|
||||
from django.db import migrations
|
||||
from django.db import models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
dependencies = [
|
||||
("documents", "0015_savedview_visibility_to_ui_settings"),
|
||||
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AddField(
|
||||
model_name="document",
|
||||
name="version_index",
|
||||
field=models.PositiveIntegerField(
|
||||
blank=True,
|
||||
db_index=True,
|
||||
help_text="Index of this version within the root document.",
|
||||
null=True,
|
||||
verbose_name="version index",
|
||||
),
|
||||
),
|
||||
migrations.AddConstraint(
|
||||
model_name="document",
|
||||
constraint=models.UniqueConstraint(
|
||||
condition=models.Q(
|
||||
("root_document__isnull", False),
|
||||
("version_index__isnull", False),
|
||||
),
|
||||
fields=("root_document", "version_index"),
|
||||
name="documents_document_root_version_index_uniq",
|
||||
),
|
||||
),
|
||||
]
|
||||
@@ -319,6 +319,14 @@ class Document(SoftDeleteModel, ModelWithOwner): # type: ignore[django-manager-
|
||||
verbose_name=_("root document for this version"),
|
||||
)
|
||||
|
||||
version_index = models.PositiveIntegerField(
|
||||
_("version index"),
|
||||
blank=True,
|
||||
null=True,
|
||||
db_index=True,
|
||||
help_text=_("Index of this version within the root document."),
|
||||
)
|
||||
|
||||
version_label = models.CharField(
|
||||
_("version label"),
|
||||
max_length=64,
|
||||
@@ -331,6 +339,16 @@ class Document(SoftDeleteModel, ModelWithOwner): # type: ignore[django-manager-
|
||||
ordering = ("-created",)
|
||||
verbose_name = _("document")
|
||||
verbose_name_plural = _("documents")
|
||||
constraints = [
|
||||
models.UniqueConstraint(
|
||||
fields=["root_document", "version_index"],
|
||||
condition=models.Q(
|
||||
root_document__isnull=False,
|
||||
version_index__isnull=False,
|
||||
),
|
||||
name="documents_document_root_version_index_uniq",
|
||||
),
|
||||
]
|
||||
|
||||
def __str__(self) -> str:
|
||||
created = self.created.isoformat()
|
||||
|
||||
@@ -5,11 +5,7 @@ from abc import abstractmethod
|
||||
from collections.abc import Iterator
|
||||
from dataclasses import dataclass
|
||||
from types import TracebackType
|
||||
|
||||
try:
|
||||
from typing import Self
|
||||
except ImportError:
|
||||
from typing_extensions import Self
|
||||
from typing import Self
|
||||
|
||||
import dateparser
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@ if TYPE_CHECKING:
|
||||
from channels_redis.pubsub import RedisPubSubChannelLayer
|
||||
|
||||
|
||||
class ProgressStatusOptions(str, enum.Enum):
|
||||
class ProgressStatusOptions(enum.StrEnum):
|
||||
STARTED = "STARTED"
|
||||
WORKING = "WORKING"
|
||||
SUCCESS = "SUCCESS"
|
||||
|
||||
@@ -620,6 +620,16 @@ def update_filename_and_move_files(
|
||||
root=settings.ARCHIVE_DIR,
|
||||
)
|
||||
|
||||
# Keep version files in sync with root
|
||||
if instance.root_document_id is None:
|
||||
for version_doc in Document.objects.filter(root_document_id=instance.pk).only(
|
||||
"pk",
|
||||
):
|
||||
update_filename_and_move_files(
|
||||
Document,
|
||||
version_doc,
|
||||
)
|
||||
|
||||
|
||||
@shared_task
|
||||
def process_cf_select_update(custom_field: CustomField) -> None:
|
||||
|
||||
@@ -24,7 +24,7 @@ def base_config() -> DateParserConfig:
|
||||
12,
|
||||
0,
|
||||
0,
|
||||
tzinfo=datetime.timezone.utc,
|
||||
tzinfo=datetime.UTC,
|
||||
),
|
||||
filename_date_order="YMD",
|
||||
content_date_order="DMY",
|
||||
@@ -45,7 +45,7 @@ def config_with_ignore_dates() -> DateParserConfig:
|
||||
12,
|
||||
0,
|
||||
0,
|
||||
tzinfo=datetime.timezone.utc,
|
||||
tzinfo=datetime.UTC,
|
||||
),
|
||||
filename_date_order="DMY",
|
||||
content_date_order="MDY",
|
||||
|
||||
@@ -101,50 +101,50 @@ class TestFilterDate:
|
||||
[
|
||||
# Valid Dates
|
||||
pytest.param(
|
||||
datetime.datetime(2024, 1, 10, tzinfo=datetime.timezone.utc),
|
||||
datetime.datetime(2024, 1, 10, tzinfo=datetime.timezone.utc),
|
||||
datetime.datetime(2024, 1, 10, tzinfo=datetime.UTC),
|
||||
datetime.datetime(2024, 1, 10, tzinfo=datetime.UTC),
|
||||
id="valid_past_date",
|
||||
),
|
||||
pytest.param(
|
||||
datetime.datetime(2024, 1, 15, 12, 0, 0, tzinfo=datetime.timezone.utc),
|
||||
datetime.datetime(2024, 1, 15, 12, 0, 0, tzinfo=datetime.timezone.utc),
|
||||
datetime.datetime(2024, 1, 15, 12, 0, 0, tzinfo=datetime.UTC),
|
||||
datetime.datetime(2024, 1, 15, 12, 0, 0, tzinfo=datetime.UTC),
|
||||
id="exactly_at_reference",
|
||||
),
|
||||
pytest.param(
|
||||
datetime.datetime(1901, 1, 1, tzinfo=datetime.timezone.utc),
|
||||
datetime.datetime(1901, 1, 1, tzinfo=datetime.timezone.utc),
|
||||
datetime.datetime(1901, 1, 1, tzinfo=datetime.UTC),
|
||||
datetime.datetime(1901, 1, 1, tzinfo=datetime.UTC),
|
||||
id="year_1901_valid",
|
||||
),
|
||||
# Date is > reference_time
|
||||
pytest.param(
|
||||
datetime.datetime(2024, 1, 16, tzinfo=datetime.timezone.utc),
|
||||
datetime.datetime(2024, 1, 16, tzinfo=datetime.UTC),
|
||||
None,
|
||||
id="future_date_day_after",
|
||||
),
|
||||
# date.date() in ignore_dates
|
||||
pytest.param(
|
||||
datetime.datetime(2024, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc),
|
||||
datetime.datetime(2024, 1, 1, 0, 0, 0, tzinfo=datetime.UTC),
|
||||
None,
|
||||
id="ignored_date_midnight_jan1",
|
||||
),
|
||||
pytest.param(
|
||||
datetime.datetime(2024, 1, 1, 10, 30, 0, tzinfo=datetime.timezone.utc),
|
||||
datetime.datetime(2024, 1, 1, 10, 30, 0, tzinfo=datetime.UTC),
|
||||
None,
|
||||
id="ignored_date_midday_jan1",
|
||||
),
|
||||
pytest.param(
|
||||
datetime.datetime(2024, 12, 25, 15, 0, 0, tzinfo=datetime.timezone.utc),
|
||||
datetime.datetime(2024, 12, 25, 15, 0, 0, tzinfo=datetime.UTC),
|
||||
None,
|
||||
id="ignored_date_dec25_future",
|
||||
),
|
||||
# date.year <= 1900
|
||||
pytest.param(
|
||||
datetime.datetime(1899, 12, 31, tzinfo=datetime.timezone.utc),
|
||||
datetime.datetime(1899, 12, 31, tzinfo=datetime.UTC),
|
||||
None,
|
||||
id="year_1899",
|
||||
),
|
||||
pytest.param(
|
||||
datetime.datetime(1900, 1, 1, tzinfo=datetime.timezone.utc),
|
||||
datetime.datetime(1900, 1, 1, tzinfo=datetime.UTC),
|
||||
None,
|
||||
id="year_1900_boundary",
|
||||
),
|
||||
@@ -176,7 +176,7 @@ class TestFilterDate:
|
||||
1,
|
||||
12,
|
||||
0,
|
||||
tzinfo=datetime.timezone.utc,
|
||||
tzinfo=datetime.UTC,
|
||||
)
|
||||
another_ignored = datetime.datetime(
|
||||
2024,
|
||||
@@ -184,7 +184,7 @@ class TestFilterDate:
|
||||
25,
|
||||
15,
|
||||
30,
|
||||
tzinfo=datetime.timezone.utc,
|
||||
tzinfo=datetime.UTC,
|
||||
)
|
||||
allowed_date = datetime.datetime(
|
||||
2024,
|
||||
@@ -192,7 +192,7 @@ class TestFilterDate:
|
||||
2,
|
||||
12,
|
||||
0,
|
||||
tzinfo=datetime.timezone.utc,
|
||||
tzinfo=datetime.UTC,
|
||||
)
|
||||
|
||||
assert parser._filter_date(ignored_date) is None
|
||||
@@ -204,7 +204,7 @@ class TestFilterDate:
|
||||
regex_parser: RegexDateParserPlugin,
|
||||
) -> None:
|
||||
"""Should work with timezone-aware datetimes."""
|
||||
date_utc = datetime.datetime(2024, 1, 10, 12, 0, tzinfo=datetime.timezone.utc)
|
||||
date_utc = datetime.datetime(2024, 1, 10, 12, 0, tzinfo=datetime.UTC)
|
||||
|
||||
result = regex_parser._filter_date(date_utc)
|
||||
|
||||
@@ -221,8 +221,8 @@ class TestRegexDateParser:
|
||||
"report-2023-12-25.txt",
|
||||
"Event recorded on 25/12/2022.",
|
||||
[
|
||||
datetime.datetime(2023, 12, 25, tzinfo=datetime.timezone.utc),
|
||||
datetime.datetime(2022, 12, 25, tzinfo=datetime.timezone.utc),
|
||||
datetime.datetime(2023, 12, 25, tzinfo=datetime.UTC),
|
||||
datetime.datetime(2022, 12, 25, tzinfo=datetime.UTC),
|
||||
],
|
||||
id="filename-y-m-d_and_content-d-m-y",
|
||||
),
|
||||
@@ -230,8 +230,8 @@ class TestRegexDateParser:
|
||||
"img_2023.01.02.jpg",
|
||||
"Taken on 01/02/2023",
|
||||
[
|
||||
datetime.datetime(2023, 1, 2, tzinfo=datetime.timezone.utc),
|
||||
datetime.datetime(2023, 2, 1, tzinfo=datetime.timezone.utc),
|
||||
datetime.datetime(2023, 1, 2, tzinfo=datetime.UTC),
|
||||
datetime.datetime(2023, 2, 1, tzinfo=datetime.UTC),
|
||||
],
|
||||
id="ambiguous-dates-respect-orders",
|
||||
),
|
||||
@@ -239,7 +239,7 @@ class TestRegexDateParser:
|
||||
"notes.txt",
|
||||
"bad date 99/99/9999 and 25/12/2022",
|
||||
[
|
||||
datetime.datetime(2022, 12, 25, tzinfo=datetime.timezone.utc),
|
||||
datetime.datetime(2022, 12, 25, tzinfo=datetime.UTC),
|
||||
],
|
||||
id="parse-exception-skips-bad-and-yields-good",
|
||||
),
|
||||
@@ -275,24 +275,24 @@ class TestRegexDateParser:
|
||||
or "2023.12.25" in date_string
|
||||
or "2023-12-25" in date_string
|
||||
):
|
||||
return datetime.datetime(2023, 12, 25, tzinfo=datetime.timezone.utc)
|
||||
return datetime.datetime(2023, 12, 25, tzinfo=datetime.UTC)
|
||||
|
||||
# content DMY 25/12/2022
|
||||
if "25/12/2022" in date_string or "25-12-2022" in date_string:
|
||||
return datetime.datetime(2022, 12, 25, tzinfo=datetime.timezone.utc)
|
||||
return datetime.datetime(2022, 12, 25, tzinfo=datetime.UTC)
|
||||
|
||||
# filename YMD 2023.01.02
|
||||
if "2023.01.02" in date_string or "2023-01-02" in date_string:
|
||||
return datetime.datetime(2023, 1, 2, tzinfo=datetime.timezone.utc)
|
||||
return datetime.datetime(2023, 1, 2, tzinfo=datetime.UTC)
|
||||
|
||||
# ambiguous 01/02/2023 -> respect DATE_ORDER setting
|
||||
if "01/02/2023" in date_string:
|
||||
if date_order == "DMY":
|
||||
return datetime.datetime(2023, 2, 1, tzinfo=datetime.timezone.utc)
|
||||
return datetime.datetime(2023, 2, 1, tzinfo=datetime.UTC)
|
||||
if date_order == "YMD":
|
||||
return datetime.datetime(2023, 1, 2, tzinfo=datetime.timezone.utc)
|
||||
return datetime.datetime(2023, 1, 2, tzinfo=datetime.UTC)
|
||||
# fallback
|
||||
return datetime.datetime(2023, 2, 1, tzinfo=datetime.timezone.utc)
|
||||
return datetime.datetime(2023, 2, 1, tzinfo=datetime.UTC)
|
||||
|
||||
# simulate parse failure for malformed input
|
||||
if "99/99/9999" in date_string or "bad date" in date_string:
|
||||
@@ -328,7 +328,7 @@ class TestRegexDateParser:
|
||||
12,
|
||||
0,
|
||||
0,
|
||||
tzinfo=datetime.timezone.utc,
|
||||
tzinfo=datetime.UTC,
|
||||
),
|
||||
filename_date_order="YMD",
|
||||
content_date_order="DMY",
|
||||
@@ -344,13 +344,13 @@ class TestRegexDateParser:
|
||||
) -> datetime.datetime | None:
|
||||
if "10/12/2023" in date_string or "10-12-2023" in date_string:
|
||||
# ignored date
|
||||
return datetime.datetime(2023, 12, 10, tzinfo=datetime.timezone.utc)
|
||||
return datetime.datetime(2023, 12, 10, tzinfo=datetime.UTC)
|
||||
if "01/02/2024" in date_string or "01-02-2024" in date_string:
|
||||
# future relative to reference_time -> filtered
|
||||
return datetime.datetime(2024, 2, 1, tzinfo=datetime.timezone.utc)
|
||||
return datetime.datetime(2024, 2, 1, tzinfo=datetime.UTC)
|
||||
if "05/01/2023" in date_string or "05-01-2023" in date_string:
|
||||
# valid
|
||||
return datetime.datetime(2023, 1, 5, tzinfo=datetime.timezone.utc)
|
||||
return datetime.datetime(2023, 1, 5, tzinfo=datetime.UTC)
|
||||
return None
|
||||
|
||||
mocker.patch(target, side_effect=fake_parse)
|
||||
@@ -358,7 +358,7 @@ class TestRegexDateParser:
|
||||
content = "Ignored: 10/12/2023, Future: 01/02/2024, Keep: 05/01/2023"
|
||||
results = list(parser.parse("whatever.txt", content))
|
||||
|
||||
assert results == [datetime.datetime(2023, 1, 5, tzinfo=datetime.timezone.utc)]
|
||||
assert results == [datetime.datetime(2023, 1, 5, tzinfo=datetime.UTC)]
|
||||
|
||||
def test_parse_handles_no_matches_and_returns_empty_list(
|
||||
self,
|
||||
@@ -392,7 +392,7 @@ class TestRegexDateParser:
|
||||
12,
|
||||
0,
|
||||
0,
|
||||
tzinfo=datetime.timezone.utc,
|
||||
tzinfo=datetime.UTC,
|
||||
),
|
||||
filename_date_order=None,
|
||||
content_date_order="DMY",
|
||||
@@ -409,9 +409,9 @@ class TestRegexDateParser:
|
||||
) -> datetime.datetime | None:
|
||||
# return distinct datetimes so we can tell which source was parsed
|
||||
if "25/12/2022" in date_string:
|
||||
return datetime.datetime(2022, 12, 25, tzinfo=datetime.timezone.utc)
|
||||
return datetime.datetime(2022, 12, 25, tzinfo=datetime.UTC)
|
||||
if "2023-12-25" in date_string:
|
||||
return datetime.datetime(2023, 12, 25, tzinfo=datetime.timezone.utc)
|
||||
return datetime.datetime(2023, 12, 25, tzinfo=datetime.UTC)
|
||||
return None
|
||||
|
||||
mock = mocker.patch(target, side_effect=fake_parse)
|
||||
@@ -429,5 +429,5 @@ class TestRegexDateParser:
|
||||
assert "25/12/2022" in called_date_string
|
||||
# And the parser should have yielded the corresponding datetime
|
||||
assert results == [
|
||||
datetime.datetime(2022, 12, 25, tzinfo=datetime.timezone.utc),
|
||||
datetime.datetime(2022, 12, 25, tzinfo=datetime.UTC),
|
||||
]
|
||||
|
||||
@@ -726,6 +726,14 @@ class TestConsumer(
|
||||
self.assertIsNotNone(root_doc)
|
||||
assert root_doc is not None
|
||||
|
||||
root_storage_path = StoragePath.objects.create(
|
||||
name="version-root-path",
|
||||
path="root/{{title}}",
|
||||
)
|
||||
root_doc.storage_path = root_storage_path
|
||||
root_doc.archive_serial_number = 42
|
||||
root_doc.save()
|
||||
|
||||
actor = User.objects.create_user(
|
||||
username="actor",
|
||||
email="actor@example.com",
|
||||
@@ -762,7 +770,7 @@ class TestConsumer(
|
||||
)
|
||||
consumer.setup()
|
||||
try:
|
||||
self.assertTrue(consumer.filename.endswith("_v0.pdf"))
|
||||
self.assertEqual(consumer.filename, version_file.name)
|
||||
consumer.run()
|
||||
finally:
|
||||
consumer.cleanup()
|
||||
@@ -772,8 +780,10 @@ class TestConsumer(
|
||||
version = versions.first()
|
||||
assert version is not None
|
||||
assert version.original_filename is not None
|
||||
self.assertEqual(version.version_index, 1)
|
||||
self.assertEqual(version.version_label, "v2")
|
||||
self.assertTrue(version.original_filename.endswith("_v0.pdf"))
|
||||
self.assertIsNone(version.archive_serial_number)
|
||||
self.assertEqual(version.original_filename, version_file.name)
|
||||
self.assertTrue(bool(version.content))
|
||||
|
||||
@override_settings(AUDIT_LOG_ENABLED=True)
|
||||
@@ -822,7 +832,7 @@ class TestConsumer(
|
||||
)
|
||||
consumer.setup()
|
||||
try:
|
||||
self.assertEqual(consumer.filename, "valid_pdf_version-upload_v0")
|
||||
self.assertEqual(consumer.filename, "valid_pdf_version-upload")
|
||||
consumer.run()
|
||||
finally:
|
||||
consumer.cleanup()
|
||||
@@ -832,9 +842,67 @@ class TestConsumer(
|
||||
)
|
||||
self.assertIsNotNone(version)
|
||||
assert version is not None
|
||||
self.assertEqual(version.original_filename, "valid_pdf_version-upload_v0")
|
||||
self.assertEqual(version.version_index, 1)
|
||||
self.assertEqual(version.original_filename, "valid_pdf_version-upload")
|
||||
self.assertTrue(bool(version.content))
|
||||
|
||||
@override_settings(AUDIT_LOG_ENABLED=True)
|
||||
@mock.patch("documents.consumer.load_classifier")
|
||||
def test_consume_version_index_monotonic_after_version_deletion(self, m) -> None:
|
||||
m.return_value = MagicMock()
|
||||
|
||||
with self.get_consumer(self.get_test_file()) as consumer:
|
||||
consumer.run()
|
||||
|
||||
root_doc = Document.objects.first()
|
||||
self.assertIsNotNone(root_doc)
|
||||
assert root_doc is not None
|
||||
|
||||
def consume_version(version_file: Path) -> Document:
|
||||
status = DummyProgressManager(version_file.name, None)
|
||||
overrides = DocumentMetadataOverrides()
|
||||
doc = ConsumableDocument(
|
||||
DocumentSource.ApiUpload,
|
||||
original_file=version_file,
|
||||
root_document_id=root_doc.pk,
|
||||
)
|
||||
preflight = ConsumerPreflightPlugin(
|
||||
doc,
|
||||
overrides,
|
||||
status, # type: ignore[arg-type]
|
||||
self.dirs.scratch_dir,
|
||||
"task-id",
|
||||
)
|
||||
preflight.setup()
|
||||
preflight.run()
|
||||
|
||||
consumer = ConsumerPlugin(
|
||||
doc,
|
||||
overrides,
|
||||
status, # type: ignore[arg-type]
|
||||
self.dirs.scratch_dir,
|
||||
"task-id",
|
||||
)
|
||||
consumer.setup()
|
||||
try:
|
||||
consumer.run()
|
||||
finally:
|
||||
consumer.cleanup()
|
||||
|
||||
version = (
|
||||
Document.objects.filter(root_document=root_doc).order_by("-id").first()
|
||||
)
|
||||
assert version is not None
|
||||
return version
|
||||
|
||||
v1 = consume_version(self.get_test_file2())
|
||||
self.assertEqual(v1.version_index, 1)
|
||||
v1.delete()
|
||||
|
||||
# The next version should have version_index 2, even though version_index 1 was deleted
|
||||
v2 = consume_version(self.get_test_file())
|
||||
self.assertEqual(v2.version_index, 2)
|
||||
|
||||
@mock.patch("documents.consumer.load_classifier")
|
||||
def testClassifyDocument(self, m) -> None:
|
||||
correspondent = Correspondent.objects.create(
|
||||
|
||||
@@ -77,6 +77,58 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
settings.ORIGINALS_DIR / "test" / "test.pdf",
|
||||
)
|
||||
|
||||
@override_settings(FILENAME_FORMAT=None)
def test_root_storage_path_change_updates_version_files(self) -> None:
    """
    Changing the root document's storage path also moves its version files.

    After the root is saved with the new storage path, both the root and
    the version are expected under ``new/``; the version is expected to be
    named after the root's title plus a ``_v1`` suffix, and the old
    locations must no longer hold files.
    """
    old_storage_path = StoragePath.objects.create(
        name="old-path",
        path="old/{{title}}",
    )
    new_storage_path = StoragePath.objects.create(
        name="new-path",
        path="new/{{title}}",
    )

    root_doc = Document.objects.create(
        title="rootdoc",
        mime_type="application/pdf",
        checksum="root-checksum",
        storage_path=old_storage_path,
    )
    version_doc = Document.objects.create(
        title="version-title",
        mime_type="application/pdf",
        checksum="version-checksum",
        root_document=root_doc,
        version_index=1,
    )
    both_docs = (root_doc, version_doc)

    # Write the generated filenames straight to the database via
    # ``QuerySet.update`` and then reload the in-memory instances.
    for doc in both_docs:
        Document.objects.filter(pk=doc.pk).update(
            filename=generate_filename(doc),
        )
    for doc in both_docs:
        doc.refresh_from_db()

    # Put an empty file on disk at each document's current source path.
    for doc in both_docs:
        create_source_path_directory(doc.source_path)
        Path(doc.source_path).touch()

    root_doc.storage_path = new_storage_path
    root_doc.save()

    for doc in both_docs:
        doc.refresh_from_db()

    self.assertEqual(root_doc.filename, "new/rootdoc.pdf")
    self.assertEqual(version_doc.filename, "new/rootdoc_v1.pdf")
    for doc in both_docs:
        self.assertIsFile(doc.source_path)

    old_dir = settings.ORIGINALS_DIR / "old"
    self.assertIsNotFile(old_dir / "rootdoc.pdf")
    self.assertIsNotFile(old_dir / "rootdoc_v1.pdf")
|
||||
|
||||
@override_settings(FILENAME_FORMAT="{correspondent}/{correspondent}")
|
||||
def test_file_renaming_missing_permissions(self) -> None:
|
||||
document = Document()
|
||||
@@ -336,7 +388,11 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
added=d1,
|
||||
)
|
||||
|
||||
self.assertEqual(generate_filename(doc1), Path("1232-01-09.pdf"))
|
||||
# Account for 3.14 padding changes
|
||||
expected_year: str = d1.strftime("%Y")
|
||||
expected_filename: Path = Path(f"{expected_year}-01-09.pdf")
|
||||
|
||||
self.assertEqual(generate_filename(doc1), expected_filename)
|
||||
|
||||
doc1.added = timezone.make_aware(datetime.datetime(2020, 11, 16, 1, 1, 1))
|
||||
|
||||
@@ -1222,6 +1278,94 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
|
||||
Path("logs.pdf"),
|
||||
)
|
||||
|
||||
@override_settings(FILENAME_FORMAT="{title}")
def test_version_index_suffix_for_template_filename(self) -> None:
    """
    A version's templated filename carries a ``_v<index>`` suffix.

    With a ``{title}`` format, version 1 of ``the_doc`` is expected to be
    ``the_doc_v1.pdf``; with ``counter=1`` the counter suffix follows the
    version suffix (``the_doc_v1_01.pdf``).
    """
    root_doc = Document.objects.create(
        title="the_doc",
        mime_type="application/pdf",
        checksum="root-checksum",
    )
    version_doc = Document.objects.create(
        title="the_doc",
        mime_type="application/pdf",
        checksum="version-checksum",
        root_document=root_doc,
        version_index=1,
    )

    without_counter = generate_filename(version_doc)
    self.assertEqual(without_counter, Path("the_doc_v1.pdf"))

    with_counter = generate_filename(version_doc, counter=1)
    self.assertEqual(with_counter, Path("the_doc_v1_01.pdf"))
|
||||
|
||||
@override_settings(FILENAME_FORMAT=None)
def test_version_index_suffix_for_default_filename(self) -> None:
    """
    Without a filename format, a version is named after the root's pk.

    Version 2 is expected to be ``<root pk, zero-padded to 7>_v2`` with a
    ``.txt`` extension for the original and ``.pdf`` for the archive file.
    """
    root_doc = Document.objects.create(
        title="root",
        mime_type="text/plain",
        checksum="root-checksum",
    )
    version_doc = Document.objects.create(
        title="root",
        mime_type="text/plain",
        checksum="version-checksum",
        root_document=root_doc,
        version_index=2,
    )

    # Both expected names share the stem built from the ROOT document's pk.
    expected_stem = f"{root_doc.pk:07d}_v2"

    original_name = generate_filename(version_doc)
    self.assertEqual(original_name, Path(f"{expected_stem}.txt"))

    archive_name = generate_filename(version_doc, archive_filename=True)
    self.assertEqual(archive_name, Path(f"{expected_stem}.pdf"))
|
||||
|
||||
@override_settings(FILENAME_FORMAT="{original_name}")
def test_version_index_suffix_with_original_name_placeholder(self) -> None:
    """
    ``{original_name}`` resolves to the root's upload name for a version.

    The version has its own ``original_filename``, but the expected result
    is built from the root's (``root-upload``) plus the ``_v1`` suffix.
    """
    shared_fields = {"title": "root", "mime_type": "application/pdf"}

    root_doc = Document.objects.create(
        checksum="root-checksum",
        original_filename="root-upload.pdf",
        **shared_fields,
    )
    version_doc = Document.objects.create(
        checksum="version-checksum",
        root_document=root_doc,
        version_index=1,
        original_filename="version-upload.pdf",
        **shared_fields,
    )

    generated = generate_filename(version_doc)
    self.assertEqual(generated, Path("root-upload_v1.pdf"))
|
||||
|
||||
def test_version_index_suffix_with_storage_path(self) -> None:
    """
    A version's filename follows the root's storage path and title.

    Only the root document is given a storage path, and the version has a
    deliberately different title; the expected filename is still
    ``folder/storage_doc_v3.pdf``.
    """
    root_storage = StoragePath.objects.create(
        name="vtest",
        path="folder/{{title}}",
    )
    root_doc = Document.objects.create(
        title="storage_doc",
        mime_type="application/pdf",
        checksum="root-checksum",
        storage_path=root_storage,
    )
    version_doc = Document.objects.create(
        title="version_title_should_not_be_used",
        mime_type="application/pdf",
        checksum="version-checksum",
        root_document=root_doc,
        version_index=3,
    )

    generated = generate_filename(version_doc)
    expected = Path("folder/storage_doc_v3.pdf")
    self.assertEqual(generated, expected)
|
||||
|
||||
@override_settings(
|
||||
FILENAME_FORMAT="XX{correspondent}/{title}",
|
||||
FILENAME_FORMAT_REMOVE_NONE=True,
|
||||
|
||||
@@ -21,7 +21,7 @@ class TestDateLocalization:
|
||||
14,
|
||||
30,
|
||||
5,
|
||||
tzinfo=datetime.timezone.utc,
|
||||
tzinfo=datetime.UTC,
|
||||
)
|
||||
|
||||
TEST_DATETIME_STRING: str = "2023-10-26T14:30:05+00:00"
|
||||
|
||||
@@ -4698,7 +4698,7 @@ class TestDateWorkflowLocalization(
|
||||
14,
|
||||
30,
|
||||
5,
|
||||
tzinfo=datetime.timezone.utc,
|
||||
tzinfo=datetime.UTC,
|
||||
)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from enum import StrEnum
|
||||
from typing import TYPE_CHECKING
|
||||
from typing import Any
|
||||
|
||||
@@ -11,7 +11,7 @@ if TYPE_CHECKING:
|
||||
from django.http import HttpRequest
|
||||
|
||||
|
||||
class VersionResolutionError(str, Enum):
|
||||
class VersionResolutionError(StrEnum):
|
||||
INVALID = "invalid"
|
||||
NOT_FOUND = "not_found"
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user