Compare commits

...

6 Commits

Author SHA1 Message Date
stumpylog a1e7c0614e Updates the script in docker too 2026-06-04 12:02:45 -07:00
stumpylog dac05107a7 ruff: enable S324 (hashlib insecure hash functions)
Adds usedforsecurity=False to all hashlib.md5() calls, documenting
that these are used for file checksum comparison, not security.
The production call in _path_matches_checksum will be replaced with
compute_checksum() (SHA-256) in a separate branch.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-04 11:37:17 -07:00
stumpylog 89ce62d97d ruff: enable PERF (perflint)
Fixes 9 violations — loop-based append replaced with comprehensions
or extend throughout production and test code:
- PERF401: list comprehensions / extend for transformed lists
- PERF402: list() around a generator for copied lists

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-04 11:22:07 -07:00
stumpylog 50f5d5f2e9 ruff: enable DTZ (flake8-datetimez)
Fixes 44 violations — naive datetime usage replaced with tz-aware
equivalents throughout production and test code:
- datetime.now() → timezone.now() (Django) or datetime.now(tz=UTC)
- datetime.fromtimestamp() → datetime.fromtimestamp(ts, tz=UTC)
- datetime.date.today() → timezone.now().date()
- datetime.datetime(...) constructors → tzinfo=UTC in tests
- UP017 auto-converted datetime.timezone.utc → datetime.UTC (py3.11+)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-04 10:47:13 -07:00
stumpylog 92b59eebfc ruff: enable B (flake8-bugbear)
Fixes 71 violations across production and test code:
- B904 (~50): raise-from in except blocks; from None at API/view
  boundaries, from exc where the cause is the direct origin
- B017 (9): pytest.raises(Exception) → specific type or match= arg
- B007 (5): unused loop vars renamed to _
- B027 (1): missing @abstractmethod on DateParserPluginBase.__exit__
- B028 (3): warnings.warn without stacklevel=2 in test utils
- B011 (1): assert False → raise AssertionError()
- B905 (3): zip() without an explicit strict= argument (strict=False added)
- B009 (3): getattr with constant string (auto-fixed)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-04 10:26:08 -07:00
stumpylog 59fd2ff9e8 ruff: enable G (logging format), ignore G004 (f-strings)
Replaces the single G201 selector with the full G group.
Fixes 2x G003 (string concat in log calls) and 2x G202 (redundant
exc_info on logger.exception). G004 (f-strings in logging) is ignored
as f-string style is accepted throughout this codebase.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-04 09:32:52 -07:00
40 changed files with 250 additions and 190 deletions
+1 -1
View File
@@ -61,7 +61,7 @@ def replace_with_symlinks(
total_duplicates = 0
space_saved = 0
for file_hash, file_list in duplicate_groups.items():
for file_list in duplicate_groups.values():
# Keep the first file as the original, replace others with symlinks
original_file = file_list[0]
duplicates = file_list[1:]
+6 -1
View File
@@ -185,12 +185,16 @@ line-ending = "lf"
[tool.ruff.lint]
# https://docs.astral.sh/ruff/rules/
extend-select = [
"B", # https://docs.astral.sh/ruff/rules/#flake8-bugbear-b
"COM", # https://docs.astral.sh/ruff/rules/#flake8-commas-com
"DTZ", # https://docs.astral.sh/ruff/rules/#flake8-datetimez-dtz
"PERF", # https://docs.astral.sh/ruff/rules/#perflint-perf
"S324", # https://docs.astral.sh/ruff/rules/hashlib-insecure-hash-functions/
"DJ", # https://docs.astral.sh/ruff/rules/#flake8-django-dj
"EXE", # https://docs.astral.sh/ruff/rules/#flake8-executable-exe
"FBT", # https://docs.astral.sh/ruff/rules/#flake8-boolean-trap-fbt
"FLY", # https://docs.astral.sh/ruff/rules/#flynt-fly
"G201", # https://docs.astral.sh/ruff/rules/#flake8-logging-format-g
"G", # https://docs.astral.sh/ruff/rules/#flake8-logging-format-g
"I", # https://docs.astral.sh/ruff/rules/#isort-i
"ICN", # https://docs.astral.sh/ruff/rules/#flake8-import-conventions-icn
"INP", # https://docs.astral.sh/ruff/rules/#flake8-no-pep420-inp
@@ -211,6 +215,7 @@ extend-select = [
]
ignore = [
"DJ001",
"G004", # f-strings in logging: accepted style in this codebase
"PLC0415",
"RUF012",
"SIM105",
+3 -2
View File
@@ -834,8 +834,9 @@ class ConsumerPlugin(
self.log.debug(f"Creation date from parse_date: {create_date}")
else:
stats = Path(self.input_doc.original_file).stat()
create_date = timezone.make_aware(
datetime.datetime.fromtimestamp(stats.st_mtime),
create_date = datetime.datetime.fromtimestamp(
stats.st_mtime,
tz=datetime.UTC,
)
self.log.debug(f"Creation date from st_mtime: {create_date}")
+4 -4
View File
@@ -1,4 +1,3 @@
import datetime as dt
import logging
import os
import shutil
@@ -6,6 +5,7 @@ from pathlib import Path
from typing import Final
from django.conf import settings
from django.utils import timezone
from pikepdf import Pdf
from documents.consumer import ConsumerError
@@ -78,7 +78,7 @@ class CollatePlugin(NoCleanupPluginMixin, NoSetupPluginMixin, ConsumeTaskPlugin)
stats = staging.stat()
# if the file is older than the timeout, we don't consider
# it valid
if (dt.datetime.now().timestamp() - stats.st_mtime) > TIMEOUT_SECONDS:
if (timezone.now().timestamp() - stats.st_mtime) > TIMEOUT_SECONDS:
logger.warning("Outdated double sided staging file exists, deleting it")
staging.unlink()
else:
@@ -99,7 +99,7 @@ class CollatePlugin(NoCleanupPluginMixin, NoSetupPluginMixin, ConsumeTaskPlugin)
"two uploaded files don't belong to the same double-"
"sided scan. Please retry, starting with the odd "
"numbered pages again.",
)
) from None
# Merged file has the same path, but without the
# double-sided subdir. Therefore, it is also in the
# consumption dir and will be picked up for processing
@@ -134,7 +134,7 @@ class CollatePlugin(NoCleanupPluginMixin, NoSetupPluginMixin, ConsumeTaskPlugin)
shutil.move(pdf_file, staging)
# update access and modification time so we know if the file
# is outdated when another file gets uploaded
timestamp = dt.datetime.now().timestamp()
timestamp = timezone.now().timestamp()
os.utime(staging, (timestamp, timestamp))
logger.info(
"Got scan with odd numbered pages of double-sided scan, moved it to %s",
+4 -4
View File
@@ -350,7 +350,7 @@ def handle_validation_prefix(func: Callable):
try:
return func(*args, **kwargs)
except serializers.ValidationError as e:
raise serializers.ValidationError({validation_prefix: e.detail})
raise serializers.ValidationError({validation_prefix: e.detail}) from e
# Update the signature to include the validation_prefix argument
old_sig = inspect.signature(func)
@@ -461,7 +461,7 @@ class CustomFieldQueryParser:
except json.JSONDecodeError:
raise serializers.ValidationError(
{self._validation_prefix: [_("Value must be valid JSON.")]},
)
) from None
return (
self._parse_expr(expr, validation_prefix=self._validation_prefix),
self._annotations,
@@ -589,7 +589,7 @@ class CustomFieldQueryParser:
except CustomField.DoesNotExist:
raise serializers.ValidationError(
[_("{name!r} is not a valid custom field.").format(name=id_or_name)],
)
) from None
self._custom_fields[custom_field.id] = custom_field
self._custom_fields[custom_field.name] = custom_field
return custom_field
@@ -988,7 +988,7 @@ class DocumentsOrderingFilter(OrderingFilter):
except CustomField.DoesNotExist:
raise serializers.ValidationError(
{self.prefix + str(custom_field_id): [_("Custom field not found")]},
)
) from None
annotation = None
match field.data_type:
@@ -480,7 +480,7 @@ class Command(CryptMixin, PaperlessCommand):
}
# 3. Export files from each document
for index, document_dict in enumerate(
for _, document_dict in enumerate(
self.track(
document_manifest,
description="Exporting documents...",
@@ -133,11 +133,14 @@ def _build_suggestion_table(
else:
doc_cell = Text(f"{doc} [{doc.pk}]")
tag_parts: list[str] = []
for tag in sorted(suggestion.tags_to_add, key=lambda t: t.name):
tag_parts.append(f"[green]+{tag.name}[/green]")
for tag in sorted(suggestion.tags_to_remove, key=lambda t: t.name):
tag_parts.append(f"[red]-{tag.name}[/red]")
tag_parts: list[str] = [
f"[green]+{tag.name}[/green]"
for tag in sorted(suggestion.tags_to_add, key=lambda t: t.name)
]
tag_parts.extend(
f"[red]-{tag.name}[/red]"
for tag in sorted(suggestion.tags_to_remove, key=lambda t: t.name)
)
tag_cell = Text.from_markup(", ".join(tag_parts)) if tag_parts else Text("-")
table.add_row(
+3 -3
View File
@@ -369,7 +369,7 @@ class Document(SoftDeleteModel, ModelWithOwner): # type: ignore[django-manager-
If the queryset already annotated ``effective_content``, that value is used.
"""
if hasattr(self, "effective_content"):
return getattr(self, "effective_content")
return self.effective_content
if self.root_document_id is not None or self.pk is None:
return self.content
@@ -1204,8 +1204,8 @@ class CustomFieldInstance(SoftDeleteModel):
def get_value_field_name(cls, data_type: CustomField.FieldDataType):
try:
return cls.TYPE_TO_DATA_STORE_NAME_MAP[data_type]
except KeyError: # pragma: no cover
raise NotImplementedError(data_type)
except KeyError as exc: # pragma: no cover
raise NotImplementedError(data_type) from exc
@property
def value(self):
+1 -1
View File
@@ -110,7 +110,7 @@ def run_convert(
args += ["-define", "pdf:use-cropbox=true"] if use_cropbox else []
args += [str(input_file), str(output_file)]
logger.debug("Execute: " + " ".join(args), extra={"group": logging_group})
logger.debug("Execute: %s", " ".join(args), extra={"group": logging_group})
try:
run_subprocess(args, environment, logger)
+1 -2
View File
@@ -67,8 +67,7 @@ class DateParserPluginBase(ABC):
Subclasses can override this to release resources.
"""
# Default implementation does nothing.
# Returning None implies exceptions are propagated.
return None
def _parse_string(
self,
+7 -3
View File
@@ -195,12 +195,12 @@ class WriteBatch:
try:
self._lock.acquire(timeout=self._lock_timeout)
break
except filelock.Timeout:
except filelock.Timeout as exc:
if attempt == _LOCK_RETRY_ATTEMPTS - 1:
raise SearchIndexLockError(
f"Could not acquire index lock after {_LOCK_RETRY_ATTEMPTS} "
f"attempts (timeout={self._lock_timeout}s each)",
)
) from exc
sleep_s = random.uniform(
0,
min(_LOCK_BACKOFF_CAP, _LOCK_BACKOFF_BASE * (2**attempt)),
@@ -651,7 +651,11 @@ class TantivyBackend:
result_ids = cast("list[int]", searcher.fast_field_values("id", result_addrs))
addr_by_id: dict[int, tuple[float, tantivy.DocAddress]] = {
doc_id: (score, addr)
for (score, addr), doc_id in zip(batch_results.hits, result_ids)
for (score, addr), doc_id in zip(
batch_results.hits,
result_ids,
strict=False,
)
}
snippet_generator = None
+11 -7
View File
@@ -270,7 +270,7 @@ def _rewrite_compact_date(query: str) -> str:
except TimeoutError: # pragma: no cover
raise ValueError(
"Query too complex to process (compact date rewrite timed out)",
)
) from None
def _rewrite_relative_range(query: str) -> str:
@@ -303,7 +303,7 @@ def _rewrite_relative_range(query: str) -> str:
except TimeoutError: # pragma: no cover
raise ValueError(
"Query too complex to process (relative range rewrite timed out)",
)
) from None
def _rewrite_whoosh_relative_range(query: str) -> str:
@@ -334,7 +334,7 @@ def _rewrite_whoosh_relative_range(query: str) -> str:
except TimeoutError: # pragma: no cover
raise ValueError(
"Query too complex to process (Whoosh relative range rewrite timed out)",
)
) from None
def _rewrite_8digit_date(query: str, tz: tzinfo) -> str:
@@ -376,7 +376,7 @@ def _rewrite_8digit_date(query: str, tz: tzinfo) -> str:
except TimeoutError: # pragma: no cover
raise ValueError(
"Query too complex to process (8-digit date rewrite timed out)",
)
) from None
def _rewrite_year_range(query: str) -> str:
@@ -401,7 +401,9 @@ def _rewrite_year_range(query: str) -> str:
try:
return _YEAR_RANGE_RE.sub(_sub, query, timeout=_REGEX_TIMEOUT)
except TimeoutError: # pragma: no cover
raise ValueError("Query too complex to process (year range rewrite timed out)")
raise ValueError(
"Query too complex to process (year range rewrite timed out)",
) from None
def rewrite_natural_date_keywords(query: str, tz: tzinfo) -> str:
@@ -443,7 +445,7 @@ def rewrite_natural_date_keywords(query: str, tz: tzinfo) -> str:
except TimeoutError: # pragma: no cover
raise ValueError(
"Query too complex to process (date keyword rewrite timed out)",
)
) from None
def normalize_query(query: str) -> str:
@@ -483,7 +485,9 @@ def normalize_query(query: str) -> str:
query = _SPACED_OPERATOR_RE.sub(" ", query, timeout=_REGEX_TIMEOUT).strip()
return query
except TimeoutError: # pragma: no cover
raise ValueError("Query too complex to process (normalization timed out)")
raise ValueError(
"Query too complex to process (normalization timed out)",
) from None
def build_permission_filter(
+26 -18
View File
@@ -163,7 +163,7 @@ class MatchingModelSerializer(serializers.ModelSerializer[Any]):
logger.debug(f"Invalid regular expression: {e!s}")
raise serializers.ValidationError(
"Invalid regular expression, see log for details.",
)
) from None
return match
@@ -867,7 +867,9 @@ class CustomFieldInstanceSerializer(serializers.ModelSerializer[CustomFieldInsta
try:
value_int = int(data["value"])
except (TypeError, ValueError):
raise serializers.ValidationError("Enter a valid integer.")
raise serializers.ValidationError(
"Enter a valid integer.",
) from None
# Keep values within the PostgreSQL integer range
MinValueValidator(-2147483648)(value_int)
MaxValueValidator(2147483647)(value_int)
@@ -899,7 +901,7 @@ class CustomFieldInstanceSerializer(serializers.ModelSerializer[CustomFieldInsta
except Exception:
raise serializers.ValidationError(
f"Value must be an id of an element in {select_options}",
)
) from None
elif field.data_type == CustomField.FieldDataType.DOCUMENTLINK:
if not (isinstance(data["value"], list) or data["value"] is None):
raise serializers.ValidationError(
@@ -1090,7 +1092,7 @@ class DocumentSerializer(
def to_representation(self, instance):
doc = super().to_representation(instance)
if "content" in self.fields and hasattr(instance, "effective_content"):
doc["content"] = getattr(instance, "effective_content") or ""
doc["content"] = instance.effective_content or ""
if self.truncate_content and "content" in self.fields:
doc["content"] = doc.get("content")[0:550]
return doc
@@ -1452,7 +1454,7 @@ class SavedViewSerializer(OwnedObjectSerializer):
)
)
except serializers.ValidationError as exc:
raise serializers.ValidationError({field_name: exc.detail})
raise serializers.ValidationError({field_name: exc.detail}) from exc
del normalized_data[field_name]
ret = super().to_internal_value(normalized_data)
@@ -1756,7 +1758,7 @@ class BulkEditSerializer(
logger.exception(f"Error validating custom fields: {e}")
raise serializers.ValidationError(
f"{name} must be a list of integers or a dict of id:value pairs, see the log for details",
)
) from None
elif not isinstance(custom_fields, list) or not all(
isinstance(i, int) for i in ids
):
@@ -1824,7 +1826,7 @@ class BulkEditSerializer(
try:
Tag.objects.get(id=tag_id)
except Tag.DoesNotExist:
raise serializers.ValidationError("Tag does not exist")
raise serializers.ValidationError("Tag does not exist") from None
else:
raise serializers.ValidationError("tag not specified")
@@ -1837,7 +1839,9 @@ class BulkEditSerializer(
try:
DocumentType.objects.get(id=document_type_id)
except DocumentType.DoesNotExist:
raise serializers.ValidationError("Document type does not exist")
raise serializers.ValidationError(
"Document type does not exist",
) from None
else:
raise serializers.ValidationError("document_type not specified")
@@ -1849,7 +1853,9 @@ class BulkEditSerializer(
try:
Correspondent.objects.get(id=correspondent_id)
except Correspondent.DoesNotExist:
raise serializers.ValidationError("Correspondent does not exist")
raise serializers.ValidationError(
"Correspondent does not exist",
) from None
else:
raise serializers.ValidationError("correspondent not specified")
@@ -1863,7 +1869,7 @@ class BulkEditSerializer(
except StoragePath.DoesNotExist:
raise serializers.ValidationError(
"Storage path does not exist",
)
) from None
else:
raise serializers.ValidationError("storage path not specified")
@@ -1918,7 +1924,7 @@ class BulkEditSerializer(
):
raise serializers.ValidationError("invalid rotation degrees")
except ValueError:
raise serializers.ValidationError("invalid rotation degrees")
raise serializers.ValidationError("invalid rotation degrees") from None
def _validate_source_mode(self, parameters) -> None:
source_mode = parameters.get(
@@ -1948,7 +1954,7 @@ class BulkEditSerializer(
pages.append([int(doc)])
parameters["pages"] = pages
except ValueError:
raise serializers.ValidationError("invalid pages specified")
raise serializers.ValidationError("invalid pages specified") from None
if "delete_originals" in parameters:
if not isinstance(parameters["delete_originals"], bool):
@@ -2218,14 +2224,14 @@ class PostDocumentSerializer(serializers.Serializer[dict[str, Any]]):
raise serializers.ValidationError(
_("Custom field id must be an integer: %(id)s")
% {"id": field_id},
)
) from None
try:
field = CustomField.objects.get(id=field_id_int)
except CustomField.DoesNotExist:
raise serializers.ValidationError(
_("Custom field with id %(id)s does not exist")
% {"id": field_id_int},
)
) from None
custom_field_serializer.validate(
{
"field": field,
@@ -2242,7 +2248,7 @@ class PostDocumentSerializer(serializers.Serializer[dict[str, Any]]):
_(
"Custom fields must be a list of integers or an object mapping ids to values.",
),
)
) from None
if CustomField.objects.filter(id__in=ids).count() != len(set(ids)):
raise serializers.ValidationError(
_("Some custom fields don't exist or were specified twice."),
@@ -2353,7 +2359,9 @@ class EmailSerializer(DocumentListSerializer):
for address in address_list:
email_validator(address)
except ValidationError:
raise serializers.ValidationError(f"Invalid email address: {address}")
raise serializers.ValidationError(
f"Invalid email address: {address}",
) from None
return ",".join(address_list)
@@ -2777,7 +2785,7 @@ class ShareLinkBundleSerializer(OwnedObjectSerializer):
return share_link_bundle
def get_document_count(self, obj: ShareLinkBundle) -> int:
return getattr(obj, "document_total") or obj.documents.count()
return obj.document_total or obj.documents.count()
class BulkEditObjectsSerializer(SerializerWithPerms, SetPermissionsMixin):
@@ -3125,7 +3133,7 @@ class WorkflowActionSerializer(serializers.ModelSerializer[WorkflowAction]):
except (ValueError, KeyError) as e:
raise serializers.ValidationError(
{"assign_title": f'Invalid f-string detected: "{e.args[0]}"'},
)
) from None
if (
"type" in attrs
+1 -1
View File
@@ -411,7 +411,7 @@ def _path_matches_checksum(path: Path, checksum: str | None) -> bool:
return False
with path.open("rb") as f:
return hashlib.md5(f.read()).hexdigest() == checksum
return hashlib.md5(f.read(), usedforsecurity=False).hexdigest() == checksum
def _filename_template_uses_custom_fields(doc: Document) -> bool:
@@ -29,9 +29,7 @@ class SimpleCommand(PaperlessCommand):
def handle(self, *args, **options):
items = list(range(5))
results = []
for item in self.track(items, description="Processing..."):
results.append(item * 2)
results = [item * 2 for item in self.track(items, description="Processing...")]
self.stdout.write(f"Results: {results}")
@@ -57,13 +55,13 @@ class MultiprocessCommand(PaperlessCommand):
def handle(self, *args, **options):
items = list(range(5))
results = []
for result in self.process_parallel(
_double_value,
items,
description="Processing...",
):
results.append(result)
results = list(
self.process_parallel(
_double_value,
items,
description="Processing...",
),
)
successes = sum(1 for r in results if r.success)
self.stdout.write(f"Successes: {successes}")
@@ -6,7 +6,6 @@ import zipfile
from django.contrib.auth.models import User
from django.test import override_settings
from django.utils import timezone
from rest_framework import status
from rest_framework.test import APITestCase
@@ -33,21 +32,21 @@ class TestBulkDownload(DirectoriesMixin, SampleDirMixin, APITestCase):
filename="docA.pdf",
mime_type="application/pdf",
checksum="B",
created=timezone.make_aware(datetime.datetime(2021, 1, 1)),
created=datetime.datetime(2021, 1, 1, tzinfo=datetime.UTC),
)
self.doc2b = Document.objects.create(
title="document A",
filename="docA2.pdf",
mime_type="application/pdf",
checksum="D",
created=timezone.make_aware(datetime.datetime(2021, 1, 1)),
created=datetime.datetime(2021, 1, 1, tzinfo=datetime.UTC),
)
self.doc3 = Document.objects.create(
title="document B",
filename="docB.jpg",
mime_type="image/jpeg",
checksum="C",
created=timezone.make_aware(datetime.datetime(2020, 3, 21)),
created=datetime.datetime(2020, 3, 21, tzinfo=datetime.UTC),
archive_filename="docB.pdf",
archive_checksum="D",
)
@@ -1,5 +1,5 @@
import datetime
import json
from datetime import date
from unittest import mock
from unittest.mock import ANY
@@ -456,7 +456,7 @@ class TestCustomFieldsAPI(DirectoriesMixin, APITestCase):
},
)
date_value = date.today()
date_value = datetime.datetime.now(tz=datetime.UTC).date()
resp = self.client.patch(
f"/api/documents/{doc.id}/",
@@ -618,7 +618,7 @@ class TestCustomFieldsAPI(DirectoriesMixin, APITestCase):
data_type=CustomField.FieldDataType.DATE,
)
date_value = date.today()
date_value = datetime.datetime.now(tz=datetime.UTC).date()
resp = self.client.patch(
f"/api/documents/{doc.id}/",
+1 -1
View File
@@ -265,7 +265,7 @@ class TestDocumentApi(DirectoriesMixin, ConsumeTaskMixin, APITestCase):
created=date(2023, 1, 1),
)
created_datetime = datetime.datetime(2023, 2, 1, 12, 0, 0)
created_datetime = datetime.datetime(2023, 2, 1, 12, 0, 0, tzinfo=datetime.UTC)
response = self.client.patch(
f"/api/documents/{doc.pk}/",
{"created": created_datetime},
+30 -14
View File
@@ -700,7 +700,7 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
pk=3,
checksum="C",
# specific time zone aware date
added=timezone.make_aware(datetime.datetime(2023, 12, 1)),
added=datetime.datetime(2023, 12, 1, tzinfo=datetime.UTC),
)
# refresh doc instance to ensure we operate on date objects that Django uses
# Django converts dates to UTC
@@ -994,25 +994,25 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
title="invoice",
content="the thing i bought at a shop and paid with bank account",
created=datetime.date(2018, 1, 1),
added=timezone.make_aware(datetime.datetime(2018, 1, 1)),
added=datetime.datetime(2018, 1, 1, tzinfo=datetime.UTC),
)
d2 = DocumentFactory(
title="bank statement 1",
content="things i paid for in august",
created=datetime.date(2019, 3, 4),
added=timezone.make_aware(datetime.datetime(2019, 3, 4)),
added=datetime.datetime(2019, 3, 4, tzinfo=datetime.UTC),
)
d3 = DocumentFactory(
title="bank statement 3",
content="things i paid for in september",
created=datetime.date(2020, 7, 9),
added=timezone.make_aware(datetime.datetime(2020, 7, 9)),
added=datetime.datetime(2020, 7, 9, tzinfo=datetime.UTC),
)
d4 = DocumentFactory(
title="Quarterly Report",
content="quarterly revenue profit margin earnings growth",
created=datetime.date(2021, 11, 30),
added=timezone.make_aware(datetime.datetime(2021, 11, 30)),
added=datetime.datetime(2021, 11, 30, tzinfo=datetime.UTC),
)
backend = get_backend()
backend.add_or_update(d1)
@@ -1131,7 +1131,7 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
d4.tags.add(t2)
d5 = Document.objects.create(
checksum="5",
added=timezone.make_aware(datetime.datetime(2020, 7, 13)),
added=datetime.datetime(2020, 7, 13, tzinfo=datetime.UTC),
content="test",
original_filename="doc5.pdf",
)
@@ -1241,14 +1241,18 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
d4.id,
search_query(
"&created__date__lt="
+ datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d"),
+ datetime.datetime(2020, 9, 2, tzinfo=datetime.UTC).strftime(
"%Y-%m-%d",
),
),
)
self.assertNotIn(
d4.id,
search_query(
"&created__date__gt="
+ datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d"),
+ datetime.datetime(2020, 9, 2, tzinfo=datetime.UTC).strftime(
"%Y-%m-%d",
),
),
)
@@ -1256,14 +1260,18 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
d4.id,
search_query(
"&created__date__lt="
+ datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d"),
+ datetime.datetime(2020, 1, 2, tzinfo=datetime.UTC).strftime(
"%Y-%m-%d",
),
),
)
self.assertIn(
d4.id,
search_query(
"&created__date__gt="
+ datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d"),
+ datetime.datetime(2020, 1, 2, tzinfo=datetime.UTC).strftime(
"%Y-%m-%d",
),
),
)
@@ -1271,14 +1279,18 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
d5.id,
search_query(
"&added__date__lt="
+ datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d"),
+ datetime.datetime(2020, 9, 2, tzinfo=datetime.UTC).strftime(
"%Y-%m-%d",
),
),
)
self.assertNotIn(
d5.id,
search_query(
"&added__date__gt="
+ datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d"),
+ datetime.datetime(2020, 9, 2, tzinfo=datetime.UTC).strftime(
"%Y-%m-%d",
),
),
)
@@ -1286,7 +1298,9 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
d5.id,
search_query(
"&added__date__lt="
+ datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d"),
+ datetime.datetime(2020, 1, 2, tzinfo=datetime.UTC).strftime(
"%Y-%m-%d",
),
),
)
@@ -1294,7 +1308,9 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
d5.id,
search_query(
"&added__date__gt="
+ datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d"),
+ datetime.datetime(2020, 1, 2, tzinfo=datetime.UTC).strftime(
"%Y-%m-%d",
),
),
)
+4 -3
View File
@@ -764,7 +764,7 @@ class TestPDFActions(DirectoriesMixin, TestCase):
sig.set.return_value.apply_async.side_effect = Exception("boom")
mock_consume_file.return_value = sig
with self.assertRaises(Exception):
with self.assertRaisesRegex(Exception, "boom"):
bulk_edit.merge(doc_ids, delete_originals=True)
self.doc1.refresh_from_db()
@@ -1047,6 +1047,7 @@ class TestPDFActions(DirectoriesMixin, TestCase):
for call, expected_id in zip(
mock_consume_delay.call_args_list,
doc_ids,
strict=False,
):
task_kwargs = call.kwargs["kwargs"]
self.assertEqual(task_kwargs["input_doc"].root_document_id, expected_id)
@@ -1305,7 +1306,7 @@ class TestPDFActions(DirectoriesMixin, TestCase):
sig.apply_async.side_effect = Exception("boom")
mock_chord.return_value = sig
with self.assertRaises(Exception):
with self.assertRaisesRegex(Exception, "boom"):
bulk_edit.edit_pdf(doc_ids, operations, delete_original=True)
self.doc2.refresh_from_db()
@@ -1417,7 +1418,7 @@ class TestPDFActions(DirectoriesMixin, TestCase):
{"page": 9999}, # invalid page, forces error during PDF load
]
with self.assertLogs("paperless.bulk_edit", level="ERROR"):
with self.assertRaises(Exception):
with self.assertRaises(ValueError):
bulk_edit.edit_pdf(doc_ids, operations)
mock_group.assert_not_called()
mock_consume_file.assert_not_called()
+2 -2
View File
@@ -782,8 +782,8 @@ class TestClassifier(DirectoriesMixin, TestCase):
load_classifier(raise_exception=True)
Path(settings.MODEL_FILE).touch()
mock_load.side_effect = Exception()
with self.assertRaises(Exception):
mock_load.side_effect = RuntimeError()
with self.assertRaises(RuntimeError):
load_classifier(raise_exception=True)
+4 -4
View File
@@ -59,7 +59,7 @@ class TestDoubleSided(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
def create_staging_file(self, src="double-sided-odd.pdf", datetime=None) -> None:
shutil.copy(self.SAMPLE_DIR / src, self.staging_file)
if datetime is None:
datetime = dt.datetime.now()
datetime = dt.datetime.now(tz=dt.UTC)
os.utime(str(self.staging_file), (datetime.timestamp(),) * 2)
def test_odd_numbered_moved_to_staging(self) -> None:
@@ -79,8 +79,8 @@ class TestDoubleSided(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
self.assertIsFile(self.staging_file)
self.assertAlmostEqual(
dt.datetime.fromtimestamp(self.staging_file.stat().st_mtime),
dt.datetime.now(),
dt.datetime.fromtimestamp(self.staging_file.stat().st_mtime, tz=dt.UTC),
dt.datetime.now(tz=dt.UTC),
delta=dt.timedelta(seconds=5),
)
self.assertIn("Received odd numbered pages", msg["reason"])
@@ -124,7 +124,7 @@ class TestDoubleSided(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
"""
self.create_staging_file(
datetime=dt.datetime.now()
datetime=dt.datetime.now(tz=dt.UTC)
- dt.timedelta(minutes=TIMEOUT_MINUTES, seconds=1),
)
msg = self.consume_file("double-sided-odd.pdf")
+25 -16
View File
@@ -12,7 +12,6 @@ from django.contrib.auth.models import User
from django.db import DatabaseError
from django.test import TestCase
from django.test import override_settings
from django.utils import timezone
from documents.file_handling import create_source_path_directory
from documents.file_handling import delete_empty_directories
@@ -221,8 +220,11 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
doc = Document.objects.create(
title="document",
mime_type="application/pdf",
checksum=hashlib.md5(original_bytes).hexdigest(),
archive_checksum=hashlib.md5(archive_bytes).hexdigest(),
checksum=hashlib.md5(original_bytes, usedforsecurity=False).hexdigest(),
archive_checksum=hashlib.md5(
archive_bytes,
usedforsecurity=False,
).hexdigest(),
filename="old/document.pdf",
archive_filename="old/document.pdf",
storage_path=old_storage_path,
@@ -411,7 +413,7 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
FILENAME_FORMAT="{created_year}-{created_month}-{created_day}",
)
def test_created_year_month_day(self) -> None:
d1 = timezone.make_aware(datetime.datetime(2020, 3, 6, 1, 1, 1))
d1 = datetime.datetime(2020, 3, 6, 1, 1, 1, tzinfo=datetime.UTC)
doc1 = Document.objects.create(
title="doc1",
mime_type="application/pdf",
@@ -428,7 +430,7 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
FILENAME_FORMAT="{added_year}-{added_month}-{added_day}",
)
def test_added_year_month_day(self) -> None:
d1 = timezone.make_aware(datetime.datetime(1232, 1, 9, 1, 1, 1))
d1 = datetime.datetime(1232, 1, 9, 1, 1, 1, tzinfo=datetime.UTC)
doc1 = Document.objects.create(
title="doc1",
mime_type="application/pdf",
@@ -441,7 +443,7 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
self.assertEqual(generate_filename(doc1), expected_filename)
doc1.added = timezone.make_aware(datetime.datetime(2020, 11, 16, 1, 1, 1))
doc1.added = datetime.datetime(2020, 11, 16, 1, 1, 1, tzinfo=datetime.UTC)
self.assertEqual(generate_filename(doc1), Path("2020-11-16.pdf"))
@@ -1225,7 +1227,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
def test_short_names_added(self) -> None:
doc = Document.objects.create(
title="The Title",
added=timezone.make_aware(datetime.datetime(1984, 8, 21, 7, 36, 51, 153)),
added=datetime.datetime(1984, 8, 21, 7, 36, 51, 153, tzinfo=datetime.UTC),
mime_type="application/pdf",
pk=2,
checksum="2",
@@ -1464,7 +1466,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
doc_a = Document.objects.create(
title="Does Matter",
created=datetime.date(2020, 6, 25),
added=timezone.make_aware(datetime.datetime(2024, 10, 1, 7, 36, 51, 153)),
added=datetime.datetime(2024, 10, 1, 7, 36, 51, 153, tzinfo=datetime.UTC),
mime_type="application/pdf",
pk=2,
checksum="2",
@@ -1536,7 +1538,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
doc = Document.objects.create(
title="scan_017562",
created=datetime.date(2025, 7, 2),
added=timezone.make_aware(datetime.datetime(2026, 3, 3, 11, 53, 16)),
added=datetime.datetime(2026, 3, 3, 11, 53, 16, tzinfo=datetime.UTC),
mime_type="application/pdf",
checksum="test-checksum",
storage_path=sp,
@@ -1565,7 +1567,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
doc_a = Document.objects.create(
title="Does Matter",
created=datetime.date(2020, 6, 25),
added=timezone.make_aware(datetime.datetime(2024, 10, 1, 7, 36, 51, 153)),
added=datetime.datetime(2024, 10, 1, 7, 36, 51, 153, tzinfo=datetime.UTC),
mime_type="application/pdf",
pk=2,
checksum="2",
@@ -1600,7 +1602,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
doc_a = Document.objects.create(
title="Does Matter",
created=datetime.date(2020, 6, 25),
added=timezone.make_aware(datetime.datetime(2024, 10, 1, 7, 36, 51, 153)),
added=datetime.datetime(2024, 10, 1, 7, 36, 51, 153, tzinfo=datetime.UTC),
mime_type="application/pdf",
pk=2,
checksum="2",
@@ -1632,7 +1634,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
doc_a = Document.objects.create(
title="Some Title",
created=datetime.date(2020, 6, 25),
added=timezone.make_aware(datetime.datetime(2024, 10, 1, 7, 36, 51, 153)),
added=datetime.datetime(2024, 10, 1, 7, 36, 51, 153, tzinfo=datetime.UTC),
mime_type="application/pdf",
pk=2,
checksum="2",
@@ -1737,7 +1739,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
doc_a = Document.objects.create(
title="Some Title",
created=datetime.date(2020, 6, 25),
added=timezone.make_aware(datetime.datetime(2024, 10, 1, 7, 36, 51, 153)),
added=datetime.datetime(2024, 10, 1, 7, 36, 51, 153, tzinfo=datetime.UTC),
mime_type="application/pdf",
pk=2,
checksum="2",
@@ -1751,8 +1753,15 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
CustomFieldInstance.objects.create(
document=doc_a,
field=CustomField.objects.get(name="Invoice Date"),
value_date=timezone.make_aware(
datetime.datetime(2024, 10, 1, 7, 36, 51, 153),
value_date=datetime.datetime(
2024,
10,
1,
7,
36,
51,
153,
tzinfo=datetime.UTC,
),
)
@@ -1792,7 +1801,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
doc = Document.objects.create(
title="Some Title! With @ Special # Characters",
created=datetime.date(2020, 6, 25),
added=timezone.make_aware(datetime.datetime(2024, 10, 1, 7, 36, 51, 153)),
added=datetime.datetime(2024, 10, 1, 7, 36, 51, 153, tzinfo=datetime.UTC),
mime_type="application/pdf",
pk=2,
checksum="2",
+1 -1
View File
@@ -243,7 +243,7 @@ class TestViews(DirectoriesMixin, TestCase):
"change": {"users": [], "groups": []},
}
else:
assert False, f"Unexpected tag found: {tag['name']}"
raise AssertionError(f"Unexpected tag found: {tag['name']}")
def test_list_no_n_plus_1_queries(self) -> None:
"""
+8 -1
View File
@@ -2760,7 +2760,14 @@ class TestWorkflows(
doc = Document.objects.create(
title="test",
)
self.assertRaises(Exception, document_matches_workflow, doc, w, 99)
self.assertRaisesRegex(
Exception,
"not yet supported",
document_matches_workflow,
doc,
w,
99,
)
def test_removal_action_document_updated_workflow(self) -> None:
"""
+3 -2
View File
@@ -129,11 +129,12 @@ def util_call_with_backoff(
status_codes.append(cause_exec.response.status_code)
warnings.warn(
f"HTTP Exception for {cause_exec.request.url} - {cause_exec}",
stacklevel=2,
)
else:
warnings.warn(f"Unexpected error: {e}")
warnings.warn(f"Unexpected error: {e}", stacklevel=2)
except Exception as e: # pragma: no cover
warnings.warn(f"Unexpected error: {e}")
warnings.warn(f"Unexpected error: {e}", stacklevel=2)
retry_count = retry_count + 1
+38 -40
View File
@@ -7,11 +7,11 @@ import tempfile
import zipfile
from collections import defaultdict
from collections import deque
from datetime import UTC
from datetime import datetime
from datetime import timedelta
from http import HTTPStatus
from pathlib import Path
from time import mktime
from typing import TYPE_CHECKING
from typing import Any
from typing import Literal
@@ -60,7 +60,6 @@ from django.http import StreamingHttpResponse
from django.shortcuts import get_object_or_404
from django.utils import timezone
from django.utils.decorators import method_decorator
from django.utils.timezone import make_aware
from django.utils.translation import get_language
from django.utils.translation import gettext_lazy as _
from django.views import View
@@ -285,7 +284,7 @@ def _get_more_like_id(query_params: dict[str, Any], user: User | None) -> int:
pk=more_like_doc_id,
)
except (TypeError, ValueError, Document.DoesNotExist):
raise PermissionDenied(_("Invalid more_like_id"))
raise PermissionDenied(_("Invalid more_like_id")) from None
if user and not has_perms_owner_aware(
user,
@@ -1101,7 +1100,7 @@ class DocumentViewSet(
"root_document",
).get(pk=pk)
except Document.DoesNotExist:
raise Http404
raise Http404 from None
root_doc = get_root_document(doc)
if request.user is not None and not has_perms_owner_aware(
@@ -1264,7 +1263,7 @@ class DocumentViewSet(
"root_document",
).get(id=pk)
except Document.DoesNotExist:
raise Http404
raise Http404 from None
root_doc = get_root_document(
request_doc,
@@ -1506,7 +1505,6 @@ class DocumentViewSet(
"document %s: %s",
doc.pk,
exc,
exc_info=True,
)
raise ValidationError({"ai": [_("Invalid AI configuration.")]}) from exc
@@ -1580,7 +1578,7 @@ class DocumentViewSet(
disposition="inline",
)
except FileNotFoundError:
raise Http404
raise Http404 from None
@action(methods=["get"], detail=True, filter_backends=[])
@method_decorator(cache_control(no_cache=True))
@@ -1605,14 +1603,14 @@ class DocumentViewSet(
return FileResponse(handle, content_type="image/webp")
except FileNotFoundError:
raise Http404
raise Http404 from None
@action(methods=["get"], detail=True)
def download(self, request, pk=None):
try:
return self.file_response(pk, request, "attachment")
except (FileNotFoundError, Document.DoesNotExist):
raise Http404
raise Http404 from None
@action(
methods=["get", "post", "delete"],
@@ -1637,7 +1635,7 @@ class DocumentViewSet(
):
return HttpResponseForbidden("Insufficient permissions to view notes")
except Document.DoesNotExist:
raise Http404
raise Http404 from None
serializer = self.get_serializer(doc)
@@ -1708,7 +1706,7 @@ class DocumentViewSet(
try:
note_id_int = int(note_id)
except ValueError:
raise ValidationError({"id": "A valid integer is required."})
raise ValidationError({"id": "A valid integer is required."}) from None
note = get_object_or_404(Note, id=note_id_int, document=doc)
if settings.AUDIT_LOG_ENABLED:
LogEntry.objects.log_create(
@@ -1752,7 +1750,7 @@ class DocumentViewSet(
"Insufficient permissions to add share link",
)
except Document.DoesNotExist:
raise Http404
raise Http404 from None
if request.method == "GET":
now = timezone.now()
@@ -1780,7 +1778,7 @@ class DocumentViewSet(
"Insufficient permissions",
)
except Document.DoesNotExist: # pragma: no cover
raise Http404
raise Http404 from None
# documents
entries = [
@@ -1801,28 +1799,28 @@ class DocumentViewSet(
]
# custom fields
for entry in LogEntry.objects.get_for_objects(
doc.custom_fields.all(),
).select_related("actor"):
entries.append(
{
"id": entry.id,
"timestamp": entry.timestamp,
"action": entry.get_action_display(),
"changes": {
"custom_fields": {
"type": "custom_field",
"field": str(entry.object_repr).split(":")[0].strip(),
"value": str(entry.object_repr).split(":")[1].strip(),
},
entries.extend(
{
"id": entry.id,
"timestamp": entry.timestamp,
"action": entry.get_action_display(),
"changes": {
"custom_fields": {
"type": "custom_field",
"field": str(entry.object_repr).split(":")[0].strip(),
"value": str(entry.object_repr).split(":")[1].strip(),
},
"actor": (
{"id": entry.actor.id, "username": entry.actor.username}
if entry.actor
else None
),
},
)
"actor": (
{"id": entry.actor.id, "username": entry.actor.username}
if entry.actor
else None
),
}
for entry in LogEntry.objects.get_for_objects(
doc.custom_fields.all(),
).select_related("actor")
)
return Response(sorted(entries, key=lambda x: x["timestamp"], reverse=True))
@@ -1930,13 +1928,13 @@ class DocumentViewSet(
):
return HttpResponseForbidden("Insufficient permissions")
except Document.DoesNotExist:
raise Http404
raise Http404 from None
try:
doc_name, doc_data = serializer.validated_data.get("document")
version_label = serializer.validated_data.get("version_label")
t = int(mktime(datetime.now().timetuple()))
t = int(timezone.now().timestamp())
settings.SCRATCH_DIR.mkdir(parents=True, exist_ok=True)
@@ -1981,7 +1979,7 @@ class DocumentViewSet(
"root_document",
).get(pk=pk)
except Document.DoesNotExist:
raise Http404
raise Http404 from None
return get_root_document(root_doc)
def _get_version_doc_for_root(self, root_doc: Document, version_id) -> Document:
@@ -1990,7 +1988,7 @@ class DocumentViewSet(
pk=version_id,
)
except Document.DoesNotExist:
raise Http404
raise Http404 from None
if (
version_doc.id != root_doc.id
@@ -2545,7 +2543,7 @@ class LogViewSet(ViewSet):
try:
limit = int(limit_param)
except (TypeError, ValueError):
raise ValidationError({"limit": "Must be a positive integer"})
raise ValidationError({"limit": "Must be a positive integer"}) from None
if limit < 1:
raise ValidationError({"limit": "Must be a positive integer"})
else:
@@ -3136,7 +3134,7 @@ class PostDocumentView(GenericAPIView[Any]):
cf = serializer.validated_data.get("custom_fields")
from_webui = serializer.validated_data.get("from_webui")
t = int(mktime(datetime.now().timetuple()))
t = int(timezone.now().timestamp())
settings.SCRATCH_DIR.mkdir(parents=True, exist_ok=True)
@@ -4948,7 +4946,7 @@ class SystemStatusView(PassUserMixin):
index_dir = settings.INDEX_DIR
mtimes = [p.stat().st_mtime for p in index_dir.iterdir() if p.is_file()]
index_last_modified = (
make_aware(datetime.fromtimestamp(max(mtimes))) if mtimes else None
datetime.fromtimestamp(max(mtimes), tz=UTC) if mtimes else None
)
except Exception as e:
index_status = "ERROR"
+14 -13
View File
@@ -84,10 +84,11 @@ def binaries_check(app_configs: Any, **kwargs: Any) -> list[Error]:
binaries = (settings.CONVERT_BINARY, "tesseract", "gs")
check_messages = []
for binary in binaries:
if shutil.which(binary) is None:
check_messages.append(Warning(error.format(binary), hint))
check_messages = [
Warning(error.format(binary), hint)
for binary in binaries
if shutil.which(binary) is None
]
return check_messages
@@ -383,14 +384,14 @@ def check_default_language_available(app_configs: Any, **kwargs: Any) -> list[Er
specified_langs = [x.strip() for x in settings.OCR_LANGUAGE.split("+")]
for lang in specified_langs:
if lang not in installed_langs:
errs.append(
Error(
f"The selected ocr language {lang} is "
f"not installed. Paperless cannot OCR your documents "
f"without it. Please fix PAPERLESS_OCR_LANGUAGE.",
),
)
errs.extend(
Error(
f"The selected ocr language {lang} is "
f"not installed. Paperless cannot OCR your documents "
f"without it. Please fix PAPERLESS_OCR_LANGUAGE.",
)
for lang in specified_langs
if lang not in installed_langs
)
return errs
+4 -5
View File
@@ -649,11 +649,10 @@ class MailDocumentParser:
if data["bcc"]:
data["bcc_label"] = "BCC"
att = []
for a in mail.attachments:
att.append(
f"{a.filename} ({naturalsize(a.size, binary=True, format='%.2f')})",
)
att = [
f"{a.filename} ({naturalsize(a.size, binary=True, format='%.2f')})"
for a in mail.attachments
]
data["attachments"] = clean_html(", ".join(att))
if data["attachments"]:
data["attachments_label"] = "Attachments"
+1 -1
View File
@@ -331,7 +331,7 @@ def parse_dateparser_languages(languages: str | None) -> list[str]:
language_list = languages.split("+") if languages else []
# There is an unfixed issue in zh-Hant and zh-Hans locales in the dateparser lib.
# See: https://github.com/scrapinghub/dateparser/issues/875
for index, language in enumerate(language_list):
for _, language in enumerate(language_list):
if language.startswith("zh-") and "zh" not in language_list:
logger.warning(
f"Chinese locale detected: {language}. dateparser might fail to parse"
+1 -1
View File
@@ -193,7 +193,7 @@ def reject_dangerous_svg(file: UploadedFile) -> None:
tree = etree.parse(file, parser)
root = tree.getroot()
except etree.XMLSyntaxError:
raise ValidationError("Invalid SVG file.")
raise ValidationError("Invalid SVG file.") from None
for element in root.iter():
tag: str = etree.QName(element.tag).localname.lower()
+1 -1
View File
@@ -155,7 +155,7 @@ def stream_chat_with_documents(query_str: str, documents: list[Document]):
try:
yield from _stream_chat_with_documents(query_str, documents)
except Exception as e:
logger.exception(f"Failed to stream document chat response: {e}", exc_info=True)
logger.exception("Failed to stream document chat response: %s", e)
yield CHAT_ERROR_MESSAGE
+4 -2
View File
@@ -152,8 +152,10 @@ def build_llm_index_text(doc: Document) -> str:
f"Notes: {','.join([str(c.note) for c in Note.objects.filter(document=doc)])}",
]
for instance in doc.custom_fields.all():
lines.append(f"Custom Field - {instance.field.name}: {instance}")
lines.extend(
f"Custom Field - {instance.field.name}: {instance}"
for instance in doc.custom_fields.all()
)
lines.append("\nContent:\n")
lines.append(doc.content or "")
+1 -1
View File
@@ -313,7 +313,7 @@ def update_llm_index(
continue
# Delete from docstore, FAISS IndexFlatL2 are append-only
for node in doc_nodes:
for _ in doc_nodes:
remove_document_docstore_nodes(document, index)
nodes.extend(build_document_node(document, chunk_size=chunk_size))
+1 -1
View File
@@ -155,7 +155,7 @@ def test_get_ai_document_classification_failure(mock_run_llm_query, mock_documen
mock_run_llm_query.side_effect = Exception("LLM query failed")
# assert raises an exception
with pytest.raises(Exception):
with pytest.raises(ValueError, match="Unsupported LLM backend"):
get_ai_document_classification(mock_document)
+2 -2
View File
@@ -226,7 +226,7 @@ def test_get_or_create_storage_context_raises_exception(
temp_llm_index_dir,
mock_embed_model,
) -> None:
with pytest.raises(Exception):
with pytest.raises(ValueError):
indexing.get_or_create_storage_context(rebuild=False)
@@ -273,7 +273,7 @@ def test_load_or_build_index_raises_exception_when_no_nodes(
return_value=MagicMock(),
),
):
with pytest.raises(Exception):
with pytest.raises(Exception): # noqa: B017
indexing.load_or_build_index()
+3 -4
View File
@@ -4,7 +4,6 @@ import logging
import ssl
import tempfile
import traceback
from datetime import date
from datetime import timedelta
from fnmatch import fnmatch
from pathlib import Path
@@ -385,7 +384,7 @@ def make_criterias(rule: MailRule, *, supports_gmail_labels: bool):
Returns criteria to be applied to MailBox.fetch for the given rule.
"""
maximum_age = date.today() - timedelta(days=rule.maximum_age)
maximum_age = timezone.now().date() - timedelta(days=rule.maximum_age)
criterias = {}
if rule.maximum_age > 0:
criterias["date_gte"] = maximum_age
@@ -637,8 +636,8 @@ class MailAccountHandler(LoggingMixin):
self.log.info(f"Located folder: {folder_info.name}")
except Exception as e:
self.log.error(
"Exception during folder listing, unable to provide list folders: "
+ str(e),
"Exception during folder listing, unable to provide list folders: %s",
e,
)
raise MailError(
+4 -2
View File
@@ -349,9 +349,10 @@ class MailMocker(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
len(expected_call_args),
)
for (mock_args, mock_kwargs), expected_signatures in zip(
for (_, mock_kwargs), expected_signatures in zip(
self._queue_consumption_tasks_mock.call_args_list,
expected_call_args,
strict=False,
):
consume_tasks = mock_kwargs["consume_tasks"]
@@ -361,6 +362,7 @@ class MailMocker(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
for consume_task, expected_signature in zip(
consume_tasks,
expected_signatures,
strict=False,
):
input_doc = consume_task.kwargs["input_doc"]
overrides = consume_task.kwargs["overrides"]
@@ -383,7 +385,7 @@ class MailMocker(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
"""
Applies pending actions to mails by inspecting calls to the queue_consumption_tasks method.
"""
for args, kwargs in self._queue_consumption_tasks_mock.call_args_list:
for _, kwargs in self._queue_consumption_tasks_mock.call_args_list:
message = kwargs["message"]
rule = kwargs["rule"]
apply_mail_action([], rule.pk, message.uid, message.subject, message.date)
@@ -184,7 +184,12 @@ class TestMailMessageGpgDecryptor(TestMail):
EMAIL_GNUPG_HOME=empty_gpg_home,
):
message_decryptor = MailMessageDecryptor()
self.assertRaises(Exception, message_decryptor.run, encrypted_message)
self.assertRaisesRegex(
Exception,
"Decryption failed",
message_decryptor.run,
encrypted_message,
)
finally:
# Clean up the temporary GPG home used only by this test
try:
+1 -2
View File
@@ -1,4 +1,3 @@
import datetime
import logging
from datetime import timedelta
from http import HTTPStatus
@@ -86,7 +85,7 @@ class MailAccountViewSet(PassUserMixin, ModelViewSet[MailAccount]):
@action(methods=["post"], detail=False)
def test(self, request):
logger = logging.getLogger("paperless_mail")
request.data["name"] = datetime.datetime.now().isoformat()
request.data["name"] = timezone.now().isoformat()
serializer = self.get_serializer(data=request.data)
serializer.is_valid(raise_exception=True)
existing_account = None