Also updates the script in the Docker image

ruff: enable S324 (hashlib insecure hash functions)
Adds usedforsecurity=False to all hashlib.md5() calls, documenting that these are used for file checksum comparison, not security. The production call in _path_matches_checksum will be replaced with compute_checksum() (SHA-256) in a separate branch.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-29 08:44:24 +00:00 · 2026-06-04 12:02:45 -07:00 · 2026-06-04 11:37:17 -07:00 · 2026-06-04 11:22:07 -07:00 · 2026-06-04 10:47:13 -07:00 · 2026-06-04 10:26:08 -07:00
40 changed files with 250 additions and 190 deletions
@@ -61,7 +61,7 @@ def replace_with_symlinks(
    total_duplicates = 0
    space_saved = 0

-    for file_hash, file_list in duplicate_groups.items():
+    for file_list in duplicate_groups.values():
        # Keep the first file as the original, replace others with symlinks
        original_file = file_list[0]
        duplicates = file_list[1:]
@@ -185,12 +185,16 @@ line-ending = "lf"
 [tool.ruff.lint]
 # https://docs.astral.sh/ruff/rules/
 extend-select = [
+  "B",    # https://docs.astral.sh/ruff/rules/#flake8-bugbear-b
  "COM",  # https://docs.astral.sh/ruff/rules/#flake8-commas-com
+  "DTZ",  # https://docs.astral.sh/ruff/rules/#flake8-datetimez-dtz
+  "PERF", # https://docs.astral.sh/ruff/rules/#perflint-perf
+  "S324", # https://docs.astral.sh/ruff/rules/hashlib-insecure-hash-functions/
  "DJ",   # https://docs.astral.sh/ruff/rules/#flake8-django-dj
  "EXE",  # https://docs.astral.sh/ruff/rules/#flake8-executable-exe
  "FBT",  # https://docs.astral.sh/ruff/rules/#flake8-boolean-trap-fbt
  "FLY",  # https://docs.astral.sh/ruff/rules/#flynt-fly
-  "G201", # https://docs.astral.sh/ruff/rules/#flake8-logging-format-g
+  "G",    # https://docs.astral.sh/ruff/rules/#flake8-logging-format-g
  "I",    # https://docs.astral.sh/ruff/rules/#isort-i
  "ICN",  # https://docs.astral.sh/ruff/rules/#flake8-import-conventions-icn
  "INP",  # https://docs.astral.sh/ruff/rules/#flake8-no-pep420-inp
@@ -211,6 +215,7 @@ extend-select = [
 ]
 ignore = [
  "DJ001",
+  "G004",    # f-strings in logging: accepted style in this codebase
  "PLC0415",
  "RUF012",
  "SIM105",
@@ -834,8 +834,9 @@ class ConsumerPlugin(
            self.log.debug(f"Creation date from parse_date: {create_date}")
        else:
            stats = Path(self.input_doc.original_file).stat()
-            create_date = timezone.make_aware(
-                datetime.datetime.fromtimestamp(stats.st_mtime),
+            create_date = datetime.datetime.fromtimestamp(
+                stats.st_mtime,
+                tz=datetime.UTC,
            )
            self.log.debug(f"Creation date from st_mtime: {create_date}")

@@ -1,4 +1,3 @@
-import datetime as dt
 import logging
 import os
 import shutil
@@ -6,6 +5,7 @@ from pathlib import Path
 from typing import Final

 from django.conf import settings
+from django.utils import timezone
 from pikepdf import Pdf

 from documents.consumer import ConsumerError
@@ -78,7 +78,7 @@ class CollatePlugin(NoCleanupPluginMixin, NoSetupPluginMixin, ConsumeTaskPlugin)
            stats = staging.stat()
            # if the file is older than the timeout, we don't consider
            # it valid
-            if (dt.datetime.now().timestamp() - stats.st_mtime) > TIMEOUT_SECONDS:
+            if (timezone.now().timestamp() - stats.st_mtime) > TIMEOUT_SECONDS:
                logger.warning("Outdated double sided staging file exists, deleting it")
                staging.unlink()
            else:
@@ -99,7 +99,7 @@ class CollatePlugin(NoCleanupPluginMixin, NoSetupPluginMixin, ConsumeTaskPlugin)
                            "two uploaded files don't belong to the same double-"
                            "sided scan. Please retry, starting with the odd "
                            "numbered pages again.",
-                        )
+                        ) from None
                    # Merged file has the same path, but without the
                    # double-sided subdir. Therefore, it is also in the
                    # consumption dir and will be picked up for processing
@@ -134,7 +134,7 @@ class CollatePlugin(NoCleanupPluginMixin, NoSetupPluginMixin, ConsumeTaskPlugin)
            shutil.move(pdf_file, staging)
            # update access to modification time so we know if the file
            # is outdated when another file gets uploaded
-            timestamp = dt.datetime.now().timestamp()
+            timestamp = timezone.now().timestamp()
            os.utime(staging, (timestamp, timestamp))
            logger.info(
                "Got scan with odd numbered pages of double-sided scan, moved it to %s",
@@ -350,7 +350,7 @@ def handle_validation_prefix(func: Callable):
        try:
            return func(*args, **kwargs)
        except serializers.ValidationError as e:
-            raise serializers.ValidationError({validation_prefix: e.detail})
+            raise serializers.ValidationError({validation_prefix: e.detail}) from e

    # Update the signature to include the validation_prefix argument
    old_sig = inspect.signature(func)
@@ -461,7 +461,7 @@ class CustomFieldQueryParser:
        except json.JSONDecodeError:
            raise serializers.ValidationError(
                {self._validation_prefix: [_("Value must be valid JSON.")]},
-            )
+            ) from None
        return (
            self._parse_expr(expr, validation_prefix=self._validation_prefix),
            self._annotations,
@@ -589,7 +589,7 @@ class CustomFieldQueryParser:
        except CustomField.DoesNotExist:
            raise serializers.ValidationError(
                [_("{name!r} is not a valid custom field.").format(name=id_or_name)],
-            )
+            ) from None
        self._custom_fields[custom_field.id] = custom_field
        self._custom_fields[custom_field.name] = custom_field
        return custom_field
@@ -988,7 +988,7 @@ class DocumentsOrderingFilter(OrderingFilter):
            except CustomField.DoesNotExist:
                raise serializers.ValidationError(
                    {self.prefix + str(custom_field_id): [_("Custom field not found")]},
-                )
+                ) from None

            annotation = None
            match field.data_type:
@@ -480,7 +480,7 @@ class Command(CryptMixin, PaperlessCommand):
            }

            # 3. Export files from each document
-            for index, document_dict in enumerate(
+            for _, document_dict in enumerate(
                self.track(
                    document_manifest,
                    description="Exporting documents...",
@@ -133,11 +133,14 @@ def _build_suggestion_table(
        else:
            doc_cell = Text(f"{doc} [{doc.pk}]")

-        tag_parts: list[str] = []
-        for tag in sorted(suggestion.tags_to_add, key=lambda t: t.name):
-            tag_parts.append(f"[green]+{tag.name}[/green]")
-        for tag in sorted(suggestion.tags_to_remove, key=lambda t: t.name):
-            tag_parts.append(f"[red]-{tag.name}[/red]")
+        tag_parts: list[str] = [
+            f"[green]+{tag.name}[/green]"
+            for tag in sorted(suggestion.tags_to_add, key=lambda t: t.name)
+        ]
+        tag_parts.extend(
+            f"[red]-{tag.name}[/red]"
+            for tag in sorted(suggestion.tags_to_remove, key=lambda t: t.name)
+        )
        tag_cell = Text.from_markup(", ".join(tag_parts)) if tag_parts else Text("-")

        table.add_row(
@@ -369,7 +369,7 @@ class Document(SoftDeleteModel, ModelWithOwner):  # type: ignore[django-manager-
        If the queryset already annotated ``effective_content``, that value is used.
        """
        if hasattr(self, "effective_content"):
-            return getattr(self, "effective_content")
+            return self.effective_content

        if self.root_document_id is not None or self.pk is None:
            return self.content
@@ -1204,8 +1204,8 @@ class CustomFieldInstance(SoftDeleteModel):
    def get_value_field_name(cls, data_type: CustomField.FieldDataType):
        try:
            return cls.TYPE_TO_DATA_STORE_NAME_MAP[data_type]
-        except KeyError:  # pragma: no cover
-            raise NotImplementedError(data_type)
+        except KeyError as exc:  # pragma: no cover
+            raise NotImplementedError(data_type) from exc

    @property
    def value(self):
@@ -110,7 +110,7 @@ def run_convert(
    args += ["-define", "pdf:use-cropbox=true"] if use_cropbox else []
    args += [str(input_file), str(output_file)]

-    logger.debug("Execute: " + " ".join(args), extra={"group": logging_group})
+    logger.debug("Execute: %s", " ".join(args), extra={"group": logging_group})

    try:
        run_subprocess(args, environment, logger)
@@ -67,8 +67,7 @@ class DateParserPluginBase(ABC):

        Subclasses can override this to release resources.
        """
-        # Default implementation does nothing.
-        # Returning None implies exceptions are propagated.
+        return None

    def _parse_string(
        self,
@@ -195,12 +195,12 @@ class WriteBatch:
                try:
                    self._lock.acquire(timeout=self._lock_timeout)
                    break
-                except filelock.Timeout:
+                except filelock.Timeout as exc:
                    if attempt == _LOCK_RETRY_ATTEMPTS - 1:
                        raise SearchIndexLockError(
                            f"Could not acquire index lock after {_LOCK_RETRY_ATTEMPTS} "
                            f"attempts (timeout={self._lock_timeout}s each)",
-                        )
+                        ) from exc
                    sleep_s = random.uniform(
                        0,
                        min(_LOCK_BACKOFF_CAP, _LOCK_BACKOFF_BASE * (2**attempt)),
@@ -651,7 +651,11 @@ class TantivyBackend:
        result_ids = cast("list[int]", searcher.fast_field_values("id", result_addrs))
        addr_by_id: dict[int, tuple[float, tantivy.DocAddress]] = {
            doc_id: (score, addr)
-            for (score, addr), doc_id in zip(batch_results.hits, result_ids)
+            for (score, addr), doc_id in zip(
+                batch_results.hits,
+                result_ids,
+                strict=False,
+            )
        }

        snippet_generator = None
@@ -270,7 +270,7 @@ def _rewrite_compact_date(query: str) -> str:
    except TimeoutError:  # pragma: no cover
        raise ValueError(
            "Query too complex to process (compact date rewrite timed out)",
-        )
+        ) from None


 def _rewrite_relative_range(query: str) -> str:
@@ -303,7 +303,7 @@ def _rewrite_relative_range(query: str) -> str:
    except TimeoutError:  # pragma: no cover
        raise ValueError(
            "Query too complex to process (relative range rewrite timed out)",
-        )
+        ) from None


 def _rewrite_whoosh_relative_range(query: str) -> str:
@@ -334,7 +334,7 @@ def _rewrite_whoosh_relative_range(query: str) -> str:
    except TimeoutError:  # pragma: no cover
        raise ValueError(
            "Query too complex to process (Whoosh relative range rewrite timed out)",
-        )
+        ) from None


 def _rewrite_8digit_date(query: str, tz: tzinfo) -> str:
@@ -376,7 +376,7 @@ def _rewrite_8digit_date(query: str, tz: tzinfo) -> str:
    except TimeoutError:  # pragma: no cover
        raise ValueError(
            "Query too complex to process (8-digit date rewrite timed out)",
-        )
+        ) from None


 def _rewrite_year_range(query: str) -> str:
@@ -401,7 +401,9 @@ def _rewrite_year_range(query: str) -> str:
    try:
        return _YEAR_RANGE_RE.sub(_sub, query, timeout=_REGEX_TIMEOUT)
    except TimeoutError:  # pragma: no cover
-        raise ValueError("Query too complex to process (year range rewrite timed out)")
+        raise ValueError(
+            "Query too complex to process (year range rewrite timed out)",
+        ) from None


 def rewrite_natural_date_keywords(query: str, tz: tzinfo) -> str:
@@ -443,7 +445,7 @@ def rewrite_natural_date_keywords(query: str, tz: tzinfo) -> str:
    except TimeoutError:  # pragma: no cover
        raise ValueError(
            "Query too complex to process (date keyword rewrite timed out)",
-        )
+        ) from None


 def normalize_query(query: str) -> str:
@@ -483,7 +485,9 @@ def normalize_query(query: str) -> str:
        query = _SPACED_OPERATOR_RE.sub(" ", query, timeout=_REGEX_TIMEOUT).strip()
        return query
    except TimeoutError:  # pragma: no cover
-        raise ValueError("Query too complex to process (normalization timed out)")
+        raise ValueError(
+            "Query too complex to process (normalization timed out)",
+        ) from None


 def build_permission_filter(
@@ -163,7 +163,7 @@ class MatchingModelSerializer(serializers.ModelSerializer[Any]):
                logger.debug(f"Invalid regular expression: {e!s}")
                raise serializers.ValidationError(
                    "Invalid regular expression, see log for details.",
-                )
+                ) from None
        return match


@@ -867,7 +867,9 @@ class CustomFieldInstanceSerializer(serializers.ModelSerializer[CustomFieldInsta
                try:
                    value_int = int(data["value"])
                except (TypeError, ValueError):
-                    raise serializers.ValidationError("Enter a valid integer.")
+                    raise serializers.ValidationError(
+                        "Enter a valid integer.",
+                    ) from None
                # Keep values within the PostgreSQL integer range
                MinValueValidator(-2147483648)(value_int)
                MaxValueValidator(2147483647)(value_int)
@@ -899,7 +901,7 @@ class CustomFieldInstanceSerializer(serializers.ModelSerializer[CustomFieldInsta
                except Exception:
                    raise serializers.ValidationError(
                        f"Value must be an id of an element in {select_options}",
-                    )
+                    ) from None
            elif field.data_type == CustomField.FieldDataType.DOCUMENTLINK:
                if not (isinstance(data["value"], list) or data["value"] is None):
                    raise serializers.ValidationError(
@@ -1090,7 +1092,7 @@ class DocumentSerializer(
    def to_representation(self, instance):
        doc = super().to_representation(instance)
        if "content" in self.fields and hasattr(instance, "effective_content"):
-            doc["content"] = getattr(instance, "effective_content") or ""
+            doc["content"] = instance.effective_content or ""
        if self.truncate_content and "content" in self.fields:
            doc["content"] = doc.get("content")[0:550]
        return doc
@@ -1452,7 +1454,7 @@ class SavedViewSerializer(OwnedObjectSerializer):
                        )
                    )
                except serializers.ValidationError as exc:
-                    raise serializers.ValidationError({field_name: exc.detail})
+                    raise serializers.ValidationError({field_name: exc.detail}) from exc
                del normalized_data[field_name]

        ret = super().to_internal_value(normalized_data)
@@ -1756,7 +1758,7 @@ class BulkEditSerializer(
                logger.exception(f"Error validating custom fields: {e}")
                raise serializers.ValidationError(
                    f"{name} must be a list of integers or a dict of id:value pairs, see the log for details",
-                )
+                ) from None
        elif not isinstance(custom_fields, list) or not all(
            isinstance(i, int) for i in ids
        ):
@@ -1824,7 +1826,7 @@ class BulkEditSerializer(
            try:
                Tag.objects.get(id=tag_id)
            except Tag.DoesNotExist:
-                raise serializers.ValidationError("Tag does not exist")
+                raise serializers.ValidationError("Tag does not exist") from None
        else:
            raise serializers.ValidationError("tag not specified")

@@ -1837,7 +1839,9 @@ class BulkEditSerializer(
            try:
                DocumentType.objects.get(id=document_type_id)
            except DocumentType.DoesNotExist:
-                raise serializers.ValidationError("Document type does not exist")
+                raise serializers.ValidationError(
+                    "Document type does not exist",
+                ) from None
        else:
            raise serializers.ValidationError("document_type not specified")

@@ -1849,7 +1853,9 @@ class BulkEditSerializer(
            try:
                Correspondent.objects.get(id=correspondent_id)
            except Correspondent.DoesNotExist:
-                raise serializers.ValidationError("Correspondent does not exist")
+                raise serializers.ValidationError(
+                    "Correspondent does not exist",
+                ) from None
        else:
            raise serializers.ValidationError("correspondent not specified")

@@ -1863,7 +1869,7 @@ class BulkEditSerializer(
            except StoragePath.DoesNotExist:
                raise serializers.ValidationError(
                    "Storage path does not exist",
-                )
+                ) from None
        else:
            raise serializers.ValidationError("storage path not specified")

@@ -1918,7 +1924,7 @@ class BulkEditSerializer(
            ):
                raise serializers.ValidationError("invalid rotation degrees")
        except ValueError:
-            raise serializers.ValidationError("invalid rotation degrees")
+            raise serializers.ValidationError("invalid rotation degrees") from None

    def _validate_source_mode(self, parameters) -> None:
        source_mode = parameters.get(
@@ -1948,7 +1954,7 @@ class BulkEditSerializer(
                    pages.append([int(doc)])
            parameters["pages"] = pages
        except ValueError:
-            raise serializers.ValidationError("invalid pages specified")
+            raise serializers.ValidationError("invalid pages specified") from None

        if "delete_originals" in parameters:
            if not isinstance(parameters["delete_originals"], bool):
@@ -2218,14 +2224,14 @@ class PostDocumentSerializer(serializers.Serializer[dict[str, Any]]):
                    raise serializers.ValidationError(
                        _("Custom field id must be an integer: %(id)s")
                        % {"id": field_id},
-                    )
+                    ) from None
                try:
                    field = CustomField.objects.get(id=field_id_int)
                except CustomField.DoesNotExist:
                    raise serializers.ValidationError(
                        _("Custom field with id %(id)s does not exist")
                        % {"id": field_id_int},
-                    )
+                    ) from None
                custom_field_serializer.validate(
                    {
                        "field": field,
@@ -2242,7 +2248,7 @@ class PostDocumentSerializer(serializers.Serializer[dict[str, Any]]):
                    _(
                        "Custom fields must be a list of integers or an object mapping ids to values.",
                    ),
-                )
+                ) from None
            if CustomField.objects.filter(id__in=ids).count() != len(set(ids)):
                raise serializers.ValidationError(
                    _("Some custom fields don't exist or were specified twice."),
@@ -2353,7 +2359,9 @@ class EmailSerializer(DocumentListSerializer):
            for address in address_list:
                email_validator(address)
        except ValidationError:
-            raise serializers.ValidationError(f"Invalid email address: {address}")
+            raise serializers.ValidationError(
+                f"Invalid email address: {address}",
+            ) from None

        return ",".join(address_list)

@@ -2777,7 +2785,7 @@ class ShareLinkBundleSerializer(OwnedObjectSerializer):
        return share_link_bundle

    def get_document_count(self, obj: ShareLinkBundle) -> int:
-        return getattr(obj, "document_total") or obj.documents.count()
+        return obj.document_total or obj.documents.count()


 class BulkEditObjectsSerializer(SerializerWithPerms, SetPermissionsMixin):
@@ -3125,7 +3133,7 @@ class WorkflowActionSerializer(serializers.ModelSerializer[WorkflowAction]):
                except (ValueError, KeyError) as e:
                    raise serializers.ValidationError(
                        {"assign_title": f'Invalid f-string detected: "{e.args[0]}"'},
-                    )
+                    ) from None

        if (
            "type" in attrs
@@ -411,7 +411,7 @@ def _path_matches_checksum(path: Path, checksum: str | None) -> bool:
        return False

    with path.open("rb") as f:
-        return hashlib.md5(f.read()).hexdigest() == checksum
+        return hashlib.md5(f.read(), usedforsecurity=False).hexdigest() == checksum


 def _filename_template_uses_custom_fields(doc: Document) -> bool:
@@ -29,9 +29,7 @@ class SimpleCommand(PaperlessCommand):

    def handle(self, *args, **options):
        items = list(range(5))
-        results = []
-        for item in self.track(items, description="Processing..."):
-            results.append(item * 2)
+        results = [item * 2 for item in self.track(items, description="Processing...")]
        self.stdout.write(f"Results: {results}")


@@ -57,13 +55,13 @@ class MultiprocessCommand(PaperlessCommand):

    def handle(self, *args, **options):
        items = list(range(5))
-        results = []
-        for result in self.process_parallel(
-            _double_value,
-            items,
-            description="Processing...",
-        ):
-            results.append(result)
+        results = list(
+            self.process_parallel(
+                _double_value,
+                items,
+                description="Processing...",
+            ),
+        )
        successes = sum(1 for r in results if r.success)
        self.stdout.write(f"Successes: {successes}")

@@ -6,7 +6,6 @@ import zipfile

 from django.contrib.auth.models import User
 from django.test import override_settings
-from django.utils import timezone
 from rest_framework import status
 from rest_framework.test import APITestCase

@@ -33,21 +32,21 @@ class TestBulkDownload(DirectoriesMixin, SampleDirMixin, APITestCase):
            filename="docA.pdf",
            mime_type="application/pdf",
            checksum="B",
-            created=timezone.make_aware(datetime.datetime(2021, 1, 1)),
+            created=datetime.datetime(2021, 1, 1, tzinfo=datetime.UTC),
        )
        self.doc2b = Document.objects.create(
            title="document A",
            filename="docA2.pdf",
            mime_type="application/pdf",
            checksum="D",
-            created=timezone.make_aware(datetime.datetime(2021, 1, 1)),
+            created=datetime.datetime(2021, 1, 1, tzinfo=datetime.UTC),
        )
        self.doc3 = Document.objects.create(
            title="document B",
            filename="docB.jpg",
            mime_type="image/jpeg",
            checksum="C",
-            created=timezone.make_aware(datetime.datetime(2020, 3, 21)),
+            created=datetime.datetime(2020, 3, 21, tzinfo=datetime.UTC),
            archive_filename="docB.pdf",
            archive_checksum="D",
        )
@@ -1,5 +1,5 @@
+import datetime
 import json
-from datetime import date
 from unittest import mock
 from unittest.mock import ANY

@@ -456,7 +456,7 @@ class TestCustomFieldsAPI(DirectoriesMixin, APITestCase):
            },
        )

-        date_value = date.today()
+        date_value = datetime.datetime.now(tz=datetime.UTC).date()

        resp = self.client.patch(
            f"/api/documents/{doc.id}/",
@@ -618,7 +618,7 @@ class TestCustomFieldsAPI(DirectoriesMixin, APITestCase):
            data_type=CustomField.FieldDataType.DATE,
        )

-        date_value = date.today()
+        date_value = datetime.datetime.now(tz=datetime.UTC).date()

        resp = self.client.patch(
            f"/api/documents/{doc.id}/",
@@ -265,7 +265,7 @@ class TestDocumentApi(DirectoriesMixin, ConsumeTaskMixin, APITestCase):
            created=date(2023, 1, 1),
        )

-        created_datetime = datetime.datetime(2023, 2, 1, 12, 0, 0)
+        created_datetime = datetime.datetime(2023, 2, 1, 12, 0, 0, tzinfo=datetime.UTC)
        response = self.client.patch(
            f"/api/documents/{doc.pk}/",
            {"created": created_datetime},
@@ -700,7 +700,7 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
            pk=3,
            checksum="C",
            # specific time zone aware date
-            added=timezone.make_aware(datetime.datetime(2023, 12, 1)),
+            added=datetime.datetime(2023, 12, 1, tzinfo=datetime.UTC),
        )
        # refresh doc instance to ensure we operate on date objects that Django uses
        # Django converts dates to UTC
@@ -994,25 +994,25 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
            title="invoice",
            content="the thing i bought at a shop and paid with bank account",
            created=datetime.date(2018, 1, 1),
-            added=timezone.make_aware(datetime.datetime(2018, 1, 1)),
+            added=datetime.datetime(2018, 1, 1, tzinfo=datetime.UTC),
        )
        d2 = DocumentFactory(
            title="bank statement 1",
            content="things i paid for in august",
            created=datetime.date(2019, 3, 4),
-            added=timezone.make_aware(datetime.datetime(2019, 3, 4)),
+            added=datetime.datetime(2019, 3, 4, tzinfo=datetime.UTC),
        )
        d3 = DocumentFactory(
            title="bank statement 3",
            content="things i paid for in september",
            created=datetime.date(2020, 7, 9),
-            added=timezone.make_aware(datetime.datetime(2020, 7, 9)),
+            added=datetime.datetime(2020, 7, 9, tzinfo=datetime.UTC),
        )
        d4 = DocumentFactory(
            title="Quarterly Report",
            content="quarterly revenue profit margin earnings growth",
            created=datetime.date(2021, 11, 30),
-            added=timezone.make_aware(datetime.datetime(2021, 11, 30)),
+            added=datetime.datetime(2021, 11, 30, tzinfo=datetime.UTC),
        )
        backend = get_backend()
        backend.add_or_update(d1)
@@ -1131,7 +1131,7 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
        d4.tags.add(t2)
        d5 = Document.objects.create(
            checksum="5",
-            added=timezone.make_aware(datetime.datetime(2020, 7, 13)),
+            added=datetime.datetime(2020, 7, 13, tzinfo=datetime.UTC),
            content="test",
            original_filename="doc5.pdf",
        )
@@ -1241,14 +1241,18 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
            d4.id,
            search_query(
                "&created__date__lt="
-                + datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d"),
+                + datetime.datetime(2020, 9, 2, tzinfo=datetime.UTC).strftime(
+                    "%Y-%m-%d",
+                ),
            ),
        )
        self.assertNotIn(
            d4.id,
            search_query(
                "&created__date__gt="
-                + datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d"),
+                + datetime.datetime(2020, 9, 2, tzinfo=datetime.UTC).strftime(
+                    "%Y-%m-%d",
+                ),
            ),
        )

@@ -1256,14 +1260,18 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
            d4.id,
            search_query(
                "&created__date__lt="
-                + datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d"),
+                + datetime.datetime(2020, 1, 2, tzinfo=datetime.UTC).strftime(
+                    "%Y-%m-%d",
+                ),
            ),
        )
        self.assertIn(
            d4.id,
            search_query(
                "&created__date__gt="
-                + datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d"),
+                + datetime.datetime(2020, 1, 2, tzinfo=datetime.UTC).strftime(
+                    "%Y-%m-%d",
+                ),
            ),
        )

@@ -1271,14 +1279,18 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
            d5.id,
            search_query(
                "&added__date__lt="
-                + datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d"),
+                + datetime.datetime(2020, 9, 2, tzinfo=datetime.UTC).strftime(
+                    "%Y-%m-%d",
+                ),
            ),
        )
        self.assertNotIn(
            d5.id,
            search_query(
                "&added__date__gt="
-                + datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d"),
+                + datetime.datetime(2020, 9, 2, tzinfo=datetime.UTC).strftime(
+                    "%Y-%m-%d",
+                ),
            ),
        )

@@ -1286,7 +1298,9 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
            d5.id,
            search_query(
                "&added__date__lt="
-                + datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d"),
+                + datetime.datetime(2020, 1, 2, tzinfo=datetime.UTC).strftime(
+                    "%Y-%m-%d",
+                ),
            ),
        )

@@ -1294,7 +1308,9 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
            d5.id,
            search_query(
                "&added__date__gt="
-                + datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d"),
+                + datetime.datetime(2020, 1, 2, tzinfo=datetime.UTC).strftime(
+                    "%Y-%m-%d",
+                ),
            ),
        )

@@ -764,7 +764,7 @@ class TestPDFActions(DirectoriesMixin, TestCase):
        sig.set.return_value.apply_async.side_effect = Exception("boom")
        mock_consume_file.return_value = sig

-        with self.assertRaises(Exception):
+        with self.assertRaisesRegex(Exception, "boom"):
            bulk_edit.merge(doc_ids, delete_originals=True)

        self.doc1.refresh_from_db()
@@ -1047,6 +1047,7 @@ class TestPDFActions(DirectoriesMixin, TestCase):
        for call, expected_id in zip(
            mock_consume_delay.call_args_list,
            doc_ids,
+            strict=False,
        ):
            task_kwargs = call.kwargs["kwargs"]
            self.assertEqual(task_kwargs["input_doc"].root_document_id, expected_id)
@@ -1305,7 +1306,7 @@ class TestPDFActions(DirectoriesMixin, TestCase):
        sig.apply_async.side_effect = Exception("boom")
        mock_chord.return_value = sig

-        with self.assertRaises(Exception):
+        with self.assertRaisesRegex(Exception, "boom"):
            bulk_edit.edit_pdf(doc_ids, operations, delete_original=True)

        self.doc2.refresh_from_db()
@@ -1417,7 +1418,7 @@ class TestPDFActions(DirectoriesMixin, TestCase):
            {"page": 9999},  # invalid page, forces error during PDF load
        ]
        with self.assertLogs("paperless.bulk_edit", level="ERROR"):
-            with self.assertRaises(Exception):
+            with self.assertRaises(ValueError):
                bulk_edit.edit_pdf(doc_ids, operations)
        mock_group.assert_not_called()
        mock_consume_file.assert_not_called()
@@ -782,8 +782,8 @@ class TestClassifier(DirectoriesMixin, TestCase):
            load_classifier(raise_exception=True)

        Path(settings.MODEL_FILE).touch()
-        mock_load.side_effect = Exception()
-        with self.assertRaises(Exception):
+        mock_load.side_effect = RuntimeError()
+        with self.assertRaises(RuntimeError):
            load_classifier(raise_exception=True)


@@ -59,7 +59,7 @@ class TestDoubleSided(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
    def create_staging_file(self, src="double-sided-odd.pdf", datetime=None) -> None:
        shutil.copy(self.SAMPLE_DIR / src, self.staging_file)
        if datetime is None:
-            datetime = dt.datetime.now()
+            datetime = dt.datetime.now(tz=dt.UTC)
        os.utime(str(self.staging_file), (datetime.timestamp(),) * 2)

    def test_odd_numbered_moved_to_staging(self) -> None:
@@ -79,8 +79,8 @@ class TestDoubleSided(DirectoriesMixin, FileSystemAssertsMixin, TestCase):

        self.assertIsFile(self.staging_file)
        self.assertAlmostEqual(
-            dt.datetime.fromtimestamp(self.staging_file.stat().st_mtime),
-            dt.datetime.now(),
+            dt.datetime.fromtimestamp(self.staging_file.stat().st_mtime, tz=dt.UTC),
+            dt.datetime.now(tz=dt.UTC),
            delta=dt.timedelta(seconds=5),
        )
        self.assertIn("Received odd numbered pages", msg["reason"])
@@ -124,7 +124,7 @@ class TestDoubleSided(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
        """

        self.create_staging_file(
-            datetime=dt.datetime.now()
+            datetime=dt.datetime.now(tz=dt.UTC)
            - dt.timedelta(minutes=TIMEOUT_MINUTES, seconds=1),
        )
        msg = self.consume_file("double-sided-odd.pdf")
@@ -12,7 +12,6 @@ from django.contrib.auth.models import User
 from django.db import DatabaseError
 from django.test import TestCase
 from django.test import override_settings
-from django.utils import timezone

 from documents.file_handling import create_source_path_directory
 from documents.file_handling import delete_empty_directories
@@ -221,8 +220,11 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
        doc = Document.objects.create(
            title="document",
            mime_type="application/pdf",
-            checksum=hashlib.md5(original_bytes).hexdigest(),
-            archive_checksum=hashlib.md5(archive_bytes).hexdigest(),
+            checksum=hashlib.md5(original_bytes, usedforsecurity=False).hexdigest(),
+            archive_checksum=hashlib.md5(
+                archive_bytes,
+                usedforsecurity=False,
+            ).hexdigest(),
            filename="old/document.pdf",
            archive_filename="old/document.pdf",
            storage_path=old_storage_path,
@@ -411,7 +413,7 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
        FILENAME_FORMAT="{created_year}-{created_month}-{created_day}",
    )
    def test_created_year_month_day(self) -> None:
-        d1 = timezone.make_aware(datetime.datetime(2020, 3, 6, 1, 1, 1))
+        d1 = datetime.datetime(2020, 3, 6, 1, 1, 1, tzinfo=datetime.UTC)
        doc1 = Document.objects.create(
            title="doc1",
            mime_type="application/pdf",
@@ -428,7 +430,7 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
        FILENAME_FORMAT="{added_year}-{added_month}-{added_day}",
    )
    def test_added_year_month_day(self) -> None:
-        d1 = timezone.make_aware(datetime.datetime(1232, 1, 9, 1, 1, 1))
+        d1 = datetime.datetime(1232, 1, 9, 1, 1, 1, tzinfo=datetime.UTC)
        doc1 = Document.objects.create(
            title="doc1",
            mime_type="application/pdf",
@@ -441,7 +443,7 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):

        self.assertEqual(generate_filename(doc1), expected_filename)

-        doc1.added = timezone.make_aware(datetime.datetime(2020, 11, 16, 1, 1, 1))
+        doc1.added = datetime.datetime(2020, 11, 16, 1, 1, 1, tzinfo=datetime.UTC)

        self.assertEqual(generate_filename(doc1), Path("2020-11-16.pdf"))

@@ -1225,7 +1227,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
    def test_short_names_added(self) -> None:
        doc = Document.objects.create(
            title="The Title",
-            added=timezone.make_aware(datetime.datetime(1984, 8, 21, 7, 36, 51, 153)),
+            added=datetime.datetime(1984, 8, 21, 7, 36, 51, 153, tzinfo=datetime.UTC),
            mime_type="application/pdf",
            pk=2,
            checksum="2",
@@ -1464,7 +1466,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
        doc_a = Document.objects.create(
            title="Does Matter",
            created=datetime.date(2020, 6, 25),
-            added=timezone.make_aware(datetime.datetime(2024, 10, 1, 7, 36, 51, 153)),
+            added=datetime.datetime(2024, 10, 1, 7, 36, 51, 153, tzinfo=datetime.UTC),
            mime_type="application/pdf",
            pk=2,
            checksum="2",
@@ -1536,7 +1538,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
        doc = Document.objects.create(
            title="scan_017562",
            created=datetime.date(2025, 7, 2),
-            added=timezone.make_aware(datetime.datetime(2026, 3, 3, 11, 53, 16)),
+            added=datetime.datetime(2026, 3, 3, 11, 53, 16, tzinfo=datetime.UTC),
            mime_type="application/pdf",
            checksum="test-checksum",
            storage_path=sp,
@@ -1565,7 +1567,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
        doc_a = Document.objects.create(
            title="Does Matter",
            created=datetime.date(2020, 6, 25),
-            added=timezone.make_aware(datetime.datetime(2024, 10, 1, 7, 36, 51, 153)),
+            added=datetime.datetime(2024, 10, 1, 7, 36, 51, 153, tzinfo=datetime.UTC),
            mime_type="application/pdf",
            pk=2,
            checksum="2",
@@ -1600,7 +1602,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
        doc_a = Document.objects.create(
            title="Does Matter",
            created=datetime.date(2020, 6, 25),
-            added=timezone.make_aware(datetime.datetime(2024, 10, 1, 7, 36, 51, 153)),
+            added=datetime.datetime(2024, 10, 1, 7, 36, 51, 153, tzinfo=datetime.UTC),
            mime_type="application/pdf",
            pk=2,
            checksum="2",
@@ -1632,7 +1634,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
        doc_a = Document.objects.create(
            title="Some Title",
            created=datetime.date(2020, 6, 25),
-            added=timezone.make_aware(datetime.datetime(2024, 10, 1, 7, 36, 51, 153)),
+            added=datetime.datetime(2024, 10, 1, 7, 36, 51, 153, tzinfo=datetime.UTC),
            mime_type="application/pdf",
            pk=2,
            checksum="2",
@@ -1737,7 +1739,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
        doc_a = Document.objects.create(
            title="Some Title",
            created=datetime.date(2020, 6, 25),
-            added=timezone.make_aware(datetime.datetime(2024, 10, 1, 7, 36, 51, 153)),
+            added=datetime.datetime(2024, 10, 1, 7, 36, 51, 153, tzinfo=datetime.UTC),
            mime_type="application/pdf",
            pk=2,
            checksum="2",
@@ -1751,8 +1753,15 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
        CustomFieldInstance.objects.create(
            document=doc_a,
            field=CustomField.objects.get(name="Invoice Date"),
-            value_date=timezone.make_aware(
-                datetime.datetime(2024, 10, 1, 7, 36, 51, 153),
+            value_date=datetime.datetime(
+                2024,
+                10,
+                1,
+                7,
+                36,
+                51,
+                153,
+                tzinfo=datetime.UTC,
            ),
        )

@@ -1792,7 +1801,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
        doc = Document.objects.create(
            title="Some Title! With @ Special # Characters",
            created=datetime.date(2020, 6, 25),
-            added=timezone.make_aware(datetime.datetime(2024, 10, 1, 7, 36, 51, 153)),
+            added=datetime.datetime(2024, 10, 1, 7, 36, 51, 153, tzinfo=datetime.UTC),
            mime_type="application/pdf",
            pk=2,
            checksum="2",
@@ -243,7 +243,7 @@ class TestViews(DirectoriesMixin, TestCase):
                    "change": {"users": [], "groups": []},
                }
            else:
-                assert False, f"Unexpected tag found: {tag['name']}"
+                raise AssertionError(f"Unexpected tag found: {tag['name']}")

    def test_list_no_n_plus_1_queries(self) -> None:
        """
@@ -2760,7 +2760,14 @@ class TestWorkflows(
        doc = Document.objects.create(
            title="test",
        )
-        self.assertRaises(Exception, document_matches_workflow, doc, w, 99)
+        self.assertRaisesRegex(
+            Exception,
+            "not yet supported",
+            document_matches_workflow,
+            doc,
+            w,
+            99,
+        )

    def test_removal_action_document_updated_workflow(self) -> None:
        """
@@ -129,11 +129,12 @@ def util_call_with_backoff(
                status_codes.append(cause_exec.response.status_code)
                warnings.warn(
                    f"HTTP Exception for {cause_exec.request.url} - {cause_exec}",
+                    stacklevel=2,
                )
            else:
-                warnings.warn(f"Unexpected error: {e}")
+                warnings.warn(f"Unexpected error: {e}", stacklevel=2)
        except Exception as e:  # pragma: no cover
-            warnings.warn(f"Unexpected error: {e}")
+            warnings.warn(f"Unexpected error: {e}", stacklevel=2)

        retry_count = retry_count + 1

@@ -7,11 +7,11 @@ import tempfile
 import zipfile
 from collections import defaultdict
 from collections import deque
+from datetime import UTC
 from datetime import datetime
 from datetime import timedelta
 from http import HTTPStatus
 from pathlib import Path
-from time import mktime
 from typing import TYPE_CHECKING
 from typing import Any
 from typing import Literal
@@ -60,7 +60,6 @@ from django.http import StreamingHttpResponse
 from django.shortcuts import get_object_or_404
 from django.utils import timezone
 from django.utils.decorators import method_decorator
-from django.utils.timezone import make_aware
 from django.utils.translation import get_language
 from django.utils.translation import gettext_lazy as _
 from django.views import View
@@ -285,7 +284,7 @@ def _get_more_like_id(query_params: dict[str, Any], user: User | None) -> int:
            pk=more_like_doc_id,
        )
    except (TypeError, ValueError, Document.DoesNotExist):
-        raise PermissionDenied(_("Invalid more_like_id"))
+        raise PermissionDenied(_("Invalid more_like_id")) from None

    if user and not has_perms_owner_aware(
        user,
@@ -1101,7 +1100,7 @@ class DocumentViewSet(
                "root_document",
            ).get(pk=pk)
        except Document.DoesNotExist:
-            raise Http404
+            raise Http404 from None

        root_doc = get_root_document(doc)
        if request.user is not None and not has_perms_owner_aware(
@@ -1264,7 +1263,7 @@ class DocumentViewSet(
                "root_document",
            ).get(id=pk)
        except Document.DoesNotExist:
-            raise Http404
+            raise Http404 from None

        root_doc = get_root_document(
            request_doc,
@@ -1506,7 +1505,6 @@ class DocumentViewSet(
                "document %s: %s",
                doc.pk,
                exc,
-                exc_info=True,
            )
            raise ValidationError({"ai": [_("Invalid AI configuration.")]}) from exc

@@ -1580,7 +1578,7 @@ class DocumentViewSet(
                disposition="inline",
            )
        except FileNotFoundError:
-            raise Http404
+            raise Http404 from None

    @action(methods=["get"], detail=True, filter_backends=[])
    @method_decorator(cache_control(no_cache=True))
@@ -1605,14 +1603,14 @@ class DocumentViewSet(

            return FileResponse(handle, content_type="image/webp")
        except FileNotFoundError:
-            raise Http404
+            raise Http404 from None

    @action(methods=["get"], detail=True)
    def download(self, request, pk=None):
        try:
            return self.file_response(pk, request, "attachment")
        except (FileNotFoundError, Document.DoesNotExist):
-            raise Http404
+            raise Http404 from None

    @action(
        methods=["get", "post", "delete"],
@@ -1637,7 +1635,7 @@ class DocumentViewSet(
            ):
                return HttpResponseForbidden("Insufficient permissions to view notes")
        except Document.DoesNotExist:
-            raise Http404
+            raise Http404 from None

        serializer = self.get_serializer(doc)

@@ -1708,7 +1706,7 @@ class DocumentViewSet(
            try:
                note_id_int = int(note_id)
            except ValueError:
-                raise ValidationError({"id": "A valid integer is required."})
+                raise ValidationError({"id": "A valid integer is required."}) from None
            note = get_object_or_404(Note, id=note_id_int, document=doc)
            if settings.AUDIT_LOG_ENABLED:
                LogEntry.objects.log_create(
@@ -1752,7 +1750,7 @@ class DocumentViewSet(
                    "Insufficient permissions to add share link",
                )
        except Document.DoesNotExist:
-            raise Http404
+            raise Http404 from None

        if request.method == "GET":
            now = timezone.now()
@@ -1780,7 +1778,7 @@ class DocumentViewSet(
                    "Insufficient permissions",
                )
        except Document.DoesNotExist:  # pragma: no cover
-            raise Http404
+            raise Http404 from None

        # documents
        entries = [
@@ -1801,28 +1799,28 @@ class DocumentViewSet(
        ]

        # custom fields
-        for entry in LogEntry.objects.get_for_objects(
-            doc.custom_fields.all(),
-        ).select_related("actor"):
-            entries.append(
-                {
-                    "id": entry.id,
-                    "timestamp": entry.timestamp,
-                    "action": entry.get_action_display(),
-                    "changes": {
-                        "custom_fields": {
-                            "type": "custom_field",
-                            "field": str(entry.object_repr).split(":")[0].strip(),
-                            "value": str(entry.object_repr).split(":")[1].strip(),
-                        },
+        entries.extend(
+            {
+                "id": entry.id,
+                "timestamp": entry.timestamp,
+                "action": entry.get_action_display(),
+                "changes": {
+                    "custom_fields": {
+                        "type": "custom_field",
+                        "field": str(entry.object_repr).split(":")[0].strip(),
+                        "value": str(entry.object_repr).split(":")[1].strip(),
                    },
-                    "actor": (
-                        {"id": entry.actor.id, "username": entry.actor.username}
-                        if entry.actor
-                        else None
-                    ),
                },
-            )
+                "actor": (
+                    {"id": entry.actor.id, "username": entry.actor.username}
+                    if entry.actor
+                    else None
+                ),
+            }
+            for entry in LogEntry.objects.get_for_objects(
+                doc.custom_fields.all(),
+            ).select_related("actor")
+        )

        return Response(sorted(entries, key=lambda x: x["timestamp"], reverse=True))

@@ -1930,13 +1928,13 @@ class DocumentViewSet(
            ):
                return HttpResponseForbidden("Insufficient permissions")
        except Document.DoesNotExist:
-            raise Http404
+            raise Http404 from None

        try:
            doc_name, doc_data = serializer.validated_data.get("document")
            version_label = serializer.validated_data.get("version_label")

-            t = int(mktime(datetime.now().timetuple()))
+            t = int(timezone.now().timestamp())

            settings.SCRATCH_DIR.mkdir(parents=True, exist_ok=True)

@@ -1981,7 +1979,7 @@ class DocumentViewSet(
                "root_document",
            ).get(pk=pk)
        except Document.DoesNotExist:
-            raise Http404
+            raise Http404 from None
        return get_root_document(root_doc)

    def _get_version_doc_for_root(self, root_doc: Document, version_id) -> Document:
@@ -1990,7 +1988,7 @@ class DocumentViewSet(
                pk=version_id,
            )
        except Document.DoesNotExist:
-            raise Http404
+            raise Http404 from None

        if (
            version_doc.id != root_doc.id
@@ -2545,7 +2543,7 @@ class LogViewSet(ViewSet):
            try:
                limit = int(limit_param)
            except (TypeError, ValueError):
-                raise ValidationError({"limit": "Must be a positive integer"})
+                raise ValidationError({"limit": "Must be a positive integer"}) from None
            if limit < 1:
                raise ValidationError({"limit": "Must be a positive integer"})
        else:
@@ -3136,7 +3134,7 @@ class PostDocumentView(GenericAPIView[Any]):
        cf = serializer.validated_data.get("custom_fields")
        from_webui = serializer.validated_data.get("from_webui")

-        t = int(mktime(datetime.now().timetuple()))
+        t = int(timezone.now().timestamp())

        settings.SCRATCH_DIR.mkdir(parents=True, exist_ok=True)

@@ -4948,7 +4946,7 @@ class SystemStatusView(PassUserMixin):
            index_dir = settings.INDEX_DIR
            mtimes = [p.stat().st_mtime for p in index_dir.iterdir() if p.is_file()]
            index_last_modified = (
-                make_aware(datetime.fromtimestamp(max(mtimes))) if mtimes else None
+                datetime.fromtimestamp(max(mtimes), tz=UTC) if mtimes else None
            )
        except Exception as e:
            index_status = "ERROR"
@@ -84,10 +84,11 @@ def binaries_check(app_configs: Any, **kwargs: Any) -> list[Error]:

    binaries = (settings.CONVERT_BINARY, "tesseract", "gs")

-    check_messages = []
-    for binary in binaries:
-        if shutil.which(binary) is None:
-            check_messages.append(Warning(error.format(binary), hint))
+    check_messages = [
+        Warning(error.format(binary), hint)
+        for binary in binaries
+        if shutil.which(binary) is None
+    ]

    return check_messages

@@ -383,14 +384,14 @@ def check_default_language_available(app_configs: Any, **kwargs: Any) -> list[Er

        specified_langs = [x.strip() for x in settings.OCR_LANGUAGE.split("+")]

-        for lang in specified_langs:
-            if lang not in installed_langs:
-                errs.append(
-                    Error(
-                        f"The selected ocr language {lang} is "
-                        f"not installed. Paperless cannot OCR your documents "
-                        f"without it. Please fix PAPERLESS_OCR_LANGUAGE.",
-                    ),
-                )
+        errs.extend(
+            Error(
+                f"The selected ocr language {lang} is "
+                f"not installed. Paperless cannot OCR your documents "
+                f"without it. Please fix PAPERLESS_OCR_LANGUAGE.",
+            )
+            for lang in specified_langs
+            if lang not in installed_langs
+        )

    return errs
@@ -649,11 +649,10 @@ class MailDocumentParser:
        if data["bcc"]:
            data["bcc_label"] = "BCC"

-        att = []
-        for a in mail.attachments:
-            att.append(
-                f"{a.filename} ({naturalsize(a.size, binary=True, format='%.2f')})",
-            )
+        att = [
+            f"{a.filename} ({naturalsize(a.size, binary=True, format='%.2f')})"
+            for a in mail.attachments
+        ]
        data["attachments"] = clean_html(", ".join(att))
        if data["attachments"]:
            data["attachments_label"] = "Attachments"
@@ -331,7 +331,7 @@ def parse_dateparser_languages(languages: str | None) -> list[str]:
    language_list = languages.split("+") if languages else []
    # There is an unfixed issue in zh-Hant and zh-Hans locales in the dateparser lib.
    # See: https://github.com/scrapinghub/dateparser/issues/875
-    for index, language in enumerate(language_list):
+    for _, language in enumerate(language_list):
        if language.startswith("zh-") and "zh" not in language_list:
            logger.warning(
                f"Chinese locale detected: {language}. dateparser might fail to parse"
@@ -193,7 +193,7 @@ def reject_dangerous_svg(file: UploadedFile) -> None:
        tree = etree.parse(file, parser)
        root = tree.getroot()
    except etree.XMLSyntaxError:
-        raise ValidationError("Invalid SVG file.")
+        raise ValidationError("Invalid SVG file.") from None

    for element in root.iter():
        tag: str = etree.QName(element.tag).localname.lower()
@@ -155,7 +155,7 @@ def stream_chat_with_documents(query_str: str, documents: list[Document]):
    try:
        yield from _stream_chat_with_documents(query_str, documents)
    except Exception as e:
-        logger.exception(f"Failed to stream document chat response: {e}", exc_info=True)
+        logger.exception("Failed to stream document chat response: %s", e)
        yield CHAT_ERROR_MESSAGE


@@ -152,8 +152,10 @@ def build_llm_index_text(doc: Document) -> str:
        f"Notes: {','.join([str(c.note) for c in Note.objects.filter(document=doc)])}",
    ]

-    for instance in doc.custom_fields.all():
-        lines.append(f"Custom Field - {instance.field.name}: {instance}")
+    lines.extend(
+        f"Custom Field - {instance.field.name}: {instance}"
+        for instance in doc.custom_fields.all()
+    )

    lines.append("\nContent:\n")
    lines.append(doc.content or "")
@@ -313,7 +313,7 @@ def update_llm_index(
                        continue

                    # Delete from docstore, FAISS IndexFlatL2 are append-only
-                    for node in doc_nodes:
+                    for _ in doc_nodes:
                        remove_document_docstore_nodes(document, index)

                nodes.extend(build_document_node(document, chunk_size=chunk_size))
@@ -155,7 +155,7 @@ def test_get_ai_document_classification_failure(mock_run_llm_query, mock_documen
    mock_run_llm_query.side_effect = Exception("LLM query failed")

    # assert raises an exception
-    with pytest.raises(Exception):
+    with pytest.raises(ValueError, match="Unsupported LLM backend"):
        get_ai_document_classification(mock_document)


@@ -226,7 +226,7 @@ def test_get_or_create_storage_context_raises_exception(
    temp_llm_index_dir,
    mock_embed_model,
 ) -> None:
-    with pytest.raises(Exception):
+    with pytest.raises(ValueError):
        indexing.get_or_create_storage_context(rebuild=False)


@@ -273,7 +273,7 @@ def test_load_or_build_index_raises_exception_when_no_nodes(
            return_value=MagicMock(),
        ),
    ):
-        with pytest.raises(Exception):
+        with pytest.raises(Exception):  # noqa: B017
            indexing.load_or_build_index()


@@ -4,7 +4,6 @@ import logging
 import ssl
 import tempfile
 import traceback
-from datetime import date
 from datetime import timedelta
 from fnmatch import fnmatch
 from pathlib import Path
@@ -385,7 +384,7 @@ def make_criterias(rule: MailRule, *, supports_gmail_labels: bool):
    Returns criteria to be applied to MailBox.fetch for the given rule.
    """

-    maximum_age = date.today() - timedelta(days=rule.maximum_age)
+    maximum_age = timezone.now().date() - timedelta(days=rule.maximum_age)
    criterias = {}
    if rule.maximum_age > 0:
        criterias["date_gte"] = maximum_age
@@ -637,8 +636,8 @@ class MailAccountHandler(LoggingMixin):
                    self.log.info(f"Located folder: {folder_info.name}")
            except Exception as e:
                self.log.error(
-                    "Exception during folder listing, unable to provide list folders: "
-                    + str(e),
+                    "Exception during folder listing, unable to provide list folders: %s",
+                    e,
                )

            raise MailError(
@@ -349,9 +349,10 @@ class MailMocker(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
            len(expected_call_args),
        )

-        for (mock_args, mock_kwargs), expected_signatures in zip(
+        for (_, mock_kwargs), expected_signatures in zip(
            self._queue_consumption_tasks_mock.call_args_list,
            expected_call_args,
+            strict=False,
        ):
            consume_tasks = mock_kwargs["consume_tasks"]

@@ -361,6 +362,7 @@ class MailMocker(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
            for consume_task, expected_signature in zip(
                consume_tasks,
                expected_signatures,
+                strict=False,
            ):
                input_doc = consume_task.kwargs["input_doc"]
                overrides = consume_task.kwargs["overrides"]
@@ -383,7 +385,7 @@ class MailMocker(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
        """
        Applies pending actions to mails by inspecting calls to the queue_consumption_tasks method.
        """
-        for args, kwargs in self._queue_consumption_tasks_mock.call_args_list:
+        for _, kwargs in self._queue_consumption_tasks_mock.call_args_list:
            message = kwargs["message"]
            rule = kwargs["rule"]
            apply_mail_action([], rule.pk, message.uid, message.subject, message.date)
@@ -184,7 +184,12 @@ class TestMailMessageGpgDecryptor(TestMail):
                EMAIL_GNUPG_HOME=empty_gpg_home,
            ):
                message_decryptor = MailMessageDecryptor()
-                self.assertRaises(Exception, message_decryptor.run, encrypted_message)
+                self.assertRaisesRegex(
+                    Exception,
+                    "Decryption failed",
+                    message_decryptor.run,
+                    encrypted_message,
+                )
        finally:
            # Clean up the temporary GPG home used only by this test
            try:
@@ -1,4 +1,3 @@
-import datetime
 import logging
 from datetime import timedelta
 from http import HTTPStatus
@@ -86,7 +85,7 @@ class MailAccountViewSet(PassUserMixin, ModelViewSet[MailAccount]):
    @action(methods=["post"], detail=False)
    def test(self, request):
        logger = logging.getLogger("paperless_mail")
-        request.data["name"] = datetime.datetime.now().isoformat()
+        request.data["name"] = timezone.now().isoformat()
        serializer = self.get_serializer(data=request.data)
        serializer.is_valid(raise_exception=True)
        existing_account = None
Author	SHA1	Message	Date
stumpylog	a1e7c0614e	Updates the script in docker too	2026-06-04 12:02:45 -07:00
stumpylog	dac05107a7	ruff: enable S324 (hashlib insecure hash functions) Adds usedforsecurity=False to all hashlib.md5() calls, documenting that these are used for file checksum comparison, not security. The production call in _path_matches_checksum will be replaced with compute_checksum() (SHA-256) in a separate branch. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-06-04 11:37:17 -07:00
stumpylog	89ce62d97d	ruff: enable PERF (perflint) Fixes 9 violations — loop-based append replaced with comprehensions or extend throughout production and test code: - PERF401: list comprehensions / extend for transformed lists - PERF402: list() around a generator for copied lists Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-06-04 11:22:07 -07:00
stumpylog	50f5d5f2e9	ruff: enable DTZ (flake8-datetimez) Fixes 44 violations — naive datetime usage replaced with tz-aware equivalents throughout production and test code: - datetime.now() → timezone.now() (Django) or datetime.now(tz=UTC) - datetime.fromtimestamp() → datetime.fromtimestamp(ts, tz=UTC) - datetime.date.today() → timezone.now().date() - datetime.datetime(...) constructors → tzinfo=UTC in tests - UP017 auto-converted datetime.timezone.utc → datetime.UTC (py3.11+) Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-06-04 10:47:13 -07:00
stumpylog	92b59eebfc	ruff: enable B (flake8-bugbear) Fixes 71 violations across production and test code: - B904 (~50): raise-from in except blocks; from None at API/view boundaries, from exc where the cause is the direct origin - B017 (9): pytest.raises(Exception) → specific type or match= arg - B007 (5): unused loop vars renamed to _ - B027 (1): missing @abstractmethod on DateParserPluginBase.__exit__ - B028 (3): warnings.warn without stacklevel=2 in test utils - B011 (1): assert False → raise AssertionError() - B905 (3): zip() without an explicit strict= argument (strict=False added) - B009 (3): getattr with constant string (auto-fixed) Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-06-04 10:26:08 -07:00
stumpylog	59fd2ff9e8	ruff: enable G (logging format), ignore G004 (f-strings) Replaces the single G201 selector with the full G group. Fixes 2x G003 (string concat in log calls) and 2x G202 (redundant exc_info on logger.exception). G004 (f-strings in logging) is ignored as f-string style is accepted throughout this codebase. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-06-04 09:32:52 -07:00