Compare commits

..

2 Commits

Author SHA1 Message Date
Trenton H
4064cc0e07 Enables prek workspace mode and moves hooks to specific locations as possible, moving to mdformat instead of prettier 2026-02-26 13:42:11 -08:00
dependabot[bot]
e67e28a509 Chore(deps): Bump nltk from 3.9.2 to 3.9.3 (#12177)
Bumps [nltk](https://github.com/nltk/nltk) from 3.9.2 to 3.9.3.
- [Changelog](https://github.com/nltk/nltk/blob/develop/ChangeLog)
- [Commits](https://github.com/nltk/nltk/compare/3.9.2...3.9.3)

---
updated-dependencies:
- dependency-name: nltk
  dependency-version: 3.9.3
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-02-26 10:40:25 -08:00
20 changed files with 821 additions and 143 deletions

View File

@@ -31,7 +31,7 @@ jobs:
runs-on: ubuntu-24.04
strategy:
matrix:
python-version: ['3.11', '3.12', '3.13', '3.14']
python-version: ['3.10', '3.11', '3.12']
fail-fast: false
steps:
- name: Checkout

View File

@@ -1,9 +1,4 @@
# This file configures pre-commit hooks.
# See https://pre-commit.com/ for general information
# See https://pre-commit.com/hooks.html for a listing of possible hooks
# We actually run via https://github.com/j178/prek which is compatible
repos:
# General hooks
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v6.0.0
hooks:
@@ -11,21 +6,16 @@ repos:
- id: check-json
exclude: "tsconfig.*json"
- id: check-yaml
args:
- "--unsafe"
args: ["--unsafe"]
- id: check-toml
- id: check-executables-have-shebangs
- id: end-of-file-fixer
exclude_types:
- svg
- pofile
exclude_types: [svg, pofile]
exclude: "(^LICENSE$|^src/documents/static/bootstrap.min.css$)"
- id: mixed-line-ending
args:
- "--fix=lf"
args: ["--fix=lf"]
- id: trailing-whitespace
exclude_types:
- svg
exclude_types: [svg]
- id: check-case-conflict
- id: detect-private-key
- repo: https://github.com/codespell-project/codespell
@@ -33,53 +23,29 @@ repos:
hooks:
- id: codespell
additional_dependencies: [tomli]
exclude_types:
- pofile
- json
# See https://github.com/prettier/prettier/issues/15742 for the fork reason
- repo: https://github.com/rbubley/mirrors-prettier
rev: 'v3.8.1'
exclude_types: [pofile, json]
- repo: https://github.com/google/yamlfmt
rev: v0.21.0
hooks:
- id: prettier
types_or:
- javascript
- ts
- markdown
additional_dependencies:
- prettier@3.3.3
- 'prettier-plugin-organize-imports@4.1.0'
# Python hooks
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.15.0
hooks:
- id: ruff-check
- id: ruff-format
- repo: https://github.com/tox-dev/pyproject-fmt
rev: "v2.12.1"
hooks:
- id: pyproject-fmt
# Dockerfile hooks
- id: yamlfmt
exclude: "^src-ui/pnpm-lock.yaml"
types: [yaml]
- repo: https://github.com/AleksaC/hadolint-py
rev: v2.14.0
hooks:
- id: hadolint
# Shell script hooks
- repo: https://github.com/lovesegfault/beautysh
rev: v6.4.2
hooks:
- id: beautysh
types: [file]
files: (\.sh$|/run$|/finish$)
args:
- "--tab"
args: ["--tab"]
- repo: https://github.com/shellcheck-py/shellcheck-py
rev: "v0.11.0.1"
hooks:
- id: shellcheck
- repo: https://github.com/google/yamlfmt
rev: v0.21.0
- repo: https://github.com/tox-dev/pyproject-fmt
rev: "v2.16.2"
hooks:
- id: yamlfmt
exclude: "^src-ui/pnpm-lock.yaml"
types:
- yaml
- id: pyproject-fmt

View File

@@ -0,0 +1,6 @@
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.15.4
hooks:
- id: ruff-check
- id: ruff-format

6
docs/.markdownlint.yaml Normal file
View File

@@ -0,0 +1,6 @@
default: true
MD013: false # line length -- mdformat handles this
MD033: false # inline HTML -- MkDocs uses it
MD041: false # first line heading -- not always true in MkDocs
MD046: # code block style
style: fenced

View File

@@ -0,0 +1,13 @@
repos:
- repo: https://github.com/executablebooks/mdformat
rev: 1.0.0
hooks:
- id: mdformat
additional_dependencies:
- mdformat-mkdocs
- mdformat-ruff
- repo: https://github.com/DavidAnson/markdownlint-cli2
rev: v0.21.0
hooks:
- id: markdownlint-cli2
args: ["--config", ".markdownlint.yaml"]

View File

@@ -3,9 +3,10 @@ name = "paperless-ngx"
version = "2.20.8"
description = "A community-supported supercharged document management system: scan, index and archive all your physical documents"
readme = "README.md"
requires-python = ">=3.11"
requires-python = ">=3.10"
classifiers = [
"Programming Language :: Python :: 3 :: Only",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
@@ -176,7 +177,7 @@ torch = [
]
[tool.ruff]
target-version = "py311"
target-version = "py310"
line-length = 88
src = [
"src",

View File

@@ -0,0 +1,9 @@
repos:
- repo: https://github.com/rbubley/mirrors-prettier
rev: 'v3.8.1'
hooks:
- id: prettier
types_or: [javascript, ts]
additional_dependencies:
- prettier@3.3.3
- 'prettier-plugin-organize-imports@4.1.0'

10
src/.pre-commit-config.yaml Executable file
View File

@@ -0,0 +1,10 @@
orphan: true
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.15.4
hooks:
- id: ruff-check
- id: ruff-format

View File

@@ -1,5 +1,5 @@
from datetime import UTC
from datetime import datetime
from datetime import timezone
from typing import Any
from django.conf import settings
@@ -139,7 +139,7 @@ def thumbnail_last_modified(request: Any, pk: int) -> datetime | None:
# No cache, get the timestamp and cache the datetime
last_modified = datetime.fromtimestamp(
doc.thumbnail_path.stat().st_mtime,
tz=UTC,
tz=timezone.utc,
)
cache.set(doc_key, last_modified, CACHE_50_MINUTES)
return last_modified

View File

@@ -2,7 +2,7 @@ import datetime
import hashlib
import os
import tempfile
from enum import StrEnum
from enum import Enum
from pathlib import Path
from typing import TYPE_CHECKING
from typing import Final
@@ -80,7 +80,7 @@ class ConsumerError(Exception):
pass
class ConsumerStatusShortMessage(StrEnum):
class ConsumerStatusShortMessage(str, Enum):
DOCUMENT_ALREADY_EXISTS = "document_already_exists"
DOCUMENT_ALREADY_EXISTS_IN_TRASH = "document_already_exists_in_trash"
ASN_ALREADY_EXISTS = "asn_already_exists"

View File

@@ -5,10 +5,10 @@ import math
import re
from collections import Counter
from contextlib import contextmanager
from datetime import UTC
from datetime import datetime
from datetime import time
from datetime import timedelta
from datetime import timezone
from shutil import rmtree
from time import sleep
from typing import TYPE_CHECKING
@@ -437,7 +437,7 @@ class ManualResults:
class LocalDateParser(English):
def reverse_timezone_offset(self, d):
return (d.replace(tzinfo=django_timezone.get_current_timezone())).astimezone(
UTC,
timezone.utc,
)
def date_from(self, *args, **kwargs):
@@ -641,8 +641,8 @@ def rewrite_natural_date_keywords(query_string: str) -> str:
end = datetime(local_now.year - 1, 12, 31, 23, 59, 59, tzinfo=tz)
# Convert to UTC and format
start_str = start.astimezone(UTC).strftime("%Y%m%d%H%M%S")
end_str = end.astimezone(UTC).strftime("%Y%m%d%H%M%S")
start_str = start.astimezone(timezone.utc).strftime("%Y%m%d%H%M%S")
end_str = end.astimezone(timezone.utc).strftime("%Y%m%d%H%M%S")
return f"{field}:[{start_str} TO {end_str}]"
return re.sub(pattern, repl, query_string, flags=re.IGNORECASE)

View File

@@ -9,7 +9,7 @@ from types import TracebackType
try:
from typing import Self
except ImportError:
from typing import Self
from typing_extensions import Self
import dateparser

View File

@@ -8,7 +8,7 @@ if TYPE_CHECKING:
from channels_redis.pubsub import RedisPubSubChannelLayer
class ProgressStatusOptions(enum.StrEnum):
class ProgressStatusOptions(str, enum.Enum):
STARTED = "STARTED"
WORKING = "WORKING"
SUCCESS = "SUCCESS"

View File

@@ -24,7 +24,7 @@ def base_config() -> DateParserConfig:
12,
0,
0,
tzinfo=datetime.UTC,
tzinfo=datetime.timezone.utc,
),
filename_date_order="YMD",
content_date_order="DMY",
@@ -45,7 +45,7 @@ def config_with_ignore_dates() -> DateParserConfig:
12,
0,
0,
tzinfo=datetime.UTC,
tzinfo=datetime.timezone.utc,
),
filename_date_order="DMY",
content_date_order="MDY",

View File

@@ -101,50 +101,50 @@ class TestFilterDate:
[
# Valid Dates
pytest.param(
datetime.datetime(2024, 1, 10, tzinfo=datetime.UTC),
datetime.datetime(2024, 1, 10, tzinfo=datetime.UTC),
datetime.datetime(2024, 1, 10, tzinfo=datetime.timezone.utc),
datetime.datetime(2024, 1, 10, tzinfo=datetime.timezone.utc),
id="valid_past_date",
),
pytest.param(
datetime.datetime(2024, 1, 15, 12, 0, 0, tzinfo=datetime.UTC),
datetime.datetime(2024, 1, 15, 12, 0, 0, tzinfo=datetime.UTC),
datetime.datetime(2024, 1, 15, 12, 0, 0, tzinfo=datetime.timezone.utc),
datetime.datetime(2024, 1, 15, 12, 0, 0, tzinfo=datetime.timezone.utc),
id="exactly_at_reference",
),
pytest.param(
datetime.datetime(1901, 1, 1, tzinfo=datetime.UTC),
datetime.datetime(1901, 1, 1, tzinfo=datetime.UTC),
datetime.datetime(1901, 1, 1, tzinfo=datetime.timezone.utc),
datetime.datetime(1901, 1, 1, tzinfo=datetime.timezone.utc),
id="year_1901_valid",
),
# Date is > reference_time
pytest.param(
datetime.datetime(2024, 1, 16, tzinfo=datetime.UTC),
datetime.datetime(2024, 1, 16, tzinfo=datetime.timezone.utc),
None,
id="future_date_day_after",
),
# date.date() in ignore_dates
pytest.param(
datetime.datetime(2024, 1, 1, 0, 0, 0, tzinfo=datetime.UTC),
datetime.datetime(2024, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc),
None,
id="ignored_date_midnight_jan1",
),
pytest.param(
datetime.datetime(2024, 1, 1, 10, 30, 0, tzinfo=datetime.UTC),
datetime.datetime(2024, 1, 1, 10, 30, 0, tzinfo=datetime.timezone.utc),
None,
id="ignored_date_midday_jan1",
),
pytest.param(
datetime.datetime(2024, 12, 25, 15, 0, 0, tzinfo=datetime.UTC),
datetime.datetime(2024, 12, 25, 15, 0, 0, tzinfo=datetime.timezone.utc),
None,
id="ignored_date_dec25_future",
),
# date.year <= 1900
pytest.param(
datetime.datetime(1899, 12, 31, tzinfo=datetime.UTC),
datetime.datetime(1899, 12, 31, tzinfo=datetime.timezone.utc),
None,
id="year_1899",
),
pytest.param(
datetime.datetime(1900, 1, 1, tzinfo=datetime.UTC),
datetime.datetime(1900, 1, 1, tzinfo=datetime.timezone.utc),
None,
id="year_1900_boundary",
),
@@ -176,7 +176,7 @@ class TestFilterDate:
1,
12,
0,
tzinfo=datetime.UTC,
tzinfo=datetime.timezone.utc,
)
another_ignored = datetime.datetime(
2024,
@@ -184,7 +184,7 @@ class TestFilterDate:
25,
15,
30,
tzinfo=datetime.UTC,
tzinfo=datetime.timezone.utc,
)
allowed_date = datetime.datetime(
2024,
@@ -192,7 +192,7 @@ class TestFilterDate:
2,
12,
0,
tzinfo=datetime.UTC,
tzinfo=datetime.timezone.utc,
)
assert parser._filter_date(ignored_date) is None
@@ -204,7 +204,7 @@ class TestFilterDate:
regex_parser: RegexDateParserPlugin,
) -> None:
"""Should work with timezone-aware datetimes."""
date_utc = datetime.datetime(2024, 1, 10, 12, 0, tzinfo=datetime.UTC)
date_utc = datetime.datetime(2024, 1, 10, 12, 0, tzinfo=datetime.timezone.utc)
result = regex_parser._filter_date(date_utc)
@@ -221,8 +221,8 @@ class TestRegexDateParser:
"report-2023-12-25.txt",
"Event recorded on 25/12/2022.",
[
datetime.datetime(2023, 12, 25, tzinfo=datetime.UTC),
datetime.datetime(2022, 12, 25, tzinfo=datetime.UTC),
datetime.datetime(2023, 12, 25, tzinfo=datetime.timezone.utc),
datetime.datetime(2022, 12, 25, tzinfo=datetime.timezone.utc),
],
id="filename-y-m-d_and_content-d-m-y",
),
@@ -230,8 +230,8 @@ class TestRegexDateParser:
"img_2023.01.02.jpg",
"Taken on 01/02/2023",
[
datetime.datetime(2023, 1, 2, tzinfo=datetime.UTC),
datetime.datetime(2023, 2, 1, tzinfo=datetime.UTC),
datetime.datetime(2023, 1, 2, tzinfo=datetime.timezone.utc),
datetime.datetime(2023, 2, 1, tzinfo=datetime.timezone.utc),
],
id="ambiguous-dates-respect-orders",
),
@@ -239,7 +239,7 @@ class TestRegexDateParser:
"notes.txt",
"bad date 99/99/9999 and 25/12/2022",
[
datetime.datetime(2022, 12, 25, tzinfo=datetime.UTC),
datetime.datetime(2022, 12, 25, tzinfo=datetime.timezone.utc),
],
id="parse-exception-skips-bad-and-yields-good",
),
@@ -275,24 +275,24 @@ class TestRegexDateParser:
or "2023.12.25" in date_string
or "2023-12-25" in date_string
):
return datetime.datetime(2023, 12, 25, tzinfo=datetime.UTC)
return datetime.datetime(2023, 12, 25, tzinfo=datetime.timezone.utc)
# content DMY 25/12/2022
if "25/12/2022" in date_string or "25-12-2022" in date_string:
return datetime.datetime(2022, 12, 25, tzinfo=datetime.UTC)
return datetime.datetime(2022, 12, 25, tzinfo=datetime.timezone.utc)
# filename YMD 2023.01.02
if "2023.01.02" in date_string or "2023-01-02" in date_string:
return datetime.datetime(2023, 1, 2, tzinfo=datetime.UTC)
return datetime.datetime(2023, 1, 2, tzinfo=datetime.timezone.utc)
# ambiguous 01/02/2023 -> respect DATE_ORDER setting
if "01/02/2023" in date_string:
if date_order == "DMY":
return datetime.datetime(2023, 2, 1, tzinfo=datetime.UTC)
return datetime.datetime(2023, 2, 1, tzinfo=datetime.timezone.utc)
if date_order == "YMD":
return datetime.datetime(2023, 1, 2, tzinfo=datetime.UTC)
return datetime.datetime(2023, 1, 2, tzinfo=datetime.timezone.utc)
# fallback
return datetime.datetime(2023, 2, 1, tzinfo=datetime.UTC)
return datetime.datetime(2023, 2, 1, tzinfo=datetime.timezone.utc)
# simulate parse failure for malformed input
if "99/99/9999" in date_string or "bad date" in date_string:
@@ -328,7 +328,7 @@ class TestRegexDateParser:
12,
0,
0,
tzinfo=datetime.UTC,
tzinfo=datetime.timezone.utc,
),
filename_date_order="YMD",
content_date_order="DMY",
@@ -344,13 +344,13 @@ class TestRegexDateParser:
) -> datetime.datetime | None:
if "10/12/2023" in date_string or "10-12-2023" in date_string:
# ignored date
return datetime.datetime(2023, 12, 10, tzinfo=datetime.UTC)
return datetime.datetime(2023, 12, 10, tzinfo=datetime.timezone.utc)
if "01/02/2024" in date_string or "01-02-2024" in date_string:
# future relative to reference_time -> filtered
return datetime.datetime(2024, 2, 1, tzinfo=datetime.UTC)
return datetime.datetime(2024, 2, 1, tzinfo=datetime.timezone.utc)
if "05/01/2023" in date_string or "05-01-2023" in date_string:
# valid
return datetime.datetime(2023, 1, 5, tzinfo=datetime.UTC)
return datetime.datetime(2023, 1, 5, tzinfo=datetime.timezone.utc)
return None
mocker.patch(target, side_effect=fake_parse)
@@ -358,7 +358,7 @@ class TestRegexDateParser:
content = "Ignored: 10/12/2023, Future: 01/02/2024, Keep: 05/01/2023"
results = list(parser.parse("whatever.txt", content))
assert results == [datetime.datetime(2023, 1, 5, tzinfo=datetime.UTC)]
assert results == [datetime.datetime(2023, 1, 5, tzinfo=datetime.timezone.utc)]
def test_parse_handles_no_matches_and_returns_empty_list(
self,
@@ -392,7 +392,7 @@ class TestRegexDateParser:
12,
0,
0,
tzinfo=datetime.UTC,
tzinfo=datetime.timezone.utc,
),
filename_date_order=None,
content_date_order="DMY",
@@ -409,9 +409,9 @@ class TestRegexDateParser:
) -> datetime.datetime | None:
# return distinct datetimes so we can tell which source was parsed
if "25/12/2022" in date_string:
return datetime.datetime(2022, 12, 25, tzinfo=datetime.UTC)
return datetime.datetime(2022, 12, 25, tzinfo=datetime.timezone.utc)
if "2023-12-25" in date_string:
return datetime.datetime(2023, 12, 25, tzinfo=datetime.UTC)
return datetime.datetime(2023, 12, 25, tzinfo=datetime.timezone.utc)
return None
mock = mocker.patch(target, side_effect=fake_parse)
@@ -429,5 +429,5 @@ class TestRegexDateParser:
assert "25/12/2022" in called_date_string
# And the parser should have yielded the corresponding datetime
assert results == [
datetime.datetime(2022, 12, 25, tzinfo=datetime.UTC),
datetime.datetime(2022, 12, 25, tzinfo=datetime.timezone.utc),
]

View File

@@ -336,11 +336,7 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
added=d1,
)
# Account for 3.14 padding changes
expected_year: str = d1.strftime("%Y")
expected_filename: Path = Path(f"{expected_year}-01-09.pdf")
self.assertEqual(generate_filename(doc1), expected_filename)
self.assertEqual(generate_filename(doc1), Path("232-01-09.pdf"))
doc1.added = timezone.make_aware(datetime.datetime(2020, 11, 16, 1, 1, 1))

View File

@@ -21,7 +21,7 @@ class TestDateLocalization:
14,
30,
5,
tzinfo=datetime.UTC,
tzinfo=datetime.timezone.utc,
)
TEST_DATETIME_STRING: str = "2023-10-26T14:30:05+00:00"

View File

@@ -4570,7 +4570,7 @@ class TestDateWorkflowLocalization(
14,
30,
5,
tzinfo=datetime.UTC,
tzinfo=datetime.timezone.utc,
)
@pytest.mark.parametrize(

View File

@@ -1,7 +1,7 @@
from __future__ import annotations
from dataclasses import dataclass
from enum import StrEnum
from enum import Enum
from typing import TYPE_CHECKING
from typing import Any
@@ -11,7 +11,7 @@ if TYPE_CHECKING:
from django.http import HttpRequest
class VersionResolutionError(StrEnum):
class VersionResolutionError(str, Enum):
INVALID = "invalid"
NOT_FOUND = "not_found"

741
uv.lock generated

File diff suppressed because it is too large Load Diff