Compare commits

..

1 Commits

Author SHA1 Message Date
dependabot[bot]
bd84882489 Bump ocrmypdf from 16.13.0 to 17.3.0 in the document-processing group
Bumps the document-processing group with 1 update: [ocrmypdf](https://github.com/ocrmypdf/OCRmyPDF).


Updates `ocrmypdf` from 16.13.0 to 17.3.0
- [Release notes](https://github.com/ocrmypdf/OCRmyPDF/releases)
- [Commits](https://github.com/ocrmypdf/OCRmyPDF/compare/v16.13.0...v17.3.0)

---
updated-dependencies:
- dependency-name: ocrmypdf
  dependency-version: 17.3.0
  dependency-type: direct:production
  update-type: version-update:semver-major
  dependency-group: document-processing
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-03-06 20:16:28 +00:00
16 changed files with 493 additions and 424 deletions

View File

@@ -149,16 +149,15 @@ jobs:
mkdir -p /tmp/digests
digest="${{ steps.build.outputs.digest }}"
echo "digest=${digest}"
echo "${digest}" > "/tmp/digests/digest-${{ matrix.arch }}.txt"
touch "/tmp/digests/${digest#sha256:}"
- name: Upload digest
if: steps.check-push.outputs.should-push == 'true'
uses: actions/upload-artifact@v7.0.0
with:
name: digests-${{ matrix.arch }}
path: /tmp/digests/digest-${{ matrix.arch }}.txt
path: /tmp/digests/*
if-no-files-found: error
retention-days: 1
archive: false
merge-and-push:
name: Merge and Push Manifest
runs-on: ubuntu-24.04
@@ -172,7 +171,7 @@ jobs:
uses: actions/download-artifact@v8.0.0
with:
path: /tmp/digests
pattern: digest-*.txt
pattern: digests-*
merge-multiple: true
- name: List digests
run: |
@@ -218,9 +217,8 @@ jobs:
tags=$(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "${DOCKER_METADATA_OUTPUT_JSON}")
digests=""
for digest_file in digest-*.txt; do
digest=$(cat "${digest_file}")
digests+="${{ env.REGISTRY }}/${REPOSITORY}@${digest} "
for digest in *; do
digests+="${{ env.REGISTRY }}/${REPOSITORY}@sha256:${digest} "
done
echo "Creating manifest with tags: ${tags}"

View File

@@ -2,24 +2,13 @@ name: PR Bot
on:
pull_request_target:
types: [opened]
permissions:
contents: read
pull-requests: write
jobs:
anti-slop:
runs-on: ubuntu-latest
permissions:
contents: read
issues: read
pull-requests: write
steps:
- uses: peakoss/anti-slop@v0.2.1
with:
max-failures: 4
failure-add-pr-labels: 'ai'
pr-bot:
name: Automated PR Bot
runs-on: ubuntu-latest
permissions:
contents: read
pull-requests: write
steps:
- name: Label PR by file path or branch name
# see .github/labeler.yml for the labeler config

View File

@@ -59,7 +59,7 @@ dependencies = [
"llama-index-llms-openai>=0.6.13",
"llama-index-vector-stores-faiss>=0.5.2",
"nltk~=3.9.1",
"ocrmypdf~=16.13.0",
"ocrmypdf~=17.3.0",
"openai>=1.76",
"pathvalidate~=3.3.1",
"pdf2image~=1.17.0",

View File

@@ -1,100 +1,107 @@
import logging
from unittest import mock
import pytest
from allauth.account.adapter import get_adapter
from allauth.core import context
from allauth.socialaccount.adapter import get_adapter as get_social_adapter
from django.conf import settings
from django.contrib.auth.models import AnonymousUser
from django.contrib.auth.models import Group
from django.contrib.auth.models import User
from django.forms import ValidationError
from django.http import HttpRequest
from django.test import TestCase
from django.test import override_settings
from django.urls import reverse
from pytest_django.fixtures import SettingsWrapper
from pytest_mock import MockerFixture
from rest_framework.authtoken.models import Token
from paperless.adapter import DrfTokenStrategy
@pytest.mark.django_db
class TestCustomAccountAdapter:
def test_is_open_for_signup(self, settings: SettingsWrapper) -> None:
class TestCustomAccountAdapter(TestCase):
def test_is_open_for_signup(self) -> None:
adapter = get_adapter()
# With no accounts, signups should be allowed
assert adapter.is_open_for_signup(None)
self.assertTrue(adapter.is_open_for_signup(None))
User.objects.create_user("testuser")
# Test when ACCOUNT_ALLOW_SIGNUPS is True
settings.ACCOUNT_ALLOW_SIGNUPS = True
assert adapter.is_open_for_signup(None)
self.assertTrue(adapter.is_open_for_signup(None))
# Test when ACCOUNT_ALLOW_SIGNUPS is False
settings.ACCOUNT_ALLOW_SIGNUPS = False
assert not adapter.is_open_for_signup(None)
self.assertFalse(adapter.is_open_for_signup(None))
def test_is_safe_url(self, settings: SettingsWrapper) -> None:
def test_is_safe_url(self) -> None:
request = HttpRequest()
request.get_host = lambda: "example.com"
request.get_host = mock.Mock(return_value="example.com")
with context.request_context(request):
adapter = get_adapter()
with override_settings(ALLOWED_HOSTS=["*"]):
# True because request host is same
url = "https://example.com"
self.assertTrue(adapter.is_safe_url(url))
settings.ALLOWED_HOSTS = ["*"]
# True because request host is same
assert adapter.is_safe_url("https://example.com")
url = "https://evil.com"
# False despite wildcard because request host is different
assert not adapter.is_safe_url("https://evil.com")
self.assertFalse(adapter.is_safe_url(url))
settings.ALLOWED_HOSTS = ["example.com"]
url = "https://example.com"
# True because request host is same
assert adapter.is_safe_url("https://example.com")
self.assertTrue(adapter.is_safe_url(url))
settings.ALLOWED_HOSTS = ["*", "example.com"]
url = "//evil.com"
# False because request host is not in allowed hosts
assert not adapter.is_safe_url("//evil.com")
self.assertFalse(adapter.is_safe_url(url))
def test_pre_authenticate(
self,
settings: SettingsWrapper,
mocker: MockerFixture,
) -> None:
mocker.patch("allauth.core.internal.ratelimit.consume", return_value=True)
@mock.patch("allauth.core.internal.ratelimit.consume", return_value=True)
def test_pre_authenticate(self, mock_consume) -> None:
adapter = get_adapter()
request = HttpRequest()
request.get_host = lambda: "example.com"
request.get_host = mock.Mock(return_value="example.com")
settings.DISABLE_REGULAR_LOGIN = False
adapter.pre_authenticate(request)
settings.DISABLE_REGULAR_LOGIN = True
with pytest.raises(ValidationError):
with self.assertRaises(ValidationError):
adapter.pre_authenticate(request)
def test_get_reset_password_from_key_url(self, settings: SettingsWrapper) -> None:
def test_get_reset_password_from_key_url(self) -> None:
request = HttpRequest()
request.get_host = lambda: "foo.org"
request.get_host = mock.Mock(return_value="foo.org")
with context.request_context(request):
adapter = get_adapter()
settings.PAPERLESS_URL = None
settings.ACCOUNT_DEFAULT_HTTP_PROTOCOL = "https"
expected_url = f"https://foo.org{reverse('account_reset_password_from_key', kwargs={'uidb36': 'UID', 'key': 'KEY'})}"
assert adapter.get_reset_password_from_key_url("UID-KEY") == expected_url
# Test when PAPERLESS_URL is None
with override_settings(
PAPERLESS_URL=None,
ACCOUNT_DEFAULT_HTTP_PROTOCOL="https",
):
expected_url = f"https://foo.org{reverse('account_reset_password_from_key', kwargs={'uidb36': 'UID', 'key': 'KEY'})}"
self.assertEqual(
adapter.get_reset_password_from_key_url("UID-KEY"),
expected_url,
)
settings.PAPERLESS_URL = "https://bar.com"
expected_url = f"https://bar.com{reverse('account_reset_password_from_key', kwargs={'uidb36': 'UID', 'key': 'KEY'})}"
assert adapter.get_reset_password_from_key_url("UID-KEY") == expected_url
# Test when PAPERLESS_URL is not None
with override_settings(PAPERLESS_URL="https://bar.com"):
expected_url = f"https://bar.com{reverse('account_reset_password_from_key', kwargs={'uidb36': 'UID', 'key': 'KEY'})}"
self.assertEqual(
adapter.get_reset_password_from_key_url("UID-KEY"),
expected_url,
)
def test_save_user_adds_groups(
self,
settings: SettingsWrapper,
mocker: MockerFixture,
) -> None:
settings.ACCOUNT_DEFAULT_GROUPS = ["group1", "group2"]
@override_settings(ACCOUNT_DEFAULT_GROUPS=["group1", "group2"])
def test_save_user_adds_groups(self) -> None:
Group.objects.create(name="group1")
user = User.objects.create_user("testuser")
adapter = get_adapter()
form = mocker.MagicMock(
form = mock.Mock(
cleaned_data={
"username": "testuser",
"email": "user@example.com",
@@ -103,81 +110,88 @@ class TestCustomAccountAdapter:
user = adapter.save_user(HttpRequest(), user, form, commit=True)
assert user.groups.count() == 1
assert user.groups.filter(name="group1").exists()
assert not user.groups.filter(name="group2").exists()
self.assertEqual(user.groups.count(), 1)
self.assertTrue(user.groups.filter(name="group1").exists())
self.assertFalse(user.groups.filter(name="group2").exists())
def test_fresh_install_save_creates_superuser(self, mocker: MockerFixture) -> None:
def test_fresh_install_save_creates_superuser(self) -> None:
adapter = get_adapter()
form = mocker.MagicMock(
form = mock.Mock(
cleaned_data={
"username": "testuser",
"email": "user@paperless-ngx.com",
},
)
user = adapter.save_user(HttpRequest(), User(), form, commit=True)
assert user.is_superuser
self.assertTrue(user.is_superuser)
form = mocker.MagicMock(
# Next time, it should not create a superuser
form = mock.Mock(
cleaned_data={
"username": "testuser2",
"email": "user2@paperless-ngx.com",
},
)
user2 = adapter.save_user(HttpRequest(), User(), form, commit=True)
assert not user2.is_superuser
self.assertFalse(user2.is_superuser)
class TestCustomSocialAccountAdapter:
@pytest.mark.django_db
def test_is_open_for_signup(self, settings: SettingsWrapper) -> None:
class TestCustomSocialAccountAdapter(TestCase):
def test_is_open_for_signup(self) -> None:
adapter = get_social_adapter()
# Test when SOCIALACCOUNT_ALLOW_SIGNUPS is True
settings.SOCIALACCOUNT_ALLOW_SIGNUPS = True
assert adapter.is_open_for_signup(None, None)
self.assertTrue(adapter.is_open_for_signup(None, None))
# Test when SOCIALACCOUNT_ALLOW_SIGNUPS is False
settings.SOCIALACCOUNT_ALLOW_SIGNUPS = False
assert not adapter.is_open_for_signup(None, None)
self.assertFalse(adapter.is_open_for_signup(None, None))
def test_get_connect_redirect_url(self) -> None:
adapter = get_social_adapter()
assert adapter.get_connect_redirect_url(None, None) == reverse("base")
request = None
socialaccount = None
@pytest.mark.django_db
def test_save_user_adds_groups(
self,
settings: SettingsWrapper,
mocker: MockerFixture,
) -> None:
settings.SOCIAL_ACCOUNT_DEFAULT_GROUPS = ["group1", "group2"]
# Test the default URL
expected_url = reverse("base")
self.assertEqual(
adapter.get_connect_redirect_url(request, socialaccount),
expected_url,
)
@override_settings(SOCIAL_ACCOUNT_DEFAULT_GROUPS=["group1", "group2"])
def test_save_user_adds_groups(self) -> None:
Group.objects.create(name="group1")
adapter = get_social_adapter()
request = HttpRequest()
user = User.objects.create_user("testuser")
sociallogin = mocker.MagicMock(user=user)
sociallogin = mock.Mock(
user=user,
)
user = adapter.save_user(HttpRequest(), sociallogin, None)
user = adapter.save_user(request, sociallogin, None)
assert user.groups.count() == 1
assert user.groups.filter(name="group1").exists()
assert not user.groups.filter(name="group2").exists()
self.assertEqual(user.groups.count(), 1)
self.assertTrue(user.groups.filter(name="group1").exists())
self.assertFalse(user.groups.filter(name="group2").exists())
def test_error_logged_on_authentication_error(
self,
caplog: pytest.LogCaptureFixture,
) -> None:
def test_error_logged_on_authentication_error(self) -> None:
adapter = get_social_adapter()
with caplog.at_level(logging.INFO, logger="paperless.auth"):
request = HttpRequest()
with self.assertLogs("paperless.auth", level="INFO") as log_cm:
adapter.on_authentication_error(
HttpRequest(),
request,
provider="test-provider",
error="Error",
exception="Test authentication error",
)
assert any("Test authentication error" in msg for msg in caplog.messages)
self.assertTrue(
any("Test authentication error" in message for message in log_cm.output),
)
@pytest.mark.django_db
class TestDrfTokenStrategy:
class TestDrfTokenStrategy(TestCase):
def test_create_access_token_creates_new_token(self) -> None:
"""
GIVEN:
@@ -187,6 +201,7 @@ class TestDrfTokenStrategy:
THEN:
- A new token is created and its key is returned
"""
user = User.objects.create_user("testuser")
request = HttpRequest()
request.user = user
@@ -194,9 +209,13 @@ class TestDrfTokenStrategy:
strategy = DrfTokenStrategy()
token_key = strategy.create_access_token(request)
assert token_key is not None
assert Token.objects.filter(user=user).exists()
assert token_key == Token.objects.get(user=user).key
# Verify a token was created
self.assertIsNotNone(token_key)
self.assertTrue(Token.objects.filter(user=user).exists())
# Verify the returned key matches the created token
token = Token.objects.get(user=user)
self.assertEqual(token_key, token.key)
def test_create_access_token_returns_existing_token(self) -> None:
"""
@@ -207,6 +226,7 @@ class TestDrfTokenStrategy:
THEN:
- The same token key is returned (no new token created)
"""
user = User.objects.create_user("testuser")
existing_token = Token.objects.create(user=user)
@@ -216,8 +236,11 @@ class TestDrfTokenStrategy:
strategy = DrfTokenStrategy()
token_key = strategy.create_access_token(request)
assert token_key == existing_token.key
assert Token.objects.filter(user=user).count() == 1
# Verify the existing token key is returned
self.assertEqual(token_key, existing_token.key)
# Verify only one token exists (no duplicate created)
self.assertEqual(Token.objects.filter(user=user).count(), 1)
def test_create_access_token_returns_none_for_unauthenticated_user(self) -> None:
"""
@@ -228,11 +251,12 @@ class TestDrfTokenStrategy:
THEN:
- None is returned and no token is created
"""
request = HttpRequest()
request.user = AnonymousUser()
strategy = DrfTokenStrategy()
token_key = strategy.create_access_token(request)
assert token_key is None
assert Token.objects.count() == 0
self.assertIsNone(token_key)
self.assertEqual(Token.objects.count(), 0)

View File

@@ -1,15 +1,16 @@
import os
from collections.abc import Callable
from dataclasses import dataclass
from pathlib import Path
from unittest import mock
import pytest
from django.core.checks import Error
from django.core.checks import Warning
from pytest_django.fixtures import SettingsWrapper
from django.test import TestCase
from django.test import override_settings
from pytest_mock import MockerFixture
from documents.tests.utils import DirectoriesMixin
from documents.tests.utils import FileSystemAssertsMixin
from paperless.checks import audit_log_check
from paperless.checks import binaries_check
from paperless.checks import check_deprecated_db_settings
@@ -19,84 +20,54 @@ from paperless.checks import paths_check
from paperless.checks import settings_values_check
@dataclass(frozen=True, slots=True)
class PaperlessTestDirs:
data_dir: Path
media_dir: Path
consumption_dir: Path
# TODO: consolidate with documents/tests/conftest.py PaperlessDirs/paperless_dirs
# once the paperless and documents test suites are ready to share fixtures.
@pytest.fixture()
def directories(tmp_path: Path, settings: SettingsWrapper) -> PaperlessTestDirs:
data_dir = tmp_path / "data"
media_dir = tmp_path / "media"
consumption_dir = tmp_path / "consumption"
for d in (data_dir, media_dir, consumption_dir):
d.mkdir()
settings.DATA_DIR = data_dir
settings.MEDIA_ROOT = media_dir
settings.CONSUMPTION_DIR = consumption_dir
return PaperlessTestDirs(
data_dir=data_dir,
media_dir=media_dir,
consumption_dir=consumption_dir,
)
class TestChecks:
class TestChecks(DirectoriesMixin, TestCase):
def test_binaries(self) -> None:
assert binaries_check(None) == []
self.assertEqual(binaries_check(None), [])
def test_binaries_fail(self, settings: SettingsWrapper) -> None:
settings.CONVERT_BINARY = "uuuhh"
assert len(binaries_check(None)) == 1
@override_settings(CONVERT_BINARY="uuuhh")
def test_binaries_fail(self) -> None:
self.assertEqual(len(binaries_check(None)), 1)
@pytest.mark.usefixtures("directories")
def test_paths_check(self) -> None:
assert paths_check(None) == []
self.assertEqual(paths_check(None), [])
def test_paths_check_dont_exist(self, settings: SettingsWrapper) -> None:
settings.MEDIA_ROOT = Path("uuh")
settings.DATA_DIR = Path("whatever")
settings.CONSUMPTION_DIR = Path("idontcare")
@override_settings(
MEDIA_ROOT=Path("uuh"),
DATA_DIR=Path("whatever"),
CONSUMPTION_DIR=Path("idontcare"),
)
def test_paths_check_dont_exist(self) -> None:
msgs = paths_check(None)
self.assertEqual(len(msgs), 3, str(msgs))
for msg in msgs:
self.assertTrue(msg.msg.endswith("is set but doesn't exist."))
def test_paths_check_no_access(self) -> None:
Path(self.dirs.data_dir).chmod(0o000)
Path(self.dirs.media_dir).chmod(0o000)
Path(self.dirs.consumption_dir).chmod(0o000)
self.addCleanup(os.chmod, self.dirs.data_dir, 0o777)
self.addCleanup(os.chmod, self.dirs.media_dir, 0o777)
self.addCleanup(os.chmod, self.dirs.consumption_dir, 0o777)
msgs = paths_check(None)
self.assertEqual(len(msgs), 3)
assert len(msgs) == 3, str(msgs)
for msg in msgs:
assert msg.msg.endswith("is set but doesn't exist.")
self.assertTrue(msg.msg.endswith("is not writeable"))
def test_paths_check_no_access(self, directories: PaperlessTestDirs) -> None:
directories.data_dir.chmod(0o000)
directories.media_dir.chmod(0o000)
directories.consumption_dir.chmod(0o000)
@override_settings(DEBUG=False)
def test_debug_disabled(self) -> None:
self.assertEqual(debug_mode_check(None), [])
try:
msgs = paths_check(None)
finally:
directories.data_dir.chmod(0o777)
directories.media_dir.chmod(0o777)
directories.consumption_dir.chmod(0o777)
assert len(msgs) == 3
for msg in msgs:
assert msg.msg.endswith("is not writeable")
def test_debug_disabled(self, settings: SettingsWrapper) -> None:
settings.DEBUG = False
assert debug_mode_check(None) == []
def test_debug_enabled(self, settings: SettingsWrapper) -> None:
settings.DEBUG = True
assert len(debug_mode_check(None)) == 1
@override_settings(DEBUG=True)
def test_debug_enabled(self) -> None:
self.assertEqual(len(debug_mode_check(None)), 1)
class TestSettingsChecksAgainstDefaults:
class TestSettingsChecksAgainstDefaults(DirectoriesMixin, TestCase):
def test_all_valid(self) -> None:
"""
GIVEN:
@@ -107,71 +78,104 @@ class TestSettingsChecksAgainstDefaults:
- No system check errors reported
"""
msgs = settings_values_check(None)
assert len(msgs) == 0
self.assertEqual(len(msgs), 0)
class TestOcrSettingsChecks:
@pytest.mark.parametrize(
("setting", "value", "expected_msg"),
[
pytest.param(
"OCR_OUTPUT_TYPE",
"notapdf",
'OCR output type "notapdf"',
id="invalid-output-type",
),
pytest.param(
"OCR_MODE",
"makeitso",
'OCR output mode "makeitso"',
id="invalid-mode",
),
pytest.param(
"OCR_MODE",
"skip_noarchive",
"deprecated",
id="deprecated-mode",
),
pytest.param(
"OCR_SKIP_ARCHIVE_FILE",
"invalid",
'OCR_SKIP_ARCHIVE_FILE setting "invalid"',
id="invalid-skip-archive-file",
),
pytest.param(
"OCR_CLEAN",
"cleanme",
'OCR clean mode "cleanme"',
id="invalid-clean",
),
],
)
def test_invalid_setting_produces_one_error(
self,
settings: SettingsWrapper,
setting: str,
value: str,
expected_msg: str,
) -> None:
class TestOcrSettingsChecks(DirectoriesMixin, TestCase):
@override_settings(OCR_OUTPUT_TYPE="notapdf")
def test_invalid_output_type(self) -> None:
"""
GIVEN:
- Default settings
- One OCR setting is set to an invalid value
- OCR output type is invalid
WHEN:
- Settings are validated
THEN:
- Exactly one system check error is reported containing the expected message
- system check error reported for OCR output type
"""
setattr(settings, setting, value)
msgs = settings_values_check(None)
self.assertEqual(len(msgs), 1)
assert len(msgs) == 1
assert expected_msg in msgs[0].msg
msg = msgs[0]
self.assertIn('OCR output type "notapdf"', msg.msg)
@override_settings(OCR_MODE="makeitso")
def test_invalid_ocr_type(self) -> None:
"""
GIVEN:
- Default settings
- OCR type is invalid
WHEN:
- Settings are validated
THEN:
- system check error reported for OCR type
"""
msgs = settings_values_check(None)
self.assertEqual(len(msgs), 1)
msg = msgs[0]
self.assertIn('OCR output mode "makeitso"', msg.msg)
@override_settings(OCR_MODE="skip_noarchive")
def test_deprecated_ocr_type(self) -> None:
"""
GIVEN:
- Default settings
- OCR type is deprecated
WHEN:
- Settings are validated
THEN:
- deprecation warning reported for OCR type
"""
msgs = settings_values_check(None)
self.assertEqual(len(msgs), 1)
msg = msgs[0]
self.assertIn("deprecated", msg.msg)
@override_settings(OCR_SKIP_ARCHIVE_FILE="invalid")
def test_invalid_ocr_skip_archive_file(self) -> None:
"""
GIVEN:
- Default settings
- OCR_SKIP_ARCHIVE_FILE is invalid
WHEN:
- Settings are validated
THEN:
- system check error reported for OCR_SKIP_ARCHIVE_FILE
"""
msgs = settings_values_check(None)
self.assertEqual(len(msgs), 1)
msg = msgs[0]
self.assertIn('OCR_SKIP_ARCHIVE_FILE setting "invalid"', msg.msg)
@override_settings(OCR_CLEAN="cleanme")
def test_invalid_ocr_clean(self) -> None:
"""
GIVEN:
- Default settings
- OCR cleaning type is invalid
WHEN:
- Settings are validated
THEN:
- system check error reported for OCR cleaning type
"""
msgs = settings_values_check(None)
self.assertEqual(len(msgs), 1)
msg = msgs[0]
self.assertIn('OCR clean mode "cleanme"', msg.msg)
class TestTimezoneSettingsChecks:
def test_invalid_timezone(self, settings: SettingsWrapper) -> None:
class TestTimezoneSettingsChecks(DirectoriesMixin, TestCase):
@override_settings(TIME_ZONE="TheMoon\\MyCrater")
def test_invalid_timezone(self) -> None:
"""
GIVEN:
- Default settings
@@ -181,16 +185,17 @@ class TestTimezoneSettingsChecks:
THEN:
- system check error reported for timezone
"""
settings.TIME_ZONE = "TheMoon\\MyCrater"
msgs = settings_values_check(None)
self.assertEqual(len(msgs), 1)
assert len(msgs) == 1
assert 'Timezone "TheMoon\\MyCrater"' in msgs[0].msg
msg = msgs[0]
self.assertIn('Timezone "TheMoon\\MyCrater"', msg.msg)
class TestEmailCertSettingsChecks:
def test_not_valid_file(self, settings: SettingsWrapper) -> None:
class TestEmailCertSettingsChecks(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
@override_settings(EMAIL_CERTIFICATE_FILE=Path("/tmp/not_actually_here.pem"))
def test_not_valid_file(self) -> None:
"""
GIVEN:
- Default settings
@@ -200,22 +205,19 @@ class TestEmailCertSettingsChecks:
THEN:
- system check error reported for email certificate
"""
cert_path = Path("/tmp/not_actually_here.pem")
assert not cert_path.is_file()
settings.EMAIL_CERTIFICATE_FILE = cert_path
self.assertIsNotFile("/tmp/not_actually_here.pem")
msgs = settings_values_check(None)
assert len(msgs) == 1
assert "Email cert /tmp/not_actually_here.pem is not a file" in msgs[0].msg
self.assertEqual(len(msgs), 1)
msg = msgs[0]
self.assertIn("Email cert /tmp/not_actually_here.pem is not a file", msg.msg)
class TestAuditLogChecks:
def test_was_enabled_once(
self,
settings: SettingsWrapper,
mocker: MockerFixture,
) -> None:
class TestAuditLogChecks(TestCase):
def test_was_enabled_once(self) -> None:
"""
GIVEN:
- Audit log is not enabled
@@ -224,18 +226,23 @@ class TestAuditLogChecks:
THEN:
- system check error reported for disabling audit log
"""
settings.AUDIT_LOG_ENABLED = False
introspect_mock = mocker.MagicMock()
introspect_mock = mock.MagicMock()
introspect_mock.introspection.table_names.return_value = ["auditlog_logentry"]
mocker.patch.dict(
"paperless.checks.connections",
{"default": introspect_mock},
)
with override_settings(AUDIT_LOG_ENABLED=False):
with mock.patch.dict(
"paperless.checks.connections",
{"default": introspect_mock},
):
msgs = audit_log_check(None)
msgs = audit_log_check(None)
self.assertEqual(len(msgs), 1)
assert len(msgs) == 1
assert "auditlog table was found but audit log is disabled." in msgs[0].msg
msg = msgs[0]
self.assertIn(
("auditlog table was found but audit log is disabled."),
msg.msg,
)
DEPRECATED_VARS: dict[str, str] = {
@@ -264,16 +271,20 @@ class TestDeprecatedDbSettings:
@pytest.mark.parametrize(
("env_var", "db_option_key"),
[
pytest.param("PAPERLESS_DB_TIMEOUT", "timeout", id="db-timeout"),
pytest.param(
"PAPERLESS_DB_POOLSIZE",
"pool.min_size / pool.max_size",
id="db-poolsize",
),
pytest.param("PAPERLESS_DBSSLMODE", "sslmode", id="ssl-mode"),
pytest.param("PAPERLESS_DBSSLROOTCERT", "sslrootcert", id="ssl-rootcert"),
pytest.param("PAPERLESS_DBSSLCERT", "sslcert", id="ssl-cert"),
pytest.param("PAPERLESS_DBSSLKEY", "sslkey", id="ssl-key"),
("PAPERLESS_DB_TIMEOUT", "timeout"),
("PAPERLESS_DB_POOLSIZE", "pool.min_size / pool.max_size"),
("PAPERLESS_DBSSLMODE", "sslmode"),
("PAPERLESS_DBSSLROOTCERT", "sslrootcert"),
("PAPERLESS_DBSSLCERT", "sslcert"),
("PAPERLESS_DBSSLKEY", "sslkey"),
],
ids=[
"db-timeout",
"db-poolsize",
"ssl-mode",
"ssl-rootcert",
"ssl-cert",
"ssl-key",
],
)
def test_single_deprecated_var_produces_one_warning(
@@ -392,10 +403,7 @@ class TestV3MinimumUpgradeVersionCheck:
"""Test suite for check_v3_minimum_upgrade_version system check."""
@pytest.fixture
def build_conn_mock(
self,
mocker: MockerFixture,
) -> Callable[[list[str], list[str]], mock.MagicMock]:
def build_conn_mock(self, mocker: MockerFixture):
"""Factory fixture that builds a connections['default'] mock.
Usage::
@@ -415,7 +423,7 @@ class TestV3MinimumUpgradeVersionCheck:
def test_no_migrations_table_fresh_install(
self,
mocker: MockerFixture,
build_conn_mock: Callable[[list[str], list[str]], mock.MagicMock],
build_conn_mock,
) -> None:
"""
GIVEN:
@@ -434,7 +442,7 @@ class TestV3MinimumUpgradeVersionCheck:
def test_no_documents_migrations_fresh_install(
self,
mocker: MockerFixture,
build_conn_mock: Callable[[list[str], list[str]], mock.MagicMock],
build_conn_mock,
) -> None:
"""
GIVEN:
@@ -453,7 +461,7 @@ class TestV3MinimumUpgradeVersionCheck:
def test_v3_state_with_0001_squashed(
self,
mocker: MockerFixture,
build_conn_mock: Callable[[list[str], list[str]], mock.MagicMock],
build_conn_mock,
) -> None:
"""
GIVEN:
@@ -477,7 +485,7 @@ class TestV3MinimumUpgradeVersionCheck:
def test_v3_state_with_0002_squashed_only(
self,
mocker: MockerFixture,
build_conn_mock: Callable[[list[str], list[str]], mock.MagicMock],
build_conn_mock,
) -> None:
"""
GIVEN:
@@ -496,7 +504,7 @@ class TestV3MinimumUpgradeVersionCheck:
def test_v2_20_9_state_ready_to_upgrade(
self,
mocker: MockerFixture,
build_conn_mock: Callable[[list[str], list[str]], mock.MagicMock],
build_conn_mock,
) -> None:
"""
GIVEN:
@@ -523,7 +531,7 @@ class TestV3MinimumUpgradeVersionCheck:
def test_v2_20_8_raises_error(
self,
mocker: MockerFixture,
build_conn_mock: Callable[[list[str], list[str]], mock.MagicMock],
build_conn_mock,
) -> None:
"""
GIVEN:
@@ -550,7 +558,7 @@ class TestV3MinimumUpgradeVersionCheck:
def test_very_old_version_raises_error(
self,
mocker: MockerFixture,
build_conn_mock: Callable[[list[str], list[str]], mock.MagicMock],
build_conn_mock,
) -> None:
"""
GIVEN:
@@ -577,7 +585,7 @@ class TestV3MinimumUpgradeVersionCheck:
def test_error_hint_mentions_v2_20_9(
self,
mocker: MockerFixture,
build_conn_mock: Callable[[list[str], list[str]], mock.MagicMock],
build_conn_mock,
) -> None:
"""
GIVEN:

View File

@@ -9,50 +9,35 @@ from paperless.utils import ocr_to_dateparser_languages
@pytest.mark.parametrize(
("ocr_language", "expected"),
[
pytest.param("eng", ["en"], id="single-language"),
pytest.param("fra+ita+lao", ["fr", "it", "lo"], id="multiple-languages"),
pytest.param("fil", ["fil"], id="no-two-letter-equivalent"),
pytest.param(
"aze_cyrl+srp_latn",
["az-Cyrl", "sr-Latn"],
id="script-supported-by-dateparser",
),
pytest.param(
"deu_frak",
["de"],
id="script-not-supported-falls-back-to-language",
),
pytest.param(
"chi_tra+chi_sim",
["zh"],
id="chinese-variants-collapse-to-general",
),
pytest.param(
"eng+unsupported_language+por",
["en", "pt"],
id="unsupported-language-skipped",
),
pytest.param(
"unsupported1+unsupported2",
[],
id="all-unsupported-returns-empty",
),
pytest.param("eng+eng", ["en"], id="duplicates-deduplicated"),
pytest.param(
"ita_unknownscript",
["it"],
id="unknown-script-falls-back-to-language",
),
# One language
("eng", ["en"]),
# Multiple languages
("fra+ita+lao", ["fr", "it", "lo"]),
# Languages that don't have a two-letter equivalent
("fil", ["fil"]),
# Languages with a script part supported by dateparser
("aze_cyrl+srp_latn", ["az-Cyrl", "sr-Latn"]),
# Languages with a script part not supported by dateparser
# In this case, default to the language without script
("deu_frak", ["de"]),
# Traditional and simplified chinese don't have the same name in dateparser,
# so they're converted to the general chinese language
("chi_tra+chi_sim", ["zh"]),
# If a language is not supported by dateparser, fallback to the supported ones
("eng+unsupported_language+por", ["en", "pt"]),
# If no language is supported, fallback to default
("unsupported1+unsupported2", []),
# Duplicate languages, should not duplicate in result
("eng+eng", ["en"]),
# Language with script, but script is not mapped
("ita_unknownscript", ["it"]),
],
)
def test_ocr_to_dateparser_languages(ocr_language: str, expected: list[str]) -> None:
def test_ocr_to_dateparser_languages(ocr_language, expected):
assert sorted(ocr_to_dateparser_languages(ocr_language)) == sorted(expected)
def test_ocr_to_dateparser_languages_exception(
monkeypatch: pytest.MonkeyPatch,
caplog: pytest.LogCaptureFixture,
) -> None:
def test_ocr_to_dateparser_languages_exception(monkeypatch, caplog):
# Patch LocaleDataLoader.get_locale_map to raise an exception
class DummyLoader:
def get_locale_map(self, locales=None):

View File

@@ -1,31 +1,24 @@
import tempfile
from pathlib import Path
from django.test import Client
from pytest_django.fixtures import SettingsWrapper
from django.test import override_settings
def test_favicon_view(
client: Client,
tmp_path: Path,
settings: SettingsWrapper,
) -> None:
favicon_path = tmp_path / "paperless" / "img" / "favicon.ico"
favicon_path.parent.mkdir(parents=True)
favicon_path.write_bytes(b"FAKE ICON DATA")
def test_favicon_view(client):
with tempfile.TemporaryDirectory() as tmpdir:
static_dir = Path(tmpdir)
favicon_path = static_dir / "paperless" / "img" / "favicon.ico"
favicon_path.parent.mkdir(parents=True, exist_ok=True)
favicon_path.write_bytes(b"FAKE ICON DATA")
settings.STATIC_ROOT = tmp_path
response = client.get("/favicon.ico")
assert response.status_code == 200
assert response["Content-Type"] == "image/x-icon"
assert b"".join(response.streaming_content) == b"FAKE ICON DATA"
with override_settings(STATIC_ROOT=static_dir):
response = client.get("/favicon.ico")
assert response.status_code == 200
assert response["Content-Type"] == "image/x-icon"
assert b"".join(response.streaming_content) == b"FAKE ICON DATA"
def test_favicon_view_missing_file(
client: Client,
tmp_path: Path,
settings: SettingsWrapper,
) -> None:
settings.STATIC_ROOT = tmp_path
response = client.get("/favicon.ico")
assert response.status_code == 404
def test_favicon_view_missing_file(client):
with override_settings(STATIC_ROOT=Path(tempfile.mkdtemp())):
response = client.get("/favicon.ico")
assert response.status_code == 404

View File

@@ -1,4 +1,4 @@
from pydantic import BaseModel
from llama_index.core.bridge.pydantic import BaseModel
class DocumentClassifierSchema(BaseModel):

View File

@@ -1,6 +1,10 @@
import logging
import sys
from llama_index.core import VectorStoreIndex
from llama_index.core.prompts import PromptTemplate
from llama_index.core.query_engine import RetrieverQueryEngine
from documents.models import Document
from paperless_ai.client import AIClient
from paperless_ai.indexing import load_or_build_index
@@ -10,21 +14,15 @@ logger = logging.getLogger("paperless_ai.chat")
MAX_SINGLE_DOC_CONTEXT_CHARS = 15000
SINGLE_DOC_SNIPPET_CHARS = 800
CHAT_PROMPT_TEMPLATE = """Context information is below.
CHAT_PROMPT_TMPL = PromptTemplate(
template="""Context information is below.
---------------------
{context_str}
---------------------
Given the context information and not prior knowledge, answer the query.
Query: {query_str}
Answer:"""
def _get_prompt_template():
from llama_index.core.prompts import PromptTemplate
return PromptTemplate(
template=CHAT_PROMPT_TEMPLATE,
)
Answer:""",
)
def stream_chat_with_documents(query_str: str, documents: list[Document]):
@@ -45,8 +43,6 @@ def stream_chat_with_documents(query_str: str, documents: list[Document]):
yield "Sorry, I couldn't find any content to answer your question."
return
from llama_index.core import VectorStoreIndex
local_index = VectorStoreIndex(nodes=nodes)
retriever = local_index.as_retriever(
similarity_top_k=3 if len(documents) == 1 else 5,
@@ -89,16 +85,10 @@ def stream_chat_with_documents(query_str: str, documents: list[Document]):
for node in top_nodes
)
prompt = (
_get_prompt_template()
.partial_format(
context_str=context,
query_str=query_str,
)
.format(llm=client.llm)
)
from llama_index.core.query_engine import RetrieverQueryEngine
prompt = CHAT_PROMPT_TMPL.partial_format(
context_str=context,
query_str=query_str,
).format(llm=client.llm)
query_engine = RetrieverQueryEngine.from_args(
retriever=retriever,

View File

@@ -1,10 +1,9 @@
import logging
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from llama_index.core.llms import ChatMessage
from llama_index.llms.ollama import Ollama
from llama_index.llms.openai import OpenAI
from llama_index.core.llms import ChatMessage
from llama_index.core.program.function_program import get_function_tool
from llama_index.llms.ollama import Ollama
from llama_index.llms.openai import OpenAI
from paperless.config import AIConfig
from paperless_ai.base_model import DocumentClassifierSchema
@@ -21,18 +20,14 @@ class AIClient:
self.settings = AIConfig()
self.llm = self.get_llm()
def get_llm(self) -> "Ollama | OpenAI":
def get_llm(self) -> Ollama | OpenAI:
if self.settings.llm_backend == "ollama":
from llama_index.llms.ollama import Ollama
return Ollama(
model=self.settings.llm_model or "llama3.1",
base_url=self.settings.llm_endpoint or "http://localhost:11434",
request_timeout=120,
)
elif self.settings.llm_backend == "openai":
from llama_index.llms.openai import OpenAI
return OpenAI(
model=self.settings.llm_model or "gpt-3.5-turbo",
api_base=self.settings.llm_endpoint or None,
@@ -48,9 +43,6 @@ class AIClient:
self.settings.llm_model,
)
from llama_index.core.llms import ChatMessage
from llama_index.core.program.function_program import get_function_tool
user_msg = ChatMessage(role="user", content=prompt)
tool = get_function_tool(DocumentClassifierSchema)
result = self.llm.chat_with_tools(
@@ -66,7 +58,7 @@ class AIClient:
parsed = DocumentClassifierSchema(**tool_calls[0].tool_kwargs)
return parsed.model_dump()
def run_chat(self, messages: list["ChatMessage"]) -> str:
def run_chat(self, messages: list[ChatMessage]) -> str:
logger.debug(
"Running chat query against %s with model %s",
self.settings.llm_backend,

View File

@@ -5,9 +5,9 @@ if TYPE_CHECKING:
from pathlib import Path
from django.conf import settings
if TYPE_CHECKING:
from llama_index.core.base.embeddings.base import BaseEmbedding
from llama_index.core.base.embeddings.base import BaseEmbedding
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.embeddings.openai import OpenAIEmbedding
from documents.models import Document
from documents.models import Note
@@ -15,21 +15,17 @@ from paperless.config import AIConfig
from paperless.models import LLMEmbeddingBackend
def get_embedding_model() -> "BaseEmbedding":
def get_embedding_model() -> BaseEmbedding:
config = AIConfig()
match config.llm_embedding_backend:
case LLMEmbeddingBackend.OPENAI:
from llama_index.embeddings.openai import OpenAIEmbedding
return OpenAIEmbedding(
model=config.llm_embedding_model or "text-embedding-3-small",
api_key=config.llm_api_key,
api_base=config.llm_endpoint or None,
)
case LLMEmbeddingBackend.HUGGINGFACE:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
return HuggingFaceEmbedding(
model_name=config.llm_embedding_model
or "sentence-transformers/all-MiniLM-L6-v2",

View File

@@ -4,12 +4,26 @@ from collections.abc import Callable
from collections.abc import Iterable
from datetime import timedelta
from pathlib import Path
from typing import TYPE_CHECKING
from typing import TypeVar
import faiss
import llama_index.core.settings as llama_settings
from celery import states
from django.conf import settings
from django.utils import timezone
from llama_index.core import Document as LlamaDocument
from llama_index.core import StorageContext
from llama_index.core import VectorStoreIndex
from llama_index.core import load_index_from_storage
from llama_index.core.indices.prompt_helper import PromptHelper
from llama_index.core.node_parser import SimpleNodeParser
from llama_index.core.prompts import PromptTemplate
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.schema import BaseNode
from llama_index.core.storage.docstore import SimpleDocumentStore
from llama_index.core.storage.index_store import SimpleIndexStore
from llama_index.core.text_splitter import TokenTextSplitter
from llama_index.vector_stores.faiss import FaissVectorStore
from documents.models import Document
from documents.models import PaperlessTask
@@ -17,11 +31,6 @@ from paperless_ai.embedding import build_llm_index_text
from paperless_ai.embedding import get_embedding_dim
from paperless_ai.embedding import get_embedding_model
if TYPE_CHECKING:
from llama_index.core import StorageContext
from llama_index.core import VectorStoreIndex
from llama_index.core.schema import BaseNode
_T = TypeVar("_T")
IterWrapper = Callable[[Iterable[_T]], Iterable[_T]]
@@ -56,17 +65,11 @@ def queue_llm_index_update_if_needed(*, rebuild: bool, reason: str) -> bool:
return True
def get_or_create_storage_context(*, rebuild=False) -> "StorageContext":
def get_or_create_storage_context(*, rebuild=False):
"""
Loads or creates the StorageContext (vector store, docstore, index store).
If rebuild=True, deletes and recreates everything.
"""
import faiss
from llama_index.core import StorageContext
from llama_index.core.storage.docstore import SimpleDocumentStore
from llama_index.core.storage.index_store import SimpleIndexStore
from llama_index.vector_stores.faiss import FaissVectorStore
if rebuild:
shutil.rmtree(settings.LLM_INDEX_DIR, ignore_errors=True)
settings.LLM_INDEX_DIR.mkdir(parents=True, exist_ok=True)
@@ -90,7 +93,7 @@ def get_or_create_storage_context(*, rebuild=False) -> "StorageContext":
)
def build_document_node(document: Document) -> list["BaseNode"]:
def build_document_node(document: Document) -> list[BaseNode]:
"""
Given a Document, returns parsed Nodes ready for indexing.
"""
@@ -109,23 +112,16 @@ def build_document_node(document: Document) -> list["BaseNode"]:
"added": document.added.isoformat() if document.added else None,
"modified": document.modified.isoformat(),
}
from llama_index.core import Document as LlamaDocument
from llama_index.core.node_parser import SimpleNodeParser
doc = LlamaDocument(text=text, metadata=metadata)
parser = SimpleNodeParser()
return parser.get_nodes_from_documents([doc])
def load_or_build_index(nodes=None) -> "VectorStoreIndex":
def load_or_build_index(nodes=None):
"""
Load an existing VectorStoreIndex if present,
or build a new one using provided nodes if storage is empty.
"""
import llama_index.core.settings as llama_settings
from llama_index.core import VectorStoreIndex
from llama_index.core import load_index_from_storage
embed_model = get_embedding_model()
llama_settings.Settings.embed_model = embed_model
storage_context = get_or_create_storage_context()
@@ -147,7 +143,7 @@ def load_or_build_index(nodes=None) -> "VectorStoreIndex":
)
def remove_document_docstore_nodes(document: Document, index: "VectorStoreIndex"):
def remove_document_docstore_nodes(document: Document, index: VectorStoreIndex):
"""
Removes existing documents from docstore for a given document from the index.
This is necessary because FAISS IndexFlatL2 is append-only.
@@ -187,9 +183,6 @@ def update_llm_index(
return msg
if rebuild or not vector_store_file_exists():
import llama_index.core.settings as llama_settings
from llama_index.core import VectorStoreIndex
# remove meta.json to force re-detection of embedding dim
(settings.LLM_INDEX_DIR / "meta.json").unlink(missing_ok=True)
# Rebuild index from scratch
@@ -278,10 +271,6 @@ def llm_index_remove_document(document: Document):
def truncate_content(content: str) -> str:
from llama_index.core.indices.prompt_helper import PromptHelper
from llama_index.core.prompts import PromptTemplate
from llama_index.core.text_splitter import TokenTextSplitter
prompt_helper = PromptHelper(
context_window=8192,
num_output=512,
@@ -326,8 +315,6 @@ def query_similar_documents(
else None
)
from llama_index.core.retrievers import VectorIndexRetriever
retriever = VectorIndexRetriever(
index=index,
similarity_top_k=top_k,

View File

@@ -45,7 +45,7 @@ def test_stream_chat_with_one_document_full_content(mock_document) -> None:
patch("paperless_ai.chat.AIClient") as mock_client_cls,
patch("paperless_ai.chat.load_or_build_index") as mock_load_index,
patch(
"llama_index.core.query_engine.RetrieverQueryEngine.from_args",
"paperless_ai.chat.RetrieverQueryEngine.from_args",
) as mock_query_engine_cls,
):
mock_client = MagicMock()
@@ -76,7 +76,7 @@ def test_stream_chat_with_multiple_documents_retrieval(patch_embed_nodes) -> Non
patch("paperless_ai.chat.AIClient") as mock_client_cls,
patch("paperless_ai.chat.load_or_build_index") as mock_load_index,
patch(
"llama_index.core.query_engine.RetrieverQueryEngine.from_args",
"paperless_ai.chat.RetrieverQueryEngine.from_args",
) as mock_query_engine_cls,
patch.object(VectorStoreIndex, "as_retriever") as mock_as_retriever,
):

View File

@@ -18,13 +18,13 @@ def mock_ai_config():
@pytest.fixture
def mock_ollama_llm():
with patch("llama_index.llms.ollama.Ollama") as MockOllama:
with patch("paperless_ai.client.Ollama") as MockOllama:
yield MockOllama
@pytest.fixture
def mock_openai_llm():
with patch("llama_index.llms.openai.OpenAI") as MockOpenAI:
with patch("paperless_ai.client.OpenAI") as MockOpenAI:
yield MockOpenAI

View File

@@ -67,7 +67,7 @@ def test_get_embedding_model_openai(mock_ai_config):
mock_ai_config.return_value.llm_api_key = "test_api_key"
mock_ai_config.return_value.llm_endpoint = "http://test-url"
with patch("llama_index.embeddings.openai.OpenAIEmbedding") as MockOpenAIEmbedding:
with patch("paperless_ai.embedding.OpenAIEmbedding") as MockOpenAIEmbedding:
model = get_embedding_model()
MockOpenAIEmbedding.assert_called_once_with(
model="text-embedding-3-small",
@@ -84,7 +84,7 @@ def test_get_embedding_model_huggingface(mock_ai_config):
)
with patch(
"llama_index.embeddings.huggingface.HuggingFaceEmbedding",
"paperless_ai.embedding.HuggingFaceEmbedding",
) as MockHuggingFaceEmbedding:
model = get_embedding_model()
MockHuggingFaceEmbedding.assert_called_once_with(

115
uv.lock generated
View File

@@ -831,6 +831,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/2d/82/e5d2c1c67d19841e9edc74954c827444ae826978499bde3dfc1d007c8c11/deepmerge-2.0-py3-none-any.whl", hash = "sha256:6de9ce507115cff0bed95ff0ce9ecc31088ef50cbdf09bc90a09349a318b3d00", size = 13475, upload-time = "2024-08-30T05:31:48.659Z" },
]
[[package]]
name = "defusedxml"
version = "0.7.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/0f/d5/c66da9b79e5bdb124974bfe172b4daf3c984ebd9c2a06e2b8a4dc7331c72/defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69", size = 75520, upload-time = "2021-03-08T10:59:26.269Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/07/6c/aa3f2f849e01cb6a001cd8554a88d4c77c5c1a31c95bdf1cf9301e6d9ef4/defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61", size = 25604, upload-time = "2021-03-08T10:59:24.45Z" },
]
[[package]]
name = "deprecated"
version = "1.3.1"
@@ -1283,6 +1292,59 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/a6/ff/ee2f67c0ff146ec98b5df1df637b2bc2d17beeb05df9f427a67bd7a7d79c/flower-2.0.1-py2.py3-none-any.whl", hash = "sha256:9db2c621eeefbc844c8dd88be64aef61e84e2deb29b271e02ab2b5b9f01068e2", size = 383553, upload-time = "2023-08-13T14:37:41.552Z" },
]
[[package]]
name = "fonttools"
version = "4.61.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/ec/ca/cf17b88a8df95691275a3d77dc0a5ad9907f328ae53acbe6795da1b2f5ed/fonttools-4.61.1.tar.gz", hash = "sha256:6675329885c44657f826ef01d9e4fb33b9158e9d93c537d84ad8399539bc6f69", size = 3565756, upload-time = "2025-12-12T17:31:24.246Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/69/12/bf9f4eaa2fad039356cc627587e30ed008c03f1cebd3034376b5ee8d1d44/fonttools-4.61.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:c6604b735bb12fef8e0efd5578c9fb5d3d8532d5001ea13a19cddf295673ee09", size = 2852213, upload-time = "2025-12-12T17:29:46.675Z" },
{ url = "https://files.pythonhosted.org/packages/ac/49/4138d1acb6261499bedde1c07f8c2605d1d8f9d77a151e5507fd3ef084b6/fonttools-4.61.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5ce02f38a754f207f2f06557523cd39a06438ba3aafc0639c477ac409fc64e37", size = 2401689, upload-time = "2025-12-12T17:29:48.769Z" },
{ url = "https://files.pythonhosted.org/packages/e5/fe/e6ce0fe20a40e03aef906af60aa87668696f9e4802fa283627d0b5ed777f/fonttools-4.61.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:77efb033d8d7ff233385f30c62c7c79271c8885d5c9657d967ede124671bbdfb", size = 5058809, upload-time = "2025-12-12T17:29:51.701Z" },
{ url = "https://files.pythonhosted.org/packages/79/61/1ca198af22f7dd22c17ab86e9024ed3c06299cfdb08170640e9996d501a0/fonttools-4.61.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:75c1a6dfac6abd407634420c93864a1e274ebc1c7531346d9254c0d8f6ca00f9", size = 5036039, upload-time = "2025-12-12T17:29:53.659Z" },
{ url = "https://files.pythonhosted.org/packages/99/cc/fa1801e408586b5fce4da9f5455af8d770f4fc57391cd5da7256bb364d38/fonttools-4.61.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0de30bfe7745c0d1ffa2b0b7048fb7123ad0d71107e10ee090fa0b16b9452e87", size = 5034714, upload-time = "2025-12-12T17:29:55.592Z" },
{ url = "https://files.pythonhosted.org/packages/bf/aa/b7aeafe65adb1b0a925f8f25725e09f078c635bc22754f3fecb7456955b0/fonttools-4.61.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:58b0ee0ab5b1fc9921eccfe11d1435added19d6494dde14e323f25ad2bc30c56", size = 5158648, upload-time = "2025-12-12T17:29:57.861Z" },
{ url = "https://files.pythonhosted.org/packages/6f/16/7decaa24a1bd3a70c607b2e29f0adc6159f36a7e40eaba59846414765fd4/fonttools-4.61.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:f3cb4a569029b9f291f88aafc927dd53683757e640081ca8c412781ea144565e", size = 2851593, upload-time = "2025-12-12T17:30:04.225Z" },
{ url = "https://files.pythonhosted.org/packages/94/98/3c4cb97c64713a8cf499b3245c3bf9a2b8fd16a3e375feff2aed78f96259/fonttools-4.61.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:41a7170d042e8c0024703ed13b71893519a1a6d6e18e933e3ec7507a2c26a4b2", size = 2400231, upload-time = "2025-12-12T17:30:06.47Z" },
{ url = "https://files.pythonhosted.org/packages/b7/37/82dbef0f6342eb01f54bca073ac1498433d6ce71e50c3c3282b655733b31/fonttools-4.61.1-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:10d88e55330e092940584774ee5e8a6971b01fc2f4d3466a1d6c158230880796", size = 4954103, upload-time = "2025-12-12T17:30:08.432Z" },
{ url = "https://files.pythonhosted.org/packages/6c/44/f3aeac0fa98e7ad527f479e161aca6c3a1e47bb6996b053d45226fe37bf2/fonttools-4.61.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:15acc09befd16a0fb8a8f62bc147e1a82817542d72184acca9ce6e0aeda9fa6d", size = 5004295, upload-time = "2025-12-12T17:30:10.56Z" },
{ url = "https://files.pythonhosted.org/packages/14/e8/7424ced75473983b964d09f6747fa09f054a6d656f60e9ac9324cf40c743/fonttools-4.61.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e6bcdf33aec38d16508ce61fd81838f24c83c90a1d1b8c68982857038673d6b8", size = 4944109, upload-time = "2025-12-12T17:30:12.874Z" },
{ url = "https://files.pythonhosted.org/packages/c8/8b/6391b257fa3d0b553d73e778f953a2f0154292a7a7a085e2374b111e5410/fonttools-4.61.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5fade934607a523614726119164ff621e8c30e8fa1ffffbbd358662056ba69f0", size = 5093598, upload-time = "2025-12-12T17:30:15.79Z" },
{ url = "https://files.pythonhosted.org/packages/4b/cf/00ba28b0990982530addb8dc3e9e6f2fa9cb5c20df2abdda7baa755e8fe1/fonttools-4.61.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:8c56c488ab471628ff3bfa80964372fc13504ece601e0d97a78ee74126b2045c", size = 2846454, upload-time = "2025-12-12T17:30:24.938Z" },
{ url = "https://files.pythonhosted.org/packages/5a/ca/468c9a8446a2103ae645d14fee3f610567b7042aba85031c1c65e3ef7471/fonttools-4.61.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:dc492779501fa723b04d0ab1f5be046797fee17d27700476edc7ee9ae535a61e", size = 2398191, upload-time = "2025-12-12T17:30:27.343Z" },
{ url = "https://files.pythonhosted.org/packages/a3/4b/d67eedaed19def5967fade3297fed8161b25ba94699efc124b14fb68cdbc/fonttools-4.61.1-cp313-cp313-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:64102ca87e84261419c3747a0d20f396eb024bdbeb04c2bfb37e2891f5fadcb5", size = 4928410, upload-time = "2025-12-12T17:30:29.771Z" },
{ url = "https://files.pythonhosted.org/packages/b0/8d/6fb3494dfe61a46258cd93d979cf4725ded4eb46c2a4ca35e4490d84daea/fonttools-4.61.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4c1b526c8d3f615a7b1867f38a9410849c8f4aef078535742198e942fba0e9bd", size = 4984460, upload-time = "2025-12-12T17:30:32.073Z" },
{ url = "https://files.pythonhosted.org/packages/f7/f1/a47f1d30b3dc00d75e7af762652d4cbc3dff5c2697a0dbd5203c81afd9c3/fonttools-4.61.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:41ed4b5ec103bd306bb68f81dc166e77409e5209443e5773cb4ed837bcc9b0d3", size = 4925800, upload-time = "2025-12-12T17:30:34.339Z" },
{ url = "https://files.pythonhosted.org/packages/a7/01/e6ae64a0981076e8a66906fab01539799546181e32a37a0257b77e4aa88b/fonttools-4.61.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b501c862d4901792adaec7c25b1ecc749e2662543f68bb194c42ba18d6eec98d", size = 5067859, upload-time = "2025-12-12T17:30:36.593Z" },
{ url = "https://files.pythonhosted.org/packages/32/8f/4e7bf82c0cbb738d3c2206c920ca34ca74ef9dabde779030145d28665104/fonttools-4.61.1-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:fff4f534200a04b4a36e7ae3cb74493afe807b517a09e99cb4faa89a34ed6ecd", size = 2846094, upload-time = "2025-12-12T17:30:43.511Z" },
{ url = "https://files.pythonhosted.org/packages/71/09/d44e45d0a4f3a651f23a1e9d42de43bc643cce2971b19e784cc67d823676/fonttools-4.61.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:d9203500f7c63545b4ce3799319fe4d9feb1a1b89b28d3cb5abd11b9dd64147e", size = 2396589, upload-time = "2025-12-12T17:30:45.681Z" },
{ url = "https://files.pythonhosted.org/packages/89/18/58c64cafcf8eb677a99ef593121f719e6dcbdb7d1c594ae5a10d4997ca8a/fonttools-4.61.1-cp314-cp314-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:fa646ecec9528bef693415c79a86e733c70a4965dd938e9a226b0fc64c9d2e6c", size = 4877892, upload-time = "2025-12-12T17:30:47.709Z" },
{ url = "https://files.pythonhosted.org/packages/8a/ec/9e6b38c7ba1e09eb51db849d5450f4c05b7e78481f662c3b79dbde6f3d04/fonttools-4.61.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:11f35ad7805edba3aac1a3710d104592df59f4b957e30108ae0ba6c10b11dd75", size = 4972884, upload-time = "2025-12-12T17:30:49.656Z" },
{ url = "https://files.pythonhosted.org/packages/5e/87/b5339da8e0256734ba0dbbf5b6cdebb1dd79b01dc8c270989b7bcd465541/fonttools-4.61.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:b931ae8f62db78861b0ff1ac017851764602288575d65b8e8ff1963fed419063", size = 4924405, upload-time = "2025-12-12T17:30:51.735Z" },
{ url = "https://files.pythonhosted.org/packages/0b/47/e3409f1e1e69c073a3a6fd8cb886eb18c0bae0ee13db2c8d5e7f8495e8b7/fonttools-4.61.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:b148b56f5de675ee16d45e769e69f87623a4944f7443850bf9a9376e628a89d2", size = 5035553, upload-time = "2025-12-12T17:30:54.823Z" },
{ url = "https://files.pythonhosted.org/packages/39/5c/908ad78e46c61c3e3ed70c3b58ff82ab48437faf84ec84f109592cabbd9f/fonttools-4.61.1-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:e76ce097e3c57c4bcb67c5aa24a0ecdbd9f74ea9219997a707a4061fbe2707aa", size = 2929571, upload-time = "2025-12-12T17:31:02.574Z" },
{ url = "https://files.pythonhosted.org/packages/bd/41/975804132c6dea64cdbfbaa59f3518a21c137a10cccf962805b301ac6ab2/fonttools-4.61.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:9cfef3ab326780c04d6646f68d4b4742aae222e8b8ea1d627c74e38afcbc9d91", size = 2435317, upload-time = "2025-12-12T17:31:04.974Z" },
{ url = "https://files.pythonhosted.org/packages/b0/5a/aef2a0a8daf1ebaae4cfd83f84186d4a72ee08fd6a8451289fcd03ffa8a4/fonttools-4.61.1-cp314-cp314t-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:a75c301f96db737e1c5ed5fd7d77d9c34466de16095a266509e13da09751bd19", size = 4882124, upload-time = "2025-12-12T17:31:07.456Z" },
{ url = "https://files.pythonhosted.org/packages/80/33/d6db3485b645b81cea538c9d1c9219d5805f0877fda18777add4671c5240/fonttools-4.61.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:91669ccac46bbc1d09e9273546181919064e8df73488ea087dcac3e2968df9ba", size = 5100391, upload-time = "2025-12-12T17:31:09.732Z" },
{ url = "https://files.pythonhosted.org/packages/6c/d6/675ba631454043c75fcf76f0ca5463eac8eb0666ea1d7badae5fea001155/fonttools-4.61.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:c33ab3ca9d3ccd581d58e989d67554e42d8d4ded94ab3ade3508455fe70e65f7", size = 4978800, upload-time = "2025-12-12T17:31:11.681Z" },
{ url = "https://files.pythonhosted.org/packages/7f/33/d3ec753d547a8d2bdaedd390d4a814e8d5b45a093d558f025c6b990b554c/fonttools-4.61.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:664c5a68ec406f6b1547946683008576ef8b38275608e1cee6c061828171c118", size = 5006426, upload-time = "2025-12-12T17:31:13.764Z" },
{ url = "https://files.pythonhosted.org/packages/c7/4e/ce75a57ff3aebf6fc1f4e9d508b8e5810618a33d900ad6c19eb30b290b97/fonttools-4.61.1-py3-none-any.whl", hash = "sha256:17d2bf5d541add43822bcf0c43d7d847b160c9bb01d15d5007d84e2217aaa371", size = 1148996, upload-time = "2025-12-12T17:31:21.03Z" },
]
[[package]]
name = "fpdf2"
version = "2.8.7"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "defusedxml", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "fonttools", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "pillow", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/27/f2/72feae0b2827ed38013e4307b14f95bf0b3d124adfef4d38a7d57533f7be/fpdf2-2.8.7.tar.gz", hash = "sha256:7060ccee5a9c7ab0a271fb765a36a23639f83ef8996c34e3d46af0a17ede57f9", size = 362351, upload-time = "2026-02-28T05:39:16.456Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/66/0a/cf50ecffa1e3747ed9380a3adfc829259f1f86b3fdbd9e505af789003141/fpdf2-2.8.7-py3-none-any.whl", hash = "sha256:d391fc508a3ce02fc43a577c830cda4fe6f37646f2d143d489839940932fbc19", size = 327056, upload-time = "2026-02-28T05:39:14.619Z" },
]
[[package]]
name = "frozenlist"
version = "1.8.0"
@@ -2655,10 +2717,11 @@ wheels = [
[[package]]
name = "ocrmypdf"
version = "16.13.0"
version = "17.3.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "deprecation", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "fpdf2", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "img2pdf", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "packaging", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "pdfminer-six", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -2666,11 +2729,14 @@ dependencies = [
{ name = "pikepdf", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "pillow", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "pluggy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "pydantic", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "pypdfium2", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "rich", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "uharfbuzz", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/8c/52/be1aaece0703a736757d8957c0d4f19c37561054169b501eb0e7132f15e5/ocrmypdf-16.13.0.tar.gz", hash = "sha256:29d37e915234ce717374863a9cc5dd32d29e063dfe60c51380dda71254c88248", size = 7042247, upload-time = "2025-12-24T07:58:35.86Z" }
sdist = { url = "https://files.pythonhosted.org/packages/fa/fe/60bdc79529be1ad8b151d426ed2020d5ac90328c54e9ba92bd808e1535c1/ocrmypdf-17.3.0.tar.gz", hash = "sha256:4022f13aad3f405e330056a07aa8bd63714b48b414693831b56e2cf2c325f52d", size = 7378015, upload-time = "2026-02-21T09:30:07.207Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/41/b1/e2e7ad98de0d3ee05b44dbc3f78ccb158a620f3add82d00c85490120e7f2/ocrmypdf-16.13.0-py3-none-any.whl", hash = "sha256:fad8a6f7cc52cdc6225095c401a1766c778c47efe9f1e854ae4dc64a550a3d37", size = 165377, upload-time = "2025-12-24T07:58:33.925Z" },
{ url = "https://files.pythonhosted.org/packages/3d/b1/b7ae057a1bcb1495067ee3c4d48c1ce5fc66addd9492307c5a0ff799a7f2/ocrmypdf-17.3.0-py3-none-any.whl", hash = "sha256:c8882e7864954d3db6bcee49cc9f261b65bff66b7e5925eb68a1c281f41cad23", size = 488130, upload-time = "2026-02-21T09:30:05.236Z" },
]
[[package]]
@@ -2909,7 +2975,7 @@ requires-dist = [
{ name = "llama-index-vector-stores-faiss", specifier = ">=0.5.2" },
{ name = "mysqlclient", marker = "extra == 'mariadb'", specifier = "~=2.2.7" },
{ name = "nltk", specifier = "~=3.9.1" },
{ name = "ocrmypdf", specifier = "~=16.13.0" },
{ name = "ocrmypdf", specifier = "~=17.3.0" },
{ name = "openai", specifier = ">=1.76" },
{ name = "pathvalidate", specifier = "~=3.3.1" },
{ name = "pdf2image", specifier = "~=1.17.0" },
@@ -3586,6 +3652,30 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/d1/81/ef2b1dfd1862567d573a4fdbc9f969067621764fbb74338496840a1d2977/pyopenssl-25.3.0-py3-none-any.whl", hash = "sha256:1fda6fc034d5e3d179d39e59c1895c9faeaf40a79de5fc4cbbfbe0d36f4a77b6", size = 57268, upload-time = "2025-09-17T00:32:19.474Z" },
]
[[package]]
name = "pypdfium2"
version = "5.5.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/fb/f6/42f5f1b9beb7e036f5532832b9c590fd107c52a78f704302c03bc6793954/pypdfium2-5.5.0.tar.gz", hash = "sha256:3283c61f54c3c546d140da201ef48a51c18b0ad54293091a010029ac13ece23a", size = 270502, upload-time = "2026-02-18T23:22:37.643Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/bc/a9/379ec56c4481f39f0e37a7ce42f4844e6ddd7662571922e2b348105960ab/pypdfium2-5.5.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:0770bd3f0be5c68443fc4017e43b1b1fe8f36877481cab70fd29b68b2c362e1b", size = 2815036, upload-time = "2026-02-18T23:22:00.288Z" },
{ url = "https://files.pythonhosted.org/packages/91/a4/b0cc01aaae1fdf1ca4e080cc55bb432f5a2234f33209a602bc498a47850d/pypdfium2-5.5.0-py3-none-macosx_11_0_x86_64.whl", hash = "sha256:5ab41a3b9953d9be44be35c36a2340f1d67c602db98a0d6f70006610871ae43a", size = 2948686, upload-time = "2026-02-18T23:22:02.213Z" },
{ url = "https://files.pythonhosted.org/packages/26/99/25a0c71b551d100b505c618910afec0df402b230e087078c8078f8b1fcff/pypdfium2-5.5.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2492a22c3126a004cee2fa208ea4aa03ede2c7e205d05814934ab18f83d073e9", size = 2977311, upload-time = "2026-02-18T23:22:03.603Z" },
{ url = "https://files.pythonhosted.org/packages/85/64/691e21539566f7a0521295948b5589d2fdfe3df5acab9c29ff410633a839/pypdfium2-5.5.0-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:83ff93e08b1fadb00040564e2eccc99147fc1a632ba5daff745126b373d78446", size = 2762449, upload-time = "2026-02-18T23:22:05.044Z" },
{ url = "https://files.pythonhosted.org/packages/74/b1/9af288557291e2964bf5ffd460b7ed1090fcb8c54addfd6c7c5deb9ba7c7/pypdfium2-5.5.0-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b7e85de3332bedf8e5f157c248063b4eaf968660e1e490353b6e581d9f96a4c6", size = 3074851, upload-time = "2026-02-18T23:22:07.431Z" },
{ url = "https://files.pythonhosted.org/packages/a4/1e/c61fddbdea5ea1ba478dc7ecc9d68069d17b858e5fed04e4e071811f0858/pypdfium2-5.5.0-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e258365f34b6e334bb415e44dd9b1ee78a6e525bf854a1e74af67af7ede7555b", size = 3423003, upload-time = "2026-02-18T23:22:09.749Z" },
{ url = "https://files.pythonhosted.org/packages/36/5f/d2eb58c54abba3a6c3bc4c297b3a11348dd4b4deb073f1aa8a872a298278/pypdfium2-5.5.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bec21d833404ca771f02fa5cefb0b73e2148f05cbdb3b5b9989bdd51d9b5cbac", size = 3002104, upload-time = "2026-02-18T23:22:12.035Z" },
{ url = "https://files.pythonhosted.org/packages/1c/33/87423eec4f5d4287d5a1726dbb9f06fb1f1aebc38ff75dcff817c492769d/pypdfium2-5.5.0-py3-none-manylinux_2_27_s390x.manylinux_2_28_s390x.whl", hash = "sha256:1dd6ccbe1b5e2e778e8b021e47f9485b4fd42eaa6c9bdda2631641724e1fcc04", size = 3097209, upload-time = "2026-02-18T23:22:13.809Z" },
{ url = "https://files.pythonhosted.org/packages/97/0a/a3fd71f00838bba7922691107219bee67f50fbda6d12df330ef485a97848/pypdfium2-5.5.0-py3-none-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:da3eada345570cec5e34872d1472d4ac542f0e650ccdb6c2eac08ae1a5f07c82", size = 2965027, upload-time = "2026-02-18T23:22:16.324Z" },
{ url = "https://files.pythonhosted.org/packages/75/4a/2181260bd8a0b1b30ac50b7fd6ee3366e04f3a9f1c29351d882652da7fa7/pypdfium2-5.5.0-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:a087fb4088c7433fd3d78833dbe42cfb66df3d5ac98e3edf66110520fb33c0f0", size = 4131431, upload-time = "2026-02-18T23:22:18.469Z" },
{ url = "https://files.pythonhosted.org/packages/15/bb/3ccf481191346eda11c0c208bd4e46f8de019ae7d9e9c1b660633f0bb3f4/pypdfium2-5.5.0-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:e6418cdc500ef85a90319f9bc7f1c54fc133460379f509429403225d8a4c157f", size = 3747468, upload-time = "2026-02-18T23:22:20.679Z" },
{ url = "https://files.pythonhosted.org/packages/15/51/17e50ec72cf2235ac18d9cbe907859501c769d3e964818fefac6a3e10727/pypdfium2-5.5.0-py3-none-musllinux_1_2_i686.whl", hash = "sha256:8f7b66eedfac26eb2df4b00936e081b0a1c76fb8ee1c12639d85c2e73b0769ef", size = 4337579, upload-time = "2026-02-18T23:22:23.245Z" },
{ url = "https://files.pythonhosted.org/packages/c6/e4/f9bdf06f4d3f1e56eff9d997392a00a4b66cbc9c20f33934c4edc2a7943f/pypdfium2-5.5.0-py3-none-musllinux_1_2_ppc64le.whl", hash = "sha256:faea3246591ce2ea6218cd06679071275e3c65f11c3f5c9091eb7fb07610af6a", size = 4376104, upload-time = "2026-02-18T23:22:25.337Z" },
{ url = "https://files.pythonhosted.org/packages/8c/20/06baf1f5d494e035f50fc895fa1da5ed652d03ecc59aeb3aabb0daa5adfc/pypdfium2-5.5.0-py3-none-musllinux_1_2_riscv64.whl", hash = "sha256:aba26d404b51a9de3d3e80c867a95c71abf1c79552001ae22707451e59186b3d", size = 3929824, upload-time = "2026-02-18T23:22:26.889Z" },
{ url = "https://files.pythonhosted.org/packages/3a/01/28940e54e6936674e9a05eb58ccce7c54d8e2ac81cd84ec0b76e7d32a010/pypdfium2-5.5.0-py3-none-musllinux_1_2_s390x.whl", hash = "sha256:e0fa8f81679e6e71f26806f4db853571ee6435dc3bde7a46acdd182ef886a5b9", size = 4270200, upload-time = "2026-02-18T23:22:28.668Z" },
{ url = "https://files.pythonhosted.org/packages/cb/d4/1f36c505a3770aad9a88c895a46d61fd4c0535f79548f02c93b97ff89604/pypdfium2-5.5.0-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:ee22df3376d350eeb64d2002a1071e3a02c0d874c557a3cd8229a8fc572cdaac", size = 4180794, upload-time = "2026-02-18T23:22:30.11Z" },
]
[[package]]
name = "pyrefly"
version = "0.54.0"
@@ -5072,6 +5162,23 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/b1/5e/512aeb40fd819f4660d00f96f5c7371ee36fc8c6b605128c5ee59e0b28c6/u_msgpack_python-2.8.0-py2.py3-none-any.whl", hash = "sha256:1d853d33e78b72c4228a2025b4db28cda81214076e5b0422ed0ae1b1b2bb586a", size = 10590, upload-time = "2023-05-18T09:28:10.323Z" },
]
[[package]]
name = "uharfbuzz"
version = "0.53.3"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/1c/8d/7c82298bfa5c96f018541661bc2ccdf90dfe397bb2724db46725bf495466/uharfbuzz-0.53.3.tar.gz", hash = "sha256:9a87175c14d1361322ce2a3504e63c6b66062934a5edf47266aed5b33416806c", size = 1714488, upload-time = "2026-01-24T13:10:43.693Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/51/88/5df9337adb60d7b1ad150b162bbc5c56d783d15546714085d92b9531f8f3/uharfbuzz-0.53.3-cp310-abi3-macosx_10_9_universal2.whl", hash = "sha256:d977e41a501d9e8af3f2c329d75031037ee79634bc29ca3872e9115c44e67d25", size = 2722639, upload-time = "2026-01-24T13:10:22.436Z" },
{ url = "https://files.pythonhosted.org/packages/39/c4/8b4b050e77d6cb9a84af509e5796734f0e687bd02ad11757a581bd6f197d/uharfbuzz-0.53.3-cp310-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:21d512c94aa992691aaf5b433deaca7e51f4ea54c68b99f535974073364f806f", size = 1647506, upload-time = "2026-01-24T13:10:24.16Z" },
{ url = "https://files.pythonhosted.org/packages/30/ff/8e7cf78d525604f3e0a43b9468263fcf2acb5d208a3979c3bfa8dc61112d/uharfbuzz-0.53.3-cp310-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dca9a2e071c0c59ba8f382356f31a2518ac3dc7cc77e4f3519defc454c5b9a97", size = 1706448, upload-time = "2026-01-24T13:10:25.729Z" },
{ url = "https://files.pythonhosted.org/packages/9b/a0/739471cdd52723ecc9fc80f36fb92c706a87265dc258521c1b14d99414f7/uharfbuzz-0.53.3-cp310-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:1191a74ddcf18ec721161b6b33a8ab31b0c6a2b15c6724a9b663127bf7f07d2e", size = 2664628, upload-time = "2026-01-24T13:10:27.814Z" },
{ url = "https://files.pythonhosted.org/packages/ae/4a/63a81e9eef922b9f26bd948b518b73704d01a8d8e83324b2f99084ab7af0/uharfbuzz-0.53.3-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:35ec3b600b3f63e7659792f9bd43e1ffb389d3d2aac8285f269d11efbe04787d", size = 2757384, upload-time = "2026-01-24T13:10:29.669Z" },
{ url = "https://files.pythonhosted.org/packages/e2/d2/27be1201488323d0ff0c99fb966a0522b2736f79bd5a5b7b99526fca3d98/uharfbuzz-0.53.3-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:6f0ad2812303d2c7ccff596fd6c9d5629874f3a83f30255e11639c9b7ba4e89d", size = 1335822, upload-time = "2026-01-24T13:10:34.774Z" },
{ url = "https://files.pythonhosted.org/packages/70/99/53e39bcd4dec5981eb70a6a76285a862c8a76b80cd52e8f40fe51adab032/uharfbuzz-0.53.3-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:757d9ed1841912e8f229319f335cf7dd25a2fd377e444bda9deb720617192e12", size = 1237560, upload-time = "2026-01-24T13:10:36.971Z" },
{ url = "https://files.pythonhosted.org/packages/aa/2b/04d8cde466acfe70373d4f489da5c6eab0aba07d50442dd21217cb0fd167/uharfbuzz-0.53.3-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5d3a0b824811bd1be129356818e6cdbf0e4b056bb60aa9a5eb270bff9d21f24c", size = 1497923, upload-time = "2026-01-24T13:10:38.743Z" },
{ url = "https://files.pythonhosted.org/packages/f3/01/a250521491bc995609275e0062c552b16f437a3ce15de83250176245093e/uharfbuzz-0.53.3-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9211d798b2921a99b8c34e810676137f66372d3b5447765b72d969bdfa6abe6a", size = 1556794, upload-time = "2026-01-24T13:10:40.262Z" },
]
[[package]]
name = "ujson"
version = "5.11.0"