mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-03-07 09:41:22 +00:00
Compare commits
5 Commits
dependabot
...
chore/lazy
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
22e5ac58ba | ||
|
|
91ddda9256 | ||
|
|
9d5e618de8 | ||
|
|
50ae49c7da | ||
|
|
ba023ef332 |
12
.github/workflows/ci-docker.yml
vendored
12
.github/workflows/ci-docker.yml
vendored
@@ -149,15 +149,16 @@ jobs:
|
||||
mkdir -p /tmp/digests
|
||||
digest="${{ steps.build.outputs.digest }}"
|
||||
echo "digest=${digest}"
|
||||
touch "/tmp/digests/${digest#sha256:}"
|
||||
echo "${digest}" > "/tmp/digests/digest-${{ matrix.arch }}.txt"
|
||||
- name: Upload digest
|
||||
if: steps.check-push.outputs.should-push == 'true'
|
||||
uses: actions/upload-artifact@v7.0.0
|
||||
with:
|
||||
name: digests-${{ matrix.arch }}
|
||||
path: /tmp/digests/*
|
||||
path: /tmp/digests/digest-${{ matrix.arch }}.txt
|
||||
if-no-files-found: error
|
||||
retention-days: 1
|
||||
archive: false
|
||||
merge-and-push:
|
||||
name: Merge and Push Manifest
|
||||
runs-on: ubuntu-24.04
|
||||
@@ -171,7 +172,7 @@ jobs:
|
||||
uses: actions/download-artifact@v8.0.0
|
||||
with:
|
||||
path: /tmp/digests
|
||||
pattern: digests-*
|
||||
pattern: digest-*.txt
|
||||
merge-multiple: true
|
||||
- name: List digests
|
||||
run: |
|
||||
@@ -217,8 +218,9 @@ jobs:
|
||||
tags=$(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "${DOCKER_METADATA_OUTPUT_JSON}")
|
||||
|
||||
digests=""
|
||||
for digest in *; do
|
||||
digests+="${{ env.REGISTRY }}/${REPOSITORY}@sha256:${digest} "
|
||||
for digest_file in digest-*.txt; do
|
||||
digest=$(cat "${digest_file}")
|
||||
digests+="${{ env.REGISTRY }}/${REPOSITORY}@${digest} "
|
||||
done
|
||||
|
||||
echo "Creating manifest with tags: ${tags}"
|
||||
|
||||
17
.github/workflows/pr-bot.yml
vendored
17
.github/workflows/pr-bot.yml
vendored
@@ -2,13 +2,24 @@ name: PR Bot
|
||||
on:
|
||||
pull_request_target:
|
||||
types: [opened]
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: write
|
||||
jobs:
|
||||
anti-slop:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
issues: read
|
||||
pull-requests: write
|
||||
steps:
|
||||
- uses: peakoss/anti-slop@v0.2.1
|
||||
with:
|
||||
max-failures: 4
|
||||
failure-add-pr-labels: 'ai'
|
||||
pr-bot:
|
||||
name: Automated PR Bot
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: write
|
||||
steps:
|
||||
- name: Label PR by file path or branch name
|
||||
# see .github/labeler.yml for the labeler config
|
||||
|
||||
@@ -1,107 +1,100 @@
|
||||
from unittest import mock
|
||||
import logging
|
||||
|
||||
import pytest
|
||||
from allauth.account.adapter import get_adapter
|
||||
from allauth.core import context
|
||||
from allauth.socialaccount.adapter import get_adapter as get_social_adapter
|
||||
from django.conf import settings
|
||||
from django.contrib.auth.models import AnonymousUser
|
||||
from django.contrib.auth.models import Group
|
||||
from django.contrib.auth.models import User
|
||||
from django.forms import ValidationError
|
||||
from django.http import HttpRequest
|
||||
from django.test import TestCase
|
||||
from django.test import override_settings
|
||||
from django.urls import reverse
|
||||
from pytest_django.fixtures import SettingsWrapper
|
||||
from pytest_mock import MockerFixture
|
||||
from rest_framework.authtoken.models import Token
|
||||
|
||||
from paperless.adapter import DrfTokenStrategy
|
||||
|
||||
|
||||
class TestCustomAccountAdapter(TestCase):
|
||||
def test_is_open_for_signup(self) -> None:
|
||||
@pytest.mark.django_db
|
||||
class TestCustomAccountAdapter:
|
||||
def test_is_open_for_signup(self, settings: SettingsWrapper) -> None:
|
||||
adapter = get_adapter()
|
||||
|
||||
# With no accounts, signups should be allowed
|
||||
self.assertTrue(adapter.is_open_for_signup(None))
|
||||
assert adapter.is_open_for_signup(None)
|
||||
|
||||
User.objects.create_user("testuser")
|
||||
|
||||
# Test when ACCOUNT_ALLOW_SIGNUPS is True
|
||||
settings.ACCOUNT_ALLOW_SIGNUPS = True
|
||||
self.assertTrue(adapter.is_open_for_signup(None))
|
||||
assert adapter.is_open_for_signup(None)
|
||||
|
||||
# Test when ACCOUNT_ALLOW_SIGNUPS is False
|
||||
settings.ACCOUNT_ALLOW_SIGNUPS = False
|
||||
self.assertFalse(adapter.is_open_for_signup(None))
|
||||
assert not adapter.is_open_for_signup(None)
|
||||
|
||||
def test_is_safe_url(self) -> None:
|
||||
def test_is_safe_url(self, settings: SettingsWrapper) -> None:
|
||||
request = HttpRequest()
|
||||
request.get_host = mock.Mock(return_value="example.com")
|
||||
request.get_host = lambda: "example.com"
|
||||
with context.request_context(request):
|
||||
adapter = get_adapter()
|
||||
with override_settings(ALLOWED_HOSTS=["*"]):
|
||||
# True because request host is same
|
||||
url = "https://example.com"
|
||||
self.assertTrue(adapter.is_safe_url(url))
|
||||
|
||||
url = "https://evil.com"
|
||||
settings.ALLOWED_HOSTS = ["*"]
|
||||
# True because request host is same
|
||||
assert adapter.is_safe_url("https://example.com")
|
||||
# False despite wildcard because request host is different
|
||||
self.assertFalse(adapter.is_safe_url(url))
|
||||
assert not adapter.is_safe_url("https://evil.com")
|
||||
|
||||
settings.ALLOWED_HOSTS = ["example.com"]
|
||||
url = "https://example.com"
|
||||
# True because request host is same
|
||||
self.assertTrue(adapter.is_safe_url(url))
|
||||
assert adapter.is_safe_url("https://example.com")
|
||||
|
||||
settings.ALLOWED_HOSTS = ["*", "example.com"]
|
||||
url = "//evil.com"
|
||||
# False because request host is not in allowed hosts
|
||||
self.assertFalse(adapter.is_safe_url(url))
|
||||
assert not adapter.is_safe_url("//evil.com")
|
||||
|
||||
@mock.patch("allauth.core.internal.ratelimit.consume", return_value=True)
|
||||
def test_pre_authenticate(self, mock_consume) -> None:
|
||||
def test_pre_authenticate(
|
||||
self,
|
||||
settings: SettingsWrapper,
|
||||
mocker: MockerFixture,
|
||||
) -> None:
|
||||
mocker.patch("allauth.core.internal.ratelimit.consume", return_value=True)
|
||||
adapter = get_adapter()
|
||||
request = HttpRequest()
|
||||
request.get_host = mock.Mock(return_value="example.com")
|
||||
request.get_host = lambda: "example.com"
|
||||
|
||||
settings.DISABLE_REGULAR_LOGIN = False
|
||||
adapter.pre_authenticate(request)
|
||||
|
||||
settings.DISABLE_REGULAR_LOGIN = True
|
||||
with self.assertRaises(ValidationError):
|
||||
with pytest.raises(ValidationError):
|
||||
adapter.pre_authenticate(request)
|
||||
|
||||
def test_get_reset_password_from_key_url(self) -> None:
|
||||
def test_get_reset_password_from_key_url(self, settings: SettingsWrapper) -> None:
|
||||
request = HttpRequest()
|
||||
request.get_host = mock.Mock(return_value="foo.org")
|
||||
request.get_host = lambda: "foo.org"
|
||||
with context.request_context(request):
|
||||
adapter = get_adapter()
|
||||
|
||||
# Test when PAPERLESS_URL is None
|
||||
with override_settings(
|
||||
PAPERLESS_URL=None,
|
||||
ACCOUNT_DEFAULT_HTTP_PROTOCOL="https",
|
||||
):
|
||||
expected_url = f"https://foo.org{reverse('account_reset_password_from_key', kwargs={'uidb36': 'UID', 'key': 'KEY'})}"
|
||||
self.assertEqual(
|
||||
adapter.get_reset_password_from_key_url("UID-KEY"),
|
||||
expected_url,
|
||||
)
|
||||
settings.PAPERLESS_URL = None
|
||||
settings.ACCOUNT_DEFAULT_HTTP_PROTOCOL = "https"
|
||||
expected_url = f"https://foo.org{reverse('account_reset_password_from_key', kwargs={'uidb36': 'UID', 'key': 'KEY'})}"
|
||||
assert adapter.get_reset_password_from_key_url("UID-KEY") == expected_url
|
||||
|
||||
# Test when PAPERLESS_URL is not None
|
||||
with override_settings(PAPERLESS_URL="https://bar.com"):
|
||||
expected_url = f"https://bar.com{reverse('account_reset_password_from_key', kwargs={'uidb36': 'UID', 'key': 'KEY'})}"
|
||||
self.assertEqual(
|
||||
adapter.get_reset_password_from_key_url("UID-KEY"),
|
||||
expected_url,
|
||||
)
|
||||
settings.PAPERLESS_URL = "https://bar.com"
|
||||
expected_url = f"https://bar.com{reverse('account_reset_password_from_key', kwargs={'uidb36': 'UID', 'key': 'KEY'})}"
|
||||
assert adapter.get_reset_password_from_key_url("UID-KEY") == expected_url
|
||||
|
||||
@override_settings(ACCOUNT_DEFAULT_GROUPS=["group1", "group2"])
|
||||
def test_save_user_adds_groups(self) -> None:
|
||||
def test_save_user_adds_groups(
|
||||
self,
|
||||
settings: SettingsWrapper,
|
||||
mocker: MockerFixture,
|
||||
) -> None:
|
||||
settings.ACCOUNT_DEFAULT_GROUPS = ["group1", "group2"]
|
||||
Group.objects.create(name="group1")
|
||||
user = User.objects.create_user("testuser")
|
||||
adapter = get_adapter()
|
||||
form = mock.Mock(
|
||||
form = mocker.MagicMock(
|
||||
cleaned_data={
|
||||
"username": "testuser",
|
||||
"email": "user@example.com",
|
||||
@@ -110,88 +103,81 @@ class TestCustomAccountAdapter(TestCase):
|
||||
|
||||
user = adapter.save_user(HttpRequest(), user, form, commit=True)
|
||||
|
||||
self.assertEqual(user.groups.count(), 1)
|
||||
self.assertTrue(user.groups.filter(name="group1").exists())
|
||||
self.assertFalse(user.groups.filter(name="group2").exists())
|
||||
assert user.groups.count() == 1
|
||||
assert user.groups.filter(name="group1").exists()
|
||||
assert not user.groups.filter(name="group2").exists()
|
||||
|
||||
def test_fresh_install_save_creates_superuser(self) -> None:
|
||||
def test_fresh_install_save_creates_superuser(self, mocker: MockerFixture) -> None:
|
||||
adapter = get_adapter()
|
||||
form = mock.Mock(
|
||||
form = mocker.MagicMock(
|
||||
cleaned_data={
|
||||
"username": "testuser",
|
||||
"email": "user@paperless-ngx.com",
|
||||
},
|
||||
)
|
||||
user = adapter.save_user(HttpRequest(), User(), form, commit=True)
|
||||
self.assertTrue(user.is_superuser)
|
||||
assert user.is_superuser
|
||||
|
||||
# Next time, it should not create a superuser
|
||||
form = mock.Mock(
|
||||
form = mocker.MagicMock(
|
||||
cleaned_data={
|
||||
"username": "testuser2",
|
||||
"email": "user2@paperless-ngx.com",
|
||||
},
|
||||
)
|
||||
user2 = adapter.save_user(HttpRequest(), User(), form, commit=True)
|
||||
self.assertFalse(user2.is_superuser)
|
||||
assert not user2.is_superuser
|
||||
|
||||
|
||||
class TestCustomSocialAccountAdapter(TestCase):
|
||||
def test_is_open_for_signup(self) -> None:
|
||||
class TestCustomSocialAccountAdapter:
|
||||
@pytest.mark.django_db
|
||||
def test_is_open_for_signup(self, settings: SettingsWrapper) -> None:
|
||||
adapter = get_social_adapter()
|
||||
|
||||
# Test when SOCIALACCOUNT_ALLOW_SIGNUPS is True
|
||||
settings.SOCIALACCOUNT_ALLOW_SIGNUPS = True
|
||||
self.assertTrue(adapter.is_open_for_signup(None, None))
|
||||
assert adapter.is_open_for_signup(None, None)
|
||||
|
||||
# Test when SOCIALACCOUNT_ALLOW_SIGNUPS is False
|
||||
settings.SOCIALACCOUNT_ALLOW_SIGNUPS = False
|
||||
self.assertFalse(adapter.is_open_for_signup(None, None))
|
||||
assert not adapter.is_open_for_signup(None, None)
|
||||
|
||||
def test_get_connect_redirect_url(self) -> None:
|
||||
adapter = get_social_adapter()
|
||||
request = None
|
||||
socialaccount = None
|
||||
assert adapter.get_connect_redirect_url(None, None) == reverse("base")
|
||||
|
||||
# Test the default URL
|
||||
expected_url = reverse("base")
|
||||
self.assertEqual(
|
||||
adapter.get_connect_redirect_url(request, socialaccount),
|
||||
expected_url,
|
||||
)
|
||||
|
||||
@override_settings(SOCIAL_ACCOUNT_DEFAULT_GROUPS=["group1", "group2"])
|
||||
def test_save_user_adds_groups(self) -> None:
|
||||
@pytest.mark.django_db
|
||||
def test_save_user_adds_groups(
|
||||
self,
|
||||
settings: SettingsWrapper,
|
||||
mocker: MockerFixture,
|
||||
) -> None:
|
||||
settings.SOCIAL_ACCOUNT_DEFAULT_GROUPS = ["group1", "group2"]
|
||||
Group.objects.create(name="group1")
|
||||
adapter = get_social_adapter()
|
||||
request = HttpRequest()
|
||||
user = User.objects.create_user("testuser")
|
||||
sociallogin = mock.Mock(
|
||||
user=user,
|
||||
)
|
||||
sociallogin = mocker.MagicMock(user=user)
|
||||
|
||||
user = adapter.save_user(request, sociallogin, None)
|
||||
user = adapter.save_user(HttpRequest(), sociallogin, None)
|
||||
|
||||
self.assertEqual(user.groups.count(), 1)
|
||||
self.assertTrue(user.groups.filter(name="group1").exists())
|
||||
self.assertFalse(user.groups.filter(name="group2").exists())
|
||||
assert user.groups.count() == 1
|
||||
assert user.groups.filter(name="group1").exists()
|
||||
assert not user.groups.filter(name="group2").exists()
|
||||
|
||||
def test_error_logged_on_authentication_error(self) -> None:
|
||||
def test_error_logged_on_authentication_error(
|
||||
self,
|
||||
caplog: pytest.LogCaptureFixture,
|
||||
) -> None:
|
||||
adapter = get_social_adapter()
|
||||
request = HttpRequest()
|
||||
with self.assertLogs("paperless.auth", level="INFO") as log_cm:
|
||||
with caplog.at_level(logging.INFO, logger="paperless.auth"):
|
||||
adapter.on_authentication_error(
|
||||
request,
|
||||
HttpRequest(),
|
||||
provider="test-provider",
|
||||
error="Error",
|
||||
exception="Test authentication error",
|
||||
)
|
||||
self.assertTrue(
|
||||
any("Test authentication error" in message for message in log_cm.output),
|
||||
)
|
||||
assert any("Test authentication error" in msg for msg in caplog.messages)
|
||||
|
||||
|
||||
class TestDrfTokenStrategy(TestCase):
|
||||
@pytest.mark.django_db
|
||||
class TestDrfTokenStrategy:
|
||||
def test_create_access_token_creates_new_token(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
@@ -201,7 +187,6 @@ class TestDrfTokenStrategy(TestCase):
|
||||
THEN:
|
||||
- A new token is created and its key is returned
|
||||
"""
|
||||
|
||||
user = User.objects.create_user("testuser")
|
||||
request = HttpRequest()
|
||||
request.user = user
|
||||
@@ -209,13 +194,9 @@ class TestDrfTokenStrategy(TestCase):
|
||||
strategy = DrfTokenStrategy()
|
||||
token_key = strategy.create_access_token(request)
|
||||
|
||||
# Verify a token was created
|
||||
self.assertIsNotNone(token_key)
|
||||
self.assertTrue(Token.objects.filter(user=user).exists())
|
||||
|
||||
# Verify the returned key matches the created token
|
||||
token = Token.objects.get(user=user)
|
||||
self.assertEqual(token_key, token.key)
|
||||
assert token_key is not None
|
||||
assert Token.objects.filter(user=user).exists()
|
||||
assert token_key == Token.objects.get(user=user).key
|
||||
|
||||
def test_create_access_token_returns_existing_token(self) -> None:
|
||||
"""
|
||||
@@ -226,7 +207,6 @@ class TestDrfTokenStrategy(TestCase):
|
||||
THEN:
|
||||
- The same token key is returned (no new token created)
|
||||
"""
|
||||
|
||||
user = User.objects.create_user("testuser")
|
||||
existing_token = Token.objects.create(user=user)
|
||||
|
||||
@@ -236,11 +216,8 @@ class TestDrfTokenStrategy(TestCase):
|
||||
strategy = DrfTokenStrategy()
|
||||
token_key = strategy.create_access_token(request)
|
||||
|
||||
# Verify the existing token key is returned
|
||||
self.assertEqual(token_key, existing_token.key)
|
||||
|
||||
# Verify only one token exists (no duplicate created)
|
||||
self.assertEqual(Token.objects.filter(user=user).count(), 1)
|
||||
assert token_key == existing_token.key
|
||||
assert Token.objects.filter(user=user).count() == 1
|
||||
|
||||
def test_create_access_token_returns_none_for_unauthenticated_user(self) -> None:
|
||||
"""
|
||||
@@ -251,12 +228,11 @@ class TestDrfTokenStrategy(TestCase):
|
||||
THEN:
|
||||
- None is returned and no token is created
|
||||
"""
|
||||
|
||||
request = HttpRequest()
|
||||
request.user = AnonymousUser()
|
||||
|
||||
strategy = DrfTokenStrategy()
|
||||
token_key = strategy.create_access_token(request)
|
||||
|
||||
self.assertIsNone(token_key)
|
||||
self.assertEqual(Token.objects.count(), 0)
|
||||
assert token_key is None
|
||||
assert Token.objects.count() == 0
|
||||
|
||||
@@ -1,16 +1,15 @@
|
||||
import os
|
||||
from collections.abc import Callable
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from unittest import mock
|
||||
|
||||
import pytest
|
||||
from django.core.checks import Error
|
||||
from django.core.checks import Warning
|
||||
from django.test import TestCase
|
||||
from django.test import override_settings
|
||||
from pytest_django.fixtures import SettingsWrapper
|
||||
from pytest_mock import MockerFixture
|
||||
|
||||
from documents.tests.utils import DirectoriesMixin
|
||||
from documents.tests.utils import FileSystemAssertsMixin
|
||||
from paperless.checks import audit_log_check
|
||||
from paperless.checks import binaries_check
|
||||
from paperless.checks import check_deprecated_db_settings
|
||||
@@ -20,54 +19,84 @@ from paperless.checks import paths_check
|
||||
from paperless.checks import settings_values_check
|
||||
|
||||
|
||||
class TestChecks(DirectoriesMixin, TestCase):
|
||||
def test_binaries(self) -> None:
|
||||
self.assertEqual(binaries_check(None), [])
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class PaperlessTestDirs:
|
||||
data_dir: Path
|
||||
media_dir: Path
|
||||
consumption_dir: Path
|
||||
|
||||
@override_settings(CONVERT_BINARY="uuuhh")
|
||||
def test_binaries_fail(self) -> None:
|
||||
self.assertEqual(len(binaries_check(None)), 1)
|
||||
|
||||
def test_paths_check(self) -> None:
|
||||
self.assertEqual(paths_check(None), [])
|
||||
# TODO: consolidate with documents/tests/conftest.py PaperlessDirs/paperless_dirs
|
||||
# once the paperless and documents test suites are ready to share fixtures.
|
||||
@pytest.fixture()
|
||||
def directories(tmp_path: Path, settings: SettingsWrapper) -> PaperlessTestDirs:
|
||||
data_dir = tmp_path / "data"
|
||||
media_dir = tmp_path / "media"
|
||||
consumption_dir = tmp_path / "consumption"
|
||||
|
||||
@override_settings(
|
||||
MEDIA_ROOT=Path("uuh"),
|
||||
DATA_DIR=Path("whatever"),
|
||||
CONSUMPTION_DIR=Path("idontcare"),
|
||||
for d in (data_dir, media_dir, consumption_dir):
|
||||
d.mkdir()
|
||||
|
||||
settings.DATA_DIR = data_dir
|
||||
settings.MEDIA_ROOT = media_dir
|
||||
settings.CONSUMPTION_DIR = consumption_dir
|
||||
|
||||
return PaperlessTestDirs(
|
||||
data_dir=data_dir,
|
||||
media_dir=media_dir,
|
||||
consumption_dir=consumption_dir,
|
||||
)
|
||||
def test_paths_check_dont_exist(self) -> None:
|
||||
msgs = paths_check(None)
|
||||
self.assertEqual(len(msgs), 3, str(msgs))
|
||||
|
||||
for msg in msgs:
|
||||
self.assertTrue(msg.msg.endswith("is set but doesn't exist."))
|
||||
|
||||
def test_paths_check_no_access(self) -> None:
|
||||
Path(self.dirs.data_dir).chmod(0o000)
|
||||
Path(self.dirs.media_dir).chmod(0o000)
|
||||
Path(self.dirs.consumption_dir).chmod(0o000)
|
||||
class TestChecks:
|
||||
def test_binaries(self) -> None:
|
||||
assert binaries_check(None) == []
|
||||
|
||||
self.addCleanup(os.chmod, self.dirs.data_dir, 0o777)
|
||||
self.addCleanup(os.chmod, self.dirs.media_dir, 0o777)
|
||||
self.addCleanup(os.chmod, self.dirs.consumption_dir, 0o777)
|
||||
def test_binaries_fail(self, settings: SettingsWrapper) -> None:
|
||||
settings.CONVERT_BINARY = "uuuhh"
|
||||
assert len(binaries_check(None)) == 1
|
||||
|
||||
@pytest.mark.usefixtures("directories")
|
||||
def test_paths_check(self) -> None:
|
||||
assert paths_check(None) == []
|
||||
|
||||
def test_paths_check_dont_exist(self, settings: SettingsWrapper) -> None:
|
||||
settings.MEDIA_ROOT = Path("uuh")
|
||||
settings.DATA_DIR = Path("whatever")
|
||||
settings.CONSUMPTION_DIR = Path("idontcare")
|
||||
|
||||
msgs = paths_check(None)
|
||||
self.assertEqual(len(msgs), 3)
|
||||
|
||||
assert len(msgs) == 3, str(msgs)
|
||||
for msg in msgs:
|
||||
self.assertTrue(msg.msg.endswith("is not writeable"))
|
||||
assert msg.msg.endswith("is set but doesn't exist.")
|
||||
|
||||
@override_settings(DEBUG=False)
|
||||
def test_debug_disabled(self) -> None:
|
||||
self.assertEqual(debug_mode_check(None), [])
|
||||
def test_paths_check_no_access(self, directories: PaperlessTestDirs) -> None:
|
||||
directories.data_dir.chmod(0o000)
|
||||
directories.media_dir.chmod(0o000)
|
||||
directories.consumption_dir.chmod(0o000)
|
||||
|
||||
@override_settings(DEBUG=True)
|
||||
def test_debug_enabled(self) -> None:
|
||||
self.assertEqual(len(debug_mode_check(None)), 1)
|
||||
try:
|
||||
msgs = paths_check(None)
|
||||
finally:
|
||||
directories.data_dir.chmod(0o777)
|
||||
directories.media_dir.chmod(0o777)
|
||||
directories.consumption_dir.chmod(0o777)
|
||||
|
||||
assert len(msgs) == 3
|
||||
for msg in msgs:
|
||||
assert msg.msg.endswith("is not writeable")
|
||||
|
||||
def test_debug_disabled(self, settings: SettingsWrapper) -> None:
|
||||
settings.DEBUG = False
|
||||
assert debug_mode_check(None) == []
|
||||
|
||||
def test_debug_enabled(self, settings: SettingsWrapper) -> None:
|
||||
settings.DEBUG = True
|
||||
assert len(debug_mode_check(None)) == 1
|
||||
|
||||
|
||||
class TestSettingsChecksAgainstDefaults(DirectoriesMixin, TestCase):
|
||||
class TestSettingsChecksAgainstDefaults:
|
||||
def test_all_valid(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
@@ -78,104 +107,71 @@ class TestSettingsChecksAgainstDefaults(DirectoriesMixin, TestCase):
|
||||
- No system check errors reported
|
||||
"""
|
||||
msgs = settings_values_check(None)
|
||||
self.assertEqual(len(msgs), 0)
|
||||
assert len(msgs) == 0
|
||||
|
||||
|
||||
class TestOcrSettingsChecks(DirectoriesMixin, TestCase):
|
||||
@override_settings(OCR_OUTPUT_TYPE="notapdf")
|
||||
def test_invalid_output_type(self) -> None:
|
||||
class TestOcrSettingsChecks:
|
||||
@pytest.mark.parametrize(
|
||||
("setting", "value", "expected_msg"),
|
||||
[
|
||||
pytest.param(
|
||||
"OCR_OUTPUT_TYPE",
|
||||
"notapdf",
|
||||
'OCR output type "notapdf"',
|
||||
id="invalid-output-type",
|
||||
),
|
||||
pytest.param(
|
||||
"OCR_MODE",
|
||||
"makeitso",
|
||||
'OCR output mode "makeitso"',
|
||||
id="invalid-mode",
|
||||
),
|
||||
pytest.param(
|
||||
"OCR_MODE",
|
||||
"skip_noarchive",
|
||||
"deprecated",
|
||||
id="deprecated-mode",
|
||||
),
|
||||
pytest.param(
|
||||
"OCR_SKIP_ARCHIVE_FILE",
|
||||
"invalid",
|
||||
'OCR_SKIP_ARCHIVE_FILE setting "invalid"',
|
||||
id="invalid-skip-archive-file",
|
||||
),
|
||||
pytest.param(
|
||||
"OCR_CLEAN",
|
||||
"cleanme",
|
||||
'OCR clean mode "cleanme"',
|
||||
id="invalid-clean",
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_invalid_setting_produces_one_error(
|
||||
self,
|
||||
settings: SettingsWrapper,
|
||||
setting: str,
|
||||
value: str,
|
||||
expected_msg: str,
|
||||
) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- Default settings
|
||||
- OCR output type is invalid
|
||||
- One OCR setting is set to an invalid value
|
||||
WHEN:
|
||||
- Settings are validated
|
||||
THEN:
|
||||
- system check error reported for OCR output type
|
||||
- Exactly one system check error is reported containing the expected message
|
||||
"""
|
||||
setattr(settings, setting, value)
|
||||
|
||||
msgs = settings_values_check(None)
|
||||
self.assertEqual(len(msgs), 1)
|
||||
|
||||
msg = msgs[0]
|
||||
|
||||
self.assertIn('OCR output type "notapdf"', msg.msg)
|
||||
|
||||
@override_settings(OCR_MODE="makeitso")
|
||||
def test_invalid_ocr_type(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- Default settings
|
||||
- OCR type is invalid
|
||||
WHEN:
|
||||
- Settings are validated
|
||||
THEN:
|
||||
- system check error reported for OCR type
|
||||
"""
|
||||
msgs = settings_values_check(None)
|
||||
self.assertEqual(len(msgs), 1)
|
||||
|
||||
msg = msgs[0]
|
||||
|
||||
self.assertIn('OCR output mode "makeitso"', msg.msg)
|
||||
|
||||
@override_settings(OCR_MODE="skip_noarchive")
|
||||
def test_deprecated_ocr_type(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- Default settings
|
||||
- OCR type is deprecated
|
||||
WHEN:
|
||||
- Settings are validated
|
||||
THEN:
|
||||
- deprecation warning reported for OCR type
|
||||
"""
|
||||
msgs = settings_values_check(None)
|
||||
self.assertEqual(len(msgs), 1)
|
||||
|
||||
msg = msgs[0]
|
||||
|
||||
self.assertIn("deprecated", msg.msg)
|
||||
|
||||
@override_settings(OCR_SKIP_ARCHIVE_FILE="invalid")
|
||||
def test_invalid_ocr_skip_archive_file(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- Default settings
|
||||
- OCR_SKIP_ARCHIVE_FILE is invalid
|
||||
WHEN:
|
||||
- Settings are validated
|
||||
THEN:
|
||||
- system check error reported for OCR_SKIP_ARCHIVE_FILE
|
||||
"""
|
||||
msgs = settings_values_check(None)
|
||||
self.assertEqual(len(msgs), 1)
|
||||
|
||||
msg = msgs[0]
|
||||
|
||||
self.assertIn('OCR_SKIP_ARCHIVE_FILE setting "invalid"', msg.msg)
|
||||
|
||||
@override_settings(OCR_CLEAN="cleanme")
|
||||
def test_invalid_ocr_clean(self) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- Default settings
|
||||
- OCR cleaning type is invalid
|
||||
WHEN:
|
||||
- Settings are validated
|
||||
THEN:
|
||||
- system check error reported for OCR cleaning type
|
||||
"""
|
||||
msgs = settings_values_check(None)
|
||||
self.assertEqual(len(msgs), 1)
|
||||
|
||||
msg = msgs[0]
|
||||
|
||||
self.assertIn('OCR clean mode "cleanme"', msg.msg)
|
||||
assert len(msgs) == 1
|
||||
assert expected_msg in msgs[0].msg
|
||||
|
||||
|
||||
class TestTimezoneSettingsChecks(DirectoriesMixin, TestCase):
|
||||
@override_settings(TIME_ZONE="TheMoon\\MyCrater")
|
||||
def test_invalid_timezone(self) -> None:
|
||||
class TestTimezoneSettingsChecks:
|
||||
def test_invalid_timezone(self, settings: SettingsWrapper) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- Default settings
|
||||
@@ -185,17 +181,16 @@ class TestTimezoneSettingsChecks(DirectoriesMixin, TestCase):
|
||||
THEN:
|
||||
- system check error reported for timezone
|
||||
"""
|
||||
settings.TIME_ZONE = "TheMoon\\MyCrater"
|
||||
|
||||
msgs = settings_values_check(None)
|
||||
self.assertEqual(len(msgs), 1)
|
||||
|
||||
msg = msgs[0]
|
||||
|
||||
self.assertIn('Timezone "TheMoon\\MyCrater"', msg.msg)
|
||||
assert len(msgs) == 1
|
||||
assert 'Timezone "TheMoon\\MyCrater"' in msgs[0].msg
|
||||
|
||||
|
||||
class TestEmailCertSettingsChecks(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
@override_settings(EMAIL_CERTIFICATE_FILE=Path("/tmp/not_actually_here.pem"))
|
||||
def test_not_valid_file(self) -> None:
|
||||
class TestEmailCertSettingsChecks:
|
||||
def test_not_valid_file(self, settings: SettingsWrapper) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- Default settings
|
||||
@@ -205,19 +200,22 @@ class TestEmailCertSettingsChecks(DirectoriesMixin, FileSystemAssertsMixin, Test
|
||||
THEN:
|
||||
- system check error reported for email certificate
|
||||
"""
|
||||
self.assertIsNotFile("/tmp/not_actually_here.pem")
|
||||
cert_path = Path("/tmp/not_actually_here.pem")
|
||||
assert not cert_path.is_file()
|
||||
settings.EMAIL_CERTIFICATE_FILE = cert_path
|
||||
|
||||
msgs = settings_values_check(None)
|
||||
|
||||
self.assertEqual(len(msgs), 1)
|
||||
|
||||
msg = msgs[0]
|
||||
|
||||
self.assertIn("Email cert /tmp/not_actually_here.pem is not a file", msg.msg)
|
||||
assert len(msgs) == 1
|
||||
assert "Email cert /tmp/not_actually_here.pem is not a file" in msgs[0].msg
|
||||
|
||||
|
||||
class TestAuditLogChecks(TestCase):
|
||||
def test_was_enabled_once(self) -> None:
|
||||
class TestAuditLogChecks:
|
||||
def test_was_enabled_once(
|
||||
self,
|
||||
settings: SettingsWrapper,
|
||||
mocker: MockerFixture,
|
||||
) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
- Audit log is not enabled
|
||||
@@ -226,23 +224,18 @@ class TestAuditLogChecks(TestCase):
|
||||
THEN:
|
||||
- system check error reported for disabling audit log
|
||||
"""
|
||||
introspect_mock = mock.MagicMock()
|
||||
settings.AUDIT_LOG_ENABLED = False
|
||||
introspect_mock = mocker.MagicMock()
|
||||
introspect_mock.introspection.table_names.return_value = ["auditlog_logentry"]
|
||||
with override_settings(AUDIT_LOG_ENABLED=False):
|
||||
with mock.patch.dict(
|
||||
"paperless.checks.connections",
|
||||
{"default": introspect_mock},
|
||||
):
|
||||
msgs = audit_log_check(None)
|
||||
mocker.patch.dict(
|
||||
"paperless.checks.connections",
|
||||
{"default": introspect_mock},
|
||||
)
|
||||
|
||||
self.assertEqual(len(msgs), 1)
|
||||
msgs = audit_log_check(None)
|
||||
|
||||
msg = msgs[0]
|
||||
|
||||
self.assertIn(
|
||||
("auditlog table was found but audit log is disabled."),
|
||||
msg.msg,
|
||||
)
|
||||
assert len(msgs) == 1
|
||||
assert "auditlog table was found but audit log is disabled." in msgs[0].msg
|
||||
|
||||
|
||||
DEPRECATED_VARS: dict[str, str] = {
|
||||
@@ -271,20 +264,16 @@ class TestDeprecatedDbSettings:
|
||||
@pytest.mark.parametrize(
|
||||
("env_var", "db_option_key"),
|
||||
[
|
||||
("PAPERLESS_DB_TIMEOUT", "timeout"),
|
||||
("PAPERLESS_DB_POOLSIZE", "pool.min_size / pool.max_size"),
|
||||
("PAPERLESS_DBSSLMODE", "sslmode"),
|
||||
("PAPERLESS_DBSSLROOTCERT", "sslrootcert"),
|
||||
("PAPERLESS_DBSSLCERT", "sslcert"),
|
||||
("PAPERLESS_DBSSLKEY", "sslkey"),
|
||||
],
|
||||
ids=[
|
||||
"db-timeout",
|
||||
"db-poolsize",
|
||||
"ssl-mode",
|
||||
"ssl-rootcert",
|
||||
"ssl-cert",
|
||||
"ssl-key",
|
||||
pytest.param("PAPERLESS_DB_TIMEOUT", "timeout", id="db-timeout"),
|
||||
pytest.param(
|
||||
"PAPERLESS_DB_POOLSIZE",
|
||||
"pool.min_size / pool.max_size",
|
||||
id="db-poolsize",
|
||||
),
|
||||
pytest.param("PAPERLESS_DBSSLMODE", "sslmode", id="ssl-mode"),
|
||||
pytest.param("PAPERLESS_DBSSLROOTCERT", "sslrootcert", id="ssl-rootcert"),
|
||||
pytest.param("PAPERLESS_DBSSLCERT", "sslcert", id="ssl-cert"),
|
||||
pytest.param("PAPERLESS_DBSSLKEY", "sslkey", id="ssl-key"),
|
||||
],
|
||||
)
|
||||
def test_single_deprecated_var_produces_one_warning(
|
||||
@@ -403,7 +392,10 @@ class TestV3MinimumUpgradeVersionCheck:
|
||||
"""Test suite for check_v3_minimum_upgrade_version system check."""
|
||||
|
||||
@pytest.fixture
|
||||
def build_conn_mock(self, mocker: MockerFixture):
|
||||
def build_conn_mock(
|
||||
self,
|
||||
mocker: MockerFixture,
|
||||
) -> Callable[[list[str], list[str]], mock.MagicMock]:
|
||||
"""Factory fixture that builds a connections['default'] mock.
|
||||
|
||||
Usage::
|
||||
@@ -423,7 +415,7 @@ class TestV3MinimumUpgradeVersionCheck:
|
||||
def test_no_migrations_table_fresh_install(
|
||||
self,
|
||||
mocker: MockerFixture,
|
||||
build_conn_mock,
|
||||
build_conn_mock: Callable[[list[str], list[str]], mock.MagicMock],
|
||||
) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
@@ -442,7 +434,7 @@ class TestV3MinimumUpgradeVersionCheck:
|
||||
def test_no_documents_migrations_fresh_install(
|
||||
self,
|
||||
mocker: MockerFixture,
|
||||
build_conn_mock,
|
||||
build_conn_mock: Callable[[list[str], list[str]], mock.MagicMock],
|
||||
) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
@@ -461,7 +453,7 @@ class TestV3MinimumUpgradeVersionCheck:
|
||||
def test_v3_state_with_0001_squashed(
|
||||
self,
|
||||
mocker: MockerFixture,
|
||||
build_conn_mock,
|
||||
build_conn_mock: Callable[[list[str], list[str]], mock.MagicMock],
|
||||
) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
@@ -485,7 +477,7 @@ class TestV3MinimumUpgradeVersionCheck:
|
||||
def test_v3_state_with_0002_squashed_only(
|
||||
self,
|
||||
mocker: MockerFixture,
|
||||
build_conn_mock,
|
||||
build_conn_mock: Callable[[list[str], list[str]], mock.MagicMock],
|
||||
) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
@@ -504,7 +496,7 @@ class TestV3MinimumUpgradeVersionCheck:
|
||||
def test_v2_20_9_state_ready_to_upgrade(
|
||||
self,
|
||||
mocker: MockerFixture,
|
||||
build_conn_mock,
|
||||
build_conn_mock: Callable[[list[str], list[str]], mock.MagicMock],
|
||||
) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
@@ -531,7 +523,7 @@ class TestV3MinimumUpgradeVersionCheck:
|
||||
def test_v2_20_8_raises_error(
|
||||
self,
|
||||
mocker: MockerFixture,
|
||||
build_conn_mock,
|
||||
build_conn_mock: Callable[[list[str], list[str]], mock.MagicMock],
|
||||
) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
@@ -558,7 +550,7 @@ class TestV3MinimumUpgradeVersionCheck:
|
||||
def test_very_old_version_raises_error(
|
||||
self,
|
||||
mocker: MockerFixture,
|
||||
build_conn_mock,
|
||||
build_conn_mock: Callable[[list[str], list[str]], mock.MagicMock],
|
||||
) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
@@ -585,7 +577,7 @@ class TestV3MinimumUpgradeVersionCheck:
|
||||
def test_error_hint_mentions_v2_20_9(
|
||||
self,
|
||||
mocker: MockerFixture,
|
||||
build_conn_mock,
|
||||
build_conn_mock: Callable[[list[str], list[str]], mock.MagicMock],
|
||||
) -> None:
|
||||
"""
|
||||
GIVEN:
|
||||
|
||||
@@ -9,35 +9,50 @@ from paperless.utils import ocr_to_dateparser_languages
|
||||
@pytest.mark.parametrize(
|
||||
("ocr_language", "expected"),
|
||||
[
|
||||
# One language
|
||||
("eng", ["en"]),
|
||||
# Multiple languages
|
||||
("fra+ita+lao", ["fr", "it", "lo"]),
|
||||
# Languages that don't have a two-letter equivalent
|
||||
("fil", ["fil"]),
|
||||
# Languages with a script part supported by dateparser
|
||||
("aze_cyrl+srp_latn", ["az-Cyrl", "sr-Latn"]),
|
||||
# Languages with a script part not supported by dateparser
|
||||
# In this case, default to the language without script
|
||||
("deu_frak", ["de"]),
|
||||
# Traditional and simplified chinese don't have the same name in dateparser,
|
||||
# so they're converted to the general chinese language
|
||||
("chi_tra+chi_sim", ["zh"]),
|
||||
# If a language is not supported by dateparser, fallback to the supported ones
|
||||
("eng+unsupported_language+por", ["en", "pt"]),
|
||||
# If no language is supported, fallback to default
|
||||
("unsupported1+unsupported2", []),
|
||||
# Duplicate languages, should not duplicate in result
|
||||
("eng+eng", ["en"]),
|
||||
# Language with script, but script is not mapped
|
||||
("ita_unknownscript", ["it"]),
|
||||
pytest.param("eng", ["en"], id="single-language"),
|
||||
pytest.param("fra+ita+lao", ["fr", "it", "lo"], id="multiple-languages"),
|
||||
pytest.param("fil", ["fil"], id="no-two-letter-equivalent"),
|
||||
pytest.param(
|
||||
"aze_cyrl+srp_latn",
|
||||
["az-Cyrl", "sr-Latn"],
|
||||
id="script-supported-by-dateparser",
|
||||
),
|
||||
pytest.param(
|
||||
"deu_frak",
|
||||
["de"],
|
||||
id="script-not-supported-falls-back-to-language",
|
||||
),
|
||||
pytest.param(
|
||||
"chi_tra+chi_sim",
|
||||
["zh"],
|
||||
id="chinese-variants-collapse-to-general",
|
||||
),
|
||||
pytest.param(
|
||||
"eng+unsupported_language+por",
|
||||
["en", "pt"],
|
||||
id="unsupported-language-skipped",
|
||||
),
|
||||
pytest.param(
|
||||
"unsupported1+unsupported2",
|
||||
[],
|
||||
id="all-unsupported-returns-empty",
|
||||
),
|
||||
pytest.param("eng+eng", ["en"], id="duplicates-deduplicated"),
|
||||
pytest.param(
|
||||
"ita_unknownscript",
|
||||
["it"],
|
||||
id="unknown-script-falls-back-to-language",
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_ocr_to_dateparser_languages(ocr_language, expected):
|
||||
def test_ocr_to_dateparser_languages(ocr_language: str, expected: list[str]) -> None:
|
||||
assert sorted(ocr_to_dateparser_languages(ocr_language)) == sorted(expected)
|
||||
|
||||
|
||||
def test_ocr_to_dateparser_languages_exception(monkeypatch, caplog):
|
||||
def test_ocr_to_dateparser_languages_exception(
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
caplog: pytest.LogCaptureFixture,
|
||||
) -> None:
|
||||
# Patch LocaleDataLoader.get_locale_map to raise an exception
|
||||
class DummyLoader:
|
||||
def get_locale_map(self, locales=None):
|
||||
|
||||
@@ -1,24 +1,31 @@
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
from django.test import override_settings
|
||||
from django.test import Client
|
||||
from pytest_django.fixtures import SettingsWrapper
|
||||
|
||||
|
||||
def test_favicon_view(client):
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
static_dir = Path(tmpdir)
|
||||
favicon_path = static_dir / "paperless" / "img" / "favicon.ico"
|
||||
favicon_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
favicon_path.write_bytes(b"FAKE ICON DATA")
|
||||
def test_favicon_view(
|
||||
client: Client,
|
||||
tmp_path: Path,
|
||||
settings: SettingsWrapper,
|
||||
) -> None:
|
||||
favicon_path = tmp_path / "paperless" / "img" / "favicon.ico"
|
||||
favicon_path.parent.mkdir(parents=True)
|
||||
favicon_path.write_bytes(b"FAKE ICON DATA")
|
||||
|
||||
with override_settings(STATIC_ROOT=static_dir):
|
||||
response = client.get("/favicon.ico")
|
||||
assert response.status_code == 200
|
||||
assert response["Content-Type"] == "image/x-icon"
|
||||
assert b"".join(response.streaming_content) == b"FAKE ICON DATA"
|
||||
settings.STATIC_ROOT = tmp_path
|
||||
|
||||
response = client.get("/favicon.ico")
|
||||
assert response.status_code == 200
|
||||
assert response["Content-Type"] == "image/x-icon"
|
||||
assert b"".join(response.streaming_content) == b"FAKE ICON DATA"
|
||||
|
||||
|
||||
def test_favicon_view_missing_file(client):
|
||||
with override_settings(STATIC_ROOT=Path(tempfile.mkdtemp())):
|
||||
response = client.get("/favicon.ico")
|
||||
assert response.status_code == 404
|
||||
def test_favicon_view_missing_file(
|
||||
client: Client,
|
||||
tmp_path: Path,
|
||||
settings: SettingsWrapper,
|
||||
) -> None:
|
||||
settings.STATIC_ROOT = tmp_path
|
||||
response = client.get("/favicon.ico")
|
||||
assert response.status_code == 404
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from llama_index.core.bridge.pydantic import BaseModel
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class DocumentClassifierSchema(BaseModel):
|
||||
|
||||
@@ -1,10 +1,6 @@
|
||||
import logging
|
||||
import sys
|
||||
|
||||
from llama_index.core import VectorStoreIndex
|
||||
from llama_index.core.prompts import PromptTemplate
|
||||
from llama_index.core.query_engine import RetrieverQueryEngine
|
||||
|
||||
from documents.models import Document
|
||||
from paperless_ai.client import AIClient
|
||||
from paperless_ai.indexing import load_or_build_index
|
||||
@@ -14,15 +10,21 @@ logger = logging.getLogger("paperless_ai.chat")
|
||||
MAX_SINGLE_DOC_CONTEXT_CHARS = 15000
|
||||
SINGLE_DOC_SNIPPET_CHARS = 800
|
||||
|
||||
CHAT_PROMPT_TMPL = PromptTemplate(
|
||||
template="""Context information is below.
|
||||
CHAT_PROMPT_TEMPLATE = """Context information is below.
|
||||
---------------------
|
||||
{context_str}
|
||||
---------------------
|
||||
Given the context information and not prior knowledge, answer the query.
|
||||
Query: {query_str}
|
||||
Answer:""",
|
||||
)
|
||||
Answer:"""
|
||||
|
||||
|
||||
def _get_prompt_template():
|
||||
from llama_index.core.prompts import PromptTemplate
|
||||
|
||||
return PromptTemplate(
|
||||
template=CHAT_PROMPT_TEMPLATE,
|
||||
)
|
||||
|
||||
|
||||
def stream_chat_with_documents(query_str: str, documents: list[Document]):
|
||||
@@ -43,6 +45,8 @@ def stream_chat_with_documents(query_str: str, documents: list[Document]):
|
||||
yield "Sorry, I couldn't find any content to answer your question."
|
||||
return
|
||||
|
||||
from llama_index.core import VectorStoreIndex
|
||||
|
||||
local_index = VectorStoreIndex(nodes=nodes)
|
||||
retriever = local_index.as_retriever(
|
||||
similarity_top_k=3 if len(documents) == 1 else 5,
|
||||
@@ -85,10 +89,16 @@ def stream_chat_with_documents(query_str: str, documents: list[Document]):
|
||||
for node in top_nodes
|
||||
)
|
||||
|
||||
prompt = CHAT_PROMPT_TMPL.partial_format(
|
||||
context_str=context,
|
||||
query_str=query_str,
|
||||
).format(llm=client.llm)
|
||||
prompt = (
|
||||
_get_prompt_template()
|
||||
.partial_format(
|
||||
context_str=context,
|
||||
query_str=query_str,
|
||||
)
|
||||
.format(llm=client.llm)
|
||||
)
|
||||
|
||||
from llama_index.core.query_engine import RetrieverQueryEngine
|
||||
|
||||
query_engine = RetrieverQueryEngine.from_args(
|
||||
retriever=retriever,
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
import logging
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from llama_index.core.llms import ChatMessage
|
||||
from llama_index.core.program.function_program import get_function_tool
|
||||
from llama_index.llms.ollama import Ollama
|
||||
from llama_index.llms.openai import OpenAI
|
||||
if TYPE_CHECKING:
|
||||
from llama_index.core.llms import ChatMessage
|
||||
from llama_index.llms.ollama import Ollama
|
||||
from llama_index.llms.openai import OpenAI
|
||||
|
||||
from paperless.config import AIConfig
|
||||
from paperless_ai.base_model import DocumentClassifierSchema
|
||||
@@ -20,14 +21,18 @@ class AIClient:
|
||||
self.settings = AIConfig()
|
||||
self.llm = self.get_llm()
|
||||
|
||||
def get_llm(self) -> Ollama | OpenAI:
|
||||
def get_llm(self) -> "Ollama | OpenAI":
|
||||
if self.settings.llm_backend == "ollama":
|
||||
from llama_index.llms.ollama import Ollama
|
||||
|
||||
return Ollama(
|
||||
model=self.settings.llm_model or "llama3.1",
|
||||
base_url=self.settings.llm_endpoint or "http://localhost:11434",
|
||||
request_timeout=120,
|
||||
)
|
||||
elif self.settings.llm_backend == "openai":
|
||||
from llama_index.llms.openai import OpenAI
|
||||
|
||||
return OpenAI(
|
||||
model=self.settings.llm_model or "gpt-3.5-turbo",
|
||||
api_base=self.settings.llm_endpoint or None,
|
||||
@@ -43,6 +48,9 @@ class AIClient:
|
||||
self.settings.llm_model,
|
||||
)
|
||||
|
||||
from llama_index.core.llms import ChatMessage
|
||||
from llama_index.core.program.function_program import get_function_tool
|
||||
|
||||
user_msg = ChatMessage(role="user", content=prompt)
|
||||
tool = get_function_tool(DocumentClassifierSchema)
|
||||
result = self.llm.chat_with_tools(
|
||||
@@ -58,7 +66,7 @@ class AIClient:
|
||||
parsed = DocumentClassifierSchema(**tool_calls[0].tool_kwargs)
|
||||
return parsed.model_dump()
|
||||
|
||||
def run_chat(self, messages: list[ChatMessage]) -> str:
|
||||
def run_chat(self, messages: list["ChatMessage"]) -> str:
|
||||
logger.debug(
|
||||
"Running chat query against %s with model %s",
|
||||
self.settings.llm_backend,
|
||||
|
||||
@@ -5,9 +5,9 @@ if TYPE_CHECKING:
|
||||
from pathlib import Path
|
||||
|
||||
from django.conf import settings
|
||||
from llama_index.core.base.embeddings.base import BaseEmbedding
|
||||
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
||||
from llama_index.embeddings.openai import OpenAIEmbedding
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from llama_index.core.base.embeddings.base import BaseEmbedding
|
||||
|
||||
from documents.models import Document
|
||||
from documents.models import Note
|
||||
@@ -15,17 +15,21 @@ from paperless.config import AIConfig
|
||||
from paperless.models import LLMEmbeddingBackend
|
||||
|
||||
|
||||
def get_embedding_model() -> BaseEmbedding:
|
||||
def get_embedding_model() -> "BaseEmbedding":
|
||||
config = AIConfig()
|
||||
|
||||
match config.llm_embedding_backend:
|
||||
case LLMEmbeddingBackend.OPENAI:
|
||||
from llama_index.embeddings.openai import OpenAIEmbedding
|
||||
|
||||
return OpenAIEmbedding(
|
||||
model=config.llm_embedding_model or "text-embedding-3-small",
|
||||
api_key=config.llm_api_key,
|
||||
api_base=config.llm_endpoint or None,
|
||||
)
|
||||
case LLMEmbeddingBackend.HUGGINGFACE:
|
||||
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
||||
|
||||
return HuggingFaceEmbedding(
|
||||
model_name=config.llm_embedding_model
|
||||
or "sentence-transformers/all-MiniLM-L6-v2",
|
||||
|
||||
@@ -4,26 +4,12 @@ from collections.abc import Callable
|
||||
from collections.abc import Iterable
|
||||
from datetime import timedelta
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
from typing import TypeVar
|
||||
|
||||
import faiss
|
||||
import llama_index.core.settings as llama_settings
|
||||
from celery import states
|
||||
from django.conf import settings
|
||||
from django.utils import timezone
|
||||
from llama_index.core import Document as LlamaDocument
|
||||
from llama_index.core import StorageContext
|
||||
from llama_index.core import VectorStoreIndex
|
||||
from llama_index.core import load_index_from_storage
|
||||
from llama_index.core.indices.prompt_helper import PromptHelper
|
||||
from llama_index.core.node_parser import SimpleNodeParser
|
||||
from llama_index.core.prompts import PromptTemplate
|
||||
from llama_index.core.retrievers import VectorIndexRetriever
|
||||
from llama_index.core.schema import BaseNode
|
||||
from llama_index.core.storage.docstore import SimpleDocumentStore
|
||||
from llama_index.core.storage.index_store import SimpleIndexStore
|
||||
from llama_index.core.text_splitter import TokenTextSplitter
|
||||
from llama_index.vector_stores.faiss import FaissVectorStore
|
||||
|
||||
from documents.models import Document
|
||||
from documents.models import PaperlessTask
|
||||
@@ -31,6 +17,11 @@ from paperless_ai.embedding import build_llm_index_text
|
||||
from paperless_ai.embedding import get_embedding_dim
|
||||
from paperless_ai.embedding import get_embedding_model
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from llama_index.core import StorageContext
|
||||
from llama_index.core import VectorStoreIndex
|
||||
from llama_index.core.schema import BaseNode
|
||||
|
||||
_T = TypeVar("_T")
|
||||
IterWrapper = Callable[[Iterable[_T]], Iterable[_T]]
|
||||
|
||||
@@ -65,11 +56,17 @@ def queue_llm_index_update_if_needed(*, rebuild: bool, reason: str) -> bool:
|
||||
return True
|
||||
|
||||
|
||||
def get_or_create_storage_context(*, rebuild=False):
|
||||
def get_or_create_storage_context(*, rebuild=False) -> "StorageContext":
|
||||
"""
|
||||
Loads or creates the StorageContext (vector store, docstore, index store).
|
||||
If rebuild=True, deletes and recreates everything.
|
||||
"""
|
||||
import faiss
|
||||
from llama_index.core import StorageContext
|
||||
from llama_index.core.storage.docstore import SimpleDocumentStore
|
||||
from llama_index.core.storage.index_store import SimpleIndexStore
|
||||
from llama_index.vector_stores.faiss import FaissVectorStore
|
||||
|
||||
if rebuild:
|
||||
shutil.rmtree(settings.LLM_INDEX_DIR, ignore_errors=True)
|
||||
settings.LLM_INDEX_DIR.mkdir(parents=True, exist_ok=True)
|
||||
@@ -93,7 +90,7 @@ def get_or_create_storage_context(*, rebuild=False):
|
||||
)
|
||||
|
||||
|
||||
def build_document_node(document: Document) -> list[BaseNode]:
|
||||
def build_document_node(document: Document) -> list["BaseNode"]:
|
||||
"""
|
||||
Given a Document, returns parsed Nodes ready for indexing.
|
||||
"""
|
||||
@@ -112,16 +109,23 @@ def build_document_node(document: Document) -> list[BaseNode]:
|
||||
"added": document.added.isoformat() if document.added else None,
|
||||
"modified": document.modified.isoformat(),
|
||||
}
|
||||
from llama_index.core import Document as LlamaDocument
|
||||
from llama_index.core.node_parser import SimpleNodeParser
|
||||
|
||||
doc = LlamaDocument(text=text, metadata=metadata)
|
||||
parser = SimpleNodeParser()
|
||||
return parser.get_nodes_from_documents([doc])
|
||||
|
||||
|
||||
def load_or_build_index(nodes=None):
|
||||
def load_or_build_index(nodes=None) -> "VectorStoreIndex":
|
||||
"""
|
||||
Load an existing VectorStoreIndex if present,
|
||||
or build a new one using provided nodes if storage is empty.
|
||||
"""
|
||||
import llama_index.core.settings as llama_settings
|
||||
from llama_index.core import VectorStoreIndex
|
||||
from llama_index.core import load_index_from_storage
|
||||
|
||||
embed_model = get_embedding_model()
|
||||
llama_settings.Settings.embed_model = embed_model
|
||||
storage_context = get_or_create_storage_context()
|
||||
@@ -143,7 +147,7 @@ def load_or_build_index(nodes=None):
|
||||
)
|
||||
|
||||
|
||||
def remove_document_docstore_nodes(document: Document, index: VectorStoreIndex):
|
||||
def remove_document_docstore_nodes(document: Document, index: "VectorStoreIndex"):
|
||||
"""
|
||||
Removes existing documents from docstore for a given document from the index.
|
||||
This is necessary because FAISS IndexFlatL2 is append-only.
|
||||
@@ -183,6 +187,9 @@ def update_llm_index(
|
||||
return msg
|
||||
|
||||
if rebuild or not vector_store_file_exists():
|
||||
import llama_index.core.settings as llama_settings
|
||||
from llama_index.core import VectorStoreIndex
|
||||
|
||||
# remove meta.json to force re-detection of embedding dim
|
||||
(settings.LLM_INDEX_DIR / "meta.json").unlink(missing_ok=True)
|
||||
# Rebuild index from scratch
|
||||
@@ -271,6 +278,10 @@ def llm_index_remove_document(document: Document):
|
||||
|
||||
|
||||
def truncate_content(content: str) -> str:
|
||||
from llama_index.core.indices.prompt_helper import PromptHelper
|
||||
from llama_index.core.prompts import PromptTemplate
|
||||
from llama_index.core.text_splitter import TokenTextSplitter
|
||||
|
||||
prompt_helper = PromptHelper(
|
||||
context_window=8192,
|
||||
num_output=512,
|
||||
@@ -315,6 +326,8 @@ def query_similar_documents(
|
||||
else None
|
||||
)
|
||||
|
||||
from llama_index.core.retrievers import VectorIndexRetriever
|
||||
|
||||
retriever = VectorIndexRetriever(
|
||||
index=index,
|
||||
similarity_top_k=top_k,
|
||||
|
||||
@@ -45,7 +45,7 @@ def test_stream_chat_with_one_document_full_content(mock_document) -> None:
|
||||
patch("paperless_ai.chat.AIClient") as mock_client_cls,
|
||||
patch("paperless_ai.chat.load_or_build_index") as mock_load_index,
|
||||
patch(
|
||||
"paperless_ai.chat.RetrieverQueryEngine.from_args",
|
||||
"llama_index.core.query_engine.RetrieverQueryEngine.from_args",
|
||||
) as mock_query_engine_cls,
|
||||
):
|
||||
mock_client = MagicMock()
|
||||
@@ -76,7 +76,7 @@ def test_stream_chat_with_multiple_documents_retrieval(patch_embed_nodes) -> Non
|
||||
patch("paperless_ai.chat.AIClient") as mock_client_cls,
|
||||
patch("paperless_ai.chat.load_or_build_index") as mock_load_index,
|
||||
patch(
|
||||
"paperless_ai.chat.RetrieverQueryEngine.from_args",
|
||||
"llama_index.core.query_engine.RetrieverQueryEngine.from_args",
|
||||
) as mock_query_engine_cls,
|
||||
patch.object(VectorStoreIndex, "as_retriever") as mock_as_retriever,
|
||||
):
|
||||
|
||||
@@ -18,13 +18,13 @@ def mock_ai_config():
|
||||
|
||||
@pytest.fixture
|
||||
def mock_ollama_llm():
|
||||
with patch("paperless_ai.client.Ollama") as MockOllama:
|
||||
with patch("llama_index.llms.ollama.Ollama") as MockOllama:
|
||||
yield MockOllama
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_openai_llm():
|
||||
with patch("paperless_ai.client.OpenAI") as MockOpenAI:
|
||||
with patch("llama_index.llms.openai.OpenAI") as MockOpenAI:
|
||||
yield MockOpenAI
|
||||
|
||||
|
||||
|
||||
@@ -67,7 +67,7 @@ def test_get_embedding_model_openai(mock_ai_config):
|
||||
mock_ai_config.return_value.llm_api_key = "test_api_key"
|
||||
mock_ai_config.return_value.llm_endpoint = "http://test-url"
|
||||
|
||||
with patch("paperless_ai.embedding.OpenAIEmbedding") as MockOpenAIEmbedding:
|
||||
with patch("llama_index.embeddings.openai.OpenAIEmbedding") as MockOpenAIEmbedding:
|
||||
model = get_embedding_model()
|
||||
MockOpenAIEmbedding.assert_called_once_with(
|
||||
model="text-embedding-3-small",
|
||||
@@ -84,7 +84,7 @@ def test_get_embedding_model_huggingface(mock_ai_config):
|
||||
)
|
||||
|
||||
with patch(
|
||||
"paperless_ai.embedding.HuggingFaceEmbedding",
|
||||
"llama_index.embeddings.huggingface.HuggingFaceEmbedding",
|
||||
) as MockHuggingFaceEmbedding:
|
||||
model = get_embedding_model()
|
||||
MockHuggingFaceEmbedding.assert_called_once_with(
|
||||
|
||||
Reference in New Issue
Block a user