Compare commits

..

3 Commits

Author SHA1 Message Date
Trenton Holmes
31aaba4b5e Revert "perf: pre-filter autocomplete candidates with regex prefix query"
This reverts commit 733559413e.
2026-04-02 20:08:13 -07:00
Trenton Holmes
733559413e perf: pre-filter autocomplete candidates with regex prefix query 2026-04-02 20:04:10 -07:00
Trenton Holmes
4f84282ef3 fix: create notes SnippetGenerator once per search, not per hit 2026-04-02 18:38:10 -07:00
9 changed files with 61 additions and 217 deletions

View File

@@ -1440,14 +1440,6 @@ ports.
## Incoming Mail {#incoming_mail}
#### [`PAPERLESS_EMAIL_ALLOW_INTERNAL_HOSTS=<bool>`](#PAPERLESS_EMAIL_ALLOW_INTERNAL_HOSTS) {#PAPERLESS_EMAIL_ALLOW_INTERNAL_HOSTS}
: If set to false, incoming mail account connections are blocked when the
configured IMAP hostname resolves to a non-public address (for example,
localhost, link-local, or RFC1918 private ranges).
Defaults to true, which allows internal hosts.
### Email OAuth {#email_oauth}
#### [`PAPERLESS_OAUTH_CALLBACK_BASE_URL=<str>`](#PAPERLESS_OAUTH_CALLBACK_BASE_URL) {#PAPERLESS_OAUTH_CALLBACK_BASE_URL}

View File

@@ -518,6 +518,7 @@ class TantivyBackend:
# Build result hits with highlights
hits: list[SearchHit] = []
snippet_generator = None
notes_snippet_generator = None
for rank, (doc_address, score) in enumerate(page_hits, start=offset + 1):
# Get the actual document from the searcher using the doc address
@@ -544,13 +545,16 @@ class TantivyBackend:
# Try notes highlights
if "notes" in doc_dict:
notes_generator = tantivy.SnippetGenerator.create(
searcher,
final_query,
self._schema,
"notes",
if notes_snippet_generator is None:
notes_snippet_generator = tantivy.SnippetGenerator.create(
searcher,
final_query,
self._schema,
"notes",
)
notes_snippet = notes_snippet_generator.snippet_from_doc(
actual_doc,
)
notes_snippet = notes_generator.snippet_from_doc(actual_doc)
if notes_snippet:
highlights["notes"] = str(notes_snippet)

View File

@@ -2,7 +2,7 @@ msgid ""
msgstr ""
"Project-Id-Version: paperless-ngx\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2026-04-03 03:25+0000\n"
"POT-Creation-Date: 2026-04-02 22:35+0000\n"
"PO-Revision-Date: 2022-02-17 04:17\n"
"Last-Translator: \n"
"Language-Team: English\n"
@@ -1866,151 +1866,151 @@ msgstr ""
msgid "paperless application settings"
msgstr ""
#: paperless/settings/__init__.py:532
#: paperless/settings/__init__.py:528
msgid "English (US)"
msgstr ""
#: paperless/settings/__init__.py:533
#: paperless/settings/__init__.py:529
msgid "Arabic"
msgstr ""
#: paperless/settings/__init__.py:534
#: paperless/settings/__init__.py:530
msgid "Afrikaans"
msgstr ""
#: paperless/settings/__init__.py:535
#: paperless/settings/__init__.py:531
msgid "Belarusian"
msgstr ""
#: paperless/settings/__init__.py:536
#: paperless/settings/__init__.py:532
msgid "Bulgarian"
msgstr ""
#: paperless/settings/__init__.py:537
#: paperless/settings/__init__.py:533
msgid "Catalan"
msgstr ""
#: paperless/settings/__init__.py:538
#: paperless/settings/__init__.py:534
msgid "Czech"
msgstr ""
#: paperless/settings/__init__.py:539
#: paperless/settings/__init__.py:535
msgid "Danish"
msgstr ""
#: paperless/settings/__init__.py:540
#: paperless/settings/__init__.py:536
msgid "German"
msgstr ""
#: paperless/settings/__init__.py:541
#: paperless/settings/__init__.py:537
msgid "Greek"
msgstr ""
#: paperless/settings/__init__.py:542
#: paperless/settings/__init__.py:538
msgid "English (GB)"
msgstr ""
#: paperless/settings/__init__.py:543
#: paperless/settings/__init__.py:539
msgid "Spanish"
msgstr ""
#: paperless/settings/__init__.py:544
#: paperless/settings/__init__.py:540
msgid "Persian"
msgstr ""
#: paperless/settings/__init__.py:545
#: paperless/settings/__init__.py:541
msgid "Finnish"
msgstr ""
#: paperless/settings/__init__.py:546
#: paperless/settings/__init__.py:542
msgid "French"
msgstr ""
#: paperless/settings/__init__.py:547
#: paperless/settings/__init__.py:543
msgid "Hungarian"
msgstr ""
#: paperless/settings/__init__.py:548
#: paperless/settings/__init__.py:544
msgid "Indonesian"
msgstr ""
#: paperless/settings/__init__.py:549
#: paperless/settings/__init__.py:545
msgid "Italian"
msgstr ""
#: paperless/settings/__init__.py:550
#: paperless/settings/__init__.py:546
msgid "Japanese"
msgstr ""
#: paperless/settings/__init__.py:551
#: paperless/settings/__init__.py:547
msgid "Korean"
msgstr ""
#: paperless/settings/__init__.py:552
#: paperless/settings/__init__.py:548
msgid "Luxembourgish"
msgstr ""
#: paperless/settings/__init__.py:553
#: paperless/settings/__init__.py:549
msgid "Norwegian"
msgstr ""
#: paperless/settings/__init__.py:554
#: paperless/settings/__init__.py:550
msgid "Dutch"
msgstr ""
#: paperless/settings/__init__.py:555
#: paperless/settings/__init__.py:551
msgid "Polish"
msgstr ""
#: paperless/settings/__init__.py:556
#: paperless/settings/__init__.py:552
msgid "Portuguese (Brazil)"
msgstr ""
#: paperless/settings/__init__.py:557
#: paperless/settings/__init__.py:553
msgid "Portuguese"
msgstr ""
#: paperless/settings/__init__.py:558
#: paperless/settings/__init__.py:554
msgid "Romanian"
msgstr ""
#: paperless/settings/__init__.py:559
#: paperless/settings/__init__.py:555
msgid "Russian"
msgstr ""
#: paperless/settings/__init__.py:560
#: paperless/settings/__init__.py:556
msgid "Slovak"
msgstr ""
#: paperless/settings/__init__.py:561
#: paperless/settings/__init__.py:557
msgid "Slovenian"
msgstr ""
#: paperless/settings/__init__.py:562
#: paperless/settings/__init__.py:558
msgid "Serbian"
msgstr ""
#: paperless/settings/__init__.py:563
#: paperless/settings/__init__.py:559
msgid "Swedish"
msgstr ""
#: paperless/settings/__init__.py:564
#: paperless/settings/__init__.py:560
msgid "Turkish"
msgstr ""
#: paperless/settings/__init__.py:565
#: paperless/settings/__init__.py:561
msgid "Ukrainian"
msgstr ""
#: paperless/settings/__init__.py:566
#: paperless/settings/__init__.py:562
msgid "Vietnamese"
msgstr ""
#: paperless/settings/__init__.py:567
#: paperless/settings/__init__.py:563
msgid "Chinese Simplified"
msgstr ""
#: paperless/settings/__init__.py:568
#: paperless/settings/__init__.py:564
msgid "Chinese Traditional"
msgstr ""

View File

@@ -1,59 +1,11 @@
import hmac
import os
import pickle
from hashlib import sha256
from celery import Celery
from celery.signals import worker_process_init
from kombu.serialization import register
# Set the default Django settings module for the 'celery' program.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "paperless.settings")
# ---------------------------------------------------------------------------
# Signed-pickle serializer: pickle with HMAC-SHA256 integrity verification.
#
# Protects against malicious pickle injection via an exposed Redis broker.
# Messages are signed on the producer side and verified before deserialization
# on the worker side using Django's SECRET_KEY.
# ---------------------------------------------------------------------------
HMAC_SIZE = 32  # SHA-256 digest length, i.e. the size of the signature prefix


def _get_signing_key() -> bytes:
    """Return Django's ``SECRET_KEY`` encoded to bytes, used as the HMAC key.

    The settings import happens at call time rather than at module import
    time (presumably so this module can be imported before Django settings
    are fully configured -- confirm before moving the import).
    """
    from django.conf import settings

    return settings.SECRET_KEY.encode()


def signed_pickle_dumps(obj: object) -> bytes:
    """Serialize *obj* with pickle and prepend an HMAC-SHA256 signature.

    Returns:
        ``signature + data`` where ``signature`` is the ``HMAC_SIZE``-byte
        HMAC-SHA256 digest of the pickled ``data``.
    """
    data = pickle.dumps(obj, protocol=pickle.HIGHEST_PROTOCOL)
    signature = hmac.new(_get_signing_key(), data, sha256).digest()
    return signature + data


def signed_pickle_loads(payload: bytes) -> object:
    """Verify the HMAC prefix of *payload* and unpickle the remainder.

    Args:
        payload: Bytes laid out as produced by ``signed_pickle_dumps``:
            an ``HMAC_SIZE``-byte signature followed by the pickled data.

    Returns:
        The unpickled object.

    Raises:
        ValueError: If the payload is shorter than ``HMAC_SIZE`` bytes, or
            if the signature does not match the data.
    """
    # Reject before touching the key: a payload this short cannot even
    # contain a full signature.
    if len(payload) < HMAC_SIZE:
        msg = "Signed-pickle payload too short"
        raise ValueError(msg)

    signature = payload[:HMAC_SIZE]
    data = payload[HMAC_SIZE:]
    expected = hmac.new(_get_signing_key(), data, sha256).digest()

    # compare_digest is used instead of == for a constant-time comparison.
    if not hmac.compare_digest(signature, expected):
        msg = "Signed-pickle HMAC verification failed — message may have been tampered with"
        raise ValueError(msg)

    # SECURITY: pickle.loads can execute arbitrary code. It is only reached
    # after the HMAC check above succeeds; never move it before that check.
    return pickle.loads(data)
# Register the signed-pickle encoder/decoder pair with kombu under the
# serializer name "signed-pickle" and the content type
# "application/x-signed-pickle".
register(
"signed-pickle",
signed_pickle_dumps,
signed_pickle_loads,
content_type="application/x-signed-pickle",
content_encoding="binary",
)
app = Celery("paperless")
# Using a string here means the worker doesn't have to serialize

View File

@@ -501,10 +501,6 @@ SESSION_COOKIE_NAME = f"{COOKIE_PREFIX}sessionid"
LANGUAGE_COOKIE_NAME = f"{COOKIE_PREFIX}django_language"
EMAIL_CERTIFICATE_FILE = get_path_from_env("PAPERLESS_EMAIL_CERTIFICATE_LOCATION")
EMAIL_ALLOW_INTERNAL_HOSTS = get_bool_from_env(
"PAPERLESS_EMAIL_ALLOW_INTERNAL_HOSTS",
"true",
)
###############################################################################
@@ -675,11 +671,9 @@ CELERY_RESULT_BACKEND = "django-db"
CELERY_CACHE_BACKEND = "default"
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#task-serializer
# Uses HMAC-signed pickle to prevent RCE via malicious messages on an exposed Redis broker.
# The signed-pickle serializer is registered in paperless/celery.py.
CELERY_TASK_SERIALIZER = "signed-pickle"
CELERY_TASK_SERIALIZER = "pickle"
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#std-setting-accept_content
CELERY_ACCEPT_CONTENT = ["application/json", "application/x-signed-pickle"]
CELERY_ACCEPT_CONTENT = ["application/json", "application/x-python-serialize"]
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#beat-schedule
CELERY_BEAT_SCHEDULE = parse_beat_schedule()

View File

@@ -1,69 +0,0 @@
import hmac
import pickle
from hashlib import sha256
import pytest
from django.test import override_settings
from paperless.celery import HMAC_SIZE
from paperless.celery import signed_pickle_dumps
from paperless.celery import signed_pickle_loads
class TestSignedPickleSerializer:
    """Tests for the HMAC-signed pickle serializer in ``paperless.celery``."""

    def test_roundtrip_simple_types(self):
        """Signed pickle can round-trip basic JSON-like types."""
        for obj in [42, "hello", [1, 2, 3], {"key": "value"}, None, True]:
            assert signed_pickle_loads(signed_pickle_dumps(obj)) == obj

    def test_roundtrip_complex_types(self):
        """Signed pickle can round-trip types that JSON cannot."""
        from pathlib import Path

        obj = {"path": Path("/tmp/test"), "data": {1, 2, 3}}
        result = signed_pickle_loads(signed_pickle_dumps(obj))
        assert result["path"] == Path("/tmp/test")
        assert result["data"] == {1, 2, 3}

    def test_tampered_data_rejected(self):
        """Flipping a byte in the data portion causes HMAC failure."""
        payload = signed_pickle_dumps({"task": "test"})
        tampered = bytearray(payload)
        # The last byte belongs to the pickled data, not the signature.
        tampered[-1] ^= 0xFF
        with pytest.raises(ValueError, match="HMAC verification failed"):
            signed_pickle_loads(bytes(tampered))

    def test_tampered_signature_rejected(self):
        """Flipping a byte in the signature portion causes HMAC failure."""
        payload = signed_pickle_dumps({"task": "test"})
        tampered = bytearray(payload)
        # The first byte belongs to the signature prefix.
        tampered[0] ^= 0xFF
        with pytest.raises(ValueError, match="HMAC verification failed"):
            signed_pickle_loads(bytes(tampered))

    def test_truncated_payload_rejected(self):
        """A payload shorter than HMAC_SIZE is rejected."""
        with pytest.raises(ValueError, match="too short"):
            signed_pickle_loads(b"\x00" * (HMAC_SIZE - 1))

    def test_empty_payload_rejected(self):
        """An empty payload is rejected as too short."""
        with pytest.raises(ValueError, match="too short"):
            signed_pickle_loads(b"")

    @override_settings(SECRET_KEY="different-secret-key")
    def test_wrong_secret_key_rejected(self):
        """A message signed with one key cannot be loaded with another."""
        # Sign by hand with a key that differs from the overridden SECRET_KEY.
        original_key = b"test-secret-key-do-not-use-in-production"
        obj = {"task": "test"}
        data = pickle.dumps(obj, protocol=pickle.HIGHEST_PROTOCOL)
        signature = hmac.new(original_key, data, sha256).digest()
        payload = signature + data
        with pytest.raises(ValueError, match="HMAC verification failed"):
            signed_pickle_loads(payload)

    def test_forged_pickle_rejected(self):
        """A raw pickle payload (no signature) is rejected."""
        raw_pickle = pickle.dumps({"task": "test"})
        # Raw pickle won't have a valid HMAC prefix
        with pytest.raises(ValueError, match="HMAC verification failed"):
            signed_pickle_loads(b"\x00" * HMAC_SIZE + raw_pickle)

View File

@@ -39,8 +39,6 @@ from documents.loggers import LoggingMixin
from documents.models import Correspondent
from documents.parsers import is_mime_type_supported
from documents.tasks import consume_file
from paperless.network import is_public_ip
from paperless.network import resolve_hostname_ips
from paperless_mail.models import MailAccount
from paperless_mail.models import MailRule
from paperless_mail.models import ProcessedMail
@@ -414,13 +412,6 @@ def get_mailbox(server, port, security) -> MailBox:
"""
Returns the correct MailBox instance for the given configuration.
"""
if not settings.EMAIL_ALLOW_INTERNAL_HOSTS:
for ip_str in resolve_hostname_ips(server):
if not is_public_ip(ip_str):
raise MailError(
f"Connection blocked: {server} resolves to a non-public address",
)
ssl_context = ssl.create_default_context()
if settings.EMAIL_CERTIFICATE_FILE is not None: # pragma: no cover
ssl_context.load_verify_locations(cafile=settings.EMAIL_CERTIFICATE_FILE)

View File

@@ -13,7 +13,6 @@ from django.contrib.auth.models import User
from django.core.management import call_command
from django.db import DatabaseError
from django.test import TestCase
from django.test import override_settings
from django.utils import timezone
from imap_tools import NOT
from imap_tools import EmailAddress
@@ -1847,25 +1846,6 @@ class TestMailAccountTestView(APITestCase):
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertEqual(response.content.decode(), "Unable to connect to server")
@override_settings(EMAIL_ALLOW_INTERNAL_HOSTS=False)
@mock.patch("paperless_mail.mail.resolve_hostname_ips", return_value=["127.0.0.1"])
def test_mail_account_test_view_blocks_internal_host_when_disabled(
    self,
    _mock_resolve_hostname_ips,
) -> None:
    """The test view answers 400 when internal hosts are disallowed.

    Hostname resolution is patched to return ``127.0.0.1`` and
    ``EMAIL_ALLOW_INTERNAL_HOSTS`` is overridden to ``False``; the view is
    then expected to respond with the generic connection-failure message.
    """
    data = {
        "imap_server": "internal.example",
        "imap_port": 993,
        "imap_security": MailAccount.ImapSecurity.SSL,
        "username": "admin",
        "password": "secret",
        "account_type": MailAccount.MailAccountType.IMAP,
        "is_token": False,
    }
    response = self.client.post(self.url, data, format="json")
    self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
    self.assertEqual(response.content.decode(), "Unable to connect to server")
@mock.patch(
"paperless_mail.oauth.PaperlessMailOAuth2Manager.refresh_account_oauth_token",
)

View File

@@ -120,12 +120,12 @@ class MailAccountViewSet(ModelViewSet, PassUserMixin):
serializer.validated_data["expiration"] = existing_account.expiration
account = MailAccount(**serializer.validated_data)
try:
with get_mailbox(
account.imap_server,
account.imap_port,
account.imap_security,
) as M:
with get_mailbox(
account.imap_server,
account.imap_port,
account.imap_security,
) as M:
try:
if (
existing_account is not None
and account.is_token
@@ -145,11 +145,11 @@ class MailAccountViewSet(ModelViewSet, PassUserMixin):
mailbox_login(M, account)
return Response({"success": True})
except MailError:
logger.error(
"Mail account connectivity test failed",
)
return HttpResponseBadRequest("Unable to connect to server")
except MailError:
logger.error(
"Mail account connectivity test failed",
)
return HttpResponseBadRequest("Unable to connect to server")
@action(methods=["post"], detail=True)
def process(self, request, pk=None):