Compare commits

...

3 Commits

Author SHA1 Message Date
Trenton H
656d2dc34f Merge branch 'dev' into fix-signed-celery-pickle 2026-04-02 15:17:16 -07:00
Trenton H
376af81b9c Fix: Resolve another TC assuming an object has been created somewhere (#12503) 2026-04-02 14:58:28 -07:00
Trenton H
96051e01a9 Registers a custom serializer which signs the payload 2026-04-02 14:07:56 -07:00
4 changed files with 126 additions and 2 deletions

View File

@@ -31,6 +31,11 @@ from paperless.models import ApplicationConfiguration
class TestViews(DirectoriesMixin, TestCase):
@classmethod
def setUpTestData(cls) -> None:
    """Run the parent class-level setup, then ensure an ApplicationConfiguration row exists."""
    super().setUpTestData()
    # get_or_create() with no arguments: fetch the existing row or insert one,
    # so the tests in this class do not rely on it being created elsewhere.
    ApplicationConfiguration.objects.get_or_create()
def setUp(self) -> None:
self.user = User.objects.create_user("testuser")
super().setUp()

View File

@@ -1,11 +1,59 @@
import hmac
import os
import pickle
from hashlib import sha256
from celery import Celery
from celery.signals import worker_process_init
from kombu.serialization import register
# Set the default Django settings module for the 'celery' program.
# setdefault() only applies when DJANGO_SETTINGS_MODULE is not already present
# in the environment, so an externally supplied value is left untouched.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "paperless.settings")
# ---------------------------------------------------------------------------
# Signed-pickle serializer: pickle with HMAC-SHA256 integrity verification.
#
# Protects against malicious pickle injection via an exposed Redis broker.
# Messages are signed on the producer side and verified before deserialization
# on the worker side using Django's SECRET_KEY.
#
# Wire format: <HMAC_SIZE-byte HMAC-SHA256 digest> followed by <pickle bytes>.
# The digest is computed over the pickle bytes only.
# ---------------------------------------------------------------------------
HMAC_SIZE = 32 # SHA-256 digest length
def _get_signing_key() -> bytes:
    """Return Django's ``SECRET_KEY`` encoded to bytes, for use as the HMAC key."""
    # Imported at call time rather than at module import time.
    from django.conf import settings

    secret_key = settings.SECRET_KEY
    return secret_key.encode()
def signed_pickle_dumps(obj: object) -> bytes:
    """Pickle *obj* and prefix the result with its HMAC-SHA256 signature.

    Args:
        obj: Any picklable object.

    Returns:
        The HMAC-SHA256 digest of the pickle bytes (keyed with
        ``_get_signing_key()``) immediately followed by the pickle bytes.
    """
    pickled = pickle.dumps(obj, protocol=pickle.HIGHEST_PROTOCOL)
    mac = hmac.new(_get_signing_key(), digestmod=sha256)
    mac.update(pickled)
    return mac.digest() + pickled
def signed_pickle_loads(payload: bytes) -> object:
    """Verify the HMAC-SHA256 prefix of *payload*, then unpickle the remainder.

    Args:
        payload: Bytes in the layout produced by ``signed_pickle_dumps``:
            ``HMAC_SIZE`` signature bytes followed by the pickle bytes.

    Returns:
        The unpickled object.

    Raises:
        ValueError: If *payload* is shorter than ``HMAC_SIZE`` or its
            signature does not match the one computed over the data part.
    """
    if len(payload) < HMAC_SIZE:
        msg = "Signed-pickle payload too short"
        raise ValueError(msg)

    received_mac, pickled = payload[:HMAC_SIZE], payload[HMAC_SIZE:]
    computed_mac = hmac.new(_get_signing_key(), pickled, sha256).digest()

    # compare_digest is used instead of == so the comparison does not leak
    # timing information about how many leading bytes matched.
    if hmac.compare_digest(received_mac, computed_mac):
        # SECURITY: pickle.loads can execute arbitrary code; it is reached
        # only after the signature over these exact bytes has been verified.
        return pickle.loads(pickled)

    msg = "Signed-pickle HMAC verification failed — message may have been tampered with"
    raise ValueError(msg)
# Register the signing encoder/decoder pair with kombu under the serializer
# name "signed-pickle". The content type declared here is the value that the
# settings hunk lists in CELERY_ACCEPT_CONTENT.
register(
"signed-pickle",
signed_pickle_dumps,
signed_pickle_loads,
content_type="application/x-signed-pickle",
content_encoding="binary",
)
# Celery application instance, named "paperless".
app = Celery("paperless")
# Using a string here means the worker doesn't have to serialize

View File

@@ -667,9 +667,11 @@ CELERY_RESULT_BACKEND = "django-db"
CELERY_CACHE_BACKEND = "default"
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#task-serializer
# NOTE(review): this assignment is overridden by the "signed-pickle" assignment
# a few lines below. It looks like the pre-change line of the diff — confirm
# that only one assignment remains in the actual settings file.
CELERY_TASK_SERIALIZER = "pickle"
# Uses HMAC-signed pickle to prevent RCE via malicious messages on an exposed Redis broker.
# The signed-pickle serializer is registered in paperless/celery.py.
CELERY_TASK_SERIALIZER = "signed-pickle"
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#std-setting-accept_content
# NOTE(review): the first list below is overridden by the second, which swaps
# "application/x-python-serialize" for "application/x-signed-pickle" — confirm
# the first assignment is absent from the actual settings file.
CELERY_ACCEPT_CONTENT = ["application/json", "application/x-python-serialize"]
CELERY_ACCEPT_CONTENT = ["application/json", "application/x-signed-pickle"]
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#beat-schedule
CELERY_BEAT_SCHEDULE = parse_beat_schedule()

View File

@@ -0,0 +1,69 @@
import hmac
import pickle
from hashlib import sha256
import pytest
from django.test import override_settings
from paperless.celery import HMAC_SIZE
from paperless.celery import signed_pickle_dumps
from paperless.celery import signed_pickle_loads
class TestSignedPickleSerializer:
    """Tests for the HMAC-signed pickle encoder/decoder from ``paperless.celery``."""

    def test_roundtrip_simple_types(self):
        """Basic JSON-like values survive an encode/decode cycle unchanged."""
        samples = [42, "hello", [1, 2, 3], {"key": "value"}, None, True]
        for sample in samples:
            encoded = signed_pickle_dumps(sample)
            assert signed_pickle_loads(encoded) == sample

    def test_roundtrip_complex_types(self):
        """Values that JSON cannot represent (Path, set) also round-trip."""
        from pathlib import Path

        expected_path = Path("/tmp/test")
        expected_set = {1, 2, 3}
        decoded = signed_pickle_loads(
            signed_pickle_dumps({"path": expected_path, "data": expected_set}),
        )
        assert decoded["path"] == expected_path
        assert decoded["data"] == expected_set

    def test_tampered_data_rejected(self):
        """Inverting the last byte (data portion) fails HMAC verification."""
        signed = signed_pickle_dumps({"task": "test"})
        corrupted = signed[:-1] + bytes([signed[-1] ^ 0xFF])
        with pytest.raises(ValueError, match="HMAC verification failed"):
            signed_pickle_loads(corrupted)

    def test_tampered_signature_rejected(self):
        """Inverting the first byte (signature portion) fails HMAC verification."""
        signed = signed_pickle_dumps({"task": "test"})
        corrupted = bytes([signed[0] ^ 0xFF]) + signed[1:]
        with pytest.raises(ValueError, match="HMAC verification failed"):
            signed_pickle_loads(corrupted)

    def test_truncated_payload_rejected(self):
        """A payload one byte shorter than HMAC_SIZE is rejected as too short."""
        too_short = bytes(HMAC_SIZE - 1)  # all-zero bytes
        with pytest.raises(ValueError, match="too short"):
            signed_pickle_loads(too_short)

    def test_empty_payload_rejected(self):
        """An empty payload is rejected as too short."""
        with pytest.raises(ValueError, match="too short"):
            signed_pickle_loads(b"")

    @override_settings(SECRET_KEY="different-secret-key")
    def test_wrong_secret_key_rejected(self):
        """A payload signed with a key other than the active SECRET_KEY is rejected."""
        signing_key = b"test-secret-key-do-not-use-in-production"
        pickled = pickle.dumps({"task": "test"}, protocol=pickle.HIGHEST_PROTOCOL)
        foreign_mac = hmac.new(signing_key, pickled, sha256).digest()
        with pytest.raises(ValueError, match="HMAC verification failed"):
            signed_pickle_loads(foreign_mac + pickled)

    def test_forged_pickle_rejected(self):
        """A raw pickle behind an all-zero signature prefix is rejected."""
        unsigned_pickle = pickle.dumps({"task": "test"})
        # HMAC_SIZE zero bytes stand in for the signature; they will not verify.
        forged = bytes(HMAC_SIZE) + unsigned_pickle
        with pytest.raises(ValueError, match="HMAC verification failed"):
            signed_pickle_loads(forged)