From 96051e01a92327d509cf41d28b2498954b7fbafa Mon Sep 17 00:00:00 2001
From: Trenton H <797416+stumpylog@users.noreply.github.com>
Date: Thu, 2 Apr 2026 14:07:56 -0700
Subject: [PATCH] Registers a custom serializer which signs the payload

---
 src/paperless/celery.py            | 68 +++++++++++++++++++++++++++++
 src/paperless/settings/__init__.py |  6 ++-
 src/paperless/tests/test_celery.py | 69 ++++++++++++++++++++++++++++++
 3 files changed, 141 insertions(+), 2 deletions(-)
 create mode 100644 src/paperless/tests/test_celery.py

diff --git a/src/paperless/celery.py b/src/paperless/celery.py
index d937b3ada..3797c840c 100644
--- a/src/paperless/celery.py
+++ b/src/paperless/celery.py
@@ -1,11 +1,79 @@
+import hmac
 import os
+import pickle
+from hashlib import sha256
 
 from celery import Celery
 from celery.signals import worker_process_init
+from kombu.serialization import register
 
 # Set the default Django settings module for the 'celery' program.
 os.environ.setdefault("DJANGO_SETTINGS_MODULE", "paperless.settings")
 
+# ---------------------------------------------------------------------------
+# Signed-pickle serializer: pickle with HMAC-SHA256 integrity verification.
+#
+# Protects against malicious pickle injection via an exposed Redis broker.
+# Messages are signed on the producer side and verified before deserialization
+# on the worker side using Django's SECRET_KEY.
+# ---------------------------------------------------------------------------
+
+HMAC_SIZE = sha256().digest_size  # SHA-256 digest length (32 bytes)
+
+
+def _get_signing_key() -> bytes:
+    """Return Django's SECRET_KEY encoded as bytes, for use as the HMAC key."""
+    # Function-scope import: Django settings are resolved when a message is
+    # signed or verified, not when this module is imported.
+    from django.conf import settings
+
+    return settings.SECRET_KEY.encode()
+
+
+def _sign(data: bytes) -> bytes:
+    """Return the HMAC-SHA256 digest of ``data`` under the signing key."""
+    return hmac.digest(_get_signing_key(), data, sha256)
+
+
+def signed_pickle_dumps(obj: object) -> bytes:
+    """
+    Pickle ``obj`` and prepend the HMAC-SHA256 signature of the pickled bytes.
+
+    The result is laid out as ``signature (HMAC_SIZE bytes) + pickle data``.
+    """
+    data = pickle.dumps(obj, protocol=pickle.HIGHEST_PROTOCOL)
+    return _sign(data) + data
+
+
+def signed_pickle_loads(payload: bytes) -> object:
+    """
+    Verify the signature at the front of ``payload`` and unpickle the rest.
+
+    Raises ``ValueError`` when the payload is shorter than a signature or the
+    signature does not match; in both cases nothing is unpickled.
+    """
+    if len(payload) < HMAC_SIZE:
+        msg = "Signed-pickle payload too short"
+        raise ValueError(msg)
+    signature = payload[:HMAC_SIZE]
+    data = payload[HMAC_SIZE:]
+    # compare_digest is constant-time, so timing does not reveal the digest.
+    if not hmac.compare_digest(signature, _sign(data)):
+        msg = "Signed-pickle HMAC verification failed — message may have been tampered with"
+        raise ValueError(msg)
+    # Only authenticated bytes reach this point; never call pickle.loads on
+    # data that has not passed the HMAC check above.
+    return pickle.loads(data)
+
+
+register(
+    "signed-pickle",
+    signed_pickle_dumps,
+    signed_pickle_loads,
+    content_type="application/x-signed-pickle",
+    content_encoding="binary",
+)
+
 app = Celery("paperless")
 
 # Using a string here means the worker doesn't have to serialize
diff --git a/src/paperless/settings/__init__.py b/src/paperless/settings/__init__.py
index 3522b3187..a627e33c9 100644
--- a/src/paperless/settings/__init__.py
+++ b/src/paperless/settings/__init__.py
@@ -667,9 +667,11 @@ CELERY_RESULT_BACKEND = "django-db"
 CELERY_CACHE_BACKEND = "default"
 
 # https://docs.celeryq.dev/en/stable/userguide/configuration.html#task-serializer
-CELERY_TASK_SERIALIZER = "pickle"
+# Uses HMAC-signed pickle to prevent RCE via malicious messages on an exposed Redis broker.
+# The signed-pickle serializer is registered in paperless/celery.py.
+CELERY_TASK_SERIALIZER = "signed-pickle"
 # https://docs.celeryq.dev/en/stable/userguide/configuration.html#std-setting-accept_content
-CELERY_ACCEPT_CONTENT = ["application/json", "application/x-python-serialize"]
+CELERY_ACCEPT_CONTENT = ["application/json", "application/x-signed-pickle"]
 
 # https://docs.celeryq.dev/en/stable/userguide/configuration.html#beat-schedule
 CELERY_BEAT_SCHEDULE = parse_beat_schedule()
diff --git a/src/paperless/tests/test_celery.py b/src/paperless/tests/test_celery.py
new file mode 100644
index 000000000..0c0e51272
--- /dev/null
+++ b/src/paperless/tests/test_celery.py
@@ -0,0 +1,69 @@
+import hmac
+import pickle
+from hashlib import sha256
+from pathlib import Path
+
+import pytest
+from django.test import override_settings
+
+from paperless.celery import HMAC_SIZE
+from paperless.celery import signed_pickle_dumps
+from paperless.celery import signed_pickle_loads
+
+
+class TestSignedPickleSerializer:
+    def test_roundtrip_simple_types(self):
+        """Signed pickle can round-trip basic JSON-like types."""
+        for obj in [42, "hello", [1, 2, 3], {"key": "value"}, None, True]:
+            assert signed_pickle_loads(signed_pickle_dumps(obj)) == obj
+
+    def test_roundtrip_complex_types(self):
+        """Signed pickle can round-trip types that JSON cannot."""
+        obj = {"path": Path("/tmp/test"), "data": {1, 2, 3}}
+        result = signed_pickle_loads(signed_pickle_dumps(obj))
+        assert result["path"] == Path("/tmp/test")
+        assert result["data"] == {1, 2, 3}
+
+    def test_tampered_data_rejected(self):
+        """Flipping a byte in the data portion causes HMAC failure."""
+        payload = signed_pickle_dumps({"task": "test"})
+        tampered = bytearray(payload)
+        tampered[-1] ^= 0xFF
+        with pytest.raises(ValueError, match="HMAC verification failed"):
+            signed_pickle_loads(bytes(tampered))
+
+    def test_tampered_signature_rejected(self):
+        """Flipping a byte in the signature portion causes HMAC failure."""
+        payload = signed_pickle_dumps({"task": "test"})
+        tampered = bytearray(payload)
+        tampered[0] ^= 0xFF
+        with pytest.raises(ValueError, match="HMAC verification failed"):
+            signed_pickle_loads(bytes(tampered))
+
+    def test_truncated_payload_rejected(self):
+        """A payload shorter than HMAC_SIZE is rejected."""
+        with pytest.raises(ValueError, match="too short"):
+            signed_pickle_loads(b"\x00" * (HMAC_SIZE - 1))
+
+    def test_empty_payload_rejected(self):
+        """An empty payload is rejected as too short."""
+        with pytest.raises(ValueError, match="too short"):
+            signed_pickle_loads(b"")
+
+    @override_settings(SECRET_KEY="different-secret-key")
+    def test_wrong_secret_key_rejected(self):
+        """A message signed with one key cannot be loaded with another."""
+        original_key = b"test-secret-key-do-not-use-in-production"
+        obj = {"task": "test"}
+        data = pickle.dumps(obj, protocol=pickle.HIGHEST_PROTOCOL)
+        signature = hmac.new(original_key, data, sha256).digest()
+        payload = signature + data
+        with pytest.raises(ValueError, match="HMAC verification failed"):
+            signed_pickle_loads(payload)
+
+    def test_forged_pickle_rejected(self):
+        """A raw pickle payload (no signature) is rejected."""
+        raw_pickle = pickle.dumps({"task": "test"})
+        # Raw pickle won't have a valid HMAC prefix
+        with pytest.raises(ValueError, match="HMAC verification failed"):
+            signed_pickle_loads(b"\x00" * HMAC_SIZE + raw_pickle)