diff --git a/pyproject.toml b/pyproject.toml
index 1b6a11419..52e87d164 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -16,7 +16,7 @@ classifiers = [
dependencies = [
"azure-ai-documentintelligence>=1.0.2",
"babel>=2.17",
- "bleach~=6.3.0",
+ "bleach[css]~=6.3.0",
"celery[redis]~=5.6.2",
"channels~=4.2",
"channels-redis~=4.2",
diff --git a/src/paperless/parsers/mail.py b/src/paperless/parsers/mail.py
index 29355f3bd..88b54ef27 100644
--- a/src/paperless/parsers/mail.py
+++ b/src/paperless/parsers/mail.py
@@ -24,6 +24,7 @@ from typing import Self
from bleach import clean
from bleach import linkify
+from bleach.css_sanitizer import CSSSanitizer
from django.conf import settings
from django.utils import timezone
from django.utils.timezone import is_naive
@@ -109,6 +110,7 @@ _EMAIL_HTML_GLOBAL_ATTRIBUTES = {
"align",
"alt",
"height",
+ "style",
"title",
"width",
}
@@ -120,6 +122,46 @@ _EMAIL_HTML_TAG_ATTRIBUTES = {
"th": {"colspan", "headers", "rowspan", "scope"},
"ul": {"type"},
}
+_EMAIL_CSS_PROPERTIES = {  # allow-list of CSS property names handed to CSSSanitizer below
+ "background-color",
+ "border",
+ "border-bottom",
+ "border-collapse",
+ "border-color",
+ "border-left",
+ "border-right",
+ "border-spacing",
+ "border-style",
+ "border-top",
+ "border-width",
+ "color",
+ "display",
+ "font",
+ "font-family",
+ "font-size",
+ "font-style",
+ "font-weight",
+ "height",
+ "line-height",
+ "margin",
+ "margin-bottom",
+ "margin-left",
+ "margin-right",
+ "margin-top",
+ "max-width",
+ "min-width",
+ "padding",
+ "padding-bottom",
+ "padding-left",
+ "padding-right",
+ "padding-top",
+ "text-align",
+ "text-decoration",
+ "vertical-align",
+ "white-space",
+ "width",
+}  # NOTE(review): unlisted properties (e.g. background-image, position) are expected to be stripped - see test_mail_parser
+_EMAIL_CSS_SANITIZER = CSSSanitizer(allowed_css_properties=_EMAIL_CSS_PROPERTIES)  # shared instance, passed as css_sanitizer= in _clean_email_html
def _linkify_text_as_html(text: object) -> str:
@@ -159,6 +201,7 @@ def _clean_email_html(text: str) -> str:
tags=_EMAIL_HTML_TAGS,
attributes=_allow_email_html_attribute,
protocols=_EMAIL_HTML_PROTOCOLS,
+ css_sanitizer=_EMAIL_CSS_SANITIZER,
strip=True,
strip_comments=True,
),
diff --git a/src/paperless/tests/parsers/test_mail_parser.py b/src/paperless/tests/parsers/test_mail_parser.py
index 54444bc7a..0dedfbf02 100644
--- a/src/paperless/tests/parsers/test_mail_parser.py
+++ b/src/paperless/tests/parsers/test_mail_parser.py
@@ -87,16 +87,23 @@ class TestMailHtmlCleaning:
def test_email_html_preserves_safe_structure(self) -> None:
result = _clean_email_html(
"""
-
-
-
+
+
+
Visit https://example.com
""",
)
- assert "" in result
- assert 'Total | ' in result
- assert '
' in result
+ assert 'style="margin: 0; padding: 8px; color: #333;"' in result
+ assert "Hello there
" in result
+ assert 'style="width: 100%; border-collapse: collapse;"' in result
+ assert 'Total | ' in result
+ assert 'style="display: block;"' in result
+ assert '
None:
@@ -107,7 +114,8 @@ class TestMailHtmlCleaning:
bad link
bad cid link
-
+
""",
)
@@ -117,9 +125,11 @@ class TestMailHtmlCleaning:
assert "onclick" not in result
assert "onerror" not in result
assert "javascript:" not in result
+ assert "background-image" not in result
+ assert "position" not in result
assert "bad link" in result
assert "bad cid link" in result
- assert '
' in result
+ assert '
' in result
class TestEmailFileParsing:
diff --git a/uv.lock b/uv.lock
index 3591fa29c..ae52bc275 100644
--- a/uv.lock
+++ b/uv.lock
@@ -313,6 +313,11 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/cd/3a/577b549de0cc09d95f11087ee63c739bba856cd3952697eec4c4bb91350a/bleach-6.3.0-py3-none-any.whl", hash = "sha256:fe10ec77c93ddf3d13a73b035abaac7a9f5e436513864ccdad516693213c65d6", size = 164437, upload-time = "2025-10-27T17:57:37.538Z" },
]
+[package.optional-dependencies]
+css = [
+ { name = "tinycss2", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+]
+
[[package]]
name = "brotli"
version = "1.2.0"
@@ -2888,7 +2893,7 @@ source = { virtual = "." }
dependencies = [
{ name = "azure-ai-documentintelligence", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "babel", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
- { name = "bleach", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+ { name = "bleach", extra = ["css"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "celery", extra = ["redis"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "channels", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "channels-redis", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -3038,7 +3043,7 @@ typing = [
requires-dist = [
{ name = "azure-ai-documentintelligence", specifier = ">=1.0.2" },
{ name = "babel", specifier = ">=2.17" },
- { name = "bleach", specifier = "~=6.3.0" },
+ { name = "bleach", extras = ["css"], specifier = "~=6.3.0" },
{ name = "celery", extras = ["redis"], specifier = "~=5.6.2" },
{ name = "channels", specifier = "~=4.2" },
{ name = "channels-redis", specifier = "~=4.2" },
@@ -4892,6 +4897,18 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/40/d0/ad3feb0a392ef4e0c08bc32024950373ddc0669002cbdcbb9f3bf0c2d114/time_machine-3.2.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:528217cad85ede5f85c8bc78b0341868d3c3cfefc6ecb5b622e1cacb6c73247b", size = 39837, upload-time = "2025-12-17T23:32:58.283Z" },
]
+[[package]]
+name = "tinycss2"
+version = "1.4.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "webencodings", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/7a/fd/7a5ee21fd08ff70d3d33a5781c255cbe779659bd03278feb98b19ee550f4/tinycss2-1.4.0.tar.gz", hash = "sha256:10c0972f6fc0fbee87c3edb76549357415e94548c1ae10ebccdea16fb404a9b7", size = 87085, upload-time = "2024-10-24T14:58:29.895Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/e6/34/ebdc18bae6aa14fbee1a08b63c015c72b64868ff7dae68808ab500c492e2/tinycss2-1.4.0-py3-none-any.whl", hash = "sha256:3a49cf47b7675da0b15d0c6e1df8df4ebd96e9394bb905a5775adb0d884c5289", size = 26610, upload-time = "2024-10-24T14:58:28.029Z" },
+]
+
[[package]]
name = "tinytag"
version = "2.2.1"