From ac996a8edfdf2bb8061332c6aaefec73458f5cb3 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 20 Feb 2026 20:40:37 +0000
Subject: [PATCH] Align DMARCbis fields with actual XSD schema: testing,
discovery_method, generator, human_result; handle namespaced XML
Co-authored-by: seanthegeek <44679+seanthegeek@users.noreply.github.com>
---
parsedmarc/__init__.py | 42 +++++++++-------
parsedmarc/types.py | 7 ++-
samples/aggregate/dmarcbis-draft-sample.xml | 48 +++++++++++++++++++
....net!example.com!1700000000!1700086399.xml | 5 +-
4 files changed, 81 insertions(+), 21 deletions(-)
create mode 100644 samples/aggregate/dmarcbis-draft-sample.xml
diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py
index cdc9d95..e87d4b8 100644
--- a/parsedmarc/__init__.py
+++ b/parsedmarc/__init__.py
@@ -74,6 +74,7 @@ text_report_regex = re.compile(r"\s*([a-zA-Z\s]+):\s(.+)", re.MULTILINE)
MAGIC_ZIP = b"\x50\x4b\x03\x04"
MAGIC_GZIP = b"\x1f\x8b"
MAGIC_XML = b"\x3c\x3f\x78\x6d\x6c\x20"
+MAGIC_XML_TAG = b"\x3c" # '<' - XML starting with an element tag (no declaration)
MAGIC_JSON = b"\7b"
EMAIL_SAMPLE_CONTENT_TYPES = (
@@ -415,6 +416,7 @@ def _parse_report_record(
new_result["result"] = result["result"]
else:
new_result["result"] = "none"
+ new_result["human_result"] = result.get("human_result", None)
new_record["auth_results"]["dkim"].append(new_result)
if not isinstance(auth_results["spf"], list):
@@ -430,6 +432,7 @@ def _parse_report_record(
new_result["result"] = result["result"]
else:
new_result["result"] = "none"
+ new_result["human_result"] = result.get("human_result", None)
new_record["auth_results"]["spf"].append(new_result)
if "envelope_from" not in new_record["identifiers"]:
@@ -782,6 +785,10 @@ def parse_aggregate_report_xml(
else:
errors = report["report_metadata"]["error"]
new_report_metadata["errors"] = errors
+ generator = None
+ if "generator" in report_metadata:
+ generator = report_metadata["generator"]
+ new_report_metadata["generator"] = generator
new_report["report_metadata"] = new_report_metadata
records = []
policy_published = report["policy_published"]
@@ -820,16 +827,16 @@ def parse_aggregate_report_xml(
if policy_published["np"] is not None:
np_ = policy_published["np"]
new_policy_published["np"] = np_
- psd = None
- if "psd" in policy_published:
- if policy_published["psd"] is not None:
- psd = policy_published["psd"]
- new_policy_published["psd"] = psd
- t = None
- if "t" in policy_published:
- if policy_published["t"] is not None:
- t = policy_published["t"]
- new_policy_published["t"] = t
+ testing = None
+ if "testing" in policy_published:
+ if policy_published["testing"] is not None:
+ testing = policy_published["testing"]
+ new_policy_published["testing"] = testing
+ discovery_method = None
+ if "discovery_method" in policy_published:
+ if policy_published["discovery_method"] is not None:
+ discovery_method = policy_published["discovery_method"]
+ new_policy_published["discovery_method"] = discovery_method
new_report["policy_published"] = new_policy_published
if type(report["record"]) is list:
@@ -962,6 +969,7 @@ def extract_report(content: Union[bytes, str, BinaryIO]) -> str:
)
elif (
header[: len(MAGIC_XML)] == MAGIC_XML
+ or header[: len(MAGIC_XML_TAG)] == MAGIC_XML_TAG
or header[: len(MAGIC_JSON)] == MAGIC_JSON
):
report = file_object.read().decode(errors="ignore")
@@ -1088,8 +1096,10 @@ def parsed_aggregate_reports_to_csv_rows(
pct = report["policy_published"]["pct"]
fo = report["policy_published"]["fo"]
np_ = report["policy_published"].get("np", None)
- psd = report["policy_published"].get("psd", None)
- t = report["policy_published"].get("t", None)
+ testing = report["policy_published"].get("testing", None)
+ discovery_method = report["policy_published"].get(
+ "discovery_method", None
+ )
report_dict: dict[str, Any] = dict(
xml_schema=xml_schema,
@@ -1109,8 +1119,8 @@ def parsed_aggregate_reports_to_csv_rows(
pct=pct,
fo=fo,
np=np_,
- psd=psd,
- t=t,
+ testing=testing,
+ discovery_method=discovery_method,
)
for record in report["records"]:
@@ -1209,8 +1219,8 @@ def parsed_aggregate_reports_to_csv(
"pct",
"fo",
"np",
- "psd",
- "t",
+ "testing",
+ "discovery_method",
"source_ip_address",
"source_country",
"source_reverse_dns",
diff --git a/parsedmarc/types.py b/parsedmarc/types.py
index 61a9a83..03d2d49 100644
--- a/parsedmarc/types.py
+++ b/parsedmarc/types.py
@@ -21,6 +21,7 @@ class AggregateReportMetadata(TypedDict):
timespan_requires_normalization: bool
original_timespan_seconds: int
errors: List[str]
+ generator: Optional[str]
class AggregatePolicyPublished(TypedDict):
@@ -32,8 +33,8 @@ class AggregatePolicyPublished(TypedDict):
pct: str
fo: str
np: Optional[str]
- psd: Optional[str]
- t: Optional[str]
+ testing: Optional[str]
+ discovery_method: Optional[str]
class IPSourceInfo(TypedDict):
@@ -66,12 +67,14 @@ class AggregateAuthResultDKIM(TypedDict):
domain: str
result: str
selector: str
+ human_result: Optional[str]
class AggregateAuthResultSPF(TypedDict):
domain: str
result: str
scope: str
+ human_result: Optional[str]
class AggregateAuthResults(TypedDict):
diff --git a/samples/aggregate/dmarcbis-draft-sample.xml b/samples/aggregate/dmarcbis-draft-sample.xml
new file mode 100644
index 0000000..b75408c
--- /dev/null
+++ b/samples/aggregate/dmarcbis-draft-sample.xml
@@ -0,0 +1,48 @@
+ quarantine
reject