From ac996a8edfdf2bb8061332c6aaefec73458f5cb3 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 20 Feb 2026 20:40:37 +0000 Subject: [PATCH] Align DMARCbis fields with actual XSD schema: testing, discovery_method, generator, human_result; handle namespaced XML Co-authored-by: seanthegeek <44679+seanthegeek@users.noreply.github.com> --- parsedmarc/__init__.py | 42 +++++++++------- parsedmarc/types.py | 7 ++- samples/aggregate/dmarcbis-draft-sample.xml | 48 +++++++++++++++++++ ....net!example.com!1700000000!1700086399.xml | 5 +- 4 files changed, 81 insertions(+), 21 deletions(-) create mode 100644 samples/aggregate/dmarcbis-draft-sample.xml diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py index cdc9d95..e87d4b8 100644 --- a/parsedmarc/__init__.py +++ b/parsedmarc/__init__.py @@ -74,6 +74,7 @@ text_report_regex = re.compile(r"\s*([a-zA-Z\s]+):\s(.+)", re.MULTILINE) MAGIC_ZIP = b"\x50\x4b\x03\x04" MAGIC_GZIP = b"\x1f\x8b" MAGIC_XML = b"\x3c\x3f\x78\x6d\x6c\x20" +MAGIC_XML_TAG = b"\x3c" # '<' - XML starting with an element tag (no declaration) MAGIC_JSON = b"\7b" EMAIL_SAMPLE_CONTENT_TYPES = ( @@ -415,6 +416,7 @@ def _parse_report_record( new_result["result"] = result["result"] else: new_result["result"] = "none" + new_result["human_result"] = result.get("human_result", None) new_record["auth_results"]["dkim"].append(new_result) if not isinstance(auth_results["spf"], list): @@ -430,6 +432,7 @@ def _parse_report_record( new_result["result"] = result["result"] else: new_result["result"] = "none" + new_result["human_result"] = result.get("human_result", None) new_record["auth_results"]["spf"].append(new_result) if "envelope_from" not in new_record["identifiers"]: @@ -782,6 +785,10 @@ def parse_aggregate_report_xml( else: errors = report["report_metadata"]["error"] new_report_metadata["errors"] = errors + generator = None + if "generator" in report_metadata: + generator = report_metadata["generator"] + new_report_metadata["generator"] = generator new_report["report_metadata"] = new_report_metadata records = [] policy_published = report["policy_published"] @@ -820,16 +827,16 @@ def parse_aggregate_report_xml( if policy_published["np"] is not None: np_ = policy_published["np"] new_policy_published["np"] = np_ - psd = None - if "psd" in policy_published: - if policy_published["psd"] is not None: - psd = policy_published["psd"] - new_policy_published["psd"] = psd - t = None - if "t" in policy_published: - if policy_published["t"] is not None: - t = policy_published["t"] - new_policy_published["t"] = t + testing = None + if "testing" in policy_published: + if policy_published["testing"] is not None: + testing = policy_published["testing"] + new_policy_published["testing"] = testing + discovery_method = None + if "discovery_method" in policy_published: + if policy_published["discovery_method"] is not None: + discovery_method = policy_published["discovery_method"] + new_policy_published["discovery_method"] = discovery_method new_report["policy_published"] = new_policy_published if type(report["record"]) is list: @@ -962,6 +969,7 @@ def extract_report(content: Union[bytes, str, BinaryIO]) -> str: ) elif ( header[: len(MAGIC_XML)] == MAGIC_XML + or header[: len(MAGIC_XML_TAG)] == MAGIC_XML_TAG or header[: len(MAGIC_JSON)] == MAGIC_JSON ): report = file_object.read().decode(errors="ignore") @@ -1088,8 +1096,10 @@ def parsed_aggregate_reports_to_csv_rows( pct = report["policy_published"]["pct"] fo = report["policy_published"]["fo"] np_ = report["policy_published"].get("np", None) - psd = report["policy_published"].get("psd", None) - t = report["policy_published"].get("t", None) + testing = report["policy_published"].get("testing", None) + discovery_method = report["policy_published"].get( + "discovery_method", None + ) report_dict: dict[str, Any] = dict( xml_schema=xml_schema, @@ -1109,8 +1119,8 @@ def parsed_aggregate_reports_to_csv_rows( pct=pct, fo=fo, np=np_, - psd=psd, - t=t, + testing=testing, + discovery_method=discovery_method, ) for record in report["records"]: @@ -1209,8 +1219,8 @@ def parsed_aggregate_reports_to_csv( "pct", "fo", "np", - "psd", - "t", + "testing", + "discovery_method", "source_ip_address", "source_country", "source_reverse_dns", diff --git a/parsedmarc/types.py b/parsedmarc/types.py index 61a9a83..03d2d49 100644 --- a/parsedmarc/types.py +++ b/parsedmarc/types.py @@ -21,6 +21,7 @@ class AggregateReportMetadata(TypedDict): timespan_requires_normalization: bool original_timespan_seconds: int errors: List[str] + generator: Optional[str] class AggregatePolicyPublished(TypedDict): @@ -32,8 +33,8 @@ class AggregatePolicyPublished(TypedDict): pct: str fo: str np: Optional[str] - psd: Optional[str] - t: Optional[str] + testing: Optional[str] + discovery_method: Optional[str] class IPSourceInfo(TypedDict): @@ -66,12 +67,14 @@ class AggregateAuthResultDKIM(TypedDict): domain: str result: str selector: str + human_result: Optional[str] class AggregateAuthResultSPF(TypedDict): domain: str result: str scope: str + human_result: Optional[str] class AggregateAuthResults(TypedDict): diff --git a/samples/aggregate/dmarcbis-draft-sample.xml b/samples/aggregate/dmarcbis-draft-sample.xml new file mode 100644 index 0000000..b75408c --- /dev/null +++ b/samples/aggregate/dmarcbis-draft-sample.xml @@ -0,0 +1,48 @@ + + 1.0 + + Sample Reporter + report_sender@example-reporter.com + ... + 3v98abbp8ya9n3va8yr8oa3ya + + 302832000 + 302918399 + + Example DMARC Aggregate Reporter v1.2 + + + example.com +

quarantine

+ none + none + n + treewalk +
+ + + 192.0.2.123 + 123 + + pass + pass + fail + + + + example.com + example.com + + + + example.com + pass + abc123 + + + example.com + fail + + + +
diff --git a/samples/aggregate/dmarcbis-example.net!example.com!1700000000!1700086399.xml b/samples/aggregate/dmarcbis-example.net!example.com!1700000000!1700086399.xml index 59cb1a5..4a1baa9 100644 --- a/samples/aggregate/dmarcbis-example.net!example.com!1700000000!1700086399.xml +++ b/samples/aggregate/dmarcbis-example.net!example.com!1700000000!1700086399.xml @@ -17,9 +17,8 @@

reject

quarantine reject - n - y - 100 + y + treewalk 1