From ac996a8edfdf2bb8061332c6aaefec73458f5cb3 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 20 Feb 2026 20:40:37 +0000
Subject: [PATCH] Align DMARCbis fields with actual XSD schema: testing,
 discovery_method, generator, human_result; handle namespaced XML

Co-authored-by: seanthegeek <44679+seanthegeek@users.noreply.github.com>
---
 parsedmarc/__init__.py                        | 42 +++++++++-------
 parsedmarc/types.py                           |  7 ++-
 samples/aggregate/dmarcbis-draft-sample.xml   | 48 +++++++++++++++++++
 ....net!example.com!1700000000!1700086399.xml |  5 +-
 4 files changed, 81 insertions(+), 21 deletions(-)
 create mode 100644 samples/aggregate/dmarcbis-draft-sample.xml

diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py
index cdc9d95..e87d4b8 100644
--- a/parsedmarc/__init__.py
+++ b/parsedmarc/__init__.py
@@ -74,6 +74,7 @@ text_report_regex = re.compile(r"\s*([a-zA-Z\s]+):\s(.+)", re.MULTILINE)
 MAGIC_ZIP = b"\x50\x4b\x03\x04"
 MAGIC_GZIP = b"\x1f\x8b"
 MAGIC_XML = b"\x3c\x3f\x78\x6d\x6c\x20"
+MAGIC_XML_TAG = b"\x3c"  # '<' - XML starting with an element tag (no declaration)
 MAGIC_JSON = b"\7b"
 
 EMAIL_SAMPLE_CONTENT_TYPES = (
@@ -415,6 +416,7 @@ def _parse_report_record(
                 new_result["result"] = result["result"]
             else:
                 new_result["result"] = "none"
+            new_result["human_result"] = result.get("human_result", None)
             new_record["auth_results"]["dkim"].append(new_result)
 
     if not isinstance(auth_results["spf"], list):
@@ -430,6 +432,7 @@ def _parse_report_record(
                 new_result["result"] = result["result"]
             else:
                 new_result["result"] = "none"
+            new_result["human_result"] = result.get("human_result", None)
             new_record["auth_results"]["spf"].append(new_result)
 
     if "envelope_from" not in new_record["identifiers"]:
@@ -782,6 +785,10 @@ def parse_aggregate_report_xml(
             else:
                 errors = report["report_metadata"]["error"]
         new_report_metadata["errors"] = errors
+        generator = None
+        if "generator" in report_metadata:
+            generator = report_metadata["generator"]
+        new_report_metadata["generator"] = generator
         new_report["report_metadata"] = new_report_metadata
         records = []
         policy_published = report["policy_published"]
@@ -820,16 +827,16 @@ def parse_aggregate_report_xml(
             if policy_published["np"] is not None:
                 np_ = policy_published["np"]
         new_policy_published["np"] = np_
-        psd = None
-        if "psd" in policy_published:
-            if policy_published["psd"] is not None:
-                psd = policy_published["psd"]
-        new_policy_published["psd"] = psd
-        t = None
-        if "t" in policy_published:
-            if policy_published["t"] is not None:
-                t = policy_published["t"]
-        new_policy_published["t"] = t
+        testing = None
+        if "testing" in policy_published:
+            if policy_published["testing"] is not None:
+                testing = policy_published["testing"]
+        new_policy_published["testing"] = testing
+        discovery_method = None
+        if "discovery_method" in policy_published:
+            if policy_published["discovery_method"] is not None:
+                discovery_method = policy_published["discovery_method"]
+        new_policy_published["discovery_method"] = discovery_method
         new_report["policy_published"] = new_policy_published
 
         if type(report["record"]) is list:
@@ -962,6 +969,7 @@ def extract_report(content: Union[bytes, str, BinaryIO]) -> str:
             )
         elif (
             header[: len(MAGIC_XML)] == MAGIC_XML
+            or header[: len(MAGIC_XML_TAG)] == MAGIC_XML_TAG
             or header[: len(MAGIC_JSON)] == MAGIC_JSON
         ):
             report = file_object.read().decode(errors="ignore")
@@ -1088,8 +1096,10 @@ def parsed_aggregate_reports_to_csv_rows(
         pct = report["policy_published"]["pct"]
         fo = report["policy_published"]["fo"]
         np_ = report["policy_published"].get("np", None)
-        psd = report["policy_published"].get("psd", None)
-        t = report["policy_published"].get("t", None)
+        testing = report["policy_published"].get("testing", None)
+        discovery_method = report["policy_published"].get(
+            "discovery_method", None
+        )
 
         report_dict: dict[str, Any] = dict(
             xml_schema=xml_schema,
@@ -1109,8 +1119,8 @@ def parsed_aggregate_reports_to_csv_rows(
             pct=pct,
             fo=fo,
             np=np_,
-            psd=psd,
-            t=t,
+            testing=testing,
+            discovery_method=discovery_method,
         )
 
         for record in report["records"]:
@@ -1209,8 +1219,8 @@ def parsed_aggregate_reports_to_csv(
         "pct",
         "fo",
         "np",
-        "psd",
-        "t",
+        "testing",
+        "discovery_method",
         "source_ip_address",
         "source_country",
         "source_reverse_dns",
diff --git a/parsedmarc/types.py b/parsedmarc/types.py
index 61a9a83..03d2d49 100644
--- a/parsedmarc/types.py
+++ b/parsedmarc/types.py
@@ -21,6 +21,7 @@ class AggregateReportMetadata(TypedDict):
     timespan_requires_normalization: bool
     original_timespan_seconds: int
     errors: List[str]
+    generator: Optional[str]
 
 
 class AggregatePolicyPublished(TypedDict):
@@ -32,8 +33,8 @@ class AggregatePolicyPublished(TypedDict):
     pct: str
     fo: str
     np: Optional[str]
-    psd: Optional[str]
-    t: Optional[str]
+    testing: Optional[str]
+    discovery_method: Optional[str]
 
 
 class IPSourceInfo(TypedDict):
@@ -66,12 +67,14 @@ class AggregateAuthResultDKIM(TypedDict):
     domain: str
     result: str
     selector: str
+    human_result: Optional[str]
 
 
 class AggregateAuthResultSPF(TypedDict):
     domain: str
     result: str
     scope: str
+    human_result: Optional[str]
 
 
 class AggregateAuthResults(TypedDict):
diff --git a/samples/aggregate/dmarcbis-draft-sample.xml b/samples/aggregate/dmarcbis-draft-sample.xml
new file mode 100644
index 0000000..b75408c
--- /dev/null
+++ b/samples/aggregate/dmarcbis-draft-sample.xml
@@ -0,0 +1,48 @@
+<feedback xmlns="urn:ietf:params:xml:ns:dmarc-2.0">
+  <version>1.0</version>
+  <report_metadata>
+    <org_name>Sample Reporter</org_name>
+    <email>report_sender@example-reporter.com</email>
+    <extra_contact_info>...</extra_contact_info>
+    <report_id>3v98abbp8ya9n3va8yr8oa3ya</report_id>
+    <date_range>
+      <begin>302832000</begin>
+      <end>302918399</end>
+    </date_range>
+    <generator>Example DMARC Aggregate Reporter v1.2</generator>
+  </report_metadata>
+  <policy_published>
+    <domain>example.com</domain>
+    <p>quarantine</p>
+    <sp>none</sp>
+    <np>none</np>
+    <testing>n</testing>
+    <discovery_method>treewalk</discovery_method>
+  </policy_published>
+  <record>
+    <row>
+      <source_ip>192.0.2.123</source_ip>
+      <count>123</count>
+      <policy_evaluated>
+        <disposition>pass</disposition>
+        <dkim>pass</dkim>
+        <spf>fail</spf>
+      </policy_evaluated>
+    </row>
+    <identifiers>
+      <envelope_from>example.com</envelope_from>
+      <header_from>example.com</header_from>
+    </identifiers>
+    <auth_results>
+      <dkim>
+        <domain>example.com</domain>
+        <result>pass</result>
+        <selector>abc123</selector>
+      </dkim>
+      <spf>
+        <domain>example.com</domain>
+        <result>fail</result>
+      </spf>
+    </auth_results>
+  </record>
+</feedback>
diff --git a/samples/aggregate/dmarcbis-example.net!example.com!1700000000!1700086399.xml b/samples/aggregate/dmarcbis-example.net!example.com!1700000000!1700086399.xml
index 59cb1a5..4a1baa9 100644
--- a/samples/aggregate/dmarcbis-example.net!example.com!1700000000!1700086399.xml
+++ b/samples/aggregate/dmarcbis-example.net!example.com!1700000000!1700086399.xml
@@ -17,9 +17,8 @@
     <p>reject</p>
     <sp>quarantine</sp>
     <np>reject</np>
-    <psd>n</psd>
-    <t>y</t>
-    <pct>100</pct>
+    <testing>y</testing>
+    <discovery_method>treewalk</discovery_method>
     <fo>1</fo>
   </policy_published>
   <record>