From 46e694502defebc3bb3eb12a9fdef15f0c962890 Mon Sep 17 00:00:00 2001 From: Sean Whalen <44679+seanthegeek@users.noreply.github.com> Date: Thu, 4 Jun 2026 10:36:49 -0400 Subject: [PATCH] Detect aggregate reports by "xml_schema" instead of "domain" xml_schema is aggregate-only (failure/SMTP TLS rows don't carry it) and a distinctive, non-generic field name, addressing the concern that "domain" could be confused with other logs. parsedmarc defaults xml_schema to "draft" when the report omits the version element (parsedmarc/__init__.py:832), so it survives a missing version element -- unlike a field with no default. It is also a native JSON string straight out of the json{} filter, so unlike dmarc_aligned it needs no convert step to be testable, keeping detection independent of the type-conversion in step 1b. xml_schema is added to the pre-json init block (required for any if-tested field); domain stays initialized since it is still mapped to target.hostname. Co-Authored-By: Claude Opus 4.8 (1M context) --- google_secops_parser/README.md | 2 +- google_secops_parser/parsedmarc.conf | 18 +++++++++++------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/google_secops_parser/README.md b/google_secops_parser/README.md index ba403f7..8ab4df6 100644 --- a/google_secops_parser/README.md +++ b/google_secops_parser/README.md @@ -27,7 +27,7 @@ detects them by a field unique to each and maps them as follows: | parsedmarc report | Detected by | UDM `metadata.event_type` | |---|---|---| -| DMARC aggregate | `domain` | `EMAIL_TRANSACTION` | +| DMARC aggregate | `xml_schema` | `EMAIL_TRANSACTION` | | DMARC failure | `feedback_type` | `EMAIL_TRANSACTION` | | SMTP TLS (RFC 8460) | `policy_type` | `GENERIC_EVENT` | diff --git a/google_secops_parser/parsedmarc.conf b/google_secops_parser/parsedmarc.conf index a3f4b37..d4dee5a 100644 --- a/google_secops_parser/parsedmarc.conf +++ b/google_secops_parser/parsedmarc.conf @@ -10,7 +10,7 @@ filter { # # parsedmarc emits three flat JSON shapes, 
one object per syslog line, via the # CSV-row serializers (parsed_aggregate/failure/smtp_tls_reports_to_csv_rows): - # * DMARC aggregate report record -> detected by "domain" + # * DMARC aggregate report record -> detected by "xml_schema" # * DMARC failure report record -> detected by "feedback_type" # * SMTP TLS report record -> detected by "policy_type" # @@ -81,9 +81,10 @@ filter { # report-type detection "feedback_type" => "" "policy_type" => "" - "domain" => "" + "xml_schema" => "" # aggregate + "domain" => "" "report_id" => "" "org_name" => "" "org_email" => "" @@ -207,16 +208,19 @@ filter { # and unique to each shape: # feedback_type -> failure # policy_type -> smtp_tls - # domain -> aggregate (the reported From-domain; a required - # element of every aggregate record) - # domain is preferred over header_from (which can be empty when a record - # carries no identifiers) and over adkim (a defaulted policy field). + # xml_schema -> aggregate + # xml_schema is aggregate-only and parsedmarc defaults it to "draft" when + # the report omits the version element (parsedmarc/__init__.py), so it survives a + # missing version. It is preferred over: header_from (can be empty when a + # record carries no identifiers), adkim (a defaulted policy field), domain + # (a generic name), and dmarc_aligned (a boolean that only becomes testable + # after the convert in step 1b -- detection should not depend on that). # --------------------------------------------------------------------------- if [feedback_type] { mutate { replace => { "report_type" => "failure" } } } else if [policy_type] { mutate { replace => { "report_type" => "smtp_tls" } } - } else if [domain] { + } else if [xml_schema] { mutate { replace => { "report_type" => "aggregate" } } }