diff --git a/CHANGELOG.md b/CHANGELOG.md index 3e55127..3dbbc9a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,11 @@ - Renamed `[general] ip_db_url` to `ipinfo_url` to reflect what it actually overrides (the bundled IPinfo Lite MMDB download URL). The old name is still accepted as a deprecated alias and logs a warning on use; the env-var equivalent is now `PARSEDMARC_GENERAL_IPINFO_URL`, with `PARSEDMARC_GENERAL_IP_DB_URL` also still honored. - Added an optional IPinfo Lite REST API path for country + ASN lookups, so deployments that want the freshest data can query the API directly instead of waiting for the next MMDB release. Configure `[general] ipinfo_api_token` (or `PARSEDMARC_GENERAL_IPINFO_API_TOKEN`) and every IP lookup hits `https://api.ipinfo.io/lite/` first. At startup the `https://ipinfo.io/me` account endpoint is hit once to validate the token and log the plan, month-to-date usage, and remaining quota at info level (e.g. `IPinfo API configured — plan: Lite, usage: 12345/50000 this month, 37655 remaining`). An invalid token exits the process with a fatal error. Rate-limit (HTTP 429) and quota-exhausted (HTTP 402) responses put the API in a cooldown (honoring `Retry-After`, with a 5-minute / 1-hour default) and fall through to the bundled/cached MMDB; the first event is logged once at warning level and recovery is logged once at info level when the next lookup succeeds. Transient network errors fall through per-request without triggering a cooldown. The API token is never logged. +- Renamed the ASN name and domain fields to match the IPinfo Lite MMDB's native schema: `asn_name` → `as_name` and `asn_domain` → `as_domain` on every source record (JSON output), and `source_asn_name` → `source_as_name` / `source_asn_domain` → `source_as_domain` in CSV output (aggregate + forensic) and the Elasticsearch / OpenSearch / Splunk integrations. The integer `asn` / `source_asn` field is unchanged. The emitted order is `asn`, `as_name`, `as_domain`. 
+ +### Upgrade notes + +- CSV / JSON / Elasticsearch / OpenSearch / Splunk consumers that query the 9.9.0 field names (`asn_name`, `asn_domain`, `source_asn_name`, `source_asn_domain`) must switch to `as_name`, `as_domain`, `source_as_name`, `source_as_domain`. Elasticsearch / OpenSearch will add the new mappings on next document write; documents already indexed keep the old field names until they are reindexed, so queries and dashboards that span the upgrade must cover both sets of names. ## 9.9.0 diff --git a/docs/source/output.md b/docs/source/output.md index bc73403..095193b 100644 --- a/docs/source/output.md +++ b/docs/source/output.md @@ -46,8 +46,8 @@ of the report schema. "name": null, "type": null, "asn": 7018, - "asn_name": "AT&T Services, Inc.", - "asn_domain": "att.com" + "as_name": "AT&T Services, Inc.", + "as_domain": "att.com" }, "count": 2, "alignment": { @@ -93,7 +93,7 @@ of the report schema. ### CSV aggregate report ```text -xml_schema,org_name,org_email,org_extra_contact_info,report_id,begin_date,end_date,normalized_timespan,errors,domain,adkim,aspf,p,sp,pct,fo,source_ip_address,source_country,source_reverse_dns,source_base_domain,source_name,source_type,source_asn,source_asn_name,source_asn_domain,count,spf_aligned,dkim_aligned,dmarc_aligned,disposition,policy_override_reasons,policy_override_comments,envelope_from,header_from,envelope_to,dkim_domains,dkim_selectors,dkim_results,spf_domains,spf_scopes,spf_results +xml_schema,org_name,org_email,org_extra_contact_info,report_id,begin_date,end_date,normalized_timespan,errors,domain,adkim,aspf,p,sp,pct,fo,source_ip_address,source_country,source_reverse_dns,source_base_domain,source_name,source_type,source_asn,source_as_name,source_as_domain,count,spf_aligned,dkim_aligned,dmarc_aligned,disposition,policy_override_reasons,policy_override_comments,envelope_from,header_from,envelope_to,dkim_domains,dkim_selectors,dkim_results,spf_domains,spf_scopes,spf_results draft,acme.com,noreply-dmarc-support@acme.com,http://acme.com/dmarc/support,9391651994964116463,2012-04-28 
00:00:00,2012-04-28 23:59:59,False,,example.com,r,r,none,none,100,0,72.150.241.94,US,,,,,2,True,False,True,none,,,example.com,example.com,,example.com,none,fail,example.com,mfrom,pass draft,acme.com,noreply-dmarc-support@acme.com,http://acme.com/dmarc/support,9391651994964116463,2012-04-28 00:00:00,2012-04-28 23:59:59,False,,example.com,r,r,none,none,100,0,72.150.241.94,US,,,,,2,True,False,True,none,,,example.com,example.com,,example.com,none,fail,example.com,mfrom,pass @@ -130,8 +130,8 @@ Thanks to GitHub user [xennn](https://github.com/xennn) for the anonymized "name": null, "type": null, "asn": null, - "asn_name": null, - "asn_domain": null + "as_name": null, + "as_domain": null }, "authentication_mechanisms": [], "original_envelope_id": null, @@ -201,7 +201,7 @@ Thanks to GitHub user [xennn](https://github.com/xennn) for the anonymized ### CSV forensic report ```text -feedback_type,user_agent,version,original_envelope_id,original_mail_from,original_rcpt_to,arrival_date,arrival_date_utc,subject,message_id,authentication_results,dkim_domain,source_ip_address,source_country,source_reverse_dns,source_base_domain,source_name,source_type,source_asn,source_asn_name,source_asn_domain,delivery_result,auth_failure,reported_domain,authentication_mechanisms,sample_headers_only +feedback_type,user_agent,version,original_envelope_id,original_mail_from,original_rcpt_to,arrival_date,arrival_date_utc,subject,message_id,authentication_results,dkim_domain,source_ip_address,source_country,source_reverse_dns,source_base_domain,source_name,source_type,source_asn,source_as_name,source_as_domain,delivery_result,auth_failure,reported_domain,authentication_mechanisms,sample_headers_only auth-failure,Lua/1.0,1.0,,sharepoint@domain.de,peter.pan@domain.de,"Mon, 01 Oct 2018 11:20:27 +0200",2018-10-01 09:20:27,Subject,<38.E7.30937.BD6E1BB5@ mailrelay.de>,"dmarc=fail (p=none, dis=none) header.from=domain.de",,10.10.10.10,,,,policy,dmarc,domain.de,,False ``` diff --git a/parsedmarc/__init__.py 
b/parsedmarc/__init__.py index 103520b..ce48c19 100644 --- a/parsedmarc/__init__.py +++ b/parsedmarc/__init__.py @@ -1115,8 +1115,8 @@ def parsed_aggregate_reports_to_csv_rows( row["source_name"] = record["source"]["name"] row["source_type"] = record["source"]["type"] row["source_asn"] = record["source"]["asn"] - row["source_asn_name"] = record["source"]["asn_name"] - row["source_asn_domain"] = record["source"]["asn_domain"] + row["source_as_name"] = record["source"]["as_name"] + row["source_as_domain"] = record["source"]["as_domain"] row["count"] = record["count"] row["spf_aligned"] = record["alignment"]["spf"] row["dkim_aligned"] = record["alignment"]["dkim"] @@ -1209,8 +1209,8 @@ def parsed_aggregate_reports_to_csv( "source_name", "source_type", "source_asn", - "source_asn_name", - "source_asn_domain", + "source_as_name", + "source_as_domain", "count", "spf_aligned", "dkim_aligned", @@ -1413,8 +1413,8 @@ def parsed_forensic_reports_to_csv_rows( row["source_name"] = report["source"]["name"] row["source_type"] = report["source"]["type"] row["source_asn"] = report["source"]["asn"] - row["source_asn_name"] = report["source"]["asn_name"] - row["source_asn_domain"] = report["source"]["asn_domain"] + row["source_as_name"] = report["source"]["as_name"] + row["source_as_domain"] = report["source"]["as_domain"] row["source_country"] = report["source"]["country"] del row["source"] row["subject"] = report["parsed_sample"].get("subject") @@ -1461,8 +1461,8 @@ def parsed_forensic_reports_to_csv( "source_name", "source_type", "source_asn", - "source_asn_name", - "source_asn_domain", + "source_as_name", + "source_as_domain", "delivery_result", "auth_failure", "reported_domain", diff --git a/parsedmarc/elastic.py b/parsedmarc/elastic.py index 72223fb..bec69cb 100644 --- a/parsedmarc/elastic.py +++ b/parsedmarc/elastic.py @@ -80,8 +80,8 @@ class _AggregateReportDoc(Document): source_type = Text() source_name = Text() source_asn = Integer() - source_asn_name = Text() - 
source_asn_domain = Text() + source_as_name = Text() + source_as_domain = Text() message_count = Integer disposition = Text() dkim_aligned = Boolean() @@ -177,8 +177,8 @@ class _ForensicReportDoc(Document): source_country = Text() source_reverse_dns = Text() source_asn = Integer() - source_asn_name = Text() - source_asn_domain = Text() + source_as_name = Text() + source_as_domain = Text() source_authentication_mechanisms = Text() source_auth_failures = Text() dkim_domain = Text() @@ -496,8 +496,8 @@ def save_aggregate_report_to_elasticsearch( source_type=record["source"]["type"], source_name=record["source"]["name"], source_asn=record["source"]["asn"], - source_asn_name=record["source"]["asn_name"], - source_asn_domain=record["source"]["asn_domain"], + source_as_name=record["source"]["as_name"], + source_as_domain=record["source"]["as_domain"], message_count=record["count"], disposition=record["policy_evaluated"]["disposition"], dkim_aligned=record["policy_evaluated"]["dkim"] is not None @@ -683,8 +683,8 @@ def save_forensic_report_to_elasticsearch( source_reverse_dns=forensic_report["source"]["reverse_dns"], source_base_domain=forensic_report["source"]["base_domain"], source_asn=forensic_report["source"]["asn"], - source_asn_name=forensic_report["source"]["asn_name"], - source_asn_domain=forensic_report["source"]["asn_domain"], + source_as_name=forensic_report["source"]["as_name"], + source_as_domain=forensic_report["source"]["as_domain"], authentication_mechanisms=forensic_report["authentication_mechanisms"], auth_failure=forensic_report["auth_failure"], dkim_domain=forensic_report["dkim_domain"], diff --git a/parsedmarc/opensearch.py b/parsedmarc/opensearch.py index 5260c1f..f3826bf 100644 --- a/parsedmarc/opensearch.py +++ b/parsedmarc/opensearch.py @@ -83,8 +83,8 @@ class _AggregateReportDoc(Document): source_type = Text() source_name = Text() source_asn = Integer() - source_asn_name = Text() - source_asn_domain = Text() + source_as_name = Text() + 
source_as_domain = Text() message_count = Integer disposition = Text() dkim_aligned = Boolean() @@ -180,8 +180,8 @@ class _ForensicReportDoc(Document): source_country = Text() source_reverse_dns = Text() source_asn = Integer() - source_asn_name = Text() - source_asn_domain = Text() + source_as_name = Text() + source_as_domain = Text() source_authentication_mechanisms = Text() source_auth_failures = Text() dkim_domain = Text() @@ -526,8 +526,8 @@ def save_aggregate_report_to_opensearch( source_type=record["source"]["type"], source_name=record["source"]["name"], source_asn=record["source"]["asn"], - source_asn_name=record["source"]["asn_name"], - source_asn_domain=record["source"]["asn_domain"], + source_as_name=record["source"]["as_name"], + source_as_domain=record["source"]["as_domain"], message_count=record["count"], disposition=record["policy_evaluated"]["disposition"], dkim_aligned=record["policy_evaluated"]["dkim"] is not None @@ -713,8 +713,8 @@ def save_forensic_report_to_opensearch( source_reverse_dns=forensic_report["source"]["reverse_dns"], source_base_domain=forensic_report["source"]["base_domain"], source_asn=forensic_report["source"]["asn"], - source_asn_name=forensic_report["source"]["asn_name"], - source_asn_domain=forensic_report["source"]["asn_domain"], + source_as_name=forensic_report["source"]["as_name"], + source_as_domain=forensic_report["source"]["as_domain"], authentication_mechanisms=forensic_report["authentication_mechanisms"], auth_failure=forensic_report["auth_failure"], dkim_domain=forensic_report["dkim_domain"], diff --git a/parsedmarc/splunk.py b/parsedmarc/splunk.py index 9f83c2a..7e3754f 100644 --- a/parsedmarc/splunk.py +++ b/parsedmarc/splunk.py @@ -105,8 +105,8 @@ class HECClient(object): new_report["source_type"] = record["source"]["type"] new_report["source_name"] = record["source"]["name"] new_report["source_asn"] = record["source"]["asn"] - new_report["source_asn_name"] = record["source"]["asn_name"] - 
new_report["source_asn_domain"] = record["source"]["asn_domain"] + new_report["source_as_name"] = record["source"]["as_name"] + new_report["source_as_domain"] = record["source"]["as_domain"] new_report["message_count"] = record["count"] new_report["disposition"] = record["policy_evaluated"]["disposition"] new_report["spf_aligned"] = record["alignment"]["spf"] diff --git a/parsedmarc/types.py b/parsedmarc/types.py index 91e4b35..6a7c325 100644 --- a/parsedmarc/types.py +++ b/parsedmarc/types.py @@ -41,8 +41,8 @@ class IPSourceInfo(TypedDict): name: Optional[str] type: Optional[str] asn: Optional[int] - asn_name: Optional[str] - asn_domain: Optional[str] + as_name: Optional[str] + as_domain: Optional[str] class AggregateAlignment(TypedDict): diff --git a/parsedmarc/utils.py b/parsedmarc/utils.py index 8535399..a467e0e 100644 --- a/parsedmarc/utils.py +++ b/parsedmarc/utils.py @@ -153,8 +153,8 @@ class IPAddressInfo(TypedDict): name: Optional[str] type: Optional[str] asn: Optional[int] - asn_name: Optional[str] - asn_domain: Optional[str] + as_name: Optional[str] + as_domain: Optional[str] def decode_base64(data: str) -> bytes: @@ -464,8 +464,8 @@ def load_ip_db( class _IPDatabaseRecord(TypedDict): country: Optional[str] asn: Optional[int] - asn_name: Optional[str] - asn_domain: Optional[str] + as_name: Optional[str] + as_domain: Optional[str] class InvalidIPinfoAPIKey(Exception): @@ -734,13 +734,13 @@ def _normalize_ip_record(record: dict) -> _IPDatabaseRecord: """Normalize an IPinfo / MaxMind record to the internal shape. Shared between the API path and the MMDB path so both schemas produce the - same output: country as ISO code, ASN as plain int, asn_name string, - asn_domain lowercased. + same output: country as ISO code, ASN as plain int, as_name string, + as_domain lowercased. 
""" country: Optional[str] = None asn: Optional[int] = None - asn_name: Optional[str] = None - asn_domain: Optional[str] = None + as_name: Optional[str] = None + as_domain: Optional[str] = None code = record.get("country_code") if code is None: @@ -764,16 +764,16 @@ def _normalize_ip_record(record: dict) -> _IPDatabaseRecord: name = record.get("as_name") or record.get("autonomous_system_organization") if isinstance(name, str) and name: - asn_name = name + as_name = name domain = record.get("as_domain") if isinstance(domain, str) and domain: - asn_domain = domain.lower() + as_domain = domain.lower() return { "country": country, "asn": asn, - "asn_name": asn_name, - "asn_domain": asn_domain, + "as_name": as_name, + "as_domain": as_domain, } @@ -834,7 +834,7 @@ def get_ip_address_db_record( IPinfo Lite carries ``country_code``, ``as_name``, and ``as_domain`` on every record. MaxMind/DBIP country-only databases carry only country, so - ``asn_name`` / ``asn_domain`` come back None for those users. + ``as_name`` / ``as_domain`` come back None for those users. 
""" api_record = _ipinfo_api_lookup(ip_address) if api_record is not None: @@ -847,8 +847,8 @@ def get_ip_address_db_record( return { "country": None, "asn": None, - "asn_name": None, - "asn_domain": None, + "as_name": None, + "as_domain": None, } return _normalize_ip_record(record) @@ -1062,8 +1062,8 @@ def get_ip_address_info( "name": None, "type": None, "asn": None, - "asn_name": None, - "asn_domain": None, + "as_name": None, + "as_domain": None, } if offline: reverse_dns = None @@ -1077,8 +1077,8 @@ def get_ip_address_info( db_record = get_ip_address_db_record(ip_address, db_path=ip_db_path) info["country"] = db_record["country"] info["asn"] = db_record["asn"] - info["asn_name"] = db_record["asn_name"] - info["asn_domain"] = db_record["asn_domain"] + info["as_name"] = db_record["as_name"] + info["as_domain"] = db_record["as_domain"] info["reverse_dns"] = reverse_dns if reverse_dns is not None: @@ -1111,14 +1111,14 @@ def get_ip_address_info( url=reverse_dns_map_url, offline=offline, ) - if info["asn_domain"] and info["asn_domain"] in map_value: - service = map_value[info["asn_domain"]] + if info["as_domain"] and info["as_domain"] in map_value: + service = map_value[info["as_domain"]] info["name"] = service["name"] info["type"] = service["type"] - elif info["asn_name"]: + elif info["as_name"]: # ASN-domain not in the map: surface the raw AS name with no # classification. Better than leaving the row unattributed. 
- info["name"] = info["asn_name"] + info["name"] = info["as_name"] if cache is not None: cache[ip_address] = info diff --git a/tests.py b/tests.py index 561d0e1..4dc6835 100755 --- a/tests.py +++ b/tests.py @@ -233,8 +233,8 @@ class Test(unittest.TestCase): info = parsedmarc.utils.get_ip_address_info("8.8.8.8", offline=True) self.assertEqual(info["asn"], 15169) self.assertIsInstance(info["asn"], int) - self.assertEqual(info["asn_domain"], "google.com") - self.assertTrue(info["asn_name"]) + self.assertEqual(info["as_domain"], "google.com") + self.assertTrue(info["as_name"]) def testIpAddressInfoFallsBackToASNMapEntryWhenNoPTR(self): """When reverse DNS is absent, the ASN domain should be used as a @@ -251,7 +251,7 @@ class Test(unittest.TestCase): is used as source_name with type left null — better than leaving the row unattributed.""" # 204.79.197.100 is in an ASN whose as_domain is not in the map at - # the time of this test (msn.com); this exercises the asn_name + # the time of this test (msn.com); this exercises the as_name # fallback branch without depending on a specific map state. from unittest.mock import patch @@ -260,8 +260,8 @@ class Test(unittest.TestCase): return_value={ "country": "US", "asn": 64496, - "asn_name": "Some Unmapped Org, Inc.", - "asn_domain": "unmapped-for-this-test.example", + "as_name": "Some Unmapped Org, Inc.", + "as_domain": "unmapped-for-this-test.example", }, ): # Bypass cache to avoid prior-test pollution. @@ -272,7 +272,7 @@ class Test(unittest.TestCase): self.assertIsNone(info["base_domain"]) self.assertIsNone(info["type"]) self.assertEqual(info["name"], "Some Unmapped Org, Inc.") - self.assertEqual(info["asn_domain"], "unmapped-for-this-test.example") + self.assertEqual(info["as_domain"], "unmapped-for-this-test.example") def testIPinfoAPIPrimarySourceAndInvalidKeyIsFatal(self): """With an API token configured, lookups hit the API first. 
A 401/403 @@ -313,7 +313,7 @@ class Test(unittest.TestCase): record = get_ip_address_db_record("8.8.8.8") self.assertEqual(record["country"], "US") self.assertEqual(record["asn"], 15169) - self.assertEqual(record["asn_domain"], "google.com") + self.assertEqual(record["as_domain"], "google.com") # Invalid key: 401 raises a fatal exception even on a random lookup. with patch( @@ -361,7 +361,7 @@ class Test(unittest.TestCase): ): with self.assertLogs("parsedmarc.log", level="INFO") as cm: record = get_ip_address_db_record("8.8.8.8") - self.assertEqual(record["asn_domain"], "google.com") + self.assertEqual(record["as_domain"], "google.com") self.assertTrue( any("recovered" in line.lower() for line in cm.output), f"expected a recovery info log, got: {cm.output}", @@ -406,8 +406,8 @@ class Test(unittest.TestCase): configure_ipinfo_api(None) def testAggregateCsvExposesASNColumns(self): - """The aggregate CSV output should include source_asn, source_asn_name, - and source_asn_domain columns.""" + """The aggregate CSV output should include source_asn, source_as_name, + and source_as_domain columns.""" result = parsedmarc.parse_report_file( "samples/aggregate/!example.com!1538204542!1538463818.xml", always_use_local_files=True, @@ -416,8 +416,8 @@ class Test(unittest.TestCase): csv_text = parsedmarc.parsed_aggregate_reports_to_csv(result["report"]) header = csv_text.splitlines()[0].split(",") self.assertIn("source_asn", header) - self.assertIn("source_asn_name", header) - self.assertIn("source_asn_domain", header) + self.assertIn("source_as_name", header) + self.assertIn("source_as_domain", header) def testOpenSearchSigV4RequiresRegion(self): with self.assertRaises(opensearch_module.OpenSearchError):