From b9df12700b691a357fd817786b658d01a3acb6cb Mon Sep 17 00:00:00 2001 From: Sean Whalen Date: Tue, 7 Jan 2025 11:56:51 -0500 Subject: [PATCH] Check for duplicate aggregate report IDs when processing a mailbox Fix #535 Also drop the pre-existing unconditional aggregate_reports.append() in get_dmarc_reports_from_mbox(); leaving it in place after the new if/else appended duplicate reports anyway and appended first-seen reports twice. --- parsedmarc/__init__.py | 9 ++++++++- parsedmarc/cli.py | 3 ++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py index 38070c4..6da67a9 100644 --- a/parsedmarc/__init__.py +++ b/parsedmarc/__init__.py @@ -54,6 +54,7 @@ MAGIC_XML = b"\x3c\x3f\x78\x6d\x6c\x20" MAGIC_JSON = b"\7b" IP_ADDRESS_CACHE = ExpiringDict(max_len=10000, max_age_seconds=14400) +SEEN_AGGREGATE_REPORT_IDS = ExpiringDict(max_len=100000000, max_age_seconds=3600) REVERSE_DNS_MAP = dict() @@ -1470,6 +1471,12 @@ def get_dmarc_reports_from_mbox( strip_attachment_payloads=sa, ) if parsed_email["report_type"] == "aggregate": + report_id = parsed_email["report"]["report_metadata"]["report_id"] + if report_id not in SEEN_AGGREGATE_REPORT_IDS: + SEEN_AGGREGATE_REPORT_IDS[report_id] = report_id + aggregate_reports.append(parsed_email["report"]) + else: + logger.debug("Skipping duplicate aggregate report " + f"with ID: {report_id}") - aggregate_reports.append(parsed_email["report"]) elif parsed_email["report_type"] == "forensic": forensic_reports.append(parsed_email["report"]) diff --git a/parsedmarc/cli.py b/parsedmarc/cli.py index f5454f4..0a55cc1 100644 --- a/parsedmarc/cli.py +++ b/parsedmarc/cli.py @@ -1426,7 +1426,8 @@ def _main(): SEEN_AGGREGATE_REPORT_IDS[report_id] = report_id aggregate_reports.append(result[0]["report"]) else: - logger.debug(f"Skipping duplicate report ID: {report_id}") + logger.debug("Skipping duplicate aggregate report " + f"with ID: {report_id}") elif result[0]["report_type"] == "forensic": forensic_reports.append(result[0]["report"]) elif result[0]["report_type"] == "smtp_tls":