diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0b06303..2d1ecae 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,11 @@
 Changelog
 =========
 
+8.16.1
+------
+
+- Ignore duplicate aggregate DMARC reports (same report ID) seen within a period of one hour (#535)
+
 8.16.0
 ------
 
diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py
index 8f5923a..38070c4 100644
--- a/parsedmarc/__init__.py
+++ b/parsedmarc/__init__.py
@@ -39,7 +39,7 @@ from parsedmarc.utils import is_outlook_msg, convert_outlook_msg
 from parsedmarc.utils import parse_email
 from parsedmarc.utils import timestamp_to_human, human_timestamp_to_datetime
 
-__version__ = "8.16.0"
+__version__ = "8.16.1"
 
 logger.debug("parsedmarc v{0}".format(__version__))
 
diff --git a/parsedmarc/cli.py b/parsedmarc/cli.py
index 167eba6..c5a7f43 100644
--- a/parsedmarc/cli.py
+++ b/parsedmarc/cli.py
@@ -15,6 +15,7 @@ from ssl import CERT_NONE, create_default_context
 from multiprocessing import Pipe, Process
 import sys
 from tqdm import tqdm
+from expiringdict import ExpiringDict
 
 from parsedmarc import (
     get_dmarc_reports_from_mailbox,
@@ -55,6 +56,10 @@ handler = logging.StreamHandler()
 handler.setFormatter(formatter)
 logger.addHandler(handler)
 
+# Aggregate report IDs seen within the last hour, used to skip duplicates.
+# ExpiringDict requires max_len; it is set high so entries normally leave by age.
+SEEN_AGGREGATE_REPORT_IDS = ExpiringDict(max_len=100000000, max_age_seconds=3600)
+
 
 def _str_to_list(s):
     """Converts a comma separated string to a list"""
@@ -1418,7 +1423,13 @@ def _main():
             logger.error("Failed to parse {0} - {1}".format(result[1], result[0]))
         else:
             if result[0]["report_type"] == "aggregate":
-                aggregate_reports.append(result[0]["report"])
+                # Skip aggregate reports whose ID was already seen in the past hour
+                report_id = result[0]["report"]["report_metadata"]["report_id"]
+                if report_id not in SEEN_AGGREGATE_REPORT_IDS:
+                    SEEN_AGGREGATE_REPORT_IDS[report_id] = report_id
+                    aggregate_reports.append(result[0]["report"])
+                else:
+                    logger.debug(f"Skipping duplicate report ID: {report_id}")
             elif result[0]["report_type"] == "forensic":
                 forensic_reports.append(result[0]["report"])
             elif result[0]["report_type"] == "smtp_tls":