From e3bbb4e008345e8dd2522318e8b0b120fb0583e8 Mon Sep 17 00:00:00 2001 From: Sean Whalen Date: Tue, 19 Mar 2019 11:36:06 -0400 Subject: [PATCH] 6.2.2 Fix crash when trying to save forensic reports with missing fields to Elasticsearch --- CHANGELOG.md | 5 +++ parsedmarc/__init__.py | 10 +++++- parsedmarc/cli.py | 2 ++ parsedmarc/elastic.py | 77 ++++++++++++++++++++++-------------------- parsedmarc/utils.py | 4 +-- setup.py | 2 +- 6 files changed, 60 insertions(+), 40 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c6d7b59..f5131d7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +6.2.2 +----- + +- Fix crash when trying to save forensic reports with missing fields to Elasticsearch + 6.2.1 ----- diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py index 3a726aa..9df048b 100644 --- a/parsedmarc/__init__.py +++ b/parsedmarc/__init__.py @@ -38,7 +38,7 @@ from parsedmarc.utils import is_outlook_msg, convert_outlook_msg from parsedmarc.utils import timestamp_to_human, human_timestamp_to_datetime from parsedmarc.utils import parse_email -__version__ = "6.2.1" +__version__ = "6.2.2" logging.basicConfig( format='%(levelname)8s:%(filename)s:%(lineno)d:' @@ -217,6 +217,7 @@ def parse_aggregate_report_xml(xml, nameservers=None, timeout=2.0, nameservers (list): A list of one or more nameservers to use (Cloudflare's public DNS resolvers by default) timeout (float): Sets the DNS timeout in seconds + parallel (bool): Parallel processing Returns: OrderedDict: The parsed aggregate DMARC report @@ -392,6 +393,7 @@ def parse_aggregate_report_file(_input, nameservers=None, dns_timeout=2.0, nameservers (list): A list of one or more nameservers to use (Cloudflare's public DNS resolvers by default) dns_timeout (float): Sets the DNS timeout in seconds + parallel (bool): Parallel processing Returns: OrderedDict: The parsed DMARC aggregate report @@ -530,6 +532,7 @@ def parse_forensic_report(feedback_report, sample, msg_date, dns_timeout (float): Sets the DNS timeout in seconds strip_attachment_payloads (bool): Remove attachment payloads from forensic report results + parallel (bool): Parallel processing Returns: OrderedDict: A parsed report and sample @@ -553,6 +556,9 @@ def parse_forensic_report(feedback_report, sample, msg_date, if "user_agent" not in parsed_report: parsed_report["user_agent"] = None + if "delivery_result" not in parsed_report: + parsed_report["delivery_result"] = None + arrival_utc = human_timestamp_to_datetime( parsed_report["arrival_date"], to_utc=True) arrival_utc = arrival_utc.strftime("%Y-%m-%d %H:%M:%S") @@ -672,6 +678,7 @@ def parse_report_email(input_, nameservers=None, dns_timeout=2.0, dns_timeout (float): Sets the DNS timeout in seconds strip_attachment_payloads (bool): Remove attachment payloads from forensic report results + parallel (bool): Parallel processing Returns: OrderedDict: @@ -793,6 +800,7 @@ def parse_report_file(input_, nameservers=None, dns_timeout=2.0, dns_timeout (float): Sets the DNS timeout in seconds strip_attachment_payloads (bool): Remove attachment payloads from forensic report results + parallel (bool): Parallel processing Returns: OrderedDict: The parsed DMARC report diff --git a/parsedmarc/cli.py b/parsedmarc/cli.py index b6b3044..93f6720 100644 --- a/parsedmarc/cli.py +++ b/parsedmarc/cli.py @@ -110,6 +110,8 @@ def _main(): except elastic.ElasticsearchError as error_: logger.error("Elasticsearch Error: {0}".format( error_.__str__())) + except InvalidDMARCReport as error_: + logger.error(error_.__str__()) try: if opts.kafka_hosts: kafka_client.save_forensic_reports_to_kafka( diff --git a/parsedmarc/elastic.py b/parsedmarc/elastic.py index 593313e..c7cd7eb 100644 --- a/parsedmarc/elastic.py +++ b/parsedmarc/elastic.py @@ -10,6 +10,7 @@ from elasticsearch.helpers import reindex from parsedmarc.utils import human_timestamp_to_datetime +from parsedmarc import InvalidForensicReport logger = logging.getLogger("parsedmarc") @@ -470,40 +471,44 @@ def save_forensic_report_to_elasticsearch(forensic_report, sample.add_attachment(filename=attachment["filename"], content_type=attachment["mail_content_type"], sha256=attachment["sha256"]) - - forensic_doc = _ForensicReportDoc( - feedback_type=forensic_report["feedback_type"], - user_agent=forensic_report["user_agent"], - version=forensic_report["version"], - original_mail_from=forensic_report["original_mail_from"], - arrival_date=arrival_date, - domain=forensic_report["reported_domain"], - original_envelope_id=forensic_report["original_envelope_id"], - authentication_results=forensic_report["authentication_results"], - delivery_results=forensic_report["delivery_result"], - source_ip_address=forensic_report["source"]["ip_address"], - source_country=forensic_report["source"]["country"], - source_reverse_dns=forensic_report["source"]["reverse_dns"], - source_base_domain=forensic_report["source"]["base_domain"], - authentication_mechanisms=forensic_report["authentication_mechanisms"], - auth_failure=forensic_report["auth_failure"], - dkim_domain=forensic_report["dkim_domain"], - original_rcpt_to=forensic_report["original_rcpt_to"], - sample=sample - ) - - index = "dmarc_forensic" - if index_suffix: - index = "{0}_{1}".format(index, index_suffix) - if monthly_indexes: - index_date = arrival_date.strftime("%Y-%m") - else: - index_date = arrival_date.strftime("%Y-%m-%d") - index = "{0}-{1}".format(index, index_date) - create_indexes([index]) - forensic_doc.meta.index = index try: - forensic_doc.save() - except Exception as e: - raise ElasticsearchError( - "Elasticsearch error: {0}".format(e.__str__())) + forensic_doc = _ForensicReportDoc( + feedback_type=forensic_report["feedback_type"], + user_agent=forensic_report["user_agent"], + version=forensic_report["version"], + original_mail_from=forensic_report["original_mail_from"], + arrival_date=arrival_date, + domain=forensic_report["reported_domain"], + original_envelope_id=forensic_report["original_envelope_id"], + authentication_results=forensic_report["authentication_results"], + delivery_results=forensic_report["delivery_result"], + source_ip_address=forensic_report["source"]["ip_address"], + source_country=forensic_report["source"]["country"], + source_reverse_dns=forensic_report["source"]["reverse_dns"], + source_base_domain=forensic_report["source"]["base_domain"], + authentication_mechanisms=forensic_report[ + "authentication_mechanisms"], + auth_failure=forensic_report["auth_failure"], + dkim_domain=forensic_report["dkim_domain"], + original_rcpt_to=forensic_report["original_rcpt_to"], + sample=sample + ) + + index = "dmarc_forensic" + if index_suffix: + index = "{0}_{1}".format(index, index_suffix) + if monthly_indexes: + index_date = arrival_date.strftime("%Y-%m") + else: + index_date = arrival_date.strftime("%Y-%m-%d") + index = "{0}-{1}".format(index, index_date) + create_indexes([index]) + forensic_doc.meta.index = index + try: + forensic_doc.save() + except Exception as e: + raise ElasticsearchError( + "Elasticsearch error: {0}".format(e.__str__())) + except KeyError as e: + raise InvalidForensicReport( + "Forensic report missing required field: {0}".format(e.__str__())) diff --git a/parsedmarc/utils.py b/parsedmarc/utils.py index 26b016d..c4b1a46 100644 --- a/parsedmarc/utils.py +++ b/parsedmarc/utils.py @@ -256,8 +256,8 @@ def get_ip_address_country(ip_address, parallel=False): country associated with the given IPv4 or IPv6 address Args: - ip_address (str): The IP address to query for, - parallel (bool): parallel processing + ip_address (str): The IP address to query for + parallel (bool): Parallel processing Returns: str: And ISO country code associated with the given IP address diff --git a/setup.py b/setup.py index 078ccf4..04ef204 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,7 @@ from setuptools import setup from codecs import open from os import path -__version__ = "6.2.1" +__version__ = "6.2.2" description = "A Python package and CLI for parsing aggregate and " \ "forensic DMARC reports"