From b46fec8983eb7080d93c698c85bf3bfb7294b2e5 Mon Sep 17 00:00:00 2001 From: Sean Whalen Date: Sun, 16 Sep 2018 22:51:49 -0400 Subject: [PATCH 01/30] 4.0.0 prelease --- CHANGELOG.md | 8 ++++ parsedmarc/__init__.py | 14 +++++- parsedmarc/cli.py | 58 +++++++++++++++++++------ parsedmarc/splunk.py | 98 ++++++++++++++++++++++++++++++++++++++++++ setup.py | 2 +- 5 files changed, 165 insertions(+), 15 deletions(-) create mode 100644 parsedmarc/splunk.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 98af1d5..c7dca80 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,11 @@ +4.0.0 +----- + +- Add support for sending DMARC reports to a Splunk HTTP Events +Collector (HEC) +- Use a browser-like `User-Agent` when downloading the Public Suffix List, to +avoid being blocked by security proxies + 3.9.7 ----- diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py index 73c7fde..534a20e 100644 --- a/parsedmarc/__init__.py +++ b/parsedmarc/__init__.py @@ -4,6 +4,7 @@ import logging import os +import platform import xml.parsers.expat as expat import json from datetime import datetime @@ -30,6 +31,7 @@ import smtplib import ssl import time +import requests import publicsuffix import xmltodict import dns.reversename @@ -43,7 +45,7 @@ import imapclient.exceptions import dateparser import mailparser -__version__ = "3.9.7" +__version__ = "4.0.0" logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) @@ -100,7 +102,15 @@ def _get_base_domain(domain): psl_path = ".public_suffix_list.dat" def download_psl(): - fresh_psl = publicsuffix.fetch().read() + url = "https://publicsuffix.org/list/public_suffix_list.dat" + # Use a browser-like user agent string to bypass some proxy blocks + user_agent = "Mozilla/5.0 ((0 {1})) parsedmarc/{2}".format( + platform.system(), + platform.release(), + __version__ + ) + headers = {"User-Agent": user_agent} + fresh_psl = requests.get(url, headers=headers).content with open(psl_path, "w", encoding="utf-8") as fresh_psl_file: 
fresh_psl_file.write(fresh_psl) diff --git a/parsedmarc/cli.py b/parsedmarc/cli.py index 8039544..f99ad7f 100644 --- a/parsedmarc/cli.py +++ b/parsedmarc/cli.py @@ -13,9 +13,8 @@ import json from elasticsearch.exceptions import ElasticsearchException from parsedmarc import logger, IMAPError, get_dmarc_reports_from_inbox, \ - parse_report_file, elastic, save_output, watch_inbox, email_results, \ - SMTPError, ParserError, __version__ - + parse_report_file, elastic, splunk, save_output, watch_inbox, \ + email_results, SMTPError, ParserError, __version__ def _main(): """Called when the module is executed""" def process_reports(reports_): @@ -28,22 +27,38 @@ def _main(): if args.save_aggregate: for report in reports_["aggregate_reports"]: try: - elastic.save_aggregate_report_to_elasticsearch(report) + if args.elasticsearch_host: + elastic.save_aggregate_report_to_elasticsearch(report) except elastic.AlreadySaved as warning: logger.warning(warning.__str__()) except ElasticsearchException as error_: logger.error("Elasticsearch Error: {0}".format( error_.__str__())) exit(1) + if args.hec: + try: + aggregate_reports_ = reports_["aggregate_reports"] + hec_client.save_aggregate_reports_to_splunk( + aggregate_reports_) + except splunk.SplunkError as e: + logger.error("Splunk HEC error: {0}".format(e.__str__())) if args.save_forensic: for report in reports_["forensic_reports"]: try: - elastic.save_forensic_report_to_elasticsearch(report) + if args.elasticsearch_host: + elastic.save_forensic_report_to_elasticsearch(report) except elastic.AlreadySaved as warning: logger.warning(warning.__str__()) except ElasticsearchException as error_: logger.error("Elasticsearch Error: {0}".format( error_.__str__())) + if args.hec: + try: + forensic_reports_ = reports_["forensic_reports"] + hec_client.save_forensic_reports_to_splunk( + forensic_reports_) + except splunk.SplunkError as e: + logger.error("Splunk HEC error: {0}".format(e.__str__())) arg_parser = ArgumentParser(description="Parses DMARC reports")
arg_parser.add_argument("file_path", nargs="*", @@ -76,15 +91,23 @@ def _main(): arg_parser.add_argument("-E", "--elasticsearch-host", nargs="*", help="A list of one or more Elasticsearch " - "hostnames or URLs to use (Default " - "localhost:9200)", - default=["localhost:9200"]) + "hostnames or URLs to use (e.g. " + "localhost:9200)") + arg_parser.add_argument("--hec", help="URL to a Splunk HTTP Event " + "Collector (HEC)") + arg_parser.add_argument("--hec-key", help="The authorization key for a " + "Splunk HTTP event collector " + "(HEC)") + arg_parser.add_argument("--hec-index", help="The index to use when " + "sending events to the " + "Splunk HTTP Events " + "(Default: dmarc)") arg_parser.add_argument("--save-aggregate", action="store_true", default=False, - help="Save aggregate reports to Elasticsearch") + help="Save aggregate reports to search indexes") arg_parser.add_argument("--save-forensic", action="store_true", default=False, - help="Save forensic reports to Elasticsearch") + help="Save forensic reports to search indexes") arg_parser.add_argument("-O", "--outgoing-host", help="Email the results using this host") arg_parser.add_argument("-U", "--outgoing-user", @@ -129,13 +152,24 @@ def _main(): exit(1) if args.save_aggregate or args.save_forensic: + if args.elasticsearch_host is None and args.hec is None: + args.elasticsearch_host = ["localhost:9200"] try: - elastic.set_hosts(args.elasticsearch_host) - elastic.create_indexes() + if args.elasticsearch_host: + elastic.set_hosts(args.elasticsearch_host) + elastic.create_indexes() + if args.hec: + hec_client = splunk.HECClient(args.hec, args.hec_key, + index=args.hec_index) except ElasticsearchException as error: logger.error("Elasticsearch Error: {0}".format(error.__str__())) exit(1) + if args.hec: + if args.hec_key is None: + logger.error("HEC URL provided without HEC key") + exit(1) + file_paths = [] for file_path in args.file_path: file_paths += glob(file_path) diff --git a/parsedmarc/splunk.py
b/parsedmarc/splunk.py new file mode 100644 index 0000000..cd6306c --- /dev/null +++ b/parsedmarc/splunk.py @@ -0,0 +1,98 @@ +from urllib.parse import urlparse +import socket +import json + +import requests + +from parsedmarc import __version__ + + +class SplunkError(RuntimeError): + """Raised when a Splunk API error occurs""" + + +class HECClient(object): + """A client for a Splunk HTTP Events Collector (HEC)""" + + # http://docs.splunk.com/Documentation/Splunk/latest/Data/AboutHEC + # http://docs.splunk.com/Documentation/Splunk/latest/RESTREF/RESTinput#services.2Fcollector + + def __init__(self, url, access_token, index="dmarc", + source="parsedmarc", verify=True): + """ + Initializes the HECClient + Args: + url (str): The URL of the HEC + access_token (str): The HEC access token + index (str): The name of the index + source (str): The source name + verify (bool): Verify SSL certificates + """ + url = urlparse(url) + self.url = "{0}://{1}/services/collector/event/1.0".format(url.scheme, + url.netloc) + self.access_token = access_token.lstrip("Splunk ") + self.index = index + self.host = socket.getfqdn() + self.source = source + self.session = requests.Session() + self.session.verify = verify + self._common_data = dict(host=self.host, source=self.source, + index=self.index) + + self.session.headers = { + "User-Agent": "parsedmarc/{0}".format(__version__), + "Authorization": "Splunk {0}".format(self.access_token) + } + + def save_aggregate_reports_to_splunk(self, aggregate_reports): + """ + Saves aggregate DMARC reports to Splunk + + Args: + aggregate_reports (list): A list of aggregate report dictionaries + to save in Splunk + + """ + if type(aggregate_reports) == dict: + aggregate_reports = [aggregate_reports] + + json_str = "" + for report in aggregate_reports: + data = self._common_data.copy() + data["sourcetype"] = "dmarc_aggregate" + data["event"] = report.copy() + json_str += "{0}\n".format(json.dumps(data)) + + try: + response = self.session.post(self.url, 
json=json_str).json() + except Exception as e: + raise SplunkError(e.__str__()) + if response["code"] != 0: + raise SplunkError(response["text"]) + + def save_forensic_reports_to_splunk(self, aggregate_reports): + """ + Saves forensic DMARC reports to Splunk + + Args: + aggregate_reports (list): A list of forensic report dictionaries + to save in Splunk + + """ + if type(aggregate_reports) == dict: + aggregate_reports = [aggregate_reports] + + json_str = "" + for report in aggregate_reports: + data = self._common_data.copy() + data["sourcetype"] = "dmarc_forensic" + data["event"] = report.copy() + json_str += "{0}\n".format(json.dumps(data)) + + try: + response = self.session.post(self.url, json=json_str).json() + except Exception as e: + raise SplunkError(e.__str__()) + if response["code"] != 0: + raise SplunkError(response["text"]) diff --git a/setup.py b/setup.py index bb67150..4de2dd0 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,7 @@ from setuptools import setup from codecs import open from os import path -__version__ = "3.9.7" +__version__ = "4.0.0" description = "A Python package and CLI for parsing aggregate and " \ "forensic DMARC reports" From 5160d687f3399bc4e010c98d435c4cc8df22b931 Mon Sep 17 00:00:00 2001 From: Sean Whalen Date: Sun, 16 Sep 2018 22:56:51 -0400 Subject: [PATCH 02/30] Update CLI docs --- README.rst | 119 ++++++++++++++++++++++++++----------------------- docs/index.rst | 35 +++++++++------ 2 files changed, 84 insertions(+), 70 deletions(-) diff --git a/README.rst b/README.rst index af7cbdf..1f7e920 100644 --- a/README.rst +++ b/README.rst @@ -38,16 +38,17 @@ CLI help :: - usage: parsedmarc [-h] [-o OUTPUT] [-n NAMESERVERS [NAMESERVERS ...]] - [-t TIMEOUT] [-H HOST] [-u USER] [-p PASSWORD] - [-r REPORTS_FOLDER] [-a ARCHIVE_FOLDER] [-d] - [-E [ELASTICSEARCH_HOST [ELASTICSEARCH_HOST ...]]] - [--save-aggregate] [--save-forensic] [-O OUTGOING_HOST] - [-U OUTGOING_USER] [-P OUTGOING_PASSWORD] [-F OUTGOING_FROM] - [-T OUTGOING_TO 
[OUTGOING_TO ...]] [-S OUTGOING_SUBJECT] - [-A OUTGOING_ATTACHMENT] [-M OUTGOING_MESSAGE] [-w] [--test] - [-s] [--debug] [-v] - [file_path [file_path ...]] + usage: cli.py [-h] [-o OUTPUT] [-n NAMESERVERS [NAMESERVERS ...]] [-t TIMEOUT] + [-H HOST] [-u USER] [-p PASSWORD] [-r REPORTS_FOLDER] + [-a ARCHIVE_FOLDER] [-d] + [-E [ELASTICSEARCH_HOST [ELASTICSEARCH_HOST ...]]] [--hec HEC] + [--hec-key HEC_KEY] [--hec-index HEC_INDEX] [--save-aggregate] + [--save-forensic] [-O OUTGOING_HOST] [-U OUTGOING_USER] + [-P OUTGOING_PASSWORD] [-F OUTGOING_FROM] + [-T OUTGOING_TO [OUTGOING_TO ...]] [-S OUTGOING_SUBJECT] + [-A OUTGOING_ATTACHMENT] [-M OUTGOING_MESSAGE] [-w] [--test] + [-s] [--debug] [-v] + [file_path [file_path ...]] Parses DMARC reports @@ -55,52 +56,58 @@ CLI help file_path one or more paths to aggregate or forensic report files or emails - optional arguments: - -h, --help show this help message and exit - -o OUTPUT, --output OUTPUT - Write output files to the given directory - -n NAMESERVERS [NAMESERVERS ...], --nameservers NAMESERVERS [NAMESERVERS ...] 
- nameservers to query (Default 8.8.8.8 4.4.4.4) - -t TIMEOUT, --timeout TIMEOUT - number of seconds to wait for an answer from DNS - (default 6.0) - -H HOST, --host HOST IMAP hostname or IP address - -u USER, --user USER IMAP user - -p PASSWORD, --password PASSWORD - IMAP password - -r REPORTS_FOLDER, --reports-folder REPORTS_FOLDER - The IMAP folder containing the reports Default: INBOX - -a ARCHIVE_FOLDER, --archive-folder ARCHIVE_FOLDER - Specifies the IMAP folder to move messages to after - processing them Default: Archive - -d, --delete Delete the reports after processing them - -E [ELASTICSEARCH_HOST [ELASTICSEARCH_HOST ...]], --elasticsearch-host [ELASTICSEARCH_HOST [ELASTICSEARCH_HOST ...]] - A list of one or more Elasticsearch hostnames or URLs - to use (Default localhost:9200) - --save-aggregate Save aggregate reports to Elasticsearch - --save-forensic Save forensic reports to Elasticsearch - -O OUTGOING_HOST, --outgoing-host OUTGOING_HOST - Email the results using this host - -U OUTGOING_USER, --outgoing-user OUTGOING_USER - Email the results using this user - -P OUTGOING_PASSWORD, --outgoing-password OUTGOING_PASSWORD - Email the results using this password - -F OUTGOING_FROM, --outgoing-from OUTGOING_FROM - Email the results using this from address - -T OUTGOING_TO [OUTGOING_TO ...], --outgoing-to OUTGOING_TO [OUTGOING_TO ...] 
- Email the results to these addresses - -S OUTGOING_SUBJECT, --outgoing-subject OUTGOING_SUBJECT - Email the results using this subject - -A OUTGOING_ATTACHMENT, --outgoing-attachment OUTGOING_ATTACHMENT - Email the results using this filename - -M OUTGOING_MESSAGE, --outgoing-message OUTGOING_MESSAGE - Email the results using this message - -w, --watch Use an IMAP IDLE connection to process reports as they - arrive in the inbox - --test Do not move or delete IMAP messages - -s, --silent Only print errors - --debug Print debugging information - -v, --version show program's version number and exit + optional arguments: + -h, --help show this help message and exit + -o OUTPUT, --output OUTPUT + Write output files to the given directory + -n NAMESERVERS [NAMESERVERS ...], --nameservers NAMESERVERS [NAMESERVERS ...] + nameservers to query (Default is Cloudflare's) + -t TIMEOUT, --timeout TIMEOUT + number of seconds to wait for an answer from DNS + (default 6.0) + -H HOST, --host HOST IMAP hostname or IP address + -u USER, --user USER IMAP user + -p PASSWORD, --password PASSWORD + IMAP password + -r REPORTS_FOLDER, --reports-folder REPORTS_FOLDER + The IMAP folder containing the reports Default: INBOX + -a ARCHIVE_FOLDER, --archive-folder ARCHIVE_FOLDER + Specifies the IMAP folder to move messages to after + processing them Default: Archive + -d, --delete Delete the reports after processing them + -E [ELASTICSEARCH_HOST [ELASTICSEARCH_HOST ...]], --elasticsearch-host [ELASTICSEARCH_HOST [ELASTICSEARCH_HOST ...]] + A list of one or more Elasticsearch hostnames or URLs + to use (e.g. 
localhost:9200) + --hec HEC URL to a Splunk HTTP Event Collector (HEC) + --hec-key HEC_KEY The authorization key for a Splunk HTTP event + collector (HEC) + --hec-index HEC_INDEX + The index to use when sending events to the Splunk + HTTP Events (Default: dmarc) + --save-aggregate Save aggregate reports to search indexes + --save-forensic Save forensic reports to search indexes + -O OUTGOING_HOST, --outgoing-host OUTGOING_HOST + Email the results using this host + -U OUTGOING_USER, --outgoing-user OUTGOING_USER + Email the results using this user + -P OUTGOING_PASSWORD, --outgoing-password OUTGOING_PASSWORD + Email the results using this password + -F OUTGOING_FROM, --outgoing-from OUTGOING_FROM + Email the results using this from address + -T OUTGOING_TO [OUTGOING_TO ...], --outgoing-to OUTGOING_TO [OUTGOING_TO ...] + Email the results to these addresses + -S OUTGOING_SUBJECT, --outgoing-subject OUTGOING_SUBJECT + Email the results using this subject + -A OUTGOING_ATTACHMENT, --outgoing-attachment OUTGOING_ATTACHMENT + Email the results using this filename + -M OUTGOING_MESSAGE, --outgoing-message OUTGOING_MESSAGE + Email the results using this message + -w, --watch Use an IMAP IDLE connection to process reports as they + arrive in the inbox + --test Do not move or delete IMAP messages + -s, --silent Only print errors + --debug Print debugging information + -v, --version show program's version number and exit SPF and DMARC record validation =============================== diff --git a/docs/index.rst b/docs/index.rst index f8afea7..a1484e3 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -45,16 +45,17 @@ CLI help :: - usage: parsedmarc [-h] [-o OUTPUT] [-n NAMESERVERS [NAMESERVERS ...]] - [-t TIMEOUT] [-H HOST] [-u USER] [-p PASSWORD] - [-r REPORTS_FOLDER] [-a ARCHIVE_FOLDER] [-d] - [-E [ELASTICSEARCH_HOST [ELASTICSEARCH_HOST ...]]] - [--save-aggregate] [--save-forensic] [-O OUTGOING_HOST] - [-U OUTGOING_USER] [-P OUTGOING_PASSWORD] [-F OUTGOING_FROM] - [-T 
OUTGOING_TO [OUTGOING_TO ...]] [-S OUTGOING_SUBJECT] - [-A OUTGOING_ATTACHMENT] [-M OUTGOING_MESSAGE] [-w] [--test] - [-s] [--debug] [-v] - [file_path [file_path ...]] + usage: cli.py [-h] [-o OUTPUT] [-n NAMESERVERS [NAMESERVERS ...]] [-t TIMEOUT] + [-H HOST] [-u USER] [-p PASSWORD] [-r REPORTS_FOLDER] + [-a ARCHIVE_FOLDER] [-d] + [-E [ELASTICSEARCH_HOST [ELASTICSEARCH_HOST ...]]] [--hec HEC] + [--hec-key HEC_KEY] [--hec-index HEC_INDEX] [--save-aggregate] + [--save-forensic] [-O OUTGOING_HOST] [-U OUTGOING_USER] + [-P OUTGOING_PASSWORD] [-F OUTGOING_FROM] + [-T OUTGOING_TO [OUTGOING_TO ...]] [-S OUTGOING_SUBJECT] + [-A OUTGOING_ATTACHMENT] [-M OUTGOING_MESSAGE] [-w] [--test] + [-s] [--debug] [-v] + [file_path [file_path ...]] Parses DMARC reports @@ -67,7 +68,7 @@ CLI help -o OUTPUT, --output OUTPUT Write output files to the given directory -n NAMESERVERS [NAMESERVERS ...], --nameservers NAMESERVERS [NAMESERVERS ...] - nameservers to query ((Default is Cloudflare's) + nameservers to query (Default is Cloudflare's) -t TIMEOUT, --timeout TIMEOUT number of seconds to wait for an answer from DNS (default 6.0) @@ -83,9 +84,15 @@ CLI help -d, --delete Delete the reports after processing them -E [ELASTICSEARCH_HOST [ELASTICSEARCH_HOST ...]], --elasticsearch-host [ELASTICSEARCH_HOST [ELASTICSEARCH_HOST ...]] A list of one or more Elasticsearch hostnames or URLs - to use (Default localhost:9200) - --save-aggregate Save aggregate reports to Elasticsearch - --save-forensic Save forensic reports to Elasticsearch + to use (e.g. 
localhost:9200) + --hec HEC URL to a Splunk HTTP Event Collector (HEC) + --hec-key HEC_KEY The authorization key for a Splunk HTTP event + collector (HEC) + --hec-index HEC_INDEX + The index to use when sending events to the Splunk + HTTP Events (Default: dmarc) + --save-aggregate Save aggregate reports to search indexes + --save-forensic Save forensic reports to search indexes -O OUTGOING_HOST, --outgoing-host OUTGOING_HOST Email the results using this host -U OUTGOING_USER, --outgoing-user OUTGOING_USER From 2796fdd6915afe529ad9b1cbe710a90d58a9c833 Mon Sep 17 00:00:00 2001 From: Sean Whalen Date: Sun, 16 Sep 2018 23:02:18 -0400 Subject: [PATCH 03/30] PEP 8 fix --- parsedmarc/cli.py | 1 + 1 file changed, 1 insertion(+) diff --git a/parsedmarc/cli.py b/parsedmarc/cli.py index f99ad7f..ec4701a 100644 --- a/parsedmarc/cli.py +++ b/parsedmarc/cli.py @@ -16,6 +16,7 @@ from parsedmarc import logger, IMAPError, get_dmarc_reports_from_inbox, \ parse_report_file, elastic, splunk, save_output, watch_inbox, \ email_results, SMTPError, ParserError, __version__ + def _main(): """Called when the module is executed""" def process_reports(reports_): From 9cf113abdcd65440352016c5bf6472b7d5d6eb45 Mon Sep 17 00:00:00 2001 From: Sean Whalen Date: Sun, 16 Sep 2018 23:05:52 -0400 Subject: [PATCH 04/30] Fix PSL download --- parsedmarc/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py index 534a20e..c345e00 100644 --- a/parsedmarc/__init__.py +++ b/parsedmarc/__init__.py @@ -110,7 +110,7 @@ def _get_base_domain(domain): __version__ ) headers = {"User-Agent": user_agent} - fresh_psl = requests.get(url, headers=headers).content + fresh_psl = requests.get(url, headers=headers).text with open(psl_path, "w", encoding="utf-8") as fresh_psl_file: fresh_psl_file.write(fresh_psl) From d18d9cf5d0e8bc80b6f644552a949a5d81d788fa Mon Sep 17 00:00:00 2001 From: Sean Whalen Date: Mon, 17 Sep 2018 08:04:51 -0400 Subject: [PATCH 
05/30] Fix changelog typo --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c7dca80..60a5d7d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,7 +32,7 @@ downloads 3.9.3 ----- -- Fix crash when forensic recorts are missing `Arrival-Date` +- Fix crash when forensic reports are missing `Arrival-Date` 3.9.2 ----- From 3fdd5457b1f084b794b5b9150783fd0645e72fdc Mon Sep 17 00:00:00 2001 From: Sean Whalen Date: Mon, 17 Sep 2018 11:45:08 -0400 Subject: [PATCH 06/30] Reduce default DNS timeout to 0.5 seconds --- CHANGELOG.md | 1 + README.rst | 2 +- docs/index.rst | 2 +- parsedmarc/__init__.py | 18 +++++++++--------- parsedmarc/cli.py | 2 +- 5 files changed, 13 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 60a5d7d..cbd2dfc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ Collector (HEC) - Use a browser-like `User-Agent` when downloading the Public Suffix List, to avoid being blocked by security proxies +- Reduce default DNS timeout to 0.5 seconds 3.9.7 ----- diff --git a/README.rst b/README.rst index 1f7e920..19ee735 100644 --- a/README.rst +++ b/README.rst @@ -64,7 +64,7 @@ CLI help nameservers to query (Default is Cloudflare's) -t TIMEOUT, --timeout TIMEOUT number of seconds to wait for an answer from DNS - (default 6.0) + (default 0.5) -H HOST, --host HOST IMAP hostname or IP address -u USER, --user USER IMAP user -p PASSWORD, --password PASSWORD diff --git a/docs/index.rst b/docs/index.rst index a1484e3..7a69416 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -71,7 +71,7 @@ CLI help nameservers to query (Default is Cloudflare's) -t TIMEOUT, --timeout TIMEOUT number of seconds to wait for an answer from DNS - (default 6.0) + (default 0.5) -H HOST, --host HOST IMAP hostname or IP address -u USER, --user USER IMAP user -p PASSWORD, --password PASSWORD diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py index c345e00..88f991e 100644 --- 
a/parsedmarc/__init__.py +++ b/parsedmarc/__init__.py @@ -131,7 +131,7 @@ def _get_base_domain(domain): return psl.get_public_suffix(domain) -def _query_dns(domain, record_type, nameservers=None, timeout=6.0): +def _query_dns(domain, record_type, nameservers=None, timeout=0.5): """ Queries DNS @@ -159,7 +159,7 @@ def _query_dns(domain, record_type, nameservers=None, timeout=6.0): resolver.query(domain, record_type, tcp=True))) -def _get_reverse_dns(ip_address, nameservers=None, timeout=6.0): +def _get_reverse_dns(ip_address, nameservers=None, timeout=0.5): """ Resolves an IP address to a hostname using a reverse DNS query @@ -284,7 +284,7 @@ def _get_ip_address_country(ip_address): return country -def _get_ip_address_info(ip_address, nameservers=None, timeout=6.0): +def _get_ip_address_info(ip_address, nameservers=None, timeout=0.5): """ Returns reverse DNS and country information for the given IP address @@ -315,7 +315,7 @@ def _get_ip_address_info(ip_address, nameservers=None, timeout=6.0): return info -def _parse_report_record(record, nameservers=None, timeout=6.0): +def _parse_report_record(record, nameservers=None, timeout=0.5): """ Converts a record from a DMARC aggregate report into a more consistent format @@ -418,7 +418,7 @@ def _parse_report_record(record, nameservers=None, timeout=6.0): return new_record -def parse_aggregate_report_xml(xml, nameservers=None, timeout=6.0): +def parse_aggregate_report_xml(xml, nameservers=None, timeout=0.5): """Parses a DMARC XML report string and returns a consistent OrderedDict Args: @@ -568,7 +568,7 @@ def extract_xml(input_): return xml -def parse_aggregate_report_file(_input, nameservers=None, timeout=6.0): +def parse_aggregate_report_file(_input, nameservers=None, timeout=0.5): """Parses a file at the given path, a file-like object. 
or bytes as a aggregate DMARC report @@ -697,7 +697,7 @@ def parsed_aggregate_reports_to_csv(reports): def parse_forensic_report(feedback_report, sample, sample_headers_only, - nameservers=None, timeout=6.0): + nameservers=None, timeout=0.5): """ Converts a DMARC forensic report and sample to a ``OrderedDict`` @@ -904,7 +904,7 @@ def parsed_forensic_reports_to_csv(reports): return csv_file.getvalue() -def parse_report_email(input_, nameservers=None, timeout=6.0): +def parse_report_email(input_, nameservers=None, timeout=0.5): """ Parses a DMARC report from an email @@ -1058,7 +1058,7 @@ def parse_report_email(input_, nameservers=None, timeout=6.0): return result -def parse_report_file(input_, nameservers=None, timeout=6.0): +def parse_report_file(input_, nameservers=None, timeout=0.5): """Parses a DMARC aggregate or forensic file at the given path, a file-like object. or bytes diff --git a/parsedmarc/cli.py b/parsedmarc/cli.py index ec4701a..2db1746 100644 --- a/parsedmarc/cli.py +++ b/parsedmarc/cli.py @@ -72,7 +72,7 @@ def _main(): "(Default is Cloudflare's)") arg_parser.add_argument("-t", "--timeout", help="number of seconds to wait for an answer " - "from DNS (default 6.0)", + "from DNS (default 0.5)", type=float, default=6.0) arg_parser.add_argument("-H", "--host", help="IMAP hostname or IP address") From aead7ee7546d6954ba0ced33554348aeb6adfe18 Mon Sep 17 00:00:00 2001 From: Sean Whalen Date: Mon, 17 Sep 2018 12:35:27 -0400 Subject: [PATCH 07/30] Add alignment booleans to JSON output --- CHANGELOG.md | 1 + parsedmarc/__init__.py | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index cbd2dfc..30c60c5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ Collector (HEC) - Use a browser-like `User-Agent` when downloading the Public Suffix List, to avoid being blocked by security proxies - Reduce default DNS timeout to 0.5 seconds +- Add alignment booleans to JSON output 3.9.7 ----- diff --git a/parsedmarc/__init__.py 
b/parsedmarc/__init__.py index 88f991e..7f3fc0e 100644 --- a/parsedmarc/__init__.py +++ b/parsedmarc/__init__.py @@ -350,6 +350,12 @@ def _parse_report_record(record, nameservers=None, timeout=0.5): if "spf" in policy_evaluated: new_policy_evaluated["spf"] = policy_evaluated["spf"] reasons = [] + spf_aligned = policy_evaluated["spf"] == "pass" + dkim_aligned = policy_evaluated["dkim"] == "pass" + dmarc_aligned = spf_aligned or dkim_aligned + new_record["alignment"]["spf"] = spf_aligned + new_record["alignment"]["dkim"] = dkim_aligned + new_record["alignment"]["dmarc"] = dmarc_aligned if "reason" in policy_evaluated: if type(policy_evaluated["reason"]) == list: reasons = policy_evaluated["reason"] From 68c54d4c5cf4be3ba945bb971c31939c60a6390a Mon Sep 17 00:00:00 2001 From: Sean Whalen Date: Mon, 17 Sep 2018 12:40:48 -0400 Subject: [PATCH 08/30] Add missing sub dictionary --- parsedmarc/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py index 7f3fc0e..98165e5 100644 --- a/parsedmarc/__init__.py +++ b/parsedmarc/__init__.py @@ -353,6 +353,7 @@ def _parse_report_record(record, nameservers=None, timeout=0.5): spf_aligned = policy_evaluated["spf"] == "pass" dkim_aligned = policy_evaluated["dkim"] == "pass" dmarc_aligned = spf_aligned or dkim_aligned + new_record["alignment"] = dict() new_record["alignment"]["spf"] = spf_aligned new_record["alignment"]["dkim"] = dkim_aligned new_record["alignment"]["dmarc"] = dmarc_aligned From db4e145b7a5f0c5055571dfba57d8172f360bc89 Mon Sep 17 00:00:00 2001 From: Sean Whalen Date: Tue, 18 Sep 2018 21:40:34 -0400 Subject: [PATCH 09/30] Add User-Agent to GioIP DB download --- CHANGELOG.md | 6 +++--- README.rst | 2 +- docs/index.rst | 2 +- parsedmarc/__init__.py | 38 +++++++++++++++++++++----------------- parsedmarc/cli.py | 2 +- 5 files changed, 27 insertions(+), 23 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 30c60c5..465ba47 100644 --- a/CHANGELOG.md +++ 
b/CHANGELOG.md @@ -3,9 +3,9 @@ - Add support for sending DMARC reports to a Splunk HTTP Events Collector (HEC) -- Use a browser-like `User-Agent` when downloading the Public Suffix List, to -avoid being blocked by security proxies -- Reduce default DNS timeout to 0.5 seconds +- Use a browser-like `User-Agent` when downloading the Public Suffix List and +GeoIP DB to avoid being blocked by security proxies +- Reduce default DNS timeout to 2.0 seconds - Add alignment booleans to JSON output 3.9.7 diff --git a/README.rst b/README.rst index 19ee735..dcb69d9 100644 --- a/README.rst +++ b/README.rst @@ -64,7 +64,7 @@ CLI help nameservers to query (Default is Cloudflare's) -t TIMEOUT, --timeout TIMEOUT number of seconds to wait for an answer from DNS - (default 0.5) + (default 2.0) -H HOST, --host HOST IMAP hostname or IP address -u USER, --user USER IMAP user -p PASSWORD, --password PASSWORD diff --git a/docs/index.rst b/docs/index.rst index 7a69416..73ceca0 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -71,7 +71,7 @@ CLI help nameservers to query (Default is Cloudflare's) -t TIMEOUT, --timeout TIMEOUT number of seconds to wait for an answer from DNS - (default 0.5) + (default 2.0) -H HOST, --host HOST IMAP hostname or IP address -u USER, --user USER IMAP user -p PASSWORD, --password PASSWORD diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py index 98165e5..6235ede 100644 --- a/parsedmarc/__init__.py +++ b/parsedmarc/__init__.py @@ -37,7 +37,6 @@ import xmltodict import dns.reversename import dns.resolver import dns.exception -from requests import get import geoip2.database import geoip2.errors import imapclient @@ -57,6 +56,13 @@ MAGIC_GZIP = b"\x1F\x8B" MAGIC_XML = b"\x3c\x3f\x78\x6d\x6c\x20" +USER_AGENT = "Mozilla/5.0 (({0} {1})) parsedmarc/{2}".format( + platform.system(), + platform.release(), + __version__ + ) + + class ParserError(RuntimeError): """Raised whenever the parser fails for some reason""" @@ -104,12 +110,7 @@ def _get_base_domain(domain):
def download_psl(): url = "https://publicsuffix.org/list/public_suffix_list.dat" # Use a browser-like user agent string to bypass some proxy blocks - user_agent = "Mozilla/5.0 ((0 {1})) parsedmarc/{2}".format( - platform.system(), - platform.release(), - __version__ - ) - headers = {"User-Agent": user_agent} + headers = {"User-Agent": USER_AGENT} fresh_psl = requests.get(url, headers=headers).text with open(psl_path, "w", encoding="utf-8") as fresh_psl_file: fresh_psl_file.write(fresh_psl) @@ -131,7 +132,7 @@ def _get_base_domain(domain): return psl.get_public_suffix(domain) -def _query_dns(domain, record_type, nameservers=None, timeout=0.5): +def _query_dns(domain, record_type, nameservers=None, timeout=2.0): """ Queries DNS @@ -159,7 +160,7 @@ def _query_dns(domain, record_type, nameservers=None, timeout=0.5): resolver.query(domain, record_type, tcp=True))) -def _get_reverse_dns(ip_address, nameservers=None, timeout=0.5): +def _get_reverse_dns(ip_address, nameservers=None, timeout=2.0): """ Resolves an IP address to a hostname using a reverse DNS query @@ -245,8 +246,11 @@ def _get_ip_address_country(ip_address): """ url = "https://geolite.maxmind.com/download/geoip/database/" \ "GeoLite2-Country.tar.gz" + # Use a browser-like user agent string to bypass some proxy blocks + headers = {"User-Agent": USER_AGENT} original_filename = "GeoLite2-Country.mmdb" - tar_file = tarfile.open(fileobj=BytesIO(get(url).content), mode="r:gz") + tar_bytes = requests.get(url, headers=headers).content + tar_file = tarfile.open(fileobj=BytesIO(tar_bytes), mode="r:gz") tar_dir = tar_file.getnames()[0] tar_path = "{0}/{1}".format(tar_dir, original_filename) tar_file.extract(tar_path) @@ -284,7 +288,7 @@ def _get_ip_address_country(ip_address): return country -def _get_ip_address_info(ip_address, nameservers=None, timeout=0.5): +def _get_ip_address_info(ip_address, nameservers=None, timeout=2.0): """ Returns reverse DNS and country information for the given IP address @@ -315,7 +319,7 
@@ def _get_ip_address_info(ip_address, nameservers=None, timeout=0.5): return info -def _parse_report_record(record, nameservers=None, timeout=0.5): +def _parse_report_record(record, nameservers=None, timeout=2.0): """ Converts a record from a DMARC aggregate report into a more consistent format @@ -425,7 +429,7 @@ def _parse_report_record(record, nameservers=None, timeout=0.5): return new_record -def parse_aggregate_report_xml(xml, nameservers=None, timeout=0.5): +def parse_aggregate_report_xml(xml, nameservers=None, timeout=2.0): """Parses a DMARC XML report string and returns a consistent OrderedDict Args: @@ -575,7 +579,7 @@ def extract_xml(input_): return xml -def parse_aggregate_report_file(_input, nameservers=None, timeout=0.5): +def parse_aggregate_report_file(_input, nameservers=None, timeout=2.0): """Parses a file at the given path, a file-like object. or bytes as a aggregate DMARC report @@ -704,7 +708,7 @@ def parsed_aggregate_reports_to_csv(reports): def parse_forensic_report(feedback_report, sample, sample_headers_only, - nameservers=None, timeout=0.5): + nameservers=None, timeout=2.0): """ Converts a DMARC forensic report and sample to a ``OrderedDict`` @@ -911,7 +915,7 @@ def parsed_forensic_reports_to_csv(reports): return csv_file.getvalue() -def parse_report_email(input_, nameservers=None, timeout=0.5): +def parse_report_email(input_, nameservers=None, timeout=2.0): """ Parses a DMARC report from an email @@ -1065,7 +1069,7 @@ def parse_report_email(input_, nameservers=None, timeout=0.5): return result -def parse_report_file(input_, nameservers=None, timeout=0.5): +def parse_report_file(input_, nameservers=None, timeout=2.0): """Parses a DMARC aggregate or forensic file at the given path, a file-like object. 
or bytes diff --git a/parsedmarc/cli.py b/parsedmarc/cli.py index 2db1746..3ac88ea 100644 --- a/parsedmarc/cli.py +++ b/parsedmarc/cli.py @@ -72,7 +72,7 @@ def _main(): "(Default is Cloudflare's)") arg_parser.add_argument("-t", "--timeout", help="number of seconds to wait for an answer " - "from DNS (default 0.5)", + "from DNS (default 2.0)", type=float, default=6.0) arg_parser.add_argument("-H", "--host", help="IMAP hostname or IP address") From 6b01fc0f3fe069a881859de0cbe3a605410cc369 Mon Sep 17 00:00:00 2001 From: Sean Whalen Date: Tue, 18 Sep 2018 21:54:26 -0400 Subject: [PATCH 10/30] Fix `.msg` parsing CLI exception when `msgconvert` is not found in the system path --- CHANGELOG.md | 2 ++ parsedmarc/__init__.py | 5 +---- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 465ba47..05a5c94 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ Collector (HEC) GeoIP DB to avoid being blocked by security proxies - Reduce default DNS timeout to 2.0 seconds - Add alignment booleans to JSON output +- Fix `.msg` parsing CLI exception when `msgconvert` is not found in the +system path 3.9.7 ----- diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py index 6235ede..7438401 100644 --- a/parsedmarc/__init__.py +++ b/parsedmarc/__init__.py @@ -958,10 +958,7 @@ def parse_report_email(input_, nameservers=None, timeout=2.0): with open(eml_path, "rb") as eml_file: rfc822 = eml_file.read() except FileNotFoundError: - raise FileNotFoundError( - "msgconvert not found. 
Please ensure it is installed\n" - "sudo apt install libemail-outlook-message-perl\n" - "https://github.com/mvz/email-outlook-message-perl") + raise ParserError("msgconvert utility not found") finally: os.chdir(orig_dir) shutil.rmtree(tmp_dir) From c051980f26f85b4ac9b6814c4015a0c71238a498 Mon Sep 17 00:00:00 2001 From: Sean Whalen Date: Wed, 19 Sep 2018 07:35:06 -0400 Subject: [PATCH 11/30] Update output example in documentation --- README.rst | 5 +++++ docs/index.rst | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/README.rst b/README.rst index dcb69d9..b3db504 100644 --- a/README.rst +++ b/README.rst @@ -161,6 +161,11 @@ JSON "base_domain": "bellsouth.net" }, "count": 2, + "alignment": { + "spf": true, + "dkim": false, + "dmarc": true + }, "policy_evaluated": { "disposition": "none", "dkim": "fail", diff --git a/docs/index.rst b/docs/index.rst index 73ceca0..0b01ed0 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -169,6 +169,11 @@ JSON "base_domain": "bellsouth.net" }, "count": 2, + "alignment": { + "spf": true, + "dkim": false, + "dmarc": true + }, "policy_evaluated": { "disposition": "none", "dkim": "fail", From 0989a8bb8a99fc20445b28370d1b0a64cbaf0c85 Mon Sep 17 00:00:00 2001 From: Sean Whalen Date: Wed, 19 Sep 2018 08:01:30 -0400 Subject: [PATCH 12/30] Fix `SMTP AUTH extension not supported by server` error on some SMTP servers Issue #12 --- CHANGELOG.md | 2 ++ parsedmarc/__init__.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 05a5c94..1572cf9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ GeoIP DB to avoid being blocked by security proxies - Add alignment booleans to JSON output - Fix `.msg` parsing CLI exception when `msgconvert` is not found in the system path +- Fix `SMTP AUTH extension not supported by server` error on some SMTP servers +(#12) 3.9.7 ----- diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py index 7438401..013c409 100644 --- a/parsedmarc/__init__.py +++ 
b/parsedmarc/__init__.py @@ -1438,9 +1438,11 @@ def email_results(results, host, mail_from, mail_to, port=0, starttls=True, ssl_context = ssl.create_default_context() if use_ssl: server = smtplib.SMTP_SSL(host, port=port, context=ssl_context) + server.connect(host, port) server.helo() else: server = smtplib.SMTP(host, port=port) + server.connect(host, port) server.ehlo() if starttls: server.starttls(context=ssl_context) From 0a694b0a24fd2f311e3924edbb83153ba5815f3f Mon Sep 17 00:00:00 2001 From: Sean Whalen Date: Wed, 19 Sep 2018 11:30:34 -0400 Subject: [PATCH 13/30] - Always use `\n` as the newline when generating CSVs --- CHANGELOG.md | 1 + parsedmarc/__init__.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1572cf9..e094c27 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ GeoIP DB to avoid being blocked by security proxies system path - Fix `SMTP AUTH extension not supported by server` error on some SMTP servers (#12) +- Always use `\n` as the newline when generating CSVs 3.9.7 ----- diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py index 013c409..e643f85 100644 --- a/parsedmarc/__init__.py +++ b/parsedmarc/__init__.py @@ -624,7 +624,7 @@ def parsed_aggregate_reports_to_csv(reports): "envelope_to", "dkim_domains", "dkim_selectors", "dkim_results", "spf_domains", "spf_scopes", "spf_results"] - csv_file_object = StringIO() + csv_file_object = StringIO(newline="\n") writer = DictWriter(csv_file_object, fields) writer.writeheader() From 5657a27262471068abcd89a94b900872d26b64ca Mon Sep 17 00:00:00 2001 From: Sean Whalen Date: Fri, 21 Sep 2018 08:42:44 -0400 Subject: [PATCH 14/30] Use port 587 by default when sending email Hopefully fixes issue #12 --- parsedmarc/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py index e643f85..66ab752 100644 --- a/parsedmarc/__init__.py +++ b/parsedmarc/__init__.py @@ -1387,7 
+1387,7 @@ def get_report_zip(results): return storage.getvalue() -def email_results(results, host, mail_from, mail_to, port=0, starttls=True, +def email_results(results, host, mail_from, mail_to, port=587, starttls=True, use_ssl=False, user=None, password=None, subject=None, attachment_filename=None, message=None, ssl_context=None): """ From b11c6d587c8952401f0ba33248ae76436579f6ce Mon Sep 17 00:00:00 2001 From: Sean Whalen Date: Fri, 21 Sep 2018 23:45:24 -0400 Subject: [PATCH 15/30] Fix IMAP reconnection --- parsedmarc/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py index 66ab752..704ba5b 100644 --- a/parsedmarc/__init__.py +++ b/parsedmarc/__init__.py @@ -1529,6 +1529,7 @@ def watch_inbox(host, username, password, callback, reports_folder="INBOX", logger.debug("IMAP error: Broken pipe") logger.debug("Reconnecting watcher") server = imapclient.IMAPClient(host) + server.login(username, password) server.select_folder(rf) idle_start_time = time.monotonic() ms = "MOVE" in get_imap_capabilities(server) From 2a4f558bbc338e5595418e904fdab7f3d4a3d378 Mon Sep 17 00:00:00 2001 From: Sean Whalen Date: Mon, 24 Sep 2018 05:17:29 -0400 Subject: [PATCH 16/30] Always send creds when reconnecting to IMAP --- parsedmarc/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py index 704ba5b..62ab5d4 100644 --- a/parsedmarc/__init__.py +++ b/parsedmarc/__init__.py @@ -1590,6 +1590,7 @@ def watch_inbox(host, username, password, callback, reports_folder="INBOX", logger.debug("IMAP error: Broken pipe") logger.debug("Reconnecting watcher") server = imapclient.IMAPClient(host) + server.login(username, password) server.select_folder(rf) idle_start_time = time.monotonic() res = get_dmarc_reports_from_inbox(connection=server, From a1a4cbbf28b9583a68a371fad4b3324c1551d2d0 Mon Sep 17 00:00:00 2001 From: Sean Whalen Date: Tue, 25 Sep 2018 10:01:02 -0400 Subject: [PATCH 17/30] Use 
correct splunk sourcetype format --- parsedmarc/__init__.py | 12 ++++++++++++ parsedmarc/splunk.py | 6 +++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py index 62ab5d4..4523dcb 100644 --- a/parsedmarc/__init__.py +++ b/parsedmarc/__init__.py @@ -225,6 +225,18 @@ def human_timestamp_to_datetime(human_timestamp): return datetime.strptime(human_timestamp, "%Y-%m-%d %H:%M:%S") +def human_timestamp_to_timestamp(human_timestamp): + """ + Converts a human-readable timestamp into a UNIX timestamp + + Args: + human_timestamp (str): A timestamp in ``YYYY-MM-DD HH:MM:SS`` format + + Returns: + float: The converted timestamp + """ + return human_timestamp_to_datetime(human_timestamp).timestamp() + def _get_ip_address_country(ip_address): """ Uses the MaxMind Geolite2 Country database to return the ISO code for the diff --git a/parsedmarc/splunk.py b/parsedmarc/splunk.py index cd6306c..13ab216 100644 --- a/parsedmarc/splunk.py +++ b/parsedmarc/splunk.py @@ -17,7 +17,7 @@ class HECClient(object): # http://docs.splunk.com/Documentation/Splunk/latest/Data/AboutHEC # http://docs.splunk.com/Documentation/Splunk/latest/RESTREF/RESTinput#services.2Fcollector - def __init__(self, url, access_token, index="dmarc", + def __init__(self, url, access_token,index="dmarc", source="parsedmarc", verify=True): """ Initializes the HECClient @@ -60,7 +60,7 @@ class HECClient(object): json_str = "" for report in aggregate_reports: data = self._common_data.copy() - data["sourcetype"] = "dmarc_aggregate" + data["sourcetype"] = "dmarc:aggregate" data["event"] = report.copy() json_str += "{0}\n".format(json.dumps(data)) @@ -86,7 +86,7 @@ class HECClient(object): json_str = "" for report in aggregate_reports: data = self._common_data.copy() - data["sourcetype"] = "dmarc_forensic" + data["sourcetype"] = "dmarc:forensic" data["event"] = report.copy() json_str += "{0}\n".format(json.dumps(data)) From
861ee7d247c78d9b2f16178e1adfaaa37a75a2ce Mon Sep 17 00:00:00 2001 From: Sean Whalen Date: Tue, 25 Sep 2018 13:06:27 -0400 Subject: [PATCH 18/30] Update Splunk support --- README.rst | 2 +- docs/index.rst | 2 +- parsedmarc/cli.py | 7 +++++-- parsedmarc/splunk.py | 38 ++++++++++++++++++++++++++++++++++---- 4 files changed, 41 insertions(+), 8 deletions(-) diff --git a/README.rst b/README.rst index b3db504..1910f7c 100644 --- a/README.rst +++ b/README.rst @@ -83,7 +83,7 @@ CLI help collector (HEC) --hec-index HEC_INDEX The index to use when sending events to the Splunk - HTTP Events (Default: dmarc) + HTTP Events --save-aggregate Save aggregate reports to search indexes --save-forensic Save forensic reports to search indexes -O OUTGOING_HOST, --outgoing-host OUTGOING_HOST diff --git a/docs/index.rst b/docs/index.rst index 0b01ed0..7ce562c 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -90,7 +90,7 @@ CLI help collector (HEC) --hec-index HEC_INDEX The index to use when sending events to the Splunk - HTTP Events (Default: dmarc) + HTTP Events --save-aggregate Save aggregate reports to search indexes --save-forensic Save forensic reports to search indexes -O OUTGOING_HOST, --outgoing-host OUTGOING_HOST diff --git a/parsedmarc/cli.py b/parsedmarc/cli.py index 3ac88ea..f754bec 100644 --- a/parsedmarc/cli.py +++ b/parsedmarc/cli.py @@ -101,8 +101,7 @@ def _main(): "(HEC)") arg_parser.add_argument("--hec-index", help="The index to use when " "sending events to the " - "Splunk HTTP Events " - "(Default: dmarc)") + "Splunk HTTP Events") arg_parser.add_argument("--save-aggregate", action="store_true", default=False, help="Save aggregate reports to search indexes") @@ -160,6 +159,10 @@ def _main(): elastic.set_hosts(args.elasticsearch_host) elastic.create_indexes() if args.hec: + if args.hec_token is None or args.hec_index is None: + logger.error("HEC token and HEC index are required when " + "using HEC URL") + exit(1) hec_client = splunk.HECClient(args.hec, args.hec_token, 
index=args.hec_index) except ElasticsearchException as error: diff --git a/parsedmarc/splunk.py b/parsedmarc/splunk.py index 13ab216..e5ed768 100644 --- a/parsedmarc/splunk.py +++ b/parsedmarc/splunk.py @@ -17,7 +17,7 @@ class HECClient(object): # http://docs.splunk.com/Documentation/Splunk/latest/Data/AboutHEC # http://docs.splunk.com/Documentation/Splunk/latest/RESTREF/RESTinput#services.2Fcollector - def __init__(self, url, access_token,index="dmarc", + def __init__(self, url, access_token, index, source="parsedmarc", verify=True): """ Initializes the HECClient @@ -50,18 +50,48 @@ class HECClient(object): Saves aggregate DMARC reports to Splunk Args: - aggregate_reports (list): A list of aggregate report dictionaries + aggregate_reports: A list of aggregate report dictionaries to save in Splunk """ if type(aggregate_reports) == dict: aggregate_reports = [aggregate_reports] + data = self._common_data.copy() json_str = "" for report in aggregate_reports: - data = self._common_data.copy() + for record in report["records"]: + new_report = dict() + for metadata in report["report_metadata"]: + new_report[metadata] = report["report_metadata"][metadata] + new_report["policy_published"] = report["policy_published"] + new_report["source_ip_address"] = record["source"][ + "ip_address"] + new_report["source_country"] = record["source"]["country"] + new_report["source_reverse_dns"] = record["source"][ + "reverse_dns"] + new_report["source_base_domain"] = record["source"][ + "base_domain"] + new_report["message_count"] = record["count"] + new_report["disposition"] = record["policy_evaluated"][ + "disposition" + ] + new_report["spf_aligned"] = record["alignment"]["spf"] + new_report["dkim_aligned"] = record["alignment"]["dkim"] + new_report["passed_dmarc"] = record["alignment"]["dmarc"] + new_report["header_from"] = record["identifiers"][ + "header_from"] + new_report["envelope_from"] = record["identifiers"][ + "envelope_from"] + if "dkim" in record["auth_results"]: + 
new_report["dkim_results"] = record["auth_results"][ + "dkim"] + if "spf" in record["spf_results"]: + new_report["spf_results"] = record["auth_results"][ + "spf"] + data["sourcetype"] = "dmarc:aggregate" - data["event"] = report.copy() + data["event"] = new_report.copy() json_str += "{0}\n".format(json.dumps(data)) try: From 253d421e295bbc43ba2e1819a1c5436d66d86728 Mon Sep 17 00:00:00 2001 From: Sean Whalen Date: Tue, 25 Sep 2018 13:40:55 -0400 Subject: [PATCH 19/30] Splunk and SMTP improvements SMTP issue #12 fixed (based on PR #13 ) --- CHANGELOG.md | 5 +- README.rst | 10 +++- docs/index.rst | 121 ++++++++++++++++++++++------------------- parsedmarc/__init__.py | 15 +++-- parsedmarc/cli.py | 9 ++- 5 files changed, 92 insertions(+), 68 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e094c27..58b21e7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,8 +9,9 @@ GeoIP DB to avoid being blocked by security proxies - Add alignment booleans to JSON output - Fix `.msg` parsing CLI exception when `msgconvert` is not found in the system path -- Fix `SMTP AUTH extension not supported by server` error on some SMTP servers -(#12) +- Add `--outgoing-port` and `--outgoing-ssl` options +- Fall back to plain text SMTP if `--outgoing-ssl` is not used and `STARTTLS` +is not supported by the server - Always use `\n` as the newline when generating CSVs 3.9.7 diff --git a/README.rst b/README.rst index 1910f7c..49411c5 100644 --- a/README.rst +++ b/README.rst @@ -38,13 +38,14 @@ CLI help :: - usage: cli.py [-h] [-o OUTPUT] [-n NAMESERVERS [NAMESERVERS ...]] [-t TIMEOUT] + usage: cli.py [-h] [-o OUTPUT] [-n NAMESERVERS [NAMESERVERS ...]] [-t TIMEOUT] [-H HOST] [-u USER] [-p PASSWORD] [-r REPORTS_FOLDER] [-a ARCHIVE_FOLDER] [-d] [-E [ELASTICSEARCH_HOST [ELASTICSEARCH_HOST ...]]] [--hec HEC] [--hec-key HEC_KEY] [--hec-index HEC_INDEX] [--save-aggregate] [--save-forensic] [-O OUTGOING_HOST] [-U OUTGOING_USER] - [-P OUTGOING_PASSWORD] [-F OUTGOING_FROM] + [-P OUTGOING_PASSWORD] 
[--outgoing-port OUTGOING_PORT] + [--outgoing-SSL OUTGOING_SSL] [-F OUTGOING_FROM] [-T OUTGOING_TO [OUTGOING_TO ...]] [-S OUTGOING_SUBJECT] [-A OUTGOING_ATTACHMENT] [-M OUTGOING_MESSAGE] [-w] [--test] [-s] [--debug] [-v] @@ -92,6 +93,11 @@ CLI help Email the results using this user -P OUTGOING_PASSWORD, --outgoing-password OUTGOING_PASSWORD Email the results using this password + --outgoing-port OUTGOING_PORT + Email the results using this port + --outgoing-SSL OUTGOING_SSL + Use SSL/TLS instead of STARTTLS (more secure, and + required by some providers, like Gmail) -F OUTGOING_FROM, --outgoing-from OUTGOING_FROM Email the results using this from address -T OUTGOING_TO [OUTGOING_TO ...], --outgoing-to OUTGOING_TO [OUTGOING_TO ...] diff --git a/docs/index.rst b/docs/index.rst index 7ce562c..177e03f 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -51,70 +51,77 @@ CLI help [-E [ELASTICSEARCH_HOST [ELASTICSEARCH_HOST ...]]] [--hec HEC] [--hec-key HEC_KEY] [--hec-index HEC_INDEX] [--save-aggregate] [--save-forensic] [-O OUTGOING_HOST] [-U OUTGOING_USER] - [-P OUTGOING_PASSWORD] [-F OUTGOING_FROM] + [-P OUTGOING_PASSWORD] [--outgoing-port OUTGOING_PORT] + [--outgoing-SSL OUTGOING_SSL] [-F OUTGOING_FROM] [-T OUTGOING_TO [OUTGOING_TO ...]] [-S OUTGOING_SUBJECT] [-A OUTGOING_ATTACHMENT] [-M OUTGOING_MESSAGE] [-w] [--test] [-s] [--debug] [-v] [file_path [file_path ...]] - Parses DMARC reports + Parses DMARC reports - positional arguments: - file_path one or more paths to aggregate or forensic report - files or emails + positional arguments: + file_path one or more paths to aggregate or forensic report + files or emails + + optional arguments: + -h, --help show this help message and exit + -o OUTPUT, --output OUTPUT + Write output files to the given directory + -n NAMESERVERS [NAMESERVERS ...], --nameservers NAMESERVERS [NAMESERVERS ...] 
+ nameservers to query (Default is Cloudflare's) + -t TIMEOUT, --timeout TIMEOUT + number of seconds to wait for an answer from DNS + (default 2.0) + -H HOST, --host HOST IMAP hostname or IP address + -u USER, --user USER IMAP user + -p PASSWORD, --password PASSWORD + IMAP password + -r REPORTS_FOLDER, --reports-folder REPORTS_FOLDER + The IMAP folder containing the reports Default: INBOX + -a ARCHIVE_FOLDER, --archive-folder ARCHIVE_FOLDER + Specifies the IMAP folder to move messages to after + processing them Default: Archive + -d, --delete Delete the reports after processing them + -E [ELASTICSEARCH_HOST [ELASTICSEARCH_HOST ...]], --elasticsearch-host [ELASTICSEARCH_HOST [ELASTICSEARCH_HOST ...]] + A list of one or more Elasticsearch hostnames or URLs + to use (e.g. localhost:9200) + --hec HEC URL to a Splunk HTTP Event Collector (HEC) + --hec-key HEC_KEY The authorization key for a Splunk HTTP event + collector (HEC) + --hec-index HEC_INDEX + The index to use when sending events to the Splunk + HTTP Events + --save-aggregate Save aggregate reports to search indexes + --save-forensic Save forensic reports to search indexes + -O OUTGOING_HOST, --outgoing-host OUTGOING_HOST + Email the results using this host + -U OUTGOING_USER, --outgoing-user OUTGOING_USER + Email the results using this user + -P OUTGOING_PASSWORD, --outgoing-password OUTGOING_PASSWORD + Email the results using this password + --outgoing-port OUTGOING_PORT + Email the results using this port + --outgoing-SSL OUTGOING_SSL + Use SSL/TLS instead of STARTTLS (more secure, and + required by some providers, like Gmail) + -F OUTGOING_FROM, --outgoing-from OUTGOING_FROM + Email the results using this from address + -T OUTGOING_TO [OUTGOING_TO ...], --outgoing-to OUTGOING_TO [OUTGOING_TO ...] 
+ Email the results to these addresses + -S OUTGOING_SUBJECT, --outgoing-subject OUTGOING_SUBJECT + Email the results using this subject + -A OUTGOING_ATTACHMENT, --outgoing-attachment OUTGOING_ATTACHMENT + Email the results using this filename + -M OUTGOING_MESSAGE, --outgoing-message OUTGOING_MESSAGE + Email the results using this message + -w, --watch Use an IMAP IDLE connection to process reports as they + arrive in the inbox + --test Do not move or delete IMAP messages + -s, --silent Only print errors + --debug Print debugging information + -v, --version show program's version number and exit - optional arguments: - -h, --help show this help message and exit - -o OUTPUT, --output OUTPUT - Write output files to the given directory - -n NAMESERVERS [NAMESERVERS ...], --nameservers NAMESERVERS [NAMESERVERS ...] - nameservers to query (Default is Cloudflare's) - -t TIMEOUT, --timeout TIMEOUT - number of seconds to wait for an answer from DNS - (default 2.0) - -H HOST, --host HOST IMAP hostname or IP address - -u USER, --user USER IMAP user - -p PASSWORD, --password PASSWORD - IMAP password - -r REPORTS_FOLDER, --reports-folder REPORTS_FOLDER - The IMAP folder containing the reports Default: INBOX - -a ARCHIVE_FOLDER, --archive-folder ARCHIVE_FOLDER - Specifies the IMAP folder to move messages to after - processing them Default: Archive - -d, --delete Delete the reports after processing them - -E [ELASTICSEARCH_HOST [ELASTICSEARCH_HOST ...]], --elasticsearch-host [ELASTICSEARCH_HOST [ELASTICSEARCH_HOST ...]] - A list of one or more Elasticsearch hostnames or URLs - to use (e.g. 
localhost:9200) - --hec HEC URL to a Splunk HTTP Event Collector (HEC) - --hec-key HEC_KEY The authorization key for a Splunk HTTP event - collector (HEC) - --hec-index HEC_INDEX - The index to use when sending events to the Splunk - HTTP Events - --save-aggregate Save aggregate reports to search indexes - --save-forensic Save forensic reports to search indexes - -O OUTGOING_HOST, --outgoing-host OUTGOING_HOST - Email the results using this host - -U OUTGOING_USER, --outgoing-user OUTGOING_USER - Email the results using this user - -P OUTGOING_PASSWORD, --outgoing-password OUTGOING_PASSWORD - Email the results using this password - -F OUTGOING_FROM, --outgoing-from OUTGOING_FROM - Email the results using this from address - -T OUTGOING_TO [OUTGOING_TO ...], --outgoing-to OUTGOING_TO [OUTGOING_TO ...] - Email the results to these addresses - -S OUTGOING_SUBJECT, --outgoing-subject OUTGOING_SUBJECT - Email the results using this subject - -A OUTGOING_ATTACHMENT, --outgoing-attachment OUTGOING_ATTACHMENT - Email the results using this filename - -M OUTGOING_MESSAGE, --outgoing-message OUTGOING_MESSAGE - Email the results using this message - -w, --watch Use an IMAP IDLE connection to process reports as they - arrive in the inbox - --test Do not move or delete IMAP messages - -s, --silent Only print errors - --debug Print debugging information - -v, --version show program's version number and exit SPF and DMARC record validation =============================== diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py index 4523dcb..4bbcd29 100644 --- a/parsedmarc/__init__.py +++ b/parsedmarc/__init__.py @@ -237,6 +237,7 @@ def human_timestamp_to_timestamp(human_timestamp): """ return human_timestamp_to_datetime(human_timestamp).timestamp() + def _get_ip_address_country(ip_address): """ Uses the MaxMind Geolite2 Country database to return the ISO code for the @@ -1399,7 +1400,7 @@ def get_report_zip(results): return storage.getvalue() -def email_results(results, 
host, mail_from, mail_to, port=587, starttls=True, +def email_results(results, host, mail_from, mail_to, port=0, use_ssl=False, user=None, password=None, subject=None, attachment_filename=None, message=None, ssl_context=None): """ @@ -1411,7 +1412,6 @@ def email_results(results, host, mail_from, mail_to, port=587, starttls=True, mail_from: The value of the message from header mail_to : A list of addresses to mail to port (int): Port to use - starttls (bool): use STARTTLS use_ssl (bool): Require a SSL connection from the start user: An optional username password: An optional password @@ -1451,14 +1451,17 @@ def email_results(results, host, mail_from, mail_to, port=587, starttls=True, if use_ssl: server = smtplib.SMTP_SSL(host, port=port, context=ssl_context) server.connect(host, port) - server.helo() + server.ehlo_or_helo_if_needed() else: server = smtplib.SMTP(host, port=port) server.connect(host, port) - server.ehlo() - if starttls: + server.ehlo_or_helo_if_needed() + if server.has_extn("starttls"): server.starttls(context=ssl_context) - server.helo() + server.ehlo() + else: + logger.warning("SMTP server does not support STARTTLS. 
" + "Proceeding in plain text!") if user and password: server.login(user, password) server.sendmail(mail_from, mail_to, msg.as_string()) diff --git a/parsedmarc/cli.py b/parsedmarc/cli.py index f754bec..cb4cbf4 100644 --- a/parsedmarc/cli.py +++ b/parsedmarc/cli.py @@ -114,6 +114,12 @@ def _main(): help="Email the results using this user") arg_parser.add_argument("-P", "--outgoing-password", help="Email the results using this password") + arg_parser.add_argument("--outgoing-port", + help="Email the results using this port") + arg_parser.add_argument("--outgoing-SSL", + help="Use SSL/TLS instead of STARTTLS (more " + "secure, and required by some providers, " + "like Gmail)") arg_parser.add_argument("-F", "--outgoing-from", help="Email the results using this from address") arg_parser.add_argument("-T", "--outgoing-to", nargs="+", @@ -234,7 +240,8 @@ def _main(): try: email_results(results, args.outgoing_host, args.outgoing_from, - args.outgoing_to, user=args.outgoing_user, + args.outgoing_to, use_ssl=args.outgoing_ssl, + user=args.outgoing_user, password=args.outgoing_password, subject=args.outgoing_subject) except SMTPError as error: From ba3c9de9b7fd48da9d051798a7758e0798025c0e Mon Sep 17 00:00:00 2001 From: Sean Whalen Date: Tue, 25 Sep 2018 14:15:09 -0400 Subject: [PATCH 20/30] Fix HEC key check --- parsedmarc/cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/parsedmarc/cli.py b/parsedmarc/cli.py index cb4cbf4..ad87b25 100644 --- a/parsedmarc/cli.py +++ b/parsedmarc/cli.py @@ -165,8 +165,8 @@ def _main(): elastic.set_hosts(args.elasticsearch_host) elastic.create_indexes() if args.hec: - if args.hec_token is None or args.hec_index is None: - logger.error("HEC token and HEC index are required when " + if args.hec_key is None or args.hec_index is None: + logger.error("HEC key and HEC index are required when " "using HEC URL") exit(1) hec_client = splunk.HECClient(args.hec, args.hec_token, From c964241cbaa7c3a48b9595e5024a1dd54986c0db Mon 
Sep 17 00:00:00 2001 From: Sean Whalen Date: Tue, 25 Sep 2018 14:21:03 -0400 Subject: [PATCH 21/30] Splunk HEC token not HEC key --- README.rst | 16 +++++++++++++++- docs/index.rst | 20 +++++++++++--------- parsedmarc/cli.py | 10 +++++----- 3 files changed, 31 insertions(+), 15 deletions(-) diff --git a/README.rst b/README.rst index 49411c5..0bdc43d 100644 --- a/README.rst +++ b/README.rst @@ -51,6 +51,19 @@ CLI help [-s] [--debug] [-v] [file_path [file_path ...]] + usage: cli.py [-h] [-o OUTPUT] [-n NAMESERVERS [NAMESERVERS ...]] [-t TIMEOUT] + [-H HOST] [-u USER] [-p PASSWORD] [-r REPORTS_FOLDER] + [-a ARCHIVE_FOLDER] [-d] + [-E [ELASTICSEARCH_HOST [ELASTICSEARCH_HOST ...]]] [--hec HEC] + [--hec-token HEC_TOKEN] [--hec-index HEC_INDEX] + [--save-aggregate] [--save-forensic] [-O OUTGOING_HOST] + [-U OUTGOING_USER] [-P OUTGOING_PASSWORD] + [--outgoing-port OUTGOING_PORT] [--outgoing-SSL OUTGOING_SSL] + [-F OUTGOING_FROM] [-T OUTGOING_TO [OUTGOING_TO ...]] + [-S OUTGOING_SUBJECT] [-A OUTGOING_ATTACHMENT] + [-M OUTGOING_MESSAGE] [-w] [--test] [-s] [--debug] [-v] + [file_path [file_path ...]] + Parses DMARC reports positional arguments: @@ -80,7 +93,8 @@ CLI help A list of one or more Elasticsearch hostnames or URLs to use (e.g. 
localhost:9200) --hec HEC URL to a Splunk HTTP Event Collector (HEC) - --hec-key HEC_KEY The authorization key for a Splunk HTTP event + --hec-token HEC_TOKEN + The authorization token for a Splunk HTTP event collector (HEC) --hec-index HEC_INDEX The index to use when sending events to the Splunk diff --git a/docs/index.rst b/docs/index.rst index 177e03f..40fdb67 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -45,17 +45,17 @@ CLI help :: - usage: cli.py [-h] [-o OUTPUT] [-n NAMESERVERS [NAMESERVERS ...]] [-t TIMEOUT] + usage: cli.py [-h] [-o OUTPUT] [-n NAMESERVERS [NAMESERVERS ...]] [-t TIMEOUT] [-H HOST] [-u USER] [-p PASSWORD] [-r REPORTS_FOLDER] [-a ARCHIVE_FOLDER] [-d] [-E [ELASTICSEARCH_HOST [ELASTICSEARCH_HOST ...]]] [--hec HEC] - [--hec-key HEC_KEY] [--hec-index HEC_INDEX] [--save-aggregate] - [--save-forensic] [-O OUTGOING_HOST] [-U OUTGOING_USER] - [-P OUTGOING_PASSWORD] [--outgoing-port OUTGOING_PORT] - [--outgoing-SSL OUTGOING_SSL] [-F OUTGOING_FROM] - [-T OUTGOING_TO [OUTGOING_TO ...]] [-S OUTGOING_SUBJECT] - [-A OUTGOING_ATTACHMENT] [-M OUTGOING_MESSAGE] [-w] [--test] - [-s] [--debug] [-v] + [--hec-token HEC_TOKEN] [--hec-index HEC_INDEX] + [--save-aggregate] [--save-forensic] [-O OUTGOING_HOST] + [-U OUTGOING_USER] [-P OUTGOING_PASSWORD] + [--outgoing-port OUTGOING_PORT] [--outgoing-SSL OUTGOING_SSL] + [-F OUTGOING_FROM] [-T OUTGOING_TO [OUTGOING_TO ...]] + [-S OUTGOING_SUBJECT] [-A OUTGOING_ATTACHMENT] + [-M OUTGOING_MESSAGE] [-w] [--test] [-s] [--debug] [-v] [file_path [file_path ...]] Parses DMARC reports @@ -87,7 +87,8 @@ CLI help A list of one or more Elasticsearch hostnames or URLs to use (e.g. 
localhost:9200) --hec HEC URL to a Splunk HTTP Event Collector (HEC) - --hec-key HEC_KEY The authorization key for a Splunk HTTP event + --hec-token HEC_TOKEN + The authorization token for a Splunk HTTP event collector (HEC) --hec-index HEC_INDEX The index to use when sending events to the Splunk @@ -123,6 +124,7 @@ CLI help -v, --version show program's version number and exit + SPF and DMARC record validation =============================== diff --git a/parsedmarc/cli.py b/parsedmarc/cli.py index ad87b25..b37484b 100644 --- a/parsedmarc/cli.py +++ b/parsedmarc/cli.py @@ -96,9 +96,9 @@ def _main(): "localhost:9200)") arg_parser.add_argument("--hec", help="URL to a Splunk HTTP Event " "Collector (HEC)") - arg_parser.add_argument("--hec-key", help="The authorization key for a " - "Splunk HTTP event collector " - "(HEC)") + arg_parser.add_argument("--hec-token", help="The authorization token for " + "a Splunk " + "HTTP event collector (HEC)") arg_parser.add_argument("--hec-index", help="The index to use when " "sending events to the " "Splunk HTTP Events") @@ -165,8 +165,8 @@ def _main(): elastic.set_hosts(args.elasticsearch_host) elastic.create_indexes() if args.hec: - if args.hec_key is None or args.hec_index is None: - logger.error("HEC key and HEC index are required when " + if args.hec_token is None or args.hec_index is None: + logger.error("HEC token and HEC index are required when " "using HEC URL") exit(1) hec_client = splunk.HECClient(args.hec, args.hec_token, From eff7c552c9e79ac87bdafa7834f5f30246582d5f Mon Sep 17 00:00:00 2001 From: Sean Whalen Date: Tue, 25 Sep 2018 14:26:30 -0400 Subject: [PATCH 22/30] Fix CLI argument logic --- parsedmarc/cli.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/parsedmarc/cli.py b/parsedmarc/cli.py index b37484b..75ce8de 100644 --- a/parsedmarc/cli.py +++ b/parsedmarc/cli.py @@ -164,21 +164,17 @@ def _main(): if args.elasticsearch_host: elastic.set_hosts(args.elasticsearch_host) 
elastic.create_indexes() - if args.hec: - if args.hec_token is None or args.hec_index is None: - logger.error("HEC token and HEC index are required when " - "using HEC URL") - exit(1) - hec_client = splunk.HECClient(args.hec, args.hec_token, - index=args.hec_index) except ElasticsearchException as error: logger.error("Elasticsearch Error: {0}".format(error.__str__())) exit(1) if args.hec: - if args.hec_key is None: - logger.error("HEC URL provided without HEC key") + if args.hec_token is None or args.hec_index is None: + logger.error("HEC token and HEC index are required when " + "using HEC URL") exit(1) + hec_client = splunk.HECClient(args.hec, args.hec_token, + args.hec_index) file_paths = [] for file_path in args.file_path: From fa38bea8ea1be87ba85d8327ab7060cb8d06b081 Mon Sep 17 00:00:00 2001 From: Sean Whalen Date: Tue, 25 Sep 2018 14:44:23 -0400 Subject: [PATCH 23/30] Fix error output --- parsedmarc/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parsedmarc/cli.py b/parsedmarc/cli.py index 75ce8de..1d5391b 100644 --- a/parsedmarc/cli.py +++ b/parsedmarc/cli.py @@ -42,7 +42,7 @@ def _main(): hec_client.save_aggregate_reports_to_splunk( aggregate_reports_) except splunk.SplunkError as e: - logger.error("Splunk HEC error: {0{".format(e.__str__())) + logger.error("Splunk HEC error: {0}".format(e.__str__())) if args.save_forensic: for report in reports_["forensic_reports"]: try: From caf6cd1872449caa0f47b03462564255645a0e8b Mon Sep 17 00:00:00 2001 From: Sean Whalen Date: Tue, 25 Sep 2018 14:47:06 -0400 Subject: [PATCH 24/30] Fix error formatting --- parsedmarc/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parsedmarc/cli.py b/parsedmarc/cli.py index 1d5391b..a413f65 100644 --- a/parsedmarc/cli.py +++ b/parsedmarc/cli.py @@ -59,7 +59,7 @@ def _main(): hec_client.save_forensic_reports_to_splunk( forensic_reports_) except splunk.SplunkError as e: - logger.error("Splunk HEC error: {0{".format(e.__str__())) + 
logger.error("Splunk HEC error: {0}".format(e.__str__())) arg_parser = ArgumentParser(description="Parses DMARC reports") arg_parser.add_argument("file_path", nargs="*", From c9177f3342850158f41a03855eaa5b8722e018db Mon Sep 17 00:00:00 2001 From: Sean Whalen Date: Tue, 25 Sep 2018 15:50:53 -0400 Subject: [PATCH 25/30] Only save to Splunk when there are things to save --- parsedmarc/splunk.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/parsedmarc/splunk.py b/parsedmarc/splunk.py index e5ed768..19bd0d4 100644 --- a/parsedmarc/splunk.py +++ b/parsedmarc/splunk.py @@ -57,6 +57,9 @@ class HECClient(object): if type(aggregate_reports) == dict: aggregate_reports = [aggregate_reports] + if len(aggregate_reports) < 1: + return + data = self._common_data.copy() json_str = "" for report in aggregate_reports: @@ -101,20 +104,23 @@ class HECClient(object): if response["code"] != 0: raise SplunkError(response["text"]) - def save_forensic_reports_to_splunk(self, aggregate_reports): + def save_forensic_reports_to_splunk(self, forensic_reports): """ Saves forensic DMARC reports to Splunk Args: - aggregate_reports (list): A list of forensic report dictionaries + forensic_reports (list): A list of forensic report dictionaries to save in Splunk """ - if type(aggregate_reports) == dict: - aggregate_reports = [aggregate_reports] + if type(forensic_reports) == dict: + forensic_reports = [forensic_reports] + + if len(forensic_reports) < 1: + return json_str = "" - for report in aggregate_reports: + for report in forensic_reports: data = self._common_data.copy() data["sourcetype"] = "dmarc:forensic" data["event"] = report.copy() From cdb9546bc02940b501e6f9cdcb82a76eb6d23f70 Mon Sep 17 00:00:00 2001 From: Sean Whalen Date: Tue, 25 Sep 2018 16:04:05 -0400 Subject: [PATCH 26/30] Add --hec-skip-certificate-verification option --- README.rst | 16 ++++++++++++++++ docs/index.rst | 15 +++++++++------ parsedmarc/cli.py | 12 +++++++++++- 3 files changed, 36 
insertions(+), 7 deletions(-) diff --git a/README.rst b/README.rst index 0bdc43d..8285db4 100644 --- a/README.rst +++ b/README.rst @@ -64,6 +64,20 @@ CLI help [-M OUTGOING_MESSAGE] [-w] [--test] [-s] [--debug] [-v] [file_path [file_path ...]] + usage: cli.py [-h] [-o OUTPUT] [-n NAMESERVERS [NAMESERVERS ...]] [-t TIMEOUT] + [-H HOST] [-u USER] [-p PASSWORD] [-r REPORTS_FOLDER] + [-a ARCHIVE_FOLDER] [-d] + [-E [ELASTICSEARCH_HOST [ELASTICSEARCH_HOST ...]]] [--hec HEC] + [--hec-token HEC_TOKEN] [--hec-index HEC_INDEX] + [--hec-skip-certificate-verification] [--save-aggregate] + [--save-forensic] [-O OUTGOING_HOST] [-U OUTGOING_USER] + [-P OUTGOING_PASSWORD] [--outgoing-port OUTGOING_PORT] + [--outgoing-SSL OUTGOING_SSL] [-F OUTGOING_FROM] + [-T OUTGOING_TO [OUTGOING_TO ...]] [-S OUTGOING_SUBJECT] + [-A OUTGOING_ATTACHMENT] [-M OUTGOING_MESSAGE] [-w] [--test] + [-s] [--debug] [-v] + [file_path [file_path ...]] + Parses DMARC reports positional arguments: @@ -99,6 +113,8 @@ CLI help --hec-index HEC_INDEX The index to use when sending events to the Splunk HTTP Events + --hec-skip-certificate-verification + Skip certificate verification for Splunk HEC --save-aggregate Save aggregate reports to search indexes --save-forensic Save forensic reports to search indexes -O OUTGOING_HOST, --outgoing-host OUTGOING_HOST diff --git a/docs/index.rst b/docs/index.rst index 40fdb67..891e347 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -50,12 +50,13 @@ CLI help [-a ARCHIVE_FOLDER] [-d] [-E [ELASTICSEARCH_HOST [ELASTICSEARCH_HOST ...]]] [--hec HEC] [--hec-token HEC_TOKEN] [--hec-index HEC_INDEX] - [--save-aggregate] [--save-forensic] [-O OUTGOING_HOST] - [-U OUTGOING_USER] [-P OUTGOING_PASSWORD] - [--outgoing-port OUTGOING_PORT] [--outgoing-SSL OUTGOING_SSL] - [-F OUTGOING_FROM] [-T OUTGOING_TO [OUTGOING_TO ...]] - [-S OUTGOING_SUBJECT] [-A OUTGOING_ATTACHMENT] - [-M OUTGOING_MESSAGE] [-w] [--test] [-s] [--debug] [-v] + [--hec-skip-certificate-verification] [--save-aggregate] + 
[--save-forensic] [-O OUTGOING_HOST] [-U OUTGOING_USER] + [-P OUTGOING_PASSWORD] [--outgoing-port OUTGOING_PORT] + [--outgoing-SSL OUTGOING_SSL] [-F OUTGOING_FROM] + [-T OUTGOING_TO [OUTGOING_TO ...]] [-S OUTGOING_SUBJECT] + [-A OUTGOING_ATTACHMENT] [-M OUTGOING_MESSAGE] [-w] [--test] + [-s] [--debug] [-v] [file_path [file_path ...]] Parses DMARC reports @@ -93,6 +94,8 @@ CLI help --hec-index HEC_INDEX The index to use when sending events to the Splunk HTTP Events + --hec-skip-certificate-verification + Skip certificate verification for Splunk HEC --save-aggregate Save aggregate reports to search indexes --save-forensic Save forensic reports to search indexes -O OUTGOING_HOST, --outgoing-host OUTGOING_HOST diff --git a/parsedmarc/cli.py b/parsedmarc/cli.py index a413f65..8e14132 100644 --- a/parsedmarc/cli.py +++ b/parsedmarc/cli.py @@ -102,6 +102,11 @@ def _main(): arg_parser.add_argument("--hec-index", help="The index to use when " "sending events to the " "Splunk HTTP Events") + arg_parser.add_argument("--hec-skip-certificate-verification", + action="store_true", + default=False, + help="Skip certificate verification for Splunk " + "HEC") arg_parser.add_argument("--save-aggregate", action="store_true", default=False, help="Save aggregate reports to search indexes") @@ -173,8 +178,13 @@ def _main(): logger.error("HEC token and HEC index are required when " "using HEC URL") exit(1) + + verify = True + if args.hec_skip_certificate_verification: + verify = False hec_client = splunk.HECClient(args.hec, args.hec_token, - args.hec_index) + args.hec_index, + verify=verify) file_paths = [] for file_path in args.file_path: From 83a76ec0cd1444d5925eff2ee9d59d66b678c8a0 Mon Sep 17 00:00:00 2001 From: Sean Whalen Date: Tue, 25 Sep 2018 17:37:43 -0400 Subject: [PATCH 27/30] Fix aggregate report splunk conversion --- parsedmarc/splunk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parsedmarc/splunk.py b/parsedmarc/splunk.py index 19bd0d4..cf19051 100644 
--- a/parsedmarc/splunk.py +++ b/parsedmarc/splunk.py @@ -89,7 +89,7 @@ class HECClient(object): if "dkim" in record["auth_results"]: new_report["dkim_results"] = record["auth_results"][ "dkim"] - if "spf" in record["spf_results"]: + if "spf" in record["auth_results"]: new_report["spf_results"] = record["auth_results"][ "spf"] From c102c2f21c54d58663638726d4ea333a4b45c920 Mon Sep 17 00:00:00 2001 From: Sean Whalen Date: Wed, 26 Sep 2018 12:32:39 -0400 Subject: [PATCH 28/30] Fix splunk HEC submission and --outgoing-ssl option Changed --outgoing-SSL to --outgoing-ssl --- README.rst | 2 +- docs/index.rst | 2 +- parsedmarc/cli.py | 8 +++++--- parsedmarc/splunk.py | 6 ++---- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/README.rst b/README.rst index 8285db4..98d8bf3 100644 --- a/README.rst +++ b/README.rst @@ -125,7 +125,7 @@ CLI help Email the results using this password --outgoing-port OUTGOING_PORT Email the results using this port - --outgoing-SSL OUTGOING_SSL + --outgoing-ssl OUTGOING_SSL Use SSL/TLS instead of STARTTLS (more secure, and required by some providers, like Gmail) -F OUTGOING_FROM, --outgoing-from OUTGOING_FROM diff --git a/docs/index.rst b/docs/index.rst index 891e347..d20d4d1 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -106,7 +106,7 @@ CLI help Email the results using this password --outgoing-port OUTGOING_PORT Email the results using this port - --outgoing-SSL OUTGOING_SSL + --outgoing-ssl OUTGOING_SSL Use SSL/TLS instead of STARTTLS (more secure, and required by some providers, like Gmail) -F OUTGOING_FROM, --outgoing-from OUTGOING_FROM diff --git a/parsedmarc/cli.py b/parsedmarc/cli.py index 8e14132..ef9635f 100644 --- a/parsedmarc/cli.py +++ b/parsedmarc/cli.py @@ -121,7 +121,7 @@ def _main(): help="Email the results using this password") arg_parser.add_argument("--outgoing-port", help="Email the results using this port") - arg_parser.add_argument("--outgoing-SSL", + arg_parser.add_argument("--outgoing-ssl", help="Use 
SSL/TLS instead of STARTTLS (more " "secure, and required by some providers, " "like Gmail)") @@ -227,7 +227,8 @@ def _main(): forensic_reports += reports["forensic_reports"] except IMAPError as error: - logger.error("IMAP Error: {0}".format(error.__str__())) + error_str = error.__str__().replace("b'", "").lstrip("'") + logger.error("IMAP Error: {0}".format(error_str)) exit(1) results = OrderedDict([("aggregate_reports", aggregate_reports), @@ -263,7 +264,8 @@ def _main(): test=args.test, nameservers=args.nameservers, dns_timeout=args.timeout) except IMAPError as error: - logger.error("IMAP Error: {0}".format(error.__str__())) + error_str = error.__str__().replace("b'", "").lstrip("'") + logger.error("IMAP Error: {0}".format(error_str)) exit(1) diff --git a/parsedmarc/splunk.py b/parsedmarc/splunk.py index cf19051..b7aad75 100644 --- a/parsedmarc/splunk.py +++ b/parsedmarc/splunk.py @@ -96,9 +96,8 @@ class HECClient(object): data["sourcetype"] = "dmarc:aggregate" data["event"] = new_report.copy() json_str += "{0}\n".format(json.dumps(data)) - try: - response = self.session.post(self.url, json=json_str).json() + response = self.session.post(self.url, data=json_str).json() except Exception as e: raise SplunkError(e.__str__()) if response["code"] != 0: @@ -125,9 +124,8 @@ class HECClient(object): data["sourcetype"] = "dmarc:forensic" data["event"] = report.copy() json_str += "{0}\n".format(json.dumps(data)) - try: - response = self.session.post(self.url, json=json_str).json() + response = self.session.post(self.url, data=json_str).json() except Exception as e: raise SplunkError(e.__str__()) if response["code"] != 0: From 4a34445b8132aead940f5cc1df0a46cd8713a07d Mon Sep 17 00:00:00 2001 From: Sean Whalen Date: Wed, 26 Sep 2018 12:45:50 -0400 Subject: [PATCH 29/30] Update documentation --- docs/index.rst | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/docs/index.rst b/docs/index.rst index d20d4d1..79c28af 100644 --- 
a/docs/index.rst +++ b/docs/index.rst @@ -646,12 +646,6 @@ Then, enable the service You must also run the above commands whenever you edit ``parsedmarc.service``. -Use this command to check the status of the service: - -.. code-block:: bash - - sudo service parsedmarc status - .. warning:: Always restart the service every time you upgrade to a new version of @@ -661,6 +655,23 @@ Use this command to check the status of the service: sudo service parsedmarc restart +To check the status of the service, run: + +.. code-block:: bash + + service parsedmarc status + +.. note:: + + In the event of a crash, systemd will restart the service after 10 minutes, + but the `service parsedmarc status` command will only show the logs for the + current process. To view the logs for previous runs as well as the + current process (newest to oldest), run: + + .. code-block:: bash + + journalctl -u parsedmarc.service -r + Using the Kibana dashboards =========================== From 08806f0d0c8d9f6a61312bb30f003e55287f9991 Mon Sep 17 00:00:00 2001 From: Sean Whalen Date: Wed, 26 Sep 2018 13:03:33 -0400 Subject: [PATCH 30/30] Workaround for random Exchange/Office365 `Server Unavailable` IMAP errors --- CHANGELOG.md | 1 + parsedmarc/__init__.py | 24 ++++++++++++++++++++++-- parsedmarc/cli.py | 6 ++---- 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 58b21e7..bea519b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ system path - Fall back to plain text SMTP if `--outgoing-ssl` is not used and `STARTTLS` is not supported by the server - Always use `\n` as the newline when generating CSVs +- Workaround for random Exchange/Office365 `Server Unavailable` IMAP errors 3.9.7 ----- diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py index 4bbcd29..0e92ca8 100644 --- a/parsedmarc/__init__.py +++ b/parsedmarc/__init__.py @@ -1524,8 +1524,28 @@ def watch_inbox(host, username, password, callback, reports_folder="INBOX", server.idle()
except imapclient.exceptions.IMAPClientError as error: - error = error.__str__().lstrip("b'").rstrip("'").rstrip(".") - raise IMAPError(error) + error = error.__str__().replace("b'", "").replace("'", "") + # Workaround for random Exchange/Office365 IMAP errors + if "Server Unavailable. 15" in error: + logger.debug("IMAP error: {0}".format(error)) + logger.debug("Reconnecting watcher") + server = imapclient.IMAPClient(host) + server.login(username, password) + server.select_folder(rf) + idle_start_time = time.monotonic() + ms = "MOVE" in get_imap_capabilities(server) + res = get_dmarc_reports_from_inbox(connection=server, + move_supported=ms, + reports_folder=rf, + archive_folder=af, + delete=delete, + test=test, + nameservers=ns, + dns_timeout=dt) + callback(res) + server.idle() + else: + raise IMAPError(error) except socket.gaierror: raise IMAPError("DNS resolution failed") except ConnectionRefusedError: diff --git a/parsedmarc/cli.py b/parsedmarc/cli.py index ef9635f..ddb678f 100644 --- a/parsedmarc/cli.py +++ b/parsedmarc/cli.py @@ -227,8 +227,7 @@ def _main(): forensic_reports += reports["forensic_reports"] except IMAPError as error: - error_str = error.__str__().replace("b'", "").lstrip("'") - logger.error("IMAP Error: {0}".format(error_str)) + logger.error("IMAP Error: {0}".format(error.__str__())) exit(1) results = OrderedDict([("aggregate_reports", aggregate_reports), @@ -264,8 +263,7 @@ def _main(): test=args.test, nameservers=args.nameservers, dns_timeout=args.timeout) except IMAPError as error: - error_str = error.__str__().replace("b'", "").lstrip("'") - logger.error("IMAP Error: {0}".format(error_str)) + logger.error("IMAP Error: {0}".format(error.__str__())) exit(1)