From b46fec8983eb7080d93c698c85bf3bfb7294b2e5 Mon Sep 17 00:00:00 2001 From: Sean Whalen Date: Sun, 16 Sep 2018 22:51:49 -0400 Subject: [PATCH] 4.0.0 prelease --- CHANGELOG.md | 8 ++++ parsedmarc/__init__.py | 14 +++++- parsedmarc/cli.py | 58 +++++++++++++++++++------ parsedmarc/splunk.py | 98 ++++++++++++++++++++++++++++++++++++++++++ setup.py | 2 +- 5 files changed, 165 insertions(+), 15 deletions(-) create mode 100644 parsedmarc/splunk.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 98af1d5..c7dca80 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,11 @@ +4.0.0 +----- + +- Add support for sending DMARC reports to a Splunk HTTP Events Collector (HEC) +- Use a browser-like `User-Agent` when downloading the Public Suffix List, to avoid being blocked by security proxies + 3.9.7 ----- diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py index 73c7fde..534a20e 100644 --- a/parsedmarc/__init__.py +++ b/parsedmarc/__init__.py @@ -4,6 +4,7 @@ import logging import os +import platform import xml.parsers.expat as expat import json from datetime import datetime @@ -30,6 +31,7 @@ import smtplib import ssl import time +import requests import publicsuffix import xmltodict import dns.reversename @@ -43,7 +45,7 @@ import imapclient.exceptions import dateparser import mailparser -__version__ = "3.9.7" +__version__ = "4.0.0" logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) @@ -100,7 +102,15 @@ def _get_base_domain(domain): psl_path = ".public_suffix_list.dat" def download_psl(): - fresh_psl = publicsuffix.fetch().read() + url = "https://publicsuffix.org/list/public_suffix_list.dat" + # Use a browser-like user agent string to bypass some proxy blocks + user_agent = "Mozilla/5.0 ({0} {1}) parsedmarc/{2}".format( + platform.system(), + platform.release(), + __version__ + ) + headers = {"User-Agent": user_agent} + fresh_psl = requests.get(url, headers=headers).text with open(psl_path, "w", encoding="utf-8") as fresh_psl_file: 
fresh_psl_file.write(fresh_psl) diff --git a/parsedmarc/cli.py b/parsedmarc/cli.py index 8039544..f99ad7f 100644 --- a/parsedmarc/cli.py +++ b/parsedmarc/cli.py @@ -13,9 +13,8 @@ import json from elasticsearch.exceptions import ElasticsearchException from parsedmarc import logger, IMAPError, get_dmarc_reports_from_inbox, \ - parse_report_file, elastic, save_output, watch_inbox, email_results, \ - SMTPError, ParserError, __version__ - + parse_report_file, elastic, splunk, save_output, watch_inbox, \ + email_results, SMTPError, ParserError, __version__ def _main(): """Called when the module is executed""" @@ -28,22 +27,38 @@ def _main(): if args.save_aggregate: for report in reports_["aggregate_reports"]: try: - elastic.save_aggregate_report_to_elasticsearch(report) + if args.elasticsearch_host: + elastic.save_aggregate_report_to_elasticsearch(report) except elastic.AlreadySaved as warning: logger.warning(warning.__str__()) except ElasticsearchException as error_: logger.error("Elasticsearch Error: {0}".format( error_.__str__())) exit(1) + if args.hec: + try: + aggregate_reports_ = reports_["aggregate_reports"] + hec_client.save_aggregate_reports_to_splunk( + aggregate_reports_) + except splunk.SplunkError as e: + logger.error("Splunk HEC error: {0}".format(e.__str__())) if args.save_forensic: for report in reports_["forensic_reports"]: try: - elastic.save_forensic_report_to_elasticsearch(report) + if args.elasticsearch_host: + elastic.save_forensic_report_to_elasticsearch(report) except elastic.AlreadySaved as warning: logger.warning(warning.__str__()) except ElasticsearchException as error_: logger.error("Elasticsearch Error: {0}".format( error_.__str__())) + if args.hec: + try: + forensic_reports_ = reports_["forensic_reports"] + hec_client.save_forensic_reports_to_splunk( + forensic_reports_) + except splunk.SplunkError as e: + logger.error("Splunk HEC error: {0}".format(e.__str__())) arg_parser = ArgumentParser(description="Parses DMARC reports") 
arg_parser.add_argument("file_path", nargs="*", @@ -76,15 +91,23 @@ def _main(): arg_parser.add_argument("-E", "--elasticsearch-host", nargs="*", help="A list of one or more Elasticsearch " - "hostnames or URLs to use (Default " - "localhost:9200)", - default=["localhost:9200"]) + "hostnames or URLs to use (e.g. " + "localhost:9200)") + arg_parser.add_argument("--hec", help="URL to a Splunk HTTP Event " + "Collector (HEC)") + arg_parser.add_argument("--hec-key", help="The authorization key for a " + "Splunk HTTP event collector " + "(HEC)") + arg_parser.add_argument("--hec-index", default="dmarc", help="The index to use when " + "sending events to the " + "Splunk HTTP Events " + "Collector (Default: dmarc)") arg_parser.add_argument("--save-aggregate", action="store_true", default=False, - help="Save aggregate reports to Elasticsearch") + help="Save aggregate reports to search indexes") arg_parser.add_argument("--save-forensic", action="store_true", default=False, - help="Save forensic reports to Elasticsearch") + help="Save forensic reports to search indexes") arg_parser.add_argument("-O", "--outgoing-host", help="Email the results using this host") arg_parser.add_argument("-U", "--outgoing-user", @@ -129,13 +152,24 @@ def _main(): exit(1) if args.save_aggregate or args.save_forensic: + if args.elasticsearch_host is None and args.hec is None: + args.elasticsearch_host = ["localhost:9200"] try: - elastic.set_hosts(args.elasticsearch_host) - elastic.create_indexes() + if args.elasticsearch_host: + elastic.set_hosts(args.elasticsearch_host) + elastic.create_indexes() + if args.hec: + hec_client = splunk.HECClient(args.hec, args.hec_key, + index=args.hec_index) except ElasticsearchException as error: logger.error("Elasticsearch Error: {0}".format(error.__str__())) exit(1) + if args.hec: + if args.hec_key is None: + logger.error("HEC URL provided without HEC key") + exit(1) + file_paths = [] for file_path in args.file_path: file_paths += glob(file_path) diff --git a/parsedmarc/splunk.py 
b/parsedmarc/splunk.py new file mode 100644 index 0000000..cd6306c --- /dev/null +++ b/parsedmarc/splunk.py @@ -0,0 +1,98 @@ +from urllib.parse import urlparse +import socket +import json + +import requests + +from parsedmarc import __version__ + + +class SplunkError(RuntimeError): + """Raised when a Splunk API error occurs""" + + +class HECClient(object): + """A client for a Splunk HTTP Events Collector (HEC)""" + + # http://docs.splunk.com/Documentation/Splunk/latest/Data/AboutHEC + # http://docs.splunk.com/Documentation/Splunk/latest/RESTREF/RESTinput#services.2Fcollector + + def __init__(self, url, access_token, index="dmarc", + source="parsedmarc", verify=True): + """ + Initializes the HECClient + Args: + url (str): The URL of the HEC + access_token (str): The HEC access token + index (str): The name of the index + source (str): The source name + verify (bool): Verify SSL certificates + """ + url = urlparse(url) + self.url = "{0}://{1}/services/collector/event/1.0".format(url.scheme, + url.netloc) + self.access_token = access_token.lstrip("Splunk ") + self.index = index + self.host = socket.getfqdn() + self.source = source + self.session = requests.Session() + self.session.verify = verify + self._common_data = dict(host=self.host, source=self.source, + index=self.index) + + self.session.headers = { + "User-Agent": "parsedmarc/{0}".format(__version__), + "Authorization": "Splunk {0}".format(self.access_token) + } + + def save_aggregate_reports_to_splunk(self, aggregate_reports): + """ + Saves aggregate DMARC reports to Splunk + + Args: + aggregate_reports (list): A list of aggregate report dictionaries + to save in Splunk + + """ + if type(aggregate_reports) == dict: + aggregate_reports = [aggregate_reports] + + json_str = "" + for report in aggregate_reports: + data = self._common_data.copy() + data["sourcetype"] = "dmarc_aggregate" + data["event"] = report.copy() + json_str += "{0}\n".format(json.dumps(data)) + + try: + response = self.session.post(self.url, 
data=json_str).json() + except Exception as e: + raise SplunkError(e.__str__()) + if response["code"] != 0: + raise SplunkError(response["text"]) + + def save_forensic_reports_to_splunk(self, aggregate_reports): + """ + Saves forensic DMARC reports to Splunk + + Args: + aggregate_reports (list): A list of forensic report dictionaries + to save in Splunk + + """ + if type(aggregate_reports) == dict: + aggregate_reports = [aggregate_reports] + + json_str = "" + for report in aggregate_reports: + data = self._common_data.copy() + data["sourcetype"] = "dmarc_forensic" + data["event"] = report.copy() + json_str += "{0}\n".format(json.dumps(data)) + + try: + response = self.session.post(self.url, data=json_str).json() + except Exception as e: + raise SplunkError(e.__str__()) + if response["code"] != 0: + raise SplunkError(response["text"]) diff --git a/setup.py b/setup.py index bb67150..4de2dd0 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,7 @@ from setuptools import setup from codecs import open from os import path -__version__ = "3.9.7" +__version__ = "4.0.0" description = "A Python package and CLI for parsing aggregate and " \ "forensic DMARC reports"