Sean Whalen
2018-10-09 11:12:04 -04:00
13 changed files with 301 additions and 63 deletions
+3 -1
@@ -106,7 +106,6 @@ ENV/
.idea/
# I/O files
output/
*.zip
*.gz
@@ -121,3 +120,6 @@ output/
# Data files
*.dat
*.mmdb
# Temp files
tmp/
+20
@@ -1,3 +1,23 @@
4.2.0
------
- Save each aggregate report record as a separate Splunk event
- Fix IMAP delete action
- Suppress Splunk SSL validation warnings
- Change default logging level to `WARNING`
4.1.9
-----
- Workaround for forensic/ruf reports that are missing `Arrival-Date` and/or
`Reported-Domain`
4.1.8
-----
- Be more forgiving of weird XML
4.1.7
-----
+1 -1
@@ -390,7 +390,7 @@ To set up visual dashboards of DMARC data, install Elasticsearch and Kibana.
wget -qO - https://artifacts.elastic.co/GPG-KEY-elasticsearch | sudo apt-key add -
echo "deb https://artifacts.elastic.co/packages/6.x/apt stable main" | sudo tee -a /etc/apt/sources.list.d/elastic-6.x.list
sudo apt-get update
sudo apt-get install -y elasticsearch kibana
sudo apt-get install -y default-jre-headless elasticsearch kibana
sudo systemctl daemon-reload
sudo systemctl enable elasticsearch.service
sudo systemctl enable kibana.service
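Once the packages are installed and the services enabled, a quick sanity check that Elasticsearch actually came up (a hypothetical snippet, not part of this commit; 9200 is Elasticsearch's default port):
import requests

# The root endpoint answers with cluster metadata once the JRE and
# service are in place; Kibana serves its UI separately on port 5601.
info = requests.get("http://localhost:9200").json()
print(info["version"]["number"])  # a 6.x release if the apt setup worked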
+78 -45
@@ -44,13 +44,14 @@ import imapclient.exceptions
import dateparser
import mailparser
__version__ = "4.1.7"
__version__ = "4.2.0"
logger = logging.getLogger(__name__)
logger.setLevel(logging.ERROR)
logger.setLevel(logging.WARNING)
feedback_report_regex = re.compile(r"^([\w\-]+): (.+)$", re.MULTILINE)
xml_schema_regex = re.compile(r"\s*<xs:schema.*>", re.MULTILINE)
xml_header_regex = re.compile(r"^<\?xml .*$", re.MULTILINE)
xml_schema_regex = re.compile(r"<\/?xs:schema.*>", re.MULTILINE)
MAGIC_ZIP = b"\x50\x4B\x03\x04"
MAGIC_GZIP = b"\x1F\x8B"
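For context, the reworked patterns replace any XML declaration line with a clean one and strip both opening and closing xs:schema tags before parsing; a minimal standalone sketch (the sample input is made up):
import re

xml_header_regex = re.compile(r"^<\?xml .*$", re.MULTILINE)
xml_schema_regex = re.compile(r"<\/?xs:schema.*>", re.MULTILINE)

sample = ('<?xml version="1.0" encoding="UTF-16"?>\n'
          '<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">\n'
          '<feedback></feedback>\n'
          '</xs:schema>')
xml = xml_header_regex.sub('<?xml version="1.0"?>', sample)
xml = xml_schema_regex.sub("", xml)
print(xml)  # a clean declaration and the <feedback> element remain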
@@ -392,23 +393,28 @@ def _parse_report_record(record, nameservers=None, timeout=2.0):
new_record["auth_results"] = OrderedDict([("dkim", []), ("spf", [])])
if record["auth_results"] is not None:
auth_results = record["auth_results"].copy()
if "spf" not in auth_results:
auth_results["spf"] = []
if "dkim" not in auth_results:
auth_results["dkim"] = []
else:
auth_results = new_record["auth_results"].copy()
if "dkim" in auth_results:
if type(auth_results["dkim"]) != list:
auth_results["dkim"] = [auth_results["dkim"]]
for result in auth_results["dkim"]:
if "domain" in result and result["domain"] is not None:
new_result = OrderedDict([("domain", result["domain"])])
if "selector" in result and result["selector"] is not None:
new_result["selector"] = result["selector"]
else:
new_result["selector"] = "none"
if "result" in result and result["result"] is not None:
new_result["result"] = result["result"]
else:
new_result["result"] = "none"
new_record["auth_results"]["dkim"].append(new_result)
if type(auth_results["dkim"]) != list:
auth_results["dkim"] = [auth_results["dkim"]]
for result in auth_results["dkim"]:
if "domain" in result and result["domain"] is not None:
new_result = OrderedDict([("domain", result["domain"])])
if "selector" in result and result["selector"] is not None:
new_result["selector"] = result["selector"]
else:
new_result["selector"] = "none"
if "result" in result and result["result"] is not None:
new_result["result"] = result["result"]
else:
new_result["result"] = "none"
new_record["auth_results"]["dkim"].append(new_result)
if type(auth_results["spf"]) != list:
auth_results["spf"] = [auth_results["spf"]]
for result in auth_results["spf"]:
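The repeated type(...) != list guards exist because xmltodict returns a single mapping when a report contains one <dkim> or <spf> element and a list when it contains several; a standalone illustration (made-up XML):
import xmltodict

one = xmltodict.parse("<a><spf><domain>x.example</domain></spf></a>")["a"]
two = xmltodict.parse("<a><spf><domain>x.example</domain></spf>"
                      "<spf><domain>y.example</domain></spf></a>")["a"]
print(type(one["spf"]).__name__)  # a single mapping, not a list
print(type(two["spf"]).__name__)  # list

spf = one["spf"]
if not isinstance(spf, list):
    spf = [spf]  # the same normalization the parser applies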
@@ -424,16 +430,19 @@ def _parse_report_record(record, nameservers=None, timeout=2.0):
new_record["auth_results"]["spf"].append(new_result)
if "envelope_from" not in new_record["identifiers"]:
envelope_from = new_record["auth_results"]["spf"][-1]["domain"]
envelope_from = None
if len(auth_results["spf"]) > 0:
envelope_from = new_record["auth_results"]["spf"][-1]["domain"]
if envelope_from is not None:
envelope_from = str(envelope_from).lower()
new_record["identifiers"]["envelope_from"] = envelope_from
elif new_record["identifiers"]["envelope_from"] is None:
envelope_from = new_record["auth_results"]["spf"][-1]["domain"]
if envelope_from is not None:
envelope_from = str(envelope_from).lower()
new_record["identifiers"]["envelope_from"] = envelope_from
if len(auth_results["spf"]) > 0:
envelope_from = new_record["auth_results"]["spf"][-1]["domain"]
if envelope_from is not None:
envelope_from = str(envelope_from).lower()
new_record["identifiers"]["envelope_from"] = envelope_from
envelope_to = None
if "envelope_to" in new_record["identifiers"]:
@@ -457,9 +466,20 @@ def parse_aggregate_report_xml(xml, nameservers=None, timeout=2.0):
Returns:
OrderedDict: The parsed aggregate DMARC report
"""
errors = []
try:
xmltodict.parse(xml)["feedback"]
except Exception as e:
errors.append(e.__str__())
try:
# Replace XML header (sometimes they are invalid)
xml = xml_header_regex.sub('<?xml version="1.0"?>', xml)
# Remove invalid schema tags
xml = xml_schema_regex.sub("", xml)
report = xmltodict.parse(xml)["feedback"]
report_metadata = report["report_metadata"]
schema = "draft"
@@ -467,7 +487,13 @@ def parse_aggregate_report_xml(xml, nameservers=None, timeout=2.0):
schema = report["version"]
new_report = OrderedDict([("xml_schema", schema)])
new_report_metadata = OrderedDict()
org_name = _get_base_domain(report_metadata["org_name"])
if report_metadata["org_name"] is None:
if report_metadata["email"] is not None:
report_metadata["org_name"] = report_metadata[
"email"].split("@")[-1]
org_name = report_metadata["org_name"]
if org_name is not None:
org_name = _get_base_domain(org_name)
new_report_metadata["org_name"] = org_name
new_report_metadata["org_email"] = report_metadata["email"]
extra = None
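One of the new sample reports (an empty <org_name> alongside email administrator@accurateplastics.com) exercises this fallback; a reduced sketch of the logic:
report_metadata = {"org_name": None,
                   "email": "administrator@accurateplastics.com"}
if report_metadata["org_name"] is None:
    if report_metadata["email"] is not None:
        # Derive the organization name from the reporting address
        report_metadata["org_name"] = report_metadata["email"].split("@")[-1]
print(report_metadata["org_name"])  # accurateplastics.com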
@@ -484,7 +510,6 @@ def parse_aggregate_report_xml(xml, nameservers=None, timeout=2.0):
date_range["end"] = _timestamp_to_human(date_range["end"])
new_report_metadata["begin_date"] = date_range["begin"]
new_report_metadata["end_date"] = date_range["end"]
errors = []
if "error" in report["report_metadata"]:
if type(report["report_metadata"]["error"]) != list:
errors = [report["report_metadata"]["error"]]
@@ -526,13 +551,16 @@ def parse_aggregate_report_xml(xml, nameservers=None, timeout=2.0):
if type(report["record"]) == list:
for record in report["record"]:
records.append(_parse_report_record(record,
nameservers=nameservers,
timeout=timeout))
report_record = _parse_report_record(record,
nameservers=nameservers,
timeout=timeout)
records.append(report_record)
else:
records.append(_parse_report_record(report["record"],
nameservers=nameservers))
report_record = _parse_report_record(report["record"],
nameservers=nameservers,
timeout=timeout)
records.append(report_record)
new_report["records"] = records
@@ -726,7 +754,7 @@ def parsed_aggregate_reports_to_csv(reports):
def parse_forensic_report(feedback_report, sample, sample_headers_only,
nameservers=None, timeout=2.0):
msg_date, nameservers=None, timeout=2.0):
"""
Converts a DMARC forensic report and sample to a ``OrderedDict``
@@ -734,12 +762,13 @@ def parse_forensic_report(feedback_report, sample, sample_headers_only,
feedback_report (str): A message's feedback report as a string
sample (str): The RFC 822 headers or RFC 822 message sample
sample_headers_only (bool): Set true if the sample is only headers
msg_date (str): The message's date header
nameservers (list): A list of one or more nameservers to use
(Cloudflare's public DNS resolvers by default)
timeout (float): Sets the DNS timeout in seconds
Returns:
OrderedDict: An parsed report and sample
OrderedDict: A parsed report and sample
"""
def convert_address(original_address):
@@ -777,14 +806,14 @@ def parse_forensic_report(feedback_report, sample, sample_headers_only,
for report_value in report_values:
key = report_value[0].lower().replace("-", "_")
parsed_report[key] = report_value[1]
if key == "arrival_date":
arrival_utc = dateparser.parse(parsed_report["arrival_date"],
settings={"TO_TIMEZONE": "UTC"})
arrival_utc = arrival_utc.strftime("%Y-%m-%d %H:%M:%S")
parsed_report["arrival_date_utc"] = arrival_utc
if "arrival_date_utc" not in parsed_report:
raise InvalidForensicReport("Missing Arrival-Date")
if "arrival_date" not in parsed_report:
parsed_report["arrival_date"] = msg_date
arrival_utc = dateparser.parse(parsed_report["arrival_date"],
settings={"TO_TIMEZONE": "UTC"})
arrival_utc = arrival_utc.strftime("%Y-%m-%d %H:%M:%S")
parsed_report["arrival_date_utc"] = arrival_utc
ip_address = parsed_report["source_ip"]
parsed_report["source"] = _get_ip_address_info(ip_address,
@@ -827,7 +856,11 @@ def parse_forensic_report(feedback_report, sample, sample_headers_only,
if "date_utc" in received:
received["date_utc"] = received["date_utc"].replace("T",
" ")
parsed_sample["from"] = convert_address(parsed_sample["from"][0])
msg_from = convert_address(parsed_sample["from"][0])
parsed_sample["from"] = msg_from
if "reported_domain" not in parsed_report:
domain = msg_from["address"].split("@")[-1].lower()
parsed_report["reported_domain"] = domain
if "reply_to" in parsed_sample:
parsed_sample["reply_to"] = list(map(lambda x: convert_address(x),
@@ -1012,6 +1045,7 @@ def parse_report_email(input_, nameservers=None, timeout=2.0):
sample = None
if "subject" in msg:
subject = decode_header(msg["subject"])
date = decode_header(msg["date"])
for part in msg.walk():
content_type = part.get_content_type()
payload = part.get_payload()
@@ -1039,6 +1073,7 @@ def parse_report_email(input_, nameservers=None, timeout=2.0):
forensic_report = parse_forensic_report(feedback_report,
sample,
sample_headers_only,
date,
nameservers=nameservers,
timeout=timeout)
@@ -1214,10 +1249,8 @@ def get_dmarc_reports_from_inbox(host=None,
if type(msg_uids) == str:
msg_uids = [msg_uids]
for chunk in chunks(msg_uids, 100):
server.add_flags(chunk, [imapclient.DELETED])
server.expunge()
server.delete_messages(msg_uids, silent=True)
server.expunge(msg_uids)
def move_messages(msg_uids, folder):
if type(msg_uids) == str:
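The old path set the \Deleted flag in 100-message chunks and expunged the whole folder; the new path uses IMAPClient's helpers, and expunge(msg_uids) issues a targeted UID EXPUNGE. A hedged sketch of the same calls (hostname and credentials are placeholders):
import imapclient

server = imapclient.IMAPClient("imap.example.com", use_uid=True)
server.login("user@example.com", "password")  # placeholder credentials
server.select_folder("INBOX")

uids = server.search(["ALL"])
server.delete_messages(uids, silent=True)  # sets the \Deleted flag
server.expunge(uids)  # UID EXPUNGE (RFC 4315): removes only these UIDs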
+5 -4
@@ -83,7 +83,7 @@ def _main():
arg_parser.add_argument("--imap-port", default=None, help="IMAP port")
arg_parser.add_argument("--imap-no-ssl", action="store_true",
default=False,
help="Do not use SSL when connecting to IMAP")
help="Do not use SSL/TLS when connecting to IMAP")
arg_parser.add_argument("-r", "--reports-folder", default="INBOX",
help="The IMAP folder containing the reports\n"
"Default: INBOX")
@@ -157,7 +157,7 @@ def _main():
help="Do not move or delete IMAP messages",
action="store_true", default=False)
arg_parser.add_argument("-s", "--silent", action="store_true",
help="Only print errors")
help="Only print errors and warnings")
arg_parser.add_argument("--debug", action="store_true",
help="Print debugging information")
arg_parser.add_argument("-v", "--version", action="version",
@@ -168,8 +168,9 @@ def _main():
args = arg_parser.parse_args()
logging.basicConfig(level=logging.ERROR)
logger.setLevel(logging.ERROR)
logging.basicConfig(level=logging.WARNING)
logger.setLevel(logging.WARNING)
if args.debug:
logging.basicConfig(level=logging.DEBUG)
logger.setLevel(logging.DEBUG)
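Worth noting for the pattern above: logging.basicConfig is a no-op once the root logger already has a handler, so the --debug branch must also raise the logger's level directly. A small demonstration (logger name assumed):
import logging

logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger("parsedmarc")
logging.basicConfig(level=logging.DEBUG)  # no-op: a handler already exists
logger.setLevel(logging.DEBUG)  # this is what actually enables debug output
logger.debug("now visible")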
+12 -5
@@ -1,11 +1,14 @@
from urllib.parse import urlparse
import socket
import json
import urllib3
import requests
from parsedmarc import __version__, human_timestamp_to_timestamp
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
class SplunkError(RuntimeError):
"""Raised when a Splunk API error occurs"""
@@ -93,11 +96,12 @@ class HECClient(object):
new_report["spf_results"] = record["auth_results"][
"spf"]
data["sourcetype"] = "dmarc:aggregate"
timestamp = human_timestamp_to_timestamp(new_report["begin_date"])
data["time"] = timestamp
data["event"] = new_report.copy()
json_str += "{0}\n".format(json.dumps(data))
data["sourcetype"] = "dmarc:aggregate"
timestamp = human_timestamp_to_timestamp(
new_report["begin_date"])
data["time"] = timestamp
data["event"] = new_report.copy()
json_str += "{0}\n".format(json.dumps(data))
try:
response = self.session.post(self.url, data=json_str).json()
except Exception as e:
@@ -135,3 +139,6 @@ class HECClient(object):
raise SplunkError(e.__str__())
if response["code"] != 0:
raise SplunkError(response["text"])
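The indentation fix above moves serialization inside the per-record loop, so every record becomes its own newline-delimited HEC event instead of only the last one surviving. A minimal standalone sketch of the framing (endpoint, token, and records are placeholders; source IPs borrowed from the new sample reports):
import json
import requests

url = "https://splunk.example.com:8088/services/collector/event"
headers = {"Authorization": "Splunk 00000000-0000-0000-0000-000000000000"}
records = [{"source_ip": "12.20.127.122"}, {"source_ip": "199.230.200.36"}]

json_str = ""
for record in records:
    data = {"sourcetype": "dmarc:aggregate", "event": record}
    json_str += "{0}\n".format(json.dumps(data))  # one HEC event per line

# HEC accepts several events concatenated in a single POST body
response = requests.post(url, data=json_str, headers=headers, verify=False)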
+1
@@ -1,4 +1,5 @@
dnspython
urllib3
requests
publicsuffix
xmltodict
@@ -0,0 +1,40 @@
<?xml version="1.0" encoding="UTF-8" ?>
<feedback>
<report_metadata>
<org_name></org_name>
<email>administrator@accurateplastics.com</email>
<report_id>example.com:1538463741</report_id>
<date_range>
<begin>1538413632</begin>
<end>1538413632</end>
</date_range>
</report_metadata>
<policy_published>
<domain>example.com</domain>
<adkim>r</adkim>
<aspf>r</aspf>
<p>none</p>
<sp>reject</sp>
<pct>100</pct>
</policy_published>
<record>
<row>
<source_ip>12.20.127.122</source_ip>
<count>1</count>
<policy_evaluated>
<disposition>none</disposition>
<dkim>fail</dkim>
<spf>fail</spf>
</policy_evaluated>
</row>
<identifiers>
<header_from>example.com</header_from>
</identifiers>
<auth_results>
<spf>
<domain></domain>
<result>none</result>
</spf>
</auth_results>
</record>
</feedback>
@@ -0,0 +1,44 @@
<?xml version="1.0"?>
<feedback>
<version>1.0</version>
<report_metadata>
<org_name>addisonfoods.com</org_name>
<email>postmaster@addisonfoods.com</email>
<report_id>3ceb5548498640beaeb47327e202b0b9</report_id>
<date_range>
<begin>1536105600</begin>
<end>1536191999</end>
</date_range>
</report_metadata>
<policy_published>
<domain>example.com</domain>
<adkim>r</adkim>
<aspf>r</aspf>
<p>none</p>
<sp>none</sp>
<pct>100</pct>
<fo>0</fo>
</policy_published>
<record>
<row>
<source_ip>109.203.100.17</source_ip>
<count>1</count>
<policy_evaluated>
<disposition>none</disposition>
<dkim>fail</dkim>
<spf>fail</spf>
</policy_evaluated>
</row>
<identifiers>
<envelope_from>example.com</envelope_from>
<header_from>example.com</header_from>
</identifiers>
<auth_results>
<dkim>
<domain>toptierhighticket.club</domain>
<selector>default</selector>
<result>pass</result>
</dkim>
</auth_results>
</record>
</feedback>
@@ -0,0 +1,34 @@
<?xml version="1.0"?>
<feedback>
<report_metadata>
<report_id>2940</report_id>
<org_name>XYZ Corporation</org_name>
<email>admin@estadocuenta1.infonacot.gob.mx</email>
<extra_contact_info>http://estadocuenta1.infonacot.gob.mx</extra_contact_info>
<date_range>
<begin>1536853302</begin>
<end>1536939702</end>
</date_range>
</report_metadata>
<policy_published>
<domain>example.com</domain>
<p>none</p>
</policy_published>
<record>
<row>
<source_ip>148.243.137.254</source_ip>
<count>1</count>
<policy_evaluated>
<disposition>none</disposition>
<dkim>fail</dkim>
<spf>fail</spf>
</policy_evaluated>
</row>
<identifiers>
<envelope_to>estadocuenta1.infonacot.gob.mx</envelope_to>
<header_from>example.com</header_from>
</identifiers>
<auth_results>
</auth_results>
</record>
</feedback>
@@ -10,7 +10,7 @@
</date_range>
</report_metadata>
<policy_published>
<domain>xxxxxx.de</domain>
<domain>example.de</domain>
<adkim>r</adkim>
<aspf>r</aspf>
<p>none</p>
@@ -29,12 +29,12 @@
</policy_evaluated>
</row>
<identifiers>
<header_from>xxxxxx.de</header_from>
<envelope_from>xxxxxx.de</envelope_from>
<header_from>example.de</header_from>
<envelope_from>example.de</envelope_from>
</identifiers>
<auth_results>
<dkim>
<domain>xxxxxx.de</domain>
<domain>example.de</domain>
<result>pass</result>
</dkim>
<spf>
@@ -0,0 +1,56 @@
<?xml version="1.0"?>
<feedback>
<version>1.0</version>
<report_metadata>
<org_name>usssa.com</org_name>
<email>postmaster@usssa.com</email>
<report_id>8953b4d4a4ee4218b6ac0e2cb2667ee1</report_id>
<date_range>
<begin>1538784000</begin>
<end>1538870399</end>
</date_range>
</report_metadata>
<policy_published>
<domain>example.com</domain>
<adkim>r</adkim>
<aspf>r</aspf>
<p>none</p>
<sp>none</sp>
<pct>100</pct>
<fo>0</fo>
</policy_published>
<record>
<row>
<source_ip>12.20.127.40</source_ip>
<count>1</count>
<policy_evaluated>
<disposition>none</disposition>
<dkim>fail</dkim>
<spf>fail</spf>
</policy_evaluated>
</row>
<identifiers>
<envelope_from></envelope_from>
<header_from>example.com</header_from>
</identifiers>
<auth_results>
</auth_results>
</record>
<record>
<row>
<source_ip>199.230.200.36</source_ip>
<count>1</count>
<policy_evaluated>
<disposition>none</disposition>
<dkim>fail</dkim>
<spf>fail</spf>
</policy_evaluated>
</row>
<identifiers>
<envelope_from></envelope_from>
<header_from>example.com</header_from>
</identifiers>
<auth_results>
</auth_results>
</record>
</feedback>
+3 -3
@@ -14,7 +14,7 @@ from setuptools import setup
from codecs import open
from os import path
__version__ = "4.1.7"
__version__ = "4.2.0"
description = "A Python package and CLI for parsing aggregate and " \
"forensic DMARC reports"
@@ -92,8 +92,8 @@ setup(
# requirements files see:
# https://packaging.python.org/en/latest/requirements.html
install_requires=['dnspython', 'publicsuffix', 'xmltodict', 'geoip2',
'dnspython', 'imapclient', 'mail-parser', 'dateparser',
'elasticsearch>=6.3.0,<7.0.0',
'urllib3', 'requests', 'imapclient', 'mail-parser',
'dateparser', 'elasticsearch>=6.3.0,<7.0.0',
'elasticsearch-dsl>=6.2.1,<7.0.0'
],