Merge pull request #106 from ardovm/mbox

Allow parsing reports archived in mbox files
This commit is contained in:
Sean Whalen
2019-09-21 14:28:48 -04:00
committed by GitHub
3 changed files with 90 additions and 5 deletions

View File

@@ -1,4 +1,19 @@
#!/usr/bin/env bash
# Build helper: refreshes dependencies, validates the README, builds the
# HTML docs and copies them into the sibling parsedmarc-docs directory,
# lints the code, and builds the source and wheel distributions.
# Stop at the first command that fails.
set -e
# Run everything inside the project's virtual environment.
. venv/bin/activate
# NOTE(review): the single-line "&&" chain below performs the same steps as
# the individual commands that follow it; in this diff view it is presumably
# the pre-change version of the script -- confirm before running both.
pip install -U -r requirements.txt && rstcheck --report warning README.rst && cd docs && make html && touch _build/html/.nojekyll && cp -rf _build/html/* ../../parsedmarc-docs/ && cd .. && flake8 parsedmarc && flake8 tests.py && rm -rf dist/ build/ && python3 setup.py sdist && python3 setup.py bdist_wheel
# Upgrade the dependencies listed in requirements.txt.
pip install -U -r requirements.txt
# Check README.rst, reporting issues at warning level and above.
rstcheck --report warning README.rst
# Build the HTML documentation.
cd docs
make html
# Add an empty .nojekyll marker file to the generated HTML output.
touch _build/html/.nojekyll
# Make sure the docs destination exists before copying into it.
mkdir -p ../../parsedmarc-docs/
cp -rf _build/html/* ../../parsedmarc-docs/
cd ..
# Lint the package and the test module.
flake8 parsedmarc
flake8 tests.py
# Remove previous build output, then build the sdist and the wheel.
rm -rf dist/ build/
python3 setup.py sdist
python3 setup.py bdist_wheel

View File

@@ -894,6 +894,61 @@ def parse_report_file(input_, nameservers=None, dns_timeout=2.0,
return results
def get_dmarc_reports_from_mbox(input_, nameservers=None, dns_timeout=2.0,
                                strip_attachment_payloads=False,
                                offline=False, parallel=False):
    """Parses a mailbox in mbox format containing e-mails with attached
    DMARC reports

    Messages that cannot be parsed as a DMARC report are logged as warnings
    and skipped; they do not abort processing of the remaining messages.

    Args:
        input_: A path to an mbox file
        nameservers (list): A list of one or more nameservers to use
        (Cloudflare's public DNS resolvers by default)
        dns_timeout (float): Sets the DNS timeout in seconds
        strip_attachment_payloads (bool): Remove attachment payloads from
        forensic report results
        offline (bool): Do not make online queries for geolocation or DNS
        parallel (bool): Parallel processing

    Returns:
        OrderedDict: Lists of ``aggregate_reports`` and ``forensic_reports``

    Raises:
        InvalidDMARCReport: If the mbox file does not exist
    """
    import mailbox

    aggregate_reports = []
    forensic_reports = []

    try:
        # create=False is required here: with the default (create=True) the
        # mailbox module silently creates an empty file for a missing path,
        # so NoSuchMailboxError would never be raised.
        mbox = mailbox.mbox(input_, create=False)
    except mailbox.NoSuchMailboxError:
        raise InvalidDMARCReport("Mailbox {0} does not exist".format(input_))

    try:
        message_keys = mbox.keys()
        total_messages = len(message_keys)
        logger.debug("Found {0} messages in {1}".format(total_messages,
                                                        input_))
        for number, message_key in enumerate(message_keys, start=1):
            logger.debug("Processing message {0} of {1}".format(
                number, total_messages
            ))
            msg_content = mbox.get_string(message_key)
            try:
                parsed_email = parse_report_email(
                    msg_content,
                    offline=offline,
                    nameservers=nameservers,
                    dns_timeout=dns_timeout,
                    strip_attachment_payloads=strip_attachment_payloads,
                    parallel=parallel)
            except InvalidDMARCReport as error:
                # One unparsable message must not stop the whole mailbox
                logger.warning(str(error))
                continue
            if parsed_email["report_type"] == "aggregate":
                aggregate_reports.append(parsed_email["report"])
            elif parsed_email["report_type"] == "forensic":
                forensic_reports.append(parsed_email["report"])
    finally:
        # Release the file handle held by the mailbox object
        mbox.close()

    return OrderedDict([("aggregate_reports", aggregate_reports),
                        ("forensic_reports", forensic_reports)])
def get_imap_capabilities(server):
"""
Returns a list of an IMAP server's capabilities

View File

@@ -17,8 +17,8 @@ import time
from tqdm import tqdm
from parsedmarc import get_dmarc_reports_from_inbox, watch_inbox, \
parse_report_file, elastic, kafkaclient, splunk, save_output, \
email_results, ParserError, __version__, \
parse_report_file, get_dmarc_reports_from_mbox, elastic, kafkaclient, \
splunk, save_output, email_results, ParserError, __version__, \
InvalidDMARCReport
logger = logging.getLogger("parsedmarc")
@@ -152,7 +152,8 @@ def _main():
"(--silent implied)")
arg_parser.add_argument("file_path", nargs="*",
help="one or more paths to aggregate or forensic "
"report files or emails")
"report files or emails; prepend "
"mailboxes with 'mbox:' ")
strip_attachment_help = "remove attachment payloads from forensic " \
"report output"
arg_parser.add_argument("--strip-attachment-payloads",
@@ -501,9 +502,15 @@ def _main():
kafka_forensic_topic = opts.kafka_forensic_topic
file_paths = []
mbox_paths = []
for file_path in args.file_path:
file_paths += glob(file_path)
if not file_path.startswith("mbox:"):
file_paths += glob(file_path)
else:
mbox_paths += glob(file_path[5:])
file_paths = list(set(file_paths))
mbox_paths = list(set(mbox_paths))
counter = Value('i', 0)
pool = Pool(opts.n_procs, initializer=init, initargs=(counter,))
@@ -534,6 +541,14 @@ def _main():
elif result[0]["report_type"] == "forensic":
forensic_reports.append(result[0]["report"])
for mbox_path in mbox_paths:
reports = get_dmarc_reports_from_mbox(mbox_path, opts.nameservers,
opts.dns_timeout,
opts.strip_attachment_payloads,
opts.offline, False)
aggregate_reports += reports["aggregate_reports"]
forensic_reports += reports["forensic_reports"]
if opts.imap_host:
try:
if opts.imap_user is None or opts.imap_password is None: