Mirror of
https://github.com/domainaware/parsedmarc.git
(synced 2026-03-20 05:25:59 +00:00).
Merge pull request #106 from ardovm/mbox:
allow parsing reports archived in mbox files.
This commit is contained in:
17
build.sh
17
build.sh
@@ -1,4 +1,19 @@
|
||||
#!/usr/bin/env bash

# Build pipeline for parsedmarc: docs, lint, and distribution artifacts.
# Abort immediately if any command fails.
set -e

# Use the project's virtualenv for all subsequent tooling.
. venv/bin/activate

# Keep dependencies current before building.
pip install -U -r requirements.txt

# Validate the README reStructuredText (warnings are reported).
rstcheck --report warning README.rst

# Build the Sphinx HTML docs and publish them to the docs checkout.
cd docs
make html
# .nojekyll tells GitHub Pages not to run the output through Jekyll.
touch _build/html/.nojekyll
# Ensure the target directory exists before copying the built site.
mkdir -p ../../parsedmarc-docs/
cp -rf _build/html/* ../../parsedmarc-docs/
cd ..

# Lint the package and the test module.
flake8 parsedmarc
flake8 tests.py

# Rebuild source and wheel distributions from a clean slate.
rm -rf dist/ build/
python3 setup.py sdist
python3 setup.py bdist_wheel
|
||||
|
||||
@@ -894,6 +894,61 @@ def parse_report_file(input_, nameservers=None, dns_timeout=2.0,
|
||||
return results
|
||||
|
||||
|
||||
def get_dmarc_reports_from_mbox(input_, nameservers=None, dns_timeout=2.0,
                                strip_attachment_payloads=False,
                                offline=False, parallel=False):
    """Parses a mailbox in mbox format containing e-mails with attached
    DMARC reports

    Args:
        input_: A path to a mbox file
        nameservers (list): A list of one or more nameservers to use
        (Cloudflare's public DNS resolvers by default)
        dns_timeout (float): Sets the DNS timeout in seconds
        strip_attachment_payloads (bool): Remove attachment payloads from
        forensic report results
        offline (bool): Do not make online queries for geolocation or DNS
        parallel (bool): Parallel processing

    Returns:
        OrderedDict: Lists of ``aggregate_reports`` and ``forensic_reports``

    Raises:
        InvalidDMARCReport: If the mailbox at ``input_`` does not exist
    """
    # Imported locally to avoid loading mailbox for callers that never
    # parse mbox files.
    import mailbox
    aggregate_reports = []
    forensic_reports = []
    try:
        mbox = mailbox.mbox(input_)
        message_keys = mbox.keys()
        total_messages = len(message_keys)
        logger.debug("Found {0} messages in {1}".format(total_messages,
                                                        input_))
        # enumerate instead of range(len(...)): same order, clearer intent
        for i, message_key in enumerate(message_keys):
            logger.debug("Processing message {0} of {1}".format(
                i+1, total_messages
            ))
            msg_content = mbox.get_string(message_key)
            try:
                # Short alias keeps the call below within line-length limits.
                sa = strip_attachment_payloads
                parsed_email = parse_report_email(msg_content,
                                                  offline=offline,
                                                  nameservers=nameservers,
                                                  dns_timeout=dns_timeout,
                                                  strip_attachment_payloads=sa,
                                                  parallel=parallel)
                if parsed_email["report_type"] == "aggregate":
                    aggregate_reports.append(parsed_email["report"])
                elif parsed_email["report_type"] == "forensic":
                    forensic_reports.append(parsed_email["report"])
            except InvalidDMARCReport as error:
                # One bad message should not abort the whole mailbox scan.
                logger.warning(error.__str__())
    except mailbox.NoSuchMailboxError:
        raise InvalidDMARCReport("Mailbox {0} does not exist".format(input_))
    return OrderedDict([("aggregate_reports", aggregate_reports),
                        ("forensic_reports", forensic_reports)])
|
||||
|
||||
|
||||
def get_imap_capabilities(server):
|
||||
"""
|
||||
Returns a list of an IMAP server's capabilities
|
||||
|
||||
@@ -17,8 +17,8 @@ import time
|
||||
from tqdm import tqdm
|
||||
|
||||
from parsedmarc import get_dmarc_reports_from_inbox, watch_inbox, \
|
||||
parse_report_file, elastic, kafkaclient, splunk, save_output, \
|
||||
email_results, ParserError, __version__, \
|
||||
parse_report_file, get_dmarc_reports_from_mbox, elastic, kafkaclient, \
|
||||
splunk, save_output, email_results, ParserError, __version__, \
|
||||
InvalidDMARCReport
|
||||
|
||||
logger = logging.getLogger("parsedmarc")
|
||||
@@ -152,7 +152,8 @@ def _main():
|
||||
"(--silent implied)")
|
||||
arg_parser.add_argument("file_path", nargs="*",
|
||||
help="one or more paths to aggregate or forensic "
|
||||
"report files or emails")
|
||||
"report files or emails; prepend "
|
||||
"mailboxes with 'mbox:' ")
|
||||
strip_attachment_help = "remove attachment payloads from forensic " \
|
||||
"report output"
|
||||
arg_parser.add_argument("--strip-attachment-payloads",
|
||||
@@ -501,9 +502,15 @@ def _main():
|
||||
kafka_forensic_topic = opts.kafka_forensic_topic
|
||||
|
||||
file_paths = []
|
||||
mbox_paths = []
|
||||
for file_path in args.file_path:
|
||||
file_paths += glob(file_path)
|
||||
if not file_path.startswith("mbox:"):
|
||||
file_paths += glob(file_path)
|
||||
else:
|
||||
mbox_paths += glob(file_path[5:])
|
||||
|
||||
file_paths = list(set(file_paths))
|
||||
mbox_paths = list(set(mbox_paths))
|
||||
|
||||
counter = Value('i', 0)
|
||||
pool = Pool(opts.n_procs, initializer=init, initargs=(counter,))
|
||||
@@ -534,6 +541,14 @@ def _main():
|
||||
elif result[0]["report_type"] == "forensic":
|
||||
forensic_reports.append(result[0]["report"])
|
||||
|
||||
for mbox_path in mbox_paths:
|
||||
reports = get_dmarc_reports_from_mbox(mbox_path, opts.nameservers,
|
||||
opts.dns_timeout,
|
||||
opts.strip_attachment_payloads,
|
||||
opts.offline, False)
|
||||
aggregate_reports += reports["aggregate_reports"]
|
||||
forensic_reports += reports["forensic_reports"]
|
||||
|
||||
if opts.imap_host:
|
||||
try:
|
||||
if opts.imap_user is None or opts.imap_password is None:
|
||||
|
||||
Reference in New Issue
Block a user