Add mbox support: DMARC reports can be parsed from mbox files that are passed
on the command line as "mbox:<path>".

Note: the parsedmarc/__init__.py hunk was revised after this diff was first
generated, so the post-image blob hash on its "index" line is stale.

diff --git a/build.sh b/build.sh
index 9e8591b..b2e4f09 100755
--- a/build.sh
+++ b/build.sh
@@ -1,4 +1,19 @@
 #!/usr/bin/env bash
 
+set -e
+
 . venv/bin/activate
-pip install -U -r requirements.txt && rstcheck --report warning README.rst && cd docs && make html && touch _build/html/.nojekyll && cp -rf _build/html/* ../../parsedmarc-docs/ && cd .. && flake8 parsedmarc && flake8 tests.py && rm -rf dist/ build/ && python3 setup.py sdist && python3 setup.py bdist_wheel
+
+pip install -U -r requirements.txt
+rstcheck --report warning README.rst
+cd docs
+make html
+touch _build/html/.nojekyll
+mkdir -p ../../parsedmarc-docs/
+cp -rf _build/html/* ../../parsedmarc-docs/
+cd ..
+flake8 parsedmarc
+flake8 tests.py
+rm -rf dist/ build/
+python3 setup.py sdist
+python3 setup.py bdist_wheel
diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py
index 3d9fe37..4a3dc03 100644
--- a/parsedmarc/__init__.py
+++ b/parsedmarc/__init__.py
@@ -894,6 +894,70 @@ def parse_report_file(input_, nameservers=None, dns_timeout=2.0,
     return results
 
 
+def get_dmarc_reports_from_mbox(input_, nameservers=None, dns_timeout=2.0,
+                                strip_attachment_payloads=False,
+                                offline=False, parallel=False):
+    """Parses a mailbox in mbox format containing e-mails with attached
+    DMARC reports
+
+    Args:
+        input_: A path to a mbox file
+        nameservers (list): A list of one or more nameservers to use
+        (Cloudflare's public DNS resolvers by default)
+        dns_timeout (float): Sets the DNS timeout in seconds
+        strip_attachment_payloads (bool): Remove attachment payloads from
+        forensic report results
+        offline (bool): Do not make online queries for geolocation or DNS
+        parallel (bool): Parallel processing
+
+    Returns:
+        OrderedDict: Lists of ``aggregate_reports`` and ``forensic_reports``
+
+    Raises:
+        InvalidDMARCReport: If the mbox file does not exist
+
+    """
+    import mailbox
+    aggregate_reports = []
+    forensic_reports = []
+    try:
+        # create=False: mailbox.mbox() creates a missing file by default, so
+        # NoSuchMailboxError would otherwise never be raised
+        mbox = mailbox.mbox(input_, create=False)
+    except mailbox.NoSuchMailboxError:
+        raise InvalidDMARCReport("Mailbox {0} does not exist".format(input_))
+    try:
+        message_keys = mbox.keys()
+        total_messages = len(message_keys)
+        logger.debug("Found {0} messages in {1}".format(total_messages,
+                                                        input_))
+        for i, message_key in enumerate(message_keys, start=1):
+            logger.debug("Processing message {0} of {1}".format(
+                i, total_messages))
+            msg_content = mbox.get_string(message_key)
+            try:
+                parsed_email = parse_report_email(
+                    msg_content,
+                    offline=offline,
+                    nameservers=nameservers,
+                    dns_timeout=dns_timeout,
+                    strip_attachment_payloads=strip_attachment_payloads,
+                    parallel=parallel)
+            except InvalidDMARCReport as error:
+                # One unparsable message must not abort the whole mailbox
+                logger.warning(str(error))
+                continue
+            if parsed_email["report_type"] == "aggregate":
+                aggregate_reports.append(parsed_email["report"])
+            elif parsed_email["report_type"] == "forensic":
+                forensic_reports.append(parsed_email["report"])
+    finally:
+        # Release the file handle held by the mailbox object
+        mbox.close()
+    return OrderedDict([("aggregate_reports", aggregate_reports),
+                        ("forensic_reports", forensic_reports)])
+
+
 def get_imap_capabilities(server):
     """
     Returns a list of an IMAP server's capabilities
diff --git a/parsedmarc/cli.py b/parsedmarc/cli.py
index e3ae77c..523766e 100644
--- a/parsedmarc/cli.py
+++ b/parsedmarc/cli.py
@@ -17,8 +17,8 @@ import time
 from tqdm import tqdm
 
 from parsedmarc import get_dmarc_reports_from_inbox, watch_inbox, \
-    parse_report_file, elastic, kafkaclient, splunk, save_output, \
-    email_results, ParserError, __version__, \
+    parse_report_file, get_dmarc_reports_from_mbox, elastic, kafkaclient, \
+    splunk, save_output, email_results, ParserError, __version__, \
     InvalidDMARCReport
 
 logger = logging.getLogger("parsedmarc")
@@ -152,7 +152,8 @@ def _main():
                                  "(--silent implied)")
     arg_parser.add_argument("file_path", nargs="*",
                             help="one or more paths to aggregate or forensic "
-                                 "report files or emails")
+                                 "report files or emails; prepend "
+                                 "mailboxes with 'mbox:' ")
     strip_attachment_help = "remove attachment payloads from forensic " \
                             "report output"
     arg_parser.add_argument("--strip-attachment-payloads",
@@ -501,9 +502,15 @@ def _main():
             kafka_forensic_topic = opts.kafka_forensic_topic
 
     file_paths = []
+    mbox_paths = []
     for file_path in args.file_path:
-        file_paths += glob(file_path)
+        if not file_path.startswith("mbox:"):
+            file_paths += glob(file_path)
+        else:
+            mbox_paths += glob(file_path[5:])
+
     file_paths = list(set(file_paths))
+    mbox_paths = list(set(mbox_paths))
 
     counter = Value('i', 0)
     pool = Pool(opts.n_procs, initializer=init, initargs=(counter,))
@@ -534,6 +541,14 @@ def _main():
             elif result[0]["report_type"] == "forensic":
                 forensic_reports.append(result[0]["report"])
 
+    for mbox_path in mbox_paths:
+        reports = get_dmarc_reports_from_mbox(mbox_path, opts.nameservers,
+                                              opts.dns_timeout,
+                                              opts.strip_attachment_payloads,
+                                              opts.offline, False)
+        aggregate_reports += reports["aggregate_reports"]
+        forensic_reports += reports["forensic_reports"]
+
     if opts.imap_host:
         try:
             if opts.imap_user is None or opts.imap_password is None: