Auto detect mbox files and add IMAP timeouts

This commit is contained in:
Sean Whalen
2019-09-23 00:12:51 -04:00
parent 3fef3b58a8
commit c991feb860
5 changed files with 60 additions and 27 deletions

View File

@@ -976,6 +976,8 @@ def get_dmarc_reports_from_inbox(connection=None,
port=None,
ssl=True,
verify=True,
timeout=30,
max_retries=4,
reports_folder="INBOX",
archive_folder="Archive",
delete=False,
@@ -996,6 +998,8 @@ def get_dmarc_reports_from_inbox(connection=None,
port: The mail server port
ssl (bool): Use SSL/TLS
verify (bool): Verify SSL/TLS certificate
timeout (float): IMAP timeout in seconds
max_retries (int): The maximum number of retries after a timeout
reports_folder: The IMAP folder where reports can be found
archive_folder: The folder to move processed mail to
delete (bool): Delete messages after processing them
@@ -1034,6 +1038,8 @@ def get_dmarc_reports_from_inbox(connection=None,
else:
server = IMAPClient(host, user, password, port=port,
ssl=ssl, verify=verify,
timeout=timeout,
max_retries=max_retries,
initial_folder=reports_folder)
server.create_folder(archive_folder)

View File

@@ -20,6 +20,7 @@ from parsedmarc import get_dmarc_reports_from_inbox, watch_inbox, \
parse_report_file, get_dmarc_reports_from_mbox, elastic, kafkaclient, \
splunk, save_output, email_results, ParserError, __version__, \
InvalidDMARCReport
from parsedmarc.utils import is_mbox
logger = logging.getLogger("parsedmarc")
@@ -152,8 +153,7 @@ def _main():
"(--silent implied)")
arg_parser.add_argument("file_path", nargs="*",
help="one or more paths to aggregate or forensic "
"report files or emails; prepend "
"mailboxes with 'mbox:' ")
"report files, emails, or mbox files"
strip_attachment_help = "remove attachment payloads from forensic " \
"report output"
arg_parser.add_argument("--strip-attachment-payloads",
@@ -199,6 +199,8 @@ def _main():
imap_skip_certificate_verification=False,
imap_ssl=True,
imap_port=993,
imap_timeout=30,
imap_max_retries=4,
imap_user=None,
imap_password=None,
imap_reports_folder="INBOX",
@@ -211,6 +213,7 @@ def _main():
hec_index=None,
hec_skip_certificate_verification=False,
elasticsearch_hosts=None,
elasticsearch_timeout=60,
elasticsearch_number_of_shards=1,
elasticsearch_number_of_replicas=1,
elasticsearch_index_suffix=None,
@@ -284,7 +287,11 @@ def _main():
"imap config section")
exit(-1)
if "port" in imap_config:
opts.imap_port = imap_config["port"]
opts.imap_port = imap_config.getint("port")
if "timeout" in imap_config:
opts.imap_timeout = imap_config.getfloat("timeout")
if "max_retries" in imap_config:
opts.imap_max_retries = imap_config.getint("max_retries")
if "ssl" in imap_config:
opts.imap_ssl = imap_config.getboolean("ssl")
if "skip_certificate_verification" in imap_config:
@@ -323,6 +330,9 @@ def _main():
logger.critical("hosts setting missing from the "
"elasticsearch config section")
exit(-1)
if "timeout" in elasticsearch_config:
timeout = elasticsearch_config.getfloat("timeout")
opts.elasticsearch_timeout = timeout
if "number_of_shards" in elasticsearch_config:
number_of_shards = elasticsearch_config.getint(
"number_of_shards")
@@ -478,7 +488,8 @@ def _main():
es_forensic_index, suffix)
elastic.set_hosts(opts.elasticsearch_hosts,
opts.elasticsearch_ssl,
opts.elasticsearch_ssl_cert_path)
opts.elasticsearch_ssl_cert_path,
timeout=opts.elasticsearch_timeout)
elastic.migrate_indexes(aggregate_indexes=[es_aggregate_index],
forensic_indexes=[es_forensic_index])
except elastic.ElasticsearchError as error:
@@ -503,15 +514,19 @@ def _main():
file_paths = []
mbox_paths = []
for file_path in args.file_path:
if not file_path.startswith("mbox:"):
file_paths += glob(file_path)
else:
mbox_paths += glob(file_path[5:])
file_paths += glob(file_path)
for file_path in file_paths:
if is_mbox(file_path):
mbox_paths.append(file_path)
file_paths = list(set(file_paths))
mbox_paths = list(set(mbox_paths))
for mbox_path in mbox_paths:
file_paths.remove(mbox_path)
counter = Value('i', 0)
pool = Pool(opts.n_procs, initializer=init, initargs=(counter,))
results = pool.starmap_async(cli_parse,
@@ -566,19 +581,22 @@ def _main():
verify = False
if opts.imap_ssl is False:
ssl = False
reports = get_dmarc_reports_from_inbox(host=opts.imap_host,
port=opts.imap_port,
ssl=ssl,
verify=verify,
user=opts.imap_user,
password=opts.imap_password,
reports_folder=rf,
archive_folder=af,
delete=opts.imap_delete,
offline=opts.offline,
nameservers=ns,
test=opts.imap_test,
strip_attachment_payloads=sa
reports = get_dmarc_reports_from_inbox(
host=opts.imap_host,
port=opts.imap_port,
ssl=ssl,
verify=verify,
timeout=opts.imap_timeout,
max_retries=opts.imap_max_retries,
user=opts.imap_user,
password=opts.imap_password,
reports_folder=rf,
archive_folder=af,
delete=opts.imap_delete,
offline=opts.offline,
nameservers=ns,
test=opts.imap_test,
strip_attachment_payloads=sa
)
aggregate_reports += reports["aggregate_reports"]

View File

@@ -16,6 +16,8 @@ import hashlib
import base64
import platform
import atexit
import io
import mailbox
import dateparser
import dns.reversename
@@ -422,18 +424,25 @@ def get_filename_safe_string(string):
return string
def is_mbox(content):
def is_mbox(path):
"""
Checks if the given content is a MBOX mailbox file
Args:
content: Content to check
path: Path to the file to check
Returns:
bool: A flag that indicates if a file is an MBOX mailbox file
"""
return type(content) == bytes and content.startswith(
b"\xD0\x0D\xBB\xAD")
_is_mbox = False
try:
mbox = mailbox.mbox(path)
if len(mbox.keys()) > 0:
_is_mbox = True
except Exception as e:
logger.debug("Error checking for MBOX file: {0}".format(e.__str__()))
return _is_mbox
def is_outlook_msg(content):

View File

@@ -14,7 +14,7 @@ dateparser>=0.7.1
elasticsearch>=6.3.1,<7.0.0
elasticsearch-dsl>=6.3.1,<7.0.0
kafka-python>=1.4.4
mailsuite>=1.1.0
mailsuite>=1.3.0
nose>=1.3.7
flake8>=3.7.8
doc8>=0.8.0

View File

@@ -98,7 +98,7 @@ setup(
'requests>=2.2.16.0', 'imapclient>=2.1.0',
'mail-parser>=3.9.2',
'dateparser>=0.7.1',
'mailsuite>=1.2.1',
'mailsuite>=1.3.1',
'elasticsearch>=6.3.1,<7.0.0',
'elasticsearch-dsl>=6.3.1,<7.0.0',
'kafka-python>=1.4.4',