From 83e229aeb1e563088de76c5d573bfab8a0a0ae37 Mon Sep 17 00:00:00 2001
From: atanas argirov <atanas@argirov.org>
Date: Mon, 28 Dec 2020 15:57:32 +0000
Subject: [PATCH 01/31] * added output_{json,csv}_{aggregate,forensic}_file
 command line args * refactored save_output() to support output_*

---
 parsedmarc/__init__.py | 14 +++++++++-----
 parsedmarc/cli.py      | 14 +++++++++++++-
 2 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py
index 4924d07..49de20b 100644
--- a/parsedmarc/__init__.py
+++ b/parsedmarc/__init__.py
@@ -1235,7 +1235,11 @@ def watch_inbox(host, username, password, callback, port=None, ssl=True,
             logger.warning("IMAP connection timeout. Reconnecting...")
 
 
-def save_output(results, output_directory="output"):
+def save_output(results, output_directory="output",
+                output_json_aggregate_file="aggregate.json",
+                output_json_forensic_file="forensic.json",
+                output_csv_aggregate_file="aggregate.csv",
+                output_csv_forensic_file="forensic.csv"):
     """
     Save report data in the given directory
 
@@ -1253,22 +1257,22 @@ def save_output(results, output_directory="output"):
     else:
         os.makedirs(output_directory)
 
-    with open("{0}".format(os.path.join(output_directory, "aggregate.json")),
+    with open("{0}".format(os.path.join(output_directory, output_json_aggregate_file)),
               "w", newline="\n", encoding="utf-8") as agg_json:
         agg_json.write(json.dumps(aggregate_reports, ensure_ascii=False,
                                   indent=2))
 
-    with open("{0}".format(os.path.join(output_directory, "aggregate.csv")),
+    with open("{0}".format(os.path.join(output_directory, output_csv_aggregate_file)),
               "w", newline="\n", encoding="utf-8") as agg_csv:
         csv = parsed_aggregate_reports_to_csv(aggregate_reports)
         agg_csv.write(csv)
 
-    with open("{0}".format(os.path.join(output_directory, "forensic.json")),
+    with open("{0}".format(os.path.join(output_directory, output_json_forensic_file)),
               "w", newline="\n", encoding="utf-8") as for_json:
         for_json.write(json.dumps(forensic_reports, ensure_ascii=False,
                                   indent=2))
 
-    with open("{0}".format(os.path.join(output_directory, "forensic.csv")),
+    with open("{0}".format(os.path.join(output_directory, output_csv_forensic_file)),
               "w", newline="\n", encoding="utf-8") as for_csv:
         csv = parsed_forensic_reports_to_csv(forensic_reports)
         for_csv.write(csv)
diff --git a/parsedmarc/cli.py b/parsedmarc/cli.py
index 9dad3fb..262a82b 100644
--- a/parsedmarc/cli.py
+++ b/parsedmarc/cli.py
@@ -82,7 +82,7 @@ def _main():
         if opts.save_aggregate:
             for report in reports_["aggregate_reports"]:
                 try:
-                    if opts.elasticsearch_hosts:
+                    if opts.elasticsearch_hosts:
                         shards = opts.elasticsearch_number_of_shards
                         replicas = opts.elasticsearch_number_of_replicas
                         elastic.save_aggregate_report_to_elasticsearch(
@@ -160,6 +160,14 @@ def _main():
                             help=strip_attachment_help, action="store_true")
     arg_parser.add_argument("-o", "--output",
                             help="write output files to the given directory")
+    arg_parser.add_argument("--output-json-aggregate-file",
+                            help="output aggregate JSON file")
+    arg_parser.add_argument("--output-json-forensic-file",
+                            help="output forensic JSON file")
+    arg_parser.add_argument("--output-csv-aggregate-file",
+                            help="output aggregate CSV file")
+    arg_parser.add_argument("--output-csv-forensic-file",
+                            help="output forensic CSV file")
     arg_parser.add_argument("-n", "--nameservers", nargs="+",
                             help="nameservers to query")
     arg_parser.add_argument("-t", "--dns_timeout",
@@ -188,6 +196,10 @@ def _main():
                      offline=args.offline,
                      strip_attachment_payloads=args.strip_attachment_payloads,
                      output=args.output,
+                     output_json_aggregate_file=args.output_json_aggregate_file,
+                     output_json_forensic_file=args.output_json_forensic_file,
+                     output_csv_aggregate_file=args.output_csv_aggregate_file,
+                     output_csv_forensic_file=args.output_csv_forensic_file,
                      nameservers=args.nameservers,
                      silent=args.silent,
                      dns_timeout=args.dns_timeout,

From 478452de203fbdcb4644b989c066124f6c98eb6a Mon Sep 17 00:00:00 2001
From: Mauro Faccenda <faccenda@gmail.com>
Date: Wed, 20 Jan 2021 15:53:19 +0100
Subject: [PATCH 02/31] pass offline parameter to watch_inbox()

---
 parsedmarc/cli.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/parsedmarc/cli.py b/parsedmarc/cli.py
index e9119e0..68d9e89 100644
--- a/parsedmarc/cli.py
+++ b/parsedmarc/cli.py
@@ -670,6 +670,7 @@ def _main():
                 test=opts.imap_test,
                 nameservers=opts.nameservers,
                 dns_timeout=opts.dns_timeout,
+                offline=opts.offline,
                 strip_attachment_payloads=sa)
         except FileExistsError as error:
             logger.error("{0}".format(error.__str__()))

From be8395dbe341e5cca09fd7ff3630b6240f846ac3 Mon Sep 17 00:00:00 2001
From: Ola Thoresen <ola@nytt.no>
Date: Wed, 20 Jan 2021 19:56:15 +0100
Subject: [PATCH 03/31] Detecting other IMAP-errors. Adding short sleep to
 avoid hammering the IMAP-server on error

---
 parsedmarc/__init__.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py
index ca4f1e7..5fe1f29 100644
--- a/parsedmarc/__init__.py
+++ b/parsedmarc/__init__.py
@@ -8,6 +8,7 @@ import shutil
 import xml.parsers.expat as expat
 import json
 from datetime import datetime
+from time import sleep
 from collections import OrderedDict
 from io import BytesIO, StringIO
 from gzip import GzipFile
@@ -1234,6 +1235,10 @@ def watch_inbox(host, username, password, callback, port=None, ssl=True,
                        idle_timeout=idle_timeout)
         except (timeout, IMAPClientError):
             logger.warning("IMAP connection timeout. Reconnecting...")
+            sleep(5)
+        except Exception as e:
+            logger.warning("IMAP connection error. {0}. Reconnecting...".format(e))
+            sleep(5)
 
 
 def save_output(results, output_directory="output"):

From 0e2636225e7ffdb2ca7705075da665cab047a0c3 Mon Sep 17 00:00:00 2001
From: Ola Thoresen <ola@nytt.no>
Date: Thu, 21 Jan 2021 08:24:44 +0100
Subject: [PATCH 04/31] Modifying some log-levels to INFO

---
 parsedmarc/__init__.py | 4 ++--
 parsedmarc/cli.py      | 2 ++
 parsedmarc/elastic.py  | 4 ++--
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py
index ca4f1e7..cde57c9 100644
--- a/parsedmarc/__init__.py
+++ b/parsedmarc/__init__.py
@@ -943,7 +943,7 @@ def get_dmarc_reports_from_mbox(input_, nameservers=None, dns_timeout=2.0,
                                                         input_))
         for i in range(len(message_keys)):
             message_key = message_keys[i]
-            logger.debug("Processing message {0} of {1}".format(
+            logger.info("Processing message {0} of {1}".format(
                 i+1, total_messages
             ))
             msg_content = mbox.get_string(message_key)
@@ -1071,7 +1071,7 @@ def get_dmarc_reports_from_inbox(connection=None,
                                                     reports_folder))
     for i in range(len(messages)):
         msg_uid = messages[i]
-        logger.debug("Processing message {0} of {1}: UID {2}".format(
+        logger.info("Processing message {0} of {1}: UID {2}".format(
             i+1, total_messages, msg_uid
 
         ))
diff --git a/parsedmarc/cli.py b/parsedmarc/cli.py
index e9119e0..752eacd 100644
--- a/parsedmarc/cli.py
+++ b/parsedmarc/cli.py
@@ -490,6 +490,8 @@ def _main():
         logger.error("You must supply input files, or an IMAP configuration")
         exit(1)
 
+    logger.info("Starting parsedmarc")
+
     if opts.save_aggregate or opts.save_forensic:
         try:
             if opts.elasticsearch_hosts:
diff --git a/parsedmarc/elastic.py b/parsedmarc/elastic.py
index b448f28..3f2d8a2 100644
--- a/parsedmarc/elastic.py
+++ b/parsedmarc/elastic.py
@@ -295,7 +295,7 @@ def save_aggregate_report_to_elasticsearch(aggregate_report,
     Raises:
             AlreadySaved
     """
-    logger.debug("Saving aggregate report to Elasticsearch")
+    logger.info("Saving aggregate report to Elasticsearch")
     aggregate_report = aggregate_report.copy()
     metadata = aggregate_report["report_metadata"]
     org_name = metadata["org_name"]
@@ -423,7 +423,7 @@ def save_forensic_report_to_elasticsearch(forensic_report,
             AlreadySaved
 
         """
-    logger.debug("Saving forensic report to Elasticsearch")
+    logger.info("Saving forensic report to Elasticsearch")
     forensic_report = forensic_report.copy()
     sample_date = None
     if forensic_report["parsed_sample"]["date"] is not None:

From 76614bdc94acc796b0307372e640d3836f0a77c2 Mon Sep 17 00:00:00 2001
From: Ola Thoresen <ola@nytt.no>
Date: Thu, 21 Jan 2021 08:34:56 +0100
Subject: [PATCH 05/31] Fixing flake-error

---
 parsedmarc/__init__.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py
index 5fe1f29..809dd63 100644
--- a/parsedmarc/__init__.py
+++ b/parsedmarc/__init__.py
@@ -1237,7 +1237,8 @@ def watch_inbox(host, username, password, callback, port=None, ssl=True,
             logger.warning("IMAP connection timeout. Reconnecting...")
             sleep(5)
         except Exception as e:
-            logger.warning("IMAP connection error. {0}. Reconnecting...".format(e))
+            logger.warning("IMAP connection error. {0}. "
+                           "Reconnecting...".format(e))
             sleep(5)
 
 

From a00cee8ba4c493f37194e948a5de55655307fb86 Mon Sep 17 00:00:00 2001
From: Ola Thoresen <ola@nytt.no>
Date: Fri, 22 Jan 2021 10:38:04 +0100
Subject: [PATCH 06/31] Adding a log line to see the sender of a report when it
 is parsed

---
 parsedmarc/__init__.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py
index cde57c9..5ac6965 100644
--- a/parsedmarc/__init__.py
+++ b/parsedmarc/__init__.py
@@ -776,6 +776,7 @@ def parse_report_email(input_, offline=False, nameservers=None,
     subject = None
     feedback_report = None
     sample = None
+    logger.info("Parsing mail from {0}".format(msg_headers["From"]))
     if "Subject" in msg_headers:
         subject = msg_headers["Subject"]
     for part in msg.walk():

From c853c470879ad2ba2cc56595197cb9fe9f2927d9 Mon Sep 17 00:00:00 2001
From: Ola Thoresen <ola@nytt.no>
Date: Fri, 22 Jan 2021 15:06:35 +0100
Subject: [PATCH 07/31] Ensuring mail from is set

---
 parsedmarc/__init__.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py
index 5ac6965..73c0130 100644
--- a/parsedmarc/__init__.py
+++ b/parsedmarc/__init__.py
@@ -776,7 +776,8 @@ def parse_report_email(input_, offline=False, nameservers=None,
     subject = None
     feedback_report = None
     sample = None
-    logger.info("Parsing mail from {0}".format(msg_headers["From"]))
+    if "From" in msg_headers:
+        logger.info("Parsing mail from {0}".format(msg_headers["From"]))
     if "Subject" in msg_headers:
         subject = msg_headers["Subject"]
     for part in msg.walk():

From bc684c891340abf39b91b5a2a758f6ace623d2f9 Mon Sep 17 00:00:00 2001
From: Tom Henderson <tomhenderson@mac.com>
Date: Fri, 5 Feb 2021 13:37:09 +1300
Subject: [PATCH 08/31] Add option to process messages in batches

---
 README.rst             |  1 +
 docs/index.rst         |  1 +
 parsedmarc/__init__.py | 25 ++++++++++++++++++-------
 parsedmarc/cli.py      | 14 +++++++++++---
 4 files changed, 31 insertions(+), 10 deletions(-)

diff --git a/README.rst b/README.rst
index b028445..ce3f2ea 100644
--- a/README.rst
+++ b/README.rst
@@ -155,6 +155,7 @@ The full set of configuration options are:
     - ``watch`` - bool: Use the IMAP ``IDLE`` command to process messages as they arrive
     - ``delete`` - bool: Delete messages after processing them, instead of archiving them
     - ``test`` - bool: Do not move or delete messages
+    - ``batch_size`` - int: Number of messages to read and process before saving. Defaults to all messages if not set.
 - ``elasticsearch``
     - ``hosts`` - str: A comma separated list of hostnames and ports or URLs (e.g. ``127.0.0.1:9200`` or ``https://user:secret@localhost``)
 
diff --git a/docs/index.rst b/docs/index.rst
index 449f048..7897a30 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -161,6 +161,7 @@ The full set of configuration options are:
     - ``watch`` - bool: Use the IMAP ``IDLE`` command to process messages as they arrive
     - ``delete`` - bool: Delete messages after processing them, instead of archiving them
     - ``test`` - bool: Do not move or delete messages
+    - ``batch_size`` - int: Number of messages to read and process before saving. Defaults to all messages if not set.
 - ``elasticsearch``
     - ``hosts`` - str: A comma separated list of hostnames and ports or URLs (e.g. ``127.0.0.1:9200`` or ``https://user:secret@localhost``)
 
diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py
index ca4f1e7..8380df7 100644
--- a/parsedmarc/__init__.py
+++ b/parsedmarc/__init__.py
@@ -1004,7 +1004,8 @@ def get_dmarc_reports_from_inbox(connection=None,
                                  nameservers=None,
                                  dns_timeout=6.0,
                                  strip_attachment_payloads=False,
-                                 results=None):
+                                 results=None,
+                                 batch_size=None):
     """
     Fetches and parses DMARC reports from an inbox
 
@@ -1028,6 +1029,7 @@ def get_dmarc_reports_from_inbox(connection=None,
         strip_attachment_payloads (bool): Remove attachment payloads from
         forensic report results
         results (dict): Results from the previous run
+        batch_size (int): Number of messages to read and process before saving
 
     Returns:
         OrderedDict: Lists of ``aggregate_reports`` and ``forensic_reports``
@@ -1069,11 +1071,18 @@ def get_dmarc_reports_from_inbox(connection=None,
     total_messages = len(messages)
     logger.debug("Found {0} messages in {1}".format(len(messages),
                                                     reports_folder))
-    for i in range(len(messages)):
+
+    if batch_size:
+        message_limit = batch_size
+    else:
+        message_limit = total_messages
+
+    logger.debug("Processing {0} messages".format(message_limit))
+
+    for i in range(message_limit):
         msg_uid = messages[i]
         logger.debug("Processing message {0} of {1}: UID {2}".format(
-            i+1, total_messages, msg_uid
-
+            i+1, message_limit, msg_uid
         ))
         msg_content = server.fetch_message(msg_uid, parse=False)
         sa = strip_attachment_payloads
@@ -1165,7 +1174,7 @@ def get_dmarc_reports_from_inbox(connection=None,
 
     total_messages = len(server.search())
 
-    if not test and total_messages > 0:
+    if not test and not batch_size and total_messages > 0:
         # Process emails that came in during the last run
         results = get_dmarc_reports_from_inbox(
             connection=server,
@@ -1187,7 +1196,7 @@ def watch_inbox(host, username, password, callback, port=None, ssl=True,
                 verify=True, reports_folder="INBOX",
                 archive_folder="Archive", delete=False, test=False,
                 idle_timeout=30, offline=False, nameservers=None,
-                dns_timeout=6.0, strip_attachment_payloads=False):
+                dns_timeout=6.0, strip_attachment_payloads=False, batch_size=None):
     """
     Use an IDLE IMAP connection to parse incoming emails, and pass the results
     to a callback function
@@ -1210,6 +1219,7 @@ def watch_inbox(host, username, password, callback, port=None, ssl=True,
         dns_timeout (float): Set the DNS query timeout
         strip_attachment_payloads (bool): Replace attachment payloads in
         forensic report samples with None
+        batch_size (int): Number of messages to read and process before saving
     """
     sa = strip_attachment_payloads
 
@@ -1222,7 +1232,8 @@ def watch_inbox(host, username, password, callback, port=None, ssl=True,
                                            offline=offline,
                                            nameservers=nameservers,
                                            dns_timeout=dns_timeout,
-                                           strip_attachment_payloads=sa)
+                                           strip_attachment_payloads=sa,
+                                           batch_size=batch_size)
         callback(res)
 
     while True:
diff --git a/parsedmarc/cli.py b/parsedmarc/cli.py
index e9119e0..df1a870 100644
--- a/parsedmarc/cli.py
+++ b/parsedmarc/cli.py
@@ -210,6 +210,7 @@ def _main():
                      imap_watch=False,
                      imap_delete=False,
                      imap_test=False,
+                     imap_batch_size=None,
                      hec=None,
                      hec_token=None,
                      hec_index=None,
@@ -327,6 +328,10 @@ def _main():
                 opts.imap_delete = imap_config.getboolean("delete")
             if "test" in imap_config:
                 opts.imap_test = imap_config.getboolean("test")
+            if "batch_size" in imap_config:
+                opts.imap_batch_size = imap_config.getint("batch_size")
+            else:
+                opts.imap_batch_size = None
         if "elasticsearch" in config:
             elasticsearch_config = config["elasticsearch"]
             if "hosts" in elasticsearch_config:
@@ -613,8 +618,9 @@ def _main():
                 offline=opts.offline,
                 nameservers=ns,
                 test=opts.imap_test,
-                strip_attachment_payloads=sa
-                                                   )
+                strip_attachment_payloads=sa,
+                batch_size=opts.imap_batch_size
+            )
 
             aggregate_reports += reports["aggregate_reports"]
             forensic_reports += reports["forensic_reports"]
@@ -670,7 +676,9 @@ def _main():
                 test=opts.imap_test,
                 nameservers=opts.nameservers,
                 dns_timeout=opts.dns_timeout,
-                strip_attachment_payloads=sa)
+                strip_attachment_payloads=sa,
+                batch_size=opts.imap_batch_size
+            )
         except FileExistsError as error:
             logger.error("{0}".format(error.__str__()))
             exit(1)

From bafa4861b153199db60145ae7020d18ddaa7a4bc Mon Sep 17 00:00:00 2001
From: Tom Henderson <tomhenderson@mac.com>
Date: Fri, 5 Feb 2021 14:27:22 +1300
Subject: [PATCH 09/31] Update docs

---
 README.rst       | 11 +++++++++--
 docs/example.ini |  4 ++++
 docs/index.rst   | 12 +++++++++---
 3 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/README.rst b/README.rst
index b028445..2d297f9 100644
--- a/README.rst
+++ b/README.rst
@@ -128,11 +128,15 @@ For example
    token = HECTokenGoesHere
    index = email
 
+   [s3]
+   bucket = my-bucket
+   path = /parsedmarc
+
 The full set of configuration options are:
 
 - ``general``
-    - ``save_aggregate`` - bool: Save aggregate report data to the Elasticsearch and/or Splunk
-    - ``save_forensic`` - bool: Save forensic report data to the Elasticsearch and/or Splunk
+    - ``save_aggregate`` - bool: Save aggregate report data to Elasticsearch, Splunk and/or S3
+    - ``save_forensic`` - bool: Save forensic report data to Elasticsearch, Splunk and/or S3
     - ``strip_attachment_payloads`` - bool: Remove attachment payloads from results
     - ``output`` - str: Directory to place JSON and CSV files in
     - ``offline`` - bool: Do not use online queries for geolocation or DNS
@@ -191,6 +195,9 @@ The full set of configuration options are:
     - ``subject`` - str: The Subject header to use in the email (Default: parsedmarc report)
     - ``attachment`` - str: The ZIP attachment filenames
     - ``message`` - str: The email message (Default: Please see the attached parsedmarc report.)
+- ``s3``
+    - ``bucket`` - str: The S3 bucket name
+    - ``path`` - str: The path to upload reports to (Default: /)
 
 
 .. warning::
diff --git a/docs/example.ini b/docs/example.ini
index a27a670..a9a2985 100644
--- a/docs/example.ini
+++ b/docs/example.ini
@@ -18,3 +18,7 @@ ssl = False
 url = https://splunkhec.example.com
 token = HECTokenGoesHere
 index = email
+
+[s3]
+bucket = my-bucket
+path = /parsedmarc
\ No newline at end of file
diff --git a/docs/index.rst b/docs/index.rst
index 449f048..7004cc4 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -132,11 +132,15 @@ For example
    token = HECTokenGoesHere
    index = email
 
+   [s3]
+   bucket = my-bucket
+   path = /parsedmarc
+
 The full set of configuration options are:
 
 - ``general``
-    - ``save_aggregate`` - bool: Save aggregate report data to the Elasticsearch and/or Splunk
-    - ``save_forensic`` - bool: Save forensic report data to the Elasticsearch and/or Splunk
+    - ``save_aggregate`` - bool: Save aggregate report data to Elasticsearch, Splunk and/or S3
+    - ``save_forensic`` - bool: Save forensic report data to Elasticsearch, Splunk and/or S3
     - ``strip_attachment_payloads`` - bool: Remove attachment payloads from results
     - ``output`` - str: Directory to place JSON and CSV files in
     - ``offline`` - bool: Do not use online queries for geolocation or DNS
@@ -200,7 +204,9 @@ The full set of configuration options are:
     - ``subject`` - str: The Subject header to use in the email (Default: parsedmarc report)
     - ``attachment`` - str: The ZIP attachment filenames
     - ``message`` - str: The email message (Default: Please see the attached parsedmarc report.)
-
+- ``s3``
+    - ``bucket`` - str: The S3 bucket name
+    - ``path`` - str: The path to upload reports to (Default: /)
 
 .. warning::
 

From 755ee3ded70adf4c61debf09aaddc150ab757bee Mon Sep 17 00:00:00 2001
From: Tom Henderson <tomhenderson@mac.com>
Date: Fri, 5 Feb 2021 14:28:46 +1300
Subject: [PATCH 10/31] Add new settings for s3

---
 parsedmarc/cli.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/parsedmarc/cli.py b/parsedmarc/cli.py
index e9119e0..a403391 100644
--- a/parsedmarc/cli.py
+++ b/parsedmarc/cli.py
@@ -241,6 +241,8 @@ def _main():
                      smtp_to=[],
                      smtp_subject="parsedmarc report",
                      smtp_message="Please see the attached DMARC results.",
+                     s3_bucket=None,
+                     s3_path=None,
                      log_file=args.log_file,
                      n_procs=1,
                      chunk_size=1
@@ -469,6 +471,22 @@ def _main():
                 opts.smtp_attachment = smtp_config["attachment"]
             if "message" in smtp_config:
                 opts.smtp_message = smtp_config["message"]
+        if "s3" in config.sections():
+            s3_config = config["s3"]
+            if "bucket" in s3_config:
+                opts.s3_bucket = s3_config["bucket"]
+            else:
+                logger.critical("bucket setting missing from the "
+                                "s3 config section")
+                exit(-1)
+            if "path" in s3_config:
+                opts.s3_path = s3_config["path"]
+                if opts.s3_path.startswith("/"):
+                    opts.s3_path = opts.s3_path[1:]
+                if opts.s3_path.endswith("/"):
+                    opts.s3_path = opts.s3_path[:-1]
+            else:
+                opts.s3_path = ""
 
     logging.basicConfig(level=logging.WARNING)
     logger.setLevel(logging.WARNING)

From 291d389f69d279b249cab2b01c6bd7e7227b79e9 Mon Sep 17 00:00:00 2001
From: Tom Henderson <tomhenderson@mac.com>
Date: Fri, 5 Feb 2021 14:29:27 +1300
Subject: [PATCH 11/31] Add boto3

---
 requirements.txt | 1 +
 setup.py         | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 4200a90..9ef3fab 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -27,3 +27,4 @@ sphinx_rtd_theme>=0.4.3
 wheel>=0.33.6
 codecov>=2.0.15
 lxml>=4.4.0
+boto3>=1.16.63
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 6faaf91..6929893 100644
--- a/setup.py
+++ b/setup.py
@@ -98,7 +98,8 @@ setup(
                       'elasticsearch-dsl>=7.2.0,<8.0.0',
                       'kafka-python>=1.4.4',
                       'tqdm>=4.31.1',
-                      'lxml>=4.4.0'
+                      'lxml>=4.4.0',
+                      'boto3>=1.16.63'
                       ],
 
     entry_points={

From a4acd5f2320ae6f518446c2a6223e7e664092301 Mon Sep 17 00:00:00 2001
From: Tom Henderson <tomhenderson@mac.com>
Date: Fri, 5 Feb 2021 14:30:02 +1300
Subject: [PATCH 12/31] Add S3Client

---
 parsedmarc/s3.py | 65 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 65 insertions(+)
 create mode 100644 parsedmarc/s3.py

diff --git a/parsedmarc/s3.py b/parsedmarc/s3.py
new file mode 100644
index 0000000..8f64aa0
--- /dev/null
+++ b/parsedmarc/s3.py
@@ -0,0 +1,65 @@
+# -*- coding: utf-8 -*-
+
+import logging
+import json
+import boto3
+
+from parsedmarc.utils import human_timestamp_to_datetime
+
+logger = logging.getLogger("parsedmarc")
+
+
+class S3Client(object):
+    """A client for Amazon S3"""
+
+    def __init__(self, bucket_name, bucket_path):
+        """
+        Initializes the S3Client
+        Args:
+            bucket_name (str): The S3 Bucket
+            bucket_path (str): The path to save reports
+        """
+        self.bucket_name = bucket_name
+        self.bucket_path = bucket_path
+        self.metadata_keys = [
+            "org_name",
+            "org_email",
+            "report_id",
+            "begin_date",
+            "end_date",
+        ]
+        
+        self.s3 = boto3.resource('s3')
+        self.bucket = self.s3.Bucket(self.bucket_name)
+
+
+    def save_aggregate_report_to_s3(self, report):
+        self.save_report_to_s3(report, 'aggregate')
+
+
+    def save_forensic_report_to_s3(self, report):
+        self.save_report_to_s3(report, 'forensic')
+
+
+    def save_report_to_s3(self, report, report_type):
+        report_date = human_timestamp_to_datetime(report["report_metadata"]["begin_date"])
+        report_id = report["report_metadata"]["report_id"]
+        object_path = "{0}/{1}/year={2}/month={3:02d}/day={4:02d}/{5}.json".format(
+            self.bucket_path,
+            report_type,
+            report_date.year,
+            report_date.month,
+            report_date.day,
+            report_id
+        )
+        logger.debug("Saving {0} report to s3://{1}/{2}".format(report_type, self.bucket_name, object_path))
+        object_metadata = {
+            k: v
+            for k, v in report["report_metadata"].items()
+            if k in self.metadata_keys
+        }
+        self.bucket.put_object(
+            Body=json.dumps(report),
+            Key=object_path,
+            Metadata=object_metadata
+        )

From 5f6b94583938ff3aedbf1d9e7c526e838e8c7c9c Mon Sep 17 00:00:00 2001
From: Tom Henderson <tomhenderson@mac.com>
Date: Fri, 5 Feb 2021 14:30:54 +1300
Subject: [PATCH 13/31] Save reports to s3

---
 parsedmarc/cli.py | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/parsedmarc/cli.py b/parsedmarc/cli.py
index a403391..1087745 100644
--- a/parsedmarc/cli.py
+++ b/parsedmarc/cli.py
@@ -19,7 +19,7 @@ from tqdm import tqdm
 from parsedmarc import get_dmarc_reports_from_inbox, watch_inbox, \
     parse_report_file, get_dmarc_reports_from_mbox, elastic, kafkaclient, \
     splunk, save_output, email_results, ParserError, __version__, \
-    InvalidDMARCReport
+    InvalidDMARCReport, s3
 from parsedmarc.utils import is_mbox
 
 logger = logging.getLogger("parsedmarc")
@@ -79,6 +79,14 @@ def _main():
                 )
             except Exception as error_:
                 logger.error("Kafka Error: {0}".format(error_.__str__()))
+        if opts.s3_bucket:
+            try:
+                s3_client = s3.S3Client(
+                    bucket_name=opts.s3_bucket,
+                    bucket_path=opts.s3_path,
+                )
+            except Exception as error_:
+                logger.error("S3 Error: {0}".format(error_.__str__()))
         if opts.save_aggregate:
             for report in reports_["aggregate_reports"]:
                 try:
@@ -104,6 +112,11 @@ def _main():
                 except Exception as error_:
                     logger.error("Kafka Error: {0}".format(
                          error_.__str__()))
+                try:
+                    if opts.s3_bucket:
+                        s3_client.save_aggregate_report_to_s3(report)
+                except Exception as error_:
+                    logger.error("S3 Error: {0}".format(error_.__str__()))
             if opts.hec:
                 try:
                     aggregate_reports_ = reports_["aggregate_reports"]
@@ -138,6 +151,11 @@ def _main():
                 except Exception as error_:
                     logger.error("Kafka Error: {0}".format(
                         error_.__str__()))
+                try:
+                    if opts.s3_bucket:
+                        s3_client.save_forensic_report_to_s3(report)
+                except Exception as error_:
+                    logger.error("S3 Error: {0}".format(error_.__str__()))
             if opts.hec:
                 try:
                     forensic_reports_ = reports_["forensic_reports"]

From eba722cddceafd50cdcee3e7c2ed6b3aff01f387 Mon Sep 17 00:00:00 2001
From: Tom Henderson <tomhenderson@mac.com>
Date: Fri, 5 Feb 2021 14:38:52 +1300
Subject: [PATCH 14/31] Fix path example

---
 README.rst       | 2 +-
 docs/example.ini | 2 +-
 docs/index.rst   | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/README.rst b/README.rst
index 2d297f9..7c85ce1 100644
--- a/README.rst
+++ b/README.rst
@@ -130,7 +130,7 @@ For example
 
    [s3]
    bucket = my-bucket
-   path = /parsedmarc
+   path = parsedmarc
 
 The full set of configuration options are:
 
diff --git a/docs/example.ini b/docs/example.ini
index a9a2985..efa56b3 100644
--- a/docs/example.ini
+++ b/docs/example.ini
@@ -21,4 +21,4 @@ index = email
 
 [s3]
 bucket = my-bucket
-path = /parsedmarc
\ No newline at end of file
+path = parsedmarc
\ No newline at end of file
diff --git a/docs/index.rst b/docs/index.rst
index 7004cc4..c63ecef 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -134,7 +134,7 @@ For example
 
    [s3]
    bucket = my-bucket
-   path = /parsedmarc
+   path = parsedmarc
 
 The full set of configuration options are:
 

From 9522c9b6e4400a3758a74d143f1b6d7a69d0a12b Mon Sep 17 00:00:00 2001
From: Tom Henderson <tomhenderson@mac.com>
Date: Fri, 5 Feb 2021 14:51:32 +1300
Subject: [PATCH 15/31] Ensure message_limit is not greater than total_messages

---
 parsedmarc/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py
index 8380df7..78bd356 100644
--- a/parsedmarc/__init__.py
+++ b/parsedmarc/__init__.py
@@ -1073,7 +1073,7 @@ def get_dmarc_reports_from_inbox(connection=None,
                                                     reports_folder))
 
     if batch_size:
-        message_limit = batch_size
+        message_limit = min(total_messages, batch_size)
     else:
         message_limit = total_messages
 

From de05be90df8ef7fc1233be6803c5991da7c9345c Mon Sep 17 00:00:00 2001
From: Tom Henderson <tomhenderson@mac.com>
Date: Fri, 5 Feb 2021 14:53:43 +1300
Subject: [PATCH 16/31] Fix flake8 error

---
 parsedmarc/__init__.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py
index 78bd356..2f192a5 100644
--- a/parsedmarc/__init__.py
+++ b/parsedmarc/__init__.py
@@ -1196,7 +1196,8 @@ def watch_inbox(host, username, password, callback, port=None, ssl=True,
                 verify=True, reports_folder="INBOX",
                 archive_folder="Archive", delete=False, test=False,
                 idle_timeout=30, offline=False, nameservers=None,
-                dns_timeout=6.0, strip_attachment_payloads=False, batch_size=None):
+                dns_timeout=6.0, strip_attachment_payloads=False,
+                batch_size=None):
     """
     Use an IDLE IMAP connection to parse incoming emails, and pass the results
     to a callback function

From 85e7fd4ce6d60163e14341ed70f3da81bbe979f6 Mon Sep 17 00:00:00 2001
From: Tom Henderson <tomhenderson@mac.com>
Date: Fri, 5 Feb 2021 15:58:57 +1300
Subject: [PATCH 17/31] Fix flake8 errors

---
 parsedmarc/s3.py | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/parsedmarc/s3.py b/parsedmarc/s3.py
index 8f64aa0..41910ed 100644
--- a/parsedmarc/s3.py
+++ b/parsedmarc/s3.py
@@ -28,23 +28,23 @@ class S3Client(object):
             "begin_date",
             "end_date",
         ]
-        
+
         self.s3 = boto3.resource('s3')
         self.bucket = self.s3.Bucket(self.bucket_name)
 
-
     def save_aggregate_report_to_s3(self, report):
         self.save_report_to_s3(report, 'aggregate')
 
-
     def save_forensic_report_to_s3(self, report):
         self.save_report_to_s3(report, 'forensic')
 
-
     def save_report_to_s3(self, report, report_type):
-        report_date = human_timestamp_to_datetime(report["report_metadata"]["begin_date"])
+        report_date = human_timestamp_to_datetime(
+            report["report_metadata"]["begin_date"]
+        )
         report_id = report["report_metadata"]["report_id"]
-        object_path = "{0}/{1}/year={2}/month={3:02d}/day={4:02d}/{5}.json".format(
+        path_template = "{0}/{1}/year={2}/month={3:02d}/day={4:02d}/{5}.json"
+        object_path = path_template.format(
             self.bucket_path,
             report_type,
             report_date.year,
@@ -52,7 +52,10 @@ class S3Client(object):
             report_date.day,
             report_id
         )
-        logger.debug("Saving {0} report to s3://{1}/{2}".format(report_type, self.bucket_name, object_path))
+        logger.debug("Saving {0} report to s3://{1}/{2}".format(
+            report_type,
+            self.bucket_name,
+            object_path))
         object_metadata = {
             k: v
             for k, v in report["report_metadata"].items()

From 394dddd2df6c826b4702620b2c637fd2d895b59f Mon Sep 17 00:00:00 2001
From: supaeasy <59504964+supaeasy@users.noreply.github.com>
Date: Fri, 5 Feb 2021 15:16:51 +0100
Subject: [PATCH 18/31] Update README.rst

I struggled too long with this to not let others know.
---
 README.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.rst b/README.rst
index b028445..3428a0c 100644
--- a/README.rst
+++ b/README.rst
@@ -145,7 +145,7 @@ The full set of configuration options are:
     - ``chunk_size`` - int: Number of files to give to each process when running in parallel. Setting this to a number larger than one can improve performance when processing thousands of files
 - ``imap``
     - ``host`` - str: The IMAP server hostname or IP address
-    - ``port`` - int: The IMAP server port (Default: 993)
+    - ``port`` - int: The IMAP server port (Default: 993) If your Hoster publishes another port, still try 993. Otherwise Error:"wrong SSL version" 
     - ``ssl`` - bool: Use an encrypted SSL/TLS connection (Default: True)
     - ``skip_certificate_verification`` - bool: Skip certificate verification (not recommended)
     - ``user`` - str: The IMAP user

From 36c592cc5a6cf52995763c62c6087402538946f9 Mon Sep 17 00:00:00 2001
From: atanas argirov <atanas@argirov.org>
Date: Thu, 11 Feb 2021 18:22:29 +0000
Subject: [PATCH 19/31] * added defaults for arg parser

---
 parsedmarc/__init__.py |  1 -
 parsedmarc/cli.py      | 17 +++++++++++------
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py
index 49de20b..e27aee8 100644
--- a/parsedmarc/__init__.py
+++ b/parsedmarc/__init__.py
@@ -1234,7 +1234,6 @@ def watch_inbox(host, username, password, callback, port=None, ssl=True,
         except (timeout, IMAPClientError):
             logger.warning("IMAP connection timeout. Reconnecting...")
 
-
 def save_output(results, output_directory="output", \
                 output_json_aggregate_file="aggregate.json", \
                 output_json_forensic_file="forensic.json", \
diff --git a/parsedmarc/cli.py b/parsedmarc/cli.py
index 262a82b..b0d4d72 100644
--- a/parsedmarc/cli.py
+++ b/parsedmarc/cli.py
@@ -82,7 +82,7 @@ def _main():
         if opts.save_aggregate:
             for report in reports_["aggregate_reports"]:
                 try:
-                   opts.elasticsearch_hosts:
+                    if opts.elasticsearch_hosts:
                         shards = opts.elasticsearch_number_of_shards
                         replicas = opts.elasticsearch_number_of_replicas
                         elastic.save_aggregate_report_to_elasticsearch(
@@ -161,13 +161,13 @@ def _main():
     arg_parser.add_argument("-o", "--output",
                             help="write output files to the given directory")
     arg_parser.add_argument("--output-json-aggregate-file",
-                            help="output aggregate JSON file")
+                            help="output aggregate JSON file", default="aggregate.json")
     arg_parser.add_argument("--output-json-forensic-file",
-                            help="output forensic JSON file")
+                            help="output forensic JSON file", default="forensic.json")
     arg_parser.add_argument("--output-csv-aggregate-file",
-                            help="output aggregate CSV file")
+                            help="output aggregate CSV file", default="aggregate.csv")
     arg_parser.add_argument("--output-csv-forensic-file",
-                            help="output forensic CSV file")
+                            help="output forensic CSV file", default="forensic.csv")
     arg_parser.add_argument("-n", "--nameservers", nargs="+",
                             help="nameservers to query")
     arg_parser.add_argument("-t", "--dns_timeout",
@@ -191,6 +191,7 @@ def _main():
     forensic_reports = []
 
     args = arg_parser.parse_args()
+
     opts = Namespace(file_path=args.file_path,
                      config_file=args.config_file,
                      offline=args.offline,
@@ -631,7 +632,11 @@ def _main():
                            ("forensic_reports", forensic_reports)])
 
     if opts.output:
-        save_output(results, output_directory=opts.output)
+        save_output(results, output_directory=opts.output, \
+                    output_json_aggregate_file=opts.output_json_aggregate_file, \
+                    output_json_forensic_file=opts.output_json_forensic_file, \
+                    output_csv_aggregate_file=opts.output_csv_aggregate_file, \
+                    output_csv_forensic_file=opts.output_csv_forensic_file)
 
     process_reports(results)
 

From e51f2b0127bad34903fee4c88d44b5f4660b0913 Mon Sep 17 00:00:00 2001
From: atanas argirov <atanas@argirov.org>
Date: Fri, 12 Feb 2021 10:50:25 +0000
Subject: [PATCH 20/31] * general cleanup to meet linter rules

---
 parsedmarc/__init__.py | 27 ++++++++++++++++++---------
 parsedmarc/cli.py      | 38 +++++++++++++++++++++-----------------
 2 files changed, 39 insertions(+), 26 deletions(-)

diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py
index e27aee8..7f0c7d0 100644
--- a/parsedmarc/__init__.py
+++ b/parsedmarc/__init__.py
@@ -1234,11 +1234,12 @@ def watch_inbox(host, username, password, callback, port=None, ssl=True,
         except (timeout, IMAPClientError):
             logger.warning("IMAP connection timeout. Reconnecting...")
 
-def save_output(results, output_directory="output", \
-                output_json_aggregate_file="aggregate.json", \
-                output_json_forensic_file="forensic.json", \
-                output_csv_aggregate_file="aggregate.csv", \
-                output_csv_forensic_file="forensic.csv"):
+
+def save_output(results, output_directory="output",
+                output_json_aggregate="aggregate.json",
+                output_json_forensic="forensic.json",
+                output_csv_aggregate="aggregate.csv",
+                output_csv_forensic="forensic.csv"):
     """
     Save report data in the given directory
 
@@ -1256,22 +1257,30 @@ def save_output(results, output_directory="output", \
     else:
         os.makedirs(output_directory)
 
-    with open("{0}".format(os.path.join(output_directory, output_json_aggregate_file)),
+    with open("{0}"
+              .format(os.path.join(output_directory,
+                                   output_json_aggregate)),
               "w", newline="\n", encoding="utf-8") as agg_json:
         agg_json.write(json.dumps(aggregate_reports, ensure_ascii=False,
                                   indent=2))
 
-    with open("{0}".format(os.path.join(output_directory, output_csv_aggregate_file)),
+    with open("{0}"
+              .format(os.path.join(output_directory,
+                                   output_csv_aggregate)),
               "w", newline="\n", encoding="utf-8") as agg_csv:
         csv = parsed_aggregate_reports_to_csv(aggregate_reports)
         agg_csv.write(csv)
 
-    with open("{0}".format(os.path.join(output_directory, output_json_forensic_file)),
+    with open("{0}"
+              .format(os.path.join(output_directory,
+                                   output_json_forensic)),
               "w", newline="\n", encoding="utf-8") as for_json:
         for_json.write(json.dumps(forensic_reports, ensure_ascii=False,
                                   indent=2))
 
-    with open("{0}".format(os.path.join(output_directory, output_csv_forensic_file)),
+    with open("{0}"
+              .format(os.path.join(output_directory,
+                                   output_csv_forensic)),
               "w", newline="\n", encoding="utf-8") as for_csv:
         csv = parsed_forensic_reports_to_csv(forensic_reports)
         for_csv.write(csv)
diff --git a/parsedmarc/cli.py b/parsedmarc/cli.py
index b0d4d72..777dfc5 100644
--- a/parsedmarc/cli.py
+++ b/parsedmarc/cli.py
@@ -160,14 +160,18 @@ def _main():
                             help=strip_attachment_help, action="store_true")
     arg_parser.add_argument("-o", "--output",
                             help="write output files to the given directory")
-    arg_parser.add_argument("--output-json-aggregate-file",
-                            help="output aggregate JSON file", default="aggregate.json")
-    arg_parser.add_argument("--output-json-forensic-file",
-                            help="output forensic JSON file", default="forensic.json")
-    arg_parser.add_argument("--output-csv-aggregate-file",
-                            help="output aggregate CSV file", default="aggregate.csv")
-    arg_parser.add_argument("--output-csv-forensic-file",
-                            help="output forensic CSV file", default="forensic.csv")
+    arg_parser.add_argument("--output-json-aggregate",
+                            help="output aggregate JSON file",
+                            default="aggregate.json")
+    arg_parser.add_argument("--output-json-forensic",
+                            help="output forensic JSON file",
+                            default="forensic.json")
+    arg_parser.add_argument("--output-csv-aggregate",
+                            help="output aggregate CSV file",
+                            default="aggregate.csv")
+    arg_parser.add_argument("--output-csv-forensic",
+                            help="output forensic CSV file",
+                            default="forensic.csv")
     arg_parser.add_argument("-n", "--nameservers", nargs="+",
                             help="nameservers to query")
     arg_parser.add_argument("-t", "--dns_timeout",
@@ -197,10 +201,10 @@ def _main():
                      offline=args.offline,
                      strip_attachment_payloads=args.strip_attachment_payloads,
                      output=args.output,
-                     output_json_aggregate_file=args.output_json_aggregate_file,
-                     output_json_forensic_file=args.output_json_forensic_file,
-                     output_csv_aggregate_file=args.output_csv_aggregate_file,
-                     output_csv_forensic_file=args.output_csv_forensic_file,
+                     output_json_aggregate=args.output_json_aggregate,
+                     output_json_forensic=args.output_json_forensic,
+                     output_csv_aggregate=args.output_csv_aggregate,
+                     output_csv_forensic=args.output_csv_forensic,
                      nameservers=args.nameservers,
                      silent=args.silent,
                      dns_timeout=args.dns_timeout,
@@ -632,11 +636,11 @@ def _main():
                            ("forensic_reports", forensic_reports)])
 
     if opts.output:
-        save_output(results, output_directory=opts.output, \
-                    output_json_aggregate_file=opts.output_json_aggregate_file, \
-                    output_json_forensic_file=opts.output_json_forensic_file, \
-                    output_csv_aggregate_file=opts.output_csv_aggregate_file, \
-                    output_csv_forensic_file=opts.output_csv_forensic_file)
+        save_output(results, output_directory=opts.output,
+                    output_json_aggregate=opts.output_json_aggregate,
+                    output_json_forensic=opts.output_json_forensic,
+                    output_csv_aggregate=opts.output_csv_aggregate,
+                    output_csv_forensic=opts.output_csv_forensic)
 
     process_reports(results)
 

From 775a6f21819d7aaf389a75bd5f21d4e61fba5644 Mon Sep 17 00:00:00 2001
From: Silvian I <silvian.iosub@gmail.com>
Date: Mon, 31 May 2021 15:40:57 +0200
Subject: [PATCH 21/31] Fix server connection timeout while processing large
 dmarc files

---
 parsedmarc/__init__.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py
index 562a0a8..9873316 100644
--- a/parsedmarc/__init__.py
+++ b/parsedmarc/__init__.py
@@ -304,8 +304,12 @@ def parse_aggregate_report_xml(xml, offline=False, nameservers=None,
         new_report["policy_published"] = new_policy_published
 
         if type(report["record"]) == list:
-            for record in report["record"]:
-                report_record = _parse_report_record(record,
+            for i in range(len(report["record"])):
+                if i % 20 == 0 and i > 0:
+                    logger.debug("Sending noop cmd")
+                    server.noop()
+                    logger.debug("Processed {0}/{1}".format(i, len(report["record"])))
+                report_record = _parse_report_record(report["record"][i],
                                                      offline=offline,
                                                      nameservers=nameservers,
                                                      dns_timeout=timeout,
@@ -1039,6 +1043,7 @@ def get_dmarc_reports_from_inbox(connection=None,
         raise ValueError("Must supply a connection, or a username and "
                          "password")
 
+    global server
     aggregate_reports = []
     forensic_reports = []
     aggregate_report_msg_uids = []

From 3615ad3799f058e8012b39f708aa500f51aa2cf4 Mon Sep 17 00:00:00 2001
From: Silvian I <silvian.iosub@gmail.com>
Date: Mon, 31 May 2021 15:40:57 +0200
Subject: [PATCH 22/31] Fix server connection timeout while processing large
 dmarc files

---
 parsedmarc/__init__.py | 37 ++++++++++++++++++++++++++-----------
 1 file changed, 26 insertions(+), 11 deletions(-)

diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py
index 562a0a8..9de72c1 100644
--- a/parsedmarc/__init__.py
+++ b/parsedmarc/__init__.py
@@ -203,7 +203,7 @@ def _parse_report_record(record, offline=False, nameservers=None,
 
 
 def parse_aggregate_report_xml(xml, offline=False, nameservers=None,
-                               timeout=2.0, parallel=False):
+                               timeout=2.0, parallel=False, server=None):
     """Parses a DMARC XML report string and returns a consistent OrderedDict
 
     Args:
@@ -213,6 +213,7 @@ def parse_aggregate_report_xml(xml, offline=False, nameservers=None,
         (Cloudflare's public DNS resolvers by default)
         timeout (float): Sets the DNS timeout in seconds
         parallel (bool): Parallel processing
+        server (IMAPClient): Connection object
 
     Returns:
         OrderedDict: The parsed aggregate DMARC report
@@ -304,8 +305,13 @@ def parse_aggregate_report_xml(xml, offline=False, nameservers=None,
         new_report["policy_published"] = new_policy_published
 
         if type(report["record"]) == list:
-            for record in report["record"]:
-                report_record = _parse_report_record(record,
+            for i in range(len(report["record"])):
+                if server is not None and i > 0 and i % 20 == 0:
+                    logger.debug("Sending noop cmd")
+                    server.noop()
+                    logger.debug("Processed {0}/{1}".format(
+                        i, len(report["record"])))
+                report_record = _parse_report_record(report["record"][i],
                                                      offline=offline,
                                                      nameservers=nameservers,
                                                      dns_timeout=timeout,
@@ -385,7 +391,8 @@ def extract_xml(input_):
 
 def parse_aggregate_report_file(_input, offline=False, nameservers=None,
                                 dns_timeout=2.0,
-                                parallel=False):
+                                parallel=False,
+                                server=None):
     """Parses a file at the given path, a file-like object. or bytes as a
     aggregate DMARC report
 
@@ -396,6 +403,7 @@ def parse_aggregate_report_file(_input, offline=False, nameservers=None,
         (Cloudflare's public DNS resolvers by default)
         dns_timeout (float): Sets the DNS timeout in seconds
         parallel (bool): Parallel processing
+        server (IMAPClient): Connection object
 
     Returns:
         OrderedDict: The parsed DMARC aggregate report
@@ -406,7 +414,8 @@ def parse_aggregate_report_file(_input, offline=False, nameservers=None,
                                       offline=offline,
                                       nameservers=nameservers,
                                       timeout=dns_timeout,
-                                      parallel=parallel)
+                                      parallel=parallel,
+                                      server=server)
 
 
 def parsed_aggregate_reports_to_csv_rows(reports):
@@ -738,7 +747,7 @@ def parsed_forensic_reports_to_csv(reports):
 
 def parse_report_email(input_, offline=False, nameservers=None,
                        dns_timeout=2.0, strip_attachment_payloads=False,
-                       parallel=False):
+                       parallel=False, server=None):
     """
     Parses a DMARC report from an email
 
@@ -750,6 +759,7 @@ def parse_report_email(input_, offline=False, nameservers=None,
         strip_attachment_payloads (bool): Remove attachment payloads from
         forensic report results
         parallel (bool): Parallel processing
+        server (IMAPClient): Connection object
 
     Returns:
         OrderedDict:
@@ -813,7 +823,8 @@ def parse_report_email(input_, offline=False, nameservers=None,
                         offline=offline,
                         nameservers=ns,
                         dns_timeout=dns_timeout,
-                        parallel=parallel)
+                        parallel=parallel,
+                        server=server)
                     result = OrderedDict([("report_type", "aggregate"),
                                           ("report", aggregate_report)])
                     return result
@@ -863,7 +874,7 @@ def parse_report_email(input_, offline=False, nameservers=None,
 
 def parse_report_file(input_, nameservers=None, dns_timeout=2.0,
                       strip_attachment_payloads=False,
-                      offline=False, parallel=False):
+                      offline=False, parallel=False, server=None):
     """Parses a DMARC aggregate or forensic file at the given path, a
     file-like object. or bytes
 
@@ -876,6 +887,7 @@ def parse_report_file(input_, nameservers=None, dns_timeout=2.0,
         forensic report results
         offline (bool): Do not make online queries for geolocation or DNS
         parallel (bool): Parallel processing
+        server (IMAPClient): Connection object
 
     Returns:
         OrderedDict: The parsed DMARC report
@@ -895,7 +907,8 @@ def parse_report_file(input_, nameservers=None, dns_timeout=2.0,
                                              offline=offline,
                                              nameservers=nameservers,
                                              dns_timeout=dns_timeout,
-                                             parallel=parallel)
+                                             parallel=parallel,
+                                             server=server)
         results = OrderedDict([("report_type", "aggregate"),
                                ("report", report)])
     except InvalidAggregateReport:
@@ -906,7 +919,8 @@ def parse_report_file(input_, nameservers=None, dns_timeout=2.0,
                                          nameservers=nameservers,
                                          dns_timeout=dns_timeout,
                                          strip_attachment_payloads=sa,
-                                         parallel=parallel)
+                                         parallel=parallel,
+                                         server=server)
         except InvalidDMARCReport:
             raise InvalidDMARCReport("Not a valid aggregate or forensic "
                                      "report")
@@ -1083,7 +1097,8 @@ def get_dmarc_reports_from_inbox(connection=None,
                                               nameservers=nameservers,
                                               dns_timeout=dns_timeout,
                                               offline=offline,
-                                              strip_attachment_payloads=sa)
+                                              strip_attachment_payloads=sa,
+                                              server=server)
             if parsed_email["report_type"] == "aggregate":
                 aggregate_reports.append(parsed_email["report"])
                 aggregate_report_msg_uids.append(msg_uid)

From 837ba7ef4d0e87fa67ce09b765145003eaed40d2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Matth=C3=A4us=20Wander?= <mwander@swznet.de>
Date: Sun, 6 Jun 2021 16:35:07 +0200
Subject: [PATCH 23/31] Added splunk installation guide

---
 splunk/README.rst | 73 +++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 68 insertions(+), 5 deletions(-)

diff --git a/splunk/README.rst b/splunk/README.rst
index 44a8ccf..42198f5 100644
--- a/splunk/README.rst
+++ b/splunk/README.rst
@@ -1,9 +1,53 @@
-=================
-Splunk dashboards
-=================
+===================
+Splunk Installation
+===================
 
-Setup guide
------------
+Install Splunk for use with Docker
+----------------------------------
+
+Download latest Splunk image::
+
+  docker pull splunk/splunk:latest
+
+Run Splunk with Docker
+----------------------
+
+Listen on all network interfaces::
+
+  docker run -d -p 8000:8000 -p 8088:8088 -e "SPLUNK_START_ARGS=--accept-license" -e "SPLUNK_PASSWORD=password1234" -e "SPLUNK_HEC_TOKEN=hec-token-1234" --name splunk splunk/splunk:latest
+
+Listen on localhost for use with reverse proxy with base URL `/splunk`::
+
+  docker run -d -p 127.0.0.1:8000:8000 -p 127.0.0.1:8088:8088 -e "SPLUNK_START_ARGS=--accept-license" -e "SPLUNK_PASSWORD=password1234" -e "SPLUNK_HEC_TOKEN=hec-token-1234" -e "SPLUNK_ROOT_ENDPOINT=/splunk" --name splunk splunk/splunk:latest
+
+Set up reverse proxy, e.g. Apache2::
+
+  ProxyPass /splunk http://127.0.0.1:8000/splunk
+  ProxyPassReverse /splunk http://127.0.0.1:8000/splunk
+
+Splunk Configuration
+--------------------
+
+Access web UI at http://127.0.0.1:8000 and log in with `admin:password1234`.
+
+Create App and Index
+~~~~~~~~~~~~~~~~~~~~
+
+- Settings > Data > Indexes: New Index
+
+  - Index name: "email"
+
+- HEC token `hec-token-1234` should be already set up. 
+
+  - Check under Settings > Data > Data inputs: HTTP Event Collector
+
+- Apps > Manage Apps: Create app
+
+  - Name: "parsedmarc"
+  - Folder name: "parsedmarc"
+
+Create Dashboards
+~~~~~~~~~~~~~~~~~
 
 1. Navigate to the app you want to add the dashboards to, or create a new app called DMARC
 2. Click Dashboards
@@ -22,3 +66,22 @@ Setup guide
 15. Paste the content of ''dmarc_forensic_dashboard.xml`` into the source editor
 16. If the index storing the DMARC data is not named email, replace index="email" accordingly
 17. Click Save
+
+==============
+Example Config 
+==============
+
+parsedmarc.ini::
+
+  [splunk_hec]
+  url = https://127.0.0.1:8088/
+  token = hec-token-1234
+  index = email
+  skip_certificate_verification = True
+
+Note that `skip_certificate_verification = True` disables security checks.
+
+Run parsedmarc::
+
+  python3 -m parsedmarc.cli -c parsedmarc.ini
+  

From ca36db5f24626f239ac7f1b239f58f68b66a343b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Matth=C3=A4us=20Wander?= <mwander@swznet.de>
Date: Sun, 6 Jun 2021 16:44:40 +0200
Subject: [PATCH 24/31] Minor formatting

---
 splunk/README.rst | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/splunk/README.rst b/splunk/README.rst
index 42198f5..acf7bce 100644
--- a/splunk/README.rst
+++ b/splunk/README.rst
@@ -16,7 +16,7 @@ Listen on all network interfaces::
 
   docker run -d -p 8000:8000 -p 8088:8088 -e "SPLUNK_START_ARGS=--accept-license" -e "SPLUNK_PASSWORD=password1234" -e "SPLUNK_HEC_TOKEN=hec-token-1234" --name splunk splunk/splunk:latest
 
-Listen on localhost for use with reverse proxy with base URL `/splunk`::
+Listen on localhost for use with reverse proxy with base URL ``/splunk``::
 
   docker run -d -p 127.0.0.1:8000:8000 -p 127.0.0.1:8088:8088 -e "SPLUNK_START_ARGS=--accept-license" -e "SPLUNK_PASSWORD=password1234" -e "SPLUNK_HEC_TOKEN=hec-token-1234" -e "SPLUNK_ROOT_ENDPOINT=/splunk" --name splunk splunk/splunk:latest
 
@@ -28,7 +28,7 @@ Set up reverse proxy, e.g. Apache2::
 Splunk Configuration
 --------------------
 
-Access web UI at http://127.0.0.1:8000 and log in with `admin:password1234`.
+Access web UI at http://127.0.0.1:8000 and log in with ``admin:password1234``.
 
 Create App and Index
 ~~~~~~~~~~~~~~~~~~~~
@@ -37,7 +37,7 @@ Create App and Index
 
   - Index name: "email"
 
-- HEC token `hec-token-1234` should be already set up. 
+- HEC token ``hec-token-1234`` should be already set up. 
 
   - Check under Settings > Data > Data inputs: HTTP Event Collector
 
@@ -79,7 +79,7 @@ parsedmarc.ini::
   index = email
   skip_certificate_verification = True
 
-Note that `skip_certificate_verification = True` disables security checks.
+Note that ``skip_certificate_verification = True`` disables security checks.
 
 Run parsedmarc::
 

From 89816bbc6efa19176ed85acf3066cc7a5290260f Mon Sep 17 00:00:00 2001
From: Ubuntu <knightian@wkit-watchtower-core.wkit.com.au>
Date: Sun, 20 Jun 2021 03:58:46 +1000
Subject: [PATCH 25/31] fix what was broken in merge train

---
 parsedmarc/cli.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/parsedmarc/cli.py b/parsedmarc/cli.py
index 26c65b9..7bbecf6 100644
--- a/parsedmarc/cli.py
+++ b/parsedmarc/cli.py
@@ -736,9 +736,8 @@ def _main():
                 nameservers=opts.nameservers,
                 dns_timeout=opts.dns_timeout,
                 strip_attachment_payloads=sa,
-                batch_size=opts.imap_batch_size
-                offline=opts.offline,
-                strip_attachment_payloads=sa)
+                batch_size=opts.imap_batch_size,
+                offline=opts.offline)
         except FileExistsError as error:
             logger.error("{0}".format(error.__str__()))
             exit(1)

From 3d0f7c8c83ee4098e1bcaa95d82f995d7c9c9bd6 Mon Sep 17 00:00:00 2001
From: Sean Whalen <44679+seanthegeek@users.noreply.github.com>
Date: Sun, 20 Jun 2021 13:10:12 -0400
Subject: [PATCH 26/31] 7.0.0

Closes issues #221 #219 #155 #103
---
 CHANGELOG.md           | 13 +++++++
 README.rst             | 39 +++++++++++++------
 docs/index.rst         | 88 ++++++++++++++++++++++++------------------
 parsedmarc/__init__.py | 24 +++++++-----
 parsedmarc/cli.py      | 40 +++++++++++--------
 parsedmarc/utils.py    |  4 +-
 6 files changed, 131 insertions(+), 77 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index e4e8506..cfd49c2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,19 @@
 Changelog
 =========
 
+7.0.0
+-----
+
+- Fix issue #221: Crash when handling invalid reports without root node (PR #248)
+- Use UTC datetime objects for Elasticsearch output (PR #245) 
+- Fix issues #219, #155, and #103: IMAP connections break on large emails (PR #241)
+- Add support for saving reports to S3 buckets (PR #223)
+- Pass `offline` parameter to `watch_inbox()` (PR #216)
+- Add more details to logging (PR #220)
+- Add options customizing the names of output files (Modifications based on PR #225) 
+- Wait for 5 seconds before attempting to reconnect to an IMAP server (PR #217)
+- Add option to process messages in batches (PR #222)
+
 6.12.0
 ------
 
diff --git a/README.rst b/README.rst
index d0d41ab..517c50b 100644
--- a/README.rst
+++ b/README.rst
@@ -58,17 +58,20 @@ CLI help
 
 ::
 
-    usage: parsedmarc [-h] [-c CONFIG_FILE] [--strip-attachment-payloads]
-                      [-o OUTPUT] [-n NAMESERVERS [NAMESERVERS ...]]
-                      [-t DNS_TIMEOUT] [--offline] [-s] [--debug]
-                      [--log-file LOG_FILE] [-v]
-                      [file_path [file_path ...]]
+    usage: parsedmarc [-h] [-c CONFIG_FILE] [--strip-attachment-payloads] [-o OUTPUT]
+                      [--aggregate-json-filename AGGREGATE_JSON_FILENAME]
+                      [--forensic-json-filename FORENSIC_JSON_FILENAME]
+                      [--aggregate-csv-filename AGGREGATE_CSV_FILENAME]
+                      [--forensic-csv-filename FORENSIC_CSV_FILENAME]
+                      [-n NAMESERVERS [NAMESERVERS ...]] [-t DNS_TIMEOUT] [--offline]
+                      [-s] [--verbose] [--debug] [--log-file LOG_FILE] [-v]
+                      [file_path ...]
 
     Parses DMARC reports
 
     positional arguments:
       file_path             one or more paths to aggregate or forensic report
-                            files or emails
+                            files, emails, or mbox files'
 
     optional arguments:
       -h, --help            show this help message and exit
@@ -78,18 +81,27 @@ CLI help
                             remove attachment payloads from forensic report output
       -o OUTPUT, --output OUTPUT
                             write output files to the given directory
+      --aggregate-json-filename AGGREGATE_JSON_FILENAME
+                            filename for the aggregate JSON output file
+      --forensic-json-filename FORENSIC_JSON_FILENAME
+                            filename for the forensic JSON output file
+      --aggregate-csv-filename AGGREGATE_CSV_FILENAME
+                            filename for the aggregate CSV output file
+      --forensic-csv-filename FORENSIC_CSV_FILENAME
+                            filename for the forensic CSV output file
       -n NAMESERVERS [NAMESERVERS ...], --nameservers NAMESERVERS [NAMESERVERS ...]
-                            nameservers to query (default is Cloudflare's
-                            nameservers)
+                            nameservers to query
       -t DNS_TIMEOUT, --dns_timeout DNS_TIMEOUT
                             number of seconds to wait for an answer from DNS
                             (default: 2.0)
       --offline             do not make online queries for geolocation or DNS
       -s, --silent          only print errors and warnings
+      --verbose             more verbose output
       --debug               print debugging information
       --log-file LOG_FILE   output logging to a file
       -v, --version         show program's version number and exit
 
+
 .. note::
 
    In ``parsedmarc`` 6.0.0, most CLI options were moved to a configuration file, described below.
@@ -139,6 +151,8 @@ The full set of configuration options are:
     - ``save_forensic`` - bool: Save forensic report data to Elasticsearch, Splunk and/or S3
     - ``strip_attachment_payloads`` - bool: Remove attachment payloads from results
     - ``output`` - str: Directory to place JSON and CSV files in
+    - ``aggregate_json_filename`` - str: filename for the aggregate JSON output file
+    - ``forensic_json_filename`` - str: filename for the forensic JSON output file
     - ``offline`` - bool: Do not use online queries for geolocation or DNS
     - ``nameservers`` -  str: A comma separated list of DNS resolvers (Default: `Cloudflare's public resolvers`_)
     - ``dns_timeout`` - float: DNS timeout period
@@ -146,10 +160,14 @@ The full set of configuration options are:
     - ``silent`` - bool: Only print errors (Default: True)
     - ``log_file`` - str: Write log messages to a file at this path
     - ``n_procs`` - int: Number of process to run in parallel when parsing in CLI mode (Default: 1)
-    - ``chunk_size`` - int: Number of files to give to each process when running in parallel. Setting this to a number larger than one can improve performance when processing thousands of files
+    - ``chunk_size`` - int: Number of files to give to each process when running in parallel.
+      .. note::
+        Setting this to a number larger than one can improve performance when processing thousands of files
 - ``imap``
     - ``host`` - str: The IMAP server hostname or IP address
-    - ``port`` - int: The IMAP server port (Default: 993) If your Hoster publishes another port, still try 993. Otherwise Error:"wrong SSL version" 
+    - ``port`` - int: The IMAP server port (Default: 993).
+      .. note::
+        If your host recommends another port, still try 993
     - ``ssl`` - bool: Use an encrypted SSL/TLS connection (Default: True)
     - ``skip_certificate_verification`` - bool: Skip certificate verification (not recommended)
     - ``user`` - str: The IMAP user
@@ -162,7 +180,6 @@ The full set of configuration options are:
     - ``batch_size`` - int: Number of messages to read and process before saving. Defaults to all messages if not set.
 - ``elasticsearch``
     - ``hosts`` - str: A comma separated list of hostnames and ports or URLs (e.g. ``127.0.0.1:9200`` or ``https://user:secret@localhost``)
-
       .. note::
          Special characters in the username or password must be `URL encoded`_.
     - ``ssl`` - bool: Use an encrypted SSL/TLS connection (Default: True)
diff --git a/docs/index.rst b/docs/index.rst
index 568c11d..6e36e89 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -62,36 +62,48 @@ CLI help
 
 ::
 
-   usage: parsedmarc [-h] [-c CONFIG_FILE] [--strip-attachment-payloads]
-                     [-o OUTPUT] [-n NAMESERVERS [NAMESERVERS ...]]
-                     [-t DNS_TIMEOUT] [--offline] [-s] [--debug]
-                     [--log-file LOG_FILE] [-v]
-                     [file_path [file_path ...]]
+   usage: parsedmarc [-h] [-c CONFIG_FILE] [--strip-attachment-payloads] [-o OUTPUT]
+                      [--aggregate-json-filename AGGREGATE_JSON_FILENAME]
+                      [--forensic-json-filename FORENSIC_JSON_FILENAME]
+                      [--aggregate-csv-filename AGGREGATE_CSV_FILENAME]
+                      [--forensic-csv-filename FORENSIC_CSV_FILENAME]
+                      [-n NAMESERVERS [NAMESERVERS ...]] [-t DNS_TIMEOUT] [--offline]
+                      [-s] [--verbose] [--debug] [--log-file LOG_FILE] [-v]
+                      [file_path ...]
 
-   Parses DMARC reports
+    Parses DMARC reports
 
-   positional arguments:
-     file_path             one or more paths to aggregate or forensic report
-                           files or emails
+    positional arguments:
+      file_path             one or more paths to aggregate or forensic report
+                            files, emails, or mbox files
 
-   optional arguments:
-     -h, --help            show this help message and exit
-     -c CONFIG_FILE, --config-file CONFIG_FILE
-                           a path to a configuration file (--silent implied)
-     --strip-attachment-payloads
-                           remove attachment payloads from forensic report output
-     -o OUTPUT, --output OUTPUT
-                           write output files to the given directory
-     -n NAMESERVERS [NAMESERVERS ...], --nameservers NAMESERVERS [NAMESERVERS ...]
-                           nameservers to query
-     -t DNS_TIMEOUT, --dns_timeout DNS_TIMEOUT
-                           number of seconds to wait for an answer from DNS
-                           (default: 2.0)
-     --offline             do not make online queries for geolocation or DNS
-     -s, --silent          only print errors and warnings
-     --debug               print debugging information
-     --log-file LOG_FILE   output logging to a file
-     -v, --version         show program's version number and exit
+    optional arguments:
+      -h, --help            show this help message and exit
+      -c CONFIG_FILE, --config-file CONFIG_FILE
+                            a path to a configuration file (--silent implied)
+      --strip-attachment-payloads
+                            remove attachment payloads from forensic report output
+      -o OUTPUT, --output OUTPUT
+                            write output files to the given directory
+      --aggregate-json-filename AGGREGATE_JSON_FILENAME
+                            filename for the aggregate JSON output file
+      --forensic-json-filename FORENSIC_JSON_FILENAME
+                            filename for the forensic JSON output file
+      --aggregate-csv-filename AGGREGATE_CSV_FILENAME
+                            filename for the aggregate CSV output file
+      --forensic-csv-filename FORENSIC_CSV_FILENAME
+                            filename for the forensic CSV output file
+      -n NAMESERVERS [NAMESERVERS ...], --nameservers NAMESERVERS [NAMESERVERS ...]
+                            nameservers to query
+      -t DNS_TIMEOUT, --dns_timeout DNS_TIMEOUT
+                            number of seconds to wait for an answer from DNS
+                            (default: 2.0)
+      --offline             do not make online queries for geolocation or DNS
+      -s, --silent          only print errors and warnings
+      --verbose             more verbose output
+      --debug               print debugging information
+      --log-file LOG_FILE   output logging to a file
+      -v, --version         show program's version number and exit
 
 
 .. note::
@@ -139,10 +151,12 @@ For example
 The full set of configuration options are:
 
 - ``general``
-    - ``save_aggregate`` - bool: Save aggregate report data to the Elasticsearch, Splunk and/or S3
-    - ``save_forensic`` - bool: Save forensic report data to the Elasticsearch, Splunk and/or S3
+    - ``save_aggregate`` - bool: Save aggregate report data to Elasticsearch, Splunk and/or S3
+    - ``save_forensic`` - bool: Save forensic report data to Elasticsearch, Splunk and/or S3
     - ``strip_attachment_payloads`` - bool: Remove attachment payloads from results
     - ``output`` - str: Directory to place JSON and CSV files in
+    - ``aggregate_json_filename`` - str: filename for the aggregate JSON output file
+    - ``forensic_json_filename`` - str: filename for the forensic JSON output file
     - ``offline`` - bool: Do not use online queries for geolocation or DNS
     - ``nameservers`` -  str: A comma separated list of DNS resolvers (Default: `Cloudflare's public resolvers`_)
     - ``dns_timeout`` - float: DNS timeout period
@@ -150,16 +164,18 @@ The full set of configuration options are:
     - ``silent`` - bool: Only print errors (Default: True)
     - ``log_file`` - str: Write log messages to a file at this path
     - ``n_procs`` - int: Number of process to run in parallel when parsing in CLI mode (Default: 1)
-    - ``chunk_size`` - int: Number of files to give to each process when running in parallel. Setting this to a number larger than one can improve performance when processing thousands of files
+    - ``chunk_size`` - int: Number of files to give to each process when running in parallel.
+      .. note::
+        Setting this to a number larger than one can improve performance when processing thousands of files
 - ``imap``
     - ``host`` - str: The IMAP server hostname or IP address
-    - ``port`` - int: The IMAP server port (Default: 993)
+    - ``port`` - int: The IMAP server port (Default: 993).
+      .. note::
+        If your host recommends another port, still try 993
     - ``ssl`` - bool: Use an encrypted SSL/TLS connection (Default: True)
     - ``skip_certificate_verification`` - bool: Skip certificate verification (not recommended)
-    - ``timeout`` - float: Timeout in seconds to wait for an IMAP operation to complete (Default: 30)
-    - ``max_retries`` - int: The maximum number of retries after a timeout
     - ``user`` - str: The IMAP user
-    - ``password`` - str: The IMAP password (escape ``%`` with a second ``%``)
+    - ``password`` - str: The IMAP password
     - ``reports_folder`` - str: The IMAP folder where the incoming reports can be found (Default: INBOX)
     - ``archive_folder`` - str:  The IMAP folder to sort processed emails into (Default: Archive)
     - ``watch`` - bool: Use the IMAP ``IDLE`` command to process messages as they arrive
@@ -168,14 +184,10 @@ The full set of configuration options are:
     - ``batch_size`` - int: Number of messages to read and process before saving. Defaults to all messages if not set.
 - ``elasticsearch``
     - ``hosts`` - str: A comma separated list of hostnames and ports or URLs (e.g. ``127.0.0.1:9200`` or ``https://user:secret@localhost``)
-
       .. note::
          Special characters in the username or password must be `URL encoded`_.
     - ``ssl`` - bool: Use an encrypted SSL/TLS connection (Default: True)
-    - ``user`` - str: Basic auth username
-    - ``password`` - str: Basic auth password
     - ``cert_path`` - str: Path to a trusted certificates
-    - ``timeout`` - float: Timeout in seconds (Default: 60)
     - ``index_suffix`` - str: A suffix to apply to the index names
     - ``monthly_indexes`` - bool: Use monthly indexes instead of daily indexes
     - ``number_of_shards`` - int: The number of shards to use when creating the index (Default: 1)
diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py
index a07209e..f601b7a 100644
--- a/parsedmarc/__init__.py
+++ b/parsedmarc/__init__.py
@@ -36,7 +36,7 @@ from parsedmarc.utils import is_outlook_msg, convert_outlook_msg
 from parsedmarc.utils import timestamp_to_human, human_timestamp_to_datetime
 from parsedmarc.utils import parse_email
 
-__version__ = "6.12.0"
+__version__ = "7.0.0"
 
 logging.basicConfig(
     format='%(levelname)8s:%(filename)s:%(lineno)d:'
@@ -1274,16 +1274,20 @@ def watch_inbox(host, username, password, callback, port=None, ssl=True,
 
 
 def save_output(results, output_directory="output",
-                output_json_aggregate="aggregate.json",
-                output_json_forensic="forensic.json",
-                output_csv_aggregate="aggregate.csv",
-                output_csv_forensic="forensic.csv"):
+                aggregate_json_filename="aggregate.json",
+                forensic_json_filename="forensic.json",
+                aggregate_csv_filename="aggregate.csv",
+                forensic_csv_filename="forensic.csv"):
     """
     Save report data in the given directory
 
     Args:
         results (OrderedDict): Parsing results
-        output_directory: The patch to the directory to save in
+        output_directory (str): The patch to the directory to save in
+        aggregate_json_filename (str): Output filename for the aggregate JSON report
+        forensic_json_filename (str): Output filename for the forensic JSON report
+        aggregate_csv_filename (str):  Output filename for the aggregate CSV report
+        forensic_csv_filename (str):  Output filename for the forensic CSV report
     """
 
     aggregate_reports = results["aggregate_reports"]
@@ -1297,28 +1301,28 @@ def save_output(results, output_directory="output",
 
     with open("{0}"
               .format(os.path.join(output_directory,
-                                   output_json_aggregate)),
+                                   aggregate_json_filename)),
               "w", newline="\n", encoding="utf-8") as agg_json:
         agg_json.write(json.dumps(aggregate_reports, ensure_ascii=False,
                                   indent=2))
 
     with open("{0}"
               .format(os.path.join(output_directory,
-                                   output_csv_aggregate)),
+                                   aggregate_csv_filename)),
               "w", newline="\n", encoding="utf-8") as agg_csv:
         csv = parsed_aggregate_reports_to_csv(aggregate_reports)
         agg_csv.write(csv)
 
     with open("{0}"
               .format(os.path.join(output_directory,
-                                   output_json_forensic)),
+                                   forensic_json_filename)),
               "w", newline="\n", encoding="utf-8") as for_json:
         for_json.write(json.dumps(forensic_reports, ensure_ascii=False,
                                   indent=2))
 
     with open("{0}"
               .format(os.path.join(output_directory,
-                                   output_csv_forensic)),
+                                   forensic_csv_filename)),
               "w", newline="\n", encoding="utf-8") as for_csv:
         csv = parsed_forensic_reports_to_csv(forensic_reports)
         for_csv.write(csv)
diff --git a/parsedmarc/cli.py b/parsedmarc/cli.py
index 7bbecf6..ae739fe 100644
--- a/parsedmarc/cli.py
+++ b/parsedmarc/cli.py
@@ -178,17 +178,17 @@ def _main():
                             help=strip_attachment_help, action="store_true")
     arg_parser.add_argument("-o", "--output",
                             help="write output files to the given directory")
-    arg_parser.add_argument("--output-json-aggregate",
-                            help="output aggregate JSON file",
+    arg_parser.add_argument("--aggregate-json-filename",
+                            help="filename for the aggregate JSON output file",
                             default="aggregate.json")
-    arg_parser.add_argument("--output-json-forensic",
-                            help="output forensic JSON file",
+    arg_parser.add_argument("--forensic-json-filename",
+                            help="filename for the forensic JSON output file",
                             default="forensic.json")
-    arg_parser.add_argument("--output-csv-aggregate",
-                            help="output aggregate CSV file",
+    arg_parser.add_argument("--aggregate-csv-filename",
+                            help="filename for the aggregate CSV output file",
                             default="aggregate.csv")
-    arg_parser.add_argument("--output-csv-forensic",
-                            help="output forensic CSV file",
+    arg_parser.add_argument("--forensic-csv-filename",
+                            help="filename for the forensic CSV output file",
                             default="forensic.csv")
     arg_parser.add_argument("-n", "--nameservers", nargs="+",
                             help="nameservers to query")
@@ -221,10 +221,10 @@ def _main():
                      offline=args.offline,
                      strip_attachment_payloads=args.strip_attachment_payloads,
                      output=args.output,
-                     output_json_aggregate=args.output_json_aggregate,
-                     output_json_forensic=args.output_json_forensic,
-                     output_csv_aggregate=args.output_csv_aggregate,
-                     output_csv_forensic=args.output_csv_forensic,
+                     aggregate_csv_filename=args.aggregate_csv_filename,
+                     aggreate_json_filename=args.aggregate_json_filename,
+                     forensic_csv_filename=args.forensic_csv_filename,
+                     forensic_json_filename=args.forensic_json_filename,
                      nameservers=args.nameservers,
                      silent=args.silent,
                      dns_timeout=args.dns_timeout,
@@ -302,6 +302,14 @@ def _main():
                     "strip_attachment_payloads"]
             if "output" in general_config:
                 opts.output = general_config["output"]
+            if "aggregate_json_filename" in general_config:
+                opts.aggregate_json_filename = general_config["aggregate_json_filename"]
+            if "forensic_json_filename" in general_config:
+                opts.forensic_json_filename = general_config["forensic_json_filename"]
+            if "aggregate_csv_filename" in general_config:
+                opts.aggregate_csv_filename = general_config["aggregate_csv_filename"]
+            if "forensic_csv_filename" in general_config:
+                opts.forensic_csv_filename = general_config["forensic_csv_filename"]
             if "nameservers" in general_config:
                 opts.nameservers = _str_to_list(general_config["nameservers"])
             if "dns_timeout" in general_config:
@@ -689,10 +697,10 @@ def _main():
 
     if opts.output:
         save_output(results, output_directory=opts.output,
-                    output_json_aggregate=opts.output_json_aggregate,
-                    output_json_forensic=opts.output_json_forensic,
-                    output_csv_aggregate=opts.output_csv_aggregate,
-                    output_csv_forensic=opts.output_csv_forensic)
+                    aggregate_json_filename=opts.aggregate_json_filename,
+                    forensic_json_filename=opts.forensic_json_filename,
+                    aggregate_csv_filename=opts.aggregate_csv_filename,
+                    forensic_csv_filename=opts.forensic_csv_filename)
 
     process_reports(results)
 
diff --git a/parsedmarc/utils.py b/parsedmarc/utils.py
index dbf525f..6b5f980 100644
--- a/parsedmarc/utils.py
+++ b/parsedmarc/utils.py
@@ -157,7 +157,7 @@ def query_dns(domain, record_type, cache=None, nameservers=None, timeout=2.0):
     if record_type == "TXT":
         resource_records = list(map(
             lambda r: r.strings,
-            resolver.query(domain, record_type, lifetime=timeout)))
+            resolver.resolve(domain, record_type, lifetime=timeout)))
         _resource_record = [
             resource_record[0][:0].join(resource_record)
             for resource_record in resource_records if resource_record]
@@ -165,7 +165,7 @@ def query_dns(domain, record_type, cache=None, nameservers=None, timeout=2.0):
     else:
         records = list(map(
             lambda r: r.to_text().replace('"', '').rstrip("."),
-            resolver.query(domain, record_type, lifetime=timeout)))
+            resolver.resolve(domain, record_type, lifetime=timeout)))
     if cache:
         cache[cache_key] = records
 

From e841a49ca71c6e977b2c8755b69b1992c86a9109 Mon Sep 17 00:00:00 2001
From: Sean Whalen <44679+seanthegeek@users.noreply.github.com>
Date: Sun, 20 Jun 2021 14:24:49 -0400
Subject: [PATCH 27/31] Fix documentation formatting

---
 README.rst     | 9 ++++++---
 docs/index.rst | 9 ++++++---
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/README.rst b/README.rst
index 517c50b..6e3f18c 100644
--- a/README.rst
+++ b/README.rst
@@ -161,12 +161,14 @@ The full set of configuration options are:
     - ``log_file`` - str: Write log messages to a file at this path
     - ``n_procs`` - int: Number of process to run in parallel when parsing in CLI mode (Default: 1)
     - ``chunk_size`` - int: Number of files to give to each process when running in parallel.
-      .. note::
+
+    .. note::
         Setting this to a number larger than one can improve performance when processing thousands of files
 - ``imap``
     - ``host`` - str: The IMAP server hostname or IP address
     - ``port`` - int: The IMAP server port (Default: 993).
-      .. note::
+
+    .. note::
         If your host recommends another port, still try 993
     - ``ssl`` - bool: Use an encrypted SSL/TLS connection (Default: True)
     - ``skip_certificate_verification`` - bool: Skip certificate verification (not recommended)
@@ -180,7 +182,8 @@ The full set of configuration options are:
     - ``batch_size`` - int: Number of messages to read and process before saving. Defaults to all messages if not set.
 - ``elasticsearch``
     - ``hosts`` - str: A comma separated list of hostnames and ports or URLs (e.g. ``127.0.0.1:9200`` or ``https://user:secret@localhost``)
-      .. note::
+
+    .. note::
          Special characters in the username or password must be `URL encoded`_.
     - ``ssl`` - bool: Use an encrypted SSL/TLS connection (Default: True)
     - ``cert_path`` - str: Path to a trusted certificates
diff --git a/docs/index.rst b/docs/index.rst
index 6e36e89..e84c3b5 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -165,12 +165,14 @@ The full set of configuration options are:
     - ``log_file`` - str: Write log messages to a file at this path
     - ``n_procs`` - int: Number of process to run in parallel when parsing in CLI mode (Default: 1)
     - ``chunk_size`` - int: Number of files to give to each process when running in parallel.
-      .. note::
+
+    .. note::
         Setting this to a number larger than one can improve performance when processing thousands of files
 - ``imap``
     - ``host`` - str: The IMAP server hostname or IP address
     - ``port`` - int: The IMAP server port (Default: 993).
-      .. note::
+
+    .. note::
         If your host recommends another port, still try 993
     - ``ssl`` - bool: Use an encrypted SSL/TLS connection (Default: True)
     - ``skip_certificate_verification`` - bool: Skip certificate verification (not recommended)
@@ -184,7 +186,8 @@ The full set of configuration options are:
     - ``batch_size`` - int: Number of messages to read and process before saving. Defaults to all messages if not set.
 - ``elasticsearch``
     - ``hosts`` - str: A comma separated list of hostnames and ports or URLs (e.g. ``127.0.0.1:9200`` or ``https://user:secret@localhost``)
-      .. note::
+
+    .. note::
          Special characters in the username or password must be `URL encoded`_.
     - ``ssl`` - bool: Use an encrypted SSL/TLS connection (Default: True)
     - ``cert_path`` - str: Path to a trusted certificates

From 6d5f8a9ec3b0ce40087106649f8560537e874836 Mon Sep 17 00:00:00 2001
From: Sean Whalen <44679+seanthegeek@users.noreply.github.com>
Date: Sun, 20 Jun 2021 15:45:15 -0400
Subject: [PATCH 28/31] Fix documentation formatting

---
 README.rst     | 3 +++
 docs/index.rst | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/README.rst b/README.rst
index 6e3f18c..3d5684e 100644
--- a/README.rst
+++ b/README.rst
@@ -165,11 +165,13 @@ The full set of configuration options are:
     .. note::
         Setting this to a number larger than one can improve performance when processing thousands of files
 - ``imap``
+
     - ``host`` - str: The IMAP server hostname or IP address
     - ``port`` - int: The IMAP server port (Default: 993).
 
     .. note::
         If your host recommends another port, still try 993
+
     - ``ssl`` - bool: Use an encrypted SSL/TLS connection (Default: True)
     - ``skip_certificate_verification`` - bool: Skip certificate verification (not recommended)
     - ``user`` - str: The IMAP user
@@ -185,6 +187,7 @@ The full set of configuration options are:
 
     .. note::
          Special characters in the username or password must be `URL encoded`_.
+
     - ``ssl`` - bool: Use an encrypted SSL/TLS connection (Default: True)
     - ``cert_path`` - str: Path to a trusted certificates
     - ``index_suffix`` - str: A suffix to apply to the index names
diff --git a/docs/index.rst b/docs/index.rst
index e84c3b5..fa9cd62 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -169,11 +169,13 @@ The full set of configuration options are:
     .. note::
         Setting this to a number larger than one can improve performance when processing thousands of files
 - ``imap``
+
     - ``host`` - str: The IMAP server hostname or IP address
     - ``port`` - int: The IMAP server port (Default: 993).
 
     .. note::
         If your host recommends another port, still try 993
+
     - ``ssl`` - bool: Use an encrypted SSL/TLS connection (Default: True)
     - ``skip_certificate_verification`` - bool: Skip certificate verification (not recommended)
     - ``user`` - str: The IMAP user
@@ -189,6 +191,7 @@ The full set of configuration options are:
 
     .. note::
          Special characters in the username or password must be `URL encoded`_.
+
     - ``ssl`` - bool: Use an encrypted SSL/TLS connection (Default: True)
     - ``cert_path`` - str: Path to a trusted certificates
     - ``index_suffix`` - str: A suffix to apply to the index names

From 4c5a266f19abe261c45eb5eed1031d3449ecf12d Mon Sep 17 00:00:00 2001
From: Sean Whalen <44679+seanthegeek@users.noreply.github.com>
Date: Sun, 20 Jun 2021 19:07:18 -0400
Subject: [PATCH 29/31] PEP 8 fixes

---
 parsedmarc/__init__.py |  8 ++++----
 parsedmarc/cli.py      | 12 ++++++++----
 parsedmarc/elastic.py  |  6 ++++--
 3 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py
index f601b7a..d798225 100644
--- a/parsedmarc/__init__.py
+++ b/parsedmarc/__init__.py
@@ -1284,10 +1284,10 @@ def save_output(results, output_directory="output",
     Args:
         results (OrderedDict): Parsing results
         output_directory (str): The patch to the directory to save in
-        aggregate_json_filename (str): Output filename for the aggregate JSON report
-        forensic_json_filename (str): Output filename for the forensic JSON report
-        aggregate_csv_filename (str):  Output filename for the aggregate CSV report
-        forensic_csv_filename (str):  Output filename for the forensic CSV report
+        aggregate_json_filename (str): Filename for the aggregate JSON file
+        forensic_json_filename (str): Filename for the forensic JSON file
+        aggregate_csv_filename (str): Filename for the aggregate CSV file
+        forensic_csv_filename (str): Filename for the forensic CSV file
     """
 
     aggregate_reports = results["aggregate_reports"]
diff --git a/parsedmarc/cli.py b/parsedmarc/cli.py
index ae739fe..374e6af 100644
--- a/parsedmarc/cli.py
+++ b/parsedmarc/cli.py
@@ -303,13 +303,17 @@ def _main():
             if "output" in general_config:
                 opts.output = general_config["output"]
             if "aggregate_json_filename" in general_config:
-                opts.aggregate_json_filename = general_config["aggregate_json_filename"]
+                opts.aggregate_json_filename = general_config[
+                    "aggregate_json_filename"]
             if "forensic_json_filename" in general_config:
-                opts.forensic_json_filename = general_config["forensic_json_filename"]
+                opts.forensic_json_filename = general_config[
+                    "forensic_json_filename"]
             if "aggregate_csv_filename" in general_config:
-                opts.aggregate_csv_filename = general_config["aggregate_csv_filename"]
+                opts.aggregate_csv_filename = general_config[
+                    "aggregate_csv_filename"]
             if "forensic_csv_filename" in general_config:
-                opts.forensic_csv_filename = general_config["forensic_csv_filename"]
+                opts.forensic_csv_filename = general_config[
+                    "forensic_csv_filename"]
             if "nameservers" in general_config:
                 opts.nameservers = _str_to_list(general_config["nameservers"])
             if "dns_timeout" in general_config:
diff --git a/parsedmarc/elastic.py b/parsedmarc/elastic.py
index 992d02c..7085983 100644
--- a/parsedmarc/elastic.py
+++ b/parsedmarc/elastic.py
@@ -301,8 +301,10 @@ def save_aggregate_report_to_elasticsearch(aggregate_report,
     org_name = metadata["org_name"]
     report_id = metadata["report_id"]
     domain = aggregate_report["policy_published"]["domain"]
-    begin_date = human_timestamp_to_datetime(metadata["begin_date"], to_utc=True)
-    end_date = human_timestamp_to_datetime(metadata["end_date"], to_utc=True)
+    begin_date = human_timestamp_to_datetime(metadata["begin_date"],
+                                             to_utc=True)
+    end_date = human_timestamp_to_datetime(metadata["end_date"],
+                                           to_utc=True)
     begin_date_human = begin_date.strftime("%Y-%m-%d %H:%M:%SZ")
     end_date_human = end_date.strftime("%Y-%m-%d %H:%M:%SZ")
     if monthly_indexes:

From a10e6592fe4e433a9770f37257569bc7855f8f03 Mon Sep 17 00:00:00 2001
From: Casper da Costa-Luis <casper.dcl@physics.org>
Date: Wed, 23 Jun 2021 12:06:29 +0100
Subject: [PATCH 30/31] fix startup bug

Pretty silly typo means `parsedmarc` completely fails unless `parsedmarc.ini` has `general.aggregate_json_filename` explicitly set
---
 parsedmarc/cli.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/parsedmarc/cli.py b/parsedmarc/cli.py
index 374e6af..be2026a 100644
--- a/parsedmarc/cli.py
+++ b/parsedmarc/cli.py
@@ -222,7 +222,7 @@ def _main():
                      strip_attachment_payloads=args.strip_attachment_payloads,
                      output=args.output,
                      aggregate_csv_filename=args.aggregate_csv_filename,
-                     aggreate_json_filename=args.aggregate_json_filename,
+                     aggregate_json_filename=args.aggregate_json_filename,
                      forensic_csv_filename=args.forensic_csv_filename,
                      forensic_json_filename=args.forensic_json_filename,
                      nameservers=args.nameservers,

From 6d689ca8f552d9639fca2773ee540ae95e244203 Mon Sep 17 00:00:00 2001
From: Sean Whalen <44679+seanthegeek@users.noreply.github.com>
Date: Wed, 23 Jun 2021 15:03:12 -0400
Subject: [PATCH 31/31] 7.0.1

---
 CHANGELOG.md           | 5 +++++
 parsedmarc/__init__.py | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index cfd49c2..8cd66b3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,11 @@
 Changelog
 =========
 
+7.0.1
+-----
+
+- Fix startup error (PR #254)
+
 7.0.0
 -----
 
diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py
index d798225..2750819 100644
--- a/parsedmarc/__init__.py
+++ b/parsedmarc/__init__.py
@@ -36,7 +36,7 @@ from parsedmarc.utils import is_outlook_msg, convert_outlook_msg
 from parsedmarc.utils import timestamp_to_human, human_timestamp_to_datetime
 from parsedmarc.utils import parse_email
 
-__version__ = "7.0.0"
+__version__ = "7.0.1"
 
 logging.basicConfig(
     format='%(levelname)8s:%(filename)s:%(lineno)d:'