From 3d0f7c8c83ee4098e1bcaa95d82f995d7c9c9bd6 Mon Sep 17 00:00:00 2001 From: Sean Whalen <44679+seanthegeek@users.noreply.github.com> Date: Sun, 20 Jun 2021 13:10:12 -0400 Subject: [PATCH] 7.0.0 Closes issues #221 #219 #155 #103 --- CHANGELOG.md | 13 +++++++ README.rst | 39 +++++++++++++------ docs/index.rst | 88 ++++++++++++++++++++++++------------------ parsedmarc/__init__.py | 24 +++++++----- parsedmarc/cli.py | 40 +++++++++++-------- parsedmarc/utils.py | 4 +- 6 files changed, 131 insertions(+), 77 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e4e8506..cfd49c2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,19 @@ Changelog ========= +7.0.0 +----- + +- Fix issue #221: Crash when handling invalid reports without root node (PR #248) +- Use UTC datetime objects for Elasticsearch output (PR #245) +- Fix issues #219, #155, and #103: IMAP connections break on large emails (PR #241) +- Add support for saving reports to S3 buckets (PR #223) +- Pass `offline` parameter to `wait_inbox()` (PR #216) +- Add more details to logging (PR #220) +- Add options customizing the names of output files (Modifications based on PR #225) +- Wait for 5 seconds before attempting to reconnect to an IMAP server (PR #217) +- Add option to process messages in batches (PR #222) + 6.12.0 ------ diff --git a/README.rst b/README.rst index d0d41ab..517c50b 100644 --- a/README.rst +++ b/README.rst @@ -58,17 +58,20 @@ CLI help :: - usage: parsedmarc [-h] [-c CONFIG_FILE] [--strip-attachment-payloads] - [-o OUTPUT] [-n NAMESERVERS [NAMESERVERS ...]] - [-t DNS_TIMEOUT] [--offline] [-s] [--debug] - [--log-file LOG_FILE] [-v] - [file_path [file_path ...]] + usage: parsedmarc [-h] [-c CONFIG_FILE] [--strip-attachment-payloads] [-o OUTPUT] + [--aggregate-json-filename AGGREGATE_JSON_FILENAME] + [--forensic-json-filename FORENSIC_JSON_FILENAME] + [--aggregate-csv-filename AGGREGATE_CSV_FILENAME] + [--forensic-csv-filename FORENSIC_CSV_FILENAME] + [-n NAMESERVERS [NAMESERVERS ...]] [-t 
DNS_TIMEOUT] [--offline] + [-s] [--verbose] [--debug] [--log-file LOG_FILE] [-v] + [file_path ...] Parses DMARC reports positional arguments: file_path one or more paths to aggregate or forensic report - files or emails + files, emails, or mbox files optional arguments: -h, --help show this help message and exit @@ -78,18 +81,27 @@ CLI help remove attachment payloads from forensic report output -o OUTPUT, --output OUTPUT write output files to the given directory + --aggregate-json-filename AGGREGATE_JSON_FILENAME + filename for the aggregate JSON output file + --forensic-json-filename FORENSIC_JSON_FILENAME + filename for the forensic JSON output file + --aggregate-csv-filename AGGREGATE_CSV_FILENAME + filename for the aggregate CSV output file + --forensic-csv-filename FORENSIC_CSV_FILENAME + filename for the forensic CSV output file -n NAMESERVERS [NAMESERVERS ...], --nameservers NAMESERVERS [NAMESERVERS ...] - nameservers to query (default is Cloudflare's - nameservers) + nameservers to query -t DNS_TIMEOUT, --dns_timeout DNS_TIMEOUT number of seconds to wait for an answer from DNS (default: 2.0) --offline do not make online queries for geolocation or DNS -s, --silent only print errors and warnings + --verbose more verbose output --debug print debugging information --log-file LOG_FILE output logging to a file -v, --version show program's version number and exit + .. note:: In ``parsedmarc`` 6.0.0, most CLI options were moved to a configuration file, described below. 
@@ -139,6 +151,8 @@ The full set of configuration options are: - ``save_forensic`` - bool: Save forensic report data to Elasticsearch, Splunk and/or S3 - ``strip_attachment_payloads`` - bool: Remove attachment payloads from results - ``output`` - str: Directory to place JSON and CSV files in + - ``aggregate_json_filename`` - str: filename for the aggregate JSON output file + - ``forensic_json_filename`` - str: filename for the forensic JSON output file - ``offline`` - bool: Do not use online queries for geolocation or DNS - ``nameservers`` - str: A comma separated list of DNS resolvers (Default: `Cloudflare's public resolvers`_) - ``dns_timeout`` - float: DNS timeout period @@ -146,10 +160,14 @@ The full set of configuration options are: - ``silent`` - bool: Only print errors (Default: True) - ``log_file`` - str: Write log messages to a file at this path - ``n_procs`` - int: Number of process to run in parallel when parsing in CLI mode (Default: 1) - - ``chunk_size`` - int: Number of files to give to each process when running in parallel. Setting this to a number larger than one can improve performance when processing thousands of files + - ``chunk_size`` - int: Number of files to give to each process when running in parallel. + .. note:: + Setting this to a number larger than one can improve performance when processing thousands of files - ``imap`` - ``host`` - str: The IMAP server hostname or IP address - - ``port`` - int: The IMAP server port (Default: 993) If your Hoster publishes another port, still try 993. Otherwise Error:"wrong SSL version" + - ``port`` - int: The IMAP server port (Default: 993). + .. 
note:: + If your host recommends another port, still try 993 - ``ssl`` - bool: Use an encrypted SSL/TLS connection (Default: True) - ``skip_certificate_verification`` - bool: Skip certificate verification (not recommended) - ``user`` - str: The IMAP user @@ -162,7 +180,6 @@ The full set of configuration options are: - ``batch_size`` - int: Number of messages to read and process before saving. Defaults to all messages if not set. - ``elasticsearch`` - ``hosts`` - str: A comma separated list of hostnames and ports or URLs (e.g. ``127.0.0.1:9200`` or ``https://user:secret@localhost``) - .. note:: Special characters in the username or password must be `URL encoded`_. - ``ssl`` - bool: Use an encrypted SSL/TLS connection (Default: True) diff --git a/docs/index.rst b/docs/index.rst index 568c11d..6e36e89 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -62,36 +62,48 @@ CLI help :: - usage: parsedmarc [-h] [-c CONFIG_FILE] [--strip-attachment-payloads] - [-o OUTPUT] [-n NAMESERVERS [NAMESERVERS ...]] - [-t DNS_TIMEOUT] [--offline] [-s] [--debug] - [--log-file LOG_FILE] [-v] - [file_path [file_path ...]] + usage: parsedmarc [-h] [-c CONFIG_FILE] [--strip-attachment-payloads] [-o OUTPUT] + [--aggregate-json-filename AGGREGATE_JSON_FILENAME] + [--forensic-json-filename FORENSIC_JSON_FILENAME] + [--aggregate-csv-filename AGGREGATE_CSV_FILENAME] + [--forensic-csv-filename FORENSIC_CSV_FILENAME] + [-n NAMESERVERS [NAMESERVERS ...]] [-t DNS_TIMEOUT] [--offline] + [-s] [--verbose] [--debug] [--log-file LOG_FILE] [-v] + [file_path ...] 
- Parses DMARC reports + Parses DMARC reports - positional arguments: - file_path one or more paths to aggregate or forensic report - files or emails + positional arguments: + file_path one or more paths to aggregate or forensic report + files, emails, or mbox files - optional arguments: - -h, --help show this help message and exit - -c CONFIG_FILE, --config-file CONFIG_FILE - a path to a configuration file (--silent implied) - --strip-attachment-payloads - remove attachment payloads from forensic report output - -o OUTPUT, --output OUTPUT - write output files to the given directory - -n NAMESERVERS [NAMESERVERS ...], --nameservers NAMESERVERS [NAMESERVERS ...] - nameservers to query - -t DNS_TIMEOUT, --dns_timeout DNS_TIMEOUT - number of seconds to wait for an answer from DNS - (default: 2.0) - --offline do not make online queries for geolocation or DNS - -s, --silent only print errors and warnings - --debug print debugging information - --log-file LOG_FILE output logging to a file - -v, --version show program's version number and exit + optional arguments: + -h, --help show this help message and exit + -c CONFIG_FILE, --config-file CONFIG_FILE + a path to a configuration file (--silent implied) + --strip-attachment-payloads + remove attachment payloads from forensic report output + -o OUTPUT, --output OUTPUT + write output files to the given directory + --aggregate-json-filename AGGREGATE_JSON_FILENAME + filename for the aggregate JSON output file + --forensic-json-filename FORENSIC_JSON_FILENAME + filename for the forensic JSON output file + --aggregate-csv-filename AGGREGATE_CSV_FILENAME + filename for the aggregate CSV output file + --forensic-csv-filename FORENSIC_CSV_FILENAME + filename for the forensic CSV output file + -n NAMESERVERS [NAMESERVERS ...], --nameservers NAMESERVERS [NAMESERVERS ...] 
+ nameservers to query + -t DNS_TIMEOUT, --dns_timeout DNS_TIMEOUT + number of seconds to wait for an answer from DNS + (default: 2.0) + --offline do not make online queries for geolocation or DNS + -s, --silent only print errors and warnings + --verbose more verbose output + --debug print debugging information + --log-file LOG_FILE output logging to a file + -v, --version show program's version number and exit .. note:: @@ -139,10 +151,12 @@ For example The full set of configuration options are: - ``general`` - - ``save_aggregate`` - bool: Save aggregate report data to the Elasticsearch, Splunk and/or S3 - - ``save_forensic`` - bool: Save forensic report data to the Elasticsearch, Splunk and/or S3 + - ``save_aggregate`` - bool: Save aggregate report data to Elasticsearch, Splunk and/or S3 + - ``save_forensic`` - bool: Save forensic report data to Elasticsearch, Splunk and/or S3 - ``strip_attachment_payloads`` - bool: Remove attachment payloads from results - ``output`` - str: Directory to place JSON and CSV files in + - ``aggregate_json_filename`` - str: filename for the aggregate JSON output file + - ``forensic_json_filename`` - str: filename for the forensic JSON output file - ``offline`` - bool: Do not use online queries for geolocation or DNS - ``nameservers`` - str: A comma separated list of DNS resolvers (Default: `Cloudflare's public resolvers`_) - ``dns_timeout`` - float: DNS timeout period @@ -150,16 +164,18 @@ The full set of configuration options are: - ``silent`` - bool: Only print errors (Default: True) - ``log_file`` - str: Write log messages to a file at this path - ``n_procs`` - int: Number of process to run in parallel when parsing in CLI mode (Default: 1) - - ``chunk_size`` - int: Number of files to give to each process when running in parallel. Setting this to a number larger than one can improve performance when processing thousands of files + - ``chunk_size`` - int: Number of files to give to each process when running in parallel. + .. 
note:: + Setting this to a number larger than one can improve performance when processing thousands of files - ``imap`` - ``host`` - str: The IMAP server hostname or IP address - - ``port`` - int: The IMAP server port (Default: 993) + - ``port`` - int: The IMAP server port (Default: 993). + .. note:: + If your host recommends another port, still try 993 - ``ssl`` - bool: Use an encrypted SSL/TLS connection (Default: True) - ``skip_certificate_verification`` - bool: Skip certificate verification (not recommended) - - ``timeout`` - float: Timeout in seconds to wait for an IMAP operation to complete (Default: 30) - - ``max_retries`` - int: The maximum number of retries after a timeout - ``user`` - str: The IMAP user - - ``password`` - str: The IMAP password (escape ``%`` with a second ``%``) + - ``password`` - str: The IMAP password - ``reports_folder`` - str: The IMAP folder where the incoming reports can be found (Default: INBOX) - ``archive_folder`` - str: The IMAP folder to sort processed emails into (Default: Archive) - ``watch`` - bool: Use the IMAP ``IDLE`` command to process messages as they arrive @@ -168,14 +184,10 @@ The full set of configuration options are: - ``batch_size`` - int: Number of messages to read and process before saving. Defaults to all messages if not set. - ``elasticsearch`` - ``hosts`` - str: A comma separated list of hostnames and ports or URLs (e.g. ``127.0.0.1:9200`` or ``https://user:secret@localhost``) - .. note:: Special characters in the username or password must be `URL encoded`_. 
- ``ssl`` - bool: Use an encrypted SSL/TLS connection (Default: True) - - ``user`` - str: Basic auth username - - ``password`` - str: Basic auth password - ``cert_path`` - str: Path to a trusted certificates - - ``timeout`` - float: Timeout in seconds (Default: 60) - ``index_suffix`` - str: A suffix to apply to the index names - ``monthly_indexes`` - bool: Use monthly indexes instead of daily indexes - ``number_of_shards`` - int: The number of shards to use when creating the index (Default: 1) diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py index a07209e..f601b7a 100644 --- a/parsedmarc/__init__.py +++ b/parsedmarc/__init__.py @@ -36,7 +36,7 @@ from parsedmarc.utils import is_outlook_msg, convert_outlook_msg from parsedmarc.utils import timestamp_to_human, human_timestamp_to_datetime from parsedmarc.utils import parse_email -__version__ = "6.12.0" +__version__ = "7.0.0" logging.basicConfig( format='%(levelname)8s:%(filename)s:%(lineno)d:' @@ -1274,16 +1274,20 @@ def watch_inbox(host, username, password, callback, port=None, ssl=True, def save_output(results, output_directory="output", - output_json_aggregate="aggregate.json", - output_json_forensic="forensic.json", - output_csv_aggregate="aggregate.csv", - output_csv_forensic="forensic.csv"): + aggregate_json_filename="aggregate.json", + forensic_json_filename="forensic.json", + aggregate_csv_filename="aggregate.csv", + forensic_csv_filename="forensic.csv"): """ Save report data in the given directory Args: results (OrderedDict): Parsing results - output_directory: The patch to the directory to save in + output_directory (str): The path to the directory to save in + aggregate_json_filename (str): Output filename for the aggregate JSON report + forensic_json_filename (str): Output filename for the forensic JSON report + aggregate_csv_filename (str): Output filename for the aggregate CSV report + forensic_csv_filename (str): Output filename for the forensic CSV report """ aggregate_reports = 
results["aggregate_reports"] @@ -1297,28 +1301,28 @@ def save_output(results, output_directory="output", with open("{0}" .format(os.path.join(output_directory, - output_json_aggregate)), + aggregate_json_filename)), "w", newline="\n", encoding="utf-8") as agg_json: agg_json.write(json.dumps(aggregate_reports, ensure_ascii=False, indent=2)) with open("{0}" .format(os.path.join(output_directory, - output_csv_aggregate)), + aggregate_csv_filename)), "w", newline="\n", encoding="utf-8") as agg_csv: csv = parsed_aggregate_reports_to_csv(aggregate_reports) agg_csv.write(csv) with open("{0}" .format(os.path.join(output_directory, - output_json_forensic)), + forensic_json_filename)), "w", newline="\n", encoding="utf-8") as for_json: for_json.write(json.dumps(forensic_reports, ensure_ascii=False, indent=2)) with open("{0}" .format(os.path.join(output_directory, - output_csv_forensic)), + forensic_csv_filename)), "w", newline="\n", encoding="utf-8") as for_csv: csv = parsed_forensic_reports_to_csv(forensic_reports) for_csv.write(csv) diff --git a/parsedmarc/cli.py b/parsedmarc/cli.py index 7bbecf6..ae739fe 100644 --- a/parsedmarc/cli.py +++ b/parsedmarc/cli.py @@ -178,17 +178,17 @@ def _main(): help=strip_attachment_help, action="store_true") arg_parser.add_argument("-o", "--output", help="write output files to the given directory") - arg_parser.add_argument("--output-json-aggregate", - help="output aggregate JSON file", + arg_parser.add_argument("--aggregate-json-filename", + help="filename for the aggregate JSON output file", default="aggregate.json") - arg_parser.add_argument("--output-json-forensic", - help="output forensic JSON file", + arg_parser.add_argument("--forensic-json-filename", + help="filename for the forensic JSON output file", default="forensic.json") - arg_parser.add_argument("--output-csv-aggregate", - help="output aggregate CSV file", + arg_parser.add_argument("--aggregate-csv-filename", + help="filename for the aggregate CSV output file", 
default="aggregate.csv") - arg_parser.add_argument("--output-csv-forensic", - help="output forensic CSV file", + arg_parser.add_argument("--forensic-csv-filename", + help="filename for the forensic CSV output file", default="forensic.csv") arg_parser.add_argument("-n", "--nameservers", nargs="+", help="nameservers to query") @@ -221,10 +221,10 @@ def _main(): offline=args.offline, strip_attachment_payloads=args.strip_attachment_payloads, output=args.output, - output_json_aggregate=args.output_json_aggregate, - output_json_forensic=args.output_json_forensic, - output_csv_aggregate=args.output_csv_aggregate, - output_csv_forensic=args.output_csv_forensic, + aggregate_csv_filename=args.aggregate_csv_filename, + aggregate_json_filename=args.aggregate_json_filename, + forensic_csv_filename=args.forensic_csv_filename, + forensic_json_filename=args.forensic_json_filename, nameservers=args.nameservers, silent=args.silent, dns_timeout=args.dns_timeout, @@ -302,6 +302,14 @@ def _main(): "strip_attachment_payloads"] if "output" in general_config: opts.output = general_config["output"] + if "aggregate_json_filename" in general_config: + opts.aggregate_json_filename = general_config["aggregate_json_filename"] + if "forensic_json_filename" in general_config: + opts.forensic_json_filename = general_config["forensic_json_filename"] + if "aggregate_csv_filename" in general_config: + opts.aggregate_csv_filename = general_config["aggregate_csv_filename"] + if "forensic_csv_filename" in general_config: + opts.forensic_csv_filename = general_config["forensic_csv_filename"] if "nameservers" in general_config: opts.nameservers = _str_to_list(general_config["nameservers"]) if "dns_timeout" in general_config: @@ -689,10 +697,10 @@ def _main(): if opts.output: save_output(results, output_directory=opts.output, - output_json_aggregate=opts.output_json_aggregate, - output_json_forensic=opts.output_json_forensic, - output_csv_aggregate=opts.output_csv_aggregate, - 
output_csv_forensic=opts.output_csv_forensic) + aggregate_json_filename=opts.aggregate_json_filename, + forensic_json_filename=opts.forensic_json_filename, + aggregate_csv_filename=opts.aggregate_csv_filename, + forensic_csv_filename=opts.forensic_csv_filename) process_reports(results) diff --git a/parsedmarc/utils.py b/parsedmarc/utils.py index dbf525f..6b5f980 100644 --- a/parsedmarc/utils.py +++ b/parsedmarc/utils.py @@ -157,7 +157,7 @@ def query_dns(domain, record_type, cache=None, nameservers=None, timeout=2.0): if record_type == "TXT": resource_records = list(map( lambda r: r.strings, - resolver.query(domain, record_type, lifetime=timeout))) + resolver.resolve(domain, record_type, lifetime=timeout))) _resource_record = [ resource_record[0][:0].join(resource_record) for resource_record in resource_records if resource_record] @@ -165,7 +165,7 @@ def query_dns(domain, record_type, cache=None, nameservers=None, timeout=2.0): else: records = list(map( lambda r: r.to_text().replace('"', '').rstrip("."), - resolver.query(domain, record_type, lifetime=timeout))) + resolver.resolve(domain, record_type, lifetime=timeout))) if cache: cache[cache_key] = records