mirror of
https://github.com/domainaware/parsedmarc.git
synced 2026-05-01 01:39:29 +00:00
13
CHANGELOG.md
13
CHANGELOG.md
@@ -1,6 +1,19 @@
|
||||
Changelog
|
||||
=========
|
||||
|
||||
7.0.0
|
||||
-----
|
||||
|
||||
- Fix issue #221: Crash when handling invalid reports without root node (PR #248)
|
||||
- Use UTC datetime objects for Elasticsearch output (PR #245)
|
||||
- Fix issues #219, #155, and #103: IMAP connections break on large emails (PR #241)
|
||||
- Add support for saving reports to S3 buckets (PR #223)
|
||||
- Pass `offline` parameter to `wait_inbox()` (PR #216)
|
||||
- Add more details to logging (PR #220)
|
||||
- Add options for customizing the names of output files (Modifications based on PR #225)
|
||||
- Wait for 5 seconds before attempting to reconnect to an IMAP server (PR #217)
|
||||
- Add option to process messages in batches (PR #222)
|
||||
|
||||
6.12.0
|
||||
------
|
||||
|
||||
|
||||
39
README.rst
39
README.rst
@@ -58,17 +58,20 @@ CLI help
|
||||
|
||||
::
|
||||
|
||||
usage: parsedmarc [-h] [-c CONFIG_FILE] [--strip-attachment-payloads]
|
||||
[-o OUTPUT] [-n NAMESERVERS [NAMESERVERS ...]]
|
||||
[-t DNS_TIMEOUT] [--offline] [-s] [--debug]
|
||||
[--log-file LOG_FILE] [-v]
|
||||
[file_path [file_path ...]]
|
||||
usage: parsedmarc [-h] [-c CONFIG_FILE] [--strip-attachment-payloads] [-o OUTPUT]
|
||||
[--aggregate-json-filename AGGREGATE_JSON_FILENAME]
|
||||
[--forensic-json-filename FORENSIC_JSON_FILENAME]
|
||||
[--aggregate-csv-filename AGGREGATE_CSV_FILENAME]
|
||||
[--forensic-csv-filename FORENSIC_CSV_FILENAME]
|
||||
[-n NAMESERVERS [NAMESERVERS ...]] [-t DNS_TIMEOUT] [--offline]
|
||||
[-s] [--verbose] [--debug] [--log-file LOG_FILE] [-v]
|
||||
[file_path ...]
|
||||
|
||||
Parses DMARC reports
|
||||
|
||||
positional arguments:
|
||||
file_path one or more paths to aggregate or forensic report
|
||||
files or emails
|
||||
files, emails, or mbox files
|
||||
|
||||
optional arguments:
|
||||
-h, --help show this help message and exit
|
||||
@@ -78,18 +81,27 @@ CLI help
|
||||
remove attachment payloads from forensic report output
|
||||
-o OUTPUT, --output OUTPUT
|
||||
write output files to the given directory
|
||||
--aggregate-json-filename AGGREGATE_JSON_FILENAME
|
||||
filename for the aggregate JSON output file
|
||||
--forensic-json-filename FORENSIC_JSON_FILENAME
|
||||
filename for the forensic JSON output file
|
||||
--aggregate-csv-filename AGGREGATE_CSV_FILENAME
|
||||
filename for the aggregate CSV output file
|
||||
--forensic-csv-filename FORENSIC_CSV_FILENAME
|
||||
filename for the forensic CSV output file
|
||||
-n NAMESERVERS [NAMESERVERS ...], --nameservers NAMESERVERS [NAMESERVERS ...]
|
||||
nameservers to query (default is Cloudflare's
|
||||
nameservers)
|
||||
nameservers to query
|
||||
-t DNS_TIMEOUT, --dns_timeout DNS_TIMEOUT
|
||||
number of seconds to wait for an answer from DNS
|
||||
(default: 2.0)
|
||||
--offline do not make online queries for geolocation or DNS
|
||||
-s, --silent only print errors and warnings
|
||||
--verbose more verbose output
|
||||
--debug print debugging information
|
||||
--log-file LOG_FILE output logging to a file
|
||||
-v, --version show program's version number and exit
|
||||
|
||||
|
||||
.. note::
|
||||
|
||||
In ``parsedmarc`` 6.0.0, most CLI options were moved to a configuration file, described below.
|
||||
@@ -139,6 +151,8 @@ The full set of configuration options are:
|
||||
- ``save_forensic`` - bool: Save forensic report data to Elasticsearch, Splunk and/or S3
|
||||
- ``strip_attachment_payloads`` - bool: Remove attachment payloads from results
|
||||
- ``output`` - str: Directory to place JSON and CSV files in
|
||||
- ``aggregate_json_filename`` - str: filename for the aggregate JSON output file
|
||||
- ``forensic_json_filename`` - str: filename for the forensic JSON output file
|
||||
- ``offline`` - bool: Do not use online queries for geolocation or DNS
|
||||
- ``nameservers`` - str: A comma separated list of DNS resolvers (Default: `Cloudflare's public resolvers`_)
|
||||
- ``dns_timeout`` - float: DNS timeout period
|
||||
@@ -146,10 +160,14 @@ The full set of configuration options are:
|
||||
- ``silent`` - bool: Only print errors (Default: True)
|
||||
- ``log_file`` - str: Write log messages to a file at this path
|
||||
- ``n_procs`` - int: Number of processes to run in parallel when parsing in CLI mode (Default: 1)
|
||||
- ``chunk_size`` - int: Number of files to give to each process when running in parallel. Setting this to a number larger than one can improve performance when processing thousands of files
|
||||
- ``chunk_size`` - int: Number of files to give to each process when running in parallel.
|
||||
.. note::
|
||||
Setting this to a number larger than one can improve performance when processing thousands of files
|
||||
- ``imap``
|
||||
- ``host`` - str: The IMAP server hostname or IP address
|
||||
- ``port`` - int: The IMAP server port (Default: 993) If your Hoster publishes another port, still try 993. Otherwise Error:"wrong SSL version"
|
||||
- ``port`` - int: The IMAP server port (Default: 993).
|
||||
.. note::
|
||||
If your host recommends another port, still try 993
|
||||
- ``ssl`` - bool: Use an encrypted SSL/TLS connection (Default: True)
|
||||
- ``skip_certificate_verification`` - bool: Skip certificate verification (not recommended)
|
||||
- ``user`` - str: The IMAP user
|
||||
@@ -162,7 +180,6 @@ The full set of configuration options are:
|
||||
- ``batch_size`` - int: Number of messages to read and process before saving. Defaults to all messages if not set.
|
||||
- ``elasticsearch``
|
||||
- ``hosts`` - str: A comma separated list of hostnames and ports or URLs (e.g. ``127.0.0.1:9200`` or ``https://user:secret@localhost``)
|
||||
|
||||
.. note::
|
||||
Special characters in the username or password must be `URL encoded`_.
|
||||
- ``ssl`` - bool: Use an encrypted SSL/TLS connection (Default: True)
|
||||
|
||||
@@ -62,36 +62,48 @@ CLI help
|
||||
|
||||
::
|
||||
|
||||
usage: parsedmarc [-h] [-c CONFIG_FILE] [--strip-attachment-payloads]
|
||||
[-o OUTPUT] [-n NAMESERVERS [NAMESERVERS ...]]
|
||||
[-t DNS_TIMEOUT] [--offline] [-s] [--debug]
|
||||
[--log-file LOG_FILE] [-v]
|
||||
[file_path [file_path ...]]
|
||||
usage: parsedmarc [-h] [-c CONFIG_FILE] [--strip-attachment-payloads] [-o OUTPUT]
|
||||
[--aggregate-json-filename AGGREGATE_JSON_FILENAME]
|
||||
[--forensic-json-filename FORENSIC_JSON_FILENAME]
|
||||
[--aggregate-csv-filename AGGREGATE_CSV_FILENAME]
|
||||
[--forensic-csv-filename FORENSIC_CSV_FILENAME]
|
||||
[-n NAMESERVERS [NAMESERVERS ...]] [-t DNS_TIMEOUT] [--offline]
|
||||
[-s] [--verbose] [--debug] [--log-file LOG_FILE] [-v]
|
||||
[file_path ...]
|
||||
|
||||
Parses DMARC reports
|
||||
Parses DMARC reports
|
||||
|
||||
positional arguments:
|
||||
file_path one or more paths to aggregate or forensic report
|
||||
files or emails
|
||||
positional arguments:
|
||||
file_path one or more paths to aggregate or forensic report
|
||||
files, emails, or mbox files
|
||||
|
||||
optional arguments:
|
||||
-h, --help show this help message and exit
|
||||
-c CONFIG_FILE, --config-file CONFIG_FILE
|
||||
a path to a configuration file (--silent implied)
|
||||
--strip-attachment-payloads
|
||||
remove attachment payloads from forensic report output
|
||||
-o OUTPUT, --output OUTPUT
|
||||
write output files to the given directory
|
||||
-n NAMESERVERS [NAMESERVERS ...], --nameservers NAMESERVERS [NAMESERVERS ...]
|
||||
nameservers to query
|
||||
-t DNS_TIMEOUT, --dns_timeout DNS_TIMEOUT
|
||||
number of seconds to wait for an answer from DNS
|
||||
(default: 2.0)
|
||||
--offline do not make online queries for geolocation or DNS
|
||||
-s, --silent only print errors and warnings
|
||||
--debug print debugging information
|
||||
--log-file LOG_FILE output logging to a file
|
||||
-v, --version show program's version number and exit
|
||||
optional arguments:
|
||||
-h, --help show this help message and exit
|
||||
-c CONFIG_FILE, --config-file CONFIG_FILE
|
||||
a path to a configuration file (--silent implied)
|
||||
--strip-attachment-payloads
|
||||
remove attachment payloads from forensic report output
|
||||
-o OUTPUT, --output OUTPUT
|
||||
write output files to the given directory
|
||||
--aggregate-json-filename AGGREGATE_JSON_FILENAME
|
||||
filename for the aggregate JSON output file
|
||||
--forensic-json-filename FORENSIC_JSON_FILENAME
|
||||
filename for the forensic JSON output file
|
||||
--aggregate-csv-filename AGGREGATE_CSV_FILENAME
|
||||
filename for the aggregate CSV output file
|
||||
--forensic-csv-filename FORENSIC_CSV_FILENAME
|
||||
filename for the forensic CSV output file
|
||||
-n NAMESERVERS [NAMESERVERS ...], --nameservers NAMESERVERS [NAMESERVERS ...]
|
||||
nameservers to query
|
||||
-t DNS_TIMEOUT, --dns_timeout DNS_TIMEOUT
|
||||
number of seconds to wait for an answer from DNS
|
||||
(default: 2.0)
|
||||
--offline do not make online queries for geolocation or DNS
|
||||
-s, --silent only print errors and warnings
|
||||
--verbose more verbose output
|
||||
--debug print debugging information
|
||||
--log-file LOG_FILE output logging to a file
|
||||
-v, --version show program's version number and exit
|
||||
|
||||
|
||||
.. note::
|
||||
@@ -139,10 +151,12 @@ For example
|
||||
The full set of configuration options are:
|
||||
|
||||
- ``general``
|
||||
- ``save_aggregate`` - bool: Save aggregate report data to the Elasticsearch, Splunk and/or S3
|
||||
- ``save_forensic`` - bool: Save forensic report data to the Elasticsearch, Splunk and/or S3
|
||||
- ``save_aggregate`` - bool: Save aggregate report data to Elasticsearch, Splunk and/or S3
|
||||
- ``save_forensic`` - bool: Save forensic report data to Elasticsearch, Splunk and/or S3
|
||||
- ``strip_attachment_payloads`` - bool: Remove attachment payloads from results
|
||||
- ``output`` - str: Directory to place JSON and CSV files in
|
||||
- ``aggregate_json_filename`` - str: filename for the aggregate JSON output file
|
||||
- ``forensic_json_filename`` - str: filename for the forensic JSON output file
|
||||
- ``offline`` - bool: Do not use online queries for geolocation or DNS
|
||||
- ``nameservers`` - str: A comma separated list of DNS resolvers (Default: `Cloudflare's public resolvers`_)
|
||||
- ``dns_timeout`` - float: DNS timeout period
|
||||
@@ -150,16 +164,18 @@ The full set of configuration options are:
|
||||
- ``silent`` - bool: Only print errors (Default: True)
|
||||
- ``log_file`` - str: Write log messages to a file at this path
|
||||
- ``n_procs`` - int: Number of processes to run in parallel when parsing in CLI mode (Default: 1)
|
||||
- ``chunk_size`` - int: Number of files to give to each process when running in parallel. Setting this to a number larger than one can improve performance when processing thousands of files
|
||||
- ``chunk_size`` - int: Number of files to give to each process when running in parallel.
|
||||
.. note::
|
||||
Setting this to a number larger than one can improve performance when processing thousands of files
|
||||
- ``imap``
|
||||
- ``host`` - str: The IMAP server hostname or IP address
|
||||
- ``port`` - int: The IMAP server port (Default: 993)
|
||||
- ``port`` - int: The IMAP server port (Default: 993).
|
||||
.. note::
|
||||
If your host recommends another port, still try 993
|
||||
- ``ssl`` - bool: Use an encrypted SSL/TLS connection (Default: True)
|
||||
- ``skip_certificate_verification`` - bool: Skip certificate verification (not recommended)
|
||||
- ``timeout`` - float: Timeout in seconds to wait for an IMAP operation to complete (Default: 30)
|
||||
- ``max_retries`` - int: The maximum number of retries after a timeout
|
||||
- ``user`` - str: The IMAP user
|
||||
- ``password`` - str: The IMAP password (escape ``%`` with a second ``%``)
|
||||
- ``password`` - str: The IMAP password
|
||||
- ``reports_folder`` - str: The IMAP folder where the incoming reports can be found (Default: INBOX)
|
||||
- ``archive_folder`` - str: The IMAP folder to sort processed emails into (Default: Archive)
|
||||
- ``watch`` - bool: Use the IMAP ``IDLE`` command to process messages as they arrive
|
||||
@@ -168,14 +184,10 @@ The full set of configuration options are:
|
||||
- ``batch_size`` - int: Number of messages to read and process before saving. Defaults to all messages if not set.
|
||||
- ``elasticsearch``
|
||||
- ``hosts`` - str: A comma separated list of hostnames and ports or URLs (e.g. ``127.0.0.1:9200`` or ``https://user:secret@localhost``)
|
||||
|
||||
.. note::
|
||||
Special characters in the username or password must be `URL encoded`_.
|
||||
- ``ssl`` - bool: Use an encrypted SSL/TLS connection (Default: True)
|
||||
- ``user`` - str: Basic auth username
|
||||
- ``password`` - str: Basic auth password
|
||||
- ``cert_path`` - str: Path to trusted certificates
|
||||
- ``timeout`` - float: Timeout in seconds (Default: 60)
|
||||
- ``index_suffix`` - str: A suffix to apply to the index names
|
||||
- ``monthly_indexes`` - bool: Use monthly indexes instead of daily indexes
|
||||
- ``number_of_shards`` - int: The number of shards to use when creating the index (Default: 1)
|
||||
|
||||
@@ -36,7 +36,7 @@ from parsedmarc.utils import is_outlook_msg, convert_outlook_msg
|
||||
from parsedmarc.utils import timestamp_to_human, human_timestamp_to_datetime
|
||||
from parsedmarc.utils import parse_email
|
||||
|
||||
__version__ = "6.12.0"
|
||||
__version__ = "7.0.0"
|
||||
|
||||
logging.basicConfig(
|
||||
format='%(levelname)8s:%(filename)s:%(lineno)d:'
|
||||
@@ -1274,16 +1274,20 @@ def watch_inbox(host, username, password, callback, port=None, ssl=True,
|
||||
|
||||
|
||||
def save_output(results, output_directory="output",
|
||||
output_json_aggregate="aggregate.json",
|
||||
output_json_forensic="forensic.json",
|
||||
output_csv_aggregate="aggregate.csv",
|
||||
output_csv_forensic="forensic.csv"):
|
||||
aggregate_json_filename="aggregate.json",
|
||||
forensic_json_filename="forensic.json",
|
||||
aggregate_csv_filename="aggregate.csv",
|
||||
forensic_csv_filename="forensic.csv"):
|
||||
"""
|
||||
Save report data in the given directory
|
||||
|
||||
Args:
|
||||
results (OrderedDict): Parsing results
|
||||
output_directory: The patch to the directory to save in
|
||||
output_directory (str): The path to the directory to save in
|
||||
aggregate_json_filename (str): Output filename for the aggregate JSON report
|
||||
forensic_json_filename (str): Output filename for the forensic JSON report
|
||||
aggregate_csv_filename (str): Output filename for the aggregate CSV report
|
||||
forensic_csv_filename (str): Output filename for the forensic CSV report
|
||||
"""
|
||||
|
||||
aggregate_reports = results["aggregate_reports"]
|
||||
@@ -1297,28 +1301,28 @@ def save_output(results, output_directory="output",
|
||||
|
||||
with open("{0}"
|
||||
.format(os.path.join(output_directory,
|
||||
output_json_aggregate)),
|
||||
aggregate_json_filename)),
|
||||
"w", newline="\n", encoding="utf-8") as agg_json:
|
||||
agg_json.write(json.dumps(aggregate_reports, ensure_ascii=False,
|
||||
indent=2))
|
||||
|
||||
with open("{0}"
|
||||
.format(os.path.join(output_directory,
|
||||
output_csv_aggregate)),
|
||||
aggregate_csv_filename)),
|
||||
"w", newline="\n", encoding="utf-8") as agg_csv:
|
||||
csv = parsed_aggregate_reports_to_csv(aggregate_reports)
|
||||
agg_csv.write(csv)
|
||||
|
||||
with open("{0}"
|
||||
.format(os.path.join(output_directory,
|
||||
output_json_forensic)),
|
||||
forensic_json_filename)),
|
||||
"w", newline="\n", encoding="utf-8") as for_json:
|
||||
for_json.write(json.dumps(forensic_reports, ensure_ascii=False,
|
||||
indent=2))
|
||||
|
||||
with open("{0}"
|
||||
.format(os.path.join(output_directory,
|
||||
output_csv_forensic)),
|
||||
forensic_csv_filename)),
|
||||
"w", newline="\n", encoding="utf-8") as for_csv:
|
||||
csv = parsed_forensic_reports_to_csv(forensic_reports)
|
||||
for_csv.write(csv)
|
||||
|
||||
@@ -178,17 +178,17 @@ def _main():
|
||||
help=strip_attachment_help, action="store_true")
|
||||
arg_parser.add_argument("-o", "--output",
|
||||
help="write output files to the given directory")
|
||||
arg_parser.add_argument("--output-json-aggregate",
|
||||
help="output aggregate JSON file",
|
||||
arg_parser.add_argument("--aggregate-json-filename",
|
||||
help="filename for the aggregate JSON output file",
|
||||
default="aggregate.json")
|
||||
arg_parser.add_argument("--output-json-forensic",
|
||||
help="output forensic JSON file",
|
||||
arg_parser.add_argument("--forensic-json-filename",
|
||||
help="filename for the forensic JSON output file",
|
||||
default="forensic.json")
|
||||
arg_parser.add_argument("--output-csv-aggregate",
|
||||
help="output aggregate CSV file",
|
||||
arg_parser.add_argument("--aggregate-csv-filename",
|
||||
help="filename for the aggregate CSV output file",
|
||||
default="aggregate.csv")
|
||||
arg_parser.add_argument("--output-csv-forensic",
|
||||
help="output forensic CSV file",
|
||||
arg_parser.add_argument("--forensic-csv-filename",
|
||||
help="filename for the forensic CSV output file",
|
||||
default="forensic.csv")
|
||||
arg_parser.add_argument("-n", "--nameservers", nargs="+",
|
||||
help="nameservers to query")
|
||||
@@ -221,10 +221,10 @@ def _main():
|
||||
offline=args.offline,
|
||||
strip_attachment_payloads=args.strip_attachment_payloads,
|
||||
output=args.output,
|
||||
output_json_aggregate=args.output_json_aggregate,
|
||||
output_json_forensic=args.output_json_forensic,
|
||||
output_csv_aggregate=args.output_csv_aggregate,
|
||||
output_csv_forensic=args.output_csv_forensic,
|
||||
aggregate_csv_filename=args.aggregate_csv_filename,
|
||||
aggreate_json_filename=args.aggregate_json_filename,
|
||||
forensic_csv_filename=args.forensic_csv_filename,
|
||||
forensic_json_filename=args.forensic_json_filename,
|
||||
nameservers=args.nameservers,
|
||||
silent=args.silent,
|
||||
dns_timeout=args.dns_timeout,
|
||||
@@ -302,6 +302,14 @@ def _main():
|
||||
"strip_attachment_payloads"]
|
||||
if "output" in general_config:
|
||||
opts.output = general_config["output"]
|
||||
if "aggregate_json_filename" in general_config:
|
||||
opts.aggregate_json_filename = general_config["aggregate_json_filename"]
|
||||
if "forensic_json_filename" in general_config:
|
||||
opts.forensic_json_filename = general_config["forensic_json_filename"]
|
||||
if "aggregate_csv_filename" in general_config:
|
||||
opts.aggregate_csv_filename = general_config["aggregate_csv_filename"]
|
||||
if "forensic_csv_filename" in general_config:
|
||||
opts.forensic_csv_filename = general_config["forensic_csv_filename"]
|
||||
if "nameservers" in general_config:
|
||||
opts.nameservers = _str_to_list(general_config["nameservers"])
|
||||
if "dns_timeout" in general_config:
|
||||
@@ -689,10 +697,10 @@ def _main():
|
||||
|
||||
if opts.output:
|
||||
save_output(results, output_directory=opts.output,
|
||||
output_json_aggregate=opts.output_json_aggregate,
|
||||
output_json_forensic=opts.output_json_forensic,
|
||||
output_csv_aggregate=opts.output_csv_aggregate,
|
||||
output_csv_forensic=opts.output_csv_forensic)
|
||||
aggregate_json_filename=opts.aggregate_json_filename,
|
||||
forensic_json_filename=opts.forensic_json_filename,
|
||||
aggregate_csv_filename=opts.aggregate_csv_filename,
|
||||
forensic_csv_filename=opts.forensic_csv_filename)
|
||||
|
||||
process_reports(results)
|
||||
|
||||
|
||||
@@ -157,7 +157,7 @@ def query_dns(domain, record_type, cache=None, nameservers=None, timeout=2.0):
|
||||
if record_type == "TXT":
|
||||
resource_records = list(map(
|
||||
lambda r: r.strings,
|
||||
resolver.query(domain, record_type, lifetime=timeout)))
|
||||
resolver.resolve(domain, record_type, lifetime=timeout)))
|
||||
_resource_record = [
|
||||
resource_record[0][:0].join(resource_record)
|
||||
for resource_record in resource_records if resource_record]
|
||||
@@ -165,7 +165,7 @@ def query_dns(domain, record_type, cache=None, nameservers=None, timeout=2.0):
|
||||
else:
|
||||
records = list(map(
|
||||
lambda r: r.to_text().replace('"', '').rstrip("."),
|
||||
resolver.query(domain, record_type, lifetime=timeout)))
|
||||
resolver.resolve(domain, record_type, lifetime=timeout)))
|
||||
if cache:
|
||||
cache[cache_key] = records
|
||||
|
||||
|
||||
Reference in New Issue
Block a user