diff --git a/.gitignore b/.gitignore
index d4e54e8..080a1c9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -106,7 +106,7 @@ ENV/
.idea/
# Visual Studio Code settings
-.vscode/
+#.vscode/
# I/O files
output/
diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 0000000..1305574
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,105 @@
+{
+ "markdownlint.config": {
+ "MD024": false
+ },
+
+ "cSpell.words": [
+ "adkim",
+ "andrewmcgilvray",
+ "arcname",
+ "aspf",
+ "autoclass",
+ "automodule",
+ "bellsouth",
+ "brakhane",
+ "Brightmail",
+ "CEST",
+ "CHACHA",
+ "checkdmarc",
+ "dateparser",
+ "Davmail",
+ "DBIP",
+ "devel",
+ "DMARC",
+ "Dmarcian",
+ "dnspython",
+ "exampleuser",
+ "expiringdict",
+ "genindex",
+ "geoipupdate",
+ "Geolite",
+ "geolocation",
+ "githubpages",
+ "Grafana",
+ "hostnames",
+ "htpasswd",
+ "httpasswd",
+ "IMAP",
+ "Interaktive",
+ "IPDB",
+ "journalctl",
+ "keepalive",
+ "keyout",
+ "Leeman",
+ "libemail",
+ "LISTSERV",
+ "lxml",
+ "mailparser",
+ "mailrelay",
+ "mailsuite",
+ "maxdepth",
+ "maxmind",
+ "mbox",
+ "mfrom",
+ "michaeldavie",
+ "mikesiegel",
+ "mitigations",
+ "MMDB",
+ "modindex",
+ "msgconvert",
+ "msgraph",
+ "Munge",
+ "ndjson",
+ "newkey",
+ "nondigest",
+ "nosecureimap",
+ "nosniff",
+ "nwettbewerb",
+ "parsedmarc",
+ "passsword",
+ "Postorius",
+ "premade",
+ "procs",
+ "publicsuffix",
+ "publixsuffix",
+ "pypy",
+ "quickstart",
+ "Reindex",
+ "replyto",
+ "Rollup",
+ "Rpdm",
+ "SAMEORIGIN",
+ "Servernameone",
+ "setuptools",
+ "STARTTLS",
+ "toctree",
+ "TQDDM",
+ "tqdm",
+ "Übersicht",
+ "uids",
+ "uper",
+ "urllib",
+ "Valimail",
+ "venv",
+ "viewcode",
+ "virtualenv",
+ "webmail",
+ "Wettbewerber",
+ "Whalen",
+ "whitespaces",
+ "xennn",
+ "xmltodict",
+ "zscholl"
+ ],
+ "esbonio.sphinx.confDir": "docs"
+}
\ No newline at end of file
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 42a46b5..88de451 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,12 @@
Changelog
=========
+8.3.1
+-----
+
+- Handle unexpected XML parsing errors more gracefully (PR #349)
+- Migrate build from `setuptools` to `hatch`
+
8.3.0
-----
@@ -23,7 +29,7 @@ Changelog
8.1.0
-----
-- Restore compatability with <8.0.0 configuration files (with deprecation warnings)
+- Restore compatibility with <8.0.0 configuration files (with deprecation warnings)
- Set default `reports_folder` to `Inbox` (rather than `INBOX`) when `msgraph` is configured
- Mark a message as read when fetching a message from Microsoft Graph
@@ -53,7 +59,7 @@ Changelog
- Add support for Microsoft/Office 365 via Microsoft Graph API (PR #301 closes issue #111)
- Pin `elasticsearch-dsl` version at `>=7.2.0<7.14.0` (PR #297 closes issue #296)
- Properly initialize `ip_dp_path` (PR #294 closes issue #286)
-- Remove usage of `lgging.basicConfig` (PR #285)
+- Remove usage of `logging.basicConfig` (PR #285)
- Add support for the Gmail API (PR #284 and PR #307 close issue #96)
7.1.1
@@ -83,12 +89,12 @@ Changelog
-----
- Fix issue #221: Crash when handling invalid reports without root node (PR #248)
-- Use UTC datetime objects for Elasticsearch output (PR #245)
+- Use UTC datetime objects for Elasticsearch output (PR #245)
- Fix issues #219, #155, and #103: IMAP connections break on large emails (PR #241)
- Add support for saving reports to S3 buckets (PR #223)
- Pass `offline` parameter to `wait_inbox()` (PR #216)
- Add more details to logging (PR #220)
-- Add options customizing the names of output files (Modifications based on PR #225)
+- Add options customizing the names of output files (Modifications based on PR #225)
- Wait for 5 seconds before attempting to reconnect to an IMAP server (PR #217)
- Add option to process messages in batches (PR #222)
@@ -113,7 +119,6 @@ Changelog
- Fix IMAP debugging output
- Fix `User-Agent` string
-
6.10.0
------
@@ -137,7 +142,7 @@ Changelog
6.8.1
-----
-- Use `match_phrase` instead of `match` when looking for existing strings in Elasticsearch
+- Use `match_phrase` instead of `match` when looking for existing strings in Elasticsearch
6.8.0
-----
@@ -149,10 +154,11 @@ Changelog
6.7.4
-----
-- Update dependencies
+- Update dependencies
6.7.3
-----
+
- Make `dkim_aligned` and `spf_aligned` case insensitive (PR #132)
6.7.2
@@ -164,7 +170,7 @@ Changelog
-----
- Parse forensic email samples with non-standard date headers
-- Graceful handling of a failure to download the GeoIP database (issue #123)
+- Graceful handling of a failure to download the GeoIP database (issue #123)
6.7.0
-----
@@ -222,7 +228,7 @@ Changelog
- Merge PR #98 from michaeldavie
- Add functions
- `parsed_aggregate_reports_to_csv_row(reports)`
- - `parsed_forensic_reports_to_csv_row(reports)`
+ - `parsed_forensic_reports_to_csv_row(reports)`
- Require `dnspython>=1.16.0`
6.5.0
@@ -231,31 +237,30 @@ Changelog
- Move mail processing functions to the
[`mailsuite`](https://seanthegeek.github.io/mailsuite/) package
- Add offline option (closes issue #90)
-- Use UDP instead of TCP, and properly set the timeout when querying DNS
+- Use UDP instead of TCP, and properly set the timeout when querying DNS
(closes issue #79 and #92)
-- Log the current file path being processed when `--debug` is used
+- Log the current file path being processed when `--debug` is used
(closes issue #95)
-
6.4.2
-----
-- Do not attempt to convert `org_name` to a base domain if `org_name` contains
+- Do not attempt to convert `org_name` to a base domain if `org_name` contains
a space (closes issue #94)
- Always lowercase the `header_from`
-- Provide a more helpful warning message when `GeoLite2-Country.mmdb` is
+- Provide a more helpful warning message when `GeoLite2-Country.mmdb` is
missing
6.4.1
-----
-- Raise `utils.DownloadError` exception when a GeoIP database or Public
+- Raise `utils.DownloadError` exception when a GeoIP database or Public
Suffix List (PSL) download fails (closes issue #73)
6.4.0
-----
-- Add ``number_of_shards`` and ``number_of_replicas`` as possible options
+- Add ``number_of_shards`` and ``number_of_replicas`` as possible options
in the ``elasticsearch`` configuration file section (closes issue #78)
6.3.7
@@ -396,7 +401,7 @@ in the ``elasticsearch`` configuration file section (closes issue #78)
-----
- Move options from CLI to a config file (see updated installation documentation)
-- Refactoring to make argument names consistent
+- Refactoring to make argument names consistent
5.3.0
-----
@@ -411,10 +416,12 @@ in the ``elasticsearch`` configuration file section (closes issue #78)
5.2.0
-----
+
- Add filename and line number to logging output
- Improved IMAP error handling
- Add CLI options
- ```
+
+ ```text
--elasticsearch-use-ssl
Use SSL when connecting to Elasticsearch
--elasticsearch-ssl-cert-path ELASTICSEARCH_SSL_CERT_PATH
@@ -447,7 +454,7 @@ in the ``elasticsearch`` configuration file section (closes issue #78)
- Bugfix: Submit aggregate dates to Elasticsearch as lists, not tuples
- Support `elasticsearch-dsl<=6.3.0`
-- Add support for TLS/SSL and username/password auth to Kafka
+- Add support for TLS/SSL and username/password auth to Kafka
5.0.2
-----
@@ -461,20 +468,19 @@ in the ``elasticsearch`` configuration file section (closes issue #78)
- Add Elasticsearch to automated testing
- Lock `elasticsearch-dsl` required version to `6.2.1` (closes issue #25)
-
5.0.0
-----
**Note**: Re-importing `kibana_saved_objects.json` in Kibana [is required](https://domainaware.github.io/parsedmarc/#upgrading-kibana-index-patterns) when upgrading to this version!
-- Bugfix: Reindex the aggregate report index field `published_policy.fo`
+- Bugfix: Reindex the aggregate report index field `published_policy.fo`
as `text` instead of `long` (Closes issue #31)
- Bugfix: IDLE email processing in Gmail/G-Suite accounts (closes issue #33)
- Bugfix: Fix inaccurate DNS timeout in CLI documentation (closes issue #34)
- Bugfix: Forensic report processing via CLI
-- Bugfix: Duplicate aggregate report Elasticsearch query broken
-- Bugfix: Crash when `Arrival-Date` header is missing in a
-forensic/fialure/ruf report
+- Bugfix: Duplicate aggregate report Elasticsearch query broken
+- Bugfix: Crash when `Arrival-Date` header is missing in a
+forensic/failure/ruf report
- IMAP reliability improvements
- Save data in separate indexes each day to make managing data retention easier
- Cache DNS queries in memory
@@ -497,7 +503,6 @@ forensic/fialure/ruf report
- Recreated the `date_range` values from the ES client for easier parsing.
- Started sending individual record slices. Kafka default message size is 1 MB, some aggregate reports were exceeding this. Now it appends meta-data and sends record by record.
-
4.3.8
-----
@@ -510,10 +515,9 @@ and `watch_inbox()`
4.3.7
-----
-- When checking an inbox, always recheck for messages when processing is
+- When checking an inbox, always recheck for messages when processing is
complete
-
4.3.6
-----
@@ -529,7 +533,7 @@ complete
- Fix crash on empty aggregate report comments (brakhane - #25)
- Add SHA256 hashes of attachments to output
-- Add `strip_attachment_payloads` option to functions and
+- Add `strip_attachment_payloads` option to functions and
`--strip-attachment-payloads` option to the CLI (#23)
- Set `urllib3` version requirements to match `requests`
@@ -552,22 +556,22 @@ complete
4.3.0
-----
-- Fix bug where `parsedmarc` would always try to save to Elastic search,
+- Fix bug where `parsedmarc` would always try to save to Elasticsearch,
even if only `--hec` was used
- Add options to save reports as a Kafka topic (mikesiegel - #21)
- Major refactoring of functions
- Support parsing forensic reports generated by Brightmail
-- Make `sample_headers_only` flag more reliable
-- Functions that might be useful to other projects are now stored in
+- Make `sample_headers_only` flag more reliable
+- Functions that might be useful to other projects are now stored in
`parsedmarc.utils`:
- - `get_base_domain(domain)`
- - `get_filename_safe_string(string)`
- - `get_ip_address_country(ip_address)`
- - `get_ip_address_info(ip_address, nameservers=None, timeout=2.0)`
- - `get_reverse_dns(ip_address, nameservers=None, timeout=2.0)`
- - `human_timestamp_to_datetime(human_timestamp)`
- - `human_timestamp_to_timestamp(human_timestamp)`
- - `parse_email(data)`
+ - `get_base_domain(domain)`
+ - `get_filename_safe_string(string)`
+ - `get_ip_address_country(ip_address)`
+ - `get_ip_address_info(ip_address, nameservers=None, timeout=2.0)`
+ - `get_reverse_dns(ip_address, nameservers=None, timeout=2.0)`
+ - `human_timestamp_to_datetime(human_timestamp)`
+ - `human_timestamp_to_timestamp(human_timestamp)`
+ - `parse_email(data)`
4.2.0
------
@@ -577,11 +581,10 @@ complete
- Suppress Splunk SSL validation warnings
- Change default logging level to `WARNING`
-
4.1.9
-----
-- Workaround for forensic/ruf reports that are missing `Arrival-Date` and/or
+- Workaround for forensic/ruf reports that are missing `Arrival-Date` and/or
`Reported-Domain`
4.1.8
@@ -604,8 +607,8 @@ complete
- Only move or delete IMAP emails after they all have been parsed
- Move/delete messages one at a time - do not exit on error
-- Reconnect to IMAP if connection is broken during
-`get_dmarc_reports_from_inbox()`
+- Reconnect to IMAP if connection is broken during
+`get_dmarc_reports_from_inbox()`
- Add`--imap-port` and `--imap-no-ssl` CLI options
4.1.4
@@ -616,7 +619,7 @@ complete
4.1.3
-----
-- Fix crash introduced in 4.1.0 when creating Elasticsearch indexes (Issue #15)
+- Fix crash introduced in 4.1.0 when creating Elasticsearch indexes (Issue #15)
4.1.2
-----
@@ -636,7 +639,6 @@ complete
- If an aggregate report has the invalid `disposition` value `pass`, change
it to `none`
-
4.0.2
-----
@@ -645,24 +647,24 @@ it to `none`
4.0.1
-----
-- When saving aggregate reports in Elasticsearch store `domain` in
+- When saving aggregate reports in Elasticsearch store `domain` in
`published_policy`
-- Rename `policy_published` to `published_policy`when saving aggregate
+- Rename `policy_published` to `published_policy` when saving aggregate
reports to Splunk
4.0.0
-----
-- Add support for sending DMARC reports to a Splunk HTTP Events
+- Add support for sending DMARC reports to a Splunk HTTP Events
Collector (HEC)
-- Use a browser-like `User-Agent` when downloading the Public Suffix List and
+- Use a browser-like `User-Agent` when downloading the Public Suffix List and
GeoIP DB to avoid being blocked by security proxies
- Reduce default DNS timeout to 2.0 seconds
- Add alignment booleans to JSON output
-- Fix `.msg` parsing CLI exception when `msgconvert` is not found in the
+- Fix `.msg` parsing CLI exception when `msgconvert` is not found in the
system path
- Add `--outgoing-port` and `--outgoing-ssl` options
-- Fall back to plain text SMTP if `--outgoing-ssl` is not used and `STARTTLS`
+- Fall back to plain text SMTP if `--outgoing-ssl` is not used and `STARTTLS`
is not supported by the server
- Always use `\n` as the newline when generating CSVs
- Workaround for random Exchange/Office365 `Server Unavailable` IMAP errors
@@ -680,7 +682,7 @@ is not supported by the server
3.9.5
-----
-- Refactor to use a shared IMAP connection for inbox watching and message
+- Refactor to use a shared IMAP connection for inbox watching and message
downloads
- Gracefully recover from broken pipes in IMAP
@@ -704,17 +706,17 @@ downloads
3.9.1
-----
-- Use `COPY` and delete if an IMAP server does not support `MOVE`
+- Use `COPY` and delete if an IMAP server does not support `MOVE`
(closes issue #9)
3.9.0
-----
-- Reduce IMAP `IDLE` refresh rate to 5 minutes to avoid session timeouts in
+- Reduce IMAP `IDLE` refresh rate to 5 minutes to avoid session timeouts in
Gmail
- Fix parsing of some forensic/failure/ruf reports
- Include email subject in all warning messages
-- Fix example NGINX configuration in the installation documentation
+- Fix example NGINX configuration in the installation documentation
(closes issue #6)
3.8.2
@@ -731,7 +733,7 @@ Gmail
3.8.0
-----
-- Use `.` instead of `/` as the IMAP folder hierarchy separator when `/`
+- Use `.` instead of `/` as the IMAP folder hierarchy separator when `/`
does not work - fixes dovecot support (#5)
- Fix parsing of base64-encoded forensic report data
@@ -755,7 +757,7 @@ does not work - fixes dovecot support (#5)
3.7.0
-----
-- Fix bug where PSL would be called before it was downloaded if the PSL was
+- Fix bug where PSL would be called before it was downloaded if the PSL was
older than 24 Hours
3.6.1
@@ -777,47 +779,51 @@ older than 24 Hours
- Add country rankings to the dashboards
- Fix crash when parsing report with empty
-
3.5.0
-----
+
- Use Cloudflare's public DNS resolvers by default instead of Google's
- Fix installation from virtualenv
- Fix documentation typos
-
3.4.1
-----
+
- Documentation fixes
- Fix console output
3.4.0
-----
+
- Maintain IMAP IDLE state when watching the inbox
- The `-i`/`--idle` CLI option is now `-w`/`--watch`
- Improved Exception handling and documentation
-
3.3.0
-----
-- Fix errors when saving to Elasticsearch
+- Fix errors when saving to Elasticsearch
3.2.0
-----
+
- Fix existing aggregate report error message
3.1.0
-----
-- Fix existing aggregate report query
+- Fix existing aggregate report query
3.0.0
-----
-### New features
+
+New features
+
- Add option to select the IMAP folder where reports are stored
- Add options to send data to Elasticsearch
-### Changes
+Changes
+
- Use Google's public nameservers (`8.8.8.8` and `4.4.4.4`)
by default
- Detect aggregate report email attachments by file content rather than
@@ -827,6 +833,7 @@ file extension
2.1.2
-----
+
- Rename `parsed_dmarc_forensic_reports_to_csv()` to
`parsed_forensic_reports_to_csv()` to match other functions
- Rename `parsed_aggregate_report_to_csv()` to
@@ -835,25 +842,31 @@ file extension
2.1.1
-----
+
- Documentation fixes
2.1.0
-----
+
- Add `get_report_zip()` and `email_results()`
- Add support for sending report emails via the command line
2.0.1
-----
+
- Fix documentation
- Remove Python 2 code
2.0.0
-----
-### New features
+
+New features
+
- Parse forensic reports
- Parse reports from IMAP inbox
-### Changes
+Changes
+
- Drop support for Python 2
- Command line output is always a JSON object containing the lists
`aggregate_reports` and `forensic_reports`
@@ -862,26 +875,31 @@ file extension
1.1.0
-----
-- Add `extract_xml()` and `human_timespamp_to_datetime` methods
+- Add `extract_xml()` and `human_timestamp_to_datetime` methods
1.0.5
-----
+
- Prefix public suffix and GeoIP2 database filenames with `.`
- Properly format errors list in CSV output
1.0.3
-----
+
- Fix documentation formatting
1.0.2
-----
+
- Fix more packaging flaws
1.0.1
-----
+
- Fix packaging flaw
1.0.0
-----
+
- Initial release
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..0b7e237
--- /dev/null
+++ b/README.md
@@ -0,0 +1,41 @@
+# parsedmarc
+
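+[![Build Status](https://github.com/domainaware/parsedmarc/actions/workflows/python-tests.yml/badge.svg)](https://github.com/domainaware/parsedmarc/actions/workflows/python-tests.yml)
+[![Code Coverage](https://codecov.io/gh/domainaware/parsedmarc/branch/master/graph/badge.svg)](https://codecov.io/gh/domainaware/parsedmarc)
+[![PyPI Package](https://img.shields.io/pypi/v/parsedmarc.svg)](https://pypi.org/project/parsedmarc/)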
+
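+[![A screenshot of DMARC summary charts in Kibana](https://raw.githubusercontent.com/domainaware/parsedmarc/master/docs/_static/screenshots/dmarc-summary-charts.png){.align-center}](https://raw.githubusercontent.com/domainaware/parsedmarc/master/docs/_static/screenshots/dmarc-summary-charts.png)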
+
+`parsedmarc` is a Python module and CLI utility for parsing DMARC
+reports. When used with Elasticsearch and Kibana (or Splunk), it works
+as a self-hosted open source alternative to commercial DMARC report
+processing services such as Agari Brand Protection, Dmarcian, OnDMARC,
+ProofPoint Email Fraud Defense, and Valimail.
+
+## Help Wanted
+
+This project is maintained by one developer. Please consider
+reviewing the open
+[issues](https://github.com/domainaware/parsedmarc/issues) to see how
+you can contribute code, documentation, or user support. Assistance on
+the pinned issues would be particularly helpful.
+
+Thanks to all
+[contributors](https://github.com/domainaware/parsedmarc/graphs/contributors)!
+
+## Features
+
+- Parses draft and 1.0 standard aggregate/rua reports
+- Parses forensic/failure/ruf reports
+- Can parse reports from an inbox over IMAP, Microsoft Graph, or Gmail
+ API
+- Transparently handles gzip or zip compressed reports
+- Consistent data structures
+- Simple JSON and/or CSV output
+- Optionally email the results
+- Optionally send the results to Elasticsearch and/or Splunk, for use
+ with premade dashboards
+- Optionally send reports to Apache Kafka
diff --git a/README.rst b/README.rst
deleted file mode 100644
index 003de1b..0000000
--- a/README.rst
+++ /dev/null
@@ -1,574 +0,0 @@
-==========
-parsedmarc
-==========
-
-|Build Status| |Code Coverage| |PyPI Package|
-
-.. note:: **Help Wanted**
-
- This is a project is maintained by one developer.
- Please consider reviewing the open `issues`_ to see how you can contribute code, documentation, or user support.
- Assistance on the pinned issues would be particularly helpful.
-
- Thanks to all `contributors`_!
-
-.. image:: https://raw.githubusercontent.com/domainaware/parsedmarc/master/docs/_static/screenshots/dmarc-summary-charts.png
- :alt: A screenshot of DMARC summary charts in Kibana
- :align: center
- :scale: 50
- :target: https://raw.githubusercontent.com/domainaware/parsedmarc/master/docs/_static/screenshots/dmarc-summary-charts.png
-
-``parsedmarc`` is a Python module and CLI utility for parsing DMARC reports.
-When used with Elasticsearch and Kibana (or Splunk), it works as a self-hosted
-open source alternative to commercial DMARC report processing services such
-as Agari Brand Protection, Dmarcian, OnDMARC, ProofPoint Email Fraud Defense,
-and Valimail.
-
-Features
-========
-
-* Parses draft and 1.0 standard aggregate/rua reports
-* Parses forensic/failure/ruf reports
-* Can parse reports from an inbox over IMAP, Microsoft Graph, or Gmail API
-* Transparently handles gzip or zip compressed reports
-* Consistent data structures
-* Simple JSON and/or CSV output
-* Optionally email the results
-* Optionally send the results to Elasticsearch and/or Splunk, for use with
- premade dashboards
-* Optionally send reports to Apache Kafka
-
-Resources
-=========
-
-DMARC guides
-------------
-
-* `Demystifying DMARC`_ - A complete guide to SPF, DKIM, and DMARC
-
-SPF and DMARC record validation
--------------------------------
-
-If you are looking for SPF and DMARC record validation and parsing,
-check out the sister project,
-`checkdmarc `_.
-
-Lookalike domains
------------------
-
-DMARC protects against domain spoofing, not lookalike domains. For open source
-lookalike domain monitoring, check out
-`DomainAware `_.
-
-
-CLI help
-========
-
-::
-
- usage: parsedmarc [-h] [-c CONFIG_FILE] [--strip-attachment-payloads] [-o OUTPUT]
- [--aggregate-json-filename AGGREGATE_JSON_FILENAME]
- [--forensic-json-filename FORENSIC_JSON_FILENAME]
- [--aggregate-csv-filename AGGREGATE_CSV_FILENAME]
- [--forensic-csv-filename FORENSIC_CSV_FILENAME]
- [-n NAMESERVERS [NAMESERVERS ...]] [-t DNS_TIMEOUT] [--offline]
- [-s] [--verbose] [--debug] [--log-file LOG_FILE] [-v]
- [file_path ...]
-
- Parses DMARC reports
-
- positional arguments:
- file_path one or more paths to aggregate or forensic report
- files, emails, or mbox files'
-
- optional arguments:
- -h, --help show this help message and exit
- -c CONFIG_FILE, --config-file CONFIG_FILE
- a path to a configuration file (--silent implied)
- --strip-attachment-payloads
- remove attachment payloads from forensic report output
- -o OUTPUT, --output OUTPUT
- write output files to the given directory
- --aggregate-json-filename AGGREGATE_JSON_FILENAME
- filename for the aggregate JSON output file
- --forensic-json-filename FORENSIC_JSON_FILENAME
- filename for the forensic JSON output file
- --aggregate-csv-filename AGGREGATE_CSV_FILENAME
- filename for the aggregate CSV output file
- --forensic-csv-filename FORENSIC_CSV_FILENAME
- filename for the forensic CSV output file
- -n NAMESERVERS [NAMESERVERS ...], --nameservers NAMESERVERS [NAMESERVERS ...]
- nameservers to query
- -t DNS_TIMEOUT, --dns_timeout DNS_TIMEOUT
- number of seconds to wait for an answer from DNS
- (default: 2.0)
- --offline do not make online queries for geolocation or DNS
- -s, --silent only print errors and warnings
- --verbose more verbose output
- --debug print debugging information
- --log-file LOG_FILE output logging to a file
- -v, --version show program's version number and exit
-
-
-.. note::
-
- Starting in ``parsedmarc`` 7.1.1, a static copy of the `IP to Country Lite database`_ from IPDB is
- distributed with ``parsedmarc``, under the terms of the `Creative Commons Attribution 4.0 International License`_. as
- a fallback if the `MaxMind GeoLite2 Country database`_ is not installed However, ``parsedmarc`` cannot install updated
- versions of these databases as they are released, so MaxMind's databases and `geoipupdate`_ tool is still the
- preferable solution.
-
- The location of the database file can be overridden by using the ``ip_db_path`` setting.
-
-.. note::
-
- Starting in ``parsedmarc`` 6.0.0, most CLI options were moved to a configuration file, described below.
-
-Configuration file
-==================
-
-``parsedmarc`` can be configured by supplying the path to an INI file
-
-.. code-block:: bash
-
- parsedmarc -c /etc/parsedmarc.ini
-
-For example
-
-.. code-block:: ini
-
- # This is an example comment
-
- [general]
- save_aggregate = True
- save_forensic = True
-
- [imap]
- host = imap.example.com
- user = dmarcresports@example.com
- password = $uperSecure
-
- [mailbox]
- watch = True
- delete = False
-
- [elasticsearch]
- hosts = 127.0.0.1:9200
- ssl = False
-
- [splunk_hec]
- url = https://splunkhec.example.com
- token = HECTokenGoesHere
- index = email
-
- [s3]
- bucket = my-bucket
- path = parsedmarc
-
- [syslog]
- server = localhost
- port = 514
-
-
-
-The full set of configuration options are:
-
-.. note::
- ``%`` characters must be escaped with another ``%`` character, so use ``%%`` wherever a `%`` character is used, such as a password.
-
-
-- ``general``
- - ``save_aggregate`` - bool: Save aggregate report data to Elasticsearch, Splunk and/or S3
- - ``save_forensic`` - bool: Save forensic report data to Elasticsearch, Splunk and/or S3
- - ``strip_attachment_payloads`` - bool: Remove attachment payloads from results
- - ``output`` - str: Directory to place JSON and CSV files in
- - ``aggregate_json_filename`` - str: filename for the aggregate JSON output file
- - ``forensic_json_filename`` - str: filename for the forensic JSON output file
- - ``ip_db_path`` - str: An optional custom path to a MMDB file from MaxMind or DBIP
- - ``offline`` - bool: Do not use online queries for geolocation or DNS
- - ``nameservers`` - str: A comma separated list of DNS resolvers (Default: `Cloudflare's public resolvers`_)
- - ``dns_timeout`` - float: DNS timeout period
- - ``debug`` - bool: Print debugging messages
- - ``silent`` - bool: Only print errors (Default: True)
- - ``log_file`` - str: Write log messages to a file at this path
- - ``n_procs`` - int: Number of process to run in parallel when parsing in CLI mode (Default: 1)
- - ``chunk_size`` - int: Number of files to give to each process when running in parallel.
-
- .. note::
- Setting this to a number larger than one can improve performance when processing thousands of files
-
-- ``mailbox``
- - ``reports_folder`` - str: The mailbox folder (or label for Gmail) where the incoming reports can be found (Default: INBOX)
- - ``archive_folder`` - str: The mailbox folder (or label for Gmail) to sort processed emails into (Default: Archive)
- - ``watch`` - bool: Use the IMAP ``IDLE`` command to process messages as they arrive or poll MS Graph for new messages
- - ``delete`` - bool: Delete messages after processing them, instead of archiving them
- - ``test`` - bool: Do not move or delete messages
- - ``batch_size`` - int: Number of messages to read and process before saving. Default 10. Use 0 for no limit.
- - ``check_timeout`` - int: Number of seconds to wait for a IMAP IDLE response or the number of seconds until the next mail check (Default: 30)
-
-- ``imap``
- - ``host`` - str: The IMAP server hostname or IP address
- - ``port`` - int: The IMAP server port (Default: 993).
-
- .. note::
- Starting in version 8.0.0, most options from the ``imap`` section have been moved to the ``mailbox`` section.
-
- .. note::
- If your host recommends another port, still try 993
-
- - ``ssl`` - bool: Use an encrypted SSL/TLS connection (Default: True)
- - ``skip_certificate_verification`` - bool: Skip certificate verification (not recommended)
- - ``user`` - str: The IMAP user
- - ``password`` - str: The IMAP password
-
-- ``msgraph``
- - ``auth_method`` - str: Authentication method, valid types are UsernamePassword, DeviceCode, or ClientSecret (Default: UsernamePassword).
- - ``user`` - str: The M365 user, required when the auth method is UsernamePassword
- - ``password`` - str: The user password, required when the auth method is UsernamePassword
- - ``client_id`` - str: The app registration's client ID
- - ``client_secret`` - str: The app registration's secret
- - ``tenant_id`` - str: The Azure AD tenant ID. This is required for all auth methods except UsernamePassword.
- - ``mailbox`` - str: The mailbox name. This defaults to the current user if using the UsernamePassword auth method, but could be a shared mailbox if the user has access to the mailbox
- - ``token_file`` - str: Path to save the token file (Default: .token)
-
- .. note::
- You must create an app registration in Azure AD and have an admin grant the Microsoft Graph ``Mail.ReadWrite`` (delegated) permission to the app.
- If you are using `UsernamePassword` auth and the mailbox is different from the username, you must grant the app ``Mail.ReadWrite.Shared``.
-
- .. warning::
- If you are using the `ClientSecret` auth method, you need to grant the ``Mail.ReadWrite`` (application) permission to the app.
- You must also restrict the application's access to a specific mailbox since it allows all mailboxes by default.
- Use the ``New-ApplicationAccessPolicy`` command in the Exchange PowerShell module.
- If you need to scope the policy to shared mailboxes, you can add them to a mail enabled security group and use that as the group id.
-
- ``New-ApplicationAccessPolicy -AccessRight RestrictAccess -AppId "" -PolicyScopeGroupId "" -Description "Restrict access to dmarc reports mailbox."``
-
-
-- ``elasticsearch``
- - ``hosts`` - str: A comma separated list of hostnames and ports or URLs (e.g. ``127.0.0.1:9200`` or ``https://user:secret@localhost``)
-
- .. note::
- Special characters in the username or password must be `URL encoded`_.
-
- - ``ssl`` - bool: Use an encrypted SSL/TLS connection (Default: True)
- - ``cert_path`` - str: Path to a trusted certificates
- - ``index_suffix`` - str: A suffix to apply to the index names
- - ``monthly_indexes`` - bool: Use monthly indexes instead of daily indexes
- - ``number_of_shards`` - int: The number of shards to use when creating the index (Default: 1)
- - ``number_of_replicas`` - int: The number of replicas to use when creating the index (Default: 1)
-- ``splunk_hec``
- - ``url`` - str: The URL of the Splunk HTTP Events Collector (HEC)
- - ``token`` - str: The HEC token
- - ``index`` - str: The Splunk index to use
- - ``skip_certificate_verification`` - bool: Skip certificate verification (not recommended)
-- ``kafka``
- - ``hosts`` - str: A comma separated list of Kafka hosts
- - ``user`` - str: The Kafka user
- - ``passsword`` - str: The Kafka password
- - ``ssl`` - bool: Use an encrypted SSL/TLS connection (Default: True)
- - ``skip_certificate_verification`` - bool: Skip certificate verification (not recommended)
- - ``aggregate_topic`` - str: The Kafka topic for aggregate reports
- - ``forensic_topic`` - str: The Kafka topic for forensic reports
-- ``smtp``
- - ``host`` - str: The SMTP hostname
- - ``port`` - int: The SMTP port (Default: 25)
- - ``ssl`` - bool: Require SSL/TLS instead of using STARTTLS
- - ``skip_certificate_verification`` - bool: Skip certificate verification (not recommended)
- - ``user`` - str: the SMTP username
- - ``password`` - str: the SMTP password
- - ``from`` - str: The From header to use in the email
- - ``to`` - list: A list of email addresses to send to
- - ``subject`` - str: The Subject header to use in the email (Default: parsedmarc report)
- - ``attachment`` - str: The ZIP attachment filenames
- - ``message`` - str: The email message (Default: Please see the attached parsedmarc report.)
-- ``s3``
- - ``bucket`` - str: The S3 bucket name
- - ``path`` - str: The path to upload reports to (Default: /)
- - ``region_name`` - str: The region name (Optional)
- - ``endpoint_url`` - str: The endpoint URL (Optional)
- - ``access_key_id`` - str: The access key id (Optional)
- - ``secret_access_key`` - str: The secret access key (Optional)
-- ``syslog``
- - ``server`` - str: The Syslog server name or IP address
- - ``port`` - int: The UDP port to use (Default: 514)
-- ``gmail_api``
- - ``credentials_file`` - str: Path to file containing the credentials, None to disable (Default: None)
- - ``token_file`` - str: Path to save the token file (Default: .token)
- - ``include_spam_trash`` - bool: Include messages in Spam and Trash when searching reports (Default: False)
- - ``scopes`` - str: Comma separated list of scopes to use when acquiring credentials (Default: https://www.googleapis.com/auth/gmail.modify)
- - ``oauth2_port`` - int: The TCP port for the local server to listen on for the OAuth2 response (Default: 8080)
-
-.. warning::
-
- It is **strongly recommended** to **not** use the ``nameservers`` setting.
- By default, ``parsedmarc`` uses `Cloudflare's public resolvers`_,
- which are much faster and more reliable than Google, Cisco OpenDNS, or
- even most local resolvers.
-
- The ``nameservers`` option should only be used if your network blocks DNS
- requests to outside resolvers.
-
-.. warning::
-
- ``save_aggregate`` and ``save_forensic`` are separate options because
- you may not want to save forensic reports (also known as failure reports)
- to your Elasticsearch instance, particularly if you are in a
- highly-regulated industry that handles sensitive data, such as healthcare
- or finance. If your legitimate outgoing email fails DMARC, it is possible
- that email may appear later in a forensic report.
-
- Forensic reports contain the original headers of an email that failed a
- DMARC check, and sometimes may also include the full message body,
- depending on the policy of the reporting organization.
-
- Most reporting organizations do not send forensic reports of any kind for
- privacy reasons. While aggregate DMARC reports are sent at least daily,
- it is normal to receive very few forensic reports.
-
- An alternative approach is to still collect forensic/failure/ruf reports
- in your DMARC inbox, but run ``parsedmarc`` with ``save_forensic = True``
- manually on a separate IMAP folder (using the ``reports_folder`` option),
- after you have manually moved known samples you want to save to that
- folder (e.g. malicious samples and non-sensitive legitimate samples).
-
-Docker usage
-============
-
-.. code-block:: bash
-
- docker run -v "${PWD}/config.ini:/config.ini" ghcr.io/domainaware/parsedmarc: -c /config.ini
-
-
-Sample aggregate report output
-==============================
-
-Here are the results from parsing the `example `_
-report from the dmarc.org wiki. It's actually an older draft of the the 1.0
-report schema standardized in
-`RFC 7480 Appendix C `_.
-This draft schema is still in wide use.
-
-``parsedmarc`` produces consistent, normalized output, regardless of the report
-schema.
-
-JSON
-----
-
-.. code-block:: json
-
- {
- "xml_schema": "draft",
- "report_metadata": {
- "org_name": "acme.com",
- "org_email": "noreply-dmarc-support@acme.com",
- "org_extra_contact_info": "http://acme.com/dmarc/support",
- "report_id": "9391651994964116463",
- "begin_date": "2012-04-27 20:00:00",
- "end_date": "2012-04-28 19:59:59",
- "errors": []
- },
- "policy_published": {
- "domain": "example.com",
- "adkim": "r",
- "aspf": "r",
- "p": "none",
- "sp": "none",
- "pct": "100",
- "fo": "0"
- },
- "records": [
- {
- "source": {
- "ip_address": "72.150.241.94",
- "country": "US",
- "reverse_dns": "adsl-72-150-241-94.shv.bellsouth.net",
- "base_domain": "bellsouth.net"
- },
- "count": 2,
- "alignment": {
- "spf": true,
- "dkim": false,
- "dmarc": true
- },
- "policy_evaluated": {
- "disposition": "none",
- "dkim": "fail",
- "spf": "pass",
- "policy_override_reasons": []
- },
- "identifiers": {
- "header_from": "example.com",
- "envelope_from": "example.com",
- "envelope_to": null
- },
- "auth_results": {
- "dkim": [
- {
- "domain": "example.com",
- "selector": "none",
- "result": "fail"
- }
- ],
- "spf": [
- {
- "domain": "example.com",
- "scope": "mfrom",
- "result": "pass"
- }
- ]
- }
- }
- ]
- }
-
-CSV
----
-
-::
-
- xml_schema,org_name,org_email,org_extra_contact_info,report_id,begin_date,end_date,errors,domain,adkim,aspf,p,sp,pct,fo,source_ip_address,source_country,source_reverse_dns,source_base_domain,count,spf_aligned,dkim_aligned,dmarc_aligned,disposition,policy_override_reasons,policy_override_comments,envelope_from,header_from,envelope_to,dkim_domains,dkim_selectors,dkim_results,spf_domains,spf_scopes,spf_results
- draft,acme.com,noreply-dmarc-support@acme.com,http://acme.com/dmarc/support,9391651994964116463,2012-04-27 20:00:00,2012-04-28 19:59:59,,example.com,r,r,none,none,100,0,72.150.241.94,US,adsl-72-150-241-94.shv.bellsouth.net,bellsouth.net,2,True,False,True,none,,,example.com,example.com,,example.com,none,fail,example.com,mfrom,pass
-
-
-Sample forensic report output
-=============================
-
-Thanks to Github user `xennn `_ for the anonymized
-`forensic report email sample
-`_.
-
-JSON
-----
-
-
-.. code-block:: json
-
- {
- "feedback_type": "auth-failure",
- "user_agent": "Lua/1.0",
- "version": "1.0",
- "original_mail_from": "sharepoint@domain.de",
- "original_rcpt_to": "peter.pan@domain.de",
- "arrival_date": "Mon, 01 Oct 2018 11:20:27 +0200",
- "message_id": "<38.E7.30937.BD6E1BB5@ mailrelay.de>",
- "authentication_results": "dmarc=fail (p=none, dis=none) header.from=domain.de",
- "delivery_result": "policy",
- "auth_failure": [
- "dmarc"
- ],
- "reported_domain": "domain.de",
- "arrival_date_utc": "2018-10-01 09:20:27",
- "source": {
- "ip_address": "10.10.10.10",
- "country": null,
- "reverse_dns": null,
- "base_domain": null
- },
- "authentication_mechanisms": [],
- "original_envelope_id": null,
- "dkim_domain": null,
- "sample_headers_only": false,
- "sample": "Received: from Servernameone.domain.local (Servernameone.domain.local [10.10.10.10])\n\tby mailrelay.de (mail.DOMAIN.de) with SMTP id 38.E7.30937.BD6E1BB5; Mon, 1 Oct 2018 11:20:27 +0200 (CEST)\nDate: 01 Oct 2018 11:20:27 +0200\nMessage-ID: <38.E7.30937.BD6E1BB5@ mailrelay.de>\nTo: \nfrom: \"=?utf-8?B?SW50ZXJha3RpdmUgV2V0dGJld2VyYmVyLcOcYmVyc2ljaHQ=?=\" \nSubject: Subject\nMIME-Version: 1.0\nX-Mailer: Microsoft SharePoint Foundation 2010\nContent-Type: text/html; charset=utf-8\nContent-Transfer-Encoding: quoted-printable\n\n\n",
- "parsed_sample": {
- "from": {
- "display_name": "Interaktive Wettbewerber-Übersicht",
- "address": "sharepoint@domain.de",
- "local": "sharepoint",
- "domain": "domain.de"
- },
- "to_domains": [
- "domain.de"
- ],
- "to": [
- {
- "display_name": null,
- "address": "peter.pan@domain.de",
- "local": "peter.pan",
- "domain": "domain.de"
- }
- ],
- "subject": "Subject",
- "timezone": "+2",
- "mime-version": "1.0",
- "date": "2018-10-01 09:20:27",
- "content-type": "text/html; charset=utf-8",
- "x-mailer": "Microsoft SharePoint Foundation 2010",
- "body": "",
- "received": [
- {
- "from": "Servernameone.domain.local Servernameone.domain.local 10.10.10.10",
- "by": "mailrelay.de mail.DOMAIN.de",
- "with": "SMTP id 38.E7.30937.BD6E1BB5",
- "date": "Mon, 1 Oct 2018 11:20:27 +0200 CEST",
- "hop": 1,
- "date_utc": "2018-10-01 09:20:27",
- "delay": 0
- }
- ],
- "content-transfer-encoding": "quoted-printable",
- "message-id": "<38.E7.30937.BD6E1BB5@ mailrelay.de>",
- "has_defects": false,
- "headers": {
- "Received": "from Servernameone.domain.local (Servernameone.domain.local [10.10.10.10])\n\tby mailrelay.de (mail.DOMAIN.de) with SMTP id 38.E7.30937.BD6E1BB5; Mon, 1 Oct 2018 11:20:27 +0200 (CEST)",
- "Date": "01 Oct 2018 11:20:27 +0200",
- "Message-ID": "<38.E7.30937.BD6E1BB5@ mailrelay.de>",
- "To": "",
- "from": "\"Interaktive Wettbewerber-Übersicht\" ",
- "Subject": "Subject",
- "MIME-Version": "1.0",
- "X-Mailer": "Microsoft SharePoint Foundation 2010",
- "Content-Type": "text/html; charset=utf-8",
- "Content-Transfer-Encoding": "quoted-printable"
- },
- "reply_to": [],
- "cc": [],
- "bcc": [],
- "attachments": [],
- "filename_safe_subject": "Subject"
- }
- }
-
-
-
-CSV
----
-
-::
-
- feedback_type,user_agent,version,original_envelope_id,original_mail_from,original_rcpt_to,arrival_date,arrival_date_utc,subject,message_id,authentication_results,dkim_domain,source_ip_address,source_country,source_reverse_dns,source_base_domain,delivery_result,auth_failure,reported_domain,authentication_mechanisms,sample_headers_only
- auth-failure,Lua/1.0,1.0,,sharepoint@domain.de,peter.pan@domain.de,"Mon, 01 Oct 2018 11:20:27 +0200",2018-10-01 09:20:27,Subject,<38.E7.30937.BD6E1BB5@ mailrelay.de>,"dmarc=fail (p=none, dis=none) header.from=domain.de",,10.10.10.10,,,,policy,dmarc,domain.de,,False
-
-Bug reports
-===========
-
-Please report bugs on the GitHub issue tracker
-
-https://github.com/domainaware/parsedmarc/issues
-
-.. |Build Status| image:: https://github.com/domainaware/parsedmarc/actions/workflows/python-tests.yml/badge.svg
- :target: https://github.com/domainaware/parsedmarc/actions/workflows/python-tests.yml
-
-.. |Code Coverage| image:: https://codecov.io/gh/domainaware/parsedmarc/branch/master/graph/badge.svg
- :target: https://codecov.io/gh/domainaware/parsedmarc
-
-.. |PyPI Package| image:: https://img.shields.io/pypi/v/parsedmarc.svg
- :target: https://pypi.org/project/parsedmarc/
-
-.. _issues: https://github.com/domainaware/parsedmarc/issues
-
-.. _contributors: https://github.com/domainaware/parsedmarc/graphs/contributors
-
-.. _Demystifying DMARC: https://seanthegeek.net/459/demystifying-dmarc/
-
-.. _IP to Country Lite database: https://db-ip.com/db/download/ip-to-country-lite
-
-.. _Creative Commons Attribution 4.0 International License: https://creativecommons.org/licenses/by/4.0/
-
-.. _MaxMind GeoLite2 Country database: https://dev.maxmind.com/geoip/geolite2-free-geolocation-data
-
-.. _geoipupdate: https://github.com/maxmind/geoipupdate
-
-.. _Cloudflare's public resolvers: https://1.1.1.1/
-
-.. _URL encoded: https://en.wikipedia.org/wiki/Percent-encoding#Percent-encoding_reserved_characters
-
diff --git a/docs/conf.py b/docs/conf.py
index 049dd16..f24849a 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -1,17 +1,10 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
+# Configuration file for the Sphinx documentation builder.
#
-# parsedmarc documentation build configuration file, created by
-# sphinx-quickstart on Mon Feb 5 18:25:39 2018.
-#
-# This file is execfile()d with the current directory set to its
-# containing dir.
-#
-# Note that not all possible configuration values are present in this
-# autogenerated file.
-#
-# All configuration values have a default; values that are commented out
-# serve to show the default.
+# This file only contains a selection of the most common options. For a full
+# list see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+# -- Path setup --------------------------------------------------------------
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
@@ -19,41 +12,12 @@
#
import os
import sys
-
-sys.path.insert(0, os.path.abspath('..'))
+sys.path.insert(0, os.path.abspath(os.path.join("..", "..")))
from parsedmarc import __version__
+# -- Project information -----------------------------------------------------
-# -- General configuration ------------------------------------------------
-
-# If your documentation needs a minimal Sphinx version, state it here.
-#
-# needs_sphinx = '1.0'
-
-# Add any Sphinx extension module names here, as strings. They can be
-# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
-# ones.
-extensions = ['sphinx.ext.autodoc',
- 'sphinx.ext.doctest',
- 'sphinx.ext.todo',
- 'sphinx.ext.viewcode',
- 'sphinx.ext.githubpages',
- 'sphinx.ext.napoleon']
-
-# Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
-
-# The suffix(es) of source filenames.
-# You can specify multiple suffix as a list of string:
-#
-# source_suffix = ['.rst', '.md']
-source_suffix = '.rst'
-
-# The master toctree document.
-master_doc = 'index'
-
-# General information about the project.
project = 'parsedmarc'
copyright = '2018, Sean Whalen'
author = 'Sean Whalen'
@@ -67,113 +31,43 @@ version = __version__
# The full version, including alpha/beta/rc tags.
release = version
-# The language for content autogenerated by Sphinx. Refer to documentation
-# for a list of supported languages.
-#
-# This is also used if you do content translation via gettext catalogs.
-# Usually you set "language" from the command line for these cases.
-language = "en"
+# -- General configuration ---------------------------------------------------
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = ['sphinx.ext.autodoc',
+ 'sphinx.ext.doctest',
+ 'sphinx.ext.todo',
+ 'sphinx.ext.viewcode',
+ 'sphinx.ext.githubpages',
+ 'sphinx.ext.napoleon',
+ 'myst_parser']
+
+myst_heading_anchors = 3
+autoclass_content = "init"
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+
+# The suffixes of source filenames.
+source_suffix = [".rst", ".md"]
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
-# This patterns also effect to html_static_path and html_extra_path
-exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
-
-# The name of the Pygments (syntax highlighting) style to use.
-pygments_style = 'sphinx'
-
-todo_include_todos = False
+# This pattern also affects html_static_path and html_extra_path.
+exclude_patterns = []
-# -- Options for HTML output ----------------------------------------------
+# -- Options for HTML output -------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'sphinx_rtd_theme'
-# Theme options are theme-specific and customize the look and feel of a theme
-# further. For a list of options available for each theme, see the
-# documentation.
-#
-# html_theme_options = {}
-
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
-
-# Custom sidebar templates, must be a dictionary that maps document names
-# to template names.
-#
-# This is required for the alabaster theme
-# refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars
-html_sidebars = {
- '**': [
- 'about.html',
- 'navigation.html',
- 'relations.html', # needs 'show_related': True theme option to display
- 'searchbox.html',
- 'donate.html',
- ]
-}
-
-
-# -- Options for HTMLHelp output ------------------------------------------
-
-# Output file base name for HTML help builder.
-htmlhelp_basename = 'parsedmarcdoc'
-
-
-# -- Options for LaTeX output ---------------------------------------------
-
-latex_elements = {
- # The paper size ('letterpaper' or 'a4paper').
- #
- # 'papersize': 'letterpaper',
-
- # The font size ('10pt', '11pt' or '12pt').
- #
- # 'pointsize': '10pt',
-
- # Additional stuff for the LaTeX preamble.
- #
- # 'preamble': '',
-
- # Latex figure (float) alignment
- #
- # 'figure_align': 'htbp',
-}
-
-# Grouping the document tree into LaTeX files. List of tuples
-# (source start file, target name, title,
-# author, documentclass [howto, manual, or own class]).
-latex_documents = [
- (master_doc, 'parsedmarc.tex', 'parsedmarc Documentation',
- 'parsedmarc', 'manual'),
-]
-
-
-# -- Options for manual page output ---------------------------------------
-
-# One entry per manual page. List of tuples
-# (source start file, name, description, authors, manual section).
-man_pages = [
- (master_doc, 'parsedmarc', 'parsedmarc Documentation',
- [author], 1)
-]
-
-
-# -- Options for Texinfo output -------------------------------------------
-
-# Grouping the document tree into Texinfo files. List of tuples
-# (source start file, target name, title, author,
-# dir menu entry, description, category)
-texinfo_documents = [
- (master_doc, 'parsedmarc', 'parsedmarc Documentation',
- author, 'parsedmarc', 'One line description of project.',
- 'Miscellaneous'),
-]
-
-
-
diff --git a/docs/index.rst b/docs/index.rst
index ef63378..88fced3 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -69,7 +69,7 @@ lookalike domain monitoring, check out `DomainAware " -PolicyScopeGroupId "" -Description "Restrict access to dmarc reports mailbox."``
+ .. code-block:: powershell
+
+ New-ApplicationAccessPolicy -AccessRight RestrictAccess
+ -AppId "" -PolicyScopeGroupId ""
+ -Description "Restrict access to dmarc reports mailbox."
- ``elasticsearch``
- - ``hosts`` - str: A comma separated list of hostnames and ports or URLs (e.g. ``127.0.0.1:9200`` or ``https://user:secret@localhost``)
+ - ``hosts`` - str: A comma separated list of hostnames and ports
+ or URLs (e.g. ``127.0.0.1:9200`` or
+ ``https://user:secret@localhost``)
.. note::
- Special characters in the username or password must be `URL encoded`_.
+ Special characters in the username or password must be
+ `URL encoded`_.
- ``ssl`` - bool: Use an encrypted SSL/TLS connection (Default: True)
- ``cert_path`` - str: Path to a trusted certificates
@@ -255,30 +300,36 @@ The full set of configuration options are:
- ``url`` - str: The URL of the Splunk HTTP Events Collector (HEC)
- ``token`` - str: The HEC token
- ``index`` - str: The Splunk index to use
- - ``skip_certificate_verification`` - bool: Skip certificate verification (not recommended)
+ - ``skip_certificate_verification`` - bool: Skip certificate
+ verification (not recommended)
- ``kafka``
- ``hosts`` - str: A comma separated list of Kafka hosts
- ``user`` - str: The Kafka user
- ``passsword`` - str: The Kafka password
- ``ssl`` - bool: Use an encrypted SSL/TLS connection (Default: True)
- - ``skip_certificate_verification`` - bool: Skip certificate verification (not recommended)
+ - ``skip_certificate_verification`` - bool: Skip certificate
+ verification (not recommended)
- ``aggregate_topic`` - str: The Kafka topic for aggregate reports
- ``forensic_topic`` - str: The Kafka topic for forensic reports
- ``smtp``
- ``host`` - str: The SMTP hostname
- ``port`` - int: The SMTP port (Default: 25)
- ``ssl`` - bool: Require SSL/TLS instead of using STARTTLS
- - ``skip_certificate_verification`` - bool: Skip certificate verification (not recommended)
+ - ``skip_certificate_verification`` - bool: Skip certificate
+ verification (not recommended)
- ``user`` - str: the SMTP username
- ``password`` - str: the SMTP password
- ``from`` - str: The From header to use in the email
- ``to`` - list: A list of email addresses to send to
- - ``subject`` - str: The Subject header to use in the email (Default: parsedmarc report)
+ - ``subject`` - str: The Subject header to use in the email
+ (Default: parsedmarc report)
- ``attachment`` - str: The ZIP attachment filenames
- - ``message`` - str: The email message (Default: Please see the attached parsedmarc report.)
+ - ``message`` - str: The email message
+ (Default: Please see the attached parsedmarc report.)
.. note::
- ``%`` characters must be escaped with another ``%`` character, so use ``%%`` wherever a ``%`` character is used.
+ ``%`` characters must be escaped with another ``%`` character,
+ so use ``%%`` wherever a ``%`` character is used.
- ``s3``
- ``bucket`` - str: The S3 bucket name
@@ -291,44 +342,52 @@ The full set of configuration options are:
- ``server`` - str: The Syslog server name or IP address
- ``port`` - int: The UDP port to use (Default: 514)
- ``gmail_api``
- - ``credentials_file`` - str: Path to file containing the credentials, None to disable (Default: None)
- - ``token_file`` - str: Path to save the token file (Default: .token)
- - ``include_spam_trash`` - bool: Include messages in Spam and Trash when searching reports (Default: False)
- - ``scopes`` - str: Comma separated list of scopes to use when acquiring credentials (Default: https://www.googleapis.com/auth/gmail.modify)
- - ``oauth2_port`` - int: The TCP port for the local server to listen on for the OAuth2 response (Default: 8080)
+ - ``credentials_file`` - str: Path to file containing the
+ credentials, None to disable (Default: None)
+ - ``token_file`` - str: Path to save the token file
+ (Default: .token)
+ - ``include_spam_trash`` - bool: Include messages in Spam and
+ Trash when searching reports (Default: False)
+ - ``scopes`` - str: Comma separated list of scopes to use when
+ acquiring credentials (Default: https://www.googleapis.com/auth/gmail.modify)
+ - ``oauth2_port`` - int: The TCP port for the local server to
+ listen on for the OAuth2 response (Default: 8080)
.. warning::
- It is **strongly recommended** to **not** use the ``nameservers`` setting.
- By default, ``parsedmarc`` uses `Cloudflare's public resolvers`_,
- which are much faster and more reliable than Google, Cisco OpenDNS, or
- even most local resolvers.
+ It is **strongly recommended** to **not** use the ``nameservers``
+ setting. By default, ``parsedmarc`` uses
+ `Cloudflare's public resolvers`_, which are much faster and more
+ reliable than Google, Cisco OpenDNS, or even most local resolvers.
- The ``nameservers`` option should only be used if your network blocks DNS
- requests to outside resolvers.
+ The ``nameservers`` option should only be used if your network
+ blocks DNS requests to outside resolvers.
.. warning::
- ``save_aggregate`` and ``save_forensic`` are separate options because
- you may not want to save forensic reports (also known as failure reports)
- to your Elasticsearch instance, particularly if you are in a
- highly-regulated industry that handles sensitive data, such as healthcare
- or finance. If your legitimate outgoing email fails DMARC, it is possible
+ ``save_aggregate`` and ``save_forensic`` are separate options
+ because you may not want to save forensic reports
+ (also known as failure reports) to your Elasticsearch instance,
+ particularly if you are in a highly-regulated industry that
+ handles sensitive data, such as healthcare or finance. If your
+ legitimate outgoing email fails DMARC, it is possible
that email may appear later in a forensic report.
- Forensic reports contain the original headers of an email that failed a
- DMARC check, and sometimes may also include the full message body,
- depending on the policy of the reporting organization.
+ Forensic reports contain the original headers of an email that
+ failed a DMARC check, and sometimes may also include the
+ full message body, depending on the policy of the reporting
+ organization.
- Most reporting organizations do not send forensic reports of any kind for
- privacy reasons. While aggregate DMARC reports are sent at least daily,
- it is normal to receive very few forensic reports.
+ Most reporting organizations do not send forensic reports of any
+ kind for privacy reasons. While aggregate DMARC reports are sent
+ at least daily, it is normal to receive very few forensic reports.
- An alternative approach is to still collect forensic/failure/ruf reports
- in your DMARC inbox, but run ``parsedmarc`` with ``save_forensic = True``
- manually on a separate IMAP folder (using the ``reports_folder`` option),
- after you have manually moved known samples you want to save to that
- folder (e.g. malicious samples and non-sensitive legitimate samples).
+ An alternative approach is to still collect forensic/failure/ruf
+ reports in your DMARC inbox, but run ``parsedmarc`` with
+ ``save_forensic = True`` manually on a separate IMAP folder (using
+ the ``reports_folder`` option), after you have manually moved
+ known samples you want to save to that folder
+ (e.g. malicious samples and non-sensitive legitimate samples).
Sample aggregate report output
@@ -340,8 +399,8 @@ report schema standardized in
`RFC 7480 Appendix C `_.
This draft schema is still in wide use.
-``parsedmarc`` produces consistent, normalized output, regardless of the report
-schema.
+``parsedmarc`` produces consistent, normalized output, regardless
+of the report schema.
JSON
----
@@ -416,7 +475,7 @@ JSON
CSV
---
-::
+.. code-block:: text
xml_schema,org_name,org_email,org_extra_contact_info,report_id,begin_date,end_date,errors,domain,adkim,aspf,p,sp,pct,fo,source_ip_address,source_country,source_reverse_dns,source_base_domain,count,spf_aligned,dkim_aligned,dmarc_aligned,disposition,policy_override_reasons,policy_override_comments,envelope_from,header_from,envelope_to,dkim_domains,dkim_selectors,dkim_results,spf_domains,spf_scopes,spf_results
draft,acme.com,noreply-dmarc-support@acme.com,http://acme.com/dmarc/support,9391651994964116463,2012-04-27 20:00:00,2012-04-28 19:59:59,,example.com,r,r,none,none,100,0,72.150.241.94,US,adsl-72-150-241-94.shv.bellsouth.net,bellsouth.net,2,True,False,True,none,,,example.com,example.com,,example.com,none,fail,example.com,mfrom,pass
@@ -524,7 +583,7 @@ JSON
CSV
---
-::
+.. code-block:: text
feedback_type,user_agent,version,original_envelope_id,original_mail_from,original_rcpt_to,arrival_date,arrival_date_utc,subject,message_id,authentication_results,dkim_domain,source_ip_address,source_country,source_reverse_dns,source_base_domain,delivery_result,auth_failure,reported_domain,authentication_mechanisms,sample_headers_only
auth-failure,Lua/1.0,1.0,,sharepoint@domain.de,peter.pan@domain.de,"Mon, 01 Oct 2018 11:20:27 +0200",2018-10-01 09:20:27,Subject,<38.E7.30937.BD6E1BB5@ mailrelay.de>,"dmarc=fail (p=none, dis=none) header.from=domain.de",,10.10.10.10,,,,policy,dmarc,domain.de,,False
@@ -547,7 +606,7 @@ Installation
to use that proxy. To do this, edit ``/etc/environment`` and add your
proxy details there, for example:
- ::
+ .. code-block:: bash
http_proxy=http://user:password@prox-server:3128
https_proxy=https://user:password@prox-server:3128
@@ -555,7 +614,7 @@ Installation
Or if no credentials are needed:
- ::
+ .. code-block:: bash
http_proxy=http://prox-server:3128
https_proxy=https://prox-server:3128
@@ -607,14 +666,17 @@ On CentOS or RHEL systems, run:
sudo dnf install -y geoipupdate
-The latest builds for Linux, macOS, and Windows can be downloaded from the `geoipupdate releases page on GitHub`_.
+The latest builds for Linux, macOS, and Windows can be downloaded
+from the `geoipupdate releases page on GitHub`_.
-On December 30th, 2019, MaxMind started requiring free accounts to access the free Geolite2 databases, in order `to
+On December 30th, 2019, MaxMind started requiring free accounts to
+access the free Geolite2 databases, in order `to
comply with various privacy regulations`_.
Start by `registering for a free GeoLite2 account`_, and signing in.
-Then, navigate the to the `License Keys`_ page under your account, and create a new license key for the version of
+Then, navigate to the `License Keys`_ page under your account,
+and create a new license key for the version of
``geoipupdate`` that was installed.
.. warning::
@@ -632,9 +694,11 @@ Then, navigate the to the `License Keys`_ page under your account, and create a
You can use ``parsedmarc`` as the description for the key.
-Once you have generated a key, download the config pre-filled configuration file.
-This file should be saved at ``/etc/GeoIP.conf`` on Linux or macOS systems, or at
-``%SystemDrive%\ProgramData\MaxMind\GeoIPUpdate\GeoIP.conf`` on Windows systems.
+Once you have generated a key, download the pre-filled
+configuration file. This file should be saved at ``/etc/GeoIP.conf``
+on Linux or macOS systems, or at
+``%SystemDrive%\ProgramData\MaxMind\GeoIPUpdate\GeoIP.conf`` on
+Windows systems.
Then run
@@ -644,10 +708,12 @@ Then run
To download the databases for the first time.
-The GeoLite2 Country, City, and ASN databases are updated weekly, every Tuesday.
-``geoipupdate`` can be run weekly by adding a cron job or scheduled task.
+The GeoLite2 Country, City, and ASN databases are updated weekly,
+every Tuesday. ``geoipupdate`` can be run weekly by adding a cron
+job or scheduled task.
-More information about ``geoipupdate`` can be found at the `MaxMind geoipupdate page`_.
+More information about ``geoipupdate`` can be found at the
+`MaxMind geoipupdate page`_.
Installing parsedmarc
---------------------
@@ -684,7 +750,8 @@ Install parsedmarc in a virtualenv
sudo -u parsedmarc virtualenv /opt/parsedmarc/venv
-CentOS/RHEL 8 systems use Python 3.6 by default, so on those systems explicitly tell ``virtualenv`` to use ``python3.9`` instead
+CentOS/RHEL 8 systems use Python 3.6 by default, so on those systems
+explicitly tell ``virtualenv`` to use ``python3.9`` instead
.. code-block:: bash
@@ -724,7 +791,7 @@ Accessing an inbox using OWA/EWS
Starting in 8.0.0, parsedmarc supports accessing Microsoft/Office 365
inboxes via the Microsoft Graph API, which is preferred over Davmail.
-Some organisations do not allow IMAP or the Microsoft Graph API,
+Some organizations do not allow IMAP or the Microsoft Graph API,
and only support Exchange Web Services (EWS)/Outlook Web Access (OWA).
In that case, Davmail will need to be set up
as a local EWS/OWA IMAP gateway. It can even work where
@@ -966,7 +1033,8 @@ For CentOS, RHEL, and other RPM systems, follow the Elastic RPM guides for
sudo service elasticsearch start
sudo service kibana start
-Without the commercial X-Pack_ or ReadonlyREST_ products, Kibana does not have any authentication
+Without the commercial X-Pack_ or ReadonlyREST_ products, Kibana
+does not have any authentication
mechanism of its own. You can use nginx as a reverse proxy that provides basic
authentication.
@@ -1301,7 +1369,7 @@ is using a particular service. With that information, you can contact them and
have them set up DKIM.
.. note::
-
+
If you have a lot of B2C customers, you may see a high volume of emails as
your domains coming from consumer email services, such as Google/Gmail and
Yahoo! This occurs when customers have mailbox rules in place that forward
@@ -1386,24 +1454,26 @@ What if a sender won't support DKIM/DMARC?
.. warning ::
Do not alter the ``p`` or ``sp`` values of the DMARC record on the
- Top-Level Domain (TLD) – that would leave you vulnerable to spoofing of
- your TLD and/or any subdomain.
+ Top-Level Domain (TLD) – that would leave you vulnerable to
+ spoofing of your TLD and/or any subdomain.
What about mailing lists?
=========================
-When you deploy DMARC on your domain, you might find that messages relayed by
-mailing lists are failing DMARC, most likely because the mailing list is
-spoofing your from address, and modifying the subject, footer, or other part
-of the message, thereby breaking the DKIM signature.
+When you deploy DMARC on your domain, you might find that messages
+relayed by mailing lists are failing DMARC, most likely because the mailing
+list is spoofing your from address, and modifying the subject,
+footer, or other part of the message, thereby breaking the
+DKIM signature.
Mailing list list best practices
--------------------------------
-Ideally, a mailing list should forward messages without altering the headers
-or body content at all. `Joe Nelson`_ does a fantastic job of explaining exactly
-what mailing lists should and shouldn't do to be fully DMARC compliant.
-Rather than repeat his fine work, here's a summary:
+Ideally, a mailing list should forward messages without altering the
+headers or body content at all. `Joe Nelson`_ does a fantastic job of
+explaining exactly what mailing lists should and shouldn't do to be
+fully DMARC compliant. Rather than repeat his fine work, here's a
+summary:
**Do**
@@ -1473,7 +1543,7 @@ Navigate to Privacy Options> Sending Filters, and configure the settings below
====================================== ==========
**Setting** **Value**
**dmarc_moderation_action** Accept
-**dmarc_quarentine_moderation_action** Yes
+**dmarc_quarantine_moderation_action** Yes
**dmarc_none_moderation_action** Yes
====================================== ==========
@@ -1495,7 +1565,7 @@ Configure the settings below
**Include RFC2369 headers** Yes
**Include the list post header** Yes
**Explicit reply-to address**
-**First strip replyo** No
+**First strip replyto** No
**Reply goes to list** No munging
====================================== ==========
@@ -1541,7 +1611,7 @@ Navigate to Privacy Options> Sending Filters, and configure the settings below
====================================== ==========
**Setting** **Value**
**dmarc_moderation_action** Munge From
-**dmarc_quarentine_moderation_action** Yes
+**dmarc_quarantine_moderation_action** Yes
**dmarc_none_moderation_action** Yes
====================================== ==========
diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py
index 5942109..ee87b33 100644
--- a/parsedmarc/__init__.py
+++ b/parsedmarc/__init__.py
@@ -34,7 +34,7 @@ from parsedmarc.utils import is_outlook_msg, convert_outlook_msg
from parsedmarc.utils import parse_email
from parsedmarc.utils import timestamp_to_human, human_timestamp_to_datetime
-__version__ = "8.3.0"
+__version__ = "8.3.1"
logger.debug("parsedmarc v{0}".format(__version__))
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..73a8afa
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,69 @@
+[build-system]
+requires = [
+    "hatchling>=1.8.1",
+]
+build-backend = "hatchling.build"
+
+[project]
+name = "parsedmarc"
+dynamic = [
+    "version",  # not hard-coded here; see [tool.hatch.version]
+]
+description = "A Python package and CLI for parsing aggregate and forensic DMARC reports"
+readme = "README.md"
+license = "Apache-2.0"  # SPDX identifier; agrees with the license classifier below
+authors = [
+    { name = "Sean Whalen", email = "whalenster@gmail.com" },
+]
+keywords = [
+    "DMARC",
+    "parser",
+    "reporting",
+]
+classifiers = [
+    "Development Status :: 5 - Production/Stable",
+    "Intended Audience :: Developers",
+    "Intended Audience :: Information Technology",
+    "License :: OSI Approved :: Apache Software License",
+    "Operating System :: OS Independent",
+    "Programming Language :: Python :: 3",
+]
+dependencies = [
+    "azure-identity>=1.8.0",
+    "boto3>=1.16.63",
+    "dateparser>=1.1.1",
+    "dnspython>=2.0.0",
+    "elasticsearch-dsl==7.4.0",
+    "elasticsearch<7.14.0",
+    "expiringdict>=1.1.4",
+    "geoip2>=3.0.0",
+    "google-api-core>=2.4.0",
+    "google-api-python-client>=2.35.0",
+    "google-auth-httplib2>=0.1.0",
+    "google-auth-oauthlib>=0.4.6",
+    "google-auth>=2.3.3",
+    "imapclient>=2.1.0",
+    "kafka-python>=1.4.4",
+    "lxml>=4.4.0",
+    "mailsuite>=1.6.1",
+    "msgraph-core>=0.2.2",
+    "publicsuffix2>=2.20190812",
+    "requests>=2.22.0",
+    "tqdm>=4.31.1",
+    "urllib3>=1.25.7",
+    "xmltodict>=0.12.0",
+]
+
+[project.scripts]
+parsedmarc = "parsedmarc.cli:_main"
+
+[project.urls]
+Homepage = "https://domainaware.github.io/parsedmarc"
+
+[tool.hatch.version]
+path = "parsedmarc/__init__.py"  # file that assigns __version__
+
+[tool.hatch.build.targets.sdist]
+include = [
+    "/parsedmarc",
+]
diff --git a/requirements.txt b/requirements.txt
index 13864f5..fa27eea 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -37,3 +37,5 @@ google-api-python-client>=2.35.0
google-auth>=2.3.3
google-auth-httplib2>=0.1.0
google-auth-oauthlib>=0.4.6
+hatch>=1.5.0
+myst-parser>=0.18.0
\ No newline at end of file