diff --git a/.gitignore b/.gitignore index d4e54e8..080a1c9 100644 --- a/.gitignore +++ b/.gitignore @@ -106,7 +106,7 @@ ENV/ .idea/ # Visual Studio Code settings -.vscode/ +#.vscode/ # I/O files output/ diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..1305574 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,105 @@ +{ + "markdownlint.config": { + "MD024": false + }, + + "cSpell.words": [ + "adkim", + "andrewmcgilvray", + "arcname", + "aspf", + "autoclass", + "automodule", + "bellsouth", + "brakhane", + "Brightmail", + "CEST", + "CHACHA", + "checkdmarc", + "dateparser", + "Davmail", + "DBIP", + "devel", + "DMARC", + "Dmarcian", + "dnspython", + "exampleuser", + "expiringdict", + "genindex", + "geoipupdate", + "Geolite", + "geolocation", + "githubpages", + "Grafana", + "hostnames", + "htpasswd", + "httpasswd", + "IMAP", + "Interaktive", + "IPDB", + "journalctl", + "keepalive", + "keyout", + "Leeman", + "libemail", + "LISTSERV", + "lxml", + "mailparser", + "mailrelay", + "mailsuite", + "maxdepth", + "maxmind", + "mbox", + "mfrom", + "michaeldavie", + "mikesiegel", + "mitigations", + "MMDB", + "modindex", + "msgconvert", + "msgraph", + "Munge", + "ndjson", + "newkey", + "nondigest", + "nosecureimap", + "nosniff", + "nwettbewerb", + "parsedmarc", + "passsword", + "Postorius", + "premade", + "procs", + "publicsuffix", + "publixsuffix", + "pypy", + "quickstart", + "Reindex", + "replyto", + "Rollup", + "Rpdm", + "SAMEORIGIN", + "Servernameone", + "setuptools", + "STARTTLS", + "toctree", + "TQDDM", + "tqdm", + "Übersicht", + "uids", + "uper", + "urllib", + "Valimail", + "venv", + "viewcode", + "virtualenv", + "webmail", + "Wettbewerber", + "Whalen", + "whitespaces", + "xennn", + "xmltodict", + "zscholl" + ], + "esbonio.sphinx.confDir": "docs" +} \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 42a46b5..88de451 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,12 @@ Changelog ========= +8.3.1 
+----- + +- Handle unexpected XML parsing errors more gracefully (PR #349) +- Migrate build from `setuptools` to `hatch` + 8.3.0 ----- @@ -23,7 +29,7 @@ Changelog 8.1.0 ----- -- Restore compatability with <8.0.0 configuration files (with deprecation warnings) +- Restore compatibility with <8.0.0 configuration files (with deprecation warnings) - Set default `reports_folder` to `Inbox` (rather than `INBOX`) when `msgraph` is configured - Mark a message as read when fetching a message from Microsoft Graph @@ -53,7 +59,7 @@ Changelog - Add support for Microsoft/Office 365 via Microsoft Graph API (PR #301 closes issue #111) - Pin `elasticsearch-dsl` version at `>=7.2.0<7.14.0` (PR #297 closes issue #296) - Properly initialize `ip_dp_path` (PR #294 closes issue #286) -- Remove usage of `lgging.basicConfig` (PR #285) +- Remove usage of `logging.basicConfig` (PR #285) - Add support for the Gmail API (PR #284 and PR #307 close issue #96) 7.1.1 @@ -83,12 +89,12 @@ Changelog ----- - Fix issue #221: Crash when handling invalid reports without root node (PR #248) -- Use UTC datetime objects for Elasticsearch output (PR #245) +- Use UTC datetime objects for Elasticsearch output (PR #245) - Fix issues #219, #155, and #103: IMAP connections break on large emails (PR #241) - Add support for saving reports to S3 buckets (PR #223) - Pass `offline` parameter to `wait_inbox()` (PR #216) - Add more details to logging (PR #220) -- Add options customizing the names of output files (Modifications based on PR #225) +- Add options customizing the names of output files (Modifications based on PR #225) - Wait for 5 seconds before attempting to reconnect to an IMAP server (PR #217) - Add option to process messages in batches (PR #222) @@ -113,7 +119,6 @@ Changelog - Fix IMAP debugging output - Fix `User-Agent` string - 6.10.0 ------ @@ -137,7 +142,7 @@ Changelog 6.8.1 ----- -- Use `match_phrase` instead of `match` when looking for existing strings in Elasticsearch +- Use `match_phrase` instead 
of `match` when looking for existing strings in Elasticsearch 6.8.0 ----- @@ -149,10 +154,11 @@ Changelog 6.7.4 ----- -- Update dependencies +- Update dependencies 6.7.3 ----- + - Make `dkim_aligned` and `spf_aligned` case insensitive (PR #132) 6.7.2 @@ -164,7 +170,7 @@ Changelog ----- - Parse forensic email samples with non-standard date headers -- Graceful handling of a failure to download the GeoIP database (issue #123) +- Graceful handling of a failure to download the GeoIP database (issue #123) 6.7.0 ----- @@ -222,7 +228,7 @@ Changelog - Merge PR #98 from michaeldavie - Add functions - `parsed_aggregate_reports_to_csv_row(reports)` - - `parsed_forensic_reports_to_csv_row(reports)` + - `parsed_forensic_reports_to_csv_row(reports)` - Require `dnspython>=1.16.0` 6.5.0 @@ -231,31 +237,30 @@ Changelog - Move mail processing functions to the [`mailsuite`](https://seanthegeek.github.io/mailsuite/) package - Add offline option (closes issue #90) -- Use UDP instead of TCP, and properly set the timeout when querying DNS +- Use UDP instead of TCP, and properly set the timeout when querying DNS (closes issue #79 and #92) -- Log the current file path being processed when `--debug` is used +- Log the current file path being processed when `--debug` is used (closes issue #95) - 6.4.2 ----- -- Do not attempt to convert `org_name` to a base domain if `org_name` contains +- Do not attempt to convert `org_name` to a base domain if `org_name` contains a space (closes issue #94) - Always lowercase the `header_from` -- Provide a more helpful warning message when `GeoLite2-Country.mmdb` is +- Provide a more helpful warning message when `GeoLite2-Country.mmdb` is missing 6.4.1 ----- -- Raise `utils.DownloadError` exception when a GeoIP database or Public +- Raise `utils.DownloadError` exception when a GeoIP database or Public Suffix List (PSL) download fails (closes issue #73) 6.4.0 ----- -- Add ``number_of_shards`` and ``number_of_replicas`` as possible options +- Add 
``number_of_shards`` and ``number_of_replicas`` as possible options in the ``elasticsearch`` configuration file section (closes issue #78) 6.3.7 @@ -396,7 +401,7 @@ in the ``elasticsearch`` configuration file section (closes issue #78) ----- - Move options from CLI to a config file (see updated installation documentation) -- Refactoring to make argument names consistent +- Refactoring to make argument names consistent 5.3.0 ----- @@ -411,10 +416,12 @@ in the ``elasticsearch`` configuration file section (closes issue #78) 5.2.0 ----- + - Add filename and line number to logging output - Improved IMAP error handling - Add CLI options - ``` + + ```text --elasticsearch-use-ssl Use SSL when connecting to Elasticsearch --elasticsearch-ssl-cert-path ELASTICSEARCH_SSL_CERT_PATH @@ -447,7 +454,7 @@ in the ``elasticsearch`` configuration file section (closes issue #78) - Bugfix: Submit aggregate dates to Elasticsearch as lists, not tuples - Support `elasticsearch-dsl<=6.3.0` -- Add support for TLS/SSL and username/password auth to Kafka +- Add support for TLS/SSL and username/password auth to Kafka 5.0.2 ----- @@ -461,20 +468,19 @@ in the ``elasticsearch`` configuration file section (closes issue #78) - Add Elasticsearch to automated testing - Lock `elasticsearch-dsl` required version to `6.2.1` (closes issue #25) - 5.0.0 ----- **Note**: Re-importing `kibana_saved_objects.json` in Kibana [is required](https://domainaware.github.io/parsedmarc/#upgrading-kibana-index-patterns) when upgrading to this version! 
-- Bugfix: Reindex the aggregate report index field `published_policy.fo` +- Bugfix: Reindex the aggregate report index field `published_policy.fo` as `text` instead of `long` (Closes issue #31) - Bugfix: IDLE email processing in Gmail/G-Suite accounts (closes issue #33) - Bugfix: Fix inaccurate DNS timeout in CLI documentation (closes issue #34) - Bugfix: Forensic report processing via CLI -- Bugfix: Duplicate aggregate report Elasticsearch query broken -- Bugfix: Crash when `Arrival-Date` header is missing in a -forensic/fialure/ruf report +- Bugfix: Duplicate aggregate report Elasticsearch query broken +- Bugfix: Crash when `Arrival-Date` header is missing in a +forensic/failure/ruf report - IMAP reliability improvements - Save data in separate indexes each day to make managing data retention easier - Cache DNS queries in memory @@ -497,7 +503,6 @@ forensic/fialure/ruf report - Recreated the `date_range` values from the ES client for easier parsing. - Started sending individual record slices. Kafka default message size is 1 MB, some aggregate reports were exceeding this. Now it appends meta-data and sends record by record. 
- 4.3.8 ----- @@ -510,10 +515,9 @@ and `watch_inbox()` 4.3.7 ----- -- When checking an inbox, always recheck for messages when processing is +- When checking an inbox, always recheck for messages when processing is complete - 4.3.6 ----- @@ -529,7 +533,7 @@ complete - Fix crash on empty aggregate report comments (brakhane - #25) - Add SHA256 hashes of attachments to output -- Add `strip_attachment_payloads` option to functions and +- Add `strip_attachment_payloads` option to functions and `--strip-attachment-payloads` option to the CLI (#23) - Set `urllib3` version requirements to match `requests` @@ -552,22 +556,22 @@ complete 4.3.0 ----- -- Fix bug where `parsedmarc` would always try to save to Elastic search, +- Fix bug where `parsedmarc` would always try to save to Elastic search, even if only `--hec` was used - Add options to save reports as a Kafka topic (mikesiegel - #21) - Major refactoring of functions - Support parsing forensic reports generated by Brightmail -- Make `sample_headers_only` flag more reliable -- Functions that might be useful to other projects are now stored in +- Make `sample_headers_only` flag more reliable +- Functions that might be useful to other projects are now stored in `parsedmarc.utils`: - - `get_base_domain(domain)` - - `get_filename_safe_string(string)` - - `get_ip_address_country(ip_address)` - - `get_ip_address_info(ip_address, nameservers=None, timeout=2.0)` - - `get_reverse_dns(ip_address, nameservers=None, timeout=2.0)` - - `human_timestamp_to_datetime(human_timestamp)` - - `human_timestamp_to_timestamp(human_timestamp)` - - `parse_email(data)` + - `get_base_domain(domain)` + - `get_filename_safe_string(string)` + - `get_ip_address_country(ip_address)` + - `get_ip_address_info(ip_address, nameservers=None, timeout=2.0)` + - `get_reverse_dns(ip_address, nameservers=None, timeout=2.0)` + - `human_timestamp_to_datetime(human_timestamp)` + - `human_timestamp_to_timestamp(human_timestamp)` + - `parse_email(data)` 4.2.0 ------ @@ 
-577,11 +581,10 @@ complete - Suppress Splunk SSL validation warnings - Change default logging level to `WARNING` - 4.1.9 ----- -- Workaround for forensic/ruf reports that are missing `Arrival-Date` and/or +- Workaround for forensic/ruf reports that are missing `Arrival-Date` and/or `Reported-Domain` 4.1.8 @@ -604,8 +607,8 @@ complete - Only move or delete IMAP emails after they all have been parsed - Move/delete messages one at a time - do not exit on error -- Reconnect to IMAP if connection is broken during -`get_dmarc_reports_from_inbox()` +- Reconnect to IMAP if connection is broken during +`get_dmarc_reports_from_inbox()` - Add`--imap-port` and `--imap-no-ssl` CLI options 4.1.4 @@ -616,7 +619,7 @@ complete 4.1.3 ----- -- Fix crash introduced in 4.1.0 when creating Elasticsearch indexes (Issue #15) +- Fix crash introduced in 4.1.0 when creating Elasticsearch indexes (Issue #15) 4.1.2 ----- @@ -636,7 +639,6 @@ complete - If an aggregate report has the invalid `disposition` value `pass`, change it to `none` - 4.0.2 ----- @@ -645,24 +647,24 @@ it to `none` 4.0.1 ----- -- When saving aggregate reports in Elasticsearch store `domain` in +- When saving aggregate reports in Elasticsearch store `domain` in `published_policy` -- Rename `policy_published` to `published_policy`when saving aggregate +- Rename `policy_published` to `published_policy`when saving aggregate reports to Splunk 4.0.0 ----- -- Add support for sending DMARC reports to a Splunk HTTP Events +- Add support for sending DMARC reports to a Splunk HTTP Events Collector (HEC) -- Use a browser-like `User-Agent` when downloading the Public Suffix List and +- Use a browser-like `User-Agent` when downloading the Public Suffix List and GeoIP DB to avoid being blocked by security proxies - Reduce default DNS timeout to 2.0 seconds - Add alignment booleans to JSON output -- Fix `.msg` parsing CLI exception when `msgconvert` is not found in the +- Fix `.msg` parsing CLI exception when `msgconvert` is not found in 
the system path - Add `--outgoing-port` and `--outgoing-ssl` options -- Fall back to plain text SMTP if `--outgoing-ssl` is not used and `STARTTLS` +- Fall back to plain text SMTP if `--outgoing-ssl` is not used and `STARTTLS` is not supported by the server - Always use `\n` as the newline when generating CSVs - Workaround for random Exchange/Office365 `Server Unavailable` IMAP errors @@ -680,7 +682,7 @@ is not supported by the server 3.9.5 ----- -- Refactor to use a shared IMAP connection for inbox watching and message +- Refactor to use a shared IMAP connection for inbox watching and message downloads - Gracefully recover from broken pipes in IMAP @@ -704,17 +706,17 @@ downloads 3.9.1 ----- -- Use `COPY` and delete if an IMAP server does not support `MOVE` +- Use `COPY` and delete if an IMAP server does not support `MOVE` (closes issue #9) 3.9.0 ----- -- Reduce IMAP `IDLE` refresh rate to 5 minutes to avoid session timeouts in +- Reduce IMAP `IDLE` refresh rate to 5 minutes to avoid session timeouts in Gmail - Fix parsing of some forensic/failure/ruf reports - Include email subject in all warning messages -- Fix example NGINX configuration in the installation documentation +- Fix example NGINX configuration in the installation documentation (closes issue #6) 3.8.2 @@ -731,7 +733,7 @@ Gmail 3.8.0 ----- -- Use `.` instead of `/` as the IMAP folder hierarchy separator when `/` +- Use `.` instead of `/` as the IMAP folder hierarchy separator when `/` does not work - fixes dovecot support (#5) - Fix parsing of base64-encoded forensic report data @@ -755,7 +757,7 @@ does not work - fixes dovecot support (#5) 3.7.0 ----- -- Fix bug where PSL would be called before it was downloaded if the PSL was +- Fix bug where PSL would be called before it was downloaded if the PSL was older than 24 Hours 3.6.1 @@ -777,47 +779,51 @@ older than 24 Hours - Add country rankings to the dashboards - Fix crash when parsing report with empty - 3.5.0 ----- + - Use Cloudflare's public DNS 
resolvers by default instead of Google's - Fix installation from virtualenv - Fix documentation typos - 3.4.1 ----- + - Documentation fixes - Fix console output 3.4.0 ----- + - Maintain IMAP IDLE state when watching the inbox - The `-i`/`--idle` CLI option is now `-w`/`--watch` - Improved Exception handling and documentation - 3.3.0 ----- -- Fix errors when saving to Elasticsearch +- Fix errors when saving to Elasticsearch 3.2.0 ----- + - Fix existing aggregate report error message 3.1.0 ----- -- Fix existing aggregate report query +- Fix existing aggregate report query 3.0.0 ----- -### New features + +New features + - Add option to select the IMAP folder where reports are stored - Add options to send data to Elasticsearch -### Changes +Changes + - Use Google's public nameservers (`8.8.8.8` and `4.4.4.4`) by default - Detect aggregate report email attachments by file content rather than @@ -827,6 +833,7 @@ file extension 2.1.2 ----- + - Rename `parsed_dmarc_forensic_reports_to_csv()` to `parsed_forensic_reports_to_csv()` to match other functions - Rename `parsed_aggregate_report_to_csv()` to @@ -835,25 +842,31 @@ file extension 2.1.1 ----- + - Documentation fixes 2.1.0 ----- + - Add `get_report_zip()` and `email_results()` - Add support for sending report emails via the command line 2.0.1 ----- + - Fix documentation - Remove Python 2 code 2.0.0 ----- -### New features + +New features + - Parse forensic reports - Parse reports from IMAP inbox -### Changes +Changes + - Drop support for Python 2 - Command line output is always a JSON object containing the lists `aggregate_reports` and `forensic_reports` @@ -862,26 +875,31 @@ file extension 1.1.0 ----- -- Add `extract_xml()` and `human_timespamp_to_datetime` methods +- Add `extract_xml()` and `human_timestamp_to_datetime` methods 1.0.5 ----- + - Prefix public suffix and GeoIP2 database filenames with `.` - Properly format errors list in CSV output 1.0.3 ----- + - Fix documentation formatting 1.0.2 ----- + - Fix more 
packaging flaws 1.0.1 ----- + - Fix packaging flaw 1.0.0 ----- + - Initial release diff --git a/README.md b/README.md new file mode 100644 index 0000000..0b7e237 --- /dev/null +++ b/README.md @@ -0,0 +1,41 @@ +# parsedmarc + +[![Build +Status](https://github.com/domainaware/parsedmarc/actions/workflows/python-tests.yml/badge.svg)](https://github.com/domainaware/parsedmarc/actions/workflows/python-tests.yml) +[![Code +Coverage](https://codecov.io/gh/domainaware/parsedmarc/branch/master/graph/badge.svg)](https://codecov.io/gh/domainaware/parsedmarc) +[![PyPI +Package](https://img.shields.io/pypi/v/parsedmarc.svg)](https://pypi.org/project/parsedmarc/) + +[![A screenshot of DMARC summary charts in Kibana](https://raw.githubusercontent.com/domainaware/parsedmarc/master/docs/_static/screenshots/dmarc-summary-charts.png){.align-center}](https://raw.githubusercontent.com/domainaware/parsedmarc/master/docs/_static/screenshots/dmarc-summary-charts.png) + +`parsedmarc` is a Python module and CLI utility for parsing DMARC +reports. When used with Elasticsearch and Kibana (or Splunk), it works +as a self-hosted open source alternative to commercial DMARC report +processing services such as Agari Brand Protection, Dmarcian, OnDMARC, +ProofPoint Email Fraud Defense, and Valimail. + +## Help Wanted + +This project is maintained by one developer. Please consider +reviewing the open +[issues](https://github.com/domainaware/parsedmarc/issues) to see how +you can contribute code, documentation, or user support. Assistance on +the pinned issues would be particularly helpful. + +Thanks to all +[contributors](https://github.com/domainaware/parsedmarc/graphs/contributors)! 
+ +## Features + +- Parses draft and 1.0 standard aggregate/rua reports +- Parses forensic/failure/ruf reports +- Can parse reports from an inbox over IMAP, Microsoft Graph, or Gmail + API +- Transparently handles gzip or zip compressed reports +- Consistent data structures +- Simple JSON and/or CSV output +- Optionally email the results +- Optionally send the results to Elasticsearch and/or Splunk, for use + with premade dashboards +- Optionally send reports to Apache Kafka diff --git a/README.rst b/README.rst deleted file mode 100644 index 003de1b..0000000 --- a/README.rst +++ /dev/null @@ -1,574 +0,0 @@ -========== -parsedmarc -========== - -|Build Status| |Code Coverage| |PyPI Package| - -.. note:: **Help Wanted** - - This is a project is maintained by one developer. - Please consider reviewing the open `issues`_ to see how you can contribute code, documentation, or user support. - Assistance on the pinned issues would be particularly helpful. - - Thanks to all `contributors`_! - -.. image:: https://raw.githubusercontent.com/domainaware/parsedmarc/master/docs/_static/screenshots/dmarc-summary-charts.png - :alt: A screenshot of DMARC summary charts in Kibana - :align: center - :scale: 50 - :target: https://raw.githubusercontent.com/domainaware/parsedmarc/master/docs/_static/screenshots/dmarc-summary-charts.png - -``parsedmarc`` is a Python module and CLI utility for parsing DMARC reports. -When used with Elasticsearch and Kibana (or Splunk), it works as a self-hosted -open source alternative to commercial DMARC report processing services such -as Agari Brand Protection, Dmarcian, OnDMARC, ProofPoint Email Fraud Defense, -and Valimail. 
- -Features -======== - -* Parses draft and 1.0 standard aggregate/rua reports -* Parses forensic/failure/ruf reports -* Can parse reports from an inbox over IMAP, Microsoft Graph, or Gmail API -* Transparently handles gzip or zip compressed reports -* Consistent data structures -* Simple JSON and/or CSV output -* Optionally email the results -* Optionally send the results to Elasticsearch and/or Splunk, for use with - premade dashboards -* Optionally send reports to Apache Kafka - -Resources -========= - -DMARC guides ------------- - -* `Demystifying DMARC`_ - A complete guide to SPF, DKIM, and DMARC - -SPF and DMARC record validation -------------------------------- - -If you are looking for SPF and DMARC record validation and parsing, -check out the sister project, -`checkdmarc `_. - -Lookalike domains ------------------ - -DMARC protects against domain spoofing, not lookalike domains. For open source -lookalike domain monitoring, check out -`DomainAware `_. - - -CLI help -======== - -:: - - usage: parsedmarc [-h] [-c CONFIG_FILE] [--strip-attachment-payloads] [-o OUTPUT] - [--aggregate-json-filename AGGREGATE_JSON_FILENAME] - [--forensic-json-filename FORENSIC_JSON_FILENAME] - [--aggregate-csv-filename AGGREGATE_CSV_FILENAME] - [--forensic-csv-filename FORENSIC_CSV_FILENAME] - [-n NAMESERVERS [NAMESERVERS ...]] [-t DNS_TIMEOUT] [--offline] - [-s] [--verbose] [--debug] [--log-file LOG_FILE] [-v] - [file_path ...] 
- - Parses DMARC reports - - positional arguments: - file_path one or more paths to aggregate or forensic report - files, emails, or mbox files' - - optional arguments: - -h, --help show this help message and exit - -c CONFIG_FILE, --config-file CONFIG_FILE - a path to a configuration file (--silent implied) - --strip-attachment-payloads - remove attachment payloads from forensic report output - -o OUTPUT, --output OUTPUT - write output files to the given directory - --aggregate-json-filename AGGREGATE_JSON_FILENAME - filename for the aggregate JSON output file - --forensic-json-filename FORENSIC_JSON_FILENAME - filename for the forensic JSON output file - --aggregate-csv-filename AGGREGATE_CSV_FILENAME - filename for the aggregate CSV output file - --forensic-csv-filename FORENSIC_CSV_FILENAME - filename for the forensic CSV output file - -n NAMESERVERS [NAMESERVERS ...], --nameservers NAMESERVERS [NAMESERVERS ...] - nameservers to query - -t DNS_TIMEOUT, --dns_timeout DNS_TIMEOUT - number of seconds to wait for an answer from DNS - (default: 2.0) - --offline do not make online queries for geolocation or DNS - -s, --silent only print errors and warnings - --verbose more verbose output - --debug print debugging information - --log-file LOG_FILE output logging to a file - -v, --version show program's version number and exit - - -.. note:: - - Starting in ``parsedmarc`` 7.1.1, a static copy of the `IP to Country Lite database`_ from IPDB is - distributed with ``parsedmarc``, under the terms of the `Creative Commons Attribution 4.0 International License`_. as - a fallback if the `MaxMind GeoLite2 Country database`_ is not installed However, ``parsedmarc`` cannot install updated - versions of these databases as they are released, so MaxMind's databases and `geoipupdate`_ tool is still the - preferable solution. - - The location of the database file can be overridden by using the ``ip_db_path`` setting. - -.. 
note:: - - Starting in ``parsedmarc`` 6.0.0, most CLI options were moved to a configuration file, described below. - -Configuration file -================== - -``parsedmarc`` can be configured by supplying the path to an INI file - -.. code-block:: bash - - parsedmarc -c /etc/parsedmarc.ini - -For example - -.. code-block:: ini - - # This is an example comment - - [general] - save_aggregate = True - save_forensic = True - - [imap] - host = imap.example.com - user = dmarcresports@example.com - password = $uperSecure - - [mailbox] - watch = True - delete = False - - [elasticsearch] - hosts = 127.0.0.1:9200 - ssl = False - - [splunk_hec] - url = https://splunkhec.example.com - token = HECTokenGoesHere - index = email - - [s3] - bucket = my-bucket - path = parsedmarc - - [syslog] - server = localhost - port = 514 - - - -The full set of configuration options are: - -.. note:: - ``%`` characters must be escaped with another ``%`` character, so use ``%%`` wherever a `%`` character is used, such as a password. 
- - -- ``general`` - - ``save_aggregate`` - bool: Save aggregate report data to Elasticsearch, Splunk and/or S3 - - ``save_forensic`` - bool: Save forensic report data to Elasticsearch, Splunk and/or S3 - - ``strip_attachment_payloads`` - bool: Remove attachment payloads from results - - ``output`` - str: Directory to place JSON and CSV files in - - ``aggregate_json_filename`` - str: filename for the aggregate JSON output file - - ``forensic_json_filename`` - str: filename for the forensic JSON output file - - ``ip_db_path`` - str: An optional custom path to a MMDB file from MaxMind or DBIP - - ``offline`` - bool: Do not use online queries for geolocation or DNS - - ``nameservers`` - str: A comma separated list of DNS resolvers (Default: `Cloudflare's public resolvers`_) - - ``dns_timeout`` - float: DNS timeout period - - ``debug`` - bool: Print debugging messages - - ``silent`` - bool: Only print errors (Default: True) - - ``log_file`` - str: Write log messages to a file at this path - - ``n_procs`` - int: Number of process to run in parallel when parsing in CLI mode (Default: 1) - - ``chunk_size`` - int: Number of files to give to each process when running in parallel. - - .. note:: - Setting this to a number larger than one can improve performance when processing thousands of files - -- ``mailbox`` - - ``reports_folder`` - str: The mailbox folder (or label for Gmail) where the incoming reports can be found (Default: INBOX) - - ``archive_folder`` - str: The mailbox folder (or label for Gmail) to sort processed emails into (Default: Archive) - - ``watch`` - bool: Use the IMAP ``IDLE`` command to process messages as they arrive or poll MS Graph for new messages - - ``delete`` - bool: Delete messages after processing them, instead of archiving them - - ``test`` - bool: Do not move or delete messages - - ``batch_size`` - int: Number of messages to read and process before saving. Default 10. Use 0 for no limit. 
- - ``check_timeout`` - int: Number of seconds to wait for a IMAP IDLE response or the number of seconds until the next mail check (Default: 30) - -- ``imap`` - - ``host`` - str: The IMAP server hostname or IP address - - ``port`` - int: The IMAP server port (Default: 993). - - .. note:: - Starting in version 8.0.0, most options from the ``imap`` section have been moved to the ``mailbox`` section. - - .. note:: - If your host recommends another port, still try 993 - - - ``ssl`` - bool: Use an encrypted SSL/TLS connection (Default: True) - - ``skip_certificate_verification`` - bool: Skip certificate verification (not recommended) - - ``user`` - str: The IMAP user - - ``password`` - str: The IMAP password - -- ``msgraph`` - - ``auth_method`` - str: Authentication method, valid types are UsernamePassword, DeviceCode, or ClientSecret (Default: UsernamePassword). - - ``user`` - str: The M365 user, required when the auth method is UsernamePassword - - ``password`` - str: The user password, required when the auth method is UsernamePassword - - ``client_id`` - str: The app registration's client ID - - ``client_secret`` - str: The app registration's secret - - ``tenant_id`` - str: The Azure AD tenant ID. This is required for all auth methods except UsernamePassword. - - ``mailbox`` - str: The mailbox name. This defaults to the current user if using the UsernamePassword auth method, but could be a shared mailbox if the user has access to the mailbox - - ``token_file`` - str: Path to save the token file (Default: .token) - - .. note:: - You must create an app registration in Azure AD and have an admin grant the Microsoft Graph ``Mail.ReadWrite`` (delegated) permission to the app. - If you are using `UsernamePassword` auth and the mailbox is different from the username, you must grant the app ``Mail.ReadWrite.Shared``. - - .. warning:: - If you are using the `ClientSecret` auth method, you need to grant the ``Mail.ReadWrite`` (application) permission to the app. 
- You must also restrict the application's access to a specific mailbox since it allows all mailboxes by default. - Use the ``New-ApplicationAccessPolicy`` command in the Exchange PowerShell module. - If you need to scope the policy to shared mailboxes, you can add them to a mail enabled security group and use that as the group id. - - ``New-ApplicationAccessPolicy -AccessRight RestrictAccess -AppId "" -PolicyScopeGroupId "" -Description "Restrict access to dmarc reports mailbox."`` - - -- ``elasticsearch`` - - ``hosts`` - str: A comma separated list of hostnames and ports or URLs (e.g. ``127.0.0.1:9200`` or ``https://user:secret@localhost``) - - .. note:: - Special characters in the username or password must be `URL encoded`_. - - - ``ssl`` - bool: Use an encrypted SSL/TLS connection (Default: True) - - ``cert_path`` - str: Path to a trusted certificates - - ``index_suffix`` - str: A suffix to apply to the index names - - ``monthly_indexes`` - bool: Use monthly indexes instead of daily indexes - - ``number_of_shards`` - int: The number of shards to use when creating the index (Default: 1) - - ``number_of_replicas`` - int: The number of replicas to use when creating the index (Default: 1) -- ``splunk_hec`` - - ``url`` - str: The URL of the Splunk HTTP Events Collector (HEC) - - ``token`` - str: The HEC token - - ``index`` - str: The Splunk index to use - - ``skip_certificate_verification`` - bool: Skip certificate verification (not recommended) -- ``kafka`` - - ``hosts`` - str: A comma separated list of Kafka hosts - - ``user`` - str: The Kafka user - - ``passsword`` - str: The Kafka password - - ``ssl`` - bool: Use an encrypted SSL/TLS connection (Default: True) - - ``skip_certificate_verification`` - bool: Skip certificate verification (not recommended) - - ``aggregate_topic`` - str: The Kafka topic for aggregate reports - - ``forensic_topic`` - str: The Kafka topic for forensic reports -- ``smtp`` - - ``host`` - str: The SMTP hostname - - ``port`` - int: The 
SMTP port (Default: 25) - - ``ssl`` - bool: Require SSL/TLS instead of using STARTTLS - - ``skip_certificate_verification`` - bool: Skip certificate verification (not recommended) - - ``user`` - str: the SMTP username - - ``password`` - str: the SMTP password - - ``from`` - str: The From header to use in the email - - ``to`` - list: A list of email addresses to send to - - ``subject`` - str: The Subject header to use in the email (Default: parsedmarc report) - - ``attachment`` - str: The ZIP attachment filenames - - ``message`` - str: The email message (Default: Please see the attached parsedmarc report.) -- ``s3`` - - ``bucket`` - str: The S3 bucket name - - ``path`` - str: The path to upload reports to (Default: /) - - ``region_name`` - str: The region name (Optional) - - ``endpoint_url`` - str: The endpoint URL (Optional) - - ``access_key_id`` - str: The access key id (Optional) - - ``secret_access_key`` - str: The secret access key (Optional) -- ``syslog`` - - ``server`` - str: The Syslog server name or IP address - - ``port`` - int: The UDP port to use (Default: 514) -- ``gmail_api`` - - ``credentials_file`` - str: Path to file containing the credentials, None to disable (Default: None) - - ``token_file`` - str: Path to save the token file (Default: .token) - - ``include_spam_trash`` - bool: Include messages in Spam and Trash when searching reports (Default: False) - - ``scopes`` - str: Comma separated list of scopes to use when acquiring credentials (Default: https://www.googleapis.com/auth/gmail.modify) - - ``oauth2_port`` - int: The TCP port for the local server to listen on for the OAuth2 response (Default: 8080) - -.. warning:: - - It is **strongly recommended** to **not** use the ``nameservers`` setting. - By default, ``parsedmarc`` uses `Cloudflare's public resolvers`_, - which are much faster and more reliable than Google, Cisco OpenDNS, or - even most local resolvers. 
- - The ``nameservers`` option should only be used if your network blocks DNS - requests to outside resolvers. - -.. warning:: - - ``save_aggregate`` and ``save_forensic`` are separate options because - you may not want to save forensic reports (also known as failure reports) - to your Elasticsearch instance, particularly if you are in a - highly-regulated industry that handles sensitive data, such as healthcare - or finance. If your legitimate outgoing email fails DMARC, it is possible - that email may appear later in a forensic report. - - Forensic reports contain the original headers of an email that failed a - DMARC check, and sometimes may also include the full message body, - depending on the policy of the reporting organization. - - Most reporting organizations do not send forensic reports of any kind for - privacy reasons. While aggregate DMARC reports are sent at least daily, - it is normal to receive very few forensic reports. - - An alternative approach is to still collect forensic/failure/ruf reports - in your DMARC inbox, but run ``parsedmarc`` with ``save_forensic = True`` - manually on a separate IMAP folder (using the ``reports_folder`` option), - after you have manually moved known samples you want to save to that - folder (e.g. malicious samples and non-sensitive legitimate samples). - -Docker usage -============ - -.. code-block:: bash - - docker run -v "${PWD}/config.ini:/config.ini" ghcr.io/domainaware/parsedmarc: -c /config.ini - - -Sample aggregate report output -============================== - -Here are the results from parsing the `example `_ -report from the dmarc.org wiki. It's actually an older draft of the the 1.0 -report schema standardized in -`RFC 7480 Appendix C `_. -This draft schema is still in wide use. - -``parsedmarc`` produces consistent, normalized output, regardless of the report -schema. - -JSON ----- - -.. 
code-block:: json - - { - "xml_schema": "draft", - "report_metadata": { - "org_name": "acme.com", - "org_email": "noreply-dmarc-support@acme.com", - "org_extra_contact_info": "http://acme.com/dmarc/support", - "report_id": "9391651994964116463", - "begin_date": "2012-04-27 20:00:00", - "end_date": "2012-04-28 19:59:59", - "errors": [] - }, - "policy_published": { - "domain": "example.com", - "adkim": "r", - "aspf": "r", - "p": "none", - "sp": "none", - "pct": "100", - "fo": "0" - }, - "records": [ - { - "source": { - "ip_address": "72.150.241.94", - "country": "US", - "reverse_dns": "adsl-72-150-241-94.shv.bellsouth.net", - "base_domain": "bellsouth.net" - }, - "count": 2, - "alignment": { - "spf": true, - "dkim": false, - "dmarc": true - }, - "policy_evaluated": { - "disposition": "none", - "dkim": "fail", - "spf": "pass", - "policy_override_reasons": [] - }, - "identifiers": { - "header_from": "example.com", - "envelope_from": "example.com", - "envelope_to": null - }, - "auth_results": { - "dkim": [ - { - "domain": "example.com", - "selector": "none", - "result": "fail" - } - ], - "spf": [ - { - "domain": "example.com", - "scope": "mfrom", - "result": "pass" - } - ] - } - } - ] - } - -CSV ---- - -:: - - xml_schema,org_name,org_email,org_extra_contact_info,report_id,begin_date,end_date,errors,domain,adkim,aspf,p,sp,pct,fo,source_ip_address,source_country,source_reverse_dns,source_base_domain,count,spf_aligned,dkim_aligned,dmarc_aligned,disposition,policy_override_reasons,policy_override_comments,envelope_from,header_from,envelope_to,dkim_domains,dkim_selectors,dkim_results,spf_domains,spf_scopes,spf_results - draft,acme.com,noreply-dmarc-support@acme.com,http://acme.com/dmarc/support,9391651994964116463,2012-04-27 20:00:00,2012-04-28 19:59:59,,example.com,r,r,none,none,100,0,72.150.241.94,US,adsl-72-150-241-94.shv.bellsouth.net,bellsouth.net,2,True,False,True,none,,,example.com,example.com,,example.com,none,fail,example.com,mfrom,pass - - -Sample forensic report 
output -============================= - -Thanks to Github user `xennn `_ for the anonymized -`forensic report email sample -`_. - -JSON ----- - - -.. code-block:: json - - { - "feedback_type": "auth-failure", - "user_agent": "Lua/1.0", - "version": "1.0", - "original_mail_from": "sharepoint@domain.de", - "original_rcpt_to": "peter.pan@domain.de", - "arrival_date": "Mon, 01 Oct 2018 11:20:27 +0200", - "message_id": "<38.E7.30937.BD6E1BB5@ mailrelay.de>", - "authentication_results": "dmarc=fail (p=none, dis=none) header.from=domain.de", - "delivery_result": "policy", - "auth_failure": [ - "dmarc" - ], - "reported_domain": "domain.de", - "arrival_date_utc": "2018-10-01 09:20:27", - "source": { - "ip_address": "10.10.10.10", - "country": null, - "reverse_dns": null, - "base_domain": null - }, - "authentication_mechanisms": [], - "original_envelope_id": null, - "dkim_domain": null, - "sample_headers_only": false, - "sample": "Received: from Servernameone.domain.local (Servernameone.domain.local [10.10.10.10])\n\tby mailrelay.de (mail.DOMAIN.de) with SMTP id 38.E7.30937.BD6E1BB5; Mon, 1 Oct 2018 11:20:27 +0200 (CEST)\nDate: 01 Oct 2018 11:20:27 +0200\nMessage-ID: <38.E7.30937.BD6E1BB5@ mailrelay.de>\nTo: \nfrom: \"=?utf-8?B?SW50ZXJha3RpdmUgV2V0dGJld2VyYmVyLcOcYmVyc2ljaHQ=?=\" \nSubject: Subject\nMIME-Version: 1.0\nX-Mailer: Microsoft SharePoint Foundation 2010\nContent-Type: text/html; charset=utf-8\nContent-Transfer-Encoding: quoted-printable\n\n\n", - "parsed_sample": { - "from": { - "display_name": "Interaktive Wettbewerber-Übersicht", - "address": "sharepoint@domain.de", - "local": "sharepoint", - "domain": "domain.de" - }, - "to_domains": [ - "domain.de" - ], - "to": [ - { - "display_name": null, - "address": "peter.pan@domain.de", - "local": "peter.pan", - "domain": "domain.de" - } - ], - "subject": "Subject", - "timezone": "+2", - "mime-version": "1.0", - "date": "2018-10-01 09:20:27", - "content-type": "text/html; charset=utf-8", - "x-mailer": "Microsoft 
SharePoint Foundation 2010", - "body": "", - "received": [ - { - "from": "Servernameone.domain.local Servernameone.domain.local 10.10.10.10", - "by": "mailrelay.de mail.DOMAIN.de", - "with": "SMTP id 38.E7.30937.BD6E1BB5", - "date": "Mon, 1 Oct 2018 11:20:27 +0200 CEST", - "hop": 1, - "date_utc": "2018-10-01 09:20:27", - "delay": 0 - } - ], - "content-transfer-encoding": "quoted-printable", - "message-id": "<38.E7.30937.BD6E1BB5@ mailrelay.de>", - "has_defects": false, - "headers": { - "Received": "from Servernameone.domain.local (Servernameone.domain.local [10.10.10.10])\n\tby mailrelay.de (mail.DOMAIN.de) with SMTP id 38.E7.30937.BD6E1BB5; Mon, 1 Oct 2018 11:20:27 +0200 (CEST)", - "Date": "01 Oct 2018 11:20:27 +0200", - "Message-ID": "<38.E7.30937.BD6E1BB5@ mailrelay.de>", - "To": "", - "from": "\"Interaktive Wettbewerber-Übersicht\" ", - "Subject": "Subject", - "MIME-Version": "1.0", - "X-Mailer": "Microsoft SharePoint Foundation 2010", - "Content-Type": "text/html; charset=utf-8", - "Content-Transfer-Encoding": "quoted-printable" - }, - "reply_to": [], - "cc": [], - "bcc": [], - "attachments": [], - "filename_safe_subject": "Subject" - } - } - - - -CSV ---- - -:: - - feedback_type,user_agent,version,original_envelope_id,original_mail_from,original_rcpt_to,arrival_date,arrival_date_utc,subject,message_id,authentication_results,dkim_domain,source_ip_address,source_country,source_reverse_dns,source_base_domain,delivery_result,auth_failure,reported_domain,authentication_mechanisms,sample_headers_only - auth-failure,Lua/1.0,1.0,,sharepoint@domain.de,peter.pan@domain.de,"Mon, 01 Oct 2018 11:20:27 +0200",2018-10-01 09:20:27,Subject,<38.E7.30937.BD6E1BB5@ mailrelay.de>,"dmarc=fail (p=none, dis=none) header.from=domain.de",,10.10.10.10,,,,policy,dmarc,domain.de,,False - -Bug reports -=========== - -Please report bugs on the GitHub issue tracker - -https://github.com/domainaware/parsedmarc/issues - -.. 
|Build Status| image:: https://github.com/domainaware/parsedmarc/actions/workflows/python-tests.yml/badge.svg - :target: https://github.com/domainaware/parsedmarc/actions/workflows/python-tests.yml - -.. |Code Coverage| image:: https://codecov.io/gh/domainaware/parsedmarc/branch/master/graph/badge.svg - :target: https://codecov.io/gh/domainaware/parsedmarc - -.. |PyPI Package| image:: https://img.shields.io/pypi/v/parsedmarc.svg - :target: https://pypi.org/project/parsedmarc/ - -.. _issues: https://github.com/domainaware/parsedmarc/issues - -.. _contributors: https://github.com/domainaware/parsedmarc/graphs/contributors - -.. _Demystifying DMARC: https://seanthegeek.net/459/demystifying-dmarc/ - -.. _IP to Country Lite database: https://db-ip.com/db/download/ip-to-country-lite - -.. _Creative Commons Attribution 4.0 International License: https://creativecommons.org/licenses/by/4.0/ - -.. _MaxMind GeoLite2 Country database: https://dev.maxmind.com/geoip/geolite2-free-geolocation-data - -.. _geoipupdate: https://github.com/maxmind/geoipupdate - -.. _Cloudflare's public resolvers: https://1.1.1.1/ - -.. _URL encoded: https://en.wikipedia.org/wiki/Percent-encoding#Percent-encoding_reserved_characters - diff --git a/docs/conf.py b/docs/conf.py index 049dd16..f24849a 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,17 +1,10 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- +# Configuration file for the Sphinx documentation builder. # -# parsedmarc documentation build configuration file, created by -# sphinx-quickstart on Mon Feb 5 18:25:39 2018. -# -# This file is execfile()d with the current directory set to its -# containing dir. -# -# Note that not all possible configuration values are present in this -# autogenerated file. -# -# All configuration values have a default; values that are commented out -# serve to show the default. +# This file only contains a selection of the most common options. 
For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the @@ -19,41 +12,12 @@ # import os import sys - -sys.path.insert(0, os.path.abspath('..')) +sys.path.insert(0, os.path.abspath(os.path.join("..", ".."))) from parsedmarc import __version__ +# -- Project information ----------------------------------------------------- -# -- General configuration ------------------------------------------------ - -# If your documentation needs a minimal Sphinx version, state it here. -# -# needs_sphinx = '1.0' - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. -extensions = ['sphinx.ext.autodoc', - 'sphinx.ext.doctest', - 'sphinx.ext.todo', - 'sphinx.ext.viewcode', - 'sphinx.ext.githubpages', - 'sphinx.ext.napoleon'] - -# Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] - -# The suffix(es) of source filenames. -# You can specify multiple suffix as a list of string: -# -# source_suffix = ['.rst', '.md'] -source_suffix = '.rst' - -# The master toctree document. -master_doc = 'index' - -# General information about the project. project = 'parsedmarc' copyright = '2018, Sean Whalen' author = 'Sean Whalen' @@ -67,113 +31,43 @@ version = __version__ # The full version, including alpha/beta/rc tags. release = version -# The language for content autogenerated by Sphinx. Refer to documentation -# for a list of supported languages. -# -# This is also used if you do content translation via gettext catalogs. -# Usually you set "language" from the command line for these cases. 
-language = "en" +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = ['sphinx.ext.autodoc', + 'sphinx.ext.doctest', + 'sphinx.ext.todo', + 'sphinx.ext.viewcode', + 'sphinx.ext.githubpages', + 'sphinx.ext.napoleon', + 'myst_parser'] + +myst_heading_anchors = 3 +autoclass_content = "init" + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + + +# The suffixes of source filenames. +source_suffix = [".rst", ".md"] # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. -# This patterns also effect to html_static_path and html_extra_path -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] - -# The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' - -todo_include_todos = False +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = [] -# -- Options for HTML output ---------------------------------------------- +# -- Options for HTML output ------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # html_theme = 'sphinx_rtd_theme' -# Theme options are theme-specific and customize the look and feel of a theme -# further. For a list of options available for each theme, see the -# documentation. -# -# html_theme_options = {} - # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] - -# Custom sidebar templates, must be a dictionary that maps document names -# to template names. 
-# -# This is required for the alabaster theme -# refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars -html_sidebars = { - '**': [ - 'about.html', - 'navigation.html', - 'relations.html', # needs 'show_related': True theme option to display - 'searchbox.html', - 'donate.html', - ] -} - - -# -- Options for HTMLHelp output ------------------------------------------ - -# Output file base name for HTML help builder. -htmlhelp_basename = 'parsedmarcdoc' - - -# -- Options for LaTeX output --------------------------------------------- - -latex_elements = { - # The paper size ('letterpaper' or 'a4paper'). - # - # 'papersize': 'letterpaper', - - # The font size ('10pt', '11pt' or '12pt'). - # - # 'pointsize': '10pt', - - # Additional stuff for the LaTeX preamble. - # - # 'preamble': '', - - # Latex figure (float) alignment - # - # 'figure_align': 'htbp', -} - -# Grouping the document tree into LaTeX files. List of tuples -# (source start file, target name, title, -# author, documentclass [howto, manual, or own class]). -latex_documents = [ - (master_doc, 'parsedmarc.tex', 'parsedmarc Documentation', - 'parsedmarc', 'manual'), -] - - -# -- Options for manual page output --------------------------------------- - -# One entry per manual page. List of tuples -# (source start file, name, description, authors, manual section). -man_pages = [ - (master_doc, 'parsedmarc', 'parsedmarc Documentation', - [author], 1) -] - - -# -- Options for Texinfo output ------------------------------------------- - -# Grouping the document tree into Texinfo files. 
List of tuples -# (source start file, target name, title, author, -# dir menu entry, description, category) -texinfo_documents = [ - (master_doc, 'parsedmarc', 'parsedmarc Documentation', - author, 'parsedmarc', 'One line description of project.', - 'Miscellaneous'), -] - - - diff --git a/docs/index.rst b/docs/index.rst index ef63378..88fced3 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -69,7 +69,7 @@ lookalike domain monitoring, check out `DomainAware " -PolicyScopeGroupId "" -Description "Restrict access to dmarc reports mailbox."`` + .. code-block:: powershell + + New-ApplicationAccessPolicy -AccessRight RestrictAccess + -AppId "" -PolicyScopeGroupId "" + -Description "Restrict access to dmarc reports mailbox." - ``elasticsearch`` - - ``hosts`` - str: A comma separated list of hostnames and ports or URLs (e.g. ``127.0.0.1:9200`` or ``https://user:secret@localhost``) + - ``hosts`` - str: A comma separated list of hostnames and ports + or URLs (e.g. ``127.0.0.1:9200`` or + ``https://user:secret@localhost``) .. note:: - Special characters in the username or password must be `URL encoded`_. + Special characters in the username or password must be + `URL encoded`_. 
- ``ssl`` - bool: Use an encrypted SSL/TLS connection (Default: True) - ``cert_path`` - str: Path to a trusted certificates @@ -255,30 +300,36 @@ The full set of configuration options are: - ``url`` - str: The URL of the Splunk HTTP Events Collector (HEC) - ``token`` - str: The HEC token - ``index`` - str: The Splunk index to use - - ``skip_certificate_verification`` - bool: Skip certificate verification (not recommended) + - ``skip_certificate_verification`` - bool: Skip certificate + verification (not recommended) - ``kafka`` - ``hosts`` - str: A comma separated list of Kafka hosts - ``user`` - str: The Kafka user - ``passsword`` - str: The Kafka password - ``ssl`` - bool: Use an encrypted SSL/TLS connection (Default: True) - - ``skip_certificate_verification`` - bool: Skip certificate verification (not recommended) + - ``skip_certificate_verification`` - bool: Skip certificate + verification (not recommended) - ``aggregate_topic`` - str: The Kafka topic for aggregate reports - ``forensic_topic`` - str: The Kafka topic for forensic reports - ``smtp`` - ``host`` - str: The SMTP hostname - ``port`` - int: The SMTP port (Default: 25) - ``ssl`` - bool: Require SSL/TLS instead of using STARTTLS - - ``skip_certificate_verification`` - bool: Skip certificate verification (not recommended) + - ``skip_certificate_verification`` - bool: Skip certificate + verification (not recommended) - ``user`` - str: the SMTP username - ``password`` - str: the SMTP password - ``from`` - str: The From header to use in the email - ``to`` - list: A list of email addresses to send to - - ``subject`` - str: The Subject header to use in the email (Default: parsedmarc report) + - ``subject`` - str: The Subject header to use in the email + (Default: parsedmarc report) - ``attachment`` - str: The ZIP attachment filenames - - ``message`` - str: The email message (Default: Please see the attached parsedmarc report.) 
+ - ``message`` - str: The email message + (Default: Please see the attached parsedmarc report.) .. note:: - ``%`` characters must be escaped with another ``%`` character, so use ``%%`` wherever a ``%`` character is used. + ``%`` characters must be escaped with another ``%`` character, + so use ``%%`` wherever a ``%`` character is used. - ``s3`` - ``bucket`` - str: The S3 bucket name @@ -291,44 +342,52 @@ The full set of configuration options are: - ``server`` - str: The Syslog server name or IP address - ``port`` - int: The UDP port to use (Default: 514) - ``gmail_api`` - - ``credentials_file`` - str: Path to file containing the credentials, None to disable (Default: None) - - ``token_file`` - str: Path to save the token file (Default: .token) - - ``include_spam_trash`` - bool: Include messages in Spam and Trash when searching reports (Default: False) - - ``scopes`` - str: Comma separated list of scopes to use when acquiring credentials (Default: https://www.googleapis.com/auth/gmail.modify) - - ``oauth2_port`` - int: The TCP port for the local server to listen on for the OAuth2 response (Default: 8080) + - ``credentials_file`` - str: Path to file containing the + credentials, None to disable (Default: None) + - ``token_file`` - str: Path to save the token file + (Default: .token) + - ``include_spam_trash`` - bool: Include messages in Spam and + Trash when searching reports (Default: False) + - ``scopes`` - str: Comma separated list of scopes to use when + acquiring credentials (Default: https://www.googleapis.com/auth/gmail.modify) + - ``oauth2_port`` - int: The TCP port for the local server to + listen on for the OAuth2 response (Default: 8080) .. warning:: - It is **strongly recommended** to **not** use the ``nameservers`` setting. - By default, ``parsedmarc`` uses `Cloudflare's public resolvers`_, - which are much faster and more reliable than Google, Cisco OpenDNS, or - even most local resolvers. 
+ It is **strongly recommended** to **not** use the ``nameservers`` + setting. By default, ``parsedmarc`` uses + `Cloudflare's public resolvers`_, which are much faster and more + reliable than Google, Cisco OpenDNS, or even most local resolvers. - The ``nameservers`` option should only be used if your network blocks DNS - requests to outside resolvers. + The ``nameservers`` option should only be used if your network + blocks DNS requests to outside resolvers. .. warning:: - ``save_aggregate`` and ``save_forensic`` are separate options because - you may not want to save forensic reports (also known as failure reports) - to your Elasticsearch instance, particularly if you are in a - highly-regulated industry that handles sensitive data, such as healthcare - or finance. If your legitimate outgoing email fails DMARC, it is possible + ``save_aggregate`` and ``save_forensic`` are separate options + because you may not want to save forensic reports + (also known as failure reports) to your Elasticsearch instance, + particularly if you are in a highly-regulated industry that + handles sensitive data, such as healthcare or finance. If your + legitimate outgoing email fails DMARC, it is possible that email may appear later in a forensic report. - Forensic reports contain the original headers of an email that failed a - DMARC check, and sometimes may also include the full message body, - depending on the policy of the reporting organization. + Forensic reports contain the original headers of an email that + failed a DMARC check, and sometimes may also include the + full message body, depending on the policy of the reporting + organization. - Most reporting organizations do not send forensic reports of any kind for - privacy reasons. While aggregate DMARC reports are sent at least daily, - it is normal to receive very few forensic reports. + Most reporting organizations do not send forensic reports of any + kind for privacy reasons. 
While aggregate DMARC reports are sent + at least daily, it is normal to receive very few forensic reports. - An alternative approach is to still collect forensic/failure/ruf reports - in your DMARC inbox, but run ``parsedmarc`` with ``save_forensic = True`` - manually on a separate IMAP folder (using the ``reports_folder`` option), - after you have manually moved known samples you want to save to that - folder (e.g. malicious samples and non-sensitive legitimate samples). + An alternative approach is to still collect forensic/failure/ruf + reports in your DMARC inbox, but run ``parsedmarc`` with + ``save_forensic = True`` manually on a separate IMAP folder (using + the ``reports_folder`` option), after you have manually moved + known samples you want to save to that folder + (e.g. malicious samples and non-sensitive legitimate samples). Sample aggregate report output @@ -340,8 +399,8 @@ report schema standardized in `RFC 7480 Appendix C `_. This draft schema is still in wide use. -``parsedmarc`` produces consistent, normalized output, regardless of the report -schema. +``parsedmarc`` produces consistent, normalized output, regardless +of the report schema. JSON ---- @@ -416,7 +475,7 @@ JSON CSV --- -:: +..
code-block:: text xml_schema,org_name,org_email,org_extra_contact_info,report_id,begin_date,end_date,errors,domain,adkim,aspf,p,sp,pct,fo,source_ip_address,source_country,source_reverse_dns,source_base_domain,count,spf_aligned,dkim_aligned,dmarc_aligned,disposition,policy_override_reasons,policy_override_comments,envelope_from,header_from,envelope_to,dkim_domains,dkim_selectors,dkim_results,spf_domains,spf_scopes,spf_results draft,acme.com,noreply-dmarc-support@acme.com,http://acme.com/dmarc/support,9391651994964116463,2012-04-27 20:00:00,2012-04-28 19:59:59,,example.com,r,r,none,none,100,0,72.150.241.94,US,adsl-72-150-241-94.shv.bellsouth.net,bellsouth.net,2,True,False,True,none,,,example.com,example.com,,example.com,none,fail,example.com,mfrom,pass @@ -524,7 +583,7 @@ JSON CSV --- -:: +.. code-block:: text feedback_type,user_agent,version,original_envelope_id,original_mail_from,original_rcpt_to,arrival_date,arrival_date_utc,subject,message_id,authentication_results,dkim_domain,source_ip_address,source_country,source_reverse_dns,source_base_domain,delivery_result,auth_failure,reported_domain,authentication_mechanisms,sample_headers_only auth-failure,Lua/1.0,1.0,,sharepoint@domain.de,peter.pan@domain.de,"Mon, 01 Oct 2018 11:20:27 +0200",2018-10-01 09:20:27,Subject,<38.E7.30937.BD6E1BB5@ mailrelay.de>,"dmarc=fail (p=none, dis=none) header.from=domain.de",,10.10.10.10,,,,policy,dmarc,domain.de,,False @@ -547,7 +606,7 @@ Installation to use that proxy. To do this, edit ``/etc/environment`` and add your proxy details there, for example: - :: + .. code-block:: bash http_proxy=http://user:password@prox-server:3128 https_proxy=https://user:password@prox-server:3128 @@ -555,7 +614,7 @@ Installation Or if no credentials are needed: - :: + .. 
code-block:: bash http_proxy=http://prox-server:3128 https_proxy=https://prox-server:3128 @@ -607,14 +666,17 @@ On CentOS or RHEL systems, run: sudo dnf install -y geoipupdate -The latest builds for Linux, macOS, and Windows can be downloaded from the `geoipupdate releases page on GitHub`_. +The latest builds for Linux, macOS, and Windows can be downloaded +from the `geoipupdate releases page on GitHub`_. -On December 30th, 2019, MaxMind started requiring free accounts to access the free Geolite2 databases, in order `to +On December 30th, 2019, MaxMind started requiring free accounts to +access the free Geolite2 databases, in order `to comply with various privacy regulations`_. Start by `registering for a free GeoLite2 account`_, and signing in. -Then, navigate the to the `License Keys`_ page under your account, and create a new license key for the version of +Then, navigate to the `License Keys`_ page under your account, +and create a new license key for the version of ``geoipupdate`` that was installed. .. warning:: @@ -632,9 +694,11 @@ Then, navigate the to the `License Keys`_ page under your account, and create a You can use ``parsedmarc`` as the description for the key. -Once you have generated a key, download the config pre-filled configuration file. -This file should be saved at ``/etc/GeoIP.conf`` on Linux or macOS systems, or at -``%SystemDrive%\ProgramData\MaxMind\GeoIPUpdate\GeoIP.conf`` on Windows systems. +Once you have generated a key, download the pre-filled +configuration file. This file should be saved at ``/etc/GeoIP.conf`` +on Linux or macOS systems, or at +``%SystemDrive%\ProgramData\MaxMind\GeoIPUpdate\GeoIP.conf`` on +Windows systems. Then run @@ -644,10 +708,12 @@ Then run To download the databases for the first time. -The GeoLite2 Country, City, and ASN databases are updated weekly, every Tuesday. -``geoipupdate`` can be run weekly by adding a cron job or scheduled task.
+The GeoLite2 Country, City, and ASN databases are updated weekly, +every Tuesday. ``geoipupdate`` can be run weekly by adding a cron +job or scheduled task. -More information about ``geoipupdate`` can be found at the `MaxMind geoipupdate page`_. +More information about ``geoipupdate`` can be found at the +`MaxMind geoipupdate page`_. Installing parsedmarc --------------------- @@ -684,7 +750,8 @@ Install parsedmarc in a virtualenv sudo -u parsedmarc virtualenv /opt/parsedmarc/venv -CentOS/RHEL 8 systems use Python 3.6 by default, so on those systems explicitly tell ``virtualenv`` to use ``python3.9`` instead +CentOS/RHEL 8 systems use Python 3.6 by default, so on those systems +explicitly tell ``virtualenv`` to use ``python3.9`` instead .. code-block:: bash @@ -724,7 +791,7 @@ Accessing an inbox using OWA/EWS Starting in 8.0.0, parsedmarc supports accessing Microsoft/Office 365 inboxes via the Microsoft Graph API, which is preferred over Davmail. -Some organisations do not allow IMAP or the Microsoft Graph API, +Some organizations do not allow IMAP or the Microsoft Graph API, and only support Exchange Web Services (EWS)/Outlook Web Access (OWA). In that case, Davmail will need to be set up as a local EWS/OWA IMAP gateway. It can even work where @@ -966,7 +1033,8 @@ For CentOS, RHEL, and other RPM systems, follow the Elastic RPM guides for sudo service elasticsearch start sudo service kibana start -Without the commercial X-Pack_ or ReadonlyREST_ products, Kibana does not have any authentication +Without the commercial X-Pack_ or ReadonlyREST_ products, Kibana +does not have any authentication mechanism of its own. You can use nginx as a reverse proxy that provides basic authentication. @@ -1301,7 +1369,7 @@ is using a particular service. With that information, you can contact them and have them set up DKIM. .. 
note:: - + If you have a lot of B2C customers, you may see a high volume of emails as your domains coming from consumer email services, such as Google/Gmail and Yahoo! This occurs when customers have mailbox rules in place that forward @@ -1386,24 +1454,26 @@ What if a sender won't support DKIM/DMARC? .. warning :: Do not alter the ``p`` or ``sp`` values of the DMARC record on the - Top-Level Domain (TLD) – that would leave you vulnerable to spoofing of - your TLD and/or any subdomain. + Top-Level Domain (TLD) – that would leave you vulnerable to + spoofing of your TLD and/or any subdomain. What about mailing lists? ========================= -When you deploy DMARC on your domain, you might find that messages relayed by -mailing lists are failing DMARC, most likely because the mailing list is -spoofing your from address, and modifying the subject, footer, or other part -of the message, thereby breaking the DKIM signature. +When you deploy DMARC on your domain, you might find that messages +relayed by mailing lists are failing DMARC, most likely because the mailing +list is spoofing your from address, and modifying the subject, +footer, or other part of the message, thereby breaking the +DKIM signature. Mailing list list best practices -------------------------------- -Ideally, a mailing list should forward messages without altering the headers -or body content at all. `Joe Nelson`_ does a fantastic job of explaining exactly -what mailing lists should and shouldn't do to be fully DMARC compliant. -Rather than repeat his fine work, here's a summary: +Ideally, a mailing list should forward messages without altering the +headers or body content at all. `Joe Nelson`_ does a fantastic job of +explaining exactly what mailing lists should and shouldn't do to be +fully DMARC compliant. 
Rather than repeat his fine work, here's a +summary: **Do** @@ -1473,7 +1543,7 @@ Navigate to Privacy Options> Sending Filters, and configure the settings below ====================================== ========== **Setting** **Value** **dmarc_moderation_action** Accept -**dmarc_quarentine_moderation_action** Yes +**dmarc_quarantine_moderation_action** Yes **dmarc_none_moderation_action** Yes ====================================== ========== @@ -1495,7 +1565,7 @@ Configure the settings below **Include RFC2369 headers** Yes **Include the list post header** Yes **Explicit reply-to address** -**First strip replyo** No +**First strip replyto** No **Reply goes to list** No munging ====================================== ========== @@ -1541,7 +1611,7 @@ Navigate to Privacy Options> Sending Filters, and configure the settings below ====================================== ========== **Setting** **Value** **dmarc_moderation_action** Munge From -**dmarc_quarentine_moderation_action** Yes +**dmarc_quarantine_moderation_action** Yes **dmarc_none_moderation_action** Yes ====================================== ========== diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py index 5942109..ee87b33 100644 --- a/parsedmarc/__init__.py +++ b/parsedmarc/__init__.py @@ -34,7 +34,7 @@ from parsedmarc.utils import is_outlook_msg, convert_outlook_msg from parsedmarc.utils import parse_email from parsedmarc.utils import timestamp_to_human, human_timestamp_to_datetime -__version__ = "8.3.0" +__version__ = "8.3.1" logger.debug("parsedmarc v{0}".format(__version__)) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..73a8afa --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,69 @@ +[build-system] +requires = [ + "hatchling>=1.8.1", +] +build-backend = "hatchling.build" + +[project] +name = "parsedmarc" +dynamic = [ + "version", +] +description = "A Python package and CLI for parsing aggregate and forensic DMARC reports" +readme = "README.md" +license = "Apache-2.0"
+authors = [ + { name = "Sean Whalen", email = "whalenster@gmail.com" }, +] +keywords = [ + "DMARC", + "parser", + "reporting", +] +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "Intended Audience :: Information Technology", + "License :: OSI Approved :: Apache Software License", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3" +] +dependencies = [ + "azure-identity>=1.8.0", + "boto3>=1.16.63", + "dateparser>=1.1.1", + "dnspython>=2.0.0", + "elasticsearch-dsl==7.4.0", + "elasticsearch<7.14.0", + "expiringdict>=1.1.4", + "geoip2>=3.0.0", + "google-api-core>=2.4.0", + "google-api-python-client>=2.35.0", + "google-auth-httplib2>=0.1.0", + "google-auth-oauthlib>=0.4.6", + "google-auth>=2.3.3", + "imapclient>=2.1.0", + "kafka-python>=1.4.4", + "lxml>=4.4.0", + "mailsuite>=1.6.1", + "msgraph-core>=0.2.2", + "publicsuffix2>=2.20190812", + "requests>=2.22.0", + "tqdm>=4.31.1", + "urllib3>=1.25.7", + "xmltodict>=0.12.0", +] + +[project.scripts] +parsedmarc = "parsedmarc.cli:_main" + +[project.urls] +Homepage = "https://domainaware.github.io/parsedmarc" + +[tool.hatch.version] +path = "parsedmarc/__init__.py" + +[tool.hatch.build.targets.sdist] +include = [ + "/parsedmarc", +] diff --git a/requirements.txt b/requirements.txt index 13864f5..fa27eea 100644 --- a/requirements.txt +++ b/requirements.txt @@ -37,3 +37,5 @@ google-api-python-client>=2.35.0 google-auth>=2.3.3 google-auth-httplib2>=0.1.0 google-auth-oauthlib>=0.4.6 +hatch>=1.5.0 +myst-parser>=0.18.0 \ No newline at end of file