Compare commits

...

1343 Commits

Author SHA1 Message Date
copilot-swe-agent[bot]
2174f23eb5 Add comprehensive TypedDicts to minimize Any usage in public APIs
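For illustration, a TypedDict lets dict-shaped return values carry precise key types instead of `Any`; the field names below are hypothetical, not parsedmarc's actual definitions:

```python
from typing import TypedDict


class AggregateReportMetadata(TypedDict):
    # Hypothetical fields for illustration only.
    org_name: str
    report_id: str
    begin_date: str
    end_date: str


def summarize(report: AggregateReportMetadata) -> str:
    # A type checker can now verify these key accesses instead of
    # treating the report as dict[str, Any].
    return f"{report['org_name']} ({report['report_id']})"
```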
Co-authored-by: seanthegeek <44679+seanthegeek@users.noreply.github.com>
2025-12-17 22:28:19 +00:00
copilot-swe-agent[bot]
febbb107c4 Fix Python 3.9 compatibility: replace pipe union syntax with Union/Optional
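For context, the `X | Y` annotation syntax only works on Python 3.10+, so Python 3.9 compatibility means falling back to `typing.Union`/`typing.Optional`. An illustrative before/after, not the exact signatures touched in this commit:

```python
from typing import Optional, Union

# Python 3.10+ only (fails at runtime on 3.9 when the annotation is evaluated):
#   def parse(data: bytes | str, timeout: int | None = None) -> dict | None: ...

# Python 3.9-compatible equivalent:
def parse(data: Union[bytes, str], timeout: Optional[int] = None) -> Optional[dict]:
    ...
```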
Co-authored-by: seanthegeek <44679+seanthegeek@users.noreply.github.com>
2025-12-17 22:18:57 +00:00
copilot-swe-agent[bot]
9a64b494e7 Fix code review issues: incomplete isinstance and variable name mismatch
Co-authored-by: seanthegeek <44679+seanthegeek@users.noreply.github.com>
2025-12-17 21:45:21 +00:00
copilot-swe-agent[bot]
e93209c766 Fix function signatures and improve type annotations
Co-authored-by: seanthegeek <44679+seanthegeek@users.noreply.github.com>
2025-12-17 21:42:25 +00:00
copilot-swe-agent[bot]
d1c22466be Replace OrderedDict with dict and add TypedDict definitions
Co-authored-by: seanthegeek <44679+seanthegeek@users.noreply.github.com>
2025-12-17 21:36:57 +00:00
copilot-swe-agent[bot]
3d1b2522d3 Initial plan 2025-12-17 21:19:30 +00:00
Sean Whalen
af9ad568ec Specify Python version requirements in pyproject.toml 2025-12-17 16:18:24 -05:00
Sean Whalen
748164d177 Fix #638 2025-12-17 16:09:26 -05:00
Sean Whalen
487e5e1149 Format on build 2025-12-12 15:56:52 -05:00
Sean Whalen
73010cf964 Use ruff for code formatting 2025-12-12 15:44:46 -05:00
Sean Whalen
a4a5475aa8 Fix another typo before releasing 9.0.5 2025-12-08 15:29:48 -05:00
Sean Whalen
dab78880df Actual 9.0.5 release
Fix typo
2025-12-08 15:26:58 -05:00
Sean Whalen
fb54e3b742 9.0.5
- Fix report type detection bug introduced in `9.0.4` (yanked).
2025-12-08 15:22:02 -05:00
Sean Whalen
6799f10364 9.0.4
Fixes

- Fix saving reports to OpenSearch ([#637](https://github.com/domainaware/parsedmarc/issues/637))
- Fix parsing certain DMARC failure/forensic reports
- Some fixes to type hints (incomplete, but published as-is due to the above bugs)
2025-12-08 13:26:59 -05:00
Sean Whalen
445c9565a4 Update bug link in docs 2025-12-06 15:05:19 -05:00
Sean Whalen
4b786846ae Remove Python 3.14 from testing
Until cpython bug https://github.com/python/cpython/issues/142307 is fixed
2025-12-05 11:05:29 -05:00
Sean Whalen
23ae563cd8 Update Python version support details in documentation 2025-12-05 10:48:04 -05:00
Sean Whalen
cdd000e675 9.0.3
- Set `requires-python` to `>=3.9, <3.14` to avoid [this bug](https://github.com/python/cpython/issues/142307)
2025-12-05 10:43:28 -05:00
Sean Whalen
7d58abc67b Add shebang and encoding declaration to tests.py 2025-12-04 10:21:53 -05:00
Sean Whalen
a18ae439de Fix typo in RHEL version support description in documentation 2025-12-04 10:18:15 -05:00
Sean Whalen
d7061330a8 Use None for blank fields in the Top 1000 Message Sources by Name DMARC Summary dashboard widget 2025-12-03 09:22:33 -05:00
Sean Whalen
9d5654b8ec Fix bugs with the Top 1000 Message Sources by Name DMARC Summary dashboard widget 2025-12-03 09:14:52 -05:00
Sean Whalen
a0e0070dd0 Bump version to 9.0.2 2025-12-02 20:12:58 -05:00
Sean Whalen
cf3b7f2c29 ## 9.0.2
## Improvements

- Type hinting is now used properly across the entire library. (#445)

## Fixes

- Decompress report files as needed when passed via the CLI.
- Fixed the incomplete removal (in `8.15.0`) of the ability for `parsedmarc.utils.extract_report` to accept a file path directly.

## Breaking changes

This version of the library requires consumers to pass certain arguments as keyword-only. Internally, the API uses a bare `*` in the function signature. This is standard behavior per [PEP 3102](https://peps.python.org/pep-3102/) and is documented in the Python Language Reference.
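For reference, a bare `*` in a signature makes everything after it keyword-only ([PEP 3102](https://peps.python.org/pep-3102/)); a positional call then raises `TypeError`. The parameter names below are illustrative only:

```python
def parse_report(source, *, offline=False, nameservers=None):
    # "offline" and "nameservers" must now be passed by keyword.
    ...


parse_report("report.xml", offline=True)   # OK
# parse_report("report.xml", True)         # TypeError: takes 1 positional argument but 2 were given
```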
2025-12-02 19:41:14 -05:00
Sean Whalen
d312522ab7 Enhance type hints and argument formatting in multiple files for improved clarity and consistency 2025-12-02 17:06:57 -05:00
Sean Whalen
888d717476 Enhance type hints and argument formatting in utils.py for improved clarity and consistency 2025-12-02 16:21:30 -05:00
Sean Whalen
1127f65fbb Enhance type hints and argument formatting in webhook.py for improved clarity and consistency 2025-12-02 15:52:31 -05:00
Sean Whalen
d017dfcddf Enhance type hints and argument formatting across multiple files for improved clarity and consistency 2025-12-02 15:17:37 -05:00
Sean Whalen
5fae99aacc Enhance type hints for improved clarity and consistency in __init__.py, elastic.py, and opensearch.py 2025-12-02 14:14:06 -05:00
Sean Whalen
ba57368ac3 Refactor argument formatting and type hints in elastic.py for consistency 2025-12-02 13:13:25 -05:00
Sean Whalen
dc6ee5de98 Add type hints to methods in opensearch.py for improved clarity and type checking 2025-12-02 13:11:59 -05:00
Sean Whalen
158d63d205 Complete annotations on elastic.py 2025-12-02 12:59:03 -05:00
Oscar Mattsson
f1933b906c Fix 404 link to maxmind docs (#635) 2025-12-02 09:26:01 -05:00
Anael Mobilia
4b98d795ff Define minimal Python version on pyproject (#634) 2025-12-01 20:22:49 -05:00
Sean Whalen
b1356f7dfc 9.0.1
- Allow multiple `records` for the same aggregate DMARC report in Elasticsearch and Opensearch (fixes issue in 9.0.0)
- Fix typos
2025-12-01 18:57:23 -05:00
Sean Whalen
1969196e1a Switch CHANGELOG headers 2025-12-01 18:01:54 -05:00
Sean Whalen
553f15f6a9 Code formatting 2025-12-01 17:24:10 -05:00
Sean Whalen
1fc9f638e2 9.0.0 (#629)
* Normalize report volumes when a report timespan exceeds 24 hours
2025-12-01 17:06:58 -05:00
Sean Whalen
48bff504b4 Fix build script to properly publish docs 2025-12-01 11:08:21 -05:00
Sean Whalen
681b7cbf85 Formatting 2025-12-01 10:56:08 -05:00
Sean Whalen
0922d6e83a Add supported Python versions to the documentation index 2025-12-01 10:24:19 -05:00
Sean Whalen
baf3f95fb1 Update README with clarification on Python 3.6 support 2025-12-01 10:20:56 -05:00
Anael Mobilia
a51f945305 Clearly define supported Python versions policy (#633)
* Clearly define supported Python versions.

Support policy based on author's comment on https://github.com/domainaware/parsedmarc/pull/458#issuecomment-2002516299 #458

* Compile Python 3.6, since ubuntu-latest now runs Ubuntu 24.04 (which does not have Python 3.6) and Ubuntu 20.04 runners are no longer available
https://raw.githubusercontent.com/actions/python-versions/main/versions-manifest.json

* Use latest versions of GH Actions

* Silence some technical GH Actions steps

* Elasticsearch / OpenSearch: use supported versions + align used versions

* Delete .github/workflows/python-tests-3.6.yml

Drop Python 3.6 test

* Update Python 3.6 support status in README

---------

Co-authored-by: Sean Whalen <44679+seanthegeek@users.noreply.github.com>
2025-12-01 10:02:47 -05:00
Sean Whalen
55dbf8e3db Add sources by name table to the Kibana DMARC Summary dashboard
This matches the table in the Splunk DMARC Aggregate reports dashboard
2025-11-30 19:44:14 -05:00
Anael Mobilia
00267c9847 Codestyle cleanup (#631)
* Fix typos

* Copyright - Update date

* Codestyle xxx is False -> not xxx

* Ensure "_find_label_id_for_label" always return str

* PEP-8 : apiKey -> api_key + backward compatibility for config files

* Duplicate variable initialization

* Fix format
2025-11-30 19:13:57 -05:00
Anael Mobilia
51356175e1 Get option on the type described on documentation (#632) 2025-11-30 19:00:04 -05:00
Anael Mobilia
3be10d30dd Fix warnings in docker-compose.yml (#630)
* Fix level=warning msg="...\parsedmarc\docker-compose.yml: the attribute `version` is obsolete, it will be ignored, please remove it to avoid potential confusion"

* Fix "Unquoted port mapping not recommended"
2025-11-30 18:59:01 -05:00
Sean Whalen
98342ecac6 8.19.1 (#627)
- Ignore HTML content type in report email parsing (#626)
2025-11-29 11:37:31 -05:00
Sean Whalen
38a3d4eaae Code formatting 2025-11-28 12:48:55 -05:00
Sean Whalen
a05c230152 8.19.0 (#622)
8.19.0

- Add multi-tenant support via an index-prefix domain mapping file
- PSL overrides so that services like AWS are correctly identified
- Additional improvements to report type detection
- Fix webhook timeout parsing (PR #623)
- Output to STDOUT when the new general config boolean `silent` is set to `False` (Close #614)
- Additional services added to `base_reverse_dns_map.csv`

---------

Co-authored-by: Sean Whalen <seanthegeek@users.noreply.github.com>
Co-authored-by: Félix <felix.debloisbeaucage@gmail.com>
2025-11-28 12:47:00 -05:00
Sean Whalen
17bdc3a134 More tests cleanup 2025-11-21 09:10:59 -05:00
Sean Whalen
858be00f22 Fix badge links and update image source branch 2025-11-21 09:03:04 -05:00
Sean Whalen
597ca64f9f Clean up tests 2025-11-21 00:09:28 -05:00
Sean Whalen
c5dbe2c4dc 8.18.9
- Complete fix for #687 and more robust report type detection
2025-11-20 23:50:42 -05:00
Sean Whalen
082b3d355f 8.18.8
- Fix parsing emails with an uncompressed aggregate report attachment (Closes #607)
- Add `--no-prettify-json` CLI option (PR #617)
2025-11-20 20:47:57 -05:00
Sean Whalen
2a7ce47bb1 Update code coverage badge link to main branch 2025-11-20 20:28:10 -05:00
daminoux
9882405d96 Update README.md fix url screenshot (#620)
The URL of the screenshot is broken.
2025-11-20 20:27:15 -05:00
Andrew
fce84763b9 add --no-prettify-json CLI option (#617)
* updates process_reports to respect newly added prettify_json option

* removes duplicate definition

* removes redundant option

* fixes typo
2025-11-02 15:54:59 -05:00
Rowan
8a299b8600 Updated default python docker base image to 3.13-slim (#618)
* Updated default python docker base image to 3.13-slim

* Added python 3.13 to tests
2025-10-29 22:34:06 -04:00
jandr
b4c2b21547 Sorted usage of TLS on SMTP (#613)
Added a line for the `email_results` function to take into account the smtp_ssl setting.
2025-08-25 13:51:10 -04:00
Sean Whalen
865c249437 Update features list 2025-08-24 13:39:50 -04:00
Sean Whalen
013859f10e Fix find_unknown_base_reverse_dns.py 2025-08-19 21:18:14 -04:00
Sean Whalen
6d4a31a120 Fix find_unknown_base_reverse_dns.py and sortlist.py 2025-08-19 20:59:42 -04:00
Sean Whalen
45d3dc3b2e Fix sortlists.py 2025-08-19 20:23:55 -04:00
Sean Whalen
4bbd97dbaa Improve list verification 2025-08-19 20:02:55 -04:00
Sean Whalen
5df152d469 Refactor find_unknown_base_reverse_dns.py 2025-08-18 12:59:54 -04:00
Sean Whalen
d990bef342 Use \n here too 2025-08-17 21:08:28 -04:00
Sean Whalen
caf77ca6d4 Use \n when writing CSVs 2025-08-17 21:01:07 -04:00
Sean Whalen
4b3d32c5a6 Actual, actual Actual 8.18.7 release
Revert to using the Python csv module instead of pandas to avoid conflicts with numpy in Elasticsearch
2025-08-17 20:36:15 -04:00
Sean Whalen
5df5c10f80 Pin pandas and numpy versions 2025-08-17 19:59:53 -04:00
Sean Whalen
308d4657ab Make sort_csv function more flexible 2025-08-17 19:43:19 -04:00
Sean Whalen
0f74e33094 Fix typo 2025-08-17 19:35:16 -04:00
Sean Whalen
9f339e11f5 Actual 8.18.7 release 2025-08-17 19:34:14 -04:00
Sean Whalen
391e84b717 Fix map sorting 2025-08-17 18:15:20 -04:00
Sean Whalen
8bf06ce5af 8.18.7
Removed improper spaces from `base_reverse_dns_map.csv` (Closes #612)
2025-08-17 18:13:49 -04:00
Sean Whalen
2b7ae50a27 Better wording 2025-08-17 17:01:22 -04:00
Sean Whalen
3feb478793 8.18.6
- Fix since option to correctly work with weeks (PR #604)
- Add 183 entries to `base_reverse_dns_map.csv`
- Add 57 entries to `known_unknown_base_reverse_dns.txt`
- Check for invalid UTF-8 bytes in `base_reverse_dns_map.csv` at build
- Remove unneeded items from the `parsedmarc.resources` module at build
2025-08-17 17:00:11 -04:00
Sean Whalen
01630bb61c Update code formatting 2025-08-17 16:01:45 -04:00
Sean Whalen
39347cb244 Add find_bad_utf8.py 2025-08-17 15:55:47 -04:00
Sean Whalen
ed25526d59 Update maps 2025-08-17 15:17:24 -04:00
alagendijk-minddistrict
880d7110fe Fix since option to correctly work with weeks (#604) 2025-08-14 18:39:04 -04:00
Martin Kjær Jørgensen
d62001f5a4 fix wrong configuration option for maildir (#606)
Signed-off-by: Martin Kjær Jørgensen <me@lagy.org>
2025-08-14 18:36:58 -04:00
Sean Whalen
0720bffcb6 Remove extra spaces 2025-06-10 19:05:06 -04:00
Sean Whalen
fecd55a97d Add SMTP TLS Reporting dashboard for Splunk
Closes #600
2025-06-10 18:54:43 -04:00
Sean Whalen
a121306eed Fix typo in the map 2025-06-10 10:53:55 -04:00
Sean Whalen
980c9c7904 Add Hostinger to the map 2025-06-10 10:50:06 -04:00
Sean Whalen
963f5d796f Fix build script 2025-06-10 09:51:12 -04:00
Sean Whalen
6532f3571b Update lists 2025-06-09 20:05:56 -04:00
Sean Whalen
ea878443a8 Update lists 2025-06-09 17:04:16 -04:00
Sean Whalen
9f6de41958 Update lists 2025-06-09 13:41:49 -04:00
Sean Whalen
119192701c Update lists 2025-06-09 12:02:50 -04:00
Sean Whalen
1d650be48a Fix typo 2025-06-08 21:41:07 -04:00
Sean Whalen
a85553fb18 Update lists 2025-06-08 21:40:10 -04:00
Sean Whalen
5975d8eb21 Fix sorting 2025-06-08 20:17:21 -04:00
Sean Whalen
87ae6175f2 Update lists 2025-06-08 19:51:13 -04:00
Sean Whalen
68b93ed580 Update map 2025-06-03 14:54:58 -04:00
Sean Whalen
55508b513b Remove debugging code 2025-06-03 14:38:15 -04:00
Sean Whalen
71511c0cfc 8.18.5
- Fix CSV download
2025-06-03 11:44:42 -04:00
Sean Whalen
7c45812284 8.18.4
- Fix webhooks
2025-06-02 16:52:48 -04:00
Sean Whalen
607a091a5f 8.18.3
- Move `__version__` to `parsedmarc.constants`
- Create a constant `USER_AGENT`
- Use the HTTP `User-Agent` header value `parsedmarc/version` for all HTTP requests
2025-06-02 16:43:26 -04:00
Sean Whalen
c308bf938c Update the README 2025-06-02 15:43:51 -04:00
Sean Whalen
918501ccb5 Better formatting 2025-06-02 15:20:40 -04:00
Sean Whalen
036c372ea3 8.18.2
- Merged PR #603
  - Fixes issue #595 - CI test fails for Elasticsearch
    - Moved Elasticsearch to a separate Docker service container for CI testing
    - Dropped Python 3.8 from CI testing
  - Fixes lookup and saving of DMARC forensic reports in Elasticsearch and OpenSearch
- Updated fallback `base_reverse_dns_map.csv`, which now includes over 1,400 lines
- Updated included `dbip-country-lite.mmdb` to the June 2025 release
- Automatically fall back to the internal `base_reverse_dns_map.csv` if the received file is not valid (Fixes #602)
  - Print the received data to the debug log
2025-06-02 15:19:19 -04:00
Sean Whalen
a969d83137 Update included IP database 2025-06-02 11:30:26 -04:00
Szasza Palmer
e299f7d161 fixing ES/OS forensic report lookup and storage, extracting ES to separate CI service (#603)
* fixing ES/OS forensic report lookup and storage, extracting ES to separate CI service

* bumping CI ES version to current latest

* reshuffling CI job attributes

* removing EOL Python 3.8 from the CI pipeline
2025-06-02 11:10:10 -04:00
Sean Whalen
4c04418dae Fix domain lists check 2025-04-24 16:03:18 -04:00
Sean Whalen
2ca9373ed0 Match dashboard fields 2025-04-24 15:44:22 -04:00
Sean Whalen
961ef6d804 Revert adding the BOM
It broke reading the file with python
2025-04-24 14:04:36 -04:00
Sean Whalen
573ba1e3e9 Add UTF-8 BOM to the CSV so Excel will open the file as UTF-8 2025-04-24 13:58:06 -04:00
Sean Whalen
1d8af3ccff Add find_unknown_base_reverse_dns.py 2025-04-24 13:48:51 -04:00
Sean Whalen
8426daa26b Remove duplicate domains 2025-04-24 13:47:07 -04:00
Sean Whalen
d1531b86f2 Sort known_unknown_base_reverse_dns.txt 2025-04-24 11:42:09 -04:00
Sean Whalen
8bb046798c Simplify sender types 2025-04-23 16:20:05 -04:00
Sean Whalen
d64e12548a Fix errors in the reverse DNS map 2025-04-23 15:57:31 -04:00
Sean Whalen
380479cbf1 Update reverse DNS map 2025-04-23 15:43:38 -04:00
Sean Whalen
ace21c8084 Update base_reverse_dns_map.csv and add known_unknown_base_reverse_dns.txt 2025-04-23 15:36:14 -04:00
Sean Whalen
1a1aef21ad Replace deprecated path call with file call 2025-04-23 15:33:27 -04:00
Sean Whalen
532dbbdb7e Fix file formatting 2025-04-23 15:32:04 -04:00
miles
45738ae688 Fix SyntaxError in elastic forensic report (#598) 2025-04-23 14:40:03 -04:00
Sean Whalen
9d77bd64bc Fix some CSV entries 2025-04-01 09:23:44 -04:00
Sean Whalen
140290221d Update elastic.py 2025-03-22 15:09:44 -04:00
Sean Whalen
187d61b770 Update elastic.py 2025-03-22 15:03:42 -04:00
Sean Whalen
0443b7365e Update elastic.py 2025-03-22 14:47:50 -04:00
Sean Whalen
d7b887a835 Debug elasticsearch 2025-03-22 14:42:45 -04:00
Tom Henderson
a805733221 Raise for failed status (#594) 2025-03-22 11:22:49 -04:00
Sean Whalen
9552c3ac92 Update README.md 2025-03-21 09:41:14 -04:00
Sean Whalen
5273948be0 Make build.sh usable without the gh-pages branch 2025-02-18 09:17:12 -05:00
Sean Whalen
b51756b8bd 8.18.1
- Add missing `https://` to the default Microsoft Graph URL
2025-02-17 12:41:57 -05:00
Sean Whalen
7fa7c24cb8 Merge branch 'master' of https://github.com/domainaware/parsedmarc 2025-02-17 12:31:47 -05:00
Sean Whalen
972237ae7e Fix default Microsoft Graph URL 2025-02-17 12:31:39 -05:00
Sean Whalen
6e5333a342 Style fixes 2025-02-03 16:11:21 -05:00
Sean Whalen
47b074c80b Merge branch 'master' of https://github.com/domainaware/parsedmarc 2025-02-03 16:11:01 -05:00
Sean Whalen
a1cfeb3081 8.18.0
- Add support for Microsoft national clouds via Graph API base URL (PR #590)
- Avoid stopping processing when an invalid DMARC report is encountered (PR #587)
- Increase `http.client._MAXHEADERS` from `100` to `200` to avoid errors connecting to Elasticsearch/OpenSearch (PR #589)
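For reference, the `_MAXHEADERS` change above boils down to raising a private `http.client` limit before responses with many headers are parsed; a minimal sketch:

```python
import http.client

# Elasticsearch/OpenSearch responses can carry more than the default limit of
# 100 headers, which makes http.client raise "got more than 100 headers".
http.client._MAXHEADERS = 200
```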
2025-02-03 16:10:51 -05:00
Paul Hecker
c7c451b1b1 Set http.client._MAXHEADERS to 200 (#589) 2025-02-03 15:26:15 -05:00
Kevin Goad
669deb9755 Add support for Microsoft national clouds via Graph API base URL (#590)
* adding support for Microsoft National Clouds

* Update usage.md
2025-02-03 15:25:15 -05:00
bendem
446c018920 do not stop processing when we encounter an invalid dmarc report (#587) 2025-02-03 15:20:52 -05:00
Sean Whalen
38c6f86973 Update CHANGELOG.md 2025-01-10 09:09:24 -05:00
Sean Whalen
62ccc11925 Update changelog 2025-01-09 22:25:43 -05:00
Sean Whalen
c32ca3cae3 Fix sortmaps.py 2025-01-09 22:24:03 -05:00
Sean Whalen
010f1f84a7 8.17.0
- Ignore duplicate aggregate DMARC reports with the same `org_name` and `report_id` seen within the same hour ([#539](https://github.com/domainaware/parsedmarc/issues/539))
- Fix saving SMTP TLS reports to OpenSearch (PR #585 closed issue #576)
- Add 303 entries to `base_reverse_dns_map.csv`
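A minimal sketch of the hour-window deduplication described in the first bullet above (keyed on `org_name` + `report_id`); the names here are hypothetical and parsedmarc's actual implementation differs in detail:

```python
from datetime import datetime, timedelta

# Maps (org_name, report_id) -> time the pair was first seen.
seen: dict[tuple[str, str], datetime] = {}


def is_duplicate(org_name: str, report_id: str, now: datetime) -> bool:
    """Return True if the same org_name/report_id was seen within the last hour."""
    key = (org_name, report_id)
    first_seen = seen.get(key)
    if first_seen is not None and now - first_seen < timedelta(hours=1):
        return True
    seen[key] = now
    return False
```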
2025-01-09 22:22:55 -05:00
Anael Mobilia
7da57c6382 Fix colors on export.ndjson (#586)
Old elements use the compatibility color palette => update them to the status color palette
2025-01-09 22:09:44 -05:00
Sean Whalen
d08e29a306 Move sortmaps.py 2025-01-09 22:08:42 -05:00
Sean Whalen
e1e53ad4cb Use Python instead of Excel for sorting map CSVs 2025-01-09 22:03:49 -05:00
Sean Whalen
4670e9687d Update base_reverse_dns_map.csv 2025-01-09 21:18:00 -05:00
Sean Whalen
7f8a2c08cd Use a smaller key value 2025-01-09 19:34:56 -05:00
Sean Whalen
e9c05dd0bf Update base_reverse_dns_map.csv 2025-01-08 20:51:44 -05:00
Sean Whalen
9348a474dd Actually fix the CLI 2025-01-08 20:49:39 -05:00
Sean Whalen
e0decaba8c Fix CLI 2025-01-07 14:33:35 -05:00
Sean Whalen
26a651cded Use a combination of report org and report ID when checking for duplicate aggregate reports 2025-01-07 14:25:57 -05:00
Sean Whalen
bcfcd93fc6 More duplicate aggregate report checks
#535
2025-01-07 13:56:26 -05:00
Sean Whalen
54d5ed3543 Remove unused import 2025-01-07 12:57:41 -05:00
Sean Whalen
1efbc87e0e Consolidate SEEN_AGGREGATE_REPORT_IDS 2025-01-07 12:56:30 -05:00
Sean Whalen
e78e7f64af Add parsedmarc.ini to .gitignore 2025-01-07 11:59:03 -05:00
Szasza Palmer
ad9de65b99 fixing SMTP TLS report saving to OpenSearch (#585) 2025-01-07 11:57:04 -05:00
Sean Whalen
b9df12700b Check for duplicate aggregate report IDs when processing a mailbox
Fix #535
2025-01-07 11:56:51 -05:00
Sean Whalen
20843b920f Sort reverse DNS map 2025-01-06 21:26:48 -05:00
Sean Whalen
e5ae89fedf Merge branch 'master' of https://github.com/domainaware/parsedmarc 2025-01-06 21:21:57 -05:00
Sean Whalen
f148cff11c Update reverse DNS map 2025-01-06 21:19:06 -05:00
Sean Whalen
4583769e04 Update reverse DNS map 2025-01-03 09:23:06 -05:00
Sean Whalen
0ecb80b27c Update reverse DNS map 2024-12-30 11:40:29 -05:00
Sean Whalen
b8e62e6d3b Remove duplicate entry 2024-12-28 14:14:00 -05:00
Sean Whalen
c67953a2c5 Update reverse DNS map 2024-12-28 14:10:39 -05:00
Sean Whalen
27dff4298c Update reverse DNS mapping 2024-12-28 11:53:50 -05:00
Sean Whalen
f2133aacd4 Fix build dependencies 2024-12-25 18:52:42 -05:00
Sean Whalen
31917e58a9 Update build backend 2024-12-25 18:28:30 -05:00
Sean Whalen
bffb98d217 Get report ID correctly 2024-12-25 16:37:40 -05:00
Sean Whalen
1f93b3a7ea Set max_len to a value 2024-12-25 16:26:38 -05:00
Sean Whalen
88debb9729 Fix SEEN_AGGREGATE_REPORT_IDS 2024-12-25 16:21:07 -05:00
Sean Whalen
a8a5564780 Merge branch 'master' of https://github.com/domainaware/parsedmarc 2024-12-25 16:14:40 -05:00
Sean Whalen
1e26f95b7b 8.16.1
- Ignore duplicate aggregate DMARC reports seen within a period of one hour (#535)
2024-12-25 16:14:33 -05:00
ericericsw
82b48e4d01 Add files via upload (#578)
Update the dashboard for the new version

Panel model change list:
grafana-piechart-panel -> pie chart
Graph (old) -> time series
worldmap panel -> geomap

Some table panels have changed; for example, the overview now includes an ARC column.

Known issue that cannot be solved at the moment: multiple DKIM entries cause table display errors.
2024-12-25 16:09:43 -05:00
Sean Whalen
617b7c5b4a Merge PR #527 2024-11-09 18:18:31 -05:00
Sean Whalen
989bfd8f07 Code cleanup 2024-11-02 11:40:37 -04:00
Sean Whalen
908cc2918c Merge branch 'ramspoluri-master' 2024-11-02 11:39:34 -04:00
Sean Whalen
bd5774d71d Merge branch 'master' of https://github.com/ramspoluri/parsedmarc into ramspoluri-master 2024-11-02 11:38:41 -04:00
Sean Whalen
8e9112bad3 Merge branch 'master' of https://github.com/ramspoluri/parsedmarc 2024-11-02 10:48:15 -04:00
Sean Whalen
40e041a8af Merge branch 'master' of https://github.com/ramspoluri/parsedmarc 2024-11-02 10:48:10 -04:00
Sean Whalen
7ba433cddb Fix code style 2024-11-02 10:39:05 -04:00
Sean Whalen
6d467c93f9 Update __init__.py
Add reference to https://www.rfc-editor.org/rfc/rfc3501#page-52
2024-11-02 10:35:22 -04:00
Sean Whalen
be38e83761 Code cleanup 2024-11-02 10:28:11 -04:00
Sean Whalen
ef4e1ac8dc Code cleanup 2024-11-02 10:26:30 -04:00
Sean Whalen
39e4c22ecc Fix syntax 2024-11-02 10:23:23 -04:00
Sean Whalen
88ff3a2c23 Update syntax to support Python < 3.10 2024-11-02 10:04:01 -04:00
Sean Whalen
d8aee569f7 Update __init__.py 2024-11-02 09:50:55 -04:00
Sean Whalen
debc28cc6e 8.15.4
- Fix crash if aggregate report timespan is > 24 hours
2024-10-24 19:53:44 -04:00
Sean Whalen
52ccf0536c 8.15.3
- Ignore aggregate reports with a timespan of > 24 hours (Fixes #282)
2024-10-24 19:43:28 -04:00
Sean Whalen
976a3274e6 8.15.2 2024-10-24 18:04:19 -04:00
Sean Whalen
bb722e651a Fix parsing when auth record is missing 2024-10-24 17:14:02 -04:00
PhiBo
ab280d7a34 Update Dockerfile (#571)
* Use multi-stage build to reduce image size
* Add ARGS to be more flexible during image builds
* Create user and use it instead of root
* Don't update pip in container. The Python image should have a recent
  version
2024-10-24 14:18:29 -04:00
Alexej Sidorenko
92b12eaacf issue #565 - Logfile is overwritten when parsedmarc (re)starts (#569)
Do not rewrite the log file if it already exists. Add a log handler in "append" mode (that should be the implicit default, but it is defined explicitly for visibility).
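A sketch of the append-mode handler described above, using the standard `logging` module (not the project's exact setup):

```python
import logging

# mode="a" appends to an existing log file instead of truncating it on start.
handler = logging.FileHandler("parsedmarc.log", mode="a")
handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s"))
logging.getLogger().addHandler(handler)
```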
2024-10-10 15:27:15 -04:00
Jed Laundry
8444053476 Create optional dependency group for build, fix codecov (#567)
* Create optional dependency groups for build and cli

* revert cli optional-dependencies group
2024-10-07 13:47:35 -04:00
Sean Whalen
1ef3057110 8.15.1
- Proper IMAP namespace fix (Closes issue #557 and issue #563)
  - Require `mailsuite>=1.9.17`
  - Revert PR #552
- Add pre-flight check for nameservers (PR #562 closes issue #543)
- Reformat code with `ruff`
2024-10-02 21:19:57 -04:00
Paolo Schiro
fdb4e4cb36 Added nameservers pre-flight check, issue #543 (#562) 2024-10-02 20:48:02 -04:00
N4v41
d80ce744da add some identified missing organizations (#566) 2024-09-26 12:37:00 -04:00
Sean Whalen
f12828485b Parse aggregate reports with multiple policy_published records 2024-09-12 18:23:11 -04:00
centja1
0a6cfb602c Added output to webhook as an option (#558)
* Added output to webhook as an option

* added documentation for new webhook configuration

---------

Co-authored-by: Sean Whalen <44679+seanthegeek@users.noreply.github.com>
2024-09-12 15:47:59 -04:00
Sean Whalen
cf46558fa3 Update reverse DNS map 2024-09-12 15:01:40 -04:00
Sean Whalen
f1a526c247 Update reverse DNS map 2024-09-12 14:55:01 -04:00
Sean Whalen
7344ea9dda Update reverse DNS map 2024-09-12 14:53:59 -04:00
Sean Whalen
7633a30066 maildir fixes 2024-09-12 14:48:26 -04:00
Sean Whalen
266d57eb8c Fix maildir connection 2024-09-06 16:00:23 -04:00
Paolo Schiro
7b7d20b1a4 Added input report maildir connector, issue #82 (#555)
Co-authored-by: Paolo Schiro <paolo.schiro@staff.aruba.it>
2024-09-06 15:44:57 -04:00
Sean Whalen
b530d624e6 Add example google SMTP-TLS report email 2024-09-04 20:03:51 -04:00
Sean Whalen
5973ca8205 Code style fixes 2024-09-04 16:48:07 -04:00
Sean Whalen
e967778f25 8.15.0
- Fix processing of SMTP-TLS reports ([#549](https://github.com/domainaware/parsedmarc/issues/549)), which broke in commit [410663d](410663dbca) (PR [#530](https://github.com/domainaware/parsedmarc/pull/530))
  - This PR enforced a stricter check for base64-encoded strings, which SMTP TLS reports from Google did not pass
  - Removing the check introduced its own issue, because some file paths were treated as base64-encoded strings
- Create a separate `extract_report_from_file_path()` function for processing reports based on a file path
- Remove report extraction based on a file path from `extract_report()`
2024-09-04 16:31:41 -04:00
Sean Whalen
630863df5c 8.14.2
- Update `base_reverse_dns_map.csv` to fix over-replacement on [`f3a5f10`](f3a5f10d67) (PR #553)
2024-09-03 13:58:07 -04:00
Anael Mobilia
3a3b02687e Update base_reverse_dns_map.csv (#553)
Fix over-replacement on f3a5f10
2024-09-03 13:51:30 -04:00
Sean Whalen
79cd7d3c3d Actual 8.14.1 release
- Fix unit tests by enforcing base64 validation
2024-09-03 13:22:27 -04:00
Sean Whalen
3ca3d64775 8.14.1 2024-09-03 11:24:52 -04:00
Sean Whalen
21180f4bb8 8.14.0 release
- Fix processing of SMTP-TLS reports (#549)
- Skip invalid aggregate report rows without calling the whole report invalid
  - Some providers such as GoDaddy will send reports with some rows missing a source IP address, while other rows are fine
- Fix Dovecot support by using the separator provided by the IMAP namespace when possible (PR #552 closes #551)
- Only download `base_reverse_dns_map.csv` once (fixes #542)
- Update included `base_reverse_dns_map.csv`
  - Replace University category with Education to be more inclusive
- Update included `dbip-country-lite.mmdb`
2024-09-03 08:37:19 -04:00
Sean Whalen
fc36a78a4d 8.14.0
- Skip invalid aggregate report rows without calling the whole report invalid
  - Some providers such as GoDaddy will send reports with some rows missing a source IP address, while other rows are fine
- Fix Dovecot support by using the separator provided by the IMAP namespace when possible (PR #552 closes #551)
- Only download `base_reverse_dns_map.csv` once (fixes #542)
- Update included `base_reverse_dns_map.csv`
  - Replace University category with Education to be more inclusive
- Update included `dbip-country-lite.mmdb`
2024-09-02 22:42:37 -04:00
Sean Whalen
f3a5f10d67 Replace University category with Education to be more inclusive 2024-09-02 21:08:17 -04:00
Frederico Freire Boaventura
4787da9ea1 Fix the tilde expansion for user home folder (#550)
This will enable the tilde expansion to map the output dir to the home
user using `~/path`.
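Tilde expansion of this kind is typically done with `os.path.expanduser`; a small sketch with an illustrative option value:

```python
import os

output_dir = "~/dmarc-output"                 # value as it might appear in a config file
output_dir = os.path.expanduser(output_dir)   # -> /home/<user>/dmarc-output
os.makedirs(output_dir, exist_ok=True)
```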
2024-09-02 17:28:48 -04:00
Gaige B Paulsen
06d8578c47 fix: use namespace for separator (#552) 2024-09-02 17:27:28 -04:00
Sean Whalen
ef621d68c7 Cache the reverse DNS map 2024-09-02 17:25:55 -04:00
Sean Whalen
1c3bcc05c7 Skip invalid aggregate report rows without calling the whole report invalid 2024-09-02 17:08:51 -04:00
Sean Whalen
554888b3dd Update IP database 2024-09-02 17:07:32 -04:00
N4v41
17d9599c54 Add various mail providers to the reverse DNS map (#548)
* Update base_reverse_dns_map.csv

Add Cloudflare E-mail routing service

* Add various missing mail providers
2024-08-31 12:08:15 -04:00
N4v41
fb0adf0627 Update base_reverse_dns_map.csv (#547)
Add Cloudflare E-mail routing service
2024-08-29 15:01:45 -04:00
N4v41
ef820c5d68 Update base_reverse_dns_map.csv (#546)
add Qualtrics to base_reverse_dns_map
2024-08-29 15:01:19 -04:00
Sean Whalen
cc1b6ae389 8.13.0
- Add Elastic/OpenSearch index prefix option (PR #531 closes #159)
- Add GELF output support (PR #532)
2024-08-24 21:29:32 -04:00
Sean Whalen
0fa6bebf5a Fix error in docs 2024-08-24 20:10:57 -04:00
Sean Whalen
06ce080171 Update cli.py
Remove trailing spaces
2024-08-24 11:34:48 -04:00
Jason Lingohr
11e0461b9d Add GELF support (#532)
* Implement the ability to log to a GELF server/input, via the use of pygelf.

* Fix flake8 style checks.
2024-08-24 11:28:55 -04:00
Andreas Brett
efe4893a7f add TLSRPT dashboard (#529) 2024-08-24 11:27:42 -04:00
Emmanuel Ormancey
9b32d9459f added ES/OS prefix (#531) 2024-08-24 11:26:13 -04:00
Félix
aa357dc50f add new sources to base_reverse_dns_map.csv (#537)
* add new sources to base_reverse_dns_map.csv

* remove extra comma in base_reverse_dns_map.csv
2024-07-30 15:53:24 -04:00
Sean Whalen
e9f0cdef1f Update tests.py
Fix code style
2024-07-16 15:14:40 -04:00
pphillips99
410663dbca fixup and tests to extract_report() to handle documented inputs (#530) 2024-07-16 14:53:27 -04:00
Sean Whalen
b110d06adb Merge branch 'master' of https://github.com/domainaware/parsedmarc 2024-06-11 14:40:13 -04:00
Sean Whalen
61402d6284 Fix PowerShell typo in docs 2024-06-11 14:38:29 -04:00
yuji suzuki
e6e282a10c Fix ci and gha update (#526)
* add gmail_api example to README.md

* fix ci

fix

fix

m

fix

* codecov update
2024-05-28 16:54:17 -04:00
ramspoluri
f618f69c6c Added 'since' option to search for messages since a certain time
- Added a `since` option under the `mailbox` section to search for messages since a certain time instead of going through the complete mailbox during testing scenarios. Acceptable values: `5m|3h|2d|1w`, with units {"m": "minutes", "h": "hours", "d": "days", "w": "weeks"}. Defaults to `1d` if an incorrect value is provided.
    - Do not mark messages as read if the test option is selected (works only for MSGraphConnection)
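A sketch of how a `5m|3h|2d|1w` value could be turned into a search cutoff with the documented `1d` fallback; parsedmarc's own parsing may differ:

```python
import re
from datetime import datetime, timedelta

UNITS = {"m": "minutes", "h": "hours", "d": "days", "w": "weeks"}


def since_to_cutoff(value: str, now: datetime) -> datetime:
    """Convert a value like '5m', '3h', '2d', or '1w' into a cutoff datetime."""
    match = re.fullmatch(r"(\d+)([mhdw])", value.strip())
    if not match:
        # Fall back to the documented default of one day.
        match = re.fullmatch(r"(\d+)([mhdw])", "1d")
    amount, unit = int(match.group(1)), match.group(2)
    return now - timedelta(**{UNITS[unit]: amount})


# since_to_cutoff("3h", datetime.now()) -> three hours before now
```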
2024-05-24 20:43:36 +05:30
Sean Whalen
13ddc26d70 8.12.0
- Fix for deadlock with large report (#508)
- Build: move to kafka-python-ng (#510)
- Fix new config variables previously not propagated in the code (#524)
- Fixes for kafka integration (#522)
- Fix if base_domain is None before get_service_from_reverse_dns_base_domain (#514)
- Update base_reverse_dns_map.csv
2024-05-22 09:35:18 -04:00
Sean Whalen
899dc060b2 pep8 fixes 2024-05-22 09:06:32 -04:00
Anael Mobilia
1145b0f63b Update base_reverse_dns_map.csv (#516)
* Update base_reverse_dns_map.csv

Add some domains + fix mailchimp

* Update base_reverse_dns_map.csv

While Mailgun can be used for marketing, its main use case is transactional email.

https://www.mailgun.com/solutions/use-cases/transactional-email/

---------

Co-authored-by: Sean Whalen <44679+seanthegeek@users.noreply.github.com>
2024-05-22 08:44:14 -04:00
Lennart Friberg
91191e30f3 Fix if base_domain is None before get_service_from_reverse_dns_base_d… (#514)
* Fix if base_domain is None before get_service_from_reverse_dns_base_domain call

- Added an if statement for base_domain before calling get_service_from_reverse_dns_base_domain(). If base_domain is None, get_service_from_reverse_dns_base_domain() will cause the report to fail.
- Added .xml test file

* Create protection.outlook.com!example.com!1711756800!1711843200.xml

added a .xml test sample report where base_domain returns None
2024-05-22 08:36:13 -04:00
ChibangLW
42be53349f Fixes for kafka integration (#522)
* fix(kafka): set correct variables

* fix(kafka): made username and password optional.

The documentation lists these as optional, but in the code they were mandatory.
2024-05-22 08:35:41 -04:00
yuji suzuki
20f451192f add gmail_api example to README.md (#518) 2024-05-22 08:35:16 -04:00
Sean Whalen
6e96b88a27 Always use local files when testing 2024-05-22 08:30:12 -04:00
yuji suzuki
201280d700 Fixed ci for Test sample DMARC reports (#520)
* Fixed ci for Test sample DMARC reports

* pip install local local folder

* pip install path fix
2024-05-22 08:14:04 -04:00
ryuksan
f8ee9bd166 Fix new config variables previously not propagated in the code (#524) 2024-05-22 08:13:45 -04:00
Andrew Baumann
ed92e0f7eb Remove unused parsedmarc.utils.tempdir (#517)
Parsedmarc.utils was creating a temp directory on every import, but this directory is no longer used anywhere.
2024-05-22 08:13:11 -04:00
yuji suzuki
c150c7671f github actions run in all python versions and ci version up (#519) 2024-05-22 08:12:55 -04:00
Christian Clauss
6bd9aab925 README.md: Expand the acronym to help readers understand (#511) 2024-05-22 08:11:58 -04:00
Patrick Linnane
f98dc6d452 build: move to kafka-python-ng (#510)
Signed-off-by: Patrick Linnane <patrick@linnane.io>
2024-05-22 08:11:29 -04:00
Russel Hunter Yukawa
7aa2e14cbb Fix for deadlock with large report (#508)
* add large xml sample

* Avoid deadlock

* Remove extra  whitespaces
2024-05-22 08:10:59 -04:00
Vermium Sifell
0b46c1807c Update base_reverse_dns_map.csv (#515) 2024-05-22 08:08:41 -04:00
Sean Whalen
11a9e959a0 Update base_reverse_dns_map.csv 2024-04-16 13:35:33 -04:00
Lennart Friberg
b6a8739b4f Update base_reverse_dns_map.csv (#513)
Added Email Security provider
2024-04-15 10:06:45 -04:00
Sean Whalen
a53d35a90c Revert "Update usage.md" PR #497 makes moot
This reverts commit 662b7b694b.
2024-04-08 10:41:16 -04:00
Sean Whalen
662b7b694b Update usage.md 2024-04-08 09:55:07 -04:00
Sean Whalen
6a06d01b44 8.11.0
- Actually save `source_type` and `source_name` to Elasticsearch and OpenSearch
- Reverse-lookup cache improvements (PR #501 closes issue #498)
- Update the included `dbip-country-lite.mmdb` to the 2024-03 version
- Update `base_reverse_dns_map.csv`
- Add new general config options (closes issue #500)
  - `always_use_local_files` - Disables the download of the reverse DNS map
  - `local_reverse_dns_map_path` - Overrides the default local file path to use for the reverse DNS map
  - `reverse_dns_map_url` - Overrides the default download URL for the reverse DNS map
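For illustration only, these general options might be read from an ini-style config with `configparser`; the exact handling in parsedmarc's CLI may differ:

```python
import configparser

config = configparser.ConfigParser()
config.read("parsedmarc.ini")

general = config["general"]
# Option names as listed above; defaults here are illustrative assumptions.
always_use_local_files = general.getboolean("always_use_local_files", fallback=False)
local_reverse_dns_map_path = general.get("local_reverse_dns_map_path", fallback=None)
reverse_dns_map_url = general.get("reverse_dns_map_url", fallback=None)
```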
2024-04-02 12:34:32 -04:00
Sean Whalen
422f7a074a Update base_reverse_dns_map.csv 2024-04-02 12:13:19 -04:00
Sean Whalen
d6128eae9f Actually save source_type and source_name to Elasticsearch and OpenSearch 2024-04-01 09:14:45 -04:00
Sean Whalen
36eedcb446 Update dbip-country-lite.mmdb 2024-03-31 20:36:22 -04:00
Sean Whalen
acef7bdd6e Code cleanup 2024-03-31 20:25:10 -04:00
Rod Payne
8936193280 Reverse-lookup cache improvements (#501) 2024-03-31 20:01:40 -04:00
Sean Whalen
fd5b792c4a Close issue #500
Add the following general configuration options:

- `always_use_local_files` - Disables the download of the reverse DNS map
- `local_reverse_dns_map_path` - Overrides the default local file path to use for the reverse DNS map
2024-03-31 20:00:47 -04:00
Sean Whalen
041296b1f1 8.10.3
- Fix flaws in `base_reverse_dns_map.csv`
2024-03-29 14:17:40 -04:00
Sean Whalen
2e49db3c13 8.10.2
- Fix flaws in `base_reverse_dns_map.csv`
2024-03-29 14:02:30 -04:00
Sean Whalen
871d678d16 Update dmarc_aggregate_dashboard.xml 2024-03-28 19:38:31 -04:00
Sean Whalen
7a8781ef5c Update dmarc_aggregate_dashboard.xml 2024-03-28 16:07:13 -04:00
Sean Whalen
9084d32040 8.10.1
- Fix flaws in `base_reverse_dns_map.csv`
2024-03-27 18:10:39 -04:00
Sean Whalen
29fe768772 Update dmarc_aggregate_dashboard.xml 2024-03-27 18:00:35 -04:00
Sean Whalen
0cad27c686 Update base_reverse_dns_map.csv 2024-03-27 18:00:27 -04:00
Sean Whalen
1f9a5ffe58 Splunk dashboard bug fixes 2024-03-27 17:19:58 -04:00
Sean Whalen
9ffc63f895 Update base_reverse_dns_map.csv 2024-03-27 17:01:25 -04:00
Sean Whalen
26f62082c3 Update Splunk dashboards 2024-03-27 15:40:19 -04:00
Sean Whalen
d7dbf68e7d Actually bump the version number to 8.10.0 2024-03-26 12:24:01 -04:00
Sean Whalen
465829c18b 8.10.0
- Fix MSGraph UsernamePassword Authentication (PR #497)
- Attempt to download an updated `base_reverse_dns_map.csv` at runtime
- Update included `base_reverse_dns_map.csv`
2024-03-26 12:22:49 -04:00
Sean Whalen
09238d5ca8 Update base_reverse_dns_map.csv 2024-03-26 12:18:00 -04:00
Tim Nowaczyk
d907dd8cff Fix MSGraph UsernamePassword Authentication (#497)
parsedmarc/pull/471 broke UsernamePassword authentication for me. This change fixes it.

Co-authored-by: Tim Nowaczyk <tnowaczyk@eqx-kibana-1.allpointsbroadband.net>
2024-03-26 12:07:47 -04:00
Sean Whalen
18f7508a1f Download reverse DNS map from GitHub 2024-03-26 12:07:10 -04:00
Sean Whalen
ed593a0b49 Update base_reverse_dns_map.csv 2024-03-26 10:32:34 -04:00
Sean Whalen
d27e195645 Update base_reverse_dns_map.csv 2024-03-26 08:16:41 -04:00
Sean Whalen
ede8c0b6a2 8.9.4
- Update `base_reverse_dns_map.csv`
2024-03-25 11:35:25 -04:00
Sean Whalen
91daa31172 8.9.3
- Revert change in 8.9.2
2024-03-25 07:04:15 -04:00
Sean Whalen
cdde9734ec 8.9.2 2024-03-25 06:26:45 -04:00
Sean Whalen
62ad46b8ba 8.9.1 release
- Fix broken CLI by removing obsolete parameter from `cli_parse` call (PR #496 closes issue #495)
2024-03-25 05:45:08 -04:00
Szasza Palmer
fc5e0fe4d5 removing obsolete parameter from cli_parse call (#496) 2024-03-25 00:03:51 -04:00
Sean Whalen
633435547a Actual 8.9.0 release
- Fix broken cache (PR #494)
- Add source name and type information based on static mapping of the reverse DNS base domain
  - See [this documentation](https://github.com/domainaware/parsedmarc/tree/master/parsedmarc/resources/maps) for more information, and to learn how to help!
- Replace `multiprocessing.Pool` with `Pipe` + `Process` (PR #491 closes issue #489)
- Remove unused parallel arguments (PR #492 closes issue #490)
2024-03-24 23:48:08 -04:00
Sean Whalen
fd0572cdd0 8.9.0
- Add source name and type information based on static mapping of the reverse DNS base domain
  - See [this documentation](https://github.com/domainaware/parsedmarc/tree/master/parsedmarc/resources/maps) for more information, and to learn how to help!
- Replace `multiprocessing.Pool` with `Pipe` + `Process` (PR #491 closes issue #489)
- Remove unused parallel arguments (PR #492 closes issue #490)
2024-03-24 23:30:40 -04:00
Sean Whalen
e550f42a22 Add support for source name and type 2024-03-24 21:31:39 -04:00
Szasza Palmer
2cde116a93 removing unused parallel arguments (#492) 2024-03-24 20:43:43 -04:00
Szasza Palmer
a915385246 replacing multiprocessing pool with pipe (#491)
* replacing multiprocessing pool with pipe

* code styling fix

* dropping obsolete chunk_size config parameter
2024-03-23 21:41:46 -04:00
Rod Payne
1e565d9eb2 Use cache in get_ip_address_info. (#494) 2024-03-23 21:40:36 -04:00
Anael Mobilia
3a1360a47a Always use systemctl command (#482)
In order to align with the rest of the documentation
2024-03-17 11:44:55 -04:00
Sean Whalen
5f3977d686 8.8.0 release
- Add support for OpenSearch (PR #481 closes #480)
- Fix SMTP TLS reporting to Elasticsearch (PR #470)
2024-03-04 10:49:44 -05:00
Sean Whalen
65d04bcb78 Code style cleanup 2024-03-04 10:37:32 -05:00
cgoIT
f3206dcdab [SMTP TLS] some minor bug fixes (#477)
* fix minor bugs during smtp-tls parsing, add docker-compose for local elasticsearch, add smtp-tls tests

* fix wrong log message parameter

* fix wrong log message

* add contact-info to smtp tls report, fix wrong fieldnames

* fix wrong fieldnames

* fix wrong index name for search

* at least for some reporting organizations the field sending-mta-ip is optional...

* add missing fields to elasticsearch for smtp tls

* failure_details is a list, add more test cases

* fix wrong name in ci.ini
2024-03-04 10:06:47 -05:00
Szasza Palmer
995bdbcd97 adding OpenSearch support, fixing minor typos, and code styling (#481)
* adding OpenSearch support, fixing minor typos and code styling

* documentation update
2024-03-04 10:06:26 -05:00
Sean Whalen
77132b3fc5 8.7.0
- Add support for SMTP TLS reports (PR #453 closes issue #71)
- Do not replace content in forensic samples (fix #403)
- Pin `msgraph-core` dependency at version `0.2.2` until Microsoft provides better documentation (PR #466 Close [#464](https://github.com/domainaware/parsedmarc/issues/464))
- Properly handle base64-encoded email attachments (PR #453)
- Do not crash when attempting to parse invalid email content (PR #453)
- Ignore errors when parsing text-based forensic reports (PR #460)
- Add email date to email processing debug logs (PR #462)
- Set default batch size to 10 to match the documentation (PR #465)
- Properly handle none values (PR #468)
- Add Gmail pagination (PR #469)
- Use the correct `msgraph` scope (PR #471)
2024-02-19 19:21:38 -05:00
Sean Whalen
a1f141d84c Code cleanup 2024-02-19 19:05:12 -05:00
mkupferman
efe74091f3 Gmail API pagination (#469)
* Use pagination to fetch more than 100 Gmail messages at once

* Provide `paginate_messages` option to allow use of previous behavior
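A hedged sketch of the pagination idea using the Gmail API's `pageToken`/`nextPageToken` fields; parsedmarc's own Gmail connector wraps this differently:

```python
def list_all_message_ids(service, user_id="me", query=""):
    """Fetch every matching Gmail message ID, following nextPageToken across pages."""
    message_ids = []
    page_token = None
    while True:
        response = (
            service.users()
            .messages()
            .list(userId=user_id, q=query, pageToken=page_token)
            .execute()
        )
        message_ids += [m["id"] for m in response.get("messages", [])]
        page_token = response.get("nextPageToken")
        if not page_token:
            break
    return message_ids
```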
2024-02-19 18:54:41 -05:00
Brassn
d2145b71ba msgraph DeviceFlow selecting wrong scope (#471)
* DeviceCode Flow ignores user and selects wrong scope

* only require client secret on ClientSecret flow
2024-02-19 18:54:16 -05:00
Yuuki Takahashi
d512b9f60e fix: handle none value (#468) 2024-02-19 18:53:52 -05:00
Anael Mobilia
93278bc023 Add email date on debug of mail processing (#462) 2024-02-19 18:53:02 -05:00
Anael Mobilia
9e9065837e Define default value for batch_size to 10 as according to the documentation (#465) 2024-02-19 18:52:30 -05:00
Anael Mobilia
1c1ce7fea3 Doc - Update elasticsearch parameters (#467) 2024-02-19 18:52:01 -05:00
bendem
fc49f7f56c Ignore errors when parsing text-based forensic reports (#460)
Starting with 8.2.0, parsedmarc crashes instead of ignoring some invalid reports.

The original change was introduced in abf9695228.
2024-02-19 18:51:28 -05:00
Sean Whalen
b8088505b1 Add support for SMTP TLS reports (#453) 2024-02-19 18:45:38 -05:00
Jason Lingohr
7d2b431e5f Fix tiny formatting issue (#451) 2024-01-02 16:38:50 -05:00
Nicholas Hairs
cb3f82e847 Add additional samples and ensure git does not touch CRLF (#456)
mimecast sample taken from:
https://github.com/domainaware/parsedmarc/issues/429#issuecomment-1873387507
2024-01-02 16:29:06 -05:00
UKnowTEd
100f12ed35 Update elastic.py (#449)
Insert new config parameter "apiKey" to authenticate via API to Elasticsearch.
2023-12-16 11:59:50 -05:00
UKnowTEd
37a6155262 Update cli.py (#450)
Insert new config parameter "apiKey" to authenticate via API to Elasticsearch.
2023-12-16 10:10:35 -05:00
Andras
25086763a9 small grammatical error in README.md (#446) 2023-12-16 10:09:42 -05:00
Anael Mobilia
b89c38c22a Region Map -> Maps (#445)
Since Kibana 8, Region Maps are no longer rendered.
See https://github.com/elastic/kibana/issues/81704
2023-12-16 10:09:11 -05:00
Jonathan Martens
c71bc19cea docs: formatting fix on delete keyword in mail section (#444) 2023-12-16 10:08:41 -05:00
Anael Mobilia
3bf0bea710 Fix typo on documentation (#443)
* Doc cleanup/improvement

* Fix typos

---------

Co-authored-by: Anael Mobilia <anael.mobilia@mydsomanager.com>
2023-12-16 10:08:06 -05:00
Sean Whalen
95954c5d87 Correct changelog 2023-10-13 10:35:58 -04:00
Sean Whalen
6120b8683d Update changelog 2023-10-13 10:29:42 -04:00
Sean Whalen
21d6f92fd4 Add PyPI download stats badge 2023-10-13 10:01:48 -04:00
jlownie
a164fb8e04 Update usage.md (#436) 2023-10-13 09:53:28 -04:00
Sean Whalen
762d92f6d2 8.6.4
- Ignore SPF results in aggregate report records if the domain is not provided
2023-10-13 09:51:59 -04:00
Sean Whalen
1655b84cc2 8.6.3
- Add an error message instead of raising an exception when an aggregate report timespan is greater than 24 hours
2023-10-11 19:41:30 -04:00
Sean Whalen
0eaba079b9 8.6.2
- Use `zlib` instead of `Gzip` to decompress more `.gz` files, including the ones supplied by Mimecast (Based on #430 closes #429)
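For reference, decompressing with `wbits=zlib.MAX_WBITS | 32` auto-detects a zlib or gzip header, and a decompress object tolerates trailing bytes after the stream ends; a sketch, not necessarily parsedmarc's exact call:

```python
import zlib


def decompress_gz(data: bytes) -> bytes:
    # MAX_WBITS | 32 accepts either a gzip or a zlib header; any bytes after the
    # end of the stream land in .unused_data instead of raising an error.
    decompressor = zlib.decompressobj(zlib.MAX_WBITS | 32)
    return decompressor.decompress(data)
```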
2023-10-11 17:49:46 -04:00
Sean Whalen
1de4a94c37 Merge branch 'master' of https://github.com/domainaware/parsedmarc 2023-10-11 17:36:39 -04:00
Sean Whalen
722ff79e23 Move senders out of the package
until the database is more complete
2023-10-11 17:36:13 -04:00
Anael Mobilia
68145b8b06 setup.py don't exist anymore (#434)
The setup.py file was deleted in de3002db8b
2023-10-11 17:25:25 -04:00
Anael Mobilia
732547e622 Doc cleanup/improvement (#432)
Co-authored-by: Anael Mobilia <anael.mobilia@mydsomanager.com>
2023-10-11 17:24:50 -04:00
Anael Mobilia
aaf269b11b Install latest elasticsearch for tests (#433) 2023-10-11 17:23:19 -04:00
Sean Whalen
2bee4fc591 Fix README typos 2023-09-05 21:17:04 -04:00
Sean Whalen
4d0974948d Update senders.sqlite 2023-09-05 21:10:12 -04:00
Sean Whalen
8b5834b00d Fix flake8 E721 2023-09-05 18:12:50 -04:00
Sean Whalen
31db7d2301 Add senders.sqlite 2023-09-05 15:15:30 -04:00
Sean Whalen
26027ef6b3 Add senders.sqlite 2023-09-05 15:14:11 -04:00
Sean Whalen
4ff44dcb0f Update dbip-country-lite.mmdb 2023-09-05 06:43:31 -04:00
Sean Whalen
557e2e0554 Code style fixes 2023-08-01 16:23:31 -04:00
Sean Whalen
6c84cfb7c4 Fix output in tests.py 2023-06-27 15:56:33 -04:00
Sean Whalen
a4b0aabdfb Update CHANGELOG.md 2023-06-27 15:54:21 -04:00
andersnauman
51760181b0 Fix: Less error-prone code to minimize the risk of program crash (#417)
- Double check if org_name exists. An empty name will crash Elasticsearch's 'exist' search
- Move try-statement to include open() to catch if files do not exist
- Enclose Elasticsearch's execute in a try-statement to catch any invalid searches when variables are empty/missing
2023-06-27 15:45:40 -04:00
Sean Whalen
89872d78ac Fix testing 2023-06-27 15:43:09 -04:00
Sean Whalen
477a45d19b Actually bump the version to 8.6.1 2023-05-14 20:43:42 -04:00
Sean Whalen
e5e7a6fe75 8.6.1
- Fix handling of non-domain organization names (PR #411 fixes issue #410)
- Ignore rua reports with a date range that is too long to be valid (PR #408 fixes issue #282)
2023-05-14 20:41:44 -04:00
Gaige B Paulsen
5a659ea578 fix: handling of text org_name without space #410 (#411) 2023-05-14 20:29:12 -04:00
Anael Mobilia
d2d62488f7 Add contributors on copyright (#414)
Co-authored-by: Anael Mobilia <anael.mobilia@mydsomanager.com>
2023-05-14 18:07:35 -04:00
Anael Mobilia
bf69ea8ccc Fix typos (#413)
Co-authored-by: Anael Mobilia <anael.mobilia@mydsomanager.com>
2023-05-14 18:07:07 -04:00
Michael Kliewe
af1e299dd4 Fix issue #282: Detect large date ranges in aggregate reports and skip processing (#408) 2023-05-14 18:06:07 -04:00
Sean Whalen
d426098b7e 8.6.0
- Replace publicsuffix2 with publicsuffixlist
2023-05-09 09:11:35 -04:00
Jed Laundry
a06fdc586f Change publicsuffix2 to publicsuffixlist (#406)
* change to publicsuffixlist

* update publicsuffixlist (now auto-updating)

* Fix unused imports
2023-05-09 08:49:41 -04:00
Anael Mobilia
062d6ea821 Use a local network connection to elasticsearch (#407) 2023-05-09 08:48:17 -04:00
Sean Whalen
b15e8d0aad Fix documentation headings 2023-05-03 18:25:27 -04:00
Sean Whalen
bf102f78aa 8.5.0
- Add support for Azure Log Analytics (PR #394)
- Fix a bug in the Microsoft Graph integration that caused a crash when an inbox has 10+ folders (PR #398)
- Documentation fixes
2023-05-03 17:10:49 -04:00
Sean Whalen
f6c0a4ecbc Update docs 2023-05-03 16:57:22 -04:00
Sean Whalen
044038a381 Fix mailing list documentation 2023-05-03 16:54:25 -04:00
Sean Whalen
cd475255c5 Documentation cleanup 2023-05-03 16:44:15 -04:00
Ben Companjen
2b35b785c6 Split and Organise documentation files (#404)
* Set global TOC collapse to false

* Split documentation

I tried to split the index.md file into logical parts, not changing the contents.
I did add a space and change one HTTP URL to HTTPS.

---------

Co-authored-by: Sean Whalen <44679+seanthegeek@users.noreply.github.com>
2023-05-03 16:11:58 -04:00
Sean Whalen
634631e326 Code style fixes 2023-05-03 16:07:54 -04:00
rubeste
a7280988eb Implemented Azure Log Analytics ingestion via Data Collection Rules (#394)
* Implemented Azure Log Analytics ingestion via Data Collection Rules

* Update loganalytics.py

* Update cli.py

* Update pyproject.toml

* Fixed config bug

Fixed a bug that causes the program to fail if you do not configure a Data stream.

* Fixed code format
2023-05-03 15:54:25 -04:00
Anael Mobilia
02e856a9bf From Elasticsearch 8.7, xpack security isn't on by default but is required (#395)
```
org.elasticsearch.ElasticsearchSecurityException: invalid configuration for xpack.security.transport.ssl - [xpack.security.transport.ssl.enabled] is not set, but the following settings have been configured in elasticsearch.yml : [xpack.security.transport.ssl.keystore.secure_password,xpack.security.transport.ssl.truststore.secure_password]
```
2023-05-03 15:39:46 -04:00
rubeste
70a9d11adb Fixed Bug in graph.py (#398)
* Fixed Bug in graph.py

Fixed a bug regarding finding a specific folder.
This bug caused parsedmarc to crash if it could not find the folder in one MS Graph request. This is only an issue if your mailbox contains 10+ folders.
It was solved by adding the `$filter=displayName eq '{folder_name}'` param so it would immediately find the folder.

* Fixed MS Graph Search bug

Fixed bug that causes only 10 messages to be read from MS Graph if batch size is defined as 0.
Fixed formatting

* prevented $top from being 0
2023-05-03 15:36:23 -04:00
Ben Companjen
9c86e2df49 Fix docstring indentations and grammar (#405)
In several docstrings incorrect indentation caused errors in the
rendered documentation.
I also changed a few words to correct the grammar.
2023-05-03 15:30:05 -04:00
Sean Whalen
b75259c58c Merge branch 'master' of github.com:domainaware/parsedmarc 2023-01-21 10:09:39 -05:00
Sean Whalen
16318b9152 8.4.2
PR #386 closes issues #289 and #380

- Only initialize the syslog, S3 and Kafka clients once
2023-01-21 10:08:47 -05:00
kcyd
22a6d21362 initialize syslog, s3 and kafka clients only once (#386) 2023-01-21 10:02:05 -05:00
Sean Whalen
b10cc06441 8.4.1
- Fix bug introduced in 8.3.1 that caused `No such file or directory` errors if output files didn't exist (PR #385 closes issues #358 and #382)
- Make the `--silent` CLI option only print errors.
  Add the `--warnings` options to also print warnings (PR#383)
2023-01-16 15:06:31 -05:00
Aleksandar Ristic
a54cc3e6dd Add --warnings cli option; --silent now prints only errors (#383)
* Adding warnings argument; silent logs only errors.

* Fixing flake warning for trailing whitespace

* Change help for --warnings to match existing style
2023-01-16 14:45:27 -05:00
Anael Mobilia
8b8c8c15fe Fix markdown (#384) 2023-01-16 14:43:36 -05:00
Clayton Dukes
bb838bb11a fixes https://github.com/domainaware/parsedmarc/issues/382 (#385) 2023-01-16 14:43:06 -05:00
Sean Whalen
375aaf025d Better build script 2022-12-23 20:20:53 -05:00
Sean Whalen
f82445fa06 8.4.0 2022-12-23 20:08:00 -05:00
Anael Mobilia
70ff13bfae In case of a timeout or exception, wait "check_timeout" seconds before trying a new connection (per the documentation: "or the number of seconds until the next mail check"). (#377)
Otherwise, in case of a mail server issue, we would try to connect again every 5 seconds.
2022-12-23 18:23:01 -05:00
aroldxd
fcc64ed85a add option to allow unencrypted fallback for token cache (#375) 2022-12-23 18:21:22 -05:00
Bjoern
ea777d9d18 Fix Bug with wrong ip_db_path (#338 #287) (#369)
Fixes the problem when only a file path was specified instead of a file
Reported with Issues:
Not a valid aggregate or forensic report after fresh Install with 8.3.x #338
Not a valid aggregate or forensic report after Update to 7.1.1 #287
2022-12-23 18:19:21 -05:00
Anael Mobilia
4217a076de Doc - Add info on how to update max shards (#368)
Add information on how to fix "Elasticsearch error: RequestError(400, 'validation_exception', 'Validation Failed: 1: this action would add [1] shards, but this cluster currently has [1000]/[1000] maximum normal shards open;"
2022-12-23 18:15:11 -05:00
nmourad
0a0e4beb27 Update documentation default value for ES replica setting (#376)
Change made in 7.1.0 "Set Elasticsearch shard replication to 0 (PR #274)"
Documentation was not updated

Co-authored-by: n.mourad <n.mourad@criteo.com>
2022-12-23 18:14:41 -05:00
ykcab
304ebaaa42 Updated Grafana query template (#366)
Co-authored-by: Alain Mbuku <git@almb.me>
2022-12-23 18:13:56 -05:00
Anael Mobilia
bcf242b0ab Fix typo (#364) 2022-12-23 18:13:10 -05:00
Anael Mobilia
1380eed2b8 Doc - Update install documentation to Elasticsearch/Kibana 8 (#363)
* Update elasticsearch/kibana instructions

[From Elasticsearch notes](https://www.elastic.co/guide/en/elasticsearch/reference/current/important-settings.html#heap-size-settings):
```
By default, Elasticsearch automatically sets the JVM heap size based on a node’s roles and total memory. We recommend the default sizing for most production environments.
```

* Update nginx conf to TLSv1.3 and IPv6

* Replace nginx proxy with a native https server

Kibana now provides an https web server, so remove the nginx proxy part and use Kibana directly

* Fix typo

* Add info on how to log in to Kibana

* Add interface details
2022-12-23 18:12:39 -05:00
Anael Mobilia
69c2c6bdb6 Add details on virtualenv / package installation (#361) 2022-12-23 18:10:35 -05:00
Anael Mobilia
7c349fe97e Add contrib component requirement on Debian (#360) 2022-12-23 18:09:52 -05:00
Sean Whalen
49f9d75413 Add publish-docs.sh 2022-10-04 18:45:57 -04:00
Sean Whalen
b86365225e Merge remote-tracking branch 'refs/remotes/origin/master' 2022-10-04 18:43:17 -04:00
Sean Whalen
dde79c9e26 8.3.2
- Improvements to the Microsoft Graph integration (PR #352)
2022-10-04 18:42:32 -04:00
Nathan Thorpe
79d99d0b2e Fix issue with MS Graph batch size, scopes, and exception logging improvements (#352)
* log the stack trace when handling an exception

* fix issue when batch size is not supplied, raise error on fetch_messages

* fix unused var

* initialize graph client with scopes,

* add check for status code
2022-10-04 18:11:01 -04:00
Pierce
126bab1c3b Fix screenshot in README.md (#353) 2022-10-04 18:09:42 -04:00
Sean Whalen
4a607420a7 Fix list formatting in docs 2022-09-10 15:16:02 -04:00
Sean Whalen
be4c236d69 Use hatch when testing building packages 2022-09-10 14:43:15 -04:00
Sean Whalen
4376b12c93 Remove rstcheck from tests 2022-09-10 14:39:08 -04:00
Sean Whalen
12e591255c Fix tests 2022-09-10 14:32:43 -04:00
Sean Whalen
6ccc827e4c Fix Python tests in GitHub 2022-09-10 14:16:17 -04:00
Sean Whalen
132bcde655 Update vscode settings 2022-09-10 14:03:41 -04:00
Sean Whalen
6540577ad5 Convert docs to markdown 2022-09-10 12:53:47 -04:00
Sean Whalen
26f43b3315 Fix build 2022-09-09 17:50:51 -04:00
Sean Whalen
1e0fa9472c Fix build 2022-09-09 16:46:57 -04:00
Sean Whalen
475b89adb0 Fix build 2022-09-09 16:44:21 -04:00
Sean Whalen
de3002db8b Fix Docker build 2022-09-09 16:36:03 -04:00
Sean Whalen
d2da6f30af 8.3.1
- Handle unexpected xml parsing errors more gracefully
2022-09-09 16:22:43 -04:00
Sean Whalen
10e15d963b 8.3.1
- Handle unexpected xml parsing errors more gracefully
2022-09-09 16:22:28 -04:00
Dave Rawks
84a7386726 Handle unexpected xml parsing errors more gracefully (#349)
* updates `parse_aggregate_report_xml` to not raise an unhandled
  exception on parsing errors
* adds an empty xml file to the aggregate test samples
* adds test for coverage
* Resolves #348
2022-09-08 18:23:34 -04:00
Sean Whalen
9d739ccd65 PEP 8 code style fixes 2022-09-03 10:28:45 -04:00
Sean Whalen
4f53894ce1 PEP 8 code style fixes 2022-08-19 14:12:26 -04:00
Dominik Bermühler
1d1f9e84b0 Logging silenced if parsedmarc is used as a library (#344)
By default, libraries should not configure their logger, giving the developer who is using the library the freedom to decide how log messages are handled. For this reason, the library should set no handler other than the NullHandler and should not set a log level.

For more information about this topic see here: https://realpython.com/python-logging-source-code/#library-vs-application-logging-what-is-nullhandler
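The standard pattern referenced above, roughly as a library would apply it:

```python
import logging

# Library modules only attach a NullHandler; the consuming application
# decides whether and how log records are actually emitted.
logger = logging.getLogger("parsedmarc")
logger.addHandler(logging.NullHandler())

# An application using the library can then opt in explicitly, e.g.:
# logging.basicConfig(level=logging.INFO)
```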
2022-08-19 13:54:52 -04:00
Justus Piater
11c151e818 replace dateparser with dateutil (#331)
fixes #298
2022-08-19 13:53:20 -04:00
Cody Cutrer
d1da40bab7 add config option for setting check_timeout (#343) 2022-08-19 13:51:08 -04:00
Cody Cutrer
046a7885ea support custom port for gmail oauth2 local server (#341) 2022-08-19 13:50:18 -04:00
vk2muf
bddc2adb9c Update Grafana-DMARC_Reports.json (#335) 2022-08-19 13:49:40 -04:00
Cody Cutrer
4e8c74599b append to output files if they already exist (#342)
* append to output files if they already exist

instead of overwriting them. fixes #226

* save output with each successive watch result

possible now that it appends to the output
2022-08-19 13:49:16 -04:00
Nathan Thorpe
b15425f50e Add Graph credentials cache and fix issue with batch size (#334)
* implement token cache for Graph API

* fix case when username isn't defined (device code)

* add batch size when listing messages from Graph API

* fix string concat
2022-07-21 13:43:21 -04:00
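A rough sketch of a token cache for the Microsoft Graph integration, based on the `msal` library's documented `SerializableTokenCache`. The cache file path and client ID are placeholders, and parsedmarc's actual implementation may differ; the point is that tokens persisted between runs avoid repeated interactive sign-ins.

```python
import os

import msal

CACHE_PATH = "token_cache.bin"  # placeholder location

cache = msal.SerializableTokenCache()
if os.path.exists(CACHE_PATH):
    with open(CACHE_PATH) as f:
        cache.deserialize(f.read())  # reuse tokens from a previous run

app = msal.PublicClientApplication(
    client_id="00000000-0000-0000-0000-000000000000",  # placeholder client ID
    authority="https://login.microsoftonline.com/common",
    token_cache=cache,
)

# ... acquire tokens through `app` as usual ...

if cache.has_state_changed:
    with open(CACHE_PATH, "w") as f:
        f.write(cache.serialize())  # persist refreshed tokens for the next run
```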
Sean Whalen
315b99f95a Resolve Sphinx warnings 2022-06-20 10:46:54 -04:00
Sean Whalen
f070ee95c3 Update build.sh to work with newer versions of rstcheck 2022-06-20 10:40:20 -04:00
Sean Whalen
eda5726652 8.3.0 release
- Support MFA for Microsoft Graph (PR #320 closes issue #319)
- Add more options for S3 export (PR #328)
- Provide a helpful error message when the log file cannot be created (closes issue #317)
2022-06-20 10:32:50 -04:00
Sean Whalen
5ab649cf8c Code style fix 2022-06-20 10:22:25 -04:00
Sean Whalen
ae8c587aed Provide a helpful error message when the log file cannot be created
Close issue #317
2022-06-20 10:15:35 -04:00
Sean Whalen
3850c7bdc4 Code style fix 2022-06-20 10:00:53 -04:00
William Desportes
9381381f40 CI improvements (#327)
* Add permissions on GitHub workflows tokens to avoid misuse and leak

* Bump docker actions

* Bump elasticsearch version
2022-06-20 09:49:43 -04:00
William Desportes
a496b4e42a Fix documentation typos (#326)
Fixes: #325
2022-06-20 09:48:14 -04:00
William Desportes
9671a49166 Implement new settings for s3 (#328)
* Fix s3 path documented as an int

* Implement new settings for s3 storage
2022-06-20 09:47:29 -04:00
Sean Whalen
6354c9bce7 Close file object before raising exception
Hopefully resolves #321
2022-05-16 22:00:25 -04:00
Nathan Thorpe
9d05fe776a Add message about scoping shared mailboxes when using MSGraph ClientSecret auth (#322)
* Add message about scoping to shared mailboxes

* disable resolve entities in Xml Parser
2022-05-14 14:14:42 -04:00
Sean Whalen
0e6e6c31c0 Merge branch 'master' of https://github.com/domainaware/parsedmarc 2022-05-14 13:16:03 -04:00
Sean Whalen
31821bcfd9 Remove pypy guide until #321 is fixed 2022-05-14 13:16:00 -04:00
Nathan Thorpe
9bf4e75e0e Implement Device Code and Client Secret auth flow for MS Graph (#320)
* implement DeviceCode and ClientSecret auth flow for MS Graph

* update readme for MS Graph auth method additions

* add warning to msgraph config setup
2022-05-14 12:58:22 -04:00
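A minimal sketch of the two auth flows named in the commit above, using the `msal` library's documented API. The tenant ID, client ID, secret, and scopes are placeholders; parsedmarc's actual flow selection and configuration handling may differ.

```python
import msal

AUTHORITY = "https://login.microsoftonline.com/your-tenant-id"  # placeholder
APP_SCOPES = ["https://graph.microsoft.com/.default"]


def client_secret_token(client_id: str, client_secret: str) -> dict:
    """ClientSecret flow: unattended authentication for a registered app."""
    app = msal.ConfidentialClientApplication(
        client_id, client_credential=client_secret, authority=AUTHORITY
    )
    return app.acquire_token_for_client(scopes=APP_SCOPES)


def device_code_token(client_id: str) -> dict:
    """DeviceCode flow: the user signs in from another device using a short code."""
    app = msal.PublicClientApplication(client_id, authority=AUTHORITY)
    flow = app.initiate_device_flow(scopes=["Mail.ReadWrite"])
    print(flow["message"])  # e.g. "To sign in, use a web browser to open ..."
    return app.acquire_token_by_device_flow(flow)
```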
Sean Whalen
ed39c7d89e Update documentation 2022-05-13 17:11:05 -04:00
Sean Whalen
1b443b8843 Revert "Set default batch size at the config, not the function"
This reverts commit c5a624274c.
2022-05-13 10:40:32 -04:00
Sean Whalen
c5a624274c Set default batch size at the config, not the function 2022-05-13 09:50:14 -04:00
Sean Whalen
4018e828e9 8.2.0 release
- Support non-standard, text-based forensic reports sent by some mail hosts
- Set forensic report version to `None` (`null` in JSON) if the report was in a non-standard format and/or is missing a version number
- The default value of the `mailbox` `batch_size` is now `10` (use `0` for no limit)
2022-05-10 21:01:41 -04:00
Sean Whalen
2d85e095fe PEP8 code style fix 2022-05-10 20:01:32 -04:00
Sean Whalen
abf9695228 8.2.0 2022-05-10 19:55:27 -04:00
Sean Whalen
665720a017 8.1.1
- Fix marking messages as read via Microsoft Graph
2022-05-09 19:14:27 -04:00
Sean Whalen
6b2131f0e8 Update documentation 2022-05-09 15:41:35 -04:00
Sean Whalen
6702181400 Move __version__ back
Fixes building from git/docker
2022-05-09 15:34:01 -04:00
Sean Whalen
b9c3404989 Mark a message as read when fetching a message from Microsoft Graph 2022-05-09 15:19:15 -04:00
Sean Whalen
b6054aafce Prep for 8.1.0
- Restore compatibility with <8.0.0 configuration files (with deprecation warnings)
- Move `parsedmarc.__version__` to `parsedmarc.meta.__version__`
- Set default `reports_folder` to `Inbox` (rather than `INBOX`) when `msgraph` is configured
2022-05-09 10:01:04 -04:00
Nathan Thorpe
abf07125c4 Add Dockerfile & build/push task (#316)
* add dockerfile and actions task to build image

* test on branch

* change to push only on release, update readme

* remove pip install requirements

* change to on release github action
2022-05-05 21:06:38 -04:00
Sean Whalen
bfd1aa8172 8.0.3 release
- Fix IMAP callback for `IDLE` connections (PR #313 closes issue #311)
- Add warnings in documentation and log output for IMAP configuration changes introduced in 8.0.0 (Closes issue #309)
- Actually pin the `elasticsearch` Python library version at `<7.14.0` (Closes issue #315)
- Separate version numbers in `__init__.py` and `setup.py` to allow `pip` to install directly from `git`
- Update `dateparser` to 1.1.1 (closes issue #273)
2022-05-02 16:33:44 -04:00
Sean Whalen
a1c912fc7d Use dateparser 1.1.1 to address #273 2022-05-02 16:24:44 -04:00
Sean Whalen
6d0717d6c0 Fix setup.py requirements list 2022-05-02 15:56:48 -04:00
Sean Whalen
8943430ff3 Fix elasticsearch-dsl requirement 2022-05-02 15:39:40 -04:00
Sean Whalen
03811988d3 Separate version number in setup.py to allow install from git 2022-05-02 14:42:25 -04:00
Sean Whalen
996deb042c Move version to its own package 2022-05-02 14:38:17 -04:00
Sean Whalen
fe5559e44e Move version above imports 2022-05-02 14:28:44 -04:00
Sean Whalen
af2afddf96 Possible 8.0.3 release
- Fix IMAP callback for `IDLE` connections (PR #313 closes issue #311)
- Add warnings in documentation and log output for IMAP configuration changes introduced in 8.0.0 (Closes issue #309)
- Actually pin the `elasticsearch` Python library version at `<7.14.0` (Closes issue #315)
2022-05-02 12:50:37 -04:00
Sean Whalen
1b0f95a0ff Add notes and warnings to address #309 and #314 2022-04-29 09:10:26 -04:00
Sean Whalen
167de27d34 Merge pull request #313 from nathanthorpe/imap_watch_fix
fix callback on IMAPConnection
2022-04-29 08:18:46 -04:00
Nathan Thorpe
e46a768b82 fix callback object on IMAPConnection 2022-04-28 19:46:24 -07:00
Sean Whalen
75da9f6a30 8.0.2
Strip leading and trailing whitespaces from Gmail scopes (Closes #310)
2022-04-26 12:17:25 -04:00
Sean Whalen
64f8eef27b 8.0.1
Fix `ModuleNotFoundError` by adding `parsedmarc.mail` to the list of packages in `setup.py` (PR #308)
2022-04-24 20:12:01 -04:00
Sean Whalen
adee1288c7 Merge pull request #308 from nathanthorpe/fix_package
Fix ModuleNotFoundError: add parsedmarc.mail to setup packages
2022-04-24 12:44:26 -04:00
Nathan Thorpe
514ca35117 add parsedmarc.mail to setup packages 2022-04-24 08:18:37 -07:00
Sean Whalen
23df163759 Update version number to 8.8.8 2022-04-22 17:08:15 -04:00
Sean Whalen
763476cdd3 Update CHANGELOG.md 2022-04-22 17:06:35 -04:00
Sean Whalen
64b88991d1 8.0.0
- Update included copy of `dbip-country-lite.mmdb` to the 2022-04 release
- Add support for Microsoft/Office 365 via Microsoft Graph API (PR #301 closes issue #111)
- Pin `elasticsearch-dsl` version at `>=2.2.0,<7.14.0` (PR #297 closes issue #296)
- Properly initialize `ip_db_path` (PR #294 closes issue #286)
- Remove usage of `logging.basicConfig` (PR #285)
- Add support for the Gmail API (PR #284 and PR #307 close issue #96)

Special thanks to @nathanthorpe  and @roeften for their substantial contributions.
2022-04-22 16:59:47 -04:00
Sean Whalen
d2cc93f23f Merge pull request #307 from nathanthorpe/gmail_mailbox_integration
Refactor Gmail integration with MailboxConnection interface
2022-04-22 14:19:51 -04:00
Nathan Thorpe
e2ac067bf3 Gmail: remove deprecated call to run_console, create folder and move msg fixes 2022-04-21 17:42:45 -07:00
Nathan Thorpe
d03d2b5f44 pep8 and tests fix 2022-04-21 17:03:54 -07:00
Nathan Thorpe
cf682337e9 Merge branch 'master' into gmail_mailbox_integration
# Conflicts:
#	parsedmarc/cli.py
2022-04-21 16:48:14 -07:00
Nathan Thorpe
6a1a88cfdf gmail pep8 fixes 2022-04-21 16:46:01 -07:00
Nathan Thorpe
5ad7e821b6 pep8 fixes on graph/imap, readme changes 2022-04-21 16:45:28 -07:00
Nathan Thorpe
657f34dc2a initial pass at integrating gmail with MailboxConnection interface 2022-04-21 16:14:34 -07:00
Sean Whalen
b0a6a5bbff PEP8 fixes 2022-04-21 19:10:54 -04:00
Sean Whalen
03ff412c70 Merge pull request #306 from nathanthorpe/python_3_6_fix
Python 3.6 fix: add maxsize parameter to find_folder_id cache
2022-04-21 18:49:05 -04:00
Nathan Thorpe
bd35d31573 add maxsize parameter to find_folder_id cache 2022-04-21 14:58:30 -07:00
Sean Whalen
90a53f2217 PEP8 fixes 2022-04-21 17:46:27 -04:00
Sean Whalen
4faf8ad651 Update README.rst 2022-04-21 17:19:42 -04:00
Sean Whalen
8da627aa61 Merge pull request #301 from nathanthorpe/graph_addition
Add support for Microsoft Graph API (Microsoft 365 mailboxes)
2022-04-21 17:11:35 -04:00
Sean Whalen
f5dcde183a Merge branch 'master' into graph_addition 2022-04-21 17:11:22 -04:00
Sean Whalen
bc5cca69ba Merge pull request #297 from robertomoutinho/ISSUE-296
ISSUE-296 - Elastic DSL lib pinned to under 7.14
2022-04-21 16:54:04 -04:00
Sean Whalen
a37f7aef5e Merge pull request #294 from taladar/ip_db_path
add ip_db_path initialization
2022-04-21 16:53:22 -04:00
Sean Whalen
3bce7cf300 Merge pull request #285 from dbermuehler/master
Removed usage of logging.basicConfig
2022-04-21 16:53:02 -04:00
Sean Whalen
c777d6759d Merge branch 'master' into master 2022-04-21 16:52:47 -04:00
Sean Whalen
d76b0adc67 Merge pull request #284 from roeften/gmail
Add GMail API support
2022-04-21 16:47:13 -04:00
Nathan Thorpe
754e1d6bc5 remove batch_size from fetch_messages method 2022-04-06 11:46:37 -07:00
Nathan Thorpe
445e3cdf9d add try except block to get_dmarc_reports call 2022-04-03 21:49:27 -07:00
Nathan Thorpe
db1d3443fd update setup.py 2022-04-03 17:56:39 -07:00
Nathan Thorpe
59a39b1509 update readme 2022-04-03 17:54:40 -07:00
Nathan Thorpe
1f865ae566 implement mailbox watch 2022-04-03 17:42:11 -07:00
Nathan Thorpe
88c8af8334 Implement getting messages from an Office 365 mailbox using MS Graph 2022-04-03 16:57:31 -07:00
robertomoutinho
30539dc111 ISSUE-296 - Elastic DSL lib pinned to under 7.14 2022-03-15 20:52:50 -03:00
Matthias Hörmann
e79dbd702e add ip_db_path initialization 2022-03-02 11:47:39 +01:00
Panos Gkikakis
5be36e431c Added dependencies for GMail api 2022-01-24 23:45:59 +02:00
Dominik Bermühler
8441f8badd Removed usage of logging.basicConfig
logging.basicConfig will change the configuration of the root logger, not the configuration of your own library logger. Since parsedmarc is a library, it should keep its logging configuration to its own logger, so that the logging configuration of applications using this library is not affected.
2022-01-18 18:23:23 +01:00
Panos Gkikakis
e721f5701e Add GMail API support 2022-01-17 23:43:33 +02:00
Sean Whalen
315d400677 Update index.rst 2022-01-09 14:42:55 -05:00
Sean Whalen
5e0ac908c6 Better install instructions
Avoid polluting the Python system installation
2022-01-09 14:33:47 -05:00
Sean Whalen
12e1382f13 Update README.rst 2022-01-09 13:57:33 -05:00
Sean Whalen
672a0f255d Update version number to 7.1.1 2022-01-08 16:10:16 -05:00
Sean Whalen
21c7e9d4af 7.1.1
- Actually include `dbip-country-lite.mmdb` file in the `parsedmarc.resources` package (PR #281)
- Update `dbip-country-lite.mmdb` to the 2022-01 release
2022-01-08 16:07:23 -05:00
Sean Whalen
7a44164ce5 Merge pull request #281 from mcastellini/fix_package
Actually add DBIP database static copy to packaged files
2022-01-08 15:19:41 -05:00
Matteo Castellini
68bc430476 Actually add DBIP database static copy to packaged files 2022-01-07 22:58:41 +01:00
Sean Whalen
2b55c9ec13 Update python-tests.yml 2022-01-05 12:11:35 -05:00
Sean Whalen
c5248a9375 Update requirements.txt 2022-01-05 11:55:12 -05:00
Sean Whalen
3fb3d6f920 Update requirements.txt 2022-01-05 11:54:26 -05:00
Sean Whalen
632b501f76 Update requirements.txt 2022-01-05 11:47:54 -05:00
Sean Whalen
dcdc210ab6 PEP8 style fixes 2022-01-05 10:59:00 -05:00
Sean Whalen
8259f6866f Update python-tests.yml 2022-01-05 10:46:31 -05:00
Sean Whalen
4f22ab4879 Update python-tests.yml 2022-01-05 10:36:29 -05:00
Sean Whalen
ce2943e0cc Update python-tests.yml 2022-01-05 10:29:58 -05:00
Sean Whalen
c0f82fa363 Update Github Workflows 2022-01-05 10:29:19 -05:00
Sean Whalen
293dfc46b7 PEP8 code style fixes 2022-01-05 10:21:25 -05:00
Sean Whalen
fcf5a3be31 Update python-tests.yml 2022-01-05 09:54:55 -05:00
Sean Whalen
c0e975b1e0 Update python-tests.yml 2022-01-05 09:45:58 -05:00
Sean Whalen
d50259cdc3 Update python-tests.yml 2022-01-05 09:44:14 -05:00
Sean Whalen
8a5242be5a Update python-tests.yml 2022-01-05 09:43:22 -05:00
Sean Whalen
ddb02cd031 Update python-tests.yml 2022-01-05 09:31:26 -05:00
Sean Whalen
273e9906a3 Update python-tests.yml 2022-01-05 09:30:12 -05:00
Sean Whalen
a87b11f726 Fix x-axis dates in Kibana - Closes #246 2022-01-05 08:56:36 -05:00
Sean Whalen
de8f18dcd3 Fix CLI that was broken by the new ip_db_path option 2022-01-03 21:23:35 -05:00
Sean Whalen
061c5a084e Merge pull request #278 from lasat/master
Resolve issue #261 - add documentation for imap password escape
2022-01-03 21:02:36 -05:00
lasat
c921814e18 Update index.rst
add note to imap password to escape '%'
2021-12-23 07:45:39 +01:00
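A small sketch of why the note above is needed, assuming the INI file is read with Python's `configparser` (as the `.ini` format suggests): the default interpolation treats `%` as special, so a literal percent sign in a password must be written as `%%`. The host, user, and password values below are placeholders.

```python
import configparser

# A single "%" in the password would raise InterpolationSyntaxError,
# so the literal percent sign is doubled.
ini_text = """
[imap]
host = imap.example.com
user = dmarc@example.com
password = p%%ssw0rd
"""

config = configparser.ConfigParser()
config.read_string(ini_text)
print(config["imap"]["password"])  # -> p%ssw0rd
```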
Sean Whalen
ca2c18b566 Update python-tests.yml 2021-12-07 12:19:53 -05:00
Sean Whalen
25b63e484c Update python-tests.yml 2021-12-07 12:14:33 -05:00
Sean Whalen
89f43bbe5d Update requirements.txt 2021-12-07 12:12:34 -05:00
Sean Whalen
b54bcc9138 Update setup.py 2021-12-07 12:05:53 -05:00
Sean Whalen
de3ac64583 Update python-tests.yml 2021-12-07 12:02:16 -05:00
Sean Whalen
1b69aa1fef Update setup.py 2021-12-07 11:52:26 -05:00
Sean Whalen
499516585b Migrate from Travis-CI to GitHub Actions 2021-12-07 11:04:27 -05:00
Sean Whalen
76a7a47c53 Update python-tests.yml 2021-12-07 10:55:05 -05:00
Sean Whalen
2748022824 Create python-tests.yml 2021-12-07 10:51:36 -05:00
Sean Whalen
51eea6c08d 7.1.0
- A static copy of the DBIP database is now included for use when a copy of the MaxMind GeoLite2 Country database is not installed (Closes #275)
- Add `ip_db_path` as a parameter and `general` setting for a custom IP geolocation database location (Closes #184)
- Search default Homebrew path when searching for a copy of the MaxMind GeoLite2 Country database (Closes #272)
- Fix log messages written to root logger (PR #276)
- Fix `--offline` option in CLI not being passed as a boolean (PR #265)
- Set Elasticsearch shard replication to `0` (PR #274)
- Add support for syslog output (PR #263 closes #227)
- Do not print the tqdm progress bar when running in a non-interactive TTY (PR #264)
2021-12-07 10:19:41 -05:00
Sean Whalen
16fb7bca2c Merge pull request #264 from mwander/isatty
Print tqdm progress bar only when running from interactive TTY
2021-12-04 11:30:51 -05:00
Sean Whalen
2bcd601d33 Merge pull request #263 from chris-y/syslog
Add Syslog output support
2021-12-04 11:29:47 -05:00
Sean Whalen
405c842737 Merge pull request #274 from White-Knight-IT/master
Set replication to 0 so that single node setups have "healthy" green …
2021-12-04 11:28:55 -05:00
Sean Whalen
404ed401f9 Merge pull request #265 from drxmknight/master
Fix parse offline in cli.py
2021-12-04 11:28:01 -05:00
Sean Whalen
b42a850749 Merge pull request #276 from dbermuehler/master
Fixed log messages written to root logger
2021-12-04 11:19:10 -05:00
Dominik Bermühler
25b96aa4c0 Log-messages were logged with the root logger instead of the parsedmarc logger. 2021-11-26 10:29:12 +01:00
knightian
d2378d4690 I forgot the default setting too 2021-11-07 11:17:02 +00:00
knightian
bdf61f437d I forgot to set ALL of the index replication values 2021-11-07 09:48:36 +00:00
knightian
ca18a89718 Set replication to 0 so that single node setups have "healthy" green index health status 2021-11-07 09:38:21 +00:00
Eduardo Hales
36a1695281 Fix parse offline in cli.py
Fix in cli.py where "offline" option is not being parsed as boolean
2021-08-30 19:05:02 -04:00
Matthäus Wander
d335d594f5 print tqdm progress bar only in interactive tty (as opposed to cronjob) 2021-08-22 10:31:09 +02:00
Matthäus Wander
2ea89c60b9 Merge branch 'domainaware:master' into master 2021-08-22 10:27:07 +02:00
Chris Young
69e29b2dfd Add Syslog output support
This outputs a JSON string containing one record.
Output can be configured in the INI file.
Fixes #227
2021-08-18 15:25:34 +01:00
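A minimal sketch of emitting one JSON record per syslog message with the standard library's `SysLogHandler`, as the commit above describes. The host, port, and record fields are placeholders and do not reflect parsedmarc's exact output schema or configuration keys.

```python
import json
import logging
import logging.handlers

syslog_logger = logging.getLogger("dmarc_syslog")
syslog_logger.setLevel(logging.INFO)
syslog_logger.addHandler(
    logging.handlers.SysLogHandler(address=("localhost", 514))  # values come from the INI file
)

record = {
    "org_name": "example.net",
    "source_ip_address": "192.0.2.1",
    "disposition": "none",
}
# One syslog message containing one JSON-encoded record
syslog_logger.info(json.dumps(record))
```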
Sean Whalen
6d689ca8f5 7.0.1 2021-06-23 15:03:12 -04:00
Sean Whalen
377df6a719 Merge pull request #254 from casperdcl/patch-1
fix startup bug
2021-06-23 14:59:59 -04:00
Casper da Costa-Luis
a10e6592fe fix startup bug
Pretty silly typo means `parsedmarc` completely fails unless `parsedmarc.ini` has `general.aggregate_json_filename` explicitly set
2021-06-23 12:06:29 +01:00
Sean Whalen
4c5a266f19 PEP 8 fixes 2021-06-20 19:07:18 -04:00
Sean Whalen
6d5f8a9ec3 Fix documentation formatting 2021-06-20 15:45:15 -04:00
Sean Whalen
e841a49ca7 Fix documentation formatting 2021-06-20 14:24:49 -04:00
Sean Whalen
3d0f7c8c83 7.0.0
Closes issues #221 #219 #155 #103
2021-06-20 13:10:12 -04:00
Sean Whalen
b8a148f7d4 Merge pull request #252 from White-Knight-IT/master
fix what was broken in merge train
2021-06-19 16:04:19 -04:00
Ubuntu
89816bbc6e fix what was broken in merge train 2021-06-20 03:58:46 +10:00
Sean Whalen
4dfde372c1 Merge pull request #222 from tom-henderson/imap_batch_size
Add option to process messages in batches
2021-06-19 11:41:26 -04:00
Sean Whalen
242522f7ee Merge branch 'master' into imap_batch_size 2021-06-19 11:40:36 -04:00
Sean Whalen
dc7533793a Merge pull request #217 from Olen/issue-211
Handling other IMAP connection errors.
2021-06-19 11:33:01 -04:00
Sean Whalen
d722bbf8f4 Merge pull request #225 from m0rcq/feature/custom/output/files
Feature/custom/output/files
2021-06-19 11:31:45 -04:00
Sean Whalen
0a1e57fd1b Merge pull request #224 from supaeasy/patch-1
Update README.rst
2021-06-19 11:30:04 -04:00
Sean Whalen
d3f1d761f1 Merge pull request #220 from Olen/modify-loglevels
Modify loglevels
2021-06-19 11:28:53 -04:00
Sean Whalen
d2d2000040 Merge pull request #216 from maurofaccenda/master
Pass offline parameter to wait_inbox()
2021-06-19 11:27:42 -04:00
Sean Whalen
0758bc179c Merge pull request #223 from tom-henderson/s3
Allow saving reports to S3
2021-06-19 11:27:15 -04:00
Sean Whalen
f694b6c489 Merge pull request #241 from slv008/master
Fix imap broken connection on large emails
2021-06-19 11:24:44 -04:00
Sean Whalen
8337c19399 Merge pull request #245 from mwander/utctimestamp
Use UTC datetimes for Elastic output
2021-06-19 11:23:21 -04:00
Sean Whalen
5e82b29afd Merge pull request #247 from mwander/splunkdoc
Extend Splunk Setup Guide
2021-06-19 11:22:42 -04:00
Sean Whalen
72e1448f32 Merge pull request #248 from mwander/issue221
Fix: don't crash when handling invalid reports without root node
2021-06-19 11:22:24 -04:00
Matthäus Wander
ca36db5f24 Minor formatting 2021-06-09 14:33:52 +02:00
Matthäus Wander
837ba7ef4d Added splunk installation guide 2021-06-09 14:33:52 +02:00
Matthäus Wander
00cb5bc4e8 Merge branch 'issue221' 2021-06-09 14:31:34 +02:00
Matthäus Wander
ca15ff51bd handle invalid reports gracefully 2021-06-09 14:29:04 +02:00
mwander
d330b25205 Merge remote-tracking branch 'origin/utctimestamp'
fixes deduplication for Elastic
2021-06-07 00:05:32 +02:00
Matthäus Wander
4bc7b0b62c deduplicate over date_begin and date_end instead of date_range 2021-06-07 00:03:39 +02:00
mwander
7e9bbfc805 Revert "convert unixtime into UTC datetime"
This reverts commit 9dab931f44.

9dab93 fixes Elastic output, but breaks Splunk output.
See 4ad693 for a better fix.
2021-06-06 19:04:45 +02:00
wander
4ad693301e Merge remote-tracking branch 'origin/utctimestamp' 2021-06-06 19:01:57 +02:00
Matthäus Wander
0aa7d84d0d Use UTC datetimes for Elastic. Elastic by default expects UTC. 2021-06-06 18:41:23 +02:00
Matthäus Wander
595ff0abb7 Merge branch 'master' of https://github.com/mwander/parsedmarc 2021-06-06 16:58:21 +02:00
Matthäus Wander
9dab931f44 convert unixtime into UTC datetime 2021-06-06 16:57:40 +02:00
Matthäus Wander
bbc379aaca Minor formatting 2021-06-06 16:44:40 +02:00
Matthäus Wander
bd8f13796e Added splunk installation guide 2021-06-06 16:35:07 +02:00
Silvian I
df0e5467ab Merge remote-tracking branch 'origin/master'
# Conflicts:
#	parsedmarc/__init__.py
2021-05-31 20:02:51 +02:00
Silvian I
3615ad3799 Fix server connection timeout while processing large DMARC files 2021-05-31 20:00:53 +02:00
Silvian I
775a6f2181 Fix server connection timeout while processing large DMARC files 2021-05-31 15:40:57 +02:00
Sean Whalen
0d680edd31 Merge pull request #236 from drawks/issue_235
Resolves Issue #235 - Apply index suffix to pre-insert search
2021-05-12 21:35:34 -04:00
Sean Whalen
50b7b5f28e Merge pull request #237 from drawks/iss_233
Resolve issue #233 - don't create imap folders when in test mode
2021-05-12 21:33:09 -04:00
Sean Whalen
190a6a004b Merge pull request #230 from snaptowen/master
Typo correction: allignment -> alignment
2021-05-12 21:30:38 -04:00
Sean Whalen
184425f567 Merge pull request #232 from supaeasy/patch-3
Update Grafana-DMARC_Reports.json
2021-05-12 21:30:20 -04:00
Dave Rawks
1b61156d50 Resolves Issue #235 - Apply index suffix to pre-insert search
* updates `save_forensic_report_to_elasticsearch` and
  `save_aggregate_report_to_elasticsearch` to apply suffix, if
  configured, to pre-insert search
2021-04-20 09:15:54 -07:00
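A rough sketch of the fix described above: the suffix used when writing documents must also be applied to the index name that the pre-insert duplicate search runs against, otherwise duplicates are never detected. The helper function and index names are hypothetical, not parsedmarc's actual code.

```python
from typing import Optional


def indexed_name(base: str, suffix: Optional[str] = None, month: Optional[str] = None) -> str:
    """Build the full index name the same way for searches and for inserts."""
    name = base
    if suffix:
        name = f"{name}_{suffix}"
    if month:
        name = f"{name}-{month}"
    return name


# Using one helper for both the duplicate-check search and the document write
# keeps the two index names consistent.
search_index = indexed_name("dmarc_aggregate", suffix="acme", month="2021-04")
write_index = indexed_name("dmarc_aggregate", suffix="acme", month="2021-04")
assert search_index == write_index
```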
Dave Rawks
55196cb389 Resolve issue #233 - don't create imap folders when in test mode 2021-04-19 10:33:27 -07:00
supaeasy
77331b55c5 Update Grafana-DMARC_Reports.json
- Update to current version of ES, Grafana and Grafana Plugins.
- fix TimeDate Handling for Forensic Reports, was not displayed correctly
- alter handling of stacking in one place (it just looked wrong)
2021-03-21 20:41:14 +01:00
Owen Valentine
609fbdce6f Typo correction: allignment -> alignment 2021-03-03 12:13:11 +02:00
atanas argirov
e51f2b0127 * general cleanup to meet linter rules 2021-02-12 10:50:25 +00:00
atanas argirov
36c592cc5a * added defaults for arg parser 2021-02-11 18:22:29 +00:00
supaeasy
394dddd2df Update README.rst
I struggled too long with this to not let others know.
2021-02-05 15:16:51 +01:00
Tom Henderson
85e7fd4ce6 Fix flake8 errors 2021-02-05 15:58:57 +13:00
Tom Henderson
de05be90df Fix flake8 error 2021-02-05 14:53:43 +13:00
Tom Henderson
9522c9b6e4 Ensure message_limit is not greater than total_messages 2021-02-05 14:51:32 +13:00
Tom Henderson
eba722cddc Fix path example 2021-02-05 14:38:52 +13:00
Tom Henderson
5f6b945839 Save reports to s3 2021-02-05 14:30:54 +13:00
Tom Henderson
a4acd5f232 Add S3Client 2021-02-05 14:30:02 +13:00
Tom Henderson
291d389f69 Add boto3 2021-02-05 14:29:27 +13:00
Tom Henderson
755ee3ded7 Add new settings for s3 2021-02-05 14:28:46 +13:00
Tom Henderson
bafa4861b1 Update docs 2021-02-05 14:27:22 +13:00
Tom Henderson
bc684c8913 Add option to process messages in batches 2021-02-05 13:37:09 +13:00
Ola Thoresen
c853c47087 Ensuring mail from is set 2021-01-22 15:06:35 +01:00
Ola Thoresen
a00cee8ba4 Adding a log line to see the sender of a report when it is parsed 2021-01-22 10:38:04 +01:00
Ola Thoresen
76614bdc94 Fixing flake-error 2021-01-21 08:34:56 +01:00
Ola Thoresen
0e2636225e Modifying some log-levels to INFO 2021-01-21 08:24:44 +01:00
Ola Thoresen
be8395dbe3 Detecting other IMAP-errors. Adding short sleep to avoid hammering the IMAP-server on error 2021-01-20 19:56:15 +01:00
Mauro Faccenda
478452de20 pass offline parameter to wait_inbox() 2021-01-20 15:53:19 +01:00
Sean Whalen
b43a622f9e Merge pull request #214 from maurofaccenda/patch-1
Fixes issue #213
2021-01-20 08:54:32 -05:00
Sean Whalen
8feffcb1ac Merge pull request #212 from Olen/log-level
Add verbose logging level
2021-01-20 08:51:49 -05:00
Sean Whalen
0f8d5477a6 Merge pull request #210 from AnaelMobilia/patch-1
Update categorie name in Kibana
2021-01-20 08:50:48 -05:00
Sean Whalen
7d7a197ff3 Merge pull request #207 from MarcelWaldvogel/fix-map-legend
Fix map thresholds and colors
2021-01-20 08:49:45 -05:00
Mauro Faccenda
4d40f59491 Fixes issue #213 2021-01-20 12:31:09 +01:00
Ola Thoresen
72b0a1b053 Add verbose logging level 2021-01-20 07:52:28 +01:00
Anael Mobilia
08457b66fd Update categorie name in Kibana 2021-01-18 16:51:12 +01:00
atanas argirov
83e229aeb1 * added output_{json,csv}_{aggregate,forensic}_file command line args
* refactored save_output() to support output_*
2020-12-28 15:57:32 +00:00
Marcel Waldvogel
49d09a51ba Fix map thresholds and colors
Inverted ranges are not supported; fixes #206
2020-12-28 16:37:38 +01:00
Sean Whalen
3f1e25e315 Drop Python 3.5 support 2020-11-25 09:36:39 -05:00
Sean Whalen
ddb007af13 Update index.rst 2020-11-24 22:03:51 -05:00
Sean Whalen
529fe27a97 PEP 8 fixes 2020-11-24 22:02:55 -05:00
Sean Whalen
e5720422f6 Update .travis.yml 2020-11-24 21:54:42 -05:00
Sean Whalen
4c3fb65af1 6.12.0 2020-11-24 21:42:45 -05:00
Sean Whalen
dbfed2e309 Update elastic.py 2020-11-24 21:15:31 -05:00
Sean Whalen
f0612203f0 Merge pull request #191 from White-Knight-IT/master
Allow Basic Auth with Elasticsearch
2020-11-20 11:54:45 -05:00
Sean Whalen
226afee12d Merge pull request #190 from Sikorsky78/patch-1
Update utils.py
2020-11-20 11:46:47 -05:00
Sean Whalen
809d533ce0 Merge pull request #199 from arne1101/master
Limit filename length to 100 characters
2020-11-20 11:45:52 -05:00
Arne Allisat
87092cf4ca Reduce number of characters in filename from 255 to 100
Reduce number of characters in filename from 255 to 100
2020-11-06 14:16:09 +01:00
Arne Allisat
06e42791c4 Limit filename length to 255 characters
https://github.com/domainaware/parsedmarc/issues/197

Since some operating systems limit filename length, the filename length should be limited. I propose limiting it to 255 characters.
2020-11-06 09:21:17 +01:00
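A minimal sketch of the limiting approach discussed in the two commits above: truncate overly long report filenames while preserving the extension, so they stay within common OS limits. The helper name and the default of 100 characters (the value the later commit settled on) are illustrative.

```python
import os


def truncate_filename(filename: str, max_length: int = 100) -> str:
    """Shorten a report filename so it stays under common OS limits,
    preserving the file extension."""
    if len(filename) <= max_length:
        return filename
    stem, ext = os.path.splitext(filename)
    return stem[: max_length - len(ext)] + ext


long_name = "dmarc_aggregate_" + "x" * 300 + ".xml"
print(len(truncate_filename(long_name)))  # -> 100
```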
Ubuntu
f18322c16c I can spell.... sometimes 2020-10-20 01:31:30 +11:00
Ubuntu
07f8a30f08 tabs and spaces :( 2020-10-20 01:30:08 +11:00
Ubuntu
de02edc0a9 take 3 2020-10-20 01:27:45 +11:00
Ubuntu
a36dc21c07 Try 2 2020-10-20 01:23:18 +11:00
Ubuntu
3312387852 Take 1 trying to add elastic basic auth 2020-10-20 01:14:43 +11:00
Ubuntu
bd7819881d delete test file 2020-10-20 00:52:01 +11:00
Ubuntu
dedbd518e8 test 2020-10-20 00:50:06 +11:00
Sikorsky78
c1681dc48b Update utils.py
Fixed broken system_paths for Windows
2020-10-12 12:47:15 +02:00
Sean Whalen
82b1a7e292 6.11.0 2020-08-31 16:31:53 -04:00
Sean Whalen
93189945b3 6.11.0 2020-08-31 16:22:35 -04:00
Sean Whalen
1f557f9e41 Merge pull request #182 from bhozar/master
Stop double count of email messages displaying. Include images of dashboard.
2020-08-31 16:03:39 -04:00
Sean Whalen
9d920e0bd8 Merge pull request #183 from ericwbentley/master
Add begin and end date fields
2020-08-31 16:02:41 -04:00
Sean Whalen
e1059b234e Merge pull request #186 from cvandeplas/fix/imaptimeout
fix: [imap] fixes #163 crash on imap timeout
2020-08-31 16:02:27 -04:00
Christophe Vandeplas
73b8866b29 fix: [imap] fixes #163 crash on imap timeout 2020-08-26 11:15:57 +02:00
ericwbentley
20f9890008 Update elastic.py 2020-08-06 18:43:49 -07:00
Bhozar
d4905968f6 Add files via upload
Included demo images. Fix for issue https://github.com/domainaware/parsedmarc/issues/162
2020-08-04 18:17:14 +01:00
Sean Whalen
382e33f687 Use elasticsearch version provided by elasticsearch-dsl 2020-06-10 18:05:23 -04:00
Sean Whalen
fd0896ac40 Merge pull request #165 from cvandeplas/chg-dashboard-aggr
chg: [splunk] adding source_base_domain as filter
2020-06-10 17:47:44 -04:00
Sean Whalen
bd82966d1c Merge pull request #170 from danhstevens/master
Fixing forensic parse failure on valid forensic report
2020-06-10 17:47:06 -04:00
Dan Stevens
c9355d7c94 Fixing forensic parse failure on valid forensic report 2020-06-10 16:10:25 -05:00
Christophe Vandeplas
e583728d4b chg: [splunk] adding source_base_domain as filter
this is quite useful when pivoting on data
2020-05-11 16:53:54 +02:00
Sean Whalen
4fca674064 6.10.0
- Ignore unknown forensic report fields when generating CSVs (Closes issue #148)
- Fix crash on IMAP timeout (PR #164 - closes issue #163)
- Use SMTP port from the config file when sending emails (PR #151)
- Add support for Elasticsearch 7.0 (PR #161 - closes issue #149)
- Remove temporary workaround for DMARC aggregate report records missing an SPF domain field
2020-05-10 17:44:14 -04:00
Sean Whalen
8431207920 Merge pull request #164 from cvandeplas/fix-imap
fix: [imap] fixes #163 crash on imap timeout
2020-05-10 16:27:55 -04:00
Sean Whalen
8bfaa3951b Merge pull request #161 from Kuzuto/master
Support for Elasticsearch 7.x
2020-05-10 16:26:24 -04:00
Christophe Vandeplas
585f87e46e fix: [imap] fixes #163 crash on imap timeout 2020-05-08 12:57:54 +02:00
Lennart Friberg
a89d41acd0 Update setup.py
Changed elasticsearch-py to support Elasticsearch > 7.0
For Elasticsearch 7.0 and later, use the major version 7 (7.x.y) of the library.
2020-05-06 21:54:08 +02:00
Lennart Friberg
f0169a451a Update requirements.txt
Changed Elasticsearch Python to support Elasticsearch >= 7
For Elasticsearch 7.0 and later, use the major version 7 (7.x.y) of the library.
2020-05-06 21:52:39 +02:00
Sean Whalen
3a6a84dbec Merge pull request #160 from cvandeplas/fix-splunkxml
fix: [splunk] correct default index for splunk dashboard
2020-05-06 10:37:54 -04:00
Christophe Vandeplas
b01b8d9374 fix: [splunk] correct default index for splunk dashboard 2020-05-06 08:15:15 +02:00
Sean Whalen
e940268e16 Merge pull request #157 from sander85/master
Fix for copy-paste error
2020-04-08 10:09:54 -04:00
Sander Lepik
ebda496178 Fix for copy-paste error
Signed-off-by: Sander Lepik <sander.lepik@cooppank.ee>
2020-04-07 09:47:29 +03:00
Sean Whalen
74de4fecf9 Kibana dashboard fixes 2020-03-21 21:06:16 -04:00
Sean Whalen
3a11ed3ac3 Merge pull request #151 from Yabk/master
Use SMTP port from config when sending the report
2020-03-11 12:01:47 -04:00
Yabk
6a9e3f0f5d Use SMTP port from config when sending the report 2020-03-11 13:10:35 +01:00
Sean Whalen
d0bb858e74 6.9.0 2020-02-17 16:24:11 -05:00
Sean Whalen
0ae15ed90c Merge pull request #145 from bhozar/master
Updated visuals. Corrected some of the guide.
2020-02-12 13:47:10 -05:00
Bhozar
7cfa8c20bb Updated visuals. Corrected some of the guide. 2020-02-11 23:28:12 +00:00
Sean Whalen
97db183031 6.8.2 2020-01-24 12:21:34 -05:00
Sean Whalen
95477bb818 Update UIs 2020-01-23 13:58:25 -05:00
Sean Whalen
c50bdf8d7e Actually fix the build 2020-01-22 12:47:13 -05:00
Sean Whalen
4edd86ce73 Fix CI build 2020-01-22 11:59:35 -05:00
Sean Whalen
ac25262385 6.8.1 2020-01-22 11:50:49 -05:00
Sean Whalen
52eaa32c3b Fix more documentation typos 2020-01-14 17:39:31 -05:00
Sean Whalen
c1a78264d2 Fix documentation typo 2020-01-14 17:33:30 -05:00
Sean Whalen
335ee39d6b Update geoipupdate documentation 2020-01-14 17:29:58 -05:00
Sean Whalen
88304bbf67 Better geoipupdate documentation 2020-01-14 17:19:49 -05:00
Sean Whalen
e30ccf2e44 Update index.rst 2020-01-14 14:04:01 -05:00
Sean Whalen
abcb739e67 Fix documentation typos 2020-01-14 13:58:07 -05:00
Sean Whalen
3eccfb1bc1 Update .travis.yml 2020-01-14 13:39:39 -05:00
Sean Whalen
a997496e75 Actual 6.8.0 release 2020-01-14 13:10:57 -05:00
Sean Whalen
8ca62a9860 6.8.0 release 2020-01-14 12:42:45 -05:00
Sean Whalen
1271b26fd5 6.8.0
closes issues #137 and #139
2020-01-14 11:32:40 -05:00
Sean Whalen
de465aa84d Update geoipupdate documentation 2020-01-14 10:51:44 -05:00
Sean Whalen
20ac81343b 6.7.4 - Update dependencies
Fixes issue #134
2019-12-23 15:30:54 -05:00
Sean Whalen
c67c991ee2 PEP 8 fixes 2019-12-17 07:01:23 -05:00
Sean Whalen
d82f211946 6.7.3
Make `dkim_aligned` and `spf_aligned` case insensitive (PR #132)
2019-12-17 06:53:52 -05:00
Sean Whalen
097a847f49 Merge pull request #132 from aharpour/master
Making dkim_aligned and spf_aligned case insensitive.
2019-12-17 06:28:34 -05:00
Ebrahim Aharpour
4c57181e21 Making dkim_aligned and spf_aligned case insensitive. 2019-12-16 21:32:00 +01:00
Sean Whalen
4673ebb1c4 Update link for exported kibana objects 2019-12-05 19:50:44 -05:00
Sean Whalen
793cf3588d Merge branch 'master' of https://github.com/domainaware/parsedmarc 2019-11-25 11:07:38 -05:00
Sean Whalen
d3f25c9447 Fix missing data in CSV output
Closes issue #128
2019-11-25 11:07:31 -05:00
Sean Whalen
54cdd2cf51 Update Grafana-DMARC_Reports.json 2019-11-19 10:12:43 -05:00
Sean Whalen
747d22358f 6.7.1
- Parse forensic email samples with non-standard date headers
- Graceful handling of a failure to download the GeoIP database (issue #123)
2019-11-12 11:12:50 -05:00
Sean Whalen
180d18ada3 Update README.rst 2019-11-06 13:33:57 -05:00
Sean Whalen
b81aba4a58 Fix conversion of reports to CSV 2019-11-06 12:55:29 -05:00
Sean Whalen
3721b25a04 Remove python 3.4 support 2019-11-06 12:47:13 -05:00
Sean Whalen
757a28b56d Keep error message when recovering from bad XML 2019-11-06 12:45:42 -05:00
Sean Whalen
9e50a1db57 Fix CI build 2019-11-06 12:41:57 -05:00
Sean Whalen
193e0fd98c Merge pull request #122 from michaeldavie/handle_bad_data
Handle invalid aggregate reports
2019-11-06 12:40:05 -05:00
Sean Whalen
eefc74c576 Fix PEP 8 line length 2019-11-06 12:24:09 -05:00
Sean Whalen
a95cfa4efd CSV output fixes
Make CSV output match JSON output. Fixes issue #115
2019-11-06 12:12:06 -05:00
Sean Whalen
e054bc7cbe Update export.ndjson
Fix typos
2019-11-06 11:26:41 -05:00
Sean Whalen
d12c1baa75 Merge pull request #119 from ruffy91/patch-1
Fix typos
2019-11-06 11:15:35 -05:00
michaeldavie
d4ec6dee65 Handle invalid XML 2019-11-05 22:13:06 -05:00
michaeldavie
13a2624850 Handle invalid bytes 2019-11-05 21:15:06 -05:00
Sean Whalen
9bc23a20fa Merge pull request #120 from chinskiy/python38
add python 3.8 to CI
2019-11-01 09:10:29 -04:00
chinskiy
2b789c869a add python 3.8 to CI 2019-10-31 13:08:25 +02:00
Fabian
28608e1573 Update elastic.py
fix typo
2019-10-27 16:04:54 +01:00
Fabian
e9a507bf99 Update kibana_saved_objects.json
Fixed typo
2019-10-27 15:59:10 +01:00
Sean Whalen
4685d0a750 Fix CentOS documentation as described in issue #114 2019-10-25 09:28:02 -04:00
Sean Whalen
6fd80ebdee Update dmarc_forensic_dashboard.xml
Closes issue #117
2019-10-23 10:06:04 -04:00
Sean Whalen
7d7a3e0919 6.6.1 - Close files after reading them 2019-09-23 00:53:36 -04:00
Sean Whalen
ff5bb1e03e Remove unused import 2019-09-23 00:27:43 -04:00
Sean Whalen
8a45628f25 Update setup.py 2019-09-23 00:24:04 -04:00
Sean Whalen
189acd8779 Update index.rst 2019-09-23 00:19:06 -04:00
Sean Whalen
c991feb860 Auto detect mbox files and add IMAP timeouts 2019-09-23 00:12:51 -04:00
Sean Whalen
3fef3b58a8 Create export.ndjson 2019-09-22 15:04:20 -04:00
Sean Whalen
bf6bea1456 Add support for detecting mbox files 2019-09-22 12:41:07 -04:00
Sean Whalen
1f196486d9 Update CHANGELOG.md 2019-09-22 00:22:06 -04:00
Sean Whalen
a1303a2168 Start work on 6.6.0 2019-09-22 00:20:21 -04:00
Sean Whalen
8a41505c4e Merge pull request #106 from ardovm/mbox
Allow parsing reports archived in mbox files
2019-09-21 14:28:48 -04:00
Sean Whalen
61e282d6ea 6.5.5 2019-09-13 08:24:48 -04:00
Sean Whalen
6ce06f7f10 Update requirements.txt 2019-09-12 20:19:43 -04:00
Sean Whalen
6efec4e633 6.5.4
Bump required `mailsuite` version to `1.2.1`
2019-08-12 15:07:06 -04:00
Arrigo Marchiori
537651836b obey style guide and make build.sh more readable
The script build.sh should be more readable now, and a little less
error-prone as it creates the parsedmarc-docs directory if it is missing
2019-08-03 11:15:21 +02:00
Arrigo Marchiori
623eeddc8e allow indicating mailboxes with the 'mbox:' prefix 2019-08-02 18:01:19 +02:00
Sean Whalen
cb63c55b94 6.5.3 2019-07-31 11:08:01 -04:00
Sean Whalen
7f99759d30 6.5.2 2019-07-30 11:41:42 -04:00
Sean Whalen
af9da9bc6b Merge pull request #100 from michaeldavie/csv-bug-fix
Correct bug in 6.5.1
2019-07-25 07:15:13 -04:00
michaeldavie
ef0323ceb4 Copy report_dict in aggregate report CSV row parsing 2019-07-24 21:44:59 -04:00
Sean Whalen
c98416950b Update CHANGELOG.md 2019-07-18 23:03:58 -04:00
Sean Whalen
8471d20d4b Fix the changelog 2019-07-18 13:18:59 -04:00
Sean Whalen
e340232bd4 Update CHANGELOG.md 2019-07-18 13:17:40 -04:00
Sean Whalen
a58975fc1c Bump version to 6.5.1 2019-07-18 13:16:39 -04:00
Sean Whalen
2d5f613870 Merge pull request #98 from michaeldavie/list-of-dicts
Expose CSV rows as a list of dicts
2019-07-17 22:08:51 -04:00
michaeldavie
09b4607ba5 Expose CSV rows as a list of dicts 2019-07-17 21:31:07 -04:00
Sean Whalen
ff6b8b2daf Update CHANGELOG.md 2019-07-17 11:39:20 -04:00
Sean Whalen
4b56c516b1 Update CHANGELOG.md 2019-07-17 11:34:44 -04:00
Sean Whalen
2a25ce5b62 6.5 release 2019-07-17 11:00:33 -04:00
Sean Whalen
5a6b51c59f Merge pull request #97 from domainaware/6.5
6.5
2019-07-17 10:40:28 -04:00
Sean Whalen
766786344d 6.5 release 2019-07-17 10:39:39 -04:00
Sean Whalen
8b3fc00f13 Document offline option 2019-07-17 09:03:49 -04:00
Sean Whalen
0036bbf14e Add support for offline report processing
Issue #90
2019-07-17 07:53:16 -04:00
Sean Whalen
ca0e992c90 Make args match 2019-07-17 06:46:23 -04:00
Sean Whalen
615c10c0c6 Start work on offline mode 2019-07-16 23:47:41 -04:00
Sean Whalen
73675b17b9 More message parsing fixes 2019-07-16 22:36:28 -04:00
Sean Whalen
43eba0738b Fix email parsing 2019-07-16 22:26:12 -04:00
Sean Whalen
364f292a15 Prep for release 2019-07-16 21:59:44 -04:00
Sean Whalen
38c8d14c3e Fix mail moving logic 2019-07-15 19:50:02 -04:00
Sean Whalen
f5b64151eb Use UDP for DNS 2019-07-15 19:31:31 -04:00
Sean Whalen
90e92809e3 Bump version 2019-07-15 19:15:31 -04:00
Sean Whalen
c455143c21 Start refactoring with mailsuite 2019-07-11 22:16:35 -04:00
Sean Whalen
5294d7140c Update build.sh 2019-07-09 13:33:43 -04:00
Sean Whalen
2351590c4d 6.4.2
Closes issue #94
2019-07-02 10:41:40 -04:00
Sean Whalen
11cf6f8ba6 Revert "Add IMAP message download timeout"
This reverts commit 6a27f41de1.
2019-06-30 19:56:11 -04:00
Sean Whalen
6a27f41de1 Add IMAP message download timeout 2019-06-30 19:39:57 -04:00
Sean Whalen
15b444141f 6.4.1
Raise utils.DownloadError exception when a GeoIP database or Public Suffix List (PSL) download fails (closes issue #73)
2019-05-19 13:21:06 -04:00
Sean Whalen
ffdeb8cfd3 Update CHANGELOG.md 2019-05-08 15:00:48 -04:00
Sean Whalen
1be7e3ff4c Update index.rst 2019-05-08 14:51:48 -04:00
Sean Whalen
1c9a6c4e85 6.4.0
Add ``number_of_shards`` and ``number_of_replicas`` as possible options
in the ``elasticsearch`` configuration file section (see issue #78)
2019-05-08 14:46:24 -04:00
Sean Whalen
32cfede9ac 6.3.7
Work around some unexpected IMAP responses reported in issue #75
2019-05-02 22:08:16 -04:00
Sean Whalen
4722aadfba Update requirements.txt 2019-05-02 09:36:14 -04:00
Sean Whalen
34e428f1cf Workaround unexpected IMAP response reported in issue #75 2019-05-01 08:46:25 -04:00
Sean Whalen
20ff722f30 6.3.6 2019-04-30 10:09:53 -04:00
Sean Whalen
0d609c4ff2 Fix debug logging 2019-04-30 10:04:10 -04:00
Sean Whalen
1ad994c717 IMAP fixes 2019-04-30 09:43:30 -04:00
Sean Whalen
ecc9fd434c Update documentation 2019-04-29 18:01:11 -04:00
Sean Whalen
714697720b Remove duplicated line 2019-04-29 17:07:08 -04:00
Sean Whalen
cf62534c5b 6.3.5 - Normalize Delivery-Result value in forensic/failure reports (issue #76) 2019-04-29 17:02:05 -04:00
Sean Whalen
3b366a24e4 Use dark theme in Splunk dashboards 2019-04-24 15:41:40 -04:00
Sean Whalen
0638650550 6.3.4 - Fix Elasticsearch index creation (closes issue #74) 2019-04-23 12:51:35 -04:00
Sean Whalen
7f85b1b346 Fix index creation 2019-04-23 12:38:25 -04:00
Sean Whalen
a2998f3968 Fix elasticsearch index creation 2019-04-23 12:24:56 -04:00
Sean Whalen
6b3a51a3f0 Update .travis.yml 2019-04-22 20:36:31 -04:00
Sean Whalen
76749f0b5f Update .travis.yml 2019-04-22 20:34:19 -04:00
Sean Whalen
af3eb1bd40 Update .travis.yml 2019-04-22 20:31:27 -04:00
Sean Whalen
5f49998e05 Update ci.ini 2019-04-22 20:26:35 -04:00
Sean Whalen
5312f4082a Update .travis.yml 2019-04-22 20:19:30 -04:00
Sean Whalen
cced69e31d Update .travis.yml 2019-04-22 20:12:08 -04:00
Sean Whalen
873985251c Update .travis.yml 2019-04-22 20:09:20 -04:00
Sean Whalen
ea6ed8f19c Update .travis.yml 2019-04-22 20:03:22 -04:00
Sean Whalen
0feaec93dd 6.3.3 - Fix dependency conflict 2019-04-22 20:01:19 -04:00
Sean Whalen
c9bb7a7af0 Update .travis.yml 2019-04-22 19:58:04 -04:00
Sean Whalen
8612a5d1b3 6.3.3 - Fix dependency order 2019-04-22 19:54:46 -04:00
Sean Whalen
775f80c02c 6.3.3
Set `number_of_shards` and `number_of_replicas` to `1` when creating indexes
2019-04-22 19:48:53 -04:00
Sean Whalen
536b94ff90 Update .travis.yml 2019-04-11 16:33:43 -04:00
Sean Whalen
0ee60f46ac Update .travis.yml 2019-04-11 16:29:00 -04:00
Sean Whalen
2696162a49 Fix deps 2019-04-11 16:22:44 -04:00
Sean Whalen
9d680a20d6 Update .travis.yml 2019-04-11 16:07:52 -04:00
Sean Whalen
ce7655ec8f Fix deps 2019-04-11 16:01:44 -04:00
Sean Whalen
60cefa8066 Fix CI build 2019-04-11 15:54:00 -04:00
Sean Whalen
04d1f5e7c9 Use Elasticsearch 7.0 2019-04-11 15:50:38 -04:00
Sean Whalen
d4e8974853 6.3.2 - Fix the monthly_indexes option in the elasticsearch configuration section 2019-04-11 15:46:33 -04:00
Sean Whalen
06371dfe9b Expand test coverage 2019-04-04 20:15:55 -04:00
Sean Whalen
af36df2f48 Fix codecov badge 2019-04-04 11:17:01 -04:00
Sean Whalen
4fce44bfa4 Update .travis.yml 2019-04-04 11:04:59 -04:00
Sean Whalen
18e714aedf Update .travis.yml 2019-04-04 11:01:20 -04:00
Sean Whalen
38b7299db7 Fix CI 2019-04-04 10:55:41 -04:00
Sean Whalen
1389e4df8d Add codecov to CI 2019-04-04 10:49:50 -04:00
Sean Whalen
55a7e9c69b 6.3.1 2019-03-29 17:13:22 -04:00
Sean Whalen
065aba7f6f 6.3.1 - Fix strip_attachment_payloads option 2019-03-29 17:10:09 -04:00
Sean Whalen
d1c483d337 Update ci.ini 2019-03-29 17:01:10 -04:00
Sean Whalen
2119382054 6.3.0 release - Fix issue #69 2019-03-29 16:37:37 -04:00
Sean Whalen
84dbf4d475 Update CHANGELOG.md 2019-03-29 12:53:28 -04:00
Sean Whalen
086a9b1fbf 6.3.0 2019-03-29 12:46:24 -04:00
Sean Whalen
a7814d1bf7 6.3.0 2019-03-29 12:44:35 -04:00
Sean Whalen
2f44d8fe3d Attempt to fix issue #67 2019-03-28 10:12:14 -04:00
Sean Whalen
e3bbb4e008 6.2.2
Fix crash when trying to save forensic reports with missing fields to Elasticsearch
2019-03-19 11:36:06 -04:00
Sean Whalen
bd2b08027f 6.2.1 - Add missing tqdm dependency to setup.py 2019-02-25 11:00:48 -05:00
Sean Whalen
5ad5107aec Update README.rst 2019-02-25 10:45:14 -05:00
Sean Whalen
76a8f61d40 Update mailing list documentation 2019-02-25 08:15:45 -05:00
Sean Whalen
63c5656354 Update index.rst 2019-02-24 23:04:38 -05:00
Sean Whalen
be5d01ff57 Update index.rst 2019-02-24 23:03:21 -05:00
Sean Whalen
11c76a42d6 Update index.rst 2019-02-24 23:02:57 -05:00
Sean Whalen
86527a5555 Update index.rst 2019-02-24 23:01:26 -05:00
Sean Whalen
a732f7123a Update index.rst 2019-02-24 23:00:12 -05:00
Sean Whalen
0dfe978b3d Update index.rst 2019-02-24 22:58:44 -05:00
Sean Whalen
29d3714721 Update index.rst 2019-02-24 22:55:55 -05:00
Sean Whalen
042e2cfafb Update index.rst 2019-02-24 22:53:27 -05:00
Sean Whalen
342acd94b2 Update index.rst 2019-02-24 22:51:18 -05:00
Sean Whalen
70a6cdc581 Update README.rst 2019-02-24 22:46:07 -05:00
Sean Whalen
47f32fb189 6.2.0 2019-02-24 22:42:43 -05:00
Sean Whalen
cc6f4bb680 Update Mailman 3 guide 2019-02-24 22:35:08 -05:00
Sean Whalen
4813f7bc87 6.2.0 2019-02-24 14:32:41 -05:00
Sean Whalen
e3f4291ff1 Add note about URL Encoding 2019-02-24 11:52:46 -05:00
Sean Whalen
d8bf5f950a Update index.rst 2019-02-23 16:52:22 -05:00
Sean Whalen
445435cf99 Update mailing list documentation 2019-02-21 17:33:41 -05:00
Sean Whalen
2b01e97c8e Merge pull request #62 from zscholl/master
Add parallelized processing option for parsing reports with the CLI
2019-02-20 18:25:50 -05:00
zscholl
182cc251fc fix another pep8. fix default param issue 2019-02-20 15:14:50 -07:00
zscholl
8fc856d0e3 change n_cpus to n_procs. fix PEP8 issues. remove debugging statements. 2019-02-20 11:25:46 -07:00
Sean Whalen
75dadb31bf Merge pull request #63 from syska/patch-1
Fixed command name for apt-get
2019-02-20 05:26:09 -05:00
Mikael Syska
b99187b423 Fixed command name for apt-get 2019-02-20 09:38:45 +01:00
zscholl
ad6860817f Merge branch 'master' of https://github.com/domainaware/parsedmarc 2019-02-19 15:15:25 -07:00
Sean Whalen
dc5dd1dc54 Logging improvements 2019-02-16 22:04:05 -05:00
Sean Whalen
47598d9de9 6.1.8 - Actually fix GeoIP lookups 2019-02-16 15:37:14 -05:00
Sean Whalen
7681f30295 6.1.7 - Fix GeoIP lookups 2019-02-16 15:31:27 -05:00
Sean Whalen
d29ae43dd7 Fix CI builds 2019-02-16 14:08:58 -05:00
Sean Whalen
de8b4f936c 6.1.6 - Better GeoIP error handling 2019-02-16 13:50:39 -05:00
Sean Whalen
39d71968f1 Update index.rst 2019-02-16 13:36:56 -05:00
Sean Whalen
97b581f404 Update index.rst 2019-02-16 13:35:11 -05:00
Sean Whalen
89ce95e2cd Update index.rst 2019-02-16 13:33:33 -05:00
Sean Whalen
a16b5c5627 6.1.5 2019-02-16 13:29:07 -05:00
Sean Whalen
0e5247d79f 6.1.4 - Actually package requirements 2019-02-16 11:12:17 -05:00
Sean Whalen
ecdff4d339 6.1.3 Fix package requirements 2019-02-16 11:06:16 -05:00
Sean Whalen
559b5dff07 Update pypy3 download 2019-02-16 10:51:40 -05:00
Sean Whalen
6ac5305db5 6.1.2 Release 2019-02-15 20:10:52 -05:00
Sean Whalen
264ed68b14 Release 6.1.2 2019-02-15 20:09:50 -05:00
Sean Whalen
3a5d97e8f7 Fix argument name for send_email() (closes issue #60) 2019-02-15 19:35:52 -05:00
zscholl
304074ade5 fix requirements.txt 2019-02-15 17:34:26 -07:00
zscholl
6f7a333623 fix typo 2019-02-15 17:20:51 -07:00
zscholl
884b3759e7 updated requirements 2019-02-15 17:19:31 -07:00
Sean Whalen
81ad0b85c2 Merge pull request #61 from arnydo/docs/davmailservice
Docs/davmailservice
2019-02-15 19:18:30 -05:00
zscholl
41823cbb00 Merge branch 'master' of https://github.com/domainaware/parsedmarc 2019-02-15 17:17:58 -07:00
zscholl
baa544217f updated README 2019-02-15 17:17:20 -07:00
Sean Whalen
2e2e47b202 6.1.2 - Use local Public Suffix List file instead of downloading it 2019-02-15 19:14:08 -05:00
zscholl
8f702b9bc2 added parallelization option to cli parsing 2019-02-15 17:05:15 -07:00
arnydo
c2b5ec9fbd Update index.rst 2019-02-15 16:20:04 -05:00
arnydo
b3d2efe0b0 Add doc on running davmail as systemd service 2019-02-15 16:19:12 -05:00
arnydo
a27b5a4291 Merge pull request #1 from domainaware/master
pull in latest changes
2019-02-15 15:18:10 -05:00
arnydo
9bed81ad07 Correct typo in docs 2019-02-15 15:10:45 -05:00
Sean Whalen
2ae500ba9c 6.1.1 release 2019-02-15 14:19:52 -05:00
Sean Whalen
f722907a9a PEP 8 fix 2019-02-15 09:29:51 -05:00
Sean Whalen
2eceac3100 6.1.1 2019-02-15 08:57:51 -05:00
Sean Whalen
fdf8ea292f Fix link 2019-02-13 19:50:42 -05:00
Sean Whalen
22230af4d2 Add a link to the Davmail FAQ
Includes Modern Auth/multi-factor authentication instructions
2019-02-13 19:44:08 -05:00
Sean Whalen
71362b8d69 Update example davmail config
Make settings clearer

See also: Issue #57
2019-02-13 19:30:42 -05:00
Sean Whalen
53510c1f78 Update example davmail config
See also: Issue #57
2019-02-13 19:15:52 -05:00
Sean Whalen
d68294c58a Fix docs typo 2019-02-13 15:04:47 -05:00
Sean Whalen
93ecf4a262 Fix docs typo 2019-02-13 14:23:16 -05:00
Sean Whalen
e12c5637e1 Update index.rst 2019-02-13 13:53:32 -05:00
Sean Whalen
f5d645cebd Fix table formatting 2019-02-13 13:50:52 -05:00
Sean Whalen
b70d47e1b2 Update mailing list documentation 2019-02-13 13:42:32 -05:00
Sean Whalen
603dd65da5 Update CHANGELOG.md 2019-02-13 10:34:55 -05:00
Sean Whalen
e588845f23 6.1.0 - Fix aggregate report email parsing regression
Fix aggregate report email parsing regression in 6.0.3 (closes issues #57 and #58)
2019-02-13 10:30:25 -05:00
Sean Whalen
6d047befcb fix bad debug output #57 2019-02-12 15:29:46 -05:00
Sean Whalen
a06db17a52 Fix Davmail support 2019-02-12 15:10:07 -05:00
Sean Whalen
007225302e Add debugging info for #57 2019-02-12 14:54:46 -05:00
Sean Whalen
34594ca514 6.0.3 release 2019-02-12 11:59:37 -05:00
Sean Whalen
b56c958146 Update CHANGELOG.md 2019-02-12 11:53:55 -05:00
Sean Whalen
444deeab7c 6.0.3 - Fix email parser (closes issue #53)
Don't assume the report is the last part of the email message
2019-02-12 11:51:32 -05:00
Sean Whalen
bca34f3891 Remove outdated documentation 2019-02-10 13:45:16 -05:00
Sean Whalen
b61717d184 Fix docs 2019-02-10 13:12:34 -05:00
Sean Whalen
7b7ac245b0 Release 6.0.2 and update documentation 2019-02-10 12:57:50 -05:00
Sean Whalen
8ed6c7840d Use temp directory for temp files (fixes issue #54) 2019-02-10 07:29:52 -05:00
Sean Whalen
21b6ccb427 Attempt to fix issue #53 without breaking anything else 2019-02-08 09:03:16 -05:00
Sean Whalen
9329feec1f Add index patterns that were accidentally removed from kibana_saved_objects.json
Issue #52
2019-02-07 10:59:33 -05:00
Sean Whalen
2aa4b6aac4 6.0.1 - Fix Elasticsearch output
PR #50 - andrewmcgilvray
2019-02-06 06:43:55 -05:00
Sean Whalen
59acc5238d Merge pull request #50 from andrewmcgilvray/andrewmcgilvray-patch-1
fixes elasticsearch output
2019-02-06 06:37:01 -05:00
Andrew McGilvray
9b5e3db91f fixes elasticsearch output
There were many problems created by what looks like a bad search and replace.
2019-02-06 14:56:17 +10:00
Sean Whalen
af79a1f286 Update index.rst 2019-02-05 12:27:21 -05:00
Sean Whalen
74dacf48fb Add instructions for securing the config file 2019-02-05 11:51:58 -05:00
Sean Whalen
bd76344baa Small doc fixes 2019-02-05 11:31:49 -05:00
Sean Whalen
84522a2fa0 Update documentation for 6.0.0 release 2019-02-05 11:25:45 -05:00
Sean Whalen
9a3971ca50 Update README.rst 2019-02-05 00:31:53 -05:00
Sean Whalen
43791dd64a Remove outdated docs 2019-02-05 00:28:15 -05:00
Sean Whalen
d0d386e7ad Update documentation 2019-02-04 23:59:06 -05:00
Sean Whalen
975365413d Start to update docs 2019-02-04 23:03:13 -05:00
Sean Whalen
c796b0be6c Actually fix CI 2019-02-04 17:32:07 -05:00
Sean Whalen
6d399ef931 Fix CI 2019-02-04 17:20:07 -05:00
Sean Whalen
528cfb2822 6.0.0
Move CLI options to a config file
2019-02-04 17:03:33 -05:00
Sean Whalen
5c0de87d4e Update dmarc-summary-charts.png 2019-02-01 11:50:57 -05:00
Sean Whalen
61f33f0017 Update dmarc-summary-charts.png 2019-02-01 11:26:59 -05:00
Sean Whalen
eacd30688e Update dmarc-summary-charts.png 2019-02-01 11:15:35 -05:00
Sean Whalen
3f71518498 PEP8 fixes 2019-02-01 11:02:52 -05:00
Sean Whalen
80ee60f69e Merge branch 'master' of https://github.com/domainaware/parsedmarc 2019-02-01 10:42:52 -05:00
Sean Whalen
e8feca3117 Visualization improvements (Closes #49) 2019-02-01 10:42:43 -05:00
Sean Whalen
1ba1bc9543 5.3.0 - Closes issue #45 and issue #57 2019-01-28 18:19:07 -05:00
Sean Whalen
cc315d00e1 Raise proper exception on invalid forensic report emails
Issue #47
2019-01-28 10:18:19 -05:00
Sean Whalen
e85e99a416 Use the same connection for moving messages
Office 365 did not like multiple connections at all
2019-01-24 14:02:08 -05:00
Sean Whalen
f883996126 Fix move test 2019-01-24 12:04:01 -05:00
Sean Whalen
e929476f32 Remove premature logout 2019-01-24 11:30:13 -05:00
Sean Whalen
6c49d76688 PEP8 fix 2019-01-24 11:05:05 -05:00
Sean Whalen
59168ca8f7 PEP8 fix 2019-01-24 10:56:43 -05:00
Sean Whalen
05f225721d Move IMAP messages over a separate connection 2019-01-24 10:49:28 -05:00
Sean Whalen
0e39830783 Better debugging output 2019-01-17 16:19:14 -05:00
Sean Whalen
241ea4d1bd Debug output workaround 2019-01-17 15:48:19 -05:00
Sean Whalen
bb1b62b1fe More detailed debugging output 2019-01-17 15:29:54 -05:00
Sean Whalen
a3a6e97876 Bump version number to 5.2.1 2019-01-13 15:06:39 -05:00
Sean Whalen
2a507a764e 5.2.1 - Remove unnecessary debugging code 2019-01-13 15:04:48 -05:00
Sean Whalen
c3652e06e0 Update CHANGELOG.md 2019-01-13 12:35:41 -05:00
Sean Whalen
7619bde93f Better error recovery 2019-01-12 23:42:54 -05:00
Sean Whalen
92a6341e5d Debugging 2019-01-12 15:47:43 -05:00
Sean Whalen
01ee77eb6b Print tracebacks 2019-01-11 12:27:50 -05:00
Sean Whalen
9af06f5de3 Improve log formatting 2019-01-10 19:22:27 -05:00
Sean Whalen
105a956f8c Update CHANGELOG.md 2019-01-10 11:55:36 -05:00
Sean Whalen
df1511d8db Update CLI documentation 2019-01-10 11:53:22 -05:00
Sean Whalen
51a0e891e7 PEP8 fix 2019-01-10 11:48:10 -05:00
Sean Whalen
d224964449 Merge pull request #43 from mlodic/master
added option to redirect log into a specified file
2019-01-10 11:42:49 -05:00
Sean Whalen
fcf39d4810 More workarounds for IMAP errors 2019-01-10 11:25:59 -05:00
Matteo Lodi
475c5f5b3c added option to redirect log into a specified file 2019-01-10 11:29:32 +01:00
Sean Whalen
c5f2e463c1 5.2.0 2019-01-09 16:26:08 -05:00
Sean Whalen
1792e868e2 Update documentation 2019-01-09 16:13:56 -05:00
Sean Whalen
419e8a68b2 Add options for monthly Elasticsearch indexes (issue #41) 2019-01-09 14:35:12 -05:00
Sean Whalen
b64fa96d88 PEP8 fixes 2019-01-09 14:14:39 -05:00
Sean Whalen
6a6370dbda Merge pull request #42 from mlodic/master
added support for HTTPS connections to elasticsearch server
2019-01-09 14:04:55 -05:00
Matteo Lodi
2ca7bb200a added support for HTTPS connections to elasticsearch server 2019-01-09 18:16:56 +01:00
Sean Whalen
048fa28160 Fix typo 2019-01-06 23:17:45 -05:00
Sean Whalen
a38c66d0b5 5.1.3 2019-01-06 23:13:29 -05:00
Sean Whalen
884786116e Update index.rst 2018-12-31 17:30:46 -05:00
Sean Whalen
c8e1424c3f Fix docs typo 2018-12-31 12:19:50 -05:00
Sean Whalen
ead7f9ad09 5.1.2 2018-12-31 12:11:50 -05:00
Sean Whalen
3c0e550a3a Fix documentation typos 2018-12-31 11:41:04 -05:00
Sean Whalen
2345908bff Update CHANGELOG.md 2018-12-20 11:50:50 -05:00
Sean Whalen
4d94f7bba0 Add note about RAM to docs (closes issue #40) 2018-12-20 11:22:21 -05:00
Sean Whalen
fe73f21df4 Increase default Splunk HEC response timeout to 60 seconds 2018-12-19 17:15:38 -05:00
Sean Whalen
cbe554ae5f Update CHANGELOG.md 2018-12-19 16:26:22 -05:00
Sean Whalen
08df8d3344 Move import back where it was 2018-12-19 12:08:08 -05:00
Sean Whalen
a9b92e31e4 Fix crash when parsing invalid forensic report (closes #38) 2018-12-19 12:03:09 -05:00
Sean Whalen
0d103a3d54 Remove completed TODO comments 2018-11-28 23:27:49 -05:00
Sean Whalen
f0928b1063 Update CLI documentation 2018-11-28 21:49:15 -05:00
Sean Whalen
028be52653 Fix typo 2018-11-28 21:41:08 -05:00
Sean Whalen
76f3007740 Make CLI help uniform 2018-11-28 21:39:53 -05:00
Sean Whalen
f706041701 Update cli.py 2018-11-28 21:37:34 -05:00
Sean Whalen
b196b5fca0 Code formatting improvements 2018-11-28 21:35:09 -05:00
Sean Whalen
8548a3749e Make CLI help uniform 2018-11-28 21:32:07 -05:00
Sean Whalen
46f5967212 Make CLI help uniform 2018-11-28 21:30:51 -05:00
Sean Whalen
7d9cf723c1 Make CLI help uniform 2018-11-28 21:29:13 -05:00
Sean Whalen
f59af2334b 5.1.0 2018-11-28 21:23:43 -05:00
Sean Whalen
bdf9f62377 Revert "Fix Elasticsearch/Python mapping mismatch"
This reverts commit e7e1f238ab.
2018-11-28 11:08:47 -05:00
Sean Whalen
e7e1f238ab Fix Elasticsearch/Python mapping mismatch 2018-11-28 11:02:57 -05:00
Sean Whalen
78de18eb64 PEP8 fix 2018-11-27 12:39:52 -05:00
Sean Whalen
2b44bd5111 Cleanup CLI help 2018-11-27 12:19:41 -05:00
Sean Whalen
eaaebb54be Clean up CLI help 2018-11-27 12:16:48 -05:00
Sean Whalen
1343b25963 Use custom Kafka client ID 2018-11-27 12:11:30 -05:00
Sean Whalen
a227b73cfb 5.1.0 - Add support for TLS/SSL and username/password auth for Kafka 2018-11-27 12:02:30 -05:00
Sean Whalen
f19ea5b950 5.0.2 - Revert to using publicsuffix instead of publicsuffix2 2018-11-26 15:01:22 -05:00
Sean Whalen
3d59be3ec3 Fix docs syntax 2018-11-26 13:35:03 -05:00
Sean Whalen
b25cf80a75 Add CentOS instructions to docs 2018-11-26 13:30:44 -05:00
Sean Whalen
e264c5744e Set more static versions in requirements 2018-11-26 11:53:27 -05:00
Sean Whalen
7634b9c9d1 Update venv location in build script 2018-11-26 11:46:33 -05:00
Sean Whalen
5934b0abae 5.0.1 - Closes issue #35 2018-11-26 11:43:16 -05:00
Sean Whalen
f0ef25bcd7 Update .travis.yml 2018-11-26 11:10:32 -05:00
Sean Whalen
9945c3f384 Update .travis.yml 2018-11-26 09:22:19 -05:00
Sean Whalen
6367c069b1 Update .travis.yml 2018-11-26 09:16:32 -05:00
Sean Whalen
4a43243835 Update .travis.yml 2018-11-26 09:03:57 -05:00
Sean Whalen
8e81d61207 Update .travis.yml 2018-11-26 08:44:26 -05:00
Sean Whalen
8ea02668e7 Update .travis.yml 2018-11-26 08:39:53 -05:00
Sean Whalen
e805b9dbeb Update .travis.yml 2018-11-26 08:35:50 -05:00
Sean Whalen
f982d870fe Actually use publicsuffix2 2018-11-26 08:31:32 -05:00
Sean Whalen
be27080e4d Update .travis.yml 2018-11-26 08:16:47 -05:00
Sean Whalen
84f7930e39 use publicsuffix2 2018-11-26 07:58:26 -05:00
Sean Whalen
6072d9df0b Fix typo 2018-11-21 22:52:36 -05:00
Sean Whalen
89248b8124 Update CHANGELOG.md 2018-11-19 08:51:28 -05:00
Sean Whalen
6eaccdc2fc Bump version to 5.0.0 2018-11-19 08:25:18 -05:00
Sean Whalen
68e2437364 Fix formatting 2018-11-19 08:22:23 -05:00
Sean Whalen
dbb3d7de4d Fix formatting 2018-11-19 08:21:26 -05:00
Sean Whalen
6740ae1e5c Fix formatting 2018-11-19 08:19:55 -05:00
Sean Whalen
7b017612f8 Fix formatting 2018-11-19 08:17:31 -05:00
Sean Whalen
06425b6302 Update documentation 2018-11-19 08:14:21 -05:00
Sean Whalen
f5956ccd5b Fix crash when Arrival-Date header is missing in a forensic report 2018-11-18 18:55:42 -05:00
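A minimal sketch of handling the missing header described in the commit above: look up the optional `Arrival-Date` field and fall back to a sensible default instead of crashing. The fallback choice (current UTC time) is illustrative; parsedmarc's actual fallback may differ.

```python
from datetime import datetime, timezone
from email import message_from_string

sample = "Feedback-Type: auth-failure\nSource-IP: 192.0.2.1\n\n"
report = message_from_string(sample)

# Fall back to a default instead of crashing when Arrival-Date is absent.
arrival_date = report.get("Arrival-Date")
if arrival_date is None:
    arrival_date = datetime.now(timezone.utc).isoformat()
print(arrival_date)
```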
Sean Whalen
85489d9ea9 PEP8 fixes 2018-11-18 16:52:17 -05:00
Sean Whalen
92056738e3 5.0.0 2018-11-18 15:02:50 -05:00
Sean Whalen
aa88d3eeb4 Elasticsearch index migration 2018-11-17 20:43:55 -05:00
Sean Whalen
7d2301c5bd Update documentation 2018-11-16 09:51:14 -05:00
Sean Whalen
7fb81abef0 4.5.0 2018-11-16 09:46:16 -05:00
Sean Whalen
1bd6d46b61 Fix typo 2018-11-15 15:51:12 -05:00
Sean Whalen
9c4dca0545 4.5.0 2018-11-15 15:48:10 -05:00
Sean Whalen
017f7bc432 PEP8 fix 2018-11-15 14:50:38 -05:00
Sean Whalen
6a4445c799 Update requirements 2018-11-15 13:28:45 -05:00
Sean Whalen
13e2b50671 Caching improvements 2018-11-15 13:17:09 -05:00
Sean Whalen
0ddc904c9d Merge pull request #32 from vitalvas/master
Cache reverse DNS lookups
2018-11-15 11:43:11 -05:00
Sean Whalen
bbc64ca044 workaround invalid date_utc value 2018-11-15 11:39:28 -05:00
Sean Whalen
ba8c6fd30c Resolve flake8 warnings 2018-11-15 11:16:06 -05:00
Vitaliy Vasilenko
0d7e14a784 DNS performance optimization 2018-11-10 17:31:51 +02:00
Sean Whalen
c581efbae6 Bump version 2018-11-09 16:21:11 -05:00
Sean Whalen
4cf5dfc4e4 Remove unused import 2018-11-09 16:17:42 -05:00
Sean Whalen
76993d5e8b 4.4.1 - workaround for issue #31
Don't crash if Elasticsearch returns an unexpected result
2018-11-09 16:14:24 -05:00
Sean Whalen
6467ebe73d 4.4.0 - Fix packaging issues 2018-11-09 15:31:48 -05:00
Sean Whalen
2e1dcbf438 Add Davmail instructions to docs for EWS/OWA 2018-11-07 15:33:47 -05:00
Sean Whalen
d4936ea5a8 Update CHANGELOG.md 2018-11-06 17:37:36 -05:00
Sean Whalen
fe7c732084 Merge pull request #30 from mikesiegel/msiegel_kafkafix
A few Kafka client tweaks and changes
2018-11-06 17:31:27 -05:00
Mike Siegel
78e796a97c Fixing import 2018-10-29 08:08:40 -04:00
Mike Siegel
0e398f2c8d removing unused import 2018-10-29 07:51:21 -04:00
Mike Siegel
0c47ac178d Merging changes from upstream/master 2018-10-29 07:48:22 -04:00
Mike Siegel
a543cb4e44 Add T separator in datetime format 2018-10-29 07:48:01 -04:00
Sean Whalen
490b6f4700 Fix more documentation typos 2018-10-28 16:43:51 -04:00
Sean Whalen
9f7b7fcc93 Fix docs typo 2018-10-28 15:56:12 -04:00
Mike Siegel
e0c532c7eb - Moved report metadata and moved report_id, org_email, and org_name up a level in JSON object
- Send individual slices of report due to Kafka message size limit being 1MB
- Date calculations from ES client to aid in dashboard display
2018-10-25 15:38:18 -04:00
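The 1 MB limit mentioned above is Kafka's default `message.max.bytes`; slicing a report's records before producing keeps each message under it. A minimal sketch of that idea, assuming kafka-python and an already-parsed aggregate report dict (topic name, chunk size, and broker address are placeholders, not the project's actual client):

```python
# Illustrative only: send an aggregate report to Kafka in slices so each
# message stays under the broker's default ~1 MB size limit.
import json

from kafka import KafkaProducer  # kafka-python


def send_report_slices(report, topic="dmarc_aggregate", chunk_size=50,
                       bootstrap_servers="localhost:9092"):
    producer = KafkaProducer(
        bootstrap_servers=bootstrap_servers,
        value_serializer=lambda value: json.dumps(value).encode("utf-8"),
    )
    records = report.get("records", [])
    metadata = {key: value for key, value in report.items() if key != "records"}
    for start in range(0, len(records), chunk_size):
        message = dict(metadata)  # repeat the report metadata in every slice
        message["records"] = records[start:start + chunk_size]
        producer.send(topic, message)
    producer.flush()
```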
Sean Whalen
01b72119fe Update documentation 2018-10-25 09:43:00 -04:00
Sean Whalen
003ac9b0f1 Debug logging improvements 2018-10-25 08:52:09 -04:00
Sean Whalen
ee00861146 4.3.8 2018-10-25 01:51:03 -04:00
Sean Whalen
5eb533e2a5 Better error handling 2018-10-24 14:48:51 -04:00
Sean Whalen
c6728186cc Fix logging 2018-10-24 13:57:02 -04:00
Sean Whalen
c75244b476 Fix forensic attachment processing 2018-10-24 13:35:57 -04:00
Sean Whalen
2e13b83945 More logging 2018-10-24 12:55:52 -04:00
Sean Whalen
d4fda8c93c Fix IMAP datatypes 2018-10-24 12:49:00 -04:00
Sean Whalen
7576508f2c Add more verbose logging 2018-10-23 14:51:14 -04:00
Sean Whalen
825fba8951 4.3.7 - When checking an inbox, always recheck for messages when processing is complete 2018-10-22 06:19:28 -04:00
Sean Whalen
6cb78f65cf Fix another syntax error in the docs 2018-10-19 18:43:33 -04:00
Sean Whalen
e87da5dd0f Fix syntax error in docs 2018-10-19 18:37:52 -04:00
Sean Whalen
28379226c1 Merge branch 'master' of https://github.com/domainaware/parsedmarc 2018-10-19 18:15:58 -04:00
Sean Whalen
90cbd95063 Add Elasticsearch JVM heap size info to docs 2018-10-19 18:15:48 -04:00
Sean Whalen
446732dad2 4.3.6 - Be more forgiving for forensic reports with missing fields 2018-10-19 11:07:35 -04:00
Sean Whalen
4b1721a96d Update CHANGELOG.md 2018-10-18 10:01:38 -04:00
Sean Whalen
90207a39a4 4.3.5 - Fix base64 attachment decoding (#26) 2018-10-18 09:51:30 -04:00
Sean Whalen
29324d4b2a Add Visual Studio Code settings to .gitignore 2018-10-18 05:52:22 -04:00
Sean Whalen
2e362d9fb9 Update documentation 2018-10-16 14:25:48 -04:00
Sean Whalen
7f6eae712e Match requests's urllib3 requirement 2018-10-16 14:18:44 -04:00
Sean Whalen
33d7c17177 4.3.4 2018-10-16 14:11:06 -04:00
Sean Whalen
eff2435989 Update help 2018-10-16 13:36:25 -04:00
Sean Whalen
f54ef80b00 4.3.4 2018-10-16 13:31:04 -04:00
Sean Whalen
1323e85530 PEP8 fix 2018-10-16 11:42:21 -04:00
Sean Whalen
6a5d2c4105 Merge pull request #25 from inoio/fix-missing-comments
fix crash on missing comments
2018-10-16 11:38:47 -04:00
Dennis Brakhane
71745f006d fix crash on missing comments
Some DMARC reports contain the "comment" field with a null value. This
would cause a crash when trying to combine the comments into one.

Therefore, handle None comments as well.

Also remove a buggy line; the reason object already exists in the list,
so no need to append it (and even if we needed to append it, we would
need to iterate over a copy, otherwise we might end up in an endless
loop)
2018-10-16 16:49:18 +02:00
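A minimal sketch of the None-safe handling described above (not the library's exact code):

```python
# Illustrative only: combine policy override comments while skipping null or
# empty values, so a <comment/> element with no content cannot raise an exception.
def combine_comments(policy_override_reasons):
    comments = []
    for reason in policy_override_reasons:
        comment = reason.get("comment")
        if comment is not None and comment.strip():
            comments.append(comment.strip())
    return "; ".join(comments)
```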
Sean Whalen
dff55f7abe Update docs 2018-10-14 20:55:29 -04:00
Sean Whalen
0e65cf7ae2 Fix existing sample lookup 2018-10-14 20:37:57 -04:00
Sean Whalen
29ef06f892 Fix duplicate forensic report Elasticsearch lookup 2018-10-14 20:12:47 -04:00
Sean Whalen
77111075b4 Fix email parsing 2018-10-14 19:05:15 -04:00
Sean Whalen
30b04645d7 4.3.3 - Fix forensic report email processing 2018-10-14 18:44:02 -04:00
Sean Whalen
1253f729b1 4.3.2 release 2018-10-14 18:06:57 -04:00
Sean Whalen
2c8c7f4659 Fix email parsing 2018-10-14 17:58:47 -04:00
Sean Whalen
48ca180db5 Fix header checks 2018-10-14 05:16:52 -04:00
Sean Whalen
a57d6836aa Fix error message formatting 2018-10-14 05:11:36 -04:00
Sean Whalen
7fdee0ab76 Fix duplicate forensic report search for Elasticsearch 2018-10-14 04:48:25 -04:00
Sean Whalen
3a575e91a1 Fix syntax error 2018-10-14 04:00:24 -04:00
Sean Whalen
0f63290d6e Fix parsing of some emails 2018-10-14 03:55:06 -04:00
Sean Whalen
826583cd37 Update kibana_saved_objects.json and install docs 2018-10-12 16:03:37 -04:00
Sean Whalen
c56c538c88 Fix doc API layout 2018-10-12 14:23:05 -04:00
Sean Whalen
a4a03bb027 Fix docs 2018-10-12 14:17:18 -04:00
Sean Whalen
41e80f3788 Fix CI 2018-10-12 14:11:21 -04:00
Sean Whalen
03f987840b Revert "Remove rstcheck from build script"
This reverts commit b40ca17263.
2018-10-12 14:09:50 -04:00
Sean Whalen
b40ca17263 Remove rstcheck from build script 2018-10-12 14:06:09 -04:00
Sean Whalen
6971bc1bda Update build.sh 2018-10-12 13:58:26 -04:00
Sean Whalen
187c788b47 Fix changelog 2018-10-12 13:54:14 -04:00
Sean Whalen
658b5466ca Fix import 2018-10-12 12:35:10 -04:00
Sean Whalen
8480bee676 Fix docs 2018-10-12 12:32:26 -04:00
Sean Whalen
a50c055579 Update docs 2018-10-12 12:30:37 -04:00
Sean Whalen
52dba5041d Update build script 2018-10-12 12:22:22 -04:00
Sean Whalen
ca151f54f2 Fix doc formatting 2018-10-12 12:16:47 -04:00
Sean Whalen
6234bd1ef6 Update docs/conf.py 2018-10-12 12:12:15 -04:00
Sean Whalen
7076bccc8d Update documentation 2018-10-12 12:07:21 -04:00
Sean Whalen
49387d9033 4.3.0 release 2018-10-12 12:02:55 -04:00
Sean Whalen
7394c40167 Remove unused link from README 2018-10-11 19:04:21 -04:00
Sean Whalen
f45ab94e06 Update test suite 2018-10-11 19:01:02 -04:00
Sean Whalen
babdc661ac 4.3.0 release 2018-10-11 17:59:09 -04:00
Sean Whalen
a1e8506d42 Process email samples with missing dates 2018-10-11 16:32:09 -04:00
Sean Whalen
0a2aea0a7a Use latest mailparser 2018-10-11 16:05:46 -04:00
Sean Whalen
b335edacaf Downgrade mailparser to 3.5.1 2018-10-11 15:59:23 -04:00
Sean Whalen
e138c5467d Yet more refactoring 2018-10-11 14:51:29 -04:00
Sean Whalen
466745e5fb sigh again 2018-10-11 14:23:03 -04:00
Sean Whalen
c47b9fed4e sigh 2018-10-11 14:04:48 -04:00
Sean Whalen
f18bc98a96 Sigh 2018-10-11 14:01:40 -04:00
Sean Whalen
d8e0b05c6a another refactoring fix 2018-10-11 13:56:01 -04:00
Sean Whalen
7867baa842 Hopefully final refactor fix 2018-10-11 13:46:38 -04:00
Sean Whalen
7728713ae8 Yet another refactoring fix 2018-10-11 13:39:03 -04:00
Sean Whalen
231a921d5c More refactoring fixes 2018-10-11 13:36:39 -04:00
Sean Whalen
e76f89a338 Fix whitespace 2018-10-11 13:28:25 -04:00
Sean Whalen
28a62cdbc6 Really fix refactoring 2018-10-11 13:24:16 -04:00
Sean Whalen
694c2afe23 Fix refactoring 2018-10-11 12:44:50 -04:00
Sean Whalen
6f5b23445e Fix import 2018-10-11 12:33:52 -04:00
Sean Whalen
b37205a98d Major refactoring 2018-10-11 08:01:42 -04:00
Sean Whalen
ab37f7ac5c PEP 8 fix 2018-10-10 21:14:54 -04:00
Sean Whalen
cff1cede46 4.2.1 - Bug fixes and Kafka support 2018-10-10 20:33:17 -04:00
Sean Whalen
524f9c0327 Merge pull request #21 from mikesiegel/mikesiegel_kafka
Add Kafka Support
2018-10-10 19:18:22 -04:00
Mike Siegel
074ce9b815 Removed logger from import 2018-10-10 13:20:28 -04:00
Mike Siegel
8d1c0cf3a0 fix merge conflict 2018-10-10 12:55:57 -04:00
Mike Siegel
fe611ac9df added k version to setup.py 2018-10-10 11:57:41 -04:00
Mike Siegel
66e707bfdf bumping version 2018-10-10 10:12:34 -04:00
Mike Siegel
966495a2a9 PEP8 changes 2018-10-10 10:04:30 -04:00
Mike Siegel
19df7f65c4 PEP8 fixes 2018-10-10 09:54:03 -04:00
Sean Whalen
88e3a5e0d6 4.2.0 Release 2018-10-10 09:40:35 -04:00
Mike Siegel
687a44ee58 split out individual records. 2018-10-10 09:11:24 -04:00
Sean Whalen
20afbba7e2 PEP 8 fix 2018-10-10 08:29:11 -04:00
Sean Whalen
35e6a72691 Remove duplicate logging message 2018-10-10 08:27:28 -04:00
Sean Whalen
ee97a76654 More logging fixes 2018-10-10 08:25:32 -04:00
Mike Siegel
a3ba85803a Modified to send entire ordered dict to Kafka. Bug: would barf on reports larger than 10 megs 2018-10-10 08:07:44 -04:00
Sean Whalen
d25d01a230 More logging fixes 2018-10-09 23:28:55 -04:00
Sean Whalen
c944264760 Fix logging 2018-10-09 23:00:28 -04:00
Sean Whalen
0d1a4786e1 Fix logging 2018-10-09 20:49:12 -04:00
Sean Whalen
d3cdb81977 Merge branch 'master' of https://github.com/domainaware/parsedmarc 2018-10-09 20:48:35 -04:00
Sean Whalen
16dcb2edc7 Fix logging 2018-10-09 20:48:31 -04:00
Sean Whalen
21af33687c Fix module logging 2018-10-09 20:35:45 -04:00
Sean Whalen
8ea0e62bdd Add more debug logging details 2018-10-09 20:21:17 -04:00
Mike Siegel
d4cf4a7e5f forgot to flush 2018-10-09 14:08:02 -04:00
Mike Siegel
5827d8b137 Initial commit 2018-10-09 13:32:41 -04:00
Sean Whalen
fd2d5093a9 More logging when --debug is used 2018-10-09 11:48:31 -04:00
Sean Whalen
7d2949d6a7 Merge branch 'master' of https://github.com/domainaware/parsedmarc 2018-10-09 11:12:04 -04:00
Sean Whalen
df1c2bac5f Fix Splunk forensic dashboard sorting 2018-10-09 11:12:00 -04:00
Sean Whalen
e2ec3bc2da Change default logging level to WARNING 2018-10-09 10:55:40 -04:00
Sean Whalen
20433cd2b6 Logging and packaging fixes 2018-10-09 10:34:42 -04:00
Sean Whalen
f235149863 4.2.0 - Save each aggregate report record as a separate Splunk event 2018-10-08 14:22:43 -04:00
Sean Whalen
66af8e6090 Workaround for forensic/ruf reports that are missing Reported-Domain 2018-10-08 09:31:53 -04:00
Sean Whalen
f1fa8709c2 4.1.9 - Workaround for forensic/ruf reports that are missing Arrival-Date 2018-10-08 09:09:11 -04:00
Sean Whalen
5b5e65d48b Update docs/index.rst 2018-10-08 07:07:43 -04:00
Sean Whalen
37d40c01ba Still parse if spf is missing from auth_results 2018-10-07 17:54:25 -04:00
Sean Whalen
16a4be2205 4.1.8 - Be more forgiving of weird XML 2018-10-07 12:50:02 -04:00
Sean Whalen
ead03b9779 4.1.7 - Remove any invalid XML schema tags before parsing the XML (#18) 2018-10-06 15:29:02 -04:00
Sean Whalen
ad922ad028 4.1.6 - Fix typo in CLI parser 2018-10-05 18:12:13 -04:00
Sean Whalen
7a5e5b6d1f Update dashboard export - see issue #17 2018-10-05 17:44:10 -04:00
Sean Whalen
eda6d0907b Fix missing Kibana visualizations - #17 2018-10-05 16:39:32 -04:00
Sean Whalen
84bba2783b PEP 8 whitespace fix 2018-10-05 16:19:50 -04:00
Sean Whalen
293d3ecf74 Update documentation 2018-10-05 16:16:24 -04:00
Sean Whalen
20282b4d30 Complete feature request in issue #16 2018-10-05 16:07:53 -04:00
Sean Whalen
1c20bfe200 Update changelog 2018-10-05 14:49:13 -04:00
Sean Whalen
bb55bb3911 Add missing error message 2018-10-05 12:35:24 -04:00
Sean Whalen
a4373c73e6 Add more debugging messages 2018-10-05 12:30:27 -04:00
Sean Whalen
1696096583 Move/delete 10 IMAP messages at a time 2018-10-05 10:11:48 -04:00
Sean Whalen
ec4793241e Fix typo 2018-10-05 08:27:06 -04:00
Sean Whalen
cd6191463e Better IMAP error handling 2018-10-05 08:23:56 -04:00
Sean Whalen
a1927be492 4.1.5 2018-10-04 22:04:19 -04:00
Sean Whalen
0709f8cc2f 4.1.4 - Change default logging level to ERROR 2018-09-30 19:00:09 -04:00
Sean Whalen
07051212c4 Fix documentation typo 2018-09-30 15:00:27 -04:00
Sean Whalen
4604ef64bc Update the readme 2018-09-30 11:49:16 -04:00
Sean Whalen
2aa1e2ef23 Update documentation 2018-09-30 11:46:31 -04:00
Sean Whalen
123ec62052 Update docs 2018-09-29 17:48:46 -04:00
Sean Whalen
5cbd685019 Fix crash introduced in 4.1.0 when creating Elasticsearch indexes (Issue #15) 2018-09-29 14:14:04 -04:00
Sean Whalen
bb98377a29 4.1.2 2018-09-29 14:03:06 -04:00
Sean Whalen
ce74617195 Add Splunk HEC link to docs 2018-09-29 13:58:03 -04:00
Sean Whalen
71e6ded025 Fix documentation typo 2018-09-29 13:53:50 -04:00
Sean Whalen
2ce57aeffc Update documentation 2018-09-29 13:49:25 -04:00
Sean Whalen
625089a12c 4.1.1 2018-09-29 13:25:27 -04:00
Sean Whalen
32c46795e8 Fix time range in Splunk aggregate dashboard 2018-09-28 08:31:22 -04:00
Sean Whalen
b22fa6fdf7 Remove DKIM specific filters from splunk dashboards
Filtering on data that does not exist led to incomplete dashboards
2018-09-28 00:30:08 -04:00
Sean Whalen
c5e44327b3 Remove implicit wildcards from Splunk dashboards 2018-09-28 00:08:49 -04:00
Sean Whalen
db2625fff9 Add Splunk dashboard source XML 2018-09-27 23:49:32 -04:00
Sean Whalen
18255103ed Update CLI documentation 2018-09-27 12:08:00 -04:00
Sean Whalen
a7fb20713b 4.1.0 2018-09-27 12:01:48 -04:00
Sean Whalen
ec5e8a4ca1 4.0.2 - Use report timestamps for Splunk timestamps 2018-09-26 16:03:20 -04:00
Sean Whalen
c4e39d61b5 4.0.1 2018-09-26 14:48:56 -04:00
Sean Whalen
fa1b2721d7 Merge pull request #14 from domainaware/4.0
4.0
2018-09-26 13:15:59 -04:00
Sean Whalen
08806f0d0c Workaround for random Exchange/Office365 Server Unavailable IMAP errors 2018-09-26 13:03:33 -04:00
Sean Whalen
4a34445b81 Update documentation 2018-09-26 12:45:50 -04:00
Sean Whalen
c102c2f21c Fix splunk HEC submission and --outgoing-ssl option
Changed --outgoing-SSL to --outgoing-ssl
2018-09-26 12:32:39 -04:00
Sean Whalen
83a76ec0cd Fix aggregate report splunk conversion 2018-09-25 17:37:43 -04:00
Sean Whalen
cdb9546bc0 Add --hec-skip-certificate-verification option 2018-09-25 16:04:05 -04:00
Sean Whalen
c9177f3342 Only save to Splunk when there are things to save 2018-09-25 15:50:53 -04:00
Sean Whalen
caf6cd1872 Fix error formatting 2018-09-25 14:47:06 -04:00
Sean Whalen
fa38bea8ea Fix error output 2018-09-25 14:44:23 -04:00
Sean Whalen
eff7c552c9 Fix CLI argument logic 2018-09-25 14:26:30 -04:00
Sean Whalen
c964241cba Splunk HEC token not HEC key 2018-09-25 14:21:03 -04:00
Sean Whalen
ba3c9de9b7 Fix HEC key check 2018-09-25 14:15:09 -04:00
Sean Whalen
253d421e29 Splunk and SMTP improvements
SMTP issue #12 fixed (based on PR #13 )
2018-09-25 13:40:55 -04:00
Sean Whalen
861ee7d247 Update Splunk support 2018-09-25 13:06:27 -04:00
Sean Whalen
a1a4cbbf28 Use correct splunk sourcetype format 2018-09-25 10:01:02 -04:00
Sean Whalen
2a4f558bbc Always send creds when reconnecting to IMAP 2018-09-24 05:17:29 -04:00
Sean Whalen
b11c6d587c Fix IMAP reconnection 2018-09-21 23:45:24 -04:00
Sean Whalen
5657a27262 Use port 587 by default when sending email
Hopefully fixes issue #12
2018-09-21 08:42:44 -04:00
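For reference, mail submission on port 587 upgrades a plain connection with STARTTLS before authenticating; a rough sketch using Python's standard library (host, credentials, and message fields are placeholders, not the project's actual mailer):

```python
# Illustrative only: send results over SMTP submission (port 587) with STARTTLS.
import smtplib
from email.message import EmailMessage


def send_results(host, username, password, sender, recipient, body):
    message = EmailMessage()
    message["From"] = sender
    message["To"] = recipient
    message["Subject"] = "DMARC report results"
    message.set_content(body)
    with smtplib.SMTP(host, 587) as server:
        server.starttls()  # upgrade to TLS before logging in
        server.login(username, password)
        server.send_message(message)
```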
Sean Whalen
0a694b0a24 - Always use \n as the newline when generating CSVs 2018-09-19 11:30:34 -04:00
Sean Whalen
0989a8bb8a Fix SMTP AUTH extension not supported by server error on some SMTP servers
Issue #12
2018-09-19 08:01:30 -04:00
Sean Whalen
c051980f26 Update output example in documentation 2018-09-19 07:35:06 -04:00
Sean Whalen
6b01fc0f3f Fix .msg parsing CLI exception when msgconvert is not found in the system path 2018-09-18 21:54:26 -04:00
Sean Whalen
db4e145b7a Add User-Agent to GeoIP DB download 2018-09-18 21:40:34 -04:00
Sean Whalen
68c54d4c5c Add missing sub dictionary 2018-09-17 12:40:48 -04:00
Sean Whalen
aead7ee754 Add alignment booleans to JSON output 2018-09-17 12:35:27 -04:00
Sean Whalen
3fdd5457b1 Reduce default DNS timeout to 0.5 seconds 2018-09-17 11:45:08 -04:00
Sean Whalen
d18d9cf5d0 Fix changelog typo 2018-09-17 08:04:51 -04:00
Sean Whalen
9cf113abdc Fix PSL download 2018-09-16 23:05:52 -04:00
Sean Whalen
2796fdd691 PEP 8 fix 2018-09-16 23:02:18 -04:00
Sean Whalen
5160d687f3 Update CLI docs 2018-09-16 22:56:51 -04:00
Sean Whalen
b46fec8983 4.0.0 prelease 2018-09-16 22:51:49 -04:00
Sean Whalen
e8dd04f952 Update systemd config example 2018-09-13 15:50:17 -04:00
Sean Whalen
4d0bf2723f Require sphinx==1.7.9 when building
Sphinx 1.8.0 breaks rstcheck
2018-09-13 14:48:04 -04:00
Sean Whalen
b4b2dc298a Merge branch 'master' of https://github.com/domainaware/parsedmarc 2018-09-13 13:30:27 -04:00
Sean Whalen
e274052133 3.9.7 - Completely reset IMAP connection when a broken pipe is encountered 2018-09-13 13:30:20 -04:00
Sean Whalen
0bb7a5108a Add After option to systemd service 2018-09-11 12:58:55 -04:00
Sean Whalen
f59c0d62fc 3.9.6 - Finish incomplete broken pipe fix 2018-09-11 11:18:56 -04:00
Sean Whalen
a6dbf807e4 3.9.5 - Refactor to use a shared IMAP connection for inbox watching and message downloads 2018-09-10 08:48:04 -04:00
Sean Whalen
b1b7f3c329 3.9.4 Fix moving/deleting emails 2018-09-06 16:26:41 -04:00
Sean Whalen
b9c4c62b00 3.9.3 - Fix crash when forensic reports are missing Arrival-Date 2018-09-06 15:09:44 -04:00
Sean Whalen
92f4085386 fix build.sh typo 2018-09-06 13:48:36 -04:00
Sean Whalen
a6094b2144 3.9.2 - Fix PEP 8 spacing and update build.sh 2018-09-06 13:43:41 -04:00
Sean Whalen
8e102b4e95 Actually bump version to 3.9.1 2018-09-06 12:47:59 -04:00
Sean Whalen
51987ba770 3.9.1 - Use COPY and delete if an IMAP server does not support MOVE (closes issue #9) 2018-09-06 12:45:56 -04:00
Sean Whalen
bcde4bebd5 3.9.0 - Multiple bug fixes 2018-09-06 11:10:18 -04:00
Sean Whalen
f19d623d7d Reduce IMAP IDLE refresh rate to 5 minutes
G-Suite is resetting connections after 10 minutes
2018-09-05 04:55:46 -04:00
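The pattern behind this change is to re-issue IDLE well before the server's cutoff; a rough sketch with IMAPClient (illustrative only, not the project's actual watcher; host, credentials, and folder are placeholders):

```python
# Illustrative only: keep an IMAP IDLE session alive by refreshing it every
# 5 minutes, since some servers drop idle connections after ~10 minutes.
from imapclient import IMAPClient


def watch_inbox(host, username, password, folder="INBOX"):
    with IMAPClient(host, ssl=True) as client:
        client.login(username, password)
        client.select_folder(folder)
        while True:
            client.idle()
            responses = client.idle_check(timeout=300)  # refresh after 5 minutes
            client.idle_done()
            if responses:
                # New activity was reported; fetch and process report messages here.
                pass
```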
Sean Whalen
7c6a0b185a 3.8.2 2018-09-03 22:07:08 -04:00
Sean Whalen
8afa271cb7 Merge pull request #8 from mikesiegel/nameservers_args
Updated to pass nameserver arguments to all occurrences of parse_repor…
2018-09-02 20:34:33 -04:00
Mike Siegel
ff8aa4fc32 Updated to pass nameserver arguments to all occurrences of parse_report_record(). This significantly speeds up processing long reports from the inbox in my testing. 2018-08-30 12:04:37 -04:00
Sean Whalen
22c2e8799a 3.8.1 - Better handling of .msg files when msgconvert is not installed 2018-08-27 09:04:08 -04:00
Sean Whalen
ca0397c331 Add DKIM selector to dashboard output 2018-08-25 23:48:40 -04:00
Sean Whalen
4853537765 PEP 8 fix and more documentation 2018-08-22 06:50:59 -04:00
Sean Whalen
3954ecc595 3.8.0 - Fix saving to Elasticsearch when the To header is missing from a forensic sample 2018-08-21 16:00:46 -04:00
Sean Whalen
245262d997 3.8.0 - Allow the forensic To header to be missing 2018-08-21 15:53:44 -04:00
Sean Whalen
8438e9bd5a 3.8.0 - Remove excess \r from forensic reports 2018-08-21 15:44:33 -04:00
Sean Whalen
7f7bde3145 3.8.0 - Parse forensic reports if they are base64-encoded 2018-08-21 15:33:07 -04:00
Sean Whalen
7106fe620e Better comments 2018-08-21 14:24:14 -04:00
Sean Whalen
1b14147d5b Only use . as an IMAP folder hierarchy separator when / does not work 2018-08-21 13:56:10 -04:00
Sean Whalen
1e130ca70a 3.7.4 - Fix dovecot IMAP server support (closes #5)
Use `.` instead of `/` as the IMAP folder hierarchy separator

https://stackoverflow.com/questions/51951887/what-is-the-proper-way-to-create-imap-subfolders-using-imapclient-for-python/51952493#51952493
2018-08-21 11:57:06 -04:00
Sean Whalen
7758411244 Workaround for dovecot mail server bug (issue #3) 2018-08-21 03:47:04 -04:00
Sean Whalen
d74ec346ce 3.7.3 - Fix saving attachment from forensic sample to Elasticsearch 2018-08-19 11:55:29 -04:00
Sean Whalen
4d1cdf9e18 Add booleans to alignment details in dashboard 2018-08-10 13:59:50 -04:00
Sean Whalen
ebc79cbe9c 3.7.2 - Fix pypy support and add pypy documentation
https://github.com/elastic/elasticsearch-dsl-py/blob/master/Changelog.rst#620-2018-07-03

https://github.com/elastic/elasticsearch-dsl-py/issues/953
2018-08-01 11:09:55 -04:00
Sean Whalen
f0040ce53e Change uses of DocType to Document to support refactoring in Elasticsearch 6.2.0
https://github.com/elastic/elasticsearch-dsl-py/blob/master/Changelog.rst#620-2018-07-03
2018-08-01 08:51:42 -04:00
Sean Whalen
46fdbc79a2 Fix HTTPS redirect in example NGINX config 2018-07-31 10:48:04 -04:00
Sean Whalen
c3a862c245 Merge branch 'master' of https://github.com/domainaware/parsedmarc 2018-07-26 00:45:28 -04:00
Sean Whalen
2d0b0098a1 Add sdist to build automation 2018-07-26 00:45:18 -04:00
Sean Whalen
89a93ed4a8 Update line colors for Message Disposition Over Time 2018-07-23 10:37:14 -04:00
Sean Whalen
aed4d60ccb 3.7.1 2018-07-18 10:55:01 -04:00
Sean Whalen
b97a6f5150 3.7.0 2018-07-18 09:46:40 -04:00
Sean Whalen
de0a3b7c56 Fix documentation formatting 2018-07-01 17:20:23 -04:00
Sean Whalen
a8471848dc Fix documentation formatting 2018-06-30 10:46:36 -04:00
Sean Whalen
9b25e294ea Update documentation - closes #3 2018-06-30 10:35:11 -04:00
Sean Whalen
105e286d79 Consolidate DMARC Summary and DMARC Alignment Failure dashboards 2018-06-30 10:07:55 -04:00
Sean Whalen
900b1707fb Update documentation to reflect dashboard changes 2018-06-30 10:05:41 -04:00
Sean Whalen
37ce15e284 Merge branch 'master' of https://github.com/domainaware/parsedmarc 2018-06-30 09:59:44 -04:00
Sean Whalen
3cec775854 Update warnings 2018-06-30 09:59:39 -04:00
Sean Whalen
e30a5bb14f 3.6.1 - Parse aggregate reports with missing spf domain 2018-06-29 11:56:47 -04:00
Sean Whalen
c269e49c2a Improve documentation 2018-06-29 09:01:37 -04:00
Sean Whalen
155351edbc Fix wording 2018-06-28 18:48:58 -04:00
Sean Whalen
60f9c06458 Fix documentation formatting 2018-06-28 18:41:29 -04:00
Sean Whalen
a049acfa5b Lots of documentation improvements 2018-06-28 18:04:32 -04:00
Sean Whalen
3f19489a9b Add dashboard switching note 2018-06-28 14:46:04 -04:00
Sean Whalen
c351d22095 Fix docs typo 2018-06-28 14:13:40 -04:00
Sean Whalen
9877abaf92 Fix documentation formatting 2018-06-28 14:09:28 -04:00
Sean Whalen
6c7965e35d Fix documentation link 2018-06-28 13:45:57 -04:00
Sean Whalen
cc3aff690e Fix documentation formatting 2018-06-28 13:09:43 -04:00
Sean Whalen
8584fd1a81 Add dashboard documentation 2018-06-28 13:05:08 -04:00
Sean Whalen
a1defd1512 Show from header in summary dashboard instead of base domain 2018-06-26 12:43:18 -04:00
Sean Whalen
45fe413b30 Make graph label names consistent 2018-06-21 14:23:19 -04:00
Sean Whalen
44f87fe924 3.6.0 - Much more robust error handling 2018-06-20 10:39:20 -04:00
Sean Whalen
1761f12604 Fix crash when parsing report with empty <auth_results></auth_results> 2018-06-20 09:47:50 -04:00
Sean Whalen
70d26506bb Fix dashboard loading times 2018-06-19 12:15:55 -04:00
Sean Whalen
9b85662988 Dashboard layout improvements 2018-06-18 17:06:37 -04:00
Sean Whalen
0f0b40238f Tons of dashboard fixes 2018-06-18 11:40:00 -04:00
Sean Whalen
0df1ebc3d7 Fix typo in docs 2018-06-10 12:32:19 -04:00
Sean Whalen
b8ff5b10d6 Bump setup.py version 2018-06-10 12:20:36 -04:00
Sean Whalen
acf912eaa4 Use Cloudflare's public DNS resolvers by default instead of Google's 2018-06-10 11:21:07 -04:00
Sean Whalen
aca42a6300 Merge branch 'master' of https://github.com/domainaware/parsedmarc 2018-06-10 09:22:59 -04:00
Sean Whalen
9390e10f54 3.5.0 2018-06-10 09:22:48 -04:00
Sean Whalen
011a0f299f Merge pull request #2 from soujak/patch-1
Fix typo
2018-06-10 09:10:52 -04:00
Sean Whalen
ee7ecc1c18 Add disposition graph to Kibana 2018-06-04 10:28:37 -04:00
Sean Whalen
519ff1ce5a Dashboard fixes 2018-06-02 12:18:54 -04:00
Sean Whalen
29d3fb63d7 Dashboard improvements 2018-06-02 12:03:30 -04:00
Sean Whalen
2deb92686a Fix typos in docs 2018-05-31 12:28:48 -04:00
Marco Solieri
5a397821d0 Fix typo 2018-05-22 09:44:40 +02:00
Sean Whalen
2c7002d3cc Fix testing automation 2018-05-14 19:39:14 -04:00
Sean Whalen
055b98c88a Update README 2018-03-31 23:43:39 -04:00
Sean Whalen
c50a907988 Fix build 2018-03-31 23:33:43 -04:00
Sean Whalen
a2bbc0effc Update build script 2018-03-31 23:05:14 -04:00
Sean Whalen
da57ccdf18 3.4.1 2018-03-31 23:02:10 -04:00
Sean Whalen
085c90a028 Update dashboard screenshot 2018-03-29 21:52:21 -04:00
Sean Whalen
5df5e362d6 Fix typos in dashboard 2018-03-29 21:00:46 -04:00
Sean Whalen
cc38f05593 Add debugging 2018-03-29 20:47:02 -04:00
Sean Whalen
c27acb7901 Fix logging 2018-03-29 18:33:05 -04:00
Sean Whalen
19157830bf Update CLI help docs 2018-03-29 18:23:39 -04:00
Sean Whalen
12288606f6 Update CLI help docs 2018-03-29 18:20:30 -04:00
Sean Whalen
ab9cce3197 Remove conflicting option string 2018-03-29 18:13:39 -04:00
Sean Whalen
845ee644fd Remove conflicting option string 2018-03-29 18:11:17 -04:00
Sean Whalen
3f00677449 It's actually 3.4.0 2018-03-29 18:05:17 -04:00
Sean Whalen
e9821c585a Update docs 2018-03-29 17:18:12 -04:00
Sean Whalen
95f58018f2 Prepare for the 2.4.0 release 2018-03-29 17:02:58 -04:00
Sean Whalen
68653c6b2c Remove unneeded sleep 2018-03-29 14:07:08 -04:00
Sean Whalen
77c0255f54 Better IMAP error handling 2018-03-29 14:05:51 -04:00
Sean Whalen
e54def617d Better IDLE refresh method 2018-03-29 13:38:16 -04:00
Sean Whalen
b8eca3a536 Refresh IDLE command every 10 minutes 2018-03-29 12:32:47 -04:00
Sean Whalen
f28bd7f059 Properly scale image in README 2018-03-28 09:50:08 -04:00
Sean Whalen
b55fbf7568 Better exception handling 2018-03-28 08:58:22 -04:00
Sean Whalen
b4eeca6155 Put brake back 2018-03-28 08:42:32 -04:00
Sean Whalen
d4af392b58 Fix IMAP IDLE processing bug 2018-03-27 23:40:19 -04:00
Sean Whalen
e3035242f9 Remove debugging code 2018-03-27 16:40:00 -04:00
Sean Whalen
486dbce7a6 Make dup search more specific 2018-03-27 16:25:32 -04:00
Sean Whalen
dc9a935fe1 Make dmarc.elastic classes protected 2018-03-27 10:50:19 -04:00
Sean Whalen
02944a8f70 Add pip to build script 2018-03-27 10:43:26 -04:00
Sean Whalen
5405675e26 Fix docs 2018-03-27 10:39:35 -04:00
Sean Whalen
df5f407c7d Add parsedmarc.elastic to docs 2018-03-27 10:37:48 -04:00
Sean Whalen
8a49aacd43 Fix error message 2018-03-27 10:34:15 -04:00
Sean Whalen
eb05aaf709 3.3.0 2018-03-27 10:22:49 -04:00
Sean Whalen
02d2c12188 Fix existing aggregate report query 2018-03-27 10:15:43 -04:00
Sean Whalen
84adf2be2e Actually fix the bug 2018-03-27 10:08:36 -04:00
Sean Whalen
5d2e766d65 Fix existing report warning 2018-03-27 09:57:05 -04:00
Sean Whalen
35adcb63ca 3.2.1 2018-03-27 06:24:26 -04:00
Sean Whalen
aeb16b5f73 3.2.0 2018-03-26 23:04:08 -04:00
Sean Whalen
7ff1cd6ae8 3.1.0 2018-03-26 22:06:59 -04:00
Sean Whalen
faeef6e43d Fix screenshot target 2018-03-26 18:05:53 -04:00
Sean Whalen
bfe6fcfb7b Fix screenshot scaling 2018-03-26 18:00:26 -04:00
Sean Whalen
513d703440 Scale down screenshots 2018-03-26 17:53:04 -04:00
Sean Whalen
6409a90a5b Fix typo 2018-03-26 17:50:21 -04:00
Sean Whalen
8c99baba30 Add screenshots to docs 2018-03-26 17:46:33 -04:00
Sean Whalen
401edcba9c Add Kibana screenshots 2018-03-26 17:36:11 -04:00
Sean Whalen
87e1b6737e Update documentation 2018-03-26 13:38:43 -04:00
Sean Whalen
880644a6ca Increase Forensic Samples dashboard timeframe 2018-03-26 12:51:27 -04:00
Sean Whalen
cc9c496be1 Bug fixes 2018-03-26 10:51:04 -04:00
Sean Whalen
eea57a5719 PEP8 fixes 2018-03-25 23:12:07 -04:00
Sean Whalen
bcf0acef34 Fix exception handling 2018-03-25 23:06:34 -04:00
Sean Whalen
f755696df0 Yet more debugging 2018-03-24 18:46:50 -04:00
Sean Whalen
0392675a07 Add missing field 2018-03-24 18:28:54 -04:00
Sean Whalen
19b1df4f44 More debugging 2018-03-24 18:25:18 -04:00
Sean Whalen
f2c0fde99d Debugging 2018-03-24 12:43:25 -04:00
Sean Whalen
3c1f664d83 Update changelog 2018-03-20 11:46:51 -04:00
Sean Whalen
fc9222322f Update tests 2018-03-20 11:39:14 -04:00
Sean Whalen
182c5870c1 Fix typo in changelog 2018-03-20 11:24:06 -04:00
Sean Whalen
d13be1aab4 Normalize aggregate report IDs 2018-03-20 11:22:28 -04:00
Sean Whalen
4090f10d6f More exception handling 2018-03-20 11:05:37 -04:00
Sean Whalen
6de5eba4a3 More refactoring 2018-03-20 10:18:42 -04:00
Sean Whalen
cd54112782 More refactoring fixes 2018-03-20 09:57:13 -04:00
Sean Whalen
248a731df8 Fix refactoring 2018-03-19 21:52:02 -04:00
Sean Whalen
b0f57d6233 Fix typo 2018-03-19 15:54:13 -04:00
Sean Whalen
d11a9f4d34 More detailed warnings 2018-03-19 15:49:58 -04:00
Sean Whalen
4a7df9804b Add forgotten code 2018-03-19 12:44:31 -04:00
Sean Whalen
a0219004aa Fix CLI args 2018-03-19 12:17:17 -04:00
Sean Whalen
268b78b10a Prepare to test 3.0.0 2018-03-19 12:09:17 -04:00
Sean Whalen
effbb0bceb Move emails in chunks 2018-03-14 22:12:00 -04:00
Sean Whalen
fc16bb8a2f Fix CLI option 2018-03-14 21:47:08 -04:00
Sean Whalen
ae8c12732e Fix setting reports folder on IDLE 2018-03-14 21:27:36 -04:00
Sean Whalen
d781001087 Try again 2018-03-14 21:08:54 -04:00
Sean Whalen
9a4a66f22b Actually fix file content detection 2018-03-14 21:02:11 -04:00
Sean Whalen
1f73dcfe8c Fix file type detection 2018-03-14 20:55:10 -04:00
Sean Whalen
1c1280a0a2 Fix email retrieval 2018-03-14 20:48:29 -04:00
Sean Whalen
2b25308402 Testing 2.2.0 2018-03-14 20:43:35 -04:00
Sean Whalen
d900ebf0eb 2.1.2 2018-03-06 10:01:27 -05:00
Sean Whalen
28a115a223 2.1.1 - Documentation fixes 2018-03-06 07:14:37 -05:00
Sean Whalen
0924b0bfba Change default attachment name. 2.1.0 For real this time. 2018-03-05 18:08:53 -05:00
Sean Whalen
fd31cf164f Better error messages 2018-03-05 18:05:33 -05:00
Sean Whalen
e6a44232aa More error handling 2018-03-05 17:49:28 -05:00
Sean Whalen
3f823b4818 Email bug fixes - 2.0.0 release 2018-03-05 17:43:54 -05:00
Sean Whalen
544c915f0b Make mail sending more flexible 2018-03-05 17:31:14 -05:00
Sean Whalen
5043d34872 Fix sending email without login 2018-03-05 17:16:06 -05:00
Sean Whalen
614c8b68fb Use STARTTLS instead of SSL 2018-03-05 17:08:34 -05:00
Sean Whalen
1b50d37e30 Better error handling 2018-03-05 17:04:31 -05:00
Sean Whalen
7f686497ec Fix zip creation 2018-03-05 16:54:44 -05:00
Sean Whalen
d56d01592d Fix CLI input 2018-03-05 16:43:03 -05:00
Sean Whalen
545fd31783 2.1.0 2018-03-05 16:19:21 -05:00
Sean Whalen
d98df5f02b 2.0.1 2018-03-04 16:03:37 -05:00
Sean Whalen
7b3eb2aa2f Drop support for Python 2 2018-03-04 12:34:21 -05:00
Sean Whalen
aa73f55681 Fix Python 2 issue 2018-03-04 12:04:23 -05:00
Sean Whalen
0135d46afb Add missing dependency mail-parser 2018-03-04 11:54:00 -05:00
Sean Whalen
36a2cef580 Merge conflicts 2018-03-04 11:46:33 -05:00
Sean Whalen
05d49222c6 2.0.0 2018-03-04 11:22:24 -05:00
Sean Whalen
ff4e32e43d 1.1.0 2018-02-08 15:13:35 -05:00
Sean Whalen
8015e6a25c Update build script 2018-02-06 10:56:06 -05:00
Sean Whalen
c8d7bc703e 1.0.5 - Properly format errors list in CSV output 2018-02-06 10:40:58 -05:00
Sean Whalen
4d0c33b59f 1.0.4 - Prefix public suffix and GeoIP2 database filenames with . 2018-02-05 23:55:17 -05:00
Sean Whalen
ff518558cc 1.0.3 version bump 2018-02-05 23:10:47 -05:00
Sean Whalen
ef24b8563c 1.0.3 - Fix doc flaws 2018-02-05 23:09:10 -05:00
Sean Whalen
1bb26a718e 1.0.1 - Fix packaging flaw 2018-02-05 22:26:58 -05:00
132 changed files with 75168 additions and 1371 deletions

5
.dockerignore Normal file

@@ -0,0 +1,5 @@
venv/
dist/
build/
test/
parsedmarc.egg-info/

1
.gitattributes vendored Normal file

@@ -0,0 +1 @@
samples/* binary

55
.github/workflows/docker.yml vendored Normal file

@@ -0,0 +1,55 @@
name: Build docker image
permissions:
contents: read
on:
release:
types:
- published
push:
branches:
- master
env:
REGISTRY: ghcr.io
IMAGE_NAME: ${{ github.repository }}
jobs:
build-and-push-image:
runs-on: ubuntu-latest
permissions:
contents: read
packages: write
steps:
- name: Checkout repository
uses: actions/checkout@v5
- name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
images: |
${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
# generate Docker tags based on the following events/attributes
tags: |
type=ref,event=branch
type=ref,event=pr
type=semver,pattern={{version}}
type=semver,pattern={{major}}.{{minor}}
- name: Log in to the Container registry
uses: docker/login-action@v3
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Build and push Docker image
uses: docker/build-push-action@v6
with:
context: .
push: ${{ github.event_name == 'release' }}
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}

70
.github/workflows/python-tests.yml vendored Normal file

@@ -0,0 +1,70 @@
name: Python tests
permissions:
contents: read
on:
push:
branches: [ master ]
pull_request:
branches: [ master ]
jobs:
build:
runs-on: ubuntu-latest
services:
elasticsearch:
image: elasticsearch:8.19.7
env:
discovery.type: single-node
cluster.name: parsedmarc-cluster
discovery.seed_hosts: elasticsearch
bootstrap.memory_lock: true
xpack.security.enabled: false
xpack.license.self_generated.type: basic
ports:
- 9200:9200
- 9300:9300
strategy:
fail-fast: false
matrix:
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
steps:
- uses: actions/checkout@v5
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v6
with:
python-version: ${{ matrix.python-version }}
- name: Install system dependencies
run: |
sudo apt-get -q update
sudo apt-get -qy install libemail-outlook-message-perl
- name: Install Python dependencies
run: |
python -m pip install --upgrade pip
pip install .[build]
- name: Test building documentation
run: |
cd docs
make html
- name: Check code style
run: |
ruff check .
- name: Run unit tests
run: |
pytest --cov --cov-report=xml tests.py
- name: Test sample DMARC reports
run: |
pip install -e .
parsedmarc --debug -c ci.ini samples/aggregate/*
parsedmarc --debug -c ci.ini samples/forensic/*
- name: Test building packages
run: |
hatch build
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v5
with:
token: ${{ secrets.CODECOV_TOKEN }}

45
.gitignore vendored

@@ -24,6 +24,7 @@ wheels/
*.egg-info/
.installed.cfg
*.egg
_tmp*
# PyInstaller
# Usually these files are written by a python script from a template
@@ -45,6 +46,7 @@ nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
@@ -62,7 +64,7 @@ instance/
.scrapy
# Sphinx documentation
docs/_build/
docs/build/
# PyBuilder
target/
@@ -103,18 +105,43 @@ ENV/
# PyCharm Project settings
.idea/
# I/O files
# VS Code launch config
#.vscode/launch.json
*.xml
*.zip
*.gz
*.json
*.csv
# Visual Studio Code settings
#.vscode/
# I/O files
output/
*.xls*
# LibreOffice lock files
.~*
# ignore data files
# Data files
*.dat
*.mmdb
GeoIP*
GeoLite*
# Temp files
tmp/
# Config files
prod*.ini
stage*.ini
dev*.ini
# Private samples
samples/private
*.html
*.sqlite-journal
parsedmarc.ini
scratch.py
parsedmarc/resources/maps/base_reverse_dns.csv
parsedmarc/resources/maps/unknown_base_reverse_dns.csv
parsedmarc/resources/maps/sus_domains.csv
parsedmarc/resources/maps/unknown_domains.txt
*.bak


@@ -1,23 +0,0 @@
language: python
sudo: false
python:
- '2.7'
- '3.4'
- '3.5'
- '3.6'
# commands to install dependencies
install:
- "pip install flake8 pytest-cov pytest coveralls"
- "pip install -r requirements.txt"
# commands to run samples
script:
- "flake8 *.py"
- "cd docs"
- "make html"
- "cd .."
- "python tests.py"
- "python setup.py bdist_wheel"

45
.vscode/launch.json vendored Normal file

@@ -0,0 +1,45 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Python Debugger: Current File",
"type": "debugpy",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal"
},
{
"name": "tests.py",
"type": "debugpy",
"request": "launch",
"program": "tests.py",
"console": "integratedTerminal"
},
{
"name": "sample",
"type": "debugpy",
"request": "launch",
"module": "parsedmarc.cli",
"args": ["samples/private/sample"]
},
{
"name": "sortlists.py",
"type": "debugpy",
"request": "launch",
"program": "sortlists.py",
"cwd": "${workspaceFolder}/parsedmarc/resources/maps",
"console": "integratedTerminal"
},
{
"name": "find_unknown_base_reverse_dns.py",
"type": "debugpy",
"request": "launch",
"program": "find_unknown_base_reverse_dns.py",
"cwd": "${workspaceFolder}/parsedmarc/resources/maps",
"console": "integratedTerminal"
}
]
}

160
.vscode/settings.json vendored Normal file

@@ -0,0 +1,160 @@
{
"[python]": {
"editor.defaultFormatter": "charliermarsh.ruff",
"editor.formatOnSave": true,
// Let Ruff handle lint fixes + import sorting on save
"editor.codeActionsOnSave": {
"source.fixAll.ruff": "explicit",
"source.organizeImports.ruff": "explicit"
}
},
"markdownlint.config": {
"MD024": false
},
"cSpell.words": [
"adkim",
"akamaiedge",
"amsmath",
"andrewmcgilvray",
"arcname",
"aspf",
"autoclass",
"automodule",
"backported",
"bellsouth",
"boto",
"brakhane",
"Brightmail",
"CEST",
"CHACHA",
"checkdmarc",
"Codecov",
"confnew",
"dateparser",
"dateutil",
"Davmail",
"DBIP",
"dearmor",
"deflist",
"devel",
"DMARC",
"Dmarcian",
"dnspython",
"dollarmath",
"dpkg",
"exampleuser",
"expiringdict",
"fieldlist",
"GELF",
"genindex",
"geoip",
"geoipupdate",
"Geolite",
"geolocation",
"githubpages",
"Grafana",
"hostnames",
"htpasswd",
"httpasswd",
"httplib",
"IMAP",
"imapclient",
"infile",
"Interaktive",
"IPDB",
"journalctl",
"keepalive",
"keyout",
"keyrings",
"Leeman",
"libemail",
"linkify",
"LISTSERV",
"lxml",
"mailparser",
"mailrelay",
"mailsuite",
"maxdepth",
"MAXHEADERS",
"maxmind",
"mbox",
"mfrom",
"michaeldavie",
"mikesiegel",
"Mimecast",
"mitigations",
"MMDB",
"modindex",
"msgconvert",
"msgraph",
"MSSP",
"multiprocess",
"Munge",
"ndjson",
"newkey",
"Nhcm",
"nojekyll",
"nondigest",
"nosecureimap",
"nosniff",
"nwettbewerb",
"opensearch",
"opensearchpy",
"parsedmarc",
"passsword",
"Postorius",
"premade",
"procs",
"publicsuffix",
"publicsuffixlist",
"publixsuffix",
"pygelf",
"pypy",
"pytest",
"quickstart",
"Reindex",
"replyto",
"reversename",
"Rollup",
"Rpdm",
"SAMEORIGIN",
"sdist",
"Servernameone",
"setuptools",
"smartquotes",
"SMTPTLS",
"sortlists",
"sortmaps",
"sourcetype",
"STARTTLS",
"tasklist",
"timespan",
"tlsa",
"tlsrpt",
"toctree",
"TQDDM",
"tqdm",
"truststore",
"Übersicht",
"uids",
"Uncategorized",
"unparasable",
"uper",
"urllib",
"Valimail",
"venv",
"Vhcw",
"viewcode",
"virtualenv",
"WBITS",
"webmail",
"Wettbewerber",
"Whalen",
"whitespaces",
"xennn",
"xmltodict",
"xpack",
"zscholl"
],
}

File diff suppressed because it is too large.

35
Dockerfile Normal file

@@ -0,0 +1,35 @@
ARG BASE_IMAGE=python:3.13-slim
ARG USERNAME=parsedmarc
ARG USER_UID=1000
ARG USER_GID=$USER_UID
## build
FROM $BASE_IMAGE AS build
WORKDIR /app
RUN pip install hatch
COPY parsedmarc/ parsedmarc/
COPY README.md pyproject.toml ./
RUN hatch build
## image
FROM $BASE_IMAGE
ARG USERNAME
ARG USER_UID
ARG USER_GID
COPY --from=build /app/dist/*.whl /tmp/dist/
RUN set -ex; \
groupadd --gid ${USER_GID} ${USERNAME}; \
useradd --uid ${USER_UID} --gid ${USER_GID} -m ${USERNAME}; \
pip install /tmp/dist/*.whl; \
rm -rf /tmp/dist
USER $USERNAME
ENTRYPOINT ["parsedmarc"]

64
README.md Normal file

@@ -0,0 +1,64 @@
# parsedmarc
[![Build
Status](https://github.com/domainaware/parsedmarc/actions/workflows/python-tests.yml/badge.svg)](https://github.com/domainaware/parsedmarc/actions/workflows/python-tests.yml)
[![Code
Coverage](https://codecov.io/gh/domainaware/parsedmarc/branch/master/graph/badge.svg)](https://codecov.io/gh/domainaware/parsedmarc)
[![PyPI
Package](https://img.shields.io/pypi/v/parsedmarc.svg)](https://pypi.org/project/parsedmarc/)
[![PyPI - Downloads](https://img.shields.io/pypi/dm/parsedmarc?color=blue)](https://pypistats.org/packages/parsedmarc)
<p align="center">
<img src="https://raw.githubusercontent.com/domainaware/parsedmarc/refs/heads/master/docs/source/_static/screenshots/dmarc-summary-charts.png?raw=true" alt="A screenshot of DMARC summary charts in Kibana"/>
</p>
`parsedmarc` is a Python module and CLI utility for parsing DMARC
reports. When used with Elasticsearch and Kibana (or Splunk), it works
as a self-hosted open-source alternative to commercial DMARC report
processing services such as Agari Brand Protection, Dmarcian, OnDMARC,
ProofPoint Email Fraud Defense, and Valimail.
> [!NOTE]
> __Domain-based Message Authentication, Reporting, and Conformance__ (DMARC) is an email authentication protocol.
## Help Wanted
This project is maintained by one developer. Please consider reviewing the open
[issues](https://github.com/domainaware/parsedmarc/issues) to see how you can
contribute code, documentation, or user support. Assistance on the pinned
issues would be particularly helpful.
Thanks to all
[contributors](https://github.com/domainaware/parsedmarc/graphs/contributors)!
## Features
- Parses draft and 1.0 standard aggregate/rua DMARC reports
- Parses forensic/failure/ruf DMARC reports
- Parses reports from SMTP TLS Reporting
- Can parse reports from an inbox over IMAP, Microsoft Graph, or Gmail API
- Transparently handles gzip or zip compressed reports
- Consistent data structures
- Simple JSON and/or CSV output
- Optionally email the results
- Optionally send the results to Elasticsearch, Opensearch, and/or Splunk, for
use with premade dashboards
- Optionally send reports to Apache Kafka
## Python Compatibility
This project supports the following Python versions, which are either actively maintained or are the default versions
for RHEL or Debian.
| Version | Supported | Reason |
|---------|-----------|------------------------------------------------------------|
| < 3.6 | ❌ | End of Life (EOL) |
| 3.6 | ❌ | Used in RHEL 8, but not supported by project dependencies |
| 3.7 | ❌ | End of Life (EOL) |
| 3.8 | ❌ | End of Life (EOL) |
| 3.9 | ✅ | Supported until August 2026 (Debian 11); May 2032 (RHEL 9) |
| 3.10 | ✅ | Actively maintained |
| 3.11 | ✅ | Actively maintained; supported until June 2028 (Debian 12) |
| 3.12 | ✅ | Actively maintained; supported until May 2035 (RHEL 10) |
| 3.13 | ✅ | Actively maintained; supported until June 2030 (Debian 13) |
| 3.14 | ❌ | Not currently supported due to [this imapclient bug](https://github.com/mjs/imapclient/issues/618) |
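A quick, illustrative sketch of calling the module from Python (the `parse_report_file` helper and its `report_type`/`report` keys are assumed here, and the sample path is a placeholder):

```python
# Illustrative only: parse one report file and print the normalized result.
import json

import parsedmarc

result = parsedmarc.parse_report_file("samples/aggregate/example.xml.gz")
print(result["report_type"])  # e.g. "aggregate"
print(json.dumps(result["report"], indent=2, default=str))
```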


@@ -1,188 +0,0 @@
parsedmarc
==========
|Build Status|
``parsedmarc`` is a Python module and CLI utility for parsing aggregate DMARC reports.
Features
========
* Parses draft and 1.0 standard aggregate reports
* Transparently handles gzip or zip compressed reports
* Consistent data structures
* Simple JSON or CSV output
* Python 2 and 3 support
CLI help
========
::
usage: parsedmarc.py [-h] [-f FORMAT] [-o OUTPUT]
[-n NAMESERVER [NAMESERVER ...]] [-t TIMEOUT] [-v]
file_path [file_path ...]
Parses aggregate DMARC reports
positional arguments:
file_path one or more paths of aggregate report files
(compressed or uncompressed)
optional arguments:
-h, --help show this help message and exit
-f FORMAT, --format FORMAT
specify JSON or CSV output format
-o OUTPUT, --output OUTPUT
output to a file path rather than printing to the
screen
-n NAMESERVER [NAMESERVER ...], --nameserver NAMESERVER [NAMESERVER ...]
nameservers to query
-t TIMEOUT, --timeout TIMEOUT
number of seconds to wait for an answer from DNS
(default 6.0)
-v, --version show program's version number and exit
Sample output
=============
Here are the results from parsing the `example <https://dmarc.org/wiki/FAQ#I_need_to_implement_aggregate_reports.2C_what_do_they_look_like.3F>`_
report from the dmarc.org wiki. It's actually an older draft of the 1.0
report schema standardized in
`RFC 7489 Appendix C <https://tools.ietf.org/html/rfc7489#appendix-C>`_.
This draft schema is still in wide use.
``parsedmarc`` produces consistent, normalized output, regardless of the report schema.
JSON
----
.. code-block:: json
{
"xml_schema": "draft",
"report_metadata": {
"org_name": "acme.com",
"org_email": "noreply-dmarc-support@acme.com",
"org_extra_contact_info": "http://acme.com/dmarc/support",
"report_id": "9391651994964116463",
"begin_date": "2012-04-27 20:00:00",
"end_date": "2012-04-28 19:59:59",
"errors": []
},
"policy_published": {
"domain": "example.com",
"adkim": "r",
"aspf": "r",
"p": "none",
"sp": "none",
"pct": "100",
"fo": "0"
},
"records": [
{
"source": {
"ip_address": "72.150.241.94",
"country": "US",
"reverse_dns": "adsl-72-150-241-94.shv.bellsouth.net",
"base_domain": "bellsouth.net"
},
"count": 2,
"policy_evaluated": {
"disposition": "none",
"dkim": "fail",
"spf": "pass",
"policy_override_reasons": []
},
"identifiers": {
"header_from": "example.com",
"envelope_from": "example.com",
"envelope_to": null
},
"auth_results": {
"dkim": [
{
"domain": "example.com",
"selector": "none",
"result": "fail"
}
],
"spf": [
{
"domain": "example.com",
"scope": "mfrom",
"result": "pass"
}
]
}
}
]
}
CSV
---
::
xml_schema,org_name,org_email,org_extra_contact_info,report_id,begin_date,end_date,errors,domain,adkim,aspf,p,sp,pct,fo,source_ip_address,source_country,source_reverse_dns,source_base_domain,count,disposition,dkim_alignment,spf_alignment,policy_override_reasons,policy_override_comments,envelope_from,header_from,envelope_to,dkim_domains,dkim_selectors,dkim_results,spf_domains,spf_scopes,spf_results
draft,acme.com,noreply-dmarc-support@acme.com,http://acme.com/dmarc/support,9391651994964116463,2012-04-27 20:00:00,2012-04-28 19:59:59,[],example.com,r,r,none,none,100,0,72.150.241.94,US,adsl-72-150-241-94.shv.bellsouth.net,bellsouth.net,2,none,fail,pass,,,example.com,example.com,,example.com,none,fail,example.com,mfrom,pass
What about forensic DMARC reports?
==================================
Forensic DMARC reports are emails with an attached email sample that failed a
DMARC check. You can parse them with any email message parser, such as
`mail-parser <https://pypi.python.org/pypi/mail-parser/>`_.
Very few recipients send forensic reports, and even those who do will often
provide only the message headers, and not the message's content, for privacy
reasons.
Installation
============
``parsedmarc`` works with Python 2 or 3, but Python 3 is preferred.
On Debian or Ubuntu systems, run:
.. code-block:: bash
$ sudo apt-get install python3-pip
Python 3 installers for Windows and macOS can be found at https://www.python.org/downloads/
To install or upgrade to the latest stable release of ``parsedmarc`` on macOS or Linux, run
.. code-block:: bash
$ sudo -H pip3 install -U parsedmarc
Or, install the latest development release directly from GitHub:
.. code-block:: bash
$ sudo -H pip3 install -U git+https://github.com/domainaware/parsedmarc.git
.. note::
On Windows, ``pip3`` is ``pip``, even with Python 3. So on Windows, simply
substitute ``pip`` as an administrator in place of ``sudo pip3``, in the above commands.
Documentation
=============
https://domainaware.github.io/parsedmarc
Bug reports
===========
Please report bugs on the GitHub issue tracker
https://github.com/domainaware/parsedmarc/issues
.. |Build Status| image:: https://travis-ci.org/domainaware/parsedmarc.svg?branch=master
:target: https://travis-ci.org/domainaware/parsedmarc

27
build.sh Executable file

@@ -0,0 +1,27 @@
#!/usr/bin/env bash
set -e
if [ ! -d "venv" ]; then
virtualenv venv || exit
fi
. venv/bin/activate
pip install .[build]
ruff format .
cd docs
make clean
make html
touch build/html/.nojekyll
if [ -d "../../parsedmarc-docs" ]; then
cp -rf build/html/* ../../parsedmarc-docs/
fi
cd ..
cd parsedmarc/resources/maps
python3 sortlists.py
echo "Checking for invalid UTF-8 bytes in base_reverse_dns_map.csv"
python3 find_bad_utf8.py base_reverse_dns_map.csv
cd ../../..
python3 tests.py
rm -rf dist/ build/
hatch build

12
ci.ini Normal file

@@ -0,0 +1,12 @@
[general]
save_aggregate = True
save_forensic = True
save_smtp_tls = True
debug = True
[elasticsearch]
hosts = http://localhost:9200
ssl = False
number_of_shards=2
number_of_replicas=2

55
docker-compose.yml Normal file

@@ -0,0 +1,55 @@
services:
elasticsearch:
image: docker.elastic.co/elasticsearch/elasticsearch:8.19.7
environment:
- network.host=127.0.0.1
- http.host=0.0.0.0
- node.name=elasticsearch
- discovery.type=single-node
- cluster.name=parsedmarc-cluster
- discovery.seed_hosts=elasticsearch
- bootstrap.memory_lock=true
- xpack.security.enabled=false
- xpack.license.self_generated.type=basic
ports:
- "127.0.0.1:9200:9200"
ulimits:
memlock:
soft: -1
hard: -1
healthcheck:
test:
[
"CMD-SHELL",
"curl -s -XGET http://localhost:9200/_cluster/health?pretty | grep status | grep -q '\\(green\\|yellow\\)'"
]
interval: 10s
timeout: 10s
retries: 24
opensearch:
image: opensearchproject/opensearch:2
environment:
- network.host=127.0.0.1
- http.host=0.0.0.0
- node.name=opensearch
- discovery.type=single-node
- cluster.name=parsedmarc-cluster
- discovery.seed_hosts=opensearch
- bootstrap.memory_lock=true
- OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_INITIAL_ADMIN_PASSWORD}
ports:
- "127.0.0.1:9201:9200"
ulimits:
memlock:
soft: -1
hard: -1
healthcheck:
test:
[
"CMD-SHELL",
"curl -s -XGET http://localhost:9201/_cluster/health?pretty | grep status | grep -q '\\(green\\|yellow\\)'"
]
interval: 10s
timeout: 10s
retries: 24


@@ -3,10 +3,10 @@
# You can set these variables from the command line.
SPHINXOPTS =
SPHINXBUILD = python -msphinx
SPHINXBUILD = python3 -msphinx
SPHINXPROJ = parsedmarc
SOURCEDIR = .
BUILDDIR = _build
SOURCEDIR = source
BUILDDIR = build
# Put it first so that "make" without argument is like "make help".
help:


@@ -1 +0,0 @@
Make directory show up in git.


@@ -1,179 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# parsedmarc documentation build configuration file, created by
# sphinx-quickstart on Mon Feb 5 18:25:39 2018.
#
# This file is execfile()d with the current directory set to its
# containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import sys
sys.path.insert(0, os.path.abspath('..'))
from parsedmarc import __version__
# -- General configuration ------------------------------------------------
# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = ['sphinx.ext.autodoc',
'sphinx.ext.doctest',
'sphinx.ext.todo',
'sphinx.ext.viewcode',
'sphinx.ext.githubpages',
'sphinx.ext.napoleon']
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
# source_suffix = ['.rst', '.md']
source_suffix = '.rst'
# The master toctree document.
master_doc = 'index'
# General information about the project.
project = 'parsedmarc'
copyright = '2018, Sean Whalen'
author = 'Sean Whalen'
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
version = __version__
# The full version, including alpha/beta/rc tags.
release = version
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = None
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This patterns also effect to html_static_path and html_extra_path
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'
todo_include_todos = False
# -- Options for HTML output ----------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'sphinx_rtd_theme'
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#
# html_theme_options = {}
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
#
# This is required for the alabaster theme
# refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars
html_sidebars = {
'**': [
'about.html',
'navigation.html',
'relations.html', # needs 'show_related': True theme option to display
'searchbox.html',
'donate.html',
]
}
# -- Options for HTMLHelp output ------------------------------------------
# Output file base name for HTML help builder.
htmlhelp_basename = 'parsedmarcdoc'
# -- Options for LaTeX output ---------------------------------------------
latex_elements = {
# The paper size ('letterpaper' or 'a4paper').
#
# 'papersize': 'letterpaper',
# The font size ('10pt', '11pt' or '12pt').
#
# 'pointsize': '10pt',
# Additional stuff for the LaTeX preamble.
#
# 'preamble': '',
# Latex figure (float) alignment
#
# 'figure_align': 'htbp',
}
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
(master_doc, 'parsedmarc.tex', 'parsedmarc Documentation',
'parsedmarc', 'manual'),
]
# -- Options for manual page output ---------------------------------------
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
(master_doc, 'parsedmarc', 'parsedmarc Documentation',
[author], 1)
]
# -- Options for Texinfo output -------------------------------------------
# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
(master_doc, 'parsedmarc', 'parsedmarc Documentation',
author, 'parsedmarc', 'One line description of project.',
'Miscellaneous'),
]


@@ -1,204 +0,0 @@
.. parsedmarc documentation master file, created by
sphinx-quickstart on Mon Feb 5 18:25:39 2018.
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
Welcome to parsedmarc's documentation!
======================================
|Build Status|
``parsedmarc`` is a Python module and CLI utility for parsing aggregate DMARC reports.
Features
========
* Parses draft and 1.0 standard aggregate reports
* Transparently handles gzip or zip compressed reports
* Consistent data structures
* Simple JSON or CSV output
* Python 2 and 3 support
CLI help
========
::
usage: parsedmarc.py [-h] [-f FORMAT] [-o OUTPUT]
[-n NAMESERVER [NAMESERVER ...]] [-t TIMEOUT] [-v]
file_path [file_path ...]
Parses aggregate DMARC reports
positional arguments:
file_path one or more paths of aggregate report files
(compressed or uncompressed)
optional arguments:
-h, --help show this help message and exit
-f FORMAT, --format FORMAT
specify JSON or CSV output format
-o OUTPUT, --output OUTPUT
output to a file path rather than printing to the
screen
-n NAMESERVER [NAMESERVER ...], --nameserver NAMESERVER [NAMESERVER ...]
nameservers to query
-t TIMEOUT, --timeout TIMEOUT
number of seconds to wait for an answer from DNS
(default 6.0)
-v, --version show program's version number and exit
Sample output
=============
Here are the results from parsing the `example <https://dmarc.org/wiki/FAQ#I_need_to_implement_aggregate_reports.2C_what_do_they_look_like.3F>`_
report from the dmarc.org wiki. It's actually an older draft of the 1.0
report schema standardized in
`RFC 7489 Appendix C <https://tools.ietf.org/html/rfc7489#appendix-C>`_.
This draft schema is still in wide use.
``parsedmarc`` produces consistent, normalized output, regardless of the report schema.
JSON
----
.. code-block:: json
{
"xml_schema": "draft",
"report_metadata": {
"org_name": "acme.com",
"org_email": "noreply-dmarc-support@acme.com",
"org_extra_contact_info": "http://acme.com/dmarc/support",
"report_id": "9391651994964116463",
"begin_date": "2012-04-27 20:00:00",
"end_date": "2012-04-28 19:59:59",
"errors": []
},
"policy_published": {
"domain": "example.com",
"adkim": "r",
"aspf": "r",
"p": "none",
"sp": "none",
"pct": "100",
"fo": "0"
},
"records": [
{
"source": {
"ip_address": "72.150.241.94",
"country": "US",
"reverse_dns": "adsl-72-150-241-94.shv.bellsouth.net",
"base_domain": "bellsouth.net"
},
"count": 2,
"policy_evaluated": {
"disposition": "none",
"dkim": "fail",
"spf": "pass",
"policy_override_reasons": []
},
"identifiers": {
"header_from": "example.com",
"envelope_from": "example.com",
"envelope_to": null
},
"auth_results": {
"dkim": [
{
"domain": "example.com",
"selector": "none",
"result": "fail"
}
],
"spf": [
{
"domain": "example.com",
"scope": "mfrom",
"result": "pass"
}
]
}
}
]
}
CSV
---
::
xml_schema,org_name,org_email,org_extra_contact_info,report_id,begin_date,end_date,errors,domain,adkim,aspf,p,sp,pct,fo,source_ip_address,source_country,source_reverse_dns,source_base_domain,count,disposition,dkim_alignment,spf_alignment,policy_override_reasons,policy_override_comments,envelope_from,header_from,envelope_to,dkim_domains,dkim_selectors,dkim_results,spf_domains,spf_scopes,spf_results
draft,acme.com,noreply-dmarc-support@acme.com,http://acme.com/dmarc/support,9391651994964116463,2012-04-27 20:00:00,2012-04-28 19:59:59,[],example.com,r,r,none,none,100,0,72.150.241.94,US,adsl-72-150-241-94.shv.bellsouth.net,bellsouth.net,2,none,fail,pass,,,example.com,example.com,,example.com,none,fail,example.com,mfrom,pass
What about forensic DMARC reports?
==================================
Forensic DMARC reports are emails with an attached email sample that failed a
DMARC check. You can parse them with any email message parser, such as
`mail-parser <https://pypi.python.org/pypi/mail-parser/>`_.
Very few recipients send forensic reports, and even those who do will often
provide only the message headers, and not the message's content, for privacy
reasons.
Bug reports
===========
Please report bugs on the GitHub issue tracker
https://github.com/domainaware/parsedmarc/issues
Installation
============
``parsedmarc`` works with Python 2 or 3, but Python 3 is preferred.
On Debian or Ubuntu systems, run:
.. code-block:: bash
$ sudo apt-get install python3-pip
Python 3 installers for Windows and macOS can be found at https://www.python.org/downloads/
To install or upgrade to the latest stable release of ``parsedmarc`` on macOS or Linux, run
.. code-block:: bash
$ sudo -H pip3 install -U parsedmarc
Or, install the latest development release directly from GitHub:
.. code-block:: bash
$ sudo -H pip3 install -U git+https://github.com/domainaware/parsedmarc.git
.. note::
On Windows, ``pip3`` is ``pip``, even with Python 3. So on Windows, simply
substitute ``pip`` as an administrator in place of ``sudo pip3``, in the above commands.
API
===
.. automodule:: parsedmarc
:members:
.. toctree::
:maxdepth: 2
:caption: Contents:
Indices and tables
==================
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`
.. |Build Status| image:: https://travis-ci.org/domainaware/parsedmarc.svg?branch=master
:target: https://travis-ci.org/domainaware/parsedmarc

View File

@@ -7,8 +7,8 @@ REM Command file for Sphinx documentation
if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=python -msphinx
)
set SOURCEDIR=.
set BUILDDIR=_build
set SOURCEDIR=source
set BUILDDIR=build
set SPHINXPROJ=parsedmarc
if "%1" == "" goto help

(10 binary image files added; contents not shown in this view)
41
docs/source/api.md Normal file
View File

@@ -0,0 +1,41 @@
# API reference
## parsedmarc
```{eval-rst}
.. automodule:: parsedmarc
:members:
```
## parsedmarc.elastic
```{eval-rst}
.. automodule:: parsedmarc.elastic
:members:
```
## parsedmarc.opensearch
```{eval-rst}
.. automodule:: parsedmarc.opensearch
:members:
```
## parsedmarc.splunk
```{eval-rst}
.. automodule:: parsedmarc.splunk
:members:
```
## parsedmarc.utils
```{eval-rst}
.. automodule:: parsedmarc.utils
:members:
```
## Indices and tables
- {ref}`genindex`
- {ref}`modindex`

94
docs/source/conf.py Normal file
View File

@@ -0,0 +1,94 @@
# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html
# -- Path setup --------------------------------------------------------------
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import sys
sys.path.insert(0, os.path.abspath(os.path.join("..", "..")))
from parsedmarc import __version__
# -- Project information -----------------------------------------------------
project = "parsedmarc"
copyright = "2018 - 2025, Sean Whalen and contributors"
author = "Sean Whalen and contributors"
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
version = __version__
# The full version, including alpha/beta/rc tags.
release = version
# -- General configuration ---------------------------------------------------
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
"sphinx.ext.autodoc",
"sphinx.ext.doctest",
"sphinx.ext.todo",
"sphinx.ext.viewcode",
"sphinx.ext.githubpages",
"sphinx.ext.napoleon",
"myst_parser",
]
myst_enable_extensions = [
"amsmath",
"colon_fence",
"deflist",
"dollarmath",
"fieldlist",
"html_admonition",
"html_image",
"linkify",
"replacements",
"smartquotes",
"strikethrough",
"substitution",
"tasklist",
]
myst_heading_anchors = 3
autoclass_content = "init"
# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]
# The suffixes of source filenames.
source_suffix = [".rst", ".md"]
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = []
# -- Options for HTML output -------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = "sphinx_rtd_theme"
html_theme_options = {"globaltoc_collapse": False}
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ["_static"]

View File

@@ -0,0 +1,7 @@
# Contributing to parsedmarc
## Bug reports
Please report bugs on the GitHub issue tracker
<https://github.com/domainaware/parsedmarc/issues>

189
docs/source/davmail.md Normal file
View File

@@ -0,0 +1,189 @@
# Accessing an inbox using OWA/EWS
:::{note}
Starting in 8.0.0, parsedmarc supports accessing Microsoft/Office 365
inboxes via the Microsoft Graph API, which is preferred over Davmail.
:::
Some organizations do not allow IMAP or the Microsoft Graph API,
and only support Exchange Web Services (EWS)/Outlook Web Access (OWA).
In that case, Davmail will need to be set up
as a local EWS/OWA IMAP gateway. It can even work where
[Modern Auth/multi-factor authentication] is required.
To do this, download the latest `davmail-version.zip` from
<https://sourceforge.net/projects/davmail/files/>
Extract the zip using the `unzip` command.
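For example, since the systemd unit later in this guide expects DavMail to live
under `/opt/davmail`, a minimal sketch of the extraction step might look like this
(the exact zip filename depends on the DavMail version you downloaded):

```bash
# Create the install directory and extract the DavMail zip into it
sudo mkdir -p /opt/davmail
sudo unzip davmail-*.zip -d /opt/davmail
```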
Install Java:
```bash
sudo apt-get install default-jre-headless
```
Configure Davmail by creating a `davmail.properties` file
```properties
# DavMail settings, see http://davmail.sourceforge.net/ for documentation
#############################################################
# Basic settings
# Server or workstation mode
davmail.server=true
# connection mode auto, EWS or WebDav
davmail.enableEws=auto
# base Exchange OWA or EWS url
davmail.url=https://outlook.office365.com/EWS/Exchange.asmx
# Listener ports
davmail.imapPort=1143
#############################################################
# Network settings
# Network proxy settings
davmail.enableProxy=false
davmail.useSystemProxies=false
davmail.proxyHost=
davmail.proxyPort=
davmail.proxyUser=
davmail.proxyPassword=
# proxy exclude list
davmail.noProxyFor=
# block remote connection to DavMail
davmail.allowRemote=false
# bind server sockets to the loopback address
davmail.bindAddress=127.0.0.1
# disable SSL for specified listeners
davmail.ssl.nosecureimap=true
# Send keepalive character during large folder and messages download
davmail.enableKeepalive=true
# Message count limit on folder retrieval
davmail.folderSizeLimit=0
#############################################################
# IMAP settings
# Delete messages immediately on IMAP STORE \Deleted flag
davmail.imapAutoExpunge=true
# Enable IDLE support, set polling delay in minutes
davmail.imapIdleDelay=1
# Always reply to IMAP RFC822.SIZE requests with Exchange approximate
# message size for performance reasons
davmail.imapAlwaysApproxMsgSize=true
# Client connection timeout in seconds - default 300, 0 to disable
davmail.clientSoTimeout=0
#############################################################
```
## Running DavMail as a systemd service
Use systemd to run `davmail` as a service.
Create a system user
```bash
sudo useradd davmail -r -s /bin/false
```
Protect the `davmail` configuration file from prying eyes
```bash
sudo chown root:davmail /opt/davmail/davmail.properties
sudo chmod u=rw,g=r,o= /opt/davmail/davmail.properties
```
Create the service configuration file
```bash
sudo nano /etc/systemd/system/davmail.service
```
```ini
[Unit]
Description=DavMail gateway service
Documentation=https://sourceforge.net/projects/davmail/
Wants=network-online.target
After=syslog.target network.target
[Service]
ExecStart=/opt/davmail/davmail /opt/davmail/davmail.properties
User=davmail
Group=davmail
Restart=always
RestartSec=5m
[Install]
WantedBy=multi-user.target
```
Then, enable the service
```bash
sudo systemctl daemon-reload
sudo systemctl enable davmail.service
sudo service davmail restart
```
:::{note}
You must also run the above commands whenever you edit
`davmail.service`.
:::
:::{warning}
Always restart the service every time you upgrade to a new version of
`davmail`:
```bash
sudo service davmail restart
```
:::
To check the status of the service, run:
```bash
service davmail status
```
:::{note}
In the event of a crash, systemd will restart the service after 5
minutes, but the `service davmail status` command will only show the
logs for the current process. To view the logs for previous runs as
well as the current process (newest to oldest), run:
```bash
journalctl -u davmail.service -r
```
:::
## Configuring parsedmarc for DavMail
Because you are interacting with the DavMail server over the loopback
interface (i.e. `127.0.0.1`), add the following options to the `parsedmarc.ini`
config file:
```ini
[imap]
host=127.0.0.1
port=1143
ssl=False
watch=True
```
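With those options in place, `parsedmarc` can be pointed at the same configuration
file it normally uses; the path below is just an example:

```bash
parsedmarc -c /etc/parsedmarc.ini
```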
[modern auth/multi-factor authentication]: https://davmail.sourceforge.net/faq.html

71
docs/source/dmarc.md Normal file
View File

@@ -0,0 +1,71 @@
# Understanding DMARC
## Resources
### DMARC guides
- [Demystifying DMARC] - A complete guide to SPF, DKIM, and DMARC
[demystifying dmarc]: https://seanthegeek.net/459/demystifying-dmarc/
### SPF and DMARC record validation
If you are looking for SPF and DMARC record validation and parsing,
check out the sister project,
[checkdmarc](https://domainaware.github.io/checkdmarc/).
### Lookalike domains
DMARC protects against domain spoofing, not lookalike domains. For open source
lookalike domain monitoring, check out [DomainAware](https://github.com/seanthegeek/domainaware).
## DMARC Alignment Guide
DMARC ensures that SPF and DKIM authentication mechanisms actually authenticate
against the same domain that the end user sees.
A message passes a DMARC check by passing DKIM or SPF, **as long as the related
indicators are also in alignment**.
```{eval-rst}
+-----------------------+-----------------------+-----------------------+
| | **DKIM** | **SPF** |
+-----------------------+-----------------------+-----------------------+
| **Passing** | The signature in the | The mail server's IP |
| | DKIM header is | address is listed in |
| | validated using a | the SPF record of the |
| | public key that is | domain in the SMTP |
| | published as a DNS | envelope's mail from |
| | record of the domain | header |
| | name specified in the | |
| | signature | |
+-----------------------+-----------------------+-----------------------+
| **Alignment** | The signing domain | The domain in the |
| | aligns with the | SMTP envelope's mail |
| | domain in the | from header aligns |
| | message's from header | with the domain in |
| | | the message's from |
| | | header |
+-----------------------+-----------------------+-----------------------+
```
## What if a sender won't support DKIM/DMARC?
1. Some vendors don't know about DMARC yet; ask about SPF and DKIM/email
authentication.
2. Check if they can send through your email relays instead of theirs.
3. Do they really need to spoof your domain? Why not use the display
name instead?
4. Worst case, have that vendor send email as a specific subdomain of
your domain (e.g. `noreply@news.example.com`), and then create
separate SPF and DMARC records on `news.example.com`, and set
`p=none` in that DMARC record.
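As a quick sanity check for that last option, the records published on the
delegated subdomain can be inspected with `dig`; the names below are hypothetical:

```bash
# Look up the SPF and DMARC TXT records on the delegated subdomain
dig +short TXT news.example.com
dig +short TXT _dmarc.news.example.com
```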
:::{warning}
Do not alter the `p` or `sp` values of the DMARC record on the
Top-Level Domain (TLD); that would leave you vulnerable to
spoofing of your TLD and/or any subdomain.
:::
```{include} mailing-lists.md
```

View File

@@ -0,0 +1,236 @@
# Elasticsearch and Kibana
To set up visual dashboards of DMARC data, install Elasticsearch and Kibana.
:::{note}
Elasticsearch and Kibana 6 or later are required
:::
## Installation
On Debian/Ubuntu based systems, run:
```bash
sudo apt-get install -y apt-transport-https
wget -qO - https://artifacts.elastic.co/GPG-KEY-elasticsearch | sudo gpg --dearmor -o /usr/share/keyrings/elasticsearch-keyring.gpg
echo "deb [signed-by=/usr/share/keyrings/elasticsearch-keyring.gpg] https://artifacts.elastic.co/packages/8.x/apt stable main" | sudo tee /etc/apt/sources.list.d/elastic-8.x.list
sudo apt-get update
sudo apt-get install -y elasticsearch kibana
```
For CentOS, RHEL, and other RPM systems, follow the Elastic RPM guides for
[Elasticsearch] and [Kibana].
:::{note}
Previously, the default JVM heap size for Elasticsearch was very small (1g),
which could cause it to crash under a heavy load. To fix this, increase the
minimum and maximum JVM heap sizes in `/etc/elasticsearch/jvm.options` to
more reasonable levels, depending on your server's resources.
Make sure the system has at least 2 GB more RAM than the assigned JVM
heap size.
Always set the minimum and maximum JVM heap sizes to the same
value.
For example, to set a 4 GB heap size, set
```bash
-Xms4g
-Xmx4g
```
See <https://www.elastic.co/guide/en/elasticsearch/reference/current/important-settings.html#heap-size-settings>
for more information.
:::
```bash
sudo systemctl daemon-reload
sudo systemctl enable elasticsearch.service
sudo systemctl enable kibana.service
sudo systemctl start elasticsearch.service
sudo systemctl start kibana.service
```
As of Elasticsearch 8.7, activate secure mode (xpack.security.*.ssl)
```bash
sudo vim /etc/elasticsearch/elasticsearch.yml
```
Add the following configuration
```text
# Enable security features
xpack.security.enabled: true
xpack.security.enrollment.enabled: true
# Enable encryption for HTTP API client connections, such as Kibana, Logstash, and Agents
xpack.security.http.ssl:
enabled: true
keystore.path: certs/http.p12
# Enable encryption and mutual authentication between cluster nodes
xpack.security.transport.ssl:
enabled: true
verification_mode: certificate
keystore.path: certs/transport.p12
truststore.path: certs/transport.p12
```
```bash
sudo systemctl restart elasticsearch
```
To create a self-signed certificate, run:
```bash
openssl req -x509 -nodes -days 365 -newkey rsa:4096 -keyout kibana.key -out kibana.crt
```
Or, to create a Certificate Signing Request (CSR) for a CA, run:
```bash
openssl req -newkey rsa:4096 -nodes -keyout kibana.key -out kibana.csr
```
Fill in the prompts. Watch out for Common Name (e.g. server FQDN or YOUR
domain name), which is the IP address or domain name that you will use to access Kibana. It is the most important field.
If you generated a CSR, remove the CSR after you have your certs
```bash
rm -f kibana.csr
```
Move the keys into place and secure them:
```bash
sudo mv kibana.* /etc/kibana
sudo chmod 660 /etc/kibana/kibana.key
```
Activate the HTTPS server in Kibana
```bash
sudo vim /etc/kibana/kibana.yml
```
Add the following configuration
```text
server.host: "SERVER_IP"
server.publicBaseUrl: "https://SERVER_IP"
server.ssl.enabled: true
server.ssl.certificate: /etc/kibana/kibana.crt
server.ssl.key: /etc/kibana/kibana.key
```
:::{note}
For more security, you can configure Kibana to use a local network connection
to Elasticsearch:
```text
elasticsearch.hosts: ['https://SERVER_IP:9200']
```
=>
```text
elasticsearch.hosts: ['https://127.0.0.1:9200']
```
:::
```bash
sudo systemctl restart kibana
```
Enroll Kibana in Elasticsearch
```bash
sudo /usr/share/elasticsearch/bin/elasticsearch-create-enrollment-token -s kibana
```
Then access your web server at `https://SERVER_IP:5601`, accept the self-signed
certificate, and paste the token into the "Enrollment token" field.
```bash
sudo /usr/share/kibana/bin/kibana-verification-code
```
Then enter the verification code in your web browser.
End Kibana configuration
```bash
sudo /usr/share/elasticsearch/bin/elasticsearch-setup-passwords interactive
sudo /usr/share/kibana/bin/kibana-encryption-keys generate
sudo vim /etc/kibana/kibana.yml
```
Add previously generated encryption keys
```text
xpack.encryptedSavedObjects.encryptionKey: xxxx...xxxx
xpack.reporting.encryptionKey: xxxx...xxxx
xpack.security.encryptionKey: xxxx...xxxx
```
```bash
sudo systemctl restart kibana
sudo systemctl restart elasticsearch
```
Now that Elasticsearch is up and running, use `parsedmarc` to send data to
it.
Download (right-click the link and click save as) [export.ndjson].
Connect to Kibana using the "elastic" user and the password you previously provided
on the console (in the "End Kibana configuration" step above).
Import `export.ndjson` in the Saved Objects tab of the Stack Management
page of Kibana. (Hamburger menu -> "Management" -> "Stack Management" ->
"Kibana" -> "Saved Objects")
It will give you the option to overwrite existing saved dashboards or
visualizations, which could be used to restore them if you or someone else
breaks them, as there are no permissions/access controls in Kibana without
the commercial [X-Pack].
```{image} _static/screenshots/saved-objects.png
:align: center
:alt: A screenshot of setting the Saved Objects Stack management UI in Kibana
:target: _static/screenshots/saved-objects.png
```
```{image} _static/screenshots/confirm-overwrite.png
:align: center
:alt: A screenshot of the overwrite confirmation prompt
:target: _static/screenshots/confirm-overwrite.png
```
## Upgrading Kibana index patterns
`parsedmarc` 5.0.0 makes some changes to the way data is indexed in
Elasticsearch. If you are upgrading from a previous release of
`parsedmarc`, you need to complete the following steps to replace the
Kibana index patterns with versions that match the upgraded indexes:
1. Log in to Kibana, and click on Management
2. Under Kibana, click on Saved Objects
3. Check the checkboxes for the `dmarc_aggregate` and `dmarc_forensic`
index patterns
4. Click Delete
5. Click Delete on the confirmation message
6. Download (right-click the link and click save as)
the latest version of [export.ndjson]
7. Import `export.ndjson` by clicking Import from the Kibana
Saved Objects page
## Records retention
Starting in version 5.0.0, `parsedmarc` stores data in a separate
index for each day to make it easy to comply with records
retention regulations such as GDPR. For more information,
check out the Elastic guide to [managing time-based indexes efficiently](https://www.elastic.co/blog/managing-time-based-indices-efficiently).
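As a rough sketch of how to review those daily indexes, the `_cat/indices` API can
be queried; the index name pattern and credentials below are assumptions based on a
default parsedmarc and Elasticsearch setup, so adjust them for your environment:

```bash
# List the daily DMARC indexes by name (prompts for the elastic user's password)
curl -k -u elastic "https://127.0.0.1:9200/_cat/indices/dmarc_*?v&s=index"
```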
[elasticsearch]: https://www.elastic.co/guide/en/elasticsearch/reference/current/rpm.html
[export.ndjson]: https://raw.githubusercontent.com/domainaware/parsedmarc/master/kibana/export.ndjson
[kibana]: https://www.elastic.co/guide/en/kibana/current/rpm.html
[x-pack]: https://www.elastic.co/products/x-pack

31
docs/source/example.ini Normal file
View File

@@ -0,0 +1,31 @@
# This is an example comment
[general]
save_aggregate = True
save_forensic = True
[imap]
host = imap.example.com
user = dmarcresports@example.com
password = $uperSecure
watch = True
[elasticsearch]
hosts = 127.0.0.1:9200
ssl = False
[splunk_hec]
url = https://splunkhec.example.com
token = HECTokenGoesHere
index = email
[s3]
bucket = my-bucket
path = parsedmarc
[gmail_api]
credentials_file = /etc/example/credentials.json
token_file = /etc/example/token.json
include_spam_trash = True
paginate_messages = True
scopes = https://www.googleapis.com/auth/gmail.modify

84
docs/source/index.md Normal file
View File

@@ -0,0 +1,84 @@
# parsedmarc documentation - Open source DMARC report analyzer and visualizer
[![Build
Status](https://github.com/domainaware/parsedmarc/actions/workflows/python-tests.yml/badge.svg)](https://github.com/domainaware/parsedmarc/actions/workflows/python-tests.yml)
[![Code
Coverage](https://codecov.io/gh/domainaware/parsedmarc/branch/master/graph/badge.svg)](https://codecov.io/gh/domainaware/parsedmarc)
[![PyPI
Package](https://img.shields.io/pypi/v/parsedmarc.svg)](https://pypi.org/project/parsedmarc/)
[![PyPI - Downloads](https://img.shields.io/pypi/dm/parsedmarc?color=blue)](https://pypistats.org/packages/parsedmarc)
:::{note}
**Help Wanted**
This project is maintained by one developer.
Please consider reviewing the open [issues] to see how you can contribute code, documentation, or user support.
Assistance on the pinned issues would be particularly helpful.
Thanks to all [contributors]!
:::
```{image} _static/screenshots/dmarc-summary-charts.png
:align: center
:alt: A screenshot of DMARC summary charts in Kibana
:scale: 50 %
:target: _static/screenshots/dmarc-summary-charts.png
```
`parsedmarc` is a Python module and CLI utility for parsing DMARC reports.
When used with Elasticsearch and Kibana (or Splunk), or with OpenSearch and Grafana, it works as a self-hosted
open source alternative to commercial DMARC report processing services such
as Agari Brand Protection, Dmarcian, OnDMARC, ProofPoint Email Fraud Defense,
and Valimail.
## Features
- Parses draft and 1.0 standard aggregate/rua DMARC reports
- Parses forensic/failure/ruf DMARC reports
- Parses reports from SMTP TLS Reporting
- Can parse reports from an inbox over IMAP, Microsoft Graph, or Gmail API
- Transparently handles gzip or zip compressed reports
- Consistent data structures
- Simple JSON and/or CSV output
- Optionally email the results
- Optionally send the results to Elasticsearch, Opensearch, and/or Splunk, for use
with premade dashboards
- Optionally send reports to Apache Kafka
## Python Compatibility
This project supports the following Python versions, which are either actively maintained or are the default versions
for RHEL or Debian.
| Version | Supported | Reason |
|---------|-----------|------------------------------------------------------------|
| < 3.6 | ❌ | End of Life (EOL) |
| 3.6 | ❌ | Used in RHEL 8, but not supported by project dependencies |
| 3.7 | ❌ | End of Life (EOL) |
| 3.8 | ❌ | End of Life (EOL) |
| 3.9 | ✅ | Supported until August 2026 (Debian 11); May 2032 (RHEL 9) |
| 3.10 | ✅ | Actively maintained |
| 3.11 | ✅ | Actively maintained; supported until June 2028 (Debian 12) |
| 3.12 | ✅ | Actively maintained; supported until May 2035 (RHEL 10) |
| 3.13 | ✅ | Actively maintained; supported until June 2030 (Debian 13) |
| 3.14 | ❌ | Not currently supported due to [this imapclient bug](https://github.com/mjs/imapclient/issues/618)|
```{toctree}
:caption: 'Contents'
:maxdepth: 2
installation
usage
output
elasticsearch
opensearch
kibana
splunk
davmail
dmarc
contributing
api
```
[contributors]: https://github.com/domainaware/parsedmarc/graphs/contributors
[issues]: https://github.com/domainaware/parsedmarc/issues

205
docs/source/installation.md Normal file
View File

@@ -0,0 +1,205 @@
# Installation
## Prerequisites
`parsedmarc` works with Python 3 only.
### Testing multiple report analyzers
If you would like to test parsedmarc and another report processing
solution at the same time, you can have up to two `mailto` URIs in each of the rua and ruf
tags in your DMARC record, separated by commas.
### Using a web proxy
If your system is behind a web proxy, you need to configure your system
to use that proxy. To do this, edit `/etc/environment` and add your
proxy details there, for example:
```bash
http_proxy=http://user:password@prox-server:3128
https_proxy=https://user:password@prox-server:3128
ftp_proxy=http://user:password@prox-server:3128
```
Or if no credentials are needed:
```bash
http_proxy=http://prox-server:3128
https_proxy=https://prox-server:3128
ftp_proxy=http://prox-server:3128
```
This will set the proxy up for use system-wide, including for `parsedmarc`.
### Using Microsoft Exchange
If your mail server is Microsoft Exchange, ensure that it is patched to at
least:
- Exchange Server 2010 Update Rollup 22 ([KB4295699])
- Exchange Server 2013 Cumulative Update 21 ([KB4099855])
- Exchange Server 2016 Cumulative Update 11 ([KB4134118])
### geoipupdate setup
:::{note}
Starting in `parsedmarc` 7.1.0, a static copy of the
[IP to Country Lite database] from DB-IP is distributed with
`parsedmarc`, under the terms of the
[Creative Commons Attribution 4.0 International License],
as a fallback if the [MaxMind GeoLite2 Country database] is not
installed. However, `parsedmarc` cannot install updated versions of
these databases as they are released, so MaxMind's databases and the
[geoipupdate] tool are still the preferable solution.
The location of the database file can be overridden by using the
`ip_db_path` setting.
:::
On Debian 10 (Buster) or later, run:
```bash
sudo apt-get install -y geoipupdate
```
:::{note}
[Component "contrib"] is required in your apt sources.
:::
On Ubuntu systems run:
```bash
sudo add-apt-repository ppa:maxmind/ppa
sudo apt update
sudo apt install -y geoipupdate
```
On CentOS or RHEL systems, run:
```bash
sudo dnf install -y geoipupdate
```
The latest builds for Linux, macOS, and Windows can be downloaded
from the [geoipupdate releases page on GitHub].
On December 30th, 2019, MaxMind started requiring free accounts to
access the free Geolite2 databases, in order
[to comply with various privacy regulations].
Start by [registering for a free GeoLite2 account], and signing in.
Then, navigate to the [License Keys] page under your account,
and create a new license key for the version of
`geoipupdate` that was installed.
:::{warning}
The configuration file format is different for older (i.e. \<=3.1.1) and newer (i.e. >=3.1.1) versions
of `geoipupdate`. Be sure to select the correct version for your system.
:::
:::{note}
To check the version of `geoipupdate` that is installed, run:
```bash
geoipupdate -V
```
:::
You can use `parsedmarc` as the description for the key.
Once you have generated a key, download the pre-filled
configuration file. This file should be saved at `/etc/GeoIP.conf`
on Linux or macOS systems, or at
`%SystemDrive%\ProgramData\MaxMind\GeoIPUpdate\GeoIP.conf` on
Windows systems.
Then run
```bash
sudo geoipupdate
```
to download the databases for the first time.
The GeoLite2 Country, City, and ASN databases are updated weekly,
every Tuesday. `geoipupdate` can be run weekly by adding a cron
job or scheduled task.
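For example, a minimal cron entry might look like the following; the schedule and
the `geoipupdate` binary path are assumptions, so adjust them for your system:

```bash
# Run geoipupdate every Wednesday at 04:30 via a cron.d drop-in
echo "30 4 * * 3 root /usr/bin/geoipupdate" | sudo tee /etc/cron.d/geoipupdate
```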
More information about `geoipupdate` can be found at the
[MaxMind geoipupdate page].
## Installing parsedmarc
On Debian or Ubuntu systems, run:
```bash
sudo apt-get install -y python3-pip python3-virtualenv python3-dev libxml2-dev libxslt-dev
```
On CentOS or RHEL systems, run:
```bash
sudo dnf install -y python39 python3-virtualenv python3-setuptools python3-devel libxml2-devel libxslt-devel
```
Python 3 installers for Windows and macOS can be found at
<https://www.python.org/downloads/>.
Create a system user
```bash
sudo mkdir /opt
sudo useradd parsedmarc -r -s /bin/false -m -b /opt
```
Install parsedmarc in a virtualenv
```bash
sudo -u parsedmarc virtualenv /opt/parsedmarc/venv
```
CentOS/RHEL 8 systems use Python 3.6 by default, so on those systems
explicitly tell `virtualenv` to use `python3.9` instead
```bash
sudo -u parsedmarc virtualenv -p python3.9 /opt/parsedmarc/venv
```
Activate the virtualenv
```bash
source /opt/parsedmarc/venv/bin/activate
```
To install or upgrade `parsedmarc` inside the virtualenv, run:
```bash
sudo -u parsedmarc /opt/parsedmarc/venv/bin/pip install -U parsedmarc
```
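As a quick sanity check that the install succeeded, print the version from inside
the virtualenv:

```bash
/opt/parsedmarc/venv/bin/parsedmarc --version
```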
## Optional dependencies
If you would like to be able to parse emails saved from Microsoft
Outlook (i.e. OLE .msg files), install `msgconvert`:
On Debian or Ubuntu systems, run:
```bash
sudo apt-get install libemail-outlook-message-perl
```
[KB4295699]: https://support.microsoft.com/KB/4295699
[KB4099855]: https://support.microsoft.com/KB/4099855
[KB4134118]: https://support.microsoft.com/kb/4134118
[Component "contrib"]: https://wiki.debian.org/SourcesList#Component
[geoipupdate]: https://github.com/maxmind/geoipupdate
[geoipupdate releases page on github]: https://github.com/maxmind/geoipupdate/releases
[ip to country lite database]: https://db-ip.com/db/download/ip-to-country-lite
[license keys]: https://www.maxmind.com/en/accounts/current/license-key
[maxmind geoipupdate page]: https://dev.maxmind.com/geoip/updating-databases/
[maxmind geolite2 country database]: https://dev.maxmind.com/geoip/geolite2-free-geolocation-data
[registering for a free geolite2 account]: https://www.maxmind.com/en/geolite2/signup
[to comply with various privacy regulations]: https://blog.maxmind.com/2019/12/18/significant-changes-to-accessing-and-using-geolite2-databases/

87
docs/source/kibana.md Normal file
View File

@@ -0,0 +1,87 @@
# Using the Kibana dashboards
The Kibana DMARC dashboards are a human-friendly way to understand the
results from incoming DMARC reports.
:::{note}
The default dashboard is DMARC Summary. To switch between dashboards,
click on the Dashboard link on the left side menu of Kibana.
:::
## DMARC Summary
As the name suggests, this dashboard is the best place to start
reviewing your aggregate DMARC data.
Across the top of the dashboard, three pie charts display the percentage of
alignment pass/fail for SPF, DKIM, and DMARC. Clicking on any chart segment
will filter for that value.
:::{note}
Messages should not be considered malicious just because they failed to pass
DMARC; especially if you have just started collecting data. It may be a
legitimate service that needs SPF and DKIM configured correctly.
:::
Start by filtering the results to only show failed DKIM alignment. While DMARC
passes if a message passes SPF or DKIM alignment, only DKIM alignment remains
valid when a message is forwarded without changing the from address, which is
often caused by a mailbox forwarding rule. This is because DKIM signatures are
part of the message headers, whereas SPF relies on the SMTP envelope.
Underneath the pie charts, you can see graphs of DMARC passage and message
disposition over time.
Under the graphs you will find the most useful data tables on the dashboard. On
the left, there is a list of organizations that are sending you DMARC reports.
In the center, there is a list of sending servers grouped by the base domain
in their reverse DNS. On the right, there is a list of email from domains,
sorted by message volume.
By hovering your mouse over a data table value and using the magnifying glass
icons, you can filter on or filter out different values. Start by looking at
the Message Sources by Reverse DNS table. Find a sender that you recognize,
such as an email marketing service, hover over it, and click on the plus (+)
magnifying glass icon, to add a filter that only shows results for that sender.
Now, look at the Message From Header table to the right. That shows you the
domains that a sender is sending as, which might tell you which brand/business
is using a particular service. With that information, you can contact them and
have them set up DKIM.
:::{note}
If you have a lot of B2C customers, you may see a high volume of email sent
as your domains coming from consumer email services, such as Google/Gmail and
Yahoo! This occurs when customers have mailbox rules in place that forward
emails from an old account to a new account, which is why DKIM
authentication is so important, as mentioned earlier. Similar patterns may
be observed with businesses that send from reverse DNS addresses of
parent, subsidiary, and outdated brands.
:::
Further down the dashboard, you can filter by source country or source IP
address.
Tables showing SPF and DKIM alignment details are located under the IP address
table.
:::{note}
Previously, the alignment tables were included in a separate dashboard
called DMARC Alignment Failures. That dashboard has been consolidated into
the DMARC Summary dashboard. To view failures only, use the pie chart.
:::
Any other filters work the same way. You can also add your own custom temporary
filters by clicking on Add Filter at the upper right of the page.
## DMARC Forensic Samples
The DMARC Forensic Samples dashboard contains information on DMARC forensic
reports (also known as failure reports or ruf reports). These reports contain
samples of emails that have failed to pass DMARC.
:::{note}
Most recipients do not send forensic/failure/ruf reports at all to avoid
privacy leaks. Some recipients (notably Chinese webmail services) will only
supply the headers of sample emails. Very few provide the entire email.
:::

View File

@@ -0,0 +1,206 @@
## What about mailing lists?
When you deploy DMARC on your domain, you might find that messages
relayed by mailing lists are failing DMARC, most likely because the mailing
list is spoofing your from address, and modifying the subject,
footer, or other part of the message, thereby breaking the
DKIM signature.
### Mailing list best practices
Ideally, a mailing list should forward messages without altering the
headers or body content at all. [Joe Nelson] does a fantastic job of
explaining exactly what mailing lists should and shouldn't do to be
fully DMARC compliant. Rather than repeat his fine work, here's a
summary:
#### Do
- Retain headers from the original message
- Add [RFC 2369] List-Unsubscribe headers to outgoing messages, instead of
adding unsubscribe links to the body
> List-Unsubscribe: <https://list.example.com/unsubscribe-link>
- Add [RFC 2919] List-Id headers instead of modifying the subject
> List-Id: Example Mailing List <list.example.com>
Modern mail clients and webmail services generate unsubscribe buttons based on
these headers.
#### Do not
- Remove or modify any existing headers from the original message, including
From, Date, Subject, etc.
- Add to or remove content from the message body, **including traditional
disclaimers and unsubscribe footers**
In addition to complying with DMARC, this configuration ensures that Reply
and Reply All actions work like they would with any email message. Reply
replies to the message sender, and Reply All replies to the sender and the
list.
Even without a subject prefix or body footer, mailing list users can still
tell that a message came from the mailing list, because the message was sent
to the mailing list post address, and not their email address.
Configuration steps for common mailing list platforms are listed below.
#### Mailman 2
Navigate to General Settings, and configure the settings below
```{eval-rst}
============================ ==========
**Setting** **Value**
**subject_prefix**
**from_is_list** No
**first_strip_reply_to** No
**reply_goes_to_list** Poster
**include_rfc2369_headers** Yes
**include_list_post_header** Yes
**include_sender_header** No
============================ ==========
```
Navigate to Non-digest options, and configure the settings below
```{eval-rst}
=================== ==========
**Setting** **Value**
**msg_header**
**msg_footer**
**scrub_nondigest** No
=================== ==========
```
Navigate to Privacy Options> Sending Filters, and configure the settings below
```{eval-rst}
====================================== ==========
**Setting** **Value**
**dmarc_moderation_action** Accept
**dmarc_quarantine_moderation_action** Yes
**dmarc_none_moderation_action** Yes
====================================== ==========
```
#### Mailman 3
Navigate to Settings> List Identity
Make Subject prefix blank.
Navigate to Settings> Alter Messages
Configure the settings below
```{eval-rst}
====================================== ==========
**Setting** **Value**
**Convert html to plaintext** No
**Include RFC2369 headers** Yes
**Include the list post header** Yes
**Explicit reply-to address**
**First strip replyto** No
**Reply goes to list** No munging
====================================== ==========
```
Navigate to Settings> DMARC Mitigation
Configure the settings below
```{eval-rst}
================================== ===============================
**Setting** **Value**
**DMARC mitigation action** No DMARC mitigations
**DMARC mitigate unconditionally** No
================================== ===============================
```
Create a blank footer template for your mailing list to remove the message
footer. Unfortunately, the Postorius mailing list admin UI will not allow you
to create an empty template, so you'll have to create one using the system's
command line instead, for example:
```bash
touch var/templates/lists/list.example.com/en/list:member:regular:footer
```
Where `list.example.com` is the list ID, and `en` is the language.
Then restart mailman core.
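How Mailman Core is restarted depends on how it was installed; on a systemd-based
host using distribution packages it might look like this (the unit name is an
assumption, so check your own setup):

```bash
sudo systemctl restart mailman3
```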
#### LISTSERV
[LISTSERV 16.0-2017a] and higher will rewrite the From header for domains
that enforce a DMARC quarantine or reject policy.
Some additional steps are needed for Linux hosts.
#### Workarounds
If a mailing list must go **against** best practices and
modify the message (e.g. to add a required legal footer), the mailing
list administrator must configure the list to replace the From address of the
message (also known as munging) with the address of the mailing list, so they
no longer spoof email addresses with domains protected by DMARC.
Configuration steps for common mailing list platforms are listed below.
##### Mailman 2
Navigate to Privacy Options> Sending Filters, and configure the settings below
```{eval-rst}
====================================== ==========
**Setting** **Value**
**dmarc_moderation_action** Munge From
**dmarc_quarantine_moderation_action** Yes
**dmarc_none_moderation_action** Yes
====================================== ==========
```
:::{note}
Message wrapping could be used as the DMARC mitigation action instead. In
that case, the original message is added as an attachment to the mailing
list message, but that could interfere with inbox searching, or mobile
clients.
On the other hand, replacing the From address might cause users to
accidentally reply to the entire list, when they only intended to reply to
the original sender.
Choose the option that best fits your community.
:::
##### Mailman 3
In the DMARC Mitigations tab of the Settings page, configure the settings below
```{eval-rst}
================================== ===============================
**Setting** **Value**
**DMARC mitigation action** Replace From: with list address
**DMARC mitigate unconditionally** No
================================== ===============================
```
:::{note}
Message wrapping could be used as the DMARC mitigation action instead. In
that case, the original message is added as an attachment to the mailing
list message, but that could interfere with inbox searching, or mobile
clients.
On the other hand, replacing the From address might cause users to
accidentally reply to the entire list, when they only intended to reply to
the original sender.
:::
[joe nelson]: https://begriffs.com/posts/2018-09-18-dmarc-mailing-list.html
[listserv 16.0-2017a]: https://www.lsoft.com/news/dmarc-issue1-2018.asp
[rfc 2369]: https://tools.ietf.org/html/rfc2369
[rfc 2919]: https://tools.ietf.org/html/rfc2919

14
docs/source/opensearch.md Normal file
View File

@@ -0,0 +1,14 @@
# OpenSearch and Grafana
To set up visual dashboards of DMARC data, install OpenSearch and Grafana.
## Installation
OpenSearch: https://opensearch.org/docs/latest/install-and-configure/install-opensearch/index/
Grafana: https://grafana.com/docs/grafana/latest/setup-grafana/installation/
## Records retention
Starting in version 5.0.0, `parsedmarc` stores data in a separate
index for each day to make it easy to comply with records
retention regulations such as GDPR.

241
docs/source/output.md Normal file
View File

@@ -0,0 +1,241 @@
# Sample outputs
## Sample aggregate report output
Here are the results from parsing the [example](https://dmarc.org/wiki/FAQ#I_need_to_implement_aggregate_reports.2C_what_do_they_look_like.3F)
report from the dmarc.org wiki. It's actually an older draft of
the 1.0 report schema standardized in
[RFC 7489 Appendix C](https://tools.ietf.org/html/rfc7489#appendix-C).
This draft schema is still in wide use.
`parsedmarc` produces consistent, normalized output, regardless
of the report schema.
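Output like the samples below can be produced from the CLI with the `-o` option,
which writes the JSON and CSV files to the given directory; the input path here is
hypothetical:

```bash
# Parse a single aggregate report and write JSON/CSV files to ./output
parsedmarc -o ./output /path/to/aggregate_report.xml
```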
### JSON aggregate report
```json
{
"xml_schema": "draft",
"report_metadata": {
"org_name": "acme.com",
"org_email": "noreply-dmarc-support@acme.com",
"org_extra_contact_info": "http://acme.com/dmarc/support",
"report_id": "9391651994964116463",
"begin_date": "2012-04-27 20:00:00",
"end_date": "2012-04-28 19:59:59",
"timespan_requires_normalization": false,
"original_timespan_seconds": 86399,
"errors": []
},
"policy_published": {
"domain": "example.com",
"adkim": "r",
"aspf": "r",
"p": "none",
"sp": "none",
"pct": "100",
"fo": "0"
},
"records": [
{
"source": {
"ip_address": "72.150.241.94",
"country": "US",
"reverse_dns": null,
"base_domain": null,
"name": null,
"type": null
},
"count": 2,
"alignment": {
"spf": true,
"dkim": false,
"dmarc": true
},
"policy_evaluated": {
"disposition": "none",
"dkim": "fail",
"spf": "pass",
"policy_override_reasons": []
},
"identifiers": {
"header_from": "example.com",
"envelope_from": "example.com",
"envelope_to": null
},
"auth_results": {
"dkim": [
{
"domain": "example.com",
"selector": "none",
"result": "fail"
}
],
"spf": [
{
"domain": "example.com",
"scope": "mfrom",
"result": "pass"
}
]
},
"normalized_timespan": false,
"interval_begin": "2012-04-28 00:00:00",
"interval_end": "2012-04-28 23:59:59"
}
]
}
```
### CSV aggregate report
```text
xml_schema,org_name,org_email,org_extra_contact_info,report_id,begin_date,end_date,normalized_timespan,errors,domain,adkim,aspf,p,sp,pct,fo,source_ip_address,source_country,source_reverse_dns,source_base_domain,source_name,source_type,count,spf_aligned,dkim_aligned,dmarc_aligned,disposition,policy_override_reasons,policy_override_comments,envelope_from,header_from,envelope_to,dkim_domains,dkim_selectors,dkim_results,spf_domains,spf_scopes,spf_results
draft,acme.com,noreply-dmarc-support@acme.com,http://acme.com/dmarc/support,9391651994964116463,2012-04-28 00:00:00,2012-04-28 23:59:59,False,,example.com,r,r,none,none,100,0,72.150.241.94,US,,,,,2,True,False,True,none,,,example.com,example.com,,example.com,none,fail,example.com,mfrom,pass
draft,acme.com,noreply-dmarc-support@acme.com,http://acme.com/dmarc/support,9391651994964116463,2012-04-28 00:00:00,2012-04-28 23:59:59,False,,example.com,r,r,none,none,100,0,72.150.241.94,US,,,,,2,True,False,True,none,,,example.com,example.com,,example.com,none,fail,example.com,mfrom,pass
```
## Sample forensic report output
Thanks to GitHub user [xennn](https://github.com/xennn) for the anonymized
[forensic report email sample](<https://github.com/domainaware/parsedmarc/raw/master/samples/forensic/DMARC%20Failure%20Report%20for%20domain.de%20(mail-from%3Dsharepoint%40domain.de%2C%20ip%3D10.10.10.10).eml>).
### JSON forensic report
```json
{
"feedback_type": "auth-failure",
"user_agent": "Lua/1.0",
"version": "1.0",
"original_mail_from": "sharepoint@domain.de",
"original_rcpt_to": "peter.pan@domain.de",
"arrival_date": "Mon, 01 Oct 2018 11:20:27 +0200",
"message_id": "<38.E7.30937.BD6E1BB5@ mailrelay.de>",
"authentication_results": "dmarc=fail (p=none, dis=none) header.from=domain.de",
"delivery_result": "policy",
"auth_failure": [
"dmarc"
],
"reported_domain": "domain.de",
"arrival_date_utc": "2018-10-01 09:20:27",
"source": {
"ip_address": "10.10.10.10",
"country": null,
"reverse_dns": null,
"base_domain": null
},
"authentication_mechanisms": [],
"original_envelope_id": null,
"dkim_domain": null,
"sample_headers_only": false,
"sample": "Received: from Servernameone.domain.local (Servernameone.domain.local [10.10.10.10])\n\tby mailrelay.de (mail.DOMAIN.de) with SMTP id 38.E7.30937.BD6E1BB5; Mon, 1 Oct 2018 11:20:27 +0200 (CEST)\nDate: 01 Oct 2018 11:20:27 +0200\nMessage-ID: <38.E7.30937.BD6E1BB5@ mailrelay.de>\nTo: <peter.pan@domain.de>\nfrom: \"=?utf-8?B?SW50ZXJha3RpdmUgV2V0dGJld2VyYmVyLcOcYmVyc2ljaHQ=?=\" <sharepoint@domain.de>\nSubject: Subject\nMIME-Version: 1.0\nX-Mailer: Microsoft SharePoint Foundation 2010\nContent-Type: text/html; charset=utf-8\nContent-Transfer-Encoding: quoted-printable\n\n<html><head><base href=3D'\nwettbewerb' /></head><body><!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 3.2//EN\"=\n><HTML><HEAD><META NAME=3D\"Generator\" CONTENT=3D\"MS Exchange Server version=\n 08.01.0240.003\"></html>\n",
"parsed_sample": {
"from": {
"display_name": "Interaktive Wettbewerber-Übersicht",
"address": "sharepoint@domain.de",
"local": "sharepoint",
"domain": "domain.de"
},
"to_domains": [
"domain.de"
],
"to": [
{
"display_name": null,
"address": "peter.pan@domain.de",
"local": "peter.pan",
"domain": "domain.de"
}
],
"subject": "Subject",
"timezone": "+2",
"mime-version": "1.0",
"date": "2018-10-01 09:20:27",
"content-type": "text/html; charset=utf-8",
"x-mailer": "Microsoft SharePoint Foundation 2010",
"body": "<html><head><base href='\nwettbewerb' /></head><body><!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 3.2//EN\"><HTML><HEAD><META NAME=\"Generator\" CONTENT=\"MS Exchange Server version 08.01.0240.003\"></html>",
"received": [
{
"from": "Servernameone.domain.local Servernameone.domain.local 10.10.10.10",
"by": "mailrelay.de mail.DOMAIN.de",
"with": "SMTP id 38.E7.30937.BD6E1BB5",
"date": "Mon, 1 Oct 2018 11:20:27 +0200 CEST",
"hop": 1,
"date_utc": "2018-10-01 09:20:27",
"delay": 0
}
],
"content-transfer-encoding": "quoted-printable",
"message-id": "<38.E7.30937.BD6E1BB5@ mailrelay.de>",
"has_defects": false,
"headers": {
"Received": "from Servernameone.domain.local (Servernameone.domain.local [10.10.10.10])\n\tby mailrelay.de (mail.DOMAIN.de) with SMTP id 38.E7.30937.BD6E1BB5; Mon, 1 Oct 2018 11:20:27 +0200 (CEST)",
"Date": "01 Oct 2018 11:20:27 +0200",
"Message-ID": "<38.E7.30937.BD6E1BB5@ mailrelay.de>",
"To": "<peter.pan@domain.de>",
"from": "\"Interaktive Wettbewerber-Übersicht\" <sharepoint@domain.de>",
"Subject": "Subject",
"MIME-Version": "1.0",
"X-Mailer": "Microsoft SharePoint Foundation 2010",
"Content-Type": "text/html; charset=utf-8",
"Content-Transfer-Encoding": "quoted-printable"
},
"reply_to": [],
"cc": [],
"bcc": [],
"attachments": [],
"filename_safe_subject": "Subject"
}
}
```
### CSV forensic report
```text
feedback_type,user_agent,version,original_envelope_id,original_mail_from,original_rcpt_to,arrival_date,arrival_date_utc,subject,message_id,authentication_results,dkim_domain,source_ip_address,source_country,source_reverse_dns,source_base_domain,delivery_result,auth_failure,reported_domain,authentication_mechanisms,sample_headers_only
auth-failure,Lua/1.0,1.0,,sharepoint@domain.de,peter.pan@domain.de,"Mon, 01 Oct 2018 11:20:27 +0200",2018-10-01 09:20:27,Subject,<38.E7.30937.BD6E1BB5@ mailrelay.de>,"dmarc=fail (p=none, dis=none) header.from=domain.de",,10.10.10.10,,,,policy,dmarc,domain.de,,False
```
### JSON SMTP TLS report
```json
[
{
"organization_name": "Example Inc.",
"begin_date": "2024-01-09T00:00:00Z",
"end_date": "2024-01-09T23:59:59Z",
"report_id": "2024-01-09T00:00:00Z_example.com",
"policies": [
{
"policy_domain": "example.com",
"policy_type": "sts",
"policy_strings": [
"version: STSv1",
"mode: testing",
"mx: example.com",
"max_age: 86400"
],
"successful_session_count": 0,
"failed_session_count": 3,
"failure_details": [
{
"result_type": "validation-failure",
"failed_session_count": 2,
"sending_mta_ip": "209.85.222.201",
"receiving_ip": "173.212.201.41",
"receiving_mx_hostname": "example.com"
},
{
"result_type": "validation-failure",
"failed_session_count": 1,
"sending_mta_ip": "209.85.208.176",
"receiving_ip": "173.212.201.41",
"receiving_mx_hostname": "example.com"
}
]
}
]
}
]
```

22
docs/source/splunk.md Normal file
View File

@@ -0,0 +1,22 @@
# Splunk
Starting in version 4.3.0 `parsedmarc` supports sending aggregate and/or
forensic DMARC data to a Splunk [HTTP Event collector (HEC)].
The project repository contains [XML files] for premade Splunk
dashboards for aggregate and forensic DMARC reports.
Copy and paste the contents of each file into a separate Splunk
dashboard XML editor.
:::{warning}
Change all occurrences of `index="email"` in the XML to
match your own index name.
:::
The Splunk dashboards display the same content and layout as the
Kibana dashboards, although the Kibana dashboards have slightly
easier and more flexible filtering options.
[xml files]: https://github.com/domainaware/parsedmarc/tree/master/splunk
[http event collector (hec)]: http://docs.splunk.com/Documentation/Splunk/latest/Data/AboutHEC

553
docs/source/usage.md Normal file
View File

@@ -0,0 +1,553 @@
# Using parsedmarc
## CLI help
```text
usage: parsedmarc [-h] [-c CONFIG_FILE] [--strip-attachment-payloads] [-o OUTPUT]
[--aggregate-json-filename AGGREGATE_JSON_FILENAME] [--forensic-json-filename FORENSIC_JSON_FILENAME]
[--smtp-tls-json-filename SMTP_TLS_JSON_FILENAME] [--aggregate-csv-filename AGGREGATE_CSV_FILENAME]
[--forensic-csv-filename FORENSIC_CSV_FILENAME] [--smtp-tls-csv-filename SMTP_TLS_CSV_FILENAME]
[-n NAMESERVERS [NAMESERVERS ...]] [-t DNS_TIMEOUT] [--offline] [-s] [-w] [--verbose] [--debug]
[--log-file LOG_FILE] [--no-prettify-json] [-v]
[file_path ...]
Parses DMARC reports
positional arguments:
file_path one or more paths to aggregate or forensic report files, emails, or mbox files'
options:
-h, --help show this help message and exit
-c CONFIG_FILE, --config-file CONFIG_FILE
a path to a configuration file (--silent implied)
--strip-attachment-payloads
remove attachment payloads from forensic report output
-o OUTPUT, --output OUTPUT
write output files to the given directory
--aggregate-json-filename AGGREGATE_JSON_FILENAME
filename for the aggregate JSON output file
--forensic-json-filename FORENSIC_JSON_FILENAME
filename for the forensic JSON output file
--smtp-tls-json-filename SMTP_TLS_JSON_FILENAME
filename for the SMTP TLS JSON output file
--aggregate-csv-filename AGGREGATE_CSV_FILENAME
filename for the aggregate CSV output file
--forensic-csv-filename FORENSIC_CSV_FILENAME
filename for the forensic CSV output file
--smtp-tls-csv-filename SMTP_TLS_CSV_FILENAME
filename for the SMTP TLS CSV output file
-n NAMESERVERS [NAMESERVERS ...], --nameservers NAMESERVERS [NAMESERVERS ...]
nameservers to query
-t DNS_TIMEOUT, --dns_timeout DNS_TIMEOUT
number of seconds to wait for an answer from DNS (default: 2.0)
--offline do not make online queries for geolocation or DNS
-s, --silent only print errors
-w, --warnings print warnings in addition to errors
--verbose more verbose output
--debug print debugging information
--log-file LOG_FILE output logging to a file
--no-prettify-json output JSON in a single line without indentation
-v, --version show program's version number and exit
```
:::{note}
Starting in `parsedmarc` 6.0.0, most CLI options were moved to a
configuration file, described below.
:::
## Configuration file
`parsedmarc` can be configured by supplying the path to an INI file
```bash
parsedmarc -c /etc/parsedmarc.ini
```
For example
```ini
# This is an example comment
[general]
save_aggregate = True
save_forensic = True
[imap]
host = imap.example.com
user = dmarcresports@example.com
password = $uperSecure
[mailbox]
watch = True
delete = False
[elasticsearch]
hosts = 127.0.0.1:9200
ssl = False
[opensearch]
hosts = https://admin:admin@127.0.0.1:9200
ssl = True
[splunk_hec]
url = https://splunkhec.example.com
token = HECTokenGoesHere
index = email
[s3]
bucket = my-bucket
path = parsedmarc
[syslog]
server = localhost
port = 514
[gelf]
host = logger
port = 12201
mode = tcp
[webhook]
aggregate_url = https://aggregate_url.example.com
forensic_url = https://forensic_url.example.com
smtp_tls_url = https://smtp_tls_url.example.com
timeout = 60
```
The full set of configuration options is:
- `general`
- `save_aggregate` - bool: Save aggregate report data to
Elasticsearch, Splunk and/or S3
- `save_forensic` - bool: Save forensic report data to
Elasticsearch, Splunk and/or S3
- `save_smtp_tls` - bool: Save SMTP TLS report data to
Elasticsearch, Splunk and/or S3
- `index_prefix_domain_map` - str: A path to a mapping of Opensearch/Elasticsearch index prefixes to domain names
- `strip_attachment_payloads` - bool: Remove attachment
payloads from results
- `silent` - bool: Set this to `False` to output results to STDOUT
- `output` - str: Directory to place JSON and CSV files in. This is required if you set either of the JSON output file options.
- `aggregate_json_filename` - str: filename for the aggregate
JSON output file
- `forensic_json_filename` - str: filename for the forensic
JSON output file
- `ip_db_path` - str: An optional custom path to a MMDB file
from MaxMind or DBIP
- `offline` - bool: Do not use online queries for geolocation
or DNS
- `always_use_local_files` - Disables the download of the reverse DNS map
- `local_reverse_dns_map_path` - Overrides the default local file path to use for the reverse DNS map
- `reverse_dns_map_url` - Overrides the default download URL for the reverse DNS map
- `nameservers` - str: A comma separated list of
DNS resolvers (Default: `[Cloudflare's public resolvers]`)
- `dns_test_address` - str: a dummy address used for DNS pre-flight checks
(Default: 1.1.1.1)
- `dns_timeout` - float: DNS timeout period
- `debug` - bool: Print debugging messages
- `silent` - bool: Only print errors (Default: `True`)
- `log_file` - str: Write log messages to a file at this path
- `n_procs` - int: Number of processes to run in parallel when
parsing in CLI mode (Default: `1`)
:::{note}
Setting this to a number larger than one can improve
performance when processing thousands of files
:::
- `mailbox`
- `reports_folder` - str: The mailbox folder (or label for
Gmail) where the incoming reports can be found
(Default: `INBOX`)
- `archive_folder` - str: The mailbox folder (or label for
Gmail) to sort processed emails into (Default: `Archive`)
- `watch` - bool: Use the IMAP `IDLE` command to process
messages as they arrive or poll MS Graph for new messages
- `delete` - bool: Delete messages after processing them,
instead of archiving them
- `test` - bool: Do not move or delete messages
- `batch_size` - int: Number of messages to read and process
before saving. Default `10`. Use `0` for no limit.
- `check_timeout` - int: Number of seconds to wait for a IMAP
IDLE response or the number of seconds until the next
mail check (Default: `30`)
- `since` - str: Search for messages since a certain time. (Examples: `5m|3h|2d|1w`)
Acceptable units - {"m":"minutes", "h":"hours", "d":"days", "w":"weeks"}.
Defaults to `1d` if an incorrect value is provided.
- `imap`
- `host` - str: The IMAP server hostname or IP address
- `port` - int: The IMAP server port (Default: `993`)
:::{note}
`%` characters must be escaped with another `%` character,
so use `%%` wherever a `%` character is used.
:::
:::{note}
Starting in version 8.0.0, most options from the `imap`
section have been moved to the `mailbox` section.
:::
:::{note}
If your host recommends another port, still try 993
:::
- `ssl` - bool: Use an encrypted SSL/TLS connection
(Default: `True`)
- `skip_certificate_verification` - bool: Skip certificate
verification (not recommended)
- `user` - str: The IMAP user
- `password` - str: The IMAP password
- `msgraph`
- `auth_method` - str: Authentication method, valid types are
`UsernamePassword`, `DeviceCode`, or `ClientSecret`
(Default: `UsernamePassword`).
- `user` - str: The M365 user, required when the auth method is
UsernamePassword
- `password` - str: The user password, required when the auth
method is UsernamePassword
- `client_id` - str: The app registration's client ID
- `client_secret` - str: The app registration's secret
- `tenant_id` - str: The Azure AD tenant ID. This is required
for all auth methods except UsernamePassword.
- `mailbox` - str: The mailbox name. This defaults to the
current user if using the UsernamePassword auth method, but
could be a shared mailbox if the user has access to the mailbox
- `graph_url` - str: Microsoft Graph URL. Allows for use of national clouds (e.g. Azure Government)
(Default: https://graph.microsoft.com)
- `token_file` - str: Path to save the token file
(Default: `.token`)
- `allow_unencrypted_storage` - bool: Allows the Azure Identity
module to fall back to unencrypted token cache (Default: `False`).
Even if enabled, the cache will always try encrypted storage first.
:::{note}
You must create an app registration in Azure AD and have an
admin grant the Microsoft Graph `Mail.ReadWrite`
(delegated) permission to the app. If you are using
`UsernamePassword` auth and the mailbox is different from the
username, you must grant the app `Mail.ReadWrite.Shared`.
:::
:::{warning}
If you are using the `ClientSecret` auth method, you need to
grant the `Mail.ReadWrite` (application) permission to the
app. You must also restrict the application's access to a
specific mailbox since it allows all mailboxes by default.
Use the `New-ApplicationAccessPolicy` command in the
Exchange PowerShell module. If you need to scope the policy to
shared mailboxes, you can add them to a mail enabled security
group and use that as the group id.
```powershell
New-ApplicationAccessPolicy -AccessRight RestrictAccess
-AppId "<CLIENT_ID>" -PolicyScopeGroupId "<MAILBOX>"
-Description "Restrict access to dmarc reports mailbox."
```
:::
- `elasticsearch`
- `hosts` - str: A comma separated list of hostnames and ports
or URLs (e.g. `127.0.0.1:9200` or
`https://user:secret@localhost`)
:::{note}
Special characters in the username or password must be
[URL encoded].
:::
- `user` - str: Basic auth username
- `password` - str: Basic auth password
- `api_key` - str: API key
- `ssl` - bool: Use an encrypted SSL/TLS connection
(Default: `True`)
- `timeout` - float: Timeout in seconds (Default: 60)
- `cert_path` - str: Path to trusted certificates
- `index_suffix` - str: A suffix to apply to the index names
- `index_prefix` - str: A prefix to apply to the index names
- `monthly_indexes` - bool: Use monthly indexes instead of daily indexes
- `number_of_shards` - int: The number of shards to use when
creating the index (Default: `1`)
- `number_of_replicas` - int: The number of replicas to use when
creating the index (Default: `0`)
- `opensearch`
- `hosts` - str: A comma separated list of hostnames and ports
or URLs (e.g. `127.0.0.1:9200` or
`https://user:secret@localhost`)
:::{note}
Special characters in the username or password must be
[URL encoded].
:::
- `user` - str: Basic auth username
- `password` - str: Basic auth password
- `api_key` - str: API key
- `ssl` - bool: Use an encrypted SSL/TLS connection
(Default: `True`)
- `timeout` - float: Timeout in seconds (Default: 60)
- `cert_path` - str: Path to trusted certificates
- `index_suffix` - str: A suffix to apply to the index names
- `index_prefix` - str: A prefix to apply to the index names
- `monthly_indexes` - bool: Use monthly indexes instead of daily indexes
- `number_of_shards` - int: The number of shards to use when
creating the index (Default: `1`)
- `number_of_replicas` - int: The number of replicas to use when
creating the index (Default: `0`)
- `splunk_hec`
- `url` - str: The URL of the Splunk HTTP Events Collector (HEC)
- `token` - str: The HEC token
- `index` - str: The Splunk index to use
- `skip_certificate_verification` - bool: Skip certificate
verification (not recommended)
- `kafka`
- `hosts` - str: A comma separated list of Kafka hosts
- `user` - str: The Kafka user
- `password` - str: The Kafka password
- `ssl` - bool: Use an encrypted SSL/TLS connection (Default: `True`)
- `skip_certificate_verification` - bool: Skip certificate
verification (not recommended)
- `aggregate_topic` - str: The Kafka topic for aggregate reports
- `forensic_topic` - str: The Kafka topic for forensic reports
- `smtp`
- `host` - str: The SMTP hostname
- `port` - int: The SMTP port (Default: `25`)
- `ssl` - bool: Require SSL/TLS instead of using STARTTLS
- `skip_certificate_verification` - bool: Skip certificate
verification (not recommended)
- `user` - str: the SMTP username
- `password` - str: the SMTP password
- `from` - str: The From header to use in the email
- `to` - list: A list of email addresses to send to
- `subject` - str: The Subject header to use in the email
(Default: `parsedmarc report`)
- `attachment` - str: The ZIP attachment filenames
- `message` - str: The email message
(Default: `Please see the attached parsedmarc report.`)
:::{note}
`%` characters must be escaped with another `%` character,
so use `%%` wherever a `%` character is used.
:::
- `s3`
- `bucket` - str: The S3 bucket name
- `path` - str: The path to upload reports to (Default: `/`)
- `region_name` - str: The region name (Optional)
- `endpoint_url` - str: The endpoint URL (Optional)
- `access_key_id` - str: The access key id (Optional)
- `secret_access_key` - str: The secret access key (Optional)
- `syslog`
- `server` - str: The Syslog server name or IP address
- `port` - int: The UDP port to use (Default: `514`)
- `gmail_api`
- `credentials_file` - str: Path to file containing the
credentials, None to disable (Default: `None`)
- `token_file` - str: Path to save the token file
(Default: `.token`)
:::{note}
The `credentials_file` and `token_file` can be obtained by following the Gmail API [quickstart](https://developers.google.com/gmail/api/quickstart/python). Be sure to change the scope to `https://www.googleapis.com/auth/gmail.modify`.
:::
- `include_spam_trash` - bool: Include messages in Spam and
Trash when searching reports (Default: `False`)
- `scopes` - str: Comma separated list of scopes to use when
acquiring credentials
(Default: `https://www.googleapis.com/auth/gmail.modify`)
- `oauth2_port` - int: The TCP port for the local server to
listen on for the OAuth2 response (Default: `8080`)
- `paginate_messages` - bool: When `True`, fetch all applicable Gmail messages.
When `False`, only fetch up to 100 new messages per run (Default: `True`)
- `log_analytics`
- `client_id` - str: The app registration's client ID
- `client_secret` - str: The app registration's client secret
- `tenant_id` - str: The tenant id where the app registration resides
- `dce` - str: The Data Collection Endpoint (DCE). Example: `https://{DCE-NAME}.{REGION}.ingest.monitor.azure.com`.
- `dcr_immutable_id` - str: The immutable ID of the Data Collection Rule (DCR)
- `dcr_aggregate_stream` - str: The stream name for aggregate reports in the DCR
- `dcr_forensic_stream` - str: The stream name for the forensic reports in the DCR
- `dcr_smtp_tls_stream` - str: The stream name for the SMTP TLS reports in the DCR
:::{note}
Information regarding the setup of the Data Collection Rule can be found [here](https://learn.microsoft.com/en-us/azure/azure-monitor/logs/tutorial-logs-ingestion-portal).
:::
- `gelf`
- `host` - str: The GELF server name or IP address
- `port` - int: The port to use
- `mode` - str: The GELF transport type to use. Valid modes: `tcp`, `udp`, `tls`
- `maildir`
- `maildir_path` - str: Full path of the mailbox maildir location (Default: `INBOX`)
- `maildir_create` - bool: Create maildir if not present (Default: False)
- `webhook` - Post the individual reports to a webhook URL with the report as the JSON body (see the example receiver sketch after this list)
- `aggregate_url` - str: URL of the webhook that should receive the aggregate reports
- `forensic_url` - str: URL of the webhook that should receive the forensic reports
- `smtp_tls_url` - str: URL of the webhook that should receive the SMTP TLS reports
- `timeout` - int: Timeout in seconds for the webhook call
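Because each report is delivered as a plain HTTP `POST` with the report as the JSON body, any small HTTP listener can be used to verify the webhook output before wiring up a real endpoint. The following is a minimal sketch of such a receiver; the port (`8000`) and the URL paths are arbitrary examples and only need to match the URLs configured above.
```python
# A minimal test receiver (a sketch, not part of parsedmarc itself).
# It assumes only what is documented above: parsedmarc POSTs each report
# as a JSON body to the configured webhook URL.
from http.server import BaseHTTPRequestHandler, HTTPServer
import json


class ReportHandler(BaseHTTPRequestHandler):
    def do_POST(self):
        length = int(self.headers.get("Content-Length", 0))
        body = self.rfile.read(length)
        try:
            report = json.loads(body)
        except json.JSONDecodeError:
            self.send_response(400)
            self.end_headers()
            return
        # The request path shows which of the configured URLs was called
        print(f"{self.path}: received a {type(report).__name__} payload "
              f"({len(body)} bytes)")
        self.send_response(200)
        self.end_headers()


if __name__ == "__main__":
    HTTPServer(("127.0.0.1", 8000), ReportHandler).serve_forever()
```
While testing, point `aggregate_url`, `forensic_url`, and `smtp_tls_url` at this listener, for example `aggregate_url = http://127.0.0.1:8000/aggregate`.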
:::{warning}
It is **strongly recommended** to **not** use the `nameservers`
setting. By default, `parsedmarc` uses
[Cloudflare's public resolvers], which are much faster and more
reliable than Google, Cisco OpenDNS, or even most local resolvers.
The `nameservers` option should only be used if your network
blocks DNS requests to outside resolvers.
:::
:::{note}
`save_aggregate` and `save_forensic` are separate options
because you may not want to save forensic reports
(also known as failure reports) to your Elasticsearch instance,
particularly if you are in a highly-regulated industry that
handles sensitive data, such as healthcare or finance. If your
legitimate outgoing email fails DMARC, it is possible
that email may appear later in a forensic report.
Forensic reports contain the original headers of an email that
failed a DMARC check, and sometimes may also include the
full message body, depending on the policy of the reporting
organization.
Most reporting organizations do not send forensic reports of any
kind for privacy reasons. While aggregate DMARC reports are sent
at least daily, it is normal to receive very few forensic reports.
An alternative approach is to still collect forensic/failure/ruf
reports in your DMARC inbox, but run `parsedmarc` with
`save_forensic = True` manually on a separate IMAP folder (using
the `reports_folder` option), after you have manually moved
known samples you want to save to that folder
(e.g. malicious samples and non-sensitive legitimate samples).
:::
:::{warning}
Elasticsearch 8 changed the limits policy for shards, restricting
nodes to 1000 shards by default. parsedmarc uses one shard per analyzed
day, so if you have more than ~3 years of data, you will need to raise
this limit.
Check current usage (from Management -> Dev Tools -> Console):
```text
GET /_cluster/health?pretty
{
...
"active_primary_shards": 932,
"active_shards": 932,
...
}
```
For example, update the limit to 2000:
```text
PUT _cluster/settings
{
"persistent" : {
"cluster.max_shards_per_node" : 2000
}
}
```
Increasing this value increases resource usage.
:::
## Multi-tenant support
Starting in `8.19.0`, ParseDMARC provides multi-tenant support by placing data into separate OpenSearch or Elasticsearch index prefixes. To set this up, create a YAML file where each key is a tenant name and the value is a list of domains related to that tenant (not including subdomains), like this:
```yaml
example:
- example.com
- example.net
- example.org
whalensolutions:
- whalensolutions.com
```
Save it to disk where the user running ParseDMARC can read it, then set `index_prefix_domain_map` to that filepath in the `[general]` section of the ParseDMARC configuration file and do not set an `index_prefix` option in the `[elasticsearch]` or `[opensearch]` sections.
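For example, if the mapping above were saved to `/etc/parsedmarc/tenants.yml` (an example path), the relevant parts of the configuration file would look like this; the Elasticsearch host and credentials are placeholders:
```ini
[general]
save_aggregate = True
index_prefix_domain_map = /etc/parsedmarc/tenants.yml

[elasticsearch]
hosts = https://user:secret@localhost:9200
ssl = True
# Do not set index_prefix here; the tenant name supplies the prefix
```
With this configuration, an aggregate report for `example.com` would be written to an index named like `example_dmarc_aggregate-YYYY-MM-DD`.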
When configured correctly, if ParseDMARC finds that a report is related to a domain in the mapping, the report is saved to an index whose name is prefixed with the tenant name and a trailing underscore. You can then use the security features of OpenSearch or the ELK stack to grant users access only to the indexes they need.
:::{note}
A domain cannot be used in multiple tenant lists. Only the first prefix list that contains the matching domain is used.
:::
## Running parsedmarc as a systemd service
Use systemd to run `parsedmarc` as a service and process reports as
they arrive.
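The service unit below runs as a dedicated `parsedmarc` user and group. If they do not already exist on your system, create them first (a sketch; adjust the flags and shell path to your distribution):
```bash
sudo useradd --system --user-group --no-create-home \
  --shell /usr/sbin/nologin parsedmarc
```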
Protect the `parsedmarc` configuration file from prying eyes
```bash
sudo chown root:parsedmarc /etc/parsedmarc.ini
sudo chmod u=rw,g=r,o= /etc/parsedmarc.ini
```
Create the service configuration file
```bash
sudo nano /etc/systemd/system/parsedmarc.service
```
```ini
[Unit]
Description=parsedmarc mailbox watcher
Documentation=https://domainaware.github.io/parsedmarc/
Wants=network-online.target
After=network.target network-online.target elasticsearch.service
[Service]
ExecStart=/opt/parsedmarc/venv/bin/parsedmarc -c /etc/parsedmarc.ini
User=parsedmarc
Group=parsedmarc
Restart=always
RestartSec=5m
[Install]
WantedBy=multi-user.target
```
Then, enable the service
```bash
sudo systemctl daemon-reload
sudo systemctl enable parsedmarc.service
sudo service parsedmarc restart
```
:::{note}
You must also run the above commands whenever you edit
`parsedmarc.service`.
:::
:::{warning}
Always restart the service every time you upgrade to a new version of
`parsedmarc`:
```bash
sudo service parsedmarc restart
```
:::
To check the status of the service, run:
```bash
service parsedmarc status
```
:::{note}
In the event of a crash, systemd will restart the service after the
`RestartSec` delay (five minutes in the unit file above), but the
`service parsedmarc status` command will only show the logs for the
current process. To view the logs for previous runs as well as the
current process (newest to oldest), run:
```bash
journalctl -u parsedmarc.service -r
```
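To follow new log messages live as they are written, run:
```bash
journalctl -u parsedmarc.service -f
```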
:::
[cloudflare's public resolvers]: https://1.1.1.1/
[url encoded]: https://en.wikipedia.org/wiki/Percent-encoding#Percent-encoding_reserved_characters

File diff suppressed because it is too large.

File diff suppressed because it is too large.

grafana/README.rst Normal file (1 line)

@@ -0,0 +1 @@
Dashboards contributed by Github user Bhozar.

Binary file not shown (added image, 136 KiB).

Binary file not shown (added image, 116 KiB).

Binary file not shown (added image, 172 KiB).

Binary file not shown (added image, 311 KiB).

Binary file not shown (added image, 15 KiB).

Binary file not shown (added image, 15 KiB).

kibana/export.ndjson Normal file (25 lines)

File diff suppressed because one or more lines are too long


@@ -1,4 +0,0 @@
#!/usr/bin/env bash
. ~/venv/domainaware/bin/activate
cd docs && make html && cp -r build/html/* ../../parsedmarc-docs/


@@ -1,629 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""A Python module and CLI for parsing aggregate DMARC reports"""
from __future__ import unicode_literals, print_function, absolute_import
import logging
from sys import version_info
from os import path, stat
import json
from datetime import datetime
from collections import OrderedDict
from datetime import timedelta
from io import BytesIO, StringIO
from gzip import GzipFile
import tarfile
from zipfile import ZipFile
from csv import DictWriter
import shutil
from argparse import ArgumentParser
from glob import glob
import publicsuffix
import xmltodict
import dns.reversename
import dns.resolver
import dns.exception
from requests import get
import geoip2.database
import geoip2.errors
__version__ = "1.0.0"
logger = logging.getLogger(__name__)
logger.setLevel(logging.WARNING)
# Python 2 compatibility hack
if version_info[0] >= 3:
unicode = str
class InvalidAggregateReport(Exception):
"""Raised when an invalid DMARC aggregate report is encountered"""
def _get_base_domain(domain):
"""
Gets the base domain name for the given domain
.. note::
Results are based on a list of public domain suffixes at
https://publicsuffix.org/list/public_suffix_list.dat.
This file is saved to the current working directory,
where it is used as a cache file for 24 hours.
Args:
domain (str): A domain or subdomain
Returns:
str: The base domain of the given domain
"""
psl_path = "public_suffix_list.dat"
def download_psl():
fresh_psl = publicsuffix.fetch()
with open(psl_path, "w", encoding="utf-8") as fresh_psl_file:
fresh_psl_file.write(fresh_psl.read())
return publicsuffix.PublicSuffixList(fresh_psl)
if not path.exists(psl_path):
psl = download_psl()
else:
psl_age = datetime.now() - datetime.fromtimestamp(
stat(psl_path).st_mtime)
if psl_age > timedelta(hours=24):
psl = download_psl()
else:
with open(psl_path, encoding="utf-8") as psl_file:
psl = publicsuffix.PublicSuffixList(psl_file)
return psl.get_public_suffix(domain)
def _query_dns(domain, record_type, nameservers=None, timeout=6.0):
"""
Queries DNS
Args:
domain (str): The domain or subdomain to query about
record_type (str): The record type to query for
nameservers (list): A list of one or more nameservers to use
timeout (float): Sets the DNS timeout in seconds
Returns:
list: A list of answers
"""
resolver = dns.resolver.Resolver()
timeout = float(timeout)
if nameservers:
resolver.nameservers = nameservers
resolver.timeout = timeout
resolver.lifetime = timeout
return list(map(
lambda r: r.to_text().replace(' "', '').replace('"', '').rstrip("."),
resolver.query(domain, record_type, tcp=True)))
def _get_reverse_dns(ip_address, nameservers=None, timeout=6.0):
"""
Resolves an IP address to a hostname using a reverse DNS query
Args:
ip_address (str): The IP address to resolve
nameservers (list): A list of nameservers to query
timeout (float): Sets the DNS query timeout in seconds
Returns:
"""
hostname = None
try:
address = dns.reversename.from_address(ip_address)
hostname = _query_dns(address, "PTR",
nameservers=nameservers,
timeout=timeout)[0]
except dns.exception.DNSException:
pass
return hostname
def _timestamp_to_datetime(timestamp):
"""
Converts a UNIX/DMARC timestamp to a Python ``DateTime`` object
Args:
timestamp: The timestamp
Returns:
DateTime: The converted timestamp as a Python ``DateTime`` object
"""
return datetime.fromtimestamp(int(timestamp))
def _timestamp_to_human(timestamp):
"""
Converts a UNIX/DMARC timestamp to a human-readable string
Args:
timestamp: The timestamp
Returns:
str: The converted timestamp in ``YYYY-MM-DD HH:MM:SS`` format
"""
return _timestamp_to_datetime(timestamp).strftime("%Y-%m-%d %H:%M:%S")
def _human_timestamp_to_datetime(human_timestamp):
"""
Converts a human-readable timestamp into a Python ``DateTime`` object
Args:
human_timestamp (str): A timestamp in `YYYY-MM-DD HH:MM:SS`` format
Returns:
DateTime: The converted timestamp
"""
return datetime.strptime(human_timestamp, "%Y-%m-%d %H:%M:%S")
def _get_ip_address_country(ip_address):
"""
Uses the MaxMind Geolite2 Country database to return the ISO code for the
country associated with the given IPv4 or IPv6 address
Args:
ip_address (str): The IP address to query for
Returns:
str: An ISO country code associated with the given IP address
"""
db_filename = "GeoLite2-Country.mmdb"
def download_country_database():
"""Downloads the MaxMind Geolite2 Country database to the current
working directory"""
url = "https://geolite.maxmind.com/download/geoip/database/" \
"GeoLite2-Country.tar.gz"
tar_file = tarfile.open(fileobj=BytesIO(get(url).content), mode="r:gz")
tar_dir = tar_file.getnames()[0]
tar_path = "{0}/{1}".format(tar_dir, db_filename)
tar_file.extract(tar_path)
shutil.move(tar_path, ".")
shutil.rmtree(tar_dir)
system_paths = ["/usr/local/share/GeoIP/GeoLite2-Country.mmdb",
"/usr/share/GeoIP/GeoLite2-Country.mmdb"]
db_path = ""
for system_path in system_paths:
if path.exists(system_path):
db_path = system_path
break
if db_path == "":
if not path.exists(db_filename):
download_country_database()
else:
db_age = datetime.now() - datetime.fromtimestamp(
stat(db_filename).st_mtime)
if db_age > timedelta(days=60):
shutil.rmtree(db_path)
download_country_database()
db_path = db_filename
db_reader = geoip2.database.Reader(db_path)
country = None
try:
country = db_reader.country(ip_address).country.iso_code
except geoip2.errors.AddressNotFoundError:
pass
return country
def _parse_report_record(record, nameservers=None, timeout=6.0):
"""
Converts a record from a DMARC aggregate report into a more consistent
format
Args:
record (OrderedDict): The record to convert
nameservers (list): A list of one or more nameservers to use
timeout (float): Sets the DNS timeout in seconds
Returns:
OrderedDict: The converted record
"""
record = record.copy()
new_record = OrderedDict()
new_record["source"] = OrderedDict()
new_record["source"]["ip_address"] = record["row"]["source_ip"]
reverse_dns = _get_reverse_dns(new_record["source"]["ip_address"],
nameservers=nameservers,
timeout=timeout)
country = _get_ip_address_country(new_record["source"]["ip_address"])
new_record["source"]["country"] = country
new_record["source"]["reverse_dns"] = reverse_dns
new_record["source"]["base_domain"] = None
if new_record["source"]["reverse_dns"] is not None:
base_domain = _get_base_domain(new_record["source"]["reverse_dns"])
new_record["source"]["base_domain"] = base_domain
new_record["count"] = int(record["row"]["count"])
policy_evaluated = record["row"]["policy_evaluated"].copy()
new_policy_evaluated = OrderedDict([("disposition", "none"),
("dkim", "fail"),
("spf", "fail"),
("policy_override_reasons", [])
])
if "disposition" in policy_evaluated:
new_policy_evaluated["disposition"] = policy_evaluated["disposition"]
if "dkim" in policy_evaluated:
new_policy_evaluated["dkim"] = policy_evaluated["dkim"]
if "spf" in policy_evaluated:
new_policy_evaluated["spf"] = policy_evaluated["spf"]
reasons = []
if "reason" in policy_evaluated:
if type(policy_evaluated["reason"]) == list:
reasons = policy_evaluated["reason"]
else:
reasons = [policy_evaluated["reason"]]
for reason in reasons:
if "comment" not in reason:
reason["comment"] = "none"
reasons.append(reason)
new_policy_evaluated["policy_override_reasons"] = reasons
new_record["policy_evaluated"] = new_policy_evaluated
new_record["identifiers"] = record["identifiers"].copy()
new_record["auth_results"] = OrderedDict([("dkim", []), ("spf", [])])
auth_results = record["auth_results"].copy()
if "dkim" in auth_results:
if type(auth_results["dkim"]) != list:
auth_results["dkim"] = [auth_results["dkim"]]
for result in auth_results["dkim"]:
if "domain" in result and result["domain"] is not None:
new_result = OrderedDict([("domain", result["domain"])])
if "selector" in result and result["selector"] is not None:
new_result["selector"] = result["selector"]
else:
new_result["selector"] = "none"
if "result" in result and result["result"] is not None:
new_result["result"] = result["result"]
else:
new_result["result"] = "none"
new_record["auth_results"]["dkim"].append(new_result)
if type(auth_results["spf"]) != list:
auth_results["spf"] = [auth_results["spf"]]
for result in auth_results["spf"]:
new_result = OrderedDict([("domain", result["domain"])])
if "scope" in result and result["scope"] is not None:
new_result["scope"] = result["scope"]
else:
new_result["scope"] = "mfrom"
if "result" in result and result["result"] is not None:
new_result["result"] = result["result"]
else:
new_result["result"] = "none"
new_record["auth_results"]["spf"].append(new_result)
if "envelope_from" not in new_record["identifiers"]:
envelope_from = new_record["auth_results"]["spf"][-1]["domain"].lower()
new_record["identifiers"]["envelope_from"] = envelope_from
elif new_record["identifiers"]["envelope_from"] is None:
envelope_from = new_record["auth_results"]["spf"][-1]["domain"].lower()
new_record["identifiers"]["envelope_from"] = envelope_from
envelope_to = None
if "envelope_to" in new_record["identifiers"]:
envelope_to = new_record["identifiers"]["envelope_to"]
del new_record["identifiers"]["envelope_to"]
new_record["identifiers"]["envelope_to"] = envelope_to
return new_record
def parse_aggregate_report_xml(xml, nameservers=None, timeout=6.0):
"""Parses a DMARC XML report string and returns a consistent OrderedDict
Args:
xml (str): A string of DMARC aggregate report XML
nameservers (list): A list of one or more nameservers to use
timeout (float): Sets the DNS timeout in seconds
Returns:
OrderedDict: The parsed aggregate DMARC report
"""
try:
report = xmltodict.parse(xml)["feedback"]
report_metadata = report["report_metadata"]
schema = "draft"
if "version" in report:
schema = report["version"]
new_report = OrderedDict([("xml_schema", schema)])
new_report_metadata = OrderedDict()
new_report_metadata["org_name"] = report_metadata["org_name"]
new_report_metadata["org_email"] = report_metadata["email"]
extra = None
if "extra_contact_info" in report_metadata:
extra = report_metadata["extra_contact_info"]
new_report_metadata["org_extra_contact_info"] = extra
new_report_metadata["report_id"] = report_metadata["report_id"]
date_range = report["report_metadata"]["date_range"]
date_range["begin"] = _timestamp_to_human(date_range["begin"])
date_range["end"] = _timestamp_to_human(date_range["end"])
new_report_metadata["begin_date"] = date_range["begin"]
new_report_metadata["end_date"] = date_range["end"]
errors = []
if "error" in report["report_metadata"]:
if type(report["report_metadata"]["error"]) != list:
errors = [report["report_metadata"]["error"]]
else:
errors = report["report_metadata"]["error"]
new_report_metadata["errors"] = errors
new_report["report_metadata"] = new_report_metadata
records = []
policy_published = report["policy_published"]
new_policy_published = OrderedDict()
new_policy_published["domain"] = policy_published["domain"]
adkim = "r"
if "adkim" in policy_published:
if policy_published["adkim"] is not None:
adkim = policy_published["adkim"]
new_policy_published["adkim"] = adkim
aspf = "r"
if "aspf" in policy_published:
if policy_published["aspf"] is not None:
aspf = policy_published["aspf"]
new_policy_published["aspf"] = aspf
new_policy_published["p"] = policy_published["p"]
sp = new_policy_published["p"]
if "sp" in policy_published:
if policy_published["sp"] is not None:
sp = report["policy_published"]["sp"]
new_policy_published["sp"] = sp
pct = "100"
if "pct" in policy_published:
if policy_published["pct"] is not None:
pct = report["policy_published"]["pct"]
new_policy_published["pct"] = pct
fo = "0"
if "fo" in policy_published:
if policy_published["fo"] is not None:
fo = report["policy_published"]["fo"]
new_policy_published["fo"] = fo
new_report["policy_published"] = new_policy_published
if type(report["record"]) == list:
for record in report["record"]:
records.append(_parse_report_record(record,
nameservers=nameservers,
timeout=timeout))
else:
records.append(_parse_report_record(report["record"]))
new_report["records"] = records
return new_report
except KeyError as error:
raise InvalidAggregateReport("Missing field: "
"{0}".format(error.__str__()))
def parse_aggregate_report_file(_input, nameservers=None, timeout=6.0):
"""Parses a file at the given path, a file-like object. or bytes as a
aggregate DMARC report
Args:
_input: A path to a file, a file like object, or bytes
nameservers (list): A list of one or more nameservers to use
timeout (float): Sets the DNS timeout in seconds
Returns:
OrderedDict: The parsed DMARC aggregate report
"""
if type(_input) == str or type(_input) == unicode:
file_object = open(_input, "rb")
elif type(_input) == bytes:
file_object = BytesIO(_input)
else:
file_object = _input
try:
header = file_object.read(6)
file_object.seek(0)
if header.startswith(b"\x50\x4B\x03\x04"):
_zip = ZipFile(file_object)
xml = _zip.open(_zip.namelist()[0]).read().decode()
elif header.startswith(b"\x1F\x8B"):
xml = GzipFile(fileobj=file_object).read().decode()
elif header.startswith(b"\x3c\x3f\x78\x6d\x6c\x20"):
xml = file_object.read().decode()
else:
file_object.close()
raise InvalidAggregateReport("Not a valid zip, gzip, or xml file")
file_object.close()
except UnicodeDecodeError:
raise InvalidAggregateReport("File objects must be opened in binary "
"(rb) mode")
return parse_aggregate_report_xml(xml,
nameservers=nameservers,
timeout=timeout)
def parsed_aggregate_report_to_csv(_input):
"""
Converts one or more parsed aggregate reports to flat CSV format, including
headers
Args:
_input: A parsed aggregate report or list of parsed aggregate reports
Returns:
str: Parsed aggregate report data in flat CSV format, including headers
"""
fields = ["xml_schema", "org_name", "org_email",
"org_extra_contact_info", "report_id", "begin_date", "end_date",
"errors", "domain", "adkim", "aspf", "p", "sp", "pct", "fo",
"source_ip_address", "source_country", "source_reverse_dns",
"source_base_domain", "count", "disposition", "dkim_alignment",
"spf_alignment", "policy_override_reasons",
"policy_override_comments", "envelope_from", "header_from",
"envelope_to", "dkim_domains", "dkim_selectors", "dkim_results",
"spf_domains", "spf_scopes", "spf_results"]
csv_file_object = StringIO()
writer = DictWriter(csv_file_object, fields)
writer.writeheader()
if type(_input) == OrderedDict:
_input = [_input]
for report in _input:
xml_schema = report["xml_schema"]
org_name = report["report_metadata"]["org_name"]
org_email = report["report_metadata"]["org_email"]
org_extra_contact = report["report_metadata"]["org_extra_contact_info"]
report_id = report["report_metadata"]["report_id"]
begin_date = report["report_metadata"]["begin_date"]
end_date = report["report_metadata"]["end_date"]
errors = report["report_metadata"]["errors"]
domain = report["policy_published"]["domain"]
adkim = report["policy_published"]["adkim"]
aspf = report["policy_published"]["aspf"]
p = report["policy_published"]["p"]
sp = report["policy_published"]["sp"]
pct = report["policy_published"]["pct"]
fo = report["policy_published"]["fo"]
report_dict = dict(xml_schema=xml_schema, org_name=org_name,
org_email=org_email,
org_extra_contact_info=org_extra_contact,
report_id=report_id, begin_date=begin_date,
end_date=end_date, errors=errors, domain=domain,
adkim=adkim, aspf=aspf, p=p, sp=sp, pct=pct, fo=fo)
for record in report["records"]:
row = report_dict
row["source_ip_address"] = record["source"]["ip_address"]
row["source_country"] = record["source"]["country"]
row["source_reverse_dns"] = record["source"]["reverse_dns"]
row["source_base_domain"] = record["source"]["base_domain"]
row["count"] = record["count"]
row["disposition"] = record["policy_evaluated"]["disposition"]
row["spf_alignment"] = record["policy_evaluated"]["spf"]
row["dkim_alignment"] = record["policy_evaluated"]["dkim"]
policy_override_reasons = list(map(lambda r: r["type"],
record["policy_evaluated"]
["policy_override_reasons"]))
policy_override_comments = list(map(lambda r: r["comment"],
record["policy_evaluated"]
["policy_override_reasons"]))
row["policy_override_reasons"] = ",".join(
policy_override_reasons)
row["policy_override_comments"] = "|".join(
policy_override_comments)
row["envelope_from"] = record["identifiers"]["envelope_from"]
row["header_from"] = record["identifiers"]["header_from"]
envelope_to = record["identifiers"]["envelope_to"]
row["envelope_to"] = envelope_to
dkim_domains = []
dkim_selectors = []
dkim_results = []
for dkim_result in record["auth_results"]["dkim"]:
dkim_domains.append(dkim_result["domain"])
if "selector" in dkim_result:
dkim_selectors.append(dkim_result["selector"])
dkim_results.append(dkim_result["result"])
row["dkim_domains"] = ",".join(dkim_domains)
row["dkim_selectors"] = ",".join(dkim_selectors)
row["dkim_results"] = ",".join(dkim_results)
spf_domains = []
spf_scopes = []
spf_results = []
for spf_result in record["auth_results"]["spf"]:
spf_domains.append(spf_result["domain"])
spf_scopes.append(spf_result["scope"])
spf_results.append(spf_result["result"])
row["spf_domains"] = ",".join(spf_domains)
row["spf_scopes"] = ",".join(spf_scopes)
row["spf_results"] = ",".join(spf_results)
writer.writerow(row)
csv_file_object.flush()
return csv_file_object.getvalue()
def _main():
"""Called when the module in executed"""
arg_parser = ArgumentParser(description="Parses aggregate DMARC reports")
arg_parser.add_argument("file_path", nargs="+",
help="one or more paths of aggregate report "
"files (compressed or uncompressed)")
arg_parser.add_argument("-f", "--format", default="json",
help="specify JSON or CSV output format")
arg_parser.add_argument("-o", "--output",
help="output to a file path rather than "
"printing to the screen")
arg_parser.add_argument("-n", "--nameserver", nargs="+",
help="nameservers to query")
arg_parser.add_argument("-t", "--timeout",
help="number of seconds to wait for an answer "
"from DNS (default 6.0)",
type=float,
default=6.0)
arg_parser.add_argument("-v", "--version", action="version",
version=__version__)
args = arg_parser.parse_args()
file_paths = []
for file_path in args.file_path:
file_paths += glob(file_path)
file_paths = list(set(file_paths))
parsed_reports = []
for file_path in file_paths:
try:
report = parse_aggregate_report_file(file_path,
nameservers=args.nameserver,
timeout=args.timeout)
parsed_reports.append(report)
except InvalidAggregateReport as error:
logger.error("Unable to parse {0}: {1}".format(file_path,
error.__str__()))
output = ""
if args.format.lower() == "json":
if len(parsed_reports) == 1:
parsed_reports = parsed_reports[0]
output = json.dumps(parsed_reports,
ensure_ascii=False,
indent=2)
elif args.format.lower() == "csv":
output = parsed_aggregate_report_to_csv(parsed_reports)
else:
logger.error("Invalid output format: {0}".format(args.format))
exit(-1)
if args.output:
with open(args.output, "w", encoding="utf-8", newline="\n") as file:
file.write(output)
else:
print(output)
if __name__ == "__main__":
_main()

parsedmarc/__init__.py Normal file (2564 lines)

File diff suppressed because it is too large.

parsedmarc/cli.py Normal file (1696 lines)

File diff suppressed because it is too large.

parsedmarc/constants.py Normal file (3 lines)

@@ -0,0 +1,3 @@
__version__ = "9.0.5"
USER_AGENT = f"parsedmarc/{__version__}"

parsedmarc/elastic.py Normal file (855 lines)

@@ -0,0 +1,855 @@
# -*- coding: utf-8 -*-
from __future__ import annotations
from typing import Optional, Union, Any
from elasticsearch_dsl.search import Q
from elasticsearch_dsl import (
connections,
Object,
Document,
Index,
Nested,
InnerDoc,
Integer,
Text,
Boolean,
Ip,
Date,
Search,
)
from elasticsearch.helpers import reindex
from parsedmarc.log import logger
from parsedmarc.utils import human_timestamp_to_datetime
from parsedmarc import InvalidForensicReport
class ElasticsearchError(Exception):
"""Raised when an Elasticsearch error occurs"""
class _PolicyOverride(InnerDoc):
type = Text()
comment = Text()
class _PublishedPolicy(InnerDoc):
domain = Text()
adkim = Text()
aspf = Text()
p = Text()
sp = Text()
pct = Integer()
fo = Text()
class _DKIMResult(InnerDoc):
domain = Text()
selector = Text()
result = Text()
class _SPFResult(InnerDoc):
domain = Text()
scope = Text()
results = Text()
class _AggregateReportDoc(Document):
class Index:
name = "dmarc_aggregate"
xml_schema = Text()
org_name = Text()
org_email = Text()
org_extra_contact_info = Text()
report_id = Text()
date_range = Date()
date_begin = Date()
date_end = Date()
normalized_timespan = Boolean()
original_timespan_seconds = Integer()
errors = Text()
published_policy = Object(_PublishedPolicy)
source_ip_address = Ip()
source_country = Text()
source_reverse_dns = Text()
source_base_domain = Text()
source_type = Text()
source_name = Text()
message_count = Integer()
disposition = Text()
dkim_aligned = Boolean()
spf_aligned = Boolean()
passed_dmarc = Boolean()
policy_overrides = Nested(_PolicyOverride)
header_from = Text()
envelope_from = Text()
envelope_to = Text()
dkim_results = Nested(_DKIMResult)
spf_results = Nested(_SPFResult)
def add_policy_override(self, type_: str, comment: str):
self.policy_overrides.append(_PolicyOverride(type=type_, comment=comment))
def add_dkim_result(self, domain: str, selector: str, result: _DKIMResult):
self.dkim_results.append(
_DKIMResult(domain=domain, selector=selector, result=result)
)
def add_spf_result(self, domain: str, scope: str, result: _SPFResult):
self.spf_results.append(_SPFResult(domain=domain, scope=scope, result=result))
def save(self, **kwargs):
self.passed_dmarc = False
self.passed_dmarc = self.spf_aligned or self.dkim_aligned
return super().save(**kwargs)
class _EmailAddressDoc(InnerDoc):
display_name = Text()
address = Text()
class _EmailAttachmentDoc(Document):
filename = Text()
content_type = Text()
sha256 = Text()
class _ForensicSampleDoc(InnerDoc):
raw = Text()
headers = Object()
headers_only = Boolean()
to = Nested(_EmailAddressDoc)
subject = Text()
filename_safe_subject = Text()
_from = Object(_EmailAddressDoc)
date = Date()
reply_to = Nested(_EmailAddressDoc)
cc = Nested(_EmailAddressDoc)
bcc = Nested(_EmailAddressDoc)
body = Text()
attachments = Nested(_EmailAttachmentDoc)
def add_to(self, display_name: str, address: str):
self.to.append(_EmailAddressDoc(display_name=display_name, address=address))
def add_reply_to(self, display_name: str, address: str):
self.reply_to.append(
_EmailAddressDoc(display_name=display_name, address=address)
)
def add_cc(self, display_name: str, address: str):
self.cc.append(_EmailAddressDoc(display_name=display_name, address=address))
def add_bcc(self, display_name: str, address: str):
self.bcc.append(_EmailAddressDoc(display_name=display_name, address=address))
def add_attachment(self, filename: str, content_type: str, sha256: str):
self.attachments.append(
_EmailAttachmentDoc(
filename=filename, content_type=content_type, sha256=sha256
)
)
class _ForensicReportDoc(Document):
class Index:
name = "dmarc_forensic"
feedback_type = Text()
user_agent = Text()
version = Text()
original_mail_from = Text()
arrival_date = Date()
domain = Text()
original_envelope_id = Text()
authentication_results = Text()
delivery_results = Text()
source_ip_address = Ip()
source_country = Text()
source_reverse_dns = Text()
source_authentication_mechanisms = Text()
source_auth_failures = Text()
dkim_domain = Text()
original_rcpt_to = Text()
sample = Object(_ForensicSampleDoc)
class _SMTPTLSFailureDetailsDoc(InnerDoc):
result_type = Text()
sending_mta_ip = Ip()
receiving_mx_helo = Text()
receiving_ip = Ip()
failed_session_count = Integer()
additional_information_uri = Text()
failure_reason_code = Text()
class _SMTPTLSPolicyDoc(InnerDoc):
policy_domain = Text()
policy_type = Text()
policy_strings = Text()
mx_host_patterns = Text()
successful_session_count = Integer()
failed_session_count = Integer()
failure_details = Nested(_SMTPTLSFailureDetailsDoc)
def add_failure_details(
self,
result_type: Optional[str] = None,
ip_address: Optional[str] = None,
receiving_ip: Optional[str] = None,
receiving_mx_helo: Optional[str] = None,
failed_session_count: Optional[int] = None,
sending_mta_ip: Optional[str] = None,
receiving_mx_hostname: Optional[str] = None,
additional_information_uri: Optional[str] = None,
failure_reason_code: Union[str, int, None] = None,
):
_details = _SMTPTLSFailureDetailsDoc(
result_type=result_type,
ip_address=ip_address,
sending_mta_ip=sending_mta_ip,
receiving_mx_hostname=receiving_mx_hostname,
receiving_mx_helo=receiving_mx_helo,
receiving_ip=receiving_ip,
failed_session_count=failed_session_count,
additional_information_uri=additional_information_uri,
failure_reason_code=failure_reason_code,
)
self.failure_details.append(_details)
class _SMTPTLSReportDoc(Document):
class Index:
name = "smtp_tls"
organization_name = Text()
date_range = Date()
date_begin = Date()
date_end = Date()
contact_info = Text()
report_id = Text()
policies = Nested(_SMTPTLSPolicyDoc)
def add_policy(
self,
policy_type: str,
policy_domain: str,
successful_session_count: int,
failed_session_count: int,
*,
policy_string: Optional[str] = None,
mx_host_patterns: Optional[list[str]] = None,
failure_details: Optional[str] = None,
):
# list.append() takes a single object, so wrap the values in a policy doc
self.policies.append(
_SMTPTLSPolicyDoc(
policy_type=policy_type,
policy_domain=policy_domain,
successful_session_count=successful_session_count,
failed_session_count=failed_session_count,
policy_string=policy_string,
mx_host_patterns=mx_host_patterns,
failure_details=failure_details,
)
)
class AlreadySaved(ValueError):
"""Raised when a report to be saved matches an existing report"""
def set_hosts(
hosts: Union[str, list[str]],
*,
use_ssl: Optional[bool] = False,
ssl_cert_path: Optional[str] = None,
username: Optional[str] = None,
password: Optional[str] = None,
api_key: Optional[str] = None,
timeout: Optional[float] = 60.0,
):
"""
Sets the Elasticsearch hosts to use
Args:
hosts (Union[str, list[str]]): A single hostname or URL, or list of hostnames or URLs
use_ssl (bool): Use an HTTPS connection to the server
ssl_cert_path (str): Path to the certificate chain
username (str): The username to use for authentication
password (str): The password to use for authentication
api_key (str): The Base64 encoded API key to use for authentication
timeout (float): Timeout in seconds
"""
if not isinstance(hosts, list):
hosts = [hosts]
conn_params = {"hosts": hosts, "timeout": timeout}
if use_ssl:
conn_params["use_ssl"] = True
if ssl_cert_path:
conn_params["verify_certs"] = True
conn_params["ca_certs"] = ssl_cert_path
else:
conn_params["verify_certs"] = False
if username and password:
conn_params["http_auth"] = username + ":" + password
if api_key:
conn_params["api_key"] = api_key
connections.create_connection(**conn_params)
def create_indexes(names: list[str], settings: Optional[dict[str, Any]] = None):
"""
Create Elasticsearch indexes
Args:
names (list): A list of index names
settings (dict): Index settings
"""
for name in names:
index = Index(name)
try:
if not index.exists():
logger.debug("Creating Elasticsearch index: {0}".format(name))
if settings is None:
index.settings(number_of_shards=1, number_of_replicas=0)
else:
index.settings(**settings)
index.create()
except Exception as e:
raise ElasticsearchError("Elasticsearch error: {0}".format(e.__str__()))
def migrate_indexes(
aggregate_indexes: Optional[list[str]] = None,
forensic_indexes: Optional[list[str]] = None,
):
"""
Updates index mappings
Args:
aggregate_indexes (list): A list of aggregate index names
forensic_indexes (list): A list of forensic index names
"""
version = 2
if aggregate_indexes is None:
aggregate_indexes = []
if forensic_indexes is None:
forensic_indexes = []
for aggregate_index_name in aggregate_indexes:
if not Index(aggregate_index_name).exists():
continue
aggregate_index = Index(aggregate_index_name)
doc = "doc"
fo_field = "published_policy.fo"
fo = "fo"
fo_mapping = aggregate_index.get_field_mapping(fields=[fo_field])
fo_mapping = fo_mapping[list(fo_mapping.keys())[0]]["mappings"]
if doc not in fo_mapping:
continue
fo_mapping = fo_mapping[doc][fo_field]["mapping"][fo]
fo_type = fo_mapping["type"]
if fo_type == "long":
new_index_name = "{0}-v{1}".format(aggregate_index_name, version)
body = {
"properties": {
"published_policy.fo": {
"type": "text",
"fields": {"keyword": {"type": "keyword", "ignore_above": 256}},
}
}
}
Index(new_index_name).create()
Index(new_index_name).put_mapping(doc_type=doc, body=body)
reindex(connections.get_connection(), aggregate_index_name, new_index_name)
Index(aggregate_index_name).delete()
for forensic_index in forensic_indexes:
pass
def save_aggregate_report_to_elasticsearch(
aggregate_report: dict[str, Any],
index_suffix: Optional[str] = None,
index_prefix: Optional[str] = None,
monthly_indexes: Optional[bool] = False,
number_of_shards: Optional[int] = 1,
number_of_replicas: Optional[int] = 0,
):
"""
Saves a parsed DMARC aggregate report to Elasticsearch
Args:
aggregate_report (dict): A parsed forensic report
index_suffix (str): The suffix of the name of the index to save to
index_prefix (str): The prefix of the name of the index to save to
monthly_indexes (bool): Use monthly indexes instead of daily indexes
number_of_shards (int): The number of shards to use in the index
number_of_replicas (int): The number of replicas to use in the index
Raises:
AlreadySaved
"""
logger.info("Saving aggregate report to Elasticsearch")
aggregate_report = aggregate_report.copy()
metadata = aggregate_report["report_metadata"]
org_name = metadata["org_name"]
report_id = metadata["report_id"]
domain = aggregate_report["policy_published"]["domain"]
begin_date = human_timestamp_to_datetime(metadata["begin_date"], to_utc=True)
end_date = human_timestamp_to_datetime(metadata["end_date"], to_utc=True)
if monthly_indexes:
index_date = begin_date.strftime("%Y-%m")
else:
index_date = begin_date.strftime("%Y-%m-%d")
org_name_query = Q(dict(match_phrase=dict(org_name=org_name)))
report_id_query = Q(dict(match_phrase=dict(report_id=report_id)))
domain_query = Q(dict(match_phrase={"published_policy.domain": domain}))
begin_date_query = Q(dict(match=dict(date_begin=begin_date)))
end_date_query = Q(dict(match=dict(date_end=end_date)))
if index_suffix is not None:
search_index = "dmarc_aggregate_{0}*".format(index_suffix)
else:
search_index = "dmarc_aggregate*"
if index_prefix is not None:
search_index = "{0}{1}".format(index_prefix, search_index)
search = Search(index=search_index)
query = org_name_query & report_id_query & domain_query
query = query & begin_date_query & end_date_query
search.query = query
# Build the human-readable dates before searching so they are available
# for the duplicate-report message below
begin_date_human = begin_date.strftime("%Y-%m-%d %H:%M:%SZ")
end_date_human = end_date.strftime("%Y-%m-%d %H:%M:%SZ")
try:
existing = search.execute()
except Exception as error_:
raise ElasticsearchError(
"Elasticsearch's search for existing report \
error: {}".format(error_.__str__())
)
if len(existing) > 0:
raise AlreadySaved(
"An aggregate report ID {0} from {1} about {2} "
"with a date range of {3} UTC to {4} UTC already "
"exists in "
"Elasticsearch".format(
report_id, org_name, domain, begin_date_human, end_date_human
)
)
published_policy = _PublishedPolicy(
domain=aggregate_report["policy_published"]["domain"],
adkim=aggregate_report["policy_published"]["adkim"],
aspf=aggregate_report["policy_published"]["aspf"],
p=aggregate_report["policy_published"]["p"],
sp=aggregate_report["policy_published"]["sp"],
pct=aggregate_report["policy_published"]["pct"],
fo=aggregate_report["policy_published"]["fo"],
)
for record in aggregate_report["records"]:
begin_date = human_timestamp_to_datetime(record["interval_begin"], to_utc=True)
end_date = human_timestamp_to_datetime(record["interval_end"], to_utc=True)
normalized_timespan = record["normalized_timespan"]
if monthly_indexes:
index_date = begin_date.strftime("%Y-%m")
else:
index_date = begin_date.strftime("%Y-%m-%d")
aggregate_report["begin_date"] = begin_date
aggregate_report["end_date"] = end_date
date_range = [aggregate_report["begin_date"], aggregate_report["end_date"]]
agg_doc = _AggregateReportDoc(
xml_schema=aggregate_report["xml_schema"],
org_name=metadata["org_name"],
org_email=metadata["org_email"],
org_extra_contact_info=metadata["org_extra_contact_info"],
report_id=metadata["report_id"],
date_range=date_range,
date_begin=begin_date,
date_end=end_date,
normalized_timespan=normalized_timespan,
errors=metadata["errors"],
published_policy=published_policy,
source_ip_address=record["source"]["ip_address"],
source_country=record["source"]["country"],
source_reverse_dns=record["source"]["reverse_dns"],
source_base_domain=record["source"]["base_domain"],
source_type=record["source"]["type"],
source_name=record["source"]["name"],
message_count=record["count"],
disposition=record["policy_evaluated"]["disposition"],
dkim_aligned=record["policy_evaluated"]["dkim"] is not None
and record["policy_evaluated"]["dkim"].lower() == "pass",
spf_aligned=record["policy_evaluated"]["spf"] is not None
and record["policy_evaluated"]["spf"].lower() == "pass",
header_from=record["identifiers"]["header_from"],
envelope_from=record["identifiers"]["envelope_from"],
envelope_to=record["identifiers"]["envelope_to"],
)
for override in record["policy_evaluated"]["policy_override_reasons"]:
agg_doc.add_policy_override(
type_=override["type"], comment=override["comment"]
)
for dkim_result in record["auth_results"]["dkim"]:
agg_doc.add_dkim_result(
domain=dkim_result["domain"],
selector=dkim_result["selector"],
result=dkim_result["result"],
)
for spf_result in record["auth_results"]["spf"]:
agg_doc.add_spf_result(
domain=spf_result["domain"],
scope=spf_result["scope"],
result=spf_result["result"],
)
index = "dmarc_aggregate"
if index_suffix:
index = "{0}_{1}".format(index, index_suffix)
if index_prefix:
index = "{0}{1}".format(index_prefix, index)
index = "{0}-{1}".format(index, index_date)
index_settings = dict(
number_of_shards=number_of_shards, number_of_replicas=number_of_replicas
)
create_indexes([index], index_settings)
agg_doc.meta.index = index
try:
agg_doc.save()
except Exception as e:
raise ElasticsearchError("Elasticsearch error: {0}".format(e.__str__()))
def save_forensic_report_to_elasticsearch(
forensic_report: dict[str, Any],
index_suffix: Optional[Any] = None,
index_prefix: Optional[str] = None,
monthly_indexes: Optional[bool] = False,
number_of_shards: int = 1,
number_of_replicas: int = 0,
):
"""
Saves a parsed DMARC forensic report to Elasticsearch
Args:
forensic_report (dict): A parsed forensic report
index_suffix (str): The suffix of the name of the index to save to
index_prefix (str): The prefix of the name of the index to save to
monthly_indexes (bool): Use monthly indexes instead of daily
indexes
number_of_shards (int): The number of shards to use in the index
number_of_replicas (int): The number of replicas to use in the
index
Raises:
AlreadySaved
"""
logger.info("Saving forensic report to Elasticsearch")
forensic_report = forensic_report.copy()
sample_date = None
if forensic_report["parsed_sample"]["date"] is not None:
sample_date = forensic_report["parsed_sample"]["date"]
sample_date = human_timestamp_to_datetime(sample_date)
original_headers = forensic_report["parsed_sample"]["headers"]
headers = dict()
for original_header in original_headers:
headers[original_header.lower()] = original_headers[original_header]
arrival_date = human_timestamp_to_datetime(forensic_report["arrival_date_utc"])
arrival_date_epoch_milliseconds = int(arrival_date.timestamp() * 1000)
if index_suffix is not None:
search_index = "dmarc_forensic_{0}*".format(index_suffix)
else:
search_index = "dmarc_forensic*"
if index_prefix is not None:
search_index = "{0}{1}".format(index_prefix, search_index)
search = Search(index=search_index)
q = Q(dict(match=dict(arrival_date=arrival_date_epoch_milliseconds)))
from_ = None
to_ = None
subject = None
if "from" in headers:
# We convert the FROM header from a string list to a flat string.
headers["from"] = headers["from"][0]
if headers["from"][0] == "":
headers["from"] = headers["from"][1]
else:
headers["from"] = " <".join(headers["from"]) + ">"
from_ = dict()
from_["sample.headers.from"] = headers["from"]
from_query = Q(dict(match_phrase=from_))
q = q & from_query
if "to" in headers:
# We convert the TO header from a string list to a flat string.
headers["to"] = headers["to"][0]
if headers["to"][0] == "":
headers["to"] = headers["to"][1]
else:
headers["to"] = " <".join(headers["to"]) + ">"
to_ = dict()
to_["sample.headers.to"] = headers["to"]
to_query = Q(dict(match_phrase=to_))
q = q & to_query
if "subject" in headers:
subject = headers["subject"]
subject_query = {"match_phrase": {"sample.headers.subject": subject}}
q = q & Q(subject_query)
search.query = q
existing = search.execute()
if len(existing) > 0:
raise AlreadySaved(
"A forensic sample to {0} from {1} "
"with a subject of {2} and arrival date of {3} "
"already exists in "
"Elasticsearch".format(
to_, from_, subject, forensic_report["arrival_date_utc"]
)
)
parsed_sample = forensic_report["parsed_sample"]
sample = _ForensicSampleDoc(
raw=forensic_report["sample"],
headers=headers,
headers_only=forensic_report["sample_headers_only"],
date=sample_date,
subject=forensic_report["parsed_sample"]["subject"],
filename_safe_subject=parsed_sample["filename_safe_subject"],
body=forensic_report["parsed_sample"]["body"],
)
for address in forensic_report["parsed_sample"]["to"]:
sample.add_to(display_name=address["display_name"], address=address["address"])
for address in forensic_report["parsed_sample"]["reply_to"]:
sample.add_reply_to(
display_name=address["display_name"], address=address["address"]
)
for address in forensic_report["parsed_sample"]["cc"]:
sample.add_cc(display_name=address["display_name"], address=address["address"])
for address in forensic_report["parsed_sample"]["bcc"]:
sample.add_bcc(display_name=address["display_name"], address=address["address"])
for attachment in forensic_report["parsed_sample"]["attachments"]:
sample.add_attachment(
filename=attachment["filename"],
content_type=attachment["mail_content_type"],
sha256=attachment["sha256"],
)
try:
forensic_doc = _ForensicReportDoc(
feedback_type=forensic_report["feedback_type"],
user_agent=forensic_report["user_agent"],
version=forensic_report["version"],
original_mail_from=forensic_report["original_mail_from"],
arrival_date=arrival_date_epoch_milliseconds,
domain=forensic_report["reported_domain"],
original_envelope_id=forensic_report["original_envelope_id"],
authentication_results=forensic_report["authentication_results"],
delivery_results=forensic_report["delivery_result"],
source_ip_address=forensic_report["source"]["ip_address"],
source_country=forensic_report["source"]["country"],
source_reverse_dns=forensic_report["source"]["reverse_dns"],
source_base_domain=forensic_report["source"]["base_domain"],
authentication_mechanisms=forensic_report["authentication_mechanisms"],
auth_failure=forensic_report["auth_failure"],
dkim_domain=forensic_report["dkim_domain"],
original_rcpt_to=forensic_report["original_rcpt_to"],
sample=sample,
)
index = "dmarc_forensic"
if index_suffix:
index = "{0}_{1}".format(index, index_suffix)
if index_prefix:
index = "{0}{1}".format(index_prefix, index)
if monthly_indexes:
index_date = arrival_date.strftime("%Y-%m")
else:
index_date = arrival_date.strftime("%Y-%m-%d")
index = "{0}-{1}".format(index, index_date)
index_settings = dict(
number_of_shards=number_of_shards, number_of_replicas=number_of_replicas
)
create_indexes([index], index_settings)
forensic_doc.meta.index = index
try:
forensic_doc.save()
except Exception as e:
raise ElasticsearchError("Elasticsearch error: {0}".format(e.__str__()))
except KeyError as e:
raise InvalidForensicReport(
"Forensic report missing required field: {0}".format(e.__str__())
)
def save_smtp_tls_report_to_elasticsearch(
report: dict[str, Any],
index_suffix: Optional[str] = None,
index_prefix: Optional[str] = None,
monthly_indexes: Optional[bool] = False,
number_of_shards: Optional[int] = 1,
number_of_replicas: Optional[int] = 0,
):
"""
Saves a parsed SMTP TLS report to Elasticsearch
Args:
report (dict): A parsed SMTP TLS report
index_suffix (str): The suffix of the name of the index to save to
index_prefix (str): The prefix of the name of the index to save to
monthly_indexes (bool): Use monthly indexes instead of daily indexes
number_of_shards (int): The number of shards to use in the index
number_of_replicas (int): The number of replicas to use in the index
Raises:
AlreadySaved
"""
logger.info("Saving smtp tls report to Elasticsearch")
org_name = report["organization_name"]
report_id = report["report_id"]
begin_date = human_timestamp_to_datetime(report["begin_date"], to_utc=True)
end_date = human_timestamp_to_datetime(report["end_date"], to_utc=True)
begin_date_human = begin_date.strftime("%Y-%m-%d %H:%M:%SZ")
end_date_human = end_date.strftime("%Y-%m-%d %H:%M:%SZ")
if monthly_indexes:
index_date = begin_date.strftime("%Y-%m")
else:
index_date = begin_date.strftime("%Y-%m-%d")
report["begin_date"] = begin_date
report["end_date"] = end_date
org_name_query = Q(dict(match_phrase=dict(org_name=org_name)))
report_id_query = Q(dict(match_phrase=dict(report_id=report_id)))
begin_date_query = Q(dict(match=dict(date_begin=begin_date)))
end_date_query = Q(dict(match=dict(date_end=end_date)))
if index_suffix is not None:
search_index = "smtp_tls_{0}*".format(index_suffix)
else:
search_index = "smtp_tls*"
if index_prefix is not None:
search_index = "{0}{1}".format(index_prefix, search_index)
search = Search(index=search_index)
query = org_name_query & report_id_query
query = query & begin_date_query & end_date_query
search.query = query
try:
existing = search.execute()
except Exception as error_:
raise ElasticsearchError(
"Elasticsearch's search for existing report \
error: {}".format(error_.__str__())
)
if len(existing) > 0:
raise AlreadySaved(
f"An SMTP TLS report ID {report_id} from "
f" {org_name} with a date range of "
f"{begin_date_human} UTC to "
f"{end_date_human} UTC already "
"exists in Elasticsearch"
)
index = "smtp_tls"
if index_suffix:
index = "{0}_{1}".format(index, index_suffix)
if index_prefix:
index = "{0}{1}".format(index_prefix, index)
index = "{0}-{1}".format(index, index_date)
index_settings = dict(
number_of_shards=number_of_shards, number_of_replicas=number_of_replicas
)
smtp_tls_doc = _SMTPTLSReportDoc(
org_name=report["organization_name"],
date_range=[report["begin_date"], report["end_date"]],
date_begin=report["begin_date"],
date_end=report["end_date"],
contact_info=report["contact_info"],
report_id=report["report_id"],
)
for policy in report["policies"]:
policy_strings = None
mx_host_patterns = None
if "policy_strings" in policy:
policy_strings = policy["policy_strings"]
if "mx_host_patterns" in policy:
mx_host_patterns = policy["mx_host_patterns"]
policy_doc = _SMTPTLSPolicyDoc(
policy_domain=policy["policy_domain"],
policy_type=policy["policy_type"],
successful_session_count=policy["successful_session_count"],
failed_session_count=policy["failed_session_count"],
policy_string=policy_strings,
mx_host_patterns=mx_host_patterns,
)
if "failure_details" in policy:
for failure_detail in policy["failure_details"]:
receiving_mx_hostname = None
additional_information_uri = None
failure_reason_code = None
ip_address = None
receiving_ip = None
receiving_mx_helo = None
sending_mta_ip = None
if "receiving_mx_hostname" in failure_detail:
receiving_mx_hostname = failure_detail["receiving_mx_hostname"]
if "additional_information_uri" in failure_detail:
additional_information_uri = failure_detail[
"additional_information_uri"
]
if "failure_reason_code" in failure_detail:
failure_reason_code = failure_detail["failure_reason_code"]
if "ip_address" in failure_detail:
ip_address = failure_detail["ip_address"]
if "receiving_ip" in failure_detail:
receiving_ip = failure_detail["receiving_ip"]
if "receiving_mx_helo" in failure_detail:
receiving_mx_helo = failure_detail["receiving_mx_helo"]
if "sending_mta_ip" in failure_detail:
sending_mta_ip = failure_detail["sending_mta_ip"]
policy_doc.add_failure_details(
result_type=failure_detail["result_type"],
ip_address=ip_address,
receiving_ip=receiving_ip,
receiving_mx_helo=receiving_mx_helo,
failed_session_count=failure_detail["failed_session_count"],
sending_mta_ip=sending_mta_ip,
receiving_mx_hostname=receiving_mx_hostname,
additional_information_uri=additional_information_uri,
failure_reason_code=failure_reason_code,
)
smtp_tls_doc.policies.append(policy_doc)
create_indexes([index], index_settings)
smtp_tls_doc.meta.index = index
try:
smtp_tls_doc.save()
except Exception as e:
raise ElasticsearchError("Elasticsearch error: {0}".format(e.__str__()))

75
parsedmarc/gelf.py Normal file

@@ -0,0 +1,75 @@
# -*- coding: utf-8 -*-
from __future__ import annotations
from typing import Any
import logging
import logging.handlers
import json
import threading
from parsedmarc import (
parsed_aggregate_reports_to_csv_rows,
parsed_forensic_reports_to_csv_rows,
parsed_smtp_tls_reports_to_csv_rows,
)
from pygelf import GelfTcpHandler, GelfUdpHandler, GelfTlsHandler
log_context_data = threading.local()
class ContextFilter(logging.Filter):
def filter(self, record):
record.parsedmarc = log_context_data.parsedmarc
return True
class GelfClient(object):
"""A client for the Graylog Extended Log Format"""
def __init__(self, host, port, mode):
"""
Initializes the GelfClient
Args:
host (str): The GELF host
port (int): The GELF port
mode (str): The GELF transport mode
"""
self.host = host
self.port = port
self.logger = logging.getLogger("parsedmarc_syslog")
self.logger.setLevel(logging.INFO)
self.logger.addFilter(ContextFilter())
self.gelf_mode = {
"udp": GelfUdpHandler,
"tcp": GelfTcpHandler,
"tls": GelfTlsHandler,
}
self.handler = self.gelf_mode[mode](
host=self.host, port=self.port, include_extra_fields=True
)
self.logger.addHandler(self.handler)
def save_aggregate_report_to_gelf(
self, aggregate_reports: list[dict[str, Any]]
):
rows = parsed_aggregate_reports_to_csv_rows(aggregate_reports)
for row in rows:
log_context_data.parsedmarc = row
self.logger.info("parsedmarc aggregate report")
log_context_data.parsedmarc = None
def save_forensic_report_to_gelf(
self, forensic_reports: list[dict[str, Any]]
):
rows = parsed_forensic_reports_to_csv_rows(forensic_reports)
for row in rows:
self.logger.info(json.dumps(row))
def save_smtp_tls_report_to_gelf(self, smtp_tls_reports: dict[str, Any]):
rows = parsed_smtp_tls_reports_to_csv_rows(smtp_tls_reports)
for row in rows:
self.logger.info(json.dumps(row))
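
A minimal sketch of how the GELF client above might be used; the Graylog hostname, port, and transport mode are placeholders, and the empty list stands in for parsed aggregate report dicts produced elsewhere by parsedmarc.

```python
from parsedmarc.gelf import GelfClient

# Placeholders; mode can be "udp", "tcp", or "tls" per the handler map above.
gelf_client = GelfClient(host="graylog.example.com", port=12201, mode="udp")

aggregate_reports = []  # replace with parsed aggregate report dicts
gelf_client.save_aggregate_report_to_gelf(aggregate_reports)
```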

210
parsedmarc/kafkaclient.py Normal file

@@ -0,0 +1,210 @@
# -*- coding: utf-8 -*-
from __future__ import annotations
from typing import Any, Optional, Union
from ssl import SSLContext
import json
from ssl import create_default_context
from kafka import KafkaProducer
from kafka.errors import NoBrokersAvailable, UnknownTopicOrPartitionError
from parsedmarc.utils import human_timestamp_to_datetime
from parsedmarc import __version__
from parsedmarc.log import logger
class KafkaError(RuntimeError):
"""Raised when a Kafka error occurs"""
class KafkaClient(object):
def __init__(
self,
kafka_hosts: list[str],
*,
ssl: Optional[bool] = False,
username: Optional[str] = None,
password: Optional[str] = None,
ssl_context: Optional[SSLContext] = None,
):
"""
Initializes the Kafka client
Args:
kafka_hosts (list): A list of Kafka hostnames
(with optional port numbers)
ssl (bool): Use a SSL/TLS connection
username (str): An optional username
password (str): An optional password
ssl_context (SSLContext): SSL context options
Notes:
``ssl=True`` is implied when a username or password is
supplied.
When using Azure Event Hubs, the username is literally
``$ConnectionString``, and the password is the
Azure Event Hub connection string.
"""
config = dict(
value_serializer=lambda v: json.dumps(v).encode("utf-8"),
bootstrap_servers=kafka_hosts,
client_id="parsedmarc-{0}".format(__version__),
)
if ssl or username or password:
config["security_protocol"] = "SSL"
config["ssl_context"] = ssl_context or create_default_context()
if username or password:
config["sasl_plain_username"] = username or ""
config["sasl_plain_password"] = password or ""
try:
self.producer = KafkaProducer(**config)
except NoBrokersAvailable:
raise KafkaError("No Kafka brokers available")
@staticmethod
def strip_metadata(report: dict[str, Any]):
"""
Duplicates org_name, org_email and report_id into JSON root
and removes report_metadata key to bring it more inline
with Elastic output.
"""
report["org_name"] = report["report_metadata"]["org_name"]
report["org_email"] = report["report_metadata"]["org_email"]
report["report_id"] = report["report_metadata"]["report_id"]
report.pop("report_metadata")
return report
@staticmethod
def generate_date_range(report: dict[str, Any]):
"""
Creates a date_range timestamp with the format YYYY-MM-DDTHH:MM:SS
based on begin and end dates for easier parsing in Kibana.
Move to utils to avoid duplication w/ elastic?
"""
metadata = report["report_metadata"]
begin_date = human_timestamp_to_datetime(metadata["begin_date"])
end_date = human_timestamp_to_datetime(metadata["end_date"])
begin_date_human = begin_date.strftime("%Y-%m-%dT%H:%M:%S")
end_date_human = end_date.strftime("%Y-%m-%dT%H:%M:%S")
date_range = [begin_date_human, end_date_human]
logger.debug("date_range is {}".format(date_range))
return date_range
def save_aggregate_reports_to_kafka(
self,
aggregate_reports: Union[dict[str, Any], list[dict[str, Any]]],
aggregate_topic: str,
):
"""
Saves aggregate DMARC reports to Kafka
Args:
aggregate_reports (list): A list of aggregate report dictionaries
to save to Kafka
aggregate_topic (str): The name of the Kafka topic
"""
if isinstance(aggregate_reports, dict):
aggregate_reports = [aggregate_reports]
if len(aggregate_reports) < 1:
return
for report in aggregate_reports:
report["date_range"] = self.generate_date_range(report)
report = self.strip_metadata(report)
for slice in report["records"]:
slice["date_range"] = report["date_range"]
slice["org_name"] = report["org_name"]
slice["org_email"] = report["org_email"]
slice["policy_published"] = report["policy_published"]
slice["report_id"] = report["report_id"]
logger.debug("Sending slice.")
try:
logger.debug("Saving aggregate report to Kafka")
self.producer.send(aggregate_topic, slice)
except UnknownTopicOrPartitionError:
raise KafkaError(
"Kafka error: Unknown topic or partition on broker"
)
except Exception as e:
raise KafkaError("Kafka error: {0}".format(e.__str__()))
try:
self.producer.flush()
except Exception as e:
raise KafkaError("Kafka error: {0}".format(e.__str__()))
def save_forensic_reports_to_kafka(
self,
forensic_reports: Union[dict[str, Any], list[dict[str, Any]]],
forensic_topic: str,
):
"""
Saves forensic DMARC reports to Kafka, sends individual
records (slices) since Kafka requires messages to be <= 1MB
by default.
Args:
forensic_reports (list): A list of forensic report dicts
to save to Kafka
forensic_topic (str): The name of the Kafka topic
"""
if isinstance(forensic_reports, dict):
forensic_reports = [forensic_reports]
if len(forensic_reports) < 1:
return
try:
logger.debug("Saving forensic reports to Kafka")
self.producer.send(forensic_topic, forensic_reports)
except UnknownTopicOrPartitionError:
raise KafkaError("Kafka error: Unknown topic or partition on broker")
except Exception as e:
raise KafkaError("Kafka error: {0}".format(e.__str__()))
try:
self.producer.flush()
except Exception as e:
raise KafkaError("Kafka error: {0}".format(e.__str__()))
def save_smtp_tls_reports_to_kafka(
self,
smtp_tls_reports: Union[list[dict[str, Any]], dict[str, Any]],
smtp_tls_topic: str,
):
"""
Saves SMTP TLS reports to Kafka, sends individual
records (slices) since Kafka requires messages to be <= 1MB
by default.
Args:
smtp_tls_reports (list): A list of SMTP TLS report dicts
to save to Kafka
smtp_tls_topic (str): The name of the Kafka topic
"""
if isinstance(smtp_tls_reports, dict):
smtp_tls_reports = [smtp_tls_reports]
if len(smtp_tls_reports) < 1:
return
try:
logger.debug("Saving forensic reports to Kafka")
self.producer.send(smtp_tls_topic, smtp_tls_reports)
except UnknownTopicOrPartitionError:
raise KafkaError("Kafka error: Unknown topic or partition on broker")
except Exception as e:
raise KafkaError("Kafka error: {0}".format(e.__str__()))
try:
self.producer.flush()
except Exception as e:
raise KafkaError("Kafka error: {0}".format(e.__str__()))

4
parsedmarc/log.py Normal file

@@ -0,0 +1,4 @@
import logging
logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())

195
parsedmarc/loganalytics.py Normal file

@@ -0,0 +1,195 @@
# -*- coding: utf-8 -*-
from __future__ import annotations
from typing import Any
from parsedmarc.log import logger
from azure.core.exceptions import HttpResponseError
from azure.identity import ClientSecretCredential
from azure.monitor.ingestion import LogsIngestionClient
class LogAnalyticsException(Exception):
"""Raised when an Elasticsearch error occurs"""
class LogAnalyticsConfig:
"""
The LogAnalyticsConfig class is used to define the configuration
for the Log Analytics Client.
Properties:
client_id (str):
The client ID of the service principal.
client_secret (str):
The client secret of the service principal.
tenant_id (str):
The tenant ID where
the service principal resides.
dce (str):
The Data Collection Endpoint (DCE)
used by the Data Collection Rule (DCR).
dcr_immutable_id (str):
The immutable ID of
the Data Collection Rule (DCR).
dcr_aggregate_stream (str):
The Stream name where
the Aggregate DMARC reports
need to be pushed.
dcr_forensic_stream (str):
The Stream name where
the Forensic DMARC reports
need to be pushed.
dcr_smtp_tls_stream (str):
The Stream name where
the SMTP TLS Reports
need to be pushed.
"""
def __init__(
self,
client_id: str,
client_secret: str,
tenant_id: str,
dce: str,
dcr_immutable_id: str,
dcr_aggregate_stream: str,
dcr_forensic_stream: str,
dcr_smtp_tls_stream: str,
):
self.client_id = client_id
self.client_secret = client_secret
self.tenant_id = tenant_id
self.dce = dce
self.dcr_immutable_id = dcr_immutable_id
self.dcr_aggregate_stream = dcr_aggregate_stream
self.dcr_forensic_stream = dcr_forensic_stream
self.dcr_smtp_tls_stream = dcr_smtp_tls_stream
class LogAnalyticsClient(object):
"""
The LogAnalyticsClient is used to push
the generated DMARC reports to Log Analytics
via Data Collection Rules.
"""
def __init__(
self,
client_id: str,
client_secret: str,
tenant_id: str,
dce: str,
dcr_immutable_id: str,
dcr_aggregate_stream: str,
dcr_forensic_stream: str,
dcr_smtp_tls_stream: str,
):
self.conf = LogAnalyticsConfig(
client_id=client_id,
client_secret=client_secret,
tenant_id=tenant_id,
dce=dce,
dcr_immutable_id=dcr_immutable_id,
dcr_aggregate_stream=dcr_aggregate_stream,
dcr_forensic_stream=dcr_forensic_stream,
dcr_smtp_tls_stream=dcr_smtp_tls_stream,
)
if (
not self.conf.client_id
or not self.conf.client_secret
or not self.conf.tenant_id
or not self.conf.dce
or not self.conf.dcr_immutable_id
):
raise LogAnalyticsException(
"Invalid configuration. " + "One or more required settings are missing."
)
def publish_json(
self,
results,
logs_client: LogsIngestionClient,
dcr_stream: str,
):
"""
Background function to publish given
DMARC report to specific Data Collection Rule.
Args:
results (list):
The results generated by parsedmarc.
logs_client (LogsIngestionClient):
The client used to send the DMARC reports.
dcr_stream (str):
The stream name where the DMARC reports needs to be pushed.
"""
try:
logs_client.upload(self.conf.dcr_immutable_id, dcr_stream, results)
except HttpResponseError as e:
raise LogAnalyticsException("Upload failed: {error}".format(error=e))
def publish_results(
self,
results: dict[str, dict[str, Any]],
save_aggregate: bool,
save_forensic: bool,
save_smtp_tls: bool,
):
"""
Function to publish DMARC and/or SMTP TLS reports to Log Analytics
via Data Collection Rules (DCR).
Look below for docs:
https://learn.microsoft.com/en-us/azure/azure-monitor/logs/logs-ingestion-api-overview
Args:
results (dict):
The parsed DMARC and SMTP TLS reports
save_aggregate (bool):
Whether Aggregate reports can be saved into Log Analytics
save_forensic (bool):
Whether Forensic reports can be saved into Log Analytics
save_smtp_tls (bool):
Whether SMTP TLS reports can be saved into Log Analytics
"""
conf = self.conf
credential = ClientSecretCredential(
tenant_id=conf.tenant_id,
client_id=conf.client_id,
client_secret=conf.client_secret,
)
logs_client = LogsIngestionClient(conf.dce, credential=credential)
if (
results["aggregate_reports"]
and conf.dcr_aggregate_stream
and len(results["aggregate_reports"]) > 0
and save_aggregate
):
logger.info("Publishing aggregate reports.")
self.publish_json(
results["aggregate_reports"], logs_client, conf.dcr_aggregate_stream
)
logger.info("Successfully pushed aggregate reports.")
if (
results["forensic_reports"]
and conf.dcr_forensic_stream
and len(results["forensic_reports"]) > 0
and save_forensic
):
logger.info("Publishing forensic reports.")
self.publish_json(
results["forensic_reports"], logs_client, conf.dcr_forensic_stream
)
logger.info("Successfully pushed forensic reports.")
if (
results["smtp_tls_reports"]
and conf.dcr_smtp_tls_stream
and len(results["smtp_tls_reports"]) > 0
and save_smtp_tls
):
logger.info("Publishing SMTP TLS reports.")
self.publish_json(
results["smtp_tls_reports"], logs_client, conf.dcr_smtp_tls_stream
)
logger.info("Successfully pushed SMTP TLS reports.")

13
parsedmarc/mail/__init__.py Normal file

@@ -0,0 +1,13 @@
from parsedmarc.mail.mailbox_connection import MailboxConnection
from parsedmarc.mail.graph import MSGraphConnection
from parsedmarc.mail.gmail import GmailConnection
from parsedmarc.mail.imap import IMAPConnection
from parsedmarc.mail.maildir import MaildirConnection
__all__ = [
"MailboxConnection",
"MSGraphConnection",
"GmailConnection",
"IMAPConnection",
"MaildirConnection",
]

159
parsedmarc/mail/gmail.py Normal file

@@ -0,0 +1,159 @@
# -*- coding: utf-8 -*-
from __future__ import annotations
from base64 import urlsafe_b64decode
from functools import lru_cache
from pathlib import Path
from time import sleep
from typing import List
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from parsedmarc.log import logger
from parsedmarc.mail.mailbox_connection import MailboxConnection
def _get_creds(token_file, credentials_file, scopes, oauth2_port):
creds = None
if Path(token_file).exists():
creds = Credentials.from_authorized_user_file(token_file, scopes)
# If there are no (valid) credentials available, let the user log in.
if not creds or not creds.valid:
if creds and creds.expired and creds.refresh_token:
creds.refresh(Request())
else:
flow = InstalledAppFlow.from_client_secrets_file(credentials_file, scopes)
creds = flow.run_local_server(open_browser=False, oauth2_port=oauth2_port)
# Save the credentials for the next run
with Path(token_file).open("w") as token:
token.write(creds.to_json())
return creds
class GmailConnection(MailboxConnection):
def __init__(
self,
token_file: str,
credentials_file: str,
scopes: List[str],
include_spam_trash: bool,
reports_folder: str,
oauth2_port: int,
paginate_messages: bool,
):
creds = _get_creds(token_file, credentials_file, scopes, oauth2_port)
self.service = build("gmail", "v1", credentials=creds)
self.include_spam_trash = include_spam_trash
self.reports_label_id = self._find_label_id_for_label(reports_folder)
self.paginate_messages = paginate_messages
def create_folder(self, folder_name: str):
# Gmail doesn't support the name Archive
if folder_name == "Archive":
return
logger.debug(f"Creating label {folder_name}")
request_body = {"name": folder_name, "messageListVisibility": "show"}
try:
self.service.users().labels().create(
userId="me", body=request_body
).execute()
except HttpError as e:
if e.status_code == 409:
logger.debug(f"Folder {folder_name} already exists, skipping creation")
else:
raise e
def _fetch_all_message_ids(self, reports_label_id, page_token=None, since=None):
if since:
results = (
self.service.users()
.messages()
.list(
userId="me",
includeSpamTrash=self.include_spam_trash,
labelIds=[reports_label_id],
pageToken=page_token,
q=f"after:{since}",
)
.execute()
)
else:
results = (
self.service.users()
.messages()
.list(
userId="me",
includeSpamTrash=self.include_spam_trash,
labelIds=[reports_label_id],
pageToken=page_token,
)
.execute()
)
messages = results.get("messages", [])
for message in messages:
yield message["id"]
if "nextPageToken" in results and self.paginate_messages:
yield from self._fetch_all_message_ids(
reports_label_id, results["nextPageToken"]
)
def fetch_messages(self, reports_folder: str, **kwargs) -> List[str]:
reports_label_id = self._find_label_id_for_label(reports_folder)
since = kwargs.get("since")
if since:
return [
id for id in self._fetch_all_message_ids(reports_label_id, since=since)
]
else:
return [id for id in self._fetch_all_message_ids(reports_label_id)]
def fetch_message(self, message_id):
msg = (
self.service.users()
.messages()
.get(userId="me", id=message_id, format="raw")
.execute()
)
return urlsafe_b64decode(msg["raw"])
def delete_message(self, message_id: str):
self.service.users().messages().delete(userId="me", id=message_id).execute()
def move_message(self, message_id: str, folder_name: str):
label_id = self._find_label_id_for_label(folder_name)
logger.debug(f"Moving message UID {message_id} to {folder_name}")
request_body = {
"addLabelIds": [label_id],
"removeLabelIds": [self.reports_label_id],
}
self.service.users().messages().modify(
userId="me", id=message_id, body=request_body
).execute()
def keepalive(self):
# Not needed
pass
def watch(self, check_callback, check_timeout):
"""Checks the mailbox for new messages every n seconds"""
while True:
sleep(check_timeout)
check_callback(self)
@lru_cache(maxsize=10)
def _find_label_id_for_label(self, label_name: str) -> str:
results = self.service.users().labels().list(userId="me").execute()
labels = results.get("labels", [])
for label in labels:
if label_name == label["id"] or label_name == label["name"]:
return label["id"]
return ""

269
parsedmarc/mail/graph.py Normal file

@@ -0,0 +1,269 @@
# -*- coding: utf-8 -*-
from __future__ import annotations
from enum import Enum
from functools import lru_cache
from pathlib import Path
from time import sleep
from typing import List, Optional
from azure.identity import (
UsernamePasswordCredential,
DeviceCodeCredential,
ClientSecretCredential,
TokenCachePersistenceOptions,
AuthenticationRecord,
)
from msgraph.core import GraphClient
from parsedmarc.log import logger
from parsedmarc.mail.mailbox_connection import MailboxConnection
class AuthMethod(Enum):
DeviceCode = 1
UsernamePassword = 2
ClientSecret = 3
def _get_cache_args(token_path: Path, allow_unencrypted_storage):
cache_args = {
"cache_persistence_options": TokenCachePersistenceOptions(
name="parsedmarc", allow_unencrypted_storage=allow_unencrypted_storage
)
}
auth_record = _load_token(token_path)
if auth_record:
cache_args["authentication_record"] = AuthenticationRecord.deserialize(
auth_record
)
return cache_args
def _load_token(token_path: Path) -> Optional[str]:
if not token_path.exists():
return None
with token_path.open() as token_file:
return token_file.read()
def _cache_auth_record(record: AuthenticationRecord, token_path: Path):
token = record.serialize()
with token_path.open("w") as token_file:
token_file.write(token)
def _generate_credential(auth_method: str, token_path: Path, **kwargs):
if auth_method == AuthMethod.DeviceCode.name:
credential = DeviceCodeCredential(
client_id=kwargs["client_id"],
disable_automatic_authentication=True,
tenant_id=kwargs["tenant_id"],
**_get_cache_args(
token_path,
allow_unencrypted_storage=kwargs["allow_unencrypted_storage"],
),
)
elif auth_method == AuthMethod.UsernamePassword.name:
credential = UsernamePasswordCredential(
client_id=kwargs["client_id"],
client_credential=kwargs["client_secret"],
disable_automatic_authentication=True,
username=kwargs["username"],
password=kwargs["password"],
**_get_cache_args(
token_path,
allow_unencrypted_storage=kwargs["allow_unencrypted_storage"],
),
)
elif auth_method == AuthMethod.ClientSecret.name:
credential = ClientSecretCredential(
client_id=kwargs["client_id"],
tenant_id=kwargs["tenant_id"],
client_secret=kwargs["client_secret"],
)
else:
raise RuntimeError(f"Auth method {auth_method} not found")
return credential
class MSGraphConnection(MailboxConnection):
def __init__(
self,
auth_method: str,
mailbox: str,
graph_url: str,
client_id: str,
client_secret: str,
username: str,
password: str,
tenant_id: str,
token_file: str,
allow_unencrypted_storage: bool,
):
token_path = Path(token_file)
credential = _generate_credential(
auth_method,
client_id=client_id,
client_secret=client_secret,
username=username,
password=password,
tenant_id=tenant_id,
token_path=token_path,
allow_unencrypted_storage=allow_unencrypted_storage,
)
client_params = {
"credential": credential,
"cloud": graph_url,
}
if not isinstance(credential, ClientSecretCredential):
scopes = ["Mail.ReadWrite"]
# Detect if mailbox is shared
if mailbox and username != mailbox:
scopes = ["Mail.ReadWrite.Shared"]
auth_record = credential.authenticate(scopes=scopes)
_cache_auth_record(auth_record, token_path)
client_params["scopes"] = scopes
self._client = GraphClient(**client_params)
self.mailbox_name = mailbox
def create_folder(self, folder_name: str):
sub_url = ""
path_parts = folder_name.split("/")
if len(path_parts) > 1: # Folder is a subFolder
parent_folder_id = None
for folder in path_parts[:-1]:
parent_folder_id = self._find_folder_id_with_parent(
folder, parent_folder_id
)
sub_url = f"/{parent_folder_id}/childFolders"
folder_name = path_parts[-1]
request_body = {"displayName": folder_name}
request_url = f"/users/{self.mailbox_name}/mailFolders{sub_url}"
resp = self._client.post(request_url, json=request_body)
if resp.status_code == 409:
logger.debug(f"Folder {folder_name} already exists, skipping creation")
elif resp.status_code == 201:
logger.debug(f"Created folder {folder_name}")
else:
logger.warning(f"Unknown response {resp.status_code} {resp.json()}")
def fetch_messages(self, folder_name: str, **kwargs) -> List[str]:
"""Returns a list of message UIDs in the specified folder"""
folder_id = self._find_folder_id_from_folder_path(folder_name)
url = f"/users/{self.mailbox_name}/mailFolders/{folder_id}/messages"
since = kwargs.get("since")
if not since:
since = None
batch_size = kwargs.get("batch_size")
if not batch_size:
batch_size = 0
emails = self._get_all_messages(url, batch_size, since)
return [email["id"] for email in emails]
def _get_all_messages(self, url, batch_size, since):
messages: list
params = {"$select": "id"}
if since:
params["$filter"] = f"receivedDateTime ge {since}"
if batch_size and batch_size > 0:
params["$top"] = batch_size
else:
params["$top"] = 100
result = self._client.get(url, params=params)
if result.status_code != 200:
raise RuntimeError(f"Failed to fetch messages {result.text}")
messages = result.json()["value"]
# Loop if next page is present and not obtained message limit.
while "@odata.nextLink" in result.json() and (
since is not None or (batch_size == 0 or batch_size - len(messages) > 0)
):
result = self._client.get(result.json()["@odata.nextLink"])
if result.status_code != 200:
raise RuntimeError(f"Failed to fetch messages {result.text}")
messages.extend(result.json()["value"])
return messages
def mark_message_read(self, message_id: str):
"""Marks a message as read"""
url = f"/users/{self.mailbox_name}/messages/{message_id}"
resp = self._client.patch(url, json={"isRead": "true"})
if resp.status_code != 200:
raise RuntimeWarning(
f"Failed to mark message read{resp.status_code}: {resp.json()}"
)
def fetch_message(self, message_id: str, **kwargs):
url = f"/users/{self.mailbox_name}/messages/{message_id}/$value"
result = self._client.get(url)
if result.status_code != 200:
raise RuntimeWarning(
f"Failed to fetch message{result.status_code}: {result.json()}"
)
mark_read = kwargs.get("mark_read")
if mark_read:
self.mark_message_read(message_id)
return result.text
def delete_message(self, message_id: str):
url = f"/users/{self.mailbox_name}/messages/{message_id}"
resp = self._client.delete(url)
if resp.status_code != 204:
raise RuntimeWarning(
f"Failed to delete message {resp.status_code}: {resp.json()}"
)
def move_message(self, message_id: str, folder_name: str):
folder_id = self._find_folder_id_from_folder_path(folder_name)
request_body = {"destinationId": folder_id}
url = f"/users/{self.mailbox_name}/messages/{message_id}/move"
resp = self._client.post(url, json=request_body)
if resp.status_code != 201:
raise RuntimeWarning(
f"Failed to move message {resp.status_code}: {resp.json()}"
)
def keepalive(self):
# Not needed
pass
def watch(self, check_callback, check_timeout):
"""Checks the mailbox for new messages every n seconds"""
while True:
sleep(check_timeout)
check_callback(self)
@lru_cache(maxsize=10)
def _find_folder_id_from_folder_path(self, folder_name: str) -> str:
path_parts = folder_name.split("/")
parent_folder_id = None
if len(path_parts) > 1:
for folder in path_parts[:-1]:
folder_id = self._find_folder_id_with_parent(folder, parent_folder_id)
parent_folder_id = folder_id
return self._find_folder_id_with_parent(path_parts[-1], parent_folder_id)
else:
return self._find_folder_id_with_parent(folder_name, None)
def _find_folder_id_with_parent(
self, folder_name: str, parent_folder_id: Optional[str]
):
sub_url = ""
if parent_folder_id is not None:
sub_url = f"/{parent_folder_id}/childFolders"
url = f"/users/{self.mailbox_name}/mailFolders{sub_url}"
filter = f"?$filter=displayName eq '{folder_name}'"
folders_resp = self._client.get(url + filter)
if folders_resp.status_code != 200:
raise RuntimeWarning(f"Failed to list folders.{folders_resp.json()}")
folders: list = folders_resp.json()["value"]
matched_folders = [
folder for folder in folders if folder["displayName"] == folder_name
]
if len(matched_folders) == 0:
raise RuntimeError(f"folder {folder_name} not found")
selected_folder = matched_folders[0]
return selected_folder["id"]
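
A sketch of connecting with the client-secret flow handled above; the tenant, application, and mailbox values are placeholders, and username/password are left empty because ClientSecretCredential does not use them.

```python
from parsedmarc.mail import MSGraphConnection

connection = MSGraphConnection(
    auth_method="ClientSecret",
    mailbox="dmarc@example.com",
    graph_url="https://graph.microsoft.com",
    client_id="00000000-0000-0000-0000-000000000000",
    client_secret="placeholder-secret",
    username="",
    password="",
    tenant_id="00000000-0000-0000-0000-000000000000",
    token_file=".token",
    allow_unencrypted_storage=False,
)
message_ids = connection.fetch_messages("Inbox", batch_size=10)
```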

96
parsedmarc/mail/imap.py Normal file

@@ -0,0 +1,96 @@
# -*- coding: utf-8 -*-
from __future__ import annotations
from typing import Optional
from time import sleep
from imapclient.exceptions import IMAPClientError
from mailsuite.imap import IMAPClient
from socket import timeout
from parsedmarc.log import logger
from parsedmarc.mail.mailbox_connection import MailboxConnection
class IMAPConnection(MailboxConnection):
def __init__(
self,
host: Optional[str] = None,
*,
user: Optional[str] = None,
password: Optional[str] = None,
port: Optional[int] = None,
ssl: Optional[bool] = True,
verify: Optional[bool] = True,
timeout: Optional[int] = 30,
max_retries: Optional[int] = 4,
):
self._username = user
self._password = password
self._verify = verify
self._client = IMAPClient(
host,
user,
password,
port=port,
ssl=ssl,
verify=verify,
timeout=timeout,
max_retries=max_retries,
)
def create_folder(self, folder_name: str):
self._client.create_folder(folder_name)
def fetch_messages(self, reports_folder: str, **kwargs):
self._client.select_folder(reports_folder)
since = kwargs.get("since")
if since:
return self._client.search(["SINCE", since])
else:
return self._client.search()
def fetch_message(self, message_id: int):
return self._client.fetch_message(message_id, parse=False)
def delete_message(self, message_id: int):
self._client.delete_messages([message_id])
def move_message(self, message_id: int, folder_name: str):
self._client.move_messages([message_id], folder_name)
def keepalive(self):
self._client.noop()
def watch(self, check_callback, check_timeout):
"""
Use an IDLE IMAP connection to parse incoming emails,
and pass the results to a callback function
"""
# IDLE callback sends IMAPClient object,
# send back the imap connection object instead
def idle_callback_wrapper(client: IMAPClient):
self._client = client
check_callback(self)
while True:
try:
IMAPClient(
host=self._client.host,
username=self._username,
password=self._password,
port=self._client.port,
ssl=self._client.ssl,
verify=self._verify,
idle_callback=idle_callback_wrapper,
idle_timeout=check_timeout,
)
except (timeout, IMAPClientError):
logger.warning("IMAP connection timeout. Reconnecting...")
sleep(check_timeout)
except Exception as e:
logger.warning("IMAP connection error. {0}. Reconnecting...".format(e))
sleep(check_timeout)
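
A minimal sketch for the IMAP connection above; the hostname and credentials are placeholders, and everything after the host is keyword-only per the signature.

```python
from parsedmarc.mail import IMAPConnection

connection = IMAPConnection(
    "imap.example.com",
    user="dmarc@example.com",
    password="placeholder-password",
    ssl=True,
)
for message_id in connection.fetch_messages("INBOX"):
    raw_message = connection.fetch_message(message_id)
```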

32
parsedmarc/mail/mailbox_connection.py Normal file

@@ -0,0 +1,32 @@
# -*- coding: utf-8 -*-
from __future__ import annotations
from abc import ABC
class MailboxConnection(ABC):
"""
Interface for a mailbox connection
"""
def create_folder(self, folder_name: str):
raise NotImplementedError
def fetch_messages(self, reports_folder: str, **kwargs) -> list[str]:
raise NotImplementedError
def fetch_message(self, message_id) -> str:
raise NotImplementedError
def delete_message(self, message_id: str):
raise NotImplementedError
def move_message(self, message_id: str, folder_name: str):
raise NotImplementedError
def keepalive(self):
raise NotImplementedError
def watch(self, check_callback, check_timeout):
raise NotImplementedError
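
The interface is small enough that a custom backend is easy to sketch. The class below is hypothetical and not part of parsedmarc: it treats a local directory tree of .eml files as a mailbox, with message IDs being paths relative to the root.

```python
from pathlib import Path
from time import sleep

from parsedmarc.mail.mailbox_connection import MailboxConnection


class DirectoryConnection(MailboxConnection):
    """Hypothetical backend serving .eml files from a local directory tree."""

    def __init__(self, root: str):
        self._root = Path(root)

    def create_folder(self, folder_name: str):
        (self._root / folder_name).mkdir(parents=True, exist_ok=True)

    def fetch_messages(self, reports_folder: str, **kwargs) -> list[str]:
        folder = self._root / reports_folder
        return [str(p.relative_to(self._root)) for p in folder.glob("*.eml")]

    def fetch_message(self, message_id) -> str:
        return (self._root / message_id).read_text()

    def delete_message(self, message_id: str):
        (self._root / message_id).unlink()

    def move_message(self, message_id: str, folder_name: str):
        target = self._root / folder_name / Path(message_id).name
        target.parent.mkdir(parents=True, exist_ok=True)
        (self._root / message_id).rename(target)

    def keepalive(self):
        pass  # nothing to keep alive for local files

    def watch(self, check_callback, check_timeout):
        while True:
            check_callback(self)
            sleep(check_timeout)
```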

69
parsedmarc/mail/maildir.py Normal file

@@ -0,0 +1,69 @@
# -*- coding: utf-8 -*-
from __future__ import annotations
from typing import Optional
from time import sleep
from parsedmarc.log import logger
from parsedmarc.mail.mailbox_connection import MailboxConnection
import mailbox
import os
class MaildirConnection(MailboxConnection):
def __init__(
self,
maildir_path: Optional[str] = None,
maildir_create: Optional[bool] = False,
):
self._maildir_path = maildir_path
self._maildir_create = maildir_create
maildir_owner = os.stat(maildir_path).st_uid
if os.getuid() != maildir_owner:
if os.getuid() == 0:
logger.warning(
"Switching uid to {} to access Maildir".format(maildir_owner)
)
os.setuid(maildir_owner)
else:
ex = "runtime uid {} differ from maildir {} owner {}".format(
os.getuid(), maildir_path, maildir_owner
)
raise Exception(ex)
self._client = mailbox.Maildir(maildir_path, create=maildir_create)
self._subfolder_client = {}
def create_folder(self, folder_name: str):
self._subfolder_client[folder_name] = self._client.add_folder(folder_name)
self._client.add_folder(folder_name)
def fetch_messages(self, reports_folder: str, **kwargs):
return self._client.keys()
def fetch_message(self, message_id: str):
return self._client.get(message_id).as_string()
def delete_message(self, message_id: str):
self._client.remove(message_id)
def move_message(self, message_id: str, folder_name: str):
message_data = self._client.get(message_id)
if folder_name not in self._subfolder_client.keys():
self._subfolder_client[folder_name] = mailbox.Maildir(
os.path.join(self._maildir_path, folder_name), create=self._maildir_create
)
self._subfolder_client[folder_name].add(message_data)
self._client.remove(message_id)
def keepalive(self):
return
def watch(self, check_callback, check_timeout):
while True:
try:
check_callback(self)
except Exception as e:
logger.warning("Maildir init error. {0}".format(e))
sleep(check_timeout)

855
parsedmarc/opensearch.py Normal file

@@ -0,0 +1,855 @@
# -*- coding: utf-8 -*-
from __future__ import annotations
from typing import Optional, Union, Any
from opensearchpy import (
Q,
connections,
Object,
Document,
Index,
Nested,
InnerDoc,
Integer,
Text,
Boolean,
Ip,
Date,
Search,
)
from opensearchpy.helpers import reindex
from parsedmarc.log import logger
from parsedmarc.utils import human_timestamp_to_datetime
from parsedmarc import InvalidForensicReport
class OpenSearchError(Exception):
"""Raised when an OpenSearch error occurs"""
class _PolicyOverride(InnerDoc):
type = Text()
comment = Text()
class _PublishedPolicy(InnerDoc):
domain = Text()
adkim = Text()
aspf = Text()
p = Text()
sp = Text()
pct = Integer()
fo = Text()
class _DKIMResult(InnerDoc):
domain = Text()
selector = Text()
result = Text()
class _SPFResult(InnerDoc):
domain = Text()
scope = Text()
results = Text()
class _AggregateReportDoc(Document):
class Index:
name = "dmarc_aggregate"
xml_schema = Text()
org_name = Text()
org_email = Text()
org_extra_contact_info = Text()
report_id = Text()
date_range = Date()
date_begin = Date()
date_end = Date()
normalized_timespan = Boolean()
original_timespan_seconds = Integer()
errors = Text()
published_policy = Object(_PublishedPolicy)
source_ip_address = Ip()
source_country = Text()
source_reverse_dns = Text()
source_base_domain = Text()
source_type = Text()
source_name = Text()
message_count = Integer()
disposition = Text()
dkim_aligned = Boolean()
spf_aligned = Boolean()
passed_dmarc = Boolean()
policy_overrides = Nested(_PolicyOverride)
header_from = Text()
envelope_from = Text()
envelope_to = Text()
dkim_results = Nested(_DKIMResult)
spf_results = Nested(_SPFResult)
def add_policy_override(self, type_: str, comment: str):
self.policy_overrides.append(_PolicyOverride(type=type_, comment=comment))
def add_dkim_result(self, domain: str, selector: str, result: _DKIMResult):
self.dkim_results.append(
_DKIMResult(domain=domain, selector=selector, result=result)
)
def add_spf_result(self, domain: str, scope: str, result: _SPFResult):
self.spf_results.append(_SPFResult(domain=domain, scope=scope, result=result))
def save(self, **kwargs):
self.passed_dmarc = False
self.passed_dmarc = self.spf_aligned or self.dkim_aligned
return super().save(**kwargs)
class _EmailAddressDoc(InnerDoc):
display_name = Text()
address = Text()
class _EmailAttachmentDoc(Document):
filename = Text()
content_type = Text()
sha256 = Text()
class _ForensicSampleDoc(InnerDoc):
raw = Text()
headers = Object()
headers_only = Boolean()
to = Nested(_EmailAddressDoc)
subject = Text()
filename_safe_subject = Text()
_from = Object(_EmailAddressDoc)
date = Date()
reply_to = Nested(_EmailAddressDoc)
cc = Nested(_EmailAddressDoc)
bcc = Nested(_EmailAddressDoc)
body = Text()
attachments = Nested(_EmailAttachmentDoc)
def add_to(self, display_name: str, address: str):
self.to.append(_EmailAddressDoc(display_name=display_name, address=address))
def add_reply_to(self, display_name: str, address: str):
self.reply_to.append(
_EmailAddressDoc(display_name=display_name, address=address)
)
def add_cc(self, display_name: str, address: str):
self.cc.append(_EmailAddressDoc(display_name=display_name, address=address))
def add_bcc(self, display_name: str, address: str):
self.bcc.append(_EmailAddressDoc(display_name=display_name, address=address))
def add_attachment(self, filename: str, content_type: str, sha256: str):
self.attachments.append(
_EmailAttachmentDoc(
filename=filename, content_type=content_type, sha256=sha256
)
)
class _ForensicReportDoc(Document):
class Index:
name = "dmarc_forensic"
feedback_type = Text()
user_agent = Text()
version = Text()
original_mail_from = Text()
arrival_date = Date()
domain = Text()
original_envelope_id = Text()
authentication_results = Text()
delivery_results = Text()
source_ip_address = Ip()
source_country = Text()
source_reverse_dns = Text()
source_authentication_mechanisms = Text()
source_auth_failures = Text()
dkim_domain = Text()
original_rcpt_to = Text()
sample = Object(_ForensicSampleDoc)
class _SMTPTLSFailureDetailsDoc(InnerDoc):
result_type = Text()
sending_mta_ip = Ip()
receiving_mx_helo = Text()
receiving_ip = Ip()
failed_session_count = Integer()
additional_information_uri = Text()
failure_reason_code = Text()
class _SMTPTLSPolicyDoc(InnerDoc):
policy_domain = Text()
policy_type = Text()
policy_strings = Text()
mx_host_patterns = Text()
successful_session_count = Integer()
failed_session_count = Integer()
failure_details = Nested(_SMTPTLSFailureDetailsDoc)
def add_failure_details(
self,
result_type: Optional[str] = None,
ip_address: Optional[str] = None,
receiving_ip: Optional[str] = None,
receiving_mx_helo: Optional[str] = None,
failed_session_count: Optional[int] = None,
sending_mta_ip: Optional[str] = None,
receiving_mx_hostname: Optional[str] = None,
additional_information_uri: Optional[str] = None,
failure_reason_code: Union[str, int, None] = None,
):
_details = _SMTPTLSFailureDetailsDoc(
result_type=result_type,
ip_address=ip_address,
sending_mta_ip=sending_mta_ip,
receiving_mx_hostname=receiving_mx_hostname,
receiving_mx_helo=receiving_mx_helo,
receiving_ip=receiving_ip,
failed_session_count=failed_session_count,
additional_information_uri=additional_information_uri,
failure_reason_code=failure_reason_code,
)
self.failure_details.append(_details)
class _SMTPTLSReportDoc(Document):
class Index:
name = "smtp_tls"
organization_name = Text()
date_range = Date()
date_begin = Date()
date_end = Date()
contact_info = Text()
report_id = Text()
policies = Nested(_SMTPTLSPolicyDoc)
def add_policy(
self,
policy_type: str,
policy_domain: str,
successful_session_count: int,
failed_session_count: int,
*,
policy_string: Optional[str] = None,
mx_host_patterns: Optional[list[str]] = None,
failure_details: Optional[str] = None,
):
self.policies.append(
_SMTPTLSPolicyDoc(
policy_type=policy_type,
policy_domain=policy_domain,
successful_session_count=successful_session_count,
failed_session_count=failed_session_count,
policy_string=policy_string,
mx_host_patterns=mx_host_patterns,
failure_details=failure_details,
)
)
class AlreadySaved(ValueError):
"""Raised when a report to be saved matches an existing report"""
def set_hosts(
hosts: Union[str, list[str]],
*,
use_ssl: Optional[bool] = False,
ssl_cert_path: Optional[str] = None,
username: Optional[str] = None,
password: Optional[str] = None,
api_key: Optional[str] = None,
timeout: Optional[float] = 60.0,
):
"""
Sets the OpenSearch hosts to use
Args:
hosts (str|list[str]): A single hostname or URL, or list of hostnames or URLs
use_ssl (bool): Use an HTTPS connection to the server
ssl_cert_path (str): Path to the certificate chain
username (str): The username to use for authentication
password (str): The password to use for authentication
api_key (str): The Base64 encoded API key to use for authentication
timeout (float): Timeout in seconds
"""
if not isinstance(hosts, list):
hosts = [hosts]
conn_params = {"hosts": hosts, "timeout": timeout}
if use_ssl:
conn_params["use_ssl"] = True
if ssl_cert_path:
conn_params["verify_certs"] = True
conn_params["ca_certs"] = ssl_cert_path
else:
conn_params["verify_certs"] = False
if username and password:
conn_params["http_auth"] = username + ":" + password
if api_key:
conn_params["api_key"] = api_key
connections.create_connection(**conn_params)
def create_indexes(names: list[str], settings: Optional[dict[str, Any]] = None):
"""
Create OpenSearch indexes
Args:
names (list): A list of index names
settings (dict): Index settings
"""
for name in names:
index = Index(name)
try:
if not index.exists():
logger.debug("Creating OpenSearch index: {0}".format(name))
if settings is None:
index.settings(number_of_shards=1, number_of_replicas=0)
else:
index.settings(**settings)
index.create()
except Exception as e:
raise OpenSearchError("OpenSearch error: {0}".format(e.__str__()))
def migrate_indexes(
aggregate_indexes: Optional[list[str]] = None,
forensic_indexes: Optional[list[str]] = None,
):
"""
Updates index mappings
Args:
aggregate_indexes (list): A list of aggregate index names
forensic_indexes (list): A list of forensic index names
"""
version = 2
if aggregate_indexes is None:
aggregate_indexes = []
if forensic_indexes is None:
forensic_indexes = []
for aggregate_index_name in aggregate_indexes:
if not Index(aggregate_index_name).exists():
continue
aggregate_index = Index(aggregate_index_name)
doc = "doc"
fo_field = "published_policy.fo"
fo = "fo"
fo_mapping = aggregate_index.get_field_mapping(fields=[fo_field])
fo_mapping = fo_mapping[list(fo_mapping.keys())[0]]["mappings"]
if doc not in fo_mapping:
continue
fo_mapping = fo_mapping[doc][fo_field]["mapping"][fo]
fo_type = fo_mapping["type"]
if fo_type == "long":
new_index_name = "{0}-v{1}".format(aggregate_index_name, version)
body = {
"properties": {
"published_policy.fo": {
"type": "text",
"fields": {"keyword": {"type": "keyword", "ignore_above": 256}},
}
}
}
Index(new_index_name).create()
Index(new_index_name).put_mapping(doc_type=doc, body=body)
reindex(connections.get_connection(), aggregate_index_name, new_index_name)
Index(aggregate_index_name).delete()
for forensic_index in forensic_indexes:
pass
def save_aggregate_report_to_opensearch(
aggregate_report: dict[str, Any],
index_suffix: Optional[str] = None,
index_prefix: Optional[str] = None,
monthly_indexes: Optional[bool] = False,
number_of_shards: Optional[int] = 1,
number_of_replicas: Optional[int] = 0,
):
"""
Saves a parsed DMARC aggregate report to OpenSearch
Args:
aggregate_report (dict): A parsed aggregate report
index_suffix (str): The suffix of the name of the index to save to
index_prefix (str): The prefix of the name of the index to save to
monthly_indexes (bool): Use monthly indexes instead of daily indexes
number_of_shards (int): The number of shards to use in the index
number_of_replicas (int): The number of replicas to use in the index
Raises:
AlreadySaved
"""
logger.info("Saving aggregate report to OpenSearch")
aggregate_report = aggregate_report.copy()
metadata = aggregate_report["report_metadata"]
org_name = metadata["org_name"]
report_id = metadata["report_id"]
domain = aggregate_report["policy_published"]["domain"]
begin_date = human_timestamp_to_datetime(metadata["begin_date"], to_utc=True)
end_date = human_timestamp_to_datetime(metadata["end_date"], to_utc=True)
if monthly_indexes:
index_date = begin_date.strftime("%Y-%m")
else:
index_date = begin_date.strftime("%Y-%m-%d")
org_name_query = Q(dict(match_phrase=dict(org_name=org_name)))
report_id_query = Q(dict(match_phrase=dict(report_id=report_id)))
domain_query = Q(dict(match_phrase={"published_policy.domain": domain}))
begin_date_query = Q(dict(match=dict(date_begin=begin_date)))
end_date_query = Q(dict(match=dict(date_end=end_date)))
if index_suffix is not None:
search_index = "dmarc_aggregate_{0}*".format(index_suffix)
else:
search_index = "dmarc_aggregate*"
if index_prefix is not None:
search_index = "{0}{1}".format(index_prefix, search_index)
search = Search(index=search_index)
query = org_name_query & report_id_query & domain_query
query = query & begin_date_query & end_date_query
search.query = query
begin_date_human = begin_date.strftime("%Y-%m-%d %H:%M:%SZ")
end_date_human = end_date.strftime("%Y-%m-%d %H:%M:%SZ")
try:
existing = search.execute()
except Exception as error_:
raise OpenSearchError(
"OpenSearch's search for existing report \
error: {}".format(error_.__str__())
)
if len(existing) > 0:
raise AlreadySaved(
"An aggregate report ID {0} from {1} about {2} "
"with a date range of {3} UTC to {4} UTC already "
"exists in "
"OpenSearch".format(
report_id, org_name, domain, begin_date_human, end_date_human
)
)
published_policy = _PublishedPolicy(
domain=aggregate_report["policy_published"]["domain"],
adkim=aggregate_report["policy_published"]["adkim"],
aspf=aggregate_report["policy_published"]["aspf"],
p=aggregate_report["policy_published"]["p"],
sp=aggregate_report["policy_published"]["sp"],
pct=aggregate_report["policy_published"]["pct"],
fo=aggregate_report["policy_published"]["fo"],
)
for record in aggregate_report["records"]:
begin_date = human_timestamp_to_datetime(record["interval_begin"], to_utc=True)
end_date = human_timestamp_to_datetime(record["interval_end"], to_utc=True)
normalized_timespan = record["normalized_timespan"]
if monthly_indexes:
index_date = begin_date.strftime("%Y-%m")
else:
index_date = begin_date.strftime("%Y-%m-%d")
aggregate_report["begin_date"] = begin_date
aggregate_report["end_date"] = end_date
date_range = [aggregate_report["begin_date"], aggregate_report["end_date"]]
agg_doc = _AggregateReportDoc(
xml_schema=aggregate_report["xml_schema"],
org_name=metadata["org_name"],
org_email=metadata["org_email"],
org_extra_contact_info=metadata["org_extra_contact_info"],
report_id=metadata["report_id"],
date_range=date_range,
date_begin=begin_date,
date_end=end_date,
normalized_timespan=normalized_timespan,
errors=metadata["errors"],
published_policy=published_policy,
source_ip_address=record["source"]["ip_address"],
source_country=record["source"]["country"],
source_reverse_dns=record["source"]["reverse_dns"],
source_base_domain=record["source"]["base_domain"],
source_type=record["source"]["type"],
source_name=record["source"]["name"],
message_count=record["count"],
disposition=record["policy_evaluated"]["disposition"],
dkim_aligned=record["policy_evaluated"]["dkim"] is not None
and record["policy_evaluated"]["dkim"].lower() == "pass",
spf_aligned=record["policy_evaluated"]["spf"] is not None
and record["policy_evaluated"]["spf"].lower() == "pass",
header_from=record["identifiers"]["header_from"],
envelope_from=record["identifiers"]["envelope_from"],
envelope_to=record["identifiers"]["envelope_to"],
)
for override in record["policy_evaluated"]["policy_override_reasons"]:
agg_doc.add_policy_override(
type_=override["type"], comment=override["comment"]
)
for dkim_result in record["auth_results"]["dkim"]:
agg_doc.add_dkim_result(
domain=dkim_result["domain"],
selector=dkim_result["selector"],
result=dkim_result["result"],
)
for spf_result in record["auth_results"]["spf"]:
agg_doc.add_spf_result(
domain=spf_result["domain"],
scope=spf_result["scope"],
result=spf_result["result"],
)
index = "dmarc_aggregate"
if index_suffix:
index = "{0}_{1}".format(index, index_suffix)
if index_prefix:
index = "{0}{1}".format(index_prefix, index)
index = "{0}-{1}".format(index, index_date)
index_settings = dict(
number_of_shards=number_of_shards, number_of_replicas=number_of_replicas
)
create_indexes([index], index_settings)
agg_doc.meta.index = index
try:
agg_doc.save()
except Exception as e:
raise OpenSearchError("OpenSearch error: {0}".format(e.__str__()))
def save_forensic_report_to_opensearch(
forensic_report: dict[str, Any],
index_suffix: Optional[str] = None,
index_prefix: Optional[str] = None,
monthly_indexes: Optional[bool] = False,
number_of_shards: int = 1,
number_of_replicas: int = 0,
):
"""
Saves a parsed DMARC forensic report to OpenSearch
Args:
forensic_report (dict): A parsed forensic report
index_suffix (str): The suffix of the name of the index to save to
index_prefix (str): The prefix of the name of the index to save to
monthly_indexes (bool): Use monthly indexes instead of daily
indexes
number_of_shards (int): The number of shards to use in the index
number_of_replicas (int): The number of replicas to use in the
index
Raises:
AlreadySaved
"""
logger.info("Saving forensic report to OpenSearch")
forensic_report = forensic_report.copy()
sample_date = None
if forensic_report["parsed_sample"]["date"] is not None:
sample_date = forensic_report["parsed_sample"]["date"]
sample_date = human_timestamp_to_datetime(sample_date)
original_headers = forensic_report["parsed_sample"]["headers"]
headers = dict()
for original_header in original_headers:
headers[original_header.lower()] = original_headers[original_header]
arrival_date = human_timestamp_to_datetime(forensic_report["arrival_date_utc"])
arrival_date_epoch_milliseconds = int(arrival_date.timestamp() * 1000)
if index_suffix is not None:
search_index = "dmarc_forensic_{0}*".format(index_suffix)
else:
search_index = "dmarc_forensic*"
if index_prefix is not None:
search_index = "{0}{1}".format(index_prefix, search_index)
search = Search(index=search_index)
q = Q(dict(match=dict(arrival_date=arrival_date_epoch_milliseconds)))
from_ = None
to_ = None
subject = None
if "from" in headers:
# We convert the FROM header from a string list to a flat string.
headers["from"] = headers["from"][0]
if headers["from"][0] == "":
headers["from"] = headers["from"][1]
else:
headers["from"] = " <".join(headers["from"]) + ">"
from_ = dict()
from_["sample.headers.from"] = headers["from"]
from_query = Q(dict(match_phrase=from_))
q = q & from_query
if "to" in headers:
# We convert the TO header from a string list to a flat string.
headers["to"] = headers["to"][0]
if headers["to"][0] == "":
headers["to"] = headers["to"][1]
else:
headers["to"] = " <".join(headers["to"]) + ">"
to_ = dict()
to_["sample.headers.to"] = headers["to"]
to_query = Q(dict(match_phrase=to_))
q = q & to_query
if "subject" in headers:
subject = headers["subject"]
subject_query = {"match_phrase": {"sample.headers.subject": subject}}
q = q & Q(subject_query)
search.query = q
existing = search.execute()
if len(existing) > 0:
raise AlreadySaved(
"A forensic sample to {0} from {1} "
"with a subject of {2} and arrival date of {3} "
"already exists in "
"OpenSearch".format(
to_, from_, subject, forensic_report["arrival_date_utc"]
)
)
parsed_sample = forensic_report["parsed_sample"]
sample = _ForensicSampleDoc(
raw=forensic_report["sample"],
headers=headers,
headers_only=forensic_report["sample_headers_only"],
date=sample_date,
subject=forensic_report["parsed_sample"]["subject"],
filename_safe_subject=parsed_sample["filename_safe_subject"],
body=forensic_report["parsed_sample"]["body"],
)
for address in forensic_report["parsed_sample"]["to"]:
sample.add_to(display_name=address["display_name"], address=address["address"])
for address in forensic_report["parsed_sample"]["reply_to"]:
sample.add_reply_to(
display_name=address["display_name"], address=address["address"]
)
for address in forensic_report["parsed_sample"]["cc"]:
sample.add_cc(display_name=address["display_name"], address=address["address"])
for address in forensic_report["parsed_sample"]["bcc"]:
sample.add_bcc(display_name=address["display_name"], address=address["address"])
for attachment in forensic_report["parsed_sample"]["attachments"]:
sample.add_attachment(
filename=attachment["filename"],
content_type=attachment["mail_content_type"],
sha256=attachment["sha256"],
)
try:
forensic_doc = _ForensicReportDoc(
feedback_type=forensic_report["feedback_type"],
user_agent=forensic_report["user_agent"],
version=forensic_report["version"],
original_mail_from=forensic_report["original_mail_from"],
arrival_date=arrival_date_epoch_milliseconds,
domain=forensic_report["reported_domain"],
original_envelope_id=forensic_report["original_envelope_id"],
authentication_results=forensic_report["authentication_results"],
delivery_results=forensic_report["delivery_result"],
source_ip_address=forensic_report["source"]["ip_address"],
source_country=forensic_report["source"]["country"],
source_reverse_dns=forensic_report["source"]["reverse_dns"],
source_base_domain=forensic_report["source"]["base_domain"],
authentication_mechanisms=forensic_report["authentication_mechanisms"],
auth_failure=forensic_report["auth_failure"],
dkim_domain=forensic_report["dkim_domain"],
original_rcpt_to=forensic_report["original_rcpt_to"],
sample=sample,
)
index = "dmarc_forensic"
if index_suffix:
index = "{0}_{1}".format(index, index_suffix)
if index_prefix:
index = "{0}{1}".format(index_prefix, index)
if monthly_indexes:
index_date = arrival_date.strftime("%Y-%m")
else:
index_date = arrival_date.strftime("%Y-%m-%d")
index = "{0}-{1}".format(index, index_date)
index_settings = dict(
number_of_shards=number_of_shards, number_of_replicas=number_of_replicas
)
create_indexes([index], index_settings)
forensic_doc.meta.index = index
try:
forensic_doc.save()
except Exception as e:
raise OpenSearchError("OpenSearch error: {0}".format(e.__str__()))
except KeyError as e:
raise InvalidForensicReport(
"Forensic report missing required field: {0}".format(e.__str__())
)
def save_smtp_tls_report_to_opensearch(
report: dict[str, Any],
index_suffix: Optional[str] = None,
index_prefix: Optional[str] = None,
monthly_indexes: Optional[bool] = False,
number_of_shards: Optional[int] = 1,
number_of_replicas: Optional[int] = 0,
):
"""
Saves a parsed SMTP TLS report to OpenSearch
Args:
report (dict): A parsed SMTP TLS report
index_suffix (str): The suffix of the name of the index to save to
index_prefix (str): The prefix of the name of the index to save to
monthly_indexes (bool): Use monthly indexes instead of daily indexes
number_of_shards (int): The number of shards to use in the index
number_of_replicas (int): The number of replicas to use in the index
Raises:
AlreadySaved
"""
logger.info("Saving SMTP TLS report to OpenSearch")
org_name = report["organization_name"]
report_id = report["report_id"]
begin_date = human_timestamp_to_datetime(report["begin_date"], to_utc=True)
end_date = human_timestamp_to_datetime(report["end_date"], to_utc=True)
begin_date_human = begin_date.strftime("%Y-%m-%d %H:%M:%SZ")
end_date_human = end_date.strftime("%Y-%m-%d %H:%M:%SZ")
if monthly_indexes:
index_date = begin_date.strftime("%Y-%m")
else:
index_date = begin_date.strftime("%Y-%m-%d")
report["begin_date"] = begin_date
report["end_date"] = end_date
org_name_query = Q(dict(match_phrase=dict(org_name=org_name)))
report_id_query = Q(dict(match_phrase=dict(report_id=report_id)))
begin_date_query = Q(dict(match=dict(date_begin=begin_date)))
end_date_query = Q(dict(match=dict(date_end=end_date)))
if index_suffix is not None:
search_index = "smtp_tls_{0}*".format(index_suffix)
else:
search_index = "smtp_tls*"
if index_prefix is not None:
search_index = "{0}{1}".format(index_prefix, search_index)
search = Search(index=search_index)
query = org_name_query & report_id_query
query = query & begin_date_query & end_date_query
search.query = query
try:
existing = search.execute()
except Exception as error_:
raise OpenSearchError(
"OpenSearch's search for existing report \
error: {}".format(error_.__str__())
)
if len(existing) > 0:
raise AlreadySaved(
f"An SMTP TLS report ID {report_id} from "
f" {org_name} with a date range of "
f"{begin_date_human} UTC to "
f"{end_date_human} UTC already "
"exists in OpenSearch"
)
index = "smtp_tls"
if index_suffix:
index = "{0}_{1}".format(index, index_suffix)
if index_prefix:
index = "{0}{1}".format(index_prefix, index)
index = "{0}-{1}".format(index, index_date)
index_settings = dict(
number_of_shards=number_of_shards, number_of_replicas=number_of_replicas
)
smtp_tls_doc = _SMTPTLSReportDoc(
org_name=report["organization_name"],
date_range=[report["begin_date"], report["end_date"]],
date_begin=report["begin_date"],
date_end=report["end_date"],
contact_info=report["contact_info"],
report_id=report["report_id"],
)
for policy in report["policies"]:
policy_strings = None
mx_host_patterns = None
if "policy_strings" in policy:
policy_strings = policy["policy_strings"]
if "mx_host_patterns" in policy:
mx_host_patterns = policy["mx_host_patterns"]
policy_doc = _SMTPTLSPolicyDoc(
policy_domain=policy["policy_domain"],
policy_type=policy["policy_type"],
successful_session_count=policy["successful_session_count"],
failed_session_count=policy["failed_session_count"],
policy_string=policy_strings,
mx_host_patterns=mx_host_patterns,
)
if "failure_details" in policy:
for failure_detail in policy["failure_details"]:
receiving_mx_hostname = None
additional_information_uri = None
failure_reason_code = None
ip_address = None
receiving_ip = None
receiving_mx_helo = None
sending_mta_ip = None
if "receiving_mx_hostname" in failure_detail:
receiving_mx_hostname = failure_detail["receiving_mx_hostname"]
if "additional_information_uri" in failure_detail:
additional_information_uri = failure_detail[
"additional_information_uri"
]
if "failure_reason_code" in failure_detail:
failure_reason_code = failure_detail["failure_reason_code"]
if "ip_address" in failure_detail:
ip_address = failure_detail["ip_address"]
if "receiving_ip" in failure_detail:
receiving_ip = failure_detail["receiving_ip"]
if "receiving_mx_helo" in failure_detail:
receiving_mx_helo = failure_detail["receiving_mx_helo"]
if "sending_mta_ip" in failure_detail:
sending_mta_ip = failure_detail["sending_mta_ip"]
policy_doc.add_failure_details(
result_type=failure_detail["result_type"],
ip_address=ip_address,
receiving_ip=receiving_ip,
receiving_mx_helo=receiving_mx_helo,
failed_session_count=failure_detail["failed_session_count"],
sending_mta_ip=sending_mta_ip,
receiving_mx_hostname=receiving_mx_hostname,
additional_information_uri=additional_information_uri,
failure_reason_code=failure_reason_code,
)
smtp_tls_doc.policies.append(policy_doc)
create_indexes([index], index_settings)
smtp_tls_doc.meta.index = index
try:
smtp_tls_doc.save()
except Exception as e:
raise OpenSearchError("OpenSearch error: {0}".format(e.__str__()))


@@ -0,0 +1,7 @@
# About
`dbip-country-lite.mmdb` is provided by [dbip][dbip] under a
[Creative Commons Attribution 4.0 International License][cc].
[dbip]: https://db-ip.com/db/download/ip-to-country-lite
[cc]: http://creativecommons.org/licenses/by/4.0/


Binary file not shown.


@@ -0,0 +1,92 @@
# About
A mapping is meant to make it easier to identify who or what a sending source is. Please consider contributing
additional mappings in a GitHub Pull Request.
Do not open these CSV files in Excel. It will replace Unicode characters with question marks. Use LibreOffice Calc instead.
## base_reverse_dns_map.csv
A CSV file with three fields: `base_reverse_dns`, `name`, and `type`.
Most of the time, the base reverse DNS of a sending service is closely related to the name of the
service, but not always. Sometimes services will use multiple reverse DNS domains for the same service. For example,
Intuit Mailchimp uses the base domains `mcdlv.net`, `mcsv.net`,
and `rsgsv.net`. Having all of these mapped to the same service name and type makes it easier to answer questions like:
"How many emails is Intuit Mailchimp sending as my domains?"
The `service_type` is based on the following rule precedence:
1. All email security services are identified as `Email Security`, no matter how or where they are hosted.
2. All marketing services are identified as `Marketing`, no matter how or where they are hosted.
3. All telecommunications providers that offer internet access are identified as `ISP`, even if they also offer other services, such as web hosting or email hosting.
4. All web hosting providers are identified as `Web Hosting`, even if the service also offers email hosting.
5. All email account providers are identified as `Email Provider`, no matter how or where they are hosted.
6. All legitimate platforms offering their Software as a Service (SaaS) are identified as `SaaS`, regardless of industry. This helps simplify metrics.
7. All other senders that use their own domain as a Reverse DNS base domain should be identified based on their industry:
- Agriculture
- Automotive
- Beauty
- Conglomerate
- Construction
- Consulting
- Defense
- Education
- Email Provider
- Email Security
- Entertainment
- Event Planning
- Finance
- Food
- Government
- Government Media
- Healthcare
- IaaS
- Industrial
- ISP
- Legal
- Logistics
- Manufacturing
- Marketing
- MSP
- MSSP
- News
- Nonprofit
- PaaS
- Photography
- Physical Security
- Print
- Publishing
- Real Estate
- Retail
- SaaS
- Science
- Search Engine
- Social Media
- Sports
- Staffing
- Technology
- Travel
- Web Host
The file currently contains over 1,400 mappings from a wide variety of email sending sources.
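For illustration, the Intuit Mailchimp domains mentioned above would each map to the same name and type (example rows, not copied verbatim from the actual file):

base_reverse_dns,name,type
mcdlv.net,Intuit Mailchimp,Marketing
mcsv.net,Intuit Mailchimp,Marketing
rsgsv.net,Intuit Mailchimp,Marketing

A consumer can then key lookups on the base reverse DNS domain, for example (a minimal sketch in Python):

import csv

with open("base_reverse_dns_map.csv", newline="") as f:
    reverse_dns_map = {
        row["base_reverse_dns"].lower(): {"name": row["name"], "type": row["type"]}
        for row in csv.DictReader(f)
    }
print(reverse_dns_map.get("mcsv.net"))  # e.g. {'name': 'Intuit Mailchimp', 'type': 'Marketing'}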
## known_unknown_base_reverse_dns.txt
A list of reverse DNS base domains that could not be identified as belonging to a particular organization, service, or industry.
## base_reverse_dns.csv
A CSV with the fields `source_name` and optionally `message_count`. This CSV can be generated by exporting the base DNS data from the Kibana or Splunk dashboards provided by parsedmarc. This file is not tracked by Git.
## unknown_base_reverse_dns.csv
A CSV file with the fields `source_name` and `message_count`. This file is not tracked by Git.
## find_bad_utf8.py
Locates invalid UTF-8 bytes in files and optionally tries to correct them. Generated by GPT-5. It helped me find where I had introduced invalid bytes in `base_reverse_dns_map.csv`.
## find_unknown_base_reverse_dns.py
This is a Python script that reads the domains in `base_reverse_dns.csv` and writes the domains that are not in `base_reverse_dns_map.csv` or `known_unknown_base_reverse_dns.txt` to `unknown_base_reverse_dns.csv`. This is useful for identifying potential additional domains to contribute to `base_reverse_dns_map.csv` and `known_unknown_base_reverse_dns.txt`.


File diff suppressed because it is too large


@@ -0,0 +1,44 @@
Agriculture
Automotive
Beauty
Conglomerate
Construction
Consulting
Defense
Education
Email Provider
Email Security
Entertainment
Event Planning
Finance
Food
Government
Government Media
Healthcare
ISP
IaaS
Industrial
Legal
Logistics
MSP
MSSP
Manufacturing
Marketing
News
Nonprofit
PaaS
Photography
Physical Security
Print
Publishing
Real Estate
Retail
SaaS
Science
Search Engine
Social Media
Sports
Staffing
Technology
Travel
Web Host


@@ -0,0 +1,488 @@
#!/usr/bin/env python3
import argparse
import codecs
import os
import sys
import shutil
from typing import List, Tuple
"""
Locates and optionally corrects bad UTF-8 bytes in a file.
Generated by GPT-5. Use at your own risk.
"""
# -------------------------
# UTF-8 scanning
# -------------------------
def scan_line_for_utf8_errors(
line_bytes: bytes, line_no: int, base_offset: int, context: int
):
"""
Scan one line of raw bytes for UTF-8 decoding errors.
Returns a list of dicts describing each error.
"""
pos = 0
results = []
while pos < len(line_bytes):
dec = codecs.getincrementaldecoder("utf-8")("strict")
try:
dec.decode(line_bytes[pos:], final=True)
break
except UnicodeDecodeError as e:
rel_index = e.start
abs_index_in_line = pos + rel_index
abs_offset = base_offset + abs_index_in_line
start_ctx = max(0, abs_index_in_line - context)
end_ctx = min(len(line_bytes), abs_index_in_line + 1 + context)
ctx_bytes = line_bytes[start_ctx:end_ctx]
bad_byte = line_bytes[abs_index_in_line : abs_index_in_line + 1]
col = abs_index_in_line + 1 # 1-based byte column
results.append(
{
"line": line_no,
"column": col,
"abs_offset": abs_offset,
"bad_byte_hex": bad_byte.hex(),
"context_hex": ctx_bytes.hex(),
"context_preview": ctx_bytes.decode("utf-8", errors="replace"),
}
)
# Move past the offending byte and continue
pos = abs_index_in_line + 1
return results
def scan_file_for_utf8_errors(path: str, context: int, limit: int):
errors_found = 0
limit_val = limit if limit != 0 else float("inf")
with open(path, "rb") as f:
total_offset = 0
line_no = 0
while True:
line = f.readline()
if not line:
break
line_no += 1
results = scan_line_for_utf8_errors(line, line_no, total_offset, context)
for r in results:
errors_found += 1
print(
f"[ERROR {errors_found}] Line {r['line']}, Column {r['column']}, "
f"Absolute byte offset {r['abs_offset']}"
)
print(f" Bad byte: 0x{r['bad_byte_hex']}")
print(f" Context (hex): {r['context_hex']}")
print(f" Context (preview): {r['context_preview']}")
print()
if errors_found >= limit_val:
print(f"Reached limit of {limit} errors. Stopping.")
return errors_found
total_offset += len(line)
if errors_found == 0:
print("No invalid UTF-8 bytes found. 🎉")
else:
print(f"Found {errors_found} invalid UTF-8 byte(s).")
return errors_found
# -------------------------
# Whole-file conversion
# -------------------------
def detect_encoding_text(path: str) -> Tuple[str, str]:
"""
Use charset-normalizer to detect file encoding.
Return (encoding_name, decoded_text). Falls back to cp1252 if needed.
"""
try:
from charset_normalizer import from_path
except ImportError:
print(
"Please install charset-normalizer: pip install charset-normalizer",
file=sys.stderr,
)
sys.exit(4)
matches = from_path(path)
match = matches.best()
if match is None or match.encoding is None:
# Fallback heuristic for Western single-byte text
with open(path, "rb") as fb:
data = fb.read()
try:
return "cp1252", data.decode("cp1252", errors="strict")
except UnicodeDecodeError:
print("Unable to detect encoding reliably.", file=sys.stderr)
sys.exit(5)
return match.encoding, str(match)
def convert_to_utf8(src_path: str, out_path: str, src_encoding: str = None) -> str:
"""
Convert an entire file to UTF-8 (re-decoding everything).
If src_encoding is provided, use it; else auto-detect.
Returns the encoding actually used.
"""
if src_encoding:
with open(src_path, "rb") as fb:
data = fb.read()
try:
text = data.decode(src_encoding, errors="strict")
except LookupError:
print(f"Unknown encoding: {src_encoding}", file=sys.stderr)
sys.exit(6)
except UnicodeDecodeError as e:
print(f"Decoding failed with {src_encoding}: {e}", file=sys.stderr)
sys.exit(7)
used = src_encoding
else:
used, text = detect_encoding_text(src_path)
with open(out_path, "w", encoding="utf-8", newline="") as fw:
fw.write(text)
return used
def verify_utf8_file(path: str) -> Tuple[bool, str]:
try:
with open(path, "rb") as fb:
fb.read().decode("utf-8", errors="strict")
return True, ""
except UnicodeDecodeError as e:
return False, str(e)
# -------------------------
# Targeted single-byte fixer
# -------------------------
def iter_lines_with_offsets(b: bytes):
"""
Yield (line_bytes, line_start_abs_offset). Preserves LF/CRLF/CR in bytes.
"""
start = 0
for i, byte in enumerate(b):
if byte == 0x0A: # LF
yield b[start : i + 1], start
start = i + 1
if start < len(b):
yield b[start:], start
def detect_probable_fallbacks() -> List[str]:
# Good defaults for Western/Portuguese text
return ["cp1252", "iso-8859-1", "iso-8859-15"]
def repair_mixed_utf8_line(line: bytes, base_offset: int, fallback_chain: List[str]):
"""
Strictly validate UTF-8 and fix *only* the exact offending byte when an error occurs.
This avoids touching adjacent valid UTF-8 (prevents mojibake such as 'Ã©' appearing in place of 'é').
"""
out_fragments: List[str] = []
fixes = []
pos = 0
n = len(line)
while pos < n:
dec = codecs.getincrementaldecoder("utf-8")("strict")
try:
s = dec.decode(line[pos:], final=True)
out_fragments.append(s)
break
except UnicodeDecodeError as e:
# Append the valid prefix before the error
if e.start > 0:
out_fragments.append(
line[pos : pos + e.start].decode("utf-8", errors="strict")
)
bad_index = pos + e.start # absolute index in 'line'
bad_slice = line[bad_index : bad_index + 1] # FIX EXACTLY ONE BYTE
# Decode that single byte using the first working fallback
decoded = None
used_enc = None
for enc in fallback_chain:
try:
decoded = bad_slice.decode(enc, errors="strict")
used_enc = enc
break
except Exception:
continue
if decoded is None:
# latin-1 always succeeds (byte->same code point)
decoded = bad_slice.decode("latin-1")
used_enc = "latin-1 (fallback)"
out_fragments.append(decoded)
# Log the fix
col_1based = bad_index + 1 # byte-based column
fixes.append(
{
"line_base_offset": base_offset,
"line": None, # caller fills line number
"column": col_1based,
"abs_offset": base_offset + bad_index,
"bad_bytes_hex": bad_slice.hex(),
"used_encoding": used_enc,
"replacement_preview": decoded,
}
)
# Advance exactly one byte past the offending byte and continue
pos = bad_index + 1
return "".join(out_fragments), fixes
def targeted_fix_to_utf8(
src_path: str,
out_path: str,
fallback_chain: List[str],
dry_run: bool,
max_fixes: int,
):
with open(src_path, "rb") as fb:
data = fb.read()
total_fixes = 0
repaired_lines: List[str] = []
line_no = 0
max_val = max_fixes if max_fixes != 0 else float("inf")
for line_bytes, base_offset in iter_lines_with_offsets(data):
line_no += 1
# Fast path: keep lines that are already valid UTF-8
try:
repaired_lines.append(line_bytes.decode("utf-8", errors="strict"))
continue
except UnicodeDecodeError:
pass
fixed_text, fixes = repair_mixed_utf8_line(
line_bytes, base_offset, fallback_chain=fallback_chain
)
for f in fixes:
f["line"] = line_no
repaired_lines.append(fixed_text)
# Log fixes
for f in fixes:
total_fixes += 1
print(
f"[FIX {total_fixes}] Line {f['line']}, Column {f['column']}, Abs offset {f['abs_offset']}"
)
print(f" Bad bytes: 0x{f['bad_bytes_hex']}")
print(f" Used encoding: {f['used_encoding']}")
preview = f["replacement_preview"].replace("\r", "\\r").replace("\n", "\\n")
if len(preview) > 40:
preview = preview[:40] + "…"
print(f" Replacement preview: {preview}")
print()
if total_fixes >= max_val:
print(f"Reached max fixes limit ({max_fixes}). Stopping scan.")
break
if total_fixes >= max_val:
break
if dry_run:
print(f"Dry run complete. Detected {total_fixes} fix(es). No file written.")
return total_fixes
# Join and verify result can be encoded to UTF-8
repaired_text = "".join(repaired_lines)
try:
repaired_text.encode("utf-8", errors="strict")
except UnicodeEncodeError as e:
print(f"Internal error: repaired text not valid UTF-8: {e}", file=sys.stderr)
sys.exit(3)
with open(out_path, "w", encoding="utf-8", newline="") as fw:
fw.write(repaired_text)
print(f"Fixed file written to: {out_path}")
print(f"Total fixes applied: {total_fixes}")
return total_fixes
# -------------------------
# CLI
# -------------------------
def main():
ap = argparse.ArgumentParser(
description=(
"Scan for invalid UTF-8; optionally convert whole file or fix only invalid bytes.\n\n"
"By default, --convert and --fix **edit the input file in place** and create a backup "
"named '<input>.bak' before writing. If you pass --output, the original file is left "
"unchanged and no backup is created. Use --dry-run to preview fixes without writing."
),
formatter_class=argparse.RawTextHelpFormatter,
)
ap.add_argument("path", help="Path to the CSV/text file")
ap.add_argument(
"--context",
type=int,
default=20,
help="Bytes of context to show around errors (default: 20)",
)
ap.add_argument(
"--limit",
type=int,
default=100,
help="Max errors to report during scan (0 = unlimited)",
)
ap.add_argument(
"--skip-scan", action="store_true", help="Skip initial scan for speed"
)
# Whole-file convert
ap.add_argument(
"--convert",
action="store_true",
help="Convert entire file to UTF-8 using auto/forced encoding "
"(in-place by default; creates '<input>.bak').",
)
ap.add_argument(
"--encoding",
help="Force source encoding for --convert or first fallback for --fix",
)
ap.add_argument(
"--output",
help="Write to this path instead of in-place (no .bak is created in that case)",
)
# Targeted fix
ap.add_argument(
"--fix",
action="store_true",
help="Fix only invalid byte(s) via fallback encodings "
"(in-place by default; creates '<input>.bak').",
)
ap.add_argument(
"--fallbacks",
help="Comma-separated fallback encodings (default: cp1252,iso-8859-1,iso-8859-15)",
)
ap.add_argument(
"--dry-run",
action="store_true",
help="(fix) Print fixes but do not write or create a .bak",
)
ap.add_argument(
"--max-fixes",
type=int,
default=0,
help="(fix) Stop after N fixes (0 = unlimited)",
)
args = ap.parse_args()
path = args.path
if not os.path.isfile(path):
print(f"File not found: {path}", file=sys.stderr)
sys.exit(2)
# Optional scan first
if not args.skip_scan:
scan_file_for_utf8_errors(path, context=args.context, limit=args.limit)
# Mode selection guards
if args.convert and args.fix:
print("Choose either --convert or --fix (not both).", file=sys.stderr)
sys.exit(9)
if not args.convert and not args.fix and args.skip_scan:
print("No action selected (use --convert or --fix).")
return
if not args.convert and not args.fix:
# User only wanted a scan
return
# Determine output path and backup behavior
# In-place by default: create '<input>.bak' before overwriting.
if args.output:
out_path = args.output
in_place = False
else:
out_path = path
in_place = True
# CONVERT mode
if args.convert:
print("\n[CONVERT MODE] Converting file to UTF-8...")
if in_place:
# Create backup before overwriting original
backup_path = path + ".bak"
shutil.copy2(path, backup_path)
print(f"Backup created: {backup_path}")
used = convert_to_utf8(path, out_path, src_encoding=args.encoding)
print(f"Source encoding used: {used}")
print(f"Saved UTF-8 file as: {out_path}")
ok, err = verify_utf8_file(out_path)
if ok:
print("Verification: output is valid UTF-8 ✅")
else:
print(f"Verification failed: {err}")
sys.exit(8)
return
# FIX mode (targeted, single-byte)
if args.fix:
print("\n[FIX MODE] Fixing only invalid bytes to UTF-8...")
if args.dry_run:
# Dry-run: never write or create backup
out_path_effective = os.devnull
in_place_effective = False
else:
out_path_effective = out_path
in_place_effective = in_place
# Build fallback chain (if --encoding provided, try it first)
if args.fallbacks:
fallback_chain = [e.strip() for e in args.fallbacks.split(",") if e.strip()]
else:
fallback_chain = detect_probable_fallbacks()
if args.encoding and args.encoding not in fallback_chain:
fallback_chain = [args.encoding] + fallback_chain
if in_place_effective:
# Create backup before overwriting original (only when actually writing)
backup_path = path + ".bak"
shutil.copy2(path, backup_path)
print(f"Backup created: {backup_path}")
fix_count = targeted_fix_to_utf8(
path,
out_path_effective,
fallback_chain=fallback_chain,
dry_run=args.dry_run,
max_fixes=args.max_fixes,
)
if not args.dry_run:
ok, err = verify_utf8_file(out_path_effective)
if ok:
print("Verification: output is valid UTF-8 ✅")
print(f"Fix mode completed — {fix_count} byte(s) corrected.")
else:
print(f"Verification failed: {err}")
sys.exit(8)
return
if __name__ == "__main__":
main()
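As a quick usage sketch (the argparse CLI in main() is the intended entry point), the scanner above can also be called directly from Python; the module name below assumes the script is saved as find_bad_utf8.py, as described in the README above:

# Equivalent CLI: python3 find_bad_utf8.py base_reverse_dns_map.csv --limit 0
from find_bad_utf8 import scan_file_for_utf8_errors

# limit=0 reports every invalid byte; context controls how many bytes are shown around each error
error_count = scan_file_for_utf8_errors("base_reverse_dns_map.csv", context=20, limit=0)
if error_count:
    raise SystemExit(1)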


@@ -0,0 +1,78 @@
#!/usr/bin/env python
import os
import csv
def _main():
input_csv_file_path = "base_reverse_dns.csv"
base_reverse_dns_map_file_path = "base_reverse_dns_map.csv"
known_unknown_list_file_path = "known_unknown_base_reverse_dns.txt"
psl_overrides_file_path = "psl_overrides.txt"
output_csv_file_path = "unknown_base_reverse_dns.csv"
csv_headers = ["source_name", "message_count"]
known_unknown_domains = []
psl_overrides = []
known_domains = []
output_rows = []
def load_list(file_path, list_var):
if not os.path.exists(file_path):
print(f"Error: {file_path} does not exist")
exit(1)
print(f"Loading {file_path}")
with open(file_path) as f:
for line in f.readlines():
domain = line.lower().strip()
if domain in list_var:
print(f"Error: {domain} is in {file_path} multiple times")
exit(1)
elif domain != "":
list_var.append(domain)
load_list(known_unknown_list_file_path, known_unknown_domains)
load_list(psl_overrides_file_path, psl_overrides)
if not os.path.exists(base_reverse_dns_map_file_path):
print(f"Error: {base_reverse_dns_map_file_path} does not exist")
exit(1)
print(f"Loading {base_reverse_dns_map_file_path}")
with open(base_reverse_dns_map_file_path) as f:
for row in csv.DictReader(f):
domain = row["base_reverse_dns"].lower().strip()
if domain in known_domains:
print(
f"Error: {domain} is in {base_reverse_dns_map_file_path} multiple times"
)
exit(1)
else:
known_domains.append(domain)
if domain in known_unknown_domains:
print(
f"Error: {domain} is in {known_unknown_list_file_path} and "
f"{base_reverse_dns_map_file_path}"
)
exit(1)
if not os.path.exists(input_csv_file_path):
print(f"Error: {base_reverse_dns_map_file_path} does not exist")
exit(1)
with open(input_csv_file_path) as f:
for row in csv.DictReader(f):
domain = row["source_name"].lower().strip()
if domain == "":
continue
for psl_domain in psl_overrides:
if domain.endswith(psl_domain):
domain = psl_domain.strip(".").strip("-")
break
if domain not in known_domains and domain not in known_unknown_domains:
print(f"New unknown domain found: {domain}")
output_rows.append(row)
print(f"Writing {output_csv_file_path}")
with open(output_csv_file_path, "w") as f:
writer = csv.DictWriter(f, fieldnames=csv_headers)
writer.writeheader()
writer.writerows(output_rows)
if __name__ == "__main__":
_main()
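The PSL override matching step above collapses hosting-provider subdomains onto a single base value before the lookup. A minimal sketch of that step, using entries taken from psl_overrides.txt (the hostname is a made-up example):

psl_overrides = [".amazonaws.com", "-clientes-izzi.mx"]
domain = "ec2-192-0-2-1.us-east-1.compute.amazonaws.com"
for psl_domain in psl_overrides:
    if domain.endswith(psl_domain):
        # Strip the leading "." or "-" so the override becomes the base domain
        domain = psl_domain.strip(".").strip("-")
        break
print(domain)  # amazonaws.com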


@@ -0,0 +1,601 @@
1jli.site
26.107
444qcuhilla.com
4xr1.com
9services.com
a7e.ru
a94434500-blog.com
aams8.jp
abv-10.top
acemail.co.in
activaicon.com
adcritic.net
adlucrumnewsletter.com
admin.corpivensa.gob.ve
advantageiq.com
advrider.ro
aerospacevitro.us.com
agenturserver.de
aghories.com
ai270.net
albagroup-eg.com
alchemy.net
alohabeachcamp.net
alsiscad.com
aluminumpipetubing.com
americanstorageca.com
amplusserver.info
anchorfundhub.com
anglishment.com
anteldata.net.uy
antis.edu
antonaoll.com
anviklass.org
anwrgrp.lat
aosau.net
arandomserver.com
aransk.ru
ardcs.cn
armninl.met
as29550.net
asahachimaru.com
aserv.co.za
asmecam.it
ateky.net.br
aurelienvos.com
automatech.lat
avistaadvantage.com
b8sales.com
bahjs.com
baliaura.com
banaras.co
bearandbullmarketnews.com
bestinvestingtime.com
bhjui.com
biocorp.com
biosophy.net
bitter-echo.com
bizhostingservices.com
blguss.com
bluenet.ch
bluhosting.com
bnasg.com
bodiax.pp.ua
bost-law.com
brainity.com
brazalnde.net
brellatransplc.shop
brnonet.cz
broadwaycover.com
brushinglegal.de
brw.net
btes.tv
budgeteasehub.com
buoytoys.com
buyjapanese.jp
c53dw7m24rj.com
cahtelrandom.org
casadelmarsamara.com
cashflowmasterypro.com
cavabeen.com
cbti.net
centralmalaysia.com
chauffeurplan.co.uk
checkpox.fun
chegouseuvlache.org
chinaxingyu.xyz
christus.mx
churchills.market
ci-xyz.fit
cisumrecords.com
ckaik.cn
clcktoact.com
cli-eurosignal.cz
cloud-admin.it
cloud-edm.com
cloudflare-email.org
cloudhosting.rs
cloudlogin.co
cloudplatformpro.com
cnode.io
cntcloud.com
code-it.net
codefriend.top
colombiaceropapel.org
commerceinsurance.com
comsharempc.com
conexiona.com
coolblaze.com
coowo.com
corpemail.net
cp2-myorderbox.com
cps.com.ar
crnagora.net
cross-d-bar-troutranch.com
ctla.co.kr
cumbalikonakhotel.com
currencyexconverter.com
daakbabu.com
daikinmae.com
dairyvalley.com.my
dastans.ru
datahost36.de
ddii.network
deep-sek.shop
deetownsounds.com
descarca-counter-strike.net
detrot.xyz
dettlaffinc.com
dextoolse.net
digestivedaily.com
digi.net.my
dinofelis.cn
diwkyncbi.top
dkginternet.com
dnexpress.info
dns-oid.com
dnsindia.net
domainserver.ne.jp
domconfig.com
doorsrv.com
dreampox.fun
dreamtechmedia.com
ds.network
dss-group.net
dvj.theworkpc.com
dwlcka.com
dynamic-wiretel.in
dyntcorp.com
easternkingspei.com
economiceagles.com
egosimail.com
eliotporterphotos.us
emailgids.net
emailperegrine.com
entendercopilot.com
entretothom.net
epaycontrol.com
epicinvestmentsreview.co
epicinvestmentsreview.com
epik.com
epsilon-group.com
erestaff.com
euro-trade-gmbh.com
example.com
exposervers.com-new
extendcp.co.uk
eyecandyhosting.xyz
fastwebnet.it
fd9ing7wfn.com
feipnghardware.com
fetscorp.shop
fewo-usedom.net
fin-crime.com
financeaimpoint.com
financeupward.com
firmflat.com
flex-video.bnr.la
flourishfusionlife.com
formicidaehunt.net
fosterheap.com
fredi.shop
frontiernet.net
ftifb7tk3c.com
gamersprotectionvpn.online
gendns.com
getgreencardsfast.com
getthatroi.com
gibbshosting.com
gigidea.net
giize.com
ginous.eu.com
gis.net
gist-th.com
globalglennpartners.com
goldsboroughplace.com
gophermedia.com
gqlists.us.com
gratzl.de
greatestworldnews.com
greennutritioncare.com
gsbb.com
gumbolimbo.net
h-serv.co.uk
haedefpartners.com
halcyon-aboveboard.com
hanzubon.org
healthfuljourneyjoy.com
hgnbroken.us.com
highwey-diesel.com
hirofactory.com
hjd.asso.fr
hongchenggco.pro
hongkongtaxi.co
hopsinthehanger.com
hosted-by-worldstream.net
hostelsucre.com
hosting1337.com
hostinghane.com
hostinglotus.cloud
hostingmichigan.com
hostiran.name
hostmnl.com
hostname.localhost
hostnetwork.com
hosts.net.nz
hostserv.eu
hostwhitelabel.com
hpms1.jp
hunariojmk.net
hunriokinmuim.net
hypericine.com
i-mecca.net
iaasdns.com
iam.net.ma
iconmarketingguy.com
idcfcloud.net
idealconcept.live
igmohji.com
igppevents.org.uk
ihglobaldns.com
ilmessicano.com
imjtmn.cn
immenzaces.com
in-addr-arpa
in-addr.arpa
indsalelimited.com
indulgent-holistic.com
industechint.org
inshaaegypt.com
intal.uz
interfarma.kz
intocpanel.com
ip-147-135-108.us
ip-178-33-109.eu
ip-ptr.tech
iswhatpercent.com
itsidc.com
itwebs.com
iuon.net
ivol.co
jalanet.co.id
jimishare.com
jlccptt.net.cn
jlenterprises.co.uk
jmontalto.com
joyomokei.com
jumanra.org
justlongshirts.com
kahlaa.com
kaw.theworkpc.com
kbronet.com.tw
kdnursing.org
kielnet.net
kihy.theworkpc.com
kingschurchwirral.org
kitchenaildbd.com
klaomi.shop
knkconsult.net
kohshikai.com
krhfund.org
krillaglass.com
lancorhomes.com
landpedia.org
lanzatuseo.es
layerdns.cloud
learninglinked.com
legenditds.com
levertechcentre.com
lhost.no
lideri.net.br
lighthouse-media.com
lightpath.net
limogesporcelainboxes.com
lindsaywalt.net
linuxsunucum.com
listertermoformadoa.com
llsend.com
local.net
lohkal.com
londionrtim.net
lonestarmm.net
longmarquis.com
longwoodmgmt.com
lse.kz
lunvoy.com
luxarpro.ru
lwl-puehringer.at
lynx.net.lb
lyse.net
m-sender.com.ua
maggiolicloud.it
magnetmail.net
magnumgo.uz
maia11.com
mail-fire.com
mailsentinel.net
mailset.cn
malardino.net
managed-vps.net
manhattanbulletpoint.com
manpowerservices.com
marketmysterycode.com
marketwizardspro.com
masterclassjournal.com
matroguel.cam
maximpactipo.com
mechanicalwalk.store
mediavobis.com
meqlobal.com
mgts.by
migrans.net
miixta.com
milleniumsrv.com
mindworksunlimited.com
mirth-gale.com
misorpresa.com
mitomobile.com
mitsubachi-kibako.net
mjinn.com
mkegs.shop
mobius.fr
model-ac.ink
moderntradingnews.com
monnaiegroup.com
monopolizeright.com
moonjaws.com
morningnewscatcher.com
motion4ever.net
mschosting.com
msdp1.com
mspnet.pro
mts-nn.ru
multifamilydesign.com
mxserver.ro
mxthunder.net
my-ihor.ru
mycloudmailbox.com
myfriendforum.com
myrewards.net
mysagestore.com
mysecurewebserver.com
myshanet.net
myvps.jp
mywedsite.net
mywic.eu
name.tools
nanshenqfurniture.com
nask.pl
navertise.net
ncbb.kz
ncport.ru
ncsdi.ws
nebdig.com
neovet-base.ru
netbri.com
netcentertelecom.net.br
neti.ee
netkl.org
newinvestingguide.com
newwallstreetcode.com
ngvcv.cn
nic.name
nidix.net
nieuwedagnetwerk.net
nlscanme.com
nmeuh.cn
noisndametal.com
nucleusemail.com
nutriboostlife.com
nwo.giize.com
nwwhalewatchers.org
ny.adsl
nyt1.com
offerslatedeals.com
office365.us
ogicom.net
olivettilexikon.co.uk
omegabrasil.inf.br
onnet21.com
onumubunumu.com
oppt-ac.fit
orbitel.net.co
orfsurface.com
orientalspot.com
outsidences.com
ovaltinalization.co
overta.ru
ox28vgrurc.com
pamulang.net
panaltyspot.space
panolacountysheriffms.com
passionatesmiles.com
paulinelam.com
pdi-corp.com
peloquinbeck.com
perimetercenter.net
permanentscreen.com
permasteellisagroup.com
perumkijhyu.net
pesnia.com.ua
ph8ltwdi12o.com
pharmada.com.de
phdns3.es
pigelixval1.com
pipefittingsindia.com
planethoster.net
playamedia.io
plesk.page
pmnhost.net
pokiloandhu.net
pokupki5.ru
polandi.net
popiup.com
ports.net
posolstvostilya.com
potia.net
prima.com.ar
prima.net.ar
profsol.co.uk
prohealthmotion.com
promooffermarket.site
proudserver.com
proxado.com
psnm.ru
pvcwindowsprices.live
qontenciplc.autos
quakeclick.com
quasarstate.store
quatthonggiotico.com
qxyxab44njd.com
radianthealthrenaissance.com
rapidns.com
raxa.host
reberte.com
reethvikintl.com
regruhosting.ru
reliablepanel.com
rgb365.eu
riddlecamera.net
riddletrends.com
roccopugliese.com
runnin-rebels.com
rupar.puglia.it
rwdhosting.ca
s500host.com
sageevents.co.ke
sahacker-2020.com
samsales.site
sante-lorraine.fr
saransk.ru
satirogluet.com
scioncontacts.com
sdcc.my
seaspraymta3.net
secorp.mx
securen.net
securerelay.in
securev.net
seductiveeyes.com
seizethedayconsulting.com
serroplast.shop
server290.com
server342.com
server3559.cc
servershost.biz
sfek.kz
sgnetway.net
shopfox.ca
silvestrejaguar.sbs
silvestreonca.sbs
simplediagnostics.org
siriuscloud.jp
sisglobalresearch.com
sixpacklink.net
sjestyle.com
smallvillages.com
smartape-vps.com
solusoftware.com
sourcedns.com
southcoastwebhosting12.com
specialtvvs.com
spiritualtechnologies.io
sprout.org
srv.cat
stableserver.net
statlerfa.co.uk
stock-smtp.top
stockepictigers.com
stockexchangejournal.com
subterranean-concave.com
suksangroup.com
swissbluetopaz.com
switer.shop
sysop4.com
system.eu.com
szhongbing.com
t-jon.com
tacaindo.net
tacom.tj
tankertelz.co
tataidc.com
teamveiw.com
tecnoxia.net
tel-xyz.fit
tenkids.net
terminavalley.com
thaicloudsolutions.com
thaikinghost.com
thaimonster.com
thegermainetruth.net
thehandmaderose.com
thepushcase.com
ticdns.com
tigo.bo
toledofibra.net.br
topdns.com
totaal.net
totalplay.net
tqh.ro
traderlearningcenter.com
tradeukraine.site
traveleza.com
trwww.com
tsuzakij.com
tullostrucking.com
turbinetrends.com
twincitiesdistinctivehomes.com
tylerfordonline.com
uiyum.com
ultragate.com
uneedacollie.com
unified.services
unite.services
urawasl.com
us.servername.us
vagebond.net
varvia.de
vbcploo.com
vdc.vn
vendimetry.com
vibrantwellnesscorp.com
virtualine.org
visit.docotor
viviotech.us
vlflgl.com
volganet.ru
vrns.net
vulterdi.edu
vvondertex.com
wallstreetsgossip.com
wamego.net
wanekoohost.com
wealthexpertisepro.com
web-login.eu
weblinkinternational.com
webnox.io
websale.net
welllivinghive.com
westparkcom.com
wetransfer-eu.com
wheelch.me
whoflew.com
whpservers.com
wisdomhard.com
wisewealthcircle.com
wisvis.com
wodeniowa.com
wordpresshosting.xyz
wsiph2.com
xnt.mx
xodiax.com
xpnuf.cn
xsfati.us.com
xspmail.jp
yourciviccompass.com
yourinvestworkbook.com
yoursitesecure.net
zerowebhosting.net
zmml.uk
znlc.jp
ztomy.com


@@ -0,0 +1,23 @@
-applefibernet.com
-c3.net.pl
-celsiainternet.com
-clientes-izzi.mx
-clientes-zap-izzi.mx
-imnet.com.br
-mcnbd.com
-smile.com.bd
-tataidc.co.in
-veloxfiber.com.br
-wconect.com.br
.amazonaws.com
.cloudaccess.net
.ddnsgeek.com
.fastvps-server.com
.in-addr-arpa
.in-addr.arpa
.kasserver.com
.kinghost.net
.linode.com
.linodeusercontent.com
.na4u.ru
.sakura.ne.jp


@@ -0,0 +1,184 @@
#!/usr/bin/env python3
from __future__ import annotations
import os
import csv
from pathlib import Path
from typing import Mapping, Iterable, Optional, Collection, Union, List, Dict
class CSVValidationError(Exception):
def __init__(self, errors: list[str]):
super().__init__("\n".join(errors))
self.errors = errors
def sort_csv(
filepath: Union[str, Path],
field: str,
*,
sort_field_value_must_be_unique: bool = True,
strip_whitespace: bool = True,
fields_to_lowercase: Optional[Iterable[str]] = None,
case_insensitive_sort: bool = False,
required_fields: Optional[Iterable[str]] = None,
allowed_values: Optional[Mapping[str, Collection[str]]] = None,
) -> List[Dict[str, str]]:
"""
Read a CSV, optionally normalize rows (strip whitespace, lowercase certain fields),
validate field values, and write the sorted CSV back to the same path.
- filepath: Path to the CSV to sort.
- field: The field name to sort by.
- fields_to_lowercase: Permanently lowercases these field(s) in the data.
- strip_whitespace: Remove all whitespace at the beginning and end of field values.
- case_insensitive_sort: Ignore case when sorting without changing values.
- required_fields: A list of fields that must have data in all rows.
- allowed_values: A mapping of allowed values for fields.
"""
path = Path(filepath)
required_fields = set(required_fields or [])
lower_set = set(fields_to_lowercase or [])
allowed_sets = {k: set(v) for k, v in (allowed_values or {}).items()}
if sort_field_value_must_be_unique:
seen_sort_field_values = []
with path.open("r", newline="") as infile:
reader = csv.DictReader(infile)
fieldnames = reader.fieldnames or []
if field not in fieldnames:
raise CSVValidationError([f"Missing sort column: {field!r}"])
missing_headers = required_fields - set(fieldnames)
if missing_headers:
raise CSVValidationError(
[f"Missing required header(s): {sorted(missing_headers)}"]
)
rows = list(reader)
def normalize_row(row: Dict[str, str]) -> None:
if strip_whitespace:
for k, v in row.items():
if isinstance(v, str):
row[k] = v.strip()
for fld in lower_set:
if fld in row and isinstance(row[fld], str):
row[fld] = row[fld].lower()
def validate_row(
row: Dict[str, str], sort_field: str, line_no: int, errors: list[str]
) -> None:
if sort_field_value_must_be_unique:
if row[sort_field] in seen_sort_field_values:
errors.append(f"Line {line_no}: Duplicate row for '{row[sort_field]}'")
else:
seen_sort_field_values.append(row[sort_field])
for rf in required_fields:
val = row.get(rf)
if val is None or val == "":
errors.append(
f"Line {line_no}: Missing value for required field '{rf}'"
)
for field, allowed_values in allowed_sets.items():
if field in row:
val = row[field]
if val not in allowed_values:
errors.append(
f"Line {line_no}: '{val}' is not an allowed value for '{field}' "
f"(allowed: {sorted(allowed_values)})"
)
errors: list[str] = []
for idx, row in enumerate(rows, start=2): # header is line 1
normalize_row(row)
validate_row(row, field, idx, errors)
if errors:
raise CSVValidationError(errors)
def sort_key(r: Dict[str, str]):
v = r.get(field, "")
if isinstance(v, str) and case_insensitive_sort:
return v.casefold()
return v
rows.sort(key=sort_key)
with open(filepath, "w", newline="") as outfile:
writer = csv.DictWriter(outfile, fieldnames=fieldnames)
writer.writeheader()
writer.writerows(rows)
def sort_list_file(
filepath: Union[str, Path],
*,
lowercase: bool = True,
strip: bool = True,
deduplicate: bool = True,
remove_blank_lines: bool = True,
ending_newline: bool = True,
newline: Optional[str] = "\n",
):
"""Read a list from a file, sort it, optionally strip and deduplicate the values,
then write that list back to the file.
- filepath: The path to the file.
- lowercase: Lowercase all values prior to sorting.
- remove_blank_lines: Remove any blank lines.
- ending_newline: End the file with a newline, even if remove_blank_lines is true.
- newline: The newline character to use.
"""
with open(filepath, mode="r", newline=newline) as infile:
lines = infile.readlines()
for i in range(len(lines)):
if lowercase:
lines[i] = lines[i].lower()
if strip:
lines[i] = lines[i].strip()
if deduplicate:
lines = list(set(lines))
if remove_blank_lines:
while "" in lines:
lines.remove("")
lines = sorted(lines)
if ending_newline:
if lines[-1] != "":
lines.append("")
with open(filepath, mode="w", newline=newline) as outfile:
outfile.write("\n".join(lines))
def _main():
map_file = "base_reverse_dns_map.csv"
map_key = "base_reverse_dns"
list_files = ["known_unknown_base_reverse_dns.txt", "psl_overrides.txt"]
types_file = "base_reverse_dns_types.txt"
with open(types_file) as f:
types = [line.strip() for line in f.readlines()]  # strip newlines so values compare equal to CSV field values
while "" in types:
types.remove("")
map_allowed_values = {"Type": types}
for list_file in list_files:
if not os.path.exists(list_file):
print(f"Error: {list_file} does not exist")
exit(1)
sort_list_file(list_file)
if not os.path.exists(types_file):
print(f"Error: {types_file} does not exist")
exit(1)
sort_list_file(types_file, lowercase=False)
if not os.path.exists(map_file):
print(f"Error: {map_file} does not exist")
exit(1)
try:
sort_csv(map_file, map_key, allowed_values=map_allowed_values)
except CSVValidationError as e:
print(f"{map_file} did not validate: {e}")
if __name__ == "__main__":
_main()
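A minimal sketch of calling the helpers above directly (field names follow the README for base_reverse_dns_map.csv; the set of allowed type values is truncated here for illustration):

try:
    sort_csv(
        "base_reverse_dns_map.csv",
        "base_reverse_dns",
        fields_to_lowercase=["base_reverse_dns"],
        case_insensitive_sort=True,
        required_fields=["base_reverse_dns", "name", "type"],
        allowed_values={"type": ["Email Security", "Marketing", "ISP"]},
    )
except CSVValidationError as e:
    print(f"Validation failed:\n{e}")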

95
parsedmarc/s3.py Normal file

@@ -0,0 +1,95 @@
# -*- coding: utf-8 -*-
from __future__ import annotations
from typing import Any
import json
import boto3
from parsedmarc.log import logger
from parsedmarc.utils import human_timestamp_to_datetime
class S3Client(object):
"""A client for interacting with Amazon S3"""
def __init__(
self,
bucket_name: str,
bucket_path: str,
region_name: str,
endpoint_url: str,
access_key_id: str,
secret_access_key: str,
):
"""
Initializes the S3Client
Args:
bucket_name (str): The S3 Bucket
bucket_path (str): The path to save reports
region_name (str): The region name
endpoint_url (str): The endpoint URL
access_key_id (str): The access key id
secret_access_key (str): The secret access key
"""
self.bucket_name = bucket_name
self.bucket_path = bucket_path
self.metadata_keys = [
"org_name",
"org_email",
"report_id",
"begin_date",
"end_date",
]
# https://github.com/boto/boto3/blob/1.24.7/boto3/session.py#L312
self.s3 = boto3.resource(
"s3",
region_name=region_name,
endpoint_url=endpoint_url,
aws_access_key_id=access_key_id,
aws_secret_access_key=secret_access_key,
)
self.bucket: Any = self.s3.Bucket(self.bucket_name)
def save_aggregate_report_to_s3(self, report: dict[str, Any]):
self.save_report_to_s3(report, "aggregate")
def save_forensic_report_to_s3(self, report: dict[str, Any]):
self.save_report_to_s3(report, "forensic")
def save_smtp_tls_report_to_s3(self, report: dict[str, Any]):
self.save_report_to_s3(report, "smtp_tls")
def save_report_to_s3(self, report: dict[str, Any], report_type: str):
if report_type == "smtp_tls":
report_date = report["begin_date"]
report_id = report["report_id"]
else:
report_date = human_timestamp_to_datetime(
report["report_metadata"]["begin_date"]
)
report_id = report["report_metadata"]["report_id"]
path_template = "{0}/{1}/year={2}/month={3:02d}/day={4:02d}/{5}.json"
object_path = path_template.format(
self.bucket_path,
report_type,
report_date.year,
report_date.month,
report_date.day,
report_id,
)
logger.debug(
"Saving {0} report to s3://{1}/{2}".format(
report_type, self.bucket_name, object_path
)
)
object_metadata = {
k: v
for k, v in report["report_metadata"].items()
if k in self.metadata_keys
}
self.bucket.put_object(
Body=json.dumps(report), Key=object_path, Metadata=object_metadata
)
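A minimal usage sketch for this client; bucket and path values are placeholders, and passing None for the endpoint and keys lets boto3 fall back to its normal credential and configuration chain:

from parsedmarc.s3 import S3Client

s3_client = S3Client(
    bucket_name="my-dmarc-reports",
    bucket_path="parsedmarc",
    region_name="us-east-1",
    endpoint_url=None,
    access_key_id=None,
    secret_access_key=None,
)
# aggregate_report is assumed to be a parsed aggregate report dict produced by parsedmarc
s3_client.save_aggregate_report_to_s3(aggregate_report)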

211
parsedmarc/splunk.py Normal file

@@ -0,0 +1,211 @@
# -*- coding: utf-8 -*-
from __future__ import annotations
from typing import Any, Union
from urllib.parse import urlparse
import socket
import json
import urllib3
import requests
from parsedmarc.constants import USER_AGENT
from parsedmarc.log import logger
from parsedmarc.utils import human_timestamp_to_unix_timestamp
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
class SplunkError(RuntimeError):
"""Raised when a Splunk API error occurs"""
class HECClient(object):
"""A client for a Splunk HTTP Events Collector (HEC)"""
# http://docs.splunk.com/Documentation/Splunk/latest/Data/AboutHEC
# http://docs.splunk.com/Documentation/Splunk/latest/RESTREF/RESTinput#services.2Fcollector
def __init__(
self,
url: str,
access_token: str,
index: str,
source: str = "parsedmarc",
verify=True,
timeout=60,
):
"""
Initializes the HECClient
Args:
url (str): The URL of the HEC
access_token (str): The HEC access token
index (str): The name of the index
source (str): The source name
verify (bool): Verify SSL certificates
timeout (float): Number of seconds to wait for the server to send
data before giving up
"""
parsed_url = urlparse(url)
self.url = "{0}://{1}/services/collector/event/1.0".format(
parsed_url.scheme, parsed_url.netloc
)
self.access_token = access_token.lstrip("Splunk ")
self.index = index
self.host = socket.getfqdn()
self.source = source
self.session = requests.Session()
self.timeout = timeout
self.session.verify = verify
self._common_data: dict[str, Union[str, int, float, dict]] = dict(
host=self.host, source=self.source, index=self.index
)
self.session.headers = {
"User-Agent": USER_AGENT,
"Authorization": "Splunk {0}".format(self.access_token),
}
def save_aggregate_reports_to_splunk(
self,
aggregate_reports: Union[list[dict[str, Any]], dict[str, Any]],
):
"""
Saves aggregate DMARC reports to Splunk
Args:
aggregate_reports: A list of aggregate report dictionaries
to save in Splunk
"""
logger.debug("Saving aggregate reports to Splunk")
if isinstance(aggregate_reports, dict):
aggregate_reports = [aggregate_reports]
if len(aggregate_reports) < 1:
return
data = self._common_data.copy()
json_str = ""
for report in aggregate_reports:
for record in report["records"]:
new_report: dict[str, Union[str, int, float, dict]] = dict()
for metadata in report["report_metadata"]:
new_report[metadata] = report["report_metadata"][metadata]
new_report["interval_begin"] = record["interval_begin"]
new_report["interval_end"] = record["interval_end"]
new_report["normalized_timespan"] = record["normalized_timespan"]
new_report["published_policy"] = report["policy_published"]
new_report["source_ip_address"] = record["source"]["ip_address"]
new_report["source_country"] = record["source"]["country"]
new_report["source_reverse_dns"] = record["source"]["reverse_dns"]
new_report["source_base_domain"] = record["source"]["base_domain"]
new_report["source_type"] = record["source"]["type"]
new_report["source_name"] = record["source"]["name"]
new_report["message_count"] = record["count"]
new_report["disposition"] = record["policy_evaluated"]["disposition"]
new_report["spf_aligned"] = record["alignment"]["spf"]
new_report["dkim_aligned"] = record["alignment"]["dkim"]
new_report["passed_dmarc"] = record["alignment"]["dmarc"]
new_report["header_from"] = record["identifiers"]["header_from"]
new_report["envelope_from"] = record["identifiers"]["envelope_from"]
if "dkim" in record["auth_results"]:
new_report["dkim_results"] = record["auth_results"]["dkim"]
if "spf" in record["auth_results"]:
new_report["spf_results"] = record["auth_results"]["spf"]
data["sourcetype"] = "dmarc:aggregate"
timestamp = human_timestamp_to_unix_timestamp(
new_report["interval_begin"]
)
data["time"] = timestamp
data["event"] = new_report.copy()
json_str += "{0}\n".format(json.dumps(data))
if not self.session.verify:
logger.debug("Skipping certificate verification for Splunk HEC")
try:
response = self.session.post(self.url, data=json_str, timeout=self.timeout)
response = response.json()
except Exception as e:
raise SplunkError(e.__str__())
if response["code"] != 0:
raise SplunkError(response["text"])
def save_forensic_reports_to_splunk(
self,
forensic_reports: Union[list[dict[str, Any]], dict[str, Any]],
):
"""
Saves forensic DMARC reports to Splunk
Args:
forensic_reports (list): A list of forensic report dictionaries
to save in Splunk
"""
logger.debug("Saving forensic reports to Splunk")
if isinstance(forensic_reports, dict):
forensic_reports = [forensic_reports]
if len(forensic_reports) < 1:
return
json_str = ""
for report in forensic_reports:
data = self._common_data.copy()
data["sourcetype"] = "dmarc:forensic"
timestamp = human_timestamp_to_unix_timestamp(report["arrival_date_utc"])
data["time"] = timestamp
data["event"] = report.copy()
json_str += "{0}\n".format(json.dumps(data))
if not self.session.verify:
logger.debug("Skipping certificate verification for Splunk HEC")
try:
response = self.session.post(self.url, data=json_str, timeout=self.timeout)
response = response.json()
except Exception as e:
raise SplunkError(e.__str__())
if response["code"] != 0:
raise SplunkError(response["text"])
def save_smtp_tls_reports_to_splunk(
self, reports: Union[list[dict[str, Any]], dict[str, Any]]
):
"""
Saves SMTP TLS reports to Splunk
Args:
reports: A list of SMTP TLS report dictionaries
to save in Splunk
"""
logger.debug("Saving SMTP TLS reports to Splunk")
if isinstance(reports, dict):
reports = [reports]
if len(reports) < 1:
return
data = self._common_data.copy()
json_str = ""
for report in reports:
data["sourcetype"] = "smtp:tls"
timestamp = human_timestamp_to_unix_timestamp(report["begin_date"])
data["time"] = timestamp
data["event"] = report.copy()
json_str += "{0}\n".format(json.dumps(data))
if not self.session.verify:
logger.debug("Skipping certificate verification for Splunk HEC")
try:
response = self.session.post(self.url, data=json_str, timeout=self.timeout)
response = response.json()
except Exception as e:
raise SplunkError(e.__str__())
if response["code"] != 0:
raise SplunkError(response["text"])

57
parsedmarc/syslog.py Normal file

@@ -0,0 +1,57 @@
# -*- coding: utf-8 -*-
from __future__ import annotations
import logging
import logging.handlers
from typing import Any
import json
from parsedmarc import (
parsed_aggregate_reports_to_csv_rows,
parsed_forensic_reports_to_csv_rows,
parsed_smtp_tls_reports_to_csv_rows,
)
class SyslogClient(object):
"""A client for Syslog"""
def __init__(self, server_name: str, server_port: int):
"""
Initializes the SyslogClient
Args:
server_name (str): The Syslog server
server_port (int): The Syslog UDP port
"""
self.server_name = server_name
self.server_port = server_port
self.logger = logging.getLogger("parsedmarc_syslog")
self.logger.setLevel(logging.INFO)
log_handler = logging.handlers.SysLogHandler(address=(server_name, server_port))
self.logger.addHandler(log_handler)
def save_aggregate_report_to_syslog(
self, aggregate_reports: list[dict[str, Any]]
):
rows = parsed_aggregate_reports_to_csv_rows(aggregate_reports)
for row in rows:
self.logger.info(json.dumps(row))
def save_forensic_report_to_syslog(
self, forensic_reports: list[dict[str, Any]]
):
rows = parsed_forensic_reports_to_csv_rows(forensic_reports)
for row in rows:
self.logger.info(json.dumps(row))
def save_smtp_tls_report_to_syslog(
self, smtp_tls_reports: list[dict[str, Any]]
):
rows = parsed_smtp_tls_reports_to_csv_rows(smtp_tls_reports)
for row in rows:
self.logger.info(json.dumps(row))
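A minimal usage sketch; the server name is a placeholder and 514 is the conventional syslog UDP port used by SysLogHandler:

from parsedmarc.syslog import SyslogClient

syslog_client = SyslogClient("syslog.example.com", 514)
# aggregate_reports is assumed to be a list of parsed aggregate report dicts
syslog_client.save_aggregate_report_to_syslog(aggregate_reports)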

721
parsedmarc/utils.py Normal file

@@ -0,0 +1,721 @@
# -*- coding: utf-8 -*-
"""Utility functions that might be useful for other projects"""
from __future__ import annotations
from typing import Optional, Union, TypedDict, Any
import logging
import os
from datetime import datetime
from datetime import timezone
from datetime import timedelta
from expiringdict import ExpiringDict
import tempfile
import subprocess
import shutil
import mailparser
import json
import hashlib
import base64
import mailbox
import re
import csv
import io
try:
from importlib.resources import files
except ImportError:
# Try backported to PY<3 `importlib_resources`
from importlib.resources import files
from dateutil.parser import parse as parse_date
import dns.reversename
import dns.resolver
import dns.exception
import geoip2.database
import geoip2.errors
import publicsuffixlist
import requests
from parsedmarc.log import logger
import parsedmarc.resources.dbip
import parsedmarc.resources.maps
from parsedmarc.constants import USER_AGENT
parenthesis_regex = re.compile(r"\s*\(.*\)\s*")
null_file = open(os.devnull, "w")
mailparser_logger = logging.getLogger("mailparser")
mailparser_logger.setLevel(logging.CRITICAL)
psl = publicsuffixlist.PublicSuffixList()
psl_overrides_path = str(files(parsedmarc.resources.maps).joinpath("psl_overrides.txt"))
with open(psl_overrides_path) as f:
psl_overrides = [line.rstrip() for line in f.readlines()]
while "" in psl_overrides:
psl_overrides.remove("")
class EmailParserError(RuntimeError):
"""Raised when an error parsing the email occurs"""
class DownloadError(RuntimeError):
"""Raised when an error occurs when downloading a file"""
class EmailAddress(TypedDict):
"""Parsed email address information"""
display_name: Optional[str]
address: str
local: Optional[str]
domain: Optional[str]
def decode_base64(data: str) -> bytes:
"""
Decodes a base64 string, with padding being optional
Args:
data (str): A base64 encoded string
Returns:
bytes: The decoded bytes
"""
data = bytes(data, encoding="ascii")
missing_padding = len(data) % 4
if missing_padding != 0:
data += b"=" * (4 - missing_padding)
return base64.b64decode(data)
def get_base_domain(domain: str) -> str:
"""
Gets the base domain name for the given domain
.. note::
Results are based on a list of public domain suffixes at
https://publicsuffix.org/list/public_suffix_list.dat and overrides included in
parsedmarc.resources.maps.psl_overrides.txt
Args:
domain (str): A domain or subdomain
Returns:
str: The base domain of the given domain
"""
domain = domain.lower()
publicsuffix = psl.privatesuffix(domain)
for override in psl_overrides:
if domain.endswith(override):
return override.strip(".").strip("-")
return publicsuffix
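# Illustrative examples (not part of this module): with the bundled public
# suffix list, get_base_domain("mail.example.co.uk") returns "example.co.uk",
# while an override such as ".amazonaws.com" in psl_overrides.txt maps
# "ec2-192-0-2-1.us-east-1.compute.amazonaws.com" to "amazonaws.com".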
def query_dns(
domain: str,
record_type: str,
*,
cache: Optional[ExpiringDict] = None,
nameservers: Optional[list[str]] = None,
timeout: float = 2.0,
) -> list[str]:
"""
Queries DNS
Args:
domain (str): The domain or subdomain to query about
record_type (str): The record type to query for
cache (ExpiringDict): Cache storage
nameservers (list): A list of one or more nameservers to use
(Cloudflare's public DNS resolvers by default)
timeout (float): Sets the DNS timeout in seconds
Returns:
list: A list of answers
"""
domain = str(domain).lower()
record_type = record_type.upper()
cache_key = "{0}_{1}".format(domain, record_type)
if cache:
records = cache.get(cache_key, None)
if records:
return records
resolver = dns.resolver.Resolver()
timeout = float(timeout)
if nameservers is None:
nameservers = [
"1.1.1.1",
"1.0.0.1",
"2606:4700:4700::1111",
"2606:4700:4700::1001",
]
resolver.nameservers = nameservers
resolver.timeout = timeout
resolver.lifetime = timeout
if record_type == "TXT":
resource_records = list(
map(
lambda r: r.strings,
resolver.resolve(domain, record_type, lifetime=timeout),
)
)
_resource_record = [
resource_record[0][:0].join(resource_record)
for resource_record in resource_records
if resource_record
]
records = [r.decode() for r in _resource_record]
else:
records = list(
map(
lambda r: r.to_text().replace('"', "").rstrip("."),
resolver.resolve(domain, record_type, lifetime=timeout),
)
)
if cache:
cache[cache_key] = records
return records
def get_reverse_dns(
ip_address,
*,
cache: Optional[ExpiringDict] = None,
nameservers: Optional[list[str]] = None,
timeout: float = 2.0,
) -> str:
"""
Resolves an IP address to a hostname using a reverse DNS query
Args:
ip_address (str): The IP address to resolve
cache (ExpiringDict): Cache storage
nameservers (list): A list of one or more nameservers to use
(Cloudflare's public DNS resolvers by default)
timeout (float): Sets the DNS query timeout in seconds
Returns:
str: The reverse DNS hostname (if any)
"""
hostname = None
try:
address = dns.reversename.from_address(ip_address)
hostname = query_dns(
address, "PTR", cache=cache, nameservers=nameservers, timeout=timeout
)[0]
except dns.exception.DNSException as e:
logger.warning(f"get_reverse_dns({ip_address}) exception: {e}")
pass
return hostname
def timestamp_to_datetime(timestamp: int) -> datetime:
"""
Converts a UNIX/DMARC timestamp to a Python ``datetime`` object
Args:
timestamp (int): The timestamp
Returns:
datetime: The converted timestamp as a Python ``datetime`` object
"""
return datetime.fromtimestamp(int(timestamp))
def timestamp_to_human(timestamp: int) -> str:
"""
Converts a UNIX/DMARC timestamp to a human-readable string
Args:
timestamp: The timestamp
Returns:
str: The converted timestamp in ``YYYY-MM-DD HH:MM:SS`` format
"""
return timestamp_to_datetime(timestamp).strftime("%Y-%m-%d %H:%M:%S")
def human_timestamp_to_datetime(
human_timestamp: str, *, to_utc: Optional[bool] = False
) -> datetime:
"""
Converts a human-readable timestamp into a Python ``datetime`` object
Args:
human_timestamp (str): A timestamp string
to_utc (bool): Convert the timestamp to UTC
Returns:
datetime: The converted timestamp
"""
human_timestamp = human_timestamp.replace("-0000", "")
human_timestamp = parenthesis_regex.sub("", human_timestamp)
dt = parse_date(human_timestamp)
return dt.astimezone(timezone.utc) if to_utc else dt
def human_timestamp_to_unix_timestamp(human_timestamp: str) -> float:
"""
Converts a human-readable timestamp into a UNIX timestamp
Args:
human_timestamp (str): A timestamp in ``YYYY-MM-DD HH:MM:SS`` format
Returns:
float: The converted timestamp
"""
human_timestamp = human_timestamp.replace("T", " ")
return human_timestamp_to_datetime(human_timestamp).timestamp()
def get_ip_address_country(ip_address: str, *, db_path: Optional[str] = None) -> str:
"""
Returns the ISO code for the country associated
with the given IPv4 or IPv6 address
Args:
ip_address (str): The IP address to query for
db_path (str): Path to a MMDB file from MaxMind or DBIP
Returns:
str: An ISO country code associated with the given IP address
"""
db_paths = [
"GeoLite2-Country.mmdb",
"/usr/local/share/GeoIP/GeoLite2-Country.mmdb",
"/usr/share/GeoIP/GeoLite2-Country.mmdb",
"/var/lib/GeoIP/GeoLite2-Country.mmdb",
"/var/local/lib/GeoIP/GeoLite2-Country.mmdb",
"/usr/local/var/GeoIP/GeoLite2-Country.mmdb",
"%SystemDrive%\\ProgramData\\MaxMind\\GeoIPUpdate\\GeoIP\\"
"GeoLite2-Country.mmdb",
"C:\\GeoIP\\GeoLite2-Country.mmdb",
"dbip-country-lite.mmdb",
"dbip-country.mmdb",
]
if db_path is not None:
if not os.path.isfile(db_path):
logger.warning(
f"No file exists at {db_path}. Falling back to an "
"included copy of the DBIP IP to Country "
"Lite database."
)
db_path = None
if db_path is None:
for system_path in db_paths:
if os.path.exists(system_path):
db_path = system_path
break
if db_path is None:
db_path = str(
files(parsedmarc.resources.dbip).joinpath("dbip-country-lite.mmdb")
)
db_age = datetime.now() - datetime.fromtimestamp(os.stat(db_path).st_mtime)
if db_age > timedelta(days=30):
logger.warning("IP database is more than a month old")
db_reader = geoip2.database.Reader(db_path)
country = None
try:
country = db_reader.country(ip_address).country.iso_code
except geoip2.errors.AddressNotFoundError:
pass
return country
def get_service_from_reverse_dns_base_domain(
base_domain: str,
*,
always_use_local_file: Optional[bool] = False,
local_file_path: Optional[str] = None,
url: Optional[str] = None,
offline: Optional[bool] = False,
reverse_dns_map: Optional[dict[str, Any]] = None,
) -> dict[str, Any]:
"""
Returns the service name of a given base domain name from reverse DNS.
Args:
base_domain (str): The base domain of the reverse DNS lookup
always_use_local_file (bool): Always use a local map file
local_file_path (str): Path to a local map file
url (str): URL to a reverse DNS map file
offline (bool): Use the built-in copy of the reverse DNS map
reverse_dns_map (dict): A reverse DNS map
Returns:
dict: A dictionary containing name and type.
If the service is unknown, the name will be
the supplied base_domain and the type will be None
"""
def load_csv(_csv_file):
reader = csv.DictReader(_csv_file)
for row in reader:
key = row["base_reverse_dns"].lower().strip()
reverse_dns_map[key] = dict(name=row["name"], type=row["type"])
base_domain = base_domain.lower().strip()
if url is None:
url = (
"https://raw.githubusercontent.com/domainaware"
"/parsedmarc/master/parsedmarc/"
"resources/maps/base_reverse_dns_map.csv"
)
if reverse_dns_map is None:
reverse_dns_map = dict()
csv_file = io.StringIO()
if not (offline or always_use_local_file) and len(reverse_dns_map) == 0:
try:
logger.debug(f"Trying to fetch reverse DNS map from {url}...")
headers = {"User-Agent": USER_AGENT}
response = requests.get(url, headers=headers)
response.raise_for_status()
csv_file.write(response.text)
csv_file.seek(0)
load_csv(csv_file)
except requests.exceptions.RequestException as e:
logger.warning(f"Failed to fetch reverse DNS map: {e}")
except Exception:
logger.warning("Not a valid CSV file")
csv_file.seek(0)
logging.debug("Response body:")
logger.debug(csv_file.read())
if len(reverse_dns_map) == 0:
logger.info("Loading included reverse DNS map...")
path = str(
files(parsedmarc.resources.maps).joinpath("base_reverse_dns_map.csv")
)
if local_file_path is not None:
path = local_file_path
with open(path) as csv_file:
load_csv(csv_file)
try:
service = reverse_dns_map[base_domain]
except KeyError:
service = dict(name=base_domain, type=None)
return service
def get_ip_address_info(
    ip_address: str,
    *,
    ip_db_path: Optional[str] = None,
    reverse_dns_map_path: Optional[str] = None,
    always_use_local_files: Optional[bool] = False,
    reverse_dns_map_url: Optional[str] = None,
    cache: Optional[ExpiringDict] = None,
    reverse_dns_map: Optional[dict[str, Any]] = None,
    offline: Optional[bool] = False,
    nameservers: Optional[list[str]] = None,
    timeout: Optional[float] = 2.0,
) -> dict[str, Any]:
    """
    Returns reverse DNS and country information for the given IP address

    Args:
        ip_address (str): The IP address to check
        ip_db_path (str): path to a MMDB file from MaxMind or DBIP
        reverse_dns_map_path (str): Path to a reverse DNS map file
        reverse_dns_map_url (str): URL to the reverse DNS map file
        always_use_local_files (bool): Do not download files
        cache (ExpiringDict): Cache storage
        reverse_dns_map (dict): A reverse DNS map
        offline (bool): Do not make online queries for geolocation or DNS
        nameservers (list): A list of one or more nameservers to use
            (Cloudflare's public DNS resolvers by default)
        timeout (float): Sets the DNS timeout in seconds

    Returns:
        dict: ``ip_address``, ``reverse_dns``, ``country``, ``base_domain``,
        ``name``, and ``type``
    """
    ip_address = ip_address.lower()
    if cache is not None:
        info = cache.get(ip_address, None)
        if info:
            logger.debug(f"IP address {ip_address} was found in cache")
            return info
    info = dict()
    info["ip_address"] = ip_address
    if offline:
        reverse_dns = None
    else:
        reverse_dns = get_reverse_dns(
            ip_address, nameservers=nameservers, timeout=timeout
        )
    country = get_ip_address_country(ip_address, db_path=ip_db_path)
    info["country"] = country
    info["reverse_dns"] = reverse_dns
    info["base_domain"] = None
    info["name"] = None
    info["type"] = None
    if reverse_dns is not None:
        base_domain = get_base_domain(reverse_dns)
        if base_domain is not None:
            service = get_service_from_reverse_dns_base_domain(
                base_domain,
                offline=offline,
                local_file_path=reverse_dns_map_path,
                url=reverse_dns_map_url,
                always_use_local_file=always_use_local_files,
                reverse_dns_map=reverse_dns_map,
            )
            info["base_domain"] = base_domain
            info["type"] = service["type"]
            info["name"] = service["name"]
        if cache is not None:
            cache[ip_address] = info
            logger.debug(f"IP address {ip_address} added to cache")
    else:
        logger.debug(f"IP address {ip_address} reverse_dns not found")
    return info
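
A brief, hypothetical usage sketch (not part of the file): only the IP address is required, and the remaining arguments are keyword-only per the signature above.

# Illustrative sketch only; without an MMDB file the country may be None,
# and offline=False (the default) allows the reverse DNS lookup.
info = get_ip_address_info("8.8.8.8", nameservers=["1.1.1.1"], timeout=2.0)
print(info["reverse_dns"], info["country"], info["name"], info["type"])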
def parse_email_address(original_address: str) -> EmailAddress:
    if original_address[0] == "":
        display_name = None
    else:
        display_name = original_address[0]
    address = original_address[1]
    address_parts = address.split("@")
    local = None
    domain = None
    if len(address_parts) > 1:
        local = address_parts[0].lower()
        domain = address_parts[-1].lower()

    return {
        "display_name": display_name,
        "address": address,
        "local": local,
        "domain": domain,
    }
def get_filename_safe_string(string: str) -> str:
    """
    Converts a string to a string that is safe for a filename

    Args:
        string (str): A string to make safe for a filename

    Returns:
        str: A string safe for a filename
    """
    invalid_filename_chars = ["\\", "/", ":", '"', "*", "?", "|", "\n", "\r"]
    if string is None:
        string = "None"
    for char in invalid_filename_chars:
        string = string.replace(char, "")
    string = string.rstrip(".")
    string = (string[:100]) if len(string) > 100 else string

    return string
def is_mbox(path: str) -> bool:
    """
    Checks if the file at the given path is an MBOX mailbox file

    Args:
        path (str): Path of the file to check

    Returns:
        bool: A flag that indicates if the file is an MBOX mailbox file
    """
    _is_mbox = False
    try:
        mbox = mailbox.mbox(path)
        if len(mbox.keys()) > 0:
            _is_mbox = True
    except Exception as e:
        logger.debug("Error checking for MBOX file: {0}".format(e.__str__()))

    return _is_mbox
def is_outlook_msg(content: Union[bytes, Any]) -> bool:
    """
    Checks if the given content is an Outlook msg OLE/MSG file

    Args:
        content: Content to check

    Returns:
        bool: A flag that indicates if the file is an Outlook MSG file
    """
    return isinstance(content, bytes) and content.startswith(
        b"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1"
    )
def convert_outlook_msg(msg_bytes: bytes) -> str:
    """
    Uses the ``msgconvert`` Perl utility to convert an Outlook MSG file to
    standard RFC 822 format

    Args:
        msg_bytes (bytes): the content of the .msg file

    Returns:
        An RFC 822 string
    """
    if not is_outlook_msg(msg_bytes):
        raise ValueError("The supplied bytes are not an Outlook MSG file")
    orig_dir = os.getcwd()
    tmp_dir = tempfile.mkdtemp()
    os.chdir(tmp_dir)
    with open("sample.msg", "wb") as msg_file:
        msg_file.write(msg_bytes)
    rfc822_bytes: bytes
    try:
        subprocess.check_call(
            ["msgconvert", "sample.msg"], stdout=null_file, stderr=null_file
        )
        eml_path = "sample.eml"
        with open(eml_path, "rb") as eml_file:
            rfc822_bytes = eml_file.read()
    except FileNotFoundError:
        raise EmailParserError(
            "Failed to convert Outlook MSG: msgconvert utility not found"
        )
    finally:
        os.chdir(orig_dir)
        shutil.rmtree(tmp_dir)

    return rfc822_bytes.decode("utf-8", errors="replace")
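
A short, hypothetical sketch of calling the converter directly (not part of the file); it assumes the msgconvert Perl utility is installed and on PATH, otherwise the EmailParserError above is raised.

# Illustrative sketch only; "report.msg" is a placeholder path.
with open("report.msg", "rb") as msg_file:
    msg_bytes = msg_file.read()
if is_outlook_msg(msg_bytes):
    rfc822_message = convert_outlook_msg(msg_bytes)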
def parse_email(
    data: Union[bytes, str], *, strip_attachment_payloads: Optional[bool] = False
) -> dict[str, Any]:
    """
    A simplified email parser

    Args:
        data: The RFC 822 message string, or MSG binary
        strip_attachment_payloads (bool): Remove attachment payloads

    Returns:
        dict: Parsed email data
    """
    if isinstance(data, bytes):
        if is_outlook_msg(data):
            data = convert_outlook_msg(data)
        else:
            data = data.decode("utf-8", errors="replace")
    parsed_email = mailparser.parse_from_string(data)
    headers = json.loads(parsed_email.headers_json).copy()
    parsed_email = json.loads(parsed_email.mail_json).copy()
    parsed_email["headers"] = headers

    if "received" in parsed_email:
        for received in parsed_email["received"]:
            if "date_utc" in received:
                if received["date_utc"] is None:
                    del received["date_utc"]
                else:
                    received["date_utc"] = received["date_utc"].replace("T", " ")

    if "from" not in parsed_email:
        if "From" in parsed_email["headers"]:
            parsed_email["from"] = parsed_email["headers"]["From"]
        else:
            parsed_email["from"] = None

    if parsed_email["from"] is not None:
        parsed_email["from"] = parse_email_address(parsed_email["from"][0])

    if "date" in parsed_email:
        parsed_email["date"] = parsed_email["date"].replace("T", " ")
    else:
        parsed_email["date"] = None

    if "reply_to" in parsed_email:
        parsed_email["reply_to"] = list(
            map(lambda x: parse_email_address(x), parsed_email["reply_to"])
        )
    else:
        parsed_email["reply_to"] = []

    if "to" in parsed_email:
        parsed_email["to"] = list(
            map(lambda x: parse_email_address(x), parsed_email["to"])
        )
    else:
        parsed_email["to"] = []

    if "cc" in parsed_email:
        parsed_email["cc"] = list(
            map(lambda x: parse_email_address(x), parsed_email["cc"])
        )
    else:
        parsed_email["cc"] = []

    if "bcc" in parsed_email:
        parsed_email["bcc"] = list(
            map(lambda x: parse_email_address(x), parsed_email["bcc"])
        )
    else:
        parsed_email["bcc"] = []

    if "delivered_to" in parsed_email:
        parsed_email["delivered_to"] = list(
            map(lambda x: parse_email_address(x), parsed_email["delivered_to"])
        )

    if "attachments" not in parsed_email:
        parsed_email["attachments"] = []
    else:
        for attachment in parsed_email["attachments"]:
            if "payload" in attachment:
                payload = attachment["payload"]
                try:
                    if "content_transfer_encoding" in attachment:
                        if attachment["content_transfer_encoding"] == "base64":
                            payload = decode_base64(payload)
                        else:
                            payload = str.encode(payload)
                    attachment["sha256"] = hashlib.sha256(payload).hexdigest()
                except Exception as e:
                    logger.debug("Unable to decode attachment: {0}".format(e.__str__()))
        if strip_attachment_payloads:
            for attachment in parsed_email["attachments"]:
                if "payload" in attachment:
                    del attachment["payload"]

    if "subject" not in parsed_email:
        parsed_email["subject"] = None

    parsed_email["filename_safe_subject"] = get_filename_safe_string(
        parsed_email["subject"]
    )

    if "body" not in parsed_email:
        parsed_email["body"] = None

    return parsed_email
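
A brief, hypothetical usage sketch (not part of the file), assuming an RFC 822 message saved on disk; MSG bytes would be converted automatically by the branch at the top of the function.

# Illustrative sketch only; "sample.eml" is a placeholder path.
with open("sample.eml", "rb") as eml_file:
    parsed = parse_email(eml_file.read(), strip_attachment_payloads=True)
print(parsed["subject"], parsed["date"], len(parsed["attachments"]))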

parsedmarc/webhook.py Normal file

@@ -0,0 +1,65 @@
# -*- coding: utf-8 -*-

from __future__ import annotations

from typing import Any, Optional, Union

import requests

from parsedmarc import logger
from parsedmarc.constants import USER_AGENT


class WebhookClient(object):
    """A client for webhooks"""

    def __init__(
        self,
        aggregate_url: str,
        forensic_url: str,
        smtp_tls_url: str,
        timeout: Optional[int] = 60,
    ):
        """
        Initializes the WebhookClient

        Args:
            aggregate_url (str): The aggregate report webhook url
            forensic_url (str): The forensic report webhook url
            smtp_tls_url (str): The smtp_tls report webhook url
            timeout (int): The timeout to use when calling the webhooks
        """
        self.aggregate_url = aggregate_url
        self.forensic_url = forensic_url
        self.smtp_tls_url = smtp_tls_url
        self.timeout = timeout
        self.session = requests.Session()
        self.session.headers = {
            "User-Agent": USER_AGENT,
            "Content-Type": "application/json",
        }

    def save_forensic_report_to_webhook(self, report: str):
        try:
            self._send_to_webhook(self.forensic_url, report)
        except Exception as error_:
            logger.error("Webhook Error: {0}".format(error_.__str__()))

    def save_smtp_tls_report_to_webhook(self, report: str):
        try:
            self._send_to_webhook(self.smtp_tls_url, report)
        except Exception as error_:
            logger.error("Webhook Error: {0}".format(error_.__str__()))

    def save_aggregate_report_to_webhook(self, report: str):
        try:
            self._send_to_webhook(self.aggregate_url, report)
        except Exception as error_:
            logger.error("Webhook Error: {0}".format(error_.__str__()))

    def _send_to_webhook(
        self, webhook_url: str, payload: Union[bytes, str, dict[str, Any]]
    ):
        try:
            self.session.post(webhook_url, data=payload, timeout=self.timeout)
        except Exception as error_:
            logger.error("Webhook Error: {0}".format(error_.__str__()))

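A minimal, hypothetical usage sketch for the new client (not part of the file); the URLs are placeholders and the report payload is assumed to already be a JSON string, matching the Content-Type header the session sets.

# Illustrative sketch only; the URLs and report_json are placeholders.
client = WebhookClient(
    aggregate_url="https://example.com/hooks/dmarc/aggregate",
    forensic_url="https://example.com/hooks/dmarc/forensic",
    smtp_tls_url="https://example.com/hooks/smtp-tls",
    timeout=30,
)
report_json = '{"report": "example"}'
client.save_aggregate_report_to_webhook(report_json)
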
publish-docs.sh Executable file

@@ -0,0 +1,10 @@
#!/bin/bash
git pull
cd ../parsedmarc-docs || exit
git pull
cd ../parsedmarc || exit
./build.sh
cd ../parsedmarc-docs || exit
git add .
git commit -m "Update docs"
git push

pyproject.toml Normal file

@@ -0,0 +1,98 @@
[build-system]
requires = [
    "hatchling>=1.27.0",
]
build-backend = "hatchling.build"
[project]
name = "parsedmarc"
dynamic = [
"version",
]
description = "A Python package and CLI for parsing aggregate and forensic DMARC reports"
readme = "README.md"
license = "Apache-2.0"
authors = [
{ name = "Sean Whalen", email = "whalenster@gmail.com" },
]
keywords = [
"DMARC",
"parser",
"reporting",
]
classifiers = [
"Development Status :: 5 - Production/Stable",
"Intended Audience :: Developers",
"Intended Audience :: Information Technology",
"License :: OSI Approved :: Apache Software License",
"Operating System :: OS Independent",
"Programming Language :: Python :: 3"
]
requires-python = ">=3.9, <3.14"
dependencies = [
"azure-identity>=1.8.0",
"azure-monitor-ingestion>=1.0.0",
"boto3>=1.16.63",
"dateparser>=1.1.1",
"dnspython>=2.0.0",
"elasticsearch-dsl==7.4.0",
"elasticsearch<7.14.0",
"expiringdict>=1.1.4",
"geoip2>=3.0.0",
"google-api-core>=2.4.0",
"google-api-python-client>=2.35.0",
"google-auth-httplib2>=0.1.0",
"google-auth-oauthlib>=0.4.6",
"google-auth>=2.3.3",
"imapclient>=2.1.0",
"kafka-python-ng>=2.2.2",
"lxml>=4.4.0",
"mailsuite>=1.9.18",
"msgraph-core==0.2.2",
"opensearch-py>=2.4.2,<=3.0.0",
"publicsuffixlist>=0.10.0",
"pygelf>=0.4.2",
"requests>=2.22.0",
"tqdm>=4.31.1",
"urllib3>=1.25.7",
"xmltodict>=0.12.0",
"PyYAML>=6.0.3"
]
[project.optional-dependencies]
build = [
"hatch>=1.14.0",
"myst-parser[linkify]",
"nose",
"pytest",
"pytest-cov",
"ruff",
"sphinx",
"sphinx_rtd_theme",
]
[project.scripts]
parsedmarc = "parsedmarc.cli:_main"
[project.urls]
Homepage = "https://domainaware.github.io/parsedmarc"
[tool.hatch.version]
path = "parsedmarc/constants.py"
[tool.hatch.build.targets.sdist]
include = [
"/parsedmarc",
]
[tool.hatch.build]
exclude = [
"base_reverse_dns.csv",
"find_bad_utf8.py",
"find_unknown_base_reverse_dns.py",
"unknown_base_reverse_dns.csv",
"sortmaps.py",
"README.md",
"*.bak"
]


@@ -1,10 +0,0 @@
dnspython
requests
publicsuffix
xmltodict
geoip2
flake8
sphinx
sphinx_rtd_theme
collective.checkdocs
wheel


@@ -0,0 +1,40 @@
<?xml version="1.0" encoding="UTF-8" ?>
<feedback>
<report_metadata>
<org_name></org_name>
<email>administrator@accurateplastics.com</email>
<report_id>example.com:1538463741</report_id>
<date_range>
<begin>1538413632</begin>
<end>1538413632</end>
</date_range>
</report_metadata>
<policy_published>
<domain>example.com</domain>
<adkim>r</adkim>
<aspf>r</aspf>
<p>none</p>
<sp>reject</sp>
<pct>100</pct>
</policy_published>
<record>
<row>
<source_ip>12.20.127.122</source_ip>
<count>1</count>
<policy_evaluated>
<disposition>none</disposition>
<dkim>fail</dkim>
<spf>fail</spf>
</policy_evaluated>
</row>
<identifiers>
<header_from>example.com</header_from>
</identifiers>
<auth_results>
<spf>
<domain></domain>
<result>none</result>
</spf>
</auth_results>
</record>
</feedback>

File diff suppressed because it is too large


@@ -0,0 +1,181 @@
Received: from SN6PR04MB4480.namprd04.prod.outlook.com (2603:10b6:803:41::31)
by SN6PR04MB4477.namprd04.prod.outlook.com with HTTPS via
SN4PR0501CA0054.NAMPRD05.PROD.OUTLOOK.COM; Wed, 13 Feb 2019 10:48:13 +0000
Received: from DM5PR04CA0035.namprd04.prod.outlook.com (2603:10b6:3:12b::21)
by SN6PR04MB4480.namprd04.prod.outlook.com (2603:10b6:805:a5::17) with
Microsoft SMTP Server (version=TLS1_2,
cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.20.1622.16; Wed, 13 Feb
2019 10:48:12 +0000
Received: from BY2NAM03FT014.eop-NAM03.prod.protection.outlook.com
(2a01:111:f400:7e4a::207) by DM5PR04CA0035.outlook.office365.com
(2603:10b6:3:12b::21) with Microsoft SMTP Server (version=TLS1_2,
cipher=TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384) id 15.20.1622.16 via Frontend
Transport; Wed, 13 Feb 2019 10:48:12 +0000
Authentication-Results: spf=softfail (sender IP is 199.230.200.198)
smtp.mailfrom=google.com; cardinalhealth.mail.onmicrosoft.com; dkim=fail
(signature did not verify)
header.d=google.com;cardinalhealth.mail.onmicrosoft.com; dmarc=fail
action=oreject header.from=google.com;
Received-SPF: SoftFail (protection.outlook.com: domain of transitioning
google.com discourages use of 199.230.200.198 as permitted sender)
Received: from SMTP10.cardinalhealth.com (199.230.200.198) by
BY2NAM03FT014.mail.protection.outlook.com (10.152.84.239) with Microsoft SMTP
Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384) id
15.20.1580.10 via Frontend Transport; Wed, 13 Feb 2019 10:48:12 +0000
Received: from WPOH0019EXHYB01.cardinalhealth.net (10.212.25.115) by
smtp10.cardinalhealth.com (10.208.183.149) with Microsoft SMTP Server (TLS)
id 14.3.361.1; Wed, 13 Feb 2019 05:47:36 -0500
Received: from WPIL0210EXCAS23.cardinalhealth.net (161.244.3.66) by
WPOH0019EXHYB01.cardinalhealth.net (10.212.25.115) with Microsoft SMTP Server
(TLS) id 14.3.361.1; Wed, 13 Feb 2019 05:47:32 -0500
Received: from smtp2.cardinal.com (198.89.161.108) by
WPIL0210EXCAS23.cardinalhealth.net (161.244.3.66) with Microsoft SMTP Server
(TLS) id 14.3.361.1; Wed, 13 Feb 2019 04:47:32 -0600
Authentication-Results-Original: smtp2.cardinal.com; spf=Pass
smtp.mailfrom=noreply-dmarc-support@google.com
Received-SPF: Pass (smtp2.cardinal.com: domain of
noreply-dmarc-support@google.com designates 209.85.166.201 as
permitted sender) identity=mailfrom;
client-ip=209.85.166.201; receiver=smtp2.cardinal.com;
envelope-from="noreply-dmarc-support@google.com";
x-sender="noreply-dmarc-support@google.com";
x-conformance=spf_only; x-record-type="v=spf1"
X-SenderGrp: None
X-SBRS: 3.5
X-ExtWarning: Yes
X-SLBL-Result: SAFE-LISTED
X-Amp-Result: UNKNOWN
X-Amp-Original-Verdict: FILE UNKNOWN
X-Amp-File-Uploaded: False
IronPort-PHdr: =?us-ascii?q?9a23=3AQPa/HRJPWXWEsohNPdmcpTZcNBhigK39O0su0rRi?=
=?us-ascii?q?jrtPdqq5+JG7Zh7Q4vJiiFKPVoLeuatJ?=
X-IPAS-Result: =?us-ascii?q?A0HNBQBz9WNch8mmVdFjHAEBAR8EAQEFAQEGEIE2AoJpgQM?=
=?us-ascii?q?nh0+FPIsFlRGCXYIygSQDGCoSCAEEGAMTgQKDOQECg1YhATQJDQEDAQEBAQEBA?=
=?us-ascii?q?QECAhABAQEKCwkIKSMIBIJ0BE06MAEBAQEBAQEBAQEBAQEBAQEBAQEBAQEUAjM?=
=?us-ascii?q?XKQUYGwwBAThiMgEFAYNZAYIBngU8iikBAQGCCxOCeAEBBXSBFzuDPIEdBwgSg?=
=?us-ascii?q?luFFIF9gkSCFoFXgkyBJoF4AoF4NoR5GoEshjgJiViIBIhVgQsJgXaBRIEPgRu?=
=?us-ascii?q?BVosZGYFeAWOQLRmYRgGDVQ8hgSVugSFwXIESgU4JIxeBczSDIIp0ITCPbwEB?=
X-IronPort-Outbreak-Status: No, level 0, Unknown - Unknown
Received: from mail-it1-f201.google.com ([209.85.166.201]) by
smtp2.cardinal.com with ESMTP/TLS/AES128-GCM-SHA256; 13 Feb 2019 04:47:15
-0600
Received: by mail-it1-f201.google.com with SMTP id p21so3350239itb.8
for <dmarcreports@cardinalhealth.com>; Wed, 13 Feb 2019 02:47:15 -0800
(PST)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=google.com; s=20161025;
h=mime-version:date:message-id:subject:from:to:content-disposition
:content-transfer-encoding;
bh=9auxxiIHA4359CcIJ8D48sw6ZXttCPhnCsgP3zpHWC4=;
b=QSiY0EPGhBY11lvmfniPOmA71xd6uAv17KYGbEtmOtocmGen1BQ96kZA2rNtAtZDUx
IfLoJgkzO31GmVXh9k0kBi+r8vR16zXebZHuBgfBesayykHOfSSWOTZtwSukaWV3RChV
PPMRPMksnrITfFHNo3u6xbcx6usplxn8XS8XyENgua30BBjweJXYMrQrRkrjgLv+JpoY
o6fVvAtcuSnwtm3fv9j1GsqSK05sw2aVFZkJLP9DvMfhW+bXJJ2rVp4MvVqlleua20Ro
Y0vbFMWtbvFZseSOc+AYGvv6oL9eBilXal26kS2BrRJQ+B4Yt4GFiRDbjF4QqVSTHOd4
YDSw==
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=1e100.net; s=20161025;
h=x-gm-message-state:mime-version:date:message-id:subject:from:to
:content-disposition:content-transfer-encoding;
bh=9auxxiIHA4359CcIJ8D48sw6ZXttCPhnCsgP3zpHWC4=;
b=lqkAygJJ/s8iZJI+AnwWegB47wiLE67qFfi26J+5Uu5lQuObEqK2KTlBZGwnd51c8R
A2y47YQ9RqBKWTibQH9pLBKT5ChyxP/+Vk8e3wK+IfU720wG8P2eDXn91rBSBNIXOTOn
McNwB/z6Ukurt8GFiy4aMvni0t3yWa4/xL0c5zFQJAgwm01jhxOjgOpnnqF0ppYatzf4
5maCMzhvRJ9FFsDhk8sE0pJjdaWS9ybDGKOteSYr1wlGSnydTkt2z6z38IO8fgVJwT3e
AUiqhNgNmDdyNI3Aom8dnfJHsyqjKC3iTXGxmqsMDVui4faHwOqMx2lgjuv7bbQFyv1K
b//w==
X-Gm-Message-State: AHQUAubgr+0/AsmLjETdSMNb9+rAZ5j0xlExGd75CusDHQJl4eIy2avN
XnhZqrYsvbPhKCNFyDE0KQ==
X-Google-Smtp-Source: AHgI3IZS0dawFR+Q0mnSaOenxA+M5W6V6z1dMorYgiX855zYf4aqZyS2Jjol+TCgY3f/lSsVDbuiefhqmZNtNA==
MIME-Version: 1.0
X-Received: by 2002:a24:1c87:: with SMTP id c129mr1998419itc.11.1550054834929;
Wed, 13 Feb 2019 02:47:14 -0800 (PST)
Date: Tue, 12 Feb 2019 15:59:59 -0800
Message-ID: <949348866075514174@google.com>
Subject: Report domain: borschow.com Submitter: google.com Report-ID: 949348866075514174
From: <noreply-dmarc-support@google.com>
To: <dmarcreports@cardinalhealth.com>
name="google.com!borschow.com!1549929600!1550015999.zip"
filename="google.com!borschow.com!1549929600!1550015999.zip"
Return-Path: noreply-dmarc-support@google.com
X-OrganizationHeadersPreserved: WPIL0210EXCAS23.cardinalhealth.net
X-MS-Exchange-Organization-ExpirationStartTime: 13 Feb 2019 10:48:12.5214
(UTC)
X-MS-Exchange-Organization-ExpirationStartTimeReason: OriginalSubmit
X-MS-Exchange-Organization-ExpirationInterval: 2:00:00:00.0000000
X-MS-Exchange-Organization-ExpirationIntervalReason: OriginalSubmit
X-MS-Exchange-Organization-Network-Message-Id:
e9371fc9-cf12-4fcb-502a-08d691a0c038
X-EOPAttributedMessage: 0
X-MS-Exchange-Organization-MessageDirectionality: Originating
X-CrossPremisesHeadersPromoted:
BY2NAM03FT014.eop-NAM03.prod.protection.outlook.com
X-CrossPremisesHeadersFiltered:
BY2NAM03FT014.eop-NAM03.prod.protection.outlook.com
X-Forefront-Antispam-Report:
CIP:199.230.200.198;IPV:NLI;CTRY:US;EFV:NLI;SFV:SKN;SFS:;DIR:INB;SFP:;SCL:-1;SRVR:SN6PR04MB4480;H:SMTP10.cardinalhealth.com;FPR:;SPF:None;LANG:en;
X-Microsoft-Exchange-Diagnostics:
1;BY2NAM03FT014;1:9gD0HAosLjXNiAwpOsmGDn2zgTJAIEDY0eKyvNoIEz6oio7FckNeMUQRNa3AUeC/Qil0Sn2rntyy6LjTvutGN6e2BsGQyNaiKEsKI3so3l0Kpb9i3CdYF21/lNVHrjKuxxEJ8F7WUBlR88jcBwDjl6x0CO2FBckAmUnBJOJv2zg=
X-MS-Exchange-Organization-AuthSource: WPIL0210EXCAS23.cardinalhealth.net
X-MS-Exchange-Organization-AuthAs: Anonymous
X-OriginatorOrg: cardinalhealth.onmicrosoft.com
X-MS-PublicTrafficType: Email
X-MS-Office365-Filtering-Correlation-Id: e9371fc9-cf12-4fcb-502a-08d691a0c038
X-Microsoft-Antispam:
BCL:0;PCL:0;RULEID:(2390118)(7020095)(4652040)(8989299)(4534185)(4627221)(201703031133081)(8559020)(8990200)(5600110)(711020)(4605077)(4710068)(4711035)(2017052603328)(7153060)(49563074);SRVR:SN6PR04MB4480;
X-MS-TrafficTypeDiagnostic: SN6PR04MB4480:
X-MS-Exchange-Organization-SCL: -1
X-Microsoft-Exchange-Diagnostics:
1;SN6PR04MB4480;20:9lFp0O5yeS9rEVtgFCaVjrPpXZAA0REuLHin4MfFWihk274IOvh7BRRMQfNNBwtC3q0+vTeNPc37wIBQlwVq6T7j1bNe06DnEjGgP5GWNU7zOUjt6qeq21ebYk/RV2QcTM85ZcFxr8SF2bGAKcNkj7GQLnnogH5o1GotLtqwXOht4qNZxhp46eCrIt+yQJJHFJyHFoflM9+z2WAYSl6yY8Wauhp05LBIqjduLdEN6MmU7bpPrzOmBrc4nuOmA4s1D8A3KdzBCdx0wIEwBv1zq6RjAB43UrfhpwMmh07U60CU/0QPhSrEBfn19eZLv4eTRJozsA313tp/mPylGCegahxmClixw/ku/GENI6pWxTCz3Jz1x8YCMLqJS7M+UOg7IosLPbUr26Q2CtSduue1vhk6v8peX5c5di6b9WftMKup3YMESA0RrqNbS6HbBCmH+iHSUwWjTBVva4L0fGiG82SbFbkH/UH+ZoFzkFnMtDZwDK+9pK+oHS2k97XwVzzYrzFh98JBdnk6jI/L2MmHWldt75NqJjSifAf2P/PjlploUQ8CAHsZZSRx5cu5tLaptOzUq/NVXF23VavhTslxK5C0/ntAAZAAvhmR8v/FNFU=;23:m8XEp0VuraCd8j9yukaQaVZE+Ufz0qQ9v4369t8CewCI5dikl+UkdVfYaJsMrwHTAtH3it3YrgDXpo7py6m/RDwgDnLGxviIfy/puyEEN3Qq99TJUMn19W9S5U7VJ8DgMZsnEv6RVCrjf05bNshRyA==
X-Microsoft-Exchange-Diagnostics:
1;SN6PR04MB4480;20:fJacS7QTNHPZGJt2KoBiyZLfHf3R2G+vFZ1EUyyFaqoQUdJU3WDLMmHMUqn36br0oZNxyMkV05SJMoFAz3mVO2hO/nsUX6SR7RMpr5XHYxLD+tPbbTTT2aGzo5IR+GOrJc5l3z4uX34x0WdoggvjUhi6DWaqwMn/OnkEBJ7ozYg=
X-MS-Exchange-CrossTenant-OriginalArrivalTime: 13 Feb 2019 10:48:12.1984
(UTC)
X-MS-Exchange-CrossTenant-Network-Message-Id: e9371fc9-cf12-4fcb-502a-08d691a0c038
X-MS-Exchange-CrossTenant-Id: a86dc674-e6a2-4c25-b4ba-e36c31382530
X-MS-Exchange-CrossTenant-OriginalAttributedTenantConnectingIp: TenantId=a86dc674-e6a2-4c25-b4ba-e36c31382530;Ip=[199.230.200.198];Helo=[SMTP10.cardinalhealth.com]
X-MS-Exchange-CrossTenant-FromEntityHeader: HybridOnPrem
X-MS-Exchange-Transport-CrossTenantHeadersStamped: SN6PR04MB4480
X-MS-Exchange-Transport-EndToEndLatency: 00:00:01.1339945
X-MS-Exchange-Processed-By-BccFoldering: 15.20.1601.011
X-Microsoft-Antispam-Mailbox-Delivery:
ucf:0;jmr:0;ex:0;auth:0;dest:I;ENG:(750119)(520011016)(944506303)(944626516);
X-Microsoft-Antispam-Message-Info:
Cqz80Cj8FVW4uTBbPiVvb9OH0/VEl4Uz5BiS+YWHErndckPxKMInYe6J09QImrgTO+t2bYNNpL66Km1sbVKa2o+iWj1pSIxAONbkfZuosS0y7Xbj/NEw1eqGtwavoj5WckV7MfJmBINAEBVg6UPnNhmW5rY8PTa27tmGQgYoE7wm5JLH0EH8ARuebjtTf8j+WuBg/BY6uaK7FLOdAUnnlvAtoHDKTnL/oH5E6GG59HTarZyC4wMl5sN2ibbMqZ80Aj4EczyuoWz1N2thodsW/4yzTk2w2dtHgwMdKgPI+4xALQC81kQAlgVyN9ukvBpJnRKrA4bvx+XhUUsMKsoXmpWdQCIoALqAfXheY/96JepEYN05Fqa7wzDRLkbejIfsfPq16asiX/kw8Dq2N/WG5LeQpC28iOkY4TB/GlI6CQPVd8rMY3DvzBYZPyCAcUdPhXC3nR+qxLea+zH/s1IRKaXolnF0r0zaiCki952hC6UwfdeK9F/nCA75kRb930QXJbmOS9emnEf+xqWhIhJuMdd8gV1NLSz6SDimeHRfDgXMTgNUjkk/BQQ0bCWAEQrRPxdvt/5PEiUZMZzMKZAqYh67j2RpV8FC/qJLjHPljagvtH4bUvGmpn/W9MArWgsUkk2skhNcLVletMwYbVXvJfJPr7K9Pnfpnd4p1ETHwjlzXaKcvlziIE29MYEXPUcg9rnk2t33YTM1NJHhgyiKebbrHC2/BU1O+BNrkZYQhqlkvsAu4JxBdyzld2sDz9CQdvwOSAwOkMpdlkaHV26Y0e6gPLkaprWVqXPr5KFXSUfuz2fvVUNM+FuHGV/fIFkcKdK4lw0MRufwhBz1gqudL/PSQuI8r9lQmuh7K3+gIprdWqOiYlYEELj+TMnSnZaFkbIX70rhAAkB7MoNfs/A38hIooGzxlRYzTrlPqwoIkOpyqQykDzXoNRODHwo7QJx
Content-type: multipart/mixed;
boundary="B_3632898004_720446853"
> This message is in MIME format. Since your mail reader does not understand
this format, some or all of this message may not be legible.
--B_3632898004_720446853
Content-type: text/plain;
charset="UTF-8"
Content-transfer-encoding: 7bit
--B_3632898004_720446853
Content-type: application/zip; name="google.com!borschow.com!1549929600!1550015999.zip";
x-mac-creator="4F50494D"
Content-ID: <B399946F5954C04A8523535C1E5699A5@namprd04.prod.outlook.com>
Content-disposition: attachment;
filename="google.com!borschow.com!1549929600!1550015999.zip"
Content-transfer-encoding: base64
UEsDBAoAAAAIAPhTTU5+28OP0QEAACgEAAAxAAAAZ29vZ2xlLmNvbSFib3JzY2hvdy5jb20h
MTU0OTkyOTYwMCExNTUwMDE1OTk5LnhtbI1TwXakIBC85yvmzX1EJ2rGfYTsab9g9+xjoFU2
CjzATPL3iwHRnexhT2J1d3VXNeCX92k8vIGxQsnnY5HlxwNIpriQ/fPx188fp8vx8EIecAfA
r5S9kofDARvQyrh2Akc5dXTBPKpM30o6AemV6kfImJowSmDIgYmKkUjlGcaPE5+oYSc764Xu
+74s5MWad2doy5R0lLlWyE6RwTltvyEUS7OtFFFEpb2BQeeyrqtL7rm+1gfiKENw0pTNY3m5
1HX+VFVFWTyVGG3RkO2VQmuo7KMWD12hF5IUVdk056bOfa+ArHGQ3EerPC+qpmn8JHIlQ3+z
pW57S7FWo2AfrZ6vo7ADpEGUN0eSqzKWDeoWDItgyKD8VUzEYBQOEbS6+8SWb4A0MfAbmMNI
R8RukF0xzRwpFnHL4XPYfw3m3WTKrDMadUsuWDUbBq3QpDln1WNWFHVW5GffIQXWVKZm6Zth
FA4rHPvBGx1n7xtfA4sZwmplhfPXN02+x3aZixWdv1Y+IbkSRXcxkKzZqbzr6le1asOCg3Si
E/75pLIBKAfTdkZNdyvaRyLVFwJMZze0Buw8uo1zN9b/7D9e7oUj6oo/Sdp2BdB9wyU5LBKj
7dH/AVBLAQIKAAoAAAAIAPhTTU5+28OP0QEAACgEAAAxAAAAAAAAAAAAAAAAAAAAAABnb29n
bGUuY29tIWJvcnNjaG93LmNvbSExNTQ5OTI5NjAwITE1NTAwMTU5OTkueG1sUEsFBgAAAAAB
AAEAXwAAACACAAAAAA==
--B_3632898004_720446853--


@@ -0,0 +1,44 @@
<?xml version="1.0"?>
<feedback>
<version>1.0</version>
<report_metadata>
<org_name>addisonfoods.com</org_name>
<email>postmaster@addisonfoods.com</email>
<report_id>3ceb5548498640beaeb47327e202b0b9</report_id>
<date_range>
<begin>1536105600</begin>
<end>1536191999</end>
</date_range>
</report_metadata>
<policy_published>
<domain>example.com</domain>
<adkim>r</adkim>
<aspf>r</aspf>
<p>none</p>
<sp>none</sp>
<pct>100</pct>
<fo>0</fo>
</policy_published>
<record>
<row>
<source_ip>109.203.100.17</source_ip>
<count>1</count>
<policy_evaluated>
<disposition>none</disposition>
<dkim>fail</dkim>
<spf>fail</spf>
</policy_evaluated>
</row>
<identifiers>
<envelope_from>example.com</envelope_from>
<header_from>example.com</header_from>
</identifiers>
<auth_results>
<dkim>
<domain>toptierhighticket.club</domain>
<selector>default</selector>
<result>pass</result>
</dkim>
</auth_results>
</record>
</feedback>


@@ -0,0 +1,56 @@
<?xml version="1.0" encoding="UTF-8"?>
<feedback>
<version>1.0</version>
<report_metadata>
<org_name>example.org</org_name>
<email>noreply-dmarc-support@example.org</email>
<extra_contact_info>https://support.example.org/dmarc</extra_contact_info>
<report_id>20240125141224705995</report_id>
<date_range>
<begin>1706159544</begin>
<end>1706185733</end>
</date_range>
</report_metadata>
<policy_published>
<domain>example.com</domain>
<adkim>r</adkim>
<aspf>r</aspf>
<p>quarantine</p>
<sp>quarantine</sp>
<pct>100</pct>
<fo>1</fo>
</policy_published>
<record>
<row>
<source_ip>198.51.100.123</source_ip>
<count>2</count>
<policy_evaluated>
<disposition>none</disposition>
<dkim>pass</dkim>
<spf>fail</spf>
<reason>
<type></type>
<comment></comment>
</reason>
</policy_evaluated>
</row>
<identifiers>
<envelope_to>example.net</envelope_to>
<envelope_from>example.edu</envelope_from>
<header_from>example.com</header_from>
</identifiers>
<auth_results>
<dkim>
<domain>example.com</domain>
<selector>example</selector>
<result>pass</result>
<human_result>2048-bit key</human_result>
</dkim>
<spf>
<domain>example.edu</domain>
<scope>mfrom</scope>
<result>pass</result>
</spf>
</auth_results>
</record>
</feedback>

Some files were not shown because too many files have changed in this diff