Mirror of https://github.com/domainaware/parsedmarc.git, synced 2026-02-18 15:36:24 +00:00

Compare commits

32 Commits
| SHA1 |
|---|
| 4b786846ae |
| 23ae563cd8 |
| cdd000e675 |
| 7d58abc67b |
| a18ae439de |
| d7061330a8 |
| 9d5654b8ec |
| a0e0070dd0 |
| cf3b7f2c29 |
| d312522ab7 |
| 888d717476 |
| 1127f65fbb |
| d017dfcddf |
| 5fae99aacc |
| ba57368ac3 |
| dc6ee5de98 |
| 158d63d205 |
| f1933b906c |
| 4b98d795ff |
| b1356f7dfc |
| 1969196e1a |
| 553f15f6a9 |
| 1fc9f638e2 |
| 48bff504b4 |
| 681b7cbf85 |
| 0922d6e83a |
| baf3f95fb1 |
| a51f945305 |
| 55dbf8e3db |
| 00267c9847 |
| 51356175e1 |
| 3be10d30dd |
.github/workflows/docker.yml (vendored, 10 changes)
@@ -24,11 +24,11 @@ jobs:
    steps:
      - name: Checkout repository
-       uses: actions/checkout@v3
+       uses: actions/checkout@v5

      - name: Docker meta
        id: meta
-       uses: docker/metadata-action@v3
+       uses: docker/metadata-action@v5
        with:
          images: |
            ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
@@ -40,16 +40,14 @@ jobs:
            type=semver,pattern={{major}}.{{minor}}

      - name: Log in to the Container registry
-       # https://github.com/docker/login-action/releases/tag/v2.0.0
-       uses: docker/login-action@49ed152c8eca782a232dede0303416e8f356c37b
+       uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Build and push Docker image
-       # https://github.com/docker/build-push-action/releases/tag/v3.0.0
-       uses: docker/build-push-action@e551b19e49efd4e98792db7592c17c09b89db8d8
+       uses: docker/build-push-action@v6
        with:
          context: .
          push: ${{ github.event_name == 'release' }}

.github/workflows/python-tests.yml (vendored, 12 changes)
@@ -15,7 +15,7 @@ jobs:

    services:
      elasticsearch:
-       image: elasticsearch:8.18.2
+       image: elasticsearch:8.19.7
        env:
          discovery.type: single-node
          cluster.name: parsedmarc-cluster
@@ -33,15 +33,15 @@ jobs:
        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]

    steps:
-     - uses: actions/checkout@v4
+     - uses: actions/checkout@v5
      - name: Set up Python ${{ matrix.python-version }}
-       uses: actions/setup-python@v5
+       uses: actions/setup-python@v6
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install system dependencies
        run: |
-         sudo apt-get update
-         sudo apt-get install -y libemail-outlook-message-perl
+         sudo apt-get -q update
+         sudo apt-get -qy install libemail-outlook-message-perl
      - name: Install Python dependencies
        run: |
          python -m pip install --upgrade pip
@@ -65,6 +65,6 @@ jobs:
        run: |
          hatch build
      - name: Upload coverage to Codecov
-       uses: codecov/codecov-action@v4
+       uses: codecov/codecov-action@v5
        with:
          token: ${{ secrets.CODECOV_TOKEN }}

.vscode/launch.json (vendored, 13 changes)
@@ -19,20 +19,11 @@
      "console": "integratedTerminal"
    },
    {
-     "name": "sample.eml",
+     "name": "sample",
      "type": "debugpy",
      "request": "launch",
      "module": "parsedmarc.cli",
-     "args": ["samples/private/sample.eml"]
-   },
-   {
-     "name": "find_sus_domains.py",
-     "type": "debugpy",
-     "request": "launch",
-     "program": "find_sus_domains.py",
-     "args": ["-i", "unknown_domains.txt", "-o", "sus_domains.csv"],
-     "cwd": "${workspaceFolder}/parsedmarc/resources/maps",
-     "console": "integratedTerminal"
+     "args": ["samples/private/sample"]
    },
    {
      "name": "sortlists.py",

.vscode/settings.json (vendored, 6 changes)
@@ -36,6 +36,7 @@
    "exampleuser",
    "expiringdict",
    "fieldlist",
    "GELF",
    "genindex",
    "geoip",
    "geoipupdate",
@@ -65,17 +66,20 @@
    "mailrelay",
    "mailsuite",
    "maxdepth",
    "MAXHEADERS",
    "maxmind",
    "mbox",
    "mfrom",
    "michaeldavie",
    "mikesiegel",
    "Mimecast",
    "mitigations",
    "MMDB",
    "modindex",
    "msgconvert",
    "msgraph",
    "MSSP",
    "multiprocess",
    "Munge",
    "ndjson",
    "newkey",
@@ -86,6 +90,7 @@
    "nosniff",
    "nwettbewerb",
    "opensearch",
    "opensearchpy",
    "parsedmarc",
    "passsword",
    "Postorius",
@@ -123,6 +128,7 @@
    "truststore",
    "Übersicht",
    "uids",
    "Uncategorized",
    "unparasable",
    "uper",
    "urllib",

CHANGELOG.md (592 changes): file diff suppressed because it is too large
README.md (31 changes)
@@ -23,11 +23,10 @@ ProofPoint Email Fraud Defense, and Valimail.

 ## Help Wanted

-This project is maintained by one developer. Please consider
-reviewing the open
-[issues](https://github.com/domainaware/parsedmarc/issues) to see how
-you can contribute code, documentation, or user support. Assistance on
-the pinned issues would be particularly helpful.
+This project is maintained by one developer. Please consider reviewing the open
+[issues](https://github.com/domainaware/parsedmarc/issues) to see how you can
+contribute code, documentation, or user support. Assistance on the pinned
+issues would be particularly helpful.

 Thanks to all
 [contributors](https://github.com/domainaware/parsedmarc/graphs/contributors)!
@@ -42,6 +41,24 @@ Thanks to all
 - Consistent data structures
 - Simple JSON and/or CSV output
 - Optionally email the results
-- Optionally send the results to Elasticsearch, Opensearch, and/or Splunk, for use
-  with premade dashboards
+- Optionally send the results to Elasticsearch, Opensearch, and/or Splunk, for
+  use with premade dashboards
 - Optionally send reports to Apache Kafka

+## Python Compatibility
+
+This project supports the following Python versions, which are either actively maintained or are the default versions
+for RHEL or Debian.
+
+| Version | Supported | Reason |
+|---------|-----------|------------------------------------------------------------|
+| < 3.6 | ❌ | End of Life (EOL) |
+| 3.6 | ❌ | Used in RHEL 8, but not supported by project dependencies |
+| 3.7 | ❌ | End of Life (EOL) |
+| 3.8 | ❌ | End of Life (EOL) |
+| 3.9 | ✅ | Supported until August 2026 (Debian 11); May 2032 (RHEL 9) |
+| 3.10 | ✅ | Actively maintained |
+| 3.11 | ✅ | Actively maintained; supported until June 2028 (Debian 12) |
+| 3.12 | ✅ | Actively maintained; supported until May 2035 (RHEL 10) |
+| 3.13 | ✅ | Actively maintained; supported until June 2030 (Debian 13) |
+| 3.14 | ❌ | Not currently supported due to [this Python bug](https://github.com/python/cpython/issues/142307)|

build.sh (2 changes)
@@ -14,7 +14,7 @@ cd docs
make clean
make html
touch build/html/.nojekyll
-if [ -d "./../parsedmarc-docs" ]; then
+if [ -d "../../parsedmarc-docs" ]; then
  cp -rf build/html/* ../../parsedmarc-docs/
fi
cd ..

@@ -1,8 +1,6 @@
-version: '3.7'
-
 services:
   elasticsearch:
-    image: docker.elastic.co/elasticsearch/elasticsearch:8.3.1
+    image: docker.elastic.co/elasticsearch/elasticsearch:8.19.7
     environment:
       - network.host=127.0.0.1
       - http.host=0.0.0.0
@@ -14,7 +12,7 @@ services:
       - xpack.security.enabled=false
       - xpack.license.self_generated.type=basic
     ports:
-      - 127.0.0.1:9200:9200
+      - "127.0.0.1:9200:9200"
     ulimits:
       memlock:
         soft: -1
@@ -30,7 +28,7 @@ services:
       retries: 24

   opensearch:
-    image: opensearchproject/opensearch:2.18.0
+    image: opensearchproject/opensearch:2
     environment:
       - network.host=127.0.0.1
       - http.host=0.0.0.0
@@ -41,7 +39,7 @@ services:
       - bootstrap.memory_lock=true
      - OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_INITIAL_ADMIN_PASSWORD}
     ports:
-      - 127.0.0.1:9201:9200
+      - "127.0.0.1:9201:9200"
     ulimits:
       memlock:
         soft: -1

@@ -20,7 +20,7 @@ from parsedmarc import __version__

# -- Project information -----------------------------------------------------

project = "parsedmarc"
-copyright = "2018 - 2023, Sean Whalen and contributors"
+copyright = "2018 - 2025, Sean Whalen and contributors"
author = "Sean Whalen and contributors"

# The version info for the project you're documenting, acts as replacement for

||||
@@ -45,6 +45,24 @@ and Valimail.
|
||||
with premade dashboards
|
||||
- Optionally send reports to Apache Kafka
|
||||
|
||||
## Python Compatibility
|
||||
|
||||
This project supports the following Python versions, which are either actively maintained or are the default versions
|
||||
for RHEL or Debian.
|
||||
|
||||
| Version | Supported | Reason |
|
||||
|---------|-----------|------------------------------------------------------------|
|
||||
| < 3.6 | ❌ | End of Life (EOL) |
|
||||
| 3.6 | ❌ | Used in RHEL 8, but not supported by project dependencies |
|
||||
| 3.7 | ❌ | End of Life (EOL) |
|
||||
| 3.8 | ❌ | End of Life (EOL) |
|
||||
| 3.9 | ✅ | Supported until August 2026 (Debian 11); May 2032 (RHEL 9) |
|
||||
| 3.10 | ✅ | Actively maintained |
|
||||
| 3.11 | ✅ | Actively maintained; supported until June 2028 (Debian 12) |
|
||||
| 3.12 | ✅ | Actively maintained; supported until May 2035 (RHEL 10) |
|
||||
| 3.13 | ✅ | Actively maintained; supported until June 2030 (Debian 13) |
|
||||
| 3.14 | ❌ | Not currently supported due to [this Python bug](https://github.com/python/cpython/issues/142307)|
|
||||
|
||||
```{toctree}
|
||||
:caption: 'Contents'
|
||||
:maxdepth: 2
|
||||
|
||||
@@ -199,7 +199,7 @@ sudo apt-get install libemail-outlook-message-perl
|
||||
[geoipupdate releases page on github]: https://github.com/maxmind/geoipupdate/releases
|
||||
[ip to country lite database]: https://db-ip.com/db/download/ip-to-country-lite
|
||||
[license keys]: https://www.maxmind.com/en/accounts/current/license-key
|
||||
[maxmind geoipupdate page]: https://dev.maxmind.com/geoip/geoipupdate/
|
||||
[maxmind geoipupdate page]: https://dev.maxmind.com/geoip/updating-databases/
|
||||
[maxmind geolite2 country database]: https://dev.maxmind.com/geoip/geolite2-free-geolocation-data
|
||||
[registering for a free geolite2 account]: https://www.maxmind.com/en/geolite2/signup
|
||||
[to comply with various privacy regulations]: https://blog.maxmind.com/2019/12/18/significant-changes-to-accessing-and-using-geolite2-databases/
|
||||
|
||||
@@ -23,6 +23,8 @@ of the report schema.
|
||||
"report_id": "9391651994964116463",
|
||||
"begin_date": "2012-04-27 20:00:00",
|
||||
"end_date": "2012-04-28 19:59:59",
|
||||
"timespan_requires_normalization": false,
|
||||
"original_timespan_seconds": 86399,
|
||||
"errors": []
|
||||
},
|
||||
"policy_published": {
|
||||
@@ -39,8 +41,10 @@ of the report schema.
|
||||
"source": {
|
||||
"ip_address": "72.150.241.94",
|
||||
"country": "US",
|
||||
"reverse_dns": "adsl-72-150-241-94.shv.bellsouth.net",
|
||||
"base_domain": "bellsouth.net"
|
||||
"reverse_dns": null,
|
||||
"base_domain": null,
|
||||
"name": null,
|
||||
"type": null
|
||||
},
|
||||
"count": 2,
|
||||
"alignment": {
|
||||
@@ -74,7 +78,10 @@ of the report schema.
|
||||
"result": "pass"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"normalized_timespan": false,
|
||||
"interval_begin": "2012-04-28 00:00:00",
|
||||
"interval_end": "2012-04-28 23:59:59"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -83,8 +90,10 @@ of the report schema.
|
||||
### CSV aggregate report
|
||||
|
||||
```text
|
||||
xml_schema,org_name,org_email,org_extra_contact_info,report_id,begin_date,end_date,errors,domain,adkim,aspf,p,sp,pct,fo,source_ip_address,source_country,source_reverse_dns,source_base_domain,count,spf_aligned,dkim_aligned,dmarc_aligned,disposition,policy_override_reasons,policy_override_comments,envelope_from,header_from,envelope_to,dkim_domains,dkim_selectors,dkim_results,spf_domains,spf_scopes,spf_results
|
||||
draft,acme.com,noreply-dmarc-support@acme.com,http://acme.com/dmarc/support,9391651994964116463,2012-04-27 20:00:00,2012-04-28 19:59:59,,example.com,r,r,none,none,100,0,72.150.241.94,US,adsl-72-150-241-94.shv.bellsouth.net,bellsouth.net,2,True,False,True,none,,,example.com,example.com,,example.com,none,fail,example.com,mfrom,pass
|
||||
xml_schema,org_name,org_email,org_extra_contact_info,report_id,begin_date,end_date,normalized_timespan,errors,domain,adkim,aspf,p,sp,pct,fo,source_ip_address,source_country,source_reverse_dns,source_base_domain,source_name,source_type,count,spf_aligned,dkim_aligned,dmarc_aligned,disposition,policy_override_reasons,policy_override_comments,envelope_from,header_from,envelope_to,dkim_domains,dkim_selectors,dkim_results,spf_domains,spf_scopes,spf_results
|
||||
draft,acme.com,noreply-dmarc-support@acme.com,http://acme.com/dmarc/support,9391651994964116463,2012-04-28 00:00:00,2012-04-28 23:59:59,False,,example.com,r,r,none,none,100,0,72.150.241.94,US,,,,,2,True,False,True,none,,,example.com,example.com,,example.com,none,fail,example.com,mfrom,pass
|
||||
draft,acme.com,noreply-dmarc-support@acme.com,http://acme.com/dmarc/support,9391651994964116463,2012-04-28 00:00:00,2012-04-28 23:59:59,False,,example.com,r,r,none,none,100,0,72.150.241.94,US,,,,,2,True,False,True,none,,,example.com,example.com,,example.com,none,fail,example.com,mfrom,pass
|
||||
|
||||
```
|
||||
|
||||
## Sample forensic report output
|
||||
|
||||
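For orientation, a downstream consumer could aggregate the new per-record intervals from the JSON output shown above. This is a minimal sketch, not part of the changeset; the `output/aggregate.json` path is only an assumption about where `save_output()` wrote the results.

```python
import json
from collections import Counter

# Illustrative path; adjust to wherever the aggregate JSON output was saved.
with open("output/aggregate.json") as f:
    reports = json.load(f)
if isinstance(reports, dict):  # tolerate a single report object
    reports = [reports]

messages_per_day = Counter()
for report in reports:
    for record in report["records"]:
        # After normalization each record carries its own interval; the date
        # part of interval_begin identifies the day the count belongs to.
        day = record["interval_begin"].split(" ")[0]
        messages_per_day[day] += record["count"]

for day, count in sorted(messages_per_day.items()):
    print(day, count)
```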
@@ -4,47 +4,50 @@

 ```text
 usage: parsedmarc [-h] [-c CONFIG_FILE] [--strip-attachment-payloads] [-o OUTPUT]
-                  [--aggregate-json-filename AGGREGATE_JSON_FILENAME]
-                  [--forensic-json-filename FORENSIC_JSON_FILENAME]
-                  [--aggregate-csv-filename AGGREGATE_CSV_FILENAME]
-                  [--forensic-csv-filename FORENSIC_CSV_FILENAME]
-                  [-n NAMESERVERS [NAMESERVERS ...]] [-t DNS_TIMEOUT] [--offline]
-                  [-s] [--verbose] [--debug] [--log-file LOG_FILE] [-v]
-                  [file_path ...]
+                  [--aggregate-json-filename AGGREGATE_JSON_FILENAME] [--forensic-json-filename FORENSIC_JSON_FILENAME]
+                  [--smtp-tls-json-filename SMTP_TLS_JSON_FILENAME] [--aggregate-csv-filename AGGREGATE_CSV_FILENAME]
+                  [--forensic-csv-filename FORENSIC_CSV_FILENAME] [--smtp-tls-csv-filename SMTP_TLS_CSV_FILENAME]
+                  [-n NAMESERVERS [NAMESERVERS ...]] [-t DNS_TIMEOUT] [--offline] [-s] [-w] [--verbose] [--debug]
+                  [--log-file LOG_FILE] [--no-prettify-json] [-v]
+                  [file_path ...]

-Parses DMARC reports
+Parses DMARC reports

-positional arguments:
-  file_path             one or more paths to aggregate or forensic report
-                        files, emails, or mbox files'
+positional arguments:
+  file_path             one or more paths to aggregate or forensic report files, emails, or mbox files'

-optional arguments:
-  -h, --help            show this help message and exit
-  -c CONFIG_FILE, --config-file CONFIG_FILE
-                        a path to a configuration file (--silent implied)
-  --strip-attachment-payloads
-                        remove attachment payloads from forensic report output
-  -o OUTPUT, --output OUTPUT
-                        write output files to the given directory
-  --aggregate-json-filename AGGREGATE_JSON_FILENAME
-                        filename for the aggregate JSON output file
-  --forensic-json-filename FORENSIC_JSON_FILENAME
-                        filename for the forensic JSON output file
-  --aggregate-csv-filename AGGREGATE_CSV_FILENAME
-                        filename for the aggregate CSV output file
-  --forensic-csv-filename FORENSIC_CSV_FILENAME
-                        filename for the forensic CSV output file
-  -n NAMESERVERS [NAMESERVERS ...], --nameservers NAMESERVERS [NAMESERVERS ...]
-                        nameservers to query
-  -t DNS_TIMEOUT, --dns_timeout DNS_TIMEOUT
-                        number of seconds to wait for an answer from DNS
-                        (default: 2.0)
-  --offline             do not make online queries for geolocation or DNS
-  -s, --silent          only print errors and warnings
-  --verbose             more verbose output
-  --debug               print debugging information
-  --log-file LOG_FILE   output logging to a file
-  -v, --version         show program's version number and exit
+options:
+  -h, --help            show this help message and exit
+  -c CONFIG_FILE, --config-file CONFIG_FILE
+                        a path to a configuration file (--silent implied)
+  --strip-attachment-payloads
+                        remove attachment payloads from forensic report output
+  -o OUTPUT, --output OUTPUT
+                        write output files to the given directory
+  --aggregate-json-filename AGGREGATE_JSON_FILENAME
+                        filename for the aggregate JSON output file
+  --forensic-json-filename FORENSIC_JSON_FILENAME
+                        filename for the forensic JSON output file
+  --smtp-tls-json-filename SMTP_TLS_JSON_FILENAME
+                        filename for the SMTP TLS JSON output file
+  --aggregate-csv-filename AGGREGATE_CSV_FILENAME
+                        filename for the aggregate CSV output file
+  --forensic-csv-filename FORENSIC_CSV_FILENAME
+                        filename for the forensic CSV output file
+  --smtp-tls-csv-filename SMTP_TLS_CSV_FILENAME
+                        filename for the SMTP TLS CSV output file
+  -n NAMESERVERS [NAMESERVERS ...], --nameservers NAMESERVERS [NAMESERVERS ...]
+                        nameservers to query
+  -t DNS_TIMEOUT, --dns_timeout DNS_TIMEOUT
+                        number of seconds to wait for an answer from DNS (default: 2.0)
+  --offline             do not make online queries for geolocation or DNS
+  -s, --silent          only print errors
+  -w, --warnings        print warnings in addition to errors
+  --verbose             more verbose output
+  --debug               print debugging information
+  --log-file LOG_FILE   output logging to a file
+  --no-prettify-json    output JSON in a single line without indentation
+  -v, --version         show program's version number and exit
 ```

 :::{note}
@@ -169,7 +172,7 @@ The full set of configuration options are:
     IDLE response or the number of seconds until the next
     mail check (Default: `30`)
   - `since` - str: Search for messages since certain time. (Examples: `5m|3h|2d|1w`)
-    Acceptable units - {"m":"minutes", "h":"hours", "d":"days", "w":"weeks"}).
+    Acceptable units - {"m":"minutes", "h":"hours", "d":"days", "w":"weeks"}.
     Defaults to `1d` if incorrect value is provided.
 - `imap`
   - `host` - str: The IMAP server hostname or IP address
@@ -254,7 +257,7 @@ The full set of configuration options are:
   :::
   - `user` - str: Basic auth username
   - `password` - str: Basic auth password
-  - `apiKey` - str: API key
+  - `api_key` - str: API key
   - `ssl` - bool: Use an encrypted SSL/TLS connection
     (Default: `True`)
   - `timeout` - float: Timeout in seconds (Default: 60)
@@ -277,7 +280,7 @@ The full set of configuration options are:
   :::
   - `user` - str: Basic auth username
   - `password` - str: Basic auth password
-  - `apiKey` - str: API key
+  - `api_key` - str: API key
   - `ssl` - bool: Use an encrypted SSL/TLS connection
     (Default: `True`)
   - `timeout` - float: Timeout in seconds (Default: 60)

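Taken together, a configuration file using the renamed `api_key` option might look like the sketch below. It is an illustration only: option names are taken from this changeset and the surrounding documentation (the `hosts` key is assumed from the wider docs), and host names and key values are placeholders.

```ini
[general]
save_aggregate = True
save_forensic = False
save_smtp_tls = True
normalize_timespan_threshold_hours = 24

[mailbox]
since = 2d

[imap]
host = imap.example.com
user = dmarc@example.com
password = REPLACE_ME
ssl = True

[elasticsearch]
hosts = https://localhost:9200
api_key = REPLACE_ME
ssl = True
timeout = 60
```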
File diff suppressed because one or more lines are too long
@@ -2,6 +2,10 @@

 """A Python package for parsing DMARC reports"""

+from __future__ import annotations
+
+from typing import Dict, List, Any, Union, Optional, IO, Callable
+
 import binascii
 import email
 import email.utils
@@ -17,9 +21,8 @@ import zlib
 from base64 import b64decode
 from collections import OrderedDict
 from csv import DictWriter
-from datetime import datetime, timedelta, timezone
+from datetime import datetime, timedelta, timezone, tzinfo
 from io import BytesIO, StringIO
-from typing import Callable

 import mailparser
 import xmltodict
@@ -79,16 +82,198 @@ class InvalidForensicReport(InvalidDMARCReport):
     """Raised when an invalid DMARC forensic report is encountered"""

+def _bucket_interval_by_day(
+    begin: datetime,
+    end: datetime,
+    total_count: int,
+) -> List[Dict[Any]]:
+    """
+    Split the interval [begin, end) into daily buckets and distribute
+    `total_count` proportionally across those buckets.
+
+    The function:
+    1. Identifies each calendar day touched by [begin, end)
+    2. Computes how many seconds of the interval fall into each day
+    3. Assigns counts in proportion to those overlaps
+    4. Ensures the final counts sum exactly to total_count
+
+    Args:
+        begin: timezone-aware datetime, inclusive start of interval
+        end: timezone-aware datetime, exclusive end of interval
+        total_count: number of messages to distribute
+
+    Returns:
+        A list of dicts like:
+        {
+            "begin": datetime,
+            "end": datetime,
+            "count": int
+        }
+    """
+    # --- Input validation ----------------------------------------------------
+    if begin > end:
+        raise ValueError("begin must be earlier than end")
+    if begin.tzinfo is None or end.tzinfo is None:
+        raise ValueError("begin and end must be timezone-aware")
+    if begin.tzinfo is not end.tzinfo:
+        raise ValueError("begin and end must have the same tzinfo")
+    if total_count < 0:
+        raise ValueError("total_count must be non-negative")
+
+    # --- Short-circuit trivial cases -----------------------------------------
+    interval_seconds = (end - begin).total_seconds()
+    if interval_seconds <= 0 or total_count == 0:
+        return []
+
+    tz: tzinfo = begin.tzinfo
+
+    # --- Step 1: Determine all calendar days touched by [begin, end) ----------
+    #
+    # For example:
+    #   begin = Jan 1 12:00
+    #   end   = Jan 3 06:00
+    #
+    # We need buckets for:
+    #   Jan 1 12:00 → Jan 2 00:00
+    #   Jan 2 00:00 → Jan 3 00:00
+    #   Jan 3 00:00 → Jan 3 06:00
+    #
+
+    # Start at midnight on the day of `begin`.
+    day_cursor = datetime(begin.year, begin.month, begin.day, tzinfo=tz)
+
+    # If `begin` is earlier on that day (e.g. 10:00), we want that midnight.
+    # If `begin` is past that midnight (e.g. 00:30), this is correct.
+    # If `begin` is BEFORE that midnight (rare unless tz shifts), adjust:
+    if day_cursor > begin:
+        day_cursor -= timedelta(days=1)
+
+    day_buckets: List[Dict[str, Any]] = []
+
+    while day_cursor < end:
+        day_start = day_cursor
+        day_end = day_cursor + timedelta(days=1)
+
+        # Overlap between [begin, end) and this day
+        overlap_start = max(begin, day_start)
+        overlap_end = min(end, day_end)
+
+        overlap_seconds = (overlap_end - overlap_start).total_seconds()
+
+        if overlap_seconds > 0:
+            day_buckets.append(
+                {
+                    "begin": overlap_start,
+                    "end": overlap_end,
+                    "seconds": overlap_seconds,
+                }
+            )
+
+        day_cursor = day_end
+
+    # --- Step 2: Pro-rate counts across buckets -------------------------------
+    #
+    # Compute the exact fractional count for each bucket:
+    #   bucket_fraction = bucket_seconds / interval_seconds
+    #   bucket_exact = total_count * bucket_fraction
+    #
+    # Then apply a "largest remainder" rounding strategy to ensure the sum
+    # equals exactly total_count.
+
+    exact_values: List[float] = [
+        (b["seconds"] / interval_seconds) * total_count for b in day_buckets
+    ]
+
+    floor_values: List[int] = [int(x) for x in exact_values]
+    fractional_parts: List[float] = [x - int(x) for x in exact_values]
+
+    # How many counts do we still need to distribute after flooring?
+    remainder = total_count - sum(floor_values)
+
+    # Sort buckets by descending fractional remainder
+    indices_by_fraction = sorted(
+        range(len(day_buckets)),
+        key=lambda i: fractional_parts[i],
+        reverse=True,
+    )
+
+    # Start with floor values
+    final_counts = floor_values[:]
+
+    # Add +1 to the buckets with the largest fractional parts
+    for idx in indices_by_fraction[:remainder]:
+        final_counts[idx] += 1
+
+    # --- Step 3: Build the final per-day result list -------------------------
+    results: List[Dict[str, Any]] = []
+    for bucket, count in zip(day_buckets, final_counts):
+        if count > 0:
+            results.append(
+                {
+                    "begin": bucket["begin"],
+                    "end": bucket["end"],
+                    "count": count,
+                }
+            )
+
+    return results

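To make the rounding behaviour concrete, here is a small self-contained sketch of the same largest-remainder idea applied to a 36-hour report with 5 messages. It re-implements the split inline for illustration rather than importing the private helper above, and the dates are arbitrary.

```python
from datetime import datetime, timedelta, timezone

begin = datetime(2012, 4, 27, 12, 0, tzinfo=timezone.utc)
end = begin + timedelta(hours=36)  # touches Apr 27 (12 h) and Apr 28 (24 h)
total_count = 5

# Seconds of the interval that fall on each calendar day.
midnight = datetime(2012, 4, 28, 0, 0, tzinfo=timezone.utc)
seconds_per_day = [
    (midnight - begin).total_seconds(),  # 43200 s on Apr 27
    (end - midnight).total_seconds(),    # 86400 s on Apr 28
]
interval_seconds = (end - begin).total_seconds()

exact = [s / interval_seconds * total_count for s in seconds_per_day]  # [1.67, 3.33]
floors = [int(x) for x in exact]                                       # [1, 3]
remainder = total_count - sum(floors)                                  # 1 left over

# Hand the leftover units to the buckets with the largest fractional parts.
order = sorted(range(len(exact)), key=lambda i: exact[i] - floors[i], reverse=True)
for i in order[:remainder]:
    floors[i] += 1

print(floors)  # [2, 3] -> the 5 messages split as 2 on Apr 27 and 3 on Apr 28
```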
+def _append_parsed_record(
+    parsed_record: OrderedDict[str, Any],
+    records: OrderedDict[str, Any],
+    begin_dt: datetime,
+    end_dt: datetime,
+    normalize: bool,
+) -> None:
+    """
+    Append a parsed DMARC record either unchanged or normalized.
+
+    Args:
+        parsed_record: The record returned by _parse_report_record().
+        records: Accumulating list of output records.
+        begin_dt: Report-level begin datetime (UTC).
+        end_dt: Report-level end datetime (UTC).
+        normalize: Whether this report exceeded the allowed timespan
+            and should be normalized per-day.
+    """
+
+    if not normalize:
+        parsed_record["normalized_timespan"] = False
+        parsed_record["interval_begin"] = begin_dt.strftime("%Y-%m-%d %H:%M:%S")
+        parsed_record["interval_end"] = end_dt.strftime("%Y-%m-%d %H:%M:%S")
+
+        records.append(parsed_record)
+        return
+
+    # Normalization path: break record into daily buckets
+    total_count = int(parsed_record.get("count", 0))
+    buckets = _bucket_interval_by_day(begin_dt, end_dt, total_count)
+    if not buckets:
+        return
+
+    for part_index, bucket in enumerate(buckets):
+        new_rec = parsed_record.copy()
+        new_rec["count"] = bucket["count"]
+        new_rec["normalized_timespan"] = True
+
+        new_rec["interval_begin"] = bucket["begin"].strftime("%Y-%m-%d %H:%M:%S")
+        new_rec["interval_end"] = bucket["end"].strftime("%Y-%m-%d %H:%M:%S")
+
+        records.append(new_rec)

 def _parse_report_record(
-    record,
-    ip_db_path=None,
-    always_use_local_files=False,
-    reverse_dns_map_path=None,
-    reverse_dns_map_url=None,
-    offline=False,
-    nameservers=None,
-    dns_timeout=2.0,
-):
+    record: OrderedDict,
+    *,
+    ip_db_path: Optional[str] = None,
+    always_use_local_files: bool = False,
+    reverse_dns_map_path: Optional[str] = None,
+    reverse_dns_map_url: Optional[str] = None,
+    offline: bool = False,
+    nameservers: Optional[list[str]] = None,
+    dns_timeout: Optional[float] = 2.0,
+) -> OrderedDict[str, Any]:
     """
     Converts a record from a DMARC aggregate report into a more consistent
     format
@@ -242,7 +427,7 @@ def _parse_report_record(
     return new_record


-def _parse_smtp_tls_failure_details(failure_details):
+def _parse_smtp_tls_failure_details(failure_details: dict[str, Any]):
     try:
         new_failure_details = OrderedDict(
             result_type=failure_details["result-type"],
@@ -278,7 +463,7 @@ def _parse_smtp_tls_failure_details(failure_details):
         raise InvalidSMTPTLSReport(str(e))


-def _parse_smtp_tls_report_policy(policy):
+def _parse_smtp_tls_report_policy(policy: dict[str, Any]):
     policy_types = ["tlsa", "sts", "no-policy-found"]
     try:
         policy_domain = policy["policy"]["policy-domain"]
@@ -315,7 +500,7 @@ def _parse_smtp_tls_report_policy(policy):
         raise InvalidSMTPTLSReport(str(e))


-def parse_smtp_tls_report_json(report):
+def parse_smtp_tls_report_json(report: dict[str, Any]):
     """Parses and validates an SMTP TLS report"""
     required_fields = [
         "organization-name",
@@ -354,7 +539,7 @@ def parse_smtp_tls_report_json(report):
         raise InvalidSMTPTLSReport(str(e))


-def parsed_smtp_tls_reports_to_csv_rows(reports):
+def parsed_smtp_tls_reports_to_csv_rows(reports: OrderedDict[str, Any]):
     """Converts one oor more parsed SMTP TLS reports into a list of single
     layer OrderedDict objects suitable for use in a CSV"""
     if type(reports) is OrderedDict:
@@ -389,7 +574,7 @@ def parsed_smtp_tls_reports_to_csv_rows(reports):
     return rows


-def parsed_smtp_tls_reports_to_csv(reports):
+def parsed_smtp_tls_reports_to_csv(reports: OrderedDict[str, Any]) -> str:
     """
     Converts one or more parsed SMTP TLS reports to flat CSV format, including
     headers
@@ -435,16 +620,18 @@ def parsed_smtp_tls_reports_to_csv(reports):


 def parse_aggregate_report_xml(
-    xml,
-    ip_db_path=None,
-    always_use_local_files=False,
-    reverse_dns_map_path=None,
-    reverse_dns_map_url=None,
-    offline=False,
-    nameservers=None,
-    timeout=2.0,
-    keep_alive=None,
-):
+    xml: str,
+    *,
+    ip_db_path: Optional[bool] = None,
+    always_use_local_files: Optional[bool] = False,
+    reverse_dns_map_path: Optional[bool] = None,
+    reverse_dns_map_url: Optional[bool] = None,
+    offline: Optional[bool] = False,
+    nameservers: Optional[list[str]] = None,
+    timeout: Optional[float] = 2.0,
+    keep_alive: Optional[callable] = None,
+    normalize_timespan_threshold_hours: Optional[float] = 24.0,
+) -> OrderedDict[str, Any]:
     """Parses a DMARC XML report string and returns a consistent OrderedDict

     Args:
@@ -458,6 +645,7 @@ def parse_aggregate_report_xml(
             (Cloudflare's public DNS resolvers by default)
         timeout (float): Sets the DNS timeout in seconds
         keep_alive (callable): Keep alive function
+        normalize_timespan_threshold_hours (float): Normalize timespans beyond this

     Returns:
         OrderedDict: The parsed aggregate DMARC report
@@ -522,13 +710,27 @@ def parse_aggregate_report_xml(
         report_id = report_id.replace("<", "").replace(">", "").split("@")[0]
         new_report_metadata["report_id"] = report_id
         date_range = report["report_metadata"]["date_range"]
-        if int(date_range["end"]) - int(date_range["begin"]) > 2 * 86400:
-            _error = "Time span > 24 hours - RFC 7489 section 7.2"
-            raise InvalidAggregateReport(_error)
-        date_range["begin"] = timestamp_to_human(date_range["begin"])
-        date_range["end"] = timestamp_to_human(date_range["end"])
+
+        begin_ts = int(date_range["begin"])
+        end_ts = int(date_range["end"])
+        span_seconds = end_ts - begin_ts
+
+        normalize_timespan = span_seconds > normalize_timespan_threshold_hours * 3600
+
+        date_range["begin"] = timestamp_to_human(begin_ts)
+        date_range["end"] = timestamp_to_human(end_ts)
+
         new_report_metadata["begin_date"] = date_range["begin"]
         new_report_metadata["end_date"] = date_range["end"]
+        new_report_metadata["timespan_requires_normalization"] = normalize_timespan
+        new_report_metadata["original_timespan_seconds"] = span_seconds
+        begin_dt = human_timestamp_to_datetime(
+            new_report_metadata["begin_date"], to_utc=True
+        )
+        end_dt = human_timestamp_to_datetime(
+            new_report_metadata["end_date"], to_utc=True
+        )
+
         if "error" in report["report_metadata"]:
             if not isinstance(report["report_metadata"]["error"], list):
                 errors = [report["report_metadata"]["error"]]
@@ -587,7 +789,13 @@ def parse_aggregate_report_xml(
                     nameservers=nameservers,
                     dns_timeout=timeout,
                 )
-                records.append(report_record)
+                _append_parsed_record(
+                    parsed_record=report_record,
+                    records=records,
+                    begin_dt=begin_dt,
+                    end_dt=end_dt,
+                    normalize=normalize_timespan,
+                )
             except Exception as e:
                 logger.warning("Could not parse record: {0}".format(e))

@@ -602,7 +810,13 @@ def parse_aggregate_report_xml(
                 nameservers=nameservers,
                 dns_timeout=timeout,
             )
-            records.append(report_record)
+            _append_parsed_record(
+                parsed_record=report_record,
+                records=records,
+                begin_dt=begin_dt,
+                end_dt=end_dt,
+                normalize=normalize_timespan,
+            )

         new_report["records"] = records

@@ -620,7 +834,7 @@ def parse_aggregate_report_xml(
         raise InvalidAggregateReport("Unexpected error: {0}".format(error.__str__()))


-def extract_report(content):
+def extract_report(content: Union[bytes, str, IO[Any]]) -> str:
     """
     Extracts text from a zip or gzip file, as a base64-encoded string,
     file-like object, or bytes.
@@ -639,9 +853,7 @@ def extract_report(content):
         try:
             file_object = BytesIO(b64decode(content))
         except binascii.Error:
-            pass
-        if file_object is None:
-            file_object = open(content, "rb")
+            return content
     elif type(content) is bytes:
         file_object = BytesIO(content)
     else:
@@ -665,16 +877,18 @@ def extract_report(content):
             file_object.close()

     except UnicodeDecodeError:
-        file_object.close()
+        if file_object:
+            file_object.close()
         raise ParserError("File objects must be opened in binary (rb) mode")
     except Exception as error:
-        file_object.close()
+        if file_object:
+            file_object.close()
         raise ParserError("Invalid archive file: {0}".format(error.__str__()))

     return report


-def extract_report_from_file_path(file_path):
+def extract_report_from_file_path(file_path: str):
     """Extracts report from a file at the given file_path"""
     try:
         with open(file_path, "rb") as report_file:
@@ -684,21 +898,23 @@ def extract_report_from_file_path(file_path):


 def parse_aggregate_report_file(
-    _input,
-    offline=False,
-    always_use_local_files=None,
-    reverse_dns_map_path=None,
-    reverse_dns_map_url=None,
-    ip_db_path=None,
-    nameservers=None,
-    dns_timeout=2.0,
-    keep_alive=None,
-):
+    _input: Union[str, bytes, IO[Any]],
+    *,
+    offline: Optional[bool] = False,
+    always_use_local_files: Optional[bool] = None,
+    reverse_dns_map_path: Optional[str] = None,
+    reverse_dns_map_url: Optional[str] = None,
+    ip_db_path: Optional[str] = None,
+    nameservers: Optional[list[str]] = None,
+    dns_timeout: Optional[float] = 2.0,
+    keep_alive: Optional[Callable] = None,
+    normalize_timespan_threshold_hours: Optional[float] = 24.0,
+) -> OrderedDict[str, any]:
     """Parses a file at the given path, a file-like object. or bytes as an
     aggregate DMARC report

     Args:
-        _input: A path to a file, a file like object, or bytes
+        _input (str | bytes | IO): A path to a file, a file like object, or bytes
         offline (bool): Do not query online for geolocation or DNS
         always_use_local_files (bool): Do not download files
         reverse_dns_map_path (str): Path to a reverse DNS map file
@@ -708,6 +924,7 @@ def parse_aggregate_report_file(
             (Cloudflare's public DNS resolvers by default)
         dns_timeout (float): Sets the DNS timeout in seconds
         keep_alive (callable): Keep alive function
+        normalize_timespan_threshold_hours (float): Normalize timespans beyond this

     Returns:
         OrderedDict: The parsed DMARC aggregate report
@@ -728,10 +945,13 @@ def parse_aggregate_report_file(
         nameservers=nameservers,
         timeout=dns_timeout,
         keep_alive=keep_alive,
+        normalize_timespan_threshold_hours=normalize_timespan_threshold_hours,
     )


-def parsed_aggregate_reports_to_csv_rows(reports):
+def parsed_aggregate_reports_to_csv_rows(
+    reports: list[OrderedDict[str, Any]],
+) -> list[dict[str, Any]]:
     """
     Converts one or more parsed aggregate reports to list of dicts in flat CSV
     format
@@ -760,6 +980,9 @@ def parsed_aggregate_reports_to_csv_rows(reports):
         report_id = report["report_metadata"]["report_id"]
         begin_date = report["report_metadata"]["begin_date"]
         end_date = report["report_metadata"]["end_date"]
+        normalized_timespan = report["report_metadata"][
+            "timespan_requires_normalization"
+        ]
         errors = "|".join(report["report_metadata"]["errors"])
         domain = report["policy_published"]["domain"]
         adkim = report["policy_published"]["adkim"]
@@ -777,6 +1000,7 @@ def parsed_aggregate_reports_to_csv_rows(reports):
             report_id=report_id,
             begin_date=begin_date,
             end_date=end_date,
+            normalized_timespan=normalized_timespan,
             errors=errors,
             domain=domain,
             adkim=adkim,
@@ -789,6 +1013,8 @@ def parsed_aggregate_reports_to_csv_rows(reports):

         for record in report["records"]:
             row = report_dict.copy()
+            row["begin_date"] = record["interval_begin"]
+            row["end_date"] = record["interval_end"]
             row["source_ip_address"] = record["source"]["ip_address"]
             row["source_country"] = record["source"]["country"]
             row["source_reverse_dns"] = record["source"]["reverse_dns"]
@@ -849,7 +1075,7 @@ def parsed_aggregate_reports_to_csv_rows(reports):
     return rows


-def parsed_aggregate_reports_to_csv(reports):
+def parsed_aggregate_reports_to_csv(reports: list[OrderedDict[str, Any]]) -> str:
     """
     Converts one or more parsed aggregate reports to flat CSV format, including
     headers
@@ -869,6 +1095,7 @@ def parsed_aggregate_reports_to_csv(reports):
         "report_id",
         "begin_date",
         "end_date",
+        "normalized_timespan",
         "errors",
         "domain",
         "adkim",
@@ -915,18 +1142,19 @@ def parsed_aggregate_reports_to_csv(reports):


 def parse_forensic_report(
-    feedback_report,
-    sample,
-    msg_date,
-    always_use_local_files=False,
-    reverse_dns_map_path=None,
-    reverse_dns_map_url=None,
-    offline=False,
-    ip_db_path=None,
-    nameservers=None,
-    dns_timeout=2.0,
-    strip_attachment_payloads=False,
-):
+    feedback_report: str,
+    sample: str,
+    msg_date: datetime,
+    *,
+    always_use_local_files: Optional[bool] = False,
+    reverse_dns_map_path: Optional[str] = None,
+    reverse_dns_map_url: str = None,
+    offline: Optional[bool] = False,
+    ip_db_path: Optional[str] = None,
+    nameservers: Optional[list[str]] = None,
+    dns_timeout: Optional[float] = 2.0,
+    strip_attachment_payloads: Optional[bool] = False,
+) -> OrderedDict[str, Any]:
     """
     Converts a DMARC forensic report and sample to a ``OrderedDict``

@@ -1054,7 +1282,7 @@ def parse_forensic_report(
     raise InvalidForensicReport("Unexpected error: {0}".format(error.__str__()))


-def parsed_forensic_reports_to_csv_rows(reports):
+def parsed_forensic_reports_to_csv_rows(reports: list[OrderedDict[str, Any]]):
     """
     Converts one or more parsed forensic reports to a list of dicts in flat CSV
     format
@@ -1090,7 +1318,7 @@ def parsed_forensic_reports_to_csv_rows(reports):
     return rows


-def parsed_forensic_reports_to_csv(reports):
+def parsed_forensic_reports_to_csv(reports: list[dict[str, Any]]) -> str:
     """
     Converts one or more parsed forensic reports to flat CSV format, including
     headers
@@ -1143,17 +1371,19 @@ def parsed_forensic_reports_to_csv(reports):


 def parse_report_email(
-    input_,
-    offline=False,
-    ip_db_path=None,
-    always_use_local_files=False,
-    reverse_dns_map_path=None,
-    reverse_dns_map_url=None,
-    nameservers=None,
-    dns_timeout=2.0,
-    strip_attachment_payloads=False,
-    keep_alive=None,
-):
+    input_: Union[bytes, str],
+    *,
+    offline: Optional[bool] = False,
+    ip_db_path: Optional[str] = None,
+    always_use_local_files: Optional[bool] = False,
+    reverse_dns_map_path: Optional[str] = None,
+    reverse_dns_map_url: Optional[str] = None,
+    nameservers: list[str] = None,
+    dns_timeout: Optional[float] = 2.0,
+    strip_attachment_payloads: Optional[bool] = False,
+    keep_alive: Optional[callable] = None,
+    normalize_timespan_threshold_hours: Optional[float] = 24.0,
+) -> OrderedDict[str, Any]:
     """
     Parses a DMARC report from an email

@@ -1169,6 +1399,7 @@ def parse_report_email(
         strip_attachment_payloads (bool): Remove attachment payloads from
             forensic report results
         keep_alive (callable): keep alive function
+        normalize_timespan_threshold_hours (float): Normalize timespans beyond this

     Returns:
         OrderedDict:
@@ -1281,6 +1512,7 @@ def parse_report_email(
                 nameservers=nameservers,
                 timeout=dns_timeout,
                 keep_alive=keep_alive,
+                normalize_timespan_threshold_hours=normalize_timespan_threshold_hours,
             )
             result = OrderedDict(
                 [("report_type", "aggregate"), ("report", aggregate_report)]
@@ -1337,22 +1569,24 @@ def parse_report_email(


 def parse_report_file(
-    input_,
-    nameservers=None,
-    dns_timeout=2.0,
-    strip_attachment_payloads=False,
-    ip_db_path=None,
-    always_use_local_files=False,
-    reverse_dns_map_path=None,
-    reverse_dns_map_url=None,
-    offline=False,
-    keep_alive=None,
-):
+    input_: Union[bytes, str, IO[Any]],
+    *,
+    nameservers: Optional[list[str]] = None,
+    dns_timeout: Optional[float] = 2.0,
+    strip_attachment_payloads: Optional[bool] = False,
+    ip_db_path: Optional[str] = None,
+    always_use_local_files: Optional[bool] = False,
+    reverse_dns_map_path: Optional[str] = None,
+    reverse_dns_map_url: Optional[str] = None,
+    offline: Optional[bool] = False,
+    keep_alive: Optional[Callable] = None,
+    normalize_timespan_threshold_hours: Optional[float] = 24,
+) -> OrderedDict[str, Any]:
     """Parses a DMARC aggregate or forensic file at the given path, a
     file-like object. or bytes

     Args:
-        input_: A path to a file, a file like object, or bytes
+        input_ (str | bytes | IO): A path to a file, a file like object, or bytes
         nameservers (list): A list of one or more nameservers to use
             (Cloudflare's public DNS resolvers by default)
         dns_timeout (float): Sets the DNS timeout in seconds
@@ -1378,6 +1612,8 @@ def parse_report_file(

         content = file_object.read()
         file_object.close()
+        if content.startswith(MAGIC_ZIP) or content.startswith(MAGIC_GZIP):
+            content = extract_report(content)
         try:
             report = parse_aggregate_report_file(
                 content,
@@ -1389,6 +1625,7 @@ def parse_report_file(
                 nameservers=nameservers,
                 dns_timeout=dns_timeout,
                 keep_alive=keep_alive,
+                normalize_timespan_threshold_hours=normalize_timespan_threshold_hours,
             )
             results = OrderedDict([("report_type", "aggregate"), ("report", report)])
         except InvalidAggregateReport:
@@ -1409,6 +1646,7 @@ def parse_report_file(
                 dns_timeout=dns_timeout,
                 strip_attachment_payloads=sa,
                 keep_alive=keep_alive,
+                normalize_timespan_threshold_hours=normalize_timespan_threshold_hours,
             )
         except InvalidDMARCReport:
             raise ParserError("Not a valid report")
@@ -1416,21 +1654,23 @@ def parse_report_file(


 def get_dmarc_reports_from_mbox(
-    input_,
-    nameservers=None,
-    dns_timeout=2.0,
-    strip_attachment_payloads=False,
-    ip_db_path=None,
-    always_use_local_files=False,
-    reverse_dns_map_path=None,
-    reverse_dns_map_url=None,
-    offline=False,
-):
+    input_: str,
+    *,
+    nameservers: Optional[list[str]] = None,
+    dns_timeout: Optional[float] = 2.0,
+    strip_attachment_payloads: Optional[bool] = False,
+    ip_db_path: Optional[str] = None,
+    always_use_local_files: Optional[bool] = False,
+    reverse_dns_map_path: Optional[str] = None,
+    reverse_dns_map_url: Optional[str] = None,
+    offline: Optional[bool] = False,
+    normalize_timespan_threshold_hours: Optional[float] = 24.0,
+) -> OrderedDict[str, OrderedDict[str, Any]]:
     """Parses a mailbox in mbox format containing e-mails with attached
     DMARC reports

     Args:
-        input_: A path to a mbox file
+        input_ (str): A path to a mbox file
         nameservers (list): A list of one or more nameservers to use
             (Cloudflare's public DNS resolvers by default)
         dns_timeout (float): Sets the DNS timeout in seconds
@@ -1441,9 +1681,10 @@ def get_dmarc_reports_from_mbox(
         reverse_dns_map_url (str): URL to a reverse DNS map file
         ip_db_path (str): Path to a MMDB file from MaxMind or DBIP
         offline (bool): Do not make online queries for geolocation or DNS
+        normalize_timespan_threshold_hours (float): Normalize timespans beyond this

     Returns:
-        OrderedDict: Lists of ``aggregate_reports`` and ``forensic_reports``
+        OrderedDict: Lists of ``aggregate_reports``, ``forensic_reports``, and ``smtp_tls_reports``

     """
     aggregate_reports = []
@@ -1470,6 +1711,7 @@ def get_dmarc_reports_from_mbox(
                 nameservers=nameservers,
                 dns_timeout=dns_timeout,
                 strip_attachment_payloads=sa,
+                normalize_timespan_threshold_hours=normalize_timespan_threshold_hours,
             )
             if parsed_email["report_type"] == "aggregate":
                 report_org = parsed_email["report"]["report_metadata"]["org_name"]
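Because the refactored signatures above make most parameters keyword-only, library callers that previously passed these options positionally would now pass them by name. A minimal usage sketch, assuming a local report file (the path is a placeholder, not from the diff):

```python
from parsedmarc import parse_report_file

# Only the input path stays positional; everything else is keyword-only
# after this changeset, including the new normalization threshold.
results = parse_report_file(
    "samples/aggregate_report.xml.gz",        # placeholder path
    offline=True,
    dns_timeout=5.0,
    normalize_timespan_threshold_hours=24.0,  # new option in this changeset
)
print(results["report_type"])  # "aggregate" or "forensic"
```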
@@ -1502,30 +1744,32 @@ def get_dmarc_reports_from_mbox(

 def get_dmarc_reports_from_mailbox(
     connection: MailboxConnection,
-    reports_folder="INBOX",
-    archive_folder="Archive",
-    delete=False,
-    test=False,
-    ip_db_path=None,
-    always_use_local_files=False,
-    reverse_dns_map_path=None,
-    reverse_dns_map_url=None,
-    offline=False,
-    nameservers=None,
-    dns_timeout=6.0,
-    strip_attachment_payloads=False,
-    results=None,
-    batch_size=10,
-    since=None,
-    create_folders=True,
-):
+    *,
+    reports_folder: Optional[str] = "INBOX",
+    archive_folder: Optional[str] = "Archive",
+    delete: Optional[bool] = False,
+    test: Optional[bool] = False,
+    ip_db_path: Optional[str] = None,
+    always_use_local_files: Optional[str] = False,
+    reverse_dns_map_path: Optional[str] = None,
+    reverse_dns_map_url: Optional[str] = None,
+    offline: Optional[bool] = False,
+    nameservers: Optional[list[str]] = None,
+    dns_timeout: Optional[float] = 6.0,
+    strip_attachment_payloads: Optional[bool] = False,
+    results: Optional[OrderedDict[str, any]] = None,
+    batch_size: Optional[int] = 10,
+    since: Optional[datetime] = None,
+    create_folders: Optional[bool] = True,
+    normalize_timespan_threshold_hours: Optional[float] = 24,
+) -> OrderedDict[str, OrderedDict[str, Any]]:
     """
     Fetches and parses DMARC reports from a mailbox

     Args:
         connection: A Mailbox connection object
-        reports_folder: The folder where reports can be found
-        archive_folder: The folder to move processed mail to
+        reports_folder (str): The folder where reports can be found
+        archive_folder (str): The folder to move processed mail to
         delete (bool): Delete messages after processing them
         test (bool): Do not move or delete messages after processing them
         ip_db_path (str): Path to a MMDB file from MaxMind or DBIP
@@ -1544,9 +1788,10 @@ def get_dmarc_reports_from_mailbox(
             (units - {"m":"minutes", "h":"hours", "d":"days", "w":"weeks"})
         create_folders (bool): Whether to create the destination folders
             (not used in watch)
+        normalize_timespan_threshold_hours (float): Normalize timespans beyond this

     Returns:
-        OrderedDict: Lists of ``aggregate_reports`` and ``forensic_reports``
+        OrderedDict: Lists of ``aggregate_reports``, ``forensic_reports``, and ``smtp_tls_reports``
     """
     if delete and test:
         raise ValueError("delete and test options are mutually exclusive")
@@ -1604,7 +1849,7 @@ def get_dmarc_reports_from_mailbox(
         if isinstance(connection, IMAPConnection):
             logger.debug(
                 "Only days and weeks values in 'since' option are \
-                considered for IMAP conections. Examples: 2d or 1w"
+                considered for IMAP connections. Examples: 2d or 1w"
             )
             since = (datetime.now(timezone.utc) - timedelta(minutes=_since)).date()
             current_time = datetime.now(timezone.utc).date()
@@ -1661,6 +1906,7 @@ def get_dmarc_reports_from_mailbox(
                     offline=offline,
                     strip_attachment_payloads=sa,
                     keep_alive=connection.keepalive,
+                    normalize_timespan_threshold_hours=normalize_timespan_threshold_hours,
                 )
                 if parsed_email["report_type"] == "aggregate":
                     report_org = parsed_email["report"]["report_metadata"]["org_name"]
@@ -1812,6 +2058,7 @@ def get_dmarc_reports_from_mailbox(
             reverse_dns_map_url=reverse_dns_map_url,
             offline=offline,
             since=current_time,
+            normalize_timespan_threshold_hours=normalize_timespan_threshold_hours,
         )

     return results
@@ -1820,20 +2067,22 @@ def get_dmarc_reports_from_mailbox(

 def watch_inbox(
     mailbox_connection: MailboxConnection,
     callback: Callable,
-    reports_folder="INBOX",
-    archive_folder="Archive",
-    delete=False,
-    test=False,
-    check_timeout=30,
-    ip_db_path=None,
-    always_use_local_files=False,
-    reverse_dns_map_path=None,
-    reverse_dns_map_url=None,
-    offline=False,
-    nameservers=None,
-    dns_timeout=6.0,
-    strip_attachment_payloads=False,
-    batch_size=None,
+    *,
+    reports_folder: Optional[str] = "INBOX",
+    archive_folder: Optional[str] = "Archive",
+    delete: Optional[bool] = False,
+    test: Optional[bool] = False,
+    check_timeout: Optional[int] = 30,
+    ip_db_path: Optional[str] = None,
+    always_use_local_files: Optional[bool] = False,
+    reverse_dns_map_path: Optional[str] = None,
+    reverse_dns_map_url: Optional[str] = None,
+    offline: Optional[bool] = False,
+    nameservers: Optional[list[str]] = None,
+    dns_timeout: Optional[float] = 6.0,
+    strip_attachment_payloads: Optional[bool] = False,
+    batch_size: Optional[int] = None,
+    normalize_timespan_threshold_hours: Optional[float] = 24,
 ):
     """
     Watches the mailbox for new messages and
@@ -1842,8 +2091,8 @@ def watch_inbox(
     Args:
         mailbox_connection: The mailbox connection object
         callback: The callback function to receive the parsing results
-        reports_folder: The IMAP folder where reports can be found
-        archive_folder: The folder to move processed mail to
+        reports_folder (str): The IMAP folder where reports can be found
+        archive_folder (str): The folder to move processed mail to
         delete (bool): Delete messages after processing them
         test (bool): Do not move or delete messages after processing them
         check_timeout (int): Number of seconds to wait for a IMAP IDLE response
@@ -1859,6 +2108,7 @@ def watch_inbox(
         strip_attachment_payloads (bool): Replace attachment payloads in
             forensic report samples with None
         batch_size (int): Number of messages to read and process before saving
+        normalize_timespan_threshold_hours (float): Normalize timespans beyond this
     """

     def check_callback(connection):
@@ -1879,6 +2129,7 @@ def watch_inbox(
             strip_attachment_payloads=sa,
             batch_size=batch_size,
             create_folders=False,
+            normalize_timespan_threshold_hours=normalize_timespan_threshold_hours,
         )
         callback(res)

@@ -1921,14 +2172,15 @@ def append_csv(filename, csv):


 def save_output(
-    results,
-    output_directory="output",
-    aggregate_json_filename="aggregate.json",
-    forensic_json_filename="forensic.json",
-    smtp_tls_json_filename="smtp_tls.json",
-    aggregate_csv_filename="aggregate.csv",
-    forensic_csv_filename="forensic.csv",
-    smtp_tls_csv_filename="smtp_tls.csv",
+    results: OrderedDict[str, Any],
+    *,
+    output_directory: Optional[str] = "output",
+    aggregate_json_filename: Optional[str] = "aggregate.json",
+    forensic_json_filename: Optional[str] = "forensic.json",
+    smtp_tls_json_filename: Optional[str] = "smtp_tls.json",
+    aggregate_csv_filename: Optional[str] = "aggregate.csv",
+    forensic_csv_filename: Optional[str] = "forensic.csv",
+    smtp_tls_csv_filename: Optional[str] = "smtp_tls.csv",
 ):
     """
     Save report data in the given directory
@@ -2006,7 +2258,7 @@ def save_output(
             sample_file.write(sample)


-def get_report_zip(results):
+def get_report_zip(results: OrderedDict[str, Any]) -> bytes:
     """
     Creates a zip file of parsed report output

@@ -2052,27 +2304,28 @@ def get_report_zip(results):


 def email_results(
-    results,
-    host,
-    mail_from,
-    mail_to,
-    mail_cc=None,
-    mail_bcc=None,
-    port=0,
-    require_encryption=False,
-    verify=True,
-    username=None,
-    password=None,
-    subject=None,
-    attachment_filename=None,
-    message=None,
+    results: OrderedDict,
+    *,
+    host: str,
+    mail_from: str,
+    mail_to: str,
+    mail_cc: list = None,
+    mail_bcc: list = None,
+    port: int = 0,
+    require_encryption: bool = False,
+    verify: bool = True,
+    username: str = None,
+    password: str = None,
+    subject: str = None,
+    attachment_filename: str = None,
+    message: str = None,
 ):
     """
     Emails parsing results as a zip file

     Args:
         results (OrderedDict): Parsing results
-        host: Mail server hostname or IP address
+        host (str): Mail server hostname or IP address
         mail_from: The value of the message from header
         mail_to (list): A list of addresses to mail to
         mail_cc (list): A list of addresses to CC

@@ -77,6 +77,7 @@ def cli_parse(
     always_use_local_files,
     reverse_dns_map_path,
     reverse_dns_map_url,
+    normalize_timespan_threshold_hours,
     conn,
 ):
     """Separated this function for multiprocessing"""
@@ -91,6 +92,7 @@ def cli_parse(
             nameservers=nameservers,
             dns_timeout=dns_timeout,
             strip_attachment_payloads=sa,
+            normalize_timespan_threshold_hours=normalize_timespan_threshold_hours,
         )
         conn.send([file_results, file_path])
     except ParserError as error:
@@ -591,7 +593,7 @@ def _main():
         elasticsearch_monthly_indexes=False,
         elasticsearch_username=None,
         elasticsearch_password=None,
-        elasticsearch_apiKey=None,
+        elasticsearch_api_key=None,
         opensearch_hosts=None,
         opensearch_timeout=60,
         opensearch_number_of_shards=1,
@@ -603,7 +605,7 @@ def _main():
         opensearch_monthly_indexes=False,
         opensearch_username=None,
         opensearch_password=None,
-        opensearch_apiKey=None,
+        opensearch_api_key=None,
         kafka_hosts=None,
         kafka_username=None,
         kafka_password=None,
@@ -659,6 +661,7 @@ def _main():
         webhook_forensic_url=None,
         webhook_smtp_tls_url=None,
         webhook_timeout=60,
+        normalize_timespan_threshold_hours=24.0,
     )
     args = arg_parser.parse_args()

@@ -674,8 +677,11 @@ def _main():
     if "general" in config.sections():
         general_config = config["general"]
         if "silent" in general_config:
-            if general_config["silent"].lower() == "false":
-                opts.silent = False
+            opts.silent = general_config.getboolean("silent")
+        if "normalize_timespan_threshold_hours" in general_config:
+            opts.normalize_timespan_threshold_hours = general_config.getfloat(
+                "normalize_timespan_threshold_hours"
+            )
         if "index_prefix_domain_map" in general_config:
             with open(general_config["index_prefix_domain_map"]) as f:
                 index_prefix_domain_map = yaml.safe_load(f)
@@ -723,11 +729,11 @@ def _main():
             )
             exit(-1)
         if "save_aggregate" in general_config:
-            opts.save_aggregate = general_config["save_aggregate"]
+            opts.save_aggregate = general_config.getboolean("save_aggregate")
         if "save_forensic" in general_config:
-            opts.save_forensic = general_config["save_forensic"]
+            opts.save_forensic = general_config.getboolean("save_forensic")
         if "save_smtp_tls" in general_config:
-            opts.save_smtp_tls = general_config["save_smtp_tls"]
+            opts.save_smtp_tls = general_config.getboolean("save_smtp_tls")
         if "debug" in general_config:
            opts.debug = general_config.getboolean("debug")
         if "verbose" in general_config:
@@ -798,8 +804,9 @@ def _main():
|
||||
if "ssl" in imap_config:
|
||||
opts.imap_ssl = imap_config.getboolean("ssl")
|
||||
if "skip_certificate_verification" in imap_config:
|
||||
imap_verify = imap_config.getboolean("skip_certificate_verification")
|
||||
opts.imap_skip_certificate_verification = imap_verify
|
||||
opts.imap_skip_certificate_verification = imap_config.getboolean(
|
||||
"skip_certificate_verification"
|
||||
)
|
||||
if "user" in imap_config:
|
||||
opts.imap_user = imap_config["user"]
|
||||
else:
|
||||
@@ -975,8 +982,12 @@ def _main():
|
||||
opts.elasticsearch_username = elasticsearch_config["user"]
|
||||
if "password" in elasticsearch_config:
|
||||
opts.elasticsearch_password = elasticsearch_config["password"]
|
||||
# Until 8.20
|
||||
if "apiKey" in elasticsearch_config:
|
||||
opts.elasticsearch_apiKey = elasticsearch_config["apiKey"]
|
||||
# Since 8.20
|
||||
if "api_key" in elasticsearch_config:
|
||||
opts.elasticsearch_apiKey = elasticsearch_config["api_key"]
|
||||
|
||||
if "opensearch" in config:
|
||||
opensearch_config = config["opensearch"]
|
||||
@@ -1011,8 +1022,12 @@ def _main():
|
||||
opts.opensearch_username = opensearch_config["user"]
|
||||
if "password" in opensearch_config:
|
||||
opts.opensearch_password = opensearch_config["password"]
|
||||
# Until 8.20
|
||||
if "apiKey" in opensearch_config:
|
||||
opts.opensearch_apiKey = opensearch_config["apiKey"]
|
||||
# Since 8.20
|
||||
if "api_key" in opensearch_config:
|
||||
opts.opensearch_apiKey = opensearch_config["api_key"]
|
||||
|
||||
if "splunk_hec" in config.sections():
|
||||
hec_config = config["splunk_hec"]
|
||||
@@ -1169,7 +1184,9 @@ def _main():
|
||||
)
|
||||
opts.gmail_api_scopes = _str_to_list(opts.gmail_api_scopes)
|
||||
if "oauth2_port" in gmail_api_config:
|
||||
opts.gmail_api_oauth2_port = gmail_api_config.get("oauth2_port", 8080)
|
||||
opts.gmail_api_oauth2_port = gmail_api_config.getint(
|
||||
"oauth2_port", 8080
|
||||
)
|
||||
|
||||
if "maildir" in config.sections():
|
||||
maildir_api_config = config["maildir"]
|
||||
@@ -1271,11 +1288,11 @@ def _main():
|
||||
es_smtp_tls_index = "{0}{1}".format(prefix, es_smtp_tls_index)
|
||||
elastic.set_hosts(
|
||||
opts.elasticsearch_hosts,
|
||||
opts.elasticsearch_ssl,
|
||||
opts.elasticsearch_ssl_cert_path,
|
||||
opts.elasticsearch_username,
|
||||
opts.elasticsearch_password,
|
||||
opts.elasticsearch_apiKey,
|
||||
use_ssl=opts.elasticsearch_ssl,
|
||||
ssl_cert_path=opts.elasticsearch_ssl_cert_path,
|
||||
username=opts.elasticsearch_username,
|
||||
password=opts.elasticsearch_password,
|
||||
api_key=opts.elasticsearch_api_key,
|
||||
timeout=opts.elasticsearch_timeout,
|
||||
)
|
||||
elastic.migrate_indexes(
|
||||
@@ -1303,11 +1320,11 @@ def _main():
|
||||
os_smtp_tls_index = "{0}{1}".format(prefix, os_smtp_tls_index)
|
||||
opensearch.set_hosts(
|
||||
opts.opensearch_hosts,
|
||||
opts.opensearch_ssl,
|
||||
opts.opensearch_ssl_cert_path,
|
||||
opts.opensearch_username,
|
||||
opts.opensearch_password,
|
||||
opts.opensearch_apiKey,
|
||||
use_ssl=opts.opensearch_ssl,
|
||||
ssl_cert_path=opts.opensearch_ssl_cert_path,
|
||||
username=opts.opensearch_username,
|
||||
password=opts.opensearch_password,
|
||||
api_key=opts.opensearch_api_key,
|
||||
timeout=opts.opensearch_timeout,
|
||||
)
|
||||
opensearch.migrate_indexes(
|
||||
@@ -1445,6 +1462,7 @@ def _main():
|
||||
opts.always_use_local_files,
|
||||
opts.reverse_dns_map_path,
|
||||
opts.reverse_dns_map_url,
|
||||
opts.normalize_timespan_threshold_hours,
|
||||
child_conn,
|
||||
),
|
||||
)
|
||||
@@ -1495,6 +1513,7 @@ def _main():
|
||||
reverse_dns_map_path=opts.reverse_dns_map_path,
|
||||
reverse_dns_map_url=opts.reverse_dns_map_url,
|
||||
offline=opts.offline,
|
||||
normalize_timespan_threshold_hours=opts.normalize_timespan_threshold_hours,
|
||||
)
|
||||
aggregate_reports += reports["aggregate_reports"]
|
||||
forensic_reports += reports["forensic_reports"]
|
||||
@@ -1513,7 +1532,7 @@ def _main():
|
||||
if opts.imap_skip_certificate_verification:
|
||||
logger.debug("Skipping IMAP certificate verification")
|
||||
verify = False
|
||||
if opts.imap_ssl is False:
|
||||
if not opts.imap_ssl:
|
||||
ssl = False
|
||||
|
||||
mailbox_connection = IMAPConnection(
|
||||
@@ -1604,6 +1623,7 @@ def _main():
|
||||
test=opts.mailbox_test,
|
||||
strip_attachment_payloads=opts.strip_attachment_payloads,
|
||||
since=opts.mailbox_since,
|
||||
normalize_timespan_threshold_hours=opts.normalize_timespan_threshold_hours,
|
||||
)
|
||||
|
||||
aggregate_reports += reports["aggregate_reports"]
|
||||
@@ -1666,6 +1686,7 @@ def _main():
|
||||
reverse_dns_map_path=opts.reverse_dns_map_path,
|
||||
reverse_dns_map_url=opts.reverse_dns_map_url,
|
||||
offline=opts.offline,
|
||||
normalize_timespan_threshold_hours=opts.normalize_timespan_threshold_hours,
|
||||
)
|
||||
except FileExistsError as error:
|
||||
logger.error("{0}".format(error.__str__()))
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
__version__ = "8.19.1"
|
||||
__version__ = "9.0.3"
|
||||
USER_AGENT = f"parsedmarc/{__version__}"
|
||||
|
||||
@@ -1,5 +1,9 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional, Union, Any
|
||||
|
||||
from collections import OrderedDict
|
||||
|
||||
from elasticsearch_dsl.search import Q
|
||||
@@ -67,6 +71,8 @@ class _AggregateReportDoc(Document):
|
||||
date_range = Date()
|
||||
date_begin = Date()
|
||||
date_end = Date()
|
||||
normalized_timespan = Boolean()
|
||||
original_timespan_seconds = Integer
|
||||
errors = Text()
|
||||
published_policy = Object(_PublishedPolicy)
|
||||
source_ip_address = Ip()
|
||||
@@ -87,15 +93,15 @@ class _AggregateReportDoc(Document):
|
||||
dkim_results = Nested(_DKIMResult)
|
||||
spf_results = Nested(_SPFResult)
|
||||
|
||||
def add_policy_override(self, type_, comment):
|
||||
def add_policy_override(self, type_: str, comment: str):
|
||||
self.policy_overrides.append(_PolicyOverride(type=type_, comment=comment))
|
||||
|
||||
def add_dkim_result(self, domain, selector, result):
|
||||
def add_dkim_result(self, domain: str, selector: str, result: _DKIMResult):
|
||||
self.dkim_results.append(
|
||||
_DKIMResult(domain=domain, selector=selector, result=result)
|
||||
)
|
||||
|
||||
def add_spf_result(self, domain, scope, result):
|
||||
def add_spf_result(self, domain: str, scope: str, result: _SPFResult):
|
||||
self.spf_results.append(_SPFResult(domain=domain, scope=scope, result=result))
|
||||
|
||||
def save(self, **kwargs):
|
||||
@@ -131,21 +137,21 @@ class _ForensicSampleDoc(InnerDoc):
|
||||
body = Text()
|
||||
attachments = Nested(_EmailAttachmentDoc)
|
||||
|
||||
def add_to(self, display_name, address):
|
||||
def add_to(self, display_name: str, address: str):
|
||||
self.to.append(_EmailAddressDoc(display_name=display_name, address=address))
|
||||
|
||||
def add_reply_to(self, display_name, address):
|
||||
def add_reply_to(self, display_name: str, address: str):
|
||||
self.reply_to.append(
|
||||
_EmailAddressDoc(display_name=display_name, address=address)
|
||||
)
|
||||
|
||||
def add_cc(self, display_name, address):
|
||||
def add_cc(self, display_name: str, address: str):
|
||||
self.cc.append(_EmailAddressDoc(display_name=display_name, address=address))
|
||||
|
||||
def add_bcc(self, display_name, address):
|
||||
def add_bcc(self, display_name: str, address: str):
|
||||
self.bcc.append(_EmailAddressDoc(display_name=display_name, address=address))
|
||||
|
||||
def add_attachment(self, filename, content_type, sha256):
|
||||
def add_attachment(self, filename: str, content_type: str, sha256: str):
|
||||
self.attachments.append(
|
||||
_EmailAttachmentDoc(
|
||||
filename=filename, content_type=content_type, sha256=sha256
|
||||
@@ -197,15 +203,15 @@ class _SMTPTLSPolicyDoc(InnerDoc):
|
||||
|
||||
def add_failure_details(
|
||||
self,
|
||||
result_type,
|
||||
ip_address,
|
||||
receiving_ip,
|
||||
receiving_mx_helo,
|
||||
failed_session_count,
|
||||
sending_mta_ip=None,
|
||||
receiving_mx_hostname=None,
|
||||
additional_information_uri=None,
|
||||
failure_reason_code=None,
|
||||
result_type: str,
|
||||
ip_address: str,
|
||||
receiving_ip: str,
|
||||
receiving_mx_helo: str,
|
||||
failed_session_count: int,
|
||||
sending_mta_ip: Optional[str] = None,
|
||||
receiving_mx_hostname: Optional[str] = None,
|
||||
additional_information_uri: Optional[str] = None,
|
||||
failure_reason_code: Union[str, int, None] = None,
|
||||
):
|
||||
_details = _SMTPTLSFailureDetailsDoc(
|
||||
result_type=result_type,
|
||||
@@ -235,13 +241,14 @@ class _SMTPTLSReportDoc(Document):
|
||||
|
||||
def add_policy(
|
||||
self,
|
||||
policy_type,
|
||||
policy_domain,
|
||||
successful_session_count,
|
||||
failed_session_count,
|
||||
policy_string=None,
|
||||
mx_host_patterns=None,
|
||||
failure_details=None,
|
||||
policy_type: str,
|
||||
policy_domain: str,
|
||||
successful_session_count: int,
|
||||
failed_session_count: int,
|
||||
*,
|
||||
policy_string: Optional[str] = None,
|
||||
mx_host_patterns: Optional[list[str]] = None,
|
||||
failure_details: Optional[str] = None,
|
||||
):
|
||||
self.policies.append(
|
||||
policy_type=policy_type,
|
||||
@@ -259,24 +266,25 @@ class AlreadySaved(ValueError):
|
||||
|
||||
|
||||
def set_hosts(
|
||||
hosts,
|
||||
use_ssl=False,
|
||||
ssl_cert_path=None,
|
||||
username=None,
|
||||
password=None,
|
||||
apiKey=None,
|
||||
timeout=60.0,
|
||||
hosts: Union[str, list[str]],
|
||||
*,
|
||||
use_ssl: Optional[bool] = False,
|
||||
ssl_cert_path: Optional[str] = None,
|
||||
username: Optional[str] = None,
|
||||
password: Optional[str] = None,
|
||||
api_key: Optional[str] = None,
|
||||
timeout: Optional[float] = 60.0,
|
||||
):
|
||||
"""
|
||||
Sets the Elasticsearch hosts to use
|
||||
|
||||
Args:
|
||||
hosts (str): A single hostname or URL, or list of hostnames or URLs
|
||||
use_ssl (bool): Use a HTTPS connection to the server
|
||||
hosts (str | list[str]): A single hostname or URL, or list of hostnames or URLs
|
||||
use_ssl (bool): Use an HTTPS connection to the server
|
||||
ssl_cert_path (str): Path to the certificate chain
|
||||
username (str): The username to use for authentication
|
||||
password (str): The password to use for authentication
|
||||
apiKey (str): The Base64 encoded API key to use for authentication
|
||||
api_key (str): The Base64 encoded API key to use for authentication
|
||||
timeout (float): Timeout in seconds
|
||||
"""
|
||||
if not isinstance(hosts, list):
|
||||
@@ -291,12 +299,12 @@ def set_hosts(
|
||||
conn_params["verify_certs"] = False
|
||||
if username:
|
||||
conn_params["http_auth"] = username + ":" + password
|
||||
if apiKey:
|
||||
conn_params["api_key"] = apiKey
|
||||
if api_key:
|
||||
conn_params["api_key"] = api_key
|
||||
connections.create_connection(**conn_params)
|
||||
|
||||
|
||||
def create_indexes(names, settings=None):
|
||||
def create_indexes(names: list[str], settings: Optional[dict[str, Any]] = None):
|
||||
"""
|
||||
Create Elasticsearch indexes
|
||||
|
||||
@@ -319,7 +327,10 @@ def create_indexes(names, settings=None):
|
||||
raise ElasticsearchError("Elasticsearch error: {0}".format(e.__str__()))
|
||||
|
||||
|
||||
def migrate_indexes(aggregate_indexes=None, forensic_indexes=None):
|
||||
def migrate_indexes(
|
||||
aggregate_indexes: Optional[list[str]] = None,
|
||||
forensic_indexes: Optional[list[str]] = None,
|
||||
):
|
||||
"""
|
||||
Updates index mappings
|
||||
|
||||
@@ -366,12 +377,12 @@ def migrate_indexes(aggregate_indexes=None, forensic_indexes=None):
|
||||
|
||||
|
||||
def save_aggregate_report_to_elasticsearch(
|
||||
aggregate_report,
|
||||
index_suffix=None,
|
||||
index_prefix=None,
|
||||
monthly_indexes=False,
|
||||
number_of_shards=1,
|
||||
number_of_replicas=0,
|
||||
aggregate_report: OrderedDict[str, Any],
|
||||
index_suffix: Optional[str] = None,
|
||||
index_prefix: Optional[str] = None,
|
||||
monthly_indexes: Optional[bool] = False,
|
||||
number_of_shards: Optional[int] = 1,
|
||||
number_of_replicas: Optional[int] = 0,
|
||||
):
|
||||
"""
|
||||
Saves a parsed DMARC aggregate report to Elasticsearch
|
||||
@@ -395,15 +406,11 @@ def save_aggregate_report_to_elasticsearch(
|
||||
domain = aggregate_report["policy_published"]["domain"]
|
||||
begin_date = human_timestamp_to_datetime(metadata["begin_date"], to_utc=True)
|
||||
end_date = human_timestamp_to_datetime(metadata["end_date"], to_utc=True)
|
||||
begin_date_human = begin_date.strftime("%Y-%m-%d %H:%M:%SZ")
|
||||
end_date_human = end_date.strftime("%Y-%m-%d %H:%M:%SZ")
|
||||
|
||||
if monthly_indexes:
|
||||
index_date = begin_date.strftime("%Y-%m")
|
||||
else:
|
||||
index_date = begin_date.strftime("%Y-%m-%d")
|
||||
aggregate_report["begin_date"] = begin_date
|
||||
aggregate_report["end_date"] = end_date
|
||||
date_range = [aggregate_report["begin_date"], aggregate_report["end_date"]]
|
||||
|
||||
org_name_query = Q(dict(match_phrase=dict(org_name=org_name)))
|
||||
report_id_query = Q(dict(match_phrase=dict(report_id=report_id)))
|
||||
@@ -425,6 +432,9 @@ def save_aggregate_report_to_elasticsearch(
|
||||
try:
|
||||
existing = search.execute()
|
||||
except Exception as error_:
|
||||
begin_date_human = begin_date.strftime("%Y-%m-%d %H:%M:%SZ")
|
||||
end_date_human = end_date.strftime("%Y-%m-%d %H:%M:%SZ")
|
||||
|
||||
raise ElasticsearchError(
|
||||
"Elasticsearch's search for existing report \
|
||||
error: {}".format(error_.__str__())
|
||||
@@ -450,6 +460,17 @@ def save_aggregate_report_to_elasticsearch(
|
||||
)
|
||||
|
||||
for record in aggregate_report["records"]:
|
||||
begin_date = human_timestamp_to_datetime(record["interval_begin"], to_utc=True)
|
||||
end_date = human_timestamp_to_datetime(record["interval_end"], to_utc=True)
|
||||
normalized_timespan = record["normalized_timespan"]
|
||||
|
||||
if monthly_indexes:
|
||||
index_date = begin_date.strftime("%Y-%m")
|
||||
else:
|
||||
index_date = begin_date.strftime("%Y-%m-%d")
|
||||
aggregate_report["begin_date"] = begin_date
|
||||
aggregate_report["end_date"] = end_date
|
||||
date_range = [aggregate_report["begin_date"], aggregate_report["end_date"]]
|
||||
agg_doc = _AggregateReportDoc(
|
||||
xml_schema=aggregate_report["xml_schema"],
|
||||
org_name=metadata["org_name"],
|
||||
@@ -457,8 +478,9 @@ def save_aggregate_report_to_elasticsearch(
|
||||
org_extra_contact_info=metadata["org_extra_contact_info"],
|
||||
report_id=metadata["report_id"],
|
||||
date_range=date_range,
|
||||
date_begin=aggregate_report["begin_date"],
|
||||
date_end=aggregate_report["end_date"],
|
||||
date_begin=begin_date,
|
||||
date_end=end_date,
|
||||
normalized_timespan=normalized_timespan,
|
||||
errors=metadata["errors"],
|
||||
published_policy=published_policy,
|
||||
source_ip_address=record["source"]["ip_address"],
|
||||
@@ -517,12 +539,12 @@ def save_aggregate_report_to_elasticsearch(
|
||||
|
||||
|
||||
def save_forensic_report_to_elasticsearch(
|
||||
forensic_report,
|
||||
index_suffix=None,
|
||||
index_prefix=None,
|
||||
monthly_indexes=False,
|
||||
number_of_shards=1,
|
||||
number_of_replicas=0,
|
||||
forensic_report: OrderedDict[str, Any],
|
||||
index_suffix: Optional[any] = None,
|
||||
index_prefix: Optional[str] = None,
|
||||
monthly_indexes: Optional[bool] = False,
|
||||
number_of_shards: int = 1,
|
||||
number_of_replicas: int = 0,
|
||||
):
|
||||
"""
|
||||
Saves a parsed DMARC forensic report to Elasticsearch
|
||||
@@ -684,12 +706,12 @@ def save_forensic_report_to_elasticsearch(
|
||||
|
||||
|
||||
def save_smtp_tls_report_to_elasticsearch(
|
||||
report,
|
||||
index_suffix=None,
|
||||
index_prefix=None,
|
||||
monthly_indexes=False,
|
||||
number_of_shards=1,
|
||||
number_of_replicas=0,
|
||||
report: OrderedDict[str, Any],
|
||||
index_suffix: str = None,
|
||||
index_prefix: str = None,
|
||||
monthly_indexes: Optional[bool] = False,
|
||||
number_of_shards: Optional[int] = 1,
|
||||
number_of_replicas: Optional[int] = 0,
|
||||
):
|
||||
"""
|
||||
Saves a parsed SMTP TLS report to Elasticsearch
|
||||
@@ -781,7 +803,7 @@ def save_smtp_tls_report_to_elasticsearch(
|
||||
policy_doc = _SMTPTLSPolicyDoc(
|
||||
policy_domain=policy["policy_domain"],
|
||||
policy_type=policy["policy_type"],
|
||||
succesful_session_count=policy["successful_session_count"],
|
||||
successful_session_count=policy["successful_session_count"],
|
||||
failed_session_count=policy["failed_session_count"],
|
||||
policy_string=policy_strings,
|
||||
mx_host_patterns=mx_host_patterns,
|
||||
|
||||
@@ -1,9 +1,14 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
import logging
|
||||
import logging.handlers
|
||||
import json
|
||||
import threading
|
||||
from collections import OrderedDict
|
||||
|
||||
from parsedmarc import (
|
||||
parsed_aggregate_reports_to_csv_rows,
|
||||
@@ -48,7 +53,7 @@ class GelfClient(object):
|
||||
)
|
||||
self.logger.addHandler(self.handler)
|
||||
|
||||
def save_aggregate_report_to_gelf(self, aggregate_reports):
|
||||
def save_aggregate_report_to_gelf(self, aggregate_reports: OrderedDict[str, Any]):
|
||||
rows = parsed_aggregate_reports_to_csv_rows(aggregate_reports)
|
||||
for row in rows:
|
||||
log_context_data.parsedmarc = row
|
||||
@@ -56,12 +61,12 @@ class GelfClient(object):
|
||||
|
||||
log_context_data.parsedmarc = None
|
||||
|
||||
def save_forensic_report_to_gelf(self, forensic_reports):
|
||||
def save_forensic_report_to_gelf(self, forensic_reports: OrderedDict[str, Any]):
|
||||
rows = parsed_forensic_reports_to_csv_rows(forensic_reports)
|
||||
for row in rows:
|
||||
self.logger.info(json.dumps(row))
|
||||
|
||||
def save_smtp_tls_report_to_gelf(self, smtp_tls_reports):
|
||||
def save_smtp_tls_report_to_gelf(self, smtp_tls_reports: OrderedDict[str, Any]):
|
||||
rows = parsed_smtp_tls_reports_to_csv_rows(smtp_tls_reports)
|
||||
for row in rows:
|
||||
self.logger.info(json.dumps(row))
|
||||
|
||||
@@ -1,5 +1,10 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Optional
|
||||
from ssl import SSLContext
|
||||
|
||||
import json
|
||||
from ssl import create_default_context
|
||||
|
||||
@@ -18,7 +23,13 @@ class KafkaError(RuntimeError):
|
||||
|
||||
class KafkaClient(object):
|
||||
def __init__(
|
||||
self, kafka_hosts, ssl=False, username=None, password=None, ssl_context=None
|
||||
self,
|
||||
kafka_hosts: list[str],
|
||||
*,
|
||||
ssl: Optional[bool] = False,
|
||||
username: Optional[str] = None,
|
||||
password: Optional[str] = None,
|
||||
ssl_context: Optional[SSLContext] = None,
|
||||
):
|
||||
"""
|
||||
Initializes the Kafka client
|
||||
@@ -28,7 +39,7 @@ class KafkaClient(object):
|
||||
ssl (bool): Use a SSL/TLS connection
|
||||
username (str): An optional username
|
||||
password (str): An optional password
|
||||
ssl_context: SSL context options
|
||||
ssl_context (SSLContext): SSL context options
|
||||
|
||||
Notes:
|
||||
``use_ssl=True`` is implied when a username or password are
|
||||
@@ -55,7 +66,7 @@ class KafkaClient(object):
|
||||
raise KafkaError("No Kafka brokers available")
|
||||
|
||||
@staticmethod
|
||||
def strip_metadata(report):
|
||||
def strip_metadata(report: OrderedDict[str, Any]):
|
||||
"""
|
||||
Duplicates org_name, org_email and report_id into JSON root
|
||||
and removes report_metadata key to bring it more inline
|
||||
@@ -69,7 +80,7 @@ class KafkaClient(object):
|
||||
return report
|
||||
|
||||
@staticmethod
|
||||
def generate_daterange(report):
|
||||
def generate_date_range(report: OrderedDict[str, Any]):
|
||||
"""
|
||||
Creates a date_range timestamp with format YYYY-MM-DD-T-HH:MM:SS
|
||||
based on begin and end dates for easier parsing in Kibana.
|
||||
@@ -86,7 +97,9 @@ class KafkaClient(object):
|
||||
logger.debug("date_range is {}".format(date_range))
|
||||
return date_range
|
||||
|
||||
def save_aggregate_reports_to_kafka(self, aggregate_reports, aggregate_topic):
|
||||
def save_aggregate_reports_to_kafka(
|
||||
self, aggregate_reports: list[OrderedDict][str, Any], aggregate_topic: str
|
||||
):
|
||||
"""
|
||||
Saves aggregate DMARC reports to Kafka
|
||||
|
||||
@@ -105,7 +118,7 @@ class KafkaClient(object):
|
||||
return
|
||||
|
||||
for report in aggregate_reports:
|
||||
report["date_range"] = self.generate_daterange(report)
|
||||
report["date_range"] = self.generate_date_range(report)
|
||||
report = self.strip_metadata(report)
|
||||
|
||||
for slice in report["records"]:
|
||||
@@ -129,7 +142,9 @@ class KafkaClient(object):
|
||||
except Exception as e:
|
||||
raise KafkaError("Kafka error: {0}".format(e.__str__()))
|
||||
|
||||
def save_forensic_reports_to_kafka(self, forensic_reports, forensic_topic):
|
||||
def save_forensic_reports_to_kafka(
|
||||
self, forensic_reports: OrderedDict[str, Any], forensic_topic: str
|
||||
):
|
||||
"""
|
||||
Saves forensic DMARC reports to Kafka, sends individual
|
||||
records (slices) since Kafka requires messages to be <= 1MB
|
||||
@@ -159,7 +174,9 @@ class KafkaClient(object):
|
||||
except Exception as e:
|
||||
raise KafkaError("Kafka error: {0}".format(e.__str__()))
|
||||
|
||||
def save_smtp_tls_reports_to_kafka(self, smtp_tls_reports, smtp_tls_topic):
|
||||
def save_smtp_tls_reports_to_kafka(
|
||||
self, smtp_tls_reports: list[OrderedDict[str, Any]], smtp_tls_topic: str
|
||||
):
|
||||
"""
|
||||
Saves SMTP TLS reports to Kafka, sends individual
|
||||
records (slices) since Kafka requires messages to be <= 1MB
|
||||
|
||||
@@ -1,4 +1,10 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
from collections import OrderedDict
|
||||
|
||||
from parsedmarc.log import logger
|
||||
from azure.core.exceptions import HttpResponseError
|
||||
from azure.identity import ClientSecretCredential
|
||||
@@ -102,7 +108,12 @@ class LogAnalyticsClient(object):
|
||||
"Invalid configuration. " + "One or more required settings are missing."
|
||||
)
|
||||
|
||||
def publish_json(self, results, logs_client: LogsIngestionClient, dcr_stream: str):
|
||||
def publish_json(
|
||||
self,
|
||||
results: OrderedDict[str, OrderedDict[str, Any]],
|
||||
logs_client: LogsIngestionClient,
|
||||
dcr_stream: str,
|
||||
):
|
||||
"""
|
||||
Background function to publish given
|
||||
DMARC report to specific Data Collection Rule.
|
||||
@@ -121,7 +132,11 @@ class LogAnalyticsClient(object):
|
||||
raise LogAnalyticsException("Upload failed: {error}".format(error=e))
|
||||
|
||||
def publish_results(
|
||||
self, results, save_aggregate: bool, save_forensic: bool, save_smtp_tls: bool
|
||||
self,
|
||||
results: OrderedDict[str, OrderedDict[str, Any]],
|
||||
save_aggregate: bool,
|
||||
save_forensic: bool,
|
||||
save_smtp_tls: bool,
|
||||
):
|
||||
"""
|
||||
Function to publish DMARC and/or SMTP TLS reports to Log Analytics
|
||||
|
||||
@@ -1,3 +1,7 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from base64 import urlsafe_b64decode
|
||||
from functools import lru_cache
|
||||
from pathlib import Path
|
||||
@@ -152,3 +156,4 @@ class GmailConnection(MailboxConnection):
|
||||
for label in labels:
|
||||
if label_name == label["id"] or label_name == label["name"]:
|
||||
return label["id"]
|
||||
return ""
|
||||
|
||||
@@ -1,3 +1,7 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from enum import Enum
|
||||
from functools import lru_cache
|
||||
from pathlib import Path
|
||||
|
||||
@@ -1,3 +1,9 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional
|
||||
|
||||
from time import sleep
|
||||
|
||||
from imapclient.exceptions import IMAPClientError
|
||||
@@ -11,14 +17,15 @@ from parsedmarc.mail.mailbox_connection import MailboxConnection
|
||||
class IMAPConnection(MailboxConnection):
|
||||
def __init__(
|
||||
self,
|
||||
host=None,
|
||||
user=None,
|
||||
password=None,
|
||||
port=None,
|
||||
ssl=True,
|
||||
verify=True,
|
||||
timeout=30,
|
||||
max_retries=4,
|
||||
host: Optional[str] = None,
|
||||
*,
|
||||
user: Optional[str] = None,
|
||||
password: Optional[str] = None,
|
||||
port: Optional[str] = None,
|
||||
ssl: Optional[bool] = True,
|
||||
verify: Optional[bool] = True,
|
||||
timeout: Optional[int] = 30,
|
||||
max_retries: Optional[int] = 4,
|
||||
):
|
||||
self._username = user
|
||||
self._password = password
|
||||
@@ -45,13 +52,13 @@ class IMAPConnection(MailboxConnection):
|
||||
else:
|
||||
return self._client.search()
|
||||
|
||||
def fetch_message(self, message_id):
|
||||
def fetch_message(self, message_id: int):
|
||||
return self._client.fetch_message(message_id, parse=False)
|
||||
|
||||
def delete_message(self, message_id: str):
|
||||
def delete_message(self, message_id: int):
|
||||
self._client.delete_messages([message_id])
|
||||
|
||||
def move_message(self, message_id: str, folder_name: str):
|
||||
def move_message(self, message_id: int, folder_name: str):
|
||||
self._client.move_messages([message_id], folder_name)
|
||||
|
||||
def keepalive(self):
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC
|
||||
from typing import List
|
||||
|
||||
|
||||
class MailboxConnection(ABC):
|
||||
@@ -10,7 +13,7 @@ class MailboxConnection(ABC):
|
||||
def create_folder(self, folder_name: str):
|
||||
raise NotImplementedError
|
||||
|
||||
def fetch_messages(self, reports_folder: str, **kwargs) -> List[str]:
|
||||
def fetch_messages(self, reports_folder: str, **kwargs) -> list[str]:
|
||||
raise NotImplementedError
|
||||
|
||||
def fetch_message(self, message_id) -> str:
|
||||
|
||||
@@ -1,3 +1,9 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional
|
||||
|
||||
from time import sleep
|
||||
|
||||
from parsedmarc.log import logger
|
||||
@@ -9,8 +15,8 @@ import os
|
||||
class MaildirConnection(MailboxConnection):
|
||||
def __init__(
|
||||
self,
|
||||
maildir_path=None,
|
||||
maildir_create=False,
|
||||
maildir_path: Optional[bool] = None,
|
||||
maildir_create: Optional[bool] = False,
|
||||
):
|
||||
self._maildir_path = maildir_path
|
||||
self._maildir_create = maildir_create
|
||||
@@ -36,7 +42,7 @@ class MaildirConnection(MailboxConnection):
|
||||
def fetch_messages(self, reports_folder: str, **kwargs):
|
||||
return self._client.keys()
|
||||
|
||||
def fetch_message(self, message_id):
|
||||
def fetch_message(self, message_id: str):
|
||||
return self._client.get(message_id).as_string()
|
||||
|
||||
def delete_message(self, message_id: str):
|
||||
|
||||
@@ -1,5 +1,9 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional, Union, Any
|
||||
|
||||
from collections import OrderedDict
|
||||
|
||||
from opensearchpy import (
|
||||
@@ -67,6 +71,8 @@ class _AggregateReportDoc(Document):
|
||||
date_range = Date()
|
||||
date_begin = Date()
|
||||
date_end = Date()
|
||||
normalized_timespan = Boolean()
|
||||
original_timespan_seconds = Integer
|
||||
errors = Text()
|
||||
published_policy = Object(_PublishedPolicy)
|
||||
source_ip_address = Ip()
|
||||
@@ -87,15 +93,15 @@ class _AggregateReportDoc(Document):
|
||||
dkim_results = Nested(_DKIMResult)
|
||||
spf_results = Nested(_SPFResult)
|
||||
|
||||
def add_policy_override(self, type_, comment):
|
||||
def add_policy_override(self, type_: str, comment: str):
|
||||
self.policy_overrides.append(_PolicyOverride(type=type_, comment=comment))
|
||||
|
||||
def add_dkim_result(self, domain, selector, result):
|
||||
def add_dkim_result(self, domain: str, selector: str, result: _DKIMResult):
|
||||
self.dkim_results.append(
|
||||
_DKIMResult(domain=domain, selector=selector, result=result)
|
||||
)
|
||||
|
||||
def add_spf_result(self, domain, scope, result):
|
||||
def add_spf_result(self, domain: str, scope: str, result: _SPFResult):
|
||||
self.spf_results.append(_SPFResult(domain=domain, scope=scope, result=result))
|
||||
|
||||
def save(self, **kwargs):
|
||||
@@ -131,21 +137,21 @@ class _ForensicSampleDoc(InnerDoc):
|
||||
body = Text()
|
||||
attachments = Nested(_EmailAttachmentDoc)
|
||||
|
||||
def add_to(self, display_name, address):
|
||||
def add_to(self, display_name: str, address: str):
|
||||
self.to.append(_EmailAddressDoc(display_name=display_name, address=address))
|
||||
|
||||
def add_reply_to(self, display_name, address):
|
||||
def add_reply_to(self, display_name: str, address: str):
|
||||
self.reply_to.append(
|
||||
_EmailAddressDoc(display_name=display_name, address=address)
|
||||
)
|
||||
|
||||
def add_cc(self, display_name, address):
|
||||
def add_cc(self, display_name: str, address: str):
|
||||
self.cc.append(_EmailAddressDoc(display_name=display_name, address=address))
|
||||
|
||||
def add_bcc(self, display_name, address):
|
||||
def add_bcc(self, display_name: str, address: str):
|
||||
self.bcc.append(_EmailAddressDoc(display_name=display_name, address=address))
|
||||
|
||||
def add_attachment(self, filename, content_type, sha256):
|
||||
def add_attachment(self, filename: str, content_type: str, sha256: str):
|
||||
self.attachments.append(
|
||||
_EmailAttachmentDoc(
|
||||
filename=filename, content_type=content_type, sha256=sha256
|
||||
@@ -197,15 +203,15 @@ class _SMTPTLSPolicyDoc(InnerDoc):
|
||||
|
||||
def add_failure_details(
|
||||
self,
|
||||
result_type,
|
||||
ip_address,
|
||||
receiving_ip,
|
||||
receiving_mx_helo,
|
||||
failed_session_count,
|
||||
sending_mta_ip=None,
|
||||
receiving_mx_hostname=None,
|
||||
additional_information_uri=None,
|
||||
failure_reason_code=None,
|
||||
result_type: str,
|
||||
ip_address: str,
|
||||
receiving_ip: str,
|
||||
receiving_mx_helo: str,
|
||||
failed_session_count: int,
|
||||
sending_mta_ip: Optional[str] = None,
|
||||
receiving_mx_hostname: Optional[str] = None,
|
||||
additional_information_uri: Optional[str] = None,
|
||||
failure_reason_code: Union[str, int, None] = None,
|
||||
):
|
||||
_details = _SMTPTLSFailureDetailsDoc(
|
||||
result_type=result_type,
|
||||
@@ -235,13 +241,14 @@ class _SMTPTLSReportDoc(Document):
|
||||
|
||||
def add_policy(
|
||||
self,
|
||||
policy_type,
|
||||
policy_domain,
|
||||
successful_session_count,
|
||||
failed_session_count,
|
||||
policy_string=None,
|
||||
mx_host_patterns=None,
|
||||
failure_details=None,
|
||||
policy_type: str,
|
||||
policy_domain: str,
|
||||
successful_session_count: int,
|
||||
failed_session_count: int,
|
||||
*,
|
||||
policy_string: Optional[str] = None,
|
||||
mx_host_patterns: Optional[list[str]] = None,
|
||||
failure_details: Optional[str] = None,
|
||||
):
|
||||
self.policies.append(
|
||||
policy_type=policy_type,
|
||||
@@ -259,24 +266,25 @@ class AlreadySaved(ValueError):
|
||||
|
||||
|
||||
def set_hosts(
|
||||
hosts,
|
||||
use_ssl=False,
|
||||
ssl_cert_path=None,
|
||||
username=None,
|
||||
password=None,
|
||||
apiKey=None,
|
||||
timeout=60.0,
|
||||
hosts: Union[str, list[str]],
|
||||
*,
|
||||
use_ssl: Optional[bool] = False,
|
||||
ssl_cert_path: Optional[str] = None,
|
||||
username: Optional[str] = None,
|
||||
password: Optional[str] = None,
|
||||
api_key: Optional[str] = None,
|
||||
timeout: Optional[float] = 60.0,
|
||||
):
|
||||
"""
|
||||
Sets the OpenSearch hosts to use
|
||||
|
||||
Args:
|
||||
hosts (str|list): A hostname or URL, or list of hostnames or URLs
|
||||
hosts (str|list[str]): A single hostname or URL, or list of hostnames or URLs
|
||||
use_ssl (bool): Use an HTTPS connection to the server
|
||||
ssl_cert_path (str): Path to the certificate chain
|
||||
username (str): The username to use for authentication
|
||||
password (str): The password to use for authentication
|
||||
apiKey (str): The Base64 encoded API key to use for authentication
|
||||
api_key (str): The Base64 encoded API key to use for authentication
|
||||
timeout (float): Timeout in seconds
|
||||
"""
|
||||
if not isinstance(hosts, list):
|
||||
@@ -291,12 +299,12 @@ def set_hosts(
|
||||
conn_params["verify_certs"] = False
|
||||
if username:
|
||||
conn_params["http_auth"] = username + ":" + password
|
||||
if apiKey:
|
||||
conn_params["api_key"] = apiKey
|
||||
if api_key:
|
||||
conn_params["api_key"] = api_key
|
||||
connections.create_connection(**conn_params)
|
||||
|
||||
|
||||
def create_indexes(names, settings=None):
|
||||
def create_indexes(names: list[str], settings: Optional[dict[str, Any]] = None):
|
||||
"""
|
||||
Create OpenSearch indexes
|
||||
|
||||
@@ -319,7 +327,10 @@ def create_indexes(names, settings=None):
|
||||
raise OpenSearchError("OpenSearch error: {0}".format(e.__str__()))
|
||||
|
||||
|
||||
def migrate_indexes(aggregate_indexes=None, forensic_indexes=None):
|
||||
def migrate_indexes(
|
||||
aggregate_indexes: Optional[list[str]] = None,
|
||||
forensic_indexes: Optional[list[str]] = None,
|
||||
):
|
||||
"""
|
||||
Updates index mappings
|
||||
|
||||
@@ -365,13 +376,13 @@ def migrate_indexes(aggregate_indexes=None, forensic_indexes=None):
|
||||
pass
|
||||
|
||||
|
||||
def save_aggregate_report_to_opensearch(
|
||||
aggregate_report,
|
||||
index_suffix=None,
|
||||
index_prefix=None,
|
||||
monthly_indexes=False,
|
||||
number_of_shards=1,
|
||||
number_of_replicas=0,
|
||||
def save_aggregate_report_to_elasticsearch(
|
||||
aggregate_report: OrderedDict[str, Any],
|
||||
index_suffix: Optional[str] = None,
|
||||
index_prefix: Optional[str] = None,
|
||||
monthly_indexes: Optional[bool] = False,
|
||||
number_of_shards: Optional[int] = 1,
|
||||
number_of_replicas: Optional[int] = 0,
|
||||
):
|
||||
"""
|
||||
Saves a parsed DMARC aggregate report to OpenSearch
|
||||
@@ -395,15 +406,11 @@ def save_aggregate_report_to_opensearch(
|
||||
domain = aggregate_report["policy_published"]["domain"]
|
||||
begin_date = human_timestamp_to_datetime(metadata["begin_date"], to_utc=True)
|
||||
end_date = human_timestamp_to_datetime(metadata["end_date"], to_utc=True)
|
||||
begin_date_human = begin_date.strftime("%Y-%m-%d %H:%M:%SZ")
|
||||
end_date_human = end_date.strftime("%Y-%m-%d %H:%M:%SZ")
|
||||
|
||||
if monthly_indexes:
|
||||
index_date = begin_date.strftime("%Y-%m")
|
||||
else:
|
||||
index_date = begin_date.strftime("%Y-%m-%d")
|
||||
aggregate_report["begin_date"] = begin_date
|
||||
aggregate_report["end_date"] = end_date
|
||||
date_range = [aggregate_report["begin_date"], aggregate_report["end_date"]]
|
||||
|
||||
org_name_query = Q(dict(match_phrase=dict(org_name=org_name)))
|
||||
report_id_query = Q(dict(match_phrase=dict(report_id=report_id)))
|
||||
@@ -425,6 +432,9 @@ def save_aggregate_report_to_opensearch(
|
||||
try:
|
||||
existing = search.execute()
|
||||
except Exception as error_:
|
||||
begin_date_human = begin_date.strftime("%Y-%m-%d %H:%M:%SZ")
|
||||
end_date_human = end_date.strftime("%Y-%m-%d %H:%M:%SZ")
|
||||
|
||||
raise OpenSearchError(
|
||||
"OpenSearch's search for existing report \
|
||||
error: {}".format(error_.__str__())
|
||||
@@ -450,6 +460,17 @@ def save_aggregate_report_to_opensearch(
|
||||
)
|
||||
|
||||
for record in aggregate_report["records"]:
|
||||
begin_date = human_timestamp_to_datetime(record["interval_begin"], to_utc=True)
|
||||
end_date = human_timestamp_to_datetime(record["interval_end"], to_utc=True)
|
||||
normalized_timespan = record["normalized_timespan"]
|
||||
|
||||
if monthly_indexes:
|
||||
index_date = begin_date.strftime("%Y-%m")
|
||||
else:
|
||||
index_date = begin_date.strftime("%Y-%m-%d")
|
||||
aggregate_report["begin_date"] = begin_date
|
||||
aggregate_report["end_date"] = end_date
|
||||
date_range = [aggregate_report["begin_date"], aggregate_report["end_date"]]
|
||||
agg_doc = _AggregateReportDoc(
|
||||
xml_schema=aggregate_report["xml_schema"],
|
||||
org_name=metadata["org_name"],
|
||||
@@ -457,8 +478,9 @@ def save_aggregate_report_to_opensearch(
|
||||
org_extra_contact_info=metadata["org_extra_contact_info"],
|
||||
report_id=metadata["report_id"],
|
||||
date_range=date_range,
|
||||
date_begin=aggregate_report["begin_date"],
|
||||
date_end=aggregate_report["end_date"],
|
||||
date_begin=begin_date,
|
||||
date_end=end_date,
|
||||
normalized_timespan=normalized_timespan,
|
||||
errors=metadata["errors"],
|
||||
published_policy=published_policy,
|
||||
source_ip_address=record["source"]["ip_address"],
|
||||
@@ -516,13 +538,13 @@ def save_aggregate_report_to_opensearch(
|
||||
raise OpenSearchError("OpenSearch error: {0}".format(e.__str__()))
|
||||
|
||||
|
||||
def save_forensic_report_to_opensearch(
|
||||
forensic_report,
|
||||
index_suffix=None,
|
||||
index_prefix=None,
|
||||
monthly_indexes=False,
|
||||
number_of_shards=1,
|
||||
number_of_replicas=0,
|
||||
def save_forensic_report_to_elasticsearch(
|
||||
forensic_report: OrderedDict[str, Any],
|
||||
index_suffix: Optional[any] = None,
|
||||
index_prefix: Optional[str] = None,
|
||||
monthly_indexes: Optional[bool] = False,
|
||||
number_of_shards: int = 1,
|
||||
number_of_replicas: int = 0,
|
||||
):
|
||||
"""
|
||||
Saves a parsed DMARC forensic report to OpenSearch
|
||||
@@ -683,13 +705,13 @@ def save_forensic_report_to_opensearch(
|
||||
)
|
||||
|
||||
|
||||
def save_smtp_tls_report_to_opensearch(
|
||||
report,
|
||||
index_suffix=None,
|
||||
index_prefix=None,
|
||||
monthly_indexes=False,
|
||||
number_of_shards=1,
|
||||
number_of_replicas=0,
|
||||
def save_smtp_tls_report_to_elasticsearch(
|
||||
report: OrderedDict[str, Any],
|
||||
index_suffix: str = None,
|
||||
index_prefix: str = None,
|
||||
monthly_indexes: Optional[bool] = False,
|
||||
number_of_shards: Optional[int] = 1,
|
||||
number_of_replicas: Optional[int] = 0,
|
||||
):
|
||||
"""
|
||||
Saves a parsed SMTP TLS report to OpenSearch
|
||||
@@ -705,7 +727,7 @@ def save_smtp_tls_report_to_opensearch(
|
||||
Raises:
|
||||
AlreadySaved
|
||||
"""
|
||||
logger.info("Saving aggregate report to OpenSearch")
|
||||
logger.info("Saving SMTP TLS report to OpenSearch")
|
||||
org_name = report["organization_name"]
|
||||
report_id = report["report_id"]
|
||||
begin_date = human_timestamp_to_datetime(report["begin_date"], to_utc=True)
|
||||
@@ -781,7 +803,7 @@ def save_smtp_tls_report_to_opensearch(
|
||||
policy_doc = _SMTPTLSPolicyDoc(
|
||||
policy_domain=policy["policy_domain"],
|
||||
policy_type=policy["policy_type"],
|
||||
succesful_session_count=policy["successful_session_count"],
|
||||
successful_session_count=policy["successful_session_count"],
|
||||
failed_session_count=policy["failed_session_count"],
|
||||
policy_string=policy_strings,
|
||||
mx_host_patterns=mx_host_patterns,
|
||||
|
||||
@@ -132,6 +132,7 @@ asu-vei.ru,ASU-VEI,Industrial
|
||||
atextelecom.com.br,ATEX Telecom,ISP
|
||||
atmailcloud.com,atmail,Email Provider
|
||||
ats.ca,ATS Healthcare,Healthcare
|
||||
att.net,AT&T,ISP
|
||||
atw.ne.jp,ATW,Web Host
|
||||
au-net.ne.jp,KDDI,ISP
|
||||
au.com,au,ISP
|
||||
@@ -242,6 +243,7 @@ carandainet.com.br,CN Internet,ISP
|
||||
cardhealth.com,Cardinal Health,Healthcare
|
||||
cardinal.com,Cardinal Health,Healthcare
|
||||
cardinalhealth.com,Cardinal Health,Healthcare
|
||||
cardinalscriptnet.com,Cardinal Health,Healthcare
|
||||
carecentrix.com,CareCentrix,Healthcare
|
||||
carleton.edu,Carlton College,Education
|
||||
carrierzone.com,carrierzone,Email Security
|
||||
@@ -697,6 +699,7 @@ hdsupply-email.com,HD Supply,Retail
|
||||
healthall.com,UC Health,Healthcare
|
||||
healthcaresupplypros.com,Healthcare Supply Pros,Healthcare
|
||||
healthproductsforyou.com,Health Products For You,Healthcare
|
||||
healthtouch.com,Cardinal Health,Healthcare
|
||||
helloserver6.com,1st Source Web,Marketing
|
||||
helpforcb.com,InterServer,Web Host
|
||||
helpscout.net,Help Scout,SaaS
|
||||
@@ -753,6 +756,8 @@ hostwindsdns.com,Hostwinds,Web Host
|
||||
hotnet.net.il,Hot Net Internet Services,ISP
|
||||
hp.com,HP,Technology
|
||||
hringdu.is,Hringdu,ISP
|
||||
hslda.net,Home School Legal Defense Association (HSLDA),Education
|
||||
hslda.org,Home School Legal Defense Association (HSLDA),Education
|
||||
hspherefilter.com,"DynamicNet, Inc. (DNI)",Web Host
|
||||
htc.net,HTC,ISP
|
||||
htmlservices.it,HTMLServices.it,MSP
|
||||
@@ -763,6 +768,7 @@ hughston.com,Hughston Clinic,Healthcare
|
||||
hvvc.us,Hivelocity,Web Host
|
||||
i2ts.ne.jp,i2ts,Web Host
|
||||
i4i.com,i4i,Technology
|
||||
ibindley.com,Cardinal Health,Healthcare
|
||||
ice.co.cr,Grupo ICE,Industrial
|
||||
icehosting.nl,IceHosting,Web Host
|
||||
icewarpcloud.in,IceWrap,Email Provider
|
||||
@@ -832,6 +838,7 @@ ip-5-196-151.eu,OVH,Web Host
|
||||
ip-51-161-36.net,OVH,Web Host
|
||||
ip-51-195-53.eu,OVH,Web Host
|
||||
ip-51-254-53.eu,OVH,Web Host
|
||||
ip-51-38-67.eu,OVH,Web Host
|
||||
ip-51-77-42.eu,OVH,Web Host
|
||||
ip-51-83-140.eu,OVH,Web Host
|
||||
ip-51-89-240.eu,OVH,Web Host
|
||||
@@ -1217,6 +1224,7 @@ nettoday.co.th,Net Today,Web Host
|
||||
netventure.pl,Netventure,MSP
|
||||
netvigator.com,HKT,ISP
|
||||
netvision.net.il,013 Netvision,ISP
|
||||
network-tech.com,Network Technologies International (NTI),SaaS
|
||||
network.kz,network.kz,ISP
|
||||
network80.com,Network80,Web Host
|
||||
neubox.net,Neubox,Web Host
|
||||
|
||||
|
@@ -13,8 +13,6 @@ def _main():
|
||||
|
||||
csv_headers = ["source_name", "message_count"]
|
||||
|
||||
output_rows = []
|
||||
|
||||
known_unknown_domains = []
|
||||
psl_overrides = []
|
||||
known_domains = []
|
||||
|
||||
@@ -1,23 +1,29 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
import json
|
||||
import boto3
|
||||
|
||||
from collections import OrderedDict
|
||||
|
||||
from parsedmarc.log import logger
|
||||
from parsedmarc.utils import human_timestamp_to_datetime
|
||||
|
||||
|
||||
class S3Client(object):
|
||||
"""A client for a Amazon S3"""
|
||||
"""A client for interacting with Amazon S3"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
bucket_name,
|
||||
bucket_path,
|
||||
region_name,
|
||||
endpoint_url,
|
||||
access_key_id,
|
||||
secret_access_key,
|
||||
bucket_name: str,
|
||||
bucket_path: str,
|
||||
region_name: str,
|
||||
endpoint_url: str,
|
||||
access_key_id: str,
|
||||
secret_access_key: str,
|
||||
):
|
||||
"""
|
||||
Initializes the S3Client
|
||||
@@ -49,16 +55,16 @@ class S3Client(object):
|
||||
)
|
||||
self.bucket = self.s3.Bucket(self.bucket_name)
|
||||
|
||||
def save_aggregate_report_to_s3(self, report):
|
||||
def save_aggregate_report_to_s3(self, report: OrderedDict[str, Any]):
|
||||
self.save_report_to_s3(report, "aggregate")
|
||||
|
||||
def save_forensic_report_to_s3(self, report):
|
||||
def save_forensic_report_to_s3(self, report: OrderedDict[str, Any]):
|
||||
self.save_report_to_s3(report, "forensic")
|
||||
|
||||
def save_smtp_tls_report_to_s3(self, report):
|
||||
def save_smtp_tls_report_to_s3(self, report: OrderedDict[str, Any]):
|
||||
self.save_report_to_s3(report, "smtp_tls")
|
||||
|
||||
def save_report_to_s3(self, report, report_type):
|
||||
def save_report_to_s3(self, report: OrderedDict[str, Any], report_type: str):
|
||||
if report_type == "smtp_tls":
|
||||
report_date = report["begin_date"]
|
||||
report_id = report["report_id"]
|
||||
|
||||
@@ -1,3 +1,11 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from collections import OrderedDict
|
||||
|
||||
from urllib.parse import urlparse
|
||||
import socket
|
||||
import json
|
||||
@@ -23,7 +31,13 @@ class HECClient(object):
|
||||
# http://docs.splunk.com/Documentation/Splunk/latest/RESTREF/RESTinput#services.2Fcollector
|
||||
|
||||
def __init__(
|
||||
self, url, access_token, index, source="parsedmarc", verify=True, timeout=60
|
||||
self,
|
||||
url: str,
|
||||
access_token: str,
|
||||
index: str,
|
||||
source: bool = "parsedmarc",
|
||||
verify=True,
|
||||
timeout=60,
|
||||
):
|
||||
"""
|
||||
Initializes the HECClient
|
||||
@@ -55,7 +69,9 @@ class HECClient(object):
|
||||
"Authorization": "Splunk {0}".format(self.access_token),
|
||||
}
|
||||
|
||||
def save_aggregate_reports_to_splunk(self, aggregate_reports):
|
||||
def save_aggregate_reports_to_splunk(
|
||||
self, aggregate_reports: list[OrderedDict[str, Any]]
|
||||
):
|
||||
"""
|
||||
Saves aggregate DMARC reports to Splunk
|
||||
|
||||
@@ -78,6 +94,9 @@ class HECClient(object):
|
||||
new_report = dict()
|
||||
for metadata in report["report_metadata"]:
|
||||
new_report[metadata] = report["report_metadata"][metadata]
|
||||
new_report["interval_begin"] = record["interval_begin"]
|
||||
new_report["interval_end"] = record["interval_end"]
|
||||
new_report["normalized_timespan"] = record["normalized_timespan"]
|
||||
new_report["published_policy"] = report["policy_published"]
|
||||
new_report["source_ip_address"] = record["source"]["ip_address"]
|
||||
new_report["source_country"] = record["source"]["country"]
|
||||
@@ -98,7 +117,9 @@ class HECClient(object):
|
||||
new_report["spf_results"] = record["auth_results"]["spf"]
|
||||
|
||||
data["sourcetype"] = "dmarc:aggregate"
|
||||
timestamp = human_timestamp_to_unix_timestamp(new_report["begin_date"])
|
||||
timestamp = human_timestamp_to_unix_timestamp(
|
||||
new_report["interval_begin"]
|
||||
)
|
||||
data["time"] = timestamp
|
||||
data["event"] = new_report.copy()
|
||||
json_str += "{0}\n".format(json.dumps(data))
|
||||
@@ -113,7 +134,9 @@ class HECClient(object):
|
||||
if response["code"] != 0:
|
||||
raise SplunkError(response["text"])
|
||||
|
||||
def save_forensic_reports_to_splunk(self, forensic_reports):
|
||||
def save_forensic_reports_to_splunk(
|
||||
self, forensic_reports: list[OrderedDict[str, Any]]
|
||||
):
|
||||
"""
|
||||
Saves forensic DMARC reports to Splunk
|
||||
|
||||
@@ -147,7 +170,7 @@ class HECClient(object):
|
||||
if response["code"] != 0:
|
||||
raise SplunkError(response["text"])
|
||||
|
||||
def save_smtp_tls_reports_to_splunk(self, reports):
|
||||
def save_smtp_tls_reports_to_splunk(self, reports: OrderedDict[str, Any]):
|
||||
"""
|
||||
Saves aggregate DMARC reports to Splunk
|
||||
|
||||
|
||||
@@ -1,7 +1,15 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import logging.handlers
|
||||
|
||||
from typing import Any
|
||||
|
||||
from collections import OrderedDict
|
||||
|
||||
import json
|
||||
|
||||
from parsedmarc import (
|
||||
@@ -14,7 +22,7 @@ from parsedmarc import (
|
||||
class SyslogClient(object):
|
||||
"""A client for Syslog"""
|
||||
|
||||
def __init__(self, server_name, server_port):
|
||||
def __init__(self, server_name: str, server_port: int):
|
||||
"""
|
||||
Initializes the SyslogClient
|
||||
Args:
|
||||
@@ -28,17 +36,23 @@ class SyslogClient(object):
|
||||
log_handler = logging.handlers.SysLogHandler(address=(server_name, server_port))
|
||||
self.logger.addHandler(log_handler)
|
||||
|
||||
def save_aggregate_report_to_syslog(self, aggregate_reports):
|
||||
def save_aggregate_report_to_syslog(
|
||||
self, aggregate_reports: list[OrderedDict[str, Any]]
|
||||
):
|
||||
rows = parsed_aggregate_reports_to_csv_rows(aggregate_reports)
|
||||
for row in rows:
|
||||
self.logger.info(json.dumps(row))
|
||||
|
||||
def save_forensic_report_to_syslog(self, forensic_reports):
|
||||
def save_forensic_report_to_syslog(
|
||||
self, forensic_reports: list[OrderedDict[str, Any]]
|
||||
):
|
||||
rows = parsed_forensic_reports_to_csv_rows(forensic_reports)
|
||||
for row in rows:
|
||||
self.logger.info(json.dumps(row))
|
||||
|
||||
def save_smtp_tls_report_to_syslog(self, smtp_tls_reports):
|
||||
def save_smtp_tls_report_to_syslog(
|
||||
self, smtp_tls_reports: list[OrderedDict[str, Any]]
|
||||
):
|
||||
rows = parsed_smtp_tls_reports_to_csv_rows(smtp_tls_reports)
|
||||
for row in rows:
|
||||
self.logger.info(json.dumps(row))
|
||||
|
||||
@@ -1,11 +1,18 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""Utility functions that might be useful for other projects"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional, Union
|
||||
|
||||
import logging
|
||||
import os
|
||||
from datetime import datetime
|
||||
from datetime import timezone
|
||||
from datetime import timedelta
|
||||
from collections import OrderedDict
|
||||
from expiringdict import ExpiringDict
|
||||
import tempfile
|
||||
import subprocess
|
||||
import shutil
|
||||
@@ -60,12 +67,12 @@ class DownloadError(RuntimeError):
|
||||
"""Raised when an error occurs when downloading a file"""
|
||||
|
||||
|
||||
def decode_base64(data):
|
||||
def decode_base64(data: str) -> bytes:
|
||||
"""
|
||||
Decodes a base64 string, with padding being optional
|
||||
|
||||
Args:
|
||||
data: A base64 encoded string
|
||||
data (str): A base64 encoded string
|
||||
|
||||
Returns:
|
||||
bytes: The decoded bytes
|
||||
@@ -78,7 +85,7 @@ def decode_base64(data):
|
||||
return base64.b64decode(data)
|
||||
|
||||
|
||||
def get_base_domain(domain):
|
||||
def get_base_domain(domain: str) -> str:
|
||||
"""
|
||||
Gets the base domain name for the given domain
|
||||
|
||||
@@ -102,7 +109,14 @@ def get_base_domain(domain):
|
||||
return publicsuffix
|
||||
|
||||
|
||||
def query_dns(domain, record_type, cache=None, nameservers=None, timeout=2.0):
|
||||
def query_dns(
|
||||
domain: str,
|
||||
record_type: str,
|
||||
*,
|
||||
cache: Optional[ExpiringDict] = None,
|
||||
nameservers: list[str] = None,
|
||||
timeout: int = 2.0,
|
||||
) -> list[str]:
|
||||
"""
|
||||
Queries DNS
|
||||
|
||||
@@ -163,7 +177,13 @@ def query_dns(domain, record_type, cache=None, nameservers=None, timeout=2.0):
|
||||
return records
|
||||
|
||||
|
||||
def get_reverse_dns(ip_address, cache=None, nameservers=None, timeout=2.0):
|
||||
def get_reverse_dns(
|
||||
ip_address,
|
||||
*,
|
||||
cache: Optional[ExpiringDict] = None,
|
||||
nameservers: list[str] = None,
|
||||
timeout: int = 2.0,
|
||||
) -> str:
|
||||
"""
|
||||
Resolves an IP address to a hostname using a reverse DNS query
|
||||
|
||||
@@ -191,7 +211,7 @@ def get_reverse_dns(ip_address, cache=None, nameservers=None, timeout=2.0):
|
||||
return hostname
|
||||
|
||||
|
||||
def timestamp_to_datetime(timestamp):
|
||||
def timestamp_to_datetime(timestamp: int) -> datetime:
|
||||
"""
|
||||
Converts a UNIX/DMARC timestamp to a Python ``datetime`` object
|
||||
|
||||
@@ -204,7 +224,7 @@ def timestamp_to_datetime(timestamp):
|
||||
return datetime.fromtimestamp(int(timestamp))
|
||||
|
||||
|
||||
def timestamp_to_human(timestamp):
|
||||
def timestamp_to_human(timestamp: int) -> str:
|
||||
"""
|
||||
Converts a UNIX/DMARC timestamp to a human-readable string
|
||||
|
||||
@@ -217,7 +237,9 @@ def timestamp_to_human(timestamp):
|
||||
return timestamp_to_datetime(timestamp).strftime("%Y-%m-%d %H:%M:%S")
|
||||
|
||||
|
||||
def human_timestamp_to_datetime(human_timestamp, to_utc=False):
|
||||
def human_timestamp_to_datetime(
|
||||
human_timestamp: str, *, to_utc: Optional[bool] = False
|
||||
) -> datetime:
|
||||
"""
|
||||
Converts a human-readable timestamp into a Python ``datetime`` object
|
||||
|
||||
@@ -236,7 +258,7 @@ def human_timestamp_to_datetime(human_timestamp, to_utc=False):
|
||||
return dt.astimezone(timezone.utc) if to_utc else dt
|
||||
|
||||
|
||||
def human_timestamp_to_unix_timestamp(human_timestamp):
|
||||
def human_timestamp_to_unix_timestamp(human_timestamp: str) -> int:
|
||||
"""
|
||||
Converts a human-readable timestamp into a UNIX timestamp
|
||||
|
||||
@@ -250,7 +272,7 @@ def human_timestamp_to_unix_timestamp(human_timestamp):
|
||||
return human_timestamp_to_datetime(human_timestamp).timestamp()
|
||||
|
||||
|
||||
def get_ip_address_country(ip_address, db_path=None):
|
||||
def get_ip_address_country(ip_address: str, *, db_path: Optional[str] = None) -> str:
|
||||
"""
|
||||
Returns the ISO code for the country associated
|
||||
with the given IPv4 or IPv6 address
|
||||
@@ -277,7 +299,7 @@ def get_ip_address_country(ip_address, db_path=None):
|
||||
]
|
||||
|
||||
if db_path is not None:
|
||||
if os.path.isfile(db_path) is False:
|
||||
if not os.path.isfile(db_path):
|
||||
db_path = None
|
||||
logger.warning(
|
||||
f"No file exists at {db_path}. Falling back to an "
|
||||
@@ -314,12 +336,13 @@ def get_ip_address_country(ip_address, db_path=None):
|
||||
|
||||
def get_service_from_reverse_dns_base_domain(
|
||||
base_domain,
|
||||
always_use_local_file=False,
|
||||
local_file_path=None,
|
||||
url=None,
|
||||
offline=False,
|
||||
reverse_dns_map=None,
|
||||
):
|
||||
*,
|
||||
always_use_local_file: Optional[bool] = False,
|
||||
local_file_path: Optional[bool] = None,
|
||||
url: Optional[bool] = None,
|
||||
offline: Optional[bool] = False,
|
||||
reverse_dns_map: Optional[bool] = None,
|
||||
) -> str:
|
||||
"""
|
||||
Returns the service name of a given base domain name from reverse DNS.
|
||||
|
||||
@@ -389,16 +412,17 @@ def get_service_from_reverse_dns_base_domain(
|
||||
|
||||
def get_ip_address_info(
|
||||
ip_address,
|
||||
ip_db_path=None,
|
||||
reverse_dns_map_path=None,
|
||||
always_use_local_files=False,
|
||||
reverse_dns_map_url=None,
|
||||
cache=None,
|
||||
reverse_dns_map=None,
|
||||
offline=False,
|
||||
nameservers=None,
|
||||
timeout=2.0,
|
||||
):
|
||||
*,
|
||||
ip_db_path: Optional[str] = None,
|
||||
reverse_dns_map_path: Optional[str] = None,
|
||||
always_use_local_files: Optional[bool] = False,
|
||||
reverse_dns_map_url: Optional[bool] = None,
|
||||
cache: Optional[ExpiringDict] = None,
|
||||
reverse_dns_map: Optional[bool] = None,
|
||||
offline: Optional[bool] = False,
|
||||
nameservers: Optional[list[str]] = None,
|
||||
timeout: Optional[float] = 2.0,
|
||||
) -> OrderedDict[str, str]:
|
||||
"""
|
||||
Returns reverse DNS and country information for the given IP address
|
||||
|
||||
@@ -416,7 +440,7 @@ def get_ip_address_info(
        timeout (float): Sets the DNS timeout in seconds

    Returns:
        OrderedDict: ``ip_address``, ``reverse_dns``
        OrderedDict: ``ip_address``, ``reverse_dns``, ``country``

    """
    ip_address = ip_address.lower()

@@ -463,7 +487,7 @@ def get_ip_address_info(
    return info


def parse_email_address(original_address):
def parse_email_address(original_address: str) -> OrderedDict[str, str]:
    if original_address[0] == "":
        display_name = None
    else:
@@ -486,7 +510,7 @@ def parse_email_address(original_address):
    )


def get_filename_safe_string(string):
def get_filename_safe_string(string: str) -> str:
    """
    Converts a string to a string that is safe for a filename

@@ -508,7 +532,7 @@ def get_filename_safe_string(string):
    return string


def is_mbox(path):
def is_mbox(path: str) -> bool:
    """
    Checks if the given content is an MBOX mailbox file

@@ -529,7 +553,7 @@ def is_mbox(path):
    return _is_mbox


def is_outlook_msg(content):
def is_outlook_msg(content) -> bool:
    """
    Checks if the given content is an Outlook msg OLE/MSG file
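The new annotations make the calling conventions explicit: `is_mbox` takes a filesystem path, while `is_outlook_msg` inspects raw content. A small sketch (filenames are placeholders):

from parsedmarc.utils import is_mbox, is_outlook_msg

if is_mbox("mailbox.mbox"):           # checked by path
    print("Looks like an mbox file")

with open("message.msg", "rb") as f:
    data = f.read()
if is_outlook_msg(data):              # checked by content, per the docstring above
    print("Looks like an Outlook .msg file")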
@@ -544,7 +568,7 @@ def is_outlook_msg(content):
    )


def convert_outlook_msg(msg_bytes):
def convert_outlook_msg(msg_bytes: bytes) -> str:
    """
    Uses the ``msgconvert`` Perl utility to convert an Outlook MS file to
    standard RFC 822 format

@@ -580,7 +604,9 @@ def convert_outlook_msg(msg_bytes):
    return rfc822


def parse_email(data, strip_attachment_payloads=False):
def parse_email(
    data: Union[bytes, str], *, strip_attachment_payloads: Optional[bool] = False
):
    """
    A simplified email parser
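`parse_email` keeps its simple interface but makes `strip_attachment_payloads` keyword-only, and `convert_outlook_msg` still shells out to the `msgconvert` Perl utility, which must be on the PATH. A usage sketch (filename is a placeholder, module path assumed to be `parsedmarc.utils`):

from parsedmarc.utils import convert_outlook_msg, parse_email

with open("sample.eml", "rb") as f:
    raw = f.read()

# For Outlook .msg input, convert to RFC 822 first:
# raw = convert_outlook_msg(raw)

# strip_attachment_payloads is now keyword-only
parsed = parse_email(raw, strip_attachment_payloads=True)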
@@ -1,3 +1,11 @@
# -*- coding: utf-8 -*-

from __future__ import annotations

from typing import Any, Optional, Union

from collections import OrderedDict

import requests

from parsedmarc import logger

@@ -7,7 +15,13 @@ from parsedmarc.constants import USER_AGENT
class WebhookClient(object):
    """A client for webhooks"""

    def __init__(self, aggregate_url, forensic_url, smtp_tls_url, timeout=60):
    def __init__(
        self,
        aggregate_url: str,
        forensic_url: str,
        smtp_tls_url: str,
        timeout: Optional[int] = 60,
    ):
        """
        Initializes the WebhookClient
        Args:

@@ -26,25 +40,27 @@ class WebhookClient(object):
            "Content-Type": "application/json",
        }

    def save_forensic_report_to_webhook(self, report):
    def save_forensic_report_to_webhook(self, report: OrderedDict[str, Any]):
        try:
            self._send_to_webhook(self.forensic_url, report)
        except Exception as error_:
            logger.error("Webhook Error: {0}".format(error_.__str__()))

    def save_smtp_tls_report_to_webhook(self, report):
    def save_smtp_tls_report_to_webhook(self, report: OrderedDict[str, Any]):
        try:
            self._send_to_webhook(self.smtp_tls_url, report)
        except Exception as error_:
            logger.error("Webhook Error: {0}".format(error_.__str__()))

    def save_aggregate_report_to_webhook(self, report):
    def save_aggregate_report_to_webhook(self, report: OrderedDict[str, Any]):
        try:
            self._send_to_webhook(self.aggregate_url, report)
        except Exception as error_:
            logger.error("Webhook Error: {0}".format(error_.__str__()))

    def _send_to_webhook(self, webhook_url, payload):
    def _send_to_webhook(
        self, webhook_url: str, payload: Union[bytes, str, dict[str, Any]]
    ):
        try:
            self.session.post(webhook_url, data=payload, timeout=self.timeout)
        except Exception as error_:
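Taken together, the webhook changes only add annotations; the client's runtime behaviour is unchanged. A construction sketch, assuming the class is importable from `parsedmarc.webhook` and using placeholder URLs:

from collections import OrderedDict

from parsedmarc.webhook import WebhookClient  # module path assumed

client = WebhookClient(
    aggregate_url="https://example.com/hooks/dmarc-aggregate",
    forensic_url="https://example.com/hooks/dmarc-forensic",
    smtp_tls_url="https://example.com/hooks/smtp-tls",
    timeout=30,
)

report = OrderedDict(org_name="example.org")  # stand-in for a parsed report
client.save_aggregate_report_to_webhook(report)

As the except blocks above show, delivery failures are caught and logged by the client rather than raised to the caller.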
@@ -28,6 +28,7 @@ classifiers = [
    "Operating System :: OS Independent",
    "Programming Language :: Python :: 3"
]
requires-python = ">=3.9, <3.14"
dependencies = [
    "azure-identity>=1.8.0",
    "azure-monitor-ingestion>=1.0.0",
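The new `requires-python` bound means installers will refuse Python 3.14 and later as well as anything older than 3.9. To evaluate the same specifier outside of pip, the `packaging` library (an assumption here, not a dependency declared above) can be used:

from packaging.specifiers import SpecifierSet

spec = SpecifierSet(">=3.9, <3.14")
print(spec.contains("3.13.1"))  # True
print(spec.contains("3.14.0"))  # False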
@@ -86,11 +87,11 @@ include = [

[tool.hatch.build]
exclude = [
    "base_reverse_dns.csv",
    "find_bad_utf8.py",
    "find_unknown_base_reverse_dns.py",
    "unknown_base_reverse_dns.csv",
    "sortmaps.py",
    "README.md",
    "*.bak"
    "base_reverse_dns.csv",
    "find_bad_utf8.py",
    "find_unknown_base_reverse_dns.py",
    "unknown_base_reverse_dns.csv",
    "sortmaps.py",
    "README.md",
    "*.bak"
]
5
tests.py
5
tests.py
@@ -1,3 +1,6 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from __future__ import absolute_import, print_function, unicode_literals

import os

@@ -74,7 +77,7 @@ class Test(unittest.TestCase):
        print()
        file = "samples/extract_report/nice-input.xml"
        print("Testing {0}: ".format(file), end="")
        xmlout = parsedmarc.extract_report(file)
        xmlout = parsedmarc.extract_report_from_file_path(file)
        xmlin_file = open("samples/extract_report/nice-input.xml")
        xmlin = xmlin_file.read()
        xmlin_file.close()
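The updated test exercises the new file-path helper directly. A sketch that mirrors it, reading the expected XML back for comparison:

import parsedmarc

# Extract the XML payload from a report file on disk
xml_from_path = parsedmarc.extract_report_from_file_path(
    "samples/extract_report/nice-input.xml"
)

with open("samples/extract_report/nice-input.xml") as f:
    expected = f.read()
assert xml_from_path == expected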