mirror of
https://github.com/domainaware/parsedmarc.git
synced 2026-04-02 03:42:45 +00:00
Compare commits
2 Commits
copilot/dr
...
update-ela
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
66506056ac | ||
|
|
eb912ce68d |
10
.github/workflows/docker.yml
vendored
10
.github/workflows/docker.yml
vendored
@@ -24,11 +24,11 @@ jobs:
|
|||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout repository
|
- name: Checkout repository
|
||||||
uses: actions/checkout@v5
|
uses: actions/checkout@v3
|
||||||
|
|
||||||
- name: Docker meta
|
- name: Docker meta
|
||||||
id: meta
|
id: meta
|
||||||
uses: docker/metadata-action@v5
|
uses: docker/metadata-action@v3
|
||||||
with:
|
with:
|
||||||
images: |
|
images: |
|
||||||
${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
|
${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
|
||||||
@@ -40,14 +40,16 @@ jobs:
|
|||||||
type=semver,pattern={{major}}.{{minor}}
|
type=semver,pattern={{major}}.{{minor}}
|
||||||
|
|
||||||
- name: Log in to the Container registry
|
- name: Log in to the Container registry
|
||||||
uses: docker/login-action@v3
|
# https://github.com/docker/login-action/releases/tag/v2.0.0
|
||||||
|
uses: docker/login-action@49ed152c8eca782a232dede0303416e8f356c37b
|
||||||
with:
|
with:
|
||||||
registry: ${{ env.REGISTRY }}
|
registry: ${{ env.REGISTRY }}
|
||||||
username: ${{ github.actor }}
|
username: ${{ github.actor }}
|
||||||
password: ${{ secrets.GITHUB_TOKEN }}
|
password: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
|
||||||
- name: Build and push Docker image
|
- name: Build and push Docker image
|
||||||
uses: docker/build-push-action@v6
|
# https://github.com/docker/build-push-action/releases/tag/v3.0.0
|
||||||
|
uses: docker/build-push-action@e551b19e49efd4e98792db7592c17c09b89db8d8
|
||||||
with:
|
with:
|
||||||
context: .
|
context: .
|
||||||
push: ${{ github.event_name == 'release' }}
|
push: ${{ github.event_name == 'release' }}
|
||||||
|
|||||||
34
.github/workflows/python-tests.yml
vendored
34
.github/workflows/python-tests.yml
vendored
@@ -11,37 +11,31 @@ on:
|
|||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
build:
|
build:
|
||||||
runs-on: ubuntu-latest
|
|
||||||
|
|
||||||
services:
|
runs-on: ubuntu-latest
|
||||||
elasticsearch:
|
|
||||||
image: elasticsearch:8.19.7
|
|
||||||
env:
|
|
||||||
discovery.type: single-node
|
|
||||||
cluster.name: parsedmarc-cluster
|
|
||||||
discovery.seed_hosts: elasticsearch
|
|
||||||
bootstrap.memory_lock: true
|
|
||||||
xpack.security.enabled: false
|
|
||||||
xpack.license.self_generated.type: basic
|
|
||||||
ports:
|
|
||||||
- 9200:9200
|
|
||||||
- 9300:9300
|
|
||||||
|
|
||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
|
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v5
|
- uses: actions/checkout@v4
|
||||||
- name: Set up Python ${{ matrix.python-version }}
|
- name: Set up Python ${{ matrix.python-version }}
|
||||||
uses: actions/setup-python@v6
|
uses: actions/setup-python@v5
|
||||||
with:
|
with:
|
||||||
python-version: ${{ matrix.python-version }}
|
python-version: ${{ matrix.python-version }}
|
||||||
- name: Install system dependencies
|
- name: Install system dependencies
|
||||||
run: |
|
run: |
|
||||||
sudo apt-get -q update
|
sudo apt-get update
|
||||||
sudo apt-get -qy install libemail-outlook-message-perl
|
sudo apt-get install -y libemail-outlook-message-perl
|
||||||
|
wget -qO - https://artifacts.elastic.co/GPG-KEY-elasticsearch | sudo gpg --dearmor -o /usr/share/keyrings/elasticsearch-keyring.gpg
|
||||||
|
sudo apt-get install apt-transport-https
|
||||||
|
echo "deb [signed-by=/usr/share/keyrings/elasticsearch-keyring.gpg] https://artifacts.elastic.co/packages/8.x/apt stable main" | sudo tee /etc/apt/sources.list.d/elastic-8.x.list
|
||||||
|
sudo apt-get update && sudo apt-get install elasticsearch
|
||||||
|
sudo sed -i 's/xpack.security.enabled: true/xpack.security.enabled: false/' /etc/elasticsearch/elasticsearch.yml
|
||||||
|
sudo systemctl restart elasticsearch
|
||||||
|
sudo systemctl --no-pager status elasticsearch
|
||||||
- name: Install Python dependencies
|
- name: Install Python dependencies
|
||||||
run: |
|
run: |
|
||||||
python -m pip install --upgrade pip
|
python -m pip install --upgrade pip
|
||||||
@@ -65,6 +59,6 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
hatch build
|
hatch build
|
||||||
- name: Upload coverage to Codecov
|
- name: Upload coverage to Codecov
|
||||||
uses: codecov/codecov-action@v5
|
uses: codecov/codecov-action@v4
|
||||||
with:
|
with:
|
||||||
token: ${{ secrets.CODECOV_TOKEN }}
|
token: ${{ secrets.CODECOV_TOKEN }}
|
||||||
|
|||||||
8
.gitignore
vendored
8
.gitignore
vendored
@@ -106,7 +106,7 @@ ENV/
|
|||||||
.idea/
|
.idea/
|
||||||
|
|
||||||
# VS Code launch config
|
# VS Code launch config
|
||||||
#.vscode/launch.json
|
.vscode/launch.json
|
||||||
|
|
||||||
# Visual Studio Code settings
|
# Visual Studio Code settings
|
||||||
#.vscode/
|
#.vscode/
|
||||||
@@ -139,9 +139,3 @@ samples/private
|
|||||||
|
|
||||||
parsedmarc.ini
|
parsedmarc.ini
|
||||||
scratch.py
|
scratch.py
|
||||||
|
|
||||||
parsedmarc/resources/maps/base_reverse_dns.csv
|
|
||||||
parsedmarc/resources/maps/unknown_base_reverse_dns.csv
|
|
||||||
parsedmarc/resources/maps/sus_domains.csv
|
|
||||||
parsedmarc/resources/maps/unknown_domains.txt
|
|
||||||
*.bak
|
|
||||||
|
|||||||
45
.vscode/launch.json
vendored
45
.vscode/launch.json
vendored
@@ -1,45 +0,0 @@
|
|||||||
{
|
|
||||||
// Use IntelliSense to learn about possible attributes.
|
|
||||||
// Hover to view descriptions of existing attributes.
|
|
||||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
|
||||||
"version": "0.2.0",
|
|
||||||
"configurations": [
|
|
||||||
{
|
|
||||||
"name": "Python Debugger: Current File",
|
|
||||||
"type": "debugpy",
|
|
||||||
"request": "launch",
|
|
||||||
"program": "${file}",
|
|
||||||
"console": "integratedTerminal"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "tests.py",
|
|
||||||
"type": "debugpy",
|
|
||||||
"request": "launch",
|
|
||||||
"program": "tests.py",
|
|
||||||
"console": "integratedTerminal"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "sample",
|
|
||||||
"type": "debugpy",
|
|
||||||
"request": "launch",
|
|
||||||
"module": "parsedmarc.cli",
|
|
||||||
"args": ["samples/private/sample"]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "sortlists.py",
|
|
||||||
"type": "debugpy",
|
|
||||||
"request": "launch",
|
|
||||||
"program": "sortlists.py",
|
|
||||||
"cwd": "${workspaceFolder}/parsedmarc/resources/maps",
|
|
||||||
"console": "integratedTerminal"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "find_unknown_base_reverse_dns.py",
|
|
||||||
"type": "debugpy",
|
|
||||||
"request": "launch",
|
|
||||||
"program": "find_unknown_base_reverse_dns.py",
|
|
||||||
"cwd": "${workspaceFolder}/parsedmarc/resources/maps",
|
|
||||||
"console": "integratedTerminal"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
284
.vscode/settings.json
vendored
284
.vscode/settings.json
vendored
@@ -1,166 +1,132 @@
|
|||||||
{
|
{
|
||||||
"[python]": {
|
|
||||||
"editor.defaultFormatter": "charliermarsh.ruff",
|
|
||||||
"editor.formatOnSave": true,
|
|
||||||
|
|
||||||
// Let Ruff handle lint fixes + import sorting on save
|
|
||||||
"editor.codeActionsOnSave": {
|
|
||||||
"source.fixAll.ruff": "explicit",
|
|
||||||
"source.organizeImports.ruff": "explicit"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"markdownlint.config": {
|
"markdownlint.config": {
|
||||||
"MD024": false
|
"MD024": false
|
||||||
},
|
},
|
||||||
"cSpell.words": [
|
"cSpell.words": [
|
||||||
"adkim",
|
"adkim",
|
||||||
"akamaiedge",
|
"akamaiedge",
|
||||||
"amsmath",
|
"amsmath",
|
||||||
"andrewmcgilvray",
|
"andrewmcgilvray",
|
||||||
"arcname",
|
"arcname",
|
||||||
"aspf",
|
"aspf",
|
||||||
"autoclass",
|
"autoclass",
|
||||||
"automodule",
|
"automodule",
|
||||||
"backported",
|
"backported",
|
||||||
"bellsouth",
|
"bellsouth",
|
||||||
"boto",
|
"brakhane",
|
||||||
"brakhane",
|
"Brightmail",
|
||||||
"Brightmail",
|
"CEST",
|
||||||
"CEST",
|
"CHACHA",
|
||||||
"CHACHA",
|
"checkdmarc",
|
||||||
"checkdmarc",
|
"Codecov",
|
||||||
"Codecov",
|
"confnew",
|
||||||
"confnew",
|
"dateparser",
|
||||||
"dateparser",
|
"dateutil",
|
||||||
"dateutil",
|
"Davmail",
|
||||||
"Davmail",
|
"DBIP",
|
||||||
"DBIP",
|
"dearmor",
|
||||||
"dearmor",
|
"deflist",
|
||||||
"deflist",
|
"devel",
|
||||||
"devel",
|
"DMARC",
|
||||||
"DMARC",
|
"Dmarcian",
|
||||||
"Dmarcian",
|
"dnspython",
|
||||||
"dnspython",
|
"dollarmath",
|
||||||
"dollarmath",
|
"dpkg",
|
||||||
"dpkg",
|
"exampleuser",
|
||||||
"exampleuser",
|
"expiringdict",
|
||||||
"expiringdict",
|
"fieldlist",
|
||||||
"fieldlist",
|
"genindex",
|
||||||
"GELF",
|
"geoipupdate",
|
||||||
"genindex",
|
"Geolite",
|
||||||
"geoip",
|
"geolocation",
|
||||||
"geoipupdate",
|
"githubpages",
|
||||||
"Geolite",
|
"Grafana",
|
||||||
"geolocation",
|
"hostnames",
|
||||||
"githubpages",
|
"htpasswd",
|
||||||
"Grafana",
|
"httpasswd",
|
||||||
"hostnames",
|
"IMAP",
|
||||||
"htpasswd",
|
"Interaktive",
|
||||||
"httpasswd",
|
"IPDB",
|
||||||
"httplib",
|
"journalctl",
|
||||||
"ifhost",
|
"keepalive",
|
||||||
"IMAP",
|
"keyout",
|
||||||
"imapclient",
|
"keyrings",
|
||||||
"infile",
|
"Leeman",
|
||||||
"Interaktive",
|
"libemail",
|
||||||
"IPDB",
|
"linkify",
|
||||||
"journalctl",
|
"LISTSERV",
|
||||||
"kafkaclient",
|
"lxml",
|
||||||
"keepalive",
|
"mailparser",
|
||||||
"keyout",
|
"mailrelay",
|
||||||
"keyrings",
|
"mailsuite",
|
||||||
"Leeman",
|
"maxdepth",
|
||||||
"libemail",
|
"maxmind",
|
||||||
"linkify",
|
"mbox",
|
||||||
"LISTSERV",
|
"mfrom",
|
||||||
"loganalytics",
|
"michaeldavie",
|
||||||
"lxml",
|
"mikesiegel",
|
||||||
"mailparser",
|
"mitigations",
|
||||||
"mailrelay",
|
"MMDB",
|
||||||
"mailsuite",
|
"modindex",
|
||||||
"maxdepth",
|
"msgconvert",
|
||||||
"MAXHEADERS",
|
"msgraph",
|
||||||
"maxmind",
|
"MSSP",
|
||||||
"mbox",
|
"Munge",
|
||||||
"mfrom",
|
"ndjson",
|
||||||
"mhdw",
|
"newkey",
|
||||||
"michaeldavie",
|
"Nhcm",
|
||||||
"mikesiegel",
|
"nojekyll",
|
||||||
"Mimecast",
|
"nondigest",
|
||||||
"mitigations",
|
"nosecureimap",
|
||||||
"MMDB",
|
"nosniff",
|
||||||
"modindex",
|
"nwettbewerb",
|
||||||
"msgconvert",
|
"parsedmarc",
|
||||||
"msgraph",
|
"passsword",
|
||||||
"MSSP",
|
"Postorius",
|
||||||
"multiprocess",
|
"premade",
|
||||||
"Munge",
|
"procs",
|
||||||
"ndjson",
|
"publicsuffix",
|
||||||
"newkey",
|
"publixsuffix",
|
||||||
"Nhcm",
|
"pypy",
|
||||||
"nojekyll",
|
"quickstart",
|
||||||
"nondigest",
|
"Reindex",
|
||||||
"nosecureimap",
|
"replyto",
|
||||||
"nosniff",
|
"reversename",
|
||||||
"nwettbewerb",
|
"Rollup",
|
||||||
"opensearch",
|
"Rpdm",
|
||||||
"opensearchpy",
|
"SAMEORIGIN",
|
||||||
"parsedmarc",
|
"Servernameone",
|
||||||
"passsword",
|
"setuptools",
|
||||||
"pbar",
|
"smartquotes",
|
||||||
"Postorius",
|
"SMTPTLS",
|
||||||
"premade",
|
"sourcetype",
|
||||||
"privatesuffix",
|
"STARTTLS",
|
||||||
"procs",
|
"tasklist",
|
||||||
"publicsuffix",
|
"timespan",
|
||||||
"publicsuffixlist",
|
"tlsa",
|
||||||
"publixsuffix",
|
"tlsrpt",
|
||||||
"pygelf",
|
"toctree",
|
||||||
"pypy",
|
"TQDDM",
|
||||||
"pytest",
|
"tqdm",
|
||||||
"quickstart",
|
"truststore",
|
||||||
"Reindex",
|
"Übersicht",
|
||||||
"replyto",
|
"uids",
|
||||||
"reversename",
|
"unparasable",
|
||||||
"Rollup",
|
"uper",
|
||||||
"Rpdm",
|
"urllib",
|
||||||
"SAMEORIGIN",
|
"Valimail",
|
||||||
"sdist",
|
"venv",
|
||||||
"Servernameone",
|
"Vhcw",
|
||||||
"setuptools",
|
"viewcode",
|
||||||
"smartquotes",
|
"virtualenv",
|
||||||
"SMTPTLS",
|
"WBITS",
|
||||||
"sortlists",
|
"webmail",
|
||||||
"sortmaps",
|
"Wettbewerber",
|
||||||
"sourcetype",
|
"Whalen",
|
||||||
"STARTTLS",
|
"whitespaces",
|
||||||
"tasklist",
|
"xennn",
|
||||||
"timespan",
|
"xmltodict",
|
||||||
"tlsa",
|
"xpack",
|
||||||
"tlsrpt",
|
"zscholl"
|
||||||
"toctree",
|
|
||||||
"TQDDM",
|
|
||||||
"tqdm",
|
|
||||||
"truststore",
|
|
||||||
"Übersicht",
|
|
||||||
"uids",
|
|
||||||
"Uncategorized",
|
|
||||||
"unparasable",
|
|
||||||
"uper",
|
|
||||||
"urllib",
|
|
||||||
"Valimail",
|
|
||||||
"venv",
|
|
||||||
"Vhcw",
|
|
||||||
"viewcode",
|
|
||||||
"virtualenv",
|
|
||||||
"WBITS",
|
|
||||||
"webmail",
|
|
||||||
"Wettbewerber",
|
|
||||||
"Whalen",
|
|
||||||
"whitespaces",
|
|
||||||
"xennn",
|
|
||||||
"xmltodict",
|
|
||||||
"xpack",
|
|
||||||
"zscholl"
|
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
687
CHANGELOG.md
687
CHANGELOG.md
File diff suppressed because it is too large
Load Diff
@@ -1,4 +1,4 @@
|
|||||||
ARG BASE_IMAGE=python:3.13-slim
|
ARG BASE_IMAGE=python:3.9-slim
|
||||||
ARG USERNAME=parsedmarc
|
ARG USERNAME=parsedmarc
|
||||||
ARG USER_UID=1000
|
ARG USER_UID=1000
|
||||||
ARG USER_GID=$USER_UID
|
ARG USER_GID=$USER_UID
|
||||||
|
|||||||
41
README.md
41
README.md
@@ -9,7 +9,7 @@ Package](https://img.shields.io/pypi/v/parsedmarc.svg)](https://pypi.org/project
|
|||||||
[](https://pypistats.org/packages/parsedmarc)
|
[](https://pypistats.org/packages/parsedmarc)
|
||||||
|
|
||||||
<p align="center">
|
<p align="center">
|
||||||
<img src="https://raw.githubusercontent.com/domainaware/parsedmarc/refs/heads/master/docs/source/_static/screenshots/dmarc-summary-charts.png?raw=true" alt="A screenshot of DMARC summary charts in Kibana"/>
|
<img src="https://github.com/domainaware/parsedmarc/raw/master/docs/source/_static/screenshots/dmarc-summary-charts.png?raw=true" alt="A screenshot of DMARC summary charts in Kibana"/>
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
`parsedmarc` is a Python module and CLI utility for parsing DMARC
|
`parsedmarc` is a Python module and CLI utility for parsing DMARC
|
||||||
@@ -23,42 +23,25 @@ ProofPoint Email Fraud Defense, and Valimail.
|
|||||||
|
|
||||||
## Help Wanted
|
## Help Wanted
|
||||||
|
|
||||||
This project is maintained by one developer. Please consider reviewing the open
|
This project is maintained by one developer. Please consider
|
||||||
[issues](https://github.com/domainaware/parsedmarc/issues) to see how you can
|
reviewing the open
|
||||||
contribute code, documentation, or user support. Assistance on the pinned
|
[issues](https://github.com/domainaware/parsedmarc/issues) to see how
|
||||||
issues would be particularly helpful.
|
you can contribute code, documentation, or user support. Assistance on
|
||||||
|
the pinned issues would be particularly helpful.
|
||||||
|
|
||||||
Thanks to all
|
Thanks to all
|
||||||
[contributors](https://github.com/domainaware/parsedmarc/graphs/contributors)!
|
[contributors](https://github.com/domainaware/parsedmarc/graphs/contributors)!
|
||||||
|
|
||||||
## Features
|
## Features
|
||||||
|
|
||||||
- Parses draft and 1.0 standard aggregate/rua DMARC reports
|
- Parses draft and 1.0 standard aggregate/rua reports
|
||||||
- Parses forensic/failure/ruf DMARC reports
|
- Parses forensic/failure/ruf reports
|
||||||
- Parses reports from SMTP TLS Reporting
|
- Can parse reports from an inbox over IMAP, Microsoft Graph, or Gmail
|
||||||
- Can parse reports from an inbox over IMAP, Microsoft Graph, or Gmail API
|
API
|
||||||
- Transparently handles gzip or zip compressed reports
|
- Transparently handles gzip or zip compressed reports
|
||||||
- Consistent data structures
|
- Consistent data structures
|
||||||
- Simple JSON and/or CSV output
|
- Simple JSON and/or CSV output
|
||||||
- Optionally email the results
|
- Optionally email the results
|
||||||
- Optionally send the results to Elasticsearch, Opensearch, and/or Splunk, for
|
- Optionally send the results to Elasticsearch, Opensearch, and/or Splunk, for use
|
||||||
use with premade dashboards
|
with premade dashboards
|
||||||
- Optionally send reports to Apache Kafka
|
- Optionally send reports to Apache Kafka
|
||||||
|
|
||||||
## Python Compatibility
|
|
||||||
|
|
||||||
This project supports the following Python versions, which are either actively maintained or are the default versions
|
|
||||||
for RHEL or Debian.
|
|
||||||
|
|
||||||
| Version | Supported | Reason |
|
|
||||||
|---------|-----------|------------------------------------------------------------|
|
|
||||||
| < 3.6 | ❌ | End of Life (EOL) |
|
|
||||||
| 3.6 | ❌ | Used in RHEL 8, but not supported by project dependencies |
|
|
||||||
| 3.7 | ❌ | End of Life (EOL) |
|
|
||||||
| 3.8 | ❌ | End of Life (EOL) |
|
|
||||||
| 3.9 | ❌ | Used in Debian 11 and RHEL 9, but not supported by project dependencies |
|
|
||||||
| 3.10 | ✅ | Actively maintained |
|
|
||||||
| 3.11 | ✅ | Actively maintained; supported until June 2028 (Debian 12) |
|
|
||||||
| 3.12 | ✅ | Actively maintained; supported until May 2035 (RHEL 10) |
|
|
||||||
| 3.13 | ✅ | Actively maintained; supported until June 2030 (Debian 13) |
|
|
||||||
| 3.14 | ✅ | Actively maintained |
|
|
||||||
|
|||||||
11
build.sh
11
build.sh
@@ -9,19 +9,16 @@ fi
|
|||||||
. venv/bin/activate
|
. venv/bin/activate
|
||||||
pip install .[build]
|
pip install .[build]
|
||||||
ruff format .
|
ruff format .
|
||||||
|
ruff check .
|
||||||
cd docs
|
cd docs
|
||||||
make clean
|
make clean
|
||||||
make html
|
make html
|
||||||
touch build/html/.nojekyll
|
touch build/html/.nojekyll
|
||||||
if [ -d "../../parsedmarc-docs" ]; then
|
if [ -d "./../parsedmarc-docs" ]; then
|
||||||
cp -rf build/html/* ../../parsedmarc-docs/
|
cp -rf build/html/* ../../parsedmarc-docs/
|
||||||
fi
|
fi
|
||||||
cd ..
|
cd ..
|
||||||
cd parsedmarc/resources/maps
|
./sortmaps.py
|
||||||
python3 sortlists.py
|
|
||||||
echo "Checking for invalid UTF-8 bytes in base_reverse_dns_map.csv"
|
|
||||||
python3 find_bad_utf8.py base_reverse_dns_map.csv
|
|
||||||
cd ../../..
|
|
||||||
python3 tests.py
|
python3 tests.py
|
||||||
rm -rf dist/ build/
|
rm -rf dist/ build/
|
||||||
hatch build
|
hatch build
|
||||||
1
ci.ini
1
ci.ini
@@ -3,7 +3,6 @@ save_aggregate = True
|
|||||||
save_forensic = True
|
save_forensic = True
|
||||||
save_smtp_tls = True
|
save_smtp_tls = True
|
||||||
debug = True
|
debug = True
|
||||||
offline = True
|
|
||||||
|
|
||||||
[elasticsearch]
|
[elasticsearch]
|
||||||
hosts = http://localhost:9200
|
hosts = http://localhost:9200
|
||||||
|
|||||||
@@ -1,6 +1,8 @@
|
|||||||
|
version: '3.7'
|
||||||
|
|
||||||
services:
|
services:
|
||||||
elasticsearch:
|
elasticsearch:
|
||||||
image: docker.elastic.co/elasticsearch/elasticsearch:8.19.7
|
image: docker.elastic.co/elasticsearch/elasticsearch:8.3.1
|
||||||
environment:
|
environment:
|
||||||
- network.host=127.0.0.1
|
- network.host=127.0.0.1
|
||||||
- http.host=0.0.0.0
|
- http.host=0.0.0.0
|
||||||
@@ -12,7 +14,7 @@ services:
|
|||||||
- xpack.security.enabled=false
|
- xpack.security.enabled=false
|
||||||
- xpack.license.self_generated.type=basic
|
- xpack.license.self_generated.type=basic
|
||||||
ports:
|
ports:
|
||||||
- "127.0.0.1:9200:9200"
|
- 127.0.0.1:9200:9200
|
||||||
ulimits:
|
ulimits:
|
||||||
memlock:
|
memlock:
|
||||||
soft: -1
|
soft: -1
|
||||||
@@ -28,7 +30,7 @@ services:
|
|||||||
retries: 24
|
retries: 24
|
||||||
|
|
||||||
opensearch:
|
opensearch:
|
||||||
image: opensearchproject/opensearch:2
|
image: opensearchproject/opensearch:2.18.0
|
||||||
environment:
|
environment:
|
||||||
- network.host=127.0.0.1
|
- network.host=127.0.0.1
|
||||||
- http.host=0.0.0.0
|
- http.host=0.0.0.0
|
||||||
@@ -39,7 +41,7 @@ services:
|
|||||||
- bootstrap.memory_lock=true
|
- bootstrap.memory_lock=true
|
||||||
- OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_INITIAL_ADMIN_PASSWORD}
|
- OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_INITIAL_ADMIN_PASSWORD}
|
||||||
ports:
|
ports:
|
||||||
- "127.0.0.1:9201:9200"
|
- 127.0.0.1:9201:9200
|
||||||
ulimits:
|
ulimits:
|
||||||
memlock:
|
memlock:
|
||||||
soft: -1
|
soft: -1
|
||||||
|
|||||||
@@ -21,6 +21,7 @@
|
|||||||
:members:
|
:members:
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
## parsedmarc.splunk
|
## parsedmarc.splunk
|
||||||
|
|
||||||
```{eval-rst}
|
```{eval-rst}
|
||||||
@@ -28,13 +29,6 @@
|
|||||||
:members:
|
:members:
|
||||||
```
|
```
|
||||||
|
|
||||||
## parsedmarc.types
|
|
||||||
|
|
||||||
```{eval-rst}
|
|
||||||
.. automodule:: parsedmarc.types
|
|
||||||
:members:
|
|
||||||
```
|
|
||||||
|
|
||||||
## parsedmarc.utils
|
## parsedmarc.utils
|
||||||
|
|
||||||
```{eval-rst}
|
```{eval-rst}
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ from parsedmarc import __version__
|
|||||||
# -- Project information -----------------------------------------------------
|
# -- Project information -----------------------------------------------------
|
||||||
|
|
||||||
project = "parsedmarc"
|
project = "parsedmarc"
|
||||||
copyright = "2018 - 2025, Sean Whalen and contributors"
|
copyright = "2018 - 2023, Sean Whalen and contributors"
|
||||||
author = "Sean Whalen and contributors"
|
author = "Sean Whalen and contributors"
|
||||||
|
|
||||||
# The version info for the project you're documenting, acts as replacement for
|
# The version info for the project you're documenting, acts as replacement for
|
||||||
|
|||||||
@@ -33,36 +33,17 @@ and Valimail.
|
|||||||
|
|
||||||
## Features
|
## Features
|
||||||
|
|
||||||
- Parses draft and 1.0 standard aggregate/rua DMARC reports
|
- Parses draft and 1.0 standard aggregate/rua reports
|
||||||
- Parses forensic/failure/ruf DMARC reports
|
- Parses forensic/failure/ruf reports
|
||||||
- Parses reports from SMTP TLS Reporting
|
|
||||||
- Can parse reports from an inbox over IMAP, Microsoft Graph, or Gmail API
|
- Can parse reports from an inbox over IMAP, Microsoft Graph, or Gmail API
|
||||||
- Transparently handles gzip or zip compressed reports
|
- Transparently handles gzip or zip compressed reports
|
||||||
- Consistent data structures
|
- Consistent data structures
|
||||||
- Simple JSON and/or CSV output
|
- Simple JSON and/or CSV output
|
||||||
- Optionally email the results
|
- Optionally email the results
|
||||||
- Optionally send the results to Elasticsearch, Opensearch, and/or Splunk, for use
|
- Optionally send the results to Elasticsearch/OpenSearch and/or Splunk, for use with
|
||||||
with premade dashboards
|
premade dashboards
|
||||||
- Optionally send reports to Apache Kafka
|
- Optionally send reports to Apache Kafka
|
||||||
|
|
||||||
## Python Compatibility
|
|
||||||
|
|
||||||
This project supports the following Python versions, which are either actively maintained or are the default versions
|
|
||||||
for RHEL or Debian.
|
|
||||||
|
|
||||||
| Version | Supported | Reason |
|
|
||||||
|---------|-----------|------------------------------------------------------------|
|
|
||||||
| < 3.6 | ❌ | End of Life (EOL) |
|
|
||||||
| 3.6 | ❌ | Used in RHEL 8, but not supported by project dependencies |
|
|
||||||
| 3.7 | ❌ | End of Life (EOL) |
|
|
||||||
| 3.8 | ❌ | End of Life (EOL) |
|
|
||||||
| 3.9 | ❌ | Used in Debian 11 and RHEL 9, but not supported by project dependencies |
|
|
||||||
| 3.10 | ✅ | Actively maintained |
|
|
||||||
| 3.11 | ✅ | Actively maintained; supported until June 2028 (Debian 12) |
|
|
||||||
| 3.12 | ✅ | Actively maintained; supported until May 2035 (RHEL 10) |
|
|
||||||
| 3.13 | ✅ | Actively maintained; supported until June 2030 (Debian 13) |
|
|
||||||
| 3.14 | ✅ | Actively maintained |
|
|
||||||
|
|
||||||
```{toctree}
|
```{toctree}
|
||||||
:caption: 'Contents'
|
:caption: 'Contents'
|
||||||
:maxdepth: 2
|
:maxdepth: 2
|
||||||
|
|||||||
@@ -162,10 +162,10 @@ sudo -u parsedmarc virtualenv /opt/parsedmarc/venv
|
|||||||
```
|
```
|
||||||
|
|
||||||
CentOS/RHEL 8 systems use Python 3.6 by default, so on those systems
|
CentOS/RHEL 8 systems use Python 3.6 by default, so on those systems
|
||||||
explicitly tell `virtualenv` to use `python3.10` instead
|
explicitly tell `virtualenv` to use `python3.9` instead
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
sudo -u parsedmarc virtualenv -p python3.10 /opt/parsedmarc/venv
|
sudo -u parsedmarc virtualenv -p python3.9 /opt/parsedmarc/venv
|
||||||
```
|
```
|
||||||
|
|
||||||
Activate the virtualenv
|
Activate the virtualenv
|
||||||
@@ -199,7 +199,7 @@ sudo apt-get install libemail-outlook-message-perl
|
|||||||
[geoipupdate releases page on github]: https://github.com/maxmind/geoipupdate/releases
|
[geoipupdate releases page on github]: https://github.com/maxmind/geoipupdate/releases
|
||||||
[ip to country lite database]: https://db-ip.com/db/download/ip-to-country-lite
|
[ip to country lite database]: https://db-ip.com/db/download/ip-to-country-lite
|
||||||
[license keys]: https://www.maxmind.com/en/accounts/current/license-key
|
[license keys]: https://www.maxmind.com/en/accounts/current/license-key
|
||||||
[maxmind geoipupdate page]: https://dev.maxmind.com/geoip/updating-databases/
|
[maxmind geoipupdate page]: https://dev.maxmind.com/geoip/geoipupdate/
|
||||||
[maxmind geolite2 country database]: https://dev.maxmind.com/geoip/geolite2-free-geolocation-data
|
[maxmind geolite2 country database]: https://dev.maxmind.com/geoip/geolite2-free-geolocation-data
|
||||||
[registering for a free geolite2 account]: https://www.maxmind.com/en/geolite2/signup
|
[registering for a free geolite2 account]: https://www.maxmind.com/en/geolite2/signup
|
||||||
[to comply with various privacy regulations]: https://blog.maxmind.com/2019/12/18/significant-changes-to-accessing-and-using-geolite2-databases/
|
[to comply with various privacy regulations]: https://blog.maxmind.com/2019/12/18/significant-changes-to-accessing-and-using-geolite2-databases/
|
||||||
|
|||||||
@@ -23,8 +23,6 @@ of the report schema.
|
|||||||
"report_id": "9391651994964116463",
|
"report_id": "9391651994964116463",
|
||||||
"begin_date": "2012-04-27 20:00:00",
|
"begin_date": "2012-04-27 20:00:00",
|
||||||
"end_date": "2012-04-28 19:59:59",
|
"end_date": "2012-04-28 19:59:59",
|
||||||
"timespan_requires_normalization": false,
|
|
||||||
"original_timespan_seconds": 86399,
|
|
||||||
"errors": []
|
"errors": []
|
||||||
},
|
},
|
||||||
"policy_published": {
|
"policy_published": {
|
||||||
@@ -41,10 +39,8 @@ of the report schema.
|
|||||||
"source": {
|
"source": {
|
||||||
"ip_address": "72.150.241.94",
|
"ip_address": "72.150.241.94",
|
||||||
"country": "US",
|
"country": "US",
|
||||||
"reverse_dns": null,
|
"reverse_dns": "adsl-72-150-241-94.shv.bellsouth.net",
|
||||||
"base_domain": null,
|
"base_domain": "bellsouth.net"
|
||||||
"name": null,
|
|
||||||
"type": null
|
|
||||||
},
|
},
|
||||||
"count": 2,
|
"count": 2,
|
||||||
"alignment": {
|
"alignment": {
|
||||||
@@ -78,10 +74,7 @@ of the report schema.
|
|||||||
"result": "pass"
|
"result": "pass"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
}
|
||||||
"normalized_timespan": false,
|
|
||||||
"interval_begin": "2012-04-28 00:00:00",
|
|
||||||
"interval_end": "2012-04-28 23:59:59"
|
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
@@ -90,10 +83,8 @@ of the report schema.
|
|||||||
### CSV aggregate report
|
### CSV aggregate report
|
||||||
|
|
||||||
```text
|
```text
|
||||||
xml_schema,org_name,org_email,org_extra_contact_info,report_id,begin_date,end_date,normalized_timespan,errors,domain,adkim,aspf,p,sp,pct,fo,source_ip_address,source_country,source_reverse_dns,source_base_domain,source_name,source_type,count,spf_aligned,dkim_aligned,dmarc_aligned,disposition,policy_override_reasons,policy_override_comments,envelope_from,header_from,envelope_to,dkim_domains,dkim_selectors,dkim_results,spf_domains,spf_scopes,spf_results
|
xml_schema,org_name,org_email,org_extra_contact_info,report_id,begin_date,end_date,errors,domain,adkim,aspf,p,sp,pct,fo,source_ip_address,source_country,source_reverse_dns,source_base_domain,count,spf_aligned,dkim_aligned,dmarc_aligned,disposition,policy_override_reasons,policy_override_comments,envelope_from,header_from,envelope_to,dkim_domains,dkim_selectors,dkim_results,spf_domains,spf_scopes,spf_results
|
||||||
draft,acme.com,noreply-dmarc-support@acme.com,http://acme.com/dmarc/support,9391651994964116463,2012-04-28 00:00:00,2012-04-28 23:59:59,False,,example.com,r,r,none,none,100,0,72.150.241.94,US,,,,,2,True,False,True,none,,,example.com,example.com,,example.com,none,fail,example.com,mfrom,pass
|
draft,acme.com,noreply-dmarc-support@acme.com,http://acme.com/dmarc/support,9391651994964116463,2012-04-27 20:00:00,2012-04-28 19:59:59,,example.com,r,r,none,none,100,0,72.150.241.94,US,adsl-72-150-241-94.shv.bellsouth.net,bellsouth.net,2,True,False,True,none,,,example.com,example.com,,example.com,none,fail,example.com,mfrom,pass
|
||||||
draft,acme.com,noreply-dmarc-support@acme.com,http://acme.com/dmarc/support,9391651994964116463,2012-04-28 00:00:00,2012-04-28 23:59:59,False,,example.com,r,r,none,none,100,0,72.150.241.94,US,,,,,2,True,False,True,none,,,example.com,example.com,,example.com,none,fail,example.com,mfrom,pass
|
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
## Sample forensic report output
|
## Sample forensic report output
|
||||||
|
|||||||
@@ -4,50 +4,47 @@
|
|||||||
|
|
||||||
```text
|
```text
|
||||||
usage: parsedmarc [-h] [-c CONFIG_FILE] [--strip-attachment-payloads] [-o OUTPUT]
|
usage: parsedmarc [-h] [-c CONFIG_FILE] [--strip-attachment-payloads] [-o OUTPUT]
|
||||||
[--aggregate-json-filename AGGREGATE_JSON_FILENAME] [--forensic-json-filename FORENSIC_JSON_FILENAME]
|
[--aggregate-json-filename AGGREGATE_JSON_FILENAME]
|
||||||
[--smtp-tls-json-filename SMTP_TLS_JSON_FILENAME] [--aggregate-csv-filename AGGREGATE_CSV_FILENAME]
|
[--forensic-json-filename FORENSIC_JSON_FILENAME]
|
||||||
[--forensic-csv-filename FORENSIC_CSV_FILENAME] [--smtp-tls-csv-filename SMTP_TLS_CSV_FILENAME]
|
[--aggregate-csv-filename AGGREGATE_CSV_FILENAME]
|
||||||
[-n NAMESERVERS [NAMESERVERS ...]] [-t DNS_TIMEOUT] [--offline] [-s] [-w] [--verbose] [--debug]
|
[--forensic-csv-filename FORENSIC_CSV_FILENAME]
|
||||||
[--log-file LOG_FILE] [--no-prettify-json] [-v]
|
[-n NAMESERVERS [NAMESERVERS ...]] [-t DNS_TIMEOUT] [--offline]
|
||||||
[file_path ...]
|
[-s] [--verbose] [--debug] [--log-file LOG_FILE] [-v]
|
||||||
|
[file_path ...]
|
||||||
|
|
||||||
Parses DMARC reports
|
Parses DMARC reports
|
||||||
|
|
||||||
positional arguments:
|
positional arguments:
|
||||||
file_path one or more paths to aggregate or forensic report files, emails, or mbox files'
|
file_path one or more paths to aggregate or forensic report
|
||||||
|
files, emails, or mbox files'
|
||||||
|
|
||||||
options:
|
optional arguments:
|
||||||
-h, --help show this help message and exit
|
-h, --help show this help message and exit
|
||||||
-c CONFIG_FILE, --config-file CONFIG_FILE
|
-c CONFIG_FILE, --config-file CONFIG_FILE
|
||||||
a path to a configuration file (--silent implied)
|
a path to a configuration file (--silent implied)
|
||||||
--strip-attachment-payloads
|
--strip-attachment-payloads
|
||||||
remove attachment payloads from forensic report output
|
remove attachment payloads from forensic report output
|
||||||
-o OUTPUT, --output OUTPUT
|
-o OUTPUT, --output OUTPUT
|
||||||
write output files to the given directory
|
write output files to the given directory
|
||||||
--aggregate-json-filename AGGREGATE_JSON_FILENAME
|
--aggregate-json-filename AGGREGATE_JSON_FILENAME
|
||||||
filename for the aggregate JSON output file
|
filename for the aggregate JSON output file
|
||||||
--forensic-json-filename FORENSIC_JSON_FILENAME
|
--forensic-json-filename FORENSIC_JSON_FILENAME
|
||||||
filename for the forensic JSON output file
|
filename for the forensic JSON output file
|
||||||
--smtp-tls-json-filename SMTP_TLS_JSON_FILENAME
|
--aggregate-csv-filename AGGREGATE_CSV_FILENAME
|
||||||
filename for the SMTP TLS JSON output file
|
filename for the aggregate CSV output file
|
||||||
--aggregate-csv-filename AGGREGATE_CSV_FILENAME
|
--forensic-csv-filename FORENSIC_CSV_FILENAME
|
||||||
filename for the aggregate CSV output file
|
filename for the forensic CSV output file
|
||||||
--forensic-csv-filename FORENSIC_CSV_FILENAME
|
-n NAMESERVERS [NAMESERVERS ...], --nameservers NAMESERVERS [NAMESERVERS ...]
|
||||||
filename for the forensic CSV output file
|
nameservers to query
|
||||||
--smtp-tls-csv-filename SMTP_TLS_CSV_FILENAME
|
-t DNS_TIMEOUT, --dns_timeout DNS_TIMEOUT
|
||||||
filename for the SMTP TLS CSV output file
|
number of seconds to wait for an answer from DNS
|
||||||
-n NAMESERVERS [NAMESERVERS ...], --nameservers NAMESERVERS [NAMESERVERS ...]
|
(default: 2.0)
|
||||||
nameservers to query
|
--offline do not make online queries for geolocation or DNS
|
||||||
-t DNS_TIMEOUT, --dns_timeout DNS_TIMEOUT
|
-s, --silent only print errors and warnings
|
||||||
number of seconds to wait for an answer from DNS (default: 2.0)
|
--verbose more verbose output
|
||||||
--offline do not make online queries for geolocation or DNS
|
--debug print debugging information
|
||||||
-s, --silent only print errors
|
--log-file LOG_FILE output logging to a file
|
||||||
-w, --warnings print warnings in addition to errors
|
-v, --version show program's version number and exit
|
||||||
--verbose more verbose output
|
|
||||||
--debug print debugging information
|
|
||||||
--log-file LOG_FILE output logging to a file
|
|
||||||
--no-prettify-json output JSON in a single line without indentation
|
|
||||||
-v, --version show program's version number and exit
|
|
||||||
```
|
```
|
||||||
|
|
||||||
:::{note}
|
:::{note}
|
||||||
@@ -123,10 +120,8 @@ The full set of configuration options are:
|
|||||||
Elasticsearch, Splunk and/or S3
|
Elasticsearch, Splunk and/or S3
|
||||||
- `save_smtp_tls` - bool: Save SMTP-STS report data to
|
- `save_smtp_tls` - bool: Save SMTP-STS report data to
|
||||||
Elasticsearch, Splunk and/or S3
|
Elasticsearch, Splunk and/or S3
|
||||||
- `index_prefix_domain_map` - bool: A path mapping of Opensearch/Elasticsearch index prefixes to domain names
|
|
||||||
- `strip_attachment_payloads` - bool: Remove attachment
|
- `strip_attachment_payloads` - bool: Remove attachment
|
||||||
payloads from results
|
payloads from results
|
||||||
- `silent` - bool: Set this to `False` to output results to STDOUT
|
|
||||||
- `output` - str: Directory to place JSON and CSV files in. This is required if you set either of the JSON output file options.
|
- `output` - str: Directory to place JSON and CSV files in. This is required if you set either of the JSON output file options.
|
||||||
- `aggregate_json_filename` - str: filename for the aggregate
|
- `aggregate_json_filename` - str: filename for the aggregate
|
||||||
JSON output file
|
JSON output file
|
||||||
@@ -171,8 +166,8 @@ The full set of configuration options are:
|
|||||||
- `check_timeout` - int: Number of seconds to wait for a IMAP
|
- `check_timeout` - int: Number of seconds to wait for a IMAP
|
||||||
IDLE response or the number of seconds until the next
|
IDLE response or the number of seconds until the next
|
||||||
mail check (Default: `30`)
|
mail check (Default: `30`)
|
||||||
- `since` - str: Search for messages since certain time. (Examples: `5m|3h|2d|1w`)
|
- `since` - str: Search for messages since certain time. (Examples: `5m|3h|2d|1w`)
|
||||||
Acceptable units - {"m":"minutes", "h":"hours", "d":"days", "w":"weeks"}.
|
Acceptable units - {"m":"minutes", "h":"hours", "d":"days", "w":"weeks"}).
|
||||||
Defaults to `1d` if incorrect value is provided.
|
Defaults to `1d` if incorrect value is provided.
|
||||||
- `imap`
|
- `imap`
|
||||||
- `host` - str: The IMAP server hostname or IP address
|
- `host` - str: The IMAP server hostname or IP address
|
||||||
@@ -240,7 +235,7 @@ The full set of configuration options are:
|
|||||||
group and use that as the group id.
|
group and use that as the group id.
|
||||||
|
|
||||||
```powershell
|
```powershell
|
||||||
New-ApplicationAccessPolicy -AccessRight RestrictAccess
|
New-ApplicationAccessPolicy -AccessRight RestrictAccess
|
||||||
-AppId "<CLIENT_ID>" -PolicyScopeGroupId "<MAILBOX>"
|
-AppId "<CLIENT_ID>" -PolicyScopeGroupId "<MAILBOX>"
|
||||||
-Description "Restrict access to dmarc reports mailbox."
|
-Description "Restrict access to dmarc reports mailbox."
|
||||||
```
|
```
|
||||||
@@ -257,7 +252,7 @@ The full set of configuration options are:
|
|||||||
:::
|
:::
|
||||||
- `user` - str: Basic auth username
|
- `user` - str: Basic auth username
|
||||||
- `password` - str: Basic auth password
|
- `password` - str: Basic auth password
|
||||||
- `api_key` - str: API key
|
- `apiKey` - str: API key
|
||||||
- `ssl` - bool: Use an encrypted SSL/TLS connection
|
- `ssl` - bool: Use an encrypted SSL/TLS connection
|
||||||
(Default: `True`)
|
(Default: `True`)
|
||||||
- `timeout` - float: Timeout in seconds (Default: 60)
|
- `timeout` - float: Timeout in seconds (Default: 60)
|
||||||
@@ -280,7 +275,7 @@ The full set of configuration options are:
|
|||||||
:::
|
:::
|
||||||
- `user` - str: Basic auth username
|
- `user` - str: Basic auth username
|
||||||
- `password` - str: Basic auth password
|
- `password` - str: Basic auth password
|
||||||
- `api_key` - str: API key
|
- `apiKey` - str: API key
|
||||||
- `ssl` - bool: Use an encrypted SSL/TLS connection
|
- `ssl` - bool: Use an encrypted SSL/TLS connection
|
||||||
(Default: `True`)
|
(Default: `True`)
|
||||||
- `timeout` - float: Timeout in seconds (Default: 60)
|
- `timeout` - float: Timeout in seconds (Default: 60)
|
||||||
@@ -336,65 +331,13 @@ The full set of configuration options are:
|
|||||||
- `secret_access_key` - str: The secret access key (Optional)
|
- `secret_access_key` - str: The secret access key (Optional)
|
||||||
- `syslog`
|
- `syslog`
|
||||||
- `server` - str: The Syslog server name or IP address
|
- `server` - str: The Syslog server name or IP address
|
||||||
- `port` - int: The port to use (Default: `514`)
|
- `port` - int: The UDP port to use (Default: `514`)
|
||||||
- `protocol` - str: The protocol to use: `udp`, `tcp`, or `tls` (Default: `udp`)
|
|
||||||
- `cafile_path` - str: Path to CA certificate file for TLS server verification (Optional)
|
|
||||||
- `certfile_path` - str: Path to client certificate file for TLS authentication (Optional)
|
|
||||||
- `keyfile_path` - str: Path to client private key file for TLS authentication (Optional)
|
|
||||||
- `timeout` - float: Connection timeout in seconds for TCP/TLS (Default: `5.0`)
|
|
||||||
- `retry_attempts` - int: Number of retry attempts for failed connections (Default: `3`)
|
|
||||||
- `retry_delay` - int: Delay in seconds between retry attempts (Default: `5`)
|
|
||||||
|
|
||||||
**Example UDP configuration (default):**
|
|
||||||
|
|
||||||
```ini
|
|
||||||
[syslog]
|
|
||||||
server = syslog.example.com
|
|
||||||
port = 514
|
|
||||||
```
|
|
||||||
|
|
||||||
**Example TCP configuration:**
|
|
||||||
|
|
||||||
```ini
|
|
||||||
[syslog]
|
|
||||||
server = syslog.example.com
|
|
||||||
port = 6514
|
|
||||||
protocol = tcp
|
|
||||||
timeout = 10.0
|
|
||||||
retry_attempts = 5
|
|
||||||
```
|
|
||||||
|
|
||||||
**Example TLS configuration with server verification:**
|
|
||||||
|
|
||||||
```ini
|
|
||||||
[syslog]
|
|
||||||
server = syslog.example.com
|
|
||||||
port = 6514
|
|
||||||
protocol = tls
|
|
||||||
cafile_path = /path/to/ca-cert.pem
|
|
||||||
timeout = 10.0
|
|
||||||
```
|
|
||||||
|
|
||||||
**Example TLS configuration with mutual authentication:**
|
|
||||||
|
|
||||||
```ini
|
|
||||||
[syslog]
|
|
||||||
server = syslog.example.com
|
|
||||||
port = 6514
|
|
||||||
protocol = tls
|
|
||||||
cafile_path = /path/to/ca-cert.pem
|
|
||||||
certfile_path = /path/to/client-cert.pem
|
|
||||||
keyfile_path = /path/to/client-key.pem
|
|
||||||
timeout = 10.0
|
|
||||||
retry_attempts = 3
|
|
||||||
retry_delay = 5
|
|
||||||
```
|
|
||||||
- `gmail_api`
|
- `gmail_api`
|
||||||
- `credentials_file` - str: Path to file containing the
|
- `credentials_file` - str: Path to file containing the
|
||||||
credentials, None to disable (Default: `None`)
|
credentials, None to disable (Default: `None`)
|
||||||
- `token_file` - str: Path to save the token file
|
- `token_file` - str: Path to save the token file
|
||||||
(Default: `.token`)
|
(Default: `.token`)
|
||||||
|
|
||||||
:::{note}
|
:::{note}
|
||||||
credentials_file and token_file can be got with [quickstart](https://developers.google.com/gmail/api/quickstart/python).Please change the scope to `https://www.googleapis.com/auth/gmail.modify`.
|
credentials_file and token_file can be got with [quickstart](https://developers.google.com/gmail/api/quickstart/python).Please change the scope to `https://www.googleapis.com/auth/gmail.modify`.
|
||||||
:::
|
:::
|
||||||
@@ -426,7 +369,7 @@ The full set of configuration options are:
|
|||||||
- `mode` - str: The GELF transport type to use. Valid modes: `tcp`, `udp`, `tls`
|
- `mode` - str: The GELF transport type to use. Valid modes: `tcp`, `udp`, `tls`
|
||||||
|
|
||||||
- `maildir`
|
- `maildir`
|
||||||
- `maildir_path` - str: Full path for mailbox maidir location (Default: `INBOX`)
|
- `reports_folder` - str: Full path for mailbox maidir location (Default: `INBOX`)
|
||||||
- `maildir_create` - bool: Create maildir if not present (Default: False)
|
- `maildir_create` - bool: Create maildir if not present (Default: False)
|
||||||
|
|
||||||
- `webhook` - Post the individual reports to a webhook url with the report as the JSON body
|
- `webhook` - Post the individual reports to a webhook url with the report as the JSON body
|
||||||
@@ -494,7 +437,7 @@ Update the limit to 2k per example:
|
|||||||
PUT _cluster/settings
|
PUT _cluster/settings
|
||||||
{
|
{
|
||||||
"persistent" : {
|
"persistent" : {
|
||||||
"cluster.max_shards_per_node" : 2000
|
"cluster.max_shards_per_node" : 2000
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
@@ -502,28 +445,6 @@ PUT _cluster/settings
|
|||||||
Increasing this value increases resource usage.
|
Increasing this value increases resource usage.
|
||||||
:::
|
:::
|
||||||
|
|
||||||
## Multi-tenant support
|
|
||||||
|
|
||||||
Starting in `8.19.0`, ParseDMARC provides multi-tenant support by placing data into separate OpenSearch or Elasticsearch index prefixes. To set this up, create a YAML file that is formatted where each key is a tenant name, and the value is a list of domains related to that tenant, not including subdomains, like this:
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
example:
|
|
||||||
- example.com
|
|
||||||
- example.net
|
|
||||||
- example.org
|
|
||||||
|
|
||||||
whalensolutions:
|
|
||||||
- whalensolutions.com
|
|
||||||
```
|
|
||||||
|
|
||||||
Save it to disk where the user running ParseDMARC can read it, then set `index_prefix_domain_map` to that filepath in the `[general]` section of the ParseDMARC configuration file and do not set an `index_prefix` option in the `[elasticsearch]` or `[opensearch]` sections.
|
|
||||||
|
|
||||||
When configured correctly, if ParseDMARC finds that a report is related to a domain in the mapping, the report will be saved in an index name that has the tenant name prefixed to it with a trailing underscore. Then, you can use the security features of Opensearch or the ELK stack to only grant users access to the indexes that they need.
|
|
||||||
|
|
||||||
:::{note}
|
|
||||||
A domain cannot be used in multiple tenant lists. Only the first prefix list that contains the matching domain is used.
|
|
||||||
:::
|
|
||||||
|
|
||||||
## Running parsedmarc as a systemd service
|
## Running parsedmarc as a systemd service
|
||||||
|
|
||||||
Use systemd to run `parsedmarc` as a service and process reports as
|
Use systemd to run `parsedmarc` as a service and process reports as
|
||||||
|
|||||||
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
@@ -3,55 +3,53 @@
|
|||||||
|
|
||||||
"""A CLI for parsing DMARC reports"""
|
"""A CLI for parsing DMARC reports"""
|
||||||
|
|
||||||
import http.client
|
from argparse import Namespace, ArgumentParser
|
||||||
import json
|
|
||||||
import logging
|
|
||||||
import os
|
import os
|
||||||
import sys
|
|
||||||
from argparse import ArgumentParser, Namespace
|
|
||||||
from configparser import ConfigParser
|
from configparser import ConfigParser
|
||||||
from glob import glob
|
from glob import glob
|
||||||
from multiprocessing import Pipe, Process
|
import logging
|
||||||
|
import math
|
||||||
|
from collections import OrderedDict
|
||||||
|
import json
|
||||||
from ssl import CERT_NONE, create_default_context
|
from ssl import CERT_NONE, create_default_context
|
||||||
|
from multiprocessing import Pipe, Process
|
||||||
import yaml
|
import sys
|
||||||
|
import http.client
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
from parsedmarc import (
|
from parsedmarc import (
|
||||||
SEEN_AGGREGATE_REPORT_IDS,
|
get_dmarc_reports_from_mailbox,
|
||||||
InvalidDMARCReport,
|
watch_inbox,
|
||||||
|
parse_report_file,
|
||||||
|
get_dmarc_reports_from_mbox,
|
||||||
|
elastic,
|
||||||
|
opensearch,
|
||||||
|
kafkaclient,
|
||||||
|
splunk,
|
||||||
|
save_output,
|
||||||
|
email_results,
|
||||||
ParserError,
|
ParserError,
|
||||||
__version__,
|
__version__,
|
||||||
elastic,
|
InvalidDMARCReport,
|
||||||
email_results,
|
|
||||||
gelf,
|
|
||||||
get_dmarc_reports_from_mailbox,
|
|
||||||
get_dmarc_reports_from_mbox,
|
|
||||||
kafkaclient,
|
|
||||||
loganalytics,
|
|
||||||
opensearch,
|
|
||||||
parse_report_file,
|
|
||||||
s3,
|
s3,
|
||||||
save_output,
|
|
||||||
splunk,
|
|
||||||
syslog,
|
syslog,
|
||||||
watch_inbox,
|
loganalytics,
|
||||||
|
gelf,
|
||||||
webhook,
|
webhook,
|
||||||
)
|
)
|
||||||
from parsedmarc.log import logger
|
|
||||||
from parsedmarc.mail import (
|
from parsedmarc.mail import (
|
||||||
GmailConnection,
|
|
||||||
IMAPConnection,
|
IMAPConnection,
|
||||||
MaildirConnection,
|
|
||||||
MSGraphConnection,
|
MSGraphConnection,
|
||||||
|
GmailConnection,
|
||||||
|
MaildirConnection,
|
||||||
)
|
)
|
||||||
from parsedmarc.mail.graph import AuthMethod
|
from parsedmarc.mail.graph import AuthMethod
|
||||||
from parsedmarc.types import ParsingResults
|
|
||||||
from parsedmarc.utils import get_base_domain, get_reverse_dns, is_mbox
|
|
||||||
|
|
||||||
# Increase the max header limit for very large emails. `_MAXHEADERS` is a
|
from parsedmarc.log import logger
|
||||||
# private stdlib attribute and may not exist in type stubs.
|
from parsedmarc.utils import is_mbox, get_reverse_dns
|
||||||
setattr(http.client, "_MAXHEADERS", 200)
|
from parsedmarc import SEEN_AGGREGATE_REPORT_IDS
|
||||||
|
|
||||||
|
http.client._MAXHEADERS = 200 # pylint:disable=protected-access
|
||||||
|
|
||||||
formatter = logging.Formatter(
|
formatter = logging.Formatter(
|
||||||
fmt="%(levelname)8s:%(filename)s:%(lineno)d:%(message)s",
|
fmt="%(levelname)8s:%(filename)s:%(lineno)d:%(message)s",
|
||||||
@@ -68,48 +66,6 @@ def _str_to_list(s):
|
|||||||
return list(map(lambda i: i.lstrip(), _list))
|
return list(map(lambda i: i.lstrip(), _list))
|
||||||
|
|
||||||
|
|
||||||
def _configure_logging(log_level, log_file=None):
|
|
||||||
"""
|
|
||||||
Configure logging for the current process.
|
|
||||||
This is needed for child processes to properly log messages.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
log_level: The logging level (e.g., logging.DEBUG, logging.WARNING)
|
|
||||||
log_file: Optional path to log file
|
|
||||||
"""
|
|
||||||
# Get the logger
|
|
||||||
from parsedmarc.log import logger
|
|
||||||
|
|
||||||
# Set the log level
|
|
||||||
logger.setLevel(log_level)
|
|
||||||
|
|
||||||
# Add StreamHandler with formatter if not already present
|
|
||||||
# Check if we already have a StreamHandler to avoid duplicates
|
|
||||||
# Use exact type check to distinguish from FileHandler subclass
|
|
||||||
has_stream_handler = any(type(h) is logging.StreamHandler for h in logger.handlers)
|
|
||||||
|
|
||||||
if not has_stream_handler:
|
|
||||||
formatter = logging.Formatter(
|
|
||||||
fmt="%(levelname)8s:%(filename)s:%(lineno)d:%(message)s",
|
|
||||||
datefmt="%Y-%m-%d:%H:%M:%S",
|
|
||||||
)
|
|
||||||
handler = logging.StreamHandler()
|
|
||||||
handler.setFormatter(formatter)
|
|
||||||
logger.addHandler(handler)
|
|
||||||
|
|
||||||
# Add FileHandler if log_file is specified
|
|
||||||
if log_file:
|
|
||||||
try:
|
|
||||||
fh = logging.FileHandler(log_file, "a")
|
|
||||||
formatter = logging.Formatter(
|
|
||||||
"%(asctime)s - %(levelname)s - [%(filename)s:%(lineno)d] - %(message)s"
|
|
||||||
)
|
|
||||||
fh.setFormatter(formatter)
|
|
||||||
logger.addHandler(fh)
|
|
||||||
except (IOError, OSError, PermissionError) as error:
|
|
||||||
logger.warning("Unable to write to log file: {}".format(error))
|
|
||||||
|
|
||||||
|
|
||||||
def cli_parse(
|
def cli_parse(
|
||||||
file_path,
|
file_path,
|
||||||
sa,
|
sa,
|
||||||
@@ -120,31 +76,9 @@ def cli_parse(
|
|||||||
always_use_local_files,
|
always_use_local_files,
|
||||||
reverse_dns_map_path,
|
reverse_dns_map_path,
|
||||||
reverse_dns_map_url,
|
reverse_dns_map_url,
|
||||||
normalize_timespan_threshold_hours,
|
|
||||||
conn,
|
conn,
|
||||||
log_level=logging.ERROR,
|
|
||||||
log_file=None,
|
|
||||||
):
|
):
|
||||||
"""Separated this function for multiprocessing
|
"""Separated this function for multiprocessing"""
|
||||||
|
|
||||||
Args:
|
|
||||||
file_path: Path to the report file
|
|
||||||
sa: Strip attachment payloads flag
|
|
||||||
nameservers: List of nameservers
|
|
||||||
dns_timeout: DNS timeout
|
|
||||||
ip_db_path: Path to IP database
|
|
||||||
offline: Offline mode flag
|
|
||||||
always_use_local_files: Always use local files flag
|
|
||||||
reverse_dns_map_path: Path to reverse DNS map
|
|
||||||
reverse_dns_map_url: URL to reverse DNS map
|
|
||||||
normalize_timespan_threshold_hours: Timespan threshold
|
|
||||||
conn: Pipe connection for IPC
|
|
||||||
log_level: Logging level for this process
|
|
||||||
log_file: Optional path to log file
|
|
||||||
"""
|
|
||||||
# Configure logging in this child process
|
|
||||||
_configure_logging(log_level, log_file)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
file_results = parse_report_file(
|
file_results = parse_report_file(
|
||||||
file_path,
|
file_path,
|
||||||
@@ -156,7 +90,6 @@ def cli_parse(
|
|||||||
nameservers=nameservers,
|
nameservers=nameservers,
|
||||||
dns_timeout=dns_timeout,
|
dns_timeout=dns_timeout,
|
||||||
strip_attachment_payloads=sa,
|
strip_attachment_payloads=sa,
|
||||||
normalize_timespan_threshold_hours=normalize_timespan_threshold_hours,
|
|
||||||
)
|
)
|
||||||
conn.send([file_results, file_path])
|
conn.send([file_results, file_path])
|
||||||
except ParserError as error:
|
except ParserError as error:
|
||||||
@@ -168,42 +101,14 @@ def cli_parse(
|
|||||||
def _main():
|
def _main():
|
||||||
"""Called when the module is executed"""
|
"""Called when the module is executed"""
|
||||||
|
|
||||||
def get_index_prefix(report):
|
|
||||||
domain = None
|
|
||||||
if index_prefix_domain_map is None:
|
|
||||||
return None
|
|
||||||
if "policy_published" in report:
|
|
||||||
domain = report["policy_published"]["domain"]
|
|
||||||
elif "reported_domain" in report:
|
|
||||||
domain = report("reported_domain")
|
|
||||||
elif "policies" in report:
|
|
||||||
domain = report["policies"][0]["domain"]
|
|
||||||
if domain:
|
|
||||||
domain = get_base_domain(domain)
|
|
||||||
for prefix in index_prefix_domain_map:
|
|
||||||
if domain in index_prefix_domain_map[prefix]:
|
|
||||||
prefix = (
|
|
||||||
prefix.lower()
|
|
||||||
.strip()
|
|
||||||
.strip("_")
|
|
||||||
.replace(" ", "_")
|
|
||||||
.replace("-", "_")
|
|
||||||
)
|
|
||||||
prefix = f"{prefix}_"
|
|
||||||
return prefix
|
|
||||||
return None
|
|
||||||
|
|
||||||
def process_reports(reports_):
|
def process_reports(reports_):
|
||||||
indent_value = 2 if opts.prettify_json else None
|
output_str = "{0}\n".format(json.dumps(reports_, ensure_ascii=False, indent=2))
|
||||||
output_str = "{0}\n".format(
|
|
||||||
json.dumps(reports_, ensure_ascii=False, indent=indent_value)
|
|
||||||
)
|
|
||||||
|
|
||||||
if not opts.silent:
|
if not opts.silent:
|
||||||
print(output_str)
|
print(output_str)
|
||||||
if opts.output:
|
if opts.output:
|
||||||
save_output(
|
save_output(
|
||||||
reports_,
|
results,
|
||||||
output_directory=opts.output,
|
output_directory=opts.output,
|
||||||
aggregate_json_filename=opts.aggregate_json_filename,
|
aggregate_json_filename=opts.aggregate_json_filename,
|
||||||
forensic_json_filename=opts.forensic_json_filename,
|
forensic_json_filename=opts.forensic_json_filename,
|
||||||
@@ -221,8 +126,7 @@ def _main():
|
|||||||
elastic.save_aggregate_report_to_elasticsearch(
|
elastic.save_aggregate_report_to_elasticsearch(
|
||||||
report,
|
report,
|
||||||
index_suffix=opts.elasticsearch_index_suffix,
|
index_suffix=opts.elasticsearch_index_suffix,
|
||||||
index_prefix=opts.elasticsearch_index_prefix
|
index_prefix=opts.elasticsearch_index_prefix,
|
||||||
or get_index_prefix(report),
|
|
||||||
monthly_indexes=opts.elasticsearch_monthly_indexes,
|
monthly_indexes=opts.elasticsearch_monthly_indexes,
|
||||||
number_of_shards=shards,
|
number_of_shards=shards,
|
||||||
number_of_replicas=replicas,
|
number_of_replicas=replicas,
|
||||||
@@ -243,8 +147,7 @@ def _main():
|
|||||||
opensearch.save_aggregate_report_to_opensearch(
|
opensearch.save_aggregate_report_to_opensearch(
|
||||||
report,
|
report,
|
||||||
index_suffix=opts.opensearch_index_suffix,
|
index_suffix=opts.opensearch_index_suffix,
|
||||||
index_prefix=opts.opensearch_index_prefix
|
index_prefix=opts.opensearch_index_prefix,
|
||||||
or get_index_prefix(report),
|
|
||||||
monthly_indexes=opts.opensearch_monthly_indexes,
|
monthly_indexes=opts.opensearch_monthly_indexes,
|
||||||
number_of_shards=shards,
|
number_of_shards=shards,
|
||||||
number_of_replicas=replicas,
|
number_of_replicas=replicas,
|
||||||
@@ -286,9 +189,8 @@ def _main():
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
if opts.webhook_aggregate_url:
|
if opts.webhook_aggregate_url:
|
||||||
indent_value = 2 if opts.prettify_json else None
|
|
||||||
webhook_client.save_aggregate_report_to_webhook(
|
webhook_client.save_aggregate_report_to_webhook(
|
||||||
json.dumps(report, ensure_ascii=False, indent=indent_value)
|
json.dumps(report, ensure_ascii=False, indent=2)
|
||||||
)
|
)
|
||||||
except Exception as error_:
|
except Exception as error_:
|
||||||
logger.error("Webhook Error: {0}".format(error_.__str__()))
|
logger.error("Webhook Error: {0}".format(error_.__str__()))
|
||||||
@@ -310,8 +212,7 @@ def _main():
|
|||||||
elastic.save_forensic_report_to_elasticsearch(
|
elastic.save_forensic_report_to_elasticsearch(
|
||||||
report,
|
report,
|
||||||
index_suffix=opts.elasticsearch_index_suffix,
|
index_suffix=opts.elasticsearch_index_suffix,
|
||||||
index_prefix=opts.elasticsearch_index_prefix
|
index_prefix=opts.elasticsearch_index_prefix,
|
||||||
or get_index_prefix(report),
|
|
||||||
monthly_indexes=opts.elasticsearch_monthly_indexes,
|
monthly_indexes=opts.elasticsearch_monthly_indexes,
|
||||||
number_of_shards=shards,
|
number_of_shards=shards,
|
||||||
number_of_replicas=replicas,
|
number_of_replicas=replicas,
|
||||||
@@ -330,8 +231,7 @@ def _main():
|
|||||||
opensearch.save_forensic_report_to_opensearch(
|
opensearch.save_forensic_report_to_opensearch(
|
||||||
report,
|
report,
|
||||||
index_suffix=opts.opensearch_index_suffix,
|
index_suffix=opts.opensearch_index_suffix,
|
||||||
index_prefix=opts.opensearch_index_prefix
|
index_prefix=opts.opensearch_index_prefix,
|
||||||
or get_index_prefix(report),
|
|
||||||
monthly_indexes=opts.opensearch_monthly_indexes,
|
monthly_indexes=opts.opensearch_monthly_indexes,
|
||||||
number_of_shards=shards,
|
number_of_shards=shards,
|
||||||
number_of_replicas=replicas,
|
number_of_replicas=replicas,
|
||||||
@@ -371,9 +271,8 @@ def _main():
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
if opts.webhook_forensic_url:
|
if opts.webhook_forensic_url:
|
||||||
indent_value = 2 if opts.prettify_json else None
|
|
||||||
webhook_client.save_forensic_report_to_webhook(
|
webhook_client.save_forensic_report_to_webhook(
|
||||||
json.dumps(report, ensure_ascii=False, indent=indent_value)
|
json.dumps(report, ensure_ascii=False, indent=2)
|
||||||
)
|
)
|
||||||
except Exception as error_:
|
except Exception as error_:
|
||||||
logger.error("Webhook Error: {0}".format(error_.__str__()))
|
logger.error("Webhook Error: {0}".format(error_.__str__()))
|
||||||
@@ -395,8 +294,7 @@ def _main():
|
|||||||
elastic.save_smtp_tls_report_to_elasticsearch(
|
elastic.save_smtp_tls_report_to_elasticsearch(
|
||||||
report,
|
report,
|
||||||
index_suffix=opts.elasticsearch_index_suffix,
|
index_suffix=opts.elasticsearch_index_suffix,
|
||||||
index_prefix=opts.elasticsearch_index_prefix
|
index_prefix=opts.elasticsearch_index_prefix,
|
||||||
or get_index_prefix(report),
|
|
||||||
monthly_indexes=opts.elasticsearch_monthly_indexes,
|
monthly_indexes=opts.elasticsearch_monthly_indexes,
|
||||||
number_of_shards=shards,
|
number_of_shards=shards,
|
||||||
number_of_replicas=replicas,
|
number_of_replicas=replicas,
|
||||||
@@ -415,8 +313,7 @@ def _main():
|
|||||||
opensearch.save_smtp_tls_report_to_opensearch(
|
opensearch.save_smtp_tls_report_to_opensearch(
|
||||||
report,
|
report,
|
||||||
index_suffix=opts.opensearch_index_suffix,
|
index_suffix=opts.opensearch_index_suffix,
|
||||||
index_prefix=opts.opensearch_index_prefix
|
index_prefix=opts.opensearch_index_prefix,
|
||||||
or get_index_prefix(report),
|
|
||||||
monthly_indexes=opts.opensearch_monthly_indexes,
|
monthly_indexes=opts.opensearch_monthly_indexes,
|
||||||
number_of_shards=shards,
|
number_of_shards=shards,
|
||||||
number_of_replicas=replicas,
|
number_of_replicas=replicas,
|
||||||
@@ -456,9 +353,8 @@ def _main():
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
if opts.webhook_smtp_tls_url:
|
if opts.webhook_smtp_tls_url:
|
||||||
indent_value = 2 if opts.prettify_json else None
|
|
||||||
webhook_client.save_smtp_tls_report_to_webhook(
|
webhook_client.save_smtp_tls_report_to_webhook(
|
||||||
json.dumps(report, ensure_ascii=False, indent=indent_value)
|
json.dumps(report, ensure_ascii=False, indent=2)
|
||||||
)
|
)
|
||||||
except Exception as error_:
|
except Exception as error_:
|
||||||
logger.error("Webhook Error: {0}".format(error_.__str__()))
|
logger.error("Webhook Error: {0}".format(error_.__str__()))
|
||||||
@@ -579,12 +475,6 @@ def _main():
|
|||||||
"--debug", action="store_true", help="print debugging information"
|
"--debug", action="store_true", help="print debugging information"
|
||||||
)
|
)
|
||||||
arg_parser.add_argument("--log-file", default=None, help="output logging to a file")
|
arg_parser.add_argument("--log-file", default=None, help="output logging to a file")
|
||||||
arg_parser.add_argument(
|
|
||||||
"--no-prettify-json",
|
|
||||||
action="store_false",
|
|
||||||
dest="prettify_json",
|
|
||||||
help="output JSON in a single line without indentation",
|
|
||||||
)
|
|
||||||
arg_parser.add_argument("-v", "--version", action="version", version=__version__)
|
arg_parser.add_argument("-v", "--version", action="version", version=__version__)
|
||||||
|
|
||||||
aggregate_reports = []
|
aggregate_reports = []
|
||||||
@@ -614,7 +504,6 @@ def _main():
|
|||||||
dns_timeout=args.dns_timeout,
|
dns_timeout=args.dns_timeout,
|
||||||
debug=args.debug,
|
debug=args.debug,
|
||||||
verbose=args.verbose,
|
verbose=args.verbose,
|
||||||
prettify_json=args.prettify_json,
|
|
||||||
save_aggregate=False,
|
save_aggregate=False,
|
||||||
save_forensic=False,
|
save_forensic=False,
|
||||||
save_smtp_tls=False,
|
save_smtp_tls=False,
|
||||||
@@ -658,7 +547,7 @@ def _main():
|
|||||||
elasticsearch_monthly_indexes=False,
|
elasticsearch_monthly_indexes=False,
|
||||||
elasticsearch_username=None,
|
elasticsearch_username=None,
|
||||||
elasticsearch_password=None,
|
elasticsearch_password=None,
|
||||||
elasticsearch_api_key=None,
|
elasticsearch_apiKey=None,
|
||||||
opensearch_hosts=None,
|
opensearch_hosts=None,
|
||||||
opensearch_timeout=60,
|
opensearch_timeout=60,
|
||||||
opensearch_number_of_shards=1,
|
opensearch_number_of_shards=1,
|
||||||
@@ -670,7 +559,7 @@ def _main():
|
|||||||
opensearch_monthly_indexes=False,
|
opensearch_monthly_indexes=False,
|
||||||
opensearch_username=None,
|
opensearch_username=None,
|
||||||
opensearch_password=None,
|
opensearch_password=None,
|
||||||
opensearch_api_key=None,
|
opensearch_apiKey=None,
|
||||||
kafka_hosts=None,
|
kafka_hosts=None,
|
||||||
kafka_username=None,
|
kafka_username=None,
|
||||||
kafka_password=None,
|
kafka_password=None,
|
||||||
@@ -697,13 +586,6 @@ def _main():
|
|||||||
s3_secret_access_key=None,
|
s3_secret_access_key=None,
|
||||||
syslog_server=None,
|
syslog_server=None,
|
||||||
syslog_port=None,
|
syslog_port=None,
|
||||||
syslog_protocol=None,
|
|
||||||
syslog_cafile_path=None,
|
|
||||||
syslog_certfile_path=None,
|
|
||||||
syslog_keyfile_path=None,
|
|
||||||
syslog_timeout=None,
|
|
||||||
syslog_retry_attempts=None,
|
|
||||||
syslog_retry_delay=None,
|
|
||||||
gmail_api_credentials_file=None,
|
gmail_api_credentials_file=None,
|
||||||
gmail_api_token_file=None,
|
gmail_api_token_file=None,
|
||||||
gmail_api_include_spam_trash=False,
|
gmail_api_include_spam_trash=False,
|
||||||
@@ -733,7 +615,6 @@ def _main():
|
|||||||
webhook_forensic_url=None,
|
webhook_forensic_url=None,
|
||||||
webhook_smtp_tls_url=None,
|
webhook_smtp_tls_url=None,
|
||||||
webhook_timeout=60,
|
webhook_timeout=60,
|
||||||
normalize_timespan_threshold_hours=24.0,
|
|
||||||
)
|
)
|
||||||
args = arg_parser.parse_args()
|
args = arg_parser.parse_args()
|
||||||
|
|
||||||
@@ -744,24 +625,14 @@ def _main():
|
|||||||
exit(-1)
|
exit(-1)
|
||||||
opts.silent = True
|
opts.silent = True
|
||||||
config = ConfigParser()
|
config = ConfigParser()
|
||||||
index_prefix_domain_map = None
|
|
||||||
config.read(args.config_file)
|
config.read(args.config_file)
|
||||||
if "general" in config.sections():
|
if "general" in config.sections():
|
||||||
general_config = config["general"]
|
general_config = config["general"]
|
||||||
if "silent" in general_config:
|
|
||||||
opts.silent = bool(general_config.getboolean("silent"))
|
|
||||||
if "normalize_timespan_threshold_hours" in general_config:
|
|
||||||
opts.normalize_timespan_threshold_hours = general_config.getfloat(
|
|
||||||
"normalize_timespan_threshold_hours"
|
|
||||||
)
|
|
||||||
if "index_prefix_domain_map" in general_config:
|
|
||||||
with open(general_config["index_prefix_domain_map"]) as f:
|
|
||||||
index_prefix_domain_map = yaml.safe_load(f)
|
|
||||||
if "offline" in general_config:
|
if "offline" in general_config:
|
||||||
opts.offline = bool(general_config.getboolean("offline"))
|
opts.offline = general_config.getboolean("offline")
|
||||||
if "strip_attachment_payloads" in general_config:
|
if "strip_attachment_payloads" in general_config:
|
||||||
opts.strip_attachment_payloads = bool(
|
opts.strip_attachment_payloads = general_config.getboolean(
|
||||||
general_config.getboolean("strip_attachment_payloads")
|
"strip_attachment_payloads"
|
||||||
)
|
)
|
||||||
if "output" in general_config:
|
if "output" in general_config:
|
||||||
opts.output = general_config["output"]
|
opts.output = general_config["output"]
|
||||||
@@ -779,8 +650,6 @@ def _main():
|
|||||||
opts.smtp_tls_csv_filename = general_config["smtp_tls_csv_filename"]
|
opts.smtp_tls_csv_filename = general_config["smtp_tls_csv_filename"]
|
||||||
if "dns_timeout" in general_config:
|
if "dns_timeout" in general_config:
|
||||||
opts.dns_timeout = general_config.getfloat("dns_timeout")
|
opts.dns_timeout = general_config.getfloat("dns_timeout")
|
||||||
if opts.dns_timeout is None:
|
|
||||||
opts.dns_timeout = 2
|
|
||||||
if "dns_test_address" in general_config:
|
if "dns_test_address" in general_config:
|
||||||
opts.dns_test_address = general_config["dns_test_address"]
|
opts.dns_test_address = general_config["dns_test_address"]
|
||||||
if "nameservers" in general_config:
|
if "nameservers" in general_config:
|
||||||
@@ -803,19 +672,19 @@ def _main():
|
|||||||
)
|
)
|
||||||
exit(-1)
|
exit(-1)
|
||||||
if "save_aggregate" in general_config:
|
if "save_aggregate" in general_config:
|
||||||
opts.save_aggregate = bool(general_config.getboolean("save_aggregate"))
|
opts.save_aggregate = general_config["save_aggregate"]
|
||||||
if "save_forensic" in general_config:
|
if "save_forensic" in general_config:
|
||||||
opts.save_forensic = bool(general_config.getboolean("save_forensic"))
|
opts.save_forensic = general_config["save_forensic"]
|
||||||
if "save_smtp_tls" in general_config:
|
if "save_smtp_tls" in general_config:
|
||||||
opts.save_smtp_tls = bool(general_config.getboolean("save_smtp_tls"))
|
opts.save_smtp_tls = general_config["save_smtp_tls"]
|
||||||
if "debug" in general_config:
|
if "debug" in general_config:
|
||||||
opts.debug = bool(general_config.getboolean("debug"))
|
opts.debug = general_config.getboolean("debug")
|
||||||
if "verbose" in general_config:
|
if "verbose" in general_config:
|
||||||
opts.verbose = bool(general_config.getboolean("verbose"))
|
opts.verbose = general_config.getboolean("verbose")
|
||||||
if "silent" in general_config:
|
if "silent" in general_config:
|
||||||
opts.silent = bool(general_config.getboolean("silent"))
|
opts.silent = general_config.getboolean("silent")
|
||||||
if "warnings" in general_config:
|
if "warnings" in general_config:
|
||||||
opts.warnings = bool(general_config.getboolean("warnings"))
|
opts.warnings = general_config.getboolean("warnings")
|
||||||
if "log_file" in general_config:
|
if "log_file" in general_config:
|
||||||
opts.log_file = general_config["log_file"]
|
opts.log_file = general_config["log_file"]
|
||||||
if "n_procs" in general_config:
|
if "n_procs" in general_config:
|
||||||
@@ -825,15 +694,13 @@ def _main():
|
|||||||
else:
|
else:
|
||||||
opts.ip_db_path = None
|
opts.ip_db_path = None
|
||||||
if "always_use_local_files" in general_config:
|
if "always_use_local_files" in general_config:
|
||||||
opts.always_use_local_files = bool(
|
opts.always_use_local_files = general_config.getboolean(
|
||||||
general_config.getboolean("always_use_local_files")
|
"always_use_local_files"
|
||||||
)
|
)
|
||||||
if "reverse_dns_map_path" in general_config:
|
if "reverse_dns_map_path" in general_config:
|
||||||
opts.reverse_dns_map_path = general_config["reverse_dns_path"]
|
opts.reverse_dns_map_path = general_config["reverse_dns_path"]
|
||||||
if "reverse_dns_map_url" in general_config:
|
if "reverse_dns_map_url" in general_config:
|
||||||
opts.reverse_dns_map_url = general_config["reverse_dns_url"]
|
opts.reverse_dns_map_url = general_config["reverse_dns_url"]
|
||||||
if "prettify_json" in general_config:
|
|
||||||
opts.prettify_json = bool(general_config.getboolean("prettify_json"))
|
|
||||||
|
|
||||||
if "mailbox" in config.sections():
|
if "mailbox" in config.sections():
|
||||||
mailbox_config = config["mailbox"]
|
mailbox_config = config["mailbox"]
|
||||||
@@ -844,11 +711,11 @@ def _main():
|
|||||||
if "archive_folder" in mailbox_config:
|
if "archive_folder" in mailbox_config:
|
||||||
opts.mailbox_archive_folder = mailbox_config["archive_folder"]
|
opts.mailbox_archive_folder = mailbox_config["archive_folder"]
|
||||||
if "watch" in mailbox_config:
|
if "watch" in mailbox_config:
|
||||||
opts.mailbox_watch = bool(mailbox_config.getboolean("watch"))
|
opts.mailbox_watch = mailbox_config.getboolean("watch")
|
||||||
if "delete" in mailbox_config:
|
if "delete" in mailbox_config:
|
||||||
opts.mailbox_delete = bool(mailbox_config.getboolean("delete"))
|
opts.mailbox_delete = mailbox_config.getboolean("delete")
|
||||||
if "test" in mailbox_config:
|
if "test" in mailbox_config:
|
||||||
opts.mailbox_test = bool(mailbox_config.getboolean("test"))
|
opts.mailbox_test = mailbox_config.getboolean("test")
|
||||||
if "batch_size" in mailbox_config:
|
if "batch_size" in mailbox_config:
|
||||||
opts.mailbox_batch_size = mailbox_config.getint("batch_size")
|
opts.mailbox_batch_size = mailbox_config.getint("batch_size")
|
||||||
if "check_timeout" in mailbox_config:
|
if "check_timeout" in mailbox_config:
|
||||||
@@ -872,15 +739,14 @@ def _main():
|
|||||||
if "port" in imap_config:
|
if "port" in imap_config:
|
||||||
opts.imap_port = imap_config.getint("port")
|
opts.imap_port = imap_config.getint("port")
|
||||||
if "timeout" in imap_config:
|
if "timeout" in imap_config:
|
||||||
opts.imap_timeout = imap_config.getint("timeout")
|
opts.imap_timeout = imap_config.getfloat("timeout")
|
||||||
if "max_retries" in imap_config:
|
if "max_retries" in imap_config:
|
||||||
opts.imap_max_retries = imap_config.getint("max_retries")
|
opts.imap_max_retries = imap_config.getint("max_retries")
|
||||||
if "ssl" in imap_config:
|
if "ssl" in imap_config:
|
||||||
opts.imap_ssl = bool(imap_config.getboolean("ssl"))
|
opts.imap_ssl = imap_config.getboolean("ssl")
|
||||||
if "skip_certificate_verification" in imap_config:
|
if "skip_certificate_verification" in imap_config:
|
||||||
opts.imap_skip_certificate_verification = bool(
|
imap_verify = imap_config.getboolean("skip_certificate_verification")
|
||||||
imap_config.getboolean("skip_certificate_verification")
|
opts.imap_skip_certificate_verification = imap_verify
|
||||||
)
|
|
||||||
if "user" in imap_config:
|
if "user" in imap_config:
|
||||||
opts.imap_user = imap_config["user"]
|
opts.imap_user = imap_config["user"]
|
||||||
else:
|
else:
|
||||||
@@ -908,7 +774,7 @@ def _main():
|
|||||||
"section instead."
|
"section instead."
|
||||||
)
|
)
|
||||||
if "watch" in imap_config:
|
if "watch" in imap_config:
|
||||||
opts.mailbox_watch = bool(imap_config.getboolean("watch"))
|
opts.mailbox_watch = imap_config.getboolean("watch")
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"Use of the watch option in the imap "
|
"Use of the watch option in the imap "
|
||||||
"configuration section has been deprecated. "
|
"configuration section has been deprecated. "
|
||||||
@@ -923,7 +789,7 @@ def _main():
|
|||||||
"section instead."
|
"section instead."
|
||||||
)
|
)
|
||||||
if "test" in imap_config:
|
if "test" in imap_config:
|
||||||
opts.mailbox_test = bool(imap_config.getboolean("test"))
|
opts.mailbox_test = imap_config.getboolean("test")
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"Use of the test option in the imap "
|
"Use of the test option in the imap "
|
||||||
"configuration section has been deprecated. "
|
"configuration section has been deprecated. "
|
||||||
@@ -1017,8 +883,8 @@ def _main():
|
|||||||
opts.graph_url = graph_config["graph_url"]
|
opts.graph_url = graph_config["graph_url"]
|
||||||
|
|
||||||
if "allow_unencrypted_storage" in graph_config:
|
if "allow_unencrypted_storage" in graph_config:
|
||||||
opts.graph_allow_unencrypted_storage = bool(
|
opts.graph_allow_unencrypted_storage = graph_config.getboolean(
|
||||||
graph_config.getboolean("allow_unencrypted_storage")
|
"allow_unencrypted_storage"
|
||||||
)
|
)
|
||||||
|
|
||||||
if "elasticsearch" in config:
|
if "elasticsearch" in config:
|
||||||
@@ -1046,22 +912,18 @@ def _main():
|
|||||||
if "index_prefix" in elasticsearch_config:
|
if "index_prefix" in elasticsearch_config:
|
||||||
opts.elasticsearch_index_prefix = elasticsearch_config["index_prefix"]
|
opts.elasticsearch_index_prefix = elasticsearch_config["index_prefix"]
|
||||||
if "monthly_indexes" in elasticsearch_config:
|
if "monthly_indexes" in elasticsearch_config:
|
||||||
monthly = bool(elasticsearch_config.getboolean("monthly_indexes"))
|
monthly = elasticsearch_config.getboolean("monthly_indexes")
|
||||||
opts.elasticsearch_monthly_indexes = monthly
|
opts.elasticsearch_monthly_indexes = monthly
|
||||||
if "ssl" in elasticsearch_config:
|
if "ssl" in elasticsearch_config:
|
||||||
opts.elasticsearch_ssl = bool(elasticsearch_config.getboolean("ssl"))
|
opts.elasticsearch_ssl = elasticsearch_config.getboolean("ssl")
|
||||||
if "cert_path" in elasticsearch_config:
|
if "cert_path" in elasticsearch_config:
|
||||||
opts.elasticsearch_ssl_cert_path = elasticsearch_config["cert_path"]
|
opts.elasticsearch_ssl_cert_path = elasticsearch_config["cert_path"]
|
||||||
if "user" in elasticsearch_config:
|
if "user" in elasticsearch_config:
|
||||||
opts.elasticsearch_username = elasticsearch_config["user"]
|
opts.elasticsearch_username = elasticsearch_config["user"]
|
||||||
if "password" in elasticsearch_config:
|
if "password" in elasticsearch_config:
|
||||||
opts.elasticsearch_password = elasticsearch_config["password"]
|
opts.elasticsearch_password = elasticsearch_config["password"]
|
||||||
# Until 8.20
|
|
||||||
if "apiKey" in elasticsearch_config:
|
if "apiKey" in elasticsearch_config:
|
||||||
opts.elasticsearch_apiKey = elasticsearch_config["apiKey"]
|
opts.elasticsearch_apiKey = elasticsearch_config["apiKey"]
|
||||||
# Since 8.20
|
|
||||||
if "api_key" in elasticsearch_config:
|
|
||||||
opts.elasticsearch_apiKey = elasticsearch_config["api_key"]
|
|
||||||
|
|
||||||
if "opensearch" in config:
|
if "opensearch" in config:
|
||||||
opensearch_config = config["opensearch"]
|
opensearch_config = config["opensearch"]
|
||||||
@@ -1086,22 +948,18 @@ def _main():
|
|||||||
if "index_prefix" in opensearch_config:
|
if "index_prefix" in opensearch_config:
|
||||||
opts.opensearch_index_prefix = opensearch_config["index_prefix"]
|
opts.opensearch_index_prefix = opensearch_config["index_prefix"]
|
||||||
if "monthly_indexes" in opensearch_config:
|
if "monthly_indexes" in opensearch_config:
|
||||||
monthly = bool(opensearch_config.getboolean("monthly_indexes"))
|
monthly = opensearch_config.getboolean("monthly_indexes")
|
||||||
opts.opensearch_monthly_indexes = monthly
|
opts.opensearch_monthly_indexes = monthly
|
||||||
if "ssl" in opensearch_config:
|
if "ssl" in opensearch_config:
|
||||||
opts.opensearch_ssl = bool(opensearch_config.getboolean("ssl"))
|
opts.opensearch_ssl = opensearch_config.getboolean("ssl")
|
||||||
if "cert_path" in opensearch_config:
|
if "cert_path" in opensearch_config:
|
||||||
opts.opensearch_ssl_cert_path = opensearch_config["cert_path"]
|
opts.opensearch_ssl_cert_path = opensearch_config["cert_path"]
|
||||||
if "user" in opensearch_config:
|
if "user" in opensearch_config:
|
||||||
opts.opensearch_username = opensearch_config["user"]
|
opts.opensearch_username = opensearch_config["user"]
|
||||||
if "password" in opensearch_config:
|
if "password" in opensearch_config:
|
||||||
opts.opensearch_password = opensearch_config["password"]
|
opts.opensearch_password = opensearch_config["password"]
|
||||||
# Until 8.20
|
|
||||||
if "apiKey" in opensearch_config:
|
if "apiKey" in opensearch_config:
|
||||||
opts.opensearch_apiKey = opensearch_config["apiKey"]
|
opts.opensearch_apiKey = opensearch_config["apiKey"]
|
||||||
# Since 8.20
|
|
||||||
if "api_key" in opensearch_config:
|
|
||||||
opts.opensearch_apiKey = opensearch_config["api_key"]
|
|
||||||
|
|
||||||
if "splunk_hec" in config.sections():
|
if "splunk_hec" in config.sections():
|
||||||
hec_config = config["splunk_hec"]
|
hec_config = config["splunk_hec"]
|
||||||
@@ -1143,11 +1001,9 @@ def _main():
|
|||||||
if "password" in kafka_config:
|
if "password" in kafka_config:
|
||||||
opts.kafka_password = kafka_config["password"]
|
opts.kafka_password = kafka_config["password"]
|
||||||
if "ssl" in kafka_config:
|
if "ssl" in kafka_config:
|
||||||
opts.kafka_ssl = bool(kafka_config.getboolean("ssl"))
|
opts.kafka_ssl = kafka_config.getboolean("ssl")
|
||||||
if "skip_certificate_verification" in kafka_config:
|
if "skip_certificate_verification" in kafka_config:
|
||||||
kafka_verify = bool(
|
kafka_verify = kafka_config.getboolean("skip_certificate_verification")
|
||||||
kafka_config.getboolean("skip_certificate_verification")
|
|
||||||
)
|
|
||||||
opts.kafka_skip_certificate_verification = kafka_verify
|
opts.kafka_skip_certificate_verification = kafka_verify
|
||||||
if "aggregate_topic" in kafka_config:
|
if "aggregate_topic" in kafka_config:
|
||||||
opts.kafka_aggregate_topic = kafka_config["aggregate_topic"]
|
opts.kafka_aggregate_topic = kafka_config["aggregate_topic"]
|
||||||
@@ -1179,11 +1035,9 @@ def _main():
|
|||||||
if "port" in smtp_config:
|
if "port" in smtp_config:
|
||||||
opts.smtp_port = smtp_config.getint("port")
|
opts.smtp_port = smtp_config.getint("port")
|
||||||
if "ssl" in smtp_config:
|
if "ssl" in smtp_config:
|
||||||
opts.smtp_ssl = bool(smtp_config.getboolean("ssl"))
|
opts.smtp_ssl = smtp_config.getboolean("ssl")
|
||||||
if "skip_certificate_verification" in smtp_config:
|
if "skip_certificate_verification" in smtp_config:
|
||||||
smtp_verify = bool(
|
smtp_verify = smtp_config.getboolean("skip_certificate_verification")
|
||||||
smtp_config.getboolean("skip_certificate_verification")
|
|
||||||
)
|
|
||||||
opts.smtp_skip_certificate_verification = smtp_verify
|
opts.smtp_skip_certificate_verification = smtp_verify
|
||||||
if "user" in smtp_config:
|
if "user" in smtp_config:
|
||||||
opts.smtp_user = smtp_config["user"]
|
opts.smtp_user = smtp_config["user"]
|
||||||
@@ -1246,54 +1100,28 @@ def _main():
|
|||||||
opts.syslog_port = syslog_config["port"]
|
opts.syslog_port = syslog_config["port"]
|
||||||
else:
|
else:
|
||||||
opts.syslog_port = 514
|
opts.syslog_port = 514
|
||||||
if "protocol" in syslog_config:
|
|
||||||
opts.syslog_protocol = syslog_config["protocol"]
|
|
||||||
else:
|
|
||||||
opts.syslog_protocol = "udp"
|
|
||||||
if "cafile_path" in syslog_config:
|
|
||||||
opts.syslog_cafile_path = syslog_config["cafile_path"]
|
|
||||||
if "certfile_path" in syslog_config:
|
|
||||||
opts.syslog_certfile_path = syslog_config["certfile_path"]
|
|
||||||
if "keyfile_path" in syslog_config:
|
|
||||||
opts.syslog_keyfile_path = syslog_config["keyfile_path"]
|
|
||||||
if "timeout" in syslog_config:
|
|
||||||
opts.syslog_timeout = float(syslog_config["timeout"])
|
|
||||||
else:
|
|
||||||
opts.syslog_timeout = 5.0
|
|
||||||
if "retry_attempts" in syslog_config:
|
|
||||||
opts.syslog_retry_attempts = int(syslog_config["retry_attempts"])
|
|
||||||
else:
|
|
||||||
opts.syslog_retry_attempts = 3
|
|
||||||
if "retry_delay" in syslog_config:
|
|
||||||
opts.syslog_retry_delay = int(syslog_config["retry_delay"])
|
|
||||||
else:
|
|
||||||
opts.syslog_retry_delay = 5
|
|
||||||
|
|
||||||
if "gmail_api" in config.sections():
|
if "gmail_api" in config.sections():
|
||||||
gmail_api_config = config["gmail_api"]
|
gmail_api_config = config["gmail_api"]
|
||||||
opts.gmail_api_credentials_file = gmail_api_config.get("credentials_file")
|
opts.gmail_api_credentials_file = gmail_api_config.get("credentials_file")
|
||||||
opts.gmail_api_token_file = gmail_api_config.get("token_file", ".token")
|
opts.gmail_api_token_file = gmail_api_config.get("token_file", ".token")
|
||||||
opts.gmail_api_include_spam_trash = bool(
|
opts.gmail_api_include_spam_trash = gmail_api_config.getboolean(
|
||||||
gmail_api_config.getboolean("include_spam_trash", False)
|
"include_spam_trash", False
|
||||||
)
|
)
|
||||||
opts.gmail_api_paginate_messages = bool(
|
opts.gmail_api_paginate_messages = gmail_api_config.getboolean(
|
||||||
gmail_api_config.getboolean("paginate_messages", True)
|
"paginate_messages", True
|
||||||
)
|
)
|
||||||
opts.gmail_api_scopes = gmail_api_config.get(
|
opts.gmail_api_scopes = gmail_api_config.get(
|
||||||
"scopes", default_gmail_api_scope
|
"scopes", default_gmail_api_scope
|
||||||
)
|
)
|
||||||
opts.gmail_api_scopes = _str_to_list(opts.gmail_api_scopes)
|
opts.gmail_api_scopes = _str_to_list(opts.gmail_api_scopes)
|
||||||
if "oauth2_port" in gmail_api_config:
|
if "oauth2_port" in gmail_api_config:
|
||||||
opts.gmail_api_oauth2_port = gmail_api_config.getint(
|
opts.gmail_api_oauth2_port = gmail_api_config.get("oauth2_port", 8080)
|
||||||
"oauth2_port", 8080
|
|
||||||
)
|
|
||||||
|
|
||||||
if "maildir" in config.sections():
|
if "maildir" in config.sections():
|
||||||
maildir_api_config = config["maildir"]
|
maildir_api_config = config["maildir"]
|
||||||
opts.maildir_path = maildir_api_config.get("maildir_path")
|
opts.maildir_path = maildir_api_config.get("maildir_path")
|
||||||
opts.maildir_create = bool(
|
opts.maildir_create = maildir_api_config.get("maildir_create")
|
||||||
maildir_api_config.getboolean("maildir_create", fallback=False)
|
|
||||||
)
|
|
||||||
|
|
||||||
if "log_analytics" in config.sections():
|
if "log_analytics" in config.sections():
|
||||||
log_analytics_config = config["log_analytics"]
|
log_analytics_config = config["log_analytics"]
|
||||||
@@ -1339,7 +1167,7 @@ def _main():
|
|||||||
if "smtp_tls_url" in webhook_config:
|
if "smtp_tls_url" in webhook_config:
|
||||||
opts.webhook_smtp_tls_url = webhook_config["smtp_tls_url"]
|
opts.webhook_smtp_tls_url = webhook_config["smtp_tls_url"]
|
||||||
if "timeout" in webhook_config:
|
if "timeout" in webhook_config:
|
||||||
opts.webhook_timeout = webhook_config.getint("timeout")
|
opts.webhook_timeout = webhook_config["timeout"]
|
||||||
|
|
||||||
logger.setLevel(logging.ERROR)
|
logger.setLevel(logging.ERROR)
|
||||||
|
|
||||||
@@ -1388,19 +1216,14 @@ def _main():
|
|||||||
es_aggregate_index = "{0}{1}".format(prefix, es_aggregate_index)
|
es_aggregate_index = "{0}{1}".format(prefix, es_aggregate_index)
|
||||||
es_forensic_index = "{0}{1}".format(prefix, es_forensic_index)
|
es_forensic_index = "{0}{1}".format(prefix, es_forensic_index)
|
||||||
es_smtp_tls_index = "{0}{1}".format(prefix, es_smtp_tls_index)
|
es_smtp_tls_index = "{0}{1}".format(prefix, es_smtp_tls_index)
|
||||||
elastic_timeout_value = (
|
|
||||||
float(opts.elasticsearch_timeout)
|
|
||||||
if opts.elasticsearch_timeout is not None
|
|
||||||
else 60.0
|
|
||||||
)
|
|
||||||
elastic.set_hosts(
|
elastic.set_hosts(
|
||||||
opts.elasticsearch_hosts,
|
opts.elasticsearch_hosts,
|
||||||
use_ssl=opts.elasticsearch_ssl,
|
opts.elasticsearch_ssl,
|
||||||
ssl_cert_path=opts.elasticsearch_ssl_cert_path,
|
opts.elasticsearch_ssl_cert_path,
|
||||||
username=opts.elasticsearch_username,
|
opts.elasticsearch_username,
|
||||||
password=opts.elasticsearch_password,
|
opts.elasticsearch_password,
|
||||||
api_key=opts.elasticsearch_api_key,
|
opts.elasticsearch_apiKey,
|
||||||
timeout=elastic_timeout_value,
|
timeout=opts.elasticsearch_timeout,
|
||||||
)
|
)
|
||||||
elastic.migrate_indexes(
|
elastic.migrate_indexes(
|
||||||
aggregate_indexes=[es_aggregate_index],
|
aggregate_indexes=[es_aggregate_index],
|
||||||
@@ -1425,19 +1248,14 @@ def _main():
|
|||||||
os_aggregate_index = "{0}{1}".format(prefix, os_aggregate_index)
|
os_aggregate_index = "{0}{1}".format(prefix, os_aggregate_index)
|
||||||
os_forensic_index = "{0}{1}".format(prefix, os_forensic_index)
|
os_forensic_index = "{0}{1}".format(prefix, os_forensic_index)
|
||||||
os_smtp_tls_index = "{0}{1}".format(prefix, os_smtp_tls_index)
|
os_smtp_tls_index = "{0}{1}".format(prefix, os_smtp_tls_index)
|
||||||
opensearch_timeout_value = (
|
|
||||||
float(opts.opensearch_timeout)
|
|
||||||
if opts.opensearch_timeout is not None
|
|
||||||
else 60.0
|
|
||||||
)
|
|
||||||
opensearch.set_hosts(
|
opensearch.set_hosts(
|
||||||
opts.opensearch_hosts,
|
opts.opensearch_hosts,
|
||||||
use_ssl=opts.opensearch_ssl,
|
opts.opensearch_ssl,
|
||||||
ssl_cert_path=opts.opensearch_ssl_cert_path,
|
opts.opensearch_ssl_cert_path,
|
||||||
username=opts.opensearch_username,
|
opts.opensearch_username,
|
||||||
password=opts.opensearch_password,
|
opts.opensearch_password,
|
||||||
api_key=opts.opensearch_api_key,
|
opts.opensearch_apiKey,
|
||||||
timeout=opensearch_timeout_value,
|
timeout=opts.opensearch_timeout,
|
||||||
)
|
)
|
||||||
opensearch.migrate_indexes(
|
opensearch.migrate_indexes(
|
||||||
aggregate_indexes=[os_aggregate_index],
|
aggregate_indexes=[os_aggregate_index],
|
||||||
@@ -1465,17 +1283,6 @@ def _main():
|
|||||||
syslog_client = syslog.SyslogClient(
|
syslog_client = syslog.SyslogClient(
|
||||||
server_name=opts.syslog_server,
|
server_name=opts.syslog_server,
|
||||||
server_port=int(opts.syslog_port),
|
server_port=int(opts.syslog_port),
|
||||||
protocol=opts.syslog_protocol or "udp",
|
|
||||||
cafile_path=opts.syslog_cafile_path,
|
|
||||||
certfile_path=opts.syslog_certfile_path,
|
|
||||||
keyfile_path=opts.syslog_keyfile_path,
|
|
||||||
timeout=opts.syslog_timeout if opts.syslog_timeout is not None else 5.0,
|
|
||||||
retry_attempts=opts.syslog_retry_attempts
|
|
||||||
if opts.syslog_retry_attempts is not None
|
|
||||||
else 3,
|
|
||||||
retry_delay=opts.syslog_retry_delay
|
|
||||||
if opts.syslog_retry_delay is not None
|
|
||||||
else 5,
|
|
||||||
)
|
)
|
||||||
except Exception as error_:
|
except Exception as error_:
|
||||||
logger.error("Syslog Error: {0}".format(error_.__str__()))
|
logger.error("Syslog Error: {0}".format(error_.__str__()))
|
||||||
@@ -1557,23 +1364,16 @@ def _main():
|
|||||||
|
|
||||||
results = []
|
results = []
|
||||||
|
|
||||||
pbar = None
|
|
||||||
if sys.stdout.isatty():
|
if sys.stdout.isatty():
|
||||||
pbar = tqdm(total=len(file_paths))
|
pbar = tqdm(total=len(file_paths))
|
||||||
|
|
||||||
n_procs = int(opts.n_procs or 1)
|
for batch_index in range(math.ceil(len(file_paths) / opts.n_procs)):
|
||||||
if n_procs < 1:
|
|
||||||
n_procs = 1
|
|
||||||
|
|
||||||
# Capture the current log level to pass to child processes
|
|
||||||
current_log_level = logger.level
|
|
||||||
current_log_file = opts.log_file
|
|
||||||
|
|
||||||
for batch_index in range((len(file_paths) + n_procs - 1) // n_procs):
|
|
||||||
processes = []
|
processes = []
|
||||||
connections = []
|
connections = []
|
||||||
|
|
||||||
for proc_index in range(n_procs * batch_index, n_procs * (batch_index + 1)):
|
for proc_index in range(
|
||||||
|
opts.n_procs * batch_index, opts.n_procs * (batch_index + 1)
|
||||||
|
):
|
||||||
if proc_index >= len(file_paths):
|
if proc_index >= len(file_paths):
|
||||||
break
|
break
|
||||||
|
|
||||||
@@ -1592,10 +1392,7 @@ def _main():
|
|||||||
opts.always_use_local_files,
|
opts.always_use_local_files,
|
||||||
opts.reverse_dns_map_path,
|
opts.reverse_dns_map_path,
|
||||||
opts.reverse_dns_map_url,
|
opts.reverse_dns_map_url,
|
||||||
opts.normalize_timespan_threshold_hours,
|
|
||||||
child_conn,
|
child_conn,
|
||||||
current_log_level,
|
|
||||||
current_log_file,
|
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
processes.append(process)
|
processes.append(process)
|
||||||
@@ -1608,15 +1405,12 @@ def _main():
|
|||||||
|
|
||||||
for proc in processes:
|
for proc in processes:
|
||||||
proc.join()
|
proc.join()
|
||||||
if pbar is not None:
|
if sys.stdout.isatty():
|
||||||
counter += 1
|
counter += 1
|
||||||
pbar.update(1)
|
pbar.update(counter - pbar.n)
|
||||||
|
|
||||||
if pbar is not None:
|
|
||||||
pbar.close()
|
|
||||||
|
|
||||||
for result in results:
|
for result in results:
|
||||||
if isinstance(result[0], ParserError) or result[0] is None:
|
if type(result[0]) is ParserError:
|
||||||
logger.error("Failed to parse {0} - {1}".format(result[1], result[0]))
|
logger.error("Failed to parse {0} - {1}".format(result[1], result[0]))
|
||||||
else:
|
else:
|
||||||
if result[0]["report_type"] == "aggregate":
|
if result[0]["report_type"] == "aggregate":
|
||||||
@@ -1637,11 +1431,6 @@ def _main():
|
|||||||
smtp_tls_reports.append(result[0]["report"])
|
smtp_tls_reports.append(result[0]["report"])
|
||||||
|
|
||||||
for mbox_path in mbox_paths:
|
for mbox_path in mbox_paths:
|
||||||
normalize_timespan_threshold_hours_value = (
|
|
||||||
float(opts.normalize_timespan_threshold_hours)
|
|
||||||
if opts.normalize_timespan_threshold_hours is not None
|
|
||||||
else 24.0
|
|
||||||
)
|
|
||||||
strip = opts.strip_attachment_payloads
|
strip = opts.strip_attachment_payloads
|
||||||
reports = get_dmarc_reports_from_mbox(
|
reports = get_dmarc_reports_from_mbox(
|
||||||
mbox_path,
|
mbox_path,
|
||||||
@@ -1653,17 +1442,12 @@ def _main():
|
|||||||
reverse_dns_map_path=opts.reverse_dns_map_path,
|
reverse_dns_map_path=opts.reverse_dns_map_path,
|
||||||
reverse_dns_map_url=opts.reverse_dns_map_url,
|
reverse_dns_map_url=opts.reverse_dns_map_url,
|
||||||
offline=opts.offline,
|
offline=opts.offline,
|
||||||
normalize_timespan_threshold_hours=normalize_timespan_threshold_hours_value,
|
|
||||||
)
|
)
|
||||||
aggregate_reports += reports["aggregate_reports"]
|
aggregate_reports += reports["aggregate_reports"]
|
||||||
forensic_reports += reports["forensic_reports"]
|
forensic_reports += reports["forensic_reports"]
|
||||||
smtp_tls_reports += reports["smtp_tls_reports"]
|
smtp_tls_reports += reports["smtp_tls_reports"]
|
||||||
|
|
||||||
mailbox_connection = None
|
mailbox_connection = None
|
||||||
mailbox_batch_size_value = 10
|
|
||||||
mailbox_check_timeout_value = 30
|
|
||||||
normalize_timespan_threshold_hours_value = 24.0
|
|
||||||
|
|
||||||
if opts.imap_host:
|
if opts.imap_host:
|
||||||
try:
|
try:
|
||||||
if opts.imap_user is None or opts.imap_password is None:
|
if opts.imap_user is None or opts.imap_password is None:
|
||||||
@@ -1676,23 +1460,16 @@ def _main():
|
|||||||
if opts.imap_skip_certificate_verification:
|
if opts.imap_skip_certificate_verification:
|
||||||
logger.debug("Skipping IMAP certificate verification")
|
logger.debug("Skipping IMAP certificate verification")
|
||||||
verify = False
|
verify = False
|
||||||
if not opts.imap_ssl:
|
if opts.imap_ssl is False:
|
||||||
ssl = False
|
ssl = False
|
||||||
|
|
||||||
imap_timeout = (
|
|
||||||
int(opts.imap_timeout) if opts.imap_timeout is not None else 30
|
|
||||||
)
|
|
||||||
imap_max_retries = (
|
|
||||||
int(opts.imap_max_retries) if opts.imap_max_retries is not None else 4
|
|
||||||
)
|
|
||||||
imap_port_value = int(opts.imap_port) if opts.imap_port is not None else 993
|
|
||||||
mailbox_connection = IMAPConnection(
|
mailbox_connection = IMAPConnection(
|
||||||
host=opts.imap_host,
|
host=opts.imap_host,
|
||||||
port=imap_port_value,
|
port=opts.imap_port,
|
||||||
ssl=ssl,
|
ssl=ssl,
|
||||||
verify=verify,
|
verify=verify,
|
||||||
timeout=imap_timeout,
|
timeout=opts.imap_timeout,
|
||||||
max_retries=imap_max_retries,
|
max_retries=opts.imap_max_retries,
|
||||||
user=opts.imap_user,
|
user=opts.imap_user,
|
||||||
password=opts.imap_password,
|
password=opts.imap_password,
|
||||||
)
|
)
|
||||||
@@ -1713,7 +1490,7 @@ def _main():
|
|||||||
username=opts.graph_user,
|
username=opts.graph_user,
|
||||||
password=opts.graph_password,
|
password=opts.graph_password,
|
||||||
token_file=opts.graph_token_file,
|
token_file=opts.graph_token_file,
|
||||||
allow_unencrypted_storage=bool(opts.graph_allow_unencrypted_storage),
|
allow_unencrypted_storage=opts.graph_allow_unencrypted_storage,
|
||||||
graph_url=opts.graph_url,
|
graph_url=opts.graph_url,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -1758,24 +1535,11 @@ def _main():
|
|||||||
exit(1)
|
exit(1)
|
||||||
|
|
||||||
if mailbox_connection:
|
if mailbox_connection:
|
||||||
mailbox_batch_size_value = (
|
|
||||||
int(opts.mailbox_batch_size) if opts.mailbox_batch_size is not None else 10
|
|
||||||
)
|
|
||||||
mailbox_check_timeout_value = (
|
|
||||||
int(opts.mailbox_check_timeout)
|
|
||||||
if opts.mailbox_check_timeout is not None
|
|
||||||
else 30
|
|
||||||
)
|
|
||||||
normalize_timespan_threshold_hours_value = (
|
|
||||||
float(opts.normalize_timespan_threshold_hours)
|
|
||||||
if opts.normalize_timespan_threshold_hours is not None
|
|
||||||
else 24.0
|
|
||||||
)
|
|
||||||
try:
|
try:
|
||||||
reports = get_dmarc_reports_from_mailbox(
|
reports = get_dmarc_reports_from_mailbox(
|
||||||
connection=mailbox_connection,
|
connection=mailbox_connection,
|
||||||
delete=opts.mailbox_delete,
|
delete=opts.mailbox_delete,
|
||||||
batch_size=mailbox_batch_size_value,
|
batch_size=opts.mailbox_batch_size,
|
||||||
reports_folder=opts.mailbox_reports_folder,
|
reports_folder=opts.mailbox_reports_folder,
|
||||||
archive_folder=opts.mailbox_archive_folder,
|
archive_folder=opts.mailbox_archive_folder,
|
||||||
ip_db_path=opts.ip_db_path,
|
ip_db_path=opts.ip_db_path,
|
||||||
@@ -1787,7 +1551,6 @@ def _main():
|
|||||||
test=opts.mailbox_test,
|
test=opts.mailbox_test,
|
||||||
strip_attachment_payloads=opts.strip_attachment_payloads,
|
strip_attachment_payloads=opts.strip_attachment_payloads,
|
||||||
since=opts.mailbox_since,
|
since=opts.mailbox_since,
|
||||||
normalize_timespan_threshold_hours=normalize_timespan_threshold_hours_value,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
aggregate_reports += reports["aggregate_reports"]
|
aggregate_reports += reports["aggregate_reports"]
|
||||||
@@ -1798,36 +1561,31 @@ def _main():
|
|||||||
logger.exception("Mailbox Error")
|
logger.exception("Mailbox Error")
|
||||||
exit(1)
|
exit(1)
|
||||||
|
|
||||||
parsing_results: ParsingResults = {
|
results = OrderedDict(
|
||||||
"aggregate_reports": aggregate_reports,
|
[
|
||||||
"forensic_reports": forensic_reports,
|
("aggregate_reports", aggregate_reports),
|
||||||
"smtp_tls_reports": smtp_tls_reports,
|
("forensic_reports", forensic_reports),
|
||||||
}
|
("smtp_tls_reports", smtp_tls_reports),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
process_reports(parsing_results)
|
process_reports(results)
|
||||||
|
|
||||||
if opts.smtp_host:
|
if opts.smtp_host:
|
||||||
try:
|
try:
|
||||||
verify = True
|
verify = True
|
||||||
if opts.smtp_skip_certificate_verification:
|
if opts.smtp_skip_certificate_verification:
|
||||||
verify = False
|
verify = False
|
||||||
smtp_port_value = int(opts.smtp_port) if opts.smtp_port is not None else 25
|
|
||||||
smtp_to_value = (
|
|
||||||
list(opts.smtp_to)
|
|
||||||
if isinstance(opts.smtp_to, list)
|
|
||||||
else _str_to_list(str(opts.smtp_to))
|
|
||||||
)
|
|
||||||
email_results(
|
email_results(
|
||||||
parsing_results,
|
results,
|
||||||
opts.smtp_host,
|
opts.smtp_host,
|
||||||
opts.smtp_from,
|
opts.smtp_from,
|
||||||
smtp_to_value,
|
opts.smtp_to,
|
||||||
port=smtp_port_value,
|
port=opts.smtp_port,
|
||||||
verify=verify,
|
verify=verify,
|
||||||
username=opts.smtp_user,
|
username=opts.smtp_user,
|
||||||
password=opts.smtp_password,
|
password=opts.smtp_password,
|
||||||
subject=opts.smtp_subject,
|
subject=opts.smtp_subject,
|
||||||
require_encryption=opts.smtp_ssl,
|
|
||||||
)
|
)
|
||||||
except Exception:
|
except Exception:
|
||||||
logger.exception("Failed to email results")
|
logger.exception("Failed to email results")
|
||||||
@@ -1844,17 +1602,16 @@ def _main():
|
|||||||
archive_folder=opts.mailbox_archive_folder,
|
archive_folder=opts.mailbox_archive_folder,
|
||||||
delete=opts.mailbox_delete,
|
delete=opts.mailbox_delete,
|
||||||
test=opts.mailbox_test,
|
test=opts.mailbox_test,
|
||||||
check_timeout=mailbox_check_timeout_value,
|
check_timeout=opts.mailbox_check_timeout,
|
||||||
nameservers=opts.nameservers,
|
nameservers=opts.nameservers,
|
||||||
dns_timeout=opts.dns_timeout,
|
dns_timeout=opts.dns_timeout,
|
||||||
strip_attachment_payloads=opts.strip_attachment_payloads,
|
strip_attachment_payloads=opts.strip_attachment_payloads,
|
||||||
batch_size=mailbox_batch_size_value,
|
batch_size=opts.mailbox_batch_size,
|
||||||
ip_db_path=opts.ip_db_path,
|
ip_db_path=opts.ip_db_path,
|
||||||
always_use_local_files=opts.always_use_local_files,
|
always_use_local_files=opts.always_use_local_files,
|
||||||
reverse_dns_map_path=opts.reverse_dns_map_path,
|
reverse_dns_map_path=opts.reverse_dns_map_path,
|
||||||
reverse_dns_map_url=opts.reverse_dns_map_url,
|
reverse_dns_map_url=opts.reverse_dns_map_url,
|
||||||
offline=opts.offline,
|
offline=opts.offline,
|
||||||
normalize_timespan_threshold_hours=normalize_timespan_threshold_hours_value,
|
|
||||||
)
|
)
|
||||||
except FileExistsError as error:
|
except FileExistsError as error:
|
||||||
logger.error("{0}".format(error.__str__()))
|
logger.error("{0}".format(error.__str__()))
|
||||||
|
|||||||
@@ -1,3 +0,0 @@
|
|||||||
__version__ = "9.1.0"
|
|
||||||
|
|
||||||
USER_AGENT = f"parsedmarc/{__version__}"
|
|
||||||
@@ -1,29 +1,27 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
from __future__ import annotations
|
from collections import OrderedDict
|
||||||
|
|
||||||
from typing import Any, Optional, Union
|
from elasticsearch_dsl.search import Q
|
||||||
|
|
||||||
from elasticsearch.helpers import reindex
|
|
||||||
from elasticsearch_dsl import (
|
from elasticsearch_dsl import (
|
||||||
Boolean,
|
connections,
|
||||||
Date,
|
Object,
|
||||||
Document,
|
Document,
|
||||||
Index,
|
Index,
|
||||||
|
Nested,
|
||||||
InnerDoc,
|
InnerDoc,
|
||||||
Integer,
|
Integer,
|
||||||
Ip,
|
|
||||||
Nested,
|
|
||||||
Object,
|
|
||||||
Search,
|
|
||||||
Text,
|
Text,
|
||||||
connections,
|
Boolean,
|
||||||
|
Ip,
|
||||||
|
Date,
|
||||||
|
Search,
|
||||||
)
|
)
|
||||||
from elasticsearch_dsl.search import Q
|
from elasticsearch.helpers import reindex
|
||||||
|
|
||||||
from parsedmarc import InvalidForensicReport
|
|
||||||
from parsedmarc.log import logger
|
from parsedmarc.log import logger
|
||||||
from parsedmarc.utils import human_timestamp_to_datetime
|
from parsedmarc.utils import human_timestamp_to_datetime
|
||||||
|
from parsedmarc import InvalidForensicReport
|
||||||
|
|
||||||
|
|
||||||
class ElasticsearchError(Exception):
|
class ElasticsearchError(Exception):
|
||||||
@@ -69,8 +67,6 @@ class _AggregateReportDoc(Document):
|
|||||||
date_range = Date()
|
date_range = Date()
|
||||||
date_begin = Date()
|
date_begin = Date()
|
||||||
date_end = Date()
|
date_end = Date()
|
||||||
normalized_timespan = Boolean()
|
|
||||||
original_timespan_seconds = Integer
|
|
||||||
errors = Text()
|
errors = Text()
|
||||||
published_policy = Object(_PublishedPolicy)
|
published_policy = Object(_PublishedPolicy)
|
||||||
source_ip_address = Ip()
|
source_ip_address = Ip()
|
||||||
@@ -91,18 +87,18 @@ class _AggregateReportDoc(Document):
|
|||||||
dkim_results = Nested(_DKIMResult)
|
dkim_results = Nested(_DKIMResult)
|
||||||
spf_results = Nested(_SPFResult)
|
spf_results = Nested(_SPFResult)
|
||||||
|
|
||||||
def add_policy_override(self, type_: str, comment: str):
|
def add_policy_override(self, type_, comment):
|
||||||
self.policy_overrides.append(_PolicyOverride(type=type_, comment=comment)) # pyright: ignore[reportCallIssue]
|
self.policy_overrides.append(_PolicyOverride(type=type_, comment=comment))
|
||||||
|
|
||||||
def add_dkim_result(self, domain: str, selector: str, result: _DKIMResult):
|
def add_dkim_result(self, domain, selector, result):
|
||||||
self.dkim_results.append(
|
self.dkim_results.append(
|
||||||
_DKIMResult(domain=domain, selector=selector, result=result)
|
_DKIMResult(domain=domain, selector=selector, result=result)
|
||||||
) # pyright: ignore[reportCallIssue]
|
)
|
||||||
|
|
||||||
def add_spf_result(self, domain: str, scope: str, result: _SPFResult):
|
def add_spf_result(self, domain, scope, result):
|
||||||
self.spf_results.append(_SPFResult(domain=domain, scope=scope, result=result)) # pyright: ignore[reportCallIssue]
|
self.spf_results.append(_SPFResult(domain=domain, scope=scope, result=result))
|
||||||
|
|
||||||
def save(self, **kwargs): # pyright: ignore[reportIncompatibleMethodOverride]
|
def save(self, **kwargs):
|
||||||
self.passed_dmarc = False
|
self.passed_dmarc = False
|
||||||
self.passed_dmarc = self.spf_aligned or self.dkim_aligned
|
self.passed_dmarc = self.spf_aligned or self.dkim_aligned
|
||||||
|
|
||||||
@@ -135,26 +131,26 @@ class _ForensicSampleDoc(InnerDoc):
|
|||||||
body = Text()
|
body = Text()
|
||||||
attachments = Nested(_EmailAttachmentDoc)
|
attachments = Nested(_EmailAttachmentDoc)
|
||||||
|
|
||||||
def add_to(self, display_name: str, address: str):
|
def add_to(self, display_name, address):
|
||||||
self.to.append(_EmailAddressDoc(display_name=display_name, address=address)) # pyright: ignore[reportCallIssue]
|
self.to.append(_EmailAddressDoc(display_name=display_name, address=address))
|
||||||
|
|
||||||
def add_reply_to(self, display_name: str, address: str):
|
def add_reply_to(self, display_name, address):
|
||||||
self.reply_to.append(
|
self.reply_to.append(
|
||||||
_EmailAddressDoc(display_name=display_name, address=address)
|
_EmailAddressDoc(display_name=display_name, address=address)
|
||||||
) # pyright: ignore[reportCallIssue]
|
)
|
||||||
|
|
||||||
def add_cc(self, display_name: str, address: str):
|
def add_cc(self, display_name, address):
|
||||||
self.cc.append(_EmailAddressDoc(display_name=display_name, address=address)) # pyright: ignore[reportCallIssue]
|
self.cc.append(_EmailAddressDoc(display_name=display_name, address=address))
|
||||||
|
|
||||||
def add_bcc(self, display_name: str, address: str):
|
def add_bcc(self, display_name, address):
|
||||||
self.bcc.append(_EmailAddressDoc(display_name=display_name, address=address)) # pyright: ignore[reportCallIssue]
|
self.bcc.append(_EmailAddressDoc(display_name=display_name, address=address))
|
||||||
|
|
||||||
def add_attachment(self, filename: str, content_type: str, sha256: str):
|
def add_attachment(self, filename, content_type, sha256):
|
||||||
self.attachments.append(
|
self.attachments.append(
|
||||||
_EmailAttachmentDoc(
|
_EmailAttachmentDoc(
|
||||||
filename=filename, content_type=content_type, sha256=sha256
|
filename=filename, content_type=content_type, sha256=sha256
|
||||||
)
|
)
|
||||||
) # pyright: ignore[reportCallIssue]
|
)
|
||||||
|
|
||||||
|
|
||||||
class _ForensicReportDoc(Document):
|
class _ForensicReportDoc(Document):
|
||||||
@@ -201,15 +197,15 @@ class _SMTPTLSPolicyDoc(InnerDoc):
|
|||||||
|
|
||||||
def add_failure_details(
|
def add_failure_details(
|
||||||
self,
|
self,
|
||||||
result_type: Optional[str] = None,
|
result_type,
|
||||||
ip_address: Optional[str] = None,
|
ip_address,
|
||||||
receiving_ip: Optional[str] = None,
|
receiving_ip,
|
||||||
receiving_mx_helo: Optional[str] = None,
|
receiving_mx_helo,
|
||||||
failed_session_count: Optional[int] = None,
|
failed_session_count,
|
||||||
sending_mta_ip: Optional[str] = None,
|
sending_mta_ip=None,
|
||||||
receiving_mx_hostname: Optional[str] = None,
|
receiving_mx_hostname=None,
|
||||||
additional_information_uri: Optional[str] = None,
|
additional_information_uri=None,
|
||||||
failure_reason_code: Union[str, int, None] = None,
|
failure_reason_code=None,
|
||||||
):
|
):
|
||||||
_details = _SMTPTLSFailureDetailsDoc(
|
_details = _SMTPTLSFailureDetailsDoc(
|
||||||
result_type=result_type,
|
result_type=result_type,
|
||||||
@@ -222,7 +218,7 @@ class _SMTPTLSPolicyDoc(InnerDoc):
|
|||||||
additional_information=additional_information_uri,
|
additional_information=additional_information_uri,
|
||||||
failure_reason_code=failure_reason_code,
|
failure_reason_code=failure_reason_code,
|
||||||
)
|
)
|
||||||
self.failure_details.append(_details) # pyright: ignore[reportCallIssue]
|
self.failure_details.append(_details)
|
||||||
|
|
||||||
|
|
||||||
class _SMTPTLSReportDoc(Document):
|
class _SMTPTLSReportDoc(Document):
|
||||||
@@ -239,14 +235,13 @@ class _SMTPTLSReportDoc(Document):
|
|||||||
|
|
||||||
def add_policy(
|
def add_policy(
|
||||||
self,
|
self,
|
||||||
policy_type: str,
|
policy_type,
|
||||||
policy_domain: str,
|
policy_domain,
|
||||||
successful_session_count: int,
|
successful_session_count,
|
||||||
failed_session_count: int,
|
failed_session_count,
|
||||||
*,
|
policy_string=None,
|
||||||
policy_string: Optional[str] = None,
|
mx_host_patterns=None,
|
||||||
mx_host_patterns: Optional[list[str]] = None,
|
failure_details=None,
|
||||||
failure_details: Optional[str] = None,
|
|
||||||
):
|
):
|
||||||
self.policies.append(
|
self.policies.append(
|
||||||
policy_type=policy_type,
|
policy_type=policy_type,
|
||||||
@@ -256,7 +251,7 @@ class _SMTPTLSReportDoc(Document):
|
|||||||
policy_string=policy_string,
|
policy_string=policy_string,
|
||||||
mx_host_patterns=mx_host_patterns,
|
mx_host_patterns=mx_host_patterns,
|
||||||
failure_details=failure_details,
|
failure_details=failure_details,
|
||||||
) # pyright: ignore[reportCallIssue]
|
)
|
||||||
|
|
||||||
|
|
||||||
class AlreadySaved(ValueError):
|
class AlreadySaved(ValueError):
|
||||||
@@ -264,25 +259,24 @@ class AlreadySaved(ValueError):
|
|||||||
|
|
||||||
|
|
||||||
def set_hosts(
|
def set_hosts(
|
||||||
hosts: Union[str, list[str]],
|
hosts,
|
||||||
*,
|
use_ssl=False,
|
||||||
use_ssl: bool = False,
|
ssl_cert_path=None,
|
||||||
ssl_cert_path: Optional[str] = None,
|
username=None,
|
||||||
username: Optional[str] = None,
|
password=None,
|
||||||
password: Optional[str] = None,
|
apiKey=None,
|
||||||
api_key: Optional[str] = None,
|
timeout=60.0,
|
||||||
timeout: float = 60.0,
|
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Sets the Elasticsearch hosts to use
|
Sets the Elasticsearch hosts to use
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
hosts (str | list[str]): A single hostname or URL, or list of hostnames or URLs
|
hosts (str): A single hostname or URL, or list of hostnames or URLs
|
||||||
use_ssl (bool): Use an HTTPS connection to the server
|
use_ssl (bool): Use a HTTPS connection to the server
|
||||||
ssl_cert_path (str): Path to the certificate chain
|
ssl_cert_path (str): Path to the certificate chain
|
||||||
username (str): The username to use for authentication
|
username (str): The username to use for authentication
|
||||||
password (str): The password to use for authentication
|
password (str): The password to use for authentication
|
||||||
api_key (str): The Base64 encoded API key to use for authentication
|
apiKey (str): The Base64 encoded API key to use for authentication
|
||||||
timeout (float): Timeout in seconds
|
timeout (float): Timeout in seconds
|
||||||
"""
|
"""
|
||||||
if not isinstance(hosts, list):
|
if not isinstance(hosts, list):
|
||||||
@@ -295,14 +289,14 @@ def set_hosts(
|
|||||||
conn_params["ca_certs"] = ssl_cert_path
|
conn_params["ca_certs"] = ssl_cert_path
|
||||||
else:
|
else:
|
||||||
conn_params["verify_certs"] = False
|
conn_params["verify_certs"] = False
|
||||||
if username and password:
|
if username:
|
||||||
conn_params["http_auth"] = username + ":" + password
|
conn_params["http_auth"] = username + ":" + password
|
||||||
if api_key:
|
if apiKey:
|
||||||
conn_params["api_key"] = api_key
|
conn_params["api_key"] = apiKey
|
||||||
connections.create_connection(**conn_params)
|
connections.create_connection(**conn_params)
|
||||||
|
|
||||||
|
|
||||||
def create_indexes(names: list[str], settings: Optional[dict[str, Any]] = None):
|
def create_indexes(names, settings=None):
|
||||||
"""
|
"""
|
||||||
Create Elasticsearch indexes
|
Create Elasticsearch indexes
|
||||||
|
|
||||||
@@ -325,10 +319,7 @@ def create_indexes(names: list[str], settings: Optional[dict[str, Any]] = None):
|
|||||||
raise ElasticsearchError("Elasticsearch error: {0}".format(e.__str__()))
|
raise ElasticsearchError("Elasticsearch error: {0}".format(e.__str__()))
|
||||||
|
|
||||||
|
|
||||||
def migrate_indexes(
|
def migrate_indexes(aggregate_indexes=None, forensic_indexes=None):
|
||||||
aggregate_indexes: Optional[list[str]] = None,
|
|
||||||
forensic_indexes: Optional[list[str]] = None,
|
|
||||||
):
|
|
||||||
"""
|
"""
|
||||||
Updates index mappings
|
Updates index mappings
|
||||||
|
|
||||||
@@ -367,7 +358,7 @@ def migrate_indexes(
|
|||||||
}
|
}
|
||||||
Index(new_index_name).create()
|
Index(new_index_name).create()
|
||||||
Index(new_index_name).put_mapping(doc_type=doc, body=body)
|
Index(new_index_name).put_mapping(doc_type=doc, body=body)
|
||||||
reindex(connections.get_connection(), aggregate_index_name, new_index_name) # pyright: ignore[reportArgumentType]
|
reindex(connections.get_connection(), aggregate_index_name, new_index_name)
|
||||||
Index(aggregate_index_name).delete()
|
Index(aggregate_index_name).delete()
|
||||||
|
|
||||||
for forensic_index in forensic_indexes:
|
for forensic_index in forensic_indexes:
|
||||||
@@ -375,18 +366,18 @@ def migrate_indexes(
|
|||||||
|
|
||||||
|
|
||||||
def save_aggregate_report_to_elasticsearch(
|
def save_aggregate_report_to_elasticsearch(
|
||||||
aggregate_report: dict[str, Any],
|
aggregate_report,
|
||||||
index_suffix: Optional[str] = None,
|
index_suffix=None,
|
||||||
index_prefix: Optional[str] = None,
|
index_prefix=None,
|
||||||
monthly_indexes: Optional[bool] = False,
|
monthly_indexes=False,
|
||||||
number_of_shards: int = 1,
|
number_of_shards=1,
|
||||||
number_of_replicas: int = 0,
|
number_of_replicas=0,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Saves a parsed DMARC aggregate report to Elasticsearch
|
Saves a parsed DMARC aggregate report to Elasticsearch
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
aggregate_report (dict): A parsed forensic report
|
aggregate_report (OrderedDict): A parsed forensic report
|
||||||
index_suffix (str): The suffix of the name of the index to save to
|
index_suffix (str): The suffix of the name of the index to save to
|
||||||
index_prefix (str): The prefix of the name of the index to save to
|
index_prefix (str): The prefix of the name of the index to save to
|
||||||
monthly_indexes (bool): Use monthly indexes instead of daily indexes
|
monthly_indexes (bool): Use monthly indexes instead of daily indexes
|
||||||
@@ -404,17 +395,21 @@ def save_aggregate_report_to_elasticsearch(
|
|||||||
domain = aggregate_report["policy_published"]["domain"]
|
domain = aggregate_report["policy_published"]["domain"]
|
||||||
begin_date = human_timestamp_to_datetime(metadata["begin_date"], to_utc=True)
|
begin_date = human_timestamp_to_datetime(metadata["begin_date"], to_utc=True)
|
||||||
end_date = human_timestamp_to_datetime(metadata["end_date"], to_utc=True)
|
end_date = human_timestamp_to_datetime(metadata["end_date"], to_utc=True)
|
||||||
|
begin_date_human = begin_date.strftime("%Y-%m-%d %H:%M:%SZ")
|
||||||
|
end_date_human = end_date.strftime("%Y-%m-%d %H:%M:%SZ")
|
||||||
if monthly_indexes:
|
if monthly_indexes:
|
||||||
index_date = begin_date.strftime("%Y-%m")
|
index_date = begin_date.strftime("%Y-%m")
|
||||||
else:
|
else:
|
||||||
index_date = begin_date.strftime("%Y-%m-%d")
|
index_date = begin_date.strftime("%Y-%m-%d")
|
||||||
|
aggregate_report["begin_date"] = begin_date
|
||||||
|
aggregate_report["end_date"] = end_date
|
||||||
|
date_range = [aggregate_report["begin_date"], aggregate_report["end_date"]]
|
||||||
|
|
||||||
org_name_query = Q(dict(match_phrase=dict(org_name=org_name))) # type: ignore
|
org_name_query = Q(dict(match_phrase=dict(org_name=org_name)))
|
||||||
report_id_query = Q(dict(match_phrase=dict(report_id=report_id))) # pyright: ignore[reportArgumentType]
|
report_id_query = Q(dict(match_phrase=dict(report_id=report_id)))
|
||||||
domain_query = Q(dict(match_phrase={"published_policy.domain": domain})) # pyright: ignore[reportArgumentType]
|
domain_query = Q(dict(match_phrase={"published_policy.domain": domain}))
|
||||||
begin_date_query = Q(dict(match=dict(date_begin=begin_date))) # pyright: ignore[reportArgumentType]
|
begin_date_query = Q(dict(match=dict(date_begin=begin_date)))
|
||||||
end_date_query = Q(dict(match=dict(date_end=end_date))) # pyright: ignore[reportArgumentType]
|
end_date_query = Q(dict(match=dict(date_end=end_date)))
|
||||||
|
|
||||||
if index_suffix is not None:
|
if index_suffix is not None:
|
||||||
search_index = "dmarc_aggregate_{0}*".format(index_suffix)
|
search_index = "dmarc_aggregate_{0}*".format(index_suffix)
|
||||||
@@ -426,8 +421,6 @@ def save_aggregate_report_to_elasticsearch(
|
|||||||
query = org_name_query & report_id_query & domain_query
|
query = org_name_query & report_id_query & domain_query
|
||||||
query = query & begin_date_query & end_date_query
|
query = query & begin_date_query & end_date_query
|
||||||
search.query = query
|
search.query = query
|
||||||
begin_date_human = begin_date.strftime("%Y-%m-%d %H:%M:%SZ")
|
|
||||||
end_date_human = end_date.strftime("%Y-%m-%d %H:%M:%SZ")
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
existing = search.execute()
|
existing = search.execute()
|
||||||
@@ -457,17 +450,6 @@ def save_aggregate_report_to_elasticsearch(
|
|||||||
)
|
)
|
||||||
|
|
||||||
for record in aggregate_report["records"]:
|
for record in aggregate_report["records"]:
|
||||||
begin_date = human_timestamp_to_datetime(record["interval_begin"], to_utc=True)
|
|
||||||
end_date = human_timestamp_to_datetime(record["interval_end"], to_utc=True)
|
|
||||||
normalized_timespan = record["normalized_timespan"]
|
|
||||||
|
|
||||||
if monthly_indexes:
|
|
||||||
index_date = begin_date.strftime("%Y-%m")
|
|
||||||
else:
|
|
||||||
index_date = begin_date.strftime("%Y-%m-%d")
|
|
||||||
aggregate_report["begin_date"] = begin_date
|
|
||||||
aggregate_report["end_date"] = end_date
|
|
||||||
date_range = [aggregate_report["begin_date"], aggregate_report["end_date"]]
|
|
||||||
agg_doc = _AggregateReportDoc(
|
agg_doc = _AggregateReportDoc(
|
||||||
xml_schema=aggregate_report["xml_schema"],
|
xml_schema=aggregate_report["xml_schema"],
|
||||||
org_name=metadata["org_name"],
|
org_name=metadata["org_name"],
|
||||||
@@ -475,9 +457,8 @@ def save_aggregate_report_to_elasticsearch(
|
|||||||
org_extra_contact_info=metadata["org_extra_contact_info"],
|
org_extra_contact_info=metadata["org_extra_contact_info"],
|
||||||
report_id=metadata["report_id"],
|
report_id=metadata["report_id"],
|
||||||
date_range=date_range,
|
date_range=date_range,
|
||||||
date_begin=begin_date,
|
date_begin=aggregate_report["begin_date"],
|
||||||
date_end=end_date,
|
date_end=aggregate_report["end_date"],
|
||||||
normalized_timespan=normalized_timespan,
|
|
||||||
errors=metadata["errors"],
|
errors=metadata["errors"],
|
||||||
published_policy=published_policy,
|
published_policy=published_policy,
|
||||||
source_ip_address=record["source"]["ip_address"],
|
source_ip_address=record["source"]["ip_address"],
|
||||||
@@ -527,7 +508,7 @@ def save_aggregate_report_to_elasticsearch(
|
|||||||
number_of_shards=number_of_shards, number_of_replicas=number_of_replicas
|
number_of_shards=number_of_shards, number_of_replicas=number_of_replicas
|
||||||
)
|
)
|
||||||
create_indexes([index], index_settings)
|
create_indexes([index], index_settings)
|
||||||
agg_doc.meta.index = index # pyright: ignore[reportOptionalMemberAccess, reportAttributeAccessIssue]
|
agg_doc.meta.index = index
|
||||||
|
|
||||||
try:
|
try:
|
||||||
agg_doc.save()
|
agg_doc.save()
|
||||||
@@ -536,18 +517,18 @@ def save_aggregate_report_to_elasticsearch(
|
|||||||
|
|
||||||
|
|
||||||
def save_forensic_report_to_elasticsearch(
|
def save_forensic_report_to_elasticsearch(
|
||||||
forensic_report: dict[str, Any],
|
forensic_report,
|
||||||
index_suffix: Optional[Any] = None,
|
index_suffix=None,
|
||||||
index_prefix: Optional[str] = None,
|
index_prefix=None,
|
||||||
monthly_indexes: Optional[bool] = False,
|
monthly_indexes=False,
|
||||||
number_of_shards: int = 1,
|
number_of_shards=1,
|
||||||
number_of_replicas: int = 0,
|
number_of_replicas=0,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Saves a parsed DMARC forensic report to Elasticsearch
|
Saves a parsed DMARC forensic report to Elasticsearch
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
forensic_report (dict): A parsed forensic report
|
forensic_report (OrderedDict): A parsed forensic report
|
||||||
index_suffix (str): The suffix of the name of the index to save to
|
index_suffix (str): The suffix of the name of the index to save to
|
||||||
index_prefix (str): The prefix of the name of the index to save to
|
index_prefix (str): The prefix of the name of the index to save to
|
||||||
monthly_indexes (bool): Use monthly indexes instead of daily
|
monthly_indexes (bool): Use monthly indexes instead of daily
|
||||||
@@ -567,12 +548,12 @@ def save_forensic_report_to_elasticsearch(
|
|||||||
sample_date = forensic_report["parsed_sample"]["date"]
|
sample_date = forensic_report["parsed_sample"]["date"]
|
||||||
sample_date = human_timestamp_to_datetime(sample_date)
|
sample_date = human_timestamp_to_datetime(sample_date)
|
||||||
original_headers = forensic_report["parsed_sample"]["headers"]
|
original_headers = forensic_report["parsed_sample"]["headers"]
|
||||||
headers: dict[str, Any] = {}
|
headers = OrderedDict()
|
||||||
for original_header in original_headers:
|
for original_header in original_headers:
|
||||||
headers[original_header.lower()] = original_headers[original_header]
|
headers[original_header.lower()] = original_headers[original_header]
|
||||||
|
|
||||||
arrival_date = human_timestamp_to_datetime(forensic_report["arrival_date_utc"])
|
arrival_date_human = forensic_report["arrival_date_utc"]
|
||||||
arrival_date_epoch_milliseconds = int(arrival_date.timestamp() * 1000)
|
arrival_date = human_timestamp_to_datetime(arrival_date_human)
|
||||||
|
|
||||||
if index_suffix is not None:
|
if index_suffix is not None:
|
||||||
search_index = "dmarc_forensic_{0}*".format(index_suffix)
|
search_index = "dmarc_forensic_{0}*".format(index_suffix)
|
||||||
@@ -581,39 +562,24 @@ def save_forensic_report_to_elasticsearch(
|
|||||||
if index_prefix is not None:
|
if index_prefix is not None:
|
||||||
search_index = "{0}{1}".format(index_prefix, search_index)
|
search_index = "{0}{1}".format(index_prefix, search_index)
|
||||||
search = Search(index=search_index)
|
search = Search(index=search_index)
|
||||||
q = Q(dict(match=dict(arrival_date=arrival_date_epoch_milliseconds))) # pyright: ignore[reportArgumentType]
|
arrival_query = {"match": {"arrival_date": arrival_date}}
|
||||||
|
q = Q(arrival_query)
|
||||||
|
|
||||||
from_ = None
|
from_ = None
|
||||||
to_ = None
|
to_ = None
|
||||||
subject = None
|
subject = None
|
||||||
if "from" in headers:
|
if "from" in headers:
|
||||||
# We convert the FROM header from a string list to a flat string.
|
from_ = headers["from"]
|
||||||
headers["from"] = headers["from"][0]
|
from_query = {"match_phrase": {"sample.headers.from": from_}}
|
||||||
if headers["from"][0] == "":
|
q = q & Q(from_query)
|
||||||
headers["from"] = headers["from"][1]
|
|
||||||
else:
|
|
||||||
headers["from"] = " <".join(headers["from"]) + ">"
|
|
||||||
|
|
||||||
from_ = dict()
|
|
||||||
from_["sample.headers.from"] = headers["from"]
|
|
||||||
from_query = Q(dict(match_phrase=from_)) # pyright: ignore[reportArgumentType]
|
|
||||||
q = q & from_query
|
|
||||||
if "to" in headers:
|
if "to" in headers:
|
||||||
# We convert the TO header from a string list to a flat string.
|
to_ = headers["to"]
|
||||||
headers["to"] = headers["to"][0]
|
to_query = {"match_phrase": {"sample.headers.to": to_}}
|
||||||
if headers["to"][0] == "":
|
q = q & Q(to_query)
|
||||||
headers["to"] = headers["to"][1]
|
|
||||||
else:
|
|
||||||
headers["to"] = " <".join(headers["to"]) + ">"
|
|
||||||
|
|
||||||
to_ = dict()
|
|
||||||
to_["sample.headers.to"] = headers["to"]
|
|
||||||
to_query = Q(dict(match_phrase=to_)) # pyright: ignore[reportArgumentType]
|
|
||||||
q = q & to_query
|
|
||||||
if "subject" in headers:
|
if "subject" in headers:
|
||||||
subject = headers["subject"]
|
subject = headers["subject"]
|
||||||
subject_query = {"match_phrase": {"sample.headers.subject": subject}}
|
subject_query = {"match_phrase": {"sample.headers.subject": subject}}
|
||||||
q = q & Q(subject_query) # pyright: ignore[reportArgumentType]
|
q = q & Q(subject_query)
|
||||||
|
|
||||||
search.query = q
|
search.query = q
|
||||||
existing = search.execute()
|
existing = search.execute()
|
||||||
@@ -623,9 +589,7 @@ def save_forensic_report_to_elasticsearch(
|
|||||||
"A forensic sample to {0} from {1} "
|
"A forensic sample to {0} from {1} "
|
||||||
"with a subject of {2} and arrival date of {3} "
|
"with a subject of {2} and arrival date of {3} "
|
||||||
"already exists in "
|
"already exists in "
|
||||||
"Elasticsearch".format(
|
"Elasticsearch".format(to_, from_, subject, arrival_date_human)
|
||||||
to_, from_, subject, forensic_report["arrival_date_utc"]
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
|
|
||||||
parsed_sample = forensic_report["parsed_sample"]
|
parsed_sample = forensic_report["parsed_sample"]
|
||||||
@@ -661,7 +625,7 @@ def save_forensic_report_to_elasticsearch(
|
|||||||
user_agent=forensic_report["user_agent"],
|
user_agent=forensic_report["user_agent"],
|
||||||
version=forensic_report["version"],
|
version=forensic_report["version"],
|
||||||
original_mail_from=forensic_report["original_mail_from"],
|
original_mail_from=forensic_report["original_mail_from"],
|
||||||
arrival_date=arrival_date_epoch_milliseconds,
|
arrival_date=arrival_date,
|
||||||
domain=forensic_report["reported_domain"],
|
domain=forensic_report["reported_domain"],
|
||||||
original_envelope_id=forensic_report["original_envelope_id"],
|
original_envelope_id=forensic_report["original_envelope_id"],
|
||||||
authentication_results=forensic_report["authentication_results"],
|
authentication_results=forensic_report["authentication_results"],
|
||||||
@@ -691,7 +655,7 @@ def save_forensic_report_to_elasticsearch(
|
|||||||
number_of_shards=number_of_shards, number_of_replicas=number_of_replicas
|
number_of_shards=number_of_shards, number_of_replicas=number_of_replicas
|
||||||
)
|
)
|
||||||
create_indexes([index], index_settings)
|
create_indexes([index], index_settings)
|
||||||
forensic_doc.meta.index = index # pyright: ignore[reportAttributeAccessIssue, reportOptionalMemberAccess]
|
forensic_doc.meta.index = index
|
||||||
try:
|
try:
|
||||||
forensic_doc.save()
|
forensic_doc.save()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -703,18 +667,18 @@ def save_forensic_report_to_elasticsearch(
|
|||||||
|
|
||||||
|
|
||||||
def save_smtp_tls_report_to_elasticsearch(
|
def save_smtp_tls_report_to_elasticsearch(
|
||||||
report: dict[str, Any],
|
report,
|
||||||
index_suffix: Optional[str] = None,
|
index_suffix=None,
|
||||||
index_prefix: Optional[str] = None,
|
index_prefix=None,
|
||||||
monthly_indexes: bool = False,
|
monthly_indexes=False,
|
||||||
number_of_shards: int = 1,
|
number_of_shards=1,
|
||||||
number_of_replicas: int = 0,
|
number_of_replicas=0,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Saves a parsed SMTP TLS report to Elasticsearch
|
Saves a parsed SMTP TLS report to Elasticsearch
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
report (dict): A parsed SMTP TLS report
|
report (OrderedDict): A parsed SMTP TLS report
|
||||||
index_suffix (str): The suffix of the name of the index to save to
|
index_suffix (str): The suffix of the name of the index to save to
|
||||||
index_prefix (str): The prefix of the name of the index to save to
|
index_prefix (str): The prefix of the name of the index to save to
|
||||||
monthly_indexes (bool): Use monthly indexes instead of daily indexes
|
monthly_indexes (bool): Use monthly indexes instead of daily indexes
|
||||||
@@ -738,10 +702,10 @@ def save_smtp_tls_report_to_elasticsearch(
|
|||||||
report["begin_date"] = begin_date
|
report["begin_date"] = begin_date
|
||||||
report["end_date"] = end_date
|
report["end_date"] = end_date
|
||||||
|
|
||||||
org_name_query = Q(dict(match_phrase=dict(org_name=org_name))) # pyright: ignore[reportArgumentType]
|
org_name_query = Q(dict(match_phrase=dict(org_name=org_name)))
|
||||||
report_id_query = Q(dict(match_phrase=dict(report_id=report_id))) # pyright: ignore[reportArgumentType]
|
report_id_query = Q(dict(match_phrase=dict(report_id=report_id)))
|
||||||
begin_date_query = Q(dict(match=dict(date_begin=begin_date))) # pyright: ignore[reportArgumentType]
|
begin_date_query = Q(dict(match=dict(date_begin=begin_date)))
|
||||||
end_date_query = Q(dict(match=dict(date_end=end_date))) # pyright: ignore[reportArgumentType]
|
end_date_query = Q(dict(match=dict(date_end=end_date)))
|
||||||
|
|
||||||
if index_suffix is not None:
|
if index_suffix is not None:
|
||||||
search_index = "smtp_tls_{0}*".format(index_suffix)
|
search_index = "smtp_tls_{0}*".format(index_suffix)
|
||||||
@@ -800,7 +764,7 @@ def save_smtp_tls_report_to_elasticsearch(
|
|||||||
policy_doc = _SMTPTLSPolicyDoc(
|
policy_doc = _SMTPTLSPolicyDoc(
|
||||||
policy_domain=policy["policy_domain"],
|
policy_domain=policy["policy_domain"],
|
||||||
policy_type=policy["policy_type"],
|
policy_type=policy["policy_type"],
|
||||||
successful_session_count=policy["successful_session_count"],
|
succesful_session_count=policy["successful_session_count"],
|
||||||
failed_session_count=policy["failed_session_count"],
|
failed_session_count=policy["failed_session_count"],
|
||||||
policy_string=policy_strings,
|
policy_string=policy_strings,
|
||||||
mx_host_patterns=mx_host_patterns,
|
mx_host_patterns=mx_host_patterns,
|
||||||
@@ -842,10 +806,10 @@ def save_smtp_tls_report_to_elasticsearch(
|
|||||||
additional_information_uri=additional_information_uri,
|
additional_information_uri=additional_information_uri,
|
||||||
failure_reason_code=failure_reason_code,
|
failure_reason_code=failure_reason_code,
|
||||||
)
|
)
|
||||||
smtp_tls_doc.policies.append(policy_doc) # pyright: ignore[reportCallIssue]
|
smtp_tls_doc.policies.append(policy_doc)
|
||||||
|
|
||||||
create_indexes([index], index_settings)
|
create_indexes([index], index_settings)
|
||||||
smtp_tls_doc.meta.index = index # pyright: ignore[reportOptionalMemberAccess, reportAttributeAccessIssue]
|
smtp_tls_doc.meta.index = index
|
||||||
|
|
||||||
try:
|
try:
|
||||||
smtp_tls_doc.save()
|
smtp_tls_doc.save()
|
||||||
|
|||||||
@@ -1,19 +1,17 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import logging.handlers
|
import logging.handlers
|
||||||
|
import json
|
||||||
import threading
|
import threading
|
||||||
from typing import Any
|
|
||||||
|
|
||||||
from pygelf import GelfTcpHandler, GelfTlsHandler, GelfUdpHandler
|
|
||||||
|
|
||||||
from parsedmarc import (
|
from parsedmarc import (
|
||||||
parsed_aggregate_reports_to_csv_rows,
|
parsed_aggregate_reports_to_csv_rows,
|
||||||
parsed_forensic_reports_to_csv_rows,
|
parsed_forensic_reports_to_csv_rows,
|
||||||
parsed_smtp_tls_reports_to_csv_rows,
|
parsed_smtp_tls_reports_to_csv_rows,
|
||||||
)
|
)
|
||||||
|
from pygelf import GelfTcpHandler, GelfUdpHandler, GelfTlsHandler
|
||||||
|
|
||||||
|
|
||||||
log_context_data = threading.local()
|
log_context_data = threading.local()
|
||||||
|
|
||||||
@@ -50,7 +48,7 @@ class GelfClient(object):
|
|||||||
)
|
)
|
||||||
self.logger.addHandler(self.handler)
|
self.logger.addHandler(self.handler)
|
||||||
|
|
||||||
def save_aggregate_report_to_gelf(self, aggregate_reports: list[dict[str, Any]]):
|
def save_aggregate_report_to_gelf(self, aggregate_reports):
|
||||||
rows = parsed_aggregate_reports_to_csv_rows(aggregate_reports)
|
rows = parsed_aggregate_reports_to_csv_rows(aggregate_reports)
|
||||||
for row in rows:
|
for row in rows:
|
||||||
log_context_data.parsedmarc = row
|
log_context_data.parsedmarc = row
|
||||||
@@ -58,14 +56,12 @@ class GelfClient(object):
|
|||||||
|
|
||||||
log_context_data.parsedmarc = None
|
log_context_data.parsedmarc = None
|
||||||
|
|
||||||
def save_forensic_report_to_gelf(self, forensic_reports: list[dict[str, Any]]):
|
def save_forensic_report_to_gelf(self, forensic_reports):
|
||||||
rows = parsed_forensic_reports_to_csv_rows(forensic_reports)
|
rows = parsed_forensic_reports_to_csv_rows(forensic_reports)
|
||||||
for row in rows:
|
for row in rows:
|
||||||
log_context_data.parsedmarc = row
|
self.logger.info(json.dumps(row))
|
||||||
self.logger.info("parsedmarc forensic report")
|
|
||||||
|
|
||||||
def save_smtp_tls_report_to_gelf(self, smtp_tls_reports: dict[str, Any]):
|
def save_smtp_tls_report_to_gelf(self, smtp_tls_reports):
|
||||||
rows = parsed_smtp_tls_reports_to_csv_rows(smtp_tls_reports)
|
rows = parsed_smtp_tls_reports_to_csv_rows(smtp_tls_reports)
|
||||||
for row in rows:
|
for row in rows:
|
||||||
log_context_data.parsedmarc = row
|
self.logger.info(json.dumps(row))
|
||||||
self.logger.info("parsedmarc smtptls report")
|
|
||||||
|
|||||||
@@ -1,17 +1,15 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import json
|
import json
|
||||||
from ssl import SSLContext, create_default_context
|
from ssl import create_default_context
|
||||||
from typing import Any, Optional, Union
|
|
||||||
|
|
||||||
from kafka import KafkaProducer
|
from kafka import KafkaProducer
|
||||||
from kafka.errors import NoBrokersAvailable, UnknownTopicOrPartitionError
|
from kafka.errors import NoBrokersAvailable, UnknownTopicOrPartitionError
|
||||||
|
from collections import OrderedDict
|
||||||
|
from parsedmarc.utils import human_timestamp_to_datetime
|
||||||
|
|
||||||
from parsedmarc import __version__
|
from parsedmarc import __version__
|
||||||
from parsedmarc.log import logger
|
from parsedmarc.log import logger
|
||||||
from parsedmarc.utils import human_timestamp_to_datetime
|
|
||||||
|
|
||||||
|
|
||||||
class KafkaError(RuntimeError):
|
class KafkaError(RuntimeError):
|
||||||
@@ -20,13 +18,7 @@ class KafkaError(RuntimeError):
|
|||||||
|
|
||||||
class KafkaClient(object):
|
class KafkaClient(object):
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self, kafka_hosts, ssl=False, username=None, password=None, ssl_context=None
|
||||||
kafka_hosts: list[str],
|
|
||||||
*,
|
|
||||||
ssl: Optional[bool] = False,
|
|
||||||
username: Optional[str] = None,
|
|
||||||
password: Optional[str] = None,
|
|
||||||
ssl_context: Optional[SSLContext] = None,
|
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Initializes the Kafka client
|
Initializes the Kafka client
|
||||||
@@ -36,7 +28,7 @@ class KafkaClient(object):
|
|||||||
ssl (bool): Use a SSL/TLS connection
|
ssl (bool): Use a SSL/TLS connection
|
||||||
username (str): An optional username
|
username (str): An optional username
|
||||||
password (str): An optional password
|
password (str): An optional password
|
||||||
ssl_context (SSLContext): SSL context options
|
ssl_context: SSL context options
|
||||||
|
|
||||||
Notes:
|
Notes:
|
||||||
``use_ssl=True`` is implied when a username or password are
|
``use_ssl=True`` is implied when a username or password are
|
||||||
@@ -46,7 +38,7 @@ class KafkaClient(object):
|
|||||||
``$ConnectionString``, and the password is the
|
``$ConnectionString``, and the password is the
|
||||||
Azure Event Hub connection string.
|
Azure Event Hub connection string.
|
||||||
"""
|
"""
|
||||||
config: dict[str, Any] = dict(
|
config = dict(
|
||||||
value_serializer=lambda v: json.dumps(v).encode("utf-8"),
|
value_serializer=lambda v: json.dumps(v).encode("utf-8"),
|
||||||
bootstrap_servers=kafka_hosts,
|
bootstrap_servers=kafka_hosts,
|
||||||
client_id="parsedmarc-{0}".format(__version__),
|
client_id="parsedmarc-{0}".format(__version__),
|
||||||
@@ -63,7 +55,7 @@ class KafkaClient(object):
|
|||||||
raise KafkaError("No Kafka brokers available")
|
raise KafkaError("No Kafka brokers available")
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def strip_metadata(report: dict[str, Any]):
|
def strip_metadata(report):
|
||||||
"""
|
"""
|
||||||
Duplicates org_name, org_email and report_id into JSON root
|
Duplicates org_name, org_email and report_id into JSON root
|
||||||
and removes report_metadata key to bring it more inline
|
and removes report_metadata key to bring it more inline
|
||||||
@@ -77,7 +69,7 @@ class KafkaClient(object):
|
|||||||
return report
|
return report
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def generate_date_range(report: dict[str, Any]):
|
def generate_daterange(report):
|
||||||
"""
|
"""
|
||||||
Creates a date_range timestamp with format YYYY-MM-DD-T-HH:MM:SS
|
Creates a date_range timestamp with format YYYY-MM-DD-T-HH:MM:SS
|
||||||
based on begin and end dates for easier parsing in Kibana.
|
based on begin and end dates for easier parsing in Kibana.
|
||||||
@@ -94,11 +86,7 @@ class KafkaClient(object):
|
|||||||
logger.debug("date_range is {}".format(date_range))
|
logger.debug("date_range is {}".format(date_range))
|
||||||
return date_range
|
return date_range
|
||||||
|
|
||||||
def save_aggregate_reports_to_kafka(
|
def save_aggregate_reports_to_kafka(self, aggregate_reports, aggregate_topic):
|
||||||
self,
|
|
||||||
aggregate_reports: Union[dict[str, Any], list[dict[str, Any]]],
|
|
||||||
aggregate_topic: str,
|
|
||||||
):
|
|
||||||
"""
|
"""
|
||||||
Saves aggregate DMARC reports to Kafka
|
Saves aggregate DMARC reports to Kafka
|
||||||
|
|
||||||
@@ -108,14 +96,16 @@ class KafkaClient(object):
|
|||||||
aggregate_topic (str): The name of the Kafka topic
|
aggregate_topic (str): The name of the Kafka topic
|
||||||
|
|
||||||
"""
|
"""
|
||||||
if isinstance(aggregate_reports, dict):
|
if isinstance(aggregate_reports, dict) or isinstance(
|
||||||
|
aggregate_reports, OrderedDict
|
||||||
|
):
|
||||||
aggregate_reports = [aggregate_reports]
|
aggregate_reports = [aggregate_reports]
|
||||||
|
|
||||||
if len(aggregate_reports) < 1:
|
if len(aggregate_reports) < 1:
|
||||||
return
|
return
|
||||||
|
|
||||||
for report in aggregate_reports:
|
for report in aggregate_reports:
|
||||||
report["date_range"] = self.generate_date_range(report)
|
report["date_range"] = self.generate_daterange(report)
|
||||||
report = self.strip_metadata(report)
|
report = self.strip_metadata(report)
|
||||||
|
|
||||||
for slice in report["records"]:
|
for slice in report["records"]:
|
||||||
@@ -139,11 +129,7 @@ class KafkaClient(object):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise KafkaError("Kafka error: {0}".format(e.__str__()))
|
raise KafkaError("Kafka error: {0}".format(e.__str__()))
|
||||||
|
|
||||||
def save_forensic_reports_to_kafka(
|
def save_forensic_reports_to_kafka(self, forensic_reports, forensic_topic):
|
||||||
self,
|
|
||||||
forensic_reports: Union[dict[str, Any], list[dict[str, Any]]],
|
|
||||||
forensic_topic: str,
|
|
||||||
):
|
|
||||||
"""
|
"""
|
||||||
Saves forensic DMARC reports to Kafka, sends individual
|
Saves forensic DMARC reports to Kafka, sends individual
|
||||||
records (slices) since Kafka requires messages to be <= 1MB
|
records (slices) since Kafka requires messages to be <= 1MB
|
||||||
@@ -173,11 +159,7 @@ class KafkaClient(object):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise KafkaError("Kafka error: {0}".format(e.__str__()))
|
raise KafkaError("Kafka error: {0}".format(e.__str__()))
|
||||||
|
|
||||||
def save_smtp_tls_reports_to_kafka(
|
def save_smtp_tls_reports_to_kafka(self, smtp_tls_reports, smtp_tls_topic):
|
||||||
self,
|
|
||||||
smtp_tls_reports: Union[list[dict[str, Any]], dict[str, Any]],
|
|
||||||
smtp_tls_topic: str,
|
|
||||||
):
|
|
||||||
"""
|
"""
|
||||||
Saves SMTP TLS reports to Kafka, sends individual
|
Saves SMTP TLS reports to Kafka, sends individual
|
||||||
records (slices) since Kafka requires messages to be <= 1MB
|
records (slices) since Kafka requires messages to be <= 1MB
|
||||||
|
|||||||
@@ -1,15 +1,9 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
from parsedmarc.log import logger
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
from typing import Any
|
|
||||||
|
|
||||||
from azure.core.exceptions import HttpResponseError
|
from azure.core.exceptions import HttpResponseError
|
||||||
from azure.identity import ClientSecretCredential
|
from azure.identity import ClientSecretCredential
|
||||||
from azure.monitor.ingestion import LogsIngestionClient
|
from azure.monitor.ingestion import LogsIngestionClient
|
||||||
|
|
||||||
from parsedmarc.log import logger
|
|
||||||
|
|
||||||
|
|
||||||
class LogAnalyticsException(Exception):
|
class LogAnalyticsException(Exception):
|
||||||
"""Raised when an Elasticsearch error occurs"""
|
"""Raised when an Elasticsearch error occurs"""
|
||||||
@@ -108,12 +102,7 @@ class LogAnalyticsClient(object):
|
|||||||
"Invalid configuration. " + "One or more required settings are missing."
|
"Invalid configuration. " + "One or more required settings are missing."
|
||||||
)
|
)
|
||||||
|
|
||||||
def publish_json(
|
def publish_json(self, results, logs_client: LogsIngestionClient, dcr_stream: str):
|
||||||
self,
|
|
||||||
results,
|
|
||||||
logs_client: LogsIngestionClient,
|
|
||||||
dcr_stream: str,
|
|
||||||
):
|
|
||||||
"""
|
"""
|
||||||
Background function to publish given
|
Background function to publish given
|
||||||
DMARC report to specific Data Collection Rule.
|
DMARC report to specific Data Collection Rule.
|
||||||
@@ -132,11 +121,7 @@ class LogAnalyticsClient(object):
|
|||||||
raise LogAnalyticsException("Upload failed: {error}".format(error=e))
|
raise LogAnalyticsException("Upload failed: {error}".format(error=e))
|
||||||
|
|
||||||
def publish_results(
|
def publish_results(
|
||||||
self,
|
self, results, save_aggregate: bool, save_forensic: bool, save_smtp_tls: bool
|
||||||
results: dict[str, Any],
|
|
||||||
save_aggregate: bool,
|
|
||||||
save_forensic: bool,
|
|
||||||
save_smtp_tls: bool,
|
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Function to publish DMARC and/or SMTP TLS reports to Log Analytics
|
Function to publish DMARC and/or SMTP TLS reports to Log Analytics
|
||||||
|
|||||||
@@ -1,7 +1,3 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
from base64 import urlsafe_b64decode
|
from base64 import urlsafe_b64decode
|
||||||
from functools import lru_cache
|
from functools import lru_cache
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
@@ -116,14 +112,14 @@ class GmailConnection(MailboxConnection):
|
|||||||
else:
|
else:
|
||||||
return [id for id in self._fetch_all_message_ids(reports_label_id)]
|
return [id for id in self._fetch_all_message_ids(reports_label_id)]
|
||||||
|
|
||||||
def fetch_message(self, message_id) -> str:
|
def fetch_message(self, message_id):
|
||||||
msg = (
|
msg = (
|
||||||
self.service.users()
|
self.service.users()
|
||||||
.messages()
|
.messages()
|
||||||
.get(userId="me", id=message_id, format="raw")
|
.get(userId="me", id=message_id, format="raw")
|
||||||
.execute()
|
.execute()
|
||||||
)
|
)
|
||||||
return urlsafe_b64decode(msg["raw"]).decode(errors="replace")
|
return urlsafe_b64decode(msg["raw"])
|
||||||
|
|
||||||
def delete_message(self, message_id: str):
|
def delete_message(self, message_id: str):
|
||||||
self.service.users().messages().delete(userId="me", id=message_id)
|
self.service.users().messages().delete(userId="me", id=message_id)
|
||||||
@@ -156,4 +152,3 @@ class GmailConnection(MailboxConnection):
|
|||||||
for label in labels:
|
for label in labels:
|
||||||
if label_name == label["id"] or label_name == label["name"]:
|
if label_name == label["id"] or label_name == label["name"]:
|
||||||
return label["id"]
|
return label["id"]
|
||||||
return ""
|
|
||||||
|
|||||||
@@ -1,12 +1,8 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from functools import lru_cache
|
from functools import lru_cache
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from time import sleep
|
from time import sleep
|
||||||
from typing import Any, List, Optional, Union
|
from typing import List, Optional
|
||||||
|
|
||||||
from azure.identity import (
|
from azure.identity import (
|
||||||
UsernamePasswordCredential,
|
UsernamePasswordCredential,
|
||||||
@@ -28,7 +24,7 @@ class AuthMethod(Enum):
|
|||||||
|
|
||||||
|
|
||||||
def _get_cache_args(token_path: Path, allow_unencrypted_storage):
|
def _get_cache_args(token_path: Path, allow_unencrypted_storage):
|
||||||
cache_args: dict[str, Any] = {
|
cache_args = {
|
||||||
"cache_persistence_options": TokenCachePersistenceOptions(
|
"cache_persistence_options": TokenCachePersistenceOptions(
|
||||||
name="parsedmarc", allow_unencrypted_storage=allow_unencrypted_storage
|
name="parsedmarc", allow_unencrypted_storage=allow_unencrypted_storage
|
||||||
)
|
)
|
||||||
@@ -151,9 +147,9 @@ class MSGraphConnection(MailboxConnection):
|
|||||||
else:
|
else:
|
||||||
logger.warning(f"Unknown response {resp.status_code} {resp.json()}")
|
logger.warning(f"Unknown response {resp.status_code} {resp.json()}")
|
||||||
|
|
||||||
def fetch_messages(self, reports_folder: str, **kwargs) -> List[str]:
|
def fetch_messages(self, folder_name: str, **kwargs) -> List[str]:
|
||||||
"""Returns a list of message UIDs in the specified folder"""
|
"""Returns a list of message UIDs in the specified folder"""
|
||||||
folder_id = self._find_folder_id_from_folder_path(reports_folder)
|
folder_id = self._find_folder_id_from_folder_path(folder_name)
|
||||||
url = f"/users/{self.mailbox_name}/mailFolders/{folder_id}/messages"
|
url = f"/users/{self.mailbox_name}/mailFolders/{folder_id}/messages"
|
||||||
since = kwargs.get("since")
|
since = kwargs.get("since")
|
||||||
if not since:
|
if not since:
|
||||||
@@ -166,7 +162,7 @@ class MSGraphConnection(MailboxConnection):
|
|||||||
|
|
||||||
def _get_all_messages(self, url, batch_size, since):
|
def _get_all_messages(self, url, batch_size, since):
|
||||||
messages: list
|
messages: list
|
||||||
params: dict[str, Union[str, int]] = {"$select": "id"}
|
params = {"$select": "id"}
|
||||||
if since:
|
if since:
|
||||||
params["$filter"] = f"receivedDateTime ge {since}"
|
params["$filter"] = f"receivedDateTime ge {since}"
|
||||||
if batch_size and batch_size > 0:
|
if batch_size and batch_size > 0:
|
||||||
|
|||||||
@@ -1,9 +1,3 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
from typing import cast
|
|
||||||
|
|
||||||
from time import sleep
|
from time import sleep
|
||||||
|
|
||||||
from imapclient.exceptions import IMAPClientError
|
from imapclient.exceptions import IMAPClientError
|
||||||
@@ -17,14 +11,14 @@ from parsedmarc.mail.mailbox_connection import MailboxConnection
|
|||||||
class IMAPConnection(MailboxConnection):
|
class IMAPConnection(MailboxConnection):
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
host: str,
|
host=None,
|
||||||
user: str,
|
user=None,
|
||||||
password: str,
|
password=None,
|
||||||
port: int = 993,
|
port=None,
|
||||||
ssl: bool = True,
|
ssl=True,
|
||||||
verify: bool = True,
|
verify=True,
|
||||||
timeout: int = 30,
|
timeout=30,
|
||||||
max_retries: int = 4,
|
max_retries=4,
|
||||||
):
|
):
|
||||||
self._username = user
|
self._username = user
|
||||||
self._password = password
|
self._password = password
|
||||||
@@ -46,18 +40,18 @@ class IMAPConnection(MailboxConnection):
|
|||||||
def fetch_messages(self, reports_folder: str, **kwargs):
|
def fetch_messages(self, reports_folder: str, **kwargs):
|
||||||
self._client.select_folder(reports_folder)
|
self._client.select_folder(reports_folder)
|
||||||
since = kwargs.get("since")
|
since = kwargs.get("since")
|
||||||
if since is not None:
|
if since:
|
||||||
return self._client.search(f"SINCE {since}")
|
return self._client.search(["SINCE", since])
|
||||||
else:
|
else:
|
||||||
return self._client.search()
|
return self._client.search()
|
||||||
|
|
||||||
def fetch_message(self, message_id: int):
|
def fetch_message(self, message_id):
|
||||||
return cast(str, self._client.fetch_message(message_id, parse=False))
|
return self._client.fetch_message(message_id, parse=False)
|
||||||
|
|
||||||
def delete_message(self, message_id: int):
|
def delete_message(self, message_id: str):
|
||||||
self._client.delete_messages([message_id])
|
self._client.delete_messages([message_id])
|
||||||
|
|
||||||
def move_message(self, message_id: int, folder_name: str):
|
def move_message(self, message_id: str, folder_name: str):
|
||||||
self._client.move_messages([message_id], folder_name)
|
self._client.move_messages([message_id], folder_name)
|
||||||
|
|
||||||
def keepalive(self):
|
def keepalive(self):
|
||||||
|
|||||||
@@ -1,8 +1,5 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
from abc import ABC
|
from abc import ABC
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
|
||||||
class MailboxConnection(ABC):
|
class MailboxConnection(ABC):
|
||||||
@@ -13,16 +10,16 @@ class MailboxConnection(ABC):
|
|||||||
def create_folder(self, folder_name: str):
|
def create_folder(self, folder_name: str):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def fetch_messages(self, reports_folder: str, **kwargs):
|
def fetch_messages(self, reports_folder: str, **kwargs) -> List[str]:
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def fetch_message(self, message_id) -> str:
|
def fetch_message(self, message_id) -> str:
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def delete_message(self, message_id):
|
def delete_message(self, message_id: str):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def move_message(self, message_id, folder_name: str):
|
def move_message(self, message_id: str, folder_name: str):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def keepalive(self):
|
def keepalive(self):
|
||||||
|
|||||||
@@ -1,21 +1,16 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import mailbox
|
|
||||||
import os
|
|
||||||
from time import sleep
|
from time import sleep
|
||||||
from typing import Dict
|
|
||||||
|
|
||||||
from parsedmarc.log import logger
|
from parsedmarc.log import logger
|
||||||
from parsedmarc.mail.mailbox_connection import MailboxConnection
|
from parsedmarc.mail.mailbox_connection import MailboxConnection
|
||||||
|
import mailbox
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
class MaildirConnection(MailboxConnection):
|
class MaildirConnection(MailboxConnection):
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
maildir_path: str,
|
maildir_path=None,
|
||||||
maildir_create: bool = False,
|
maildir_create=False,
|
||||||
):
|
):
|
||||||
self._maildir_path = maildir_path
|
self._maildir_path = maildir_path
|
||||||
self._maildir_create = maildir_create
|
self._maildir_create = maildir_create
|
||||||
@@ -32,31 +27,27 @@ class MaildirConnection(MailboxConnection):
|
|||||||
)
|
)
|
||||||
raise Exception(ex)
|
raise Exception(ex)
|
||||||
self._client = mailbox.Maildir(maildir_path, create=maildir_create)
|
self._client = mailbox.Maildir(maildir_path, create=maildir_create)
|
||||||
self._subfolder_client: Dict[str, mailbox.Maildir] = {}
|
self._subfolder_client = {}
|
||||||
|
|
||||||
def create_folder(self, folder_name: str):
|
def create_folder(self, folder_name: str):
|
||||||
self._subfolder_client[folder_name] = self._client.add_folder(folder_name)
|
self._subfolder_client[folder_name] = self._client.add_folder(folder_name)
|
||||||
|
self._client.add_folder(folder_name)
|
||||||
|
|
||||||
def fetch_messages(self, reports_folder: str, **kwargs):
|
def fetch_messages(self, reports_folder: str, **kwargs):
|
||||||
return self._client.keys()
|
return self._client.keys()
|
||||||
|
|
||||||
def fetch_message(self, message_id: str) -> str:
|
def fetch_message(self, message_id):
|
||||||
msg = self._client.get(message_id)
|
return self._client.get(message_id).as_string()
|
||||||
if msg is not None:
|
|
||||||
msg = msg.as_string()
|
|
||||||
if msg is not None:
|
|
||||||
return msg
|
|
||||||
return ""
|
|
||||||
|
|
||||||
def delete_message(self, message_id: str):
|
def delete_message(self, message_id: str):
|
||||||
self._client.remove(message_id)
|
self._client.remove(message_id)
|
||||||
|
|
||||||
def move_message(self, message_id: str, folder_name: str):
|
def move_message(self, message_id: str, folder_name: str):
|
||||||
message_data = self._client.get(message_id)
|
message_data = self._client.get(message_id)
|
||||||
if message_data is None:
|
if folder_name not in self._subfolder_client.keys():
|
||||||
return
|
self._subfolder_client = mailbox.Maildir(
|
||||||
if folder_name not in self._subfolder_client:
|
os.join(self.maildir_path, folder_name), create=self.maildir_create
|
||||||
self._subfolder_client[folder_name] = self._client.add_folder(folder_name)
|
)
|
||||||
self._subfolder_client[folder_name].add(message_data)
|
self._subfolder_client[folder_name].add(message_data)
|
||||||
self._client.remove(message_id)
|
self._client.remove(message_id)
|
||||||
|
|
||||||
|
|||||||
@@ -1,29 +1,27 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
from __future__ import annotations
|
from collections import OrderedDict
|
||||||
|
|
||||||
from typing import Any, Optional, Union
|
|
||||||
|
|
||||||
from opensearchpy import (
|
from opensearchpy import (
|
||||||
Boolean,
|
Q,
|
||||||
Date,
|
connections,
|
||||||
|
Object,
|
||||||
Document,
|
Document,
|
||||||
Index,
|
Index,
|
||||||
|
Nested,
|
||||||
InnerDoc,
|
InnerDoc,
|
||||||
Integer,
|
Integer,
|
||||||
Ip,
|
|
||||||
Nested,
|
|
||||||
Object,
|
|
||||||
Q,
|
|
||||||
Search,
|
|
||||||
Text,
|
Text,
|
||||||
connections,
|
Boolean,
|
||||||
|
Ip,
|
||||||
|
Date,
|
||||||
|
Search,
|
||||||
)
|
)
|
||||||
from opensearchpy.helpers import reindex
|
from opensearchpy.helpers import reindex
|
||||||
|
|
||||||
from parsedmarc import InvalidForensicReport
|
|
||||||
from parsedmarc.log import logger
|
from parsedmarc.log import logger
|
||||||
from parsedmarc.utils import human_timestamp_to_datetime
|
from parsedmarc.utils import human_timestamp_to_datetime
|
||||||
|
from parsedmarc import InvalidForensicReport
|
||||||
|
|
||||||
|
|
||||||
class OpenSearchError(Exception):
|
class OpenSearchError(Exception):
|
||||||
@@ -69,8 +67,6 @@ class _AggregateReportDoc(Document):
|
|||||||
date_range = Date()
|
date_range = Date()
|
||||||
date_begin = Date()
|
date_begin = Date()
|
||||||
date_end = Date()
|
date_end = Date()
|
||||||
normalized_timespan = Boolean()
|
|
||||||
original_timespan_seconds = Integer
|
|
||||||
errors = Text()
|
errors = Text()
|
||||||
published_policy = Object(_PublishedPolicy)
|
published_policy = Object(_PublishedPolicy)
|
||||||
source_ip_address = Ip()
|
source_ip_address = Ip()
|
||||||
@@ -91,18 +87,18 @@ class _AggregateReportDoc(Document):
|
|||||||
dkim_results = Nested(_DKIMResult)
|
dkim_results = Nested(_DKIMResult)
|
||||||
spf_results = Nested(_SPFResult)
|
spf_results = Nested(_SPFResult)
|
||||||
|
|
||||||
def add_policy_override(self, type_: str, comment: str):
|
def add_policy_override(self, type_, comment):
|
||||||
self.policy_overrides.append(_PolicyOverride(type=type_, comment=comment))
|
self.policy_overrides.append(_PolicyOverride(type=type_, comment=comment))
|
||||||
|
|
||||||
def add_dkim_result(self, domain: str, selector: str, result: _DKIMResult):
|
def add_dkim_result(self, domain, selector, result):
|
||||||
self.dkim_results.append(
|
self.dkim_results.append(
|
||||||
_DKIMResult(domain=domain, selector=selector, result=result)
|
_DKIMResult(domain=domain, selector=selector, result=result)
|
||||||
)
|
)
|
||||||
|
|
||||||
def add_spf_result(self, domain: str, scope: str, result: _SPFResult):
|
def add_spf_result(self, domain, scope, result):
|
||||||
self.spf_results.append(_SPFResult(domain=domain, scope=scope, result=result))
|
self.spf_results.append(_SPFResult(domain=domain, scope=scope, result=result))
|
||||||
|
|
||||||
def save(self, **kwargs): # pyright: ignore[reportIncompatibleMethodOverride]
|
def save(self, **kwargs):
|
||||||
self.passed_dmarc = False
|
self.passed_dmarc = False
|
||||||
self.passed_dmarc = self.spf_aligned or self.dkim_aligned
|
self.passed_dmarc = self.spf_aligned or self.dkim_aligned
|
||||||
|
|
||||||
@@ -135,21 +131,21 @@ class _ForensicSampleDoc(InnerDoc):
|
|||||||
body = Text()
|
body = Text()
|
||||||
attachments = Nested(_EmailAttachmentDoc)
|
attachments = Nested(_EmailAttachmentDoc)
|
||||||
|
|
||||||
def add_to(self, display_name: str, address: str):
|
def add_to(self, display_name, address):
|
||||||
self.to.append(_EmailAddressDoc(display_name=display_name, address=address))
|
self.to.append(_EmailAddressDoc(display_name=display_name, address=address))
|
||||||
|
|
||||||
def add_reply_to(self, display_name: str, address: str):
|
def add_reply_to(self, display_name, address):
|
||||||
self.reply_to.append(
|
self.reply_to.append(
|
||||||
_EmailAddressDoc(display_name=display_name, address=address)
|
_EmailAddressDoc(display_name=display_name, address=address)
|
||||||
)
|
)
|
||||||
|
|
||||||
def add_cc(self, display_name: str, address: str):
|
def add_cc(self, display_name, address):
|
||||||
self.cc.append(_EmailAddressDoc(display_name=display_name, address=address))
|
self.cc.append(_EmailAddressDoc(display_name=display_name, address=address))
|
||||||
|
|
||||||
def add_bcc(self, display_name: str, address: str):
|
def add_bcc(self, display_name, address):
|
||||||
self.bcc.append(_EmailAddressDoc(display_name=display_name, address=address))
|
self.bcc.append(_EmailAddressDoc(display_name=display_name, address=address))
|
||||||
|
|
||||||
def add_attachment(self, filename: str, content_type: str, sha256: str):
|
def add_attachment(self, filename, content_type, sha256):
|
||||||
self.attachments.append(
|
self.attachments.append(
|
||||||
_EmailAttachmentDoc(
|
_EmailAttachmentDoc(
|
||||||
filename=filename, content_type=content_type, sha256=sha256
|
filename=filename, content_type=content_type, sha256=sha256
|
||||||
@@ -201,15 +197,15 @@ class _SMTPTLSPolicyDoc(InnerDoc):
|
|||||||
|
|
||||||
def add_failure_details(
|
def add_failure_details(
|
||||||
self,
|
self,
|
||||||
result_type: Optional[str] = None,
|
result_type,
|
||||||
ip_address: Optional[str] = None,
|
ip_address,
|
||||||
receiving_ip: Optional[str] = None,
|
receiving_ip,
|
||||||
receiving_mx_helo: Optional[str] = None,
|
receiving_mx_helo,
|
||||||
failed_session_count: Optional[int] = None,
|
failed_session_count,
|
||||||
sending_mta_ip: Optional[str] = None,
|
sending_mta_ip=None,
|
||||||
receiving_mx_hostname: Optional[str] = None,
|
receiving_mx_hostname=None,
|
||||||
additional_information_uri: Optional[str] = None,
|
additional_information_uri=None,
|
||||||
failure_reason_code: Union[str, int, None] = None,
|
failure_reason_code=None,
|
||||||
):
|
):
|
||||||
_details = _SMTPTLSFailureDetailsDoc(
|
_details = _SMTPTLSFailureDetailsDoc(
|
||||||
result_type=result_type,
|
result_type=result_type,
|
||||||
@@ -239,14 +235,13 @@ class _SMTPTLSReportDoc(Document):
|
|||||||
|
|
||||||
def add_policy(
|
def add_policy(
|
||||||
self,
|
self,
|
||||||
policy_type: str,
|
policy_type,
|
||||||
policy_domain: str,
|
policy_domain,
|
||||||
successful_session_count: int,
|
successful_session_count,
|
||||||
failed_session_count: int,
|
failed_session_count,
|
||||||
*,
|
policy_string=None,
|
||||||
policy_string: Optional[str] = None,
|
mx_host_patterns=None,
|
||||||
mx_host_patterns: Optional[list[str]] = None,
|
failure_details=None,
|
||||||
failure_details: Optional[str] = None,
|
|
||||||
):
|
):
|
||||||
self.policies.append(
|
self.policies.append(
|
||||||
policy_type=policy_type,
|
policy_type=policy_type,
|
||||||
@@ -264,25 +259,24 @@ class AlreadySaved(ValueError):
|
|||||||
|
|
||||||
|
|
||||||
def set_hosts(
|
def set_hosts(
|
||||||
hosts: Union[str, list[str]],
|
hosts,
|
||||||
*,
|
use_ssl=False,
|
||||||
use_ssl: Optional[bool] = False,
|
ssl_cert_path=None,
|
||||||
ssl_cert_path: Optional[str] = None,
|
username=None,
|
||||||
username: Optional[str] = None,
|
password=None,
|
||||||
password: Optional[str] = None,
|
apiKey=None,
|
||||||
api_key: Optional[str] = None,
|
timeout=60.0,
|
||||||
timeout: Optional[float] = 60.0,
|
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Sets the OpenSearch hosts to use
|
Sets the OpenSearch hosts to use
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
hosts (str|list[str]): A single hostname or URL, or list of hostnames or URLs
|
hosts (str|list): A hostname or URL, or list of hostnames or URLs
|
||||||
use_ssl (bool): Use an HTTPS connection to the server
|
use_ssl (bool): Use an HTTPS connection to the server
|
||||||
ssl_cert_path (str): Path to the certificate chain
|
ssl_cert_path (str): Path to the certificate chain
|
||||||
username (str): The username to use for authentication
|
username (str): The username to use for authentication
|
||||||
password (str): The password to use for authentication
|
password (str): The password to use for authentication
|
||||||
api_key (str): The Base64 encoded API key to use for authentication
|
apiKey (str): The Base64 encoded API key to use for authentication
|
||||||
timeout (float): Timeout in seconds
|
timeout (float): Timeout in seconds
|
||||||
"""
|
"""
|
||||||
if not isinstance(hosts, list):
|
if not isinstance(hosts, list):
|
||||||
@@ -295,14 +289,14 @@ def set_hosts(
|
|||||||
conn_params["ca_certs"] = ssl_cert_path
|
conn_params["ca_certs"] = ssl_cert_path
|
||||||
else:
|
else:
|
||||||
conn_params["verify_certs"] = False
|
conn_params["verify_certs"] = False
|
||||||
if username and password:
|
if username:
|
||||||
conn_params["http_auth"] = username + ":" + password
|
conn_params["http_auth"] = username + ":" + password
|
||||||
if api_key:
|
if apiKey:
|
||||||
conn_params["api_key"] = api_key
|
conn_params["api_key"] = apiKey
|
||||||
connections.create_connection(**conn_params)
|
connections.create_connection(**conn_params)
|
||||||
|
|
||||||
|
|
||||||
def create_indexes(names: list[str], settings: Optional[dict[str, Any]] = None):
|
def create_indexes(names, settings=None):
|
||||||
"""
|
"""
|
||||||
Create OpenSearch indexes
|
Create OpenSearch indexes
|
||||||
|
|
||||||
@@ -325,10 +319,7 @@ def create_indexes(names: list[str], settings: Optional[dict[str, Any]] = None):
|
|||||||
raise OpenSearchError("OpenSearch error: {0}".format(e.__str__()))
|
raise OpenSearchError("OpenSearch error: {0}".format(e.__str__()))
|
||||||
|
|
||||||
|
|
||||||
def migrate_indexes(
|
def migrate_indexes(aggregate_indexes=None, forensic_indexes=None):
|
||||||
aggregate_indexes: Optional[list[str]] = None,
|
|
||||||
forensic_indexes: Optional[list[str]] = None,
|
|
||||||
):
|
|
||||||
"""
|
"""
|
||||||
Updates index mappings
|
Updates index mappings
|
||||||
|
|
||||||
@@ -375,18 +366,18 @@ def migrate_indexes(
|
|||||||
|
|
||||||
|
|
||||||
def save_aggregate_report_to_opensearch(
|
def save_aggregate_report_to_opensearch(
|
||||||
aggregate_report: dict[str, Any],
|
aggregate_report,
|
||||||
index_suffix: Optional[str] = None,
|
index_suffix=None,
|
||||||
index_prefix: Optional[str] = None,
|
index_prefix=None,
|
||||||
monthly_indexes: bool = False,
|
monthly_indexes=False,
|
||||||
number_of_shards: int = 1,
|
number_of_shards=1,
|
||||||
number_of_replicas: int = 0,
|
number_of_replicas=0,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Saves a parsed DMARC aggregate report to OpenSearch
|
Saves a parsed DMARC aggregate report to OpenSearch
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
aggregate_report (dict): A parsed forensic report
|
aggregate_report (OrderedDict): A parsed forensic report
|
||||||
index_suffix (str): The suffix of the name of the index to save to
|
index_suffix (str): The suffix of the name of the index to save to
|
||||||
index_prefix (str): The prefix of the name of the index to save to
|
index_prefix (str): The prefix of the name of the index to save to
|
||||||
monthly_indexes (bool): Use monthly indexes instead of daily indexes
|
monthly_indexes (bool): Use monthly indexes instead of daily indexes
|
||||||
@@ -404,11 +395,15 @@ def save_aggregate_report_to_opensearch(
|
|||||||
domain = aggregate_report["policy_published"]["domain"]
|
domain = aggregate_report["policy_published"]["domain"]
|
||||||
begin_date = human_timestamp_to_datetime(metadata["begin_date"], to_utc=True)
|
begin_date = human_timestamp_to_datetime(metadata["begin_date"], to_utc=True)
|
||||||
end_date = human_timestamp_to_datetime(metadata["end_date"], to_utc=True)
|
end_date = human_timestamp_to_datetime(metadata["end_date"], to_utc=True)
|
||||||
|
begin_date_human = begin_date.strftime("%Y-%m-%d %H:%M:%SZ")
|
||||||
|
end_date_human = end_date.strftime("%Y-%m-%d %H:%M:%SZ")
|
||||||
if monthly_indexes:
|
if monthly_indexes:
|
||||||
index_date = begin_date.strftime("%Y-%m")
|
index_date = begin_date.strftime("%Y-%m")
|
||||||
else:
|
else:
|
||||||
index_date = begin_date.strftime("%Y-%m-%d")
|
index_date = begin_date.strftime("%Y-%m-%d")
|
||||||
|
aggregate_report["begin_date"] = begin_date
|
||||||
|
aggregate_report["end_date"] = end_date
|
||||||
|
date_range = [aggregate_report["begin_date"], aggregate_report["end_date"]]
|
||||||
|
|
||||||
org_name_query = Q(dict(match_phrase=dict(org_name=org_name)))
|
org_name_query = Q(dict(match_phrase=dict(org_name=org_name)))
|
||||||
report_id_query = Q(dict(match_phrase=dict(report_id=report_id)))
|
report_id_query = Q(dict(match_phrase=dict(report_id=report_id)))
|
||||||
@@ -426,8 +421,6 @@ def save_aggregate_report_to_opensearch(
|
|||||||
query = org_name_query & report_id_query & domain_query
|
query = org_name_query & report_id_query & domain_query
|
||||||
query = query & begin_date_query & end_date_query
|
query = query & begin_date_query & end_date_query
|
||||||
search.query = query
|
search.query = query
|
||||||
begin_date_human = begin_date.strftime("%Y-%m-%d %H:%M:%SZ")
|
|
||||||
end_date_human = end_date.strftime("%Y-%m-%d %H:%M:%SZ")
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
existing = search.execute()
|
existing = search.execute()
|
||||||
@@ -457,17 +450,6 @@ def save_aggregate_report_to_opensearch(
|
|||||||
)
|
)
|
||||||
|
|
||||||
for record in aggregate_report["records"]:
|
for record in aggregate_report["records"]:
|
||||||
begin_date = human_timestamp_to_datetime(record["interval_begin"], to_utc=True)
|
|
||||||
end_date = human_timestamp_to_datetime(record["interval_end"], to_utc=True)
|
|
||||||
normalized_timespan = record["normalized_timespan"]
|
|
||||||
|
|
||||||
if monthly_indexes:
|
|
||||||
index_date = begin_date.strftime("%Y-%m")
|
|
||||||
else:
|
|
||||||
index_date = begin_date.strftime("%Y-%m-%d")
|
|
||||||
aggregate_report["begin_date"] = begin_date
|
|
||||||
aggregate_report["end_date"] = end_date
|
|
||||||
date_range = [aggregate_report["begin_date"], aggregate_report["end_date"]]
|
|
||||||
agg_doc = _AggregateReportDoc(
|
agg_doc = _AggregateReportDoc(
|
||||||
xml_schema=aggregate_report["xml_schema"],
|
xml_schema=aggregate_report["xml_schema"],
|
||||||
org_name=metadata["org_name"],
|
org_name=metadata["org_name"],
|
||||||
@@ -475,9 +457,8 @@ def save_aggregate_report_to_opensearch(
|
|||||||
org_extra_contact_info=metadata["org_extra_contact_info"],
|
org_extra_contact_info=metadata["org_extra_contact_info"],
|
||||||
report_id=metadata["report_id"],
|
report_id=metadata["report_id"],
|
||||||
date_range=date_range,
|
date_range=date_range,
|
||||||
date_begin=begin_date,
|
date_begin=aggregate_report["begin_date"],
|
||||||
date_end=end_date,
|
date_end=aggregate_report["end_date"],
|
||||||
normalized_timespan=normalized_timespan,
|
|
||||||
errors=metadata["errors"],
|
errors=metadata["errors"],
|
||||||
published_policy=published_policy,
|
published_policy=published_policy,
|
||||||
source_ip_address=record["source"]["ip_address"],
|
source_ip_address=record["source"]["ip_address"],
|
||||||
@@ -536,18 +517,18 @@ def save_aggregate_report_to_opensearch(
|
|||||||
|
|
||||||
|
|
||||||
def save_forensic_report_to_opensearch(
|
def save_forensic_report_to_opensearch(
|
||||||
forensic_report: dict[str, Any],
|
forensic_report,
|
||||||
index_suffix: Optional[str] = None,
|
index_suffix=None,
|
||||||
index_prefix: Optional[str] = None,
|
index_prefix=None,
|
||||||
monthly_indexes: bool = False,
|
monthly_indexes=False,
|
||||||
number_of_shards: int = 1,
|
number_of_shards=1,
|
||||||
number_of_replicas: int = 0,
|
number_of_replicas=0,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Saves a parsed DMARC forensic report to OpenSearch
|
Saves a parsed DMARC forensic report to OpenSearch
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
forensic_report (dict): A parsed forensic report
|
forensic_report (OrderedDict): A parsed forensic report
|
||||||
index_suffix (str): The suffix of the name of the index to save to
|
index_suffix (str): The suffix of the name of the index to save to
|
||||||
index_prefix (str): The prefix of the name of the index to save to
|
index_prefix (str): The prefix of the name of the index to save to
|
||||||
monthly_indexes (bool): Use monthly indexes instead of daily
|
monthly_indexes (bool): Use monthly indexes instead of daily
|
||||||
@@ -567,12 +548,12 @@ def save_forensic_report_to_opensearch(
|
|||||||
sample_date = forensic_report["parsed_sample"]["date"]
|
sample_date = forensic_report["parsed_sample"]["date"]
|
||||||
sample_date = human_timestamp_to_datetime(sample_date)
|
sample_date = human_timestamp_to_datetime(sample_date)
|
||||||
original_headers = forensic_report["parsed_sample"]["headers"]
|
original_headers = forensic_report["parsed_sample"]["headers"]
|
||||||
headers: dict[str, Any] = {}
|
headers = OrderedDict()
|
||||||
for original_header in original_headers:
|
for original_header in original_headers:
|
||||||
headers[original_header.lower()] = original_headers[original_header]
|
headers[original_header.lower()] = original_headers[original_header]
|
||||||
|
|
||||||
arrival_date = human_timestamp_to_datetime(forensic_report["arrival_date_utc"])
|
arrival_date_human = forensic_report["arrival_date_utc"]
|
||||||
arrival_date_epoch_milliseconds = int(arrival_date.timestamp() * 1000)
|
arrival_date = human_timestamp_to_datetime(arrival_date_human)
|
||||||
|
|
||||||
if index_suffix is not None:
|
if index_suffix is not None:
|
||||||
search_index = "dmarc_forensic_{0}*".format(index_suffix)
|
search_index = "dmarc_forensic_{0}*".format(index_suffix)
|
||||||
@@ -581,35 +562,20 @@ def save_forensic_report_to_opensearch(
|
|||||||
if index_prefix is not None:
|
if index_prefix is not None:
|
||||||
search_index = "{0}{1}".format(index_prefix, search_index)
|
search_index = "{0}{1}".format(index_prefix, search_index)
|
||||||
search = Search(index=search_index)
|
search = Search(index=search_index)
|
||||||
q = Q(dict(match=dict(arrival_date=arrival_date_epoch_milliseconds)))
|
arrival_query = {"match": {"arrival_date": arrival_date}}
|
||||||
|
q = Q(arrival_query)
|
||||||
|
|
||||||
from_ = None
|
from_ = None
|
||||||
to_ = None
|
to_ = None
|
||||||
subject = None
|
subject = None
|
||||||
if "from" in headers:
|
if "from" in headers:
|
||||||
# We convert the FROM header from a string list to a flat string.
|
from_ = headers["from"]
|
||||||
headers["from"] = headers["from"][0]
|
from_query = {"match_phrase": {"sample.headers.from": from_}}
|
||||||
if headers["from"][0] == "":
|
q = q & Q(from_query)
|
||||||
headers["from"] = headers["from"][1]
|
|
||||||
else:
|
|
||||||
headers["from"] = " <".join(headers["from"]) + ">"
|
|
||||||
|
|
||||||
from_ = dict()
|
|
||||||
from_["sample.headers.from"] = headers["from"]
|
|
||||||
from_query = Q(dict(match_phrase=from_))
|
|
||||||
q = q & from_query
|
|
||||||
if "to" in headers:
|
if "to" in headers:
|
||||||
# We convert the TO header from a string list to a flat string.
|
to_ = headers["to"]
|
||||||
headers["to"] = headers["to"][0]
|
to_query = {"match_phrase": {"sample.headers.to": to_}}
|
||||||
if headers["to"][0] == "":
|
q = q & Q(to_query)
|
||||||
headers["to"] = headers["to"][1]
|
|
||||||
else:
|
|
||||||
headers["to"] = " <".join(headers["to"]) + ">"
|
|
||||||
|
|
||||||
to_ = dict()
|
|
||||||
to_["sample.headers.to"] = headers["to"]
|
|
||||||
to_query = Q(dict(match_phrase=to_))
|
|
||||||
q = q & to_query
|
|
||||||
if "subject" in headers:
|
if "subject" in headers:
|
||||||
subject = headers["subject"]
|
subject = headers["subject"]
|
||||||
subject_query = {"match_phrase": {"sample.headers.subject": subject}}
|
subject_query = {"match_phrase": {"sample.headers.subject": subject}}
|
||||||
@@ -623,9 +589,7 @@ def save_forensic_report_to_opensearch(
|
|||||||
"A forensic sample to {0} from {1} "
|
"A forensic sample to {0} from {1} "
|
||||||
"with a subject of {2} and arrival date of {3} "
|
"with a subject of {2} and arrival date of {3} "
|
||||||
"already exists in "
|
"already exists in "
|
||||||
"OpenSearch".format(
|
"OpenSearch".format(to_, from_, subject, arrival_date_human)
|
||||||
to_, from_, subject, forensic_report["arrival_date_utc"]
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
|
|
||||||
parsed_sample = forensic_report["parsed_sample"]
|
parsed_sample = forensic_report["parsed_sample"]
|
||||||
@@ -661,7 +625,7 @@ def save_forensic_report_to_opensearch(
|
|||||||
user_agent=forensic_report["user_agent"],
|
user_agent=forensic_report["user_agent"],
|
||||||
version=forensic_report["version"],
|
version=forensic_report["version"],
|
||||||
original_mail_from=forensic_report["original_mail_from"],
|
original_mail_from=forensic_report["original_mail_from"],
|
||||||
arrival_date=arrival_date_epoch_milliseconds,
|
arrival_date=arrival_date,
|
||||||
domain=forensic_report["reported_domain"],
|
domain=forensic_report["reported_domain"],
|
||||||
original_envelope_id=forensic_report["original_envelope_id"],
|
original_envelope_id=forensic_report["original_envelope_id"],
|
||||||
authentication_results=forensic_report["authentication_results"],
|
authentication_results=forensic_report["authentication_results"],
|
||||||
@@ -703,18 +667,18 @@ def save_forensic_report_to_opensearch(
|
|||||||
|
|
||||||
|
|
||||||
def save_smtp_tls_report_to_opensearch(
|
def save_smtp_tls_report_to_opensearch(
|
||||||
report: dict[str, Any],
|
report,
|
||||||
index_suffix: Optional[str] = None,
|
index_suffix=None,
|
||||||
index_prefix: Optional[str] = None,
|
index_prefix=None,
|
||||||
monthly_indexes: bool = False,
|
monthly_indexes=False,
|
||||||
number_of_shards: int = 1,
|
number_of_shards=1,
|
||||||
number_of_replicas: int = 0,
|
number_of_replicas=0,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Saves a parsed SMTP TLS report to OpenSearch
|
Saves a parsed SMTP TLS report to OpenSearch
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
report (dict): A parsed SMTP TLS report
|
report (OrderedDict): A parsed SMTP TLS report
|
||||||
index_suffix (str): The suffix of the name of the index to save to
|
index_suffix (str): The suffix of the name of the index to save to
|
||||||
index_prefix (str): The prefix of the name of the index to save to
|
index_prefix (str): The prefix of the name of the index to save to
|
||||||
monthly_indexes (bool): Use monthly indexes instead of daily indexes
|
monthly_indexes (bool): Use monthly indexes instead of daily indexes
|
||||||
@@ -724,7 +688,7 @@ def save_smtp_tls_report_to_opensearch(
|
|||||||
Raises:
|
Raises:
|
||||||
AlreadySaved
|
AlreadySaved
|
||||||
"""
|
"""
|
||||||
logger.info("Saving SMTP TLS report to OpenSearch")
|
logger.info("Saving aggregate report to OpenSearch")
|
||||||
org_name = report["organization_name"]
|
org_name = report["organization_name"]
|
||||||
report_id = report["report_id"]
|
report_id = report["report_id"]
|
||||||
begin_date = human_timestamp_to_datetime(report["begin_date"], to_utc=True)
|
begin_date = human_timestamp_to_datetime(report["begin_date"], to_utc=True)
|
||||||
@@ -800,7 +764,7 @@ def save_smtp_tls_report_to_opensearch(
|
|||||||
policy_doc = _SMTPTLSPolicyDoc(
|
policy_doc = _SMTPTLSPolicyDoc(
|
||||||
policy_domain=policy["policy_domain"],
|
policy_domain=policy["policy_domain"],
|
||||||
policy_type=policy["policy_type"],
|
policy_type=policy["policy_type"],
|
||||||
successful_session_count=policy["successful_session_count"],
|
succesful_session_count=policy["successful_session_count"],
|
||||||
failed_session_count=policy["failed_session_count"],
|
failed_session_count=policy["failed_session_count"],
|
||||||
policy_string=policy_strings,
|
policy_string=policy_strings,
|
||||||
mx_host_patterns=mx_host_patterns,
|
mx_host_patterns=mx_host_patterns,
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
# About
|
# About
|
||||||
|
|
||||||
`dbip-country-lite.mmdb` is provided by [dbip][dbip] under a
|
`dbip-country-lite.mmdb` is provided by [dbip][dbip] under a
|
||||||
[Creative Commons Attribution 4.0 International License][cc].
|
[ Creative Commons Attribution 4.0 International License][cc].
|
||||||
|
|
||||||
[dbip]: https://db-ip.com/db/download/ip-to-country-lite
|
[dbip]: https://db-ip.com/db/lite.php
|
||||||
[cc]: http://creativecommons.org/licenses/by/4.0/
|
[cc]: http://creativecommons.org/licenses/by/4.0/
|
||||||
|
|||||||
Binary file not shown.
@@ -3,8 +3,6 @@
|
|||||||
A mapping is meant to make it easier to identify who or what a sending source is. Please consider contributing
|
A mapping is meant to make it easier to identify who or what a sending source is. Please consider contributing
|
||||||
additional mappings in a GitHub Pull Request.
|
additional mappings in a GitHub Pull Request.
|
||||||
|
|
||||||
Do not open these CSV files in Excel. It will replace Unicode characters with question marks. Use LibreOffice Calc instead.
|
|
||||||
|
|
||||||
## base_reverse_dns_map.csv
|
## base_reverse_dns_map.csv
|
||||||
|
|
||||||
A CSV file with three fields: `base_reverse_dns`, `name`, and `type`.
|
A CSV file with three fields: `base_reverse_dns`, `name`, and `type`.
|
||||||
@@ -21,72 +19,33 @@ The `service_type` is based on the following rule precedence:
|
|||||||
3. All telecommunications providers that offer internet access are identified as `ISP`, even if they also offer other services, such as web hosting or email hosting.
|
3. All telecommunications providers that offer internet access are identified as `ISP`, even if they also offer other services, such as web hosting or email hosting.
|
||||||
4. All web hosting providers are identified as `Web Hosting`, even if the service also offers email hosting.
|
4. All web hosting providers are identified as `Web Hosting`, even if the service also offers email hosting.
|
||||||
5. All email account providers are identified as `Email Provider`, no matter how or where they are hosted
|
5. All email account providers are identified as `Email Provider`, no matter how or where they are hosted
|
||||||
6. All legitimate platforms offering their Software as a Service (SaaS) are identified as `SaaS`, regardless of industry. This helps simplify metrics.
|
6. All legitimate platforms offering their Software as a Service SaaS) are identified as `SaaS`, regardless of industry. This helps simplify metrics.
|
||||||
7. All other senders that use their own domain as a Reverse DNS base domain should be identified based on their industry
|
7. All other senders that use their own domain as a Reverse DNS base domain should be identified based on their industry
|
||||||
|
|
||||||
- Agriculture
|
|
||||||
- Automotive
|
|
||||||
- Beauty
|
|
||||||
- Conglomerate
|
|
||||||
- Construction
|
|
||||||
- Consulting
|
|
||||||
- Defense
|
|
||||||
- Education
|
|
||||||
- Email Provider
|
- Email Provider
|
||||||
- Email Security
|
- Email Security
|
||||||
|
- Education
|
||||||
- Entertainment
|
- Entertainment
|
||||||
- Event Planning
|
|
||||||
- Finance
|
- Finance
|
||||||
- Food
|
- Food
|
||||||
- Government
|
- Government
|
||||||
- Government Media
|
- Government Media
|
||||||
- Healthcare
|
- Healthcare
|
||||||
- IaaS
|
|
||||||
- Industrial
|
- Industrial
|
||||||
- ISP
|
- ISP
|
||||||
- Legal
|
|
||||||
- Logistics
|
- Logistics
|
||||||
- Manufacturing
|
|
||||||
- Marketing
|
- Marketing
|
||||||
- MSP
|
- MSP
|
||||||
- MSSP
|
|
||||||
- News
|
|
||||||
- Nonprofit
|
- Nonprofit
|
||||||
- PaaS
|
|
||||||
- Photography
|
|
||||||
- Physical Security
|
|
||||||
- Print
|
- Print
|
||||||
- Publishing
|
|
||||||
- Real Estate
|
- Real Estate
|
||||||
- Retail
|
- Retail
|
||||||
- SaaS
|
- SaaS
|
||||||
- Science
|
|
||||||
- Search Engine
|
|
||||||
- Social Media
|
- Social Media
|
||||||
- Sports
|
|
||||||
- Staffing
|
|
||||||
- Technology
|
- Technology
|
||||||
- Travel
|
- Travel
|
||||||
- Web Host
|
- Web Host
|
||||||
|
|
||||||
The file currently contains over 1,400 mappings from a wide variety of email sending sources.
|
The file currently contains over 600 mappings from a wide variety of email sending services, including large email
|
||||||
|
providers, SaaS platforms, small web hosts, and healthcare companies. Ideally this mapping will continuously grow to
|
||||||
## known_unknown_base_reverse_dns.txt
|
include many other services and industries.
|
||||||
|
|
||||||
A list of reverse DNS base domains that could not be identified as belonging to a particular organization, service, or industry.
|
|
||||||
|
|
||||||
## base_reverse_dns.csv
|
|
||||||
|
|
||||||
A CSV with the fields `source_name` and optionally `message_count`. This CSV can be generated by exporting the base DNS data from the Kibana or Splunk dashboards provided by parsedmarc. This file is not tracked by Git.
|
|
||||||
|
|
||||||
## unknown_base_reverse_dns.csv
|
|
||||||
|
|
||||||
A CSV file with the fields `source_name` and `message_count`. This file is not tracked by Git.
|
|
||||||
|
|
||||||
## find_bad_utf8.py
|
|
||||||
|
|
||||||
Locates invalid UTF-8 bytes in files and optionally tries to current them. Generated by GPT5. Helped me find where I had introduced invalid bytes in `base_reverse_dns_map.csv`.
|
|
||||||
|
|
||||||
## find_unknown_base_reverse_dns.py
|
|
||||||
|
|
||||||
This is a python script that reads the domains in `base_reverse_dns.csv` and writes the domains that are not in `base_reverse_dns_map.csv` or `known_unknown_base_reverse_dns.txt` to `unknown_base_reverse_dns.csv`. This is useful for identifying potential additional domains to contribute to `base_reverse_dns_map.csv` and `known_unknown_base_reverse_dns.txt`.
|
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -1,44 +0,0 @@
|
|||||||
Agriculture
|
|
||||||
Automotive
|
|
||||||
Beauty
|
|
||||||
Conglomerate
|
|
||||||
Construction
|
|
||||||
Consulting
|
|
||||||
Defense
|
|
||||||
Education
|
|
||||||
Email Provider
|
|
||||||
Email Security
|
|
||||||
Entertainment
|
|
||||||
Event Planning
|
|
||||||
Finance
|
|
||||||
Food
|
|
||||||
Government
|
|
||||||
Government Media
|
|
||||||
Healthcare
|
|
||||||
ISP
|
|
||||||
IaaS
|
|
||||||
Industrial
|
|
||||||
Legal
|
|
||||||
Logistics
|
|
||||||
MSP
|
|
||||||
MSSP
|
|
||||||
Manufacturing
|
|
||||||
Marketing
|
|
||||||
News
|
|
||||||
Nonprofit
|
|
||||||
PaaS
|
|
||||||
Photography
|
|
||||||
Physical Security
|
|
||||||
Print
|
|
||||||
Publishing
|
|
||||||
Real Estate
|
|
||||||
Retail
|
|
||||||
SaaS
|
|
||||||
Science
|
|
||||||
Search Engine
|
|
||||||
Social Media
|
|
||||||
Sports
|
|
||||||
Staffing
|
|
||||||
Technology
|
|
||||||
Travel
|
|
||||||
Web Host
|
|
||||||
@@ -1,488 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
import codecs
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import shutil
|
|
||||||
from typing import List, Tuple
|
|
||||||
|
|
||||||
"""
|
|
||||||
Locates and optionally corrects bad UTF-8 bytes in a file.
|
|
||||||
Generated by GPT-5 Use at your own risk.
|
|
||||||
"""
|
|
||||||
|
|
||||||
# -------------------------
|
|
||||||
# UTF-8 scanning
|
|
||||||
# -------------------------
|
|
||||||
|
|
||||||
|
|
||||||
def scan_line_for_utf8_errors(
|
|
||||||
line_bytes: bytes, line_no: int, base_offset: int, context: int
|
|
||||||
):
|
|
||||||
"""
|
|
||||||
Scan one line of raw bytes for UTF-8 decoding errors.
|
|
||||||
Returns a list of dicts describing each error.
|
|
||||||
"""
|
|
||||||
pos = 0
|
|
||||||
results = []
|
|
||||||
while pos < len(line_bytes):
|
|
||||||
dec = codecs.getincrementaldecoder("utf-8")("strict")
|
|
||||||
try:
|
|
||||||
dec.decode(line_bytes[pos:], final=True)
|
|
||||||
break
|
|
||||||
except UnicodeDecodeError as e:
|
|
||||||
rel_index = e.start
|
|
||||||
abs_index_in_line = pos + rel_index
|
|
||||||
abs_offset = base_offset + abs_index_in_line
|
|
||||||
|
|
||||||
start_ctx = max(0, abs_index_in_line - context)
|
|
||||||
end_ctx = min(len(line_bytes), abs_index_in_line + 1 + context)
|
|
||||||
ctx_bytes = line_bytes[start_ctx:end_ctx]
|
|
||||||
bad_byte = line_bytes[abs_index_in_line : abs_index_in_line + 1]
|
|
||||||
col = abs_index_in_line + 1 # 1-based byte column
|
|
||||||
|
|
||||||
results.append(
|
|
||||||
{
|
|
||||||
"line": line_no,
|
|
||||||
"column": col,
|
|
||||||
"abs_offset": abs_offset,
|
|
||||||
"bad_byte_hex": bad_byte.hex(),
|
|
||||||
"context_hex": ctx_bytes.hex(),
|
|
||||||
"context_preview": ctx_bytes.decode("utf-8", errors="replace"),
|
|
||||||
}
|
|
||||||
)
|
|
||||||
# Move past the offending byte and continue
|
|
||||||
pos = abs_index_in_line + 1
|
|
||||||
return results
|
|
||||||
|
|
||||||
|
|
||||||
def scan_file_for_utf8_errors(path: str, context: int, limit: int):
|
|
||||||
errors_found = 0
|
|
||||||
limit_val = limit if limit != 0 else float("inf")
|
|
||||||
|
|
||||||
with open(path, "rb") as f:
|
|
||||||
total_offset = 0
|
|
||||||
line_no = 0
|
|
||||||
while True:
|
|
||||||
line = f.readline()
|
|
||||||
if not line:
|
|
||||||
break
|
|
||||||
line_no += 1
|
|
||||||
results = scan_line_for_utf8_errors(line, line_no, total_offset, context)
|
|
||||||
for r in results:
|
|
||||||
errors_found += 1
|
|
||||||
print(
|
|
||||||
f"[ERROR {errors_found}] Line {r['line']}, Column {r['column']}, "
|
|
||||||
f"Absolute byte offset {r['abs_offset']}"
|
|
||||||
)
|
|
||||||
print(f" Bad byte: 0x{r['bad_byte_hex']}")
|
|
||||||
print(f" Context (hex): {r['context_hex']}")
|
|
||||||
print(f" Context (preview): {r['context_preview']}")
|
|
||||||
print()
|
|
||||||
if errors_found >= limit_val:
|
|
||||||
print(f"Reached limit of {limit} errors. Stopping.")
|
|
||||||
return errors_found
|
|
||||||
total_offset += len(line)
|
|
||||||
|
|
||||||
if errors_found == 0:
|
|
||||||
print("No invalid UTF-8 bytes found. 🎉")
|
|
||||||
else:
|
|
||||||
print(f"Found {errors_found} invalid UTF-8 byte(s).")
|
|
||||||
return errors_found
|
|
||||||
|
|
||||||
|
|
||||||
# -------------------------
|
|
||||||
# Whole-file conversion
|
|
||||||
# -------------------------
|
|
||||||
|
|
||||||
|
|
||||||
def detect_encoding_text(path: str) -> Tuple[str, str]:
|
|
||||||
"""
|
|
||||||
Use charset-normalizer to detect file encoding.
|
|
||||||
Return (encoding_name, decoded_text). Falls back to cp1252 if needed.
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
from charset_normalizer import from_path
|
|
||||||
except ImportError:
|
|
||||||
print(
|
|
||||||
"Please install charset-normalizer: pip install charset-normalizer",
|
|
||||||
file=sys.stderr,
|
|
||||||
)
|
|
||||||
sys.exit(4)
|
|
||||||
|
|
||||||
matches = from_path(path)
|
|
||||||
match = matches.best()
|
|
||||||
if match is None or match.encoding is None:
|
|
||||||
# Fallback heuristic for Western single-byte text
|
|
||||||
with open(path, "rb") as fb:
|
|
||||||
data = fb.read()
|
|
||||||
try:
|
|
||||||
return "cp1252", data.decode("cp1252", errors="strict")
|
|
||||||
except UnicodeDecodeError:
|
|
||||||
print("Unable to detect encoding reliably.", file=sys.stderr)
|
|
||||||
sys.exit(5)
|
|
||||||
|
|
||||||
return match.encoding, str(match)
|
|
||||||
|
|
||||||
|
|
||||||
def convert_to_utf8(src_path: str, out_path: str, src_encoding: str = None) -> str:
|
|
||||||
"""
|
|
||||||
Convert an entire file to UTF-8 (re-decoding everything).
|
|
||||||
If src_encoding is provided, use it; else auto-detect.
|
|
||||||
Returns the encoding actually used.
|
|
||||||
"""
|
|
||||||
if src_encoding:
|
|
||||||
with open(src_path, "rb") as fb:
|
|
||||||
data = fb.read()
|
|
||||||
try:
|
|
||||||
text = data.decode(src_encoding, errors="strict")
|
|
||||||
except LookupError:
|
|
||||||
print(f"Unknown encoding: {src_encoding}", file=sys.stderr)
|
|
||||||
sys.exit(6)
|
|
||||||
except UnicodeDecodeError as e:
|
|
||||||
print(f"Decoding failed with {src_encoding}: {e}", file=sys.stderr)
|
|
||||||
sys.exit(7)
|
|
||||||
used = src_encoding
|
|
||||||
else:
|
|
||||||
used, text = detect_encoding_text(src_path)
|
|
||||||
|
|
||||||
with open(out_path, "w", encoding="utf-8", newline="") as fw:
|
|
||||||
fw.write(text)
|
|
||||||
return used
|
|
||||||
|
|
||||||
|
|
||||||
def verify_utf8_file(path: str) -> Tuple[bool, str]:
|
|
||||||
try:
|
|
||||||
with open(path, "rb") as fb:
|
|
||||||
fb.read().decode("utf-8", errors="strict")
|
|
||||||
return True, ""
|
|
||||||
except UnicodeDecodeError as e:
|
|
||||||
return False, str(e)
|
|
||||||
|
|
||||||
|
|
||||||
# -------------------------
|
|
||||||
# Targeted single-byte fixer
|
|
||||||
# -------------------------
|
|
||||||
|
|
||||||
|
|
||||||
def iter_lines_with_offsets(b: bytes):
|
|
||||||
"""
|
|
||||||
Yield (line_bytes, line_start_abs_offset). Preserves LF/CRLF/CR in bytes.
|
|
||||||
"""
|
|
||||||
start = 0
|
|
||||||
for i, byte in enumerate(b):
|
|
||||||
if byte == 0x0A: # LF
|
|
||||||
yield b[start : i + 1], start
|
|
||||||
start = i + 1
|
|
||||||
if start < len(b):
|
|
||||||
yield b[start:], start
|
|
||||||
|
|
||||||
|
|
||||||
def detect_probable_fallbacks() -> List[str]:
|
|
||||||
# Good defaults for Western/Portuguese text
|
|
||||||
return ["cp1252", "iso-8859-1", "iso-8859-15"]
|
|
||||||
|
|
||||||
|
|
||||||
def repair_mixed_utf8_line(line: bytes, base_offset: int, fallback_chain: List[str]):
|
|
||||||
"""
|
|
||||||
Strictly validate UTF-8 and fix *only* the exact offending byte when an error occurs.
|
|
||||||
This avoids touching adjacent valid UTF-8 (prevents mojibake like 'é').
|
|
||||||
"""
|
|
||||||
out_fragments: List[str] = []
|
|
||||||
fixes = []
|
|
||||||
pos = 0
|
|
||||||
n = len(line)
|
|
||||||
|
|
||||||
while pos < n:
|
|
||||||
dec = codecs.getincrementaldecoder("utf-8")("strict")
|
|
||||||
try:
|
|
||||||
s = dec.decode(line[pos:], final=True)
|
|
||||||
out_fragments.append(s)
|
|
||||||
break
|
|
||||||
except UnicodeDecodeError as e:
|
|
||||||
# Append the valid prefix before the error
|
|
||||||
if e.start > 0:
|
|
||||||
out_fragments.append(
|
|
||||||
line[pos : pos + e.start].decode("utf-8", errors="strict")
|
|
||||||
)
|
|
||||||
|
|
||||||
bad_index = pos + e.start # absolute index in 'line'
|
|
||||||
bad_slice = line[bad_index : bad_index + 1] # FIX EXACTLY ONE BYTE
|
|
||||||
|
|
||||||
# Decode that single byte using the first working fallback
|
|
||||||
decoded = None
|
|
||||||
used_enc = None
|
|
||||||
for enc in fallback_chain:
|
|
||||||
try:
|
|
||||||
decoded = bad_slice.decode(enc, errors="strict")
|
|
||||||
used_enc = enc
|
|
||||||
break
|
|
||||||
except Exception:
|
|
||||||
continue
|
|
||||||
if decoded is None:
|
|
||||||
# latin-1 always succeeds (byte->same code point)
|
|
||||||
decoded = bad_slice.decode("latin-1")
|
|
||||||
used_enc = "latin-1 (fallback)"
|
|
||||||
|
|
||||||
out_fragments.append(decoded)
|
|
||||||
|
|
||||||
# Log the fix
|
|
||||||
col_1based = bad_index + 1 # byte-based column
|
|
||||||
fixes.append(
|
|
||||||
{
|
|
||||||
"line_base_offset": base_offset,
|
|
||||||
"line": None, # caller fills line number
|
|
||||||
"column": col_1based,
|
|
||||||
"abs_offset": base_offset + bad_index,
|
|
||||||
"bad_bytes_hex": bad_slice.hex(),
|
|
||||||
"used_encoding": used_enc,
|
|
||||||
"replacement_preview": decoded,
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
# Advance exactly one byte past the offending byte and continue
|
|
||||||
pos = bad_index + 1
|
|
||||||
|
|
||||||
return "".join(out_fragments), fixes
|
|
||||||
|
|
||||||
|
|
||||||
def targeted_fix_to_utf8(
|
|
||||||
src_path: str,
|
|
||||||
out_path: str,
|
|
||||||
fallback_chain: List[str],
|
|
||||||
dry_run: bool,
|
|
||||||
max_fixes: int,
|
|
||||||
):
|
|
||||||
with open(src_path, "rb") as fb:
|
|
||||||
data = fb.read()
|
|
||||||
|
|
||||||
total_fixes = 0
|
|
||||||
repaired_lines: List[str] = []
|
|
||||||
line_no = 0
|
|
||||||
max_val = max_fixes if max_fixes != 0 else float("inf")
|
|
||||||
|
|
||||||
for line_bytes, base_offset in iter_lines_with_offsets(data):
|
|
||||||
line_no += 1
|
|
||||||
# Fast path: keep lines that are already valid UTF-8
|
|
||||||
try:
|
|
||||||
repaired_lines.append(line_bytes.decode("utf-8", errors="strict"))
|
|
||||||
continue
|
|
||||||
except UnicodeDecodeError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
fixed_text, fixes = repair_mixed_utf8_line(
|
|
||||||
line_bytes, base_offset, fallback_chain=fallback_chain
|
|
||||||
)
|
|
||||||
for f in fixes:
|
|
||||||
f["line"] = line_no
|
|
||||||
|
|
||||||
repaired_lines.append(fixed_text)
|
|
||||||
|
|
||||||
# Log fixes
|
|
||||||
for f in fixes:
|
|
||||||
total_fixes += 1
|
|
||||||
print(
|
|
||||||
f"[FIX {total_fixes}] Line {f['line']}, Column {f['column']}, Abs offset {f['abs_offset']}"
|
|
||||||
)
|
|
||||||
print(f" Bad bytes: 0x{f['bad_bytes_hex']}")
|
|
||||||
print(f" Used encoding: {f['used_encoding']}")
|
|
||||||
preview = f["replacement_preview"].replace("\r", "\\r").replace("\n", "\\n")
|
|
||||||
if len(preview) > 40:
|
|
||||||
preview = preview[:40] + "…"
|
|
||||||
print(f" Replacement preview: {preview}")
|
|
||||||
print()
|
|
||||||
if total_fixes >= max_val:
|
|
||||||
print(f"Reached max fixes limit ({max_fixes}). Stopping scan.")
|
|
||||||
break
|
|
||||||
if total_fixes >= max_val:
|
|
||||||
break
|
|
||||||
|
|
||||||
if dry_run:
|
|
||||||
print(f"Dry run complete. Detected {total_fixes} fix(es). No file written.")
|
|
||||||
return total_fixes
|
|
||||||
|
|
||||||
# Join and verify result can be encoded to UTF-8
|
|
||||||
repaired_text = "".join(repaired_lines)
|
|
||||||
try:
|
|
||||||
repaired_text.encode("utf-8", errors="strict")
|
|
||||||
except UnicodeEncodeError as e:
|
|
||||||
print(f"Internal error: repaired text not valid UTF-8: {e}", file=sys.stderr)
|
|
||||||
sys.exit(3)
|
|
||||||
|
|
||||||
with open(out_path, "w", encoding="utf-8", newline="") as fw:
|
|
||||||
fw.write(repaired_text)
|
|
||||||
|
|
||||||
print(f"Fixed file written to: {out_path}")
|
|
||||||
print(f"Total fixes applied: {total_fixes}")
|
|
||||||
return total_fixes
|
|
||||||
|
|
||||||
|
|
||||||
# -------------------------
|
|
||||||
# CLI
|
|
||||||
# -------------------------
|
|
||||||
|
|
||||||
|
|
||||||
def main():
    """CLI entry point: scan a file for invalid UTF-8 and optionally repair it.

    Three mutually related modes, selected by flags:
      * default      — scan only, report invalid byte sequences.
      * --convert    — re-encode the whole file to UTF-8.
      * --fix        — repair only the invalid bytes via fallback encodings.

    --convert/--fix edit in place by default and create '<input>.bak' first;
    --output redirects the result instead (no backup), and --dry-run previews
    fixes without writing anything.

    Exit codes: 2 missing input file, 8 output failed UTF-8 verification,
    9 conflicting mode flags.
    """
    ap = argparse.ArgumentParser(
        description=(
            "Scan for invalid UTF-8; optionally convert whole file or fix only invalid bytes.\n\n"
            "By default, --convert and --fix **edit the input file in place** and create a backup "
            "named '<input>.bak' before writing. If you pass --output, the original file is left "
            "unchanged and no backup is created. Use --dry-run to preview fixes without writing."
        ),
        formatter_class=argparse.RawTextHelpFormatter,
    )
    ap.add_argument("path", help="Path to the CSV/text file")
    ap.add_argument(
        "--context",
        type=int,
        default=20,
        help="Bytes of context to show around errors (default: 20)",
    )
    ap.add_argument(
        "--limit",
        type=int,
        default=100,
        help="Max errors to report during scan (0 = unlimited)",
    )
    ap.add_argument(
        "--skip-scan", action="store_true", help="Skip initial scan for speed"
    )

    # Whole-file convert
    ap.add_argument(
        "--convert",
        action="store_true",
        help="Convert entire file to UTF-8 using auto/forced encoding "
        "(in-place by default; creates '<input>.bak').",
    )
    ap.add_argument(
        "--encoding",
        help="Force source encoding for --convert or first fallback for --fix",
    )
    ap.add_argument(
        "--output",
        help="Write to this path instead of in-place (no .bak is created in that case)",
    )

    # Targeted fix
    ap.add_argument(
        "--fix",
        action="store_true",
        help="Fix only invalid byte(s) via fallback encodings "
        "(in-place by default; creates '<input>.bak').",
    )
    ap.add_argument(
        "--fallbacks",
        help="Comma-separated fallback encodings (default: cp1252,iso-8859-1,iso-8859-15)",
    )
    ap.add_argument(
        "--dry-run",
        action="store_true",
        help="(fix) Print fixes but do not write or create a .bak",
    )
    ap.add_argument(
        "--max-fixes",
        type=int,
        default=0,
        help="(fix) Stop after N fixes (0 = unlimited)",
    )

    args = ap.parse_args()
    path = args.path

    if not os.path.isfile(path):
        print(f"File not found: {path}", file=sys.stderr)
        sys.exit(2)

    # Optional scan first
    if not args.skip_scan:
        scan_file_for_utf8_errors(path, context=args.context, limit=args.limit)

    # Mode selection guards
    if args.convert and args.fix:
        print("Choose either --convert or --fix (not both).", file=sys.stderr)
        sys.exit(9)
    if not args.convert and not args.fix and args.skip_scan:
        print("No action selected (use --convert or --fix).")
        return
    if not args.convert and not args.fix:
        # User only wanted a scan
        return

    # Determine output path and backup behavior
    # In-place by default: create '<input>.bak' before overwriting.
    if args.output:
        out_path = args.output
        in_place = False
    else:
        out_path = path
        in_place = True

    # CONVERT mode
    if args.convert:
        print("\n[CONVERT MODE] Converting file to UTF-8...")
        if in_place:
            # Create backup before overwriting original
            backup_path = path + ".bak"
            shutil.copy2(path, backup_path)
            print(f"Backup created: {backup_path}")
        used = convert_to_utf8(path, out_path, src_encoding=args.encoding)
        print(f"Source encoding used: {used}")
        print(f"Saved UTF-8 file as: {out_path}")
        # Re-read the result to confirm it now decodes cleanly.
        ok, err = verify_utf8_file(out_path)
        if ok:
            print("Verification: output is valid UTF-8 ✅")
        else:
            print(f"Verification failed: {err}")
            sys.exit(8)
        return

    # FIX mode (targeted, single-byte)
    if args.fix:
        print("\n[FIX MODE] Fixing only invalid bytes to UTF-8...")
        if args.dry_run:
            # Dry-run: never write or create backup
            out_path_effective = os.devnull
            in_place_effective = False
        else:
            out_path_effective = out_path
            in_place_effective = in_place

        # Build fallback chain (if --encoding provided, try it first)
        if args.fallbacks:
            fallback_chain = [e.strip() for e in args.fallbacks.split(",") if e.strip()]
        else:
            fallback_chain = detect_probable_fallbacks()
        if args.encoding and args.encoding not in fallback_chain:
            fallback_chain = [args.encoding] + fallback_chain

        if in_place_effective:
            # Create backup before overwriting original (only when actually writing)
            backup_path = path + ".bak"
            shutil.copy2(path, backup_path)
            print(f"Backup created: {backup_path}")

        fix_count = targeted_fix_to_utf8(
            path,
            out_path_effective,
            fallback_chain=fallback_chain,
            dry_run=args.dry_run,
            max_fixes=args.max_fixes,
        )

        if not args.dry_run:
            ok, err = verify_utf8_file(out_path_effective)
            if ok:
                print("Verification: output is valid UTF-8 ✅")
                print(f"Fix mode completed — {fix_count} byte(s) corrected.")
            else:
                print(f"Verification failed: {err}")
                sys.exit(8)
        return
|
|
||||||
|
|
||||||
|
|
||||||
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
|
||||||
@@ -1,78 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
import os
|
|
||||||
import csv
|
|
||||||
|
|
||||||
|
|
||||||
def _main():
    """Report reverse-DNS base domains that have not been classified yet.

    Reads the aggregate counts in ``base_reverse_dns.csv``, drops every
    domain already present in the map file or the known-unknown list
    (applying PSL overrides first), and writes the remainder to
    ``unknown_base_reverse_dns.csv``.

    Exits with status 1 on any missing input file or duplicate/conflicting
    entry.
    """
    input_csv_file_path = "base_reverse_dns.csv"
    base_reverse_dns_map_file_path = "base_reverse_dns_map.csv"
    known_unknown_list_file_path = "known_unknown_base_reverse_dns.txt"
    psl_overrides_file_path = "psl_overrides.txt"
    output_csv_file_path = "unknown_base_reverse_dns.csv"

    csv_headers = ["source_name", "message_count"]

    known_unknown_domains = []
    psl_overrides = []
    known_domains = []
    output_rows = []

    def load_list(file_path, list_var):
        # Load one lowercased domain per line, rejecting duplicates.
        if not os.path.exists(file_path):
            print(f"Error: {file_path} does not exist")
            exit(1)  # was: fell through and crashed on open() with a traceback
        print(f"Loading {file_path}")
        with open(file_path) as f:
            for line in f.readlines():
                domain = line.lower().strip()
                if domain in list_var:
                    print(f"Error: {domain} is in {file_path} multiple times")
                    exit(1)
                elif domain != "":
                    list_var.append(domain)

    load_list(known_unknown_list_file_path, known_unknown_domains)
    load_list(psl_overrides_file_path, psl_overrides)
    if not os.path.exists(base_reverse_dns_map_file_path):
        print(f"Error: {base_reverse_dns_map_file_path} does not exist")
        exit(1)  # was: missing, so the open() below crashed instead
    print(f"Loading {base_reverse_dns_map_file_path}")
    with open(base_reverse_dns_map_file_path) as f:
        for row in csv.DictReader(f):
            domain = row["base_reverse_dns"].lower().strip()
            if domain in known_domains:
                print(
                    f"Error: {domain} is in {base_reverse_dns_map_file_path} multiple times"
                )
                exit(1)  # was: bare exit() (status 0) on a validation failure
            else:
                known_domains.append(domain)
            # A domain must not be classified and known-unknown at once.
            if domain in known_unknown_domains:
                print(
                    f"Error:{domain} is in {known_unknown_list_file_path} and \
{base_reverse_dns_map_file_path}"
                )
                exit(1)
    if not os.path.exists(input_csv_file_path):
        # was: printed the map file's name instead of the input CSV's name
        print(f"Error: {input_csv_file_path} does not exist")
        exit(1)
    with open(input_csv_file_path) as f:
        for row in csv.DictReader(f):
            domain = row["source_name"].lower().strip()
            if domain == "":
                continue
            # Collapse subdomains onto their PSL-override base domain.
            for psl_domain in psl_overrides:
                if domain.endswith(psl_domain):
                    domain = psl_domain.strip(".").strip("-")
                    break
            if domain not in known_domains and domain not in known_unknown_domains:
                print(f"New unknown domain found: {domain}")
                output_rows.append(row)
    print(f"Writing {output_csv_file_path}")
    with open(output_csv_file_path, "w") as f:
        writer = csv.DictWriter(f, fieldnames=csv_headers)
        writer.writeheader()
        writer.writerows(output_rows)
|
|
||||||
|
|
||||||
|
|
||||||
# Run the maintenance script only when executed directly.
if __name__ == "__main__":
    _main()
|
|
||||||
@@ -1,601 +0,0 @@
|
|||||||
1jli.site
|
|
||||||
26.107
|
|
||||||
444qcuhilla.com
|
|
||||||
4xr1.com
|
|
||||||
9services.com
|
|
||||||
a7e.ru
|
|
||||||
a94434500-blog.com
|
|
||||||
aams8.jp
|
|
||||||
abv-10.top
|
|
||||||
acemail.co.in
|
|
||||||
activaicon.com
|
|
||||||
adcritic.net
|
|
||||||
adlucrumnewsletter.com
|
|
||||||
admin.corpivensa.gob.ve
|
|
||||||
advantageiq.com
|
|
||||||
advrider.ro
|
|
||||||
aerospacevitro.us.com
|
|
||||||
agenturserver.de
|
|
||||||
aghories.com
|
|
||||||
ai270.net
|
|
||||||
albagroup-eg.com
|
|
||||||
alchemy.net
|
|
||||||
alohabeachcamp.net
|
|
||||||
alsiscad.com
|
|
||||||
aluminumpipetubing.com
|
|
||||||
americanstorageca.com
|
|
||||||
amplusserver.info
|
|
||||||
anchorfundhub.com
|
|
||||||
anglishment.com
|
|
||||||
anteldata.net.uy
|
|
||||||
antis.edu
|
|
||||||
antonaoll.com
|
|
||||||
anviklass.org
|
|
||||||
anwrgrp.lat
|
|
||||||
aosau.net
|
|
||||||
arandomserver.com
|
|
||||||
aransk.ru
|
|
||||||
ardcs.cn
|
|
||||||
armninl.met
|
|
||||||
as29550.net
|
|
||||||
asahachimaru.com
|
|
||||||
aserv.co.za
|
|
||||||
asmecam.it
|
|
||||||
ateky.net.br
|
|
||||||
aurelienvos.com
|
|
||||||
automatech.lat
|
|
||||||
avistaadvantage.com
|
|
||||||
b8sales.com
|
|
||||||
bahjs.com
|
|
||||||
baliaura.com
|
|
||||||
banaras.co
|
|
||||||
bearandbullmarketnews.com
|
|
||||||
bestinvestingtime.com
|
|
||||||
bhjui.com
|
|
||||||
biocorp.com
|
|
||||||
biosophy.net
|
|
||||||
bitter-echo.com
|
|
||||||
bizhostingservices.com
|
|
||||||
blguss.com
|
|
||||||
bluenet.ch
|
|
||||||
bluhosting.com
|
|
||||||
bnasg.com
|
|
||||||
bodiax.pp.ua
|
|
||||||
bost-law.com
|
|
||||||
brainity.com
|
|
||||||
brazalnde.net
|
|
||||||
brellatransplc.shop
|
|
||||||
brnonet.cz
|
|
||||||
broadwaycover.com
|
|
||||||
brushinglegal.de
|
|
||||||
brw.net
|
|
||||||
btes.tv
|
|
||||||
budgeteasehub.com
|
|
||||||
buoytoys.com
|
|
||||||
buyjapanese.jp
|
|
||||||
c53dw7m24rj.com
|
|
||||||
cahtelrandom.org
|
|
||||||
casadelmarsamara.com
|
|
||||||
cashflowmasterypro.com
|
|
||||||
cavabeen.com
|
|
||||||
cbti.net
|
|
||||||
centralmalaysia.com
|
|
||||||
chauffeurplan.co.uk
|
|
||||||
checkpox.fun
|
|
||||||
chegouseuvlache.org
|
|
||||||
chinaxingyu.xyz
|
|
||||||
christus.mx
|
|
||||||
churchills.market
|
|
||||||
ci-xyz.fit
|
|
||||||
cisumrecords.com
|
|
||||||
ckaik.cn
|
|
||||||
clcktoact.com
|
|
||||||
cli-eurosignal.cz
|
|
||||||
cloud-admin.it
|
|
||||||
cloud-edm.com
|
|
||||||
cloudflare-email.org
|
|
||||||
cloudhosting.rs
|
|
||||||
cloudlogin.co
|
|
||||||
cloudplatformpro.com
|
|
||||||
cnode.io
|
|
||||||
cntcloud.com
|
|
||||||
code-it.net
|
|
||||||
codefriend.top
|
|
||||||
colombiaceropapel.org
|
|
||||||
commerceinsurance.com
|
|
||||||
comsharempc.com
|
|
||||||
conexiona.com
|
|
||||||
coolblaze.com
|
|
||||||
coowo.com
|
|
||||||
corpemail.net
|
|
||||||
cp2-myorderbox.com
|
|
||||||
cps.com.ar
|
|
||||||
crnagora.net
|
|
||||||
cross-d-bar-troutranch.com
|
|
||||||
ctla.co.kr
|
|
||||||
cumbalikonakhotel.com
|
|
||||||
currencyexconverter.com
|
|
||||||
daakbabu.com
|
|
||||||
daikinmae.com
|
|
||||||
dairyvalley.com.my
|
|
||||||
dastans.ru
|
|
||||||
datahost36.de
|
|
||||||
ddii.network
|
|
||||||
deep-sek.shop
|
|
||||||
deetownsounds.com
|
|
||||||
descarca-counter-strike.net
|
|
||||||
detrot.xyz
|
|
||||||
dettlaffinc.com
|
|
||||||
dextoolse.net
|
|
||||||
digestivedaily.com
|
|
||||||
digi.net.my
|
|
||||||
dinofelis.cn
|
|
||||||
diwkyncbi.top
|
|
||||||
dkginternet.com
|
|
||||||
dnexpress.info
|
|
||||||
dns-oid.com
|
|
||||||
dnsindia.net
|
|
||||||
domainserver.ne.jp
|
|
||||||
domconfig.com
|
|
||||||
doorsrv.com
|
|
||||||
dreampox.fun
|
|
||||||
dreamtechmedia.com
|
|
||||||
ds.network
|
|
||||||
dss-group.net
|
|
||||||
dvj.theworkpc.com
|
|
||||||
dwlcka.com
|
|
||||||
dynamic-wiretel.in
|
|
||||||
dyntcorp.com
|
|
||||||
easternkingspei.com
|
|
||||||
economiceagles.com
|
|
||||||
egosimail.com
|
|
||||||
eliotporterphotos.us
|
|
||||||
emailgids.net
|
|
||||||
emailperegrine.com
|
|
||||||
entendercopilot.com
|
|
||||||
entretothom.net
|
|
||||||
epaycontrol.com
|
|
||||||
epicinvestmentsreview.co
|
|
||||||
epicinvestmentsreview.com
|
|
||||||
epik.com
|
|
||||||
epsilon-group.com
|
|
||||||
erestaff.com
|
|
||||||
euro-trade-gmbh.com
|
|
||||||
example.com
|
|
||||||
exposervers.com-new
|
|
||||||
extendcp.co.uk
|
|
||||||
eyecandyhosting.xyz
|
|
||||||
fastwebnet.it
|
|
||||||
fd9ing7wfn.com
|
|
||||||
feipnghardware.com
|
|
||||||
fetscorp.shop
|
|
||||||
fewo-usedom.net
|
|
||||||
fin-crime.com
|
|
||||||
financeaimpoint.com
|
|
||||||
financeupward.com
|
|
||||||
firmflat.com
|
|
||||||
flex-video.bnr.la
|
|
||||||
flourishfusionlife.com
|
|
||||||
formicidaehunt.net
|
|
||||||
fosterheap.com
|
|
||||||
fredi.shop
|
|
||||||
frontiernet.net
|
|
||||||
ftifb7tk3c.com
|
|
||||||
gamersprotectionvpn.online
|
|
||||||
gendns.com
|
|
||||||
getgreencardsfast.com
|
|
||||||
getthatroi.com
|
|
||||||
gibbshosting.com
|
|
||||||
gigidea.net
|
|
||||||
giize.com
|
|
||||||
ginous.eu.com
|
|
||||||
gis.net
|
|
||||||
gist-th.com
|
|
||||||
globalglennpartners.com
|
|
||||||
goldsboroughplace.com
|
|
||||||
gophermedia.com
|
|
||||||
gqlists.us.com
|
|
||||||
gratzl.de
|
|
||||||
greatestworldnews.com
|
|
||||||
greennutritioncare.com
|
|
||||||
gsbb.com
|
|
||||||
gumbolimbo.net
|
|
||||||
h-serv.co.uk
|
|
||||||
haedefpartners.com
|
|
||||||
halcyon-aboveboard.com
|
|
||||||
hanzubon.org
|
|
||||||
healthfuljourneyjoy.com
|
|
||||||
hgnbroken.us.com
|
|
||||||
highwey-diesel.com
|
|
||||||
hirofactory.com
|
|
||||||
hjd.asso.fr
|
|
||||||
hongchenggco.pro
|
|
||||||
hongkongtaxi.co
|
|
||||||
hopsinthehanger.com
|
|
||||||
hosted-by-worldstream.net
|
|
||||||
hostelsucre.com
|
|
||||||
hosting1337.com
|
|
||||||
hostinghane.com
|
|
||||||
hostinglotus.cloud
|
|
||||||
hostingmichigan.com
|
|
||||||
hostiran.name
|
|
||||||
hostmnl.com
|
|
||||||
hostname.localhost
|
|
||||||
hostnetwork.com
|
|
||||||
hosts.net.nz
|
|
||||||
hostserv.eu
|
|
||||||
hostwhitelabel.com
|
|
||||||
hpms1.jp
|
|
||||||
hunariojmk.net
|
|
||||||
hunriokinmuim.net
|
|
||||||
hypericine.com
|
|
||||||
i-mecca.net
|
|
||||||
iaasdns.com
|
|
||||||
iam.net.ma
|
|
||||||
iconmarketingguy.com
|
|
||||||
idcfcloud.net
|
|
||||||
idealconcept.live
|
|
||||||
igmohji.com
|
|
||||||
igppevents.org.uk
|
|
||||||
ihglobaldns.com
|
|
||||||
ilmessicano.com
|
|
||||||
imjtmn.cn
|
|
||||||
immenzaces.com
|
|
||||||
in-addr-arpa
|
|
||||||
in-addr.arpa
|
|
||||||
indsalelimited.com
|
|
||||||
indulgent-holistic.com
|
|
||||||
industechint.org
|
|
||||||
inshaaegypt.com
|
|
||||||
intal.uz
|
|
||||||
interfarma.kz
|
|
||||||
intocpanel.com
|
|
||||||
ip-147-135-108.us
|
|
||||||
ip-178-33-109.eu
|
|
||||||
ip-ptr.tech
|
|
||||||
iswhatpercent.com
|
|
||||||
itsidc.com
|
|
||||||
itwebs.com
|
|
||||||
iuon.net
|
|
||||||
ivol.co
|
|
||||||
jalanet.co.id
|
|
||||||
jimishare.com
|
|
||||||
jlccptt.net.cn
|
|
||||||
jlenterprises.co.uk
|
|
||||||
jmontalto.com
|
|
||||||
joyomokei.com
|
|
||||||
jumanra.org
|
|
||||||
justlongshirts.com
|
|
||||||
kahlaa.com
|
|
||||||
kaw.theworkpc.com
|
|
||||||
kbronet.com.tw
|
|
||||||
kdnursing.org
|
|
||||||
kielnet.net
|
|
||||||
kihy.theworkpc.com
|
|
||||||
kingschurchwirral.org
|
|
||||||
kitchenaildbd.com
|
|
||||||
klaomi.shop
|
|
||||||
knkconsult.net
|
|
||||||
kohshikai.com
|
|
||||||
krhfund.org
|
|
||||||
krillaglass.com
|
|
||||||
lancorhomes.com
|
|
||||||
landpedia.org
|
|
||||||
lanzatuseo.es
|
|
||||||
layerdns.cloud
|
|
||||||
learninglinked.com
|
|
||||||
legenditds.com
|
|
||||||
levertechcentre.com
|
|
||||||
lhost.no
|
|
||||||
lideri.net.br
|
|
||||||
lighthouse-media.com
|
|
||||||
lightpath.net
|
|
||||||
limogesporcelainboxes.com
|
|
||||||
lindsaywalt.net
|
|
||||||
linuxsunucum.com
|
|
||||||
listertermoformadoa.com
|
|
||||||
llsend.com
|
|
||||||
local.net
|
|
||||||
lohkal.com
|
|
||||||
londionrtim.net
|
|
||||||
lonestarmm.net
|
|
||||||
longmarquis.com
|
|
||||||
longwoodmgmt.com
|
|
||||||
lse.kz
|
|
||||||
lunvoy.com
|
|
||||||
luxarpro.ru
|
|
||||||
lwl-puehringer.at
|
|
||||||
lynx.net.lb
|
|
||||||
lyse.net
|
|
||||||
m-sender.com.ua
|
|
||||||
maggiolicloud.it
|
|
||||||
magnetmail.net
|
|
||||||
magnumgo.uz
|
|
||||||
maia11.com
|
|
||||||
mail-fire.com
|
|
||||||
mailsentinel.net
|
|
||||||
mailset.cn
|
|
||||||
malardino.net
|
|
||||||
managed-vps.net
|
|
||||||
manhattanbulletpoint.com
|
|
||||||
manpowerservices.com
|
|
||||||
marketmysterycode.com
|
|
||||||
marketwizardspro.com
|
|
||||||
masterclassjournal.com
|
|
||||||
matroguel.cam
|
|
||||||
maximpactipo.com
|
|
||||||
mechanicalwalk.store
|
|
||||||
mediavobis.com
|
|
||||||
meqlobal.com
|
|
||||||
mgts.by
|
|
||||||
migrans.net
|
|
||||||
miixta.com
|
|
||||||
milleniumsrv.com
|
|
||||||
mindworksunlimited.com
|
|
||||||
mirth-gale.com
|
|
||||||
misorpresa.com
|
|
||||||
mitomobile.com
|
|
||||||
mitsubachi-kibako.net
|
|
||||||
mjinn.com
|
|
||||||
mkegs.shop
|
|
||||||
mobius.fr
|
|
||||||
model-ac.ink
|
|
||||||
moderntradingnews.com
|
|
||||||
monnaiegroup.com
|
|
||||||
monopolizeright.com
|
|
||||||
moonjaws.com
|
|
||||||
morningnewscatcher.com
|
|
||||||
motion4ever.net
|
|
||||||
mschosting.com
|
|
||||||
msdp1.com
|
|
||||||
mspnet.pro
|
|
||||||
mts-nn.ru
|
|
||||||
multifamilydesign.com
|
|
||||||
mxserver.ro
|
|
||||||
mxthunder.net
|
|
||||||
my-ihor.ru
|
|
||||||
mycloudmailbox.com
|
|
||||||
myfriendforum.com
|
|
||||||
myrewards.net
|
|
||||||
mysagestore.com
|
|
||||||
mysecurewebserver.com
|
|
||||||
myshanet.net
|
|
||||||
myvps.jp
|
|
||||||
mywedsite.net
|
|
||||||
mywic.eu
|
|
||||||
name.tools
|
|
||||||
nanshenqfurniture.com
|
|
||||||
nask.pl
|
|
||||||
navertise.net
|
|
||||||
ncbb.kz
|
|
||||||
ncport.ru
|
|
||||||
ncsdi.ws
|
|
||||||
nebdig.com
|
|
||||||
neovet-base.ru
|
|
||||||
netbri.com
|
|
||||||
netcentertelecom.net.br
|
|
||||||
neti.ee
|
|
||||||
netkl.org
|
|
||||||
newinvestingguide.com
|
|
||||||
newwallstreetcode.com
|
|
||||||
ngvcv.cn
|
|
||||||
nic.name
|
|
||||||
nidix.net
|
|
||||||
nieuwedagnetwerk.net
|
|
||||||
nlscanme.com
|
|
||||||
nmeuh.cn
|
|
||||||
noisndametal.com
|
|
||||||
nucleusemail.com
|
|
||||||
nutriboostlife.com
|
|
||||||
nwo.giize.com
|
|
||||||
nwwhalewatchers.org
|
|
||||||
ny.adsl
|
|
||||||
nyt1.com
|
|
||||||
offerslatedeals.com
|
|
||||||
office365.us
|
|
||||||
ogicom.net
|
|
||||||
olivettilexikon.co.uk
|
|
||||||
omegabrasil.inf.br
|
|
||||||
onnet21.com
|
|
||||||
onumubunumu.com
|
|
||||||
oppt-ac.fit
|
|
||||||
orbitel.net.co
|
|
||||||
orfsurface.com
|
|
||||||
orientalspot.com
|
|
||||||
outsidences.com
|
|
||||||
ovaltinalization.co
|
|
||||||
overta.ru
|
|
||||||
ox28vgrurc.com
|
|
||||||
pamulang.net
|
|
||||||
panaltyspot.space
|
|
||||||
panolacountysheriffms.com
|
|
||||||
passionatesmiles.com
|
|
||||||
paulinelam.com
|
|
||||||
pdi-corp.com
|
|
||||||
peloquinbeck.com
|
|
||||||
perimetercenter.net
|
|
||||||
permanentscreen.com
|
|
||||||
permasteellisagroup.com
|
|
||||||
perumkijhyu.net
|
|
||||||
pesnia.com.ua
|
|
||||||
ph8ltwdi12o.com
|
|
||||||
pharmada.com.de
|
|
||||||
phdns3.es
|
|
||||||
pigelixval1.com
|
|
||||||
pipefittingsindia.com
|
|
||||||
planethoster.net
|
|
||||||
playamedia.io
|
|
||||||
plesk.page
|
|
||||||
pmnhost.net
|
|
||||||
pokiloandhu.net
|
|
||||||
pokupki5.ru
|
|
||||||
polandi.net
|
|
||||||
popiup.com
|
|
||||||
ports.net
|
|
||||||
posolstvostilya.com
|
|
||||||
potia.net
|
|
||||||
prima.com.ar
|
|
||||||
prima.net.ar
|
|
||||||
profsol.co.uk
|
|
||||||
prohealthmotion.com
|
|
||||||
promooffermarket.site
|
|
||||||
proudserver.com
|
|
||||||
proxado.com
|
|
||||||
psnm.ru
|
|
||||||
pvcwindowsprices.live
|
|
||||||
qontenciplc.autos
|
|
||||||
quakeclick.com
|
|
||||||
quasarstate.store
|
|
||||||
quatthonggiotico.com
|
|
||||||
qxyxab44njd.com
|
|
||||||
radianthealthrenaissance.com
|
|
||||||
rapidns.com
|
|
||||||
raxa.host
|
|
||||||
reberte.com
|
|
||||||
reethvikintl.com
|
|
||||||
regruhosting.ru
|
|
||||||
reliablepanel.com
|
|
||||||
rgb365.eu
|
|
||||||
riddlecamera.net
|
|
||||||
riddletrends.com
|
|
||||||
roccopugliese.com
|
|
||||||
runnin-rebels.com
|
|
||||||
rupar.puglia.it
|
|
||||||
rwdhosting.ca
|
|
||||||
s500host.com
|
|
||||||
sageevents.co.ke
|
|
||||||
sahacker-2020.com
|
|
||||||
samsales.site
|
|
||||||
sante-lorraine.fr
|
|
||||||
saransk.ru
|
|
||||||
satirogluet.com
|
|
||||||
scioncontacts.com
|
|
||||||
sdcc.my
|
|
||||||
seaspraymta3.net
|
|
||||||
secorp.mx
|
|
||||||
securen.net
|
|
||||||
securerelay.in
|
|
||||||
securev.net
|
|
||||||
seductiveeyes.com
|
|
||||||
seizethedayconsulting.com
|
|
||||||
serroplast.shop
|
|
||||||
server290.com
|
|
||||||
server342.com
|
|
||||||
server3559.cc
|
|
||||||
servershost.biz
|
|
||||||
sfek.kz
|
|
||||||
sgnetway.net
|
|
||||||
shopfox.ca
|
|
||||||
silvestrejaguar.sbs
|
|
||||||
silvestreonca.sbs
|
|
||||||
simplediagnostics.org
|
|
||||||
siriuscloud.jp
|
|
||||||
sisglobalresearch.com
|
|
||||||
sixpacklink.net
|
|
||||||
sjestyle.com
|
|
||||||
smallvillages.com
|
|
||||||
smartape-vps.com
|
|
||||||
solusoftware.com
|
|
||||||
sourcedns.com
|
|
||||||
southcoastwebhosting12.com
|
|
||||||
specialtvvs.com
|
|
||||||
spiritualtechnologies.io
|
|
||||||
sprout.org
|
|
||||||
srv.cat
|
|
||||||
stableserver.net
|
|
||||||
statlerfa.co.uk
|
|
||||||
stock-smtp.top
|
|
||||||
stockepictigers.com
|
|
||||||
stockexchangejournal.com
|
|
||||||
subterranean-concave.com
|
|
||||||
suksangroup.com
|
|
||||||
swissbluetopaz.com
|
|
||||||
switer.shop
|
|
||||||
sysop4.com
|
|
||||||
system.eu.com
|
|
||||||
szhongbing.com
|
|
||||||
t-jon.com
|
|
||||||
tacaindo.net
|
|
||||||
tacom.tj
|
|
||||||
tankertelz.co
|
|
||||||
tataidc.com
|
|
||||||
teamveiw.com
|
|
||||||
tecnoxia.net
|
|
||||||
tel-xyz.fit
|
|
||||||
tenkids.net
|
|
||||||
terminavalley.com
|
|
||||||
thaicloudsolutions.com
|
|
||||||
thaikinghost.com
|
|
||||||
thaimonster.com
|
|
||||||
thegermainetruth.net
|
|
||||||
thehandmaderose.com
|
|
||||||
thepushcase.com
|
|
||||||
ticdns.com
|
|
||||||
tigo.bo
|
|
||||||
toledofibra.net.br
|
|
||||||
topdns.com
|
|
||||||
totaal.net
|
|
||||||
totalplay.net
|
|
||||||
tqh.ro
|
|
||||||
traderlearningcenter.com
|
|
||||||
tradeukraine.site
|
|
||||||
traveleza.com
|
|
||||||
trwww.com
|
|
||||||
tsuzakij.com
|
|
||||||
tullostrucking.com
|
|
||||||
turbinetrends.com
|
|
||||||
twincitiesdistinctivehomes.com
|
|
||||||
tylerfordonline.com
|
|
||||||
uiyum.com
|
|
||||||
ultragate.com
|
|
||||||
uneedacollie.com
|
|
||||||
unified.services
|
|
||||||
unite.services
|
|
||||||
urawasl.com
|
|
||||||
us.servername.us
|
|
||||||
vagebond.net
|
|
||||||
varvia.de
|
|
||||||
vbcploo.com
|
|
||||||
vdc.vn
|
|
||||||
vendimetry.com
|
|
||||||
vibrantwellnesscorp.com
|
|
||||||
virtualine.org
|
|
||||||
visit.docotor
|
|
||||||
viviotech.us
|
|
||||||
vlflgl.com
|
|
||||||
volganet.ru
|
|
||||||
vrns.net
|
|
||||||
vulterdi.edu
|
|
||||||
vvondertex.com
|
|
||||||
wallstreetsgossip.com
|
|
||||||
wamego.net
|
|
||||||
wanekoohost.com
|
|
||||||
wealthexpertisepro.com
|
|
||||||
web-login.eu
|
|
||||||
weblinkinternational.com
|
|
||||||
webnox.io
|
|
||||||
websale.net
|
|
||||||
welllivinghive.com
|
|
||||||
westparkcom.com
|
|
||||||
wetransfer-eu.com
|
|
||||||
wheelch.me
|
|
||||||
whoflew.com
|
|
||||||
whpservers.com
|
|
||||||
wisdomhard.com
|
|
||||||
wisewealthcircle.com
|
|
||||||
wisvis.com
|
|
||||||
wodeniowa.com
|
|
||||||
wordpresshosting.xyz
|
|
||||||
wsiph2.com
|
|
||||||
xnt.mx
|
|
||||||
xodiax.com
|
|
||||||
xpnuf.cn
|
|
||||||
xsfati.us.com
|
|
||||||
xspmail.jp
|
|
||||||
yourciviccompass.com
|
|
||||||
yourinvestworkbook.com
|
|
||||||
yoursitesecure.net
|
|
||||||
zerowebhosting.net
|
|
||||||
zmml.uk
|
|
||||||
znlc.jp
|
|
||||||
ztomy.com
|
|
||||||
@@ -1,23 +0,0 @@
|
|||||||
-applefibernet.com
|
|
||||||
-c3.net.pl
|
|
||||||
-celsiainternet.com
|
|
||||||
-clientes-izzi.mx
|
|
||||||
-clientes-zap-izzi.mx
|
|
||||||
-imnet.com.br
|
|
||||||
-mcnbd.com
|
|
||||||
-smile.com.bd
|
|
||||||
-tataidc.co.in
|
|
||||||
-veloxfiber.com.br
|
|
||||||
-wconect.com.br
|
|
||||||
.amazonaws.com
|
|
||||||
.cloudaccess.net
|
|
||||||
.ddnsgeek.com
|
|
||||||
.fastvps-server.com
|
|
||||||
.in-addr-arpa
|
|
||||||
.in-addr.arpa
|
|
||||||
.kasserver.com
|
|
||||||
.kinghost.net
|
|
||||||
.linode.com
|
|
||||||
.linodeusercontent.com
|
|
||||||
.na4u.ru
|
|
||||||
.sakura.ne.jp
|
|
||||||
@@ -1,184 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import os
|
|
||||||
import csv
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Mapping, Iterable, Optional, Collection, Union, List, Dict
|
|
||||||
|
|
||||||
|
|
||||||
class CSVValidationError(Exception):
    """Raised when one or more CSV validation checks fail.

    The individual messages are kept on ``errors``; the exception's string
    form is those messages joined with newlines.
    """

    def __init__(self, errors: list[str]):
        self.errors = errors
        super().__init__("\n".join(errors))
|
|
||||||
|
|
||||||
|
|
||||||
def sort_csv(
    filepath: Union[str, Path],
    field: str,
    *,
    sort_field_value_must_be_unique: bool = True,
    strip_whitespace: bool = True,
    fields_to_lowercase: Optional[Iterable[str]] = None,
    case_insensitive_sort: bool = False,
    required_fields: Optional[Iterable[str]] = None,
    allowed_values: Optional[Mapping[str, Collection[str]]] = None,
) -> List[Dict[str, str]]:
    """
    Read a CSV, optionally normalize rows (strip whitespace, lowercase certain
    fields), validate field values, write the sorted CSV back to the same
    path, and return the sorted rows.

    - filepath: Path to the CSV to sort.
    - field: The field name to sort by.
    - sort_field_value_must_be_unique: Reject duplicate sort-field values.
    - strip_whitespace: Remove whitespace at both ends of every field value.
    - fields_to_lowercase: Permanently lowercases these field(s) in the data.
    - case_insensitive_sort: Ignore case when sorting without changing values.
    - required_fields: A list of fields that must have data in all rows.
    - allowed_values: A mapping of allowed values for fields.

    Raises CSVValidationError with the accumulated messages when the sort
    column or a required header is missing, or any row fails validation.
    """
    path = Path(filepath)
    required_fields = set(required_fields or [])
    lower_set = set(fields_to_lowercase or [])
    allowed_sets = {k: set(v) for k, v in (allowed_values or {}).items()}
    # A set keeps duplicate detection O(1) per row (was a list scan, O(n^2)).
    seen_sort_field_values: set = set()

    with path.open("r", newline="") as infile:
        reader = csv.DictReader(infile)
        fieldnames = reader.fieldnames or []
        if field not in fieldnames:
            raise CSVValidationError([f"Missing sort column: {field!r}"])
        missing_headers = required_fields - set(fieldnames)
        if missing_headers:
            raise CSVValidationError(
                [f"Missing required header(s): {sorted(missing_headers)}"]
            )
        rows = list(reader)

    def normalize_row(row: Dict[str, str]) -> None:
        # Normalize in place before validation so the checks see final values.
        if strip_whitespace:
            for k, v in row.items():
                if isinstance(v, str):
                    row[k] = v.strip()
        for fld in lower_set:
            if fld in row and isinstance(row[fld], str):
                row[fld] = row[fld].lower()

    def validate_row(
        row: Dict[str, str], sort_field: str, line_no: int, errors: list[str]
    ) -> None:
        if sort_field_value_must_be_unique:
            if row[sort_field] in seen_sort_field_values:
                errors.append(f"Line {line_no}: Duplicate row for '{row[sort_field]}'")
            else:
                seen_sort_field_values.add(row[sort_field])
        for rf in required_fields:
            val = row.get(rf)
            if val is None or val == "":
                errors.append(
                    f"Line {line_no}: Missing value for required field '{rf}'"
                )
        # Loop names chosen so they no longer shadow the outer 'field' and
        # 'allowed_values' parameters.
        for col, allowed in allowed_sets.items():
            if col in row:
                val = row[col]
                if val not in allowed:
                    errors.append(
                        f"Line {line_no}: '{val}' is not an allowed value for '{col}' "
                        f"(allowed: {sorted(allowed)})"
                    )

    errors: list[str] = []
    for idx, row in enumerate(rows, start=2):  # header is line 1
        normalize_row(row)
        validate_row(row, field, idx, errors)

    if errors:
        raise CSVValidationError(errors)

    def sort_key(r: Dict[str, str]):
        v = r.get(field, "")
        if isinstance(v, str) and case_insensitive_sort:
            return v.casefold()
        return v

    rows.sort(key=sort_key)

    with path.open("w", newline="") as outfile:
        writer = csv.DictWriter(outfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)

    # The signature always promised List[Dict[str, str]]; actually return it.
    return rows
|
|
||||||
|
|
||||||
|
|
||||||
def sort_list_file(
    filepath: Union[str, Path],
    *,
    lowercase: bool = True,
    strip: bool = True,
    deduplicate: bool = True,
    remove_blank_lines: bool = True,
    ending_newline: bool = True,
    newline: Optional[str] = "\n",
):
    """Read a list from a file, sort it, optionally strip and deduplicate the
    values, then write that list back to the file.

    - filepath: The path to the file.
    - lowercase: Lowercase all values prior to sorting.
    - strip: Strip surrounding whitespace (including the newline) from values.
    - deduplicate: Drop repeated values.
    - remove_blank_lines: Remove any blank lines.
    - ending_newline: End the file with a newline, even if remove_blank_lines
      is true.
    - newline: The newline character to use.
    """
    with open(filepath, mode="r", newline=newline) as infile:
        lines = infile.readlines()
    for i in range(len(lines)):
        if lowercase:
            lines[i] = lines[i].lower()
        if strip:
            lines[i] = lines[i].strip()
    if deduplicate:
        lines = list(set(lines))
    if remove_blank_lines:
        while "" in lines:
            lines.remove("")
    lines = sorted(lines)
    if ending_newline:
        # Guard the empty case: the original indexed lines[-1] and raised
        # IndexError on an empty (or all-blank) input file.
        if not lines or lines[-1] != "":
            lines.append("")
    with open(filepath, mode="w", newline=newline) as outfile:
        outfile.write("\n".join(lines))
|
|
||||||
|
|
||||||
|
|
||||||
def _main():
|
|
||||||
map_file = "base_reverse_dns_map.csv"
|
|
||||||
map_key = "base_reverse_dns"
|
|
||||||
list_files = ["known_unknown_base_reverse_dns.txt", "psl_overrides.txt"]
|
|
||||||
types_file = "base_reverse_dns_types.txt"
|
|
||||||
|
|
||||||
with open(types_file) as f:
|
|
||||||
types = f.readlines()
|
|
||||||
while "" in types:
|
|
||||||
types.remove("")
|
|
||||||
|
|
||||||
map_allowed_values = {"Type": types}
|
|
||||||
|
|
||||||
for list_file in list_files:
|
|
||||||
if not os.path.exists(list_file):
|
|
||||||
print(f"Error: {list_file} does not exist")
|
|
||||||
exit(1)
|
|
||||||
sort_list_file(list_file)
|
|
||||||
if not os.path.exists(types_file):
|
|
||||||
print(f"Error: {types_file} does not exist")
|
|
||||||
exit(1)
|
|
||||||
sort_list_file(types_file, lowercase=False)
|
|
||||||
if not os.path.exists(map_file):
|
|
||||||
print(f"Error: {map_file} does not exist")
|
|
||||||
exit(1)
|
|
||||||
try:
|
|
||||||
sort_csv(map_file, map_key, allowed_values=map_allowed_values)
|
|
||||||
except CSVValidationError as e:
|
|
||||||
print(f"{map_file} did not validate: {e}")
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
_main()
|
|
||||||
@@ -1,10 +1,6 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import json
|
import json
|
||||||
from typing import Any
|
|
||||||
|
|
||||||
import boto3
|
import boto3
|
||||||
|
|
||||||
from parsedmarc.log import logger
|
from parsedmarc.log import logger
|
||||||
@@ -12,16 +8,16 @@ from parsedmarc.utils import human_timestamp_to_datetime
|
|||||||
|
|
||||||
|
|
||||||
class S3Client(object):
|
class S3Client(object):
|
||||||
"""A client for interacting with Amazon S3"""
|
"""A client for a Amazon S3"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
bucket_name: str,
|
bucket_name,
|
||||||
bucket_path: str,
|
bucket_path,
|
||||||
region_name: str,
|
region_name,
|
||||||
endpoint_url: str,
|
endpoint_url,
|
||||||
access_key_id: str,
|
access_key_id,
|
||||||
secret_access_key: str,
|
secret_access_key,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Initializes the S3Client
|
Initializes the S3Client
|
||||||
@@ -51,18 +47,18 @@ class S3Client(object):
|
|||||||
aws_access_key_id=access_key_id,
|
aws_access_key_id=access_key_id,
|
||||||
aws_secret_access_key=secret_access_key,
|
aws_secret_access_key=secret_access_key,
|
||||||
)
|
)
|
||||||
self.bucket = self.s3.Bucket(self.bucket_name) # type: ignore
|
self.bucket = self.s3.Bucket(self.bucket_name)
|
||||||
|
|
||||||
def save_aggregate_report_to_s3(self, report: dict[str, Any]):
|
def save_aggregate_report_to_s3(self, report):
|
||||||
self.save_report_to_s3(report, "aggregate")
|
self.save_report_to_s3(report, "aggregate")
|
||||||
|
|
||||||
def save_forensic_report_to_s3(self, report: dict[str, Any]):
|
def save_forensic_report_to_s3(self, report):
|
||||||
self.save_report_to_s3(report, "forensic")
|
self.save_report_to_s3(report, "forensic")
|
||||||
|
|
||||||
def save_smtp_tls_report_to_s3(self, report: dict[str, Any]):
|
def save_smtp_tls_report_to_s3(self, report):
|
||||||
self.save_report_to_s3(report, "smtp_tls")
|
self.save_report_to_s3(report, "smtp_tls")
|
||||||
|
|
||||||
def save_report_to_s3(self, report: dict[str, Any], report_type: str):
|
def save_report_to_s3(self, report, report_type):
|
||||||
if report_type == "smtp_tls":
|
if report_type == "smtp_tls":
|
||||||
report_date = report["begin_date"]
|
report_date = report["begin_date"]
|
||||||
report_id = report["report_id"]
|
report_id = report["report_id"]
|
||||||
|
|||||||
@@ -1,16 +1,11 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import json
|
|
||||||
import socket
|
|
||||||
from typing import Any, Union
|
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
import socket
|
||||||
|
import json
|
||||||
|
|
||||||
import requests
|
|
||||||
import urllib3
|
import urllib3
|
||||||
|
import requests
|
||||||
|
|
||||||
from parsedmarc.constants import USER_AGENT
|
from parsedmarc import __version__
|
||||||
from parsedmarc.log import logger
|
from parsedmarc.log import logger
|
||||||
from parsedmarc.utils import human_timestamp_to_unix_timestamp
|
from parsedmarc.utils import human_timestamp_to_unix_timestamp
|
||||||
|
|
||||||
@@ -28,13 +23,7 @@ class HECClient(object):
|
|||||||
# http://docs.splunk.com/Documentation/Splunk/latest/RESTREF/RESTinput#services.2Fcollector
|
# http://docs.splunk.com/Documentation/Splunk/latest/RESTREF/RESTinput#services.2Fcollector
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self, url, access_token, index, source="parsedmarc", verify=True, timeout=60
|
||||||
url: str,
|
|
||||||
access_token: str,
|
|
||||||
index: str,
|
|
||||||
source: str = "parsedmarc",
|
|
||||||
verify=True,
|
|
||||||
timeout=60,
|
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Initializes the HECClient
|
Initializes the HECClient
|
||||||
@@ -48,9 +37,9 @@ class HECClient(object):
|
|||||||
timeout (float): Number of seconds to wait for the server to send
|
timeout (float): Number of seconds to wait for the server to send
|
||||||
data before giving up
|
data before giving up
|
||||||
"""
|
"""
|
||||||
parsed_url = urlparse(url)
|
url = urlparse(url)
|
||||||
self.url = "{0}://{1}/services/collector/event/1.0".format(
|
self.url = "{0}://{1}/services/collector/event/1.0".format(
|
||||||
parsed_url.scheme, parsed_url.netloc
|
url.scheme, url.netloc
|
||||||
)
|
)
|
||||||
self.access_token = access_token.lstrip("Splunk ")
|
self.access_token = access_token.lstrip("Splunk ")
|
||||||
self.index = index
|
self.index = index
|
||||||
@@ -59,19 +48,14 @@ class HECClient(object):
|
|||||||
self.session = requests.Session()
|
self.session = requests.Session()
|
||||||
self.timeout = timeout
|
self.timeout = timeout
|
||||||
self.session.verify = verify
|
self.session.verify = verify
|
||||||
self._common_data: dict[str, Union[str, int, float, dict]] = dict(
|
self._common_data = dict(host=self.host, source=self.source, index=self.index)
|
||||||
host=self.host, source=self.source, index=self.index
|
|
||||||
)
|
|
||||||
|
|
||||||
self.session.headers = {
|
self.session.headers = {
|
||||||
"User-Agent": USER_AGENT,
|
"User-Agent": "parsedmarc/{0}".format(__version__),
|
||||||
"Authorization": "Splunk {0}".format(self.access_token),
|
"Authorization": "Splunk {0}".format(self.access_token),
|
||||||
}
|
}
|
||||||
|
|
||||||
def save_aggregate_reports_to_splunk(
|
def save_aggregate_reports_to_splunk(self, aggregate_reports):
|
||||||
self,
|
|
||||||
aggregate_reports: Union[list[dict[str, Any]], dict[str, Any]],
|
|
||||||
):
|
|
||||||
"""
|
"""
|
||||||
Saves aggregate DMARC reports to Splunk
|
Saves aggregate DMARC reports to Splunk
|
||||||
|
|
||||||
@@ -91,12 +75,9 @@ class HECClient(object):
|
|||||||
json_str = ""
|
json_str = ""
|
||||||
for report in aggregate_reports:
|
for report in aggregate_reports:
|
||||||
for record in report["records"]:
|
for record in report["records"]:
|
||||||
new_report: dict[str, Union[str, int, float, dict]] = dict()
|
new_report = dict()
|
||||||
for metadata in report["report_metadata"]:
|
for metadata in report["report_metadata"]:
|
||||||
new_report[metadata] = report["report_metadata"][metadata]
|
new_report[metadata] = report["report_metadata"][metadata]
|
||||||
new_report["interval_begin"] = record["interval_begin"]
|
|
||||||
new_report["interval_end"] = record["interval_end"]
|
|
||||||
new_report["normalized_timespan"] = record["normalized_timespan"]
|
|
||||||
new_report["published_policy"] = report["policy_published"]
|
new_report["published_policy"] = report["policy_published"]
|
||||||
new_report["source_ip_address"] = record["source"]["ip_address"]
|
new_report["source_ip_address"] = record["source"]["ip_address"]
|
||||||
new_report["source_country"] = record["source"]["country"]
|
new_report["source_country"] = record["source"]["country"]
|
||||||
@@ -117,9 +98,7 @@ class HECClient(object):
|
|||||||
new_report["spf_results"] = record["auth_results"]["spf"]
|
new_report["spf_results"] = record["auth_results"]["spf"]
|
||||||
|
|
||||||
data["sourcetype"] = "dmarc:aggregate"
|
data["sourcetype"] = "dmarc:aggregate"
|
||||||
timestamp = human_timestamp_to_unix_timestamp(
|
timestamp = human_timestamp_to_unix_timestamp(new_report["begin_date"])
|
||||||
new_report["interval_begin"]
|
|
||||||
)
|
|
||||||
data["time"] = timestamp
|
data["time"] = timestamp
|
||||||
data["event"] = new_report.copy()
|
data["event"] = new_report.copy()
|
||||||
json_str += "{0}\n".format(json.dumps(data))
|
json_str += "{0}\n".format(json.dumps(data))
|
||||||
@@ -134,10 +113,7 @@ class HECClient(object):
|
|||||||
if response["code"] != 0:
|
if response["code"] != 0:
|
||||||
raise SplunkError(response["text"])
|
raise SplunkError(response["text"])
|
||||||
|
|
||||||
def save_forensic_reports_to_splunk(
|
def save_forensic_reports_to_splunk(self, forensic_reports):
|
||||||
self,
|
|
||||||
forensic_reports: Union[list[dict[str, Any]], dict[str, Any]],
|
|
||||||
):
|
|
||||||
"""
|
"""
|
||||||
Saves forensic DMARC reports to Splunk
|
Saves forensic DMARC reports to Splunk
|
||||||
|
|
||||||
@@ -171,9 +147,7 @@ class HECClient(object):
|
|||||||
if response["code"] != 0:
|
if response["code"] != 0:
|
||||||
raise SplunkError(response["text"])
|
raise SplunkError(response["text"])
|
||||||
|
|
||||||
def save_smtp_tls_reports_to_splunk(
|
def save_smtp_tls_reports_to_splunk(self, reports):
|
||||||
self, reports: Union[list[dict[str, Any]], dict[str, Any]]
|
|
||||||
):
|
|
||||||
"""
|
"""
|
||||||
Saves aggregate DMARC reports to Splunk
|
Saves aggregate DMARC reports to Splunk
|
||||||
|
|
||||||
|
|||||||
@@ -1,15 +1,8 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import json
|
|
||||||
import logging
|
import logging
|
||||||
import logging.handlers
|
import logging.handlers
|
||||||
import socket
|
import json
|
||||||
import ssl
|
|
||||||
import time
|
|
||||||
from typing import Any, Optional
|
|
||||||
|
|
||||||
from parsedmarc import (
|
from parsedmarc import (
|
||||||
parsed_aggregate_reports_to_csv_rows,
|
parsed_aggregate_reports_to_csv_rows,
|
||||||
@@ -21,161 +14,31 @@ from parsedmarc import (
|
|||||||
class SyslogClient(object):
|
class SyslogClient(object):
|
||||||
"""A client for Syslog"""
|
"""A client for Syslog"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(self, server_name, server_port):
|
||||||
self,
|
|
||||||
server_name: str,
|
|
||||||
server_port: int,
|
|
||||||
protocol: str = "udp",
|
|
||||||
cafile_path: Optional[str] = None,
|
|
||||||
certfile_path: Optional[str] = None,
|
|
||||||
keyfile_path: Optional[str] = None,
|
|
||||||
timeout: float = 5.0,
|
|
||||||
retry_attempts: int = 3,
|
|
||||||
retry_delay: int = 5,
|
|
||||||
):
|
|
||||||
"""
|
"""
|
||||||
Initializes the SyslogClient
|
Initializes the SyslogClient
|
||||||
Args:
|
Args:
|
||||||
server_name (str): The Syslog server
|
server_name (str): The Syslog server
|
||||||
server_port (int): The Syslog port
|
server_port (int): The Syslog UDP port
|
||||||
protocol (str): The protocol to use: "udp", "tcp", or "tls" (Default: "udp")
|
|
||||||
cafile_path (str): Path to CA certificate file for TLS server verification (Optional)
|
|
||||||
certfile_path (str): Path to client certificate file for TLS authentication (Optional)
|
|
||||||
keyfile_path (str): Path to client private key file for TLS authentication (Optional)
|
|
||||||
timeout (float): Connection timeout in seconds for TCP/TLS (Default: 5.0)
|
|
||||||
retry_attempts (int): Number of retry attempts for failed connections (Default: 3)
|
|
||||||
retry_delay (int): Delay in seconds between retry attempts (Default: 5)
|
|
||||||
"""
|
"""
|
||||||
self.server_name = server_name
|
self.server_name = server_name
|
||||||
self.server_port = server_port
|
self.server_port = server_port
|
||||||
self.protocol = protocol.lower()
|
|
||||||
self.timeout = timeout
|
|
||||||
self.retry_attempts = retry_attempts
|
|
||||||
self.retry_delay = retry_delay
|
|
||||||
|
|
||||||
self.logger = logging.getLogger("parsedmarc_syslog")
|
self.logger = logging.getLogger("parsedmarc_syslog")
|
||||||
self.logger.setLevel(logging.INFO)
|
self.logger.setLevel(logging.INFO)
|
||||||
|
log_handler = logging.handlers.SysLogHandler(address=(server_name, server_port))
|
||||||
# Create the appropriate syslog handler based on protocol
|
|
||||||
log_handler = self._create_syslog_handler(
|
|
||||||
server_name,
|
|
||||||
server_port,
|
|
||||||
self.protocol,
|
|
||||||
cafile_path,
|
|
||||||
certfile_path,
|
|
||||||
keyfile_path,
|
|
||||||
timeout,
|
|
||||||
retry_attempts,
|
|
||||||
retry_delay,
|
|
||||||
)
|
|
||||||
|
|
||||||
self.logger.addHandler(log_handler)
|
self.logger.addHandler(log_handler)
|
||||||
|
|
||||||
def _create_syslog_handler(
|
def save_aggregate_report_to_syslog(self, aggregate_reports):
|
||||||
self,
|
|
||||||
server_name: str,
|
|
||||||
server_port: int,
|
|
||||||
protocol: str,
|
|
||||||
cafile_path: Optional[str],
|
|
||||||
certfile_path: Optional[str],
|
|
||||||
keyfile_path: Optional[str],
|
|
||||||
timeout: float,
|
|
||||||
retry_attempts: int,
|
|
||||||
retry_delay: int,
|
|
||||||
) -> logging.handlers.SysLogHandler:
|
|
||||||
"""
|
|
||||||
Creates a SysLogHandler with the specified protocol and TLS settings
|
|
||||||
"""
|
|
||||||
if protocol == "udp":
|
|
||||||
# UDP protocol (default, backward compatible)
|
|
||||||
return logging.handlers.SysLogHandler(
|
|
||||||
address=(server_name, server_port),
|
|
||||||
socktype=socket.SOCK_DGRAM,
|
|
||||||
)
|
|
||||||
elif protocol in ["tcp", "tls"]:
|
|
||||||
# TCP or TLS protocol with retry logic
|
|
||||||
for attempt in range(1, retry_attempts + 1):
|
|
||||||
try:
|
|
||||||
if protocol == "tcp":
|
|
||||||
# TCP without TLS
|
|
||||||
handler = logging.handlers.SysLogHandler(
|
|
||||||
address=(server_name, server_port),
|
|
||||||
socktype=socket.SOCK_STREAM,
|
|
||||||
)
|
|
||||||
# Set timeout on the socket
|
|
||||||
if hasattr(handler, "socket") and handler.socket:
|
|
||||||
handler.socket.settimeout(timeout)
|
|
||||||
return handler
|
|
||||||
else:
|
|
||||||
# TLS protocol
|
|
||||||
# Create SSL context with secure defaults
|
|
||||||
ssl_context = ssl.create_default_context()
|
|
||||||
|
|
||||||
# Explicitly set minimum TLS version to 1.2 for security
|
|
||||||
ssl_context.minimum_version = ssl.TLSVersion.TLSv1_2
|
|
||||||
|
|
||||||
# Configure server certificate verification
|
|
||||||
if cafile_path:
|
|
||||||
ssl_context.load_verify_locations(cafile=cafile_path)
|
|
||||||
|
|
||||||
# Configure client certificate authentication
|
|
||||||
if certfile_path and keyfile_path:
|
|
||||||
ssl_context.load_cert_chain(
|
|
||||||
certfile=certfile_path,
|
|
||||||
keyfile=keyfile_path,
|
|
||||||
)
|
|
||||||
elif certfile_path or keyfile_path:
|
|
||||||
# Warn if only one of the two required parameters is provided
|
|
||||||
self.logger.warning(
|
|
||||||
"Both certfile_path and keyfile_path are required for "
|
|
||||||
"client certificate authentication. Client authentication "
|
|
||||||
"will not be used."
|
|
||||||
)
|
|
||||||
|
|
||||||
# Create TCP handler first
|
|
||||||
handler = logging.handlers.SysLogHandler(
|
|
||||||
address=(server_name, server_port),
|
|
||||||
socktype=socket.SOCK_STREAM,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Wrap socket with TLS
|
|
||||||
if hasattr(handler, "socket") and handler.socket:
|
|
||||||
handler.socket = ssl_context.wrap_socket(
|
|
||||||
handler.socket,
|
|
||||||
server_hostname=server_name,
|
|
||||||
)
|
|
||||||
handler.socket.settimeout(timeout)
|
|
||||||
|
|
||||||
return handler
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
if attempt < retry_attempts:
|
|
||||||
self.logger.warning(
|
|
||||||
f"Syslog connection attempt {attempt}/{retry_attempts} failed: {e}. "
|
|
||||||
f"Retrying in {retry_delay} seconds..."
|
|
||||||
)
|
|
||||||
time.sleep(retry_delay)
|
|
||||||
else:
|
|
||||||
self.logger.error(
|
|
||||||
f"Syslog connection failed after {retry_attempts} attempts: {e}"
|
|
||||||
)
|
|
||||||
raise
|
|
||||||
else:
|
|
||||||
raise ValueError(
|
|
||||||
f"Invalid protocol '{protocol}'. Must be 'udp', 'tcp', or 'tls'."
|
|
||||||
)
|
|
||||||
|
|
||||||
def save_aggregate_report_to_syslog(self, aggregate_reports: list[dict[str, Any]]):
|
|
||||||
rows = parsed_aggregate_reports_to_csv_rows(aggregate_reports)
|
rows = parsed_aggregate_reports_to_csv_rows(aggregate_reports)
|
||||||
for row in rows:
|
for row in rows:
|
||||||
self.logger.info(json.dumps(row))
|
self.logger.info(json.dumps(row))
|
||||||
|
|
||||||
def save_forensic_report_to_syslog(self, forensic_reports: list[dict[str, Any]]):
|
def save_forensic_report_to_syslog(self, forensic_reports):
|
||||||
rows = parsed_forensic_reports_to_csv_rows(forensic_reports)
|
rows = parsed_forensic_reports_to_csv_rows(forensic_reports)
|
||||||
for row in rows:
|
for row in rows:
|
||||||
self.logger.info(json.dumps(row))
|
self.logger.info(json.dumps(row))
|
||||||
|
|
||||||
def save_smtp_tls_report_to_syslog(self, smtp_tls_reports: list[dict[str, Any]]):
|
def save_smtp_tls_report_to_syslog(self, smtp_tls_reports):
|
||||||
rows = parsed_smtp_tls_reports_to_csv_rows(smtp_tls_reports)
|
rows = parsed_smtp_tls_reports_to_csv_rows(smtp_tls_reports)
|
||||||
for row in rows:
|
for row in rows:
|
||||||
self.logger.info(json.dumps(row))
|
self.logger.info(json.dumps(row))
|
||||||
|
|||||||
@@ -1,220 +0,0 @@
|
|||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
from typing import Any, Dict, List, Literal, Optional, TypedDict, Union
|
|
||||||
|
|
||||||
# NOTE: This module is intentionally Python 3.10 compatible.
|
|
||||||
# - No PEP 604 unions (A | B)
|
|
||||||
# - No typing.NotRequired / Required (3.11+) to avoid an extra dependency.
|
|
||||||
# For optional keys, use total=False TypedDicts.
|
|
||||||
|
|
||||||
|
|
||||||
ReportType = Literal["aggregate", "forensic", "smtp_tls"]
|
|
||||||
|
|
||||||
|
|
||||||
class AggregateReportMetadata(TypedDict):
|
|
||||||
org_name: str
|
|
||||||
org_email: str
|
|
||||||
org_extra_contact_info: Optional[str]
|
|
||||||
report_id: str
|
|
||||||
begin_date: str
|
|
||||||
end_date: str
|
|
||||||
timespan_requires_normalization: bool
|
|
||||||
original_timespan_seconds: int
|
|
||||||
errors: List[str]
|
|
||||||
|
|
||||||
|
|
||||||
class AggregatePolicyPublished(TypedDict):
|
|
||||||
domain: str
|
|
||||||
adkim: str
|
|
||||||
aspf: str
|
|
||||||
p: str
|
|
||||||
sp: str
|
|
||||||
pct: str
|
|
||||||
fo: str
|
|
||||||
|
|
||||||
|
|
||||||
class IPSourceInfo(TypedDict):
|
|
||||||
ip_address: str
|
|
||||||
country: Optional[str]
|
|
||||||
reverse_dns: Optional[str]
|
|
||||||
base_domain: Optional[str]
|
|
||||||
name: Optional[str]
|
|
||||||
type: Optional[str]
|
|
||||||
|
|
||||||
|
|
||||||
class AggregateAlignment(TypedDict):
|
|
||||||
spf: bool
|
|
||||||
dkim: bool
|
|
||||||
dmarc: bool
|
|
||||||
|
|
||||||
|
|
||||||
class AggregateIdentifiers(TypedDict):
|
|
||||||
header_from: str
|
|
||||||
envelope_from: Optional[str]
|
|
||||||
envelope_to: Optional[str]
|
|
||||||
|
|
||||||
|
|
||||||
class AggregatePolicyOverrideReason(TypedDict):
|
|
||||||
type: Optional[str]
|
|
||||||
comment: Optional[str]
|
|
||||||
|
|
||||||
|
|
||||||
class AggregateAuthResultDKIM(TypedDict):
|
|
||||||
domain: str
|
|
||||||
result: str
|
|
||||||
selector: str
|
|
||||||
|
|
||||||
|
|
||||||
class AggregateAuthResultSPF(TypedDict):
|
|
||||||
domain: str
|
|
||||||
result: str
|
|
||||||
scope: str
|
|
||||||
|
|
||||||
|
|
||||||
class AggregateAuthResults(TypedDict):
|
|
||||||
dkim: List[AggregateAuthResultDKIM]
|
|
||||||
spf: List[AggregateAuthResultSPF]
|
|
||||||
|
|
||||||
|
|
||||||
class AggregatePolicyEvaluated(TypedDict):
|
|
||||||
disposition: str
|
|
||||||
dkim: str
|
|
||||||
spf: str
|
|
||||||
policy_override_reasons: List[AggregatePolicyOverrideReason]
|
|
||||||
|
|
||||||
|
|
||||||
class AggregateRecord(TypedDict):
|
|
||||||
interval_begin: str
|
|
||||||
interval_end: str
|
|
||||||
source: IPSourceInfo
|
|
||||||
count: int
|
|
||||||
alignment: AggregateAlignment
|
|
||||||
policy_evaluated: AggregatePolicyEvaluated
|
|
||||||
disposition: str
|
|
||||||
identifiers: AggregateIdentifiers
|
|
||||||
auth_results: AggregateAuthResults
|
|
||||||
|
|
||||||
|
|
||||||
class AggregateReport(TypedDict):
|
|
||||||
xml_schema: str
|
|
||||||
report_metadata: AggregateReportMetadata
|
|
||||||
policy_published: AggregatePolicyPublished
|
|
||||||
records: List[AggregateRecord]
|
|
||||||
|
|
||||||
|
|
||||||
class EmailAddress(TypedDict):
|
|
||||||
display_name: Optional[str]
|
|
||||||
address: str
|
|
||||||
local: Optional[str]
|
|
||||||
domain: Optional[str]
|
|
||||||
|
|
||||||
|
|
||||||
class EmailAttachment(TypedDict, total=False):
|
|
||||||
filename: Optional[str]
|
|
||||||
mail_content_type: Optional[str]
|
|
||||||
sha256: Optional[str]
|
|
||||||
|
|
||||||
|
|
||||||
ParsedEmail = TypedDict(
|
|
||||||
"ParsedEmail",
|
|
||||||
{
|
|
||||||
# This is a lightly-specified version of mailsuite/mailparser JSON.
|
|
||||||
# It focuses on the fields parsedmarc uses in forensic handling.
|
|
||||||
"headers": Dict[str, Any],
|
|
||||||
"subject": Optional[str],
|
|
||||||
"filename_safe_subject": Optional[str],
|
|
||||||
"date": Optional[str],
|
|
||||||
"from": EmailAddress,
|
|
||||||
"to": List[EmailAddress],
|
|
||||||
"cc": List[EmailAddress],
|
|
||||||
"bcc": List[EmailAddress],
|
|
||||||
"attachments": List[EmailAttachment],
|
|
||||||
"body": Optional[str],
|
|
||||||
"has_defects": bool,
|
|
||||||
"defects": Any,
|
|
||||||
"defects_categories": Any,
|
|
||||||
},
|
|
||||||
total=False,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class ForensicReport(TypedDict):
|
|
||||||
feedback_type: Optional[str]
|
|
||||||
user_agent: Optional[str]
|
|
||||||
version: Optional[str]
|
|
||||||
original_envelope_id: Optional[str]
|
|
||||||
original_mail_from: Optional[str]
|
|
||||||
original_rcpt_to: Optional[str]
|
|
||||||
arrival_date: str
|
|
||||||
arrival_date_utc: str
|
|
||||||
authentication_results: Optional[str]
|
|
||||||
delivery_result: Optional[str]
|
|
||||||
auth_failure: List[str]
|
|
||||||
authentication_mechanisms: List[str]
|
|
||||||
dkim_domain: Optional[str]
|
|
||||||
reported_domain: str
|
|
||||||
sample_headers_only: bool
|
|
||||||
source: IPSourceInfo
|
|
||||||
sample: str
|
|
||||||
parsed_sample: ParsedEmail
|
|
||||||
|
|
||||||
|
|
||||||
class SMTPTLSFailureDetails(TypedDict):
|
|
||||||
result_type: str
|
|
||||||
failed_session_count: int
|
|
||||||
|
|
||||||
|
|
||||||
class SMTPTLSFailureDetailsOptional(SMTPTLSFailureDetails, total=False):
|
|
||||||
sending_mta_ip: str
|
|
||||||
receiving_ip: str
|
|
||||||
receiving_mx_hostname: str
|
|
||||||
receiving_mx_helo: str
|
|
||||||
additional_info_uri: str
|
|
||||||
failure_reason_code: str
|
|
||||||
ip_address: str
|
|
||||||
|
|
||||||
|
|
||||||
class SMTPTLSPolicySummary(TypedDict):
|
|
||||||
policy_domain: str
|
|
||||||
policy_type: str
|
|
||||||
successful_session_count: int
|
|
||||||
failed_session_count: int
|
|
||||||
|
|
||||||
|
|
||||||
class SMTPTLSPolicy(SMTPTLSPolicySummary, total=False):
|
|
||||||
policy_strings: List[str]
|
|
||||||
mx_host_patterns: List[str]
|
|
||||||
failure_details: List[SMTPTLSFailureDetailsOptional]
|
|
||||||
|
|
||||||
|
|
||||||
class SMTPTLSReport(TypedDict):
|
|
||||||
organization_name: str
|
|
||||||
begin_date: str
|
|
||||||
end_date: str
|
|
||||||
contact_info: Union[str, List[str]]
|
|
||||||
report_id: str
|
|
||||||
policies: List[SMTPTLSPolicy]
|
|
||||||
|
|
||||||
|
|
||||||
class AggregateParsedReport(TypedDict):
|
|
||||||
report_type: Literal["aggregate"]
|
|
||||||
report: AggregateReport
|
|
||||||
|
|
||||||
|
|
||||||
class ForensicParsedReport(TypedDict):
|
|
||||||
report_type: Literal["forensic"]
|
|
||||||
report: ForensicReport
|
|
||||||
|
|
||||||
|
|
||||||
class SMTPTLSParsedReport(TypedDict):
|
|
||||||
report_type: Literal["smtp_tls"]
|
|
||||||
report: SMTPTLSReport
|
|
||||||
|
|
||||||
|
|
||||||
ParsedReport = Union[AggregateParsedReport, ForensicParsedReport, SMTPTLSParsedReport]
|
|
||||||
|
|
||||||
|
|
||||||
class ParsingResults(TypedDict):
|
|
||||||
aggregate_reports: List[AggregateReport]
|
|
||||||
forensic_reports: List[ForensicReport]
|
|
||||||
smtp_tls_reports: List[SMTPTLSReport]
|
|
||||||
@@ -1,59 +1,48 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
"""Utility functions that might be useful for other projects"""
|
"""Utility functions that might be useful for other projects"""
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import base64
|
|
||||||
import csv
|
|
||||||
import hashlib
|
|
||||||
import io
|
|
||||||
import json
|
|
||||||
import logging
|
import logging
|
||||||
import mailbox
|
|
||||||
import os
|
import os
|
||||||
import re
|
from datetime import datetime
|
||||||
import shutil
|
from datetime import timezone
|
||||||
import subprocess
|
from datetime import timedelta
|
||||||
|
from collections import OrderedDict
|
||||||
import tempfile
|
import tempfile
|
||||||
from datetime import datetime, timedelta, timezone
|
import subprocess
|
||||||
from typing import Optional, TypedDict, Union, cast
|
import shutil
|
||||||
|
|
||||||
import mailparser
|
import mailparser
|
||||||
from expiringdict import ExpiringDict
|
import json
|
||||||
|
import hashlib
|
||||||
|
import base64
|
||||||
|
import mailbox
|
||||||
|
import re
|
||||||
|
import csv
|
||||||
|
import io
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from importlib.resources import files
|
import importlib.resources as pkg_resources
|
||||||
except ImportError:
|
except ImportError:
|
||||||
# Try backported to PY<3 `importlib_resources`
|
# Try backported to PY<37 `importlib_resources`
|
||||||
from importlib.resources import files
|
import importlib_resources as pkg_resources
|
||||||
|
|
||||||
|
from dateutil.parser import parse as parse_date
|
||||||
import dns.exception
|
|
||||||
import dns.resolver
|
|
||||||
import dns.reversename
|
import dns.reversename
|
||||||
|
import dns.resolver
|
||||||
|
import dns.exception
|
||||||
import geoip2.database
|
import geoip2.database
|
||||||
import geoip2.errors
|
import geoip2.errors
|
||||||
import publicsuffixlist
|
import publicsuffixlist
|
||||||
import requests
|
import requests
|
||||||
from dateutil.parser import parse as parse_date
|
|
||||||
|
|
||||||
|
from parsedmarc.log import logger
|
||||||
import parsedmarc.resources.dbip
|
import parsedmarc.resources.dbip
|
||||||
import parsedmarc.resources.maps
|
import parsedmarc.resources.maps
|
||||||
from parsedmarc.constants import USER_AGENT
|
|
||||||
from parsedmarc.log import logger
|
|
||||||
|
|
||||||
parenthesis_regex = re.compile(r"\s*\(.*\)\s*")
|
parenthesis_regex = re.compile(r"\s*\(.*\)\s*")
|
||||||
|
|
||||||
null_file = open(os.devnull, "w")
|
null_file = open(os.devnull, "w")
|
||||||
mailparser_logger = logging.getLogger("mailparser")
|
mailparser_logger = logging.getLogger("mailparser")
|
||||||
mailparser_logger.setLevel(logging.CRITICAL)
|
mailparser_logger.setLevel(logging.CRITICAL)
|
||||||
psl = publicsuffixlist.PublicSuffixList()
|
|
||||||
psl_overrides_path = str(files(parsedmarc.resources.maps).joinpath("psl_overrides.txt"))
|
|
||||||
with open(psl_overrides_path) as f:
|
|
||||||
psl_overrides = [line.rstrip() for line in f.readlines()]
|
|
||||||
while "" in psl_overrides:
|
|
||||||
psl_overrides.remove("")
|
|
||||||
|
|
||||||
|
|
||||||
class EmailParserError(RuntimeError):
|
class EmailParserError(RuntimeError):
|
||||||
@@ -64,49 +53,31 @@ class DownloadError(RuntimeError):
|
|||||||
"""Raised when an error occurs when downloading a file"""
|
"""Raised when an error occurs when downloading a file"""
|
||||||
|
|
||||||
|
|
||||||
class ReverseDNSService(TypedDict):
|
def decode_base64(data):
|
||||||
name: str
|
|
||||||
type: Optional[str]
|
|
||||||
|
|
||||||
|
|
||||||
ReverseDNSMap = dict[str, ReverseDNSService]
|
|
||||||
|
|
||||||
|
|
||||||
class IPAddressInfo(TypedDict):
|
|
||||||
ip_address: str
|
|
||||||
reverse_dns: Optional[str]
|
|
||||||
country: Optional[str]
|
|
||||||
base_domain: Optional[str]
|
|
||||||
name: Optional[str]
|
|
||||||
type: Optional[str]
|
|
||||||
|
|
||||||
|
|
||||||
def decode_base64(data: str) -> bytes:
|
|
||||||
"""
|
"""
|
||||||
Decodes a base64 string, with padding being optional
|
Decodes a base64 string, with padding being optional
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
data (str): A base64 encoded string
|
data: A base64 encoded string
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
bytes: The decoded bytes
|
bytes: The decoded bytes
|
||||||
|
|
||||||
"""
|
"""
|
||||||
data_bytes = bytes(data, encoding="ascii")
|
data = bytes(data, encoding="ascii")
|
||||||
missing_padding = len(data_bytes) % 4
|
missing_padding = len(data) % 4
|
||||||
if missing_padding != 0:
|
if missing_padding != 0:
|
||||||
data_bytes += b"=" * (4 - missing_padding)
|
data += b"=" * (4 - missing_padding)
|
||||||
return base64.b64decode(data_bytes)
|
return base64.b64decode(data)
|
||||||
|
|
||||||
|
|
||||||
def get_base_domain(domain: str) -> Optional[str]:
|
def get_base_domain(domain):
|
||||||
"""
|
"""
|
||||||
Gets the base domain name for the given domain
|
Gets the base domain name for the given domain
|
||||||
|
|
||||||
.. note::
|
.. note::
|
||||||
Results are based on a list of public domain suffixes at
|
Results are based on a list of public domain suffixes at
|
||||||
https://publicsuffix.org/list/public_suffix_list.dat and overrides included in
|
https://publicsuffix.org/list/public_suffix_list.dat.
|
||||||
parsedmarc.resources.maps.psl_overrides.txt
|
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
domain (str): A domain or subdomain
|
domain (str): A domain or subdomain
|
||||||
@@ -115,22 +86,11 @@ def get_base_domain(domain: str) -> Optional[str]:
|
|||||||
str: The base domain of the given domain
|
str: The base domain of the given domain
|
||||||
|
|
||||||
"""
|
"""
|
||||||
domain = domain.lower()
|
psl = publicsuffixlist.PublicSuffixList()
|
||||||
publicsuffix = psl.privatesuffix(domain)
|
return psl.privatesuffix(domain)
|
||||||
for override in psl_overrides:
|
|
||||||
if domain.endswith(override):
|
|
||||||
return override.strip(".").strip("-")
|
|
||||||
return publicsuffix
|
|
||||||
|
|
||||||
|
|
||||||
def query_dns(
|
def query_dns(domain, record_type, cache=None, nameservers=None, timeout=2.0):
|
||||||
domain: str,
|
|
||||||
record_type: str,
|
|
||||||
*,
|
|
||||||
cache: Optional[ExpiringDict] = None,
|
|
||||||
nameservers: Optional[list[str]] = None,
|
|
||||||
timeout: float = 2.0,
|
|
||||||
) -> list[str]:
|
|
||||||
"""
|
"""
|
||||||
Queries DNS
|
Queries DNS
|
||||||
|
|
||||||
@@ -149,9 +109,9 @@ def query_dns(
|
|||||||
record_type = record_type.upper()
|
record_type = record_type.upper()
|
||||||
cache_key = "{0}_{1}".format(domain, record_type)
|
cache_key = "{0}_{1}".format(domain, record_type)
|
||||||
if cache:
|
if cache:
|
||||||
cached_records = cache.get(cache_key, None)
|
records = cache.get(cache_key, None)
|
||||||
if isinstance(cached_records, list):
|
if records:
|
||||||
return cast(list[str], cached_records)
|
return records
|
||||||
|
|
||||||
resolver = dns.resolver.Resolver()
|
resolver = dns.resolver.Resolver()
|
||||||
timeout = float(timeout)
|
timeout = float(timeout)
|
||||||
@@ -165,25 +125,33 @@ def query_dns(
|
|||||||
resolver.nameservers = nameservers
|
resolver.nameservers = nameservers
|
||||||
resolver.timeout = timeout
|
resolver.timeout = timeout
|
||||||
resolver.lifetime = timeout
|
resolver.lifetime = timeout
|
||||||
records = list(
|
if record_type == "TXT":
|
||||||
map(
|
resource_records = list(
|
||||||
lambda r: r.to_text().replace('"', "").rstrip("."),
|
map(
|
||||||
resolver.resolve(domain, record_type, lifetime=timeout),
|
lambda r: r.strings,
|
||||||
|
resolver.resolve(domain, record_type, lifetime=timeout),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
_resource_record = [
|
||||||
|
resource_record[0][:0].join(resource_record)
|
||||||
|
for resource_record in resource_records
|
||||||
|
if resource_record
|
||||||
|
]
|
||||||
|
records = [r.decode() for r in _resource_record]
|
||||||
|
else:
|
||||||
|
records = list(
|
||||||
|
map(
|
||||||
|
lambda r: r.to_text().replace('"', "").rstrip("."),
|
||||||
|
resolver.resolve(domain, record_type, lifetime=timeout),
|
||||||
|
)
|
||||||
)
|
)
|
||||||
)
|
|
||||||
if cache:
|
if cache:
|
||||||
cache[cache_key] = records
|
cache[cache_key] = records
|
||||||
|
|
||||||
return records
|
return records
|
||||||
|
|
||||||
|
|
||||||
def get_reverse_dns(
|
def get_reverse_dns(ip_address, cache=None, nameservers=None, timeout=2.0):
|
||||||
ip_address,
|
|
||||||
*,
|
|
||||||
cache: Optional[ExpiringDict] = None,
|
|
||||||
nameservers: Optional[list[str]] = None,
|
|
||||||
timeout: float = 2.0,
|
|
||||||
) -> Optional[str]:
|
|
||||||
"""
|
"""
|
||||||
Resolves an IP address to a hostname using a reverse DNS query
|
Resolves an IP address to a hostname using a reverse DNS query
|
||||||
|
|
||||||
@@ -201,7 +169,7 @@ def get_reverse_dns(
|
|||||||
try:
|
try:
|
||||||
address = dns.reversename.from_address(ip_address)
|
address = dns.reversename.from_address(ip_address)
|
||||||
hostname = query_dns(
|
hostname = query_dns(
|
||||||
str(address), "PTR", cache=cache, nameservers=nameservers, timeout=timeout
|
address, "PTR", cache=cache, nameservers=nameservers, timeout=timeout
|
||||||
)[0]
|
)[0]
|
||||||
|
|
||||||
except dns.exception.DNSException as e:
|
except dns.exception.DNSException as e:
|
||||||
@@ -211,7 +179,7 @@ def get_reverse_dns(
|
|||||||
return hostname
|
return hostname
|
||||||
|
|
||||||
|
|
||||||
def timestamp_to_datetime(timestamp: int) -> datetime:
|
def timestamp_to_datetime(timestamp):
|
||||||
"""
|
"""
|
||||||
Converts a UNIX/DMARC timestamp to a Python ``datetime`` object
|
Converts a UNIX/DMARC timestamp to a Python ``datetime`` object
|
||||||
|
|
||||||
@@ -224,7 +192,7 @@ def timestamp_to_datetime(timestamp: int) -> datetime:
|
|||||||
return datetime.fromtimestamp(int(timestamp))
|
return datetime.fromtimestamp(int(timestamp))
|
||||||
|
|
||||||
|
|
||||||
def timestamp_to_human(timestamp: int) -> str:
|
def timestamp_to_human(timestamp):
|
||||||
"""
|
"""
|
||||||
Converts a UNIX/DMARC timestamp to a human-readable string
|
Converts a UNIX/DMARC timestamp to a human-readable string
|
||||||
|
|
||||||
@@ -237,9 +205,7 @@ def timestamp_to_human(timestamp: int) -> str:
|
|||||||
return timestamp_to_datetime(timestamp).strftime("%Y-%m-%d %H:%M:%S")
|
return timestamp_to_datetime(timestamp).strftime("%Y-%m-%d %H:%M:%S")
|
||||||
|
|
||||||
|
|
||||||
def human_timestamp_to_datetime(
|
def human_timestamp_to_datetime(human_timestamp, to_utc=False):
|
||||||
human_timestamp: str, *, to_utc: bool = False
|
|
||||||
) -> datetime:
|
|
||||||
"""
|
"""
|
||||||
Converts a human-readable timestamp into a Python ``datetime`` object
|
Converts a human-readable timestamp into a Python ``datetime`` object
|
||||||
|
|
||||||
@@ -258,7 +224,7 @@ def human_timestamp_to_datetime(
|
|||||||
return dt.astimezone(timezone.utc) if to_utc else dt
|
return dt.astimezone(timezone.utc) if to_utc else dt
|
||||||
|
|
||||||
|
|
||||||
def human_timestamp_to_unix_timestamp(human_timestamp: str) -> int:
|
def human_timestamp_to_unix_timestamp(human_timestamp):
|
||||||
"""
|
"""
|
||||||
Converts a human-readable timestamp into a UNIX timestamp
|
Converts a human-readable timestamp into a UNIX timestamp
|
||||||
|
|
||||||
@@ -269,12 +235,10 @@ def human_timestamp_to_unix_timestamp(human_timestamp: str) -> int:
|
|||||||
float: The converted timestamp
|
float: The converted timestamp
|
||||||
"""
|
"""
|
||||||
human_timestamp = human_timestamp.replace("T", " ")
|
human_timestamp = human_timestamp.replace("T", " ")
|
||||||
return int(human_timestamp_to_datetime(human_timestamp).timestamp())
|
return human_timestamp_to_datetime(human_timestamp).timestamp()
|
||||||
|
|
||||||
|
|
||||||
def get_ip_address_country(
|
def get_ip_address_country(ip_address, db_path=None):
|
||||||
ip_address: str, *, db_path: Optional[str] = None
|
|
||||||
) -> Optional[str]:
|
|
||||||
"""
|
"""
|
||||||
Returns the ISO code for the country associated
|
Returns the ISO code for the country associated
|
||||||
with the given IPv4 or IPv6 address
|
with the given IPv4 or IPv6 address
|
||||||
@@ -301,7 +265,7 @@ def get_ip_address_country(
|
|||||||
]
|
]
|
||||||
|
|
||||||
if db_path is not None:
|
if db_path is not None:
|
||||||
if not os.path.isfile(db_path):
|
if os.path.isfile(db_path) is False:
|
||||||
db_path = None
|
db_path = None
|
||||||
logger.warning(
|
logger.warning(
|
||||||
f"No file exists at {db_path}. Falling back to an "
|
f"No file exists at {db_path}. Falling back to an "
|
||||||
@@ -316,13 +280,14 @@ def get_ip_address_country(
|
|||||||
break
|
break
|
||||||
|
|
||||||
if db_path is None:
|
if db_path is None:
|
||||||
db_path = str(
|
with pkg_resources.path(
|
||||||
files(parsedmarc.resources.dbip).joinpath("dbip-country-lite.mmdb")
|
parsedmarc.resources.dbip, "dbip-country-lite.mmdb"
|
||||||
)
|
) as path:
|
||||||
|
db_path = path
|
||||||
|
|
||||||
db_age = datetime.now() - datetime.fromtimestamp(os.stat(db_path).st_mtime)
|
db_age = datetime.now() - datetime.fromtimestamp(os.stat(db_path).st_mtime)
|
||||||
if db_age > timedelta(days=30):
|
if db_age > timedelta(days=30):
|
||||||
logger.warning("IP database is more than a month old")
|
logger.warning("IP database is more than a month old")
|
||||||
|
|
||||||
db_reader = geoip2.database.Reader(db_path)
|
db_reader = geoip2.database.Reader(db_path)
|
||||||
|
|
||||||
@@ -338,13 +303,12 @@ def get_ip_address_country(
|
|||||||
|
|
||||||
def get_service_from_reverse_dns_base_domain(
|
def get_service_from_reverse_dns_base_domain(
|
||||||
base_domain,
|
base_domain,
|
||||||
*,
|
always_use_local_file=False,
|
||||||
always_use_local_file: bool = False,
|
local_file_path=None,
|
||||||
local_file_path: Optional[str] = None,
|
url=None,
|
||||||
url: Optional[str] = None,
|
offline=False,
|
||||||
offline: bool = False,
|
reverse_dns_map=None,
|
||||||
reverse_dns_map: Optional[ReverseDNSMap] = None,
|
):
|
||||||
) -> ReverseDNSService:
|
|
||||||
"""
|
"""
|
||||||
Returns the service name of a given base domain name from reverse DNS.
|
Returns the service name of a given base domain name from reverse DNS.
|
||||||
|
|
||||||
@@ -361,6 +325,12 @@ def get_service_from_reverse_dns_base_domain(
|
|||||||
the supplied reverse_dns_base_domain and the type will be None
|
the supplied reverse_dns_base_domain and the type will be None
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
def load_csv(_csv_file):
|
||||||
|
reader = csv.DictReader(_csv_file)
|
||||||
|
for row in reader:
|
||||||
|
key = row["base_reverse_dns"].lower().strip()
|
||||||
|
reverse_dns_map[key] = dict(name=row["name"], type=row["type"])
|
||||||
|
|
||||||
base_domain = base_domain.lower().strip()
|
base_domain = base_domain.lower().strip()
|
||||||
if url is None:
|
if url is None:
|
||||||
url = (
|
url = (
|
||||||
@@ -368,71 +338,49 @@ def get_service_from_reverse_dns_base_domain(
|
|||||||
"/parsedmarc/master/parsedmarc/"
|
"/parsedmarc/master/parsedmarc/"
|
||||||
"resources/maps/base_reverse_dns_map.csv"
|
"resources/maps/base_reverse_dns_map.csv"
|
||||||
)
|
)
|
||||||
reverse_dns_map_value: ReverseDNSMap
|
|
||||||
if reverse_dns_map is None:
|
if reverse_dns_map is None:
|
||||||
reverse_dns_map_value = {}
|
reverse_dns_map = dict()
|
||||||
else:
|
|
||||||
reverse_dns_map_value = reverse_dns_map
|
|
||||||
|
|
||||||
def load_csv(_csv_file):
|
|
||||||
reader = csv.DictReader(_csv_file)
|
|
||||||
for row in reader:
|
|
||||||
key = row["base_reverse_dns"].lower().strip()
|
|
||||||
reverse_dns_map_value[key] = {
|
|
||||||
"name": row["name"],
|
|
||||||
"type": row["type"],
|
|
||||||
}
|
|
||||||
|
|
||||||
csv_file = io.StringIO()
|
csv_file = io.StringIO()
|
||||||
|
|
||||||
if not (offline or always_use_local_file) and len(reverse_dns_map_value) == 0:
|
if not (offline or always_use_local_file) and len(reverse_dns_map) == 0:
|
||||||
try:
|
try:
|
||||||
logger.debug(f"Trying to fetch reverse DNS map from {url}...")
|
logger.debug(f"Trying to fetch reverse DNS map from {url}...")
|
||||||
headers = {"User-Agent": USER_AGENT}
|
response = requests.get(url)
|
||||||
response = requests.get(url, headers=headers)
|
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
csv_file.write(response.text)
|
csv_file.write(response.text)
|
||||||
csv_file.seek(0)
|
csv_file.seek(0)
|
||||||
load_csv(csv_file)
|
load_csv(csv_file)
|
||||||
except requests.exceptions.RequestException as e:
|
except requests.exceptions.RequestException as e:
|
||||||
logger.warning(f"Failed to fetch reverse DNS map: {e}")
|
logger.warning(f"Failed to fetch reverse DNS map: {e}")
|
||||||
except Exception:
|
if len(reverse_dns_map) == 0:
|
||||||
logger.warning("Not a valid CSV file")
|
|
||||||
csv_file.seek(0)
|
|
||||||
logging.debug("Response body:")
|
|
||||||
logger.debug(csv_file.read())
|
|
||||||
|
|
||||||
if len(reverse_dns_map_value) == 0:
|
|
||||||
logger.info("Loading included reverse DNS map...")
|
logger.info("Loading included reverse DNS map...")
|
||||||
path = str(
|
with pkg_resources.path(
|
||||||
files(parsedmarc.resources.maps).joinpath("base_reverse_dns_map.csv")
|
parsedmarc.resources.maps, "base_reverse_dns_map.csv"
|
||||||
)
|
) as path:
|
||||||
if local_file_path is not None:
|
if local_file_path is not None:
|
||||||
path = local_file_path
|
path = local_file_path
|
||||||
with open(path) as csv_file:
|
with open(path) as csv_file:
|
||||||
load_csv(csv_file)
|
load_csv(csv_file)
|
||||||
service: ReverseDNSService
|
|
||||||
try:
|
try:
|
||||||
service = reverse_dns_map_value[base_domain]
|
service = reverse_dns_map[base_domain]
|
||||||
except KeyError:
|
except KeyError:
|
||||||
service = {"name": base_domain, "type": None}
|
service = dict(name=base_domain, type=None)
|
||||||
|
|
||||||
return service
|
return service
|
||||||
|
|
||||||
|
|
||||||
def get_ip_address_info(
|
def get_ip_address_info(
|
||||||
ip_address,
|
ip_address,
|
||||||
*,
|
ip_db_path=None,
|
||||||
ip_db_path: Optional[str] = None,
|
reverse_dns_map_path=None,
|
||||||
reverse_dns_map_path: Optional[str] = None,
|
always_use_local_files=False,
|
||||||
always_use_local_files: bool = False,
|
reverse_dns_map_url=None,
|
||||||
reverse_dns_map_url: Optional[str] = None,
|
cache=None,
|
||||||
cache: Optional[ExpiringDict] = None,
|
reverse_dns_map=None,
|
||||||
reverse_dns_map: Optional[ReverseDNSMap] = None,
|
offline=False,
|
||||||
offline: bool = False,
|
nameservers=None,
|
||||||
nameservers: Optional[list[str]] = None,
|
timeout=2.0,
|
||||||
timeout: float = 2.0,
|
):
|
||||||
) -> IPAddressInfo:
|
|
||||||
"""
|
"""
|
||||||
Returns reverse DNS and country information for the given IP address
|
Returns reverse DNS and country information for the given IP address
|
||||||
|
|
||||||
@@ -450,27 +398,17 @@ def get_ip_address_info(
|
|||||||
timeout (float): Sets the DNS timeout in seconds
|
timeout (float): Sets the DNS timeout in seconds
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
dict: ``ip_address``, ``reverse_dns``, ``country``
|
OrderedDict: ``ip_address``, ``reverse_dns``
|
||||||
|
|
||||||
"""
|
"""
|
||||||
ip_address = ip_address.lower()
|
ip_address = ip_address.lower()
|
||||||
if cache is not None:
|
if cache is not None:
|
||||||
cached_info = cache.get(ip_address, None)
|
info = cache.get(ip_address, None)
|
||||||
if (
|
if info:
|
||||||
cached_info
|
|
||||||
and isinstance(cached_info, dict)
|
|
||||||
and "ip_address" in cached_info
|
|
||||||
):
|
|
||||||
logger.debug(f"IP address {ip_address} was found in cache")
|
logger.debug(f"IP address {ip_address} was found in cache")
|
||||||
return cast(IPAddressInfo, cached_info)
|
return info
|
||||||
info: IPAddressInfo = {
|
info = OrderedDict()
|
||||||
"ip_address": ip_address,
|
info["ip_address"] = ip_address
|
||||||
"reverse_dns": None,
|
|
||||||
"country": None,
|
|
||||||
"base_domain": None,
|
|
||||||
"name": None,
|
|
||||||
"type": None,
|
|
||||||
}
|
|
||||||
if offline:
|
if offline:
|
||||||
reverse_dns = None
|
reverse_dns = None
|
||||||
else:
|
else:
|
||||||
@@ -480,6 +418,9 @@ def get_ip_address_info(
|
|||||||
country = get_ip_address_country(ip_address, db_path=ip_db_path)
|
country = get_ip_address_country(ip_address, db_path=ip_db_path)
|
||||||
info["country"] = country
|
info["country"] = country
|
||||||
info["reverse_dns"] = reverse_dns
|
info["reverse_dns"] = reverse_dns
|
||||||
|
info["base_domain"] = None
|
||||||
|
info["name"] = None
|
||||||
|
info["type"] = None
|
||||||
if reverse_dns is not None:
|
if reverse_dns is not None:
|
||||||
base_domain = get_base_domain(reverse_dns)
|
base_domain = get_base_domain(reverse_dns)
|
||||||
if base_domain is not None:
|
if base_domain is not None:
|
||||||
@@ -504,7 +445,7 @@ def get_ip_address_info(
|
|||||||
return info
|
return info
|
||||||
|
|
||||||
|
|
||||||
def parse_email_address(original_address: str) -> dict[str, Optional[str]]:
|
def parse_email_address(original_address):
|
||||||
if original_address[0] == "":
|
if original_address[0] == "":
|
||||||
display_name = None
|
display_name = None
|
||||||
else:
|
else:
|
||||||
@@ -517,15 +458,17 @@ def parse_email_address(original_address: str) -> dict[str, Optional[str]]:
|
|||||||
local = address_parts[0].lower()
|
local = address_parts[0].lower()
|
||||||
domain = address_parts[-1].lower()
|
domain = address_parts[-1].lower()
|
||||||
|
|
||||||
return {
|
return OrderedDict(
|
||||||
"display_name": display_name,
|
[
|
||||||
"address": address,
|
("display_name", display_name),
|
||||||
"local": local,
|
("address", address),
|
||||||
"domain": domain,
|
("local", local),
|
||||||
}
|
("domain", domain),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def get_filename_safe_string(string: str) -> str:
|
def get_filename_safe_string(string):
|
||||||
"""
|
"""
|
||||||
Converts a string to a string that is safe for a filename
|
Converts a string to a string that is safe for a filename
|
||||||
|
|
||||||
@@ -547,7 +490,7 @@ def get_filename_safe_string(string: str) -> str:
|
|||||||
return string
|
return string
|
||||||
|
|
||||||
|
|
||||||
def is_mbox(path: str) -> bool:
|
def is_mbox(path):
|
||||||
"""
|
"""
|
||||||
Checks if the given content is an MBOX mailbox file
|
Checks if the given content is an MBOX mailbox file
|
||||||
|
|
||||||
@@ -568,7 +511,7 @@ def is_mbox(path: str) -> bool:
|
|||||||
return _is_mbox
|
return _is_mbox
|
||||||
|
|
||||||
|
|
||||||
def is_outlook_msg(content) -> bool:
|
def is_outlook_msg(content):
|
||||||
"""
|
"""
|
||||||
Checks if the given content is an Outlook msg OLE/MSG file
|
Checks if the given content is an Outlook msg OLE/MSG file
|
||||||
|
|
||||||
@@ -583,7 +526,7 @@ def is_outlook_msg(content) -> bool:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def convert_outlook_msg(msg_bytes: bytes) -> bytes:
|
def convert_outlook_msg(msg_bytes):
|
||||||
"""
|
"""
|
||||||
Uses the ``msgconvert`` Perl utility to convert an Outlook MS file to
|
Uses the ``msgconvert`` Perl utility to convert an Outlook MS file to
|
||||||
standard RFC 822 format
|
standard RFC 822 format
|
||||||
@@ -592,7 +535,7 @@ def convert_outlook_msg(msg_bytes: bytes) -> bytes:
|
|||||||
msg_bytes (bytes): the content of the .msg file
|
msg_bytes (bytes): the content of the .msg file
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
A RFC 822 bytes payload
|
A RFC 822 string
|
||||||
"""
|
"""
|
||||||
if not is_outlook_msg(msg_bytes):
|
if not is_outlook_msg(msg_bytes):
|
||||||
raise ValueError("The supplied bytes are not an Outlook MSG file")
|
raise ValueError("The supplied bytes are not an Outlook MSG file")
|
||||||
@@ -619,9 +562,7 @@ def convert_outlook_msg(msg_bytes: bytes) -> bytes:
|
|||||||
return rfc822
|
return rfc822
|
||||||
|
|
||||||
|
|
||||||
def parse_email(
|
def parse_email(data, strip_attachment_payloads=False):
|
||||||
data: Union[bytes, str], *, strip_attachment_payloads: bool = False
|
|
||||||
) -> dict:
|
|
||||||
"""
|
"""
|
||||||
A simplified email parser
|
A simplified email parser
|
||||||
|
|
||||||
|
|||||||
@@ -1,25 +1,12 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
from typing import Any, Optional, Union
|
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
from parsedmarc import logger
|
from parsedmarc import logger
|
||||||
from parsedmarc.constants import USER_AGENT
|
|
||||||
|
|
||||||
|
|
||||||
class WebhookClient(object):
|
class WebhookClient(object):
|
||||||
"""A client for webhooks"""
|
"""A client for webhooks"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(self, aggregate_url, forensic_url, smtp_tls_url, timeout=60):
|
||||||
self,
|
|
||||||
aggregate_url: str,
|
|
||||||
forensic_url: str,
|
|
||||||
smtp_tls_url: str,
|
|
||||||
timeout: Optional[int] = 60,
|
|
||||||
):
|
|
||||||
"""
|
"""
|
||||||
Initializes the WebhookClient
|
Initializes the WebhookClient
|
||||||
Args:
|
Args:
|
||||||
@@ -34,31 +21,29 @@ class WebhookClient(object):
|
|||||||
self.timeout = timeout
|
self.timeout = timeout
|
||||||
self.session = requests.Session()
|
self.session = requests.Session()
|
||||||
self.session.headers = {
|
self.session.headers = {
|
||||||
"User-Agent": USER_AGENT,
|
"User-Agent": "parsedmarc",
|
||||||
"Content-Type": "application/json",
|
"Content-Type": "application/json",
|
||||||
}
|
}
|
||||||
|
|
||||||
def save_forensic_report_to_webhook(self, report: str):
|
def save_forensic_report_to_webhook(self, report):
|
||||||
try:
|
try:
|
||||||
self._send_to_webhook(self.forensic_url, report)
|
self._send_to_webhook(self.forensic_url, report)
|
||||||
except Exception as error_:
|
except Exception as error_:
|
||||||
logger.error("Webhook Error: {0}".format(error_.__str__()))
|
logger.error("Webhook Error: {0}".format(error_.__str__()))
|
||||||
|
|
||||||
def save_smtp_tls_report_to_webhook(self, report: str):
|
def save_smtp_tls_report_to_webhook(self, report):
|
||||||
try:
|
try:
|
||||||
self._send_to_webhook(self.smtp_tls_url, report)
|
self._send_to_webhook(self.smtp_tls_url, report)
|
||||||
except Exception as error_:
|
except Exception as error_:
|
||||||
logger.error("Webhook Error: {0}".format(error_.__str__()))
|
logger.error("Webhook Error: {0}".format(error_.__str__()))
|
||||||
|
|
||||||
def save_aggregate_report_to_webhook(self, report: str):
|
def save_aggregate_report_to_webhook(self, report):
|
||||||
try:
|
try:
|
||||||
self._send_to_webhook(self.aggregate_url, report)
|
self._send_to_webhook(self.aggregate_url, report)
|
||||||
except Exception as error_:
|
except Exception as error_:
|
||||||
logger.error("Webhook Error: {0}".format(error_.__str__()))
|
logger.error("Webhook Error: {0}".format(error_.__str__()))
|
||||||
|
|
||||||
def _send_to_webhook(
|
def _send_to_webhook(self, webhook_url, payload):
|
||||||
self, webhook_url: str, payload: Union[bytes, str, dict[str, Any]]
|
|
||||||
):
|
|
||||||
try:
|
try:
|
||||||
self.session.post(webhook_url, data=payload, timeout=self.timeout)
|
self.session.post(webhook_url, data=payload, timeout=self.timeout)
|
||||||
except Exception as error_:
|
except Exception as error_:
|
||||||
|
|||||||
@@ -2,7 +2,6 @@
|
|||||||
requires = [
|
requires = [
|
||||||
"hatchling>=1.27.0",
|
"hatchling>=1.27.0",
|
||||||
]
|
]
|
||||||
requires_python = ">=3.10,<3.14"
|
|
||||||
build-backend = "hatchling.build"
|
build-backend = "hatchling.build"
|
||||||
|
|
||||||
[project]
|
[project]
|
||||||
@@ -29,15 +28,14 @@ classifiers = [
|
|||||||
"Operating System :: OS Independent",
|
"Operating System :: OS Independent",
|
||||||
"Programming Language :: Python :: 3"
|
"Programming Language :: Python :: 3"
|
||||||
]
|
]
|
||||||
requires-python = ">=3.10"
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"azure-identity>=1.8.0",
|
"azure-identity>=1.8.0",
|
||||||
"azure-monitor-ingestion>=1.0.0",
|
"azure-monitor-ingestion>=1.0.0",
|
||||||
"boto3>=1.16.63",
|
"boto3>=1.16.63",
|
||||||
"dateparser>=1.1.1",
|
"dateparser>=1.1.1",
|
||||||
"dnspython>=2.0.0",
|
"dnspython>=2.0.0",
|
||||||
"elasticsearch-dsl==7.4.0",
|
"elasticsearch-dsl==8.17.1",
|
||||||
"elasticsearch<7.14.0",
|
"elasticsearch<=8.0.0",
|
||||||
"expiringdict>=1.1.4",
|
"expiringdict>=1.1.4",
|
||||||
"geoip2>=3.0.0",
|
"geoip2>=3.0.0",
|
||||||
"google-api-core>=2.4.0",
|
"google-api-core>=2.4.0",
|
||||||
@@ -48,7 +46,7 @@ dependencies = [
|
|||||||
"imapclient>=2.1.0",
|
"imapclient>=2.1.0",
|
||||||
"kafka-python-ng>=2.2.2",
|
"kafka-python-ng>=2.2.2",
|
||||||
"lxml>=4.4.0",
|
"lxml>=4.4.0",
|
||||||
"mailsuite>=1.11.2",
|
"mailsuite>=1.9.18",
|
||||||
"msgraph-core==0.2.2",
|
"msgraph-core==0.2.2",
|
||||||
"opensearch-py>=2.4.2,<=3.0.0",
|
"opensearch-py>=2.4.2,<=3.0.0",
|
||||||
"publicsuffixlist>=0.10.0",
|
"publicsuffixlist>=0.10.0",
|
||||||
@@ -57,7 +55,6 @@ dependencies = [
|
|||||||
"tqdm>=4.31.1",
|
"tqdm>=4.31.1",
|
||||||
"urllib3>=1.25.7",
|
"urllib3>=1.25.7",
|
||||||
"xmltodict>=0.12.0",
|
"xmltodict>=0.12.0",
|
||||||
"PyYAML>=6.0.3"
|
|
||||||
]
|
]
|
||||||
|
|
||||||
[project.optional-dependencies]
|
[project.optional-dependencies]
|
||||||
@@ -79,20 +76,9 @@ parsedmarc = "parsedmarc.cli:_main"
|
|||||||
Homepage = "https://domainaware.github.io/parsedmarc"
|
Homepage = "https://domainaware.github.io/parsedmarc"
|
||||||
|
|
||||||
[tool.hatch.version]
|
[tool.hatch.version]
|
||||||
path = "parsedmarc/constants.py"
|
path = "parsedmarc/__init__.py"
|
||||||
|
|
||||||
[tool.hatch.build.targets.sdist]
|
[tool.hatch.build.targets.sdist]
|
||||||
include = [
|
include = [
|
||||||
"/parsedmarc",
|
"/parsedmarc",
|
||||||
]
|
]
|
||||||
|
|
||||||
[tool.hatch.build]
|
|
||||||
exclude = [
|
|
||||||
"base_reverse_dns.csv",
|
|
||||||
"find_bad_utf8.py",
|
|
||||||
"find_unknown_base_reverse_dns.py",
|
|
||||||
"unknown_base_reverse_dns.csv",
|
|
||||||
"sortmaps.py",
|
|
||||||
"README.md",
|
|
||||||
"*.bak"
|
|
||||||
]
|
|
||||||
|
|||||||
25
sortmaps.py
Executable file
25
sortmaps.py
Executable file
@@ -0,0 +1,25 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
import os
|
||||||
|
import glob
|
||||||
|
import csv
|
||||||
|
|
||||||
|
|
||||||
|
maps_dir = os.path.join("parsedmarc", "resources", "maps")
|
||||||
|
csv_files = glob.glob(os.path.join(maps_dir, "*.csv"))
|
||||||
|
|
||||||
|
|
||||||
|
def sort_csv(filepath, column=0):
|
||||||
|
with open(filepath, mode="r", newline="") as infile:
|
||||||
|
reader = csv.reader(infile)
|
||||||
|
header = next(reader)
|
||||||
|
sorted_rows = sorted(reader, key=lambda row: row[column])
|
||||||
|
|
||||||
|
with open(filepath, mode="w", newline="\n") as outfile:
|
||||||
|
writer = csv.writer(outfile)
|
||||||
|
writer.writerow(header)
|
||||||
|
writer.writerows(sorted_rows)
|
||||||
|
|
||||||
|
|
||||||
|
for csv_file in csv_files:
|
||||||
|
sort_csv(csv_file)
|
||||||
@@ -1,107 +0,0 @@
|
|||||||
<form version="1.1" theme="dark">
|
|
||||||
<label>SMTP TLS Reporting</label>
|
|
||||||
<fieldset submitButton="false" autoRun="true">
|
|
||||||
<input type="time" token="time">
|
|
||||||
<label></label>
|
|
||||||
<default>
|
|
||||||
<earliest>-7d@h</earliest>
|
|
||||||
<latest>now</latest>
|
|
||||||
</default>
|
|
||||||
</input>
|
|
||||||
<input type="text" token="organization_name" searchWhenChanged="true">
|
|
||||||
<label>Organization name</label>
|
|
||||||
<default>*</default>
|
|
||||||
<initialValue>*</initialValue>
|
|
||||||
</input>
|
|
||||||
<input type="text" token="policy_domain">
|
|
||||||
<label>Policy domain</label>
|
|
||||||
<default>*</default>
|
|
||||||
<initialValue>*</initialValue>
|
|
||||||
</input>
|
|
||||||
<input type="dropdown" token="policy_type" searchWhenChanged="true">
|
|
||||||
<label>Policy type</label>
|
|
||||||
<choice value="*">Any</choice>
|
|
||||||
<choice value="tlsa">tlsa</choice>
|
|
||||||
<choice value="sts">sts</choice>
|
|
||||||
<choice value="no-policy-found">no-policy-found</choice>
|
|
||||||
<default>*</default>
|
|
||||||
<initialValue>*</initialValue>
|
|
||||||
</input>
|
|
||||||
</fieldset>
|
|
||||||
<row>
|
|
||||||
<panel>
|
|
||||||
<title>Reporting organizations</title>
|
|
||||||
<table>
|
|
||||||
<search>
|
|
||||||
<query>index=email sourcetype=smtp:tls organization_name=$organization_name$ policies{}.policy_domain=$policy_domain$
|
|
||||||
| rename policies{}.policy_domain as policy_domain
|
|
||||||
| rename policies{}.policy_type as policy_type
|
|
||||||
| rename policies{}.failed_session_count as failed_sessions
|
|
||||||
| rename policies{}.failure_details{}.failed_session_count as failed_sessions
|
|
||||||
| rename policies{}.successful_session_count as successful_sessions
|
|
||||||
| rename policies{}.failure_details{}.sending_mta_ip as sending_mta_ip
|
|
||||||
| rename policies{}.failure_details{}.receiving_ip as receiving_ip
|
|
||||||
| rename policies{}.failure_details{}.receiving_mx_hostname as receiving_mx_hostname
|
|
||||||
| rename policies{}.failure_details{}.result_type as failure_type
|
|
||||||
| fillnull value=0 failed_sessions
|
|
||||||
| stats sum(failed_sessions) as failed_sessions sum(successful_sessions) as successful_sessions by organization_name
|
|
||||||
| sort -successful_sessions 0</query>
|
|
||||||
<earliest>$time.earliest$</earliest>
|
|
||||||
<latest>$time.latest$</latest>
|
|
||||||
</search>
|
|
||||||
<option name="drilldown">none</option>
|
|
||||||
<option name="refresh.display">progressbar</option>
|
|
||||||
</table>
|
|
||||||
</panel>
|
|
||||||
<panel>
|
|
||||||
<title>Domains</title>
|
|
||||||
<table>
|
|
||||||
<search>
|
|
||||||
<query>index=email sourcetype=smtp:tls organization_name=$organization_name$ policies{}.policy_domain=$policy_domain$
|
|
||||||
| rename policies{}.policy_domain as policy_domain
|
|
||||||
| rename policies{}.policy_type as policy_type
|
|
||||||
| rename policies{}.failed_session_count as failed_sessions
|
|
||||||
| rename policies{}.failure_details{}.failed_session_count as failed_sessions
|
|
||||||
| rename policies{}.successful_session_count as successful_sessions
|
|
||||||
| rename policies{}.failure_details{}.sending_mta_ip as sending_mta_ip
|
|
||||||
| rename policies{}.failure_details{}.receiving_ip as receiving_ip
|
|
||||||
| rename policies{}.failure_details{}.receiving_mx_hostname as receiving_mx_hostname
|
|
||||||
| rename policies{}.failure_details{}.result_type as failure_type
|
|
||||||
| fillnull value=0 failed_sessions
|
|
||||||
| stats sum(failed_sessions) as failed_sessions sum(successful_sessions) as successful_sessions by policy_domain
|
|
||||||
| sort -successful_sessions 0</query>
|
|
||||||
<earliest>$time.earliest$</earliest>
|
|
||||||
<latest>$time.latest$</latest>
|
|
||||||
</search>
|
|
||||||
<option name="drilldown">none</option>
|
|
||||||
<option name="refresh.display">progressbar</option>
|
|
||||||
</table>
|
|
||||||
</panel>
|
|
||||||
</row>
|
|
||||||
<row>
|
|
||||||
<panel>
|
|
||||||
<title>Failure details</title>
|
|
||||||
<table>
|
|
||||||
<search>
|
|
||||||
<query>index=email sourcetype=smtp:tls organization_name=$organization_name$ policies{}.policy_domain=$policy_domain$ policies{}.failure_details{}.result_type=*
|
|
||||||
| rename policies{}.policy_domain as policy_domain
|
|
||||||
| rename policies{}.policy_type as policy_type
|
|
||||||
| rename policies{}.failed_session_count as failed_sessions
|
|
||||||
| rename policies{}.failure_details{}.failed_session_count as failed_sessions
|
|
||||||
| rename policies{}.successful_session_count as successful_sessions
|
|
||||||
| rename policies{}.failure_details{}.sending_mta_ip as sending_mta_ip
|
|
||||||
| rename policies{}.failure_details{}.receiving_ip as receiving_ip
|
|
||||||
| rename policies{}.failure_details{}.receiving_mx_hostname as receiving_mx_hostname
|
|
||||||
| fillnull value=0 failed_sessions
|
|
||||||
| rename policies{}.failure_details{}.result_type as failure_type
|
|
||||||
| table _time organization_name policy_domain policy_type failed_sessions successful_sessions sending_mta_ip receiving_ip receiving_mx_hostname failure_type
|
|
||||||
| sort by -_time 0</query>
|
|
||||||
<earliest>$time.earliest$</earliest>
|
|
||||||
<latest>$time.latest$</latest>
|
|
||||||
</search>
|
|
||||||
<option name="drilldown">none</option>
|
|
||||||
<option name="refresh.display">progressbar</option>
|
|
||||||
</table>
|
|
||||||
</panel>
|
|
||||||
</row>
|
|
||||||
</form>
|
|
||||||
58
tests.py
Executable file → Normal file
58
tests.py
Executable file → Normal file
@@ -1,6 +1,3 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
from __future__ import absolute_import, print_function, unicode_literals
|
from __future__ import absolute_import, print_function, unicode_literals
|
||||||
|
|
||||||
import os
|
import os
|
||||||
@@ -12,9 +9,6 @@ from lxml import etree
|
|||||||
import parsedmarc
|
import parsedmarc
|
||||||
import parsedmarc.utils
|
import parsedmarc.utils
|
||||||
|
|
||||||
# Detect if running in GitHub Actions to skip DNS lookups
|
|
||||||
OFFLINE_MODE = os.environ.get("GITHUB_ACTIONS", "false").lower() == "true"
|
|
||||||
|
|
||||||
|
|
||||||
def minify_xml(xml_string):
|
def minify_xml(xml_string):
|
||||||
parser = etree.XMLParser(remove_blank_text=True)
|
parser = etree.XMLParser(remove_blank_text=True)
|
||||||
@@ -49,12 +43,11 @@ class Test(unittest.TestCase):
|
|||||||
|
|
||||||
def testExtractReportXMLComparator(self):
|
def testExtractReportXMLComparator(self):
|
||||||
"""Test XML comparator function"""
|
"""Test XML comparator function"""
|
||||||
xmlnice_file = open("samples/extract_report/nice-input.xml")
|
print()
|
||||||
xmlnice = xmlnice_file.read()
|
xmlnice = open("samples/extract_report/nice-input.xml").read()
|
||||||
xmlnice_file.close()
|
print(xmlnice)
|
||||||
xmlchanged_file = open("samples/extract_report/changed-input.xml")
|
xmlchanged = minify_xml(open("samples/extract_report/changed-input.xml").read())
|
||||||
xmlchanged = minify_xml(xmlchanged_file.read())
|
print(xmlchanged)
|
||||||
xmlchanged_file.close()
|
|
||||||
self.assertTrue(compare_xml(xmlnice, xmlnice))
|
self.assertTrue(compare_xml(xmlnice, xmlnice))
|
||||||
self.assertTrue(compare_xml(xmlchanged, xmlchanged))
|
self.assertTrue(compare_xml(xmlchanged, xmlchanged))
|
||||||
self.assertFalse(compare_xml(xmlnice, xmlchanged))
|
self.assertFalse(compare_xml(xmlnice, xmlchanged))
|
||||||
@@ -69,9 +62,7 @@ class Test(unittest.TestCase):
|
|||||||
data = f.read()
|
data = f.read()
|
||||||
print("Testing {0}: ".format(file), end="")
|
print("Testing {0}: ".format(file), end="")
|
||||||
xmlout = parsedmarc.extract_report(data)
|
xmlout = parsedmarc.extract_report(data)
|
||||||
xmlin_file = open("samples/extract_report/nice-input.xml")
|
xmlin = open("samples/extract_report/nice-input.xml").read()
|
||||||
xmlin = xmlin_file.read()
|
|
||||||
xmlin_file.close()
|
|
||||||
self.assertTrue(compare_xml(xmlout, xmlin))
|
self.assertTrue(compare_xml(xmlout, xmlin))
|
||||||
print("Passed!")
|
print("Passed!")
|
||||||
|
|
||||||
@@ -80,10 +71,8 @@ class Test(unittest.TestCase):
|
|||||||
print()
|
print()
|
||||||
file = "samples/extract_report/nice-input.xml"
|
file = "samples/extract_report/nice-input.xml"
|
||||||
print("Testing {0}: ".format(file), end="")
|
print("Testing {0}: ".format(file), end="")
|
||||||
xmlout = parsedmarc.extract_report_from_file_path(file)
|
xmlout = parsedmarc.extract_report(file)
|
||||||
xmlin_file = open("samples/extract_report/nice-input.xml")
|
xmlin = open("samples/extract_report/nice-input.xml").read()
|
||||||
xmlin = xmlin_file.read()
|
|
||||||
xmlin_file.close()
|
|
||||||
self.assertTrue(compare_xml(xmlout, xmlin))
|
self.assertTrue(compare_xml(xmlout, xmlin))
|
||||||
print("Passed!")
|
print("Passed!")
|
||||||
|
|
||||||
@@ -93,9 +82,7 @@ class Test(unittest.TestCase):
|
|||||||
file = "samples/extract_report/nice-input.xml.gz"
|
file = "samples/extract_report/nice-input.xml.gz"
|
||||||
print("Testing {0}: ".format(file), end="")
|
print("Testing {0}: ".format(file), end="")
|
||||||
xmlout = parsedmarc.extract_report_from_file_path(file)
|
xmlout = parsedmarc.extract_report_from_file_path(file)
|
||||||
xmlin_file = open("samples/extract_report/nice-input.xml")
|
xmlin = open("samples/extract_report/nice-input.xml").read()
|
||||||
xmlin = xmlin_file.read()
|
|
||||||
xmlin_file.close()
|
|
||||||
self.assertTrue(compare_xml(xmlout, xmlin))
|
self.assertTrue(compare_xml(xmlout, xmlin))
|
||||||
print("Passed!")
|
print("Passed!")
|
||||||
|
|
||||||
@@ -105,13 +92,12 @@ class Test(unittest.TestCase):
|
|||||||
file = "samples/extract_report/nice-input.xml.zip"
|
file = "samples/extract_report/nice-input.xml.zip"
|
||||||
print("Testing {0}: ".format(file), end="")
|
print("Testing {0}: ".format(file), end="")
|
||||||
xmlout = parsedmarc.extract_report_from_file_path(file)
|
xmlout = parsedmarc.extract_report_from_file_path(file)
|
||||||
xmlin_file = open("samples/extract_report/nice-input.xml")
|
print(xmlout)
|
||||||
xmlin = minify_xml(xmlin_file.read())
|
xmlin = minify_xml(open("samples/extract_report/nice-input.xml").read())
|
||||||
xmlin_file.close()
|
print(xmlin)
|
||||||
self.assertTrue(compare_xml(xmlout, xmlin))
|
self.assertTrue(compare_xml(xmlout, xmlin))
|
||||||
xmlin_file = open("samples/extract_report/changed-input.xml")
|
xmlin = minify_xml(open("samples/extract_report/changed-input.xml").read())
|
||||||
xmlin = xmlin_file.read()
|
print(xmlin)
|
||||||
xmlin_file.close()
|
|
||||||
self.assertFalse(compare_xml(xmlout, xmlin))
|
self.assertFalse(compare_xml(xmlout, xmlin))
|
||||||
print("Passed!")
|
print("Passed!")
|
||||||
|
|
||||||
@@ -124,7 +110,7 @@ class Test(unittest.TestCase):
|
|||||||
continue
|
continue
|
||||||
print("Testing {0}: ".format(sample_path), end="")
|
print("Testing {0}: ".format(sample_path), end="")
|
||||||
parsed_report = parsedmarc.parse_report_file(
|
parsed_report = parsedmarc.parse_report_file(
|
||||||
sample_path, always_use_local_files=True, offline=OFFLINE_MODE
|
sample_path, always_use_local_files=True
|
||||||
)["report"]
|
)["report"]
|
||||||
parsedmarc.parsed_aggregate_reports_to_csv(parsed_report)
|
parsedmarc.parsed_aggregate_reports_to_csv(parsed_report)
|
||||||
print("Passed!")
|
print("Passed!")
|
||||||
@@ -132,7 +118,7 @@ class Test(unittest.TestCase):
|
|||||||
def testEmptySample(self):
|
def testEmptySample(self):
|
||||||
"""Test empty/unparasable report"""
|
"""Test empty/unparasable report"""
|
||||||
with self.assertRaises(parsedmarc.ParserError):
|
with self.assertRaises(parsedmarc.ParserError):
|
||||||
parsedmarc.parse_report_file("samples/empty.xml", offline=OFFLINE_MODE)
|
parsedmarc.parse_report_file("samples/empty.xml")
|
||||||
|
|
||||||
def testForensicSamples(self):
|
def testForensicSamples(self):
|
||||||
"""Test sample forensic/ruf/failure DMARC reports"""
|
"""Test sample forensic/ruf/failure DMARC reports"""
|
||||||
@@ -142,12 +128,8 @@ class Test(unittest.TestCase):
|
|||||||
print("Testing {0}: ".format(sample_path), end="")
|
print("Testing {0}: ".format(sample_path), end="")
|
||||||
with open(sample_path) as sample_file:
|
with open(sample_path) as sample_file:
|
||||||
sample_content = sample_file.read()
|
sample_content = sample_file.read()
|
||||||
parsed_report = parsedmarc.parse_report_email(
|
parsed_report = parsedmarc.parse_report_email(sample_content)["report"]
|
||||||
sample_content, offline=OFFLINE_MODE
|
parsed_report = parsedmarc.parse_report_file(sample_path)["report"]
|
||||||
)["report"]
|
|
||||||
parsed_report = parsedmarc.parse_report_file(
|
|
||||||
sample_path, offline=OFFLINE_MODE
|
|
||||||
)["report"]
|
|
||||||
parsedmarc.parsed_forensic_reports_to_csv(parsed_report)
|
parsedmarc.parsed_forensic_reports_to_csv(parsed_report)
|
||||||
print("Passed!")
|
print("Passed!")
|
||||||
|
|
||||||
@@ -159,9 +141,7 @@ class Test(unittest.TestCase):
|
|||||||
if os.path.isdir(sample_path):
|
if os.path.isdir(sample_path):
|
||||||
continue
|
continue
|
||||||
print("Testing {0}: ".format(sample_path), end="")
|
print("Testing {0}: ".format(sample_path), end="")
|
||||||
parsed_report = parsedmarc.parse_report_file(
|
parsed_report = parsedmarc.parse_report_file(sample_path)["report"]
|
||||||
sample_path, offline=OFFLINE_MODE
|
|
||||||
)["report"]
|
|
||||||
parsedmarc.parsed_smtp_tls_reports_to_csv(parsed_report)
|
parsedmarc.parsed_smtp_tls_reports_to_csv(parsed_report)
|
||||||
print("Passed!")
|
print("Passed!")
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user