Mirror of https://github.com/domainaware/parsedmarc.git, synced 2026-03-04 13:56:26 +00:00.

Compare commits: 8.16.1...copilot/dr (194 commits).
.github/workflows/docker.yml (vendored, 10 lines changed)

@@ -24,11 +24,11 @@ jobs:
     steps:
       - name: Checkout repository
-        uses: actions/checkout@v3
+        uses: actions/checkout@v5

      - name: Docker meta
        id: meta
-        uses: docker/metadata-action@v3
+        uses: docker/metadata-action@v5
        with:
          images: |
            ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
@@ -40,16 +40,14 @@ jobs:
            type=semver,pattern={{major}}.{{minor}}

      - name: Log in to the Container registry
-        # https://github.com/docker/login-action/releases/tag/v2.0.0
-        uses: docker/login-action@49ed152c8eca782a232dede0303416e8f356c37b
+        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Build and push Docker image
-        # https://github.com/docker/build-push-action/releases/tag/v3.0.0
-        uses: docker/build-push-action@e551b19e49efd4e98792db7592c17c09b89db8d8
+        uses: docker/build-push-action@v6
        with:
          context: .
          push: ${{ github.event_name == 'release' }}
.github/workflows/python-tests.yml (vendored, 34 lines changed)

@@ -11,31 +11,37 @@ on:

 jobs:
   build:

     runs-on: ubuntu-latest

+    services:
+      elasticsearch:
+        image: elasticsearch:8.19.7
+        env:
+          discovery.type: single-node
+          cluster.name: parsedmarc-cluster
+          discovery.seed_hosts: elasticsearch
+          bootstrap.memory_lock: true
+          xpack.security.enabled: false
+          xpack.license.self_generated.type: basic
+        ports:
+          - 9200:9200
+          - 9300:9300
+
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
+        python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]

     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v5
      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@v6
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install system dependencies
        run: |
-          sudo apt-get update
-          sudo apt-get install -y libemail-outlook-message-perl
-          wget -qO - https://artifacts.elastic.co/GPG-KEY-elasticsearch | sudo gpg --dearmor -o /usr/share/keyrings/elasticsearch-keyring.gpg
-          sudo apt-get install apt-transport-https
-          echo "deb [signed-by=/usr/share/keyrings/elasticsearch-keyring.gpg] https://artifacts.elastic.co/packages/8.x/apt stable main" | sudo tee /etc/apt/sources.list.d/elastic-8.x.list
-          sudo apt-get update && sudo apt-get install elasticsearch
-          sudo sed -i 's/xpack.security.enabled: true/xpack.security.enabled: false/' /etc/elasticsearch/elasticsearch.yml
-          sudo systemctl restart elasticsearch
-          sudo systemctl --no-pager status elasticsearch
+          sudo apt-get -q update
+          sudo apt-get -qy install libemail-outlook-message-perl
      - name: Install Python dependencies
        run: |
          python -m pip install --upgrade pip
@@ -59,6 +65,6 @@ jobs:
        run: |
          hatch build
      - name: Upload coverage to Codecov
-        uses: codecov/codecov-action@v4
+        uses: codecov/codecov-action@v5
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
.gitignore (vendored, 10 lines changed)

@@ -106,7 +106,7 @@ ENV/
 .idea/

 # VS Code launch config
-.vscode/launch.json
+#.vscode/launch.json

 # Visual Studio Code settings
 #.vscode/
@@ -136,4 +136,12 @@ samples/private

 *.html
 *.sqlite-journal

+parsedmarc.ini
 scratch.py

+parsedmarc/resources/maps/base_reverse_dns.csv
+parsedmarc/resources/maps/unknown_base_reverse_dns.csv
+parsedmarc/resources/maps/sus_domains.csv
+parsedmarc/resources/maps/unknown_domains.txt
+*.bak
.vscode/launch.json (new file, vendored, 45 lines)

@@ -0,0 +1,45 @@
{
    // Use IntelliSense to learn about possible attributes.
    // Hover to view descriptions of existing attributes.
    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
    "version": "0.2.0",
    "configurations": [
        {
            "name": "Python Debugger: Current File",
            "type": "debugpy",
            "request": "launch",
            "program": "${file}",
            "console": "integratedTerminal"
        },
        {
            "name": "tests.py",
            "type": "debugpy",
            "request": "launch",
            "program": "tests.py",
            "console": "integratedTerminal"
        },
        {
            "name": "sample",
            "type": "debugpy",
            "request": "launch",
            "module": "parsedmarc.cli",
            "args": ["samples/private/sample"]
        },
        {
            "name": "sortlists.py",
            "type": "debugpy",
            "request": "launch",
            "program": "sortlists.py",
            "cwd": "${workspaceFolder}/parsedmarc/resources/maps",
            "console": "integratedTerminal"
        },
        {
            "name": "find_unknown_base_reverse_dns.py",
            "type": "debugpy",
            "request": "launch",
            "program": "find_unknown_base_reverse_dns.py",
            "cwd": "${workspaceFolder}/parsedmarc/resources/maps",
            "console": "integratedTerminal"
        }
    ]
}
.vscode/settings.json (vendored, 283 lines changed; 131 lines before, 166 after)

The diff adds a "[python]" editor-settings block at the top of the file:

    "[python]": {
        "editor.defaultFormatter": "charliermarsh.ruff",
        "editor.formatOnSave": true,

        // Let Ruff handle lint fixes + import sorting on save
        "editor.codeActionsOnSave": {
            "source.fixAll.ruff": "explicit",
            "source.organizeImports.ruff": "explicit"
        }
    },

and adds the following entries to the "cSpell.words" list, with no existing entries removed: "boto", "GELF", "geoip", "httplib", "ifhost", "imapclient", "infile", "kafkaclient", "loganalytics", "MAXHEADERS", "mhdw", "Mimecast", "MSSP", "multiprocess", "opensearch", "opensearchpy", "pbar", "privatesuffix", "publicsuffixlist", "pygelf", "pytest", "sdist", "sortlists", "sortmaps", and "Uncategorized". The "markdownlint.config" setting ("MD024": false) is unchanged.
CHANGELOG.md (694 lines changed; diff suppressed because it is too large)
@@ -1,4 +1,4 @@
-ARG BASE_IMAGE=python:3.9-slim
+ARG BASE_IMAGE=python:3.13-slim
 ARG USERNAME=parsedmarc
 ARG USER_UID=1000
 ARG USER_GID=$USER_UID
README.md (41 lines changed)

@@ -9,7 +9,7 @@ Package](https://img.shields.io/pypi/v/parsedmarc.svg)](https://pypi.org/project
 [](https://pypistats.org/packages/parsedmarc)

 <p align="center">
-  <img src="https://github.com/domainaware/parsedmarc/raw/master/docs/source/_static/screenshots/dmarc-summary-charts.png?raw=true" alt="A screenshot of DMARC summary charts in Kibana"/>
+  <img src="https://raw.githubusercontent.com/domainaware/parsedmarc/refs/heads/master/docs/source/_static/screenshots/dmarc-summary-charts.png?raw=true" alt="A screenshot of DMARC summary charts in Kibana"/>
 </p>

 `parsedmarc` is a Python module and CLI utility for parsing DMARC
@@ -23,25 +23,42 @@ ProofPoint Email Fraud Defense, and Valimail.

 ## Help Wanted

-This project is maintained by one developer. Please consider
-reviewing the open
-[issues](https://github.com/domainaware/parsedmarc/issues) to see how
-you can contribute code, documentation, or user support. Assistance on
-the pinned issues would be particularly helpful.
+This project is maintained by one developer. Please consider reviewing the open
+[issues](https://github.com/domainaware/parsedmarc/issues) to see how you can
+contribute code, documentation, or user support. Assistance on the pinned
+issues would be particularly helpful.

 Thanks to all
 [contributors](https://github.com/domainaware/parsedmarc/graphs/contributors)!

 ## Features

-- Parses draft and 1.0 standard aggregate/rua reports
-- Parses forensic/failure/ruf reports
-- Can parse reports from an inbox over IMAP, Microsoft Graph, or Gmail
-  API
+- Parses draft and 1.0 standard aggregate/rua DMARC reports
+- Parses forensic/failure/ruf DMARC reports
+- Parses reports from SMTP TLS Reporting
+- Can parse reports from an inbox over IMAP, Microsoft Graph, or Gmail API
 - Transparently handles gzip or zip compressed reports
 - Consistent data structures
 - Simple JSON and/or CSV output
 - Optionally email the results
-- Optionally send the results to Elasticsearch and/or Splunk, for use
-  with premade dashboards
+- Optionally send the results to Elasticsearch, Opensearch, and/or Splunk, for
+  use with premade dashboards
 - Optionally send reports to Apache Kafka
+
+## Python Compatibility
+
+This project supports the following Python versions, which are either actively maintained or are the default versions
+for RHEL or Debian.
+
+| Version | Supported | Reason |
+|---------|-----------|------------------------------------------------------------|
+| < 3.6 | ❌ | End of Life (EOL) |
+| 3.6 | ❌ | Used in RHEL 8, but not supported by project dependencies |
+| 3.7 | ❌ | End of Life (EOL) |
+| 3.8 | ❌ | End of Life (EOL) |
+| 3.9 | ❌ | Used in Debian 11 and RHEL 9, but not supported by project dependencies |
+| 3.10 | ✅ | Actively maintained |
+| 3.11 | ✅ | Actively maintained; supported until June 2028 (Debian 12) |
+| 3.12 | ✅ | Actively maintained; supported until May 2035 (RHEL 10) |
+| 3.13 | ✅ | Actively maintained; supported until June 2030 (Debian 13) |
+| 3.14 | ✅ | Actively maintained |
build.sh (10 lines changed)

@@ -9,13 +9,19 @@ fi
 . venv/bin/activate
 pip install .[build]
 ruff format .
-ruff check .
 cd docs
 make clean
 make html
 touch build/html/.nojekyll
-cp -rf build/html/* ../../parsedmarc-docs/
+if [ -d "../../parsedmarc-docs" ]; then
+    cp -rf build/html/* ../../parsedmarc-docs/
+fi
 cd ..
+cd parsedmarc/resources/maps
+python3 sortlists.py
+echo "Checking for invalid UTF-8 bytes in base_reverse_dns_map.csv"
+python3 find_bad_utf8.py base_reverse_dns_map.csv
+cd ../../..
 python3 tests.py
 rm -rf dist/ build/
 hatch build
ci.ini (1 line changed)

@@ -3,6 +3,7 @@ save_aggregate = True
 save_forensic = True
 save_smtp_tls = True
 debug = True
+offline = True

 [elasticsearch]
 hosts = http://localhost:9200
@@ -1,8 +1,6 @@
-version: '3.7'
-
 services:
   elasticsearch:
-    image: docker.elastic.co/elasticsearch/elasticsearch:8.3.1
+    image: docker.elastic.co/elasticsearch/elasticsearch:8.19.7
     environment:
       - network.host=127.0.0.1
       - http.host=0.0.0.0
@@ -14,7 +12,7 @@ services:
       - xpack.security.enabled=false
       - xpack.license.self_generated.type=basic
     ports:
-      - 127.0.0.1:9200:9200
+      - "127.0.0.1:9200:9200"
     ulimits:
       memlock:
         soft: -1
@@ -28,3 +26,30 @@ services:
       interval: 10s
       timeout: 10s
       retries: 24
+
+  opensearch:
+    image: opensearchproject/opensearch:2
+    environment:
+      - network.host=127.0.0.1
+      - http.host=0.0.0.0
+      - node.name=opensearch
+      - discovery.type=single-node
+      - cluster.name=parsedmarc-cluster
+      - discovery.seed_hosts=opensearch
+      - bootstrap.memory_lock=true
+      - OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_INITIAL_ADMIN_PASSWORD}
+    ports:
+      - "127.0.0.1:9201:9200"
+    ulimits:
+      memlock:
+        soft: -1
+        hard: -1
+    healthcheck:
+      test:
+        [
+          "CMD-SHELL",
+          "curl -s -XGET http://localhost:9201/_cluster/health?pretty | grep status | grep -q '\\(green\\|yellow\\)'"
+        ]
+      interval: 10s
+      timeout: 10s
+      retries: 24
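As an aside (not part of the diff above): the new opensearch service reads its admin password from the OPENSEARCH_INITIAL_ADMIN_PASSWORD environment variable referenced in the compose file, so a minimal local bring-up might look like the following sketch; the password value is only a placeholder.

```bash
# Placeholder admin password required by the opensearch service defined above.
export OPENSEARCH_INITIAL_ADMIN_PASSWORD='ChangeMe_ExamplePassw0rd!'

# Start both search backends defined in the compose file.
docker compose up -d elasticsearch opensearch

# Elasticsearch is published on 127.0.0.1:9200 with xpack security disabled,
# so a plain HTTP health check works once its healthcheck passes.
curl -s http://127.0.0.1:9200/_cluster/health?pretty
```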
@@ -21,7 +21,6 @@
    :members:
 ```

-
 ## parsedmarc.splunk

 ```{eval-rst}
@@ -29,6 +28,13 @@
    :members:
 ```

+## parsedmarc.types
+
+```{eval-rst}
+.. automodule:: parsedmarc.types
+   :members:
+```
+
 ## parsedmarc.utils

 ```{eval-rst}
@@ -20,7 +20,7 @@ from parsedmarc import __version__
 # -- Project information -----------------------------------------------------

 project = "parsedmarc"
-copyright = "2018 - 2023, Sean Whalen and contributors"
+copyright = "2018 - 2025, Sean Whalen and contributors"
 author = "Sean Whalen and contributors"

 # The version info for the project you're documenting, acts as replacement for
@@ -33,17 +33,36 @@ and Valimail.

 ## Features

-- Parses draft and 1.0 standard aggregate/rua reports
-- Parses forensic/failure/ruf reports
+- Parses draft and 1.0 standard aggregate/rua DMARC reports
+- Parses forensic/failure/ruf DMARC reports
+- Parses reports from SMTP TLS Reporting
 - Can parse reports from an inbox over IMAP, Microsoft Graph, or Gmail API
 - Transparently handles gzip or zip compressed reports
 - Consistent data structures
 - Simple JSON and/or CSV output
 - Optionally email the results
-- Optionally send the results to Elasticsearch/OpenSearch and/or Splunk, for use with
-  premade dashboards
+- Optionally send the results to Elasticsearch, Opensearch, and/or Splunk, for use
+  with premade dashboards
 - Optionally send reports to Apache Kafka
+
+## Python Compatibility
+
+This project supports the following Python versions, which are either actively maintained or are the default versions
+for RHEL or Debian.
+
+| Version | Supported | Reason |
+|---------|-----------|------------------------------------------------------------|
+| < 3.6 | ❌ | End of Life (EOL) |
+| 3.6 | ❌ | Used in RHEL 8, but not supported by project dependencies |
+| 3.7 | ❌ | End of Life (EOL) |
+| 3.8 | ❌ | End of Life (EOL) |
+| 3.9 | ❌ | Used in Debian 11 and RHEL 9, but not supported by project dependencies |
+| 3.10 | ✅ | Actively maintained |
+| 3.11 | ✅ | Actively maintained; supported until June 2028 (Debian 12) |
+| 3.12 | ✅ | Actively maintained; supported until May 2035 (RHEL 10) |
+| 3.13 | ✅ | Actively maintained; supported until June 2030 (Debian 13) |
+| 3.14 | ✅ | Actively maintained |

 ```{toctree}
 :caption: 'Contents'
 :maxdepth: 2
@@ -162,10 +162,10 @@ sudo -u parsedmarc virtualenv /opt/parsedmarc/venv
 ```

 CentOS/RHEL 8 systems use Python 3.6 by default, so on those systems
-explicitly tell `virtualenv` to use `python3.9` instead
+explicitly tell `virtualenv` to use `python3.10` instead

 ```bash
-sudo -u parsedmarc virtualenv -p python3.9 /opt/parsedmarc/venv
+sudo -u parsedmarc virtualenv -p python3.10 /opt/parsedmarc/venv
 ```

 Activate the virtualenv
@@ -199,7 +199,7 @@ sudo apt-get install libemail-outlook-message-perl
 [geoipupdate releases page on github]: https://github.com/maxmind/geoipupdate/releases
 [ip to country lite database]: https://db-ip.com/db/download/ip-to-country-lite
 [license keys]: https://www.maxmind.com/en/accounts/current/license-key
-[maxmind geoipupdate page]: https://dev.maxmind.com/geoip/geoipupdate/
+[maxmind geoipupdate page]: https://dev.maxmind.com/geoip/updating-databases/
 [maxmind geolite2 country database]: https://dev.maxmind.com/geoip/geolite2-free-geolocation-data
 [registering for a free geolite2 account]: https://www.maxmind.com/en/geolite2/signup
 [to comply with various privacy regulations]: https://blog.maxmind.com/2019/12/18/significant-changes-to-accessing-and-using-geolite2-databases/
@@ -23,6 +23,8 @@ of the report schema.
     "report_id": "9391651994964116463",
     "begin_date": "2012-04-27 20:00:00",
     "end_date": "2012-04-28 19:59:59",
+    "timespan_requires_normalization": false,
+    "original_timespan_seconds": 86399,
     "errors": []
   },
   "policy_published": {
@@ -39,8 +41,10 @@ of the report schema.
       "source": {
         "ip_address": "72.150.241.94",
         "country": "US",
-        "reverse_dns": "adsl-72-150-241-94.shv.bellsouth.net",
-        "base_domain": "bellsouth.net"
+        "reverse_dns": null,
+        "base_domain": null,
+        "name": null,
+        "type": null
       },
       "count": 2,
       "alignment": {
@@ -74,7 +78,10 @@ of the report schema.
           "result": "pass"
         }
       ]
-    }
+    },
+    "normalized_timespan": false,
+    "interval_begin": "2012-04-28 00:00:00",
+    "interval_end": "2012-04-28 23:59:59"
   }
 ]
 }
@@ -83,8 +90,10 @@ of the report schema.

 ### CSV aggregate report

 ```text
-xml_schema,org_name,org_email,org_extra_contact_info,report_id,begin_date,end_date,errors,domain,adkim,aspf,p,sp,pct,fo,source_ip_address,source_country,source_reverse_dns,source_base_domain,count,spf_aligned,dkim_aligned,dmarc_aligned,disposition,policy_override_reasons,policy_override_comments,envelope_from,header_from,envelope_to,dkim_domains,dkim_selectors,dkim_results,spf_domains,spf_scopes,spf_results
-draft,acme.com,noreply-dmarc-support@acme.com,http://acme.com/dmarc/support,9391651994964116463,2012-04-27 20:00:00,2012-04-28 19:59:59,,example.com,r,r,none,none,100,0,72.150.241.94,US,adsl-72-150-241-94.shv.bellsouth.net,bellsouth.net,2,True,False,True,none,,,example.com,example.com,,example.com,none,fail,example.com,mfrom,pass
+xml_schema,org_name,org_email,org_extra_contact_info,report_id,begin_date,end_date,normalized_timespan,errors,domain,adkim,aspf,p,sp,pct,fo,source_ip_address,source_country,source_reverse_dns,source_base_domain,source_name,source_type,count,spf_aligned,dkim_aligned,dmarc_aligned,disposition,policy_override_reasons,policy_override_comments,envelope_from,header_from,envelope_to,dkim_domains,dkim_selectors,dkim_results,spf_domains,spf_scopes,spf_results
+draft,acme.com,noreply-dmarc-support@acme.com,http://acme.com/dmarc/support,9391651994964116463,2012-04-28 00:00:00,2012-04-28 23:59:59,False,,example.com,r,r,none,none,100,0,72.150.241.94,US,,,,,2,True,False,True,none,,,example.com,example.com,,example.com,none,fail,example.com,mfrom,pass
+draft,acme.com,noreply-dmarc-support@acme.com,http://acme.com/dmarc/support,9391651994964116463,2012-04-28 00:00:00,2012-04-28 23:59:59,False,,example.com,r,r,none,none,100,0,72.150.241.94,US,,,,,2,True,False,True,none,,,example.com,example.com,,example.com,none,fail,example.com,mfrom,pass
+
 ```

 ## Sample forensic report output
@@ -4,47 +4,50 @@

 ```text
 usage: parsedmarc [-h] [-c CONFIG_FILE] [--strip-attachment-payloads] [-o OUTPUT]
-                  [--aggregate-json-filename AGGREGATE_JSON_FILENAME]
-                  [--forensic-json-filename FORENSIC_JSON_FILENAME]
-                  [--aggregate-csv-filename AGGREGATE_CSV_FILENAME]
-                  [--forensic-csv-filename FORENSIC_CSV_FILENAME]
-                  [-n NAMESERVERS [NAMESERVERS ...]] [-t DNS_TIMEOUT] [--offline]
-                  [-s] [--verbose] [--debug] [--log-file LOG_FILE] [-v]
+                  [--aggregate-json-filename AGGREGATE_JSON_FILENAME] [--forensic-json-filename FORENSIC_JSON_FILENAME]
+                  [--smtp-tls-json-filename SMTP_TLS_JSON_FILENAME] [--aggregate-csv-filename AGGREGATE_CSV_FILENAME]
+                  [--forensic-csv-filename FORENSIC_CSV_FILENAME] [--smtp-tls-csv-filename SMTP_TLS_CSV_FILENAME]
+                  [-n NAMESERVERS [NAMESERVERS ...]] [-t DNS_TIMEOUT] [--offline] [-s] [-w] [--verbose] [--debug]
+                  [--log-file LOG_FILE] [--no-prettify-json] [-v]
                   [file_path ...]

 Parses DMARC reports

 positional arguments:
-  file_path             one or more paths to aggregate or forensic report
-                        files, emails, or mbox files'
+  file_path             one or more paths to aggregate or forensic report files, emails, or mbox files'

-optional arguments:
+options:
   -h, --help            show this help message and exit
   -c CONFIG_FILE, --config-file CONFIG_FILE
                         a path to a configuration file (--silent implied)
   --strip-attachment-payloads
                         remove attachment payloads from forensic report output
   -o OUTPUT, --output OUTPUT
                         write output files to the given directory
   --aggregate-json-filename AGGREGATE_JSON_FILENAME
                         filename for the aggregate JSON output file
   --forensic-json-filename FORENSIC_JSON_FILENAME
                         filename for the forensic JSON output file
+  --smtp-tls-json-filename SMTP_TLS_JSON_FILENAME
+                        filename for the SMTP TLS JSON output file
   --aggregate-csv-filename AGGREGATE_CSV_FILENAME
                         filename for the aggregate CSV output file
   --forensic-csv-filename FORENSIC_CSV_FILENAME
                         filename for the forensic CSV output file
+  --smtp-tls-csv-filename SMTP_TLS_CSV_FILENAME
+                        filename for the SMTP TLS CSV output file
   -n NAMESERVERS [NAMESERVERS ...], --nameservers NAMESERVERS [NAMESERVERS ...]
                         nameservers to query
   -t DNS_TIMEOUT, --dns_timeout DNS_TIMEOUT
-                        number of seconds to wait for an answer from DNS
-                        (default: 2.0)
+                        number of seconds to wait for an answer from DNS (default: 2.0)
   --offline             do not make online queries for geolocation or DNS
-  -s, --silent          only print errors and warnings
+  -s, --silent          only print errors
+  -w, --warnings        print warnings in addition to errors
   --verbose             more verbose output
   --debug               print debugging information
   --log-file LOG_FILE   output logging to a file
+  --no-prettify-json    output JSON in a single line without indentation
   -v, --version         show program's version number and exit
 ```
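As an illustration (not part of the diff): combining a few of the options from the updated usage text above, an offline run that also prints warnings and writes compact JSON output might look like this sketch; the input path is only a placeholder.

```bash
# Parse local report files without online DNS or geolocation lookups (--offline),
# print warnings in addition to errors (-w), emit single-line JSON
# (--no-prettify-json), and write output files to ./reports (-o).
parsedmarc --offline -w --no-prettify-json -o reports samples/aggregate/*.xml
```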
 :::{note}
@@ -120,8 +123,10 @@ The full set of configuration options are:
   Elasticsearch, Splunk and/or S3
 - `save_smtp_tls` - bool: Save SMTP-STS report data to
   Elasticsearch, Splunk and/or S3
+- `index_prefix_domain_map` - bool: A path mapping of Opensearch/Elasticsearch index prefixes to domain names
 - `strip_attachment_payloads` - bool: Remove attachment
   payloads from results
+- `silent` - bool: Set this to `False` to output results to STDOUT
 - `output` - str: Directory to place JSON and CSV files in. This is required if you set either of the JSON output file options.
 - `aggregate_json_filename` - str: filename for the aggregate
   JSON output file
@@ -167,7 +172,7 @@ The full set of configuration options are:
   IDLE response or the number of seconds until the next
   mail check (Default: `30`)
 - `since` - str: Search for messages since certain time. (Examples: `5m|3h|2d|1w`)
-  Acceptable units - {"m":"minutes", "h":"hours", "d":"days", "w":"weeks"}).
+  Acceptable units - {"m":"minutes", "h":"hours", "d":"days", "w":"weeks"}.
   Defaults to `1d` if incorrect value is provided.
 - `imap`
   - `host` - str: The IMAP server hostname or IP address
@@ -208,6 +213,8 @@ The full set of configuration options are:
   - `mailbox` - str: The mailbox name. This defaults to the
     current user if using the UsernamePassword auth method, but
     could be a shared mailbox if the user has access to the mailbox
+  - `graph_url` - str: Microsoft Graph URL. Allows for use of National Clouds (ex Azure Gov)
+    (Default: https://graph.microsoft.com)
   - `token_file` - str: Path to save the token file
     (Default: `.token`)
   - `allow_unencrypted_storage` - bool: Allows the Azure Identity
@@ -250,7 +257,7 @@ The full set of configuration options are:
   :::
   - `user` - str: Basic auth username
   - `password` - str: Basic auth password
-  - `apiKey` - str: API key
+  - `api_key` - str: API key
   - `ssl` - bool: Use an encrypted SSL/TLS connection
     (Default: `True`)
   - `timeout` - float: Timeout in seconds (Default: 60)
@@ -273,7 +280,7 @@ The full set of configuration options are:
   :::
   - `user` - str: Basic auth username
   - `password` - str: Basic auth password
-  - `apiKey` - str: API key
+  - `api_key` - str: API key
   - `ssl` - bool: Use an encrypted SSL/TLS connection
     (Default: `True`)
   - `timeout` - float: Timeout in seconds (Default: 60)
@@ -329,7 +336,59 @@ The full set of configuration options are:
   - `secret_access_key` - str: The secret access key (Optional)
 - `syslog`
   - `server` - str: The Syslog server name or IP address
-  - `port` - int: The UDP port to use (Default: `514`)
+  - `port` - int: The port to use (Default: `514`)
+  - `protocol` - str: The protocol to use: `udp`, `tcp`, or `tls` (Default: `udp`)
+  - `cafile_path` - str: Path to CA certificate file for TLS server verification (Optional)
+  - `certfile_path` - str: Path to client certificate file for TLS authentication (Optional)
+  - `keyfile_path` - str: Path to client private key file for TLS authentication (Optional)
+  - `timeout` - float: Connection timeout in seconds for TCP/TLS (Default: `5.0`)
+  - `retry_attempts` - int: Number of retry attempts for failed connections (Default: `3`)
+  - `retry_delay` - int: Delay in seconds between retry attempts (Default: `5`)
+
+  **Example UDP configuration (default):**
+
+  ```ini
+  [syslog]
+  server = syslog.example.com
+  port = 514
+  ```
+
+  **Example TCP configuration:**
+
+  ```ini
+  [syslog]
+  server = syslog.example.com
+  port = 6514
+  protocol = tcp
+  timeout = 10.0
+  retry_attempts = 5
+  ```
+
+  **Example TLS configuration with server verification:**
+
+  ```ini
+  [syslog]
+  server = syslog.example.com
+  port = 6514
+  protocol = tls
+  cafile_path = /path/to/ca-cert.pem
+  timeout = 10.0
+  ```
+
+  **Example TLS configuration with mutual authentication:**
+
+  ```ini
+  [syslog]
+  server = syslog.example.com
+  port = 6514
+  protocol = tls
+  cafile_path = /path/to/ca-cert.pem
+  certfile_path = /path/to/client-cert.pem
+  keyfile_path = /path/to/client-key.pem
+  timeout = 10.0
+  retry_attempts = 3
+  retry_delay = 5
+  ```
 - `gmail_api`
   - `credentials_file` - str: Path to file containing the
     credentials, None to disable (Default: `None`)
@@ -367,7 +426,7 @@ The full set of configuration options are:
   - `mode` - str: The GELF transport type to use. Valid modes: `tcp`, `udp`, `tls`

 - `maildir`
-  - `reports_folder` - str: Full path for mailbox maidir location (Default: `INBOX`)
+  - `maildir_path` - str: Full path for mailbox maidir location (Default: `INBOX`)
   - `maildir_create` - bool: Create maildir if not present (Default: False)

 - `webhook` - Post the individual reports to a webhook url with the report as the JSON body
@@ -443,6 +502,28 @@ PUT _cluster/settings
 Increasing this value increases resource usage.
 :::
+
+## Multi-tenant support
+
+Starting in `8.19.0`, ParseDMARC provides multi-tenant support by placing data into separate OpenSearch or Elasticsearch index prefixes. To set this up, create a YAML file that is formatted where each key is a tenant name, and the value is a list of domains related to that tenant, not including subdomains, like this:
+
+```yaml
+example:
+  - example.com
+  - example.net
+  - example.org
+
+whalensolutions:
+  - whalensolutions.com
+```
+
+Save it to disk where the user running ParseDMARC can read it, then set `index_prefix_domain_map` to that filepath in the `[general]` section of the ParseDMARC configuration file and do not set an `index_prefix` option in the `[elasticsearch]` or `[opensearch]` sections.
+
+When configured correctly, if ParseDMARC finds that a report is related to a domain in the mapping, the report will be saved in an index name that has the tenant name prefixed to it with a trailing underscore. Then, you can use the security features of Opensearch or the ELK stack to only grant users access to the indexes that they need.
+
+:::{note}
+A domain cannot be used in multiple tenant lists. Only the first prefix list that contains the matching domain is used.
+:::

 ## Running parsedmarc as a systemd service

 Use systemd to run `parsedmarc` as a service and process reports as
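As an illustration of the multi-tenant option described above (not part of the diff; paths and hosts are placeholders): the YAML tenant map is referenced from the `[general]` section, and no `index_prefix` is set in the `[elasticsearch]` or `[opensearch]` sections.

```ini
; Illustrative configuration only; paths and hosts are placeholders.
[general]
save_aggregate = True
save_forensic = True
; Path to the tenant-to-domain YAML map described above
index_prefix_domain_map = /etc/parsedmarc/index_prefix_domain_map.yml

[elasticsearch]
hosts = http://localhost:9200
; No index_prefix here; per-tenant prefixes come from the map above
```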
One file diff suppressed because one or more lines are too long; two more file diffs suppressed because they are too large.
parsedmarc/constants.py (new file, 3 lines)

@@ -0,0 +1,3 @@
+__version__ = "9.1.0"
+
+USER_AGENT = f"parsedmarc/{__version__}"
@@ -1,27 +1,29 @@
 # -*- coding: utf-8 -*-

-from collections import OrderedDict
+from __future__ import annotations

-from elasticsearch_dsl.search import Q
+from typing import Any, Optional, Union
+
+from elasticsearch.helpers import reindex
 from elasticsearch_dsl import (
-    connections,
-    Object,
+    Boolean,
+    Date,
     Document,
     Index,
-    Nested,
     InnerDoc,
     Integer,
-    Text,
-    Boolean,
     Ip,
-    Date,
+    Nested,
+    Object,
     Search,
+    Text,
+    connections,
 )
-from elasticsearch.helpers import reindex
+from elasticsearch_dsl.search import Q

+from parsedmarc import InvalidForensicReport
 from parsedmarc.log import logger
 from parsedmarc.utils import human_timestamp_to_datetime
-from parsedmarc import InvalidForensicReport


 class ElasticsearchError(Exception):
@@ -67,6 +69,8 @@ class _AggregateReportDoc(Document):
     date_range = Date()
     date_begin = Date()
     date_end = Date()
+    normalized_timespan = Boolean()
+    original_timespan_seconds = Integer
     errors = Text()
     published_policy = Object(_PublishedPolicy)
     source_ip_address = Ip()
@@ -87,18 +91,18 @@ class _AggregateReportDoc(Document):
     dkim_results = Nested(_DKIMResult)
     spf_results = Nested(_SPFResult)

-    def add_policy_override(self, type_, comment):
-        self.policy_overrides.append(_PolicyOverride(type=type_, comment=comment))
+    def add_policy_override(self, type_: str, comment: str):
+        self.policy_overrides.append(_PolicyOverride(type=type_, comment=comment))  # pyright: ignore[reportCallIssue]

-    def add_dkim_result(self, domain, selector, result):
+    def add_dkim_result(self, domain: str, selector: str, result: _DKIMResult):
         self.dkim_results.append(
             _DKIMResult(domain=domain, selector=selector, result=result)
-        )
+        )  # pyright: ignore[reportCallIssue]

-    def add_spf_result(self, domain, scope, result):
-        self.spf_results.append(_SPFResult(domain=domain, scope=scope, result=result))
+    def add_spf_result(self, domain: str, scope: str, result: _SPFResult):
+        self.spf_results.append(_SPFResult(domain=domain, scope=scope, result=result))  # pyright: ignore[reportCallIssue]

-    def save(self, **kwargs):
+    def save(self, **kwargs):  # pyright: ignore[reportIncompatibleMethodOverride]
         self.passed_dmarc = False
         self.passed_dmarc = self.spf_aligned or self.dkim_aligned

@@ -131,26 +135,26 @@ class _ForensicSampleDoc(InnerDoc):
     body = Text()
     attachments = Nested(_EmailAttachmentDoc)

-    def add_to(self, display_name, address):
-        self.to.append(_EmailAddressDoc(display_name=display_name, address=address))
+    def add_to(self, display_name: str, address: str):
+        self.to.append(_EmailAddressDoc(display_name=display_name, address=address))  # pyright: ignore[reportCallIssue]

-    def add_reply_to(self, display_name, address):
+    def add_reply_to(self, display_name: str, address: str):
         self.reply_to.append(
             _EmailAddressDoc(display_name=display_name, address=address)
-        )
+        )  # pyright: ignore[reportCallIssue]

-    def add_cc(self, display_name, address):
-        self.cc.append(_EmailAddressDoc(display_name=display_name, address=address))
+    def add_cc(self, display_name: str, address: str):
+        self.cc.append(_EmailAddressDoc(display_name=display_name, address=address))  # pyright: ignore[reportCallIssue]

-    def add_bcc(self, display_name, address):
-        self.bcc.append(_EmailAddressDoc(display_name=display_name, address=address))
+    def add_bcc(self, display_name: str, address: str):
+        self.bcc.append(_EmailAddressDoc(display_name=display_name, address=address))  # pyright: ignore[reportCallIssue]

-    def add_attachment(self, filename, content_type, sha256):
+    def add_attachment(self, filename: str, content_type: str, sha256: str):
         self.attachments.append(
             _EmailAttachmentDoc(
                 filename=filename, content_type=content_type, sha256=sha256
             )
-        )
+        )  # pyright: ignore[reportCallIssue]


 class _ForensicReportDoc(Document):
@@ -197,15 +201,15 @@ class _SMTPTLSPolicyDoc(InnerDoc):

     def add_failure_details(
         self,
-        result_type,
-        ip_address,
-        receiving_ip,
-        receiving_mx_helo,
-        failed_session_count,
-        sending_mta_ip=None,
-        receiving_mx_hostname=None,
-        additional_information_uri=None,
-        failure_reason_code=None,
+        result_type: Optional[str] = None,
+        ip_address: Optional[str] = None,
+        receiving_ip: Optional[str] = None,
+        receiving_mx_helo: Optional[str] = None,
+        failed_session_count: Optional[int] = None,
+        sending_mta_ip: Optional[str] = None,
+        receiving_mx_hostname: Optional[str] = None,
+        additional_information_uri: Optional[str] = None,
+        failure_reason_code: Union[str, int, None] = None,
     ):
         _details = _SMTPTLSFailureDetailsDoc(
             result_type=result_type,
@@ -218,7 +222,7 @@ class _SMTPTLSPolicyDoc(InnerDoc):
             additional_information=additional_information_uri,
             failure_reason_code=failure_reason_code,
         )
-        self.failure_details.append(_details)
+        self.failure_details.append(_details)  # pyright: ignore[reportCallIssue]


 class _SMTPTLSReportDoc(Document):
@@ -235,13 +239,14 @@ class _SMTPTLSReportDoc(Document):

     def add_policy(
         self,
-        policy_type,
-        policy_domain,
-        successful_session_count,
-        failed_session_count,
-        policy_string=None,
-        mx_host_patterns=None,
-        failure_details=None,
+        policy_type: str,
+        policy_domain: str,
+        successful_session_count: int,
+        failed_session_count: int,
+        *,
+        policy_string: Optional[str] = None,
+        mx_host_patterns: Optional[list[str]] = None,
+        failure_details: Optional[str] = None,
     ):
         self.policies.append(
             policy_type=policy_type,
@@ -251,7 +256,7 @@ class _SMTPTLSReportDoc(Document):
             policy_string=policy_string,
             mx_host_patterns=mx_host_patterns,
             failure_details=failure_details,
-        )
+        )  # pyright: ignore[reportCallIssue]


 class AlreadySaved(ValueError):
@@ -259,24 +264,25 @@ class AlreadySaved(ValueError):


 def set_hosts(
-    hosts,
-    use_ssl=False,
-    ssl_cert_path=None,
-    username=None,
-    password=None,
-    apiKey=None,
-    timeout=60.0,
+    hosts: Union[str, list[str]],
+    *,
+    use_ssl: bool = False,
+    ssl_cert_path: Optional[str] = None,
+    username: Optional[str] = None,
+    password: Optional[str] = None,
+    api_key: Optional[str] = None,
+    timeout: float = 60.0,
 ):
     """
     Sets the Elasticsearch hosts to use

     Args:
-        hosts (str): A single hostname or URL, or list of hostnames or URLs
-        use_ssl (bool): Use a HTTPS connection to the server
+        hosts (str | list[str]): A single hostname or URL, or list of hostnames or URLs
+        use_ssl (bool): Use an HTTPS connection to the server
         ssl_cert_path (str): Path to the certificate chain
         username (str): The username to use for authentication
         password (str): The password to use for authentication
-        apiKey (str): The Base64 encoded API key to use for authentication
+        api_key (str): The Base64 encoded API key to use for authentication
         timeout (float): Timeout in seconds
     """
     if not isinstance(hosts, list):
@@ -289,14 +295,14 @@ def set_hosts(
         conn_params["ca_certs"] = ssl_cert_path
     else:
         conn_params["verify_certs"] = False
-    if username:
+    if username and password:
         conn_params["http_auth"] = username + ":" + password
-    if apiKey:
-        conn_params["api_key"] = apiKey
+    if api_key:
+        conn_params["api_key"] = api_key
     connections.create_connection(**conn_params)


-def create_indexes(names, settings=None):
+def create_indexes(names: list[str], settings: Optional[dict[str, Any]] = None):
     """
     Create Elasticsearch indexes

@@ -319,7 +325,10 @@ def create_indexes(names, settings=None):
         raise ElasticsearchError("Elasticsearch error: {0}".format(e.__str__()))


-def migrate_indexes(aggregate_indexes=None, forensic_indexes=None):
+def migrate_indexes(
+    aggregate_indexes: Optional[list[str]] = None,
+    forensic_indexes: Optional[list[str]] = None,
+):
     """
     Updates index mappings

@@ -358,7 +367,7 @@ def migrate_indexes(aggregate_indexes=None, forensic_indexes=None):
             }
             Index(new_index_name).create()
             Index(new_index_name).put_mapping(doc_type=doc, body=body)
-            reindex(connections.get_connection(), aggregate_index_name, new_index_name)
+            reindex(connections.get_connection(), aggregate_index_name, new_index_name)  # pyright: ignore[reportArgumentType]
             Index(aggregate_index_name).delete()

     for forensic_index in forensic_indexes:
@@ -366,18 +375,18 @@ def migrate_indexes(aggregate_indexes=None, forensic_indexes=None):


 def save_aggregate_report_to_elasticsearch(
-    aggregate_report,
-    index_suffix=None,
-    index_prefix=None,
-    monthly_indexes=False,
-    number_of_shards=1,
-    number_of_replicas=0,
+    aggregate_report: dict[str, Any],
+    index_suffix: Optional[str] = None,
+    index_prefix: Optional[str] = None,
+    monthly_indexes: Optional[bool] = False,
+    number_of_shards: int = 1,
+    number_of_replicas: int = 0,
 ):
     """
     Saves a parsed DMARC aggregate report to Elasticsearch

     Args:
-        aggregate_report (OrderedDict): A parsed forensic report
+        aggregate_report (dict): A parsed forensic report
|
||||||
index_suffix (str): The suffix of the name of the index to save to
|
index_suffix (str): The suffix of the name of the index to save to
|
||||||
index_prefix (str): The prefix of the name of the index to save to
|
index_prefix (str): The prefix of the name of the index to save to
|
||||||
monthly_indexes (bool): Use monthly indexes instead of daily indexes
|
monthly_indexes (bool): Use monthly indexes instead of daily indexes
|
||||||
@@ -395,21 +404,17 @@ def save_aggregate_report_to_elasticsearch(
|
|||||||
domain = aggregate_report["policy_published"]["domain"]
|
domain = aggregate_report["policy_published"]["domain"]
|
||||||
begin_date = human_timestamp_to_datetime(metadata["begin_date"], to_utc=True)
|
begin_date = human_timestamp_to_datetime(metadata["begin_date"], to_utc=True)
|
||||||
end_date = human_timestamp_to_datetime(metadata["end_date"], to_utc=True)
|
end_date = human_timestamp_to_datetime(metadata["end_date"], to_utc=True)
|
||||||
begin_date_human = begin_date.strftime("%Y-%m-%d %H:%M:%SZ")
|
|
||||||
end_date_human = end_date.strftime("%Y-%m-%d %H:%M:%SZ")
|
|
||||||
if monthly_indexes:
|
if monthly_indexes:
|
||||||
index_date = begin_date.strftime("%Y-%m")
|
index_date = begin_date.strftime("%Y-%m")
|
||||||
else:
|
else:
|
||||||
index_date = begin_date.strftime("%Y-%m-%d")
|
index_date = begin_date.strftime("%Y-%m-%d")
|
||||||
aggregate_report["begin_date"] = begin_date
|
|
||||||
aggregate_report["end_date"] = end_date
|
|
||||||
date_range = [aggregate_report["begin_date"], aggregate_report["end_date"]]
|
|
||||||
|
|
||||||
org_name_query = Q(dict(match_phrase=dict(org_name=org_name)))
|
org_name_query = Q(dict(match_phrase=dict(org_name=org_name))) # type: ignore
|
||||||
report_id_query = Q(dict(match_phrase=dict(report_id=report_id)))
|
report_id_query = Q(dict(match_phrase=dict(report_id=report_id))) # pyright: ignore[reportArgumentType]
|
||||||
domain_query = Q(dict(match_phrase={"published_policy.domain": domain}))
|
domain_query = Q(dict(match_phrase={"published_policy.domain": domain})) # pyright: ignore[reportArgumentType]
|
||||||
begin_date_query = Q(dict(match=dict(date_begin=begin_date)))
|
begin_date_query = Q(dict(match=dict(date_begin=begin_date))) # pyright: ignore[reportArgumentType]
|
||||||
end_date_query = Q(dict(match=dict(date_end=end_date)))
|
end_date_query = Q(dict(match=dict(date_end=end_date))) # pyright: ignore[reportArgumentType]
|
||||||
|
|
||||||
if index_suffix is not None:
|
if index_suffix is not None:
|
||||||
search_index = "dmarc_aggregate_{0}*".format(index_suffix)
|
search_index = "dmarc_aggregate_{0}*".format(index_suffix)
|
||||||
@@ -421,6 +426,8 @@ def save_aggregate_report_to_elasticsearch(
|
|||||||
query = org_name_query & report_id_query & domain_query
|
query = org_name_query & report_id_query & domain_query
|
||||||
query = query & begin_date_query & end_date_query
|
query = query & begin_date_query & end_date_query
|
||||||
search.query = query
|
search.query = query
|
||||||
|
begin_date_human = begin_date.strftime("%Y-%m-%d %H:%M:%SZ")
|
||||||
|
end_date_human = end_date.strftime("%Y-%m-%d %H:%M:%SZ")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
existing = search.execute()
|
existing = search.execute()
|
||||||
@@ -450,6 +457,17 @@ def save_aggregate_report_to_elasticsearch(
|
|||||||
)
|
)
|
||||||
|
|
||||||
for record in aggregate_report["records"]:
|
for record in aggregate_report["records"]:
|
||||||
|
begin_date = human_timestamp_to_datetime(record["interval_begin"], to_utc=True)
|
||||||
|
end_date = human_timestamp_to_datetime(record["interval_end"], to_utc=True)
|
||||||
|
normalized_timespan = record["normalized_timespan"]
|
||||||
|
|
||||||
|
if monthly_indexes:
|
||||||
|
index_date = begin_date.strftime("%Y-%m")
|
||||||
|
else:
|
||||||
|
index_date = begin_date.strftime("%Y-%m-%d")
|
||||||
|
aggregate_report["begin_date"] = begin_date
|
||||||
|
aggregate_report["end_date"] = end_date
|
||||||
|
date_range = [aggregate_report["begin_date"], aggregate_report["end_date"]]
|
||||||
agg_doc = _AggregateReportDoc(
|
agg_doc = _AggregateReportDoc(
|
||||||
xml_schema=aggregate_report["xml_schema"],
|
xml_schema=aggregate_report["xml_schema"],
|
||||||
org_name=metadata["org_name"],
|
org_name=metadata["org_name"],
|
||||||
@@ -457,8 +475,9 @@ def save_aggregate_report_to_elasticsearch(
|
|||||||
org_extra_contact_info=metadata["org_extra_contact_info"],
|
org_extra_contact_info=metadata["org_extra_contact_info"],
|
||||||
report_id=metadata["report_id"],
|
report_id=metadata["report_id"],
|
||||||
date_range=date_range,
|
date_range=date_range,
|
||||||
date_begin=aggregate_report["begin_date"],
|
date_begin=begin_date,
|
||||||
date_end=aggregate_report["end_date"],
|
date_end=end_date,
|
||||||
|
normalized_timespan=normalized_timespan,
|
||||||
errors=metadata["errors"],
|
errors=metadata["errors"],
|
||||||
published_policy=published_policy,
|
published_policy=published_policy,
|
||||||
source_ip_address=record["source"]["ip_address"],
|
source_ip_address=record["source"]["ip_address"],
|
||||||
@@ -508,7 +527,7 @@ def save_aggregate_report_to_elasticsearch(
|
|||||||
number_of_shards=number_of_shards, number_of_replicas=number_of_replicas
|
number_of_shards=number_of_shards, number_of_replicas=number_of_replicas
|
||||||
)
|
)
|
||||||
create_indexes([index], index_settings)
|
create_indexes([index], index_settings)
|
||||||
agg_doc.meta.index = index
|
agg_doc.meta.index = index # pyright: ignore[reportOptionalMemberAccess, reportAttributeAccessIssue]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
agg_doc.save()
|
agg_doc.save()
|
||||||
@@ -517,18 +536,18 @@ def save_aggregate_report_to_elasticsearch(
|
|||||||
|
|
||||||
|
|
||||||
def save_forensic_report_to_elasticsearch(
|
def save_forensic_report_to_elasticsearch(
|
||||||
forensic_report,
|
forensic_report: dict[str, Any],
|
||||||
index_suffix=None,
|
index_suffix: Optional[Any] = None,
|
||||||
index_prefix=None,
|
index_prefix: Optional[str] = None,
|
||||||
monthly_indexes=False,
|
monthly_indexes: Optional[bool] = False,
|
||||||
number_of_shards=1,
|
number_of_shards: int = 1,
|
||||||
number_of_replicas=0,
|
number_of_replicas: int = 0,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Saves a parsed DMARC forensic report to Elasticsearch
|
Saves a parsed DMARC forensic report to Elasticsearch
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
forensic_report (OrderedDict): A parsed forensic report
|
forensic_report (dict): A parsed forensic report
|
||||||
index_suffix (str): The suffix of the name of the index to save to
|
index_suffix (str): The suffix of the name of the index to save to
|
||||||
index_prefix (str): The prefix of the name of the index to save to
|
index_prefix (str): The prefix of the name of the index to save to
|
||||||
monthly_indexes (bool): Use monthly indexes instead of daily
|
monthly_indexes (bool): Use monthly indexes instead of daily
|
||||||
@@ -548,12 +567,12 @@ def save_forensic_report_to_elasticsearch(
|
|||||||
sample_date = forensic_report["parsed_sample"]["date"]
|
sample_date = forensic_report["parsed_sample"]["date"]
|
||||||
sample_date = human_timestamp_to_datetime(sample_date)
|
sample_date = human_timestamp_to_datetime(sample_date)
|
||||||
original_headers = forensic_report["parsed_sample"]["headers"]
|
original_headers = forensic_report["parsed_sample"]["headers"]
|
||||||
headers = OrderedDict()
|
headers: dict[str, Any] = {}
|
||||||
for original_header in original_headers:
|
for original_header in original_headers:
|
||||||
headers[original_header.lower()] = original_headers[original_header]
|
headers[original_header.lower()] = original_headers[original_header]
|
||||||
|
|
||||||
arrival_date_human = forensic_report["arrival_date_utc"]
|
arrival_date = human_timestamp_to_datetime(forensic_report["arrival_date_utc"])
|
||||||
arrival_date = human_timestamp_to_datetime(arrival_date_human)
|
arrival_date_epoch_milliseconds = int(arrival_date.timestamp() * 1000)
|
||||||
|
|
||||||
if index_suffix is not None:
|
if index_suffix is not None:
|
||||||
search_index = "dmarc_forensic_{0}*".format(index_suffix)
|
search_index = "dmarc_forensic_{0}*".format(index_suffix)
|
||||||
@@ -562,24 +581,39 @@ def save_forensic_report_to_elasticsearch(
|
|||||||
if index_prefix is not None:
|
if index_prefix is not None:
|
||||||
search_index = "{0}{1}".format(index_prefix, search_index)
|
search_index = "{0}{1}".format(index_prefix, search_index)
|
||||||
search = Search(index=search_index)
|
search = Search(index=search_index)
|
||||||
arrival_query = {"match": {"arrival_date": arrival_date}}
|
q = Q(dict(match=dict(arrival_date=arrival_date_epoch_milliseconds))) # pyright: ignore[reportArgumentType]
|
||||||
q = Q(arrival_query)
|
|
||||||
|
|
||||||
from_ = None
|
from_ = None
|
||||||
to_ = None
|
to_ = None
|
||||||
subject = None
|
subject = None
|
||||||
if "from" in headers:
|
if "from" in headers:
|
||||||
from_ = headers["from"]
|
# We convert the FROM header from a string list to a flat string.
|
||||||
from_query = {"match_phrase": {"sample.headers.from": from_}}
|
headers["from"] = headers["from"][0]
|
||||||
q = q & Q(from_query)
|
if headers["from"][0] == "":
|
||||||
|
headers["from"] = headers["from"][1]
|
||||||
|
else:
|
||||||
|
headers["from"] = " <".join(headers["from"]) + ">"
|
||||||
|
|
||||||
|
from_ = dict()
|
||||||
|
from_["sample.headers.from"] = headers["from"]
|
||||||
|
from_query = Q(dict(match_phrase=from_)) # pyright: ignore[reportArgumentType]
|
||||||
|
q = q & from_query
|
||||||
if "to" in headers:
|
if "to" in headers:
|
||||||
to_ = headers["to"]
|
# We convert the TO header from a string list to a flat string.
|
||||||
to_query = {"match_phrase": {"sample.headers.to": to_}}
|
headers["to"] = headers["to"][0]
|
||||||
q = q & Q(to_query)
|
if headers["to"][0] == "":
|
||||||
|
headers["to"] = headers["to"][1]
|
||||||
|
else:
|
||||||
|
headers["to"] = " <".join(headers["to"]) + ">"
|
||||||
|
|
||||||
|
to_ = dict()
|
||||||
|
to_["sample.headers.to"] = headers["to"]
|
||||||
|
to_query = Q(dict(match_phrase=to_)) # pyright: ignore[reportArgumentType]
|
||||||
|
q = q & to_query
|
||||||
if "subject" in headers:
|
if "subject" in headers:
|
||||||
subject = headers["subject"]
|
subject = headers["subject"]
|
||||||
subject_query = {"match_phrase": {"sample.headers.subject": subject}}
|
subject_query = {"match_phrase": {"sample.headers.subject": subject}}
|
||||||
q = q & Q(subject_query)
|
q = q & Q(subject_query) # pyright: ignore[reportArgumentType]
|
||||||
|
|
||||||
search.query = q
|
search.query = q
|
||||||
existing = search.execute()
|
existing = search.execute()
|
||||||
@@ -589,7 +623,9 @@ def save_forensic_report_to_elasticsearch(
|
|||||||
"A forensic sample to {0} from {1} "
|
"A forensic sample to {0} from {1} "
|
||||||
"with a subject of {2} and arrival date of {3} "
|
"with a subject of {2} and arrival date of {3} "
|
||||||
"already exists in "
|
"already exists in "
|
||||||
"Elasticsearch".format(to_, from_, subject, arrival_date_human)
|
"Elasticsearch".format(
|
||||||
|
to_, from_, subject, forensic_report["arrival_date_utc"]
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
parsed_sample = forensic_report["parsed_sample"]
|
parsed_sample = forensic_report["parsed_sample"]
|
||||||
@@ -625,7 +661,7 @@ def save_forensic_report_to_elasticsearch(
|
|||||||
user_agent=forensic_report["user_agent"],
|
user_agent=forensic_report["user_agent"],
|
||||||
version=forensic_report["version"],
|
version=forensic_report["version"],
|
||||||
original_mail_from=forensic_report["original_mail_from"],
|
original_mail_from=forensic_report["original_mail_from"],
|
||||||
arrival_date=arrival_date,
|
arrival_date=arrival_date_epoch_milliseconds,
|
||||||
domain=forensic_report["reported_domain"],
|
domain=forensic_report["reported_domain"],
|
||||||
original_envelope_id=forensic_report["original_envelope_id"],
|
original_envelope_id=forensic_report["original_envelope_id"],
|
||||||
authentication_results=forensic_report["authentication_results"],
|
authentication_results=forensic_report["authentication_results"],
|
||||||
@@ -655,7 +691,7 @@ def save_forensic_report_to_elasticsearch(
|
|||||||
number_of_shards=number_of_shards, number_of_replicas=number_of_replicas
|
number_of_shards=number_of_shards, number_of_replicas=number_of_replicas
|
||||||
)
|
)
|
||||||
create_indexes([index], index_settings)
|
create_indexes([index], index_settings)
|
||||||
forensic_doc.meta.index = index
|
forensic_doc.meta.index = index # pyright: ignore[reportAttributeAccessIssue, reportOptionalMemberAccess]
|
||||||
try:
|
try:
|
||||||
forensic_doc.save()
|
forensic_doc.save()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -667,18 +703,18 @@ def save_forensic_report_to_elasticsearch(
|
|||||||
|
|
||||||
|
|
||||||
def save_smtp_tls_report_to_elasticsearch(
|
def save_smtp_tls_report_to_elasticsearch(
|
||||||
report,
|
report: dict[str, Any],
|
||||||
index_suffix=None,
|
index_suffix: Optional[str] = None,
|
||||||
index_prefix=None,
|
index_prefix: Optional[str] = None,
|
||||||
monthly_indexes=False,
|
monthly_indexes: bool = False,
|
||||||
number_of_shards=1,
|
number_of_shards: int = 1,
|
||||||
number_of_replicas=0,
|
number_of_replicas: int = 0,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Saves a parsed SMTP TLS report to Elasticsearch
|
Saves a parsed SMTP TLS report to Elasticsearch
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
report (OrderedDict): A parsed SMTP TLS report
|
report (dict): A parsed SMTP TLS report
|
||||||
index_suffix (str): The suffix of the name of the index to save to
|
index_suffix (str): The suffix of the name of the index to save to
|
||||||
index_prefix (str): The prefix of the name of the index to save to
|
index_prefix (str): The prefix of the name of the index to save to
|
||||||
monthly_indexes (bool): Use monthly indexes instead of daily indexes
|
monthly_indexes (bool): Use monthly indexes instead of daily indexes
|
||||||
@@ -702,10 +738,10 @@ def save_smtp_tls_report_to_elasticsearch(
|
|||||||
report["begin_date"] = begin_date
|
report["begin_date"] = begin_date
|
||||||
report["end_date"] = end_date
|
report["end_date"] = end_date
|
||||||
|
|
||||||
org_name_query = Q(dict(match_phrase=dict(org_name=org_name)))
|
org_name_query = Q(dict(match_phrase=dict(org_name=org_name))) # pyright: ignore[reportArgumentType]
|
||||||
report_id_query = Q(dict(match_phrase=dict(report_id=report_id)))
|
report_id_query = Q(dict(match_phrase=dict(report_id=report_id))) # pyright: ignore[reportArgumentType]
|
||||||
begin_date_query = Q(dict(match=dict(date_begin=begin_date)))
|
begin_date_query = Q(dict(match=dict(date_begin=begin_date))) # pyright: ignore[reportArgumentType]
|
||||||
end_date_query = Q(dict(match=dict(date_end=end_date)))
|
end_date_query = Q(dict(match=dict(date_end=end_date))) # pyright: ignore[reportArgumentType]
|
||||||
|
|
||||||
if index_suffix is not None:
|
if index_suffix is not None:
|
||||||
search_index = "smtp_tls_{0}*".format(index_suffix)
|
search_index = "smtp_tls_{0}*".format(index_suffix)
|
||||||
@@ -764,7 +800,7 @@ def save_smtp_tls_report_to_elasticsearch(
|
|||||||
policy_doc = _SMTPTLSPolicyDoc(
|
policy_doc = _SMTPTLSPolicyDoc(
|
||||||
policy_domain=policy["policy_domain"],
|
policy_domain=policy["policy_domain"],
|
||||||
policy_type=policy["policy_type"],
|
policy_type=policy["policy_type"],
|
||||||
succesful_session_count=policy["successful_session_count"],
|
successful_session_count=policy["successful_session_count"],
|
||||||
failed_session_count=policy["failed_session_count"],
|
failed_session_count=policy["failed_session_count"],
|
||||||
policy_string=policy_strings,
|
policy_string=policy_strings,
|
||||||
mx_host_patterns=mx_host_patterns,
|
mx_host_patterns=mx_host_patterns,
|
||||||
@@ -806,10 +842,10 @@ def save_smtp_tls_report_to_elasticsearch(
|
|||||||
additional_information_uri=additional_information_uri,
|
additional_information_uri=additional_information_uri,
|
||||||
failure_reason_code=failure_reason_code,
|
failure_reason_code=failure_reason_code,
|
||||||
)
|
)
|
||||||
smtp_tls_doc.policies.append(policy_doc)
|
smtp_tls_doc.policies.append(policy_doc) # pyright: ignore[reportCallIssue]
|
||||||
|
|
||||||
create_indexes([index], index_settings)
|
create_indexes([index], index_settings)
|
||||||
smtp_tls_doc.meta.index = index
|
smtp_tls_doc.meta.index = index # pyright: ignore[reportOptionalMemberAccess, reportAttributeAccessIssue]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
smtp_tls_doc.save()
|
smtp_tls_doc.save()
|
||||||
|
|||||||
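Illustrative usage sketch (not part of the diff above): with the reworked signature, everything after hosts is keyword-only and apiKey becomes api_key. The host, certificate path, and key below are placeholders, not values from the repository.

from parsedmarc import elasticsearch

elasticsearch.set_hosts(
    "https://localhost:9200",                # placeholder host
    use_ssl=True,
    ssl_cert_path="/etc/ssl/certs/ca.pem",   # placeholder certificate chain path
    api_key="bXktYmFzZTY0LWtleQ==",          # placeholder Base64 API key
    timeout=60.0,
)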
@@ -1,17 +1,19 @@
# -*- coding: utf-8 -*-

+from __future__ import annotations

import logging
import logging.handlers
-import json
import threading
+from typing import Any

+from pygelf import GelfTcpHandler, GelfTlsHandler, GelfUdpHandler

from parsedmarc import (
parsed_aggregate_reports_to_csv_rows,
parsed_forensic_reports_to_csv_rows,
parsed_smtp_tls_reports_to_csv_rows,
)
-from pygelf import GelfTcpHandler, GelfUdpHandler, GelfTlsHandler


log_context_data = threading.local()

@@ -48,7 +50,7 @@ class GelfClient(object):
)
self.logger.addHandler(self.handler)

-def save_aggregate_report_to_gelf(self, aggregate_reports):
+def save_aggregate_report_to_gelf(self, aggregate_reports: list[dict[str, Any]]):
rows = parsed_aggregate_reports_to_csv_rows(aggregate_reports)
for row in rows:
log_context_data.parsedmarc = row
@@ -56,12 +58,14 @@ class GelfClient(object):

log_context_data.parsedmarc = None

-def save_forensic_report_to_gelf(self, forensic_reports):
+def save_forensic_report_to_gelf(self, forensic_reports: list[dict[str, Any]]):
rows = parsed_forensic_reports_to_csv_rows(forensic_reports)
for row in rows:
-self.logger.info(json.dumps(row))
+log_context_data.parsedmarc = row
+self.logger.info("parsedmarc forensic report")

-def save_smtp_tls_report_to_gelf(self, smtp_tls_reports):
+def save_smtp_tls_report_to_gelf(self, smtp_tls_reports: dict[str, Any]):
rows = parsed_smtp_tls_reports_to_csv_rows(smtp_tls_reports)
for row in rows:
-self.logger.info(json.dumps(row))
+log_context_data.parsedmarc = row
+self.logger.info("parsedmarc smtptls report")
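Side note on the pattern the GELF change leans on, sketched with the standard library only (the filter class and field name here are assumptions, not parsedmarc's actual handler wiring): the parsed row is stashed on a threading.local object and a logging filter copies it onto each record, so a GELF handler can ship it as an extra field instead of serializing it into the message text.

import logging
import threading

log_context_data = threading.local()

class ContextFilter(logging.Filter):
    def filter(self, record):
        # Copy whatever the current thread stored onto the log record.
        record.parsedmarc = getattr(log_context_data, "parsedmarc", None)
        return True

logger = logging.getLogger("gelf_demo")
logger.setLevel(logging.INFO)
logger.addHandler(logging.StreamHandler())
logger.addFilter(ContextFilter())

log_context_data.parsedmarc = {"report_id": "example"}
logger.info("parsedmarc forensic report")  # the record now carries .parsedmarc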
@@ -1,15 +1,17 @@
# -*- coding: utf-8 -*-

+from __future__ import annotations

import json
-from ssl import create_default_context
+from ssl import SSLContext, create_default_context
+from typing import Any, Optional, Union

from kafka import KafkaProducer
from kafka.errors import NoBrokersAvailable, UnknownTopicOrPartitionError
-from collections import OrderedDict
-from parsedmarc.utils import human_timestamp_to_datetime

from parsedmarc import __version__
from parsedmarc.log import logger
+from parsedmarc.utils import human_timestamp_to_datetime


class KafkaError(RuntimeError):
@@ -18,7 +20,13 @@ class KafkaError(RuntimeError):

class KafkaClient(object):
def __init__(
-self, kafka_hosts, ssl=False, username=None, password=None, ssl_context=None
+self,
+kafka_hosts: list[str],
+*,
+ssl: Optional[bool] = False,
+username: Optional[str] = None,
+password: Optional[str] = None,
+ssl_context: Optional[SSLContext] = None,
):
"""
Initializes the Kafka client
@@ -28,7 +36,7 @@ class KafkaClient(object):
ssl (bool): Use a SSL/TLS connection
username (str): An optional username
password (str): An optional password
-ssl_context: SSL context options
+ssl_context (SSLContext): SSL context options

Notes:
``use_ssl=True`` is implied when a username or password are
@@ -38,7 +46,7 @@ class KafkaClient(object):
``$ConnectionString``, and the password is the
Azure Event Hub connection string.
"""
-config = dict(
+config: dict[str, Any] = dict(
value_serializer=lambda v: json.dumps(v).encode("utf-8"),
bootstrap_servers=kafka_hosts,
client_id="parsedmarc-{0}".format(__version__),
@@ -55,7 +63,7 @@ class KafkaClient(object):
raise KafkaError("No Kafka brokers available")

@staticmethod
-def strip_metadata(report):
+def strip_metadata(report: dict[str, Any]):
"""
Duplicates org_name, org_email and report_id into JSON root
and removes report_metadata key to bring it more inline
@@ -69,7 +77,7 @@ class KafkaClient(object):
return report

@staticmethod
-def generate_daterange(report):
+def generate_date_range(report: dict[str, Any]):
"""
Creates a date_range timestamp with format YYYY-MM-DD-T-HH:MM:SS
based on begin and end dates for easier parsing in Kibana.
@@ -86,7 +94,11 @@ class KafkaClient(object):
logger.debug("date_range is {}".format(date_range))
return date_range

-def save_aggregate_reports_to_kafka(self, aggregate_reports, aggregate_topic):
+def save_aggregate_reports_to_kafka(
+self,
+aggregate_reports: Union[dict[str, Any], list[dict[str, Any]]],
+aggregate_topic: str,
+):
"""
Saves aggregate DMARC reports to Kafka

@@ -96,16 +108,14 @@ class KafkaClient(object):
aggregate_topic (str): The name of the Kafka topic

"""
-if isinstance(aggregate_reports, dict) or isinstance(
+if isinstance(aggregate_reports, dict):
-aggregate_reports, OrderedDict
-):
aggregate_reports = [aggregate_reports]

if len(aggregate_reports) < 1:
return

for report in aggregate_reports:
-report["date_range"] = self.generate_daterange(report)
+report["date_range"] = self.generate_date_range(report)
report = self.strip_metadata(report)

for slice in report["records"]:
@@ -129,7 +139,11 @@ class KafkaClient(object):
except Exception as e:
raise KafkaError("Kafka error: {0}".format(e.__str__()))

-def save_forensic_reports_to_kafka(self, forensic_reports, forensic_topic):
+def save_forensic_reports_to_kafka(
+self,
+forensic_reports: Union[dict[str, Any], list[dict[str, Any]]],
+forensic_topic: str,
+):
"""
Saves forensic DMARC reports to Kafka, sends individual
records (slices) since Kafka requires messages to be <= 1MB
@@ -159,7 +173,11 @@ class KafkaClient(object):
except Exception as e:
raise KafkaError("Kafka error: {0}".format(e.__str__()))

-def save_smtp_tls_reports_to_kafka(self, smtp_tls_reports, smtp_tls_topic):
+def save_smtp_tls_reports_to_kafka(
+self,
+smtp_tls_reports: Union[list[dict[str, Any]], dict[str, Any]],
+smtp_tls_topic: str,
+):
"""
Saves SMTP TLS reports to Kafka, sends individual
records (slices) since Kafka requires messages to be <= 1MB
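A hypothetical call sketch for the keyword-only constructor above; the broker address and topic name are placeholders, and a reachable Kafka broker is needed for the producer to actually start.

from parsedmarc.kafkaclient import KafkaClient

aggregate_reports = []  # parsed aggregate reports would go here
client = KafkaClient(
    ["localhost:9092"],   # placeholder broker list
    ssl=True,
    username=None,
    password=None,
)
client.save_aggregate_reports_to_kafka(aggregate_reports, "dmarc_aggregate")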
@@ -1,9 +1,15 @@
# -*- coding: utf-8 -*-
-from parsedmarc.log import logger
+from __future__ import annotations

+from typing import Any

from azure.core.exceptions import HttpResponseError
from azure.identity import ClientSecretCredential
from azure.monitor.ingestion import LogsIngestionClient

+from parsedmarc.log import logger


class LogAnalyticsException(Exception):
"""Raised when an Elasticsearch error occurs"""
@@ -102,7 +108,12 @@ class LogAnalyticsClient(object):
"Invalid configuration. " + "One or more required settings are missing."
)

-def publish_json(self, results, logs_client: LogsIngestionClient, dcr_stream: str):
+def publish_json(
+self,
+results,
+logs_client: LogsIngestionClient,
+dcr_stream: str,
+):
"""
Background function to publish given
DMARC report to specific Data Collection Rule.
@@ -121,7 +132,11 @@ class LogAnalyticsClient(object):
raise LogAnalyticsException("Upload failed: {error}".format(error=e))

def publish_results(
-self, results, save_aggregate: bool, save_forensic: bool, save_smtp_tls: bool
+self,
+results: dict[str, Any],
+save_aggregate: bool,
+save_forensic: bool,
+save_smtp_tls: bool,
):
"""
Function to publish DMARC and/or SMTP TLS reports to Log Analytics
@@ -1,3 +1,7 @@
+# -*- coding: utf-8 -*-

+from __future__ import annotations

from base64 import urlsafe_b64decode
from functools import lru_cache
from pathlib import Path
@@ -63,9 +67,7 @@ class GmailConnection(MailboxConnection):
).execute()
except HttpError as e:
if e.status_code == 409:
-logger.debug(
+logger.debug(f"Folder {folder_name} already exists, skipping creation")
-f"Folder {folder_name} already exists, " f"skipping creation"
-)
else:
raise e

@@ -114,14 +116,14 @@ class GmailConnection(MailboxConnection):
else:
return [id for id in self._fetch_all_message_ids(reports_label_id)]

-def fetch_message(self, message_id):
+def fetch_message(self, message_id) -> str:
msg = (
self.service.users()
.messages()
.get(userId="me", id=message_id, format="raw")
.execute()
)
-return urlsafe_b64decode(msg["raw"])
+return urlsafe_b64decode(msg["raw"]).decode(errors="replace")

def delete_message(self, message_id: str):
self.service.users().messages().delete(userId="me", id=message_id)
@@ -154,3 +156,4 @@ class GmailConnection(MailboxConnection):
for label in labels:
if label_name == label["id"] or label_name == label["name"]:
return label["id"]
+return ""
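The return-type change above boils down to the fact that urlsafe_b64decode() yields bytes; decoding with errors="replace" is what lets fetch_message return str. A quick standard-library check of that behavior:

from base64 import urlsafe_b64decode, urlsafe_b64encode

raw = urlsafe_b64encode(b"Subject: test\r\n\r\nbody")
message_text = urlsafe_b64decode(raw).decode(errors="replace")
print(type(message_text).__name__)  # str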
@@ -1,8 +1,12 @@
+# -*- coding: utf-8 -*-

+from __future__ import annotations

from enum import Enum
from functools import lru_cache
from pathlib import Path
from time import sleep
-from typing import List, Optional
+from typing import Any, List, Optional, Union

from azure.identity import (
UsernamePasswordCredential,
@@ -24,7 +28,7 @@ class AuthMethod(Enum):


def _get_cache_args(token_path: Path, allow_unencrypted_storage):
-cache_args = {
+cache_args: dict[str, Any] = {
"cache_persistence_options": TokenCachePersistenceOptions(
name="parsedmarc", allow_unencrypted_storage=allow_unencrypted_storage
)
@@ -89,6 +93,7 @@ class MSGraphConnection(MailboxConnection):
self,
auth_method: str,
mailbox: str,
+graph_url: str,
client_id: str,
client_secret: str,
username: str,
@@ -108,7 +113,10 @@ class MSGraphConnection(MailboxConnection):
token_path=token_path,
allow_unencrypted_storage=allow_unencrypted_storage,
)
-client_params = {"credential": credential}
+client_params = {
+"credential": credential,
+"cloud": graph_url,
+}
if not isinstance(credential, ClientSecretCredential):
scopes = ["Mail.ReadWrite"]
# Detect if mailbox is shared
@@ -137,16 +145,16 @@ class MSGraphConnection(MailboxConnection):
request_url = f"/users/{self.mailbox_name}/mailFolders{sub_url}"
resp = self._client.post(request_url, json=request_body)
if resp.status_code == 409:
-logger.debug(f"Folder {folder_name} already exists, " f"skipping creation")
+logger.debug(f"Folder {folder_name} already exists, skipping creation")
elif resp.status_code == 201:
logger.debug(f"Created folder {folder_name}")
else:
-logger.warning(f"Unknown response " f"{resp.status_code} {resp.json()}")
+logger.warning(f"Unknown response {resp.status_code} {resp.json()}")

-def fetch_messages(self, folder_name: str, **kwargs) -> List[str]:
+def fetch_messages(self, reports_folder: str, **kwargs) -> List[str]:
"""Returns a list of message UIDs in the specified folder"""
-folder_id = self._find_folder_id_from_folder_path(folder_name)
+folder_id = self._find_folder_id_from_folder_path(reports_folder)
-url = f"/users/{self.mailbox_name}/mailFolders/" f"{folder_id}/messages"
+url = f"/users/{self.mailbox_name}/mailFolders/{folder_id}/messages"
since = kwargs.get("since")
if not since:
since = None
@@ -158,7 +166,7 @@ class MSGraphConnection(MailboxConnection):

def _get_all_messages(self, url, batch_size, since):
messages: list
-params = {"$select": "id"}
+params: dict[str, Union[str, int]] = {"$select": "id"}
if since:
params["$filter"] = f"receivedDateTime ge {since}"
if batch_size and batch_size > 0:
@@ -185,7 +193,7 @@ class MSGraphConnection(MailboxConnection):
resp = self._client.patch(url, json={"isRead": "true"})
if resp.status_code != 200:
raise RuntimeWarning(
-f"Failed to mark message read" f"{resp.status_code}: {resp.json()}"
+f"Failed to mark message read{resp.status_code}: {resp.json()}"
)

def fetch_message(self, message_id: str, **kwargs):
@@ -193,7 +201,7 @@ class MSGraphConnection(MailboxConnection):
result = self._client.get(url)
if result.status_code != 200:
raise RuntimeWarning(
-f"Failed to fetch message" f"{result.status_code}: {result.json()}"
+f"Failed to fetch message{result.status_code}: {result.json()}"
)
mark_read = kwargs.get("mark_read")
if mark_read:
@@ -205,7 +213,7 @@ class MSGraphConnection(MailboxConnection):
resp = self._client.delete(url)
if resp.status_code != 204:
raise RuntimeWarning(
-f"Failed to delete message " f"{resp.status_code}: {resp.json()}"
+f"Failed to delete message {resp.status_code}: {resp.json()}"
)

def move_message(self, message_id: str, folder_name: str):
@@ -215,7 +223,7 @@ class MSGraphConnection(MailboxConnection):
resp = self._client.post(url, json=request_body)
if resp.status_code != 201:
raise RuntimeWarning(
-f"Failed to move message " f"{resp.status_code}: {resp.json()}"
+f"Failed to move message {resp.status_code}: {resp.json()}"
)

def keepalive(self):
@@ -250,7 +258,7 @@ class MSGraphConnection(MailboxConnection):
filter = f"?$filter=displayName eq '{folder_name}'"
folders_resp = self._client.get(url + filter)
if folders_resp.status_code != 200:
-raise RuntimeWarning(f"Failed to list folders." f"{folders_resp.json()}")
+raise RuntimeWarning(f"Failed to list folders.{folders_resp.json()}")
folders: list = folders_resp.json()["value"]
matched_folders = [
folder for folder in folders if folder["displayName"] == folder_name
@@ -1,3 +1,9 @@
+# -*- coding: utf-8 -*-

+from __future__ import annotations

+from typing import cast

from time import sleep

from imapclient.exceptions import IMAPClientError
@@ -11,14 +17,14 @@ from parsedmarc.mail.mailbox_connection import MailboxConnection
class IMAPConnection(MailboxConnection):
def __init__(
self,
-host=None,
+host: str,
-user=None,
+user: str,
-password=None,
+password: str,
-port=None,
+port: int = 993,
-ssl=True,
+ssl: bool = True,
-verify=True,
+verify: bool = True,
-timeout=30,
+timeout: int = 30,
-max_retries=4,
+max_retries: int = 4,
):
self._username = user
self._password = password
@@ -40,18 +46,18 @@ class IMAPConnection(MailboxConnection):
def fetch_messages(self, reports_folder: str, **kwargs):
self._client.select_folder(reports_folder)
since = kwargs.get("since")
-if since:
+if since is not None:
-return self._client.search(["SINCE", since])
+return self._client.search(f"SINCE {since}")
else:
return self._client.search()

-def fetch_message(self, message_id):
+def fetch_message(self, message_id: int):
-return self._client.fetch_message(message_id, parse=False)
+return cast(str, self._client.fetch_message(message_id, parse=False))

-def delete_message(self, message_id: str):
+def delete_message(self, message_id: int):
self._client.delete_messages([message_id])

-def move_message(self, message_id: str, folder_name: str):
+def move_message(self, message_id: int, folder_name: str):
self._client.move_messages([message_id], folder_name)

def keepalive(self):
@@ -85,7 +91,5 @@ class IMAPConnection(MailboxConnection):
logger.warning("IMAP connection timeout. Reconnecting...")
sleep(check_timeout)
except Exception as e:
-logger.warning(
+logger.warning("IMAP connection error. {0}. Reconnecting...".format(e))
-"IMAP connection error. {0}. " "Reconnecting...".format(e)
-)
sleep(check_timeout)
@@ -1,5 +1,8 @@
+# -*- coding: utf-8 -*-

+from __future__ import annotations

from abc import ABC
-from typing import List


class MailboxConnection(ABC):
@@ -10,16 +13,16 @@ class MailboxConnection(ABC):
def create_folder(self, folder_name: str):
raise NotImplementedError

-def fetch_messages(self, reports_folder: str, **kwargs) -> List[str]:
+def fetch_messages(self, reports_folder: str, **kwargs):
raise NotImplementedError

def fetch_message(self, message_id) -> str:
raise NotImplementedError

-def delete_message(self, message_id: str):
+def delete_message(self, message_id):
raise NotImplementedError

-def move_message(self, message_id: str, folder_name: str):
+def move_message(self, message_id, folder_name: str):
raise NotImplementedError

def keepalive(self):
@@ -1,16 +1,21 @@
+# -*- coding: utf-8 -*-

+from __future__ import annotations

+import mailbox
+import os
from time import sleep
+from typing import Dict

from parsedmarc.log import logger
from parsedmarc.mail.mailbox_connection import MailboxConnection
-import mailbox
-import os


class MaildirConnection(MailboxConnection):
def __init__(
self,
-maildir_path=None,
+maildir_path: str,
-maildir_create=False,
+maildir_create: bool = False,
):
self._maildir_path = maildir_path
self._maildir_create = maildir_create
@@ -27,27 +32,31 @@ class MaildirConnection(MailboxConnection):
)
raise Exception(ex)
self._client = mailbox.Maildir(maildir_path, create=maildir_create)
-self._subfolder_client = {}
+self._subfolder_client: Dict[str, mailbox.Maildir] = {}

def create_folder(self, folder_name: str):
self._subfolder_client[folder_name] = self._client.add_folder(folder_name)
-self._client.add_folder(folder_name)

def fetch_messages(self, reports_folder: str, **kwargs):
return self._client.keys()

-def fetch_message(self, message_id):
+def fetch_message(self, message_id: str) -> str:
-return self._client.get(message_id).as_string()
+msg = self._client.get(message_id)
+if msg is not None:
+msg = msg.as_string()
+if msg is not None:
+return msg
+return ""

def delete_message(self, message_id: str):
self._client.remove(message_id)

def move_message(self, message_id: str, folder_name: str):
message_data = self._client.get(message_id)
-if folder_name not in self._subfolder_client.keys():
+if message_data is None:
-self._subfolder_client = mailbox.Maildir(
+return
-os.join(self.maildir_path, folder_name), create=self.maildir_create
+if folder_name not in self._subfolder_client:
-)
+self._subfolder_client[folder_name] = self._client.add_folder(folder_name)
self._subfolder_client[folder_name].add(message_data)
self._client.remove(message_id)
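What the rewritten move_message amounts to, sketched with only the standard library (the path and folder name are placeholders): fetch the message, lazily create the destination subfolder, add the message there, then remove the original key.

import mailbox

inbox = mailbox.Maildir("/tmp/maildir-demo", create=True)   # placeholder path
key = inbox.add(mailbox.MaildirMessage("Subject: demo\n\nhello\n"))

destination = inbox.add_folder("Archive")  # safe even if the folder already exists
message_data = inbox.get(key)
if message_data is not None:
    destination.add(message_data)
    inbox.remove(key)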
@@ -1,27 +1,29 @@
# -*- coding: utf-8 -*-

-from collections import OrderedDict
+from __future__ import annotations

+from typing import Any, Optional, Union

from opensearchpy import (
-Q,
+Boolean,
-connections,
+Date,
-Object,
Document,
Index,
-Nested,
InnerDoc,
Integer,
-Text,
-Boolean,
Ip,
-Date,
+Nested,
+Object,
+Q,
Search,
+Text,
+connections,
)
from opensearchpy.helpers import reindex

+from parsedmarc import InvalidForensicReport
from parsedmarc.log import logger
from parsedmarc.utils import human_timestamp_to_datetime
-from parsedmarc import InvalidForensicReport


class OpenSearchError(Exception):
@@ -67,6 +69,8 @@ class _AggregateReportDoc(Document):
date_range = Date()
date_begin = Date()
date_end = Date()
+normalized_timespan = Boolean()
+original_timespan_seconds = Integer
errors = Text()
published_policy = Object(_PublishedPolicy)
source_ip_address = Ip()
@@ -87,18 +91,18 @@ class _AggregateReportDoc(Document):
dkim_results = Nested(_DKIMResult)
spf_results = Nested(_SPFResult)

-def add_policy_override(self, type_, comment):
+def add_policy_override(self, type_: str, comment: str):
self.policy_overrides.append(_PolicyOverride(type=type_, comment=comment))

-def add_dkim_result(self, domain, selector, result):
+def add_dkim_result(self, domain: str, selector: str, result: _DKIMResult):
self.dkim_results.append(
_DKIMResult(domain=domain, selector=selector, result=result)
)

-def add_spf_result(self, domain, scope, result):
+def add_spf_result(self, domain: str, scope: str, result: _SPFResult):
self.spf_results.append(_SPFResult(domain=domain, scope=scope, result=result))

-def save(self, **kwargs):
+def save(self, **kwargs): # pyright: ignore[reportIncompatibleMethodOverride]
self.passed_dmarc = False
self.passed_dmarc = self.spf_aligned or self.dkim_aligned

@@ -131,21 +135,21 @@ class _ForensicSampleDoc(InnerDoc):
body = Text()
attachments = Nested(_EmailAttachmentDoc)

-def add_to(self, display_name, address):
+def add_to(self, display_name: str, address: str):
self.to.append(_EmailAddressDoc(display_name=display_name, address=address))

-def add_reply_to(self, display_name, address):
+def add_reply_to(self, display_name: str, address: str):
self.reply_to.append(
_EmailAddressDoc(display_name=display_name, address=address)
)

-def add_cc(self, display_name, address):
+def add_cc(self, display_name: str, address: str):
self.cc.append(_EmailAddressDoc(display_name=display_name, address=address))

-def add_bcc(self, display_name, address):
+def add_bcc(self, display_name: str, address: str):
self.bcc.append(_EmailAddressDoc(display_name=display_name, address=address))

-def add_attachment(self, filename, content_type, sha256):
+def add_attachment(self, filename: str, content_type: str, sha256: str):
self.attachments.append(
_EmailAttachmentDoc(
filename=filename, content_type=content_type, sha256=sha256
@@ -197,18 +201,20 @@ class _SMTPTLSPolicyDoc(InnerDoc):

def add_failure_details(
self,
-result_type,
+result_type: Optional[str] = None,
-ip_address,
+ip_address: Optional[str] = None,
-receiving_ip,
+receiving_ip: Optional[str] = None,
-receiving_mx_helo,
+receiving_mx_helo: Optional[str] = None,
-failed_session_count,
+failed_session_count: Optional[int] = None,
-receiving_mx_hostname=None,
+sending_mta_ip: Optional[str] = None,
-additional_information_uri=None,
+receiving_mx_hostname: Optional[str] = None,
-failure_reason_code=None,
+additional_information_uri: Optional[str] = None,
+failure_reason_code: Union[str, int, None] = None,
):
-self.failure_details.append(
+_details = _SMTPTLSFailureDetailsDoc(
result_type=result_type,
ip_address=ip_address,
+sending_mta_ip=sending_mta_ip,
receiving_mx_hostname=receiving_mx_hostname,
receiving_mx_helo=receiving_mx_helo,
receiving_ip=receiving_ip,
@@ -216,9 +222,10 @@ class _SMTPTLSPolicyDoc(InnerDoc):
additional_information=additional_information_uri,
failure_reason_code=failure_reason_code,
)
+self.failure_details.append(_details)


-class _SMTPTLSFailureReportDoc(Document):
+class _SMTPTLSReportDoc(Document):
class Index:
name = "smtp_tls"

@@ -232,13 +239,14 @@ class _SMTPTLSFailureReportDoc(Document):

def add_policy(
self,
-policy_type,
+policy_type: str,
-policy_domain,
+policy_domain: str,
-successful_session_count,
+successful_session_count: int,
-failed_session_count,
+failed_session_count: int,
-policy_string=None,
+*,
-mx_host_patterns=None,
+policy_string: Optional[str] = None,
-failure_details=None,
+mx_host_patterns: Optional[list[str]] = None,
+failure_details: Optional[str] = None,
):
self.policies.append(
policy_type=policy_type,
@@ -256,24 +264,25 @@ class AlreadySaved(ValueError):


def set_hosts(
-hosts,
+hosts: Union[str, list[str]],
-use_ssl=False,
+*,
-ssl_cert_path=None,
+use_ssl: Optional[bool] = False,
-username=None,
+ssl_cert_path: Optional[str] = None,
-password=None,
+username: Optional[str] = None,
-apiKey=None,
+password: Optional[str] = None,
-timeout=60.0,
+api_key: Optional[str] = None,
+timeout: Optional[float] = 60.0,
):
"""
Sets the OpenSearch hosts to use

Args:
-hosts (str|list): A hostname or URL, or list of hostnames or URLs
+hosts (str|list[str]): A single hostname or URL, or list of hostnames or URLs
use_ssl (bool): Use an HTTPS connection to the server
ssl_cert_path (str): Path to the certificate chain
username (str): The username to use for authentication
password (str): The password to use for authentication
-apiKey (str): The Base64 encoded API key to use for authentication
+api_key (str): The Base64 encoded API key to use for authentication
timeout (float): Timeout in seconds
"""
if not isinstance(hosts, list):
@@ -286,14 +295,14 @@ def set_hosts(
conn_params["ca_certs"] = ssl_cert_path
else:
conn_params["verify_certs"] = False
-if username:
+if username and password:
conn_params["http_auth"] = username + ":" + password
-if apiKey:
+if api_key:
-conn_params["api_key"] = apiKey
+conn_params["api_key"] = api_key
connections.create_connection(**conn_params)


-def create_indexes(names, settings=None):
+def create_indexes(names: list[str], settings: Optional[dict[str, Any]] = None):
"""
Create OpenSearch indexes

@@ -316,7 +325,10 @@ def create_indexes(names, settings=None):
raise OpenSearchError("OpenSearch error: {0}".format(e.__str__()))


-def migrate_indexes(aggregate_indexes=None, forensic_indexes=None):
+def migrate_indexes(
+aggregate_indexes: Optional[list[str]] = None,
+forensic_indexes: Optional[list[str]] = None,
+):
"""
Updates index mappings

@@ -363,18 +375,18 @@ def migrate_indexes(aggregate_indexes=None, forensic_indexes=None):


def save_aggregate_report_to_opensearch(
-aggregate_report,
+aggregate_report: dict[str, Any],
-index_suffix=None,
+index_suffix: Optional[str] = None,
index_prefix=None,
|
index_prefix: Optional[str] = None,
|
||||||
monthly_indexes=False,
|
monthly_indexes: bool = False,
|
||||||
number_of_shards=1,
|
number_of_shards: int = 1,
|
||||||
number_of_replicas=0,
|
number_of_replicas: int = 0,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Saves a parsed DMARC aggregate report to OpenSearch
|
Saves a parsed DMARC aggregate report to OpenSearch
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
aggregate_report (OrderedDict): A parsed forensic report
|
aggregate_report (dict): A parsed forensic report
|
||||||
index_suffix (str): The suffix of the name of the index to save to
|
index_suffix (str): The suffix of the name of the index to save to
|
||||||
index_prefix (str): The prefix of the name of the index to save to
|
index_prefix (str): The prefix of the name of the index to save to
|
||||||
monthly_indexes (bool): Use monthly indexes instead of daily indexes
|
monthly_indexes (bool): Use monthly indexes instead of daily indexes
|
||||||
@@ -392,15 +404,11 @@ def save_aggregate_report_to_opensearch(
|
|||||||
domain = aggregate_report["policy_published"]["domain"]
|
domain = aggregate_report["policy_published"]["domain"]
|
||||||
begin_date = human_timestamp_to_datetime(metadata["begin_date"], to_utc=True)
|
begin_date = human_timestamp_to_datetime(metadata["begin_date"], to_utc=True)
|
||||||
end_date = human_timestamp_to_datetime(metadata["end_date"], to_utc=True)
|
end_date = human_timestamp_to_datetime(metadata["end_date"], to_utc=True)
|
||||||
begin_date_human = begin_date.strftime("%Y-%m-%d %H:%M:%SZ")
|
|
||||||
end_date_human = end_date.strftime("%Y-%m-%d %H:%M:%SZ")
|
|
||||||
if monthly_indexes:
|
if monthly_indexes:
|
||||||
index_date = begin_date.strftime("%Y-%m")
|
index_date = begin_date.strftime("%Y-%m")
|
||||||
else:
|
else:
|
||||||
index_date = begin_date.strftime("%Y-%m-%d")
|
index_date = begin_date.strftime("%Y-%m-%d")
|
||||||
aggregate_report["begin_date"] = begin_date
|
|
||||||
aggregate_report["end_date"] = end_date
|
|
||||||
date_range = [aggregate_report["begin_date"], aggregate_report["end_date"]]
|
|
||||||
|
|
||||||
org_name_query = Q(dict(match_phrase=dict(org_name=org_name)))
|
org_name_query = Q(dict(match_phrase=dict(org_name=org_name)))
|
||||||
report_id_query = Q(dict(match_phrase=dict(report_id=report_id)))
|
report_id_query = Q(dict(match_phrase=dict(report_id=report_id)))
|
||||||
@@ -418,6 +426,8 @@ def save_aggregate_report_to_opensearch(
|
|||||||
query = org_name_query & report_id_query & domain_query
|
query = org_name_query & report_id_query & domain_query
|
||||||
query = query & begin_date_query & end_date_query
|
query = query & begin_date_query & end_date_query
|
||||||
search.query = query
|
search.query = query
|
||||||
|
begin_date_human = begin_date.strftime("%Y-%m-%d %H:%M:%SZ")
|
||||||
|
end_date_human = end_date.strftime("%Y-%m-%d %H:%M:%SZ")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
existing = search.execute()
|
existing = search.execute()
|
||||||
@@ -447,6 +457,17 @@ def save_aggregate_report_to_opensearch(
|
|||||||
)
|
)
|
||||||
|
|
||||||
for record in aggregate_report["records"]:
|
for record in aggregate_report["records"]:
|
||||||
|
begin_date = human_timestamp_to_datetime(record["interval_begin"], to_utc=True)
|
||||||
|
end_date = human_timestamp_to_datetime(record["interval_end"], to_utc=True)
|
||||||
|
normalized_timespan = record["normalized_timespan"]
|
||||||
|
|
||||||
|
if monthly_indexes:
|
||||||
|
index_date = begin_date.strftime("%Y-%m")
|
||||||
|
else:
|
||||||
|
index_date = begin_date.strftime("%Y-%m-%d")
|
||||||
|
aggregate_report["begin_date"] = begin_date
|
||||||
|
aggregate_report["end_date"] = end_date
|
||||||
|
date_range = [aggregate_report["begin_date"], aggregate_report["end_date"]]
|
||||||
agg_doc = _AggregateReportDoc(
|
agg_doc = _AggregateReportDoc(
|
||||||
xml_schema=aggregate_report["xml_schema"],
|
xml_schema=aggregate_report["xml_schema"],
|
||||||
org_name=metadata["org_name"],
|
org_name=metadata["org_name"],
|
||||||
@@ -454,8 +475,9 @@ def save_aggregate_report_to_opensearch(
|
|||||||
org_extra_contact_info=metadata["org_extra_contact_info"],
|
org_extra_contact_info=metadata["org_extra_contact_info"],
|
||||||
report_id=metadata["report_id"],
|
report_id=metadata["report_id"],
|
||||||
date_range=date_range,
|
date_range=date_range,
|
||||||
date_begin=aggregate_report["begin_date"],
|
date_begin=begin_date,
|
||||||
date_end=aggregate_report["end_date"],
|
date_end=end_date,
|
||||||
|
normalized_timespan=normalized_timespan,
|
||||||
errors=metadata["errors"],
|
errors=metadata["errors"],
|
||||||
published_policy=published_policy,
|
published_policy=published_policy,
|
||||||
source_ip_address=record["source"]["ip_address"],
|
source_ip_address=record["source"]["ip_address"],
|
||||||
@@ -499,6 +521,7 @@ def save_aggregate_report_to_opensearch(
|
|||||||
index = "{0}_{1}".format(index, index_suffix)
|
index = "{0}_{1}".format(index, index_suffix)
|
||||||
if index_prefix:
|
if index_prefix:
|
||||||
index = "{0}{1}".format(index_prefix, index)
|
index = "{0}{1}".format(index_prefix, index)
|
||||||
|
|
||||||
index = "{0}-{1}".format(index, index_date)
|
index = "{0}-{1}".format(index, index_date)
|
||||||
index_settings = dict(
|
index_settings = dict(
|
||||||
number_of_shards=number_of_shards, number_of_replicas=number_of_replicas
|
number_of_shards=number_of_shards, number_of_replicas=number_of_replicas
|
||||||
@@ -513,18 +536,18 @@ def save_aggregate_report_to_opensearch(
|
|||||||
|
|
||||||
|
|
||||||
def save_forensic_report_to_opensearch(
|
def save_forensic_report_to_opensearch(
|
||||||
forensic_report,
|
forensic_report: dict[str, Any],
|
||||||
index_suffix=None,
|
index_suffix: Optional[str] = None,
|
||||||
index_prefix=None,
|
index_prefix: Optional[str] = None,
|
||||||
monthly_indexes=False,
|
monthly_indexes: bool = False,
|
||||||
number_of_shards=1,
|
number_of_shards: int = 1,
|
||||||
number_of_replicas=0,
|
number_of_replicas: int = 0,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Saves a parsed DMARC forensic report to OpenSearch
|
Saves a parsed DMARC forensic report to OpenSearch
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
forensic_report (OrderedDict): A parsed forensic report
|
forensic_report (dict): A parsed forensic report
|
||||||
index_suffix (str): The suffix of the name of the index to save to
|
index_suffix (str): The suffix of the name of the index to save to
|
||||||
index_prefix (str): The prefix of the name of the index to save to
|
index_prefix (str): The prefix of the name of the index to save to
|
||||||
monthly_indexes (bool): Use monthly indexes instead of daily
|
monthly_indexes (bool): Use monthly indexes instead of daily
|
||||||
@@ -544,12 +567,12 @@ def save_forensic_report_to_opensearch(
|
|||||||
sample_date = forensic_report["parsed_sample"]["date"]
|
sample_date = forensic_report["parsed_sample"]["date"]
|
||||||
sample_date = human_timestamp_to_datetime(sample_date)
|
sample_date = human_timestamp_to_datetime(sample_date)
|
||||||
original_headers = forensic_report["parsed_sample"]["headers"]
|
original_headers = forensic_report["parsed_sample"]["headers"]
|
||||||
headers = OrderedDict()
|
headers: dict[str, Any] = {}
|
||||||
for original_header in original_headers:
|
for original_header in original_headers:
|
||||||
headers[original_header.lower()] = original_headers[original_header]
|
headers[original_header.lower()] = original_headers[original_header]
|
||||||
|
|
||||||
arrival_date_human = forensic_report["arrival_date_utc"]
|
arrival_date = human_timestamp_to_datetime(forensic_report["arrival_date_utc"])
|
||||||
arrival_date = human_timestamp_to_datetime(arrival_date_human)
|
arrival_date_epoch_milliseconds = int(arrival_date.timestamp() * 1000)
|
||||||
|
|
||||||
if index_suffix is not None:
|
if index_suffix is not None:
|
||||||
search_index = "dmarc_forensic_{0}*".format(index_suffix)
|
search_index = "dmarc_forensic_{0}*".format(index_suffix)
|
||||||
@@ -558,20 +581,35 @@ def save_forensic_report_to_opensearch(
|
|||||||
if index_prefix is not None:
|
if index_prefix is not None:
|
||||||
search_index = "{0}{1}".format(index_prefix, search_index)
|
search_index = "{0}{1}".format(index_prefix, search_index)
|
||||||
search = Search(index=search_index)
|
search = Search(index=search_index)
|
||||||
arrival_query = {"match": {"arrival_date": arrival_date}}
|
q = Q(dict(match=dict(arrival_date=arrival_date_epoch_milliseconds)))
|
||||||
q = Q(arrival_query)
|
|
||||||
|
|
||||||
from_ = None
|
from_ = None
|
||||||
to_ = None
|
to_ = None
|
||||||
subject = None
|
subject = None
|
||||||
if "from" in headers:
|
if "from" in headers:
|
||||||
from_ = headers["from"]
|
# We convert the FROM header from a string list to a flat string.
|
||||||
from_query = {"match_phrase": {"sample.headers.from": from_}}
|
headers["from"] = headers["from"][0]
|
||||||
q = q & Q(from_query)
|
if headers["from"][0] == "":
|
||||||
|
headers["from"] = headers["from"][1]
|
||||||
|
else:
|
||||||
|
headers["from"] = " <".join(headers["from"]) + ">"
|
||||||
|
|
||||||
|
from_ = dict()
|
||||||
|
from_["sample.headers.from"] = headers["from"]
|
||||||
|
from_query = Q(dict(match_phrase=from_))
|
||||||
|
q = q & from_query
|
||||||
if "to" in headers:
|
if "to" in headers:
|
||||||
to_ = headers["to"]
|
# We convert the TO header from a string list to a flat string.
|
||||||
to_query = {"match_phrase": {"sample.headers.to": to_}}
|
headers["to"] = headers["to"][0]
|
||||||
q = q & Q(to_query)
|
if headers["to"][0] == "":
|
||||||
|
headers["to"] = headers["to"][1]
|
||||||
|
else:
|
||||||
|
headers["to"] = " <".join(headers["to"]) + ">"
|
||||||
|
|
||||||
|
to_ = dict()
|
||||||
|
to_["sample.headers.to"] = headers["to"]
|
||||||
|
to_query = Q(dict(match_phrase=to_))
|
||||||
|
q = q & to_query
|
||||||
if "subject" in headers:
|
if "subject" in headers:
|
||||||
subject = headers["subject"]
|
subject = headers["subject"]
|
||||||
subject_query = {"match_phrase": {"sample.headers.subject": subject}}
|
subject_query = {"match_phrase": {"sample.headers.subject": subject}}
|
||||||
@@ -585,7 +623,9 @@ def save_forensic_report_to_opensearch(
|
|||||||
"A forensic sample to {0} from {1} "
|
"A forensic sample to {0} from {1} "
|
||||||
"with a subject of {2} and arrival date of {3} "
|
"with a subject of {2} and arrival date of {3} "
|
||||||
"already exists in "
|
"already exists in "
|
||||||
"OpenSearch".format(to_, from_, subject, arrival_date_human)
|
"OpenSearch".format(
|
||||||
|
to_, from_, subject, forensic_report["arrival_date_utc"]
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
parsed_sample = forensic_report["parsed_sample"]
|
parsed_sample = forensic_report["parsed_sample"]
|
||||||
@@ -621,7 +661,7 @@ def save_forensic_report_to_opensearch(
|
|||||||
user_agent=forensic_report["user_agent"],
|
user_agent=forensic_report["user_agent"],
|
||||||
version=forensic_report["version"],
|
version=forensic_report["version"],
|
||||||
original_mail_from=forensic_report["original_mail_from"],
|
original_mail_from=forensic_report["original_mail_from"],
|
||||||
arrival_date=arrival_date,
|
arrival_date=arrival_date_epoch_milliseconds,
|
||||||
domain=forensic_report["reported_domain"],
|
domain=forensic_report["reported_domain"],
|
||||||
original_envelope_id=forensic_report["original_envelope_id"],
|
original_envelope_id=forensic_report["original_envelope_id"],
|
||||||
authentication_results=forensic_report["authentication_results"],
|
authentication_results=forensic_report["authentication_results"],
|
||||||
@@ -663,18 +703,18 @@ def save_forensic_report_to_opensearch(
|
|||||||
|
|
||||||
|
|
||||||
def save_smtp_tls_report_to_opensearch(
|
def save_smtp_tls_report_to_opensearch(
|
||||||
report,
|
report: dict[str, Any],
|
||||||
index_suffix=None,
|
index_suffix: Optional[str] = None,
|
||||||
index_prefix=None,
|
index_prefix: Optional[str] = None,
|
||||||
monthly_indexes=False,
|
monthly_indexes: bool = False,
|
||||||
number_of_shards=1,
|
number_of_shards: int = 1,
|
||||||
number_of_replicas=0,
|
number_of_replicas: int = 0,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Saves a parsed SMTP TLS report to OpenSearch
|
Saves a parsed SMTP TLS report to OpenSearch
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
report (OrderedDict): A parsed SMTP TLS report
|
report (dict): A parsed SMTP TLS report
|
||||||
index_suffix (str): The suffix of the name of the index to save to
|
index_suffix (str): The suffix of the name of the index to save to
|
||||||
index_prefix (str): The prefix of the name of the index to save to
|
index_prefix (str): The prefix of the name of the index to save to
|
||||||
monthly_indexes (bool): Use monthly indexes instead of daily indexes
|
monthly_indexes (bool): Use monthly indexes instead of daily indexes
|
||||||
@@ -684,8 +724,8 @@ def save_smtp_tls_report_to_opensearch(
|
|||||||
Raises:
|
Raises:
|
||||||
AlreadySaved
|
AlreadySaved
|
||||||
"""
|
"""
|
||||||
logger.info("Saving aggregate report to OpenSearch")
|
logger.info("Saving SMTP TLS report to OpenSearch")
|
||||||
org_name = report["org_name"]
|
org_name = report["organization_name"]
|
||||||
report_id = report["report_id"]
|
report_id = report["report_id"]
|
||||||
begin_date = human_timestamp_to_datetime(report["begin_date"], to_utc=True)
|
begin_date = human_timestamp_to_datetime(report["begin_date"], to_utc=True)
|
||||||
end_date = human_timestamp_to_datetime(report["end_date"], to_utc=True)
|
end_date = human_timestamp_to_datetime(report["end_date"], to_utc=True)
|
||||||
@@ -741,11 +781,11 @@ def save_smtp_tls_report_to_opensearch(
|
|||||||
number_of_shards=number_of_shards, number_of_replicas=number_of_replicas
|
number_of_shards=number_of_shards, number_of_replicas=number_of_replicas
|
||||||
)
|
)
|
||||||
|
|
||||||
smtp_tls_doc = _SMTPTLSFailureReportDoc(
|
smtp_tls_doc = _SMTPTLSReportDoc(
|
||||||
organization_name=report["organization_name"],
|
org_name=report["organization_name"],
|
||||||
date_range=[report["date_begin"], report["date_end"]],
|
date_range=[report["begin_date"], report["end_date"]],
|
||||||
date_begin=report["date_begin"],
|
date_begin=report["begin_date"],
|
||||||
date_end=report["date_end"],
|
date_end=report["end_date"],
|
||||||
contact_info=report["contact_info"],
|
contact_info=report["contact_info"],
|
||||||
report_id=report["report_id"],
|
report_id=report["report_id"],
|
||||||
)
|
)
|
||||||
@@ -760,32 +800,48 @@ def save_smtp_tls_report_to_opensearch(
|
|||||||
policy_doc = _SMTPTLSPolicyDoc(
|
policy_doc = _SMTPTLSPolicyDoc(
|
||||||
policy_domain=policy["policy_domain"],
|
policy_domain=policy["policy_domain"],
|
||||||
policy_type=policy["policy_type"],
|
policy_type=policy["policy_type"],
|
||||||
|
successful_session_count=policy["successful_session_count"],
|
||||||
|
failed_session_count=policy["failed_session_count"],
|
||||||
policy_string=policy_strings,
|
policy_string=policy_strings,
|
||||||
mx_host_patterns=mx_host_patterns,
|
mx_host_patterns=mx_host_patterns,
|
||||||
)
|
)
|
||||||
if "failure_details" in policy:
|
if "failure_details" in policy:
|
||||||
failure_details = policy["failure_details"]
|
for failure_detail in policy["failure_details"]:
|
||||||
receiving_mx_hostname = None
|
receiving_mx_hostname = None
|
||||||
additional_information_uri = None
|
additional_information_uri = None
|
||||||
failure_reason_code = None
|
failure_reason_code = None
|
||||||
if "receiving_mx_hostname" in failure_details:
|
ip_address = None
|
||||||
receiving_mx_hostname = failure_details["receiving_mx_hostname"]
|
receiving_ip = None
|
||||||
if "additional_information_uri" in failure_details:
|
receiving_mx_helo = None
|
||||||
additional_information_uri = failure_details[
|
sending_mta_ip = None
|
||||||
"additional_information_uri"
|
|
||||||
]
|
if "receiving_mx_hostname" in failure_detail:
|
||||||
if "failure_reason_code" in failure_details:
|
receiving_mx_hostname = failure_detail["receiving_mx_hostname"]
|
||||||
failure_reason_code = failure_details["failure_reason_code"]
|
if "additional_information_uri" in failure_detail:
|
||||||
policy_doc.add_failure_details(
|
additional_information_uri = failure_detail[
|
||||||
result_type=failure_details["result_type"],
|
"additional_information_uri"
|
||||||
ip_address=failure_details["ip_address"],
|
]
|
||||||
receiving_ip=failure_details["receiving_ip"],
|
if "failure_reason_code" in failure_detail:
|
||||||
receiving_mx_helo=failure_details["receiving_mx_helo"],
|
failure_reason_code = failure_detail["failure_reason_code"]
|
||||||
failed_session_count=failure_details["failed_session_count"],
|
if "ip_address" in failure_detail:
|
||||||
receiving_mx_hostname=receiving_mx_hostname,
|
ip_address = failure_detail["ip_address"]
|
||||||
additional_information_uri=additional_information_uri,
|
if "receiving_ip" in failure_detail:
|
||||||
failure_reason_code=failure_reason_code,
|
receiving_ip = failure_detail["receiving_ip"]
|
||||||
)
|
if "receiving_mx_helo" in failure_detail:
|
||||||
|
receiving_mx_helo = failure_detail["receiving_mx_helo"]
|
||||||
|
if "sending_mta_ip" in failure_detail:
|
||||||
|
sending_mta_ip = failure_detail["sending_mta_ip"]
|
||||||
|
policy_doc.add_failure_details(
|
||||||
|
result_type=failure_detail["result_type"],
|
||||||
|
ip_address=ip_address,
|
||||||
|
receiving_ip=receiving_ip,
|
||||||
|
receiving_mx_helo=receiving_mx_helo,
|
||||||
|
failed_session_count=failure_detail["failed_session_count"],
|
||||||
|
sending_mta_ip=sending_mta_ip,
|
||||||
|
receiving_mx_hostname=receiving_mx_hostname,
|
||||||
|
additional_information_uri=additional_information_uri,
|
||||||
|
failure_reason_code=failure_reason_code,
|
||||||
|
)
|
||||||
smtp_tls_doc.policies.append(policy_doc)
|
smtp_tls_doc.policies.append(policy_doc)
|
||||||
|
|
||||||
create_indexes([index], index_settings)
|
create_indexes([index], index_settings)
|
||||||
|
|||||||
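Not part of the diff above, just a usage sketch: with the retyped signatures in these hunks, everything after the host list becomes keyword-only and `apiKey` is renamed to `api_key`. The import path and credentials below are assumptions for illustration, not something this changeset defines.

# Sketch only: assumes these functions are exposed by parsedmarc's OpenSearch module.
from parsedmarc import opensearch

opensearch.set_hosts(
    ["https://opensearch.example.com:9200"],  # hypothetical host
    use_ssl=True,
    username="parsedmarc",  # hypothetical credentials for illustration
    password="changeme",
    timeout=30.0,
)

# A parsed aggregate report (a dict) could then be saved with:
# opensearch.save_aggregate_report_to_opensearch(aggregate_report, monthly_indexes=True)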
@@ -1,7 +1,7 @@
# About

`dbip-country-lite.mmdb` is provided by [dbip][dbip] under a
-[ Creative Commons Attribution 4.0 International License][cc].
+[Creative Commons Attribution 4.0 International License][cc].

-[dbip]: https://db-ip.com/db/lite.php
+[dbip]: https://db-ip.com/db/download/ip-to-country-lite
[cc]: http://creativecommons.org/licenses/by/4.0/
Binary file not shown.
@@ -3,6 +3,8 @@
A mapping is meant to make it easier to identify who or what a sending source is. Please consider contributing
additional mappings in a GitHub Pull Request.

+Do not open these CSV files in Excel. It will replace Unicode characters with question marks. Use LibreOffice Calc instead.
+
## base_reverse_dns_map.csv

A CSV file with three fields: `base_reverse_dns`, `name`, and `type`.
@@ -19,33 +21,72 @@ The `service_type` is based on the following rule precedence:
3. All telecommunications providers that offer internet access are identified as `ISP`, even if they also offer other services, such as web hosting or email hosting.
4. All web hosting providers are identified as `Web Hosting`, even if the service also offers email hosting.
5. All email account providers are identified as `Email Provider`, no matter how or where they are hosted
-6. All legitimate platforms offering their Software as a Service SaaS) are identified as `SaaS`, regardless of industry. This helps simplify metrics.
+6. All legitimate platforms offering their Software as a Service (SaaS) are identified as `SaaS`, regardless of industry. This helps simplify metrics.
7. All other senders that use their own domain as a Reverse DNS base domain should be identified based on their industry

+- Agriculture
+- Automotive
+- Beauty
+- Conglomerate
+- Construction
+- Consulting
+- Defense
+- Education
- Email Provider
- Email Security
-- Education
- Entertainment
+- Event Planning
- Finance
- Food
- Government
- Government Media
- Healthcare
+- IaaS
- Industrial
- ISP
+- Legal
- Logistics
+- Manufacturing
- Marketing
- MSP
+- MSSP
+- News
- Nonprofit
+- PaaS
+- Photography
+- Physical Security
- Print
+- Publishing
- Real Estate
- Retail
- SaaS
+- Science
+- Search Engine
- Social Media
+- Sports
+- Staffing
- Technology
- Travel
- Web Host

-The file currently contains over 600 mappings from a wide variety of email sending services, including large email
-providers, SaaS platforms, small web hosts, and healthcare companies. Ideally this mapping will continuously grow to
-include many other services and industries.
+The file currently contains over 1,400 mappings from a wide variety of email sending sources.
+
+## known_unknown_base_reverse_dns.txt
+
+A list of reverse DNS base domains that could not be identified as belonging to a particular organization, service, or industry.
+
+## base_reverse_dns.csv
+
+A CSV with the fields `source_name` and optionally `message_count`. This CSV can be generated by exporting the base DNS data from the Kibana or Splunk dashboards provided by parsedmarc. This file is not tracked by Git.
+
+## unknown_base_reverse_dns.csv
+
+A CSV file with the fields `source_name` and `message_count`. This file is not tracked by Git.
+
+## find_bad_utf8.py
+
+Locates invalid UTF-8 bytes in files and optionally tries to correct them. Generated by GPT-5. It helped me find where I had introduced invalid bytes in `base_reverse_dns_map.csv`.
+
+## find_unknown_base_reverse_dns.py
+
+A Python script that reads the domains in `base_reverse_dns.csv` and writes the domains that are not in `base_reverse_dns_map.csv` or `known_unknown_base_reverse_dns.txt` to `unknown_base_reverse_dns.csv`. This is useful for identifying potential additional domains to contribute to `base_reverse_dns_map.csv` and `known_unknown_base_reverse_dns.txt`.
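A minimal lookup sketch (not from the repository) showing how the three documented fields could be used to classify a sending source; it assumes the CSV's header row uses the field names given above.

import csv

def load_base_reverse_dns_map(path="base_reverse_dns_map.csv"):
    # Maps a lower-cased base reverse DNS domain to its (name, type) pair.
    mapping = {}
    with open(path, newline="", encoding="utf-8") as f:
        for row in csv.DictReader(f):
            mapping[row["base_reverse_dns"].lower()] = (row["name"], row["type"])
    return mapping

# Example lookup (domain chosen for illustration only):
# name, service_type = load_base_reverse_dns_map().get("amazonses.com", ("Unknown", "Unknown"))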
File diff suppressed because it is too large

parsedmarc/resources/maps/base_reverse_dns_types.txt (new file, 44 lines)
@@ -0,0 +1,44 @@
Agriculture
Automotive
Beauty
Conglomerate
Construction
Consulting
Defense
Education
Email Provider
Email Security
Entertainment
Event Planning
Finance
Food
Government
Government Media
Healthcare
ISP
IaaS
Industrial
Legal
Logistics
MSP
MSSP
Manufacturing
Marketing
News
Nonprofit
PaaS
Photography
Physical Security
Print
Publishing
Real Estate
Retail
SaaS
Science
Search Engine
Social Media
Sports
Staffing
Technology
Travel
Web Host
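One way this list could be used, sketched here as an assumption rather than an existing repo script: cross-check that every `type` value in `base_reverse_dns_map.csv` is one of the types listed above.

import csv

with open("base_reverse_dns_types.txt", encoding="utf-8") as f:
    allowed_types = {line.strip() for line in f if line.strip()}
with open("base_reverse_dns_map.csv", newline="", encoding="utf-8") as f:
    used_types = {row["type"].strip() for row in csv.DictReader(f)}
print("Types not in base_reverse_dns_types.txt:", sorted(used_types - allowed_types))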
parsedmarc/resources/maps/find_bad_utf8.py (new executable file, 488 lines)
@@ -0,0 +1,488 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import codecs
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import shutil
|
||||||
|
from typing import List, Tuple
|
||||||
|
|
||||||
|
"""
|
||||||
|
Locates and optionally corrects bad UTF-8 bytes in a file.
|
||||||
|
Generated by GPT-5 Use at your own risk.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# -------------------------
|
||||||
|
# UTF-8 scanning
|
||||||
|
# -------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def scan_line_for_utf8_errors(
|
||||||
|
line_bytes: bytes, line_no: int, base_offset: int, context: int
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
Scan one line of raw bytes for UTF-8 decoding errors.
|
||||||
|
Returns a list of dicts describing each error.
|
||||||
|
"""
|
||||||
|
pos = 0
|
||||||
|
results = []
|
||||||
|
while pos < len(line_bytes):
|
||||||
|
dec = codecs.getincrementaldecoder("utf-8")("strict")
|
||||||
|
try:
|
||||||
|
dec.decode(line_bytes[pos:], final=True)
|
||||||
|
break
|
||||||
|
except UnicodeDecodeError as e:
|
||||||
|
rel_index = e.start
|
||||||
|
abs_index_in_line = pos + rel_index
|
||||||
|
abs_offset = base_offset + abs_index_in_line
|
||||||
|
|
||||||
|
start_ctx = max(0, abs_index_in_line - context)
|
||||||
|
end_ctx = min(len(line_bytes), abs_index_in_line + 1 + context)
|
||||||
|
ctx_bytes = line_bytes[start_ctx:end_ctx]
|
||||||
|
bad_byte = line_bytes[abs_index_in_line : abs_index_in_line + 1]
|
||||||
|
col = abs_index_in_line + 1 # 1-based byte column
|
||||||
|
|
||||||
|
results.append(
|
||||||
|
{
|
||||||
|
"line": line_no,
|
||||||
|
"column": col,
|
||||||
|
"abs_offset": abs_offset,
|
||||||
|
"bad_byte_hex": bad_byte.hex(),
|
||||||
|
"context_hex": ctx_bytes.hex(),
|
||||||
|
"context_preview": ctx_bytes.decode("utf-8", errors="replace"),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
# Move past the offending byte and continue
|
||||||
|
pos = abs_index_in_line + 1
|
||||||
|
return results
|
||||||
|
|
||||||
|
|
||||||
|
def scan_file_for_utf8_errors(path: str, context: int, limit: int):
|
||||||
|
errors_found = 0
|
||||||
|
limit_val = limit if limit != 0 else float("inf")
|
||||||
|
|
||||||
|
with open(path, "rb") as f:
|
||||||
|
total_offset = 0
|
||||||
|
line_no = 0
|
||||||
|
while True:
|
||||||
|
line = f.readline()
|
||||||
|
if not line:
|
||||||
|
break
|
||||||
|
line_no += 1
|
||||||
|
results = scan_line_for_utf8_errors(line, line_no, total_offset, context)
|
||||||
|
for r in results:
|
||||||
|
errors_found += 1
|
||||||
|
print(
|
||||||
|
f"[ERROR {errors_found}] Line {r['line']}, Column {r['column']}, "
|
||||||
|
f"Absolute byte offset {r['abs_offset']}"
|
||||||
|
)
|
||||||
|
print(f" Bad byte: 0x{r['bad_byte_hex']}")
|
||||||
|
print(f" Context (hex): {r['context_hex']}")
|
||||||
|
print(f" Context (preview): {r['context_preview']}")
|
||||||
|
print()
|
||||||
|
if errors_found >= limit_val:
|
||||||
|
print(f"Reached limit of {limit} errors. Stopping.")
|
||||||
|
return errors_found
|
||||||
|
total_offset += len(line)
|
||||||
|
|
||||||
|
if errors_found == 0:
|
||||||
|
print("No invalid UTF-8 bytes found. 🎉")
|
||||||
|
else:
|
||||||
|
print(f"Found {errors_found} invalid UTF-8 byte(s).")
|
||||||
|
return errors_found
|
||||||
|
|
||||||
|
|
||||||
|
# -------------------------
|
||||||
|
# Whole-file conversion
|
||||||
|
# -------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def detect_encoding_text(path: str) -> Tuple[str, str]:
|
||||||
|
"""
|
||||||
|
Use charset-normalizer to detect file encoding.
|
||||||
|
Return (encoding_name, decoded_text). Falls back to cp1252 if needed.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
from charset_normalizer import from_path
|
||||||
|
except ImportError:
|
||||||
|
print(
|
||||||
|
"Please install charset-normalizer: pip install charset-normalizer",
|
||||||
|
file=sys.stderr,
|
||||||
|
)
|
||||||
|
sys.exit(4)
|
||||||
|
|
||||||
|
matches = from_path(path)
|
||||||
|
match = matches.best()
|
||||||
|
if match is None or match.encoding is None:
|
||||||
|
# Fallback heuristic for Western single-byte text
|
||||||
|
with open(path, "rb") as fb:
|
||||||
|
data = fb.read()
|
||||||
|
try:
|
||||||
|
return "cp1252", data.decode("cp1252", errors="strict")
|
||||||
|
except UnicodeDecodeError:
|
||||||
|
print("Unable to detect encoding reliably.", file=sys.stderr)
|
||||||
|
sys.exit(5)
|
||||||
|
|
||||||
|
return match.encoding, str(match)
|
||||||
|
|
||||||
|
|
||||||
|
def convert_to_utf8(src_path: str, out_path: str, src_encoding: str = None) -> str:
|
||||||
|
"""
|
||||||
|
Convert an entire file to UTF-8 (re-decoding everything).
|
||||||
|
If src_encoding is provided, use it; else auto-detect.
|
||||||
|
Returns the encoding actually used.
|
||||||
|
"""
|
||||||
|
if src_encoding:
|
||||||
|
with open(src_path, "rb") as fb:
|
||||||
|
data = fb.read()
|
||||||
|
try:
|
||||||
|
text = data.decode(src_encoding, errors="strict")
|
||||||
|
except LookupError:
|
||||||
|
print(f"Unknown encoding: {src_encoding}", file=sys.stderr)
|
||||||
|
sys.exit(6)
|
||||||
|
except UnicodeDecodeError as e:
|
||||||
|
print(f"Decoding failed with {src_encoding}: {e}", file=sys.stderr)
|
||||||
|
sys.exit(7)
|
||||||
|
used = src_encoding
|
||||||
|
else:
|
||||||
|
used, text = detect_encoding_text(src_path)
|
||||||
|
|
||||||
|
with open(out_path, "w", encoding="utf-8", newline="") as fw:
|
||||||
|
fw.write(text)
|
||||||
|
return used
|
||||||
|
|
||||||
|
|
||||||
|
def verify_utf8_file(path: str) -> Tuple[bool, str]:
|
||||||
|
try:
|
||||||
|
with open(path, "rb") as fb:
|
||||||
|
fb.read().decode("utf-8", errors="strict")
|
||||||
|
return True, ""
|
||||||
|
except UnicodeDecodeError as e:
|
||||||
|
return False, str(e)
|
||||||
|
|
||||||
|
|
||||||
|
# -------------------------
|
||||||
|
# Targeted single-byte fixer
|
||||||
|
# -------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def iter_lines_with_offsets(b: bytes):
|
||||||
|
"""
|
||||||
|
Yield (line_bytes, line_start_abs_offset). Preserves LF/CRLF/CR in bytes.
|
||||||
|
"""
|
||||||
|
start = 0
|
||||||
|
for i, byte in enumerate(b):
|
||||||
|
if byte == 0x0A: # LF
|
||||||
|
yield b[start : i + 1], start
|
||||||
|
start = i + 1
|
||||||
|
if start < len(b):
|
||||||
|
yield b[start:], start
|
||||||
|
|
||||||
|
|
||||||
|
def detect_probable_fallbacks() -> List[str]:
|
||||||
|
# Good defaults for Western/Portuguese text
|
||||||
|
return ["cp1252", "iso-8859-1", "iso-8859-15"]
|
||||||
|
|
||||||
|
|
||||||
|
def repair_mixed_utf8_line(line: bytes, base_offset: int, fallback_chain: List[str]):
|
||||||
|
"""
|
||||||
|
Strictly validate UTF-8 and fix *only* the exact offending byte when an error occurs.
|
||||||
|
This avoids touching adjacent valid UTF-8 (prevents mojibake like 'é').
|
||||||
|
"""
|
||||||
|
out_fragments: List[str] = []
|
||||||
|
fixes = []
|
||||||
|
pos = 0
|
||||||
|
n = len(line)
|
||||||
|
|
||||||
|
while pos < n:
|
||||||
|
dec = codecs.getincrementaldecoder("utf-8")("strict")
|
||||||
|
try:
|
||||||
|
s = dec.decode(line[pos:], final=True)
|
||||||
|
out_fragments.append(s)
|
||||||
|
break
|
||||||
|
except UnicodeDecodeError as e:
|
||||||
|
# Append the valid prefix before the error
|
||||||
|
if e.start > 0:
|
||||||
|
out_fragments.append(
|
||||||
|
line[pos : pos + e.start].decode("utf-8", errors="strict")
|
||||||
|
)
|
||||||
|
|
||||||
|
bad_index = pos + e.start # absolute index in 'line'
|
||||||
|
bad_slice = line[bad_index : bad_index + 1] # FIX EXACTLY ONE BYTE
|
||||||
|
|
||||||
|
# Decode that single byte using the first working fallback
|
||||||
|
decoded = None
|
||||||
|
used_enc = None
|
||||||
|
for enc in fallback_chain:
|
||||||
|
try:
|
||||||
|
decoded = bad_slice.decode(enc, errors="strict")
|
||||||
|
used_enc = enc
|
||||||
|
break
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
if decoded is None:
|
||||||
|
# latin-1 always succeeds (byte->same code point)
|
||||||
|
decoded = bad_slice.decode("latin-1")
|
||||||
|
used_enc = "latin-1 (fallback)"
|
||||||
|
|
||||||
|
out_fragments.append(decoded)
|
||||||
|
|
||||||
|
# Log the fix
|
||||||
|
col_1based = bad_index + 1 # byte-based column
|
||||||
|
fixes.append(
|
||||||
|
{
|
||||||
|
"line_base_offset": base_offset,
|
||||||
|
"line": None, # caller fills line number
|
||||||
|
"column": col_1based,
|
||||||
|
"abs_offset": base_offset + bad_index,
|
||||||
|
"bad_bytes_hex": bad_slice.hex(),
|
||||||
|
"used_encoding": used_enc,
|
||||||
|
"replacement_preview": decoded,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
# Advance exactly one byte past the offending byte and continue
|
||||||
|
pos = bad_index + 1
|
||||||
|
|
||||||
|
return "".join(out_fragments), fixes
|
||||||
|
|
||||||
|
|
||||||
|
def targeted_fix_to_utf8(
|
||||||
|
src_path: str,
|
||||||
|
out_path: str,
|
||||||
|
fallback_chain: List[str],
|
||||||
|
dry_run: bool,
|
||||||
|
max_fixes: int,
|
||||||
|
):
|
||||||
|
with open(src_path, "rb") as fb:
|
||||||
|
data = fb.read()
|
||||||
|
|
||||||
|
total_fixes = 0
|
||||||
|
repaired_lines: List[str] = []
|
||||||
|
line_no = 0
|
||||||
|
max_val = max_fixes if max_fixes != 0 else float("inf")
|
||||||
|
|
||||||
|
for line_bytes, base_offset in iter_lines_with_offsets(data):
|
||||||
|
line_no += 1
|
||||||
|
# Fast path: keep lines that are already valid UTF-8
|
||||||
|
try:
|
||||||
|
repaired_lines.append(line_bytes.decode("utf-8", errors="strict"))
|
||||||
|
continue
|
||||||
|
except UnicodeDecodeError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
fixed_text, fixes = repair_mixed_utf8_line(
|
||||||
|
line_bytes, base_offset, fallback_chain=fallback_chain
|
||||||
|
)
|
||||||
|
for f in fixes:
|
||||||
|
f["line"] = line_no
|
||||||
|
|
||||||
|
repaired_lines.append(fixed_text)
|
||||||
|
|
||||||
|
# Log fixes
|
||||||
|
for f in fixes:
|
||||||
|
total_fixes += 1
|
||||||
|
print(
|
||||||
|
f"[FIX {total_fixes}] Line {f['line']}, Column {f['column']}, Abs offset {f['abs_offset']}"
|
||||||
|
)
|
||||||
|
print(f" Bad bytes: 0x{f['bad_bytes_hex']}")
|
||||||
|
print(f" Used encoding: {f['used_encoding']}")
|
||||||
|
preview = f["replacement_preview"].replace("\r", "\\r").replace("\n", "\\n")
|
||||||
|
if len(preview) > 40:
|
||||||
|
preview = preview[:40] + "…"
|
||||||
|
print(f" Replacement preview: {preview}")
|
||||||
|
print()
|
||||||
|
if total_fixes >= max_val:
|
||||||
|
print(f"Reached max fixes limit ({max_fixes}). Stopping scan.")
|
||||||
|
break
|
||||||
|
if total_fixes >= max_val:
|
||||||
|
break
|
||||||
|
|
||||||
|
if dry_run:
|
||||||
|
print(f"Dry run complete. Detected {total_fixes} fix(es). No file written.")
|
||||||
|
return total_fixes
|
||||||
|
|
||||||
|
# Join and verify result can be encoded to UTF-8
|
||||||
|
repaired_text = "".join(repaired_lines)
|
||||||
|
try:
|
||||||
|
repaired_text.encode("utf-8", errors="strict")
|
||||||
|
except UnicodeEncodeError as e:
|
||||||
|
print(f"Internal error: repaired text not valid UTF-8: {e}", file=sys.stderr)
|
||||||
|
sys.exit(3)
|
||||||
|
|
||||||
|
with open(out_path, "w", encoding="utf-8", newline="") as fw:
|
||||||
|
fw.write(repaired_text)
|
||||||
|
|
||||||
|
print(f"Fixed file written to: {out_path}")
|
||||||
|
print(f"Total fixes applied: {total_fixes}")
|
||||||
|
return total_fixes
|
||||||
|
|
||||||
|
|
||||||
|
# -------------------------
|
||||||
|
# CLI
|
||||||
|
# -------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
ap = argparse.ArgumentParser(
|
||||||
|
description=(
|
||||||
|
"Scan for invalid UTF-8; optionally convert whole file or fix only invalid bytes.\n\n"
|
||||||
|
"By default, --convert and --fix **edit the input file in place** and create a backup "
|
||||||
|
"named '<input>.bak' before writing. If you pass --output, the original file is left "
|
||||||
|
"unchanged and no backup is created. Use --dry-run to preview fixes without writing."
|
||||||
|
),
|
||||||
|
formatter_class=argparse.RawTextHelpFormatter,
|
||||||
|
)
|
||||||
|
ap.add_argument("path", help="Path to the CSV/text file")
|
||||||
|
ap.add_argument(
|
||||||
|
"--context",
|
||||||
|
type=int,
|
||||||
|
default=20,
|
||||||
|
help="Bytes of context to show around errors (default: 20)",
|
||||||
|
)
|
||||||
|
ap.add_argument(
|
||||||
|
"--limit",
|
||||||
|
type=int,
|
||||||
|
default=100,
|
||||||
|
help="Max errors to report during scan (0 = unlimited)",
|
||||||
|
)
|
||||||
|
ap.add_argument(
|
||||||
|
"--skip-scan", action="store_true", help="Skip initial scan for speed"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Whole-file convert
|
||||||
|
ap.add_argument(
|
||||||
|
"--convert",
|
||||||
|
action="store_true",
|
||||||
|
help="Convert entire file to UTF-8 using auto/forced encoding "
|
||||||
|
"(in-place by default; creates '<input>.bak').",
|
||||||
|
)
|
||||||
|
ap.add_argument(
|
||||||
|
"--encoding",
|
||||||
|
help="Force source encoding for --convert or first fallback for --fix",
|
||||||
|
)
|
||||||
|
ap.add_argument(
|
||||||
|
"--output",
|
||||||
|
help="Write to this path instead of in-place (no .bak is created in that case)",
|
||||||
|
)
|
||||||
|
|
||||||
|
# Targeted fix
|
||||||
|
ap.add_argument(
|
||||||
|
"--fix",
|
||||||
|
action="store_true",
|
||||||
|
help="Fix only invalid byte(s) via fallback encodings "
|
||||||
|
"(in-place by default; creates '<input>.bak').",
|
||||||
|
)
|
||||||
|
ap.add_argument(
|
||||||
|
"--fallbacks",
|
||||||
|
help="Comma-separated fallback encodings (default: cp1252,iso-8859-1,iso-8859-15)",
|
||||||
|
)
|
||||||
|
ap.add_argument(
|
||||||
|
"--dry-run",
|
||||||
|
action="store_true",
|
||||||
|
help="(fix) Print fixes but do not write or create a .bak",
|
||||||
|
)
|
||||||
|
ap.add_argument(
|
||||||
|
"--max-fixes",
|
||||||
|
type=int,
|
||||||
|
default=0,
|
||||||
|
help="(fix) Stop after N fixes (0 = unlimited)",
|
||||||
|
)
|
||||||
|
|
||||||
|
args = ap.parse_args()
|
||||||
|
path = args.path
|
||||||
|
|
||||||
|
if not os.path.isfile(path):
|
||||||
|
print(f"File not found: {path}", file=sys.stderr)
|
||||||
|
sys.exit(2)
|
||||||
|
|
||||||
|
# Optional scan first
|
||||||
|
if not args.skip_scan:
|
||||||
|
scan_file_for_utf8_errors(path, context=args.context, limit=args.limit)
|
||||||
|
|
||||||
|
# Mode selection guards
|
||||||
|
if args.convert and args.fix:
|
||||||
|
print("Choose either --convert or --fix (not both).", file=sys.stderr)
|
||||||
|
sys.exit(9)
|
||||||
|
if not args.convert and not args.fix and args.skip_scan:
|
||||||
|
print("No action selected (use --convert or --fix).")
|
||||||
|
return
|
||||||
|
if not args.convert and not args.fix:
|
||||||
|
# User only wanted a scan
|
||||||
|
return
|
||||||
|
|
||||||
|
# Determine output path and backup behavior
|
||||||
|
# In-place by default: create '<input>.bak' before overwriting.
|
||||||
|
if args.output:
|
||||||
|
out_path = args.output
|
||||||
|
in_place = False
|
||||||
|
else:
|
||||||
|
out_path = path
|
||||||
|
in_place = True
|
||||||
|
|
||||||
|
# CONVERT mode
|
||||||
|
if args.convert:
|
||||||
|
print("\n[CONVERT MODE] Converting file to UTF-8...")
|
||||||
|
if in_place:
|
||||||
|
# Create backup before overwriting original
|
||||||
|
backup_path = path + ".bak"
|
||||||
|
shutil.copy2(path, backup_path)
|
||||||
|
print(f"Backup created: {backup_path}")
|
||||||
|
used = convert_to_utf8(path, out_path, src_encoding=args.encoding)
|
||||||
|
print(f"Source encoding used: {used}")
|
||||||
|
print(f"Saved UTF-8 file as: {out_path}")
|
||||||
|
ok, err = verify_utf8_file(out_path)
|
||||||
|
if ok:
|
||||||
|
print("Verification: output is valid UTF-8 ✅")
|
||||||
|
else:
|
||||||
|
print(f"Verification failed: {err}")
|
||||||
|
sys.exit(8)
|
||||||
|
return
|
||||||
|
|
||||||
|
# FIX mode (targeted, single-byte)
|
||||||
|
if args.fix:
|
||||||
|
print("\n[FIX MODE] Fixing only invalid bytes to UTF-8...")
|
||||||
|
if args.dry_run:
|
||||||
|
# Dry-run: never write or create backup
|
||||||
|
out_path_effective = os.devnull
|
||||||
|
in_place_effective = False
|
||||||
|
else:
|
||||||
|
out_path_effective = out_path
|
||||||
|
in_place_effective = in_place
|
||||||
|
|
||||||
|
# Build fallback chain (if --encoding provided, try it first)
|
||||||
|
if args.fallbacks:
|
||||||
|
fallback_chain = [e.strip() for e in args.fallbacks.split(",") if e.strip()]
|
||||||
|
else:
|
||||||
|
fallback_chain = detect_probable_fallbacks()
|
||||||
|
if args.encoding and args.encoding not in fallback_chain:
|
||||||
|
fallback_chain = [args.encoding] + fallback_chain
|
||||||
|
|
||||||
|
if in_place_effective:
|
||||||
|
# Create backup before overwriting original (only when actually writing)
|
||||||
|
backup_path = path + ".bak"
|
||||||
|
shutil.copy2(path, backup_path)
|
||||||
|
print(f"Backup created: {backup_path}")
|
||||||
|
|
||||||
|
fix_count = targeted_fix_to_utf8(
|
||||||
|
path,
|
||||||
|
out_path_effective,
|
||||||
|
fallback_chain=fallback_chain,
|
||||||
|
dry_run=args.dry_run,
|
||||||
|
max_fixes=args.max_fixes,
|
||||||
|
)
|
||||||
|
|
||||||
|
if not args.dry_run:
|
||||||
|
ok, err = verify_utf8_file(out_path_effective)
|
||||||
|
if ok:
|
||||||
|
print("Verification: output is valid UTF-8 ✅")
|
||||||
|
print(f"Fix mode completed — {fix_count} byte(s) corrected.")
|
||||||
|
else:
|
||||||
|
print(f"Verification failed: {err}")
|
||||||
|
sys.exit(8)
|
||||||
|
return
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
parsedmarc/resources/maps/find_unknown_base_reverse_dns.py (new executable file, 78 lines)
@@ -0,0 +1,78 @@
#!/usr/bin/env python

import os
import csv


def _main():
    input_csv_file_path = "base_reverse_dns.csv"
    base_reverse_dns_map_file_path = "base_reverse_dns_map.csv"
    known_unknown_list_file_path = "known_unknown_base_reverse_dns.txt"
    psl_overrides_file_path = "psl_overrides.txt"
    output_csv_file_path = "unknown_base_reverse_dns.csv"

    csv_headers = ["source_name", "message_count"]

    known_unknown_domains = []
    psl_overrides = []
    known_domains = []
    output_rows = []

    def load_list(file_path, list_var):
        if not os.path.exists(file_path):
            print(f"Error: {file_path} does not exist")
        print(f"Loading {file_path}")
        with open(file_path) as f:
            for line in f.readlines():
                domain = line.lower().strip()
                if domain in list_var:
                    print(f"Error: {domain} is in {file_path} multiple times")
                    exit(1)
                elif domain != "":
                    list_var.append(domain)

    load_list(known_unknown_list_file_path, known_unknown_domains)
    load_list(psl_overrides_file_path, psl_overrides)
    if not os.path.exists(base_reverse_dns_map_file_path):
        print(f"Error: {base_reverse_dns_map_file_path} does not exist")
    print(f"Loading {base_reverse_dns_map_file_path}")
    with open(base_reverse_dns_map_file_path) as f:
        for row in csv.DictReader(f):
            domain = row["base_reverse_dns"].lower().strip()
            if domain in known_domains:
                print(
                    f"Error: {domain} is in {base_reverse_dns_map_file_path} multiple times"
                )
                exit()
            else:
                known_domains.append(domain)
            if domain in known_unknown_domains and known_domains:
                print(
                    f"Error:{domain} is in {known_unknown_list_file_path} and \
                        {base_reverse_dns_map_file_path}"
                )
                exit(1)
    if not os.path.exists(input_csv_file_path):
        print(f"Error: {base_reverse_dns_map_file_path} does not exist")
        exit(1)
    with open(input_csv_file_path) as f:
        for row in csv.DictReader(f):
            domain = row["source_name"].lower().strip()
            if domain == "":
                continue
            for psl_domain in psl_overrides:
                if domain.endswith(psl_domain):
                    domain = psl_domain.strip(".").strip("-")
                    break
            if domain not in known_domains and domain not in known_unknown_domains:
                print(f"New unknown domain found: {domain}")
                output_rows.append(row)
    print(f"Writing {output_csv_file_path}")
    with open(output_csv_file_path, "w") as f:
        writer = csv.DictWriter(f, fieldnames=csv_headers)
        writer.writeheader()
        writer.writerows(output_rows)


if __name__ == "__main__":
    _main()
parsedmarc/resources/maps/known_unknown_base_reverse_dns.txt (new file, 601 lines)
@@ -0,0 +1,601 @@
|
|||||||
|
1jli.site
|
||||||
|
26.107
|
||||||
|
444qcuhilla.com
|
||||||
|
4xr1.com
|
||||||
|
9services.com
|
||||||
|
a7e.ru
|
||||||
|
a94434500-blog.com
|
||||||
|
aams8.jp
|
||||||
|
abv-10.top
|
||||||
|
acemail.co.in
|
||||||
|
activaicon.com
|
||||||
|
adcritic.net
|
||||||
|
adlucrumnewsletter.com
|
||||||
|
admin.corpivensa.gob.ve
|
||||||
|
advantageiq.com
|
||||||
|
advrider.ro
|
||||||
|
aerospacevitro.us.com
|
||||||
|
agenturserver.de
|
||||||
|
aghories.com
|
||||||
|
ai270.net
|
||||||
|
albagroup-eg.com
|
||||||
|
alchemy.net
|
||||||
|
alohabeachcamp.net
|
||||||
|
alsiscad.com
|
||||||
|
aluminumpipetubing.com
|
||||||
|
americanstorageca.com
|
||||||
|
amplusserver.info
|
||||||
|
anchorfundhub.com
|
||||||
|
anglishment.com
|
||||||
|
anteldata.net.uy
|
||||||
|
antis.edu
|
||||||
|
antonaoll.com
|
||||||
|
anviklass.org
|
||||||
|
anwrgrp.lat
|
||||||
|
aosau.net
|
||||||
|
arandomserver.com
|
||||||
|
aransk.ru
|
||||||
|
ardcs.cn
|
||||||
|
armninl.met
|
||||||
|
as29550.net
|
||||||
|
asahachimaru.com
|
||||||
|
aserv.co.za
|
||||||
|
asmecam.it
|
||||||
|
ateky.net.br
|
||||||
|
aurelienvos.com
|
||||||
|
automatech.lat
|
||||||
|
avistaadvantage.com
|
||||||
|
b8sales.com
|
||||||
|
bahjs.com
|
||||||
|
baliaura.com
|
||||||
|
banaras.co
|
||||||
|
bearandbullmarketnews.com
|
||||||
|
bestinvestingtime.com
|
||||||
|
bhjui.com
|
||||||
|
biocorp.com
|
||||||
|
biosophy.net
|
||||||
|
bitter-echo.com
|
||||||
|
bizhostingservices.com
|
||||||
|
blguss.com
|
||||||
|
bluenet.ch
|
||||||
|
bluhosting.com
|
||||||
|
bnasg.com
|
||||||
|
bodiax.pp.ua
|
||||||
|
bost-law.com
|
||||||
|
brainity.com
|
||||||
|
brazalnde.net
|
||||||
|
brellatransplc.shop
|
||||||
|
brnonet.cz
|
||||||
|
broadwaycover.com
|
||||||
|
brushinglegal.de
|
||||||
|
brw.net
|
||||||
|
btes.tv
|
||||||
|
budgeteasehub.com
|
||||||
|
buoytoys.com
|
||||||
|
buyjapanese.jp
|
||||||
|
c53dw7m24rj.com
|
||||||
|
cahtelrandom.org
|
||||||
|
casadelmarsamara.com
|
||||||
|
cashflowmasterypro.com
|
||||||
|
cavabeen.com
|
||||||
|
cbti.net
|
||||||
|
centralmalaysia.com
|
||||||
|
chauffeurplan.co.uk
|
||||||
|
checkpox.fun
|
||||||
|
chegouseuvlache.org
|
||||||
|
chinaxingyu.xyz
|
||||||
|
christus.mx
|
||||||
|
churchills.market
|
||||||
|
ci-xyz.fit
|
||||||
|
cisumrecords.com
|
||||||
|
ckaik.cn
|
||||||
|
clcktoact.com
|
||||||
|
cli-eurosignal.cz
|
||||||
|
cloud-admin.it
|
||||||
|
cloud-edm.com
|
||||||
|
cloudflare-email.org
|
||||||
|
cloudhosting.rs
|
||||||
|
cloudlogin.co
|
||||||
|
cloudplatformpro.com
|
||||||
|
cnode.io
|
||||||
|
cntcloud.com
|
||||||
|
code-it.net
|
||||||
|
codefriend.top
|
||||||
|
colombiaceropapel.org
|
||||||
|
commerceinsurance.com
|
||||||
|
comsharempc.com
|
||||||
|
conexiona.com
|
||||||
|
coolblaze.com
|
||||||
|
coowo.com
|
||||||
|
corpemail.net
|
||||||
|
cp2-myorderbox.com
|
||||||
|
cps.com.ar
|
||||||
|
crnagora.net
|
||||||
|
cross-d-bar-troutranch.com
|
||||||
|
ctla.co.kr
|
||||||
|
cumbalikonakhotel.com
|
||||||
|
currencyexconverter.com
|
||||||
|
daakbabu.com
|
||||||
|
daikinmae.com
|
||||||
|
dairyvalley.com.my
|
||||||
|
dastans.ru
|
||||||
|
datahost36.de
|
||||||
|
ddii.network
|
||||||
|
deep-sek.shop
|
||||||
|
deetownsounds.com
|
||||||
|
descarca-counter-strike.net
|
||||||
|
detrot.xyz
|
||||||
|
dettlaffinc.com
|
||||||
|
dextoolse.net
|
||||||
|
digestivedaily.com
|
||||||
|
digi.net.my
|
||||||
|
dinofelis.cn
|
||||||
|
diwkyncbi.top
|
||||||
|
dkginternet.com
|
||||||
|
dnexpress.info
|
||||||
|
dns-oid.com
|
||||||
dnsindia.net
domainserver.ne.jp
domconfig.com
doorsrv.com
dreampox.fun
dreamtechmedia.com
ds.network
dss-group.net
dvj.theworkpc.com
dwlcka.com
dynamic-wiretel.in
dyntcorp.com
easternkingspei.com
economiceagles.com
egosimail.com
eliotporterphotos.us
emailgids.net
emailperegrine.com
entendercopilot.com
entretothom.net
epaycontrol.com
epicinvestmentsreview.co
epicinvestmentsreview.com
epik.com
epsilon-group.com
erestaff.com
euro-trade-gmbh.com
example.com
exposervers.com-new
extendcp.co.uk
eyecandyhosting.xyz
fastwebnet.it
fd9ing7wfn.com
feipnghardware.com
fetscorp.shop
fewo-usedom.net
fin-crime.com
financeaimpoint.com
financeupward.com
firmflat.com
flex-video.bnr.la
flourishfusionlife.com
formicidaehunt.net
fosterheap.com
fredi.shop
frontiernet.net
ftifb7tk3c.com
gamersprotectionvpn.online
gendns.com
getgreencardsfast.com
getthatroi.com
gibbshosting.com
gigidea.net
giize.com
ginous.eu.com
gis.net
gist-th.com
globalglennpartners.com
goldsboroughplace.com
gophermedia.com
gqlists.us.com
gratzl.de
greatestworldnews.com
greennutritioncare.com
gsbb.com
gumbolimbo.net
h-serv.co.uk
haedefpartners.com
halcyon-aboveboard.com
hanzubon.org
healthfuljourneyjoy.com
hgnbroken.us.com
highwey-diesel.com
hirofactory.com
hjd.asso.fr
hongchenggco.pro
hongkongtaxi.co
hopsinthehanger.com
hosted-by-worldstream.net
hostelsucre.com
hosting1337.com
hostinghane.com
hostinglotus.cloud
hostingmichigan.com
hostiran.name
hostmnl.com
hostname.localhost
hostnetwork.com
hosts.net.nz
hostserv.eu
hostwhitelabel.com
hpms1.jp
hunariojmk.net
hunriokinmuim.net
hypericine.com
i-mecca.net
iaasdns.com
iam.net.ma
iconmarketingguy.com
idcfcloud.net
idealconcept.live
igmohji.com
igppevents.org.uk
ihglobaldns.com
ilmessicano.com
imjtmn.cn
immenzaces.com
in-addr-arpa
in-addr.arpa
indsalelimited.com
indulgent-holistic.com
industechint.org
inshaaegypt.com
intal.uz
interfarma.kz
intocpanel.com
ip-147-135-108.us
ip-178-33-109.eu
ip-ptr.tech
iswhatpercent.com
itsidc.com
itwebs.com
iuon.net
ivol.co
jalanet.co.id
jimishare.com
jlccptt.net.cn
jlenterprises.co.uk
jmontalto.com
joyomokei.com
jumanra.org
justlongshirts.com
kahlaa.com
kaw.theworkpc.com
kbronet.com.tw
kdnursing.org
kielnet.net
kihy.theworkpc.com
kingschurchwirral.org
kitchenaildbd.com
klaomi.shop
knkconsult.net
kohshikai.com
krhfund.org
krillaglass.com
lancorhomes.com
landpedia.org
lanzatuseo.es
layerdns.cloud
learninglinked.com
legenditds.com
levertechcentre.com
lhost.no
lideri.net.br
lighthouse-media.com
lightpath.net
limogesporcelainboxes.com
lindsaywalt.net
linuxsunucum.com
listertermoformadoa.com
llsend.com
local.net
lohkal.com
londionrtim.net
lonestarmm.net
longmarquis.com
longwoodmgmt.com
lse.kz
lunvoy.com
luxarpro.ru
lwl-puehringer.at
lynx.net.lb
lyse.net
m-sender.com.ua
maggiolicloud.it
magnetmail.net
magnumgo.uz
maia11.com
mail-fire.com
mailsentinel.net
mailset.cn
malardino.net
managed-vps.net
manhattanbulletpoint.com
manpowerservices.com
marketmysterycode.com
marketwizardspro.com
masterclassjournal.com
matroguel.cam
maximpactipo.com
mechanicalwalk.store
mediavobis.com
meqlobal.com
mgts.by
migrans.net
miixta.com
milleniumsrv.com
mindworksunlimited.com
mirth-gale.com
misorpresa.com
mitomobile.com
mitsubachi-kibako.net
mjinn.com
mkegs.shop
mobius.fr
model-ac.ink
moderntradingnews.com
monnaiegroup.com
monopolizeright.com
moonjaws.com
morningnewscatcher.com
motion4ever.net
mschosting.com
msdp1.com
mspnet.pro
mts-nn.ru
multifamilydesign.com
mxserver.ro
mxthunder.net
my-ihor.ru
mycloudmailbox.com
myfriendforum.com
myrewards.net
mysagestore.com
mysecurewebserver.com
myshanet.net
myvps.jp
mywedsite.net
mywic.eu
name.tools
nanshenqfurniture.com
nask.pl
navertise.net
ncbb.kz
ncport.ru
ncsdi.ws
nebdig.com
neovet-base.ru
netbri.com
netcentertelecom.net.br
neti.ee
netkl.org
newinvestingguide.com
newwallstreetcode.com
ngvcv.cn
nic.name
nidix.net
nieuwedagnetwerk.net
nlscanme.com
nmeuh.cn
noisndametal.com
nucleusemail.com
nutriboostlife.com
nwo.giize.com
nwwhalewatchers.org
ny.adsl
nyt1.com
offerslatedeals.com
office365.us
ogicom.net
olivettilexikon.co.uk
omegabrasil.inf.br
onnet21.com
onumubunumu.com
oppt-ac.fit
orbitel.net.co
orfsurface.com
orientalspot.com
outsidences.com
ovaltinalization.co
overta.ru
ox28vgrurc.com
pamulang.net
panaltyspot.space
panolacountysheriffms.com
passionatesmiles.com
paulinelam.com
pdi-corp.com
peloquinbeck.com
perimetercenter.net
permanentscreen.com
permasteellisagroup.com
perumkijhyu.net
pesnia.com.ua
ph8ltwdi12o.com
pharmada.com.de
phdns3.es
pigelixval1.com
pipefittingsindia.com
planethoster.net
playamedia.io
plesk.page
pmnhost.net
pokiloandhu.net
pokupki5.ru
polandi.net
popiup.com
ports.net
posolstvostilya.com
potia.net
prima.com.ar
prima.net.ar
profsol.co.uk
prohealthmotion.com
promooffermarket.site
proudserver.com
proxado.com
psnm.ru
pvcwindowsprices.live
qontenciplc.autos
quakeclick.com
quasarstate.store
quatthonggiotico.com
qxyxab44njd.com
radianthealthrenaissance.com
rapidns.com
raxa.host
reberte.com
reethvikintl.com
regruhosting.ru
reliablepanel.com
rgb365.eu
riddlecamera.net
riddletrends.com
roccopugliese.com
runnin-rebels.com
rupar.puglia.it
rwdhosting.ca
s500host.com
sageevents.co.ke
sahacker-2020.com
samsales.site
sante-lorraine.fr
saransk.ru
satirogluet.com
scioncontacts.com
sdcc.my
seaspraymta3.net
secorp.mx
securen.net
securerelay.in
securev.net
seductiveeyes.com
seizethedayconsulting.com
serroplast.shop
server290.com
server342.com
server3559.cc
servershost.biz
sfek.kz
sgnetway.net
shopfox.ca
silvestrejaguar.sbs
silvestreonca.sbs
simplediagnostics.org
siriuscloud.jp
sisglobalresearch.com
sixpacklink.net
sjestyle.com
smallvillages.com
smartape-vps.com
solusoftware.com
sourcedns.com
southcoastwebhosting12.com
specialtvvs.com
spiritualtechnologies.io
sprout.org
srv.cat
stableserver.net
statlerfa.co.uk
stock-smtp.top
stockepictigers.com
stockexchangejournal.com
subterranean-concave.com
suksangroup.com
swissbluetopaz.com
switer.shop
sysop4.com
system.eu.com
szhongbing.com
t-jon.com
tacaindo.net
tacom.tj
tankertelz.co
tataidc.com
teamveiw.com
tecnoxia.net
tel-xyz.fit
tenkids.net
terminavalley.com
thaicloudsolutions.com
thaikinghost.com
thaimonster.com
thegermainetruth.net
thehandmaderose.com
thepushcase.com
ticdns.com
tigo.bo
toledofibra.net.br
topdns.com
totaal.net
totalplay.net
tqh.ro
traderlearningcenter.com
tradeukraine.site
traveleza.com
trwww.com
tsuzakij.com
tullostrucking.com
turbinetrends.com
twincitiesdistinctivehomes.com
tylerfordonline.com
uiyum.com
ultragate.com
uneedacollie.com
unified.services
unite.services
urawasl.com
us.servername.us
vagebond.net
varvia.de
vbcploo.com
vdc.vn
vendimetry.com
vibrantwellnesscorp.com
virtualine.org
visit.docotor
viviotech.us
vlflgl.com
volganet.ru
vrns.net
vulterdi.edu
vvondertex.com
wallstreetsgossip.com
wamego.net
wanekoohost.com
wealthexpertisepro.com
web-login.eu
weblinkinternational.com
webnox.io
websale.net
welllivinghive.com
westparkcom.com
wetransfer-eu.com
wheelch.me
whoflew.com
whpservers.com
wisdomhard.com
wisewealthcircle.com
wisvis.com
wodeniowa.com
wordpresshosting.xyz
wsiph2.com
xnt.mx
xodiax.com
xpnuf.cn
xsfati.us.com
xspmail.jp
yourciviccompass.com
yourinvestworkbook.com
yoursitesecure.net
zerowebhosting.net
zmml.uk
znlc.jp
ztomy.com
23	parsedmarc/resources/maps/psl_overrides.txt	Normal file
@@ -0,0 +1,23 @@
-applefibernet.com
-c3.net.pl
-celsiainternet.com
-clientes-izzi.mx
-clientes-zap-izzi.mx
-imnet.com.br
-mcnbd.com
-smile.com.bd
-tataidc.co.in
-veloxfiber.com.br
-wconect.com.br
.amazonaws.com
.cloudaccess.net
.ddnsgeek.com
.fastvps-server.com
.in-addr-arpa
.in-addr.arpa
.kasserver.com
.kinghost.net
.linode.com
.linodeusercontent.com
.na4u.ru
.sakura.ne.jp
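The entries above are consumed by the updated get_base_domain() in utils (see the hunk later in this compare): an entry matches as a suffix, and its leading "-" or "." is stripped from the returned base domain. A minimal sketch of that lookup, assuming a standalone helper name that is illustrative only:

    # Sketch only: mirrors the override handling shown in the get_base_domain()
    # change later in this compare; the helper name is not part of parsedmarc.
    from typing import Optional

    import publicsuffixlist

    psl = publicsuffixlist.PublicSuffixList()

    def base_domain_with_overrides(domain: str, overrides: list[str]) -> Optional[str]:
        domain = domain.lower()
        for override in overrides:
            # Entries such as "-tataidc.co.in" or ".sakura.ne.jp" match as suffixes
            if domain.endswith(override):
                return override.strip(".").strip("-")
        # Fall back to the public suffix list when no override applies
        return psl.privatesuffix(domain)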
184	parsedmarc/resources/maps/sortlists.py	Executable file
@@ -0,0 +1,184 @@
#!/usr/bin/env python3

from __future__ import annotations

import os
import csv
from pathlib import Path
from typing import Mapping, Iterable, Optional, Collection, Union, List, Dict


class CSVValidationError(Exception):
    def __init__(self, errors: list[str]):
        super().__init__("\n".join(errors))
        self.errors = errors


def sort_csv(
    filepath: Union[str, Path],
    field: str,
    *,
    sort_field_value_must_be_unique: bool = True,
    strip_whitespace: bool = True,
    fields_to_lowercase: Optional[Iterable[str]] = None,
    case_insensitive_sort: bool = False,
    required_fields: Optional[Iterable[str]] = None,
    allowed_values: Optional[Mapping[str, Collection[str]]] = None,
) -> List[Dict[str, str]]:
    """
    Read a CSV, optionally normalize rows (strip whitespace, lowercase certain fields),
    validate field values, and write the sorted CSV back to the same path.

    - filepath: Path to the CSV to sort.
    - field: The field name to sort by.
    - fields_to_lowercase: Permanently lowercases these field(s) in the data.
    - strip_whitespace: Remove all whitespace at the beginning and end of field values.
    - case_insensitive_sort: Ignore case when sorting without changing values.
    - required_fields: A list of fields that must have data in all rows.
    - allowed_values: A mapping of allowed values for fields.
    """
    path = Path(filepath)
    required_fields = set(required_fields or [])
    lower_set = set(fields_to_lowercase or [])
    allowed_sets = {k: set(v) for k, v in (allowed_values or {}).items()}
    if sort_field_value_must_be_unique:
        seen_sort_field_values = []

    with path.open("r", newline="") as infile:
        reader = csv.DictReader(infile)
        fieldnames = reader.fieldnames or []
        if field not in fieldnames:
            raise CSVValidationError([f"Missing sort column: {field!r}"])
        missing_headers = required_fields - set(fieldnames)
        if missing_headers:
            raise CSVValidationError(
                [f"Missing required header(s): {sorted(missing_headers)}"]
            )
        rows = list(reader)

    def normalize_row(row: Dict[str, str]) -> None:
        if strip_whitespace:
            for k, v in row.items():
                if isinstance(v, str):
                    row[k] = v.strip()
        for fld in lower_set:
            if fld in row and isinstance(row[fld], str):
                row[fld] = row[fld].lower()

    def validate_row(
        row: Dict[str, str], sort_field: str, line_no: int, errors: list[str]
    ) -> None:
        if sort_field_value_must_be_unique:
            if row[sort_field] in seen_sort_field_values:
                errors.append(f"Line {line_no}: Duplicate row for '{row[sort_field]}'")
            else:
                seen_sort_field_values.append(row[sort_field])
        for rf in required_fields:
            val = row.get(rf)
            if val is None or val == "":
                errors.append(
                    f"Line {line_no}: Missing value for required field '{rf}'"
                )
        for field, allowed_values in allowed_sets.items():
            if field in row:
                val = row[field]
                if val not in allowed_values:
                    errors.append(
                        f"Line {line_no}: '{val}' is not an allowed value for '{field}' "
                        f"(allowed: {sorted(allowed_values)})"
                    )

    errors: list[str] = []
    for idx, row in enumerate(rows, start=2):  # header is line 1
        normalize_row(row)
        validate_row(row, field, idx, errors)

    if errors:
        raise CSVValidationError(errors)

    def sort_key(r: Dict[str, str]):
        v = r.get(field, "")
        if isinstance(v, str) and case_insensitive_sort:
            return v.casefold()
        return v

    rows.sort(key=sort_key)

    with open(filepath, "w", newline="") as outfile:
        writer = csv.DictWriter(outfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)


def sort_list_file(
    filepath: Union[str, Path],
    *,
    lowercase: bool = True,
    strip: bool = True,
    deduplicate: bool = True,
    remove_blank_lines: bool = True,
    ending_newline: bool = True,
    newline: Optional[str] = "\n",
):
    """Read a list from a file, sort it, optionally strip and deduplicate the values,
    then write that list back to the file.

    - filepath: The path to the file.
    - lowercase: Lowercase all values prior to sorting.
    - remove_blank_lines: Remove any blank lines.
    - ending_newline: End the file with a newline, even if remove_blank_lines is true.
    - newline: The newline character to use.
    """
    with open(filepath, mode="r", newline=newline) as infile:
        lines = infile.readlines()
    for i in range(len(lines)):
        if lowercase:
            lines[i] = lines[i].lower()
        if strip:
            lines[i] = lines[i].strip()
    if deduplicate:
        lines = list(set(lines))
    if remove_blank_lines:
        while "" in lines:
            lines.remove("")
    lines = sorted(lines)
    if ending_newline:
        if lines[-1] != "":
            lines.append("")
    with open(filepath, mode="w", newline=newline) as outfile:
        outfile.write("\n".join(lines))


def _main():
    map_file = "base_reverse_dns_map.csv"
    map_key = "base_reverse_dns"
    list_files = ["known_unknown_base_reverse_dns.txt", "psl_overrides.txt"]
    types_file = "base_reverse_dns_types.txt"

    with open(types_file) as f:
        types = f.readlines()
    while "" in types:
        types.remove("")

    map_allowed_values = {"Type": types}

    for list_file in list_files:
        if not os.path.exists(list_file):
            print(f"Error: {list_file} does not exist")
            exit(1)
        sort_list_file(list_file)
    if not os.path.exists(types_file):
        print(f"Error: {types_file} does not exist")
        exit(1)
    sort_list_file(types_file, lowercase=False)
    if not os.path.exists(map_file):
        print(f"Error: {map_file} does not exist")
        exit(1)
    try:
        sort_csv(map_file, map_key, allowed_values=map_allowed_values)
    except CSVValidationError as e:
        print(f"{map_file} did not validate: {e}")


if __name__ == "__main__":
    _main()
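A minimal usage sketch of the helpers above, run from the maps directory; the allowed "Type" values shown are placeholders, not the actual contents of base_reverse_dns_types.txt:

    # Sketch only: sort a list file, then sort and validate the CSV map
    from sortlists import CSVValidationError, sort_csv, sort_list_file

    sort_list_file("psl_overrides.txt")
    try:
        sort_csv(
            "base_reverse_dns_map.csv",
            "base_reverse_dns",
            allowed_values={"Type": {"Email Provider", "ISP", "Web Host"}},  # placeholder types
        )
    except CSVValidationError as e:
        print(e)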
@@ -1,6 +1,10 @@
 # -*- coding: utf-8 -*-
 
+from __future__ import annotations
+
 import json
+from typing import Any
+
 import boto3
 
 from parsedmarc.log import logger
@@ -8,16 +12,16 @@ from parsedmarc.utils import human_timestamp_to_datetime
 
 
 class S3Client(object):
-    """A client for a Amazon S3"""
+    """A client for interacting with Amazon S3"""
 
     def __init__(
         self,
-        bucket_name,
-        bucket_path,
-        region_name,
-        endpoint_url,
-        access_key_id,
-        secret_access_key,
+        bucket_name: str,
+        bucket_path: str,
+        region_name: str,
+        endpoint_url: str,
+        access_key_id: str,
+        secret_access_key: str,
     ):
         """
         Initializes the S3Client
@@ -47,18 +51,18 @@ class S3Client(object):
             aws_access_key_id=access_key_id,
             aws_secret_access_key=secret_access_key,
         )
-        self.bucket = self.s3.Bucket(self.bucket_name)
+        self.bucket = self.s3.Bucket(self.bucket_name)  # type: ignore
 
-    def save_aggregate_report_to_s3(self, report):
+    def save_aggregate_report_to_s3(self, report: dict[str, Any]):
         self.save_report_to_s3(report, "aggregate")
 
-    def save_forensic_report_to_s3(self, report):
+    def save_forensic_report_to_s3(self, report: dict[str, Any]):
         self.save_report_to_s3(report, "forensic")
 
-    def save_smtp_tls_report_to_s3(self, report):
+    def save_smtp_tls_report_to_s3(self, report: dict[str, Any]):
         self.save_report_to_s3(report, "smtp_tls")
 
-    def save_report_to_s3(self, report, report_type):
+    def save_report_to_s3(self, report: dict[str, Any], report_type: str):
         if report_type == "smtp_tls":
             report_date = report["begin_date"]
             report_id = report["report_id"]
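A minimal usage sketch of the annotated S3 client above, assuming it is importable as parsedmarc.s3.S3Client; all connection values are placeholders:

    # Sketch only: construct the client and store a parsed aggregate report
    from parsedmarc.s3 import S3Client

    client = S3Client(
        bucket_name="dmarc-reports",
        bucket_path="parsedmarc",
        region_name="us-east-1",
        endpoint_url="https://s3.us-east-1.amazonaws.com",
        access_key_id="EXAMPLE_KEY_ID",
        secret_access_key="EXAMPLE_SECRET",
    )
    aggregate_report = {}  # placeholder for a parsed aggregate report dict
    client.save_aggregate_report_to_s3(aggregate_report)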
@@ -1,11 +1,16 @@
|
|||||||
from urllib.parse import urlparse
|
# -*- coding: utf-8 -*-
|
||||||
import socket
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
import json
|
import json
|
||||||
|
import socket
|
||||||
|
from typing import Any, Union
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
import urllib3
|
|
||||||
import requests
|
import requests
|
||||||
|
import urllib3
|
||||||
|
|
||||||
from parsedmarc import __version__
|
from parsedmarc.constants import USER_AGENT
|
||||||
from parsedmarc.log import logger
|
from parsedmarc.log import logger
|
||||||
from parsedmarc.utils import human_timestamp_to_unix_timestamp
|
from parsedmarc.utils import human_timestamp_to_unix_timestamp
|
||||||
|
|
||||||
@@ -23,7 +28,13 @@ class HECClient(object):
|
|||||||
# http://docs.splunk.com/Documentation/Splunk/latest/RESTREF/RESTinput#services.2Fcollector
|
# http://docs.splunk.com/Documentation/Splunk/latest/RESTREF/RESTinput#services.2Fcollector
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self, url, access_token, index, source="parsedmarc", verify=True, timeout=60
|
self,
|
||||||
|
url: str,
|
||||||
|
access_token: str,
|
||||||
|
index: str,
|
||||||
|
source: str = "parsedmarc",
|
||||||
|
verify=True,
|
||||||
|
timeout=60,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Initializes the HECClient
|
Initializes the HECClient
|
||||||
@@ -37,9 +48,9 @@ class HECClient(object):
|
|||||||
timeout (float): Number of seconds to wait for the server to send
|
timeout (float): Number of seconds to wait for the server to send
|
||||||
data before giving up
|
data before giving up
|
||||||
"""
|
"""
|
||||||
url = urlparse(url)
|
parsed_url = urlparse(url)
|
||||||
self.url = "{0}://{1}/services/collector/event/1.0".format(
|
self.url = "{0}://{1}/services/collector/event/1.0".format(
|
||||||
url.scheme, url.netloc
|
parsed_url.scheme, parsed_url.netloc
|
||||||
)
|
)
|
||||||
self.access_token = access_token.lstrip("Splunk ")
|
self.access_token = access_token.lstrip("Splunk ")
|
||||||
self.index = index
|
self.index = index
|
||||||
@@ -48,14 +59,19 @@ class HECClient(object):
|
|||||||
self.session = requests.Session()
|
self.session = requests.Session()
|
||||||
self.timeout = timeout
|
self.timeout = timeout
|
||||||
self.session.verify = verify
|
self.session.verify = verify
|
||||||
self._common_data = dict(host=self.host, source=self.source, index=self.index)
|
self._common_data: dict[str, Union[str, int, float, dict]] = dict(
|
||||||
|
host=self.host, source=self.source, index=self.index
|
||||||
|
)
|
||||||
|
|
||||||
self.session.headers = {
|
self.session.headers = {
|
||||||
"User-Agent": "parsedmarc/{0}".format(__version__),
|
"User-Agent": USER_AGENT,
|
||||||
"Authorization": "Splunk {0}".format(self.access_token),
|
"Authorization": "Splunk {0}".format(self.access_token),
|
||||||
}
|
}
|
||||||
|
|
||||||
def save_aggregate_reports_to_splunk(self, aggregate_reports):
|
def save_aggregate_reports_to_splunk(
|
||||||
|
self,
|
||||||
|
aggregate_reports: Union[list[dict[str, Any]], dict[str, Any]],
|
||||||
|
):
|
||||||
"""
|
"""
|
||||||
Saves aggregate DMARC reports to Splunk
|
Saves aggregate DMARC reports to Splunk
|
||||||
|
|
||||||
@@ -75,9 +91,12 @@ class HECClient(object):
|
|||||||
json_str = ""
|
json_str = ""
|
||||||
for report in aggregate_reports:
|
for report in aggregate_reports:
|
||||||
for record in report["records"]:
|
for record in report["records"]:
|
||||||
new_report = dict()
|
new_report: dict[str, Union[str, int, float, dict]] = dict()
|
||||||
for metadata in report["report_metadata"]:
|
for metadata in report["report_metadata"]:
|
||||||
new_report[metadata] = report["report_metadata"][metadata]
|
new_report[metadata] = report["report_metadata"][metadata]
|
||||||
|
new_report["interval_begin"] = record["interval_begin"]
|
||||||
|
new_report["interval_end"] = record["interval_end"]
|
||||||
|
new_report["normalized_timespan"] = record["normalized_timespan"]
|
||||||
new_report["published_policy"] = report["policy_published"]
|
new_report["published_policy"] = report["policy_published"]
|
||||||
new_report["source_ip_address"] = record["source"]["ip_address"]
|
new_report["source_ip_address"] = record["source"]["ip_address"]
|
||||||
new_report["source_country"] = record["source"]["country"]
|
new_report["source_country"] = record["source"]["country"]
|
||||||
@@ -98,7 +117,9 @@ class HECClient(object):
|
|||||||
new_report["spf_results"] = record["auth_results"]["spf"]
|
new_report["spf_results"] = record["auth_results"]["spf"]
|
||||||
|
|
||||||
data["sourcetype"] = "dmarc:aggregate"
|
data["sourcetype"] = "dmarc:aggregate"
|
||||||
timestamp = human_timestamp_to_unix_timestamp(new_report["begin_date"])
|
timestamp = human_timestamp_to_unix_timestamp(
|
||||||
|
new_report["interval_begin"]
|
||||||
|
)
|
||||||
data["time"] = timestamp
|
data["time"] = timestamp
|
||||||
data["event"] = new_report.copy()
|
data["event"] = new_report.copy()
|
||||||
json_str += "{0}\n".format(json.dumps(data))
|
json_str += "{0}\n".format(json.dumps(data))
|
||||||
@@ -113,7 +134,10 @@ class HECClient(object):
|
|||||||
if response["code"] != 0:
|
if response["code"] != 0:
|
||||||
raise SplunkError(response["text"])
|
raise SplunkError(response["text"])
|
||||||
|
|
||||||
def save_forensic_reports_to_splunk(self, forensic_reports):
|
def save_forensic_reports_to_splunk(
|
||||||
|
self,
|
||||||
|
forensic_reports: Union[list[dict[str, Any]], dict[str, Any]],
|
||||||
|
):
|
||||||
"""
|
"""
|
||||||
Saves forensic DMARC reports to Splunk
|
Saves forensic DMARC reports to Splunk
|
||||||
|
|
||||||
@@ -147,7 +171,9 @@ class HECClient(object):
|
|||||||
if response["code"] != 0:
|
if response["code"] != 0:
|
||||||
raise SplunkError(response["text"])
|
raise SplunkError(response["text"])
|
||||||
|
|
||||||
def save_smtp_tls_reports_to_splunk(self, reports):
|
def save_smtp_tls_reports_to_splunk(
|
||||||
|
self, reports: Union[list[dict[str, Any]], dict[str, Any]]
|
||||||
|
):
|
||||||
"""
|
"""
|
||||||
Saves aggregate DMARC reports to Splunk
|
Saves aggregate DMARC reports to Splunk
|
||||||
|
|
||||||
|
|||||||
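A minimal usage sketch of the HECClient constructor shown in the hunks above, with placeholder connection details:

    # Sketch only: send parsed aggregate reports to a Splunk HTTP Event Collector
    from parsedmarc.splunk import HECClient

    hec = HECClient(
        url="https://splunk.example.com:8088",
        access_token="YOUR-HEC-TOKEN",
        index="dmarc",
        source="parsedmarc",
        verify=True,
        timeout=60,
    )
    aggregate_reports = []  # placeholder for a list of parsed aggregate report dicts
    hec.save_aggregate_reports_to_splunk(aggregate_reports)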
@@ -1,8 +1,15 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
import logging
|
import logging
|
||||||
import logging.handlers
|
import logging.handlers
|
||||||
import json
|
import socket
|
||||||
|
import ssl
|
||||||
|
import time
|
||||||
|
from typing import Any, Optional
|
||||||
|
|
||||||
from parsedmarc import (
|
from parsedmarc import (
|
||||||
parsed_aggregate_reports_to_csv_rows,
|
parsed_aggregate_reports_to_csv_rows,
|
||||||
@@ -14,31 +21,161 @@ from parsedmarc import (
|
|||||||
class SyslogClient(object):
|
class SyslogClient(object):
|
||||||
"""A client for Syslog"""
|
"""A client for Syslog"""
|
||||||
|
|
||||||
def __init__(self, server_name, server_port):
|
def __init__(
|
||||||
|
self,
|
||||||
|
server_name: str,
|
||||||
|
server_port: int,
|
||||||
|
protocol: str = "udp",
|
||||||
|
cafile_path: Optional[str] = None,
|
||||||
|
certfile_path: Optional[str] = None,
|
||||||
|
keyfile_path: Optional[str] = None,
|
||||||
|
timeout: float = 5.0,
|
||||||
|
retry_attempts: int = 3,
|
||||||
|
retry_delay: int = 5,
|
||||||
|
):
|
||||||
"""
|
"""
|
||||||
Initializes the SyslogClient
|
Initializes the SyslogClient
|
||||||
Args:
|
Args:
|
||||||
server_name (str): The Syslog server
|
server_name (str): The Syslog server
|
||||||
server_port (int): The Syslog UDP port
|
server_port (int): The Syslog port
|
||||||
|
protocol (str): The protocol to use: "udp", "tcp", or "tls" (Default: "udp")
|
||||||
|
cafile_path (str): Path to CA certificate file for TLS server verification (Optional)
|
||||||
|
certfile_path (str): Path to client certificate file for TLS authentication (Optional)
|
||||||
|
keyfile_path (str): Path to client private key file for TLS authentication (Optional)
|
||||||
|
timeout (float): Connection timeout in seconds for TCP/TLS (Default: 5.0)
|
||||||
|
retry_attempts (int): Number of retry attempts for failed connections (Default: 3)
|
||||||
|
retry_delay (int): Delay in seconds between retry attempts (Default: 5)
|
||||||
"""
|
"""
|
||||||
self.server_name = server_name
|
self.server_name = server_name
|
||||||
self.server_port = server_port
|
self.server_port = server_port
|
||||||
|
self.protocol = protocol.lower()
|
||||||
|
self.timeout = timeout
|
||||||
|
self.retry_attempts = retry_attempts
|
||||||
|
self.retry_delay = retry_delay
|
||||||
|
|
||||||
self.logger = logging.getLogger("parsedmarc_syslog")
|
self.logger = logging.getLogger("parsedmarc_syslog")
|
||||||
self.logger.setLevel(logging.INFO)
|
self.logger.setLevel(logging.INFO)
|
||||||
log_handler = logging.handlers.SysLogHandler(address=(server_name, server_port))
|
|
||||||
|
# Create the appropriate syslog handler based on protocol
|
||||||
|
log_handler = self._create_syslog_handler(
|
||||||
|
server_name,
|
||||||
|
server_port,
|
||||||
|
self.protocol,
|
||||||
|
cafile_path,
|
||||||
|
certfile_path,
|
||||||
|
keyfile_path,
|
||||||
|
timeout,
|
||||||
|
retry_attempts,
|
||||||
|
retry_delay,
|
||||||
|
)
|
||||||
|
|
||||||
self.logger.addHandler(log_handler)
|
self.logger.addHandler(log_handler)
|
||||||
|
|
||||||
def save_aggregate_report_to_syslog(self, aggregate_reports):
|
def _create_syslog_handler(
|
||||||
|
self,
|
||||||
|
server_name: str,
|
||||||
|
server_port: int,
|
||||||
|
protocol: str,
|
||||||
|
cafile_path: Optional[str],
|
||||||
|
certfile_path: Optional[str],
|
||||||
|
keyfile_path: Optional[str],
|
||||||
|
timeout: float,
|
||||||
|
retry_attempts: int,
|
||||||
|
retry_delay: int,
|
||||||
|
) -> logging.handlers.SysLogHandler:
|
||||||
|
"""
|
||||||
|
Creates a SysLogHandler with the specified protocol and TLS settings
|
||||||
|
"""
|
||||||
|
if protocol == "udp":
|
||||||
|
# UDP protocol (default, backward compatible)
|
||||||
|
return logging.handlers.SysLogHandler(
|
||||||
|
address=(server_name, server_port),
|
||||||
|
socktype=socket.SOCK_DGRAM,
|
||||||
|
)
|
||||||
|
elif protocol in ["tcp", "tls"]:
|
||||||
|
# TCP or TLS protocol with retry logic
|
||||||
|
for attempt in range(1, retry_attempts + 1):
|
||||||
|
try:
|
||||||
|
if protocol == "tcp":
|
||||||
|
# TCP without TLS
|
||||||
|
handler = logging.handlers.SysLogHandler(
|
||||||
|
address=(server_name, server_port),
|
||||||
|
socktype=socket.SOCK_STREAM,
|
||||||
|
)
|
||||||
|
# Set timeout on the socket
|
||||||
|
if hasattr(handler, "socket") and handler.socket:
|
||||||
|
handler.socket.settimeout(timeout)
|
||||||
|
return handler
|
||||||
|
else:
|
||||||
|
# TLS protocol
|
||||||
|
# Create SSL context with secure defaults
|
||||||
|
ssl_context = ssl.create_default_context()
|
||||||
|
|
||||||
|
# Explicitly set minimum TLS version to 1.2 for security
|
||||||
|
ssl_context.minimum_version = ssl.TLSVersion.TLSv1_2
|
||||||
|
|
||||||
|
# Configure server certificate verification
|
||||||
|
if cafile_path:
|
||||||
|
ssl_context.load_verify_locations(cafile=cafile_path)
|
||||||
|
|
||||||
|
# Configure client certificate authentication
|
||||||
|
if certfile_path and keyfile_path:
|
||||||
|
ssl_context.load_cert_chain(
|
||||||
|
certfile=certfile_path,
|
||||||
|
keyfile=keyfile_path,
|
||||||
|
)
|
||||||
|
elif certfile_path or keyfile_path:
|
||||||
|
# Warn if only one of the two required parameters is provided
|
||||||
|
self.logger.warning(
|
||||||
|
"Both certfile_path and keyfile_path are required for "
|
||||||
|
"client certificate authentication. Client authentication "
|
||||||
|
"will not be used."
|
||||||
|
)
|
||||||
|
|
||||||
|
# Create TCP handler first
|
||||||
|
handler = logging.handlers.SysLogHandler(
|
||||||
|
address=(server_name, server_port),
|
||||||
|
socktype=socket.SOCK_STREAM,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Wrap socket with TLS
|
||||||
|
if hasattr(handler, "socket") and handler.socket:
|
||||||
|
handler.socket = ssl_context.wrap_socket(
|
||||||
|
handler.socket,
|
||||||
|
server_hostname=server_name,
|
||||||
|
)
|
||||||
|
handler.socket.settimeout(timeout)
|
||||||
|
|
||||||
|
return handler
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
if attempt < retry_attempts:
|
||||||
|
self.logger.warning(
|
||||||
|
f"Syslog connection attempt {attempt}/{retry_attempts} failed: {e}. "
|
||||||
|
f"Retrying in {retry_delay} seconds..."
|
||||||
|
)
|
||||||
|
time.sleep(retry_delay)
|
||||||
|
else:
|
||||||
|
self.logger.error(
|
||||||
|
f"Syslog connection failed after {retry_attempts} attempts: {e}"
|
||||||
|
)
|
||||||
|
raise
|
||||||
|
else:
|
||||||
|
raise ValueError(
|
||||||
|
f"Invalid protocol '{protocol}'. Must be 'udp', 'tcp', or 'tls'."
|
||||||
|
)
|
||||||
|
|
||||||
|
def save_aggregate_report_to_syslog(self, aggregate_reports: list[dict[str, Any]]):
|
||||||
rows = parsed_aggregate_reports_to_csv_rows(aggregate_reports)
|
rows = parsed_aggregate_reports_to_csv_rows(aggregate_reports)
|
||||||
for row in rows:
|
for row in rows:
|
||||||
self.logger.info(json.dumps(row))
|
self.logger.info(json.dumps(row))
|
||||||
|
|
||||||
def save_forensic_report_to_syslog(self, forensic_reports):
|
def save_forensic_report_to_syslog(self, forensic_reports: list[dict[str, Any]]):
|
||||||
rows = parsed_forensic_reports_to_csv_rows(forensic_reports)
|
rows = parsed_forensic_reports_to_csv_rows(forensic_reports)
|
||||||
for row in rows:
|
for row in rows:
|
||||||
self.logger.info(json.dumps(row))
|
self.logger.info(json.dumps(row))
|
||||||
|
|
||||||
def save_smtp_tls_report_to_syslog(self, smtp_tls_reports):
|
def save_smtp_tls_report_to_syslog(self, smtp_tls_reports: list[dict[str, Any]]):
|
||||||
rows = parsed_smtp_tls_reports_to_csv_rows(smtp_tls_reports)
|
rows = parsed_smtp_tls_reports_to_csv_rows(smtp_tls_reports)
|
||||||
for row in rows:
|
for row in rows:
|
||||||
self.logger.info(json.dumps(row))
|
self.logger.info(json.dumps(row))
|
||||||
|
|||||||
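A minimal usage sketch of the new TCP/TLS options added to SyslogClient above, assuming the client is importable as parsedmarc.syslog.SyslogClient; the server and certificate paths are placeholders:

    # Sketch only: forward parsed reports over syslog with TLS and client auth
    from parsedmarc.syslog import SyslogClient

    syslog = SyslogClient(
        server_name="syslog.example.com",
        server_port=6514,
        protocol="tls",
        cafile_path="/etc/ssl/certs/ca.pem",
        certfile_path="/etc/ssl/certs/client.pem",
        keyfile_path="/etc/ssl/private/client.key",
        timeout=5.0,
        retry_attempts=3,
        retry_delay=5,
    )
    aggregate_reports = []  # placeholder for a list of parsed aggregate report dicts
    syslog.save_aggregate_report_to_syslog(aggregate_reports)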
220	parsedmarc/types.py	Normal file
@@ -0,0 +1,220 @@
from __future__ import annotations

from typing import Any, Dict, List, Literal, Optional, TypedDict, Union

# NOTE: This module is intentionally Python 3.10 compatible.
# - No PEP 604 unions (A | B)
# - No typing.NotRequired / Required (3.11+) to avoid an extra dependency.
# For optional keys, use total=False TypedDicts.


ReportType = Literal["aggregate", "forensic", "smtp_tls"]


class AggregateReportMetadata(TypedDict):
    org_name: str
    org_email: str
    org_extra_contact_info: Optional[str]
    report_id: str
    begin_date: str
    end_date: str
    timespan_requires_normalization: bool
    original_timespan_seconds: int
    errors: List[str]


class AggregatePolicyPublished(TypedDict):
    domain: str
    adkim: str
    aspf: str
    p: str
    sp: str
    pct: str
    fo: str


class IPSourceInfo(TypedDict):
    ip_address: str
    country: Optional[str]
    reverse_dns: Optional[str]
    base_domain: Optional[str]
    name: Optional[str]
    type: Optional[str]


class AggregateAlignment(TypedDict):
    spf: bool
    dkim: bool
    dmarc: bool


class AggregateIdentifiers(TypedDict):
    header_from: str
    envelope_from: Optional[str]
    envelope_to: Optional[str]


class AggregatePolicyOverrideReason(TypedDict):
    type: Optional[str]
    comment: Optional[str]


class AggregateAuthResultDKIM(TypedDict):
    domain: str
    result: str
    selector: str


class AggregateAuthResultSPF(TypedDict):
    domain: str
    result: str
    scope: str


class AggregateAuthResults(TypedDict):
    dkim: List[AggregateAuthResultDKIM]
    spf: List[AggregateAuthResultSPF]


class AggregatePolicyEvaluated(TypedDict):
    disposition: str
    dkim: str
    spf: str
    policy_override_reasons: List[AggregatePolicyOverrideReason]


class AggregateRecord(TypedDict):
    interval_begin: str
    interval_end: str
    source: IPSourceInfo
    count: int
    alignment: AggregateAlignment
    policy_evaluated: AggregatePolicyEvaluated
    disposition: str
    identifiers: AggregateIdentifiers
    auth_results: AggregateAuthResults


class AggregateReport(TypedDict):
    xml_schema: str
    report_metadata: AggregateReportMetadata
    policy_published: AggregatePolicyPublished
    records: List[AggregateRecord]


class EmailAddress(TypedDict):
    display_name: Optional[str]
    address: str
    local: Optional[str]
    domain: Optional[str]


class EmailAttachment(TypedDict, total=False):
    filename: Optional[str]
    mail_content_type: Optional[str]
    sha256: Optional[str]


ParsedEmail = TypedDict(
    "ParsedEmail",
    {
        # This is a lightly-specified version of mailsuite/mailparser JSON.
        # It focuses on the fields parsedmarc uses in forensic handling.
        "headers": Dict[str, Any],
        "subject": Optional[str],
        "filename_safe_subject": Optional[str],
        "date": Optional[str],
        "from": EmailAddress,
        "to": List[EmailAddress],
        "cc": List[EmailAddress],
        "bcc": List[EmailAddress],
        "attachments": List[EmailAttachment],
        "body": Optional[str],
        "has_defects": bool,
        "defects": Any,
        "defects_categories": Any,
    },
    total=False,
)


class ForensicReport(TypedDict):
    feedback_type: Optional[str]
    user_agent: Optional[str]
    version: Optional[str]
    original_envelope_id: Optional[str]
    original_mail_from: Optional[str]
    original_rcpt_to: Optional[str]
    arrival_date: str
    arrival_date_utc: str
    authentication_results: Optional[str]
    delivery_result: Optional[str]
    auth_failure: List[str]
    authentication_mechanisms: List[str]
    dkim_domain: Optional[str]
    reported_domain: str
    sample_headers_only: bool
    source: IPSourceInfo
    sample: str
    parsed_sample: ParsedEmail


class SMTPTLSFailureDetails(TypedDict):
    result_type: str
    failed_session_count: int


class SMTPTLSFailureDetailsOptional(SMTPTLSFailureDetails, total=False):
    sending_mta_ip: str
    receiving_ip: str
    receiving_mx_hostname: str
    receiving_mx_helo: str
    additional_info_uri: str
    failure_reason_code: str
    ip_address: str


class SMTPTLSPolicySummary(TypedDict):
    policy_domain: str
    policy_type: str
    successful_session_count: int
    failed_session_count: int


class SMTPTLSPolicy(SMTPTLSPolicySummary, total=False):
    policy_strings: List[str]
    mx_host_patterns: List[str]
    failure_details: List[SMTPTLSFailureDetailsOptional]


class SMTPTLSReport(TypedDict):
    organization_name: str
    begin_date: str
    end_date: str
    contact_info: Union[str, List[str]]
    report_id: str
    policies: List[SMTPTLSPolicy]


class AggregateParsedReport(TypedDict):
    report_type: Literal["aggregate"]
    report: AggregateReport


class ForensicParsedReport(TypedDict):
    report_type: Literal["forensic"]
    report: ForensicReport


class SMTPTLSParsedReport(TypedDict):
    report_type: Literal["smtp_tls"]
    report: SMTPTLSReport


ParsedReport = Union[AggregateParsedReport, ForensicParsedReport, SMTPTLSParsedReport]


class ParsingResults(TypedDict):
    aggregate_reports: List[AggregateReport]
    forensic_reports: List[ForensicReport]
    smtp_tls_reports: List[SMTPTLSReport]
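A minimal sketch of how the new TypedDicts can annotate downstream code; the helper function below is illustrative and not part of parsedmarc:

    # Sketch only: type-checked iteration over parsing results
    from parsedmarc.types import ParsingResults

    def count_aggregate_messages(results: ParsingResults) -> int:
        total = 0
        for report in results["aggregate_reports"]:
            for record in report["records"]:
                total += record["count"]
        return total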
@@ -1,48 +1,59 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
"""Utility functions that might be useful for other projects"""
|
"""Utility functions that might be useful for other projects"""
|
||||||
|
|
||||||
import logging
|
from __future__ import annotations
|
||||||
import os
|
|
||||||
from datetime import datetime
|
|
||||||
from datetime import timezone
|
|
||||||
from datetime import timedelta
|
|
||||||
from collections import OrderedDict
|
|
||||||
import tempfile
|
|
||||||
import subprocess
|
|
||||||
import shutil
|
|
||||||
import mailparser
|
|
||||||
import json
|
|
||||||
import hashlib
|
|
||||||
import base64
|
import base64
|
||||||
import mailbox
|
|
||||||
import re
|
|
||||||
import csv
|
import csv
|
||||||
|
import hashlib
|
||||||
import io
|
import io
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import mailbox
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import shutil
|
||||||
|
import subprocess
|
||||||
|
import tempfile
|
||||||
|
from datetime import datetime, timedelta, timezone
|
||||||
|
from typing import Optional, TypedDict, Union, cast
|
||||||
|
|
||||||
|
import mailparser
|
||||||
|
from expiringdict import ExpiringDict
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import importlib.resources as pkg_resources
|
from importlib.resources import files
|
||||||
except ImportError:
|
except ImportError:
|
||||||
# Try backported to PY<37 `importlib_resources`
|
# Try backported to PY<3 `importlib_resources`
|
||||||
import importlib_resources as pkg_resources
|
from importlib.resources import files
|
||||||
|
|
||||||
|
|
||||||
from dateutil.parser import parse as parse_date
|
|
||||||
import dns.reversename
|
|
||||||
import dns.resolver
|
|
||||||
import dns.exception
|
import dns.exception
|
||||||
|
import dns.resolver
|
||||||
|
import dns.reversename
|
||||||
import geoip2.database
|
import geoip2.database
|
||||||
import geoip2.errors
|
import geoip2.errors
|
||||||
import publicsuffixlist
|
import publicsuffixlist
|
||||||
import requests
|
import requests
|
||||||
|
from dateutil.parser import parse as parse_date
|
||||||
|
|
||||||
from parsedmarc.log import logger
|
|
||||||
import parsedmarc.resources.dbip
|
import parsedmarc.resources.dbip
|
||||||
import parsedmarc.resources.maps
|
import parsedmarc.resources.maps
|
||||||
|
from parsedmarc.constants import USER_AGENT
|
||||||
|
from parsedmarc.log import logger
|
||||||
|
|
||||||
parenthesis_regex = re.compile(r"\s*\(.*\)\s*")
|
parenthesis_regex = re.compile(r"\s*\(.*\)\s*")
|
||||||
|
|
||||||
null_file = open(os.devnull, "w")
|
null_file = open(os.devnull, "w")
|
||||||
mailparser_logger = logging.getLogger("mailparser")
|
mailparser_logger = logging.getLogger("mailparser")
|
||||||
mailparser_logger.setLevel(logging.CRITICAL)
|
mailparser_logger.setLevel(logging.CRITICAL)
|
||||||
|
psl = publicsuffixlist.PublicSuffixList()
|
||||||
|
psl_overrides_path = str(files(parsedmarc.resources.maps).joinpath("psl_overrides.txt"))
|
||||||
|
with open(psl_overrides_path) as f:
|
||||||
|
psl_overrides = [line.rstrip() for line in f.readlines()]
|
||||||
|
while "" in psl_overrides:
|
||||||
|
psl_overrides.remove("")
|
||||||
|
|
||||||
|
|
||||||
class EmailParserError(RuntimeError):
|
class EmailParserError(RuntimeError):
|
||||||
@@ -53,31 +64,49 @@ class DownloadError(RuntimeError):
|
|||||||
"""Raised when an error occurs when downloading a file"""
|
"""Raised when an error occurs when downloading a file"""
|
||||||
|
|
||||||
|
|
||||||
def decode_base64(data):
|
class ReverseDNSService(TypedDict):
|
||||||
|
name: str
|
||||||
|
type: Optional[str]
|
||||||
|
|
||||||
|
|
||||||
|
ReverseDNSMap = dict[str, ReverseDNSService]
|
||||||
|
|
||||||
|
|
||||||
|
class IPAddressInfo(TypedDict):
|
||||||
|
ip_address: str
|
||||||
|
reverse_dns: Optional[str]
|
||||||
|
country: Optional[str]
|
||||||
|
base_domain: Optional[str]
|
||||||
|
name: Optional[str]
|
||||||
|
type: Optional[str]
|
||||||
|
|
||||||
|
|
||||||
|
def decode_base64(data: str) -> bytes:
|
||||||
"""
|
"""
|
||||||
Decodes a base64 string, with padding being optional
|
Decodes a base64 string, with padding being optional
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
data: A base64 encoded string
|
data (str): A base64 encoded string
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
bytes: The decoded bytes
|
bytes: The decoded bytes
|
||||||
|
|
||||||
"""
|
"""
|
||||||
data = bytes(data, encoding="ascii")
|
data_bytes = bytes(data, encoding="ascii")
|
||||||
missing_padding = len(data) % 4
|
missing_padding = len(data_bytes) % 4
|
||||||
if missing_padding != 0:
|
if missing_padding != 0:
|
||||||
data += b"=" * (4 - missing_padding)
|
data_bytes += b"=" * (4 - missing_padding)
|
||||||
return base64.b64decode(data)
|
return base64.b64decode(data_bytes)
|
||||||
|
|
||||||
|
|
||||||
def get_base_domain(domain):
|
def get_base_domain(domain: str) -> Optional[str]:
|
||||||
"""
|
"""
|
||||||
Gets the base domain name for the given domain
|
Gets the base domain name for the given domain
|
||||||
|
|
||||||
.. note::
|
.. note::
|
||||||
Results are based on a list of public domain suffixes at
|
Results are based on a list of public domain suffixes at
|
||||||
https://publicsuffix.org/list/public_suffix_list.dat.
|
https://publicsuffix.org/list/public_suffix_list.dat and overrides included in
|
||||||
|
parsedmarc.resources.maps.psl_overrides.txt
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
domain (str): A domain or subdomain
|
domain (str): A domain or subdomain
|
||||||
@@ -86,11 +115,22 @@ def get_base_domain(domain):
|
|||||||
str: The base domain of the given domain
|
str: The base domain of the given domain
|
||||||
|
|
||||||
"""
|
"""
|
||||||
psl = publicsuffixlist.PublicSuffixList()
|
domain = domain.lower()
|
||||||
return psl.privatesuffix(domain)
|
publicsuffix = psl.privatesuffix(domain)
|
||||||
|
for override in psl_overrides:
|
||||||
|
if domain.endswith(override):
|
||||||
|
return override.strip(".").strip("-")
|
||||||
|
return publicsuffix
|
||||||
|
|
||||||
|
|
||||||
def query_dns(domain, record_type, cache=None, nameservers=None, timeout=2.0):
|
def query_dns(
|
||||||
|
domain: str,
|
||||||
|
record_type: str,
|
||||||
|
*,
|
||||||
|
cache: Optional[ExpiringDict] = None,
|
||||||
|
nameservers: Optional[list[str]] = None,
|
||||||
|
timeout: float = 2.0,
|
||||||
|
) -> list[str]:
|
||||||
"""
|
"""
|
||||||
Queries DNS
|
Queries DNS
|
||||||
|
|
||||||
@@ -109,9 +149,9 @@ def query_dns(domain, record_type, cache=None, nameservers=None, timeout=2.0):
|
|||||||
record_type = record_type.upper()
|
record_type = record_type.upper()
|
||||||
cache_key = "{0}_{1}".format(domain, record_type)
|
cache_key = "{0}_{1}".format(domain, record_type)
|
||||||
if cache:
|
if cache:
|
||||||
records = cache.get(cache_key, None)
|
cached_records = cache.get(cache_key, None)
|
||||||
if records:
|
if isinstance(cached_records, list):
|
||||||
return records
|
return cast(list[str], cached_records)
|
||||||
|
|
||||||
resolver = dns.resolver.Resolver()
|
resolver = dns.resolver.Resolver()
|
||||||
timeout = float(timeout)
|
timeout = float(timeout)
|
||||||
@@ -125,33 +165,25 @@ def query_dns(domain, record_type, cache=None, nameservers=None, timeout=2.0):
|
|||||||
resolver.nameservers = nameservers
|
resolver.nameservers = nameservers
|
||||||
resolver.timeout = timeout
|
resolver.timeout = timeout
|
||||||
resolver.lifetime = timeout
|
resolver.lifetime = timeout
|
||||||
if record_type == "TXT":
|
records = list(
|
||||||
resource_records = list(
|
map(
|
||||||
map(
|
lambda r: r.to_text().replace('"', "").rstrip("."),
|
||||||
lambda r: r.strings,
|
resolver.resolve(domain, record_type, lifetime=timeout),
|
||||||
resolver.resolve(domain, record_type, lifetime=timeout),
|
|
||||||
)
|
|
||||||
)
|
|
||||||
_resource_record = [
|
|
||||||
resource_record[0][:0].join(resource_record)
|
|
||||||
for resource_record in resource_records
|
|
||||||
if resource_record
|
|
||||||
]
|
|
||||||
records = [r.decode() for r in _resource_record]
|
|
||||||
else:
|
|
||||||
records = list(
|
|
||||||
map(
|
|
||||||
lambda r: r.to_text().replace('"', "").rstrip("."),
|
|
||||||
resolver.resolve(domain, record_type, lifetime=timeout),
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
|
)
|
||||||
if cache:
|
if cache:
|
||||||
cache[cache_key] = records
|
cache[cache_key] = records
|
||||||
|
|
||||||
return records
|
return records
|
||||||
|
|
||||||
|
|
||||||
def get_reverse_dns(ip_address, cache=None, nameservers=None, timeout=2.0):
|
def get_reverse_dns(
|
||||||
|
ip_address,
|
||||||
|
*,
|
||||||
|
cache: Optional[ExpiringDict] = None,
|
||||||
|
nameservers: Optional[list[str]] = None,
|
||||||
|
timeout: float = 2.0,
|
||||||
|
) -> Optional[str]:
|
||||||
"""
|
"""
|
||||||
Resolves an IP address to a hostname using a reverse DNS query
|
Resolves an IP address to a hostname using a reverse DNS query
|
||||||
|
|
||||||
@@ -169,7 +201,7 @@ def get_reverse_dns(ip_address, cache=None, nameservers=None, timeout=2.0):
|
|||||||
try:
|
try:
|
||||||
address = dns.reversename.from_address(ip_address)
|
address = dns.reversename.from_address(ip_address)
|
||||||
hostname = query_dns(
|
hostname = query_dns(
|
||||||
address, "PTR", cache=cache, nameservers=nameservers, timeout=timeout
|
str(address), "PTR", cache=cache, nameservers=nameservers, timeout=timeout
|
||||||
)[0]
|
)[0]
|
||||||
|
|
||||||
except dns.exception.DNSException as e:
|
except dns.exception.DNSException as e:
|
||||||
@@ -179,7 +211,7 @@ def get_reverse_dns(ip_address, cache=None, nameservers=None, timeout=2.0):
|
|||||||
return hostname
|
return hostname
|
||||||
|
|
||||||
|
|
||||||
def timestamp_to_datetime(timestamp):
|
def timestamp_to_datetime(timestamp: int) -> datetime:
|
||||||
"""
|
"""
|
||||||
Converts a UNIX/DMARC timestamp to a Python ``datetime`` object
|
Converts a UNIX/DMARC timestamp to a Python ``datetime`` object
|
||||||
|
|
||||||
@@ -192,7 +224,7 @@ def timestamp_to_datetime(timestamp):
|
|||||||
return datetime.fromtimestamp(int(timestamp))
|
return datetime.fromtimestamp(int(timestamp))
|
||||||
|
|
||||||
|
|
||||||
def timestamp_to_human(timestamp):
|
def timestamp_to_human(timestamp: int) -> str:
|
||||||
"""
|
"""
|
||||||
Converts a UNIX/DMARC timestamp to a human-readable string
|
Converts a UNIX/DMARC timestamp to a human-readable string
|
||||||
|
|
||||||
@@ -205,7 +237,9 @@ def timestamp_to_human(timestamp):
|
|||||||
return timestamp_to_datetime(timestamp).strftime("%Y-%m-%d %H:%M:%S")
|
return timestamp_to_datetime(timestamp).strftime("%Y-%m-%d %H:%M:%S")
|
||||||
|
|
||||||
|
|
||||||
def human_timestamp_to_datetime(human_timestamp, to_utc=False):
|
def human_timestamp_to_datetime(
|
||||||
|
human_timestamp: str, *, to_utc: bool = False
|
||||||
|
) -> datetime:
|
||||||
"""
|
"""
|
||||||
Converts a human-readable timestamp into a Python ``datetime`` object
|
Converts a human-readable timestamp into a Python ``datetime`` object
|
||||||
|
|
||||||
@@ -224,7 +258,7 @@ def human_timestamp_to_datetime(human_timestamp, to_utc=False):
|
|||||||
return dt.astimezone(timezone.utc) if to_utc else dt
|
return dt.astimezone(timezone.utc) if to_utc else dt
|
||||||
|
|
||||||
|
|
||||||
def human_timestamp_to_unix_timestamp(human_timestamp):
|
def human_timestamp_to_unix_timestamp(human_timestamp: str) -> int:
|
||||||
"""
|
"""
|
||||||
Converts a human-readable timestamp into a UNIX timestamp
|
Converts a human-readable timestamp into a UNIX timestamp
|
||||||
|
|
||||||
@@ -235,10 +269,12 @@ def human_timestamp_to_unix_timestamp(human_timestamp):
|
|||||||
float: The converted timestamp
|
float: The converted timestamp
|
||||||
"""
|
"""
|
||||||
human_timestamp = human_timestamp.replace("T", " ")
|
human_timestamp = human_timestamp.replace("T", " ")
|
||||||
return human_timestamp_to_datetime(human_timestamp).timestamp()
|
return int(human_timestamp_to_datetime(human_timestamp).timestamp())
|
||||||
|
|
||||||
|
|
||||||
def get_ip_address_country(ip_address, db_path=None):
|
def get_ip_address_country(
|
||||||
|
ip_address: str, *, db_path: Optional[str] = None
|
||||||
|
) -> Optional[str]:
|
||||||
"""
|
"""
|
||||||
Returns the ISO code for the country associated
|
Returns the ISO code for the country associated
|
||||||
with the given IPv4 or IPv6 address
|
with the given IPv4 or IPv6 address
|
||||||
@@ -265,7 +301,7 @@ def get_ip_address_country(ip_address, db_path=None):
|
|||||||
]
|
]
|
||||||
|
|
||||||
if db_path is not None:
|
if db_path is not None:
|
||||||
if os.path.isfile(db_path) is False:
|
if not os.path.isfile(db_path):
|
||||||
db_path = None
|
db_path = None
|
||||||
logger.warning(
|
logger.warning(
|
||||||
f"No file exists at {db_path}. Falling back to an "
|
f"No file exists at {db_path}. Falling back to an "
|
||||||
@@ -280,14 +316,13 @@ def get_ip_address_country(ip_address, db_path=None):
|
|||||||
break
|
break
|
||||||
|
|
||||||
if db_path is None:
|
if db_path is None:
|
||||||
with pkg_resources.path(
|
db_path = str(
|
||||||
parsedmarc.resources.dbip, "dbip-country-lite.mmdb"
|
files(parsedmarc.resources.dbip).joinpath("dbip-country-lite.mmdb")
|
||||||
) as path:
|
)
|
||||||
db_path = path
|
|
||||||
|
|
||||||
db_age = datetime.now() - datetime.fromtimestamp(os.stat(db_path).st_mtime)
|
db_age = datetime.now() - datetime.fromtimestamp(os.stat(db_path).st_mtime)
|
||||||
if db_age > timedelta(days=30):
|
if db_age > timedelta(days=30):
|
||||||
logger.warning("IP database is more than a month old")
|
logger.warning("IP database is more than a month old")
|
||||||
|
|
||||||
db_reader = geoip2.database.Reader(db_path)
|
db_reader = geoip2.database.Reader(db_path)
|
||||||
|
|
||||||
@@ -303,12 +338,13 @@ def get_ip_address_country(ip_address, db_path=None):
|
|||||||
|
|
||||||
def get_service_from_reverse_dns_base_domain(
|
def get_service_from_reverse_dns_base_domain(
|
||||||
base_domain,
|
base_domain,
|
||||||
always_use_local_file=False,
|
*,
|
||||||
local_file_path=None,
|
always_use_local_file: bool = False,
|
||||||
url=None,
|
local_file_path: Optional[str] = None,
|
||||||
offline=False,
|
url: Optional[str] = None,
|
||||||
reverse_dns_map=None,
|
offline: bool = False,
|
||||||
):
|
reverse_dns_map: Optional[ReverseDNSMap] = None,
|
||||||
|
) -> ReverseDNSService:
|
||||||
"""
|
"""
|
||||||
Returns the service name of a given base domain name from reverse DNS.
|
Returns the service name of a given base domain name from reverse DNS.
|
||||||
|
|
||||||
@@ -325,12 +361,6 @@ def get_service_from_reverse_dns_base_domain(
|
|||||||
the supplied reverse_dns_base_domain and the type will be None
|
the supplied reverse_dns_base_domain and the type will be None
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def load_csv(_csv_file):
|
|
||||||
reader = csv.DictReader(_csv_file)
|
|
||||||
for row in reader:
|
|
||||||
key = row["base_reverse_dns"].lower().strip()
|
|
||||||
reverse_dns_map[key] = dict(name=row["name"], type=row["type"])
|
|
||||||
|
|
||||||
base_domain = base_domain.lower().strip()
|
base_domain = base_domain.lower().strip()
|
||||||
if url is None:
|
if url is None:
|
||||||
url = (
|
url = (
|
||||||
@@ -338,47 +368,71 @@ def get_service_from_reverse_dns_base_domain(
             "/parsedmarc/master/parsedmarc/"
             "resources/maps/base_reverse_dns_map.csv"
         )
+    reverse_dns_map_value: ReverseDNSMap
     if reverse_dns_map is None:
-        reverse_dns_map = dict()
+        reverse_dns_map_value = {}
+    else:
+        reverse_dns_map_value = reverse_dns_map
+
+    def load_csv(_csv_file):
+        reader = csv.DictReader(_csv_file)
+        for row in reader:
+            key = row["base_reverse_dns"].lower().strip()
+            reverse_dns_map_value[key] = {
+                "name": row["name"],
+                "type": row["type"],
+            }
+
     csv_file = io.StringIO()
 
-    if not (offline or always_use_local_file) and len(reverse_dns_map) == 0:
+    if not (offline or always_use_local_file) and len(reverse_dns_map_value) == 0:
         try:
-            logger.debug(f"Trying to fetch " f"reverse DNS map from {url}...")
-            csv_file.write(requests.get(url).text)
+            logger.debug(f"Trying to fetch reverse DNS map from {url}...")
+            headers = {"User-Agent": USER_AGENT}
+            response = requests.get(url, headers=headers)
+            response.raise_for_status()
+            csv_file.write(response.text)
             csv_file.seek(0)
             load_csv(csv_file)
         except requests.exceptions.RequestException as e:
             logger.warning(f"Failed to fetch reverse DNS map: {e}")
-    if len(reverse_dns_map) == 0:
+        except Exception:
+            logger.warning("Not a valid CSV file")
+            csv_file.seek(0)
+            logging.debug("Response body:")
+            logger.debug(csv_file.read())
+
+    if len(reverse_dns_map_value) == 0:
         logger.info("Loading included reverse DNS map...")
-        with pkg_resources.path(
-            parsedmarc.resources.maps, "base_reverse_dns_map.csv"
-        ) as path:
+        path = str(
+            files(parsedmarc.resources.maps).joinpath("base_reverse_dns_map.csv")
+        )
         if local_file_path is not None:
             path = local_file_path
         with open(path) as csv_file:
             load_csv(csv_file)
+    service: ReverseDNSService
     try:
-        service = reverse_dns_map[base_domain]
+        service = reverse_dns_map_value[base_domain]
     except KeyError:
-        service = dict(name=base_domain, type=None)
+        service = {"name": base_domain, "type": None}
 
     return service
 
 
 def get_ip_address_info(
     ip_address,
-    ip_db_path=None,
-    reverse_dns_map_path=None,
-    always_use_local_files=False,
-    reverse_dns_map_url=None,
-    cache=None,
-    reverse_dns_map=None,
-    offline=False,
-    nameservers=None,
-    timeout=2.0,
-):
+    *,
+    ip_db_path: Optional[str] = None,
+    reverse_dns_map_path: Optional[str] = None,
+    always_use_local_files: bool = False,
+    reverse_dns_map_url: Optional[str] = None,
+    cache: Optional[ExpiringDict] = None,
+    reverse_dns_map: Optional[ReverseDNSMap] = None,
+    offline: bool = False,
+    nameservers: Optional[list[str]] = None,
+    timeout: float = 2.0,
+) -> IPAddressInfo:
     """
     Returns reverse DNS and country information for the given IP address
 
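Both functions above now take their options as keyword-only arguments (the bare * in the parameter list) and gain type hints such as Optional[ReverseDNSMap] and the IPAddressInfo return type. A small illustration of what the keyword-only marker changes for callers; the function and parameters here are made up for the example, not part of parsedmarc:

    def lookup(ip_address: str, *, offline: bool = False, timeout: float = 2.0) -> dict:
        # everything after the bare * must be passed by keyword
        return {"ip_address": ip_address, "offline": offline, "timeout": timeout}

    lookup("203.0.113.7", offline=True)   # OK
    # lookup("203.0.113.7", True)         # TypeError: too many positional arguments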
@@ -396,17 +450,27 @@ def get_ip_address_info(
         timeout (float): Sets the DNS timeout in seconds
 
     Returns:
-        OrderedDict: ``ip_address``, ``reverse_dns``
+        dict: ``ip_address``, ``reverse_dns``, ``country``
 
     """
     ip_address = ip_address.lower()
     if cache is not None:
-        info = cache.get(ip_address, None)
-        if info:
+        cached_info = cache.get(ip_address, None)
+        if (
+            cached_info
+            and isinstance(cached_info, dict)
+            and "ip_address" in cached_info
+        ):
             logger.debug(f"IP address {ip_address} was found in cache")
-            return info
-    info = OrderedDict()
-    info["ip_address"] = ip_address
+            return cast(IPAddressInfo, cached_info)
+    info: IPAddressInfo = {
+        "ip_address": ip_address,
+        "reverse_dns": None,
+        "country": None,
+        "base_domain": None,
+        "name": None,
+        "type": None,
+    }
     if offline:
         reverse_dns = None
     else:
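The cache lookup now checks that the cached value really is a dict carrying an ip_address key before returning it, and the cache parameter is annotated as an ExpiringDict. A hedged caller-side sketch; the cache size and age below are illustrative, not parsedmarc defaults:

    from expiringdict import ExpiringDict

    # entries drop out automatically after max_age_seconds, so a stale or
    # malformed value is re-resolved instead of being served forever
    ip_cache = ExpiringDict(max_len=10000, max_age_seconds=14400)

    info = get_ip_address_info("203.0.113.7", cache=ip_cache, offline=True)
    print(info["ip_address"], info["country"])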
@@ -416,9 +480,6 @@ def get_ip_address_info(
     country = get_ip_address_country(ip_address, db_path=ip_db_path)
     info["country"] = country
     info["reverse_dns"] = reverse_dns
-    info["base_domain"] = None
-    info["name"] = None
-    info["type"] = None
     if reverse_dns is not None:
         base_domain = get_base_domain(reverse_dns)
         if base_domain is not None:
@@ -443,7 +504,7 @@ def get_ip_address_info(
     return info
 
 
-def parse_email_address(original_address):
+def parse_email_address(original_address: str) -> dict[str, Optional[str]]:
     if original_address[0] == "":
         display_name = None
     else:
@@ -456,17 +517,15 @@ def parse_email_address(original_address):
     local = address_parts[0].lower()
     domain = address_parts[-1].lower()
 
-    return OrderedDict(
-        [
-            ("display_name", display_name),
-            ("address", address),
-            ("local", local),
-            ("domain", domain),
-        ]
-    )
+    return {
+        "display_name": display_name,
+        "address": address,
+        "local": local,
+        "domain": domain,
+    }
 
 
-def get_filename_safe_string(string):
+def get_filename_safe_string(string: str) -> str:
     """
     Converts a string to a string that is safe for a filename
 
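parse_email_address() and the info structure above move from OrderedDict to plain dict literals. On Python 3.7+ (this project now requires 3.10+), built-in dicts preserve insertion order, so the field order in downstream output is unchanged. A quick check with illustrative values:

    record = {
        "display_name": None,
        "address": "user@example.com",
        "local": "user",
        "domain": "example.com",
    }
    # plain dicts keep insertion order, matching the old OrderedDict behaviour
    assert list(record) == ["display_name", "address", "local", "domain"]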
@@ -488,7 +547,7 @@ def get_filename_safe_string(string):
     return string
 
 
-def is_mbox(path):
+def is_mbox(path: str) -> bool:
     """
     Checks if the given content is an MBOX mailbox file
 
@@ -509,7 +568,7 @@ def is_mbox(path):
     return _is_mbox
 
 
-def is_outlook_msg(content):
+def is_outlook_msg(content) -> bool:
     """
     Checks if the given content is an Outlook msg OLE/MSG file
 
@@ -524,7 +583,7 @@ def is_outlook_msg(content):
     )
 
 
-def convert_outlook_msg(msg_bytes):
+def convert_outlook_msg(msg_bytes: bytes) -> bytes:
     """
     Uses the ``msgconvert`` Perl utility to convert an Outlook MS file to
     standard RFC 822 format
@@ -533,7 +592,7 @@ def convert_outlook_msg(msg_bytes):
         msg_bytes (bytes): the content of the .msg file
 
     Returns:
-        A RFC 822 string
+        A RFC 822 bytes payload
     """
     if not is_outlook_msg(msg_bytes):
         raise ValueError("The supplied bytes are not an Outlook MSG file")
@@ -560,7 +619,9 @@ def convert_outlook_msg(msg_bytes):
     return rfc822
 
 
-def parse_email(data, strip_attachment_payloads=False):
+def parse_email(
+    data: Union[bytes, str], *, strip_attachment_payloads: bool = False
+) -> dict:
     """
     A simplified email parser
 
@@ -1,12 +1,25 @@
+# -*- coding: utf-8 -*-
+
+from __future__ import annotations
+
+from typing import Any, Optional, Union
+
 import requests
 
 from parsedmarc import logger
+from parsedmarc.constants import USER_AGENT
 
 
 class WebhookClient(object):
     """A client for webhooks"""
 
-    def __init__(self, aggregate_url, forensic_url, smtp_tls_url, timeout=60):
+    def __init__(
+        self,
+        aggregate_url: str,
+        forensic_url: str,
+        smtp_tls_url: str,
+        timeout: Optional[int] = 60,
+    ):
         """
         Initializes the WebhookClient
         Args:
@@ -21,29 +34,31 @@ class WebhookClient(object):
         self.timeout = timeout
         self.session = requests.Session()
         self.session.headers = {
-            "User-Agent": "parsedmarc",
+            "User-Agent": USER_AGENT,
            "Content-Type": "application/json",
        }
 
-    def save_forensic_report_to_webhook(self, report):
+    def save_forensic_report_to_webhook(self, report: str):
         try:
             self._send_to_webhook(self.forensic_url, report)
         except Exception as error_:
             logger.error("Webhook Error: {0}".format(error_.__str__()))
 
-    def save_smtp_tls_report_to_webhook(self, report):
+    def save_smtp_tls_report_to_webhook(self, report: str):
         try:
             self._send_to_webhook(self.smtp_tls_url, report)
         except Exception as error_:
             logger.error("Webhook Error: {0}".format(error_.__str__()))
 
-    def save_aggregate_report_to_webhook(self, report):
+    def save_aggregate_report_to_webhook(self, report: str):
         try:
             self._send_to_webhook(self.aggregate_url, report)
         except Exception as error_:
             logger.error("Webhook Error: {0}".format(error_.__str__()))
 
-    def _send_to_webhook(self, webhook_url, payload):
+    def _send_to_webhook(
+        self, webhook_url: str, payload: Union[bytes, str, dict[str, Any]]
+    ):
         try:
             self.session.post(webhook_url, data=payload, timeout=self.timeout)
         except Exception as error_:
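The two hunks above appear to come from parsedmarc's webhook client: the constructor is now fully annotated and the shared session sends the USER_AGENT string from parsedmarc.constants instead of a bare "parsedmarc". A hedged usage sketch; the import path is inferred from the class shown here, and the URLs are placeholders:

    from parsedmarc.webhook import WebhookClient  # module path assumed

    client = WebhookClient(
        aggregate_url="https://hooks.example.com/dmarc/aggregate",
        forensic_url="https://hooks.example.com/dmarc/forensic",
        smtp_tls_url="https://hooks.example.com/dmarc/smtp-tls",
        timeout=30,
    )
    # each save_*_report_to_webhook() call POSTs the payload with the shared session
    client.save_aggregate_report_to_webhook('{"report_id": "example"}')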
@@ -2,6 +2,7 @@
 requires = [
     "hatchling>=1.27.0",
 ]
+requires_python = ">=3.10,<3.14"
 build-backend = "hatchling.build"
 
 [project]
@@ -28,6 +29,7 @@ classifiers = [
     "Operating System :: OS Independent",
     "Programming Language :: Python :: 3"
 ]
+requires-python = ">=3.10"
 dependencies = [
     "azure-identity>=1.8.0",
     "azure-monitor-ingestion>=1.0.0",
@@ -46,7 +48,7 @@ dependencies = [
     "imapclient>=2.1.0",
     "kafka-python-ng>=2.2.2",
     "lxml>=4.4.0",
-    "mailsuite>=1.9.18",
+    "mailsuite>=1.11.2",
     "msgraph-core==0.2.2",
     "opensearch-py>=2.4.2,<=3.0.0",
     "publicsuffixlist>=0.10.0",
@@ -55,6 +57,7 @@ dependencies = [
     "tqdm>=4.31.1",
     "urllib3>=1.25.7",
     "xmltodict>=0.12.0",
+    "PyYAML>=6.0.3"
 ]
 
 [project.optional-dependencies]
@@ -76,9 +79,20 @@ parsedmarc = "parsedmarc.cli:_main"
 Homepage = "https://domainaware.github.io/parsedmarc"
 
 [tool.hatch.version]
-path = "parsedmarc/__init__.py"
+path = "parsedmarc/constants.py"
 
 [tool.hatch.build.targets.sdist]
 include = [
     "/parsedmarc",
 ]
+
+[tool.hatch.build]
+exclude = [
+    "base_reverse_dns.csv",
+    "find_bad_utf8.py",
+    "find_unknown_base_reverse_dns.py",
+    "unknown_base_reverse_dns.csv",
+    "sortmaps.py",
+    "README.md",
+    "*.bak"
+]
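These packaging changes point Hatch's version lookup at parsedmarc/constants.py rather than parsedmarc/__init__.py, which lines up with the new from parsedmarc.constants import USER_AGENT imports in the code hunks. A plausible sketch of such a constants module; the concrete values are assumptions, not taken from this diff:

    # parsedmarc/constants.py (illustrative contents)
    __version__ = "8.x.y"  # [tool.hatch.version] reads the version from this attribute

    # one User-Agent string shared by the HTTP clients across the package
    USER_AGENT = f"parsedmarc/{__version__}"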
splunk/smtp_tls_dashboard.xml (new file, 107 lines)
@@ -0,0 +1,107 @@
+<form version="1.1" theme="dark">
+  <label>SMTP TLS Reporting</label>
+  <fieldset submitButton="false" autoRun="true">
+    <input type="time" token="time">
+      <label></label>
+      <default>
+        <earliest>-7d@h</earliest>
+        <latest>now</latest>
+      </default>
+    </input>
+    <input type="text" token="organization_name" searchWhenChanged="true">
+      <label>Organization name</label>
+      <default>*</default>
+      <initialValue>*</initialValue>
+    </input>
+    <input type="text" token="policy_domain">
+      <label>Policy domain</label>
+      <default>*</default>
+      <initialValue>*</initialValue>
+    </input>
+    <input type="dropdown" token="policy_type" searchWhenChanged="true">
+      <label>Policy type</label>
+      <choice value="*">Any</choice>
+      <choice value="tlsa">tlsa</choice>
+      <choice value="sts">sts</choice>
+      <choice value="no-policy-found">no-policy-found</choice>
+      <default>*</default>
+      <initialValue>*</initialValue>
+    </input>
+  </fieldset>
+  <row>
+    <panel>
+      <title>Reporting organizations</title>
+      <table>
+        <search>
+          <query>index=email sourcetype=smtp:tls organization_name=$organization_name$ policies{}.policy_domain=$policy_domain$
+| rename policies{}.policy_domain as policy_domain
+| rename policies{}.policy_type as policy_type
+| rename policies{}.failed_session_count as failed_sessions
+| rename policies{}.failure_details{}.failed_session_count as failed_sessions
+| rename policies{}.successful_session_count as successful_sessions
+| rename policies{}.failure_details{}.sending_mta_ip as sending_mta_ip
+| rename policies{}.failure_details{}.receiving_ip as receiving_ip
+| rename policies{}.failure_details{}.receiving_mx_hostname as receiving_mx_hostname
+| rename policies{}.failure_details{}.result_type as failure_type
+| fillnull value=0 failed_sessions
+| stats sum(failed_sessions) as failed_sessions sum(successful_sessions) as successful_sessions by organization_name
+| sort -successful_sessions 0</query>
+          <earliest>$time.earliest$</earliest>
+          <latest>$time.latest$</latest>
+        </search>
+        <option name="drilldown">none</option>
+        <option name="refresh.display">progressbar</option>
+      </table>
+    </panel>
+    <panel>
+      <title>Domains</title>
+      <table>
+        <search>
+          <query>index=email sourcetype=smtp:tls organization_name=$organization_name$ policies{}.policy_domain=$policy_domain$
+| rename policies{}.policy_domain as policy_domain
+| rename policies{}.policy_type as policy_type
+| rename policies{}.failed_session_count as failed_sessions
+| rename policies{}.failure_details{}.failed_session_count as failed_sessions
+| rename policies{}.successful_session_count as successful_sessions
+| rename policies{}.failure_details{}.sending_mta_ip as sending_mta_ip
+| rename policies{}.failure_details{}.receiving_ip as receiving_ip
+| rename policies{}.failure_details{}.receiving_mx_hostname as receiving_mx_hostname
+| rename policies{}.failure_details{}.result_type as failure_type
+| fillnull value=0 failed_sessions
+| stats sum(failed_sessions) as failed_sessions sum(successful_sessions) as successful_sessions by policy_domain
+| sort -successful_sessions 0</query>
+          <earliest>$time.earliest$</earliest>
+          <latest>$time.latest$</latest>
+        </search>
+        <option name="drilldown">none</option>
+        <option name="refresh.display">progressbar</option>
+      </table>
+    </panel>
+  </row>
+  <row>
+    <panel>
+      <title>Failure details</title>
+      <table>
+        <search>
+          <query>index=email sourcetype=smtp:tls organization_name=$organization_name$ policies{}.policy_domain=$policy_domain$ policies{}.failure_details{}.result_type=*
+| rename policies{}.policy_domain as policy_domain
+| rename policies{}.policy_type as policy_type
+| rename policies{}.failed_session_count as failed_sessions
+| rename policies{}.failure_details{}.failed_session_count as failed_sessions
+| rename policies{}.successful_session_count as successful_sessions
+| rename policies{}.failure_details{}.sending_mta_ip as sending_mta_ip
+| rename policies{}.failure_details{}.receiving_ip as receiving_ip
+| rename policies{}.failure_details{}.receiving_mx_hostname as receiving_mx_hostname
+| fillnull value=0 failed_sessions
+| rename policies{}.failure_details{}.result_type as failure_type
+| table _time organization_name policy_domain policy_type failed_sessions successful_sessions sending_mta_ip receiving_ip receiving_mx_hostname failure_type
+| sort by -_time 0</query>
+          <earliest>$time.earliest$</earliest>
+          <latest>$time.latest$</latest>
+        </search>
+        <option name="drilldown">none</option>
+        <option name="refresh.display">progressbar</option>
+      </table>
+    </panel>
+  </row>
+</form>
tests.py (58 lines changed, mode: normal file → executable file)
@@ -1,3 +1,6 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
 from __future__ import absolute_import, print_function, unicode_literals
 
 import os
@@ -9,6 +12,9 @@ from lxml import etree
 import parsedmarc
 import parsedmarc.utils
 
+# Detect if running in GitHub Actions to skip DNS lookups
+OFFLINE_MODE = os.environ.get("GITHUB_ACTIONS", "false").lower() == "true"
+
 
 def minify_xml(xml_string):
     parser = etree.XMLParser(remove_blank_text=True)
@@ -43,11 +49,12 @@ class Test(unittest.TestCase):
 
     def testExtractReportXMLComparator(self):
         """Test XML comparator function"""
-        print()
-        xmlnice = open("samples/extract_report/nice-input.xml").read()
-        print(xmlnice)
-        xmlchanged = minify_xml(open("samples/extract_report/changed-input.xml").read())
-        print(xmlchanged)
+        xmlnice_file = open("samples/extract_report/nice-input.xml")
+        xmlnice = xmlnice_file.read()
+        xmlnice_file.close()
+        xmlchanged_file = open("samples/extract_report/changed-input.xml")
+        xmlchanged = minify_xml(xmlchanged_file.read())
+        xmlchanged_file.close()
         self.assertTrue(compare_xml(xmlnice, xmlnice))
         self.assertTrue(compare_xml(xmlchanged, xmlchanged))
         self.assertFalse(compare_xml(xmlnice, xmlchanged))
@@ -62,7 +69,9 @@ class Test(unittest.TestCase):
             data = f.read()
             print("Testing {0}: ".format(file), end="")
             xmlout = parsedmarc.extract_report(data)
-            xmlin = open("samples/extract_report/nice-input.xml").read()
+            xmlin_file = open("samples/extract_report/nice-input.xml")
+            xmlin = xmlin_file.read()
+            xmlin_file.close()
             self.assertTrue(compare_xml(xmlout, xmlin))
             print("Passed!")
 
@@ -71,8 +80,10 @@ class Test(unittest.TestCase):
         print()
         file = "samples/extract_report/nice-input.xml"
         print("Testing {0}: ".format(file), end="")
-        xmlout = parsedmarc.extract_report(file)
-        xmlin = open("samples/extract_report/nice-input.xml").read()
+        xmlout = parsedmarc.extract_report_from_file_path(file)
+        xmlin_file = open("samples/extract_report/nice-input.xml")
+        xmlin = xmlin_file.read()
+        xmlin_file.close()
         self.assertTrue(compare_xml(xmlout, xmlin))
         print("Passed!")
 
@@ -82,7 +93,9 @@ class Test(unittest.TestCase):
         file = "samples/extract_report/nice-input.xml.gz"
         print("Testing {0}: ".format(file), end="")
         xmlout = parsedmarc.extract_report_from_file_path(file)
-        xmlin = open("samples/extract_report/nice-input.xml").read()
+        xmlin_file = open("samples/extract_report/nice-input.xml")
+        xmlin = xmlin_file.read()
+        xmlin_file.close()
         self.assertTrue(compare_xml(xmlout, xmlin))
         print("Passed!")
 
@@ -92,12 +105,13 @@ class Test(unittest.TestCase):
         file = "samples/extract_report/nice-input.xml.zip"
         print("Testing {0}: ".format(file), end="")
         xmlout = parsedmarc.extract_report_from_file_path(file)
-        print(xmlout)
-        xmlin = minify_xml(open("samples/extract_report/nice-input.xml").read())
-        print(xmlin)
+        xmlin_file = open("samples/extract_report/nice-input.xml")
+        xmlin = minify_xml(xmlin_file.read())
+        xmlin_file.close()
         self.assertTrue(compare_xml(xmlout, xmlin))
-        xmlin = minify_xml(open("samples/extract_report/changed-input.xml").read())
-        print(xmlin)
+        xmlin_file = open("samples/extract_report/changed-input.xml")
+        xmlin = xmlin_file.read()
+        xmlin_file.close()
         self.assertFalse(compare_xml(xmlout, xmlin))
         print("Passed!")
 
@@ -110,7 +124,7 @@ class Test(unittest.TestCase):
                 continue
             print("Testing {0}: ".format(sample_path), end="")
             parsed_report = parsedmarc.parse_report_file(
-                sample_path, always_use_local_files=True
+                sample_path, always_use_local_files=True, offline=OFFLINE_MODE
             )["report"]
             parsedmarc.parsed_aggregate_reports_to_csv(parsed_report)
             print("Passed!")
@@ -118,7 +132,7 @@ class Test(unittest.TestCase):
     def testEmptySample(self):
         """Test empty/unparasable report"""
         with self.assertRaises(parsedmarc.ParserError):
-            parsedmarc.parse_report_file("samples/empty.xml")
+            parsedmarc.parse_report_file("samples/empty.xml", offline=OFFLINE_MODE)
 
     def testForensicSamples(self):
         """Test sample forensic/ruf/failure DMARC reports"""
@@ -128,8 +142,12 @@ class Test(unittest.TestCase):
             print("Testing {0}: ".format(sample_path), end="")
             with open(sample_path) as sample_file:
                 sample_content = sample_file.read()
-            parsed_report = parsedmarc.parse_report_email(sample_content)["report"]
-            parsed_report = parsedmarc.parse_report_file(sample_path)["report"]
+            parsed_report = parsedmarc.parse_report_email(
+                sample_content, offline=OFFLINE_MODE
+            )["report"]
+            parsed_report = parsedmarc.parse_report_file(
+                sample_path, offline=OFFLINE_MODE
+            )["report"]
             parsedmarc.parsed_forensic_reports_to_csv(parsed_report)
             print("Passed!")
 
@@ -141,7 +159,9 @@ class Test(unittest.TestCase):
             if os.path.isdir(sample_path):
                 continue
             print("Testing {0}: ".format(sample_path), end="")
-            parsed_report = parsedmarc.parse_report_file(sample_path)["report"]
+            parsed_report = parsedmarc.parse_report_file(
+                sample_path, offline=OFFLINE_MODE
+            )["report"]
            parsedmarc.parsed_smtp_tls_reports_to_csv(parsed_report)
            print("Passed!")
 
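The test suite now derives an OFFLINE_MODE flag from the GITHUB_ACTIONS environment variable and passes it to every parse call, so CI runs skip live DNS lookups. The same pattern can be used outside the tests; the sample path below is illustrative:

    import os

    import parsedmarc

    # skip reverse DNS and other network lookups when running in CI
    offline = os.environ.get("GITHUB_ACTIONS", "false").lower() == "true"

    result = parsedmarc.parse_report_file("samples/aggregate/example.xml", offline=offline)
    report = result["report"]  # the parsed report payload, as used in the tests above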